Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ public Highlight getHighlight() {

/**
 * Sets the highlight section of this changelog entry, and propagates this entry's
 * PR number onto the highlight so that templates can render an {es-pull} link.
 *
 * NOTE(review): the PR number is copied only at this point. If the entry's PR is
 * assigned AFTER setHighlight(...) is called, highlight.pr will be stale — confirm
 * that callers (e.g. YAML deserialization) always set the PR first.
 * NOTE(review): pr is a boxed Integer and may still be null here; a null pr would
 * later throw a NullPointerException when unboxed by
 * Comparator.comparingInt(Highlight::getPr) in the highlights generator — verify
 * the pr field is mandatory for entries that carry a highlight.
 */
public void setHighlight(Highlight highlight) {
this.highlight = highlight;
// Copy the parent entry's PR number into the highlight (skipped when clearing it).
if (this.highlight != null) this.highlight.pr = this.pr;
}

public Breaking getBreaking() {
Expand Down Expand Up @@ -160,6 +161,7 @@ public static class Highlight {
private boolean notable;
private String title;
private String body;
private Integer pr;

public boolean isNotable() {
return notable;
Expand Down Expand Up @@ -189,6 +191,10 @@ public String getAnchor() {
return generatedAnchor(this.title);
}

/**
 * Returns the PR number that was copied from the owning ChangelogEntry when
 * setHighlight(...) was called; used to sort highlights and to render an
 * {es-pull} link in the release-highlights template.
 *
 * @return the pull-request number, or null if it was never propagated — callers
 *         that unbox this value (e.g. Comparator.comparingInt) would then fail
 *         with a NullPointerException.
 */
public Integer getPr() {
return pr;
}

@Override
public boolean equals(Object o) {
if (this == o) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import java.io.IOException;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -50,6 +51,7 @@ static String generateFile(QualifiedVersion version, String template, List<Chang
final Map<Boolean, List<ChangelogEntry.Highlight>> groupedHighlights = entries.stream()
.map(ChangelogEntry::getHighlight)
.filter(Objects::nonNull)
.sorted(Comparator.comparingInt(ChangelogEntry.Highlight::getPr))
.collect(Collectors.groupingBy(ChangelogEntry.Highlight::isNotable, Collectors.toList()));

final List<ChangelogEntry.Highlight> notableHighlights = groupedHighlights.getOrDefault(true, List.of());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,14 @@ if (notableHighlights.isEmpty()) { %>
<% for (highlight in notableHighlights) { %>
[discrete]
[[${ highlight.anchor }]]
=== ${highlight.title}
=== {es-pull}${highlight.pr}[${highlight.title}]
${highlight.body.trim()}
<% } %>
// end::notable-highlights[]
<% } %>
<% for (highlight in nonNotableHighlights) { %>
[discrete]
[[${ highlight.anchor }]]
=== ${highlight.title}
=== {es-pull}${highlight.pr}[${highlight.title}]
${highlight.body.trim()}
<% } %>
Original file line number Diff line number Diff line change
Expand Up @@ -60,31 +60,24 @@ public void generateFile_rendersCorrectMarkup() throws Exception {
}

private List<ChangelogEntry> getEntries() {
ChangelogEntry entry1 = new ChangelogEntry();
ChangelogEntry.Highlight highlight1 = new ChangelogEntry.Highlight();
entry1.setHighlight(highlight1);

highlight1.setNotable(true);
highlight1.setTitle("Notable release highlight number 1");
highlight1.setBody("Notable release body number 1");

ChangelogEntry entry2 = new ChangelogEntry();
ChangelogEntry.Highlight highlight2 = new ChangelogEntry.Highlight();
entry2.setHighlight(highlight2);

highlight2.setNotable(true);
highlight2.setTitle("Notable release highlight number 2");
highlight2.setBody("Notable release body number 2");
ChangelogEntry entry1 = makeChangelogEntry(1, true);
ChangelogEntry entry2 = makeChangelogEntry(2, true);
ChangelogEntry entry3 = makeChangelogEntry(3, false);
// Return unordered list, to test correct re-ordering
return List.of(entry2, entry1, entry3);
}

ChangelogEntry entry3 = new ChangelogEntry();
ChangelogEntry.Highlight highlight3 = new ChangelogEntry.Highlight();
entry3.setHighlight(highlight3);
private ChangelogEntry makeChangelogEntry(int pr, boolean notable) {
ChangelogEntry entry = new ChangelogEntry();
entry.setPr(pr);
ChangelogEntry.Highlight highlight = new ChangelogEntry.Highlight();
entry.setHighlight(highlight);

highlight3.setNotable(false);
highlight3.setTitle("Notable release highlight number 3");
highlight3.setBody("Notable release body number 3");
highlight.setNotable(notable);
highlight.setTitle("Notable release highlight number " + pr);
highlight.setBody("Notable release body number " + pr);

return List.of(entry1, entry2, entry3);
return entry;
}

private String getResource(String name) throws Exception {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,19 @@ Other versions:

[discrete]
[[notable_release_highlight_number_1]]
=== Notable release highlight number 1
=== {es-pull}1[Notable release highlight number 1]
Notable release body number 1

[discrete]
[[notable_release_highlight_number_2]]
=== Notable release highlight number 2
=== {es-pull}2[Notable release highlight number 2]
Notable release body number 2

// end::notable-highlights[]


[discrete]
[[notable_release_highlight_number_3]]
=== Notable release highlight number 3
=== {es-pull}3[Notable release highlight number 3]
Notable release body number 3

8 changes: 8 additions & 0 deletions docs/changelog/84250.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,11 @@ area: Ingest
type: enhancement
issues:
- 84274
highlight:
title: Ingest performance improvement
body: |-
We have improved the pipeline execution logic for pipelines with processors that are synchronous
by avoiding (deep) recursion.
On our nightly benchmark that simulates a Logging use-case, this resulted in a 10% reduction of
CPU time spent on ingest pipelines and a 3% overall ingestion speedup.
notable: true
34 changes: 34 additions & 0 deletions docs/changelog/86596.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
pr: 86596
summary: New geo_grid query to be used with geogrid aggregations
area: Geo
type: feature
issues:
- 85727
highlight:
title: New geo_grid query
body: |-
With the geo grid query, you can now natively return all the documents that overlap a specific geo tile.
There is no need to reconstruct the geometry or the actual boundaries of the spatial cluster as Elasticsearch
can do this for you, which saves you time and reduces complexity.
This is especially useful when geometries are spread across tiles like on a soccer ball or football.
While hexagon tiles line the sphere, calculating the boundary of each tile is not straightforward.

```
GET /example/_search
{
"query": {
"geo_grid" :{
"location" : {
"geotile" : "6/32/22"
}
}
}
}
```

Geo grid query can also help determine the single source of truth of containment.
With geo grid query, you can match exactly the intersection-test of Elasticsearch.
As an example, if a client has bounds for a grid-cell at a higher (or lower) precision than what is used
by Elasticsearch when running a corresponding aggregation, the containment-check might be slightly different.
This side-steps any disconnect based on projection/datum difference between client and Elasticsearch.
notable: true
19 changes: 19 additions & 0 deletions docs/reference/release-notes/8.3.0.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Also see <<breaking-changes-8.3,Breaking changes in 8.3>>.
Aggregations::
* Allow `serial_diff` under `min_doc_count` aggs {es-pull}86401[#86401]
* Allow bucket paths to specify `_count` within a bucket {es-pull}85720[#85720]
* Fix a bug with flattened fields in terms aggregations {es-pull}87392[#87392]
* Fix flaky `top_metrics` test {es-pull}86582[#86582] (issue: {es-issue}86377[#86377])
* Fix: check field existence before trying to merge running stats {es-pull}86926[#86926]
* Fix: ordering terms aggregation on top metrics null values {es-pull}85774[#85774]
Expand Down Expand Up @@ -44,6 +45,7 @@ Distributed::
* Enforce external id uniqueness during `DesiredNode` construction {es-pull}84227[#84227]

Engine::
* Fork to WRITE thread before failing shard in `updateCheckPoints` {es-pull}87458[#87458] (issue: {es-issue}87094[#87094])
* Removing Blocking Wait for Close in `RecoverySourceHandler` {es-pull}86127[#86127] (issue: {es-issue}85839[#85839])

Features::
Expand Down Expand Up @@ -73,6 +75,9 @@ Infra/Core::
Infra/Logging::
* Temporarily provide `SystemPropertiesPropertySource` {es-pull}87149[#87149]

Infra/Node Lifecycle::
* Upgrade folders after settings validation {es-pull}87319[#87319]

Infra/Plugins::
* Use Windows newlines when listing plugin information on Windows {es-pull}86408[#86408] (issue: {es-issue}86352[#86352])

Expand All @@ -82,6 +87,7 @@ Infra/REST API::
Infra/Scripting::
* Allow to sort by script value using `SemVer` semantics {es-pull}85990[#85990] (issues: {es-issue}85989[#85989], {es-issue}82287[#82287])
* Script: Fix setter shortcut for unbridged setters {es-pull}86868[#86868]
* Script: Load Whitelists as Resource {es-pull}87539[#87539]

Infra/Settings::
* Permit removal of archived index settings {es-pull}86107[#86107]
Expand All @@ -97,16 +103,24 @@ License::

Machine Learning::
* Fix ML task auditor exception early in cluster lifecycle {es-pull}87023[#87023] (issue: {es-issue}87002[#87002])
* Fix `WordPiece` tokenization of unknown words with known subwords {es-pull}87510[#87510]
* Fix distribution change check for `change_point` aggregation {es-pull}86423[#86423]
* Fixes inference timeout handling bug that throws unexpected `NullPointerException` {es-pull}87533[#87533]
* Correct logic for restart from failover fine tuning hyperparameters for training classification and regression models {ml-pull}2251[#2251]
* Fix possible source of "x = NaN, distribution = class boost::math::normal_distribution<..." log errors training classification and regression models {ml-pull}2249[#2249]
* Fix some bugs affecting decision to stop optimizing hyperparameters for training classification and regression models {ml-pull}2259[#2259]
* Fix cause of "Must provide points at which to evaluate function" log error training classification and regression models {ml-pull}2268[#2268]
* Fix a source of "Discarding sample = nan, weights = ..." log errors for time series anomaly detection {ml-pull}2286[#2286]

Mapping::
* Don't run `include_in_parent` when in `copy_to` context {es-pull}87123[#87123] (issue: {es-issue}87036[#87036])

Network::
* Reject `openConnection` attempt while closing {es-pull}86315[#86315] (issue: {es-issue}86249[#86249])

Recovery::
* Fail shard if STARTED after master failover {es-pull}87451[#87451] (issue: {es-issue}87367[#87367])

SQL::
* Fix FORMAT function to comply with Microsoft SQL Server specification {es-pull}86225[#86225] (issue: {es-issue}66560[#66560])
* Implement binary format support for SQL clear cursor {es-pull}84230[#84230] (issue: {es-issue}53359[#53359])
Expand All @@ -120,6 +134,7 @@ Search::

Security::
* Make user and role name constraint consistent with max document ID {es-pull}86728[#86728] (issue: {es-issue}66020[#66020])
* Security plugin close releasable realms {es-pull}87429[#87429] (issue: {es-issue}86286[#86286])

Snapshot/Restore::
* DONE should mean fully processed in snapshot status {es-pull}86414[#86414]
Expand Down Expand Up @@ -189,6 +204,7 @@ Geo::
Health::
* Add a basic check for tier preference and allocation filter clashing {es-pull}85071[#85071]
* Add preflight checks to Health API to ensure health is obtainable {es-pull}86404[#86404]
* Add tier information on health api migrate tiers user actions {es-pull}87486[#87486]
* Health api add indicator doc links {es-pull}86904[#86904] (issue: {es-issue}86892[#86892])
* Health api copy editing {es-pull}87010[#87010]
* Return a default user action if no actions could be determined {es-pull}87079[#87079]
Expand Down Expand Up @@ -276,6 +292,9 @@ Transform::
Authorization::
* Has privileges API for profiles {es-pull}85898[#85898]

Geo::
* New geo_grid query to be used with geogrid aggregations {es-pull}86596[#86596] (issue: {es-issue}85727[#85727])

Health::
* Add support for `impact_areas` to health impacts {es-pull}85830[#85830] (issue: {es-issue}85829[#85829])
* Add troubleshooting guides to shards allocation actions {es-pull}87078[#87078]
Expand Down
108 changes: 72 additions & 36 deletions docs/reference/release-notes/highlights.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,49 @@ Other versions:
// tag::notable-highlights[]

[discrete]
[[reading_indices_from_older_elasticsearch_versions]]
=== Reading indices from older Elasticsearch versions
Elasticsearch has full query and write support for indices created in the previous major
version. If you have indices created in Elasticsearch versions 5 or 6, you can now use
the archive functionality to import and query these indices as well.
The archive functionality provides slower read-only access to older data,
for compliance or regulatory reasons, the occasional lookback or investigation,
or to rehydrate parts of it. Access to the data is expected to be infrequent,
and can therefore happen with limited performance and query capabilities.
[[ingest_performance_improvement]]
=== {es-pull}84250[Ingest performance improvement]
We have improved the pipeline execution logic for pipelines with processors that are synchronous
by avoiding (deep) recursion.
On our nightly benchmark that simulates a Logging use-case, this resulted in a 10% reduction of
CPU time spent on ingest pipelines and a 3% overall ingestion speedup.

[discrete]
[[execute_self_reference_checks_once_per_pipeline]]
=== {es-pull}85926[Execute self-reference checks once per pipeline]
Ingest pipelines have a mechanism to prevent circular references in the records
they process, so that they are serializable. Prior to this change, this check was
performed after each `script` processor execution, and was ignorable.

Because of this check, a script processor configured with
```
"source": """
def x = ctx;
ctx.x = x;
"""
```

...would error with `"type" : "illegal_argument_exception", "reason" : "Iterable
object is self-referencing itself (ingest script)"`.

If the script processor also had
```
"ignore_failure" true
```

...then the handling thread would actually crash from an unrecoverable
StackOverflowError when trying to serialize the resulting event.

Now, this check is performed once per pipeline, remediating the potential for
a StackOverflowError. There are some side effects also:

- The resulting error message specifies which pipeline is causing the issue.
- There is a slight speed up for pipelines with multiple scripts, and a slight
slowdown for pipelines without scripts.

[discrete]
[[add_support_for_dots_in_field_names_for_metrics_usecases]]
=== Add support for dots in field names for metrics usecases
=== {es-pull}86166[Add support for dots in field names for metrics usecases]
Metrics data can often be made of several fields with dots in their names,
sharing common prefixes, like in the following example:

Expand Down Expand Up @@ -71,37 +101,43 @@ without expanding dots in field names to the corresponding object structure.
That makes it possible to store the metrics document above.

[discrete]
[[execute_self_reference_checks_once_per_pipeline]]
=== Execute self-reference checks once per pipeline
Ingest pipelines have a mechanism to prevent circular references in the records
they process, so that they are serializable. Prior to this change, this check was
performed after each `script` processor execution, and was ignorable.

Because of this check, a script processor configured with
```
"source": """
def x = ctx;
ctx.x = x;
"""
```
[[reading_indices_from_older_elasticsearch_versions]]
=== {es-pull}86261[Reading indices from older Elasticsearch versions]
Elasticsearch has full query and write support for indices created in the previous major
version. If you have indices created in Elasticsearch versions 5 or 6, you can now use
the archive functionality to import and query these indices as well.
The archive functionality provides slower read-only access to older data,
for compliance or regulatory reasons, the occasional lookback or investigation,
or to rehydrate parts of it. Access to the data is expected to be infrequent,
and can therefore happen with limited performance and query capabilities.

...would error with `"type" : "illegal_argument_exception", "reason" : "Iterable
object is self-referencing itself (ingest script)"`.
[discrete]
[[new_geo_grid_query]]
=== {es-pull}86596[New geo_grid query]
With the geo grid query, you can now natively return all the documents that overlap a specific geo tile.
There is no need to reconstruct the geometry or the actual boundaries of the spatial cluster as Elasticsearch
can do this for you, which saves you time and reduces complexity.
This is especially useful when geometries are spread across tiles like on a soccer ball or football.
While hexagon tiles line the sphere, calculating the boundary of each tile is not straightforward.

If the script processor also had
```
"ignore_failure" true
GET /example/_search
{
"query": {
"geo_grid" :{
"location" : {
"geotile" : "6/32/22"
}
}
}
}
```

...then the handling thread would actually crash from an unrecoverable
StackOverflowError when trying to serialize the resulting event.

Now, this check is performed once per pipeline, remediating the potential for
a StackOverflowError. There are some side effects also:

- The resulting error message specifies which pipeline is causing the issue.
- There is a slight speed up for pipelines with multiple scripts, and a slight
slowdown for pipelines without scripts.
Geo grid query can also help determine the single source of truth of containment.
With geo grid query, you can match exactly the intersection-test of Elasticsearch.
As an example, if a client has bounds for a grid-cell at a higher (or lower) precision than what is used
by Elasticsearch when running a corresponding aggregation, the containment-check might be slightly different.
This side-steps any disconnect based on projection/datum difference between client and Elasticsearch.

// end::notable-highlights[]

Expand Down