From 8fd1b973695f55fb38c718d9bd4cb41be9dea15f Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Fri, 28 Apr 2017 18:37:28 +0200 Subject: [PATCH 1/2] Add support for early termination of search request Relates #6720 This change introduce early termination of search request for indices sorted by specific fields. When the index is sorted, the option called `early_terminate` indicates that top documents must be sorted by the index sort criteria and that only the top N documents per segment should be visited. Let's say for example that we have an index sorted by timestamp: ``` PUT events { "settings" : { "index" : { "sort.field" : "timestamp", "sort.order" : "desc" <2> } }, "mappings": { "doc": { "properties": { "timestamp": { "type": "date" } } } } } ``` ... it is then possible to retrieve the N last events without visiting all the documents in the index with the following query: ``` GET /events/_search { "size": 10, "early_terminate": true } ``` The `sort` of this search request is automatically set to the index sort and each segment will visit the first 10 matching documents at most. --- .../action/search/SearchRequestBuilder.java | 9 ++ .../elasticsearch/common/lucene/Lucene.java | 10 +++ .../org/elasticsearch/index/IndexService.java | 5 +- .../elasticsearch/index/IndexSortConfig.java | 10 ++- .../elasticsearch/index/shard/IndexShard.java | 12 +-- .../index/shard/StoreRecovery.java | 6 +- .../search/DefaultSearchContext.java | 11 +++ .../elasticsearch/search/SearchService.java | 31 +++++++ .../search/builder/SearchSourceBuilder.java | 26 ++++++ .../internal/FilteredSearchContext.java | 10 +++ .../search/internal/SearchContext.java | 4 + .../search/profile/query/CollectorResult.java | 1 + .../search/query/QueryPhase.java | 10 ++- .../search/simple/SimpleSearchIT.java | 37 ++++++++ .../index-modules/index-sorting.asciidoc | 80 ++++++++++++++++- docs/reference/search/request-body.asciidoc | 8 ++ .../test/indices.sort/10_basic.yaml | 89 ++++++++++++++++++- .../elasticsearch/test/TestSearchContext.java | 11 +++ 18 files changed, 354 insertions(+), 16 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/action/search/SearchRequestBuilder.java b/core/src/main/java/org/elasticsearch/action/search/SearchRequestBuilder.java index ffe2c1b20c516..743972906b277 100644 --- a/core/src/main/java/org/elasticsearch/action/search/SearchRequestBuilder.java +++ b/core/src/main/java/org/elasticsearch/action/search/SearchRequestBuilder.java @@ -126,6 +126,15 @@ public SearchRequestBuilder setTerminateAfter(int terminateAfter) { return this; } + /** + * Indicates whether the search should early terminate based on the index sorting. + * The search sort must not be set since documents will be returned sorted by the index sorting criteria. + */ + public SearchRequestBuilder setEarlyTerminate(boolean value) { + sourceBuilder().earlyTerminate(value); + return this; + } + /** * A comma separated list of routing values to control the shards the search will be executed on. */ diff --git a/core/src/main/java/org/elasticsearch/common/lucene/Lucene.java b/core/src/main/java/org/elasticsearch/common/lucene/Lucene.java index c213c384611f5..3d206e2f11e56 100644 --- a/core/src/main/java/org/elasticsearch/common/lucene/Lucene.java +++ b/core/src/main/java/org/elasticsearch/common/lucene/Lucene.java @@ -42,6 +42,7 @@ import org.apache.lucene.index.SegmentInfos; import org.apache.lucene.search.Collector; import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.EarlyTerminatingSortingCollector; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.IndexSearcher; @@ -50,6 +51,7 @@ import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.SimpleCollector; +import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortedNumericSortField; import org.apache.lucene.search.SortedSetSortField; @@ -259,6 +261,14 @@ public static final TimeLimitingCollector wrapTimeLimitingCollector(final Collec return new TimeLimitingCollector(delegate, counter, timeoutInMillis); } + /** + * Wraps delegate with segment count based early termination sorting collector with a threshold of maxHitsPerSegment + */ + public static final EarlyTerminatingSortingCollector wrapCountBasedEarlyTerminatingSortingCollector(final Collector delegate, final Sort sort, int maxHitsPerSegment) { + return new EarlyTerminatingSortingCollector(delegate, sort, maxHitsPerSegment); + } + + /** * Check whether there is one or more documents matching the provided query. */ diff --git a/core/src/main/java/org/elasticsearch/index/IndexService.java b/core/src/main/java/org/elasticsearch/index/IndexService.java index 9a24f8eb68df7..b838194d68ee3 100644 --- a/core/src/main/java/org/elasticsearch/index/IndexService.java +++ b/core/src/main/java/org/elasticsearch/index/IndexService.java @@ -67,6 +67,7 @@ import org.elasticsearch.indices.fielddata.cache.IndicesFieldDataCache; import org.elasticsearch.indices.mapper.MapperRegistry; import org.elasticsearch.script.ScriptService; +import org.elasticsearch.search.sort.SortAndFormats; import org.elasticsearch.threadpool.ThreadPool; import java.io.Closeable; @@ -120,7 +121,7 @@ public class IndexService extends AbstractIndexComponent implements IndicesClust private final ScriptService scriptService; private final ClusterService clusterService; private final Client client; - private Supplier indexSortSupplier; + private Supplier indexSortSupplier; public IndexService(IndexSettings indexSettings, NodeEnvironment nodeEnv, NamedXContentRegistry xContentRegistry, @@ -255,7 +256,7 @@ public SimilarityService similarityService() { return similarityService; } - public Supplier getIndexSortSupplier() { + public Supplier getIndexSortSupplier() { return indexSortSupplier; } diff --git a/core/src/main/java/org/elasticsearch/index/IndexSortConfig.java b/core/src/main/java/org/elasticsearch/index/IndexSortConfig.java index 1d3f5f0fc23ea..a102819358b9b 100644 --- a/core/src/main/java/org/elasticsearch/index/IndexSortConfig.java +++ b/core/src/main/java/org/elasticsearch/index/IndexSortConfig.java @@ -28,7 +28,9 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.search.DocValueFormat; import org.elasticsearch.search.MultiValueMode; +import org.elasticsearch.search.sort.SortAndFormats; import org.elasticsearch.search.sort.SortOrder; import java.util.Collections; @@ -175,13 +177,14 @@ public boolean hasIndexSort() { * Builds the {@link Sort} order from the settings for this index * or returns null if this index has no sort. */ - public Sort buildIndexSort(Function fieldTypeLookup, - Function> fieldDataLookup) { + public SortAndFormats buildIndexSort(Function fieldTypeLookup, + Function> fieldDataLookup) { if (hasIndexSort() == false) { return null; } final SortField[] sortFields = new SortField[sortSpecs.length]; + final DocValueFormat[] docValueFormats = new DocValueFormat[sortSpecs.length]; for (int i = 0; i < sortSpecs.length; i++) { FieldSortSpec sortSpec = sortSpecs[i]; final MappedFieldType ft = fieldTypeLookup.apply(sortSpec.field); @@ -203,9 +206,10 @@ public Sort buildIndexSort(Function fieldTypeLookup, throw new IllegalArgumentException("docvalues not found for index sort field:[" + sortSpec.field + "]"); } sortFields[i] = fieldData.sortField(sortSpec.missingValue, mode, null, reverse); + docValueFormats[i] = ft.docValueFormat(null, null); validateIndexSortField(sortFields[i]); } - return new Sort(sortFields); + return new SortAndFormats(new Sort(sortFields), docValueFormats); } private void validateIndexSortField(SortField sortField) { diff --git a/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java b/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java index 1da5e6763bc66..8714a4b92e890 100644 --- a/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java +++ b/core/src/main/java/org/elasticsearch/index/shard/IndexShard.java @@ -124,6 +124,7 @@ import org.elasticsearch.indices.recovery.RecoveryState; import org.elasticsearch.repositories.RepositoriesService; import org.elasticsearch.repositories.Repository; +import org.elasticsearch.search.sort.SortAndFormats; import org.elasticsearch.search.suggest.completion.CompletionFieldStats; import org.elasticsearch.search.suggest.completion.CompletionStats; import org.elasticsearch.threadpool.ThreadPool; @@ -174,7 +175,7 @@ public class IndexShard extends AbstractIndexShardComponent implements IndicesCl private final TranslogConfig translogConfig; private final IndexEventListener indexEventListener; private final QueryCachingPolicy cachingPolicy; - private final Supplier indexSortSupplier; + private final Supplier indexSortSupplier; /** * How many bytes we are currently moving to disk, via either IndexWriter.flush or refresh. IndexingMemoryController polls this @@ -230,7 +231,7 @@ public class IndexShard extends AbstractIndexShardComponent implements IndicesCl private final RefreshListeners refreshListeners; public IndexShard(ShardRouting shardRouting, IndexSettings indexSettings, ShardPath path, Store store, - Supplier indexSortSupplier, IndexCache indexCache, MapperService mapperService, SimilarityService similarityService, + Supplier indexSortSupplier, IndexCache indexCache, MapperService mapperService, SimilarityService similarityService, IndexFieldDataService indexFieldDataService, @Nullable EngineFactory engineFactory, IndexEventListener indexEventListener, IndexSearcherWrapper indexSearcherWrapper, ThreadPool threadPool, BigArrays bigArrays, Engine.Warmer warmer, Runnable globalCheckpointSyncer, List searchOperationListener, List listeners) throws IOException { @@ -297,7 +298,7 @@ public Store store() { /** * Return the sort order of this index, or null if the index has no sort. */ - public Sort getIndexSort() { + public SortAndFormats getIndexSort() { return indexSortSupplier.get(); } /** @@ -1818,11 +1819,12 @@ private DocumentMapperForType docMapper(String type) { private EngineConfig newEngineConfig(EngineConfig.OpenMode openMode) { final IndexShardRecoveryPerformer translogRecoveryPerformer = new IndexShardRecoveryPerformer(shardId, mapperService, logger); - Sort indexSort = indexSortSupplier.get(); + final SortAndFormats indexSort = indexSortSupplier.get(); return new EngineConfig(openMode, shardId, threadPool, indexSettings, warmer, store, deletionPolicy, indexSettings.getMergePolicy(), mapperService.indexAnalyzer(), similarityService.similarity(mapperService), codecService, shardEventListener, translogRecoveryPerformer, indexCache.query(), cachingPolicy, translogConfig, - IndexingMemoryController.SHARD_INACTIVE_TIME_SETTING.get(indexSettings.getSettings()), refreshListeners, indexSort); + IndexingMemoryController.SHARD_INACTIVE_TIME_SETTING.get(indexSettings.getSettings()), refreshListeners, + indexSort == null ? null : indexSort.sort); } /** diff --git a/core/src/main/java/org/elasticsearch/index/shard/StoreRecovery.java b/core/src/main/java/org/elasticsearch/index/shard/StoreRecovery.java index 5d5e17c19291b..a9b740ff26cb1 100644 --- a/core/src/main/java/org/elasticsearch/index/shard/StoreRecovery.java +++ b/core/src/main/java/org/elasticsearch/index/shard/StoreRecovery.java @@ -46,6 +46,7 @@ import org.elasticsearch.indices.recovery.RecoveryState; import org.elasticsearch.repositories.IndexId; import org.elasticsearch.repositories.Repository; +import org.elasticsearch.search.sort.SortAndFormats; import java.io.IOException; import java.util.Arrays; @@ -110,12 +111,13 @@ boolean recoverFromLocalShards(BiConsumer mappingUpdate } indexShard.mapperService().merge(indexMetaData, MapperService.MergeReason.MAPPING_RECOVERY, true); // now that the mapping is merged we can validate the index sort configuration. - Sort indexSort = indexShard.getIndexSort(); + SortAndFormats indexSort = indexShard.getIndexSort(); return executeRecovery(indexShard, () -> { logger.debug("starting recovery from local shards {}", shards); try { final Directory directory = indexShard.store().directory(); // don't close this directory!! - addIndices(indexShard.recoveryState().getIndex(), directory, indexSort, + addIndices(indexShard.recoveryState().getIndex(), directory, + indexSort == null ? null : indexSort.sort, shards.stream().map(s -> s.getSnapshotDirectory()) .collect(Collectors.toList()).toArray(new Directory[shards.size()])); internalRecoverFromStore(indexShard); diff --git a/core/src/main/java/org/elasticsearch/search/DefaultSearchContext.java b/core/src/main/java/org/elasticsearch/search/DefaultSearchContext.java index 6619c1ab9e5a9..a3c0b8466a01c 100644 --- a/core/src/main/java/org/elasticsearch/search/DefaultSearchContext.java +++ b/core/src/main/java/org/elasticsearch/search/DefaultSearchContext.java @@ -100,6 +100,7 @@ final class DefaultSearchContext extends SearchContext { private TimeValue timeout; // terminate after count private int terminateAfter = DEFAULT_TERMINATE_AFTER; + private boolean earlyTerminate = false; private List groupStats; private ScrollContext scrollContext; private boolean explain; @@ -514,6 +515,16 @@ public void terminateAfter(int terminateAfter) { this.terminateAfter = terminateAfter; } + @Override + public boolean earlyTerminate() { + return earlyTerminate; + } + + @Override + public void earlyTerminate(boolean value) { + this.earlyTerminate = value; + } + @Override public SearchContext minimumScore(float minimumScore) { this.minimumScore = minimumScore; diff --git a/core/src/main/java/org/elasticsearch/search/SearchService.java b/core/src/main/java/org/elasticsearch/search/SearchService.java index b1192c59e4cc7..51c14833a5949 100644 --- a/core/src/main/java/org/elasticsearch/search/SearchService.java +++ b/core/src/main/java/org/elasticsearch/search/SearchService.java @@ -40,6 +40,7 @@ import org.elasticsearch.index.Index; import org.elasticsearch.index.IndexService; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.IndexSortConfig; import org.elasticsearch.index.engine.Engine; import org.elasticsearch.index.query.InnerHitBuilder; import org.elasticsearch.index.query.QueryShardContext; @@ -726,6 +727,36 @@ private void parseSource(DefaultSearchContext context, SearchSourceBuilder sourc final CollapseContext collapseContext = source.collapse().build(context); context.collapse(collapseContext); } + + if (source.earlyTerminate()) { + IndexSortConfig sortConfig = context.mapperService().getIndexSettings().getIndexSortConfig(); + if (sortConfig == null || sortConfig.hasIndexSort() == false) { + throw new SearchContextException(context, + "cannot use `early_terminate` when index sorting is not set"); + } + if (context.scrollContext() != null) { + throw new SearchContextException(context, + "`early_terminate` cannot be used in a scroll context"); + } + if (context.terminateAfter() != SearchContext.DEFAULT_TERMINATE_AFTER) { + throw new SearchContextException(context, + "cannot use `early_terminate` in conjunction with `terminate_after`"); + } + if (context.searchAfter() != null) { + throw new SearchContextException(context, + "cannot use `early_terminate` in conjunction with `search_after"); + } + SortAndFormats sortAndFormats = sortConfig.buildIndexSort(context.mapperService()::fullName, + context.fieldData()::getForField); + if (context.sort() != null && context.sort().sort.equals(sortAndFormats.sort) == false) { + throw new SearchContextException(context, "cannot use `early_terminate` when the search sort [" + + context.sort().sort + "] is different than the index sorting [" + sortAndFormats.sort + "]"); + } else { + // the search sort is null or equals to index sorting so it's safe to override it here + context.sort(sortAndFormats); + } + context.earlyTerminate(true); + } } /** diff --git a/core/src/main/java/org/elasticsearch/search/builder/SearchSourceBuilder.java b/core/src/main/java/org/elasticsearch/search/builder/SearchSourceBuilder.java index 37d7eb5b02756..7cc041156329d 100644 --- a/core/src/main/java/org/elasticsearch/search/builder/SearchSourceBuilder.java +++ b/core/src/main/java/org/elasticsearch/search/builder/SearchSourceBuilder.java @@ -77,6 +77,7 @@ public final class SearchSourceBuilder extends ToXContentToBytes implements Writ public static final ParseField SIZE_FIELD = new ParseField("size"); public static final ParseField TIMEOUT_FIELD = new ParseField("timeout"); public static final ParseField TERMINATE_AFTER_FIELD = new ParseField("terminate_after"); + public static final ParseField EARLY_TERMINATE_FIELD = new ParseField("early_terminate"); public static final ParseField QUERY_FIELD = new ParseField("query"); public static final ParseField POST_FILTER_FIELD = new ParseField("post_filter"); public static final ParseField MIN_SCORE_FIELD = new ParseField("min_score"); @@ -149,6 +150,7 @@ public static HighlightBuilder highlight() { private TimeValue timeout = null; private int terminateAfter = SearchContext.DEFAULT_TERMINATE_AFTER; + private boolean earlyTerminate = false; private StoredFieldsContext storedFieldsContext; private List docValueFields; @@ -223,6 +225,9 @@ public SearchSourceBuilder(StreamInput in) throws IOException { if (in.getVersion().onOrAfter(Version.V_5_3_0_UNRELEASED)) { collapse = in.readOptionalWriteable(CollapseBuilder::new); } + if (in.getVersion().onOrAfter(Version.V_6_0_0_alpha1_UNRELEASED)) { + earlyTerminate = in.readBoolean(); + } } @Override @@ -274,6 +279,9 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getVersion().onOrAfter(Version.V_5_3_0_UNRELEASED)) { out.writeOptionalWriteable(collapse); } + if (out.getVersion().onOrAfter(Version.V_6_0_0_alpha1_UNRELEASED)) { + out.writeBoolean(earlyTerminate); + } } /** @@ -426,6 +434,22 @@ public int terminateAfter() { return terminateAfter; } + /** + * Indicates whether the search should early terminate based on the index sorting. + * The search sort must not be set since documents will be returned sorted by the index sorting criteria. + */ + public SearchSourceBuilder earlyTerminate(boolean value) { + this.earlyTerminate = value; + return this; + } + + /** + * Returns whether the search should early terminate based on the index sorting + */ + public boolean earlyTerminate() { + return earlyTerminate; + } + /** * Adds a sort against the given field name and the sort ordering. * @@ -955,6 +979,8 @@ public void parseXContent(QueryParseContext context) throws IOException { timeout = TimeValue.parseTimeValue(parser.text(), null, TIMEOUT_FIELD.getPreferredName()); } else if (TERMINATE_AFTER_FIELD.match(currentFieldName)) { terminateAfter = parser.intValue(); + } else if (EARLY_TERMINATE_FIELD.match(currentFieldName)) { + earlyTerminate = parser.booleanValue(); } else if (MIN_SCORE_FIELD.match(currentFieldName)) { minScore = parser.floatValue(); } else if (VERSION_FIELD.match(currentFieldName)) { diff --git a/core/src/main/java/org/elasticsearch/search/internal/FilteredSearchContext.java b/core/src/main/java/org/elasticsearch/search/internal/FilteredSearchContext.java index fadf979d911d2..92149df89b2cf 100644 --- a/core/src/main/java/org/elasticsearch/search/internal/FilteredSearchContext.java +++ b/core/src/main/java/org/elasticsearch/search/internal/FilteredSearchContext.java @@ -286,6 +286,16 @@ public void terminateAfter(int terminateAfter) { in.terminateAfter(terminateAfter); } + @Override + public boolean earlyTerminate() { + return in.earlyTerminate(); + } + + @Override + public void earlyTerminate(boolean value) { + in.earlyTerminate(value); + } + @Override public boolean lowLevelCancellation() { return in.lowLevelCancellation(); diff --git a/core/src/main/java/org/elasticsearch/search/internal/SearchContext.java b/core/src/main/java/org/elasticsearch/search/internal/SearchContext.java index ebb2157d981e7..27e4c6ed1a835 100644 --- a/core/src/main/java/org/elasticsearch/search/internal/SearchContext.java +++ b/core/src/main/java/org/elasticsearch/search/internal/SearchContext.java @@ -220,6 +220,10 @@ public InnerHitsContext innerHits() { public abstract void terminateAfter(int terminateAfter); + public abstract boolean earlyTerminate(); + + public abstract void earlyTerminate(boolean value); + /** * Indicates if the current index should perform frequent low level search cancellation check. * diff --git a/core/src/main/java/org/elasticsearch/search/profile/query/CollectorResult.java b/core/src/main/java/org/elasticsearch/search/profile/query/CollectorResult.java index 1fa56bde7fe95..578c213a2f63e 100644 --- a/core/src/main/java/org/elasticsearch/search/profile/query/CollectorResult.java +++ b/core/src/main/java/org/elasticsearch/search/profile/query/CollectorResult.java @@ -47,6 +47,7 @@ public class CollectorResult implements ToXContentObject, Writeable { public static final String REASON_SEARCH_COUNT = "search_count"; public static final String REASON_SEARCH_TOP_HITS = "search_top_hits"; public static final String REASON_SEARCH_TERMINATE_AFTER_COUNT = "search_terminate_after_count"; + public static final String REASON_SEARCH_SORTING_EARLY_TERMINATION = "search_sorting_early_terminate"; public static final String REASON_SEARCH_POST_FILTER = "search_post_filter"; public static final String REASON_SEARCH_MIN_SCORE = "search_min_score"; public static final String REASON_SEARCH_MULTI = "search_multi"; diff --git a/core/src/main/java/org/elasticsearch/search/query/QueryPhase.java b/core/src/main/java/org/elasticsearch/search/query/QueryPhase.java index 272c57fe98024..301c13eca643a 100644 --- a/core/src/main/java/org/elasticsearch/search/query/QueryPhase.java +++ b/core/src/main/java/org/elasticsearch/search/query/QueryPhase.java @@ -42,7 +42,6 @@ import org.apache.lucene.search.TotalHitCountCollector; import org.apache.lucene.search.Weight; import org.apache.lucene.search.grouping.CollapsingTopDocsCollector; -import org.elasticsearch.action.search.SearchType; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.lucene.MinimumScoreCollector; import org.elasticsearch.common.lucene.search.FilteredCollector; @@ -265,6 +264,7 @@ static boolean execute(SearchContext searchContext, final IndexSearcher searcher } final boolean terminateAfterSet = searchContext.terminateAfter() != SearchContext.DEFAULT_TERMINATE_AFTER; + final boolean earlyTerminateSet = searchContext.earlyTerminate(); if (terminateAfterSet) { final Collector child = collector; // throws Lucene.EarlyTerminationException when given count is reached @@ -273,6 +273,14 @@ static boolean execute(SearchContext searchContext, final IndexSearcher searcher collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_TERMINATE_AFTER_COUNT, Collections.singletonList((InternalProfileCollector) child)); } + } else if (earlyTerminateSet) { + assert searchContext.sort() != null; + final Collector child = collector; + collector = Lucene.wrapCountBasedEarlyTerminatingSortingCollector(collector, searchContext.sort().sort, numDocs); + if (doProfile) { + collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_SORTING_EARLY_TERMINATION, + Collections.singletonList((InternalProfileCollector) child)); + } } if (searchContext.parsedPostFilter() != null) { diff --git a/core/src/test/java/org/elasticsearch/search/simple/SimpleSearchIT.java b/core/src/test/java/org/elasticsearch/search/simple/SimpleSearchIT.java index f63f13b6dd24b..d8abcb50c73c4 100644 --- a/core/src/test/java/org/elasticsearch/search/simple/SimpleSearchIT.java +++ b/core/src/test/java/org/elasticsearch/search/simple/SimpleSearchIT.java @@ -51,6 +51,7 @@ import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures; import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; public class SimpleSearchIT extends ESIntegTestCase { @@ -288,6 +289,42 @@ public void testSimpleTerminateAfterCount() throws Exception { assertFalse(searchResponse.isTerminatedEarly()); } + public void testSimpleSortedEarlyTerminate() throws Exception { + prepareCreate("test") + .setSettings(Settings.builder() + .put(SETTING_NUMBER_OF_SHARDS, 1) + .put(SETTING_NUMBER_OF_REPLICAS, 0) + .put("index.sort.field", "rank") + ) + .addMapping("type1", "rank", "type=integer") + .get(); + ensureGreen(); + int max = randomIntBetween(3, 29); + List docbuilders = new ArrayList<>(max); + + for (int i = max-1; i >= 0; i--) { + String id = String.valueOf(i); + docbuilders.add(client().prepareIndex("test", "type1", id).setSource("rank", i)); + } + + indexRandom(true, docbuilders); + ensureGreen(); + refresh(); + + SearchResponse searchResponse; + + for (int i = 1; i < max; i++) { + searchResponse = client().prepareSearch("test") + .addDocValueField("rank") + .setEarlyTerminate(true).setSize(i).execute().actionGet(); + assertThat(searchResponse.getHits().getHits().length, equalTo(i)); + for (int j = 0; j < i; j++) { + assertThat(searchResponse.getHits().getAt(j).field("rank").getValue(), + equalTo((long) j)); + } + } + } + public void testInsaneFromAndSize() throws Exception { createIndex("idx"); indexRandom(true, client().prepareIndex("idx", "type").setSource("{}", XContentType.JSON)); diff --git a/docs/reference/index-modules/index-sorting.asciidoc b/docs/reference/index-modules/index-sorting.asciidoc index 0c2b5c9abe979..8d147dc997804 100644 --- a/docs/reference/index-modules/index-sorting.asciidoc +++ b/docs/reference/index-modules/index-sorting.asciidoc @@ -104,4 +104,82 @@ Index sorting supports the following settings: [WARNING] Index sorting can be defined only once at index creation. It is not allowed to add or update -a sort on an existing index. +a sort on an existing index. Index sorting also has a cost in terms of indexing throughput since +documents must be sorted at flush and merge time. You should test the impact on your application +before activating this feature. + +[float] +[[early-terminate]] +=== Early termination of search request + +By default in elasticsearch a search request must visit every document that match a query to +retrieve the top documents sorted by a specified sort. +Though when the index sort and the search sort are the same it is possible to limit +the number of documents that should be visited per segment to retrieve the N top ranked documents. +Since each segment is sorted in the index we only need to visit the N first documents per segment +to retrieve the final N documents of that shard. +For example, let's say we have an index that contains events sorted by a timestamp field: + +[source,js] +-------------------------------------------------- +PUT events +{ + "settings" : { + "index" : { + "sort.field" : "timestamp", + "sort.order" : "desc" <2> + } + }, + "mappings": { + "doc": { + "properties": { + "timestamp": { + "type": "date" + } + } + } + } +} +-------------------------------------------------- +// CONSOLE + +<1> This index is sorted by timestamp in descending order (most recent first) + +You can search for the last 10 events with: + +[source,js] +-------------------------------------------------- +GET /events/_search +{ + "size": 10, + "sort": [ + { "timestamp": "desc" } + ] +} +-------------------------------------------------- +// CONSOLE +// TEST[continued] + +... but this will search the entire index. +If you're only looking for the last 10 events and have no interest in +the total number of documents that match the query you can use `early_terminate` +to restrict the search to the first 10 documents per segment: + +[source,js] +-------------------------------------------------- +GET /events/_search +{ + "size": 10, + "early_terminate": true <1> +} +-------------------------------------------------- +// CONSOLE +// TEST[continued] + +<1> The index sort will be used to rank the top documents and each segment will early terminate the collection after the first 10 matches. + +[WARNING] +Aggregations should not be used when `early_terminate` is set because only the top ranked documents are going +to be collected resulting in partial results in the buckets. Similarly the total number of documents returned by +a request with `early_terminate` set to true does not take all the documents into account. + diff --git a/docs/reference/search/request-body.asciidoc b/docs/reference/search/request-body.asciidoc index 13df8d471eb0b..45cfef04e41b9 100644 --- a/docs/reference/search/request-body.asciidoc +++ b/docs/reference/search/request-body.asciidoc @@ -93,6 +93,14 @@ And here is a sample response: the query execution has actually terminated_early. Defaults to no terminate_after. +`early_terminate`:: + + Set to `true` to limit the search to the first `size` documents per segment. + This option can only be used when the index is sorted using the same sort than + the request. It is also possible to omit `sort` in the request which will cause + the documents to be sorted by the index sort automatically in the response. + See <> for more. + Out of the above, the `search_type` and the `request_cache` must be passed as query-string parameters. The rest of the search request should be passed diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/indices.sort/10_basic.yaml b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.sort/10_basic.yaml index 679b4f4e535c7..1b63dce669ca4 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/indices.sort/10_basic.yaml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.sort/10_basic.yaml @@ -51,6 +51,62 @@ indices.refresh: index: test + - do: + index: + index: test + type: test + id: "5" + body: { "rank": 7 } + + - do: + index: + index: test + type: test + id: "6" + body: { "rank": 5 } + + - do: + index: + index: test + type: test + id: "7" + body: { "rank": 4 } + + - do: + index: + index: test + type: test + id: "8" + body: { "rank": 6 } + + - do: + indices.refresh: + index: test + + - do: + search: + index: test + type: test + body: + early_terminate: true + size: 1 + + - length: {hits.hits: 1 } + - match: {hits.hits.0._id: "2" } + + - do: + search: + index: test + type: test + body: + early_terminate: true + size: 3 + + - length: {hits.hits: 3 } + - match: {hits.hits.0._id: "2" } + - match: {hits.hits.1._id: "4" } + - match: {hits.hits.2._id: "3" } + - do: indices.forcemerge: index: test @@ -67,9 +123,38 @@ body: sort: _doc - - match: {hits.total: 4 } - - length: {hits.hits: 4 } + - match: {hits.total: 8 } + - length: {hits.hits: 8 } - match: {hits.hits.0._id: "2" } - match: {hits.hits.1._id: "4" } - match: {hits.hits.2._id: "3" } - match: {hits.hits.3._id: "1" } + - match: {hits.hits.4._id: "7" } + - match: {hits.hits.5._id: "6" } + - match: {hits.hits.6._id: "8" } + - match: {hits.hits.7._id: "5" } + + - do: + search: + index: test + type: test + body: + size: 3 + early_terminate: true + + - match: {hits.total: 3 } + - length: {hits.hits: 3 } + - match: {hits.hits.0._id: "2" } + - match: {hits.hits.1._id: "4" } + - match: {hits.hits.2._id: "3" } + + - do: + catch: /cannot use `early_terminate` when the search sort.+/ + search: + index: test + type: test + body: + size: 3 + sort: _doc + early_terminate: true + diff --git a/test/framework/src/main/java/org/elasticsearch/test/TestSearchContext.java b/test/framework/src/main/java/org/elasticsearch/test/TestSearchContext.java index f77414e7d50b3..9b37cc590266a 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/TestSearchContext.java +++ b/test/framework/src/main/java/org/elasticsearch/test/TestSearchContext.java @@ -85,6 +85,7 @@ public class TestSearchContext extends SearchContext { ContextIndexSearcher searcher; int size; private int terminateAfter = DEFAULT_TERMINATE_AFTER; + private boolean earlyTerminate = false; private SearchContextAggregations aggregations; private final long originNanoTime = System.nanoTime(); @@ -318,6 +319,16 @@ public void terminateAfter(int terminateAfter) { this.terminateAfter = terminateAfter; } + @Override + public boolean earlyTerminate() { + return earlyTerminate; + } + + @Override + public void earlyTerminate(boolean value) { + this.earlyTerminate = value; + } + @Override public boolean lowLevelCancellation() { return false; From cdb5b37234b621589a217047a205011a3bfe9ea5 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Fri, 28 Apr 2017 18:50:21 +0200 Subject: [PATCH 2/2] Fixed failing rest test --- .../rest-api-spec/test/indices.sort/10_basic.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/indices.sort/10_basic.yaml b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.sort/10_basic.yaml index 1b63dce669ca4..204dceb18235c 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/indices.sort/10_basic.yaml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.sort/10_basic.yaml @@ -56,28 +56,28 @@ index: test type: test id: "5" - body: { "rank": 7 } + body: { "rank": 8 } - do: index: index: test type: test id: "6" - body: { "rank": 5 } + body: { "rank": 6 } - do: index: index: test type: test id: "7" - body: { "rank": 4 } + body: { "rank": 5 } - do: index: index: test type: test id: "8" - body: { "rank": 6 } + body: { "rank": 7 } - do: indices.refresh: