Skip to content

Commit 5ac5fd9

Browse files
authored
Move early termination based on index sort to TopDocs collector (#27666)
Lucene TopDocs collector are now able to early terminate the collection based on the index sort. This change plugs this new functionality directly in the query phase instead of relying on a dedicated early terminating collector.
1 parent 7d88851 commit 5ac5fd9

File tree

5 files changed

+197
-239
lines changed

5 files changed

+197
-239
lines changed

core/src/main/java/org/apache/lucene/queries/SearchAfterSortedDocQuery.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException {
7979
throw new IOException("search sort :[" + sort.getSort() + "] does not match the index sort:[" + segmentSort + "]");
8080
}
8181
final int afterDoc = after.doc - context.docBase;
82-
TopComparator comparator= getTopComparator(fieldComparators, reverseMuls, context, afterDoc);
82+
TopComparator comparator = getTopComparator(fieldComparators, reverseMuls, context, afterDoc);
8383
final int maxDoc = context.reader().maxDoc();
8484
final int firstDoc = searchAfterDoc(comparator, 0, context.reader().maxDoc());
8585
if (firstDoc >= maxDoc) {
@@ -143,7 +143,7 @@ static TopComparator getTopComparator(FieldComparator<?>[] fieldComparators,
143143
}
144144
}
145145

146-
if (topDoc <= doc) {
146+
if (doc <= topDoc) {
147147
return false;
148148
}
149149
return true;

core/src/main/java/org/elasticsearch/search/query/QueryCollectorContext.java

Lines changed: 3 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,10 @@
1919

2020
package org.elasticsearch.search.query;
2121

22-
import org.apache.lucene.index.IndexReader;
2322
import org.apache.lucene.search.Collector;
24-
import org.apache.lucene.search.EarlyTerminatingSortingCollector;
2523
import org.apache.lucene.search.IndexSearcher;
2624
import org.apache.lucene.search.MultiCollector;
2725
import org.apache.lucene.search.Query;
28-
import org.apache.lucene.search.Sort;
29-
import org.apache.lucene.search.TopDocs;
30-
import org.apache.lucene.search.TotalHitCountCollector;
3126
import org.apache.lucene.search.Weight;
3227
import org.elasticsearch.common.lucene.MinimumScoreCollector;
3328
import org.elasticsearch.common.lucene.search.FilteredCollector;
@@ -40,14 +35,12 @@
4035
import java.util.Collections;
4136
import java.util.List;
4237
import java.util.function.BooleanSupplier;
43-
import java.util.function.IntSupplier;
4438

4539
import static org.elasticsearch.search.profile.query.CollectorResult.REASON_SEARCH_CANCELLED;
4640
import static org.elasticsearch.search.profile.query.CollectorResult.REASON_SEARCH_MIN_SCORE;
4741
import static org.elasticsearch.search.profile.query.CollectorResult.REASON_SEARCH_MULTI;
4842
import static org.elasticsearch.search.profile.query.CollectorResult.REASON_SEARCH_POST_FILTER;
4943
import static org.elasticsearch.search.profile.query.CollectorResult.REASON_SEARCH_TERMINATE_AFTER_COUNT;
50-
import static org.elasticsearch.search.query.TopDocsCollectorContext.shortcutTotalHitCount;
5144

5245
abstract class QueryCollectorContext {
5346
private String profilerName;
@@ -70,22 +63,12 @@ protected InternalProfileCollector createWithProfiler(InternalProfileCollector i
7063
return new InternalProfileCollector(collector, profilerName, in != null ? Collections.singletonList(in) : Collections.emptyList());
7164
}
7265

73-
/**
74-
* A value of <code>false</code> indicates that the underlying collector can infer
75-
* its results directly from the context (search is not needed).
76-
* Default to true (search is needed).
77-
*/
78-
boolean shouldCollect() {
79-
return true;
80-
}
81-
8266
/**
8367
* Post-process <code>result</code> after search execution.
8468
*
8569
* @param result The query search result to populate
86-
* @param hasCollected True if search was executed
8770
*/
88-
void postProcess(QuerySearchResult result, boolean hasCollected) throws IOException {}
71+
void postProcess(QuerySearchResult result) throws IOException {}
8972

9073
/**
9174
* Creates the collector tree from the provided <code>collectors</code>
@@ -175,11 +158,6 @@ static QueryCollectorContext createCancellableCollectorContext(BooleanSupplier c
175158
Collector create(Collector in) throws IOException {
176159
return new CancellableCollector(cancelled, in);
177160
}
178-
179-
@Override
180-
boolean shouldCollect() {
181-
return false;
182-
}
183161
};
184162
}
185163

@@ -198,52 +176,11 @@ Collector create(Collector in) throws IOException {
198176
}
199177

200178
@Override
201-
void postProcess(QuerySearchResult result, boolean hasCollected) throws IOException {
202-
if (hasCollected && collector.terminatedEarly()) {
179+
void postProcess(QuerySearchResult result) throws IOException {
180+
if (collector.terminatedEarly()) {
203181
result.terminatedEarly(true);
204182
}
205183
}
206184
};
207185
}
208-
209-
/**
210-
* Creates a sorting termination collector limiting the collection to the first <code>numHits</code> per segment.
211-
* The total hit count matching the query is also computed if <code>trackTotalHits</code> is true.
212-
*/
213-
static QueryCollectorContext createEarlySortingTerminationCollectorContext(IndexReader reader,
214-
Query query,
215-
Sort indexSort,
216-
int numHits,
217-
boolean trackTotalHits,
218-
boolean shouldCollect) {
219-
return new QueryCollectorContext(REASON_SEARCH_TERMINATE_AFTER_COUNT) {
220-
private IntSupplier countSupplier = null;
221-
222-
@Override
223-
Collector create(Collector in) throws IOException {
224-
EarlyTerminatingSortingCollector sortingCollector = new EarlyTerminatingSortingCollector(in, indexSort, numHits);
225-
Collector collector = sortingCollector;
226-
if (trackTotalHits) {
227-
int count = shouldCollect ? -1 : shortcutTotalHitCount(reader, query);
228-
if (count == -1) {
229-
TotalHitCountCollector countCollector = new TotalHitCountCollector();
230-
collector = MultiCollector.wrap(sortingCollector, countCollector);
231-
this.countSupplier = countCollector::getTotalHits;
232-
} else {
233-
this.countSupplier = () -> count;
234-
}
235-
}
236-
return collector;
237-
}
238-
239-
@Override
240-
void postProcess(QuerySearchResult result, boolean hasCollected) throws IOException {
241-
if (countSupplier != null) {
242-
final TopDocs topDocs = result.topDocs();
243-
topDocs.totalHits = countSupplier.getAsInt();
244-
result.topDocs(topDocs, result.sortValueFormats());
245-
}
246-
}
247-
};
248-
}
249186
}

core/src/main/java/org/elasticsearch/search/query/QueryPhase.java

Lines changed: 33 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
package org.elasticsearch.search.query;
2121

2222
import org.apache.lucene.index.IndexReader;
23+
import org.apache.lucene.index.LeafReaderContext;
2324
import org.apache.lucene.queries.MinDocQuery;
2425
import org.apache.lucene.queries.SearchAfterSortedDocQuery;
2526
import org.apache.lucene.search.BooleanClause;
@@ -36,7 +37,6 @@
3637
import org.apache.lucene.search.TopDocs;
3738
import org.apache.lucene.util.Counter;
3839
import org.elasticsearch.action.search.SearchTask;
39-
import org.elasticsearch.common.Nullable;
4040
import org.elasticsearch.common.lucene.Lucene;
4141
import org.elasticsearch.common.settings.Settings;
4242
import org.elasticsearch.common.util.concurrent.EsThreadPoolExecutor;
@@ -61,7 +61,6 @@
6161
import java.util.function.Consumer;
6262

6363
import static org.elasticsearch.search.query.QueryCollectorContext.createCancellableCollectorContext;
64-
import static org.elasticsearch.search.query.QueryCollectorContext.createEarlySortingTerminationCollectorContext;
6564
import static org.elasticsearch.search.query.QueryCollectorContext.createEarlyTerminationCollectorContext;
6665
import static org.elasticsearch.search.query.QueryCollectorContext.createFilteredCollectorContext;
6766
import static org.elasticsearch.search.query.QueryCollectorContext.createMinScoreCollectorContext;
@@ -104,10 +103,8 @@ public void execute(SearchContext searchContext) throws QueryPhaseExecutionExcep
104103
// request, preProcess is called on the DFS phase phase, this is why we pre-process them
105104
// here to make sure it happens during the QUERY phase
106105
aggregationPhase.preProcess(searchContext);
107-
Sort indexSort = searchContext.mapperService().getIndexSettings().getIndexSortConfig()
108-
.buildIndexSort(searchContext.mapperService()::fullName, searchContext::getForField);
109106
final ContextIndexSearcher searcher = searchContext.searcher();
110-
boolean rescore = execute(searchContext, searchContext.searcher(), searcher::setCheckCancelled, indexSort);
107+
boolean rescore = execute(searchContext, searchContext.searcher(), searcher::setCheckCancelled);
111108

112109
if (rescore) { // only if we do a regular search
113110
rescorePhase.execute(searchContext);
@@ -127,11 +124,12 @@ public void execute(SearchContext searchContext) throws QueryPhaseExecutionExcep
127124
* wire everything (mapperService, etc.)
128125
* @return whether the rescoring phase should be executed
129126
*/
130-
static boolean execute(SearchContext searchContext, final IndexSearcher searcher,
131-
Consumer<Runnable> checkCancellationSetter, @Nullable Sort indexSort) throws QueryPhaseExecutionException {
127+
static boolean execute(SearchContext searchContext,
128+
final IndexSearcher searcher,
129+
Consumer<Runnable> checkCancellationSetter) throws QueryPhaseExecutionException {
130+
final IndexReader reader = searcher.getIndexReader();
132131
QuerySearchResult queryResult = searchContext.queryResult();
133132
queryResult.searchTimedOut(false);
134-
135133
try {
136134
queryResult.from(searchContext.from());
137135
queryResult.size(searchContext.size());
@@ -161,7 +159,7 @@ static boolean execute(SearchContext searchContext, final IndexSearcher searcher
161159
// ... and stop collecting after ${size} matches
162160
searchContext.terminateAfter(searchContext.size());
163161
searchContext.trackTotalHits(false);
164-
} else if (canEarlyTerminate(indexSort, searchContext)) {
162+
} else if (canEarlyTerminate(reader, searchContext.sort())) {
165163
// now this gets interesting: since the search sort is a prefix of the index sort, we can directly
166164
// skip to the desired doc
167165
if (after != null) {
@@ -177,10 +175,14 @@ static boolean execute(SearchContext searchContext, final IndexSearcher searcher
177175
}
178176

179177
final LinkedList<QueryCollectorContext> collectors = new LinkedList<>();
178+
// whether the chain contains a collector that filters documents
179+
boolean hasFilterCollector = false;
180180
if (searchContext.parsedPostFilter() != null) {
181181
// add post filters before aggregations
182182
// it will only be applied to top hits
183183
collectors.add(createFilteredCollectorContext(searcher, searchContext.parsedPostFilter().query()));
184+
// this collector can filter documents during the collection
185+
hasFilterCollector = true;
184186
}
185187
if (searchContext.queryCollectors().isEmpty() == false) {
186188
// plug in additional collectors, like aggregations
@@ -189,10 +191,14 @@ static boolean execute(SearchContext searchContext, final IndexSearcher searcher
189191
if (searchContext.minimumScore() != null) {
190192
// apply the minimum score after multi collector so we filter aggs as well
191193
collectors.add(createMinScoreCollectorContext(searchContext.minimumScore()));
194+
// this collector can filter documents during the collection
195+
hasFilterCollector = true;
192196
}
193197
if (searchContext.terminateAfter() != SearchContext.DEFAULT_TERMINATE_AFTER) {
194198
// apply terminate after after all filters collectors
195199
collectors.add(createEarlyTerminationCollectorContext(searchContext.terminateAfter()));
200+
// this collector can filter documents during the collection
201+
hasFilterCollector = true;
196202
}
197203

198204
boolean timeoutSet = scrollContext == null && searchContext.timeout() != null &&
@@ -240,21 +246,9 @@ static boolean execute(SearchContext searchContext, final IndexSearcher searcher
240246
// searchContext.lowLevelCancellation()
241247
collectors.add(createCancellableCollectorContext(searchContext.getTask()::isCancelled));
242248

243-
final IndexReader reader = searcher.getIndexReader();
244249
final boolean doProfile = searchContext.getProfilers() != null;
245250
// create the top docs collector last when the other collectors are known
246-
final TopDocsCollectorContext topDocsFactory = createTopDocsCollectorContext(searchContext, reader,
247-
collectors.stream().anyMatch(QueryCollectorContext::shouldCollect));
248-
final boolean shouldCollect = topDocsFactory.shouldCollect();
249-
250-
if (topDocsFactory.numHits() > 0 &&
251-
(scrollContext == null || scrollContext.totalHits != -1) &&
252-
canEarlyTerminate(indexSort, searchContext)) {
253-
// top docs collection can be early terminated based on index sort
254-
// add the collector context first so we don't early terminate aggs but only top docs
255-
collectors.addFirst(createEarlySortingTerminationCollectorContext(reader, searchContext.query(), indexSort,
256-
topDocsFactory.numHits(), searchContext.trackTotalHits(), shouldCollect));
257-
}
251+
final TopDocsCollectorContext topDocsFactory = createTopDocsCollectorContext(searchContext, reader, hasFilterCollector);
258252
// add the top docs collector, the first collector context in the chain
259253
collectors.addFirst(topDocsFactory);
260254

@@ -268,9 +262,7 @@ static boolean execute(SearchContext searchContext, final IndexSearcher searcher
268262
}
269263

270264
try {
271-
if (shouldCollect) {
272-
searcher.search(query, queryCollector);
273-
}
265+
searcher.search(query, queryCollector);
274266
} catch (TimeExceededException e) {
275267
assert timeoutSet : "TimeExceededException thrown even though timeout wasn't set";
276268
queryResult.searchTimedOut(true);
@@ -280,7 +272,7 @@ static boolean execute(SearchContext searchContext, final IndexSearcher searcher
280272

281273
final QuerySearchResult result = searchContext.queryResult();
282274
for (QueryCollectorContext ctx : collectors) {
283-
ctx.postProcess(result, shouldCollect);
275+
ctx.postProcess(result);
284276
}
285277
EsThreadPoolExecutor executor = (EsThreadPoolExecutor)
286278
searchContext.indexShard().getThreadPool().executor(ThreadPool.Names.SEARCH);
@@ -317,13 +309,21 @@ static boolean returnsDocsInOrder(Query query, SortAndFormats sf) {
317309
}
318310

319311
/**
320-
* Returns true if the provided <code>searchContext</code> can early terminate based on <code>indexSort</code>
321-
* @param indexSort The index sort specification
322-
* @param context The search context for the request
323-
*/
324-
static boolean canEarlyTerminate(Sort indexSort, SearchContext context) {
325-
final Sort sort = context.sort() == null ? Sort.RELEVANCE : context.sort().sort;
326-
return indexSort != null && EarlyTerminatingSortingCollector.canEarlyTerminate(sort, indexSort);
312+
* Returns whether collection within the provided <code>reader</code> can be early-terminated if it sorts
313+
* with <code>sortAndFormats</code>.
314+
**/
315+
static boolean canEarlyTerminate(IndexReader reader, SortAndFormats sortAndFormats) {
316+
if (sortAndFormats == null || sortAndFormats.sort == null) {
317+
return false;
318+
}
319+
final Sort sort = sortAndFormats.sort;
320+
for (LeafReaderContext ctx : reader.leaves()) {
321+
Sort indexSort = ctx.reader().getMetaData().getSort();
322+
if (indexSort == null || EarlyTerminatingSortingCollector.canEarlyTerminate(sort, indexSort) == false) {
323+
return false;
324+
}
325+
}
326+
return true;
327327
}
328328

329329
private static class TimeExceededException extends RuntimeException {}

0 commit comments

Comments
 (0)