From a8c5d0b4070e4a2bed5d13953cb744716e348c79 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Thu, 23 Sep 2021 20:31:01 -0400 Subject: [PATCH] Disable sort optimization in search_after and scroll requests (#78230) We can't enable the sort optimization with points in scroll requests until LUCENE-10119 is integrated. --- .../search/searchafter/SearchAfterIT.java | 146 +++++++++++++++++- .../search/query/QueryPhase.java | 10 +- .../search/query/QueryPhaseTests.java | 9 +- 3 files changed, 157 insertions(+), 8 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/searchafter/SearchAfterIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/searchafter/SearchAfterIT.java index cd08e12424744..817449cf2f7a3 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/searchafter/SearchAfterIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/searchafter/SearchAfterIT.java @@ -9,36 +9,50 @@ package org.elasticsearch.search.searchafter; import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder; +import org.elasticsearch.action.bulk.BulkRequestBuilder; import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.index.IndexRequestBuilder; +import org.elasticsearch.action.search.ClosePointInTimeAction; +import org.elasticsearch.action.search.ClosePointInTimeRequest; +import org.elasticsearch.action.search.OpenPointInTimeAction; +import org.elasticsearch.action.search.OpenPointInTimeRequest; import org.elasticsearch.action.search.SearchPhaseExecutionException; +import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.search.SearchRequestBuilder; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.search.ShardSearchFailure; import org.elasticsearch.action.support.WriteRequest; import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.common.Randomness; import org.elasticsearch.common.UUIDs; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.index.query.MatchAllQueryBuilder; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.search.SearchHit; +import org.elasticsearch.search.builder.PointInTimeBuilder; +import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.elasticsearch.search.sort.FieldSortBuilder; import org.elasticsearch.search.sort.SortBuilders; import org.elasticsearch.search.sort.SortOrder; import org.elasticsearch.test.ESIntegTestCase; import org.hamcrest.Matchers; -import java.util.List; import java.util.ArrayList; -import java.util.Comparator; -import java.util.Collections; import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; -import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static org.elasticsearch.action.support.WriteRequest.RefreshPolicy.IMMEDIATE; import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertFailures; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures; import static org.hamcrest.Matchers.arrayContaining; +import static org.hamcrest.Matchers.arrayWithSize; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; @@ -397,4 +411,128 @@ private List convertSortValues(List sortValues) { } return converted; } + + public void testScrollAndSearchAfterWithBigIndex() { + int numDocs = randomIntBetween(5000, 10000); + List timestamps = new ArrayList<>(); + long currentTime = randomLongBetween(0, 1000); + for (int i = 0; i < numDocs; i++) { + int copies = randomIntBetween(0, 100) <= 5 ? randomIntBetween(2, 5) : 1; + for (int j = 0; j < copies; j++) { + timestamps.add(currentTime); + } + currentTime += randomIntBetween(1, 10); + } + final Settings.Builder indexSettings = Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, between(1, 5)); + if (randomBoolean()) { + indexSettings.put("sort.field", "timestamp").put("sort.order", randomFrom("desc", "asc")); + } + assertAcked(client().admin().indices().prepareCreate("test") + .setSettings(indexSettings) + .addMapping("_doc", "timestamp", "type=date,format=epoch_millis")); + Randomness.shuffle(timestamps); + final BulkRequestBuilder bulk = client().prepareBulk(); + bulk.setRefreshPolicy(IMMEDIATE); + for (long timestamp : timestamps) { + bulk.add(new IndexRequest("test").source("timestamp", timestamp)); + } + bulk.get(); + Collections.sort(timestamps); + // scroll with big index + { + SearchResponse resp = client().prepareSearch("test") + .setSize(randomIntBetween(50, 100)) + .setQuery(new MatchAllQueryBuilder()) + .addSort(new FieldSortBuilder("timestamp")) + .setScroll(TimeValue.timeValueMinutes(5)) + .get(); + try { + int foundHits = 0; + do { + for (SearchHit hit : resp.getHits().getHits()) { + assertNotNull(hit.getSourceAsMap()); + final Object timestamp = hit.getSourceAsMap().get("timestamp"); + assertNotNull(timestamp); + assertThat(((Number) timestamp).longValue(), equalTo(timestamps.get(foundHits))); + foundHits++; + } + resp = client().prepareSearchScroll(resp.getScrollId()) + .setScroll(TimeValue.timeValueMinutes(5)) + .get(); + } while (resp.getHits().getHits().length > 0); + assertThat(foundHits, equalTo(timestamps.size())); + } finally { + client().prepareClearScroll().addScrollId(resp.getScrollId()).get(); + } + } + // search_after with sort with point in time + String pitID; + { + OpenPointInTimeRequest openPITRequest = new OpenPointInTimeRequest("test").keepAlive(TimeValue.timeValueMinutes(5)); + pitID = client().execute(OpenPointInTimeAction.INSTANCE, openPITRequest).actionGet().getPointInTimeId(); + SearchRequest searchRequest = new SearchRequest("test") + .source(new SearchSourceBuilder() + .pointInTimeBuilder(new PointInTimeBuilder(pitID).setKeepAlive(TimeValue.timeValueMinutes(5))) + .sort("timestamp")); + searchRequest.source().size(randomIntBetween(50, 100)); + SearchResponse resp = client().search(searchRequest).actionGet(); + try { + int foundHits = 0; + do { + Object[] after = null; + for (SearchHit hit : resp.getHits().getHits()) { + assertNotNull(hit.getSourceAsMap()); + final Object timestamp = hit.getSourceAsMap().get("timestamp"); + assertNotNull(timestamp); + assertThat(((Number) timestamp).longValue(), equalTo(timestamps.get(foundHits))); + after = hit.getSortValues(); + foundHits++; + } + searchRequest.source().size(randomIntBetween(50, 100)); + assertNotNull(after); + assertThat("Sorted by timestamp and pit tier breaker", after, arrayWithSize(2)); + searchRequest.source().searchAfter(after); + resp = client().search(searchRequest).actionGet(); + } while (resp.getHits().getHits().length > 0); + assertThat(foundHits, equalTo(timestamps.size())); + } finally { + client().execute(ClosePointInTimeAction.INSTANCE, new ClosePointInTimeRequest(pitID)).actionGet(); + } + } + + // search_after without sort with point in time + { + OpenPointInTimeRequest openPITRequest = new OpenPointInTimeRequest("test").keepAlive(TimeValue.timeValueMinutes(5)); + pitID = client().execute(OpenPointInTimeAction.INSTANCE, openPITRequest).actionGet().getPointInTimeId(); + SearchRequest searchRequest = new SearchRequest("test") + .source(new SearchSourceBuilder() + .pointInTimeBuilder(new PointInTimeBuilder(pitID).setKeepAlive(TimeValue.timeValueMinutes(5))) + .sort(SortBuilders.pitTiebreaker())); + searchRequest.source().size(randomIntBetween(50, 100)); + SearchResponse resp = client().search(searchRequest).actionGet(); + List foundSeqNos = new ArrayList<>(); + try { + do { + Object[] after = null; + for (SearchHit hit : resp.getHits().getHits()) { + assertNotNull(hit.getSourceAsMap()); + final Object timestamp = hit.getSourceAsMap().get("timestamp"); + assertNotNull(timestamp); + foundSeqNos.add(((Number) timestamp).longValue()); + after = hit.getSortValues(); + } + searchRequest.source().size(randomIntBetween(50, 100)); + assertNotNull(after); + assertThat("sorted by pit tie breaker", after, arrayWithSize(1)); + searchRequest.source().searchAfter(after); + resp = client().search(searchRequest).actionGet(); + } while (resp.getHits().getHits().length > 0); + Collections.sort(foundSeqNos); + assertThat(foundSeqNos, equalTo(timestamps)); + } finally { + client().execute(ClosePointInTimeAction.INSTANCE, new ClosePointInTimeRequest(pitID)).actionGet(); + } + } + } } diff --git a/server/src/main/java/org/elasticsearch/search/query/QueryPhase.java b/server/src/main/java/org/elasticsearch/search/query/QueryPhase.java index f67083efb6593..6322466bbb998 100644 --- a/server/src/main/java/org/elasticsearch/search/query/QueryPhase.java +++ b/server/src/main/java/org/elasticsearch/search/query/QueryPhase.java @@ -319,8 +319,16 @@ private static void optimizeNumericSort(SearchContext searchContext, IndexReader // Some fields there is no match (e.g. integer field uses SortField.Type.LONG, but indexed as IntegerPoint) if ((fieldType.typeName().equals("long") == false) && (fieldType instanceof DateFieldMapper.DateFieldType == false)) return; + // TODO: Enable the sort optimization with point for search_after and scroll requests when LUCENE-10119 is integrated. + if (searchContext.sort() != null && searchContext.sort().sort.getSort().length == 1) { + if (searchContext.searchAfter() != null) { + return; + } + if (searchContext.scrollContext() != null && searchContext.scrollContext().lastEmittedDoc != null) { + return; + } + } sortField.setCanUsePoints(); - return; } /** diff --git a/server/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java b/server/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java index f53c56b24dae1..06274dec31757 100644 --- a/server/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java +++ b/server/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java @@ -726,23 +726,26 @@ public void testNumericSortOptimization() throws Exception { // 2. Test sort optimization on long field with after { + SortField longField = new SortField(fieldNameLong, SortField.Type.LONG); + longField.setMissingValue(Long.MAX_VALUE); TestSearchContext searchContext = new TestSearchContext( searchExecutionContext, indexShard, newContextSearcher(reader)); int afterDoc = (int) randomLongBetween(0, 30); long afterValue = startLongValue + afterDoc; FieldDoc after = new FieldDoc(afterDoc, Float.NaN, new Long[] {afterValue}); searchContext.searchAfter(after); - searchContext.sort(formatsLong); + searchContext.sort(new SortAndFormats(new Sort(longField), new DocValueFormat[]{DocValueFormat.RAW})); searchContext.parsedQuery(query); searchContext.setTask(task); searchContext.trackTotalHitsUpTo(10); searchContext.setSize(10); QueryPhase.executeInternal(searchContext); - assertTrue(searchContext.sort().sort.getSort()[0].getCanUsePoints()); + assertFalse(searchContext.sort().sort.getSort()[0].getCanUsePoints()); final TopDocs topDocs = searchContext.queryResult().topDocs().topDocs; long firstResult = (long) ((FieldDoc) topDocs.scoreDocs[0]).fields[0]; assertThat(firstResult, greaterThan(afterValue)); - assertSortResults(topDocs, numDocs, false); + assertThat(topDocs.totalHits.value, equalTo((long)numDocs)); + // assertSortResults(topDocs, numDocs, false); } // 3. Test sort optimization on long field + date field