Add tests for top_hits aggregation (#22754)

nik9000 · nik9000 · commit d10dcea690db · 2017-01-25T16:16:35.000-05:00
Add unit tests for `TopHitsAggregator` and convert some snippets in docs for `top_hits` aggregation to `// CONSOLE`. Relates to #22278 Relates to #18160
diff --git a/buildSrc/src/main/resources/checkstyle_suppressions.xml b/buildSrc/src/main/resources/checkstyle_suppressions.xml
@@ -501,7 +501,6 @@
   <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]metrics[/\\]percentiles[/\\]tdigest[/\\]TDigestPercentilesAggregator.java" checks="LineLength" />
   <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]metrics[/\\]scripted[/\\]ScriptedMetricAggregator.java" checks="LineLength" />
   <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]metrics[/\\]stats[/\\]extended[/\\]ExtendedStatsAggregator.java" checks="LineLength" />
-  <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]metrics[/\\]tophits[/\\]TopHitsAggregator.java" checks="LineLength" />
   <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]pipeline[/\\]bucketscript[/\\]BucketScriptPipelineAggregator.java" checks="LineLength" />
   <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]support[/\\]AggregationPath.java" checks="LineLength" />
   <suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]support[/\\]ValuesSourceParser.java" checks="LineLength" />
diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/tophits/TopHitsAggregator.java b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/tophits/TopHitsAggregator.java
@@ -124,7 +124,13 @@ public void collect(int docId, long bucket) throws IOException {
                     // In the QueryPhase we don't need this protection, because it is build into the IndexSearcher,
                     // but here we create collectors ourselves and we need prevent OOM because of crazy an offset and size.
                     topN = Math.min(topN, subSearchContext.searcher().getIndexReader().maxDoc());
-                    TopDocsCollector<?> topLevelCollector = sort != null ? TopFieldCollector.create(sort.sort, topN, true, subSearchContext.trackScores(), subSearchContext.trackScores()) : TopScoreDocCollector.create(topN);
+                    TopDocsCollector<?> topLevelCollector;
+                    if (sort == null) {
+                        topLevelCollector = TopScoreDocCollector.create(topN);
+                    } else {
+                        topLevelCollector = TopFieldCollector.create(sort.sort, topN, true, subSearchContext.trackScores(),
+                                subSearchContext.trackScores());
+                    }
                     collectors = new TopDocsAndLeafCollector(topLevelCollector);
                     collectors.leafCollector = collectors.topLevelCollector.getLeafCollector(ctx);
                     collectors.leafCollector.setScorer(scorer);
@@ -172,8 +178,8 @@ public InternalAggregation buildAggregation(long owningBucketOrdinal) {
                     searchHitFields.sortValues(fieldDoc.fields, subSearchContext.sort().formats);
                 }
             }
-            topHits = new InternalTopHits(name, subSearchContext.from(), subSearchContext.size(), topDocs, fetchResult.hits(), pipelineAggregators(),
-                metaData());
+            topHits = new InternalTopHits(name, subSearchContext.from(), subSearchContext.size(), topDocs, fetchResult.hits(),
+                    pipelineAggregators(), metaData());
         }
         return topHits;
     }
@@ -186,7 +192,8 @@ public InternalTopHits buildEmptyAggregation() {
         } else {
             topDocs = Lucene.EMPTY_TOP_DOCS;
         }
-        return new InternalTopHits(name, subSearchContext.from(), subSearchContext.size(), topDocs, InternalSearchHits.empty(), pipelineAggregators(), metaData());
+        return new InternalTopHits(name, subSearchContext.from(), subSearchContext.size(), topDocs, InternalSearchHits.empty(),
+                pipelineAggregators(), metaData());
     }
 
     @Override
diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java b/core/src/test/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java
@@ -94,6 +94,7 @@ public boolean shouldCache(Query query) throws IOException {
 
         CircuitBreakerService circuitBreakerService = new NoneCircuitBreakerService();
         SearchContext searchContext = mock(SearchContext.class);
+        when(searchContext.numberOfShards()).thenReturn(1);
         when(searchContext.searcher()).thenReturn(contextIndexSearcher);
         when(searchContext.bigArrays()).thenReturn(new MockBigArrays(Settings.EMPTY, circuitBreakerService));
         when(searchContext.fetchPhase())
@@ -163,13 +164,17 @@ protected <A extends InternalAggregation, C extends Aggregator> A searchAndReduc
         List<InternalAggregation> aggs = new ArrayList<> ();
         Query rewritten = searcher.rewrite(query);
         Weight weight = searcher.createWeight(rewritten, true);
-        try (C root = createAggregator(builder, searcher, fieldTypes)) {
+        C root = createAggregator(builder, searcher, fieldTypes);
+        try {
             for (ShardSearcher subSearcher : subSearchers) {
-                try (C a = createAggregator(builder, subSearcher, fieldTypes)) {
+                C a = createAggregator(builder, subSearcher, fieldTypes);
+                try {
                     a.preCollection();
                     subSearcher.search(weight, a);
                     a.postCollection();
                     aggs.add(a.buildAggregation(0L));
+                } finally {
+                    closeAgg(a);
                 }
             }
             if (aggs.isEmpty()) {
@@ -179,6 +184,15 @@ protected <A extends InternalAggregation, C extends Aggregator> A searchAndReduc
                 A internalAgg = (A) aggs.get(0).doReduce(aggs, new InternalAggregation.ReduceContext(root.context().bigArrays(), null));
                 return internalAgg;
             }
+        } finally {
+            closeAgg(root);
+        }
+    }
+
+    private void closeAgg(Aggregator agg) {
+        agg.close();
+        for (Aggregator sub : ((AggregatorBase) agg).subAggregators) {
+            closeAgg(sub);
         }
     }
 
diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/metrics/tophits/TopHitsAggregatorTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/metrics/tophits/TopHitsAggregatorTests.java
@@ -18,70 +18,133 @@
  */
 package org.elasticsearch.search.aggregations.metrics.tophits;
 
+import org.apache.lucene.analysis.core.KeywordAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.SortedSetDocValuesField;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.queryparser.classic.QueryParser;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.MatchNoDocsQuery;
+import org.apache.lucene.search.Query;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.index.mapper.KeywordFieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.Uid;
 import org.elasticsearch.index.mapper.UidFieldMapper;
 import org.elasticsearch.search.SearchHits;
+import org.elasticsearch.search.aggregations.Aggregation;
+import org.elasticsearch.search.aggregations.AggregationBuilder;
 import org.elasticsearch.search.aggregations.AggregatorTestCase;
+import org.elasticsearch.search.aggregations.bucket.terms.Terms;
 import org.elasticsearch.search.sort.SortOrder;
 
+import java.io.IOException;
+
+import static org.elasticsearch.search.aggregations.AggregationBuilders.terms;
+import static org.elasticsearch.search.aggregations.AggregationBuilders.topHits;
+
 public class TopHitsAggregatorTests extends AggregatorTestCase {
+    public void testTopLevel() throws Exception {
+        Aggregation result;
+        if (randomBoolean()) {
+            result = testCase(new MatchAllDocsQuery(), topHits("_name").sort("string", SortOrder.DESC));
+        } else {
+            Query query = new QueryParser("string", new KeywordAnalyzer()).parse("d^1000 c^100 b^10 a^1");
+            result = testCase(query, topHits("_name"));
+        }
+        SearchHits searchHits = ((TopHits) result).getHits();
+        assertEquals(3L, searchHits.getTotalHits());
+        assertEquals("3", searchHits.getAt(0).getId());
+        assertEquals("type", searchHits.getAt(0).getType());
+        assertEquals("2", searchHits.getAt(1).getId());
+        assertEquals("type", searchHits.getAt(1).getType());
+        assertEquals("1", searchHits.getAt(2).getId());
+        assertEquals("type", searchHits.getAt(2).getType());
+    }
+
+    public void testNoResults() throws Exception {
+        TopHits result = (TopHits) testCase(new MatchNoDocsQuery(), topHits("_name").sort("string", SortOrder.DESC));
+        SearchHits searchHits = ((TopHits) result).getHits();
+        assertEquals(0L, searchHits.getTotalHits());
+    }
+
+    /**
+     * Tests {@code top_hits} inside of {@code terms}. While not strictly a unit test this is a fairly common way to run {@code top_hits}
+     * and serves as a good example of running {@code top_hits} inside of another aggregation.
+     */
+    public void testInsideTerms() throws Exception {
+        Aggregation result;
+        if (randomBoolean()) {
+            result = testCase(new MatchAllDocsQuery(),
+                    terms("term").field("string")
+                        .subAggregation(topHits("top").sort("string", SortOrder.DESC)));
+        } else {
+            Query query = new QueryParser("string", new KeywordAnalyzer()).parse("d^1000 c^100 b^10 a^1");
+            result = testCase(query,
+                    terms("term").field("string")
+                        .subAggregation(topHits("top")));
+        }
+        Terms terms = (Terms) result;
+
+        // The "a" bucket
+        TopHits hits = (TopHits) terms.getBucketByKey("a").getAggregations().get("top");
+        SearchHits searchHits = (hits).getHits();
+        assertEquals(2L, searchHits.getTotalHits());
+        assertEquals("2", searchHits.getAt(0).getId());
+        assertEquals("1", searchHits.getAt(1).getId());
+
+        // The "b" bucket
+        searchHits = ((TopHits) terms.getBucketByKey("b").getAggregations().get("top")).getHits();
+        assertEquals(2L, searchHits.getTotalHits());
+        assertEquals("3", searchHits.getAt(0).getId());
+        assertEquals("1", searchHits.getAt(1).getId());
 
-    public void testTermsAggregator() throws Exception {
+        // The "c" bucket
+        searchHits = ((TopHits) terms.getBucketByKey("c").getAggregations().get("top")).getHits();
+        assertEquals(1L, searchHits.getTotalHits());
+        assertEquals("2", searchHits.getAt(0).getId());
+
+        // The "d" bucket
+        searchHits = ((TopHits) terms.getBucketByKey("d").getAggregations().get("top")).getHits();
+        assertEquals(1L, searchHits.getTotalHits());
+        assertEquals("3", searchHits.getAt(0).getId());
+    }
+
+    private static final MappedFieldType STRING_FIELD_TYPE = new KeywordFieldMapper.KeywordFieldType();
+    static {
+        STRING_FIELD_TYPE.setName("string");
+        STRING_FIELD_TYPE.setHasDocValues(true);
+    }
+
+    private Aggregation testCase(Query query, AggregationBuilder builder) throws IOException {
         Directory directory = newDirectory();
-        RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
-        Document document = new Document();
-        document.add(new Field(UidFieldMapper.NAME, Uid.createUid("type", "1"), UidFieldMapper.Defaults.FIELD_TYPE));
-        document.add(new SortedSetDocValuesField("string", new BytesRef("a")));
-        document.add(new SortedSetDocValuesField("string", new BytesRef("b")));
-        indexWriter.addDocument(document);
-        document = new Document();
-        document.add(new Field(UidFieldMapper.NAME, Uid.createUid("type", "2"), UidFieldMapper.Defaults.FIELD_TYPE));
-        document.add(new SortedSetDocValuesField("string", new BytesRef("c")));
-        document.add(new SortedSetDocValuesField("string", new BytesRef("a")));
-        indexWriter.addDocument(document);
-        document = new Document();
-        document.add(new Field(UidFieldMapper.NAME, Uid.createUid("type", "3"), UidFieldMapper.Defaults.FIELD_TYPE));
-        document.add(new SortedSetDocValuesField("string", new BytesRef("b")));
-        document.add(new SortedSetDocValuesField("string", new BytesRef("d")));
-        indexWriter.addDocument(document);
-        indexWriter.close();
+        RandomIndexWriter iw = new RandomIndexWriter(random(), directory);
+        iw.addDocument(document("1", "a", "b"));
+        iw.addDocument(document("2", "c", "a"));
+        iw.addDocument(document("3", "b", "d"));
+        iw.close();
 
         IndexReader indexReader = DirectoryReader.open(directory);
         IndexSearcher indexSearcher = newSearcher(indexReader, true, true);
 
-        MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType();
-        fieldType.setName("string");
-        fieldType.setHasDocValues(true );
-        TopHitsAggregationBuilder aggregationBuilder = new TopHitsAggregationBuilder("_name");
-        aggregationBuilder.sort("string", SortOrder.DESC);
-        try (TopHitsAggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType)){
-            aggregator.preCollection();
-            indexSearcher.search(new MatchAllDocsQuery(), aggregator);
-            aggregator.postCollection();
-            TopHits topHits = (TopHits) aggregator.buildAggregation(0L);
-            SearchHits searchHits = topHits.getHits();
-            assertEquals(3L, searchHits.getTotalHits());
-            assertEquals("3", searchHits.getAt(0).getId());
-            assertEquals("type", searchHits.getAt(0).getType());
-            assertEquals("2", searchHits.getAt(1).getId());
-            assertEquals("type", searchHits.getAt(1).getType());
-            assertEquals("1", searchHits.getAt(2).getId());
-            assertEquals("type", searchHits.getAt(2).getType());
-        }
+        Aggregation result = searchAndReduce(indexSearcher, query, builder, STRING_FIELD_TYPE);
         indexReader.close();
         directory.close();
+        return result;
     }
 
+    private Document document(String id, String... stringValues) {
+        Document document = new Document();
+        document.add(new Field(UidFieldMapper.NAME, Uid.createUid("type", id), UidFieldMapper.Defaults.FIELD_TYPE));
+        for (String stringValue : stringValues) {
+            document.add(new Field("string", stringValue, STRING_FIELD_TYPE));
+            document.add(new SortedSetDocValuesField("string", new BytesRef(stringValue)));
+        }
+        return document;
+    }
 }
diff --git a/docs/reference/aggregations/metrics/tophits-aggregation.asciidoc b/docs/reference/aggregations/metrics/tophits-aggregation.asciidoc

Original file line number	Diff line number	Diff line change
`@@ -124,7 +124,13 @@ public void collect(int docId, long bucket) throws IOException {`
`124`	`124`	`// In the QueryPhase we don't need this protection, because it is build into the IndexSearcher,`
`125`	`125`	`// but here we create collectors ourselves and we need prevent OOM because of crazy an offset and size.`
`126`	`126`	`topN = Math.min(topN, subSearchContext.searcher().getIndexReader().maxDoc());`
`127`		`- TopDocsCollector<?> topLevelCollector = sort != null ? TopFieldCollector.create(sort.sort, topN, true, subSearchContext.trackScores(), subSearchContext.trackScores()) : TopScoreDocCollector.create(topN);`
	`127`	`+ TopDocsCollector<?> topLevelCollector;`
	`128`	`+ if (sort == null) {`
	`129`	`+ topLevelCollector = TopScoreDocCollector.create(topN);`
	`130`	`+ } else {`
	`131`	`+ topLevelCollector = TopFieldCollector.create(sort.sort, topN, true, subSearchContext.trackScores(),`
	`132`	`+ subSearchContext.trackScores());`
	`133`	`+ }`
`128`	`134`	`collectors = new TopDocsAndLeafCollector(topLevelCollector);`
`129`	`135`	`collectors.leafCollector = collectors.topLevelCollector.getLeafCollector(ctx);`
`130`	`136`	`collectors.leafCollector.setScorer(scorer);`
`@@ -172,8 +178,8 @@ public InternalAggregation buildAggregation(long owningBucketOrdinal) {`
`172`	`178`	`searchHitFields.sortValues(fieldDoc.fields, subSearchContext.sort().formats);`
`173`	`179`	`}`
`174`	`180`	`}`
`175`		`- topHits = new InternalTopHits(name, subSearchContext.from(), subSearchContext.size(), topDocs, fetchResult.hits(), pipelineAggregators(),`
`176`		`- metaData());`
	`181`	`+ topHits = new InternalTopHits(name, subSearchContext.from(), subSearchContext.size(), topDocs, fetchResult.hits(),`
	`182`	`+ pipelineAggregators(), metaData());`
`177`	`183`	`}`
`178`	`184`	`return topHits;`
`179`	`185`	`}`
`@@ -186,7 +192,8 @@ public InternalTopHits buildEmptyAggregation() {`
`186`	`192`	`} else {`
`187`	`193`	`topDocs = Lucene.EMPTY_TOP_DOCS;`
`188`	`194`	`}`
`189`		`- return new InternalTopHits(name, subSearchContext.from(), subSearchContext.size(), topDocs, InternalSearchHits.empty(), pipelineAggregators(), metaData());`
	`195`	`+ return new InternalTopHits(name, subSearchContext.from(), subSearchContext.size(), topDocs, InternalSearchHits.empty(),`
	`196`	`+ pipelineAggregators(), metaData());`
`190`	`197`	`}`
`191`	`198`
`192`	`199`	`@Override`