From f61904df3a39f5f72a5f28b7c7364300abfd452b Mon Sep 17 00:00:00 2001 From: "xunjian.sl" Date: Thu, 9 Apr 2020 09:26:14 +0800 Subject: [PATCH 1/2] count Aggregation optimization --- .../metrics/ValueCountAggregator.java | 30 ++++++++++++++++ .../metrics/ValueCountAggregatorTests.java | 34 +++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/ValueCountAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/ValueCountAggregator.java index b2399a8e911f2..127a59dcd2141 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/ValueCountAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/ValueCountAggregator.java @@ -19,9 +19,11 @@ package org.elasticsearch.search.aggregations.metrics; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SortedNumericDocValues; import org.elasticsearch.common.lease.Releasables; import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.common.util.LongArray; +import org.elasticsearch.index.fielddata.MultiGeoPointValues; import org.elasticsearch.index.fielddata.SortedBinaryDocValues; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.InternalAggregation; @@ -62,6 +64,34 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, return LeafBucketCollector.NO_OP_COLLECTOR; } final BigArrays bigArrays = context.bigArrays(); + + if (valuesSource instanceof ValuesSource.Numeric) { + final SortedNumericDocValues values = ((ValuesSource.Numeric)valuesSource).longValues(ctx); + return new LeafBucketCollectorBase(sub, values) { + + @Override + public void collect(int doc, long bucket) throws IOException { + counts = bigArrays.grow(counts, bucket + 1); + if (values.advanceExact(doc)) { + counts.increment(bucket, values.docValueCount()); + } + } + }; + } + if (valuesSource 
instanceof ValuesSource.GeoPoint) { + final MultiGeoPointValues values = ((ValuesSource.GeoPoint)valuesSource).geoPointValues(ctx); + return new LeafBucketCollectorBase(sub, null) { + + @Override + public void collect(int doc, long bucket) throws IOException { + counts = bigArrays.grow(counts, bucket + 1); + if (values.advanceExact(doc)) { + counts.increment(bucket, values.docValueCount()); + } + } + }; + } + // Default collector for every other values source, including keyword fields. final SortedBinaryDocValues values = valuesSource.bytesValues(ctx); return new LeafBucketCollectorBase(sub, values) { diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/metrics/ValueCountAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/metrics/ValueCountAggregatorTests.java index 094a4b093cf34..3c71068e91620 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/metrics/ValueCountAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/metrics/ValueCountAggregatorTests.java @@ -21,7 +21,9 @@ import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.Document; +import org.apache.lucene.document.DoubleDocValuesField; import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.LatLonDocValuesField; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedDocValuesField; import org.apache.lucene.document.SortedNumericDocValuesField; @@ -110,6 +112,38 @@ protected ScriptService getMockScriptService() { return new ScriptService(Settings.EMPTY, engines, ScriptModule.CORE_CONTEXTS); } + + public void testGeoField() throws IOException { + testCase(new MatchAllDocsQuery(), ValueType.GEOPOINT, iw -> { + for (int i = 0; i < 10; i++) { + Document document = new Document(); + document.add(new LatLonDocValuesField(FIELD_NAME, 10, 10)); + iw.addDocument(document); + } + }, count -> assertEquals(10L, count.getValue())); + } 
+ + public void testDoubleField() throws IOException { + testCase(new MatchAllDocsQuery(), ValueType.DOUBLE, iw -> { + for (int i = 0; i < 15; i++) { + Document document = new Document(); + document.add(new DoubleDocValuesField(FIELD_NAME, 23D)); + iw.addDocument(document); + } + }, count -> assertEquals(15L, count.getValue())); + } + + public void testKeyWordField() throws IOException { + testCase(new MatchAllDocsQuery(), ValueType.STRING, iw -> { + for (int i = 0; i < 20; i++) { + Document document = new Document(); + document.add(new SortedSetDocValuesField(FIELD_NAME, new BytesRef("stringValue"))); + document.add(new SortedSetDocValuesField(FIELD_NAME, new BytesRef("string11Value"))); + iw.addDocument(document); + } + }, count -> assertEquals(40L, count.getValue())); + } + public void testNoDocs() throws IOException { for (ValueType valueType : ValueType.values()) { testCase(new MatchAllDocsQuery(), valueType, iw -> { From c241bd7f563dcdd1be064bb4283be8b9685f67bf Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Thu, 9 Apr 2020 14:35:07 -0400 Subject: [PATCH 2/2] Scripts change slightly. neat! --- .../metrics/ValueCountAggregatorTests.java | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/metrics/ValueCountAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/metrics/ValueCountAggregatorTests.java index 3c71068e91620..520125726b2c0 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/metrics/ValueCountAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/metrics/ValueCountAggregatorTests.java @@ -78,8 +78,8 @@ public class ValueCountAggregatorTests extends AggregatorTestCase { private static final String FIELD_NAME = "field"; - /** Script to return the {@code _value} provided by aggs framework. 
*/ - private static final String VALUE_SCRIPT = "_value"; + private static final String STRING_VALUE_SCRIPT = "string_value"; + private static final String NUMBER_VALUE_SCRIPT = "number_value"; private static final String SINGLE_SCRIPT = "single"; @Override @@ -101,7 +101,8 @@ protected List getSupportedValuesSourceTypes() { protected ScriptService getMockScriptService() { Map<String, Function<Map<String, Object>, Object>> scripts = new HashMap<>(); - scripts.put(VALUE_SCRIPT, vars -> (Double.valueOf((String) vars.get("_value")) + 1)); + scripts.put(STRING_VALUE_SCRIPT, vars -> (Double.valueOf((String) vars.get("_value")) + 1)); + scripts.put(NUMBER_VALUE_SCRIPT, vars -> (((Number) vars.get("_value")).doubleValue() + 1)); scripts.put(SINGLE_SCRIPT, vars -> 1); MockScriptEngine scriptEngine = new MockScriptEngine(MockScriptEngine.NAME, @@ -273,7 +274,7 @@ public void testRangeFieldValues() throws IOException { public void testValueScriptNumber() throws IOException { ValueCountAggregationBuilder aggregationBuilder = new ValueCountAggregationBuilder("name") .field(FIELD_NAME) - .script(new Script(ScriptType.INLINE, MockScriptEngine.NAME, VALUE_SCRIPT, Collections.emptyMap())); + .script(new Script(ScriptType.INLINE, MockScriptEngine.NAME, NUMBER_VALUE_SCRIPT, Collections.emptyMap())); MappedFieldType fieldType = createMappedFieldType(ValueType.NUMERIC); fieldType.setName(FIELD_NAME); @@ -322,7 +323,7 @@ public void testSingleScriptNumber() throws IOException { public void testValueScriptString() throws IOException { ValueCountAggregationBuilder aggregationBuilder = new ValueCountAggregationBuilder("name") .field(FIELD_NAME) - .script(new Script(ScriptType.INLINE, MockScriptEngine.NAME, VALUE_SCRIPT, Collections.emptyMap())); + .script(new Script(ScriptType.INLINE, MockScriptEngine.NAME, STRING_VALUE_SCRIPT, Collections.emptyMap())); MappedFieldType fieldType = createMappedFieldType(ValueType.STRING); fieldType.setName(FIELD_NAME);