From 1e8be42d5c41c5d37c0e60d14eab5d8508416bda Mon Sep 17 00:00:00 2001 From: Colin Goodheart-Smithe Date: Wed, 29 Mar 2017 17:25:16 +0100 Subject: [PATCH 1/2] Adds tests for cardinality and filter aggregations Relates to #22278 --- .../cardinality/HyperLogLogPlusPlus.java | 47 +++++++ .../cardinality/InternalCardinality.java | 14 ++ .../bucket/FilterAggregatorTests.java | 104 ++++++++++++++ .../metrics/CardinalityAggregatorTests.java | 131 ++++++++++++++++++ .../cardinality/InternalCardinalityTests.java | 82 +++++++++++ 5 files changed, 378 insertions(+) create mode 100644 core/src/test/java/org/elasticsearch/search/aggregations/bucket/FilterAggregatorTests.java create mode 100644 core/src/test/java/org/elasticsearch/search/aggregations/metrics/CardinalityAggregatorTests.java create mode 100644 core/src/test/java/org/elasticsearch/search/aggregations/metrics/cardinality/InternalCardinalityTests.java diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/cardinality/HyperLogLogPlusPlus.java b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/cardinality/HyperLogLogPlusPlus.java index 42b4561e07b3c..94ac0af73e8ed 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/cardinality/HyperLogLogPlusPlus.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/cardinality/HyperLogLogPlusPlus.java @@ -34,6 +34,9 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.util.HashSet; +import java.util.Objects; +import java.util.Set; /** * Hyperloglog++ counter, implemented based on pseudo code from @@ -420,6 +423,33 @@ public void close() { Releasables.close(runLens, hashSet.sizes); } + private Set getComparableData(long bucket) { + Set values = new HashSet<>(); + if (algorithm.get(bucket) == LINEAR_COUNTING) { + try (IntArray hashSetValues = hashSet.values(bucket)) { + for (long i = 0; i < hashSetValues.size(); i++) { + values.add(hashSetValues.get(i)); + } + } + } else { + for (long i = 0; i < runLens.size(); i++) { + values.add(runLens.get(i)); + } + } + return values; + } + + public int hashCode(long bucket) { + return Objects.hash(p, algorithm.get(bucket), getComparableData(bucket)); + } + + public boolean equals(long bucket, HyperLogLogPlusPlus other) { + return Objects.equals(p, other.p) && + Objects.equals(algorithm.get(bucket), other.algorithm.get(bucket)) + && + Objects.equals(getComparableData(bucket), getComparableData(bucket)); + } + /** * We are actually using HyperLogLog's runLens array but interpreting it as a hash set * for linear counting. @@ -578,6 +608,23 @@ void clear(long bit) { ensureCapacity(bit); impl.clear(bit); } + + @Override + public int hashCode() { + return Objects.hash(impl); + } + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + OpenBitSet other = (OpenBitSet) obj; + return Objects.equals(impl, other.impl); + } } } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/cardinality/InternalCardinality.java b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/cardinality/InternalCardinality.java index 68e8935616f33..028e97a69ff82 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/cardinality/InternalCardinality.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/cardinality/InternalCardinality.java @@ -113,4 +113,18 @@ public XContentBuilder doXContentBody(XContentBuilder builder, Params params) th return builder; } + @Override + protected int doHashCode() { + return counts.hashCode(0); + } + + @Override + protected boolean doEquals(Object obj) { + InternalCardinality other = (InternalCardinality) obj; + return counts.equals(0, other.counts); + } + + HyperLogLogPlusPlus getState() { + return counts; + } } diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/FilterAggregatorTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/FilterAggregatorTests.java new file mode 100644 index 0000000000000..bbf6978348941 --- /dev/null +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/FilterAggregatorTests.java @@ -0,0 +1,104 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.search.aggregations.bucket; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.store.Directory; +import org.elasticsearch.index.mapper.KeywordFieldMapper; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.search.aggregations.AggregatorTestCase; +import org.elasticsearch.search.aggregations.bucket.filter.FilterAggregationBuilder; +import org.elasticsearch.search.aggregations.bucket.filter.InternalFilter; +import org.junit.Before; + +public class FilterAggregatorTests extends AggregatorTestCase { + private MappedFieldType fieldType; + + @Before + public void setUpTest() throws Exception { + super.setUp(); + fieldType = new KeywordFieldMapper.KeywordFieldType(); + fieldType.setHasDocValues(true); + fieldType.setIndexOptions(IndexOptions.DOCS); + fieldType.setName("field"); + } + + public void testEmpty() throws Exception { + Directory directory = newDirectory(); + RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory); + indexWriter.close(); + IndexReader indexReader = DirectoryReader.open(directory); + IndexSearcher indexSearcher = newSearcher(indexReader, true, true); + QueryBuilder filter = QueryBuilders.termQuery("field", randomAsciiOfLength(5)); + FilterAggregationBuilder builder = new FilterAggregationBuilder("test", filter); + InternalFilter response = search(indexSearcher, new MatchAllDocsQuery(), builder, + fieldType); + assertEquals(response.getDocCount(), 0); + indexReader.close(); + directory.close(); + } + + public void testRandom() throws Exception { + Directory directory = newDirectory(); + RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory); + int numDocs = randomIntBetween(100, 200); + int maxTerm = randomIntBetween(10, 50); + int[] expectedBucketCount = new int[maxTerm]; + Document document = new Document(); + for (int i = 0; i < numDocs; i++) { + if (frequently()) { + // make sure we have more than one segment to test the merge + indexWriter.commit(); + } + int value = randomInt(maxTerm-1); + expectedBucketCount[value] += 1; + document.add(new Field("field", Integer.toString(value), fieldType)); + indexWriter.addDocument(document); + document.clear(); + } + indexWriter.close(); + + IndexReader indexReader = DirectoryReader.open(directory); + IndexSearcher indexSearcher = newSearcher(indexReader, true, true); + int value = randomInt(maxTerm - 1); + QueryBuilder filter = QueryBuilders.termQuery("field", Integer.toString(value)); + FilterAggregationBuilder builder = new FilterAggregationBuilder("test", filter); + + for (boolean doReduce : new boolean[] {true, false}) { + final InternalFilter response; + if (doReduce) { + response = searchAndReduce(indexSearcher, new MatchAllDocsQuery(), builder, fieldType); + } else { + response = search(indexSearcher, new MatchAllDocsQuery(), builder, fieldType); + } + assertEquals(response.getDocCount(), (long) expectedBucketCount[value]); + } + indexReader.close(); + directory.close(); + } +} diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/metrics/CardinalityAggregatorTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/metrics/CardinalityAggregatorTests.java new file mode 100644 index 0000000000000..b80dd163fc97f --- /dev/null +++ b/core/src/test/java/org/elasticsearch/search/aggregations/metrics/CardinalityAggregatorTests.java @@ -0,0 +1,131 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.aggregations.metrics; + +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.search.FieldValueQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.store.Directory; +import org.elasticsearch.common.CheckedConsumer; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.NumberFieldMapper; +import org.elasticsearch.search.aggregations.AggregatorTestCase; +import org.elasticsearch.search.aggregations.metrics.cardinality.CardinalityAggregationBuilder; +import org.elasticsearch.search.aggregations.metrics.cardinality.CardinalityAggregator; +import org.elasticsearch.search.aggregations.metrics.cardinality.InternalCardinality; +import org.elasticsearch.search.aggregations.support.ValueType; + +import java.io.IOException; +import java.util.Arrays; +import java.util.function.Consumer; + +import static java.util.Collections.singleton; + +public class CardinalityAggregatorTests extends AggregatorTestCase { + public void testNoDocs() throws IOException { + testCase(new MatchAllDocsQuery(), iw -> { + // Intentionally not writing any docs + }, card -> { + assertEquals(0.0, card.getValue(), 0); + }); + } + + public void testNoMatchingField() throws IOException { + testCase(new MatchAllDocsQuery(), iw -> { + iw.addDocument(singleton(new SortedNumericDocValuesField("wrong_number", 7))); + iw.addDocument(singleton(new SortedNumericDocValuesField("wrong_number", 1))); + }, card -> { + assertEquals(0.0, card.getValue(), 0); + }); + } + + public void testSomeMatchesSortedNumericDocValues() throws IOException { + testCase(new FieldValueQuery("number"), iw -> { + iw.addDocument(singleton(new SortedNumericDocValuesField("number", 7))); + iw.addDocument(singleton(new SortedNumericDocValuesField("number", 1))); + }, card -> { + assertEquals(2, card.getValue(), 0); + }); + } + + public void testSomeMatchesNumericDocValues() throws IOException { + testCase(new FieldValueQuery("number"), iw -> { + iw.addDocument(singleton(new NumericDocValuesField("number", 7))); + iw.addDocument(singleton(new NumericDocValuesField("number", 1))); + }, card -> { + assertEquals(2, card.getValue(), 0); + }); + } + + public void testQueryFiltering() throws IOException { + testCase(IntPoint.newRangeQuery("number", 0, 5), iw -> { + iw.addDocument(Arrays.asList(new IntPoint("number", 7), + new SortedNumericDocValuesField("number", 7))); + iw.addDocument(Arrays.asList(new IntPoint("number", 1), + new SortedNumericDocValuesField("number", 1))); + }, card -> { + assertEquals(1, card.getValue(), 0); + }); + } + + public void testQueryFiltersAll() throws IOException { + testCase(IntPoint.newRangeQuery("number", -1, 0), iw -> { + iw.addDocument(Arrays.asList(new IntPoint("number", 7), + new SortedNumericDocValuesField("number", 7))); + iw.addDocument(Arrays.asList(new IntPoint("number", 1), + new SortedNumericDocValuesField("number", 1))); + }, card -> { + assertEquals(0.0, card.getValue(), 0); + }); + } + + private void testCase(Query query, CheckedConsumer buildIndex, + Consumer verify) throws IOException { + Directory directory = newDirectory(); + RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory); + buildIndex.accept(indexWriter); + indexWriter.close(); + + IndexReader indexReader = DirectoryReader.open(directory); + IndexSearcher indexSearcher = newSearcher(indexReader, true, true); + + CardinalityAggregationBuilder aggregationBuilder = new CardinalityAggregationBuilder( + "_name", ValueType.NUMERIC).field("number"); + MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType( + NumberFieldMapper.NumberType.LONG); + fieldType.setName("number"); + try (CardinalityAggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, + fieldType)) { + aggregator.preCollection(); + indexSearcher.search(query, aggregator); + aggregator.postCollection(); + verify.accept((InternalCardinality) aggregator.buildAggregation(0L)); + } + indexReader.close(); + directory.close(); + } +} diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/metrics/cardinality/InternalCardinalityTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/metrics/cardinality/InternalCardinalityTests.java new file mode 100644 index 0000000000000..7c5809f323bdf --- /dev/null +++ b/core/src/test/java/org/elasticsearch/search/aggregations/metrics/cardinality/InternalCardinalityTests.java @@ -0,0 +1,82 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.aggregations.metrics.cardinality; + +import org.elasticsearch.common.io.stream.Writeable.Reader; +import org.elasticsearch.common.lease.Releasables; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.util.MockBigArrays; +import org.elasticsearch.indices.breaker.NoneCircuitBreakerService; +import org.elasticsearch.search.aggregations.InternalAggregationTestCase; +import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; +import org.junit.After; +import org.junit.Before; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +public class InternalCardinalityTests extends InternalAggregationTestCase { + private static List algos; + private static int p; + + @Before + public void setup() { + algos = new ArrayList<>(); + p = randomIntBetween(HyperLogLogPlusPlus.MIN_PRECISION, HyperLogLogPlusPlus.MAX_PRECISION); + } + + @Override + protected InternalCardinality createTestInstance(String name, + List pipelineAggregators, Map metaData) { + HyperLogLogPlusPlus hllpp = new HyperLogLogPlusPlus(p, + new MockBigArrays(Settings.EMPTY, new NoneCircuitBreakerService()), 1); + algos.add(hllpp); + for (int i = 0; i < 100; i++) { + hllpp.collect(0, randomIntBetween(1, 100)); + } + return new InternalCardinality(name, hllpp, pipelineAggregators, metaData); + } + + @Override + protected Reader instanceReader() { + return InternalCardinality::new; + } + + @Override + protected void assertReduced(InternalCardinality reduced, List inputs) { + HyperLogLogPlusPlus[] algos = inputs.stream().map(InternalCardinality::getState) + .toArray(size -> new HyperLogLogPlusPlus[size]); + if (algos.length > 0) { + HyperLogLogPlusPlus result = algos[0]; + for (int i = 1; i < algos.length; i++) { + result.merge(0, algos[i], 0); + } + assertEquals(result.cardinality(0), reduced.value(), 0); + } + } + + @After + public void cleanup() { + Releasables.close(algos); + algos.clear(); + algos = null; + } +} From 3a444429a7080dbbd716504a3ae2ce0af137b69a Mon Sep 17 00:00:00 2001 From: Colin Goodheart-Smithe Date: Fri, 31 Mar 2017 09:43:19 +0100 Subject: [PATCH 2/2] addresses review comments --- .../cardinality/HyperLogLogPlusPlus.java | 22 ++----------------- .../bucket/FilterAggregatorTests.java | 2 +- 2 files changed, 3 insertions(+), 21 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/cardinality/HyperLogLogPlusPlus.java b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/cardinality/HyperLogLogPlusPlus.java index 94ac0af73e8ed..6425cc3b68a2e 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/cardinality/HyperLogLogPlusPlus.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/cardinality/HyperLogLogPlusPlus.java @@ -433,7 +433,7 @@ private Set getComparableData(long bucket) { } } else { for (long i = 0; i < runLens.size(); i++) { - values.add(runLens.get(i)); + values.add(runLens.get((bucket << p) + i)); } } return values; @@ -445,8 +445,7 @@ public int hashCode(long bucket) { public boolean equals(long bucket, HyperLogLogPlusPlus other) { return Objects.equals(p, other.p) && - Objects.equals(algorithm.get(bucket), other.algorithm.get(bucket)) - && + Objects.equals(algorithm.get(bucket), other.algorithm.get(bucket)) && Objects.equals(getComparableData(bucket), getComparableData(bucket)); } @@ -608,23 +607,6 @@ void clear(long bit) { ensureCapacity(bit); impl.clear(bit); } - - @Override - public int hashCode() { - return Objects.hash(impl); - } - - @Override - public boolean equals(Object obj) { - if (obj == null) { - return false; - } - if (getClass() != obj.getClass()) { - return false; - } - OpenBitSet other = (OpenBitSet) obj; - return Objects.equals(impl, other.impl); - } } } diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/FilterAggregatorTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/FilterAggregatorTests.java index bbf6978348941..491f445fdfaff 100644 --- a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/FilterAggregatorTests.java +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/FilterAggregatorTests.java @@ -73,7 +73,7 @@ public void testRandom() throws Exception { for (int i = 0; i < numDocs; i++) { if (frequently()) { // make sure we have more than one segment to test the merge - indexWriter.commit(); + indexWriter.getReader().close(); } int value = randomInt(maxTerm-1); expectedBucketCount[value] += 1;