elastic · colings86 · Apr 3, 2017 · Mar 29, 2017 · Mar 31, 2017 · jpountz
diff --git a/...n/java/org/elasticsearch/search/aggregations/metrics/cardinality/HyperLogLogPlusPlus.java b/...n/java/org/elasticsearch/search/aggregations/metrics/cardinality/HyperLogLogPlusPlus.java
@@ -34,6 +34,9 @@
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.nio.ByteOrder;
+import java.util.HashSet;
+import java.util.Objects;
+import java.util.Set;
 
 /**
  * Hyperloglog++ counter, implemented based on pseudo code from
@@ -420,6 +423,32 @@ public void close() {
         Releasables.close(runLens, hashSet.sizes);
     }
 
+    private Set<Object> getComparableData(long bucket) {
+        Set<Object> values = new HashSet<>();
+        if (algorithm.get(bucket) == LINEAR_COUNTING) {
+            try (IntArray hashSetValues = hashSet.values(bucket)) {
+                for (long i = 0; i < hashSetValues.size(); i++) {
+                    values.add(hashSetValues.get(i));
+                }
+            }
+        } else {
+            for (long i = 0; i < runLens.size(); i++) {
+                values.add(runLens.get((bucket << p) + i));
+            }
+        }
+        return values;
+    }
+
+    public int hashCode(long bucket) {
+        return Objects.hash(p, algorithm.get(bucket), getComparableData(bucket));
+    }
+
+    public boolean equals(long bucket, HyperLogLogPlusPlus other) {
+        return Objects.equals(p, other.p) &&
+                Objects.equals(algorithm.get(bucket), other.algorithm.get(bucket)) &&
+                Objects.equals(getComparableData(bucket), getComparableData(bucket));
+    }
+
     /**
      * We are actually using HyperLogLog's runLens array but interpreting it as a hash set
      * for linear counting.

diff --git a/...n/java/org/elasticsearch/search/aggregations/metrics/cardinality/InternalCardinality.java b/...n/java/org/elasticsearch/search/aggregations/metrics/cardinality/InternalCardinality.java
@@ -113,4 +113,18 @@ public XContentBuilder doXContentBody(XContentBuilder builder, Params params) th
         return builder;
     }
 
+    @Override
+    protected int doHashCode() {
+        return counts.hashCode(0);
+    }
+
+    @Override
+    protected boolean doEquals(Object obj) {
+        InternalCardinality other = (InternalCardinality) obj;
+        return counts.equals(0, other.counts);
+    }
+
+    HyperLogLogPlusPlus getState() {
+        return counts;
+    }
 }
diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/FilterAggregatorTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/FilterAggregatorTests.java
@@ -0,0 +1,104 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.search.aggregations.bucket;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.store.Directory;
+import org.elasticsearch.index.mapper.KeywordFieldMapper;
+import org.elasticsearch.index.mapper.MappedFieldType;
+import org.elasticsearch.index.query.QueryBuilder;
+import org.elasticsearch.index.query.QueryBuilders;
+import org.elasticsearch.search.aggregations.AggregatorTestCase;
+import org.elasticsearch.search.aggregations.bucket.filter.FilterAggregationBuilder;
+import org.elasticsearch.search.aggregations.bucket.filter.InternalFilter;
+import org.junit.Before;
+
+public class FilterAggregatorTests extends AggregatorTestCase {
+    private MappedFieldType fieldType;
+
+    @Before
+    public void setUpTest() throws Exception {
+        super.setUp();
+        fieldType = new KeywordFieldMapper.KeywordFieldType();
+        fieldType.setHasDocValues(true);
+        fieldType.setIndexOptions(IndexOptions.DOCS);
+        fieldType.setName("field");
+    }
+
+    public void testEmpty() throws Exception {
+        Directory directory = newDirectory();
+        RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
+        indexWriter.close();
+        IndexReader indexReader = DirectoryReader.open(directory);
+        IndexSearcher indexSearcher = newSearcher(indexReader, true, true);
+        QueryBuilder filter = QueryBuilders.termQuery("field", randomAsciiOfLength(5));
+        FilterAggregationBuilder builder = new FilterAggregationBuilder("test", filter);
+        InternalFilter response = search(indexSearcher, new MatchAllDocsQuery(), builder,
+                fieldType);
+        assertEquals(response.getDocCount(), 0);
+        indexReader.close();
+        directory.close();
+    }
+
+    public void testRandom() throws Exception {
+        Directory directory = newDirectory();
+        RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
+        int numDocs = randomIntBetween(100, 200);
+        int maxTerm = randomIntBetween(10, 50);
+        int[] expectedBucketCount = new int[maxTerm];
+        Document document = new Document();
+        for (int i = 0; i < numDocs; i++) {
+            if (frequently()) {
+                // make sure we have more than one segment to test the merge
+                indexWriter.getReader().close();
+            }
+            int value = randomInt(maxTerm-1);
+            expectedBucketCount[value] += 1;
+            document.add(new Field("field", Integer.toString(value), fieldType));
+            indexWriter.addDocument(document);
+            document.clear();
+        }
+        indexWriter.close();
+
+        IndexReader indexReader = DirectoryReader.open(directory);
+        IndexSearcher indexSearcher = newSearcher(indexReader, true, true);
+        int value = randomInt(maxTerm - 1);
+        QueryBuilder filter = QueryBuilders.termQuery("field", Integer.toString(value));
+        FilterAggregationBuilder builder = new FilterAggregationBuilder("test", filter);
+
+        for (boolean doReduce : new boolean[] {true, false}) {
+            final InternalFilter response;
+            if (doReduce) {
+                response = searchAndReduce(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
+            } else {
+                response = search(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
+            }
+            assertEquals(response.getDocCount(), (long) expectedBucketCount[value]);
+        }
+        indexReader.close();
+        directory.close();
+    }
+}
diff --git a/...c/test/java/org/elasticsearch/search/aggregations/metrics/CardinalityAggregatorTests.java b/...c/test/java/org/elasticsearch/search/aggregations/metrics/CardinalityAggregatorTests.java
@@ -0,0 +1,131 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.aggregations.metrics;
+
+import org.apache.lucene.document.IntPoint;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.SortedNumericDocValuesField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.search.FieldValueQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.store.Directory;
+import org.elasticsearch.common.CheckedConsumer;
+import org.elasticsearch.index.mapper.MappedFieldType;
+import org.elasticsearch.index.mapper.NumberFieldMapper;
+import org.elasticsearch.search.aggregations.AggregatorTestCase;
+import org.elasticsearch.search.aggregations.metrics.cardinality.CardinalityAggregationBuilder;
+import org.elasticsearch.search.aggregations.metrics.cardinality.CardinalityAggregator;
+import org.elasticsearch.search.aggregations.metrics.cardinality.InternalCardinality;
+import org.elasticsearch.search.aggregations.support.ValueType;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.function.Consumer;
+
+import static java.util.Collections.singleton;
+
+public class CardinalityAggregatorTests extends AggregatorTestCase {
+    public void testNoDocs() throws IOException {
+        testCase(new MatchAllDocsQuery(), iw -> {
+            // Intentionally not writing any docs
+        }, card -> {
+            assertEquals(0.0, card.getValue(), 0);
+        });
+    }
+
+    public void testNoMatchingField() throws IOException {
+        testCase(new MatchAllDocsQuery(), iw -> {
+            iw.addDocument(singleton(new SortedNumericDocValuesField("wrong_number", 7)));
+            iw.addDocument(singleton(new SortedNumericDocValuesField("wrong_number", 1)));
+        }, card -> {
+            assertEquals(0.0, card.getValue(), 0);
+        });
+    }
+
+    public void testSomeMatchesSortedNumericDocValues() throws IOException {
+        testCase(new FieldValueQuery("number"), iw -> {
+            iw.addDocument(singleton(new SortedNumericDocValuesField("number", 7)));
+            iw.addDocument(singleton(new SortedNumericDocValuesField("number", 1)));
+        }, card -> {
+            assertEquals(2, card.getValue(), 0);
+        });
+    }
+
+    public void testSomeMatchesNumericDocValues() throws IOException {
+        testCase(new FieldValueQuery("number"), iw -> {
+            iw.addDocument(singleton(new NumericDocValuesField("number", 7)));
+            iw.addDocument(singleton(new NumericDocValuesField("number", 1)));
+        }, card -> {
+            assertEquals(2, card.getValue(), 0);
+        });
+    }
+
+    public void testQueryFiltering() throws IOException {
+        testCase(IntPoint.newRangeQuery("number", 0, 5), iw -> {
+            iw.addDocument(Arrays.asList(new IntPoint("number", 7),
+                    new SortedNumericDocValuesField("number", 7)));
+            iw.addDocument(Arrays.asList(new IntPoint("number", 1),
+                    new SortedNumericDocValuesField("number", 1)));
+        }, card -> {
+            assertEquals(1, card.getValue(), 0);
+        });
+    }
+
+    public void testQueryFiltersAll() throws IOException {
+        testCase(IntPoint.newRangeQuery("number", -1, 0), iw -> {
+            iw.addDocument(Arrays.asList(new IntPoint("number", 7),
+                    new SortedNumericDocValuesField("number", 7)));
+            iw.addDocument(Arrays.asList(new IntPoint("number", 1),
+                    new SortedNumericDocValuesField("number", 1)));
+        }, card -> {
+            assertEquals(0.0, card.getValue(), 0);
+        });
+    }
+
+    private void testCase(Query query, CheckedConsumer<RandomIndexWriter, IOException> buildIndex,
+            Consumer<InternalCardinality> verify) throws IOException {
+        Directory directory = newDirectory();
+        RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
+        buildIndex.accept(indexWriter);
+        indexWriter.close();
+
+        IndexReader indexReader = DirectoryReader.open(directory);
+        IndexSearcher indexSearcher = newSearcher(indexReader, true, true);
+
+        CardinalityAggregationBuilder aggregationBuilder = new CardinalityAggregationBuilder(
+                "_name", ValueType.NUMERIC).field("number");
+        MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(
+                NumberFieldMapper.NumberType.LONG);
+        fieldType.setName("number");
+        try (CardinalityAggregator aggregator = createAggregator(aggregationBuilder, indexSearcher,
+                fieldType)) {
+            aggregator.preCollection();
+            indexSearcher.search(query, aggregator);
+            aggregator.postCollection();
+            verify.accept((InternalCardinality) aggregator.buildAggregation(0L));
+        }
+        indexReader.close();
+        directory.close();
+    }
+}
diff --git a/...a/org/elasticsearch/search/aggregations/metrics/cardinality/InternalCardinalityTests.java b/...a/org/elasticsearch/search/aggregations/metrics/cardinality/InternalCardinalityTests.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.aggregations.metrics.cardinality;
+
+import org.elasticsearch.common.io.stream.Writeable.Reader;
+import org.elasticsearch.common.lease.Releasables;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.util.MockBigArrays;
+import org.elasticsearch.indices.breaker.NoneCircuitBreakerService;
+import org.elasticsearch.search.aggregations.InternalAggregationTestCase;
+import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
+import org.junit.After;
+import org.junit.Before;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+public class InternalCardinalityTests extends InternalAggregationTestCase<InternalCardinality> {
+    private static List<HyperLogLogPlusPlus> algos;
+    private static int p;
+
+    @Before
+    public void setup() {
+        algos = new ArrayList<>();
+        p = randomIntBetween(HyperLogLogPlusPlus.MIN_PRECISION, HyperLogLogPlusPlus.MAX_PRECISION);
+    }
+
+    @Override
+    protected InternalCardinality createTestInstance(String name,
+            List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) {
+        HyperLogLogPlusPlus hllpp = new HyperLogLogPlusPlus(p,
+                new MockBigArrays(Settings.EMPTY, new NoneCircuitBreakerService()), 1);
+        algos.add(hllpp);
+        for (int i = 0; i < 100; i++) {
+            hllpp.collect(0, randomIntBetween(1, 100));
+        }
+        return new InternalCardinality(name, hllpp, pipelineAggregators, metaData);
+    }
+
+    @Override
+    protected Reader<InternalCardinality> instanceReader() {
+        return InternalCardinality::new;
+    }
+
+    @Override
+    protected void assertReduced(InternalCardinality reduced, List<InternalCardinality> inputs) {
+        HyperLogLogPlusPlus[] algos = inputs.stream().map(InternalCardinality::getState)
+                .toArray(size -> new HyperLogLogPlusPlus[size]);
+        if (algos.length > 0) {
+            HyperLogLogPlusPlus result = algos[0];
+            for (int i = 1; i < algos.length; i++) {
+                result.merge(0, algos[i], 0);
+            }
+            assertEquals(result.cardinality(0), reduced.value(), 0);
+        }
+    }
+
+    @After
+    public void cleanup() {
+        Releasables.close(algos);
+        algos.clear();
+        algos = null;
+    }
+}