Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.HashSet;
import java.util.Objects;
import java.util.Set;

/**
* Hyperloglog++ counter, implemented based on pseudo code from
Expand Down Expand Up @@ -420,6 +423,32 @@ public void close() {
Releasables.close(runLens, hashSet.sizes);
}

private Set<Object> getComparableData(long bucket) {
Set<Object> values = new HashSet<>();
if (algorithm.get(bucket) == LINEAR_COUNTING) {
try (IntArray hashSetValues = hashSet.values(bucket)) {
for (long i = 0; i < hashSetValues.size(); i++) {
values.add(hashSetValues.get(i));
}
}
} else {
for (long i = 0; i < runLens.size(); i++) {
values.add(runLens.get((bucket << p) + i));
}
}
return values;
}

public int hashCode(long bucket) {
return Objects.hash(p, algorithm.get(bucket), getComparableData(bucket));
}

public boolean equals(long bucket, HyperLogLogPlusPlus other) {
return Objects.equals(p, other.p) &&
Objects.equals(algorithm.get(bucket), other.algorithm.get(bucket)) &&
Objects.equals(getComparableData(bucket), getComparableData(bucket));
}

/**
* We are actually using HyperLogLog's runLens array but interpreting it as a hash set
* for linear counting.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,4 +113,18 @@ public XContentBuilder doXContentBody(XContentBuilder builder, Params params) th
return builder;
}

@Override
protected int doHashCode() {
return counts.hashCode(0);
}

@Override
protected boolean doEquals(Object obj) {
InternalCardinality other = (InternalCardinality) obj;
return counts.equals(0, other.counts);
}

HyperLogLogPlusPlus getState() {
return counts;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.aggregations.bucket;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.store.Directory;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.aggregations.AggregatorTestCase;
import org.elasticsearch.search.aggregations.bucket.filter.FilterAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.filter.InternalFilter;
import org.junit.Before;

public class FilterAggregatorTests extends AggregatorTestCase {
private MappedFieldType fieldType;

@Before
public void setUpTest() throws Exception {
super.setUp();
fieldType = new KeywordFieldMapper.KeywordFieldType();
fieldType.setHasDocValues(true);
fieldType.setIndexOptions(IndexOptions.DOCS);
fieldType.setName("field");
}

public void testEmpty() throws Exception {
Directory directory = newDirectory();
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
indexWriter.close();
IndexReader indexReader = DirectoryReader.open(directory);
IndexSearcher indexSearcher = newSearcher(indexReader, true, true);
QueryBuilder filter = QueryBuilders.termQuery("field", randomAsciiOfLength(5));
FilterAggregationBuilder builder = new FilterAggregationBuilder("test", filter);
InternalFilter response = search(indexSearcher, new MatchAllDocsQuery(), builder,
fieldType);
assertEquals(response.getDocCount(), 0);
indexReader.close();
directory.close();
}

public void testRandom() throws Exception {
Directory directory = newDirectory();
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
int numDocs = randomIntBetween(100, 200);
int maxTerm = randomIntBetween(10, 50);
int[] expectedBucketCount = new int[maxTerm];
Document document = new Document();
for (int i = 0; i < numDocs; i++) {
if (frequently()) {
// make sure we have more than one segment to test the merge
indexWriter.getReader().close();
}
int value = randomInt(maxTerm-1);
expectedBucketCount[value] += 1;
document.add(new Field("field", Integer.toString(value), fieldType));
indexWriter.addDocument(document);
document.clear();
}
indexWriter.close();

IndexReader indexReader = DirectoryReader.open(directory);
IndexSearcher indexSearcher = newSearcher(indexReader, true, true);
int value = randomInt(maxTerm - 1);
QueryBuilder filter = QueryBuilders.termQuery("field", Integer.toString(value));
FilterAggregationBuilder builder = new FilterAggregationBuilder("test", filter);

for (boolean doReduce : new boolean[] {true, false}) {
final InternalFilter response;
if (doReduce) {
response = searchAndReduce(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
} else {
response = search(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
}
assertEquals(response.getDocCount(), (long) expectedBucketCount[value]);
}
indexReader.close();
directory.close();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.search.aggregations.metrics;

import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.FieldValueQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.elasticsearch.common.CheckedConsumer;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.NumberFieldMapper;
import org.elasticsearch.search.aggregations.AggregatorTestCase;
import org.elasticsearch.search.aggregations.metrics.cardinality.CardinalityAggregationBuilder;
import org.elasticsearch.search.aggregations.metrics.cardinality.CardinalityAggregator;
import org.elasticsearch.search.aggregations.metrics.cardinality.InternalCardinality;
import org.elasticsearch.search.aggregations.support.ValueType;

import java.io.IOException;
import java.util.Arrays;
import java.util.function.Consumer;

import static java.util.Collections.singleton;

public class CardinalityAggregatorTests extends AggregatorTestCase {
public void testNoDocs() throws IOException {
testCase(new MatchAllDocsQuery(), iw -> {
// Intentionally not writing any docs
}, card -> {
assertEquals(0.0, card.getValue(), 0);
});
}

public void testNoMatchingField() throws IOException {
testCase(new MatchAllDocsQuery(), iw -> {
iw.addDocument(singleton(new SortedNumericDocValuesField("wrong_number", 7)));
iw.addDocument(singleton(new SortedNumericDocValuesField("wrong_number", 1)));
}, card -> {
assertEquals(0.0, card.getValue(), 0);
});
}

public void testSomeMatchesSortedNumericDocValues() throws IOException {
testCase(new FieldValueQuery("number"), iw -> {
iw.addDocument(singleton(new SortedNumericDocValuesField("number", 7)));
iw.addDocument(singleton(new SortedNumericDocValuesField("number", 1)));
}, card -> {
assertEquals(2, card.getValue(), 0);
});
}

public void testSomeMatchesNumericDocValues() throws IOException {
testCase(new FieldValueQuery("number"), iw -> {
iw.addDocument(singleton(new NumericDocValuesField("number", 7)));
iw.addDocument(singleton(new NumericDocValuesField("number", 1)));
}, card -> {
assertEquals(2, card.getValue(), 0);
});
}

public void testQueryFiltering() throws IOException {
testCase(IntPoint.newRangeQuery("number", 0, 5), iw -> {
iw.addDocument(Arrays.asList(new IntPoint("number", 7),
new SortedNumericDocValuesField("number", 7)));
iw.addDocument(Arrays.asList(new IntPoint("number", 1),
new SortedNumericDocValuesField("number", 1)));
}, card -> {
assertEquals(1, card.getValue(), 0);
});
}

public void testQueryFiltersAll() throws IOException {
testCase(IntPoint.newRangeQuery("number", -1, 0), iw -> {
iw.addDocument(Arrays.asList(new IntPoint("number", 7),
new SortedNumericDocValuesField("number", 7)));
iw.addDocument(Arrays.asList(new IntPoint("number", 1),
new SortedNumericDocValuesField("number", 1)));
}, card -> {
assertEquals(0.0, card.getValue(), 0);
});
}

private void testCase(Query query, CheckedConsumer<RandomIndexWriter, IOException> buildIndex,
Consumer<InternalCardinality> verify) throws IOException {
Directory directory = newDirectory();
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
buildIndex.accept(indexWriter);
indexWriter.close();

IndexReader indexReader = DirectoryReader.open(directory);
IndexSearcher indexSearcher = newSearcher(indexReader, true, true);

CardinalityAggregationBuilder aggregationBuilder = new CardinalityAggregationBuilder(
"_name", ValueType.NUMERIC).field("number");
MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(
NumberFieldMapper.NumberType.LONG);
fieldType.setName("number");
try (CardinalityAggregator aggregator = createAggregator(aggregationBuilder, indexSearcher,
fieldType)) {
aggregator.preCollection();
indexSearcher.search(query, aggregator);
aggregator.postCollection();
verify.accept((InternalCardinality) aggregator.buildAggregation(0L));
}
indexReader.close();
directory.close();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.search.aggregations.metrics.cardinality;

import org.elasticsearch.common.io.stream.Writeable.Reader;
import org.elasticsearch.common.lease.Releasables;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.MockBigArrays;
import org.elasticsearch.indices.breaker.NoneCircuitBreakerService;
import org.elasticsearch.search.aggregations.InternalAggregationTestCase;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
import org.junit.After;
import org.junit.Before;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

public class InternalCardinalityTests extends InternalAggregationTestCase<InternalCardinality> {
private static List<HyperLogLogPlusPlus> algos;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this variable never seems to be read?

Copy link
Contributor Author

@colings86 colings86 Mar 31, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is needed so we keep a reference to the HyperLogLogPlusPlus implementations we create in createTestInstance() so we can close them and release the appropriate BigArray pages after the test is complete (see the cleanup() method).

private static int p;

@Before
public void setup() {
algos = new ArrayList<>();
p = randomIntBetween(HyperLogLogPlusPlus.MIN_PRECISION, HyperLogLogPlusPlus.MAX_PRECISION);
}

@Override
protected InternalCardinality createTestInstance(String name,
List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) {
HyperLogLogPlusPlus hllpp = new HyperLogLogPlusPlus(p,
new MockBigArrays(Settings.EMPTY, new NoneCircuitBreakerService()), 1);
algos.add(hllpp);
for (int i = 0; i < 100; i++) {
hllpp.collect(0, randomIntBetween(1, 100));
}
return new InternalCardinality(name, hllpp, pipelineAggregators, metaData);
}

@Override
protected Reader<InternalCardinality> instanceReader() {
return InternalCardinality::new;
}

@Override
protected void assertReduced(InternalCardinality reduced, List<InternalCardinality> inputs) {
HyperLogLogPlusPlus[] algos = inputs.stream().map(InternalCardinality::getState)
.toArray(size -> new HyperLogLogPlusPlus[size]);
if (algos.length > 0) {
HyperLogLogPlusPlus result = algos[0];
for (int i = 1; i < algos.length; i++) {
result.merge(0, algos[i], 0);
}
assertEquals(result.cardinality(0), reduced.value(), 0);
}
}

@After
public void cleanup() {
Releasables.close(algos);
algos.clear();
algos = null;
}
}