InternalMappedSignificantTerms.java

@@ -128,6 +128,7 @@ protected int doHashCode() {
     @Override
     public XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException {
         builder.field(CommonFields.DOC_COUNT.getPreferredName(), subsetSize);
+        builder.field(BG_COUNT, supersetSize);
         builder.startArray(CommonFields.BUCKETS.getPreferredName());
         for (Bucket bucket : buckets) {
             //There is a condition (presumably when only one shard has a bucket?) where reduce is not called
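
Note: with this change the serialized significant_terms aggregation reports the background set size alongside the foreground size. An illustrative response fragment (the aggregation name "sig_terms" and the values 4 and 7 are taken from the test expectations further down):

    "sig_terms": {
        "doc_count": 4,
        "bg_count": 7,
        "buckets": [...]
    }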

ParsedSignificantLongTerms.java

@@ -39,9 +39,7 @@ public String getType() {
     }
 
     public static ParsedSignificantLongTerms fromXContent(XContentParser parser, String name) throws IOException {
-        ParsedSignificantLongTerms aggregation = PARSER.parse(parser, null);
-        aggregation.setName(name);
-        return aggregation;
+        return parseSignificantTermsXContent(() -> PARSER.parse(parser, null), name);
     }
 
     public static class ParsedBucket extends ParsedSignificantTerms.ParsedBucket {

ParsedSignificantStringTerms.java

@@ -40,9 +40,7 @@ public String getType() {
     }
 
     public static ParsedSignificantStringTerms fromXContent(XContentParser parser, String name) throws IOException {
-        ParsedSignificantStringTerms aggregation = PARSER.parse(parser, null);
-        aggregation.setName(name);
-        return aggregation;
+        return parseSignificantTermsXContent(() -> PARSER.parse(parser, null), name);
     }
 
     public static class ParsedBucket extends ParsedSignificantTerms.ParsedBucket {

ParsedSignificantTerms.java

@@ -21,6 +21,8 @@
 
 import org.elasticsearch.common.CheckedBiConsumer;
 import org.elasticsearch.common.CheckedFunction;
+import org.elasticsearch.common.CheckedSupplier;
+import org.elasticsearch.common.ParseField;
 import org.elasticsearch.common.xcontent.ObjectParser;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentParser;
@@ -42,11 +44,16 @@ public abstract class ParsedSignificantTerms extends ParsedMultiBucketAggregatio
 
     private Map<String, ParsedBucket> bucketMap;
     protected long subsetSize;
+    protected long supersetSize;
 
     protected long getSubsetSize() {
         return subsetSize;
     }
 
+    protected long getSupersetSize() {
+        return supersetSize;
+    }
+
     @Override
     public List<? extends SignificantTerms.Bucket> getBuckets() {
         return buckets;
@@ -68,6 +75,7 @@ public Iterator<SignificantTerms.Bucket> iterator() {
     @Override
     protected XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException {
         builder.field(CommonFields.DOC_COUNT.getPreferredName(), subsetSize);
+        builder.field(InternalMappedSignificantTerms.BG_COUNT, supersetSize);
         builder.startArray(CommonFields.BUCKETS.getPreferredName());
         for (SignificantTerms.Bucket bucket : buckets) {
             bucket.toXContent(builder, params);
@@ -76,16 +84,31 @@ protected XContentBuilder doXContentBody(XContentBuilder builder, Params params)
         return builder;
     }
 
+    static <T extends ParsedSignificantTerms> T parseSignificantTermsXContent(final CheckedSupplier<T, IOException> aggregationSupplier,
+                                                                              final String name) throws IOException {
+        T aggregation = aggregationSupplier.get();
+        aggregation.setName(name);
+        for (ParsedBucket bucket : aggregation.buckets) {
+            bucket.subsetSize = aggregation.subsetSize;
+            bucket.supersetSize = aggregation.supersetSize;
+        }
+        return aggregation;
+    }
+
     static void declareParsedSignificantTermsFields(final ObjectParser<? extends ParsedSignificantTerms, Void> objectParser,
                                                     final CheckedFunction<XContentParser, ParsedSignificantTerms.ParsedBucket, IOException> bucketParser) {
         declareMultiBucketAggregationFields(objectParser, bucketParser::apply, bucketParser::apply);
         objectParser.declareLong((parsedTerms, value) -> parsedTerms.subsetSize = value , CommonFields.DOC_COUNT);
+        objectParser.declareLong((parsedTerms, value) -> parsedTerms.supersetSize = value ,
+                new ParseField(InternalMappedSignificantTerms.BG_COUNT));
     }
 
     public abstract static class ParsedBucket extends ParsedMultiBucketAggregation.ParsedBucket implements SignificantTerms.Bucket {
 
         protected long subsetDf;
+        protected long subsetSize;
         protected long supersetDf;
+        protected long supersetSize;
         protected double score;
 
         @Override
@@ -110,12 +133,12 @@ public double getSignificanceScore() {
 
         @Override
         public long getSupersetSize() {
-            throw new UnsupportedOperationException();
+            return supersetSize;
         }
 
         @Override
         public long getSubsetSize() {
-            throw new UnsupportedOperationException();
+            return subsetSize;
         }
 
         @Override
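
Note: the two new static helpers are meant to be used together by each concrete subclass, as the ParsedSignificantLongTerms and ParsedSignificantStringTerms changes above show. A minimal sketch of the intended wiring for a hypothetical subclass (the class name, its ParsedBucket.fromXContent, and the omitted abstract methods are assumptions, not part of this PR):

    // Hypothetical subclass, for illustration only.
    public class ParsedSignificantFooTerms extends ParsedSignificantTerms {

        // Registers doc_count and bg_count (plus the common multi-bucket fields)
        // via declareParsedSignificantTermsFields.
        private static final ObjectParser<ParsedSignificantFooTerms, Void> PARSER = new ObjectParser<>(
                ParsedSignificantFooTerms.class.getSimpleName(), true, ParsedSignificantFooTerms::new);
        static {
            declareParsedSignificantTermsFields(PARSER, ParsedBucket::fromXContent);
        }

        public static ParsedSignificantFooTerms fromXContent(XContentParser parser, String name) throws IOException {
            // parseSignificantTermsXContent sets the aggregation name and copies
            // subsetSize/supersetSize down into every parsed bucket.
            return parseSignificantTermsXContent(() -> PARSER.parse(parser, null), name);
        }

        // getType() and the concrete bucket implementation are omitted from this sketch.
    }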

SignificantTerms.java

@@ -29,17 +29,39 @@ public interface SignificantTerms extends MultiBucketsAggregation, Iterable<Sign
 
     interface Bucket extends MultiBucketsAggregation.Bucket {
 
+        /**
+         * @return The significance score for the subset
+         */
         double getSignificanceScore();
 
-        Number getKeyAsNumber();
-
+        /**
+         * @return The number of docs in the subset containing a particular term.
+         * This number is equal to the document count of the bucket.
+         */
         long getSubsetDf();
 
+        /**
+         * @return The number of docs in the subset (also known as the "foreground set").
+         * This number is equal to the document count of the containing aggregation.
+         */
+        long getSubsetSize();
+
+        /**
+         * @return The number of docs in the superset containing a particular term (also
+         * known as the "background count" of the bucket).
+         */
        long getSupersetDf();
 
+        /**
+         * @return The number of docs in the superset (ordinarily the background count
+         * of the containing aggregation).
+         */
         long getSupersetSize();
 
-        long getSubsetSize();
+        /**
+         * @return The key, expressed as a number
+         */
+        Number getKeyAsNumber();
     }
 
     @Override
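
Note: a consumer-side sketch of how the four counts fit together when reading a response (the searchResponse variable and the aggregation name "sig_terms" are assumed):

    SignificantTerms sigTerms = searchResponse.getAggregations().get("sig_terms");
    for (SignificantTerms.Bucket bucket : sigTerms) {
        long subsetDf = bucket.getSubsetDf();         // docs containing the term in the foreground set
        long subsetSize = bucket.getSubsetSize();     // all docs in the foreground set
        long supersetDf = bucket.getSupersetDf();     // docs containing the term in the background set
        long supersetSize = bucket.getSupersetSize(); // all docs in the background set
        // A term is "significant" when subsetDf / (double) subsetSize is large
        // relative to supersetDf / (double) supersetSize, as judged by the heuristic.
        double score = bucket.getSignificanceScore();
    }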

SignificantTermsSignificanceScoreIT.java

@@ -310,6 +310,7 @@ public void testXContentResponse() throws Exception {
                 + "\"doc_count\":4,"
                 + "\"sig_terms\":{"
                 + "\"doc_count\":4,"
+                + "\"bg_count\":7,"
                 + "\"buckets\":["
                 + "{"
                 + "\"key\":" + (type.equals("long") ? "0," : "\"0\",")
@@ -325,6 +326,7 @@
                 + "\"doc_count\":3,"
                 + "\"sig_terms\":{"
                 + "\"doc_count\":3,"
+                + "\"bg_count\":7,"
                 + "\"buckets\":["
                 + "{"
                 + "\"key\":" + (type.equals("long") ? "1," : "\"1\",")
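
Note: for orientation, a request of roughly the shape exercised by this test, with the significant_terms aggregation nested under a terms aggregation (the field names "class" and "text" are assumptions):

    {
      "aggs": {
        "class": {
          "terms": { "field": "class" },
          "aggs": {
            "sig_terms": { "significant_terms": { "field": "text" } }
          }
        }
      }
    }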

InternalSignificantTermsTestCase.java

@@ -19,8 +19,14 @@
 
 package org.elasticsearch.search.aggregations.bucket.significant;
 
+import org.elasticsearch.search.aggregations.InternalAggregations;
 import org.elasticsearch.search.aggregations.InternalMultiBucketAggregationTestCase;
 import org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation;
+import org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare;
+import org.elasticsearch.search.aggregations.bucket.significant.heuristics.GND;
+import org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore;
+import org.elasticsearch.search.aggregations.bucket.significant.heuristics.MutualInformation;
+import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic;
 import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
 
 import java.util.Arrays;
@@ -33,6 +39,51 @@
 
 public abstract class InternalSignificantTermsTestCase extends InternalMultiBucketAggregationTestCase<InternalSignificantTerms<?, ?>> {
 
+    private SignificanceHeuristic significanceHeuristic;
+
+    @Override
+    public void setUp() throws Exception {
+        super.setUp();
+        significanceHeuristic = randomSignificanceHeuristic();
+    }
+
+    @Override
+    protected final InternalSignificantTerms createTestInstance(String name,
+                                                                List<PipelineAggregator> pipelineAggregators,
+                                                                Map<String, Object> metaData,
+                                                                InternalAggregations aggregations) {
+        final int requiredSize = randomIntBetween(1, 5);
+        final int numBuckets = randomInt(requiredSize + 2);
+
+        long subsetSize = 0;
+        long supersetSize = 0;
+
+        int[] subsetDfs = new int[numBuckets];
+        int[] supersetDfs = new int[numBuckets];
+
+        for (int i = 0; i < numBuckets; ++i) {
+            int subsetDf = randomIntBetween(1, 10);
+            subsetDfs[i] = subsetDf;
+
+            int supersetDf = randomIntBetween(subsetDf, 20);
+            supersetDfs[i] = supersetDf;
+
+            subsetSize += subsetDf;
+            supersetSize += supersetDf;
+        }
+        return createTestInstance(name, pipelineAggregators, metaData, aggregations, requiredSize, numBuckets, subsetSize, subsetDfs,
+                supersetSize, supersetDfs, significanceHeuristic);
+    }
+
+    protected abstract InternalSignificantTerms createTestInstance(String name,
+                                                                   List<PipelineAggregator> pipelineAggregators,
+                                                                   Map<String, Object> metaData,
+                                                                   InternalAggregations aggregations,
+                                                                   int requiredSize, int numBuckets,
+                                                                   long subsetSize, int[] subsetDfs,
+                                                                   long supersetSize, int[] supersetDfs,
+                                                                   SignificanceHeuristic significanceHeuristic);
+
     @Override
     protected InternalSignificantTerms createUnmappedInstance(String name,
                                                               List<PipelineAggregator> pipelineAggregators,
@@ -72,6 +123,7 @@ protected void assertMultiBucketsAggregation(MultiBucketsAggregation expected, M
         InternalSignificantTerms expectedSigTerms = (InternalSignificantTerms) expected;
         ParsedSignificantTerms actualSigTerms = (ParsedSignificantTerms) actual;
         assertEquals(expectedSigTerms.getSubsetSize(), actualSigTerms.getSubsetSize());
+        assertEquals(expectedSigTerms.getSupersetSize(), actualSigTerms.getSupersetSize());
 
         for (SignificantTerms.Bucket bucket : (SignificantTerms) expected) {
             String key = bucket.getKeyAsString();
@@ -91,14 +143,22 @@ protected void assertBucket(MultiBucketsAggregation.Bucket expected, MultiBucket
 
         assertEquals(expectedSigTerm.getSignificanceScore(), actualSigTerm.getSignificanceScore(), 0.0);
         assertEquals(expectedSigTerm.getSubsetDf(), actualSigTerm.getSubsetDf());
+        assertEquals(expectedSigTerm.getDocCount(), actualSigTerm.getSubsetDf());
         assertEquals(expectedSigTerm.getSupersetDf(), actualSigTerm.getSupersetDf());
 
-        expectThrows(UnsupportedOperationException.class, actualSigTerm::getSubsetSize);
-        expectThrows(UnsupportedOperationException.class, actualSigTerm::getSupersetSize);
+        assertEquals(expectedSigTerm.getSubsetSize(), actualSigTerm.getSubsetSize());
+        assertEquals(expectedSigTerm.getSupersetSize(), actualSigTerm.getSupersetSize());
     }
 
     private static Map<Object, Long> toCounts(Stream<? extends SignificantTerms.Bucket> buckets,
                                               Function<SignificantTerms.Bucket, Long> fn) {
         return buckets.collect(Collectors.toMap(SignificantTerms.Bucket::getKey, fn, Long::sum));
     }
+
+    private static SignificanceHeuristic randomSignificanceHeuristic() {
+        return randomFrom(
+                new JLHScore(),
+                new MutualInformation(randomBoolean(), randomBoolean()),
+                new GND(randomBoolean()),
+                new ChiSquare(randomBoolean(), randomBoolean()));
+    }
 }

SignificantLongTermsTests.java

@@ -23,10 +23,6 @@
 import org.elasticsearch.search.DocValueFormat;
 import org.elasticsearch.search.aggregations.InternalAggregations;
 import org.elasticsearch.search.aggregations.ParsedMultiBucketAggregation;
-import org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare;
-import org.elasticsearch.search.aggregations.bucket.significant.heuristics.GND;
-import org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore;
-import org.elasticsearch.search.aggregations.bucket.significant.heuristics.MutualInformation;
 import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic;
 import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
 
@@ -38,44 +34,32 @@
 
 public class SignificantLongTermsTests extends InternalSignificantTermsTestCase {
 
-    private SignificanceHeuristic significanceHeuristic;
     private DocValueFormat format;
 
     @Override
     public void setUp() throws Exception {
         super.setUp();
-        significanceHeuristic = randomSignificanceHeuristic();
         format = randomNumericDocValueFormat();
     }
 
     @Override
     protected InternalSignificantTerms createTestInstance(String name,
                                                           List<PipelineAggregator> pipelineAggregators,
                                                           Map<String, Object> metaData,
-                                                          InternalAggregations aggregations) {
-        int requiredSize = randomIntBetween(1, 5);
-        int shardSize = requiredSize + 2;
-        final int numBuckets = randomInt(shardSize);
-
-        long globalSubsetSize = 0;
-        long globalSupersetSize = 0;
+                                                          InternalAggregations aggs,
+                                                          int requiredSize, int numBuckets,
+                                                          long subsetSize, int[] subsetDfs,
+                                                          long supersetSize, int[] supersetDfs,
+                                                          SignificanceHeuristic significanceHeuristic) {
 
         List<SignificantLongTerms.Bucket> buckets = new ArrayList<>(numBuckets);
         Set<Long> terms = new HashSet<>();
         for (int i = 0; i < numBuckets; ++i) {
             long term = randomValueOtherThanMany(l -> terms.add(l) == false, random()::nextLong);
-
-            int subsetDf = randomIntBetween(1, 10);
-            int supersetDf = randomIntBetween(subsetDf, 20);
-            int supersetSize = randomIntBetween(supersetDf, 30);
-
-            globalSubsetSize += subsetDf;
-            globalSupersetSize += supersetSize;
-
-            buckets.add(new SignificantLongTerms.Bucket(subsetDf, subsetDf, supersetDf, supersetSize, term, aggregations, format));
+            buckets.add(new SignificantLongTerms.Bucket(subsetDfs[i], subsetSize, supersetDfs[i], supersetSize, term, aggs, format));
         }
-        return new SignificantLongTerms(name, requiredSize, 1L, pipelineAggregators, metaData, format, globalSubsetSize,
-                globalSupersetSize, significanceHeuristic, buckets);
+        return new SignificantLongTerms(name, requiredSize, 1L, pipelineAggregators, metaData, format, subsetSize,
+                supersetSize, significanceHeuristic, buckets);
     }
 
     @Override
@@ -87,12 +71,4 @@ protected InternalSignificantTerms createTestInstance(String name,
     protected Class<? extends ParsedMultiBucketAggregation> implementationClass() {
         return ParsedSignificantLongTerms.class;
     }
-
-    private static SignificanceHeuristic randomSignificanceHeuristic() {
-        return randomFrom(
-                new JLHScore(),
-                new MutualInformation(randomBoolean(), randomBoolean()),
-                new GND(randomBoolean()),
-                new ChiSquare(randomBoolean(), randomBoolean()));
-    }
 }