Skip to content

Commit 0a5cb63

Browse files
committed
Add superset size to Significant Term REST response
This commit adds a new bg_count field to the REST response of SignificantTerms aggregations. Like the bg_count field that already exists in each significant terms bucket, the new bg_count field is set at the aggregation level and is populated with the superset size.
1 parent fc35d51 commit 0a5cb63

File tree

12 files changed

+153
-99
lines changed

12 files changed

+153
-99
lines changed

core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/InternalMappedSignificantTerms.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ protected int doHashCode() {
128128
@Override
129129
public XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException {
130130
builder.field(CommonFields.DOC_COUNT.getPreferredName(), subsetSize);
131+
builder.field(BG_COUNT, supersetSize);
131132
builder.startArray(CommonFields.BUCKETS.getPreferredName());
132133
for (Bucket bucket : buckets) {
133134
//There is a condition (presumably when only one shard has a bucket?) where reduce is not called

core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/ParsedSignificantLongTerms.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,7 @@ public String getType() {
3939
}
4040

4141
public static ParsedSignificantLongTerms fromXContent(XContentParser parser, String name) throws IOException {
42-
ParsedSignificantLongTerms aggregation = PARSER.parse(parser, null);
43-
aggregation.setName(name);
44-
return aggregation;
42+
return parseSignificantTermsXContent(() -> PARSER.parse(parser, null), name);
4543
}
4644

4745
public static class ParsedBucket extends ParsedSignificantTerms.ParsedBucket {

core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/ParsedSignificantStringTerms.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,7 @@ public String getType() {
4040
}
4141

4242
public static ParsedSignificantStringTerms fromXContent(XContentParser parser, String name) throws IOException {
43-
ParsedSignificantStringTerms aggregation = PARSER.parse(parser, null);
44-
aggregation.setName(name);
45-
return aggregation;
43+
return parseSignificantTermsXContent(() -> PARSER.parse(parser, null), name);
4644
}
4745

4846
public static class ParsedBucket extends ParsedSignificantTerms.ParsedBucket {

core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/ParsedSignificantTerms.java

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121

2222
import org.elasticsearch.common.CheckedBiConsumer;
2323
import org.elasticsearch.common.CheckedFunction;
24+
import org.elasticsearch.common.CheckedSupplier;
25+
import org.elasticsearch.common.ParseField;
2426
import org.elasticsearch.common.xcontent.ObjectParser;
2527
import org.elasticsearch.common.xcontent.XContentBuilder;
2628
import org.elasticsearch.common.xcontent.XContentParser;
@@ -42,11 +44,16 @@ public abstract class ParsedSignificantTerms extends ParsedMultiBucketAggregatio
4244

4345
private Map<String, ParsedBucket> bucketMap;
4446
protected long subsetSize;
47+
protected long supersetSize;
4548

4649
protected long getSubsetSize() {
4750
return subsetSize;
4851
}
4952

53+
protected long getSupersetSize() {
54+
return supersetSize;
55+
}
56+
5057
@Override
5158
public List<? extends SignificantTerms.Bucket> getBuckets() {
5259
return buckets;
@@ -68,6 +75,7 @@ public Iterator<SignificantTerms.Bucket> iterator() {
6875
@Override
6976
protected XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException {
7077
builder.field(CommonFields.DOC_COUNT.getPreferredName(), subsetSize);
78+
builder.field(InternalMappedSignificantTerms.BG_COUNT, supersetSize);
7179
builder.startArray(CommonFields.BUCKETS.getPreferredName());
7280
for (SignificantTerms.Bucket bucket : buckets) {
7381
bucket.toXContent(builder, params);
@@ -76,16 +84,31 @@ protected XContentBuilder doXContentBody(XContentBuilder builder, Params params)
7684
return builder;
7785
}
7886

87+
static <T extends ParsedSignificantTerms> T parseSignificantTermsXContent(final CheckedSupplier<T, IOException> aggregationSupplier,
88+
final String name) throws IOException {
89+
T aggregation = aggregationSupplier.get();
90+
aggregation.setName(name);
91+
for (ParsedBucket bucket : aggregation.buckets) {
92+
bucket.subsetSize = aggregation.subsetSize;
93+
bucket.supersetSize = aggregation.supersetSize;
94+
}
95+
return aggregation;
96+
}
97+
7998
static void declareParsedSignificantTermsFields(final ObjectParser<? extends ParsedSignificantTerms, Void> objectParser,
8099
final CheckedFunction<XContentParser, ParsedSignificantTerms.ParsedBucket, IOException> bucketParser) {
81100
declareMultiBucketAggregationFields(objectParser, bucketParser::apply, bucketParser::apply);
82101
objectParser.declareLong((parsedTerms, value) -> parsedTerms.subsetSize = value , CommonFields.DOC_COUNT);
102+
objectParser.declareLong((parsedTerms, value) -> parsedTerms.supersetSize = value ,
103+
new ParseField(InternalMappedSignificantTerms.BG_COUNT));
83104
}
84105

85106
public abstract static class ParsedBucket extends ParsedMultiBucketAggregation.ParsedBucket implements SignificantTerms.Bucket {
86107

87108
protected long subsetDf;
109+
protected long subsetSize;
88110
protected long supersetDf;
111+
protected long supersetSize;
89112
protected double score;
90113

91114
@Override
@@ -110,12 +133,12 @@ public double getSignificanceScore() {
110133

111134
@Override
112135
public long getSupersetSize() {
113-
throw new UnsupportedOperationException();
136+
return supersetSize;
114137
}
115138

116139
@Override
117140
public long getSubsetSize() {
118-
throw new UnsupportedOperationException();
141+
return subsetSize;
119142
}
120143

121144
@Override

core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTerms.java

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,17 +29,39 @@ public interface SignificantTerms extends MultiBucketsAggregation, Iterable<Sign
2929

3030
interface Bucket extends MultiBucketsAggregation.Bucket {
3131

32+
/**
33+
* @return The significance score for the subset
34+
*/
3235
double getSignificanceScore();
3336

34-
Number getKeyAsNumber();
35-
37+
/**
38+
* @return The number of docs in the subset containing a particular term.
39+
* This number is equal to the document count of the bucket.
40+
*/
3641
long getSubsetDf();
3742

43+
/**
44+
* @return The number of docs in the subset (also known as "foreground set").
45+
* This number is equal to the document count of the containing aggregation.
46+
*/
47+
long getSubsetSize();
48+
49+
/**
50+
* @return The number of docs in the superset containing a particular term (also
51+
* known as the "background count" of the bucket)
52+
*/
3853
long getSupersetDf();
3954

55+
/**
56+
* @return The number of docs in the superset (also known as the background count
57+
* of the containing aggregation).
58+
*/
4059
long getSupersetSize();
4160

42-
long getSubsetSize();
61+
/**
62+
* @return The key, expressed as a number
63+
*/
64+
Number getKeyAsNumber();
4365
}
4466

4567
@Override

core/src/test/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsSignificanceScoreIT.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,7 @@ public void testXContentResponse() throws Exception {
310310
+ "\"doc_count\":4,"
311311
+ "\"sig_terms\":{"
312312
+ "\"doc_count\":4,"
313+
+ "\"bg_count\":7,"
313314
+ "\"buckets\":["
314315
+ "{"
315316
+ "\"key\":" + (type.equals("long") ? "0," : "\"0\",")
@@ -325,6 +326,7 @@ public void testXContentResponse() throws Exception {
325326
+ "\"doc_count\":3,"
326327
+ "\"sig_terms\":{"
327328
+ "\"doc_count\":3,"
329+
+ "\"bg_count\":7,"
328330
+ "\"buckets\":["
329331
+ "{"
330332
+ "\"key\":" + (type.equals("long") ? "1," : "\"1\",")

core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/InternalSignificantTermsTestCase.java

Lines changed: 63 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,14 @@
1919

2020
package org.elasticsearch.search.aggregations.bucket.significant;
2121

22+
import org.elasticsearch.search.aggregations.InternalAggregations;
2223
import org.elasticsearch.search.aggregations.InternalMultiBucketAggregationTestCase;
2324
import org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation;
25+
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare;
26+
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.GND;
27+
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore;
28+
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.MutualInformation;
29+
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic;
2430
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
2531

2632
import java.util.Arrays;
@@ -33,6 +39,51 @@
3339

3440
public abstract class InternalSignificantTermsTestCase extends InternalMultiBucketAggregationTestCase<InternalSignificantTerms<?, ?>> {
3541

42+
private SignificanceHeuristic significanceHeuristic;
43+
44+
@Override
45+
public void setUp() throws Exception {
46+
super.setUp();
47+
significanceHeuristic = randomSignificanceHeuristic();
48+
}
49+
50+
@Override
51+
protected final InternalSignificantTerms createTestInstance(String name,
52+
List<PipelineAggregator> pipelineAggregators,
53+
Map<String, Object> metaData,
54+
InternalAggregations aggregations) {
55+
final int requiredSize = randomIntBetween(1, 5);
56+
final int numBuckets = randomInt(requiredSize + 2);
57+
58+
long subsetSize = 0;
59+
long supersetSize = 0;
60+
61+
int[] subsetDfs = new int[numBuckets];
62+
int[] supersetDfs = new int[numBuckets];
63+
64+
for (int i = 0; i < numBuckets; ++i) {
65+
int subsetDf = randomIntBetween(1, 10);
66+
subsetDfs[i] = subsetDf;
67+
68+
int supersetDf = randomIntBetween(subsetDf, 20);
69+
supersetDfs[i] = supersetDf;
70+
71+
subsetSize += subsetDf;
72+
supersetSize += supersetDf;
73+
}
74+
return createTestInstance(name, pipelineAggregators, metaData, aggregations, requiredSize, numBuckets, subsetSize, subsetDfs,
75+
supersetSize, supersetDfs, significanceHeuristic);
76+
}
77+
78+
protected abstract InternalSignificantTerms createTestInstance(String name,
79+
List<PipelineAggregator> pipelineAggregators,
80+
Map<String, Object> metaData,
81+
InternalAggregations aggregations,
82+
int requiredSize, int numBuckets,
83+
long subsetSize, int[] subsetDfs,
84+
long supersetSize, int[] supersetDfs,
85+
SignificanceHeuristic significanceHeuristic);
86+
3687
@Override
3788
protected InternalSignificantTerms createUnmappedInstance(String name,
3889
List<PipelineAggregator> pipelineAggregators,
@@ -72,6 +123,7 @@ protected void assertMultiBucketsAggregation(MultiBucketsAggregation expected, M
72123
InternalSignificantTerms expectedSigTerms = (InternalSignificantTerms) expected;
73124
ParsedSignificantTerms actualSigTerms = (ParsedSignificantTerms) actual;
74125
assertEquals(expectedSigTerms.getSubsetSize(), actualSigTerms.getSubsetSize());
126+
assertEquals(expectedSigTerms.getSupersetSize(), actualSigTerms.getSupersetSize());
75127

76128
for (SignificantTerms.Bucket bucket : (SignificantTerms) expected) {
77129
String key = bucket.getKeyAsString();
@@ -91,14 +143,22 @@ protected void assertBucket(MultiBucketsAggregation.Bucket expected, MultiBucket
91143

92144
assertEquals(expectedSigTerm.getSignificanceScore(), actualSigTerm.getSignificanceScore(), 0.0);
93145
assertEquals(expectedSigTerm.getSubsetDf(), actualSigTerm.getSubsetDf());
146+
assertEquals(actualSigTerm.getDocCount(), actualSigTerm.getSubsetDf());
94147
assertEquals(expectedSigTerm.getSupersetDf(), actualSigTerm.getSupersetDf());
95-
96-
expectThrows(UnsupportedOperationException.class, actualSigTerm::getSubsetSize);
97-
expectThrows(UnsupportedOperationException.class, actualSigTerm::getSupersetSize);
148+
assertEquals(expectedSigTerm.getSubsetSize(), actualSigTerm.getSubsetSize());
149+
assertEquals(expectedSigTerm.getSupersetSize(), actualSigTerm.getSupersetSize());
98150
}
99151

100152
private static Map<Object, Long> toCounts(Stream<? extends SignificantTerms.Bucket> buckets,
101153
Function<SignificantTerms.Bucket, Long> fn) {
102154
return buckets.collect(Collectors.toMap(SignificantTerms.Bucket::getKey, fn, Long::sum));
103155
}
156+
157+
private static SignificanceHeuristic randomSignificanceHeuristic() {
158+
return randomFrom(
159+
new JLHScore(),
160+
new MutualInformation(randomBoolean(), randomBoolean()),
161+
new GND(randomBoolean()),
162+
new ChiSquare(randomBoolean(), randomBoolean()));
163+
}
104164
}

core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsTests.java

Lines changed: 8 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,6 @@
2323
import org.elasticsearch.search.DocValueFormat;
2424
import org.elasticsearch.search.aggregations.InternalAggregations;
2525
import org.elasticsearch.search.aggregations.ParsedMultiBucketAggregation;
26-
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare;
27-
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.GND;
28-
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore;
29-
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.MutualInformation;
3026
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic;
3127
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
3228

@@ -38,44 +34,32 @@
3834

3935
public class SignificantLongTermsTests extends InternalSignificantTermsTestCase {
4036

41-
private SignificanceHeuristic significanceHeuristic;
4237
private DocValueFormat format;
4338

4439
@Override
4540
public void setUp() throws Exception {
4641
super.setUp();
47-
significanceHeuristic = randomSignificanceHeuristic();
4842
format = randomNumericDocValueFormat();
4943
}
5044

5145
@Override
5246
protected InternalSignificantTerms createTestInstance(String name,
5347
List<PipelineAggregator> pipelineAggregators,
5448
Map<String, Object> metaData,
55-
InternalAggregations aggregations) {
56-
int requiredSize = randomIntBetween(1, 5);
57-
int shardSize = requiredSize + 2;
58-
final int numBuckets = randomInt(shardSize);
59-
60-
long globalSubsetSize = 0;
61-
long globalSupersetSize = 0;
49+
InternalAggregations aggs,
50+
int requiredSize, int numBuckets,
51+
long subsetSize, int[] subsetDfs,
52+
long supersetSize, int[] supersetDfs,
53+
SignificanceHeuristic significanceHeuristic) {
6254

6355
List<SignificantLongTerms.Bucket> buckets = new ArrayList<>(numBuckets);
6456
Set<Long> terms = new HashSet<>();
6557
for (int i = 0; i < numBuckets; ++i) {
6658
long term = randomValueOtherThanMany(l -> terms.add(l) == false, random()::nextLong);
67-
68-
int subsetDf = randomIntBetween(1, 10);
69-
int supersetDf = randomIntBetween(subsetDf, 20);
70-
int supersetSize = randomIntBetween(supersetDf, 30);
71-
72-
globalSubsetSize += subsetDf;
73-
globalSupersetSize += supersetSize;
74-
75-
buckets.add(new SignificantLongTerms.Bucket(subsetDf, subsetDf, supersetDf, supersetSize, term, aggregations, format));
59+
buckets.add(new SignificantLongTerms.Bucket(subsetDfs[i], subsetSize, supersetDfs[i], supersetSize, term, aggs, format));
7660
}
77-
return new SignificantLongTerms(name, requiredSize, 1L, pipelineAggregators, metaData, format, globalSubsetSize,
78-
globalSupersetSize, significanceHeuristic, buckets);
61+
return new SignificantLongTerms(name, requiredSize, 1L, pipelineAggregators, metaData, format, subsetSize,
62+
supersetSize, significanceHeuristic, buckets);
7963
}
8064

8165
@Override
@@ -87,12 +71,4 @@ protected InternalSignificantTerms createTestInstance(String name,
8771
protected Class<? extends ParsedMultiBucketAggregation> implementationClass() {
8872
return ParsedSignificantLongTerms.class;
8973
}
90-
91-
private static SignificanceHeuristic randomSignificanceHeuristic() {
92-
return randomFrom(
93-
new JLHScore(),
94-
new MutualInformation(randomBoolean(), randomBoolean()),
95-
new GND(randomBoolean()),
96-
new ChiSquare(randomBoolean(), randomBoolean()));
97-
}
9874
}

0 commit comments

Comments
 (0)