Skip to content

Commit d5fc520

Browse files
authored
Add parsing to Significant Terms aggregations (#24682)
Related to #23331
1 parent ef7c2e6 commit d5fc520

File tree

10 files changed

+432
-27
lines changed

10 files changed

+432
-27
lines changed

core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/InternalSignificantTerms.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,8 @@
4444
public abstract class InternalSignificantTerms<A extends InternalSignificantTerms<A, B>, B extends InternalSignificantTerms.Bucket<B>>
4545
extends InternalMultiBucketAggregation<A, B> implements SignificantTerms, ToXContent {
4646

47-
private static final String SCORE = "score";
48-
private static final String BG_COUNT = "bg_count";
47+
public static final String SCORE = "score";
48+
public static final String BG_COUNT = "bg_count";
4949

5050
@SuppressWarnings("PMD.ConstructorCallsOverridableMethod")
5151
public abstract static class Bucket<B extends Bucket<B>> extends InternalMultiBucketAggregation.InternalBucket
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.search.aggregations.bucket.significant;
21+
22+
import org.elasticsearch.common.xcontent.ObjectParser;
23+
import org.elasticsearch.common.xcontent.XContentBuilder;
24+
import org.elasticsearch.common.xcontent.XContentParser;
25+
26+
import java.io.IOException;
27+
28+
public class ParsedSignificantLongTerms extends ParsedSignificantTerms {
29+
30+
@Override
31+
public String getType() {
32+
return SignificantLongTerms.NAME;
33+
}
34+
35+
private static ObjectParser<ParsedSignificantLongTerms, Void> PARSER =
36+
new ObjectParser<>(ParsedSignificantLongTerms.class.getSimpleName(), true, ParsedSignificantLongTerms::new);
37+
static {
38+
declareParsedSignificantTermsFields(PARSER, ParsedBucket::fromXContent);
39+
}
40+
41+
public static ParsedSignificantLongTerms fromXContent(XContentParser parser, String name) throws IOException {
42+
ParsedSignificantLongTerms aggregation = PARSER.parse(parser, null);
43+
aggregation.setName(name);
44+
return aggregation;
45+
}
46+
47+
public static class ParsedBucket extends ParsedSignificantTerms.ParsedBucket {
48+
49+
private Long key;
50+
51+
@Override
52+
public Object getKey() {
53+
return key;
54+
}
55+
56+
@Override
57+
public String getKeyAsString() {
58+
String keyAsString = super.getKeyAsString();
59+
if (keyAsString != null) {
60+
return keyAsString;
61+
}
62+
return Long.toString(key);
63+
}
64+
65+
public Number getKeyAsNumber() {
66+
return key;
67+
}
68+
69+
@Override
70+
public int compareTerm(SignificantTerms.Bucket other) {
71+
return key.compareTo(((ParsedBucket) other).key);
72+
}
73+
74+
@Override
75+
protected XContentBuilder keyToXContent(XContentBuilder builder) throws IOException {
76+
builder.field(CommonFields.KEY.getPreferredName(), key);
77+
if (super.getKeyAsString() != null) {
78+
builder.field(CommonFields.KEY_AS_STRING.getPreferredName(), getKeyAsString());
79+
}
80+
return builder;
81+
}
82+
83+
static ParsedBucket fromXContent(XContentParser parser) throws IOException {
84+
return parseSignificantTermsBucketXContent(parser, new ParsedBucket(), (p, bucket) -> bucket.key = p.longValue());
85+
}
86+
}
87+
}
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.search.aggregations.bucket.significant;
21+
22+
import org.apache.lucene.util.BytesRef;
23+
import org.elasticsearch.common.xcontent.ObjectParser;
24+
import org.elasticsearch.common.xcontent.XContentBuilder;
25+
import org.elasticsearch.common.xcontent.XContentParser;
26+
27+
import java.io.IOException;
28+
29+
public class ParsedSignificantStringTerms extends ParsedSignificantTerms {
30+
31+
@Override
32+
public String getType() {
33+
return SignificantStringTerms.NAME;
34+
}
35+
36+
private static ObjectParser<ParsedSignificantStringTerms, Void> PARSER =
37+
new ObjectParser<>(ParsedSignificantStringTerms.class.getSimpleName(), true, ParsedSignificantStringTerms::new);
38+
static {
39+
declareParsedSignificantTermsFields(PARSER, ParsedBucket::fromXContent);
40+
}
41+
42+
public static ParsedSignificantStringTerms fromXContent(XContentParser parser, String name) throws IOException {
43+
ParsedSignificantStringTerms aggregation = PARSER.parse(parser, null);
44+
aggregation.setName(name);
45+
return aggregation;
46+
}
47+
48+
public static class ParsedBucket extends ParsedSignificantTerms.ParsedBucket {
49+
50+
private BytesRef key;
51+
52+
@Override
53+
public Object getKey() {
54+
return getKeyAsString();
55+
}
56+
57+
@Override
58+
public String getKeyAsString() {
59+
String keyAsString = super.getKeyAsString();
60+
if (keyAsString != null) {
61+
return keyAsString;
62+
}
63+
return key.utf8ToString();
64+
}
65+
66+
public Number getKeyAsNumber() {
67+
return Double.parseDouble(key.utf8ToString());
68+
}
69+
70+
@Override
71+
public int compareTerm(SignificantTerms.Bucket other) {
72+
return key.compareTo(((ParsedBucket) other).key);
73+
}
74+
75+
@Override
76+
protected XContentBuilder keyToXContent(XContentBuilder builder) throws IOException {
77+
return builder.field(CommonFields.KEY.getPreferredName(), getKey());
78+
}
79+
80+
static ParsedBucket fromXContent(XContentParser parser) throws IOException {
81+
return parseSignificantTermsBucketXContent(parser, new ParsedBucket(), (p, bucket) -> bucket.key = p.utf8BytesOrNull());
82+
}
83+
}
84+
}
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.search.aggregations.bucket.significant;
21+
22+
import org.elasticsearch.common.CheckedBiConsumer;
23+
import org.elasticsearch.common.CheckedFunction;
24+
import org.elasticsearch.common.xcontent.ObjectParser;
25+
import org.elasticsearch.common.xcontent.XContentBuilder;
26+
import org.elasticsearch.common.xcontent.XContentParser;
27+
import org.elasticsearch.common.xcontent.XContentParserUtils;
28+
import org.elasticsearch.search.aggregations.Aggregation;
29+
import org.elasticsearch.search.aggregations.Aggregations;
30+
import org.elasticsearch.search.aggregations.ParsedMultiBucketAggregation;
31+
32+
import java.io.IOException;
33+
import java.util.ArrayList;
34+
import java.util.Iterator;
35+
import java.util.List;
36+
import java.util.Map;
37+
import java.util.function.Function;
38+
import java.util.stream.Collectors;
39+
40+
public abstract class ParsedSignificantTerms extends ParsedMultiBucketAggregation<ParsedSignificantTerms.ParsedBucket>
41+
implements SignificantTerms {
42+
43+
private Map<String, ParsedBucket> bucketMap;
44+
protected long subsetSize;
45+
46+
protected long getSubsetSize() {
47+
return subsetSize;
48+
}
49+
50+
@Override
51+
public List<? extends SignificantTerms.Bucket> getBuckets() {
52+
return buckets;
53+
}
54+
55+
@Override
56+
public SignificantTerms.Bucket getBucketByKey(String term) {
57+
if (bucketMap == null) {
58+
bucketMap = buckets.stream().collect(Collectors.toMap(SignificantTerms.Bucket::getKeyAsString, Function.identity()));
59+
}
60+
return bucketMap.get(term);
61+
}
62+
63+
@Override
64+
public Iterator<SignificantTerms.Bucket> iterator() {
65+
return buckets.stream().map(bucket -> (SignificantTerms.Bucket) bucket).collect(Collectors.toList()).iterator();
66+
}
67+
68+
@Override
69+
protected XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException {
70+
builder.field(CommonFields.DOC_COUNT.getPreferredName(), subsetSize);
71+
builder.startArray(CommonFields.BUCKETS.getPreferredName());
72+
for (SignificantTerms.Bucket bucket : buckets) {
73+
bucket.toXContent(builder, params);
74+
}
75+
builder.endArray();
76+
return builder;
77+
}
78+
79+
static void declareParsedSignificantTermsFields(final ObjectParser<? extends ParsedSignificantTerms, Void> objectParser,
80+
final CheckedFunction<XContentParser, ParsedSignificantTerms.ParsedBucket, IOException> bucketParser) {
81+
declareMultiBucketAggregationFields(objectParser, bucketParser::apply, bucketParser::apply);
82+
objectParser.declareLong((parsedTerms, value) -> parsedTerms.subsetSize = value , CommonFields.DOC_COUNT);
83+
}
84+
85+
public abstract static class ParsedBucket extends ParsedMultiBucketAggregation.ParsedBucket implements SignificantTerms.Bucket {
86+
87+
protected long subsetDf;
88+
protected long supersetDf;
89+
protected double score;
90+
91+
@Override
92+
public long getDocCount() {
93+
return getSubsetDf();
94+
}
95+
96+
@Override
97+
public long getSubsetDf() {
98+
return subsetDf;
99+
}
100+
101+
@Override
102+
public long getSupersetDf() {
103+
return supersetDf;
104+
}
105+
106+
@Override
107+
public double getSignificanceScore() {
108+
return score;
109+
}
110+
111+
@Override
112+
public long getSupersetSize() {
113+
throw new UnsupportedOperationException();
114+
}
115+
116+
@Override
117+
public long getSubsetSize() {
118+
throw new UnsupportedOperationException();
119+
}
120+
121+
@Override
122+
public final XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
123+
builder.startObject();
124+
keyToXContent(builder);
125+
builder.field(CommonFields.DOC_COUNT.getPreferredName(), getDocCount());
126+
builder.field(InternalSignificantTerms.SCORE, getSignificanceScore());
127+
builder.field(InternalSignificantTerms.BG_COUNT, getSupersetDf());
128+
getAggregations().toXContentInternal(builder, params);
129+
builder.endObject();
130+
return builder;
131+
}
132+
133+
protected abstract XContentBuilder keyToXContent(XContentBuilder builder) throws IOException;
134+
135+
static <B extends ParsedBucket> B parseSignificantTermsBucketXContent(final XContentParser parser, final B bucket,
136+
final CheckedBiConsumer<XContentParser, B, IOException> keyConsumer) throws IOException {
137+
138+
final List<Aggregation> aggregations = new ArrayList<>();
139+
XContentParser.Token token;
140+
String currentFieldName = parser.currentName();
141+
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
142+
if (token == XContentParser.Token.FIELD_NAME) {
143+
currentFieldName = parser.currentName();
144+
} else if (token.isValue()) {
145+
if (CommonFields.KEY_AS_STRING.getPreferredName().equals(currentFieldName)) {
146+
bucket.setKeyAsString(parser.text());
147+
} else if (CommonFields.KEY.getPreferredName().equals(currentFieldName)) {
148+
keyConsumer.accept(parser, bucket);
149+
} else if (CommonFields.DOC_COUNT.getPreferredName().equals(currentFieldName)) {
150+
long value = parser.longValue();
151+
bucket.subsetDf = value;
152+
bucket.setDocCount(value);
153+
} else if (InternalSignificantTerms.SCORE.equals(currentFieldName)) {
154+
bucket.score = parser.longValue();
155+
} else if (InternalSignificantTerms.BG_COUNT.equals(currentFieldName)) {
156+
bucket.supersetDf = parser.longValue();
157+
}
158+
} else if (token == XContentParser.Token.START_OBJECT) {
159+
aggregations.add(XContentParserUtils.parseTypedKeysObject(parser, Aggregation.TYPED_KEYS_DELIMITER, Aggregation.class));
160+
}
161+
}
162+
bucket.setAggregations(new Aggregations(aggregations));
163+
return bucket;
164+
}
165+
}
166+
}

core/src/test/java/org/elasticsearch/search/aggregations/AggregationsTests.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@
4040
import org.elasticsearch.search.aggregations.bucket.range.date.InternalDateRangeTests;
4141
import org.elasticsearch.search.aggregations.bucket.range.geodistance.InternalGeoDistanceTests;
4242
import org.elasticsearch.search.aggregations.bucket.sampler.InternalSamplerTests;
43+
import org.elasticsearch.search.aggregations.bucket.significant.SignificantLongTermsTests;
44+
import org.elasticsearch.search.aggregations.bucket.significant.SignificantStringTermsTests;
4345
import org.elasticsearch.search.aggregations.bucket.terms.DoubleTermsTests;
4446
import org.elasticsearch.search.aggregations.bucket.terms.LongTermsTests;
4547
import org.elasticsearch.search.aggregations.bucket.terms.StringTermsTests;
@@ -126,6 +128,8 @@ private static List<InternalAggregationTestCase> getAggsTests() {
126128
aggsTests.add(new InternalGeoDistanceTests());
127129
aggsTests.add(new InternalFiltersTests());
128130
aggsTests.add(new InternalAdjacencyMatrixTests());
131+
aggsTests.add(new SignificantLongTermsTests());
132+
aggsTests.add(new SignificantStringTermsTests());
129133
return Collections.unmodifiableList(aggsTests);
130134
}
131135

0 commit comments

Comments
 (0)