Skip to content

Commit d3833d7

Browse files
committed
Tests: Add unit test for SignificantLongTerms and SignificantStringTerms
Relates to #22278
1 parent a54daad commit d3833d7

File tree

7 files changed

+325
-0
lines changed

7 files changed

+325
-0
lines changed

core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/InternalMappedSignificantTerms.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import java.io.IOException;
2929
import java.util.List;
3030
import java.util.Map;
31+
import java.util.Objects;
3132
import java.util.function.Function;
3233
import java.util.stream.Collectors;
3334

@@ -99,4 +100,21 @@ protected long getSupersetSize() {
99100
protected SignificanceHeuristic getSignificanceHeuristic() {
100101
return significanceHeuristic;
101102
}
103+
104+
@Override
105+
protected boolean doEquals(Object obj) {
106+
InternalMappedSignificantTerms<?, ?> that = (InternalMappedSignificantTerms<?, ?>) obj;
107+
return super.doEquals(obj)
108+
&& Objects.equals(format, that.format)
109+
&& subsetSize == that.subsetSize
110+
&& supersetSize == that.supersetSize
111+
&& Objects.equals(significanceHeuristic, that.significanceHeuristic)
112+
&& Objects.equals(buckets, that.buckets)
113+
&& Objects.equals(bucketMap, that.bucketMap);
114+
}
115+
116+
@Override
117+
protected int doHashCode() {
118+
return Objects.hash(super.doHashCode(), format, subsetSize, supersetSize, significanceHeuristic, buckets, bucketMap);
119+
}
102120
}

core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/InternalSignificantTerms.java

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import java.util.Iterator;
3737
import java.util.List;
3838
import java.util.Map;
39+
import java.util.Objects;
3940

4041
import static java.util.Collections.unmodifiableList;
4142

@@ -127,6 +128,27 @@ public B reduce(List<B> buckets, ReduceContext context) {
127128
public double getSignificanceScore() {
128129
return score;
129130
}
131+
132+
@Override
133+
public boolean equals(Object o) {
134+
if (this == o) {
135+
return true;
136+
}
137+
if (o == null || getClass() != o.getClass()) {
138+
return false;
139+
}
140+
141+
Bucket<?> that = (Bucket<?>) o;
142+
return bucketOrd == that.bucketOrd &&
143+
Double.compare(that.score, score) == 0 &&
144+
Objects.equals(aggregations, that.aggregations) &&
145+
Objects.equals(format, that.format);
146+
}
147+
148+
@Override
public int hashCode() {
    // getClass() is folded in so buckets of different concrete subclasses hash
    // differently; the remaining fields mirror those compared in equals.
    return Objects.hash(getClass(), bucketOrd, aggregations, score, format);
}
130152
}
131153

132154
protected final int requiredSize;
@@ -226,4 +248,16 @@ public InternalAggregation doReduce(List<InternalAggregation> aggregations, Redu
226248
protected abstract long getSupersetSize();
227249

228250
protected abstract SignificanceHeuristic getSignificanceHeuristic();
251+
252+
@Override
protected int doHashCode() {
    // Only the fields this class compares in doEquals participate in the hash;
    // the framework combines this with the superclass hash.
    return Objects.hash(minDocCount, requiredSize);
}
256+
257+
@Override
258+
protected boolean doEquals(Object obj) {
259+
InternalSignificantTerms<?, ?> that = (InternalSignificantTerms<?, ?>) obj;
260+
return Objects.equals(minDocCount, that.minDocCount)
261+
&& Objects.equals(requiredSize, that.requiredSize);
262+
}
229263
}

core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTerms.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import java.io.IOException;
3030
import java.util.List;
3131
import java.util.Map;
32+
import java.util.Objects;
3233

3334
/**
3435
* Result of running the significant terms aggregation on a numeric field.
@@ -109,6 +110,16 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
109110
builder.endObject();
110111
return builder;
111112
}
113+
114+
@Override
115+
public boolean equals(Object obj) {
116+
return super.equals(obj) && Objects.equals(term, ((Bucket) obj).term);
117+
}
118+
119+
@Override
public int hashCode() {
    // Combine the superclass hash (covering the shared bucket state) with this
    // bucket's term so hashCode stays consistent with equals.
    return Objects.hash(super.hashCode(), term);
}
112123
}
113124

114125
public SignificantLongTerms(String name, int requiredSize, long minDocCount, List<PipelineAggregator> pipelineAggregators,

core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTerms.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import java.io.IOException;
3131
import java.util.List;
3232
import java.util.Map;
33+
import java.util.Objects;
3334

3435
/**
3536
* Result of running the significant terms aggregation on a String field.
@@ -111,6 +112,16 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
111112
builder.endObject();
112113
return builder;
113114
}
115+
116+
@Override
117+
public boolean equals(Object obj) {
118+
return super.equals(obj) && Objects.equals(termBytes, ((SignificantStringTerms.Bucket) obj).termBytes);
119+
}
120+
121+
@Override
public int hashCode() {
    // Combine the superclass hash with the term bytes compared in equals,
    // keeping hashCode consistent with equals.
    return Objects.hash(super.hashCode(), termBytes);
}
114125
}
115126

116127
public SignificantStringTerms(String name, int requiredSize, long minDocCount, List<PipelineAggregator> pipelineAggregators,
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.search.aggregations.bucket.significant;
21+
22+
import org.elasticsearch.search.aggregations.InternalAggregationTestCase;
23+
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
24+
25+
import java.util.Arrays;
26+
import java.util.HashMap;
27+
import java.util.List;
28+
import java.util.Map;
29+
import java.util.function.Function;
30+
import java.util.stream.Collectors;
31+
import java.util.stream.Stream;
32+
33+
public abstract class InternalSignificantTermsTestCase extends InternalAggregationTestCase<InternalSignificantTerms<?, ?>> {
34+
35+
@Override
36+
protected InternalSignificantTerms createUnmappedInstance(String name,
37+
List<PipelineAggregator> pipelineAggregators,
38+
Map<String, Object> metaData) {
39+
InternalSignificantTerms<?, ?> testInstance = createTestInstance(name, pipelineAggregators, metaData);
40+
return new UnmappedSignificantTerms(name, testInstance.requiredSize, testInstance.minDocCount, pipelineAggregators, metaData);
41+
}
42+
43+
@Override
44+
protected void assertReduced(InternalSignificantTerms<?, ?> reduced, List<InternalSignificantTerms<?, ?>> inputs) {
45+
assertEquals(inputs.stream().mapToLong(InternalSignificantTerms::getSubsetSize).sum(), reduced.getSubsetSize());
46+
assertEquals(inputs.stream().mapToLong(InternalSignificantTerms::getSupersetSize).sum(), reduced.getSupersetSize());
47+
48+
List<Function<SignificantTerms.Bucket, Long>> counts = Arrays.asList(
49+
SignificantTerms.Bucket::getSubsetDf,
50+
SignificantTerms.Bucket::getSupersetDf,
51+
SignificantTerms.Bucket::getDocCount
52+
);
53+
54+
for (Function<SignificantTerms.Bucket, Long> count : counts) {
55+
Map<Object, Long> reducedCounts = toCounts(reduced.getBuckets().stream(), count);
56+
Map<Object, Long> totalCounts = toCounts(inputs.stream().map(SignificantTerms::getBuckets).flatMap(List::stream), count);
57+
58+
Map<Object, Long> expectedReducedCounts = new HashMap<>(totalCounts);
59+
expectedReducedCounts.keySet().retainAll(reducedCounts.keySet());
60+
assertEquals(expectedReducedCounts, reducedCounts);
61+
}
62+
}
63+
64+
private static Map<Object, Long> toCounts(Stream<? extends SignificantTerms.Bucket> buckets,
65+
Function<SignificantTerms.Bucket, Long> fn) {
66+
return buckets.collect(Collectors.toMap(SignificantTerms.Bucket::getKey, fn, Long::sum));
67+
}
68+
}
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.search.aggregations.bucket.significant;
21+
22+
import org.elasticsearch.common.io.stream.Writeable;
23+
import org.elasticsearch.search.DocValueFormat;
24+
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare;
25+
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.GND;
26+
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore;
27+
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.MutualInformation;
28+
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic;
29+
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
30+
import org.junit.Before;
31+
32+
import java.util.ArrayList;
33+
import java.util.HashSet;
34+
import java.util.List;
35+
import java.util.Map;
36+
import java.util.Set;
37+
38+
import static org.elasticsearch.search.aggregations.InternalAggregations.EMPTY;
39+
40+
public class SignificantLongTermsTests extends InternalSignificantTermsTestCase {
41+
42+
private SignificanceHeuristic significanceHeuristic;
43+
44+
@Before
45+
public void setUpSignificanceHeuristic() {
46+
significanceHeuristic = randomSignificanceHeuristic();
47+
}
48+
49+
@Override
50+
protected InternalSignificantTerms createTestInstance(String name,
51+
List<PipelineAggregator> pipelineAggregators,
52+
Map<String, Object> metaData) {
53+
DocValueFormat format = DocValueFormat.RAW;
54+
int requiredSize = randomIntBetween(1, 5);
55+
int shardSize = requiredSize + 2;
56+
final int numBuckets = randomInt(shardSize);
57+
58+
long globalSubsetSize = 0;
59+
long globalSupersetSize = 0;
60+
61+
List<SignificantLongTerms.Bucket> buckets = new ArrayList<>(numBuckets);
62+
Set<Long> terms = new HashSet<>();
63+
for (int i = 0; i < numBuckets; ++i) {
64+
long term = randomValueOtherThanMany(l -> terms.add(l) == false, random()::nextLong);
65+
66+
int subsetDf = randomIntBetween(1, 10);
67+
int supersetDf = randomIntBetween(subsetDf, 20);
68+
int supersetSize = randomIntBetween(supersetDf, 30);
69+
70+
globalSubsetSize += subsetDf;
71+
globalSupersetSize += supersetSize;
72+
73+
buckets.add(new SignificantLongTerms.Bucket(subsetDf, subsetDf, supersetDf, supersetSize, term, EMPTY, format));
74+
}
75+
return new SignificantLongTerms(name, requiredSize, 1L, pipelineAggregators, metaData, format, globalSubsetSize,
76+
globalSupersetSize, significanceHeuristic, buckets);
77+
}
78+
79+
@Override
80+
protected Writeable.Reader<InternalSignificantTerms<?, ?>> instanceReader() {
81+
return SignificantLongTerms::new;
82+
}
83+
84+
private static SignificanceHeuristic randomSignificanceHeuristic() {
85+
return randomFrom(
86+
new JLHScore(),
87+
new MutualInformation(randomBoolean(), randomBoolean()),
88+
new GND(randomBoolean()),
89+
new ChiSquare(randomBoolean(), randomBoolean()));
90+
}
91+
}
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.search.aggregations.bucket.significant;
21+
22+
import org.apache.lucene.util.BytesRef;
23+
import org.elasticsearch.common.io.stream.Writeable;
24+
import org.elasticsearch.search.DocValueFormat;
25+
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare;
26+
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.GND;
27+
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore;
28+
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.MutualInformation;
29+
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic;
30+
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
31+
import org.junit.Before;
32+
33+
import java.util.ArrayList;
34+
import java.util.HashSet;
35+
import java.util.List;
36+
import java.util.Map;
37+
import java.util.Set;
38+
39+
import static org.elasticsearch.search.aggregations.InternalAggregations.EMPTY;
40+
41+
public class SignificantStringTermsTests extends InternalSignificantTermsTestCase {
42+
43+
private SignificanceHeuristic significanceHeuristic;
44+
45+
@Before
46+
public void setUpSignificanceHeuristic() {
47+
significanceHeuristic = randomSignificanceHeuristic();
48+
}
49+
50+
@Override
51+
protected InternalSignificantTerms createTestInstance(String name,
52+
List<PipelineAggregator> pipelineAggregators,
53+
Map<String, Object> metaData) {
54+
DocValueFormat format = DocValueFormat.RAW;
55+
int requiredSize = randomIntBetween(1, 5);
56+
int shardSize = requiredSize + 2;
57+
final int numBuckets = randomInt(shardSize);
58+
59+
long globalSubsetSize = 0;
60+
long globalSupersetSize = 0;
61+
62+
List<SignificantStringTerms.Bucket> buckets = new ArrayList<>(numBuckets);
63+
Set<BytesRef> terms = new HashSet<>();
64+
for (int i = 0; i < numBuckets; ++i) {
65+
BytesRef term = randomValueOtherThanMany(b -> terms.add(b) == false, () -> new BytesRef(randomAsciiOfLength(10)));
66+
67+
int subsetDf = randomIntBetween(1, 10);
68+
int supersetDf = randomIntBetween(subsetDf, 20);
69+
int supersetSize = randomIntBetween(supersetDf, 30);
70+
71+
globalSubsetSize += subsetDf;
72+
globalSupersetSize += supersetSize;
73+
74+
buckets.add(new SignificantStringTerms.Bucket(term, subsetDf, subsetDf, supersetDf, supersetSize, EMPTY, format));
75+
}
76+
return new SignificantStringTerms(name, requiredSize, 1L, pipelineAggregators, metaData, format, globalSubsetSize,
77+
globalSupersetSize, significanceHeuristic, buckets);
78+
}
79+
80+
@Override
81+
protected Writeable.Reader<InternalSignificantTerms<?, ?>> instanceReader() {
82+
return SignificantStringTerms::new;
83+
}
84+
85+
private static SignificanceHeuristic randomSignificanceHeuristic() {
86+
return randomFrom(
87+
new JLHScore(),
88+
new MutualInformation(randomBoolean(), randomBoolean()),
89+
new GND(randomBoolean()),
90+
new ChiSquare(randomBoolean(), randomBoolean()));
91+
}
92+
}

0 commit comments

Comments
 (0)