Skip to content

Commit 79ac69c

Browse files
authored
[7.x Backport] Prevent SigTerms/SigText from running on fields they do not support (#57485)
SigTerms cannot run on fields that are not searchable, and SigText cannot run on fields that do not have analyzers. Both of these situations fail today with an esoteric exception, so this just formalizes the constraint by throwing an IllegalArgumentException up front. In practice, the only affected field seems to be the `binary` field, which is neither searchable or has a default analyzer (e.g. even numeric and keyword fields have a default analyzer despite not being tokenized) Adds supported-type tests, and makes some changes to the test itself to allow testing sigtext (indexing _source). Also a few tweaks to the test to avoid bad randomization (negative numbers, etc).
1 parent f360020 commit 79ac69c

File tree

4 files changed

+56
-8
lines changed

4 files changed

+56
-8
lines changed

server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTermsAggregatorFactory.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,12 @@ public boolean needsToCollectFromSingleBucket() {
205205
}
206206
}
207207

208-
if (!config.unmapped()) {
208+
if (config.unmapped() == false) {
209+
if (config.fieldContext().fieldType().isSearchable() == false) {
210+
throw new IllegalArgumentException("SignificantText aggregation requires fields to be searchable, but ["
211+
+ config.fieldContext().fieldType().name() + "] is not");
212+
}
213+
209214
this.fieldType = config.fieldContext().fieldType();
210215
this.indexedFieldName = fieldType.name();
211216
}
@@ -248,6 +253,10 @@ private FilterableTermsEnum getTermsEnum(String field) throws IOException {
248253
}
249254

250255
private long getBackgroundFrequency(String value) throws IOException {
256+
// fieldType can be null if the field is unmapped, but theoretically this method should only be called
257+
// when constructing buckets. Assert to ensure this is the case
258+
// TODO this is a bad setup and it should be refactored
259+
assert fieldType != null;
251260
Query query = fieldType.termQuery(value, queryShardContext);
252261
if (query instanceof TermQuery) {
253262
// for types that use the inverted index, we prefer using a caching terms

server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTextAggregatorFactory.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,10 @@ public SignificantTextAggregatorFactory(String name,
7979
// Note that if the field is unmapped (its field type is null), we don't fail,
8080
// and just use the given field name as a placeholder.
8181
this.fieldType = queryShardContext.fieldMapper(fieldName);
82+
if (fieldType != null && fieldType.indexAnalyzer() == null) {
83+
throw new IllegalArgumentException("Field [" + fieldType.name() + "] has no analyzer, but SignificantText " +
84+
"requires an analyzed field");
85+
}
8286
this.indexedFieldName = fieldType != null ? fieldType.name() : fieldName;
8387
this.sourceFieldNames = sourceFieldNames == null
8488
? new String[] { indexedFieldName }
@@ -120,6 +124,10 @@ private FilterableTermsEnum getTermsEnum(String field) throws IOException {
120124
}
121125

122126
private long getBackgroundFrequency(String value) throws IOException {
127+
// fieldType can be null if the field is unmapped, but theoretically this method should only be called
128+
// when constructing buckets. Assert to ensure this is the case
129+
// TODO this is a bad setup and it should be refactored
130+
assert fieldType != null;
123131
Query query = fieldType.termQuery(value, queryShardContext);
124132
if (query instanceof TermQuery) {
125133
// for types that use the inverted index, we prefer using a caching terms

server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTermsAggregatorTests.java

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
import org.apache.lucene.util.BytesRef;
4040
import org.elasticsearch.index.analysis.AnalyzerScope;
4141
import org.elasticsearch.index.analysis.NamedAnalyzer;
42+
import org.elasticsearch.index.mapper.BinaryFieldMapper;
4243
import org.elasticsearch.index.mapper.KeywordFieldMapper;
4344
import org.elasticsearch.index.mapper.MappedFieldType;
4445
import org.elasticsearch.index.mapper.NumberFieldMapper;
@@ -52,6 +53,8 @@
5253
import org.elasticsearch.search.aggregations.AggregationBuilder;
5354
import org.elasticsearch.search.aggregations.AggregatorTestCase;
5455
import org.elasticsearch.search.aggregations.bucket.terms.SignificantTermsAggregatorFactory.ExecutionMode;
56+
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
57+
import org.elasticsearch.search.aggregations.support.ValuesSourceType;
5558
import org.junit.Before;
5659

5760
import java.io.IOException;
@@ -83,25 +86,25 @@ protected AggregationBuilder createAggBuilderForTypeTest(MappedFieldType fieldTy
8386
return new SignificantTermsAggregationBuilder("foo").field(fieldName);
8487
}
8588

86-
/* NOTE - commented out instead of deleted, we need to backport https://github.com/elastic/elasticsearch/pull/52851 to support this.
8789
@Override
8890
protected List<ValuesSourceType> getSupportedValuesSourceTypes() {
8991
return Arrays.asList(CoreValuesSourceType.NUMERIC,
90-
CoreValuesSourceType.BYTES);
92+
CoreValuesSourceType.BYTES,
93+
CoreValuesSourceType.BOOLEAN,
94+
CoreValuesSourceType.DATE,
95+
CoreValuesSourceType.IP);
9196
}
9297

9398
@Override
9499
protected List<String> unsupportedMappedFieldTypes() {
95-
return List.of(
100+
return Arrays.asList(
96101
NumberFieldMapper.NumberType.DOUBLE.typeName(), // floating points are not supported at all
97102
NumberFieldMapper.NumberType.FLOAT.typeName(),
98103
NumberFieldMapper.NumberType.HALF_FLOAT.typeName(),
99104
BinaryFieldMapper.CONTENT_TYPE // binary fields are not supported because they cannot be searched
100105
);
101106
}
102107

103-
*/
104-
105108
/**
106109
* For each provided field type, we also register an alias with name {@code <field>-alias}.
107110
*/

server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTextAggregatorTests.java

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,14 +34,17 @@
3434
import org.apache.lucene.util.BytesRef;
3535
import org.elasticsearch.index.analysis.AnalyzerScope;
3636
import org.elasticsearch.index.analysis.NamedAnalyzer;
37+
import org.elasticsearch.index.mapper.BinaryFieldMapper;
38+
import org.elasticsearch.index.mapper.GeoPointFieldMapper;
3739
import org.elasticsearch.index.mapper.MappedFieldType;
3840
import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType;
41+
import org.elasticsearch.search.aggregations.AggregationBuilder;
3942
import org.elasticsearch.search.aggregations.AggregatorTestCase;
4043
import org.elasticsearch.search.aggregations.bucket.sampler.InternalSampler;
4144
import org.elasticsearch.search.aggregations.bucket.sampler.SamplerAggregationBuilder;
42-
import org.elasticsearch.search.aggregations.bucket.terms.SignificantTerms;
43-
import org.elasticsearch.search.aggregations.bucket.terms.SignificantTextAggregationBuilder;
4445
import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;
46+
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
47+
import org.elasticsearch.search.aggregations.support.ValuesSourceType;
4548

4649
import java.io.IOException;
4750
import java.util.Arrays;
@@ -65,6 +68,31 @@ protected Map<String, MappedFieldType> getFieldAliases(MappedFieldType... fieldT
6568
Function.identity()));
6669
}
6770

71+
@Override
72+
protected AggregationBuilder createAggBuilderForTypeTest(MappedFieldType fieldType, String fieldName) {
73+
return new SignificantTextAggregationBuilder("foo", fieldName);
74+
}
75+
76+
@Override
77+
protected List<ValuesSourceType> getSupportedValuesSourceTypes() {
78+
// TODO it is likely accidental that SigText supports anything other than Bytes, and then only text fields
79+
return Arrays.asList(CoreValuesSourceType.NUMERIC,
80+
CoreValuesSourceType.BYTES,
81+
CoreValuesSourceType.RANGE,
82+
CoreValuesSourceType.GEOPOINT,
83+
CoreValuesSourceType.BOOLEAN,
84+
CoreValuesSourceType.DATE,
85+
CoreValuesSourceType.IP);
86+
}
87+
88+
@Override
89+
protected List<String> unsupportedMappedFieldTypes() {
90+
return Arrays.asList(
91+
BinaryFieldMapper.CONTENT_TYPE, // binary fields are not supported because they do not have analyzers
92+
GeoPointFieldMapper.CONTENT_TYPE // geopoint fields cannot use term queries
93+
);
94+
}
95+
6896
/**
6997
* Uses the significant text aggregation to find the keywords in text fields
7098
*/

0 commit comments

Comments
 (0)