Skip to content

Commit ec4c699

Browse files
authored
Prevent SigTerms/SigText from running on fields they do not support (#52851)
* Prevent SigTerms/SigText from running on fields they do not support SigTerms cannot run on fields that are not searchable, and SigText cannot run on fields that do not have analyzers. Both of these situations fail today with an esoteric exception, so this just formalizes the constraint by throwing an IllegalArgumentException up front. In practice, the only affected field seems to be the `binary` field, which is neither searchable or has a default analyzer (e.g. even numeric and keyword fields have a default analyzer despite not being tokenized) Adds supported-type tests, and makes some changes to the test itself to allow testing sigtext (indexing _source). Also a few tweaks to the test to avoid bad randomization (negative numbers, etc). * Tweak exception text
1 parent da9273a commit ec4c699

File tree

6 files changed

+152
-24
lines changed

6 files changed

+152
-24
lines changed

server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
package org.elasticsearch.index.mapper;
2121

2222
import com.fasterxml.jackson.core.JsonParseException;
23-
2423
import org.apache.lucene.document.DoublePoint;
2524
import org.apache.lucene.document.Field;
2625
import org.apache.lucene.document.FloatPoint;

server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,12 @@ public SignificantTermsAggregatorFactory(String name,
8888
Map<String, Object> metaData) throws IOException {
8989
super(name, config, queryShardContext, parent, subFactoriesBuilder, metaData);
9090

91-
if (!config.unmapped()) {
91+
if (config.unmapped() == false) {
92+
if (config.fieldContext().fieldType().isSearchable() == false) {
93+
throw new IllegalArgumentException("SignificantText aggregation requires fields to be searchable, but ["
94+
+ config.fieldContext().fieldType().name() + "] is not");
95+
}
96+
9297
this.fieldType = config.fieldContext().fieldType();
9398
this.indexedFieldName = fieldType.name();
9499
}
@@ -129,6 +134,10 @@ private FilterableTermsEnum getTermsEnum(String field) throws IOException {
129134
}
130135

131136
private long getBackgroundFrequency(String value) throws IOException {
137+
// fieldType can be null if the field is unmapped, but theoretically this method should only be called
138+
// when constructing buckets. Assert to ensure this is the case
139+
// TODO this is a bad setup and it should be refactored
140+
assert fieldType != null;
132141
Query query = fieldType.termQuery(value, queryShardContext);
133142
if (query instanceof TermQuery) {
134143
// for types that use the inverted index, we prefer using a caching terms

server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTextAggregatorFactory.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,10 @@ public SignificantTextAggregatorFactory(String name,
8383
// Note that if the field is unmapped (its field type is null), we don't fail,
8484
// and just use the given field name as a placeholder.
8585
this.fieldType = queryShardContext.fieldMapper(fieldName);
86+
if (fieldType != null && fieldType.indexAnalyzer() == null) {
87+
throw new IllegalArgumentException("Field [" + fieldType.name() + "] has no analyzer, but SignificantText " +
88+
"requires an analyzed field");
89+
}
8690
this.indexedFieldName = fieldType != null ? fieldType.name() : fieldName;
8791
this.sourceFieldNames = sourceFieldNames == null
8892
? new String[] { indexedFieldName }
@@ -124,6 +128,10 @@ private FilterableTermsEnum getTermsEnum(String field) throws IOException {
124128
}
125129

126130
private long getBackgroundFrequency(String value) throws IOException {
131+
// fieldType can be null if the field is unmapped, but theoretically this method should only be called
132+
// when constructing buckets. Assert to ensure this is the case
133+
// TODO this is a bad setup and it should be refactored
134+
assert fieldType != null;
127135
Query query = fieldType.termQuery(value, queryShardContext);
128136
if (query instanceof TermQuery) {
129137
// for types that use the inverted index, we prefer using a caching terms

server/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorTests.java

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import org.apache.lucene.util.BytesRef;
3737
import org.elasticsearch.index.analysis.AnalyzerScope;
3838
import org.elasticsearch.index.analysis.NamedAnalyzer;
39+
import org.elasticsearch.index.mapper.BinaryFieldMapper;
3940
import org.elasticsearch.index.mapper.KeywordFieldMapper;
4041
import org.elasticsearch.index.mapper.MappedFieldType;
4142
import org.elasticsearch.index.mapper.NumberFieldMapper;
@@ -46,10 +47,14 @@
4647
import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType;
4748
import org.elasticsearch.index.query.QueryBuilder;
4849
import org.elasticsearch.index.query.QueryBuilders;
50+
import org.elasticsearch.search.aggregations.AggregationBuilder;
4951
import org.elasticsearch.search.aggregations.AggregationExecutionException;
5052
import org.elasticsearch.search.aggregations.AggregatorTestCase;
5153
import org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregatorFactory.ExecutionMode;
5254
import org.elasticsearch.search.aggregations.bucket.terms.IncludeExclude;
55+
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
56+
import org.elasticsearch.search.aggregations.support.ValueType;
57+
import org.elasticsearch.search.aggregations.support.ValuesSourceType;
5358
import org.junit.Before;
5459

5560
import java.io.IOException;
@@ -75,6 +80,27 @@ public void setUpTest() throws Exception {
7580
fieldType.setName("field");
7681
}
7782

83+
@Override
84+
protected AggregationBuilder createAggBuilderForTypeTest(MappedFieldType fieldType, String fieldName) {
85+
return new SignificantTermsAggregationBuilder("foo", ValueType.STRING).field(fieldName);
86+
}
87+
88+
@Override
89+
protected List<ValuesSourceType> getSupportedValuesSourceTypes() {
90+
return List.of(CoreValuesSourceType.NUMERIC,
91+
CoreValuesSourceType.BYTES);
92+
}
93+
94+
@Override
95+
protected List<String> unsupportedMappedFieldTypes() {
96+
return List.of(
97+
NumberFieldMapper.NumberType.DOUBLE.typeName(), // floating points are not supported at all
98+
NumberFieldMapper.NumberType.FLOAT.typeName(),
99+
NumberFieldMapper.NumberType.HALF_FLOAT.typeName(),
100+
BinaryFieldMapper.CONTENT_TYPE // binary fields are not supported because they cannot be searched
101+
);
102+
}
103+
78104
/**
79105
* For each provided field type, we also register an alias with name {@code <field>-alias}.
80106
*/

server/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTextAggregatorTests.java

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,17 @@
3434
import org.apache.lucene.util.BytesRef;
3535
import org.elasticsearch.index.analysis.AnalyzerScope;
3636
import org.elasticsearch.index.analysis.NamedAnalyzer;
37+
import org.elasticsearch.index.mapper.BinaryFieldMapper;
38+
import org.elasticsearch.index.mapper.GeoPointFieldMapper;
3739
import org.elasticsearch.index.mapper.MappedFieldType;
3840
import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType;
41+
import org.elasticsearch.search.aggregations.AggregationBuilder;
3942
import org.elasticsearch.search.aggregations.AggregatorTestCase;
4043
import org.elasticsearch.search.aggregations.bucket.sampler.InternalSampler;
4144
import org.elasticsearch.search.aggregations.bucket.sampler.SamplerAggregationBuilder;
4245
import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;
46+
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
47+
import org.elasticsearch.search.aggregations.support.ValuesSourceType;
4348

4449
import java.io.IOException;
4550
import java.util.Arrays;
@@ -63,6 +68,28 @@ protected Map<String, MappedFieldType> getFieldAliases(MappedFieldType... fieldT
6368
Function.identity()));
6469
}
6570

71+
@Override
72+
protected AggregationBuilder createAggBuilderForTypeTest(MappedFieldType fieldType, String fieldName) {
73+
return new SignificantTextAggregationBuilder("foo", fieldName);
74+
}
75+
76+
@Override
77+
protected List<ValuesSourceType> getSupportedValuesSourceTypes() {
78+
// TODO it is likely accidental that SigText supports anything other than Bytes, and then only text fields
79+
return List.of(CoreValuesSourceType.NUMERIC,
80+
CoreValuesSourceType.BYTES,
81+
CoreValuesSourceType.RANGE,
82+
CoreValuesSourceType.GEOPOINT);
83+
}
84+
85+
@Override
86+
protected List<String> unsupportedMappedFieldTypes() {
87+
return List.of(
88+
BinaryFieldMapper.CONTENT_TYPE, // binary fields are not supported because they do not have analyzers
89+
GeoPointFieldMapper.CONTENT_TYPE // geopoint fields cannot use term queries
90+
);
91+
}
92+
6693
/**
6794
* Uses the significant text aggregation to find the keywords in text fields
6895
*/

0 commit comments

Comments
 (0)