elastic · polyfractal · Mar 23, 2020 · Feb 25, 2020 · Mar 2, 2020 · Mar 2, 2020
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java
@@ -20,7 +20,6 @@
 package org.elasticsearch.index.mapper;
 
 import com.fasterxml.jackson.core.JsonParseException;
-
 import org.apache.lucene.document.DoublePoint;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FloatPoint;

diff --git a/...asticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java b/...asticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java
@@ -88,7 +88,12 @@ public SignificantTermsAggregatorFactory(String name,
                                              Map<String, Object> metaData) throws IOException {
         super(name, config, queryShardContext, parent, subFactoriesBuilder, metaData);
 
-        if (!config.unmapped()) {
+        if (config.unmapped() == false) {
+            if (config.fieldContext().fieldType().isSearchable() == false) {
+                throw new IllegalArgumentException("SignificantText aggregation requires fields to be searchable, but ["
+                    + config.fieldContext().fieldType().name() + "] is not");
+            }
+
             this.fieldType = config.fieldContext().fieldType();
             this.indexedFieldName = fieldType.name();
         }
@@ -129,6 +134,10 @@ private FilterableTermsEnum getTermsEnum(String field) throws IOException {
     }
 
     private long getBackgroundFrequency(String value) throws IOException {
+        // fieldType can be null if the field is unmapped, but theoretically this method should only be called
+        // when constructing buckets.  Assert to ensure this is the case
+        // TODO this is a bad setup and it should be refactored
+        assert fieldType != null;
         Query query = fieldType.termQuery(value, queryShardContext);
         if (query instanceof TermQuery) {
             // for types that use the inverted index, we prefer using a caching terms

diff --git a/...lasticsearch/search/aggregations/bucket/significant/SignificantTextAggregatorFactory.java b/...lasticsearch/search/aggregations/bucket/significant/SignificantTextAggregatorFactory.java
@@ -83,6 +83,10 @@ public SignificantTextAggregatorFactory(String name,
         // Note that if the field is unmapped (its field type is null), we don't fail,
         // and just use the given field name as a placeholder.
         this.fieldType = queryShardContext.fieldMapper(fieldName);
+        if (fieldType != null && fieldType.indexAnalyzer() == null) {
+            throw new IllegalArgumentException("Field [" + fieldType.name() + "] has no analyzer, but SignificantText " +
+                "requires an analyzed field");
+        }
         this.indexedFieldName = fieldType != null ? fieldType.name() : fieldName;
         this.sourceFieldNames = sourceFieldNames == null
             ? new String[] { indexedFieldName }
@@ -124,6 +128,10 @@ private FilterableTermsEnum getTermsEnum(String field) throws IOException {
     }
 
     private long getBackgroundFrequency(String value) throws IOException {
+        // fieldType can be null if the field is unmapped, but theoretically this method should only be called
+        // when constructing buckets.  Assert to ensure this is the case
+        // TODO this is a bad setup and it should be refactored
+        assert fieldType != null;
         Query query = fieldType.termQuery(value, queryShardContext);
         if (query instanceof TermQuery) {
             // for types that use the inverted index, we prefer using a caching terms

diff --git a/...elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorTests.java b/...elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorTests.java
@@ -36,6 +36,7 @@
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.index.analysis.AnalyzerScope;
 import org.elasticsearch.index.analysis.NamedAnalyzer;
+import org.elasticsearch.index.mapper.BinaryFieldMapper;
 import org.elasticsearch.index.mapper.KeywordFieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.NumberFieldMapper;
@@ -46,10 +47,14 @@
 import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType;
 import org.elasticsearch.index.query.QueryBuilder;
 import org.elasticsearch.index.query.QueryBuilders;
+import org.elasticsearch.search.aggregations.AggregationBuilder;
 import org.elasticsearch.search.aggregations.AggregationExecutionException;
 import org.elasticsearch.search.aggregations.AggregatorTestCase;
 import org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregatorFactory.ExecutionMode;
 import org.elasticsearch.search.aggregations.bucket.terms.IncludeExclude;
+import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
+import org.elasticsearch.search.aggregations.support.ValueType;
+import org.elasticsearch.search.aggregations.support.ValuesSourceType;
 import org.junit.Before;
 
 import java.io.IOException;
@@ -75,6 +80,27 @@ public void setUpTest() throws Exception {
         fieldType.setName("field");
     }
 
+    @Override
+    protected AggregationBuilder createAggBuilderForTypeTest(MappedFieldType fieldType, String fieldName) {
+        return new SignificantTermsAggregationBuilder("foo", ValueType.STRING).field(fieldName); // fieldType itself is unused
+    }
+
+    @Override
+    protected List<ValuesSourceType> getSupportedValuesSourceTypes() {
+        // Values source types reported as supported for significant terms: numeric and bytes.
+        return List.of(CoreValuesSourceType.NUMERIC, CoreValuesSourceType.BYTES);
+    }
+
+    @Override
+    protected List<String> unsupportedMappedFieldTypes() {
+        // Floating point number types are not supported at all.
+        String doubleType = NumberFieldMapper.NumberType.DOUBLE.typeName();
+        String floatType = NumberFieldMapper.NumberType.FLOAT.typeName();
+        String halfFloatType = NumberFieldMapper.NumberType.HALF_FLOAT.typeName();
+        // Binary fields are not supported because they cannot be searched.
+        return List.of(doubleType, floatType, halfFloatType, BinaryFieldMapper.CONTENT_TYPE);
+    }
+
     /**
      * For each provided field type, we also register an alias with name {@code <field>-alias}.
      */

diff --git a/.../elasticsearch/search/aggregations/bucket/significant/SignificantTextAggregatorTests.java b/.../elasticsearch/search/aggregations/bucket/significant/SignificantTextAggregatorTests.java
@@ -34,12 +34,17 @@
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.index.analysis.AnalyzerScope;
 import org.elasticsearch.index.analysis.NamedAnalyzer;
+import org.elasticsearch.index.mapper.BinaryFieldMapper;
+import org.elasticsearch.index.mapper.GeoPointFieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType;
+import org.elasticsearch.search.aggregations.AggregationBuilder;
 import org.elasticsearch.search.aggregations.AggregatorTestCase;
 import org.elasticsearch.search.aggregations.bucket.sampler.InternalSampler;
 import org.elasticsearch.search.aggregations.bucket.sampler.SamplerAggregationBuilder;
 import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;
+import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
+import org.elasticsearch.search.aggregations.support.ValuesSourceType;
 
 import java.io.IOException;
 import java.util.Arrays;
@@ -63,6 +68,28 @@ protected Map<String, MappedFieldType> getFieldAliases(MappedFieldType... fieldT
             Function.identity()));
     }
 
+    @Override
+    protected AggregationBuilder createAggBuilderForTypeTest(MappedFieldType fieldType, String fieldName) {
+        return new SignificantTextAggregationBuilder("foo", fieldName); // fieldType itself is unused
+    }
+
+    @Override
+    protected List<ValuesSourceType> getSupportedValuesSourceTypes() {
+        // TODO SigText probably only intends to support Bytes (and then only text fields);
+        // it is likely accidental that the other values source types listed here are accepted.
+        return List.of(
+            CoreValuesSourceType.NUMERIC, CoreValuesSourceType.BYTES,
+            CoreValuesSourceType.RANGE, CoreValuesSourceType.GEOPOINT);
+    }
+
+    @Override
+    protected List<String> unsupportedMappedFieldTypes() {
+        // Binary fields are not supported because they do not have analyzers;
+        // geopoint fields are not supported because they cannot use term queries.
+        String binaryType = BinaryFieldMapper.CONTENT_TYPE;
+        return List.of(binaryType, GeoPointFieldMapper.CONTENT_TYPE);
+    }
+
     /**
      * Uses the significant text aggregation to find the keywords in text fields
      */