Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
package org.elasticsearch.index.mapper;

import com.fasterxml.jackson.core.JsonParseException;

import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatPoint;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,12 @@ public SignificantTermsAggregatorFactory(String name,
Map<String, Object> metaData) throws IOException {
super(name, config, queryShardContext, parent, subFactoriesBuilder, metaData);

if (!config.unmapped()) {
if (config.unmapped() == false) {
if (config.fieldContext().fieldType().isSearchable() == false) {
throw new IllegalArgumentException("SignificantText aggregation requires fields to be searchable, but ["
+ config.fieldContext().fieldType().name() + "] is not");
}

this.fieldType = config.fieldContext().fieldType();
this.indexedFieldName = fieldType.name();
}
Expand Down Expand Up @@ -129,6 +134,10 @@ private FilterableTermsEnum getTermsEnum(String field) throws IOException {
}

private long getBackgroundFrequency(String value) throws IOException {
// fieldType can be null if the field is unmapped, but theoretically this method should only be called
// when constructing buckets. Assert to ensure this is the case
// TODO this is a bad setup and it should be refactored
assert fieldType != null;
Query query = fieldType.termQuery(value, queryShardContext);
if (query instanceof TermQuery) {
// for types that use the inverted index, we prefer using a caching terms
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,10 @@ public SignificantTextAggregatorFactory(String name,
// Note that if the field is unmapped (its field type is null), we don't fail,
// and just use the given field name as a placeholder.
this.fieldType = queryShardContext.fieldMapper(fieldName);
if (fieldType != null && fieldType.indexAnalyzer() == null) {
throw new IllegalArgumentException("Field [" + fieldType.name() + "] has no analyzer, but SignificantText " +
"requires an analyzed field");
}
this.indexedFieldName = fieldType != null ? fieldType.name() : fieldName;
this.sourceFieldNames = sourceFieldNames == null
? new String[] { indexedFieldName }
Expand Down Expand Up @@ -124,6 +128,10 @@ private FilterableTermsEnum getTermsEnum(String field) throws IOException {
}

private long getBackgroundFrequency(String value) throws IOException {
// fieldType can be null if the field is unmapped, but theoretically this method should only be called
// when constructing buckets. Assert to ensure this is the case
// TODO this is a bad setup and it should be refactored
assert fieldType != null;
Query query = fieldType.termQuery(value, queryShardContext);
if (query instanceof TermQuery) {
// for types that use the inverted index, we prefer using a caching terms
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.index.analysis.AnalyzerScope;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.mapper.BinaryFieldMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.NumberFieldMapper;
Expand All @@ -46,10 +47,14 @@
import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregationExecutionException;
import org.elasticsearch.search.aggregations.AggregatorTestCase;
import org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregatorFactory.ExecutionMode;
import org.elasticsearch.search.aggregations.bucket.terms.IncludeExclude;
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
import org.elasticsearch.search.aggregations.support.ValueType;
import org.elasticsearch.search.aggregations.support.ValuesSourceType;
import org.junit.Before;

import java.io.IOException;
Expand All @@ -75,6 +80,27 @@ public void setUpTest() throws Exception {
fieldType.setName("field");
}

@Override
protected AggregationBuilder createAggBuilderForTypeTest(MappedFieldType fieldType, String fieldName) {
return new SignificantTermsAggregationBuilder("foo", ValueType.STRING).field(fieldName);
}

@Override
protected List<ValuesSourceType> getSupportedValuesSourceTypes() {
return List.of(CoreValuesSourceType.NUMERIC,
CoreValuesSourceType.BYTES);
}

@Override
protected List<String> unsupportedMappedFieldTypes() {
return List.of(
NumberFieldMapper.NumberType.DOUBLE.typeName(), // floating points are not supported at all
NumberFieldMapper.NumberType.FLOAT.typeName(),
NumberFieldMapper.NumberType.HALF_FLOAT.typeName(),
BinaryFieldMapper.CONTENT_TYPE // binary fields are not supported because they cannot be searched
);
}

/**
* For each provided field type, we also register an alias with name {@code <field>-alias}.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,17 @@
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.index.analysis.AnalyzerScope;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.mapper.BinaryFieldMapper;
import org.elasticsearch.index.mapper.GeoPointFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregatorTestCase;
import org.elasticsearch.search.aggregations.bucket.sampler.InternalSampler;
import org.elasticsearch.search.aggregations.bucket.sampler.SamplerAggregationBuilder;
import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
import org.elasticsearch.search.aggregations.support.ValuesSourceType;

import java.io.IOException;
import java.util.Arrays;
Expand All @@ -63,6 +68,28 @@ protected Map<String, MappedFieldType> getFieldAliases(MappedFieldType... fieldT
Function.identity()));
}

@Override
protected AggregationBuilder createAggBuilderForTypeTest(MappedFieldType fieldType, String fieldName) {
return new SignificantTextAggregationBuilder("foo", fieldName);
}

@Override
protected List<ValuesSourceType> getSupportedValuesSourceTypes() {
// TODO it is likely accidental that SigText supports anything other than Bytes, and then only text fields
return List.of(CoreValuesSourceType.NUMERIC,
CoreValuesSourceType.BYTES,
CoreValuesSourceType.RANGE,
CoreValuesSourceType.GEOPOINT);
}

@Override
protected List<String> unsupportedMappedFieldTypes() {
return List.of(
BinaryFieldMapper.CONTENT_TYPE, // binary fields are not supported because they do not have analyzers
GeoPointFieldMapper.CONTENT_TYPE // geopoint fields cannot use term queries
);
}

/**
* Uses the significant text aggregation to find the keywords in text fields
*/
Expand Down
Loading