From babbda594870a6cf920cf4778503d1752f3bde3a Mon Sep 17 00:00:00 2001 From: markharwood Date: Tue, 17 May 2016 14:15:53 +0100 Subject: [PATCH 1/2] Aggregations fix: support include/exclude strings formatted for IP and date fields in terms and significant_terms aggregations. Closes #17705 --- .../SignificantTermsAggregatorFactory.java | 8 ++-- .../bucket/terms/TermsAggregatorFactory.java | 8 ++-- .../bucket/terms/support/IncludeExclude.java | 40 +++++++++++------ .../test/search.aggregation/20_terms.yaml | 43 +++++++++++++++++++ .../test/search.aggregation/30_sig_terms.yaml | 19 ++++++++ 5 files changed, 98 insertions(+), 20 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java index 13126029b8e36..b2af1a004c0d0 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java @@ -227,7 +227,7 @@ protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator pare } IncludeExclude.LongFilter longFilter = null; if (includeExclude != null) { - longFilter = includeExclude.convertToLongFilter(); + longFilter = includeExclude.convertToLongFilter(config.format()); } return new SignificantLongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), bucketCountThresholds, context, parent, significanceHeuristic, this, longFilter, pipelineAggregators, @@ -248,7 +248,7 @@ Aggregator create(String name, AggregatorFactories factories, ValuesSource value AggregationContext aggregationContext, Aggregator parent, SignificanceHeuristic significanceHeuristic, SignificantTermsAggregatorFactory termsAggregatorFactory, List pipelineAggregators, Map metaData) throws IOException { - final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter(); + final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter(format); return new SignificantStringTermsAggregator(name, factories, valuesSource, format, bucketCountThresholds, filter, aggregationContext, parent, significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData); } @@ -262,7 +262,7 @@ Aggregator create(String name, AggregatorFactories factories, ValuesSource value AggregationContext aggregationContext, Aggregator parent, SignificanceHeuristic significanceHeuristic, SignificantTermsAggregatorFactory termsAggregatorFactory, List pipelineAggregators, Map metaData) throws IOException { - final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(); + final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format); return new GlobalOrdinalsSignificantTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, format, bucketCountThresholds, filter, aggregationContext, parent, significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData); @@ -277,7 +277,7 @@ Aggregator create(String name, AggregatorFactories factories, ValuesSource value AggregationContext aggregationContext, Aggregator parent, SignificanceHeuristic significanceHeuristic, SignificantTermsAggregatorFactory termsAggregatorFactory, List pipelineAggregators, Map metaData) throws IOException { - final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(); + final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format); return new GlobalOrdinalsSignificantTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource, format, bucketCountThresholds, filter, aggregationContext, parent, significanceHeuristic, termsAggregatorFactory, pipelineAggregators, metaData); diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java index 1ccf4a115704c..64a079554eb53 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java @@ -171,7 +171,7 @@ protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator pare pipelineAggregators, metaData); } if (includeExclude != null) { - longFilter = includeExclude.convertToLongFilter(); + longFilter = includeExclude.convertToLongFilter(config.format()); } return new LongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), order, bucketCountThresholds, context, parent, collectMode, showTermDocCountError, longFilter, pipelineAggregators, @@ -192,7 +192,7 @@ Aggregator create(String name, AggregatorFactories factories, ValuesSource value AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode, boolean showTermDocCountError, List pipelineAggregators, Map metaData) throws IOException { - final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter(); + final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter(format); return new StringTermsAggregator(name, factories, valuesSource, order, format, bucketCountThresholds, filter, aggregationContext, parent, subAggCollectMode, showTermDocCountError, pipelineAggregators, metaData); } @@ -211,7 +211,7 @@ Aggregator create(String name, AggregatorFactories factories, ValuesSource value AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode, boolean showTermDocCountError, List pipelineAggregators, Map metaData) throws IOException { - final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(); + final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format); return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, order, format, bucketCountThresholds, filter, aggregationContext, parent, subAggCollectMode, showTermDocCountError, pipelineAggregators, metaData); @@ -231,7 +231,7 @@ Aggregator create(String name, AggregatorFactories factories, ValuesSource value AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode subAggCollectMode, boolean showTermDocCountError, List pipelineAggregators, Map metaData) throws IOException { - final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(); + final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format); return new GlobalOrdinalsStringTermsAggregator.WithHash(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, order, format, bucketCountThresholds, filter, aggregationContext, parent, subAggCollectMode, showTermDocCountError, pipelineAggregators, metaData); diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/support/IncludeExclude.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/support/IncludeExclude.java index 101291d01e143..209700b86d9a5 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/support/IncludeExclude.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/support/IncludeExclude.java @@ -43,6 +43,7 @@ import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.search.DocValueFormat; import org.elasticsearch.search.aggregations.support.ValuesSource; import org.elasticsearch.search.aggregations.support.ValuesSource.Bytes.WithOrdinals; @@ -135,7 +136,8 @@ public boolean accept(BytesRef value) { } public static abstract class OrdinalsFilter { - public abstract LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals, ValuesSource.Bytes.WithOrdinals valueSource) throws IOException; + public abstract LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals, ValuesSource.Bytes.WithOrdinals valueSource) + throws IOException; } @@ -152,7 +154,8 @@ private AutomatonBackedOrdinalsFilter(Automaton automaton) { * */ @Override - public LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals, ValuesSource.Bytes.WithOrdinals valueSource) throws IOException { + public LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals, ValuesSource.Bytes.WithOrdinals valueSource) + throws IOException { LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount()); TermsEnum globalTermsEnum; Terms globalTerms = new DocValuesTerms(globalOrdinals); @@ -179,7 +182,7 @@ public TermListBackedOrdinalsFilter(SortedSet includeValues, SortedSet @Override public LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals, WithOrdinals valueSource) throws IOException { LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount()); - if(includeValues!=null){ + if (includeValues != null) { for (BytesRef term : includeValues) { long ord = globalOrdinals.lookupTerm(term); if (ord >= 0) { @@ -534,33 +537,46 @@ private Automaton toAutomaton() { return a; } - public StringFilter convertToStringFilter() { + public StringFilter convertToStringFilter(DocValueFormat format) { if (isRegexBased()) { return new AutomatonBackedStringFilter(toAutomaton()); } - return new TermListBackedStringFilter(includeValues, excludeValues); + return new TermListBackedStringFilter(parseForDocValues(includeValues, format), parseForDocValues(excludeValues, format)); } - public OrdinalsFilter convertToOrdinalsFilter() { + private static SortedSet parseForDocValues(SortedSet endUserFormattedValues, DocValueFormat format) { + SortedSet result = endUserFormattedValues; + if (endUserFormattedValues != null) { + if (format != DocValueFormat.RAW) { + result = new TreeSet<>(); + for (BytesRef formattedVal : endUserFormattedValues) { + result.add(format.parseBytesRef(formattedVal.utf8ToString())); + } + } + } + return result; + } + + public OrdinalsFilter convertToOrdinalsFilter(DocValueFormat format) { if (isRegexBased()) { return new AutomatonBackedOrdinalsFilter(toAutomaton()); } - return new TermListBackedOrdinalsFilter(includeValues, excludeValues); + return new TermListBackedOrdinalsFilter(parseForDocValues(includeValues, format), parseForDocValues(excludeValues, format)); } - public LongFilter convertToLongFilter() { + public LongFilter convertToLongFilter(DocValueFormat format) { int numValids = includeValues == null ? 0 : includeValues.size(); int numInvalids = excludeValues == null ? 0 : excludeValues.size(); LongFilter result = new LongFilter(numValids, numInvalids); if (includeValues != null) { for (BytesRef val : includeValues) { - result.addAccept(Long.parseLong(val.utf8ToString())); + result.addAccept(format.parseLong(val.utf8ToString(), false, null)); } } if (excludeValues != null) { for (BytesRef val : excludeValues) { - result.addReject(Long.parseLong(val.utf8ToString())); + result.addReject(format.parseLong(val.utf8ToString(), false, null)); } } return result; @@ -572,13 +588,13 @@ public LongFilter convertToDoubleFilter() { LongFilter result = new LongFilter(numValids, numInvalids); if (includeValues != null) { for (BytesRef val : includeValues) { - double dval=Double.parseDouble(val.utf8ToString()); + double dval = Double.parseDouble(val.utf8ToString()); result.addAccept(NumericUtils.doubleToSortableLong(dval)); } } if (excludeValues != null) { for (BytesRef val : excludeValues) { - double dval=Double.parseDouble(val.utf8ToString()); + double dval = Double.parseDouble(val.utf8ToString()); result.addReject(NumericUtils.doubleToSortableLong(dval)); } } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yaml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yaml index 71d1a1e7ca218..152cf893d23af 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yaml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yaml @@ -117,6 +117,26 @@ setup: - match: { aggregations.ip_terms.buckets.1.doc_count: 1 } + - do: + search: + body: { "size" : 0, "aggs" : { "ip_terms" : { "terms" : { "field" : "ip", "include" : [ "127.0.0.1" ] } } } } + + - match: { hits.total: 3 } + + - length: { aggregations.ip_terms.buckets: 1 } + + - match: { aggregations.ip_terms.buckets.0.key: "127.0.0.1" } + + - do: + search: + body: { "size" : 0, "aggs" : { "ip_terms" : { "terms" : { "field" : "ip", "exclude" : [ "127.0.0.1" ] } } } } + + - match: { hits.total: 3 } + + - length: { aggregations.ip_terms.buckets: 1 } + + - match: { aggregations.ip_terms.buckets.0.key: "::1" } + --- "Boolean test": - do: @@ -300,4 +320,27 @@ setup: - match: { aggregations.date_terms.buckets.1.key_as_string: "2014-09-01T00:00:00.000Z" } - match: { aggregations.date_terms.buckets.1.doc_count: 1 } + + - do: + search: + body: { "size" : 0, "aggs" : { "date_terms" : { "terms" : { "field" : "date", "include" : [ "2016-05-03" ] } } } } + + - match: { hits.total: 3 } + + - length: { aggregations.date_terms.buckets: 1 } + + - match: { aggregations.date_terms.buckets.0.key_as_string: "2016-05-03T00:00:00.000Z" } + + - match: { aggregations.date_terms.buckets.0.doc_count: 2 } + + - do: + search: + body: { "size" : 0, "aggs" : { "date_terms" : { "terms" : { "field" : "date", "exclude" : [ "2016-05-03" ] } } } } + + - match: { hits.total: 3 } + + - length: { aggregations.date_terms.buckets: 1 } + + - match: { aggregations.date_terms.buckets.0.key_as_string: "2014-09-01T00:00:00.000Z" } + - match: { aggregations.date_terms.buckets.0.doc_count: 1 } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms.yaml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms.yaml index 45c042baea435..15c9cfce4f962 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms.yaml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms.yaml @@ -121,3 +121,22 @@ - is_false: aggregations.ip_terms.buckets.0.key_as_string - match: { aggregations.ip_terms.buckets.0.doc_count: 1 } + + - do: + search: + body: { "query" : { "exists" : { "field" : "ip" } }, "aggs" : { "ip_terms" : { "significant_terms" : { "field" : "ip", "min_doc_count" : 1, "include" : [ "::1" ] } } } } + + - match: { hits.total: 1 } + + - length: { aggregations.ip_terms.buckets: 1 } + + - match: { aggregations.ip_terms.buckets.0.key: "::1" } + + - do: + search: + body: { "query" : { "exists" : { "field" : "ip" } }, "aggs" : { "ip_terms" : { "significant_terms" : { "field" : "ip", "min_doc_count" : 1, "exclude" : [ "::1" ] } } } } + + - match: { hits.total: 1 } + + - length: { aggregations.ip_terms.buckets: 0 } + From 786a470b5228c2740ae16825277b0bd63d8ddad2 Mon Sep 17 00:00:00 2001 From: markharwood Date: Tue, 17 May 2016 17:28:02 +0100 Subject: [PATCH 2/2] Added test for illegal regex-style include clauses on IP fields --- .../significant/SignificantTermsAggregatorFactory.java | 9 ++++++++- .../bucket/terms/TermsAggregatorFactory.java | 7 ++++++- .../rest-api-spec/test/search.aggregation/20_terms.yaml | 7 +++++++ .../test/search.aggregation/30_sig_terms.yaml | 6 ++++++ 4 files changed, 27 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java index b2af1a004c0d0..4b9e3acb87375 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java @@ -211,7 +211,14 @@ protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator pare } } assert execution != null; - return execution.create(name, factories, valuesSource, config.format(), bucketCountThresholds, includeExclude, context, parent, + + DocValueFormat format = config.format(); + if ((includeExclude != null) && (includeExclude.isRegexBased()) && format != DocValueFormat.RAW) { + throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude " + + "settings as they can only be applied to string fields. Use an array of values for include/exclude clauses"); + } + + return execution.create(name, factories, valuesSource, format, bucketCountThresholds, includeExclude, context, parent, significanceHeuristic, this, pipelineAggregators, metaData); } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java index 64a079554eb53..62374ae7d1995 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java @@ -150,8 +150,13 @@ protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator pare } } } + DocValueFormat format = config.format(); + if ((includeExclude != null) && (includeExclude.isRegexBased()) && format != DocValueFormat.RAW) { + throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude " + + "settings as they can only be applied to string fields. Use an array of values for include/exclude clauses"); + } - return execution.create(name, factories, valuesSource, order, config.format(), bucketCountThresholds, includeExclude, context, parent, + return execution.create(name, factories, valuesSource, order, format, bucketCountThresholds, includeExclude, context, parent, collectMode, showTermDocCountError, pipelineAggregators, metaData); } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yaml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yaml index 152cf893d23af..c35e79e6cfe38 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yaml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/20_terms.yaml @@ -137,6 +137,13 @@ setup: - match: { aggregations.ip_terms.buckets.0.key: "::1" } + - do: + catch: request + search: + body: { "size" : 0, "aggs" : { "ip_terms" : { "terms" : { "field" : "ip", "exclude" : "127.*" } } } } + + + --- "Boolean test": - do: diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms.yaml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms.yaml index 15c9cfce4f962..a708ff19d7e34 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms.yaml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms.yaml @@ -139,4 +139,10 @@ - match: { hits.total: 1 } - length: { aggregations.ip_terms.buckets: 0 } + + - do: + catch: request + search: + body: { "size" : 0, "aggs" : { "ip_terms" : { "significant_terms" : { "field" : "ip", "exclude" : "127.*" } } } } +