diff --git a/docs/reference/aggregations/metrics/percentile-aggregation.asciidoc b/docs/reference/aggregations/metrics/percentile-aggregation.asciidoc
index 4e5f330a35c1f..9d4476c779997 100644
--- a/docs/reference/aggregations/metrics/percentile-aggregation.asciidoc
+++ b/docs/reference/aggregations/metrics/percentile-aggregation.asciidoc
@@ -2,9 +2,9 @@
 === Percentiles Aggregation
 
 A `multi-value` metrics aggregation that calculates one or more percentiles
-over numeric values extracted from the aggregated documents. These values
-can be extracted either from specific numeric fields in the documents, or
-be generated by a provided script.
+over numeric values extracted from the aggregated documents. These values can be
+generated by a provided script or extracted from specific numeric or
+<> in the documents.
 
 Percentiles show the point at which a certain percentage of observed values
 occur. For example, the 95th percentile is the value which is greater than 95%
diff --git a/docs/reference/aggregations/metrics/percentile-rank-aggregation.asciidoc b/docs/reference/aggregations/metrics/percentile-rank-aggregation.asciidoc
index ba05bdb902990..d0765ea026846 100644
--- a/docs/reference/aggregations/metrics/percentile-rank-aggregation.asciidoc
+++ b/docs/reference/aggregations/metrics/percentile-rank-aggregation.asciidoc
@@ -2,9 +2,9 @@
 === Percentile Ranks Aggregation
 
 A `multi-value` metrics aggregation that calculates one or more percentile ranks
-over numeric values extracted from the aggregated documents. These values
-can be extracted either from specific numeric fields in the documents, or
-be generated by a provided script.
+over numeric values extracted from the aggregated documents. These values can be
+generated by a provided script or extracted from specific numeric or
+<> in the documents.
 
 [NOTE]
 ==================================================
diff --git a/docs/reference/mapping/types.asciidoc b/docs/reference/mapping/types.asciidoc
index 3ba7ff1d1b16d..1472d1bf641eb 100644
--- a/docs/reference/mapping/types.asciidoc
+++ b/docs/reference/mapping/types.asciidoc
@@ -32,6 +32,7 @@ string:: <> and <>
 <>:: `ip` for IPv4 and IPv6 addresses
 <>:: `completion` to provide auto-complete suggestions
+<>:: `token_count` to count the number of tokens in a string
 {plugins}/mapper-murmur3.html[`mapper-murmur3`]:: `murmur3` to compute hashes of values at index-time and store them in the index
 {plugins}/mapper-annotated-text.html[`mapper-annotated-text`]:: `annotated-text` to index text containing special markup (typically used for identifying named entities)
@@ -54,6 +55,8 @@ string:: <> and <>
 <>:: `shape` for arbitrary cartesian geometries.
 
+<>:: `histogram` for pre-aggregated numerical values, for use with percentiles aggregations.
+
 [float]
 [[types-array-handling]]
 === Arrays
@@ -89,6 +92,8 @@ include::types/date_nanos.asciidoc[]
 
 include::types/dense-vector.asciidoc[]
 
+include::types/histogram.asciidoc[]
+
 include::types/flattened.asciidoc[]
 
 include::types/geo-point.asciidoc[]
diff --git a/docs/reference/mapping/types/histogram.asciidoc b/docs/reference/mapping/types/histogram.asciidoc
new file mode 100644
index 0000000000000..fe4209c52b772
--- /dev/null
+++ b/docs/reference/mapping/types/histogram.asciidoc
@@ -0,0 +1,119 @@
+[role="xpack"]
+[testenv="basic"]
+[[histogram]]
+=== Histogram datatype
+++++
+Histogram
+++++
+
+A field to store pre-aggregated numerical data representing a histogram.
+This data is defined using two paired arrays:
+
+* A `values` array of <> numbers, representing the buckets for
+the histogram. These values must be provided in ascending order.
+* A corresponding `counts` array of <> numbers, representing how
+many values fall into each bucket. These numbers must be positive or zero.
+
+Because the elements in the `values` array correspond to the elements in the
+same position of the `counts` array, these two arrays must have the same length.
+
+[IMPORTANT]
+========
+* A `histogram` field can only store a single pair of `values` and `counts` arrays
+per document. Nested arrays are not supported.
+* `histogram` fields do not support sorting.
+========
+
+[[histogram-uses]]
+==== Uses
+
+`histogram` fields are primarily intended for use with aggregations. To make the
+data more readily accessible for aggregations, `histogram` field data is stored as a
+binary <> and not indexed. Its size in bytes is at most
+`13 * numValues`, where `numValues` is the length of the provided arrays: each
+element is stored as a variable-length integer count (at most 5 bytes) followed
+by an 8-byte double value.
+
+Because the data is not indexed, you can only use `histogram` fields for the
+following aggregations and queries:
+
+* <> aggregation
+* <> aggregation
+* <> query
+
+[[mapping-types-histogram-building-histogram]]
+==== Building a histogram
+
+When using a histogram as part of an aggregation, the accuracy of the results will depend on how the
+histogram was constructed. When building the histogram, it is important to consider the percentiles
+aggregation mode that will later consume it. Some possibilities include:
+
+- For the <> mode, the `values` array represents
+the mean centroid positions and the `counts` array represents the number of values that are attributed to each
+centroid. If the algorithm has already started to approximate the percentiles, this inaccuracy is
+carried over in the histogram.
+
+- For the <<_hdr_histogram,High Dynamic Range (HDR)>> histogram mode, the `values` array represents fixed upper
+limits of each bucket interval, and the `counts` array represents the number of values that are attributed to each
+interval. This implementation maintains a fixed worst-case percentage error (specified as a number of significant digits),
+so the precision chosen when generating the histogram is the maximum accuracy you can achieve at aggregation time.
+
+The histogram field is "algorithm agnostic" and does not store data specific to either T-Digest or HDRHistogram. While this
+means the field can technically be aggregated with either algorithm, in practice the user should choose one algorithm and
+index data in that manner (e.g. centroids for T-Digest or intervals for HDRHistogram) to ensure best accuracy.
+
+[[histogram-ex]]
+==== Examples
+
+The following <> API request creates a new index with two field mappings:
+
+* `my_histogram`, a `histogram` field used to store percentile data
+* `my_text`, a `keyword` field used to store a title for the histogram
+
+[source,console]
+--------------------------------------------------
+PUT my_index
+{
+  "mappings": {
+    "properties": {
+      "my_histogram": {
+        "type" : "histogram"
+      },
+      "my_text" : {
+        "type" : "keyword"
+      }
+    }
+  }
+}
+--------------------------------------------------
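+How you generate the two arrays depends on the sketch you use. As a brief
+illustration only (this sketch assumes the `com.tdunning:t-digest` library and
+hypothetical variable names, and is not part of this change), the centroids of
+a T-Digest map directly onto the two arrays:
+
+[source,java]
+--------------------------------------------------
+import com.tdunning.math.stats.Centroid;
+import com.tdunning.math.stats.TDigest;
+
+import java.util.ArrayList;
+import java.util.List;
+
+class HistogramPayload {
+    static void buildPayload(double[] observations) {
+        TDigest digest = TDigest.createDigest(100);     // compression factor of 100
+        for (double observation : observations) {
+            digest.add(observation);                    // feed raw data into the sketch
+        }
+        List<Double> values = new ArrayList<>();
+        List<Integer> counts = new ArrayList<>();
+        for (Centroid centroid : digest.centroids()) {  // centroids come back ordered by mean
+            values.add(centroid.mean());                // mean centroid position -> "values" array
+            counts.add(centroid.count());               // observations per centroid -> "counts" array
+        }
+        // "values" and "counts" now hold the paired arrays to index into a histogram field
+    }
+}
+--------------------------------------------------
+
+The following <> API requests store pre-aggregated data for
+two histograms: `histogram_1` and `histogram_2`.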
+
+[source,console]
+--------------------------------------------------
+PUT my_index/_doc/1
+{
+  "my_text" : "histogram_1",
+  "my_histogram" : {
+    "values" : [0.1, 0.2, 0.3, 0.4, 0.5], <1>
+    "counts" : [3, 7, 23, 12, 6] <2>
+  }
+}
+
+PUT my_index/_doc/2
+{
+  "my_text" : "histogram_2",
+  "my_histogram" : {
+    "values" : [0.1, 0.25, 0.35, 0.4, 0.45, 0.5], <1>
+    "counts" : [8, 17, 8, 7, 6, 2] <2>
+  }
+}
+--------------------------------------------------
+<1> Values for each bucket. Values in the array are treated as doubles and must be given in
+increasing order. For <> histograms this value represents the mean centroid position. For
+HDR histograms it represents the fixed upper limit of the bucket's interval.
+<2> Count for each bucket. Values in the arrays are treated as integers and must be positive or zero.
+Negative values will be rejected. Buckets and counts are matched by their position in the arrays.
diff --git a/server/src/main/java/org/elasticsearch/index/fielddata/AtomicHistogramFieldData.java b/server/src/main/java/org/elasticsearch/index/fielddata/AtomicHistogramFieldData.java
new file mode 100644
index 0000000000000..1678ca0df7783
--- /dev/null
+++ b/server/src/main/java/org/elasticsearch/index/fielddata/AtomicHistogramFieldData.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.index.fielddata;
+
+
+import java.io.IOException;
+
+/**
+ * {@link AtomicFieldData} specialization for histogram data.
+ */
+public interface AtomicHistogramFieldData extends AtomicFieldData {
+
+    /**
+     * Return Histogram values.
+     */
+    HistogramValues getHistogramValues() throws IOException;
+
+}
diff --git a/server/src/main/java/org/elasticsearch/index/fielddata/HistogramValue.java b/server/src/main/java/org/elasticsearch/index/fielddata/HistogramValue.java
new file mode 100644
index 0000000000000..0f35f82bb703a
--- /dev/null
+++ b/server/src/main/java/org/elasticsearch/index/fielddata/HistogramValue.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ + +package org.elasticsearch.index.fielddata; + +import java.io.IOException; + +/** + * Per-document histogram value. Every value of the histogram consist on + * a value and a count. + */ +public abstract class HistogramValue { + + /** + * Advance this instance to the next value of the histogram + * @return true if there is a next value + */ + public abstract boolean next() throws IOException; + + /** + * the current value of the histogram + * @return the current value of the histogram + */ + public abstract double value(); + + /** + * The current count of the histogram + * @return the current count of the histogram + */ + public abstract int count(); + +} diff --git a/server/src/main/java/org/elasticsearch/index/fielddata/HistogramValues.java b/server/src/main/java/org/elasticsearch/index/fielddata/HistogramValues.java new file mode 100644 index 0000000000000..f2a0a9c9092d6 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/fielddata/HistogramValues.java @@ -0,0 +1,41 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.fielddata; + +import java.io.IOException; + +/** + * Per-segment histogram values. + */ +public abstract class HistogramValues { + + /** + * Advance this instance to the given document id + * @return true if there is a value for this document + */ + public abstract boolean advanceExact(int doc) throws IOException; + + /** + * Get the {@link HistogramValue} associated with the current document. + * The returned {@link HistogramValue} might be reused across calls. + */ + public abstract HistogramValue histogram() throws IOException; + +} diff --git a/server/src/main/java/org/elasticsearch/index/fielddata/IndexHistogramFieldData.java b/server/src/main/java/org/elasticsearch/index/fielddata/IndexHistogramFieldData.java new file mode 100644 index 0000000000000..0a8160f427f43 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/fielddata/IndexHistogramFieldData.java @@ -0,0 +1,34 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.fielddata; + + +import org.elasticsearch.index.Index; +import org.elasticsearch.index.fielddata.plain.DocValuesIndexFieldData; + +/** + * Specialization of {@link IndexFieldData} for histograms. + */ +public abstract class IndexHistogramFieldData extends DocValuesIndexFieldData implements IndexFieldData { + + public IndexHistogramFieldData(Index index, String fieldName) { + super(index, fieldName); + } +} diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractHDRPercentilesAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractHDRPercentilesAggregator.java index 0848a494c7454..5d9e616a39718 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractHDRPercentilesAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractHDRPercentilesAggregator.java @@ -26,12 +26,13 @@ import org.elasticsearch.common.util.ArrayUtils; import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.common.util.ObjectArray; +import org.elasticsearch.index.fielddata.HistogramValue; +import org.elasticsearch.index.fielddata.HistogramValues; import org.elasticsearch.index.fielddata.SortedNumericDoubleValues; import org.elasticsearch.search.DocValueFormat; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.LeafBucketCollector; import org.elasticsearch.search.aggregations.LeafBucketCollectorBase; -import org.elasticsearch.search.aggregations.metrics.NumericMetricsAggregator; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; import org.elasticsearch.search.aggregations.support.ValuesSource; import org.elasticsearch.search.internal.SearchContext; @@ -47,13 +48,13 @@ private static int indexOfKey(double[] keys, double key) { } protected final double[] keys; - protected final ValuesSource.Numeric valuesSource; + protected final ValuesSource valuesSource; protected final DocValueFormat format; protected ObjectArray states; protected final int numberOfSignificantValueDigits; protected final boolean keyed; - AbstractHDRPercentilesAggregator(String name, ValuesSource.Numeric valuesSource, SearchContext context, Aggregator parent, + AbstractHDRPercentilesAggregator(String name, ValuesSource valuesSource, SearchContext context, Aggregator parent, double[] keys, int numberOfSignificantValueDigits, boolean keyed, DocValueFormat formatter, List pipelineAggregators, Map metaData) throws IOException { super(name, context, parent, pipelineAggregators, metaData); @@ -77,25 +78,22 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, return LeafBucketCollector.NO_OP_COLLECTOR; } final BigArrays bigArrays = context.bigArrays(); - final SortedNumericDoubleValues values = valuesSource.doubleValues(ctx); + if (valuesSource instanceof ValuesSource.Histogram) { + final HistogramValues values = ((ValuesSource.Histogram)valuesSource).getHistogramValues(ctx); + return collectHistogramValues(values, bigArrays, sub); + } else { + final SortedNumericDoubleValues values = ((ValuesSource.Numeric)valuesSource).doubleValues(ctx); + return collectNumeric(values, bigArrays, sub); + } + + } + + private LeafBucketCollector collectNumeric(final SortedNumericDoubleValues values, + final BigArrays bigArrays, final LeafBucketCollector sub) { return new LeafBucketCollectorBase(sub, values) { @Override public void collect(int doc, long bucket) throws IOException { - states = 
bigArrays.grow(states, bucket + 1); - - DoubleHistogram state = states.get(bucket); - if (state == null) { - state = new DoubleHistogram(numberOfSignificantValueDigits); - // Set the histogram to autosize so it can resize itself as - // the data range increases. Resize operations should be - // rare as the histogram buckets are exponential (on the top - // level). In the future we could expose the range as an - // option on the request so the histogram can be fixed at - // initialisation and doesn't need resizing. - state.setAutoResize(true); - states.set(bucket, state); - } - + DoubleHistogram state = getExistingOrNewHistogram(bigArrays, bucket); if (values.advanceExact(doc)) { final int valueCount = values.docValueCount(); for (int i = 0; i < valueCount; i++) { @@ -106,6 +104,39 @@ public void collect(int doc, long bucket) throws IOException { }; } + private LeafBucketCollector collectHistogramValues(final HistogramValues values, + final BigArrays bigArrays, final LeafBucketCollector sub) { + return new LeafBucketCollectorBase(sub, values) { + @Override + public void collect(int doc, long bucket) throws IOException { + DoubleHistogram state = getExistingOrNewHistogram(bigArrays, bucket); + if (values.advanceExact(doc)) { + final HistogramValue sketch = values.histogram(); + while (sketch.next()) { + state.recordValueWithCount(sketch.value(), sketch.count()); + } + } + } + }; + } + + private DoubleHistogram getExistingOrNewHistogram(final BigArrays bigArrays, long bucket) { + states = bigArrays.grow(states, bucket + 1); + DoubleHistogram state = states.get(bucket); + if (state == null) { + state = new DoubleHistogram(numberOfSignificantValueDigits); + // Set the histogram to autosize so it can resize itself as + // the data range increases. Resize operations should be + // rare as the histogram buckets are exponential (on the top + // level). In the future we could expose the range as an + // option on the request so the histogram can be fixed at + // initialisation and doesn't need resizing. 
+ state.setAutoResize(true); + states.set(bucket, state); + } + return state; + } + @Override public boolean hasMetric(String name) { return indexOfKey(keys, Double.parseDouble(name)) >= 0; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractTDigestPercentilesAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractTDigestPercentilesAggregator.java index 15ad622fce58c..1b78db480068e 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractTDigestPercentilesAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractTDigestPercentilesAggregator.java @@ -25,6 +25,8 @@ import org.elasticsearch.common.util.ArrayUtils; import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.common.util.ObjectArray; +import org.elasticsearch.index.fielddata.HistogramValue; +import org.elasticsearch.index.fielddata.HistogramValues; import org.elasticsearch.index.fielddata.SortedNumericDoubleValues; import org.elasticsearch.search.DocValueFormat; import org.elasticsearch.search.aggregations.Aggregator; @@ -45,13 +47,13 @@ private static int indexOfKey(double[] keys, double key) { } protected final double[] keys; - protected final ValuesSource.Numeric valuesSource; + protected final ValuesSource valuesSource; protected final DocValueFormat formatter; protected ObjectArray states; protected final double compression; protected final boolean keyed; - AbstractTDigestPercentilesAggregator(String name, ValuesSource.Numeric valuesSource, SearchContext context, Aggregator parent, + AbstractTDigestPercentilesAggregator(String name, ValuesSource valuesSource, SearchContext context, Aggregator parent, double[] keys, double compression, boolean keyed, DocValueFormat formatter, List pipelineAggregators, Map metaData) throws IOException { super(name, context, parent, pipelineAggregators, metaData); @@ -75,18 +77,22 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, return LeafBucketCollector.NO_OP_COLLECTOR; } final BigArrays bigArrays = context.bigArrays(); - final SortedNumericDoubleValues values = valuesSource.doubleValues(ctx); + if (valuesSource instanceof ValuesSource.Histogram) { + final HistogramValues values = ((ValuesSource.Histogram)valuesSource).getHistogramValues(ctx); + return collectHistogramValues(values, bigArrays, sub); + } else { + final SortedNumericDoubleValues values = ((ValuesSource.Numeric)valuesSource).doubleValues(ctx); + return collectNumeric(values, bigArrays, sub); + } + + } + + private LeafBucketCollector collectNumeric(final SortedNumericDoubleValues values, + final BigArrays bigArrays, final LeafBucketCollector sub) { return new LeafBucketCollectorBase(sub, values) { @Override public void collect(int doc, long bucket) throws IOException { - states = bigArrays.grow(states, bucket + 1); - - TDigestState state = states.get(bucket); - if (state == null) { - state = new TDigestState(compression); - states.set(bucket, state); - } - + TDigestState state = getExistingOrNewHistogram(bigArrays, bucket); if (values.advanceExact(doc)) { final int valueCount = values.docValueCount(); for (int i = 0; i < valueCount; i++) { @@ -97,6 +103,32 @@ public void collect(int doc, long bucket) throws IOException { }; } + private LeafBucketCollector collectHistogramValues(final HistogramValues values, + final BigArrays bigArrays, final LeafBucketCollector sub) { + return new LeafBucketCollectorBase(sub, values) { + @Override + public void 
collect(int doc, long bucket) throws IOException { + TDigestState state = getExistingOrNewHistogram(bigArrays, bucket); + if (values.advanceExact(doc)) { + final HistogramValue sketch = values.histogram(); + while(sketch.next()) { + state.add(sketch.value(), sketch.count()); + } + } + } + }; + } + + private TDigestState getExistingOrNewHistogram(final BigArrays bigArrays, long bucket) { + states = bigArrays.grow(states, bucket + 1); + TDigestState state = states.get(bucket); + if (state == null) { + state = new TDigestState(compression); + states.set(bucket, state); + } + return state; + } + @Override public boolean hasMetric(String name) { return indexOfKey(keys, Double.parseDouble(name)) >= 0; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentileRanksAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentileRanksAggregator.java index 881d7a4bf4f4d..308d40c94cf8b 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentileRanksAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentileRanksAggregator.java @@ -23,7 +23,7 @@ import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.InternalAggregation; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; -import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric; +import org.elasticsearch.search.aggregations.support.ValuesSource; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; @@ -32,7 +32,7 @@ class HDRPercentileRanksAggregator extends AbstractHDRPercentilesAggregator { - HDRPercentileRanksAggregator(String name, Numeric valuesSource, SearchContext context, Aggregator parent, + HDRPercentileRanksAggregator(String name, ValuesSource valuesSource, SearchContext context, Aggregator parent, double[] percents, int numberOfSignificantValueDigits, boolean keyed, DocValueFormat format, List pipelineAggregators, Map metaData) throws IOException { super(name, valuesSource, context, parent, percents, numberOfSignificantValueDigits, keyed, format, pipelineAggregators, diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentileRanksAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentileRanksAggregatorFactory.java index dd191e8c457f2..3e3717b146fad 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentileRanksAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentileRanksAggregatorFactory.java @@ -25,7 +25,6 @@ import org.elasticsearch.search.aggregations.AggregatorFactory; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; import org.elasticsearch.search.aggregations.support.ValuesSource; -import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric; import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; import org.elasticsearch.search.internal.SearchContext; @@ -35,13 +34,13 @@ import java.util.Map; class HDRPercentileRanksAggregatorFactory - extends ValuesSourceAggregatorFactory { + extends ValuesSourceAggregatorFactory { private final double[] values; private final int numberOfSignificantValueDigits; private final boolean keyed; - HDRPercentileRanksAggregatorFactory(String name, 
ValuesSourceConfig config, double[] values, + HDRPercentileRanksAggregatorFactory(String name, ValuesSourceConfig config, double[] values, int numberOfSignificantValueDigits, boolean keyed, QueryShardContext queryShardContext, AggregatorFactory parent, AggregatorFactories.Builder subFactoriesBuilder, Map metaData) throws IOException { @@ -61,7 +60,7 @@ protected Aggregator createUnmapped(SearchContext searchContext, } @Override - protected Aggregator doCreateInternal(Numeric valuesSource, + protected Aggregator doCreateInternal(ValuesSource valuesSource, SearchContext searchContext, Aggregator parent, boolean collectsFromSingleBucket, diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentilesAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentilesAggregator.java index f1a4a03b24bb1..ae45dcf50d4d1 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentilesAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentilesAggregator.java @@ -23,7 +23,7 @@ import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.InternalAggregation; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; -import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric; +import org.elasticsearch.search.aggregations.support.ValuesSource; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; @@ -32,7 +32,7 @@ class HDRPercentilesAggregator extends AbstractHDRPercentilesAggregator { - HDRPercentilesAggregator(String name, Numeric valuesSource, SearchContext context, Aggregator parent, double[] percents, + HDRPercentilesAggregator(String name, ValuesSource valuesSource, SearchContext context, Aggregator parent, double[] percents, int numberOfSignificantValueDigits, boolean keyed, DocValueFormat formatter, List pipelineAggregators, Map metaData) throws IOException { super(name, valuesSource, context, parent, percents, numberOfSignificantValueDigits, keyed, formatter, diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentilesAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentilesAggregatorFactory.java index de5af206c5398..4df4e19aa5df9 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentilesAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentilesAggregatorFactory.java @@ -25,7 +25,6 @@ import org.elasticsearch.search.aggregations.AggregatorFactory; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; import org.elasticsearch.search.aggregations.support.ValuesSource; -import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric; import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; import org.elasticsearch.search.internal.SearchContext; @@ -34,14 +33,14 @@ import java.util.List; import java.util.Map; -class HDRPercentilesAggregatorFactory extends ValuesSourceAggregatorFactory { +class HDRPercentilesAggregatorFactory extends ValuesSourceAggregatorFactory { private final double[] percents; private final int numberOfSignificantValueDigits; private final boolean keyed; HDRPercentilesAggregatorFactory(String name, - ValuesSourceConfig config, + ValuesSourceConfig 
config, double[] percents, int numberOfSignificantValueDigits, boolean keyed, @@ -66,7 +65,7 @@ protected Aggregator createUnmapped(SearchContext searchContext, } @Override - protected Aggregator doCreateInternal(Numeric valuesSource, + protected Aggregator doCreateInternal(ValuesSource valuesSource, SearchContext searchContext, Aggregator parent, boolean collectsFromSingleBucket, diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/PercentileRanksAggregationBuilder.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/PercentileRanksAggregationBuilder.java index ae9afe476bf10..304a21bf98484 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/PercentileRanksAggregationBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/PercentileRanksAggregationBuilder.java @@ -33,7 +33,6 @@ import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; import org.elasticsearch.search.aggregations.support.ValueType; import org.elasticsearch.search.aggregations.support.ValuesSource; -import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric; import org.elasticsearch.search.aggregations.support.ValuesSourceAggregationBuilder.LeafOnly; import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; @@ -47,7 +46,7 @@ import static org.elasticsearch.common.xcontent.ConstructingObjectParser.constructorArg; -public class PercentileRanksAggregationBuilder extends LeafOnly { +public class PercentileRanksAggregationBuilder extends LeafOnly { public static final String NAME = PercentileRanks.TYPE_NAME; public static final ParseField VALUES_FIELD = new ParseField("values"); @@ -80,7 +79,7 @@ private static class HDROptions { static { PARSER = new ConstructingObjectParser<>(PercentileRanksAggregationBuilder.NAME, false, (a, context) -> new PercentileRanksAggregationBuilder(context, (List) a[0])); - ValuesSourceParserHelper.declareNumericFields(PARSER, true, false, false); + ValuesSourceParserHelper.declareAnyFields(PARSER, true, true); PARSER.declareDoubleArray(constructorArg(), VALUES_FIELD); PARSER.declareBoolean(PercentileRanksAggregationBuilder::keyed, PercentilesAggregationBuilder.KEYED_FIELD); @@ -240,8 +239,10 @@ public PercentilesMethod method() { } @Override - protected ValuesSourceAggregatorFactory innerBuild(QueryShardContext queryShardContext, ValuesSourceConfig config, - AggregatorFactory parent, Builder subFactoriesBuilder) throws IOException { + protected ValuesSourceAggregatorFactory innerBuild(QueryShardContext queryShardContext, + ValuesSourceConfig config, + AggregatorFactory parent, + Builder subFactoriesBuilder) throws IOException { switch (method) { case TDIGEST: return new TDigestPercentileRanksAggregatorFactory(name, config, values, compression, keyed, queryShardContext, parent, diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/PercentilesAggregationBuilder.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/PercentilesAggregationBuilder.java index b0743c4248496..db90f8c72634b 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/PercentilesAggregationBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/PercentilesAggregationBuilder.java @@ -32,7 +32,6 @@ import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; import 
org.elasticsearch.search.aggregations.support.ValueType; import org.elasticsearch.search.aggregations.support.ValuesSource; -import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric; import org.elasticsearch.search.aggregations.support.ValuesSourceAggregationBuilder.LeafOnly; import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; @@ -44,7 +43,7 @@ import java.util.Objects; import java.util.function.Consumer; -public class PercentilesAggregationBuilder extends LeafOnly { +public class PercentilesAggregationBuilder extends LeafOnly { public static final String NAME = Percentiles.TYPE_NAME; private static final double[] DEFAULT_PERCENTS = new double[] { 1, 5, 25, 50, 75, 95, 99 }; @@ -79,7 +78,7 @@ private static class HDROptions { private static final ObjectParser PARSER; static { PARSER = new ObjectParser<>(PercentilesAggregationBuilder.NAME); - ValuesSourceParserHelper.declareNumericFields(PARSER, true, true, false); + ValuesSourceParserHelper.declareAnyFields(PARSER, true, true); PARSER.declareDoubleArray( (b, v) -> b.percentiles(v.stream().mapToDouble(Double::doubleValue).toArray()), @@ -263,8 +262,8 @@ public PercentilesMethod method() { } @Override - protected ValuesSourceAggregatorFactory innerBuild(QueryShardContext queryShardContext, - ValuesSourceConfig config, + protected ValuesSourceAggregatorFactory innerBuild(QueryShardContext queryShardContext, + ValuesSourceConfig config, AggregatorFactory parent, Builder subFactoriesBuilder) throws IOException { switch (method) { diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentileRanksAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentileRanksAggregator.java index 69e385151eae3..831b302f313e3 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentileRanksAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentileRanksAggregator.java @@ -22,7 +22,7 @@ import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.InternalAggregation; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; -import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric; +import org.elasticsearch.search.aggregations.support.ValuesSource; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; @@ -32,7 +32,7 @@ class TDigestPercentileRanksAggregator extends AbstractTDigestPercentilesAggregator { TDigestPercentileRanksAggregator(String name, - Numeric valuesSource, + ValuesSource valuesSource, SearchContext context, Aggregator parent, double[] percents, diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentileRanksAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentileRanksAggregatorFactory.java index 5138ff2741680..5a6142263736d 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentileRanksAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentileRanksAggregatorFactory.java @@ -25,7 +25,6 @@ import org.elasticsearch.search.aggregations.AggregatorFactory; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; import 
org.elasticsearch.search.aggregations.support.ValuesSource; -import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric; import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; import org.elasticsearch.search.internal.SearchContext; @@ -35,14 +34,14 @@ import java.util.Map; class TDigestPercentileRanksAggregatorFactory - extends ValuesSourceAggregatorFactory { + extends ValuesSourceAggregatorFactory { private final double[] percents; private final double compression; private final boolean keyed; TDigestPercentileRanksAggregatorFactory(String name, - ValuesSourceConfig config, + ValuesSourceConfig config, double[] percents, double compression, boolean keyed, @@ -66,7 +65,7 @@ protected Aggregator createUnmapped(SearchContext searchContext, } @Override - protected Aggregator doCreateInternal(Numeric valuesSource, + protected Aggregator doCreateInternal(ValuesSource valuesSource, SearchContext searchContext, Aggregator parent, boolean collectsFromSingleBucket, diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentilesAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentilesAggregator.java index 81bbe15e82150..5140011808796 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentilesAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentilesAggregator.java @@ -22,7 +22,7 @@ import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.InternalAggregation; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; -import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric; +import org.elasticsearch.search.aggregations.support.ValuesSource; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; @@ -32,7 +32,7 @@ class TDigestPercentilesAggregator extends AbstractTDigestPercentilesAggregator { TDigestPercentilesAggregator(String name, - Numeric valuesSource, + ValuesSource valuesSource, SearchContext context, Aggregator parent, double[] percents, diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentilesAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentilesAggregatorFactory.java index 252a3b4ac3870..012dde2a92f70 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentilesAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentilesAggregatorFactory.java @@ -25,7 +25,6 @@ import org.elasticsearch.search.aggregations.AggregatorFactory; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; import org.elasticsearch.search.aggregations.support.ValuesSource; -import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric; import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; import org.elasticsearch.search.internal.SearchContext; @@ -35,13 +34,13 @@ import java.util.Map; class TDigestPercentilesAggregatorFactory - extends ValuesSourceAggregatorFactory { + extends ValuesSourceAggregatorFactory { private final double[] percents; private final double compression; private final boolean keyed; - 
TDigestPercentilesAggregatorFactory(String name, ValuesSourceConfig config, double[] percents, + TDigestPercentilesAggregatorFactory(String name, ValuesSourceConfig config, double[] percents, double compression, boolean keyed, QueryShardContext queryShardContext, AggregatorFactory parent, AggregatorFactories.Builder subFactoriesBuilder, Map metaData) throws IOException { super(name, config, queryShardContext, parent, subFactoriesBuilder, metaData); @@ -60,7 +59,7 @@ protected Aggregator createUnmapped(SearchContext searchContext, } @Override - protected Aggregator doCreateInternal(Numeric valuesSource, + protected Aggregator doCreateInternal(ValuesSource valuesSource, SearchContext searchContext, Aggregator parent, boolean collectsFromSingleBucket, diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/support/CoreValuesSourceType.java b/server/src/main/java/org/elasticsearch/search/aggregations/support/CoreValuesSourceType.java index 2eae054d55eeb..6df32b4deefa0 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/support/CoreValuesSourceType.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/support/CoreValuesSourceType.java @@ -26,6 +26,7 @@ import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.IndexGeoPointFieldData; +import org.elasticsearch.index.fielddata.IndexHistogramFieldData; import org.elasticsearch.index.fielddata.IndexNumericFieldData; import org.elasticsearch.index.fielddata.IndexOrdinalsFieldData; import org.elasticsearch.index.mapper.MappedFieldType; @@ -191,6 +192,34 @@ public ValuesSource getField(FieldContext fieldContext, AggregationScript.LeafFa return new ValuesSource.Range(fieldContext.indexFieldData(), rangeFieldType.rangeType()); } + @Override + public ValuesSource replaceMissing(ValuesSource valuesSource, Object rawMissing, DocValueFormat docValueFormat, LongSupplier now) { + throw new IllegalArgumentException("Can't apply missing values on a " + valuesSource.getClass()); + } + }, + HISTOGRAM { + @Override + public ValuesSource getEmpty() { + // TODO: Is this the correct exception type here? 
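+            // getEmpty() is reached when the aggregation targets an unmapped field; there is no
+            // sensible "empty" histogram values source to return, so fail fast instead.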
+ throw new IllegalArgumentException("Can't deal with unmapped ValuesSource type " + this.value()); + } + + @Override + public ValuesSource getScript(AggregationScript.LeafFactory script, ValueType scriptValueType) { + throw new AggregationExecutionException("value source of type [" + this.value() + "] is not supported by scripts"); + } + + @Override + public ValuesSource getField(FieldContext fieldContext, AggregationScript.LeafFactory script) { + final IndexFieldData indexFieldData = fieldContext.indexFieldData(); + + if (!(indexFieldData instanceof IndexHistogramFieldData)) { + throw new IllegalArgumentException("Expected histogram type on field [" + fieldContext.field() + + "], but got [" + fieldContext.fieldType().typeName() + "]"); + } + return new ValuesSource.Histogram.Fielddata((IndexHistogramFieldData) indexFieldData); + } + @Override public ValuesSource replaceMissing(ValuesSource valuesSource, Object rawMissing, DocValueFormat docValueFormat, LongSupplier now) { throw new IllegalArgumentException("Can't apply missing values on a " + valuesSource.getClass()); diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSource.java b/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSource.java index 2bbd7ce2c9b8c..3f77744b5a56e 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSource.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSource.java @@ -33,8 +33,10 @@ import org.elasticsearch.index.fielddata.AbstractSortingNumericDocValues; import org.elasticsearch.index.fielddata.AtomicOrdinalsFieldData; import org.elasticsearch.index.fielddata.DocValueBits; +import org.elasticsearch.index.fielddata.HistogramValues; import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.IndexGeoPointFieldData; +import org.elasticsearch.index.fielddata.IndexHistogramFieldData; import org.elasticsearch.index.fielddata.IndexNumericFieldData; import org.elasticsearch.index.fielddata.IndexOrdinalsFieldData; import org.elasticsearch.index.fielddata.MultiGeoPointValues; @@ -563,5 +565,39 @@ public org.elasticsearch.index.fielddata.MultiGeoPointValues geoPointValues(Leaf } } } + + public abstract static class Histogram extends ValuesSource { + + public abstract HistogramValues getHistogramValues(LeafReaderContext context) throws IOException; + + public static class Fielddata extends Histogram { + + protected final IndexHistogramFieldData indexFieldData; + + public Fielddata(IndexHistogramFieldData indexFieldData) { + this.indexFieldData = indexFieldData; + } + + @Override + public SortedBinaryDocValues bytesValues(LeafReaderContext context) { + return indexFieldData.load(context).getBytesValues(); + } + + @Override + public DocValueBits docsWithValue(LeafReaderContext context) throws IOException { + HistogramValues values = getHistogramValues(context); + return new DocValueBits() { + @Override + public boolean advanceExact(int doc) throws IOException { + return values.advanceExact(doc); + } + }; + } + + public HistogramValues getHistogramValues(LeafReaderContext context) throws IOException { + return indexFieldData.load(context).getHistogramValues(); + } + } + } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceConfig.java b/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceConfig.java index df29b77a287ac..4919b5bc9f9ad 100644 --- 
a/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceConfig.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceConfig.java @@ -22,6 +22,7 @@ import org.elasticsearch.common.time.DateFormatter; import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.IndexGeoPointFieldData; +import org.elasticsearch.index.fielddata.IndexHistogramFieldData; import org.elasticsearch.index.fielddata.IndexNumericFieldData; import org.elasticsearch.index.mapper.DateFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; @@ -114,6 +115,8 @@ public static ValuesSourceConfig resolve( config = new ValuesSourceConfig<>(CoreValuesSourceType.GEOPOINT); } else if (fieldType instanceof RangeFieldMapper.RangeFieldType) { config = new ValuesSourceConfig<>(CoreValuesSourceType.RANGE); + } else if (indexFieldData instanceof IndexHistogramFieldData) { + config = new ValuesSourceConfig<>(CoreValuesSourceType.HISTOGRAM); } else { if (valueType == null) { config = new ValuesSourceConfig<>(CoreValuesSourceType.BYTES); @@ -250,7 +253,7 @@ public VS toValuesSource(QueryShardContext context) { public VS toValuesSource(QueryShardContext context, Function resolveMissingAny) { if (!valid()) { throw new IllegalStateException( - "value source config is invalid; must have either a field context or a script or marked as unwrapped"); + "value source config is invalid; must have either a field context or a script or marked as unwrapped"); } final VS vs; diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/AnalyticsPlugin.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/AnalyticsPlugin.java index e676e16b007ac..c7128e3e93dca 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/AnalyticsPlugin.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/AnalyticsPlugin.java @@ -7,10 +7,17 @@ import org.elasticsearch.action.ActionRequest; import org.elasticsearch.action.ActionResponse; +import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.license.XPackLicenseState; import org.elasticsearch.plugins.ActionPlugin; +import org.elasticsearch.plugins.MapperPlugin; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.plugins.SearchPlugin; +import org.elasticsearch.xpack.analytics.mapper.HistogramFieldMapper; +import org.elasticsearch.xpack.core.XPackPlugin; +import org.elasticsearch.xpack.core.action.XPackInfoFeatureAction; +import org.elasticsearch.xpack.core.action.XPackUsageFeatureAction; +import org.elasticsearch.xpack.core.analytics.action.AnalyticsStatsAction; import org.elasticsearch.xpack.analytics.action.AnalyticsInfoTransportAction; import org.elasticsearch.xpack.analytics.action.AnalyticsUsageTransportAction; import org.elasticsearch.xpack.analytics.action.TransportAnalyticsStatsAction; @@ -18,18 +25,16 @@ import org.elasticsearch.xpack.analytics.cumulativecardinality.CumulativeCardinalityPipelineAggregator; import org.elasticsearch.xpack.analytics.stringstats.InternalStringStats; import org.elasticsearch.xpack.analytics.stringstats.StringStatsAggregationBuilder; -import org.elasticsearch.xpack.core.XPackPlugin; -import org.elasticsearch.xpack.core.action.XPackInfoFeatureAction; -import org.elasticsearch.xpack.core.action.XPackUsageFeatureAction; -import org.elasticsearch.xpack.core.analytics.action.AnalyticsStatsAction; import java.util.Arrays; +import java.util.Collections; 
import java.util.List; +import java.util.Map; import java.util.concurrent.atomic.AtomicLong; import static java.util.Collections.singletonList; -public class AnalyticsPlugin extends Plugin implements SearchPlugin, ActionPlugin { +public class AnalyticsPlugin extends Plugin implements SearchPlugin, ActionPlugin, MapperPlugin { // TODO this should probably become more structured once Analytics plugin has more than just one agg public static AtomicLong cumulativeCardUsage = new AtomicLong(0); @@ -66,4 +71,9 @@ public List getAggregations() { new ActionHandler<>(XPackInfoFeatureAction.ANALYTICS, AnalyticsInfoTransportAction.class), new ActionHandler<>(AnalyticsStatsAction.INSTANCE, TransportAnalyticsStatsAction.class)); } + + @Override + public Map getMappers() { + return Collections.singletonMap(HistogramFieldMapper.CONTENT_TYPE, new HistogramFieldMapper.TypeParser()); + } } diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java new file mode 100644 index 0000000000000..6ef920bd33fa3 --- /dev/null +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java @@ -0,0 +1,442 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.analytics.mapper; + + +import com.carrotsearch.hppc.DoubleArrayList; +import com.carrotsearch.hppc.IntArrayList; +import org.apache.lucene.document.BinaryDocValuesField; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.DocValuesFieldExistsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.SortField; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.Explicit; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.io.stream.ByteBufferStreamInput; +import org.elasticsearch.common.io.stream.BytesStreamOutput; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.XContentSubParser; +import org.elasticsearch.common.xcontent.support.XContentMapValues; +import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.fielddata.AtomicHistogramFieldData; +import org.elasticsearch.index.fielddata.HistogramValue; +import org.elasticsearch.index.fielddata.HistogramValues; +import org.elasticsearch.index.fielddata.IndexFieldData; +import org.elasticsearch.index.fielddata.IndexFieldDataCache; +import org.elasticsearch.index.fielddata.IndexHistogramFieldData; +import org.elasticsearch.index.fielddata.ScriptDocValues; +import org.elasticsearch.index.fielddata.SortedBinaryDocValues; +import org.elasticsearch.index.mapper.FieldMapper; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.Mapper; +import org.elasticsearch.index.mapper.MapperParsingException; +import org.elasticsearch.index.mapper.MapperService; 
+import org.elasticsearch.index.mapper.ParseContext; +import org.elasticsearch.index.query.QueryShardContext; +import org.elasticsearch.index.query.QueryShardException; +import org.elasticsearch.indices.breaker.CircuitBreakerService; +import org.elasticsearch.search.MultiValueMode; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; + +/** + * Field Mapper for pre-aggregated histograms. + */ +public class HistogramFieldMapper extends FieldMapper { + public static final String CONTENT_TYPE = "histogram"; + + public static class Names { + public static final String IGNORE_MALFORMED = "ignore_malformed"; + } + + public static class Defaults { + public static final Explicit IGNORE_MALFORMED = new Explicit<>(false, false); + public static final HistogramFieldType FIELD_TYPE = new HistogramFieldType(); + + static { + FIELD_TYPE.setTokenized(false); + FIELD_TYPE.setHasDocValues(true); + FIELD_TYPE.setIndexOptions(IndexOptions.NONE); + FIELD_TYPE.freeze(); + } + } + + public static final ParseField COUNTS_FIELD = new ParseField("counts"); + public static final ParseField VALUES_FIELD = new ParseField("values"); + + public static class Builder extends FieldMapper.Builder { + protected Boolean ignoreMalformed; + + public Builder(String name) { + super(name, Defaults.FIELD_TYPE, Defaults.FIELD_TYPE); + builder = this; + } + + public Builder ignoreMalformed(boolean ignoreMalformed) { + this.ignoreMalformed = ignoreMalformed; + return builder; + } + + protected Explicit ignoreMalformed(BuilderContext context) { + if (ignoreMalformed != null) { + return new Explicit<>(ignoreMalformed, true); + } + if (context.indexSettings() != null) { + return new Explicit<>(IGNORE_MALFORMED_SETTING.get(context.indexSettings()), false); + } + return HistogramFieldMapper.Defaults.IGNORE_MALFORMED; + } + + public HistogramFieldMapper build(BuilderContext context, String simpleName, MappedFieldType fieldType, + MappedFieldType defaultFieldType, Settings indexSettings, + MultiFields multiFields, Explicit ignoreMalformed, CopyTo copyTo) { + setupFieldType(context); + return new HistogramFieldMapper(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, + ignoreMalformed, copyTo); + } + + @Override + public HistogramFieldMapper build(BuilderContext context) { + return build(context, name, fieldType, defaultFieldType, context.indexSettings(), + multiFieldsBuilder.build(this, context), ignoreMalformed(context), copyTo); + } + } + + public static class TypeParser implements Mapper.TypeParser { + @Override + public Mapper.Builder parse(String name, + Map node, ParserContext parserContext) + throws MapperParsingException { + Builder builder = new HistogramFieldMapper.Builder(name); + for (Iterator> iterator = node.entrySet().iterator(); iterator.hasNext();) { + Map.Entry entry = iterator.next(); + String propName = entry.getKey(); + Object propNode = entry.getValue(); + if (propName.equals(Names.IGNORE_MALFORMED)) { + builder.ignoreMalformed(XContentMapValues.nodeBooleanValue(propNode, name + "." 
+ Names.IGNORE_MALFORMED)); + iterator.remove(); + } + } + return builder; + } + } + + protected Explicit ignoreMalformed; + + public HistogramFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType, + Settings indexSettings, MultiFields multiFields, Explicit ignoreMalformed, CopyTo copyTo) { + super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo); + this.ignoreMalformed = ignoreMalformed; + } + + @Override + protected void doMerge(Mapper mergeWith) { + super.doMerge(mergeWith); + HistogramFieldMapper gpfmMergeWith = (HistogramFieldMapper) mergeWith; + if (gpfmMergeWith.ignoreMalformed.explicit()) { + this.ignoreMalformed = gpfmMergeWith.ignoreMalformed; + } + } + + @Override + protected String contentType() { + return CONTENT_TYPE; + } + + @Override + protected void parseCreateField(ParseContext context, List fields) throws IOException { + throw new UnsupportedOperationException("Parsing is implemented in parse(), this method should NEVER be called"); + } + + public static class HistogramFieldType extends MappedFieldType { + + public HistogramFieldType() { + } + + HistogramFieldType(HistogramFieldType ref) { + super(ref); + } + + @Override + public String typeName() { + return CONTENT_TYPE; + } + + @Override + public MappedFieldType clone() { + return new HistogramFieldType(this); + } + + @Override + public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName) { + failIfNoDocValues(); + return new IndexFieldData.Builder() { + + @Override + public IndexFieldData build(IndexSettings indexSettings, MappedFieldType fieldType, IndexFieldDataCache cache, + CircuitBreakerService breakerService, MapperService mapperService) { + + return new IndexHistogramFieldData(indexSettings.getIndex(), fieldType.name()) { + + @Override + public AtomicHistogramFieldData load(LeafReaderContext context) { + return new AtomicHistogramFieldData() { + @Override + public HistogramValues getHistogramValues() throws IOException { + try { + final BinaryDocValues values = DocValues.getBinary(context.reader(), fieldName); + return new HistogramValues() { + @Override + public boolean advanceExact(int doc) throws IOException { + return values.advanceExact(doc); + } + + @Override + public HistogramValue histogram() throws IOException { + try { + return getHistogramValue(values.binaryValue()); + } catch (IOException e) { + throw new IOException("Cannot load doc value", e); + } + } + }; + } catch (IOException e) { + throw new IOException("Cannot load doc values", e); + } + + } + + @Override + public ScriptDocValues getScriptValues() { + throw new UnsupportedOperationException("The [" + CONTENT_TYPE + "] field does not " + + "support scripts"); + } + + @Override + public SortedBinaryDocValues getBytesValues() { + throw new UnsupportedOperationException("String representation of doc values " + + "for [" + CONTENT_TYPE + "] fields is not supported"); + } + + @Override + public long ramBytesUsed() { + return 0; // Unknown + } + + @Override + public void close() { + + } + }; + } + + @Override + public AtomicHistogramFieldData loadDirect(LeafReaderContext context) throws Exception { + return load(context); + } + + @Override + public SortField sortField(Object missingValue, MultiValueMode sortMode, + XFieldComparatorSource.Nested nested, boolean reverse) { + throw new UnsupportedOperationException("can't sort on the [" + CONTENT_TYPE + "] field"); + } + }; + } + + private HistogramValue getHistogramValue(final BytesRef bytesRef) throws IOException { + 
final ByteBufferStreamInput streamInput = new ByteBufferStreamInput( + ByteBuffer.wrap(bytesRef.bytes, bytesRef.offset, bytesRef.length)); + return new HistogramValue() { + double value; + int count; + boolean isExhausted; + + @Override + public boolean next() throws IOException { + if (streamInput.available() > 0) { + count = streamInput.readVInt(); + value = streamInput.readDouble(); + return true; + } + isExhausted = true; + return false; + } + + @Override + public double value() { + if (isExhausted) { + throw new IllegalArgumentException("histogram already exhausted"); + } + return value; + } + + @Override + public int count() { + if (isExhausted) { + throw new IllegalArgumentException("histogram already exhausted"); + } + return count; + } + }; + } + + }; + } + + @Override + public Query existsQuery(QueryShardContext context) { + if (hasDocValues()) { + return new DocValuesFieldExistsQuery(name()); + } else { + throw new QueryShardException(context, "field " + name() + " of type [" + CONTENT_TYPE + "] " + + "has no doc values and cannot be searched"); + } + } + + @Override + public Query termQuery(Object value, QueryShardContext context) { + throw new QueryShardException(context, "[" + CONTENT_TYPE + "] fields do not support searching, " + + "use dedicated aggregations instead: [" + + name() + "]"); + } + } + + @Override + public void parse(ParseContext context) throws IOException { + if (context.externalValueSet()) { + throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] can't be used in multi-fields"); + } + context.path().add(simpleName()); + XContentParser.Token token = null; + XContentSubParser subParser = null; + try { + token = context.parser().currentToken(); + if (token == XContentParser.Token.VALUE_NULL) { + context.path().remove(); + return; + } + DoubleArrayList values = null; + IntArrayList counts = null; + // should be an object + ensureExpectedToken(XContentParser.Token.START_OBJECT, token, context.parser()::getTokenLocation); + subParser = new XContentSubParser(context.parser()); + token = subParser.nextToken(); + while (token != XContentParser.Token.END_OBJECT) { + // should be a field + ensureExpectedToken(XContentParser.Token.FIELD_NAME, token, subParser::getTokenLocation); + String fieldName = subParser.currentName(); + if (fieldName.equals(VALUES_FIELD.getPreferredName())) { + token = subParser.nextToken(); + // should be an array + ensureExpectedToken(XContentParser.Token.START_ARRAY, token, subParser::getTokenLocation); + values = new DoubleArrayList(); + token = subParser.nextToken(); + double previousVal = -Double.MAX_VALUE; + while (token != XContentParser.Token.END_ARRAY) { + // should be a number + ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, subParser::getTokenLocation); + double val = subParser.doubleValue(); + if (val < previousVal) { + // values must be in increasing order + throw new MapperParsingException("error parsing field [" + + name() + "], ["+ VALUES_FIELD + "] values must be in increasing order, got [" + val + + "] but previous value was [" + previousVal +"]"); + } + values.add(val); + previousVal = val; + token = subParser.nextToken(); + } + } else if (fieldName.equals(COUNTS_FIELD.getPreferredName())) { + token = subParser.nextToken(); + // should be an array + ensureExpectedToken(XContentParser.Token.START_ARRAY, token, subParser::getTokenLocation); + counts = new IntArrayList(); + token = subParser.nextToken(); + while (token != XContentParser.Token.END_ARRAY) { + // should be a number +
ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, subParser::getTokenLocation); + counts.add(subParser.intValue()); + token = subParser.nextToken(); + } + } else { + throw new MapperParsingException("error parsing field [" + + name() + "], with unknown parameter [" + fieldName + "]"); + } + token = subParser.nextToken(); + } + if (values == null) { + throw new MapperParsingException("error parsing field [" + + name() + "], expected field called [" + VALUES_FIELD.getPreferredName() + "]"); + } + if (counts == null) { + throw new MapperParsingException("error parsing field [" + + name() + "], expected field called [" + COUNTS_FIELD.getPreferredName() + "]"); + } + if (values.size() != counts.size()) { + throw new MapperParsingException("error parsing field [" + + name() + "], expected same length from [" + VALUES_FIELD.getPreferredName() +"] and " + + "[" + COUNTS_FIELD.getPreferredName() +"] but got [" + values.size() + " != " + counts.size() +"]"); + } + if (fieldType().hasDocValues()) { + BytesStreamOutput streamOutput = new BytesStreamOutput(); + for (int i = 0; i < values.size(); i++) { + int count = counts.get(i); + if (count < 0) { + throw new MapperParsingException("error parsing field [" + + name() + "], ["+ COUNTS_FIELD + "] elements must be >= 0 but got " + counts.get(i)); + } else if (count > 0) { + // we do not add elements with count == 0 + streamOutput.writeVInt(count); + streamOutput.writeDouble(values.get(i)); + } + } + + Field field = new BinaryDocValuesField(simpleName(), streamOutput.bytes().toBytesRef()); + streamOutput.close(); + if (context.doc().getByKey(fieldType().name()) != null) { + throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + + "] doesn't support indexing multiple values for the same field in the same document"); + } + context.doc().addWithKey(fieldType().name(), field); + } + + } catch (Exception ex) { + if (ignoreMalformed.value() == false) { + throw new MapperParsingException("failed to parse field [{}] of type [{}]", + ex, fieldType().name(), fieldType().typeName()); + } + + if (subParser != null) { + // close the subParser so we advance to the end of the object + subParser.close(); + } + context.addIgnoredField(fieldType().name()); + } + context.path().remove(); + } + + @Override + protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException { + super.doXContentBody(builder, includeDefaults, params); + if (includeDefaults || ignoreMalformed.explicit()) { + builder.field(Names.IGNORE_MALFORMED, ignoreMalformed.value()); + } + } +} diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentileRanksAggregatorTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentileRanksAggregatorTests.java new file mode 100644 index 0000000000000..843ff9447fde5 --- /dev/null +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentileRanksAggregatorTests.java @@ -0,0 +1,83 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License.
+ */ +package org.elasticsearch.xpack.analytics.mapper; + +import org.HdrHistogram.DoubleHistogram; +import org.HdrHistogram.DoubleHistogramIterationValue; +import org.apache.lucene.document.BinaryDocValuesField; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.store.Directory; +import org.elasticsearch.common.io.stream.BytesStreamOutput; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.search.aggregations.AggregatorTestCase; +import org.elasticsearch.search.aggregations.metrics.InternalHDRPercentileRanks; +import org.elasticsearch.search.aggregations.metrics.Percentile; +import org.elasticsearch.search.aggregations.metrics.PercentileRanks; +import org.elasticsearch.search.aggregations.metrics.PercentileRanksAggregationBuilder; +import org.elasticsearch.search.aggregations.metrics.PercentilesMethod; +import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper; +import org.hamcrest.Matchers; + +import java.io.IOException; +import java.util.Iterator; + +public class HDRPreAggregatedPercentileRanksAggregatorTests extends AggregatorTestCase { + + private BinaryDocValuesField getDocValue(String fieldName, double[] values) throws IOException { + DoubleHistogram histogram = new DoubleHistogram(3); // default number of significant digits + for (double value : values) { + histogram.recordValue(value); + } + BytesStreamOutput streamOutput = new BytesStreamOutput(); + DoubleHistogram.RecordedValues recordedValues = histogram.recordedValues(); + Iterator<DoubleHistogramIterationValue> iterator = recordedValues.iterator(); + while (iterator.hasNext()) { + DoubleHistogramIterationValue value = iterator.next(); + long count = value.getCountAtValueIteratedTo(); + streamOutput.writeVInt(Math.toIntExact(count)); + double d = value.getValueIteratedTo(); + streamOutput.writeDouble(d); + } + return new BinaryDocValuesField(fieldName, streamOutput.bytes().toBytesRef()); + } + + public void testSimple() throws IOException { + try (Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir)) { + Document doc = new Document(); + doc.add(getDocValue("field", new double[] {3, 0.2, 10})); + w.addDocument(doc); + + PercentileRanksAggregationBuilder aggBuilder = new PercentileRanksAggregationBuilder("my_agg", new double[]{0.1, 0.5, 12}) + .field("field") + .method(PercentilesMethod.HDR); + MappedFieldType fieldType = new HistogramFieldMapper.Builder("field").fieldType(); + fieldType.setName("field"); + try (IndexReader reader = w.getReader()) { + IndexSearcher searcher = new IndexSearcher(reader); + PercentileRanks ranks = search(searcher, new MatchAllDocsQuery(), aggBuilder, fieldType); + Iterator<Percentile> rankIterator = ranks.iterator(); + Percentile rank = rankIterator.next(); + assertEquals(0.1, rank.getValue(), 0d); + assertThat(rank.getPercent(), Matchers.equalTo(0d)); + rank = rankIterator.next(); + assertEquals(0.5, rank.getValue(), 0d); + assertThat(rank.getPercent(), Matchers.greaterThan(0d)); + assertThat(rank.getPercent(), Matchers.lessThan(100d)); + rank = rankIterator.next(); + assertEquals(12, rank.getValue(), 0d); + assertThat(rank.getPercent(), Matchers.equalTo(100d)); + assertFalse(rankIterator.hasNext()); + assertTrue(AggregationInspectionHelper.hasValue((InternalHDRPercentileRanks)ranks)); + } + } + } + +}
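Editor's note: the mapper above and the tests around it all share one wire format for the histogram doc value: a flat sequence of (count, value) pairs, each written as a vInt followed by a double, with zero-count buckets omitted at index time. A minimal decoding sketch follows; the helper name decodeHistogram and the printing are illustrative only, not part of this change set:

    import java.io.IOException;
    import java.nio.ByteBuffer;
    import org.apache.lucene.util.BytesRef;
    import org.elasticsearch.common.io.stream.ByteBufferStreamInput;

    static void decodeHistogram(BytesRef bytesRef) throws IOException {
        ByteBufferStreamInput in = new ByteBufferStreamInput(
            ByteBuffer.wrap(bytesRef.bytes, bytesRef.offset, bytesRef.length));
        while (in.available() > 0) {        // pairs are packed back to back
            int count = in.readVInt();      // occurrences in this bucket or centroid
            double value = in.readDouble(); // bucket value or centroid mean, in ascending order
            System.out.println(value + " -> " + count);
        }
    }

diff --git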
a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentilesAggregatorTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentilesAggregatorTests.java new file mode 100644 index 0000000000000..5d4e5c05b2e9d --- /dev/null +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentilesAggregatorTests.java @@ -0,0 +1,131 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.analytics.mapper; + +import org.HdrHistogram.DoubleHistogram; +import org.HdrHistogram.DoubleHistogramIterationValue; +import org.apache.lucene.document.BinaryDocValuesField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.search.DocValuesFieldExistsQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.store.Directory; +import org.elasticsearch.common.CheckedConsumer; +import org.elasticsearch.common.io.stream.BytesStreamOutput; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.search.aggregations.Aggregator; +import org.elasticsearch.search.aggregations.AggregatorTestCase; +import org.elasticsearch.search.aggregations.metrics.InternalHDRPercentiles; +import org.elasticsearch.search.aggregations.metrics.PercentilesAggregationBuilder; +import org.elasticsearch.search.aggregations.metrics.PercentilesMethod; +import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper; + +import java.io.IOException; +import java.util.Iterator; +import java.util.function.Consumer; + +import static java.util.Collections.singleton; + +public class HDRPreAggregatedPercentilesAggregatorTests extends AggregatorTestCase { + + private BinaryDocValuesField getDocValue(String fieldName, double[] values) throws IOException { + DoubleHistogram histogram = new DoubleHistogram(3);//default + for (double value : values) { + histogram.recordValue(value); + } + BytesStreamOutput streamOutput = new BytesStreamOutput(); + DoubleHistogram.RecordedValues recordedValues = histogram.recordedValues(); + Iterator iterator = recordedValues.iterator(); + while (iterator.hasNext()) { + + DoubleHistogramIterationValue value = iterator.next(); + long count = value.getCountAtValueIteratedTo(); + if (count != 0) { + streamOutput.writeVInt(Math.toIntExact(count)); + double d = value.getValueIteratedTo(); + streamOutput.writeDouble(d); + } + + } + return new BinaryDocValuesField(fieldName, streamOutput.bytes().toBytesRef()); + } + + public void testNoMatchingField() throws IOException { + testCase(new MatchAllDocsQuery(), iw -> { + iw.addDocument(singleton(getDocValue("wrong_number", new double[]{7, 1}))); + }, hdr -> { + //assertEquals(0L, hdr.state.getTotalCount()); + assertFalse(AggregationInspectionHelper.hasValue(hdr)); + }); + } + + public void testEmptyField() throws IOException { + testCase(new MatchAllDocsQuery(), iw -> { + iw.addDocument(singleton(getDocValue("number", new double[0]))); + }, hdr -> { + assertFalse(AggregationInspectionHelper.hasValue(hdr)); + }); + } + + public void 
testSomeMatchesBinaryDocValues() throws IOException { + testCase(new DocValuesFieldExistsQuery("number"), iw -> { + iw.addDocument(singleton(getDocValue("number", new double[]{60, 40, 20, 10}))); + }, hdr -> { + //assertEquals(4L, hdr.state.getTotalCount()); + double approximation = 0.05d; + assertEquals(10.0d, hdr.percentile(25), approximation); + assertEquals(20.0d, hdr.percentile(50), approximation); + assertEquals(40.0d, hdr.percentile(75), approximation); + assertEquals(60.0d, hdr.percentile(99), approximation); + assertTrue(AggregationInspectionHelper.hasValue(hdr)); + }); + } + + public void testSomeMatchesMultiBinaryDocValues() throws IOException { + testCase(new DocValuesFieldExistsQuery("number"), iw -> { + iw.addDocument(singleton(getDocValue("number", new double[]{60, 40, 20, 10}))); + iw.addDocument(singleton(getDocValue("number", new double[]{60, 40, 20, 10}))); + iw.addDocument(singleton(getDocValue("number", new double[]{60, 40, 20, 10}))); + iw.addDocument(singleton(getDocValue("number", new double[]{60, 40, 20, 10}))); + }, hdr -> { + //assertEquals(16L, hdr.state.getTotalCount()); + double approximation = 0.05d; + assertEquals(10.0d, hdr.percentile(25), approximation); + assertEquals(20.0d, hdr.percentile(50), approximation); + assertEquals(40.0d, hdr.percentile(75), approximation); + assertEquals(60.0d, hdr.percentile(99), approximation); + assertTrue(AggregationInspectionHelper.hasValue(hdr)); + }); + } + + private void testCase(Query query, CheckedConsumer buildIndex, + Consumer verify) throws IOException { + try (Directory directory = newDirectory()) { + try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) { + buildIndex.accept(indexWriter); + } + + try (IndexReader indexReader = DirectoryReader.open(directory)) { + IndexSearcher indexSearcher = newSearcher(indexReader, true, true); + + PercentilesAggregationBuilder builder = + new PercentilesAggregationBuilder("test").field("number").method(PercentilesMethod.HDR); + + MappedFieldType fieldType = new HistogramFieldMapper.Builder("number").fieldType(); + fieldType.setName("number"); + Aggregator aggregator = createAggregator(builder, indexSearcher, fieldType); + aggregator.preCollection(); + indexSearcher.search(query, aggregator); + aggregator.postCollection(); + verify.accept((InternalHDRPercentiles) aggregator.buildAggregation(0L)); + + } + } + } +} diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java new file mode 100644 index 0000000000000..8878d86fb2051 --- /dev/null +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java @@ -0,0 +1,509 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ +package org.elasticsearch.xpack.analytics.mapper; + +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.compress.CompressedXContent; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.index.mapper.DocumentMapper; +import org.elasticsearch.index.mapper.MapperParsingException; +import org.elasticsearch.index.mapper.ParsedDocument; +import org.elasticsearch.index.mapper.SourceToParse; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.test.ESSingleNodeTestCase; +import org.elasticsearch.xpack.analytics.AnalyticsPlugin; +import org.elasticsearch.xpack.core.XPackPlugin; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.notNullValue; +import static org.hamcrest.Matchers.nullValue; + + +public class HistogramFieldMapperTests extends ESSingleNodeTestCase { + + public void testParseValue() throws Exception { + ensureGreen(); + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") + .startObject("properties").startObject("pre_aggregated").field("type", "histogram"); + String mapping = Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + ParsedDocument doc = defaultMapper.parse(new SourceToParse("test", "1", + BytesReference.bytes(XContentFactory.jsonBuilder() + .startObject().field("pre_aggregated").startObject() + .field("values", new double[] {2, 3}) + .field("counts", new int[] {0, 4}) + .endObject() + .endObject()), + XContentType.JSON)); + + assertThat(doc.rootDoc().getField("pre_aggregated"), notNullValue()); + } + + public void testParseArrayValue() throws Exception { + ensureGreen(); + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") + .startObject("properties").startObject("pre_aggregated").field("type", "histogram"); + String mapping = Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + SourceToParse source = new SourceToParse("test", "1", + BytesReference.bytes(XContentFactory.jsonBuilder() + .startObject().startArray("pre_aggregated") + .startObject() + .field("counts", new int[] {2, 2, 3}) + .field("values", new double[] {2, 2, 3}) + .endObject() + .startObject() + .field("counts", new int[] {2, 2, 3}) + .field("values", new double[] {2, 2, 3}) + .endObject().endArray() + .endObject()), + XContentType.JSON); + + Exception e = expectThrows(MapperParsingException.class, () -> defaultMapper.parse(source)); + assertThat(e.getCause().getMessage(), containsString("doesn't support indexing multiple values " + + "for the same field in the same document")); + } + + public void testEmptyArrays() throws Exception { + ensureGreen(); + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") + .startObject("properties").startObject("pre_aggregated").field("type", "histogram"); + String mapping =
Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + ParsedDocument doc = defaultMapper.parse(new SourceToParse("test", "1", + BytesReference.bytes(XContentFactory.jsonBuilder() + .startObject().field("pre_aggregated").startObject() + .field("values", new double[] {}) + .field("counts", new int[] {}) + .endObject() + .endObject()), + XContentType.JSON)); + + assertThat(doc.rootDoc().getField("pre_aggregated"), notNullValue()); + } + + public void testNullValue() throws Exception { + ensureGreen(); + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") + .startObject("properties").startObject("pre_aggregated").field("type", "histogram"); + String mapping = Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + ParsedDocument doc = defaultMapper.parse(new SourceToParse("test", "1", + BytesReference.bytes(XContentFactory.jsonBuilder() + .startObject().nullField("pre_aggregated") + .endObject()), + XContentType.JSON)); + + assertThat(doc.rootDoc().getField("pre_aggregated"), nullValue()); + } + + public void testMissingFieldCounts() throws Exception { + ensureGreen(); + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") + .startObject("properties").startObject("pre_aggregated").field("type", "histogram"); + String mapping = Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + SourceToParse source = new SourceToParse("test", "1", + BytesReference.bytes(XContentFactory.jsonBuilder() + .startObject().field("pre_aggregated").startObject() + .field("values", new double[] {2, 2}) + .endObject() + .endObject()), + XContentType.JSON); + + Exception e = expectThrows(MapperParsingException.class, () -> defaultMapper.parse(source)); + assertThat(e.getCause().getMessage(), containsString("expected field called [counts]")); + } + + public void testIgnoreMalformed() throws Exception { + ensureGreen(); + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") + .startObject("properties").startObject("pre_aggregated").field("type", "histogram") + .field("ignore_malformed", true); + String mapping = Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + ParsedDocument doc = defaultMapper.parse(new SourceToParse("test", "1", + BytesReference.bytes(XContentFactory.jsonBuilder() + .startObject().field("pre_aggregated").startObject() + .field("values", new double[] {2, 2}) + .endObject() + .endObject()), + XContentType.JSON)); + + assertThat(doc.rootDoc().getField("pre_aggregated"), nullValue()); + } + + public void testIgnoreMalformedSkipsKeyword() throws Exception { + ensureGreen(); + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") + .startObject("properties").startObject("pre_aggregated").field("type", "histogram") + 
.field("ignore_malformed", true) + .endObject().startObject("otherField").field("type", "keyword"); + String mapping = Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + ParsedDocument doc = defaultMapper.parse(new SourceToParse("test", "1", + BytesReference.bytes(XContentFactory.jsonBuilder() + .startObject().field("pre_aggregated", "value") + .field("otherField","value") + .endObject()), + XContentType.JSON)); + + assertThat(doc.rootDoc().getField("pre_aggregated"), nullValue()); + assertThat(doc.rootDoc().getField("otherField"), notNullValue()); + } + + public void testIgnoreMalformedSkipsArray() throws Exception { + ensureGreen(); + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") + .startObject("properties").startObject("pre_aggregated").field("type", "histogram") + .field("ignore_malformed", true) + .endObject().startObject("otherField").field("type", "keyword"); + String mapping = Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + ParsedDocument doc = defaultMapper.parse(new SourceToParse("test", "1", + BytesReference.bytes(XContentFactory.jsonBuilder() + .startObject().field("pre_aggregated", new int[] {2, 2, 2}) + .field("otherField","value") + .endObject()), + XContentType.JSON)); + + assertThat(doc.rootDoc().getField("pre_aggregated"), nullValue()); + assertThat(doc.rootDoc().getField("otherField"), notNullValue()); + } + + public void testIgnoreMalformedSkipsField() throws Exception { + ensureGreen(); + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") + .startObject("properties").startObject("pre_aggregated").field("type", "histogram") + .field("ignore_malformed", true) + .endObject().startObject("otherField").field("type", "keyword"); + String mapping = Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + ParsedDocument doc = defaultMapper.parse(new SourceToParse("test", "1", + BytesReference.bytes(XContentFactory.jsonBuilder() + .startObject().field("pre_aggregated").startObject() + .field("values", new double[] {2, 2}) + .field("typo", new double[] {2, 2}) + .endObject() + .field("otherField","value") + .endObject()), + XContentType.JSON)); + + assertThat(doc.rootDoc().getField("pre_aggregated"), nullValue()); + assertThat(doc.rootDoc().getField("otherField"), notNullValue()); + } + + public void testIgnoreMalformedSkipsObjects() throws Exception { + ensureGreen(); + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") + .startObject("properties").startObject("pre_aggregated").field("type", "histogram") + .field("ignore_malformed", true) + .endObject().startObject("otherField").field("type", "keyword"); + String mapping = Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + ParsedDocument doc = defaultMapper.parse(new 
SourceToParse("test", "1", + BytesReference.bytes(XContentFactory.jsonBuilder() + .startObject().field("pre_aggregated").startObject() + .startObject("values").field("values", new double[] {2, 2}) + .startObject("otherData").startObject("more").field("toto", 1) + .endObject().endObject() + .endObject() + .field("counts", new double[] {2, 2}) + .endObject() + .field("otherField","value") + .endObject()), + XContentType.JSON)); + + assertThat(doc.rootDoc().getField("pre_aggregated"), nullValue()); + assertThat(doc.rootDoc().getField("otherField"), notNullValue()); + } + + public void testIgnoreMalformedSkipsEmpty() throws Exception { + ensureGreen(); + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") + .startObject("properties").startObject("pre_aggregated").field("type", "histogram") + .field("ignore_malformed", true) + .endObject().startObject("otherField").field("type", "keyword"); + String mapping = Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + ParsedDocument doc = defaultMapper.parse(new SourceToParse("test", "1", + BytesReference.bytes(XContentFactory.jsonBuilder() + .startObject().field("pre_aggregated").startObject().endObject() + .field("otherField","value") + .endObject()), + XContentType.JSON)); + + assertThat(doc.rootDoc().getField("pre_aggregated"), nullValue()); + assertThat(doc.rootDoc().getField("otherField"), notNullValue()); + } + + public void testMissingFieldValues() throws Exception { + ensureGreen(); + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") + .startObject("properties").startObject("pre_aggregated").field("type", "histogram"); + String mapping = Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + SourceToParse source = new SourceToParse("test", "1", + BytesReference.bytes(XContentFactory.jsonBuilder() + .startObject().field("pre_aggregated").startObject() + .field("counts", new int[] {2, 2}) + .endObject() + .endObject()), + XContentType.JSON); + + Exception e = expectThrows(MapperParsingException.class, () -> defaultMapper.parse(source)); + assertThat(e.getCause().getMessage(), containsString("expected field called [values]")); + } + + public void testUnknownField() throws Exception { + ensureGreen(); + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") + .startObject("properties").startObject("pre_aggregated").field("type", "histogram"); + String mapping = Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + SourceToParse source = new SourceToParse("test", "1", + BytesReference.bytes(XContentFactory.jsonBuilder() + .startObject().field("pre_aggregated").startObject() + .field("counts", new int[] {2, 2}) + .field("values", new double[] {2, 2}) + .field("unknown", new double[] {2, 2}) + .endObject() + .endObject()), + XContentType.JSON); + + Exception e = expectThrows(MapperParsingException.class, () -> defaultMapper.parse(source)); + assertThat(e.getCause().getMessage(), 
containsString("with unknown parameter [unknown]")); + } + + public void testFieldArraysDifferentSize() throws Exception { + ensureGreen(); + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") + .startObject("properties").startObject("pre_aggregated").field("type", "histogram"); + String mapping = Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + SourceToParse source = new SourceToParse("test", "1", + BytesReference.bytes(XContentFactory.jsonBuilder() + .startObject().field("pre_aggregated").startObject() + .field("counts", new int[] {2, 2}) + .field("values", new double[] {2, 2, 3}) + .endObject() + .endObject()), + XContentType.JSON); + + Exception e = expectThrows(MapperParsingException.class, () -> defaultMapper.parse(source)); + assertThat(e.getCause().getMessage(), containsString("expected same length from [values] and [counts] but got [3 != 2]")); + } + + public void testFieldCountsNotArray() throws Exception { + ensureGreen(); + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") + .startObject("properties").startObject("pre_aggregated").field("type", "histogram"); + String mapping = Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + SourceToParse source = new SourceToParse("test", "1", + BytesReference.bytes(XContentFactory.jsonBuilder() + .startObject().field("pre_aggregated").startObject() + .field("counts", "bah") + .field("values", new double[] {2, 2, 3}) + .endObject() + .endObject()), + XContentType.JSON); + + Exception e = expectThrows(MapperParsingException.class, () -> defaultMapper.parse(source)); + assertThat(e.getCause().getMessage(), containsString("expecting token of type [START_ARRAY] but found [VALUE_STRING]")); + } + + public void testFieldCountsStringArray() throws Exception { + ensureGreen(); + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") + .startObject("properties").startObject("pre_aggregated").field("type", "histogram"); + String mapping = Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + SourceToParse source = new SourceToParse("test", "1", + BytesReference.bytes(XContentFactory.jsonBuilder() + .startObject().field("pre_aggregated").startObject() + .field("counts", new String[] {"4", "5", "6"}) + .field("values", new double[] {2, 2, 3}) + .endObject() + .endObject()), + XContentType.JSON); + + Exception e = expectThrows(MapperParsingException.class, () -> defaultMapper.parse(source)); + assertThat(e.getCause().getMessage(), containsString("expecting token of type [VALUE_NUMBER] but found [VALUE_STRING]")); + } + + public void testFieldValuesStringArray() throws Exception { + ensureGreen(); + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") + .startObject("properties").startObject("pre_aggregated").field("type", "histogram"); + String mapping = Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); + 
DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + SourceToParse source = new SourceToParse("test", "1", + BytesReference.bytes(XContentFactory.jsonBuilder() + .startObject().field("pre_aggregated").startObject() + .field("counts", new int[] {4, 5, 6}) + .field("values", new String[] {"2", "2", "3"}) + .endObject() + .endObject()), + XContentType.JSON); + + Exception e = expectThrows(MapperParsingException.class, () -> defaultMapper.parse(source)); + assertThat(e.getCause().getMessage(), containsString("expecting token of type [VALUE_NUMBER] but found [VALUE_STRING]")); + } + + public void testFieldValuesNotArray() throws Exception { + ensureGreen(); + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") + .startObject("properties").startObject("pre_aggregated").field("type", "histogram"); + String mapping = Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + SourceToParse source = new SourceToParse("test", "1", + BytesReference.bytes(XContentFactory.jsonBuilder() + .startObject().field("pre_aggregated").startObject() + .field("counts", new int[] {2, 2, 3}) + .field("values", "bah") + .endObject() + .endObject()), + XContentType.JSON); + + Exception e = expectThrows(MapperParsingException.class, () -> defaultMapper.parse(source)); + assertThat(e.getCause().getMessage(), containsString("expecting token of type [START_ARRAY] but found [VALUE_STRING]")); + } + + public void testCountIsLong() throws Exception { + ensureGreen(); + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") + .startObject("properties").startObject("pre_aggregated").field("type", "histogram"); + String mapping = Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + SourceToParse source = new SourceToParse("test", "1", + BytesReference.bytes(XContentFactory.jsonBuilder() + .startObject().field("pre_aggregated").startObject() + .field("counts", new long[] {2, 2, Long.MAX_VALUE}) + .field("values", new double[] {2 ,2 ,3}) + .endObject() + .endObject()), + XContentType.JSON); + + Exception e = expectThrows(MapperParsingException.class, () -> defaultMapper.parse(source)); + assertThat(e.getCause().getMessage(), containsString(" out of range of int")); + } + + public void testValuesNotInOrder() throws Exception { + ensureGreen(); + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") + .startObject("properties").startObject("pre_aggregated").field("type", "histogram"); + String mapping = Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + SourceToParse source = new SourceToParse("test", "1", + BytesReference.bytes(XContentFactory.jsonBuilder() + .startObject().field("pre_aggregated").startObject() + .field("counts", new int[] {2, 8, 4}) + .field("values", new double[] {2 ,3 ,2}) + .endObject() + .endObject()), + XContentType.JSON); + + Exception e = 
expectThrows(MapperParsingException.class, () -> defaultMapper.parse(source)); + assertThat(e.getCause().getMessage(), containsString(" values must be in increasing order, " + + "got [2.0] but previous value was [3.0]")); + } + + public void testFieldNotObject() throws Exception { + ensureGreen(); + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") + .startObject("properties").startObject("pre_aggregated").field("type", "histogram"); + String mapping = Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + SourceToParse source = new SourceToParse("test", "1", + BytesReference.bytes(XContentFactory.jsonBuilder() + .startObject().field("pre_aggregated", "bah") + .endObject()), + XContentType.JSON); + + Exception e = expectThrows(MapperParsingException.class, () -> defaultMapper.parse(source)); + assertThat(e.getCause().getMessage(), containsString("expecting token of type [START_OBJECT] " + + "but found [VALUE_STRING]")); + } + + public void testNegativeCount() throws Exception { + ensureGreen(); + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") + .startObject("properties").startObject("pre_aggregated").field("type", "histogram"); + String mapping = Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + SourceToParse source = new SourceToParse("test", "1", + BytesReference.bytes(XContentFactory.jsonBuilder() + .startObject().startObject("pre_aggregated") + .field("counts", new int[] {2, 2, -3}) + .field("values", new double[] {2, 2, 3}) + .endObject().endObject()), + XContentType.JSON); + + Exception e = expectThrows(MapperParsingException.class, () -> defaultMapper.parse(source)); + assertThat(e.getCause().getMessage(), containsString("[counts] elements must be >= 0 but got -3")); + } + + @Override + protected Collection> getPlugins() { + List> plugins = new ArrayList<>(super.getPlugins()); + plugins.add(AnalyticsPlugin.class); + plugins.add(XPackPlugin.class); + return plugins; + } + +} diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldTypeTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldTypeTests.java new file mode 100644 index 0000000000000..82ef60fe0ccb5 --- /dev/null +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldTypeTests.java @@ -0,0 +1,19 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ + + +package org.elasticsearch.xpack.analytics.mapper; + +import org.elasticsearch.index.mapper.FieldTypeTestCase; +import org.elasticsearch.index.mapper.MappedFieldType; + +public class HistogramFieldTypeTests extends FieldTypeTestCase { + + @Override + protected MappedFieldType createDefaultFieldType() { + return new HistogramFieldMapper.HistogramFieldType(); + } +} diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramPercentileAggregationTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramPercentileAggregationTests.java new file mode 100644 index 0000000000000..9561870f55495 --- /dev/null +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramPercentileAggregationTests.java @@ -0,0 +1,237 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.analytics.mapper; + + +import com.tdunning.math.stats.Centroid; +import org.HdrHistogram.DoubleHistogram; +import org.HdrHistogram.DoubleHistogramIterationValue; +import org.apache.lucene.util.TestUtil; +import org.elasticsearch.action.admin.indices.mapping.put.PutMappingRequest; + +import org.elasticsearch.action.admin.indices.refresh.RefreshRequest; +import org.elasticsearch.action.bulk.BulkRequest; +import org.elasticsearch.action.index.IndexRequest; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.plugins.Plugin; + +import org.elasticsearch.search.aggregations.AggregationBuilders; +import org.elasticsearch.search.aggregations.metrics.InternalHDRPercentiles; +import org.elasticsearch.search.aggregations.metrics.InternalTDigestPercentiles; +import org.elasticsearch.search.aggregations.metrics.PercentilesAggregationBuilder; +import org.elasticsearch.search.aggregations.metrics.PercentilesMethod; +import org.elasticsearch.search.aggregations.metrics.TDigestState; +import org.elasticsearch.test.ESSingleNodeTestCase; +import org.elasticsearch.xpack.analytics.AnalyticsPlugin; +import org.elasticsearch.xpack.core.XPackPlugin; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; + + +public class HistogramPercentileAggregationTests extends ESSingleNodeTestCase { + + public void testHDRHistogram() throws Exception { + + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder() + .startObject() + .startObject("_doc") + .startObject("properties") + .startObject("data") + .field("type", "double") + .endObject() + .endObject() + .endObject() + .endObject(); + createIndex("raw"); + PutMappingRequest request = new PutMappingRequest("raw").source(xContentBuilder); + client().admin().indices().putMapping(request).actionGet(); + + + XContentBuilder xContentBuilder2 = XContentFactory.jsonBuilder() + .startObject() + .startObject("_doc") + .startObject("properties") + .startObject("data") + .field("type", "histogram") + .endObject() + .endObject() + .endObject() + .endObject(); + createIndex("pre_agg"); + PutMappingRequest request2 = new PutMappingRequest("pre_agg").source(xContentBuilder2); + client().admin().indices().putMapping(request2).actionGet(); + + + int 
numberOfSignificantValueDigits = TestUtil.nextInt(random(), 1, 5); + DoubleHistogram histogram = new DoubleHistogram(numberOfSignificantValueDigits); + BulkRequest bulkRequest = new BulkRequest(); + + int numDocs = 10000; + int frq = 1000; + + for (int i = 0; i < numDocs; i++) { + double value = random().nextDouble(); + XContentBuilder doc = XContentFactory.jsonBuilder() + .startObject() + .field("data", value) + .endObject(); + bulkRequest.add(new IndexRequest("raw").source(doc)); + histogram.recordValue(value); + if ((i + 1) % frq == 0) { + client().bulk(bulkRequest).get(); + bulkRequest = new BulkRequest(); + List<Double> values = new ArrayList<>(); + List<Integer> counts = new ArrayList<>(); + Iterator<DoubleHistogramIterationValue> iterator = histogram.recordedValues().iterator(); + while (iterator.hasNext()) { + DoubleHistogramIterationValue histValue = iterator.next(); + values.add(histValue.getValueIteratedTo()); + counts.add(Math.toIntExact(histValue.getCountAtValueIteratedTo())); + } + XContentBuilder preAggDoc = XContentFactory.jsonBuilder() + .startObject() + .startObject("data") + .field("values", values.toArray(new Double[values.size()])) + .field("counts", counts.toArray(new Integer[counts.size()])) + .endObject() + .endObject(); + client().prepareIndex("pre_agg").setSource(preAggDoc).get(); + histogram.reset(); + } + } + client().admin().indices().refresh(new RefreshRequest("raw", "pre_agg")).get(); + + SearchResponse response = client().prepareSearch("raw").setTrackTotalHits(true).get(); + assertEquals(numDocs, response.getHits().getTotalHits().value); + + response = client().prepareSearch("pre_agg").get(); + assertEquals(numDocs / frq, response.getHits().getTotalHits().value); + + PercentilesAggregationBuilder builder = + AggregationBuilders.percentiles("agg").field("data").method(PercentilesMethod.HDR) + .numberOfSignificantValueDigits(numberOfSignificantValueDigits).percentiles(10); + + SearchResponse responseRaw = client().prepareSearch("raw").addAggregation(builder).get(); + SearchResponse responsePreAgg = client().prepareSearch("pre_agg").addAggregation(builder).get(); + SearchResponse responseBoth = client().prepareSearch("pre_agg", "raw").addAggregation(builder).get(); + + InternalHDRPercentiles percentilesRaw = responseRaw.getAggregations().get("agg"); + InternalHDRPercentiles percentilesPreAgg = responsePreAgg.getAggregations().get("agg"); + InternalHDRPercentiles percentilesBoth = responseBoth.getAggregations().get("agg"); + for (int i = 1; i < 100; i++) { + assertEquals(percentilesRaw.percentile(i), percentilesPreAgg.percentile(i), 0.0); + assertEquals(percentilesRaw.percentile(i), percentilesBoth.percentile(i), 0.0); + } + } + + public void testTDigestHistogram() throws Exception { + + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder() + .startObject() + .startObject("_doc") + .startObject("properties") + .startObject("data") + .field("type", "double") + .endObject() + .endObject() + .endObject() + .endObject(); + createIndex("raw"); + PutMappingRequest request = new PutMappingRequest("raw").source(xContentBuilder); + client().admin().indices().putMapping(request).actionGet(); + + + XContentBuilder xContentBuilder2 = XContentFactory.jsonBuilder() + .startObject() + .startObject("_doc") + .startObject("properties") + .startObject("data") + .field("type", "histogram") + .endObject() + .endObject() + .endObject() + .endObject(); + createIndex("pre_agg"); + PutMappingRequest request2 = new PutMappingRequest("pre_agg").source(xContentBuilder2); +
client().admin().indices().putMapping(request2).actionGet(); + + + int compression = TestUtil.nextInt(random(), 25, 300); + TDigestState histogram = new TDigestState(compression); + BulkRequest bulkRequest = new BulkRequest(); + + int numDocs = 10000; + int frq = 1000; + + for (int i = 0; i < numDocs; i++) { + double value = random().nextDouble(); + XContentBuilder doc = XContentFactory.jsonBuilder() + .startObject() + .field("data", value) + .endObject(); + bulkRequest.add(new IndexRequest("raw").source(doc)); + histogram.add(value); + if ((i + 1) % frq == 0) { + client().bulk(bulkRequest).get(); + bulkRequest = new BulkRequest(); + List<Double> values = new ArrayList<>(); + List<Integer> counts = new ArrayList<>(); + Collection<Centroid> centroids = histogram.centroids(); + for (Centroid centroid : centroids) { + values.add(centroid.mean()); + counts.add(centroid.count()); + } + XContentBuilder preAggDoc = XContentFactory.jsonBuilder() + .startObject() + .startObject("data") + .field("values", values.toArray(new Double[values.size()])) + .field("counts", counts.toArray(new Integer[counts.size()])) + .endObject() + .endObject(); + client().prepareIndex("pre_agg").setSource(preAggDoc).get(); + histogram = new TDigestState(compression); + } + } + client().admin().indices().refresh(new RefreshRequest("raw", "pre_agg")).get(); + + SearchResponse response = client().prepareSearch("raw").setTrackTotalHits(true).get(); + assertEquals(numDocs, response.getHits().getTotalHits().value); + + response = client().prepareSearch("pre_agg").get(); + assertEquals(numDocs / frq, response.getHits().getTotalHits().value); + + PercentilesAggregationBuilder builder = + AggregationBuilders.percentiles("agg").field("data").method(PercentilesMethod.TDIGEST) + .compression(compression).percentiles(10, 25, 50, 75); + + SearchResponse responseRaw = client().prepareSearch("raw").addAggregation(builder).get(); + SearchResponse responsePreAgg = client().prepareSearch("pre_agg").addAggregation(builder).get(); + SearchResponse responseBoth = client().prepareSearch("raw", "pre_agg").addAggregation(builder).get(); + + InternalTDigestPercentiles percentilesRaw = responseRaw.getAggregations().get("agg"); + InternalTDigestPercentiles percentilesPreAgg = responsePreAgg.getAggregations().get("agg"); + InternalTDigestPercentiles percentilesBoth = responseBoth.getAggregations().get("agg"); + for (int i = 1; i < 100; i++) { + assertEquals(percentilesRaw.percentile(i), percentilesPreAgg.percentile(i), 1e-2); + assertEquals(percentilesRaw.percentile(i), percentilesBoth.percentile(i), 1e-2); + } + } + + + @Override + protected Collection<Class<? extends Plugin>> getPlugins() { + List<Class<? extends Plugin>> plugins = new ArrayList<>(super.getPlugins()); + plugins.add(AnalyticsPlugin.class); + plugins.add(XPackPlugin.class); + return plugins; + } + +}
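Editor's note: the HDR test above compares raw and pre-aggregated percentiles with a delta of 0.0, while the TDigest test only asserts agreement within 1e-2. A hedged sketch of the intuition: HDR bucket boundaries are fully determined by the significant-digits setting, so recording the same values always yields identical buckets and counts, regardless of order, for example:

    DoubleHistogram a = new DoubleHistogram(3);
    DoubleHistogram b = new DoubleHistogram(3);
    for (double v : new double[] {0.2, 3, 10}) {
        a.recordValue(v);
        b.recordValue(v);
    }
    // identical buckets and counts imply identical percentiles
    assert a.getValueAtPercentile(50.0) == b.getValueAtPercentile(50.0);

T-Digest centroids, by contrast, depend on insertion order and merge history, so only approximate agreement can be asserted.

diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentileRanksAggregatorTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentileRanksAggregatorTests.java new file mode 100644 index 0000000000000..879173a5bc26e --- /dev/null +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentileRanksAggregatorTests.java @@ -0,0 +1,88 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License.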
+ */ +package org.elasticsearch.xpack.analytics.mapper; + +import com.tdunning.math.stats.Centroid; +import com.tdunning.math.stats.TDigest; +import org.apache.lucene.document.BinaryDocValuesField; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.store.Directory; +import org.elasticsearch.common.io.stream.BytesStreamOutput; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.search.aggregations.AggregatorTestCase; +import org.elasticsearch.search.aggregations.metrics.InternalTDigestPercentileRanks; +import org.elasticsearch.search.aggregations.metrics.Percentile; +import org.elasticsearch.search.aggregations.metrics.PercentileRanks; +import org.elasticsearch.search.aggregations.metrics.PercentileRanksAggregationBuilder; +import org.elasticsearch.search.aggregations.metrics.PercentilesMethod; +import org.elasticsearch.search.aggregations.metrics.TDigestState; +import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper; +import org.hamcrest.Matchers; + +import java.io.IOException; +import java.util.Collection; +import java.util.Iterator; + + +public class TDigestPreAggregatedPercentileRanksAggregatorTests extends AggregatorTestCase { + + private BinaryDocValuesField getDocValue(String fieldName, double[] values) throws IOException { + TDigest histogram = new TDigestState(100.0); //default + for (double value : values) { + histogram.add(value); + } + BytesStreamOutput streamOutput = new BytesStreamOutput(); + histogram.compress(); + Collection centroids = histogram.centroids(); + Iterator iterator = centroids.iterator(); + while ( iterator.hasNext()) { + Centroid centroid = iterator.next(); + streamOutput.writeVInt(centroid.count()); + streamOutput.writeDouble(centroid.mean()); + } + return new BinaryDocValuesField(fieldName, streamOutput.bytes().toBytesRef()); + } + + public void testSimple() throws IOException { + try (Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir)) { + Document doc = new Document(); + doc.add(getDocValue("field", new double[] {3, 0.2, 10})); + w.addDocument(doc); + + PercentileRanksAggregationBuilder aggBuilder = new PercentileRanksAggregationBuilder("my_agg", new double[] {0.1, 0.5, 12}) + .field("field") + .method(PercentilesMethod.TDIGEST); + MappedFieldType fieldType = new HistogramFieldMapper.Builder("number").fieldType(); + fieldType.setName("field"); + try (IndexReader reader = w.getReader()) { + IndexSearcher searcher = new IndexSearcher(reader); + PercentileRanks ranks = search(searcher, new MatchAllDocsQuery(), aggBuilder, fieldType); + Iterator rankIterator = ranks.iterator(); + Percentile rank = rankIterator.next(); + assertEquals(0.1, rank.getValue(), 0d); + // TODO: Fix T-Digest: this assertion should pass but we currently get ~15 + // https://github.com/elastic/elasticsearch/issues/14851 + // assertThat(rank.getPercent(), Matchers.equalTo(0d)); + rank = rankIterator.next(); + assertEquals(0.5, rank.getValue(), 0d); + assertThat(rank.getPercent(), Matchers.greaterThan(0d)); + assertThat(rank.getPercent(), Matchers.lessThan(100d)); + rank = rankIterator.next(); + assertEquals(12, rank.getValue(), 0d); + // TODO: Fix T-Digest: this assertion should pass but we currently get ~59 + // https://github.com/elastic/elasticsearch/issues/14851 + // 
assertThat(rank.getPercent(), Matchers.equalTo(100d)); + assertFalse(rankIterator.hasNext()); + assertTrue(AggregationInspectionHelper.hasValue(((InternalTDigestPercentileRanks)ranks))); + } + } + } +} diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentilesAggregatorTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentilesAggregatorTests.java new file mode 100644 index 0000000000000..e1340619256cf --- /dev/null +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentilesAggregatorTests.java @@ -0,0 +1,128 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.analytics.mapper; + +import com.tdunning.math.stats.Centroid; +import com.tdunning.math.stats.TDigest; +import org.apache.lucene.document.BinaryDocValuesField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.search.DocValuesFieldExistsQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.store.Directory; +import org.elasticsearch.common.CheckedConsumer; +import org.elasticsearch.common.io.stream.BytesStreamOutput; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.search.aggregations.Aggregator; +import org.elasticsearch.search.aggregations.AggregatorTestCase; +import org.elasticsearch.search.aggregations.metrics.InternalTDigestPercentiles; +import org.elasticsearch.search.aggregations.metrics.PercentilesAggregationBuilder; +import org.elasticsearch.search.aggregations.metrics.PercentilesMethod; +import org.elasticsearch.search.aggregations.metrics.TDigestState; +import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper; + +import java.io.IOException; +import java.util.Collection; +import java.util.Iterator; +import java.util.function.Consumer; + +import static java.util.Collections.singleton; + +public class TDigestPreAggregatedPercentilesAggregatorTests extends AggregatorTestCase { + + private BinaryDocValuesField getDocValue(String fieldName, double[] values) throws IOException { + TDigest histogram = new TDigestState(100.0); //default + for (double value : values) { + histogram.add(value); + } + BytesStreamOutput streamOutput = new BytesStreamOutput(); + histogram.compress(); + Collection centroids = histogram.centroids(); + Iterator iterator = centroids.iterator(); + while ( iterator.hasNext()) { + Centroid centroid = iterator.next(); + streamOutput.writeVInt(centroid.count()); + streamOutput.writeDouble(centroid.mean()); + } + return new BinaryDocValuesField(fieldName, streamOutput.bytes().toBytesRef()); + } + + public void testNoMatchingField() throws IOException { + testCase(new MatchAllDocsQuery(), iw -> { + iw.addDocument(singleton(getDocValue("wrong_number", new double[]{7, 1}))); + }, hdr -> { + //assertEquals(0L, hdr.state.getTotalCount()); + assertFalse(AggregationInspectionHelper.hasValue(hdr)); + }); + } + + public void testEmptyField() throws IOException { + testCase(new MatchAllDocsQuery(), iw -> { + 
iw.addDocument(singleton(getDocValue("number", new double[0]))); + }, hdr -> { + assertFalse(AggregationInspectionHelper.hasValue(hdr)); + }); + } + + public void testSomeMatchesBinaryDocValues() throws IOException { + testCase(new DocValuesFieldExistsQuery("number"), iw -> { + iw.addDocument(singleton(getDocValue("number", new double[]{60, 40, 20, 10}))); + }, hdr -> { + //assertEquals(4L, hdr.state.getTotalCount()); + double approximation = 0.05d; + assertEquals(15.0d, hdr.percentile(25), approximation); + assertEquals(30.0d, hdr.percentile(50), approximation); + assertEquals(50.0d, hdr.percentile(75), approximation); + assertEquals(60.0d, hdr.percentile(99), approximation); + assertTrue(AggregationInspectionHelper.hasValue(hdr)); + }); + } + + public void testSomeMatchesMultiBinaryDocValues() throws IOException { + testCase(new DocValuesFieldExistsQuery("number"), iw -> { + iw.addDocument(singleton(getDocValue("number", new double[]{60, 40, 20, 10}))); + iw.addDocument(singleton(getDocValue("number", new double[]{60, 40, 20, 10}))); + iw.addDocument(singleton(getDocValue("number", new double[]{60, 40, 20, 10}))); + iw.addDocument(singleton(getDocValue("number", new double[]{60, 40, 20, 10}))); + }, hdr -> { + //assertEquals(16L, hdr.state.getTotalCount()); + double approximation = 0.05d; + assertEquals(15.0d, hdr.percentile(25), approximation); + assertEquals(30.0d, hdr.percentile(50), approximation); + assertEquals(50.0d, hdr.percentile(75), approximation); + assertEquals(60.0d, hdr.percentile(99), approximation); + assertTrue(AggregationInspectionHelper.hasValue(hdr)); + }); + } + + private void testCase(Query query, CheckedConsumer buildIndex, + Consumer verify) throws IOException { + try (Directory directory = newDirectory()) { + try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) { + buildIndex.accept(indexWriter); + } + + try (IndexReader indexReader = DirectoryReader.open(directory)) { + IndexSearcher indexSearcher = newSearcher(indexReader, true, true); + + PercentilesAggregationBuilder builder = + new PercentilesAggregationBuilder("test").field("number").method(PercentilesMethod.TDIGEST); + + MappedFieldType fieldType = new HistogramFieldMapper.Builder("number").fieldType(); + fieldType.setName("number"); + Aggregator aggregator = createAggregator(builder, indexSearcher, fieldType); + aggregator.preCollection(); + indexSearcher.search(query, aggregator); + aggregator.postCollection(); + verify.accept((InternalTDigestPercentiles) aggregator.buildAggregation(0L)); + + } + } + } +}
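Editor's note: as an end-to-end recap, here is a hedged sketch of how a client could assemble the paired values/counts arrays for a histogram-mapped field from an HdrHistogram, mirroring the indexing loop in HistogramPercentileAggregationTests above. The field name "latency" is illustrative only:

    DoubleHistogram histogram = new DoubleHistogram(3);
    for (double latency : new double[] {0.3, 0.7, 12.5}) {
        histogram.recordValue(latency);
    }
    List<Double> values = new ArrayList<>();
    List<Integer> counts = new ArrayList<>();
    for (DoubleHistogramIterationValue v : histogram.recordedValues()) {
        values.add(v.getValueIteratedTo());                         // ascending bucket values
        counts.add(Math.toIntExact(v.getCountAtValueIteratedTo())); // matching non-negative counts
    }
    XContentBuilder doc = XContentFactory.jsonBuilder()
        .startObject()
            .startObject("latency")                                 // the histogram-mapped field
                .field("values", values.toArray(new Double[0]))
                .field("counts", counts.toArray(new Integer[0]))
            .endObject()
        .endObject();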