66
77package org .elasticsearch .xpack .analytics .boxplot ;
88
9+ import com .tdunning .math .stats .Centroid ;
10+
911import org .elasticsearch .common .io .stream .StreamInput ;
1012import org .elasticsearch .common .io .stream .StreamOutput ;
1113import org .elasticsearch .common .xcontent .XContentBuilder ;
2426
2527public class InternalBoxplot extends InternalNumericMetricsAggregation .MultiValue implements Boxplot {
2628
29+ /**
30+ * This value is used in determining the width of the whiskers of the boxplot. After the IQR value is calculated, it gets multiplied
31+ * by this multiplier to decide how far out from the q1 and q3 points to extend the whiskers. The value of 1.5 is traditional.
32+ * See https://en.wikipedia.org/wiki/Box_plot
33+ */
34+ public static final double IQR_MULTIPLIER = 1.5 ;
35+
2736 enum Metrics {
37+ MIN {
38+ @ Override
39+ double value (InternalBoxplot boxplot ) {
40+ return boxplot .getMin ();
41+ }
42+
43+ @ Override
44+ double value (TDigestState state ) {
45+ return state == null ? Double .NEGATIVE_INFINITY : state .getMin ();
46+ }
47+ },
48+ MAX {
49+ @ Override
50+ double value (InternalBoxplot boxplot ) {
51+ return boxplot .getMax ();
52+ }
53+
54+ @ Override
55+ double value (TDigestState state ) {
56+ return state == null ? Double .POSITIVE_INFINITY : state .getMax ();
57+ }
58+ },
59+ Q1 {
60+ @ Override
61+ double value (InternalBoxplot boxplot ) {
62+ return boxplot .getQ1 ();
63+ }
64+
65+ @ Override
66+ double value (TDigestState state ) {
67+ return state == null ? Double .NaN : state .quantile (0.25 );
68+ }
69+ },
70+ Q2 {
71+ @ Override
72+ double value (InternalBoxplot boxplot ) {
73+ return boxplot .getQ2 ();
74+ }
75+
76+ @ Override
77+ double value (TDigestState state ) {
78+ return state == null ? Double .NaN : state .quantile (0.5 );
79+ }
80+ },
81+ Q3 {
82+ @ Override
83+ double value (InternalBoxplot boxplot ) {
84+ return boxplot .getQ3 ();
85+ }
86+
87+ @ Override
88+ double value (TDigestState state ) {
89+ return state == null ? Double .NaN : state .quantile (0.75 );
90+ }
91+ },
92+ LOWER {
93+ @ Override
94+ double value (InternalBoxplot boxplot ) {
95+ return whiskers (boxplot .state )[0 ];
96+ }
2897
29- MIN , MAX , Q1 , Q2 , Q3 ;
98+ @ Override
99+ double value (TDigestState state ) {
100+ return whiskers (state )[0 ];
101+ }
102+ },
103+ UPPER {
104+ @ Override
105+ double value (InternalBoxplot boxplot ) {
106+ return whiskers (boxplot .state )[1 ];
107+ }
108+
109+ @ Override
110+ double value (TDigestState state ) {
111+ return whiskers (state )[1 ];
112+ }
113+ };
30114
31115 public static Metrics resolve (String name ) {
32116 return Metrics .valueOf (name .toUpperCase (Locale .ROOT ));
@@ -36,39 +120,48 @@ public String value() {
36120 return name ().toLowerCase (Locale .ROOT );
37121 }
38122
39- double value (InternalBoxplot boxplot ) {
40- switch (this ) {
41- case MIN :
42- return boxplot .getMin ();
43- case MAX :
44- return boxplot .getMax ();
45- case Q1 :
46- return boxplot .getQ1 ();
47- case Q2 :
48- return boxplot .getQ2 ();
49- case Q3 :
50- return boxplot .getQ3 ();
51- default :
52- throw new IllegalArgumentException ("Unknown value [" + this .value () + "] in the boxplot aggregation" );
53- }
123+ abstract double value (InternalBoxplot boxplot );
124+
125+ abstract double value (TDigestState state );
126+ }
127+
128+ /**
129+ * For a given TDigest, find the "whisker" valeus, such that the upper whisker is (close to) the highest observed value less than
130+ * q3 + 1.5 * IQR and the lower whisker is (close to) the lowest observed value greater than q1 - 1.5 * IQR. Since we don't track
131+ * observed values directly, this function returns the centroid according to the above logic.
132+ *
133+ * @param state - an initialized TDigestState representing the observed data.
134+ * @return - two doubles in an array, where whiskers[0] is the lower whisker and whiskers[1] is the upper whisker.
135+ */
136+ public static double [] whiskers (TDigestState state ) {
137+ double [] results = new double [2 ];
138+ results [0 ] = Double .NaN ;
139+ results [1 ] = Double .NaN ;
140+ if (state == null ) {
141+ return results ;
54142 }
55143
56- double value (TDigestState state ) {
57- switch (this ) {
58- case MIN :
59- return state == null ? Double .NEGATIVE_INFINITY : state .getMin ();
60- case MAX :
61- return state == null ? Double .POSITIVE_INFINITY : state .getMax ();
62- case Q1 :
63- return state == null ? Double .NaN : state .quantile (0.25 );
64- case Q2 :
65- return state == null ? Double .NaN : state .quantile (0.5 );
66- case Q3 :
67- return state == null ? Double .NaN : state .quantile (0.75 );
68- default :
69- throw new IllegalArgumentException ("Unknown value [" + this .value () + "] in the boxplot aggregation" );
144+ double q3 = state .quantile (0.75 );
145+ double q1 = state .quantile (0.25 );
146+ double iqr = q3 - q1 ;
147+ double upper = q3 + (IQR_MULTIPLIER * iqr );
148+ double lower = q1 - (IQR_MULTIPLIER * iqr );
149+ Centroid prev = null ;
150+ // Does this iterate in ascending order? if not, we might need to sort...
151+ for (Centroid c : state .centroids ()) {
152+ if (Double .isNaN (results [0 ]) && c .mean () > lower ) {
153+ results [0 ] = c .mean ();
70154 }
155+ if (c .mean () > upper ) {
156+ results [1 ] = prev .mean ();
157+ break ;
158+ }
159+ prev = c ;
160+ }
161+ if (Double .isNaN (results [1 ])) {
162+ results [1 ] = state .getMax ();
71163 }
164+ return results ;
72165 }
73166
74167 public static List <String > metricNames = Stream .of (Metrics .values ())
@@ -188,17 +281,22 @@ public InternalBoxplot reduce(List<InternalAggregation> aggregations, ReduceCont
188281
189282 @ Override
190283 public XContentBuilder doXContentBody (XContentBuilder builder , Params params ) throws IOException {
284+ double [] whiskers = whiskers (state );
191285 builder .field ("min" , getMin ());
192286 builder .field ("max" , getMax ());
193287 builder .field ("q1" , getQ1 ());
194288 builder .field ("q2" , getQ2 ());
195289 builder .field ("q3" , getQ3 ());
290+ builder .field ("lower" , whiskers [0 ]);
291+ builder .field ("upper" , whiskers [1 ]);
196292 if (format != DocValueFormat .RAW ) {
197293 builder .field ("min_as_string" , format .format (getMin ()));
198294 builder .field ("max_as_string" , format .format (getMax ()));
199295 builder .field ("q1_as_string" , format .format (getQ1 ()));
200296 builder .field ("q2_as_string" , format .format (getQ2 ()));
201297 builder .field ("q3_as_string" , format .format (getQ3 ()));
298+ builder .field ("lower_as_string" , format .format (whiskers [0 ]));
299+ builder .field ("upper_as_string" , format .format (whiskers [1 ]));
202300 }
203301 return builder ;
204302 }
0 commit comments