4 changes: 2 additions & 2 deletions docs/reference/sql/functions/grouping.asciidoc
@@ -87,8 +87,8 @@ actually used will be `INTERVAL '2' DAY`. If the interval specified is less than

[IMPORTANT]
All intervals specified for a date/time HISTOGRAM will use a <<search-aggregations-bucket-datehistogram-aggregation,fixed interval>>
-in their `date_histogram` aggregation definition, with the notable exceptions of `INTERVAL '1' YEAR` AND `INTERVAL '1' MONTH` where a calendar interval is used.
-The choice for a calendar interval was made for having a more intuitive result for YEAR and MONTH groupings. In the case of YEAR, for example, the calendar intervals consider a one year
+in their `date_histogram` aggregation definition, with the notable exceptions of `INTERVAL '1' YEAR`, `INTERVAL '1' MONTH` and `INTERVAL '1' DAY` where a calendar interval is used.
+The choice for a calendar interval was made for having a more intuitive result for YEAR, MONTH and DAY groupings. In the case of YEAR, for example, the calendar intervals consider a one year
bucket as the one starting on January 1st that specific year, whereas a fixed interval one-year-bucket considers one year as a number
of milliseconds (for example, `31536000000ms` corresponding to 365 days, 24 hours per day, 60 minutes per hour etc.). With fixed intervals,
the day of February 5th, 2019 for example, belongs to a bucket that starts on December 20th, 2018 and {es} (and implicitly {es-sql}) would
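The fixed-versus-calendar behaviour described in the documentation hunk above can be reproduced with plain JDK time arithmetic. The sketch below is illustrative only: it is not Elasticsearch code, and it simply anchors fixed buckets at the Unix epoch, which is a simplification of the real rounding logic. A fixed 365-day bucket places February 5th, 2019 in a bucket starting December 20th, 2018, while a calendar one-year bucket starts on January 1st, 2019.

import java.time.Duration;
import java.time.Instant;
import java.time.LocalDate;
import java.time.ZoneOffset;

// Illustrative sketch only: epoch-anchored fixed bucketing vs. a calendar year bucket.
public class FixedVsCalendarBucket {
    public static void main(String[] args) {
        long fixedIntervalMillis = Duration.ofDays(365).toMillis();          // 31536000000ms
        long ts = LocalDate.of(2019, 2, 5).atStartOfDay(ZoneOffset.UTC)
                .toInstant().toEpochMilli();                                 // 2019-02-05T00:00:00Z

        long fixedBucketStart = ts - Math.floorMod(ts, fixedIntervalMillis); // epoch-anchored rounding
        Instant calendarBucketStart = LocalDate.of(2019, 1, 1)
                .atStartOfDay(ZoneOffset.UTC).toInstant();                   // January 1st of that year

        System.out.println("fixed 365d bucket starts:  " + Instant.ofEpochMilli(fixedBucketStart)); // 2018-12-20T00:00:00Z
        System.out.println("calendar 1y bucket starts: " + calendarBucketStart);                    // 2019-01-01T00:00:00Z
    }
}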
24 changes: 24 additions & 0 deletions x-pack/plugin/sql/qa/src/main/resources/agg.csv-spec
@@ -531,6 +531,30 @@ null |10 |null
1953-11-01T00:00:00.000Z|1 |1953-11-07T00:00:00.000Z
;

histogramOneDay
schema::h:ts|c:l|birth_date:ts
SELECT HISTOGRAM(birth_date, INTERVAL 1 DAY) AS h, COUNT(*) as c, birth_date FROM test_emp WHERE YEAR(birth_date) BETWEEN 1959 AND 1960 GROUP BY h, birth_date ORDER BY h ASC;

h | c | birth_date
------------------------+---------------+------------------------
1959-01-27T00:00:00.000Z|1 |1959-01-27T00:00:00.000Z
1959-04-07T00:00:00.000Z|1 |1959-04-07T00:00:00.000Z
1959-07-23T00:00:00.000Z|2 |1959-07-23T00:00:00.000Z
1959-08-10T00:00:00.000Z|1 |1959-08-10T00:00:00.000Z
1959-08-19T00:00:00.000Z|1 |1959-08-19T00:00:00.000Z
1959-10-01T00:00:00.000Z|1 |1959-10-01T00:00:00.000Z
1959-12-03T00:00:00.000Z|1 |1959-12-03T00:00:00.000Z
1959-12-25T00:00:00.000Z|1 |1959-12-25T00:00:00.000Z
1960-02-20T00:00:00.000Z|1 |1960-02-20T00:00:00.000Z
1960-03-09T00:00:00.000Z|1 |1960-03-09T00:00:00.000Z
1960-05-25T00:00:00.000Z|1 |1960-05-25T00:00:00.000Z
1960-07-20T00:00:00.000Z|1 |1960-07-20T00:00:00.000Z
1960-08-09T00:00:00.000Z|1 |1960-08-09T00:00:00.000Z
1960-09-06T00:00:00.000Z|1 |1960-09-06T00:00:00.000Z
1960-10-04T00:00:00.000Z|1 |1960-10-04T00:00:00.000Z
1960-12-17T00:00:00.000Z|1 |1960-12-17T00:00:00.000Z
;

histogramDateTimeWithMonthOnTop
schema::h:i|c:l
SELECT HISTOGRAM(MONTH(birth_date), 2) AS h, COUNT(*) as c FROM test_emp GROUP BY h ORDER BY h DESC;
@@ -31,6 +31,7 @@ public class Histogram extends GroupingFunction {
private final ZoneId zoneId;
public static String YEAR_INTERVAL = DateHistogramInterval.YEAR.toString();
public static String MONTH_INTERVAL = DateHistogramInterval.MONTH.toString();
public static String DAY_INTERVAL = DateHistogramInterval.DAY.toString();

public Histogram(Source source, Expression field, Expression interval, ZoneId zoneId) {
super(source, field, Collections.singletonList(interval));
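For reference, the three *_INTERVAL constants above wrap standard DateHistogramInterval calendar expressions. A tiny sketch (assuming the stock constant values) of the strings they resolve to, which are the calendar_interval values asserted in the translator tests further down:

import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramInterval;

// Sketch only: print the calendar-interval strings the Histogram constants wrap.
public class IntervalStrings {
    public static void main(String[] args) {
        System.out.println(DateHistogramInterval.YEAR);  // 1y
        System.out.println(DateHistogramInterval.MONTH); // 1M
        System.out.println(DateHistogramInterval.DAY);   // 1d
    }
}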
@@ -41,6 +41,7 @@
import org.elasticsearch.xpack.sql.expression.function.aggregate.TopHits;
import org.elasticsearch.xpack.sql.expression.function.grouping.Histogram;
import org.elasticsearch.xpack.sql.expression.function.scalar.datetime.DateTimeHistogramFunction;
import org.elasticsearch.xpack.sql.expression.literal.interval.IntervalDayTime;
import org.elasticsearch.xpack.sql.expression.literal.interval.IntervalYearMonth;
import org.elasticsearch.xpack.sql.expression.literal.interval.Intervals;
import org.elasticsearch.xpack.sql.plan.logical.Pivot;
@@ -80,6 +81,7 @@
import org.elasticsearch.xpack.sql.util.Check;
import org.elasticsearch.xpack.sql.util.DateUtils;

import java.time.Duration;
import java.time.Period;
import java.util.ArrayList;
import java.util.Arrays;
@@ -90,6 +92,7 @@
import java.util.concurrent.atomic.AtomicReference;

import static org.elasticsearch.xpack.ql.util.CollectionUtils.combine;
import static org.elasticsearch.xpack.sql.expression.function.grouping.Histogram.DAY_INTERVAL;
import static org.elasticsearch.xpack.sql.expression.function.grouping.Histogram.MONTH_INTERVAL;
import static org.elasticsearch.xpack.sql.expression.function.grouping.Histogram.YEAR_INTERVAL;
import static org.elasticsearch.xpack.sql.planner.QueryTranslator.toAgg;
@@ -332,14 +335,24 @@ else if (exp instanceof GroupingFunction) {

// When the histogram is `INTERVAL '1' YEAR` or `INTERVAL '1' MONTH`, the interval used in
// the ES date_histogram will be a calendar_interval with value "1y" or "1M" respectively.
-// All other intervals will be fixed_intervals expressed in ms.
if (field instanceof FieldAttribute) {
key = new GroupByDateHistogram(aggId, QueryTranslator.nameOf(field), calendarInterval, h.zoneId());
} else if (field instanceof Function) {
key = new GroupByDateHistogram(aggId, ((Function) field).asScript(), calendarInterval, h.zoneId());
}
}
-// typical interval
+// interval of exactly 1 day
+else if (value instanceof IntervalDayTime
+        && ((IntervalDayTime) value).interval().equals(Duration.ofDays(1))) {
+    // When the histogram is `INTERVAL '1' DAY` the interval used in
+    // the ES date_histogram will be a calendar_interval with value "1d"
+    if (field instanceof FieldAttribute) {
+        key = new GroupByDateHistogram(aggId, QueryTranslator.nameOf(field), DAY_INTERVAL, h.zoneId());
+    } else if (field instanceof Function) {
+        key = new GroupByDateHistogram(aggId, ((Function) field).asScript(), DAY_INTERVAL, h.zoneId());
+    }
+}
+// All other intervals will be fixed_intervals expressed in ms.
else {
long intervalAsMillis = Intervals.inMillis(h.interval());

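To make the branching above easier to follow in isolation, here is a simplified, hedged sketch of the classification it performs. The helper and enum names are illustrative only and not the planner's code; the real implementation inspects the IntervalYearMonth/IntervalDayTime literal wrappers, while the sketch uses raw java.time values.

import java.time.Duration;
import java.time.Period;

// Simplified sketch of how a HISTOGRAM interval maps to a date_histogram interval kind.
public class IntervalClassifier {
    enum Kind { CALENDAR_YEAR, CALENDAR_MONTH, CALENDAR_DAY, FIXED_MILLIS }

    // `value` stands in for the interval literal carried by the HISTOGRAM argument.
    static Kind classify(Object value) {
        if (value instanceof Period) {
            Period p = (Period) value;
            if (p.equals(Period.ofYears(1))) {
                return Kind.CALENDAR_YEAR;     // -> calendar_interval "1y"
            }
            if (p.equals(Period.ofMonths(1))) {
                return Kind.CALENDAR_MONTH;    // -> calendar_interval "1M"
            }
        }
        if (value instanceof Duration && value.equals(Duration.ofDays(1))) {
            return Kind.CALENDAR_DAY;          // -> calendar_interval "1d"
        }
        return Kind.FIXED_MILLIS;              // -> fixed_interval "<n>ms"
    }

    public static void main(String[] args) {
        System.out.println(classify(Duration.ofDays(1)));   // CALENDAR_DAY
        System.out.println(classify(Duration.ofHours(24))); // CALENDAR_DAY (equal Durations)
        System.out.println(classify(Duration.ofDays(2)));   // FIXED_MILLIS
        System.out.println(classify(Period.ofMonths(1)));   // CALENDAR_MONTH
    }
}

Note that Duration equality compares total seconds, so in the sketch a 24-hour duration classifies the same as one day; whether a SQL `INTERVAL '24' HOUR` literal actually takes the calendar path depends on how the parser represents that literal.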
@@ -1078,6 +1078,45 @@ public void testGroupByMoreMonthsHistogramQueryTranslator() {
+ "\"fixed_interval\":\"12960000000ms\",\"time_zone\":\"Z\"}}}]}}}"));
}

public void testGroupByOneDayHistogramQueryTranslator() {
PhysicalPlan p = optimizeAndPlan("SELECT HISTOGRAM(date, INTERVAL 1 DAY) AS h FROM test GROUP BY h");
assertEquals(EsQueryExec.class, p.getClass());
EsQueryExec eqe = (EsQueryExec) p;
assertEquals(1, eqe.output().size());
assertEquals("h", eqe.output().get(0).qualifiedName());
assertEquals(DATETIME, eqe.output().get(0).dataType());
assertThat(eqe.queryContainer().aggs().asAggBuilder().toString().replaceAll("\\s+", ""),
endsWith("\"date_histogram\":{\"field\":\"date\",\"missing_bucket\":true,\"value_type\":\"date\",\"order\":\"asc\","
+ "\"calendar_interval\":\"1d\",\"time_zone\":\"Z\"}}}]}}}"));
}

public void testGroupByMoreDaysHistogramQueryTranslator() {
PhysicalPlan p = optimizeAndPlan("SELECT HISTOGRAM(date, INTERVAL '1 5' DAY TO HOUR) AS h FROM test GROUP BY h");
assertEquals(EsQueryExec.class, p.getClass());
EsQueryExec eqe = (EsQueryExec) p;
assertEquals(1, eqe.output().size());
assertEquals("h", eqe.output().get(0).qualifiedName());
assertEquals(DATETIME, eqe.output().get(0).dataType());
assertThat(eqe.queryContainer().aggs().asAggBuilder().toString().replaceAll("\\s+", ""),
endsWith("\"date_histogram\":{\"field\":\"date\",\"missing_bucket\":true,\"value_type\":\"date\",\"order\":\"asc\","
+ "\"fixed_interval\":\"104400000ms\",\"time_zone\":\"Z\"}}}]}}}"));
}

public void testGroupByMoreDaysHistogram_WithFunction_QueryTranslator() {
PhysicalPlan p = optimizeAndPlan("SELECT HISTOGRAM(date + INTERVAL 5 DAYS, INTERVAL 1 DAY) AS h FROM test GROUP BY h");
assertEquals(EsQueryExec.class, p.getClass());
EsQueryExec eqe = (EsQueryExec) p;
assertEquals(1, eqe.output().size());
assertEquals("h", eqe.output().get(0).qualifiedName());
assertEquals(DATETIME, eqe.output().get(0).dataType());
assertThat(eqe.queryContainer().aggs().asAggBuilder().toString().replaceAll("\\s+", ""),
endsWith("\"date_histogram\":{\"script\":{\"source\":\"InternalSqlScriptUtils.add(" +
"InternalSqlScriptUtils.docValue(doc,params.v0),InternalSqlScriptUtils.intervalDayTime(params.v1,params.v2))\"," +
"\"lang\":\"painless\",\"params\":{\"v0\":\"date\",\"v1\":\"PT120H\",\"v2\":\"INTERVAL_DAY\"}}," +
"\"missing_bucket\":true,\"value_type\":\"long\",\"order\":\"asc\"," +
"\"calendar_interval\":\"1d\",\"time_zone\":\"Z\"}}}]}}}"));
}

public void testGroupByYearAndScalarsQueryTranslator() {
PhysicalPlan p = optimizeAndPlan("SELECT YEAR(CAST(date + INTERVAL 5 months AS DATE)) FROM test GROUP BY 1");
assertEquals(EsQueryExec.class, p.getClass());
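The translations asserted by the new unit tests can also be spot-checked against a running cluster through the SQL translate endpoint. The following is a hedged sketch using the low-level REST client, assuming a local cluster at localhost:9200; the class name is illustrative.

import org.apache.http.HttpHost;
import org.apache.http.util.EntityUtils;
import org.elasticsearch.client.Request;
import org.elasticsearch.client.Response;
import org.elasticsearch.client.RestClient;

// Manual spot-check (not part of the PR): ask the SQL translate API what query DSL a
// one-day HISTOGRAM turns into. With this change applied, the returned date_histogram
// source should carry "calendar_interval":"1d" rather than a fixed_interval in ms.
public class TranslateOneDayHistogram {
    public static void main(String[] args) throws Exception {
        try (RestClient client = RestClient.builder(new HttpHost("localhost", 9200, "http")).build()) {
            Request request = new Request("POST", "/_sql/translate");
            request.setJsonEntity("{\"query\": \"SELECT HISTOGRAM(birth_date, INTERVAL 1 DAY) AS h, COUNT(*) AS c "
                    + "FROM test_emp GROUP BY h\"}");
            Response response = client.performRequest(request);
            System.out.println(EntityUtils.toString(response.getEntity()));
        }
    }
}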