Skip to content

Commit 3d9ca4b

Browse files
authored
SQL: Enable aggregations to create a separate bucket for missing values (#32832)
Enable aggregations to create a separate bucket for missing values.
1 parent c41c614 commit 3d9ca4b

File tree

9 files changed

+154
-15
lines changed

9 files changed

+154
-15
lines changed

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/agg/GroupByColumnKey.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@ public GroupByColumnKey(String id, String fieldName, Direction direction) {
2525
public TermsValuesSourceBuilder asValueSource() {
2626
return new TermsValuesSourceBuilder(id())
2727
.field(fieldName())
28-
.order(direction().asOrder());
28+
.order(direction().asOrder())
29+
.missingBucket(true);
2930
}
3031

3132
@Override

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/agg/GroupByDateKey.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,8 @@ public DateHistogramValuesSourceBuilder asValueSource() {
4444
return new DateHistogramValuesSourceBuilder(id())
4545
.field(fieldName())
4646
.dateHistogramInterval(new DateHistogramInterval(interval))
47-
.timeZone(DateTimeZone.forTimeZone(timeZone));
47+
.timeZone(DateTimeZone.forTimeZone(timeZone))
48+
.missingBucket(true);
4849
}
4950

5051
@Override

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/agg/GroupByScriptKey.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@ public ScriptTemplate script() {
3636
public TermsValuesSourceBuilder asValueSource() {
3737
TermsValuesSourceBuilder builder = new TermsValuesSourceBuilder(id())
3838
.script(script.toPainless())
39-
.order(direction().asOrder());
39+
.order(direction().asOrder())
40+
.missingBucket(true);
4041

4142
if (script.outputType().isNumeric()) {
4243
builder.valueType(ValueType.NUMBER);

x-pack/qa/sql/src/main/java/org/elasticsearch/xpack/qa/sql/jdbc/DataLoader.java

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -42,14 +42,15 @@ protected static void loadDatasetIntoEs(RestClient client) throws Exception {
4242
}
4343

4444
protected static void loadEmpDatasetIntoEs(RestClient client) throws Exception {
45-
loadEmpDatasetIntoEs(client, "test_emp");
46-
loadEmpDatasetIntoEs(client, "test_emp_copy");
45+
loadEmpDatasetIntoEs(client, "test_emp", "employees");
46+
loadEmpDatasetIntoEs(client, "test_emp_copy", "employees");
47+
loadEmpDatasetIntoEs(client, "test_emp_with_nulls", "employees_with_nulls");
4748
makeAlias(client, "test_alias", "test_emp", "test_emp_copy");
4849
makeAlias(client, "test_alias_emp", "test_emp", "test_emp_copy");
4950
}
5051

5152
public static void loadDocsDatasetIntoEs(RestClient client) throws Exception {
52-
loadEmpDatasetIntoEs(client, "emp");
53+
loadEmpDatasetIntoEs(client, "emp", "employees");
5354
loadLibDatasetIntoEs(client, "library");
5455
makeAlias(client, "employees", "emp");
5556
}
@@ -62,7 +63,7 @@ private static void createString(String name, XContentBuilder builder) throws Ex
6263
.endObject();
6364
}
6465

65-
protected static void loadEmpDatasetIntoEs(RestClient client, String index) throws Exception {
66+
protected static void loadEmpDatasetIntoEs(RestClient client, String index, String fileName) throws Exception {
6667
Request request = new Request("PUT", "/" + index);
6768
XContentBuilder createIndex = JsonXContent.contentBuilder().startObject();
6869
createIndex.startObject("settings");
@@ -129,15 +130,18 @@ protected static void loadEmpDatasetIntoEs(RestClient client, String index) thro
129130
request = new Request("POST", "/" + index + "/emp/_bulk");
130131
request.addParameter("refresh", "true");
131132
StringBuilder bulk = new StringBuilder();
132-
csvToLines("employees", (titles, fields) -> {
133+
csvToLines(fileName, (titles, fields) -> {
133134
bulk.append("{\"index\":{}}\n");
134135
bulk.append('{');
135136
String emp_no = fields.get(1);
136137
for (int f = 0; f < fields.size(); f++) {
137-
if (f != 0) {
138-
bulk.append(',');
138+
// an empty value in the csv file is treated as 'null', thus skipping it in the bulk request
139+
if (fields.get(f).trim().length() > 0) {
140+
if (f != 0) {
141+
bulk.append(',');
142+
}
143+
bulk.append('"').append(titles.get(f)).append("\":\"").append(fields.get(f)).append('"');
139144
}
140-
bulk.append('"').append(titles.get(f)).append("\":\"").append(fields.get(f)).append('"');
141145
}
142146
// append department
143147
List<List<String>> list = dep_emp.get(emp_no);

x-pack/qa/sql/src/main/java/org/elasticsearch/xpack/qa/sql/jdbc/SqlSpecTestCase.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,10 @@ public abstract class SqlSpecTestCase extends SpecBaseIntegrationTestCase {
2525
private String query;
2626

2727
@ClassRule
28-
public static LocalH2 H2 = new LocalH2((c) -> c.createStatement().execute("RUNSCRIPT FROM 'classpath:/setup_test_emp.sql'"));
28+
public static LocalH2 H2 = new LocalH2((c) -> {
29+
c.createStatement().execute("RUNSCRIPT FROM 'classpath:/setup_test_emp.sql'");
30+
c.createStatement().execute("RUNSCRIPT FROM 'classpath:/setup_test_emp_with_nulls.sql'");
31+
});
2932

3033
@ParametersFactory(argumentFormatting = PARAM_FORMATTING)
3134
public static List<Object[]> readScriptSpec() throws Exception {
@@ -39,6 +42,7 @@ public static List<Object[]> readScriptSpec() throws Exception {
3942
tests.addAll(readScriptSpec("/arithmetic.sql-spec", parser));
4043
tests.addAll(readScriptSpec("/string-functions.sql-spec", parser));
4144
tests.addAll(readScriptSpec("/case-functions.sql-spec", parser));
45+
tests.addAll(readScriptSpec("/agg_nulls.sql-spec", parser));
4246
return tests;
4347
}
4448

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
selectGenderWithNullsAndGroupByGender
2+
SELECT gender, COUNT(*) count FROM test_emp_with_nulls GROUP BY gender ORDER BY gender;
3+
selectFirstNameWithNullsAndGroupByFirstName
4+
SELECT first_name FROM test_emp_with_nulls GROUP BY first_name ORDER BY first_name;
5+
selectCountWhereIsNull
6+
SELECT COUNT(*) count FROM test_emp_with_nulls WHERE first_name IS NULL;
7+
selectLanguagesCountWithNullsAndGroupByLanguage
8+
SELECT languages l, COUNT(*) c FROM test_emp_with_nulls GROUP BY languages ORDER BY languages;
9+
selectHireDateGroupByHireDate
10+
SELECT hire_date HD, COUNT(*) c FROM test_emp_with_nulls GROUP BY hire_date ORDER BY hire_date DESC;
11+
selectHireDateGroupByHireDate
12+
SELECT hire_date HD, COUNT(*) c FROM test_emp_with_nulls GROUP BY hire_date ORDER BY hire_date DESC;
13+
selectSalaryGroupBySalary
14+
SELECT salary, COUNT(*) c FROM test_emp_with_nulls GROUP BY salary ORDER BY salary DESC;

x-pack/qa/sql/src/main/resources/alias.csv-spec

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ test_alias | ALIAS
8686
test_alias_emp | ALIAS
8787
test_emp | BASE TABLE
8888
test_emp_copy | BASE TABLE
89+
test_emp_with_nulls | BASE TABLE
8990
;
9091

9192
testGroupByOnAlias
@@ -98,10 +99,10 @@ F | 10099.28
9899
;
99100

100101
testGroupByOnPattern
101-
SELECT gender, PERCENTILE(emp_no, 97) p1 FROM test_* GROUP BY gender;
102+
SELECT gender, PERCENTILE(emp_no, 97) p1 FROM test_* WHERE gender IS NOT NULL GROUP BY gender;
102103

103104
gender:s | p1:d
104105

105-
F | 10099.28
106-
M | 10095.75
106+
F | 10099.32
107+
M | 10095.98
107108
;
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
birth_date,emp_no,first_name,gender,hire_date,languages,last_name,salary
2+
1953-09-02T00:00:00Z,10001,Georgi,,1986-06-26T00:00:00Z,2,Facello,57305
3+
1964-06-02T00:00:00Z,10002,Bezalel,,1985-11-21T00:00:00Z,5,Simmel,56371
4+
1959-12-03T00:00:00Z,10003,Parto,,1986-08-28T00:00:00Z,4,Bamford,61805
5+
1954-05-01T00:00:00Z,10004,Chirstian,,1986-12-01T00:00:00Z,5,Koblick,36174
6+
1955-01-21T00:00:00Z,10005,Kyoichi,,1989-09-12T00:00:00Z,1,Maliniak,63528
7+
1953-04-20T00:00:00Z,10006,Anneke,,1989-06-02T00:00:00Z,3,Preusig,60335
8+
1957-05-23T00:00:00Z,10007,Tzvetan,,1989-02-10T00:00:00Z,4,Zielinski,74572
9+
1958-02-19T00:00:00Z,10008,Saniya,,1994-09-15T00:00:00Z,2,Kalloufi,43906
10+
1952-04-19T00:00:00Z,10009,Sumant,,1985-02-18T00:00:00Z,1,Peac,66174
11+
1963-06-01T00:00:00Z,10010,Duangkaew,,1989-08-24T00:00:00Z,4,Piveteau,45797
12+
1953-11-07T00:00:00Z,10011,Mary,F,1990-01-22T00:00:00Z,5,Sluis,31120
13+
1960-10-04T00:00:00Z,10012,Patricio,M,1992-12-18T00:00:00Z,5,Bridgland,48942
14+
1963-06-07T00:00:00Z,10013,Eberhardt,M,1985-10-20T00:00:00Z,1,Terkki,48735
15+
1956-02-12T00:00:00Z,10014,Berni,M,1987-03-11T00:00:00Z,5,Genin,37137
16+
1959-08-19T00:00:00Z,10015,Guoxiang,M,1987-07-02T00:00:00Z,5,Nooteboom,25324
17+
1961-05-02T00:00:00Z,10016,Kazuhito,M,1995-01-27T00:00:00Z,2,Cappelletti,61358
18+
1958-07-06T00:00:00Z,10017,Cristinel,F,1993-08-03T00:00:00Z,2,Bouloucos,58715
19+
1954-06-19T00:00:00Z,10018,Kazuhide,F,1993-08-03T00:00:00Z,2,Peha,56760
20+
1953-01-23T00:00:00Z,10019,Lillian,M,1993-08-03T00:00:00Z,1,Haddadi,73717
21+
1952-12-24T00:00:00Z,10020,,M,1991-01-26T00:00:00Z,3,Warwick,40031
22+
1960-02-20T00:00:00Z,10021,,M,1989-12-17T00:00:00Z,5,Erde,60408
23+
1952-07-08T00:00:00Z,10022,,M,1995-08-22T00:00:00Z,3,Famili,48233
24+
1953-09-29T00:00:00Z,10023,,F,1989-12-17T00:00:00Z,2,Montemayor,47896
25+
1958-09-05T00:00:00Z,10024,,F,1997-05-19T00:00:00Z,3,Pettey,64675
26+
1958-10-31T00:00:00Z,10025,Prasadram,M,1987-08-17T00:00:00Z,5,Heyers,47411
27+
1953-04-03T00:00:00Z,10026,Yongqiao,M,1995-03-20T00:00:00Z,3,Berztiss,28336
28+
1962-07-10T00:00:00Z,10027,Divier,F,1989-07-07T00:00:00Z,5,Reistad,73851
29+
1963-11-26T00:00:00Z,10028,Domenick,M,1991-10-22T00:00:00Z,1,Tempesti,39356
30+
1956-12-13T00:00:00Z,10029,Otmar,M,1985-11-20T00:00:00Z,,Herbst,74999
31+
1958-07-14T00:00:00Z,10030,Elvis,M,1994-02-17T00:00:00Z,,Demeyer,67492
32+
1959-01-27T00:00:00Z,10031,Karsten,M,1994-02-17T00:00:00Z,,Joslin,37716
33+
1960-08-09T00:00:00Z,10032,Jeong,F,1990-06-20T00:00:00Z,,Reistad,62233
34+
1956-11-14T00:00:00Z,10033,Arif,M,1987-03-18T00:00:00Z,,Merlo,70011
35+
1962-12-29T00:00:00Z,10034,Bader,M,1988-09-05T00:00:00Z,,Swan,39878
36+
1953-02-08T00:00:00Z,10035,Alain,M,1988-09-05T00:00:00Z,,Chappelet,25945
37+
1959-08-10T00:00:00Z,10036,Adamantios,M,1992-01-03T00:00:00Z,,Portugali,60781
38+
1963-07-22T00:00:00Z,10037,Pradeep,M,1990-12-05T00:00:00Z,,Makrucki,37691
39+
1960-07-20T00:00:00Z,10038,Huan,M,1989-09-20T00:00:00Z,,Lortz,35222
40+
1959-10-01T00:00:00Z,10039,Alejandro,M,1988-01-19T00:00:00Z,,Brender,36051
41+
1959-09-13T00:00:00Z,10040,Weiyi,F,1993-02-14T00:00:00Z,,Meriste,37112
42+
1959-08-27T00:00:00Z,10041,Uri,F,1989-11-12T00:00:00Z,1,Lenart,56415
43+
1956-02-26T00:00:00Z,10042,Magy,F,1993-03-21T00:00:00Z,3,Stamatiou,30404
44+
1960-09-19T00:00:00Z,10043,Yishay,M,1990-10-20T00:00:00Z,1,Tzvieli,34341
45+
1961-09-21T00:00:00Z,10044,Mingsen,F,1994-05-21T00:00:00Z,1,Casley,39728
46+
1957-08-14T00:00:00Z,10045,Moss,M,1989-09-02T00:00:00Z,3,Shanbhogue,74970
47+
1960-07-23T00:00:00Z,10046,Lucien,M,1992-06-20T00:00:00Z,4,Rosenbaum,50064
48+
1952-06-29T00:00:00Z,10047,Zvonko,M,1989-03-31T00:00:00Z,4,Nyanchama,42716
49+
1963-07-11T00:00:00Z,10048,Florian,M,1985-02-24T00:00:00Z,3,Syrotiuk,26436
50+
1961-04-24T00:00:00Z,10049,Basil,F,1992-05-04T00:00:00Z,5,Tramer,37853
51+
1958-05-21T00:00:00Z,10050,Yinghua,M,1990-12-25T00:00:00Z,2,Dredge,43026
52+
1953-07-28T00:00:00Z,10051,Hidefumi,M,1992-10-15T00:00:00Z,3,Caine,58121
53+
1961-02-26T00:00:00Z,10052,Heping,M,1988-05-21T00:00:00Z,1,Nitsch,55360
54+
1954-09-13T00:00:00Z,10053,Sanjiv,F,1986-02-04T00:00:00Z,3,Zschoche,54462
55+
1957-04-04T00:00:00Z,10054,Mayumi,M,1995-03-13T00:00:00Z,4,Schueller,65367
56+
1956-06-06T00:00:00Z,10055,Georgy,M,1992-04-27T00:00:00Z,5,Dredge,49281
57+
1961-09-01T00:00:00Z,10056,Brendon,F,1990-02-01T00:00:00Z,2,Bernini,33370
58+
1954-05-30T00:00:00Z,10057,Ebbe,F,1992-01-15T00:00:00Z,4,Callaway,27215
59+
1954-10-01T00:00:00Z,10058,Berhard,M,1987-04-13T00:00:00Z,3,McFarlin,38376
60+
1953-09-19T00:00:00Z,10059,Alejandro,F,1991-06-26T00:00:00Z,2,McAlpine,44307
61+
1961-10-15T00:00:00Z,10060,Breannda,M,1987-11-02T00:00:00Z,2,Billingsley,29175
62+
1962-10-19T00:00:00Z,10061,Tse,M,1985-09-17T00:00:00Z,1,Herber,49095
63+
1961-11-02T00:00:00Z,10062,Anoosh,M,1991-08-30T00:00:00Z,3,Peyn,65030
64+
1952-08-06T00:00:00Z,10063,Gino,F,1989-04-08T00:00:00Z,3,Leonhardt,52121
65+
1959-04-07T00:00:00Z,10064,Udi,M,1985-11-20T00:00:00Z,5,Jansch,33956
66+
1963-04-14T00:00:00Z,10065,Satosi,M,1988-05-18T00:00:00Z,2,Awdeh,50249
67+
1952-11-13T00:00:00Z,10066,Kwee,M,1986-02-26T00:00:00Z,5,Schusler,31897
68+
1953-01-07T00:00:00Z,10067,Claudi,M,1987-03-04T00:00:00Z,2,Stavenow,52044
69+
1962-11-26T00:00:00Z,10068,Charlene,M,1987-08-07T00:00:00Z,3,Brattka,28941
70+
1960-09-06T00:00:00Z,10069,Margareta,F,1989-11-05T00:00:00Z,5,Bierman,41933
71+
1955-08-20T00:00:00Z,10070,Reuven,M,1985-10-14T00:00:00Z,3,Garigliano,54329
72+
1958-01-21T00:00:00Z,10071,Hisao,M,1987-10-01T00:00:00Z,2,Lipner,40612
73+
1952-05-15T00:00:00Z,10072,Hironoby,F,1988-07-21T00:00:00Z,5,Sidou,54518
74+
1954-02-23T00:00:00Z,10073,Shir,M,1991-12-01T00:00:00Z,4,McClurg,32568
75+
1955-08-28T00:00:00Z,10074,Mokhtar,F,1990-08-13T00:00:00Z,5,Bernatsky,38992
76+
1960-03-09T00:00:00Z,10075,Gao,F,1987-03-19T00:00:00Z,5,Dolinsky,51956
77+
1952-06-13T00:00:00Z,10076,Erez,F,1985-07-09T00:00:00Z,3,Ritzmann,62405
78+
1964-04-18T00:00:00Z,10077,Mona,M,1990-03-02T00:00:00Z,5,Azuma,46595
79+
1959-12-25T00:00:00Z,10078,Danel,F,1987-05-26T00:00:00Z,2,Mondadori,69904
80+
1961-10-05T00:00:00Z,10079,Kshitij,F,1986-03-27T00:00:00Z,2,Gils,32263
81+
1957-12-03T00:00:00Z,10080,Premal,M,1985-11-19T00:00:00Z,5,Baek,52833
82+
1960-12-17T00:00:00Z,10081,Zhongwei,M,1986-10-30T00:00:00Z,2,Rosen,50128
83+
1963-09-09T00:00:00Z,10082,Parviz,M,1990-01-03T00:00:00Z,4,Lortz,49818
84+
1959-07-23T00:00:00Z,10083,Vishv,M,1987-03-31T00:00:00Z,1,Zockler,
85+
1960-05-25T00:00:00Z,10084,Tuval,M,1995-12-15T00:00:00Z,1,Kalloufi,
86+
1962-11-07T00:00:00Z,10085,Kenroku,M,1994-04-09T00:00:00Z,5,Malabarba,
87+
1962-11-19T00:00:00Z,10086,Somnath,M,1990-02-16T00:00:00Z,1,Foote,
88+
1959-07-23T00:00:00Z,10087,Xinglin,F,1986-09-08T00:00:00Z,5,Eugenio,
89+
1954-02-25T00:00:00Z,10088,Jungsoon,F,1988-09-02T00:00:00Z,5,Syrzycki,
90+
1963-03-21T00:00:00Z,10089,Sudharsan,F,1986-08-12T00:00:00Z,4,Flasterstein,
91+
1961-05-30T00:00:00Z,10090,Kendra,M,1986-03-14T00:00:00Z,2,Hofting,44956
92+
1955-10-04T00:00:00Z,10091,Amabile,M,1992-11-18T00:00:00Z,3,Gomatam,38645
93+
1964-10-18T00:00:00Z,10092,Valdiodio,F,1989-09-22T00:00:00Z,1,Niizuma,25976
94+
1964-06-11T00:00:00Z,10093,Sailaja,M,1996-11-05T00:00:00Z,3,Desikan,45656
95+
1957-05-25T00:00:00Z,10094,Arumugam,F,1987-04-18T00:00:00Z,5,Ossenbruggen,66817
96+
1965-01-03T00:00:00Z,10095,Hilari,M,1986-07-15T00:00:00Z,4,Morton,37702
97+
1954-09-16T00:00:00Z,10096,Jayson,M,1990-01-14T00:00:00Z,4,Mandell,43889
98+
1952-02-27T00:00:00Z,10097,Remzi,M,1990-09-15T00:00:00Z,3,Waschkowski,71165
99+
1961-09-23T00:00:00Z,10098,Sreekrishna,F,1985-05-13T00:00:00Z,4,Servieres,44817
100+
1956-05-25T00:00:00Z,10099,Valter,F,1988-10-18T00:00:00Z,2,Sullins,73578
101+
1953-04-21T00:00:00Z,10100,Hironobu,F,1987-09-21T00:00:00Z,4,Haraldson,68431
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
DROP TABLE IF EXISTS "test_emp_with_nulls";
2+
CREATE TABLE "test_emp_with_nulls" (
3+
"birth_date" TIMESTAMP WITH TIME ZONE,
4+
"emp_no" INT,
5+
"first_name" VARCHAR(50),
6+
"gender" VARCHAR(1),
7+
"hire_date" TIMESTAMP WITH TIME ZONE,
8+
"languages" TINYINT,
9+
"last_name" VARCHAR(50),
10+
"salary" INT
11+
)
12+
AS SELECT * FROM CSVREAD('classpath:/employees_with_nulls.csv');

0 commit comments

Comments
 (0)