Skip to content

Commit 2b35157

Browse files
authored
SQL: Add multi_value_field_leniency inside FieldHitExtractor (#40113)
For cases where fields can have multiple values, allow the behavior to be customized through a dedicated configuration field. By default, leniency is enabled on the drivers so that existing datasets work instead of throwing an exception. For regular SQL usage, leniency is disabled so that the user is aware of the underlying data. Fix #39700
1 parent ad90055 commit 2b35157

File tree

27 files changed

+278
-116
lines changed

27 files changed

+278
-116
lines changed

docs/reference/sql/endpoints/jdbc.asciidoc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,10 @@ Query timeout (in seconds). That is the maximum amount of time waiting for a que
122122

123123
`proxy.socks`:: SOCKS proxy host name
124124

125+
[float]
126+
==== Mapping
127+
`field.multi.value.leniency` (default `true`):: Whether to be lenient and return the first value for fields with multiple values (true) or throw an exception (false).
128+
125129
[float]
126130
==== Additional
127131

docs/reference/sql/endpoints/rest.asciidoc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,10 @@ More information available https://docs.oracle.com/javase/8/docs/api/java/time/Z
356356
|false
357357
|Return the results in a columnar fashion, rather than row-based fashion. Valid for `json`, `yaml`, `cbor` and `smile`.
358358

359+
|field_multi_value_leniency
360+
|false
361+
|Throw an exception when encountering multiple values for a field (default) or be lenient and return the first value from the list (without any guarantees of what that will be - typically the first in natural ascending order).
362+
359363
|===
360364

361365
Do note that most parameters (outside the timeout and `columnar` ones) make sense only during the initial query - any follow-up pagination request only requires the `cursor` parameter as explained in the <<sql-pagination, pagination>> chapter.

docs/reference/sql/limitations.asciidoc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ pagination taking place on the **root nested document and not on its inner hits*
6060

6161
Array fields are not supported due to the "invisible" way in which {es} handles an array of values: the mapping doesn't indicate whether
6262
a field is an array (has multiple values) or not, so without reading all the data, {es-sql} cannot know whether a field is a single or multi value.
63+
When multiple values are returned for a field, by default, {es-sql} will throw an exception. However, it is possible to change this behavior through the `field_multi_value_leniency` parameter in REST (disabled by default) or
64+
`field.multi.value.leniency` in drivers (enabled by default).
6365

6466
[float]
6567
=== Sorting by aggregation

x-pack/plugin/sql/jdbc/src/main/java/org/elasticsearch/xpack/sql/jdbc/JdbcConfiguration.java

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,20 +47,25 @@ class JdbcConfiguration extends ConnectionConfiguration {
4747
// can be out/err/url
4848
static final String DEBUG_OUTPUT_DEFAULT = "err";
4949

50-
public static final String TIME_ZONE = "timezone";
50+
static final String TIME_ZONE = "timezone";
5151
// follow the JDBC spec and use the JVM default...
5252
// to avoid inconsistency, the default is picked up once at startup and reused across connections
5353
// to cater to the principle of least surprise
5454
// really, the way to move forward is to specify a calendar or the timezone manually
5555
static final String TIME_ZONE_DEFAULT = TimeZone.getDefault().getID();
5656

57+
static final String FIELD_MULTI_VALUE_LENIENCY = "field.multi.value.leniency";
58+
static final String FIELD_MULTI_VALUE_LENIENCY_DEFAULT = "true";
59+
60+
5761
// options that don't change at runtime
58-
private static final Set<String> OPTION_NAMES = new LinkedHashSet<>(Arrays.asList(TIME_ZONE, DEBUG, DEBUG_OUTPUT));
62+
private static final Set<String> OPTION_NAMES = new LinkedHashSet<>(
63+
Arrays.asList(TIME_ZONE, FIELD_MULTI_VALUE_LENIENCY, DEBUG, DEBUG_OUTPUT));
5964

6065
static {
6166
// trigger version initialization
6267
// typically this should have already happened but in case the
63-
// JdbcDriver/JdbcDataSource are not used and the impl. classes used directly
68+
// EsDriver/EsDataSource are not used and the impl. classes used directly
6469
// this covers that case
6570
Version.CURRENT.toString();
6671
}
@@ -71,6 +76,7 @@ class JdbcConfiguration extends ConnectionConfiguration {
7176

7277
// mutable ones
7378
private ZoneId zoneId;
79+
private boolean fieldMultiValueLeniency;
7480

7581
public static JdbcConfiguration create(String u, Properties props, int loginTimeoutSeconds) throws JdbcSQLException {
7682
URI uri = parseUrl(u);
@@ -151,6 +157,8 @@ private JdbcConfiguration(URI baseURI, String u, Properties props) throws JdbcSQ
151157

152158
this.zoneId = parseValue(TIME_ZONE, props.getProperty(TIME_ZONE, TIME_ZONE_DEFAULT),
153159
s -> TimeZone.getTimeZone(s).toZoneId().normalized());
160+
this.fieldMultiValueLeniency = parseValue(FIELD_MULTI_VALUE_LENIENCY,
161+
props.getProperty(FIELD_MULTI_VALUE_LENIENCY, FIELD_MULTI_VALUE_LENIENCY_DEFAULT), Boolean::parseBoolean);
154162
}
155163

156164
@Override
@@ -174,6 +182,10 @@ public TimeZone timeZone() {
174182
return zoneId != null ? TimeZone.getTimeZone(zoneId) : null;
175183
}
176184

185+
public boolean fieldMultiValueLeniency() {
186+
return fieldMultiValueLeniency;
187+
}
188+
177189
public static boolean canAccept(String url) {
178190
return (StringUtils.hasText(url) && url.trim().startsWith(JdbcConfiguration.URL_PREFIX));
179191
}

x-pack/plugin/sql/jdbc/src/main/java/org/elasticsearch/xpack/sql/jdbc/JdbcHttpClient.java

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,15 @@ boolean ping(long timeoutInMs) throws SQLException {
4949

5050
Cursor query(String sql, List<SqlTypedParamValue> params, RequestMeta meta) throws SQLException {
5151
int fetch = meta.fetchSize() > 0 ? meta.fetchSize() : conCfg.pageSize();
52-
SqlQueryRequest sqlRequest = new SqlQueryRequest(sql, params, null, conCfg.zoneId(),
52+
SqlQueryRequest sqlRequest = new SqlQueryRequest(sql, params, conCfg.zoneId(),
5353
fetch,
54-
TimeValue.timeValueMillis(meta.timeoutInMs()), TimeValue.timeValueMillis(meta.queryTimeoutInMs()),
55-
false, new RequestInfo(Mode.JDBC));
54+
TimeValue.timeValueMillis(meta.timeoutInMs()),
55+
TimeValue.timeValueMillis(meta.queryTimeoutInMs()),
56+
null,
57+
Boolean.FALSE,
58+
null,
59+
new RequestInfo(Mode.JDBC),
60+
conCfg.fieldMultiValueLeniency());
5661
SqlQueryResponse response = httpClient.query(sqlRequest);
5762
return new DefaultCursor(this, response.cursor(), toJdbcColumnInfo(response.columns()), response.rows(), meta);
5863
}

x-pack/plugin/sql/qa/src/main/java/org/elasticsearch/xpack/sql/qa/jdbc/JdbcIntegrationTestCase.java

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,33 @@ public static String elasticsearchAddress() {
5050
}
5151

5252
public Connection esJdbc() throws SQLException {
53-
return randomBoolean() ? useDriverManager() : useDataSource();
53+
return esJdbc(connectionProperties());
5454
}
5555

56+
public Connection esJdbc(Properties props) throws SQLException {
57+
return createConnection(props);
58+
}
59+
60+
protected Connection createConnection(Properties connectionProperties) throws SQLException {
61+
String elasticsearchAddress = getProtocol() + "://" + elasticsearchAddress();
62+
String address = "jdbc:es://" + elasticsearchAddress;
63+
Connection connection = null;
64+
if (randomBoolean()) {
65+
connection = DriverManager.getConnection(address, connectionProperties);
66+
} else {
67+
EsDataSource dataSource = new EsDataSource();
68+
dataSource.setUrl(address);
69+
dataSource.setProperties(connectionProperties);
70+
connection = dataSource.getConnection();
71+
}
72+
73+
assertNotNull("The timezone should be specified", connectionProperties.getProperty("timezone"));
74+
return connection;
75+
}
76+
77+
//
78+
// methods below are used inside the documentation only
79+
//
5680
protected Connection useDriverManager() throws SQLException {
5781
String elasticsearchAddress = getProtocol() + "://" + elasticsearchAddress();
5882
// tag::connect-dm
@@ -114,6 +138,8 @@ protected String clusterName() {
114138
protected Properties connectionProperties() {
115139
Properties connectionProperties = new Properties();
116140
connectionProperties.put("timezone", randomKnownTimeZone());
141+
// in the tests, don't be lenient towards multi values
142+
connectionProperties.put("field.multi.value.leniency", "false");
117143
return connectionProperties;
118144
}
119145

x-pack/plugin/sql/qa/src/main/java/org/elasticsearch/xpack/sql/qa/jdbc/ResultSetTestCase.java

Lines changed: 72 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,11 @@
99
import org.elasticsearch.common.CheckedBiFunction;
1010
import org.elasticsearch.common.CheckedConsumer;
1111
import org.elasticsearch.common.CheckedFunction;
12+
import org.elasticsearch.common.CheckedSupplier;
1213
import org.elasticsearch.common.Strings;
1314
import org.elasticsearch.common.collect.Tuple;
1415
import org.elasticsearch.common.xcontent.XContentBuilder;
1516
import org.elasticsearch.common.xcontent.json.JsonXContent;
16-
import org.elasticsearch.xpack.sql.jdbc.EsDataSource;
1717
import org.elasticsearch.xpack.sql.jdbc.EsType;
1818

1919
import java.io.IOException;
@@ -22,7 +22,6 @@
2222
import java.sql.Blob;
2323
import java.sql.Clob;
2424
import java.sql.Connection;
25-
import java.sql.DriverManager;
2625
import java.sql.NClob;
2726
import java.sql.PreparedStatement;
2827
import java.sql.ResultSet;
@@ -80,6 +79,34 @@ public class ResultSetTestCase extends JdbcIntegrationTestCase {
8079
dateTimeTestingFields.put(new Tuple<String, Object>("test_keyword", "true"), EsType.KEYWORD);
8180
}
8281

82+
public void testMultiValueFieldWithMultiValueLeniencyEnabled() throws Exception {
83+
createTestDataForMultiValueTests();
84+
85+
doWithQuery(() -> esWithLeniency(true), "SELECT int, keyword FROM test", (results) -> {
86+
results.next();
87+
Object number = results.getObject(1);
88+
Object string = results.getObject(2);
89+
assertEquals(-10, number);
90+
assertEquals("-10", string);
91+
assertFalse(results.next());
92+
});
93+
}
94+
95+
public void testMultiValueFieldWithMultiValueLeniencyDisabled() throws Exception {
96+
createTestDataForMultiValueTests();
97+
98+
SQLException expected = expectThrows(SQLException.class,
99+
() -> doWithQuery(() -> esWithLeniency(false), "SELECT int, keyword FROM test", (results) -> {
100+
}));
101+
assertTrue(expected.getMessage().contains("Arrays (returned by [int]) are not supported"));
102+
103+
// the default test connection properties have multi-value leniency disabled
104+
expected = expectThrows(SQLException.class,
105+
() -> doWithQuery(() -> esJdbc(), "SELECT int, keyword FROM test", (results) -> {
106+
}));
107+
108+
}
109+
83110
// Byte values testing
84111
public void testGettingValidByteWithoutCasting() throws Exception {
85112
byte random1 = randomByte();
@@ -1132,7 +1159,7 @@ public void testValidGetObjectCalls() throws Exception {
11321159
/*
11331160
* Checks StackOverflowError fix for https://github.com/elastic/elasticsearch/pull/31735
11341161
*/
1135-
public void testNoInfiniteRecursiveGetObjectCalls() throws SQLException, IOException {
1162+
public void testNoInfiniteRecursiveGetObjectCalls() throws Exception {
11361163
index("library", "1", builder -> {
11371164
builder.field("name", "Don Quixote");
11381165
builder.field("page_count", 1072);
@@ -1303,17 +1330,16 @@ public void testUnsupportedUpdateMethods() throws IOException, SQLException {
13031330
}
13041331

13051332
private void doWithQuery(String query, CheckedConsumer<ResultSet, SQLException> consumer) throws SQLException {
1306-
try (Connection connection = esJdbc()) {
1307-
try (PreparedStatement statement = connection.prepareStatement(query)) {
1308-
try (ResultSet results = statement.executeQuery()) {
1309-
consumer.accept(results);
1310-
}
1311-
}
1312-
}
1333+
doWithQuery(() -> esJdbc(), query, consumer);
13131334
}
13141335

13151336
private void doWithQueryAndTimezone(String query, String tz, CheckedConsumer<ResultSet, SQLException> consumer) throws SQLException {
1316-
try (Connection connection = esJdbc(tz)) {
1337+
doWithQuery(() -> esJdbc(tz), query, consumer);
1338+
}
1339+
1340+
private void doWithQuery(CheckedSupplier<Connection, SQLException> con, String query, CheckedConsumer<ResultSet, SQLException> consumer)
1341+
throws SQLException {
1342+
try (Connection connection = con.get()) {
13171343
try (PreparedStatement statement = connection.prepareStatement(query)) {
13181344
try (ResultSet results = statement.executeQuery()) {
13191345
consumer.accept(results);
@@ -1355,7 +1381,29 @@ protected static void updateMapping(String index, CheckedConsumer<XContentBuilde
13551381
client().performRequest(request);
13561382
}
13571383

1358-
private void createTestDataForByteValueTests(byte random1, byte random2, byte random3) throws Exception, IOException {
1384+
private void createTestDataForMultiValueTests() throws Exception {
1385+
createIndex("test");
1386+
updateMapping("test", builder -> {
1387+
builder.startObject("int").field("type", "integer").endObject();
1388+
builder.startObject("keyword").field("type", "keyword").endObject();
1389+
});
1390+
1391+
Integer[] values = randomArray(3, 15, s -> new Integer[s], () -> Integer.valueOf(randomInt(50)));
1392+
// add the minimal value in the middle yet the test will pick it up since the results are sorted
1393+
values[2] = Integer.valueOf(-10);
1394+
1395+
String[] stringValues = new String[values.length];
1396+
for (int i = 0; i < values.length; i++) {
1397+
stringValues[i] = String.valueOf(values[i]);
1398+
}
1399+
1400+
index("test", "1", builder -> {
1401+
builder.array("int", (Object[]) values);
1402+
builder.array("keyword", stringValues);
1403+
});
1404+
}
1405+
1406+
private void createTestDataForByteValueTests(byte random1, byte random2, byte random3) throws Exception {
13591407
createIndex("test");
13601408
updateMapping("test", builder -> {
13611409
builder.startObject("test_byte").field("type", "byte").endObject();
@@ -1373,7 +1421,7 @@ private void createTestDataForByteValueTests(byte random1, byte random2, byte ra
13731421
});
13741422
}
13751423

1376-
private void createTestDataForShortValueTests(short random1, short random2, short random3) throws Exception, IOException {
1424+
private void createTestDataForShortValueTests(short random1, short random2, short random3) throws Exception {
13771425
createIndex("test");
13781426
updateMapping("test", builder -> {
13791427
builder.startObject("test_short").field("type", "short").endObject();
@@ -1391,7 +1439,7 @@ private void createTestDataForShortValueTests(short random1, short random2, shor
13911439
});
13921440
}
13931441

1394-
private void createTestDataForIntegerValueTests(int random1, int random2, int random3) throws Exception, IOException {
1442+
private void createTestDataForIntegerValueTests(int random1, int random2, int random3) throws Exception {
13951443
createIndex("test");
13961444
updateMapping("test", builder -> {
13971445
builder.startObject("test_integer").field("type", "integer").endObject();
@@ -1409,7 +1457,7 @@ private void createTestDataForIntegerValueTests(int random1, int random2, int ra
14091457
});
14101458
}
14111459

1412-
private void createTestDataForLongValueTests(long random1, long random2, long random3) throws Exception, IOException {
1460+
private void createTestDataForLongValueTests(long random1, long random2, long random3) throws Exception {
14131461
createIndex("test");
14141462
updateMapping("test", builder -> {
14151463
builder.startObject("test_long").field("type", "long").endObject();
@@ -1427,7 +1475,7 @@ private void createTestDataForLongValueTests(long random1, long random2, long ra
14271475
});
14281476
}
14291477

1430-
private void createTestDataForDoubleValueTests(double random1, double random2, double random3) throws Exception, IOException {
1478+
private void createTestDataForDoubleValueTests(double random1, double random2, double random3) throws Exception {
14311479
createIndex("test");
14321480
updateMapping("test", builder -> {
14331481
builder.startObject("test_double").field("type", "double").endObject();
@@ -1445,7 +1493,7 @@ private void createTestDataForDoubleValueTests(double random1, double random2, d
14451493
});
14461494
}
14471495

1448-
private void createTestDataForFloatValueTests(float random1, float random2, float random3) throws Exception, IOException {
1496+
private void createTestDataForFloatValueTests(float random1, float random2, float random3) throws Exception {
14491497
createIndex("test");
14501498
updateMapping("test", builder -> {
14511499
builder.startObject("test_float").field("type", "float").endObject();
@@ -1481,7 +1529,7 @@ private void indexSimpleDocumentWithTrueValues(Long randomLongDate) throws IOExc
14811529
* Creates test data for all numeric get* methods. All values random and different from the other numeric fields already generated.
14821530
* It returns a map containing the field name and its randomly generated value to be later used in checking the returned values.
14831531
*/
1484-
private Map<String,Number> createTestDataForNumericValueTypes(Supplier<Number> randomGenerator) throws Exception, IOException {
1532+
private Map<String, Number> createTestDataForNumericValueTypes(Supplier<Number> randomGenerator) throws Exception {
14851533
Map<String,Number> map = new HashMap<>();
14861534
createIndex("test");
14871535
updateMappingForNumericValuesTests("test");
@@ -1575,31 +1623,19 @@ private Double getMaxLongPlusOne() {
15751623
}
15761624

15771625
private Connection esJdbc(String timeZoneId) throws SQLException {
1578-
return randomBoolean() ? useDriverManager(timeZoneId) : useDataSource(timeZoneId);
1579-
}
1580-
1581-
private Connection useDriverManager(String timeZoneId) throws SQLException {
1582-
String elasticsearchAddress = getProtocol() + "://" + elasticsearchAddress();
1583-
String address = "jdbc:es://" + elasticsearchAddress;
15841626
Properties connectionProperties = connectionProperties();
15851627
connectionProperties.put(JDBC_TIMEZONE, timeZoneId);
1586-
Connection connection = DriverManager.getConnection(address, connectionProperties);
1587-
1628+
Connection connection = esJdbc(connectionProperties);
15881629
assertNotNull("The timezone should be specified", connectionProperties.getProperty(JDBC_TIMEZONE));
15891630
return connection;
15901631
}
15911632

1592-
private Connection useDataSource(String timeZoneId) throws SQLException {
1593-
String elasticsearchAddress = getProtocol() + "://" + elasticsearchAddress();
1594-
EsDataSource dataSource = new EsDataSource();
1595-
String address = "jdbc:es://" + elasticsearchAddress;
1596-
dataSource.setUrl(address);
1633+
private Connection esWithLeniency(boolean multiValueLeniency) throws SQLException {
1634+
String property = "field.multi.value.leniency";
15971635
Properties connectionProperties = connectionProperties();
1598-
connectionProperties.put(JDBC_TIMEZONE, timeZoneId);
1599-
dataSource.setProperties(connectionProperties);
1600-
Connection connection = dataSource.getConnection();
1601-
1602-
assertNotNull("The timezone should be specified", connectionProperties.getProperty(JDBC_TIMEZONE));
1636+
connectionProperties.setProperty(property, Boolean.toString(multiValueLeniency));
1637+
Connection connection = esJdbc(connectionProperties);
1638+
assertNotNull("The leniency should be specified", connectionProperties.getProperty(property));
16031639
return connection;
16041640
}
16051641
}

x-pack/plugin/sql/sql-action/src/main/java/org/elasticsearch/xpack/sql/action/AbstractSqlQueryRequest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ public AbstractSqlQueryRequest() {
5757
}
5858

5959
public AbstractSqlQueryRequest(String query, List<SqlTypedParamValue> params, QueryBuilder filter, ZoneId zoneId,
60-
int fetchSize, TimeValue requestTimeout, TimeValue pageTimeout, RequestInfo requestInfo) {
60+
int fetchSize, TimeValue requestTimeout, TimeValue pageTimeout, RequestInfo requestInfo) {
6161
super(requestInfo);
6262
this.query = query;
6363
this.params = params;

0 commit comments

Comments
 (0)