Skip to content

Commit 12ffe4a

Browse files
authored
QL: handle IP type fields extraction with ignore_malformed property (#66622) (#66686)
Return null for any field that is present in the _ignored section of the response, not only numerics and IPs. (cherry picked from commit 106719f)
1 parent d403922 commit 12ffe4a

File tree

2 files changed

+279
-12
lines changed

2 files changed

+279
-12
lines changed

x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/execution/search/extractor/AbstractFieldHitExtractor.java

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -129,16 +129,13 @@ public Object extract(SearchHit hit) {
129129
// if the field was ignored because it was malformed and ignore_malformed was turned on
130130
if (fullFieldName != null
131131
&& hit.getFields().containsKey(IgnoredFieldMapper.NAME)
132-
&& isFromDocValuesOnly(dataType) == false
133-
&& dataType.isNumeric()) {
132+
&& isFromDocValuesOnly(dataType) == false) {
134133
/*
135-
* ignore_malformed makes sense for extraction from _source for numeric fields only.
136-
* And we check here that the data type is actually a numeric one to rule out
137-
* any non-numeric sub-fields (for which the "parent" field should actually be extracted from _source).
134+
* We check here the presence of the field name (fullFieldName including the parent name) in the list
135+
* of _ignored fields (due to malformed data, which was ignored).
138136
* For example, in the case of a malformed number, a "byte" field with "ignore_malformed: true"
139137
* with a "text" sub-field should return "null" for the "byte" parent field and the actual malformed
140-
* data for the "text" sub-field. Also, the _ignored section of the response contains the full field
141-
* name, thus the need to do the comparison with that and not only the field name.
138+
* data for the "text" sub-field.
142139
*/
143140
if (hit.getFields().get(IgnoredFieldMapper.NAME).getValues().contains(fullFieldName)) {
144141
return null;
@@ -202,7 +199,7 @@ protected Object unwrapMultiValue(Object values) {
202199
}
203200
return result;
204201
}
205-
} else if (DataTypes.isString(dataType)) {
202+
} else if (DataTypes.isString(dataType) || dataType == DataTypes.IP) {
206203
return values.toString();
207204
} else {
208205
return values;

x-pack/plugin/sql/qa/server/src/main/java/org/elasticsearch/xpack/sql/qa/FieldExtractorTestCase.java

Lines changed: 274 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -345,25 +345,155 @@ public void testBooleanField() throws IOException {
345345

346346
/*
347347
* "ip_field": {
348-
* "type": "ip"
348+
* "type": "ip",
349+
* "ignore_malformed": true/false
349350
* }
350351
*/
351352
public void testIpField() throws IOException {
352353
String query = "SELECT ip_field FROM test";
353354
String ipField = "192.168.1.1";
355+
String actualValue = ipField;
354356
boolean explicitSourceSetting = randomBoolean(); // default (no _source setting) or explicit setting
355357
boolean enableSource = randomBoolean(); // enable _source at index level
358+
boolean ignoreMalformed = randomBoolean();
356359

357360
Map<String, Object> indexProps = new HashMap<>(1);
358361
indexProps.put("_source", enableSource);
359362

360-
createIndexWithFieldTypeAndProperties("ip", null, explicitSourceSetting ? indexProps : null);
361-
index("{\"ip_field\":\"" + ipField + "\"}");
363+
Map<String, Map<String, Object>> fieldProps = null;
364+
if (ignoreMalformed) {
365+
fieldProps = new HashMap<>(1);
366+
Map<String, Object> fieldProp = new HashMap<>(1);
367+
// on purpose use a non-IP and check for null when querying the field's value
368+
fieldProp.put("ignore_malformed", true);
369+
fieldProps.put("ip_field", fieldProp);
370+
actualValue = "foo";
371+
}
372+
createIndexWithFieldTypeAndProperties("ip", fieldProps, explicitSourceSetting ? indexProps : null);
373+
index("{\"ip_field\":\"" + actualValue + "\"}");
362374

363375
if (explicitSourceSetting == false || enableSource) {
364376
Map<String, Object> expected = new HashMap<>();
365377
expected.put("columns", Arrays.asList(columnInfo("plain", "ip_field", "ip", JDBCType.VARCHAR, Integer.MAX_VALUE)));
366-
expected.put("rows", singletonList(singletonList(ipField)));
378+
expected.put("rows", singletonList(singletonList(ignoreMalformed ? null : actualValue)));
379+
assertResponse(expected, runSql(query));
380+
} else {
381+
expectSourceDisabledError(query);
382+
}
383+
}
384+
385+
/*
386+
* "geo_point_field": {
387+
* "type": "geo_point",
388+
* "ignore_malformed": true/false
389+
* }
390+
*/
391+
public void testGeoPointField() throws IOException {
392+
String query = "SELECT geo_point_field FROM test";
393+
String geoPointField = "41.12,-71.34";
394+
String geoPointFromDocValues = "POINT (-71.34000004269183 41.1199999647215)";
395+
String actualValue = geoPointField;
396+
boolean explicitSourceSetting = randomBoolean(); // default (no _source setting) or explicit setting
397+
boolean enableSource = randomBoolean(); // enable _source at index level
398+
boolean ignoreMalformed = randomBoolean();
399+
400+
Map<String, Object> indexProps = new HashMap<>(1);
401+
indexProps.put("_source", enableSource);
402+
403+
Map<String, Map<String, Object>> fieldProps = null;
404+
if (ignoreMalformed) {
405+
fieldProps = new HashMap<>(1);
406+
Map<String, Object> fieldProp = new HashMap<>(1);
407+
// on purpose use a non-geo-point and check for null when querying the field's value
408+
fieldProp.put("ignore_malformed", true);
409+
fieldProps.put("geo_point_field", fieldProp);
410+
actualValue = "foo";
411+
}
412+
createIndexWithFieldTypeAndProperties("geo_point", fieldProps, explicitSourceSetting ? indexProps : null);
413+
index("{\"geo_point_field\":\"" + actualValue + "\"}");
414+
415+
// the values come from docvalues (vs from _source) so disabling the source doesn't have any impact on the values returned
416+
Map<String, Object> expected = new HashMap<>();
417+
expected.put("columns", Arrays.asList(columnInfo("plain", "geo_point_field", "geo_point", JDBCType.VARCHAR, Integer.MAX_VALUE)));
418+
expected.put("rows", singletonList(singletonList(ignoreMalformed ? null : geoPointFromDocValues)));
419+
assertResponse(expected, runSql(query));
420+
}
421+
422+
/*
423+
* "geo_shape_field": {
424+
* "type": "point",
425+
* "ignore_malformed": true/false
426+
* }
427+
*/
428+
public void testGeoShapeField() throws IOException {
429+
String query = "SELECT geo_shape_field FROM test";
430+
String actualValue = "[-77.03653, 38.897676]";
431+
boolean explicitSourceSetting = randomBoolean(); // default (no _source setting) or explicit setting
432+
boolean enableSource = randomBoolean(); // enable _source at index level
433+
boolean ignoreMalformed = randomBoolean();
434+
435+
Map<String, Object> indexProps = new HashMap<>(1);
436+
indexProps.put("_source", enableSource);
437+
438+
Map<String, Map<String, Object>> fieldProps = null;
439+
if (ignoreMalformed) {
440+
fieldProps = new HashMap<>(1);
441+
Map<String, Object> fieldProp = new HashMap<>(1);
442+
// on purpose use a non-geo-shape and check for null when querying the field's value
443+
fieldProp.put("ignore_malformed", true);
444+
fieldProps.put("geo_shape_field", fieldProp);
445+
actualValue = "\"foo\"";
446+
}
447+
createIndexWithFieldTypeAndProperties("geo_shape", fieldProps, explicitSourceSetting ? indexProps : null);
448+
index("{\"geo_shape_field\":{\"type\":\"point\",\"coordinates\":" + actualValue + "}}");
449+
450+
if (explicitSourceSetting == false || enableSource) {
451+
Map<String, Object> expected = new HashMap<>();
452+
expected.put(
453+
"columns",
454+
Arrays.asList(columnInfo("plain", "geo_shape_field", "geo_shape", JDBCType.VARCHAR, Integer.MAX_VALUE))
455+
);
456+
expected.put("rows", singletonList(singletonList(ignoreMalformed ? null : "POINT (-77.03653 38.897676)")));
457+
assertResponse(expected, runSql(query));
458+
} else {
459+
expectSourceDisabledError(query);
460+
}
461+
}
462+
463+
/*
464+
* "shape_field": {
465+
* "type": "shape",
466+
* "ignore_malformed": true/false
467+
* }
468+
*/
469+
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/66678")
470+
public void testShapeField() throws IOException {
471+
String query = "SELECT shape_field FROM test";
472+
String shapeField = "POINT (-377.03653 389.897676)";
473+
String actualValue = shapeField;
474+
boolean explicitSourceSetting = randomBoolean(); // default (no _source setting) or explicit setting
475+
boolean enableSource = randomBoolean(); // enable _source at index level
476+
boolean ignoreMalformed = randomBoolean();
477+
478+
Map<String, Object> indexProps = new HashMap<>(1);
479+
indexProps.put("_source", enableSource);
480+
481+
Map<String, Map<String, Object>> fieldProps = null;
482+
if (ignoreMalformed) {
483+
fieldProps = new HashMap<>(1);
484+
Map<String, Object> fieldProp = new HashMap<>(1);
485+
// on purpose use a non-geo-point and check for null when querying the field's value
486+
fieldProp.put("ignore_malformed", true);
487+
fieldProps.put("shape_field", fieldProp);
488+
actualValue = "foo";
489+
}
490+
createIndexWithFieldTypeAndProperties("shape", fieldProps, explicitSourceSetting ? indexProps : null);
491+
index("{\"shape_field\":\"" + actualValue + "\"}");
492+
493+
if (explicitSourceSetting == false || enableSource) {
494+
Map<String, Object> expected = new HashMap<>();
495+
expected.put("columns", Arrays.asList(columnInfo("plain", "shape_field", "shape", JDBCType.VARCHAR, Integer.MAX_VALUE)));
496+
expected.put("rows", singletonList(singletonList(ignoreMalformed ? null : "POINT (-377.03653 389.897676)")));
367497
assertResponse(expected, runSql(query));
368498
} else {
369499
expectSourceDisabledError(query);
@@ -584,6 +714,65 @@ public void testTextFieldWithIntegerNumberSubfield() throws IOException {
584714
}
585715
}
586716

717+
/*
718+
* "text_field": {
719+
* "type": "text",
720+
* "fields": {
721+
* "ip_subfield": {
722+
* "type": "ip",
723+
* "ignore_malformed": true/false
724+
* }
725+
* }
726+
* }
727+
*/
728+
public void testTextFieldWithIpSubfield() throws IOException {
729+
String ip = "123.123.123.123";
730+
boolean explicitSourceSetting = randomBoolean(); // default (no _source setting) or explicit setting
731+
boolean enableSource = randomBoolean(); // enable _source at index level
732+
boolean ignoreMalformed = randomBoolean(); // ignore_malformed is true, thus test a non-IP value
733+
String actualValue = ip;
734+
String fieldName = "text_field";
735+
String subFieldName = "text_field.ip_subfield";
736+
String query = "SELECT " + fieldName + "," + subFieldName + " FROM test";
737+
738+
Map<String, Object> indexProps = new HashMap<>(1);
739+
indexProps.put("_source", enableSource);
740+
741+
Map<String, Map<String, Object>> subFieldsProps = null;
742+
if (ignoreMalformed) {
743+
subFieldsProps = new HashMap<>(1);
744+
Map<String, Object> fieldProp = new HashMap<>(1);
745+
// on purpose use a non-IP value instead of an IP and check for null when querying the field's value
746+
fieldProp.put("ignore_malformed", true);
747+
subFieldsProps.put(subFieldName, fieldProp);
748+
actualValue = "foo";
749+
}
750+
751+
createIndexWithFieldTypeAndSubFields("text", null, explicitSourceSetting ? indexProps : null, subFieldsProps, "ip");
752+
index("{\"" + fieldName + "\":\"" + actualValue + "\"}");
753+
754+
if (explicitSourceSetting == false || enableSource) {
755+
Map<String, Object> expected = new HashMap<>();
756+
expected.put(
757+
"columns",
758+
Arrays.asList(
759+
columnInfo("plain", fieldName, "text", JDBCType.VARCHAR, Integer.MAX_VALUE),
760+
columnInfo("plain", subFieldName, "ip", JDBCType.VARCHAR, Integer.MAX_VALUE)
761+
)
762+
);
763+
if (ignoreMalformed) {
764+
expected.put("rows", singletonList(Arrays.asList("foo", null)));
765+
} else {
766+
expected.put("rows", singletonList(Arrays.asList(ip, ip)));
767+
}
768+
assertResponse(expected, runSql(query));
769+
} else {
770+
expectSourceDisabledError(query);
771+
// if the _source is disabled, selecting only the ip sub-field shouldn't work as well
772+
expectSourceDisabledError("SELECT " + subFieldName + " FROM test");
773+
}
774+
}
775+
587776
/*
588777
* "integer_field": {
589778
* "type": "integer",
@@ -663,6 +852,85 @@ public void testNumberFieldWithTextOrKeywordSubfield() throws IOException {
663852
}
664853
}
665854

855+
/*
856+
* "ip_field": {
857+
* "type": "ip",
858+
* "ignore_malformed": true/false,
859+
* "fields": {
860+
* "keyword_subfield/text_subfield": {
861+
* "type": "keyword/text"
862+
* }
863+
* }
864+
* }
865+
*/
866+
public void testIpFieldWithTextOrKeywordSubfield() throws IOException {
867+
String ip = "123.123.123.123";
868+
boolean explicitSourceSetting = randomBoolean(); // default (no _source setting) or explicit setting
869+
boolean enableSource = randomBoolean(); // enable _source at index level
870+
boolean ignoreMalformed = randomBoolean(); // ignore_malformed is true, thus test a non-number value
871+
boolean isKeyword = randomBoolean(); // text or keyword subfield
872+
String actualValue = ip;
873+
String fieldName = "ip_field";
874+
String subFieldName = "ip_field." + (isKeyword ? "keyword_subfield" : "text_subfield");
875+
String query = "SELECT " + fieldName + "," + subFieldName + " FROM test";
876+
877+
Map<String, Object> indexProps = new HashMap<>(1);
878+
indexProps.put("_source", enableSource);
879+
880+
Map<String, Map<String, Object>> fieldProps = null;
881+
if (ignoreMalformed) {
882+
fieldProps = new HashMap<>(1);
883+
Map<String, Object> fieldProp = new HashMap<>(1);
884+
// on purpose use a non-IP instead of an ip and check for null when querying the field's value
885+
fieldProp.put("ignore_malformed", true);
886+
fieldProps.put(fieldName, fieldProp);
887+
actualValue = "foo";
888+
}
889+
890+
createIndexWithFieldTypeAndSubFields(
891+
"ip",
892+
fieldProps,
893+
explicitSourceSetting ? indexProps : null,
894+
null,
895+
isKeyword ? "keyword" : "text"
896+
);
897+
index("{\"" + fieldName + "\":\"" + actualValue + "\"}");
898+
899+
if (explicitSourceSetting == false || enableSource) {
900+
Map<String, Object> expected = new HashMap<>();
901+
expected.put(
902+
"columns",
903+
Arrays.asList(
904+
columnInfo("plain", fieldName, "ip", JDBCType.VARCHAR, Integer.MAX_VALUE),
905+
columnInfo("plain", subFieldName, isKeyword ? "keyword" : "text", JDBCType.VARCHAR, Integer.MAX_VALUE)
906+
)
907+
);
908+
if (ignoreMalformed) {
909+
expected.put("rows", singletonList(Arrays.asList(null, "foo")));
910+
} else {
911+
expected.put("rows", singletonList(Arrays.asList(ip, ip)));
912+
}
913+
assertResponse(expected, runSql(query));
914+
} else {
915+
if (isKeyword) {
916+
// selecting only the keyword subfield when the _source is disabled should work
917+
Map<String, Object> expected = new HashMap<>();
918+
expected.put("columns", singletonList(columnInfo("plain", subFieldName, "keyword", JDBCType.VARCHAR, Integer.MAX_VALUE)));
919+
if (ignoreMalformed) {
920+
expected.put("rows", singletonList(singletonList("foo")));
921+
} else {
922+
expected.put("rows", singletonList(singletonList(ip)));
923+
}
924+
assertResponse(expected, runSql("SELECT ip_field.keyword_subfield FROM test"));
925+
} else {
926+
expectSourceDisabledError(query);
927+
}
928+
929+
// if the _source is disabled, selecting only the ip field shouldn't work
930+
expectSourceDisabledError("SELECT " + fieldName + " FROM test");
931+
}
932+
}
933+
666934
/*
667935
* "integer_field": {
668936
* "type": "integer",
@@ -946,6 +1214,8 @@ private JDBCType jdbcTypeFor(String esType) {
9461214
return JDBCType.FLOAT;
9471215
case "scaled_float":
9481216
return JDBCType.DOUBLE;
1217+
case "ip":
1218+
return JDBCType.VARCHAR;
9491219
default:
9501220
throw new AssertionError("Illegal value [" + esType + "] for data type");
9511221
}

0 commit comments

Comments
 (0)