From 858b0482e95f56c800d461f1902edc26788c68ec Mon Sep 17 00:00:00 2001 From: Adam Locke Date: Tue, 1 Feb 2022 13:18:45 -0500 Subject: [PATCH 1/7] [DOCS] Add documentation for Painless field API --- docs/reference/scripting/using.asciidoc | 90 +++++++++++++++++++++++-- 1 file changed, 85 insertions(+), 5 deletions(-) diff --git a/docs/reference/scripting/using.asciidoc b/docs/reference/scripting/using.asciidoc index 6e96ae67845ab..e463e7a146f6e 100644 --- a/docs/reference/scripting/using.asciidoc +++ b/docs/reference/scripting/using.asciidoc @@ -70,7 +70,7 @@ GET my-index-000001/_search "script_fields": { "my_doubled_field": { "script": { <1> - "source": "doc['my_field'].value * params['multiplier']", <2> + "source": "field('my_field').get(null) * params['multiplier']", <2> "params": { "multiplier": 2 } @@ -101,7 +101,7 @@ first value for `my_field` and then multiply it by `2`: [source,painless] ---- -"source": "return doc['my_field'].value * 2" +"source": "return field('my_field').get(null) * 2" ---- Though it works, this solution is pretty inflexible. We have to modify the @@ -114,7 +114,7 @@ the `multiplier` parameter without {es} recompiling the script. [source,painless] ---- -"source": "doc['my_field'].value * params['multiplier']", +"source": "field('my_field').get(null) * params['multiplier']", "params": { "multiplier": 2 } @@ -147,7 +147,7 @@ GET my-index-000001/_search "my_doubled_field": { "script": { "lang": "painless", - "source": "return doc['my_field'].value * params.get('multiplier');", + "source": "return field('my_field').get(null) * params.get('multiplier');", "params": { "multiplier": 2 } @@ -168,7 +168,7 @@ GET my-index-000001/_search "script_fields": { "my_doubled_field": { "script": { - "source": "doc['my_field'].value * params['multiplier']", + "source": "field('my_field').get(null) * params['multiplier']", "params": { "multiplier": 2 } @@ -197,6 +197,86 @@ them in other cases to remove ambiguity. 
Use this abbreviated syntax anywhere that {es} supports scripts, such as when you're creating <>. +[discrete] +[[script-fields-api]] +=== Access document fields +The examples from previous sections use the `field` API to access document +fields: + +[source,painless] +---- +field('my_field').get(null) +---- + +The `field` API fundamentally changes how you access documents in Painless. +Previously, you had to declare `doc` and the field name that you wanted to +access: + +[source,painless] +---- +doc['my_field'].value +---- + +Accessing document fields this way didn't handle missing values, which meant +that your Painless scripts had to check that fields exist and create +intermediary objects for handling such omissions. + +Instead, use the `field` API, which is the preferred approach to access +documents in Painless. The `field` API abstracts access options (such as +`_source` or `doc_values`) and provides a field-centric view of your documents. + +The `field` API always returns a non-null object that represents the given field, +regardless of whether the field exists or has any values for the current +document. This means that the `field` API can handle missing values without +requiring additional logic. + +The `field` API returns a `Field` object that iterates over fields with +multiple values, providing access to the underlying value through the `get()` +method, as well as type conversion and helper methods. + +[discrete] +==== Convenient, simpler access +Instead of explicitly calling the `field` API with the `get()` method, you can +include the `$` shortcut. Just include the `$` symbol, field name, and value +that you want to retrieve: + +[source,painless] +---- +$(‘field’, null) +---- + +With these enhanced capabilities and simplified syntax, you can write scripts +that are shorter, less complex, and easier to parse. 
For example, the following +script uses the outdated syntax to determine the difference in milliseconds +between two complex `datetime` values from an indexed document: + +[source,painless] +---- +if (doc.containsKey('start') && doc.containsKey('end')) { + if (doc['start'].size() > 0 && doc['end'].size() > 0) { + ZonedDateTime start = doc['start'].value; + ZonedDateTime end = doc['end'].value; + return ChronoUnit.MILLIS.between(start, end); + } else { + return -1; + } +} else { + return -1; +} +---- + +Using the `field` API, you can write this same script much more succinctly, +without requiring additional logic to determine whether fields exist before +operating on them: + +[source,painless] +---- +ZonedDateTime start = field('start').get(null); +ZonedDateTime end = field('end').get(null); +return start == null || end == null ? -1 : ChronoUnit.MILLIS.between(start, end) +---- + + [discrete] [[script-stored-scripts]] === Store and retrieve scripts From 0571e21c5dad609469e141bedd0d93ec20a76690 Mon Sep 17 00:00:00 2001 From: Adam Locke Date: Tue, 1 Feb 2022 17:26:36 -0500 Subject: [PATCH 2/7] Moving content to a new page and incorporating reviewer feedback --- docs/reference/scripting.asciidoc | 2 + .../scripting/access-fields.asciidoc | 86 ++++++++++++++++++ docs/reference/scripting/using.asciidoc | 88 +------------------ 3 files changed, 92 insertions(+), 84 deletions(-) create mode 100644 docs/reference/scripting/access-fields.asciidoc diff --git a/docs/reference/scripting.asciidoc b/docs/reference/scripting.asciidoc index 170d01512cacc..c2b3748d1a810 100644 --- a/docs/reference/scripting.asciidoc +++ b/docs/reference/scripting.asciidoc @@ -53,6 +53,8 @@ include::scripting/painless.asciidoc[] include::scripting/using.asciidoc[] +include::scripting/access-fields.asciidoc[] + include::scripting/common-script-uses.asciidoc[] include::scripting/fields.asciidoc[] diff --git a/docs/reference/scripting/access-fields.asciidoc 
b/docs/reference/scripting/access-fields.asciidoc new file mode 100644 index 0000000000000..6c83880be3c83 --- /dev/null +++ b/docs/reference/scripting/access-fields.asciidoc @@ -0,0 +1,86 @@ +[[script-fields-api]] +== Access fields in a document with the `field` API +++++ +Access fields in a document +++++ + +beta::["The `field` API is still in development and should be considered a beta feature. The API is subject to change and this iteration is likely not the final state.",{es-issue}78920] + +Use the `field` API to access document fields: + +[source,painless] +---- +field('my_field').get() +---- + +This API fundamentally changes how you access documents in Painless. Previously, +you had to access the `doc` map with the field name that you wanted to access: + +[source,painless] +---- +doc['my_field'].value +---- + +Accessing document fields this way didn't handle missing values or missing +mappings, which meant that your Painless scripts had to check that fields exist +and create intermediary objects for handling such omissions. + +Instead, use the `field` API, which is the preferred approach to access +documents in Painless. The `field` API abstracts access options (such as +`_source` or `doc_values`) and provides a field-centric view of your documents. + +NOTE: Some fields aren't yet compatible with the `fields` API, such as `text` or +`geo` fields. since we are still trying to figure out the best values there to return, so continued use of doc there is what is available. + +The `field` API returns a `Field` object that iterates over fields with +multiple values, providing access to the underlying value through the `get()` +method, as well as type conversion and helper methods. + +The `field` API returns the default value that you specify, regardless of +whether the field exists or has any values for the current document. +This means that the `field` API can handle missing values without requiring +additional logic. 
For a reference (`Object`) type such as `keyword`, the default +value can be `null`. For a primitive type such as `boolean` or `long`, the +default value must be a non-null matching numerical type, such as `false` or `1`. + +[discrete] +=== Convenient, simpler access +Instead of explicitly calling the `field` API with the `get()` method, you can +include the `$` shortcut. Just include the `$` symbol, field name, and value +that you want to retrieve: + +[source,painless] +---- +$(‘field’, ) +---- + +With these enhanced capabilities and simplified syntax, you can write scripts +that are shorter, less complex, and easier to read. For example, the following +script uses the outdated syntax to determine the difference in milliseconds +between two complex `datetime` values from an indexed document: + +[source,painless] +---- +if (doc.containsKey('start') && doc.containsKey('end')) { + if (doc['start'].size() > 0 && doc['end'].size() > 0) { + ZonedDateTime start = doc['start'].value; + ZonedDateTime end = doc['end'].value; + return ChronoUnit.MILLIS.between(start, end); + } else { + return -1; + } +} else { + return -1; +} +---- + +Using the `field` API, you can write this same script much more succinctly, +without requiring additional logic to determine whether fields exist before +operating on them: + +[source,painless] +---- +ZonedDateTime start = field('start').get(null); +ZonedDateTime end = field('end').get(null); +return start == null || end == null ? 
-1 : ChronoUnit.MILLIS.between(start, end) +---- \ No newline at end of file diff --git a/docs/reference/scripting/using.asciidoc b/docs/reference/scripting/using.asciidoc index e463e7a146f6e..d4b4fd91e3e37 100644 --- a/docs/reference/scripting/using.asciidoc +++ b/docs/reference/scripting/using.asciidoc @@ -70,7 +70,7 @@ GET my-index-000001/_search "script_fields": { "my_doubled_field": { "script": { <1> - "source": "field('my_field').get(null) * params['multiplier']", <2> + "source": "doc['my_field'].value * params['multiplier']", <2> "params": { "multiplier": 2 } @@ -101,7 +101,7 @@ first value for `my_field` and then multiply it by `2`: [source,painless] ---- -"source": "return field('my_field').get(null) * 2" +"source": "return doc['my_field'].value * 2" ---- Though it works, this solution is pretty inflexible. We have to modify the @@ -114,7 +114,7 @@ the `multiplier` parameter without {es} recompiling the script. [source,painless] ---- -"source": "field('my_field').get(null) * params['multiplier']", +"source": "doc['my_field'].value * params['multiplier']", "params": { "multiplier": 2 } @@ -147,7 +147,7 @@ GET my-index-000001/_search "my_doubled_field": { "script": { "lang": "painless", - "source": "return field('my_field').get(null) * params.get('multiplier');", + "source": "doc['my_field'].value * params.get('multiplier');", "params": { "multiplier": 2 } @@ -197,86 +197,6 @@ them in other cases to remove ambiguity. Use this abbreviated syntax anywhere that {es} supports scripts, such as when you're creating <>. -[discrete] -[[script-fields-api]] -=== Access document fields -The examples from previous sections use the `field` API to access document -fields: - -[source,painless] ----- -field('my_field').get(null) ----- - -The `field` API fundamentally changes how you access documents in Painless. 
-Previously, you had to declare `doc` and the field name that you wanted to -access: - -[source,painless] ----- -doc['my_field'].value ----- - -Accessing document fields this way didn't handle missing values, which meant -that your Painless scripts had to check that fields exist and create -intermediary objects for handling such omissions. - -Instead, use the `field` API, which is the preferred approach to access -documents in Painless. The `field` API abstracts access options (such as -`_source` or `doc_values`) and provides a field-centric view of your documents. - -The `field` API always returns a non-null object that represents the given field, -regardless of whether the field exists or has any values for the current -document. This means that the `field` API can handle missing values without -requiring additional logic. - -The `field` API returns a `Field` object that iterates over fields with -multiple values, providing access to the underlying value through the `get()` -method, as well as type conversion and helper methods. - -[discrete] -==== Convenient, simpler access -Instead of explicitly calling the `field` API with the `get()` method, you can -include the `$` shortcut. Just include the `$` symbol, field name, and value -that you want to retrieve: - -[source,painless] ----- -$(‘field’, null) ----- - -With these enhanced capabilities and simplified syntax, you can write scripts -that are shorter, less complex, and easier to parse. 
For example, the following -script uses the outdated syntax to determine the difference in milliseconds -between two complex `datetime` values from an indexed document: - -[source,painless] ----- -if (doc.containsKey('start') && doc.containsKey('end')) { - if (doc['start'].size() > 0 && doc['end'].size() > 0) { - ZonedDateTime start = doc['start'].value; - ZonedDateTime end = doc['end'].value; - return ChronoUnit.MILLIS.between(start, end); - } else { - return -1; - } -} else { - return -1; -} ----- - -Using the `field` API, you can write this same script much more succinctly, -without requiring additional logic to determine whether fields exist before -operating on them: - -[source,painless] ----- -ZonedDateTime start = field('start').get(null); -ZonedDateTime end = field('end').get(null); -return start == null || end == null ? -1 : ChronoUnit.MILLIS.between(start, end) ----- - - [discrete] [[script-stored-scripts]] === Store and retrieve scripts From defcb239b15a878e719d845a1bd6de974b710e33 Mon Sep 17 00:00:00 2001 From: Adam Locke Date: Wed, 2 Feb 2022 12:17:16 -0500 Subject: [PATCH 3/7] Clarify note --- docs/reference/scripting/access-fields.asciidoc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/reference/scripting/access-fields.asciidoc b/docs/reference/scripting/access-fields.asciidoc index 6c83880be3c83..748d690a3e69d 100644 --- a/docs/reference/scripting/access-fields.asciidoc +++ b/docs/reference/scripting/access-fields.asciidoc @@ -30,7 +30,8 @@ documents in Painless. The `field` API abstracts access options (such as `_source` or `doc_values`) and provides a field-centric view of your documents. NOTE: Some fields aren't yet compatible with the `fields` API, such as `text` or -`geo` fields. since we are still trying to figure out the best values there to return, so continued use of doc there is what is available. +`geo` fields. Continue using `doc['my_field'].value` to access field types that +aren't supported yet. 
The `field` API returns a `Field` object that iterates over fields with multiple values, providing access to the underlying value through the `get()` From febe14f3c4e87d572b594b18b7a248503b4c3a04 Mon Sep 17 00:00:00 2001 From: Adam Locke Date: Wed, 2 Feb 2022 17:06:29 -0500 Subject: [PATCH 4/7] Incorporating review comments --- .../scripting/access-fields.asciidoc | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/docs/reference/scripting/access-fields.asciidoc b/docs/reference/scripting/access-fields.asciidoc index 748d690a3e69d..ab7fa05082621 100644 --- a/docs/reference/scripting/access-fields.asciidoc +++ b/docs/reference/scripting/access-fields.asciidoc @@ -10,7 +10,7 @@ Use the `field` API to access document fields: [source,painless] ---- -field('my_field').get() +field('my_field').get(<default_value>) ---- This API fundamentally changes how you access documents in Painless. Previously, @@ -22,37 +22,37 @@ doc['my_field'].value ---- Accessing document fields this way didn't handle missing values or missing -mappings, which meant that your Painless scripts had to check that fields exist -and create intermediary objects for handling such omissions. +mappings, which meant that to write robust Painless scripts, you needed to +include logic to check that both fields and values exist. Instead, use the `field` API, which is the preferred approach to access -documents in Painless. The `field` API abstracts access options (such as -`_source` or `doc_values`) and provides a field-centric view of your documents. +documents in Painless. The `field` API handles missing values, and will evolve +to abstract access to `_source` and `doc_values`. NOTE: Some fields aren't yet compatible with the `fields` API, such as `text` or -`geo` fields. Continue using `doc['my_field'].value` to access field types that -aren't supported yet. +`geo` fields. Continue using `doc` to access field types that the `field` API +doesn't support. 
The `field` API returns a `Field` object that iterates over fields with -multiple values, providing access to the underlying value through the `get()` -method, as well as type conversion and helper methods. +multiple values, providing access to the underlying value through the +`get(<default_value>)` method, as well as type conversion and helper methods. The `field` API returns the default value that you specify, regardless of whether the field exists or has any values for the current document. This means that the `field` API can handle missing values without requiring additional logic. For a reference (`Object`) type such as `keyword`, the default value can be `null`. For a primitive type such as `boolean` or `long`, the -default value must be a non-null matching numerical type, such as `false` or `1`. +default value must be a matching primitive type, such as `false` or `1`. [discrete] === Convenient, simpler access Instead of explicitly calling the `field` API with the `get()` method, you can -include the `$` shortcut. Just include the `$` symbol, field name, and value -that you want to retrieve: +include the `$` shortcut. 
Just include the `$` symbol, field name, and a default +value, in case the field doesn't have a value: [source,painless] ---- -$(‘field’, ) +$('field', <default_value>) ---- With these enhanced capabilities and simplified syntax, you can write scripts From af45035286c31ecb00abed50a3d224905faff01c Mon Sep 17 00:00:00 2001 From: Adam Locke Date: Thu, 3 Feb 2022 10:02:41 -0500 Subject: [PATCH 5/7] Remove Object so as not to confuse it with the object type --- docs/reference/scripting/access-fields.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/scripting/access-fields.asciidoc b/docs/reference/scripting/access-fields.asciidoc index ab7fa05082621..5ff75503db4df 100644 --- a/docs/reference/scripting/access-fields.asciidoc +++ b/docs/reference/scripting/access-fields.asciidoc @@ -40,7 +40,7 @@ multiple values, providing access to the underlying value through the The `field` API returns the default value that you specify, regardless of whether the field exists or has any values for the current document. This means that the `field` API can handle missing values without requiring -additional logic. For a reference (`Object`) type such as `keyword`, the default +additional logic. For a reference type such as `keyword`, the default value can be `null`. For a primitive type such as `boolean` or `long`, the default value must be a matching primitive type, such as `false` or `1`. 
From 8bf4f8125ff53c20ea3c1996b89d3b58c4e06b49 Mon Sep 17 00:00:00 2001 From: Adam Locke Date: Thu, 3 Feb 2022 12:12:45 -0500 Subject: [PATCH 6/7] Add section and table for supported mapped field types --- .../scripting/access-fields.asciidoc | 42 ++++++++++++++++++- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/docs/reference/scripting/access-fields.asciidoc b/docs/reference/scripting/access-fields.asciidoc index 5ff75503db4df..bf501cd7092b1 100644 --- a/docs/reference/scripting/access-fields.asciidoc +++ b/docs/reference/scripting/access-fields.asciidoc @@ -4,7 +4,7 @@ Access fields in a document ++++ -beta::["The `field` API is still in development and should be considered a beta feature. The API is subject to change and this iteration is likely not the final state.",{es-issue}78920] +beta::["The `field` API is still in development and should be considered a beta feature. The API is subject to change and this iteration is likely not the final state. For feature status, refer to {es-issue}78920[#78920]."] Use the `field` API to access document fields: @@ -84,4 +84,42 @@ operating on them: ZonedDateTime start = field('start').get(null); ZonedDateTime end = field('end').get(null); return start == null || end == null ? -1 : ChronoUnit.MILLIS.between(start, end) ----- \ No newline at end of file +---- + +[discrete] +=== Supported mapped field types +The following table indicates the mapped field types that the `field` API +supports. For each supported type, values are listed that are returned by the +`field` API (from the `get` and `as` methods) and the `doc` map (from the +`getValue` and `get` methods). + +NOTE: Some fields are not currently supported. For the most current list of +supported fields, refer to {es-issue}79105[#79105]. 
+ +[cols="1,1,1",options="header",] +|======== +|Mapped field type +|Returned type from `field` (`get`,`asType`) +|Returned type from `doc` (`getValue`,`get`) + |`binary` |`ByteBuffer` |`BytesRef` + |`boolean` |`boolean` |`boolean/Boolean` + |`keyword` |`String` |`String` + |`long` |`long` |`long`,`Long` + |`integer` |`int` |`long`,`Long` + |`short` |`short` |`long`,`Long` + |`byte` |`byte` |`long`,`Long` + |`double` |`double` |`double`,`Double` + |`scaled_float` |`double` |`double`,`Double` + |`half_float` |`float` |`double`,`Double` + |`unsigned_long` |`long`,`BigInteger` |`long`,`Long` + |`date` |`ZonedDateTime` |`ZonedDateTime` + |`date_nanos` |`ZonedDateTime` |`ZonedDateTime` + |`ip` |`IpAddress`,`String` |`String` + |`_version` |`long` |`long`,`Long` + |`_seq_no` |`long` |`long`,`Long` + |`version` |`Version`,`String` |`String` + |`murmur3` |`long` |`long`,`Long` + |`constant_keyword` |`String` |`String` + |`wildcard` |`String` |`String` + |`flattened` |`String` |`String` +|======== \ No newline at end of file From 43f6efc8739aa79b83841e8fa793575e6938891b Mon Sep 17 00:00:00 2001 From: Adam Locke Date: Thu, 3 Feb 2022 14:35:27 -0500 Subject: [PATCH 7/7] Update table based on review feedback --- .../scripting/access-fields.asciidoc | 52 ++++++++++--------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/docs/reference/scripting/access-fields.asciidoc b/docs/reference/scripting/access-fields.asciidoc index bf501cd7092b1..2e2d44d89881d 100644 --- a/docs/reference/scripting/access-fields.asciidoc +++ b/docs/reference/scripting/access-fields.asciidoc @@ -93,33 +93,35 @@ supports. For each supported type, values are listed that are returned by the `field` API (from the `get` and `as` methods) and the `doc` map (from the `getValue` and `get` methods). -NOTE: Some fields are not currently supported. For the most current list of +NOTE: The `field` API currently doesn't support some fields, but you can still +access those fields through the `doc` map. 
For the most current list of supported fields, refer to {es-issue}79105[#79105]. -[cols="1,1,1",options="header",] +[cols="1,1,1,1,1",options="header",] |======== |Mapped field type -|Returned type from `field` (`get`,`asType`) -|Returned type from `doc` (`getValue`,`get`) - |`binary` |`ByteBuffer` |`BytesRef` - |`boolean` |`boolean` |`boolean/Boolean` - |`keyword` |`String` |`String` - |`long` |`long` |`long`,`Long` - |`integer` |`int` |`long`,`Long` - |`short` |`short` |`long`,`Long` - |`byte` |`byte` |`long`,`Long` - |`double` |`double` |`double`,`Double` - |`scaled_float` |`double` |`double`,`Double` - |`half_float` |`float` |`double`,`Double` - |`unsigned_long` |`long`,`BigInteger` |`long`,`Long` - |`date` |`ZonedDateTime` |`ZonedDateTime` - |`date_nanos` |`ZonedDateTime` |`ZonedDateTime` - |`ip` |`IpAddress`,`String` |`String` - |`_version` |`long` |`long`,`Long` - |`_seq_no` |`long` |`long`,`Long` - |`version` |`Version`,`String` |`String` - |`murmur3` |`long` |`long`,`Long` - |`constant_keyword` |`String` |`String` - |`wildcard` |`String` |`String` - |`flattened` |`String` |`String` +2+|Returned type from `field` +2+|Returned type from `doc` +h| h|`get` h|`as` h|`getValue` h|`get` + |`binary` |`ByteBuffer` |- |`BytesRef` |`BytesRef` + |`boolean` |`boolean` |- |`boolean` |`Boolean` + |`keyword` |`String` |- |`String` |`String` + |`long` |`long` |- |`long` |`Long` + |`integer` |`int` |- |`long` |`Long` + |`short` |`short` |- |`long` |`Long` + |`byte` |`byte` |- |`long` |`Long` + |`double` |`double` |- |`double` |`Double` + |`scaled_float` |`double` |- |`double` |`Double` + |`half_float` |`float` |- |`double` |`Double` + |`unsigned_long` |`long` |`BigInteger` |`long` |`Long` + |`date` |`ZonedDateTime` |- |`ZonedDateTime` |`ZonedDateTime` + |`date_nanos` |`ZonedDateTime` |- |`ZonedDateTime` |`ZonedDateTime` + |`ip` |`IpAddress` |`String` |`String` |`String` + |`_version` |`long` |- |`long` |`Long` + |`_seq_no` |`long` |- |`long` |`Long` + |`version` |`Version` 
|`String` |`String` |`String` + |`murmur3` |`long` |- |`long` |`Long` + |`constant_keyword` |`String` |- |`String` |`String` + |`wildcard` |`String` |- |`String` |`String` + |`flattened` |`String` |- |`String` |`String` |======== \ No newline at end of file