From 134a3e89d741f56b62c06f039446ffd29deac82c Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Thu, 4 Jul 2024 18:31:55 +0200 Subject: [PATCH 01/36] Unique output attribute names after optimization --- .../src/main/resources/row.csv-spec | 7 ------ .../xpack/esql/optimizer/OptimizerRules.java | 22 ++++++++++++++++++- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec index bb1cf7358ca74..e1ab452bdc27a 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec @@ -69,13 +69,6 @@ a:integer | b:integer | c:null | z:integer 1 | 2 | null | null ; -evalRowWithNull2 -row a = 1, null, b = 2, c = null, null | eval z = a+b; - -a:integer | null:null | b:integer | c:null | null:null | z:integer -1 | null | 2 | null | null | 3 -; - evalRowWithNull3 row a = 1, b = 2, x = round(null) | eval z = a+b+x; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/OptimizerRules.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/OptimizerRules.java index 4c5d9efb449f7..c02b9948def3f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/OptimizerRules.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/OptimizerRules.java @@ -8,8 +8,10 @@ package org.elasticsearch.xpack.esql.optimizer; import org.elasticsearch.xpack.esql.core.common.Failures; +import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.AttributeSet; import org.elasticsearch.xpack.esql.core.expression.Expressions; +import org.elasticsearch.xpack.esql.core.expression.NameId; import org.elasticsearch.xpack.esql.core.plan.QueryPlan; import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; @@ -36,6 +38,9 @@ import org.elasticsearch.xpack.esql.plan.physical.RowExec; import org.elasticsearch.xpack.esql.plan.physical.ShowExec; +import java.util.HashSet; +import java.util.Set; + import static org.elasticsearch.xpack.esql.core.common.Failure.fail; class OptimizerRules { @@ -49,9 +54,24 @@ void checkPlan(P p, Failures failures) { AttributeSet input = p.inputSet(); AttributeSet generated = generates(p); AttributeSet missing = refs.subtract(input).subtract(generated); - if (missing.size() > 0) { + if (missing.isEmpty() == false) { failures.add(fail(p, "Plan [{}] optimized incorrectly due to missing references {}", p.nodeString(), missing)); } + + Set outputAttributeNames = new HashSet<>(); + Set outputAttributeIds = new HashSet<>(); + for (Attribute outputAttr : p.output()) { + if (outputAttributeNames.add(outputAttr.name()) == false || outputAttributeIds.add(outputAttr.id()) == false) { + failures.add( + fail( + p, + "Plan [{}] optimized incorrectly due to duplicate output attribute {}", + p.nodeString(), + outputAttr.toString() + ) + ); + } + } } protected AttributeSet references(P p) { From 9d9c70f42b94e0104392cf3466ce1ca697fad73c Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Thu, 4 Jul 2024 18:32:49 +0200 Subject: [PATCH 02/36] Enforce unique row attribute names in verifier --- .../xpack/esql/core/analyzer/AnalyzerRules.java | 12 +----------- .../elasticsearch/xpack/esql/analysis/Verifier.java | 6 ++++++ .../xpack/esql/analysis/VerifierTests.java | 9 +++++++++ 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/analyzer/AnalyzerRules.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/analyzer/AnalyzerRules.java index ce188511fe7bc..83afb2ac38a4e 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/analyzer/AnalyzerRules.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/analyzer/AnalyzerRules.java @@ -20,8 +20,6 @@ import java.util.function.Predicate; import java.util.function.Supplier; -import static java.util.Collections.singletonList; - public final class AnalyzerRules { public abstract static class AnalyzerRule extends Rule { @@ -138,14 +136,6 @@ public static List maybeResolveAgainstList( ) .toList(); - return singletonList( - ua.withUnresolvedMessage( - "Reference [" - + ua.qualifiedName() - + "] is ambiguous (to disambiguate use quotes or qualifiers); " - + "matches any of " - + refs - ) - ); + throw new IllegalStateException("Reference [" + ua.qualifiedName() + "] is ambiguous; " + "matches any of " + refs); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java index 514a53b0933e9..93f0913cec3d4 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java @@ -46,6 +46,7 @@ import java.util.ArrayList; import java.util.BitSet; import java.util.Collection; +import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; import java.util.Set; @@ -349,10 +350,15 @@ private static void checkRegexExtractOnlyOnStrings(LogicalPlan p, Set f private static void checkRow(LogicalPlan p, Set failures) { if (p instanceof Row row) { + Set outputAttributeNames = new HashSet<>(); + row.fields().forEach(a -> { if (EsqlDataTypes.isRepresentable(a.dataType()) == false) { failures.add(fail(a, "cannot use [{}] directly in a row assignment", a.child().sourceText())); } + if (outputAttributeNames.add(a.name()) == false) { + failures.add(fail(a, "cannot use the name [{}] multiple times in a row assignment", a.name())); + } }); } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index ad08130c5b0d9..d27e2b6c242cf 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -35,6 +35,15 @@ public class VerifierTests extends ESTestCase { private final Analyzer defaultAnalyzer = AnalyzerTestUtils.expandedDefaultAnalyzer(); private final Analyzer tsdb = AnalyzerTestUtils.analyzer(AnalyzerTestUtils.tsdbIndexResolution()); + public void testRowAllowsOnlyUniqueAttributeNames() { + assertEquals("1:19: cannot use the name [a] multiple times in a row assignment", error("row a = 1, b = 2, a = 3")); + assertEquals("1:11: cannot use the name [1] multiple times in a row assignment", error("row 1, 2, 1")); + assertEquals( + "1:35: cannot use the name [null] multiple times in a row assignment", + error("row a = 1, null, b = 2, c = null, null | eval z = a+b") + ); + } + public void testIncompatibleTypesInMathOperation() { assertEquals( "1:40: second argument of [a + c] must be [datetime or numeric], found value [c] type [keyword]", From 0d4e1df9b849e53e536b6c6d422ecc0f01bd9ff3 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Thu, 4 Jul 2024 18:38:40 +0200 Subject: [PATCH 03/36] Update docs/changelog/110488.yaml --- docs/changelog/110488.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/110488.yaml diff --git a/docs/changelog/110488.yaml b/docs/changelog/110488.yaml new file mode 100644 index 0000000000000..1561c5e1ea7de --- /dev/null +++ b/docs/changelog/110488.yaml @@ -0,0 +1,5 @@ +pr: 110488 +summary: "ESQL: Validate unique plan attribute names" +area: ES|QL +type: bug +issues: [] From 6f36c29ad39e625a38c40536ed1ae8998c6abc78 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Fri, 5 Jul 2024 16:34:52 +0200 Subject: [PATCH 04/36] Add tests for grok, dissect, enrich --- .../testFixtures/src/main/resources/dissect.csv-spec | 12 ++++++++++++ .../testFixtures/src/main/resources/enrich.csv-spec | 11 +++++++++++ .../qa/testFixtures/src/main/resources/eval.csv-spec | 10 ++++++++++ .../qa/testFixtures/src/main/resources/grok.csv-spec | 11 +++++++++++ 4 files changed, 44 insertions(+) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec index 812198c324217..86593f69752d1 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec @@ -50,6 +50,18 @@ last_name:keyword | left:keyword | foo:keyword | middle:keyword | ri Facello | left | Georgi1 Georgi2 Facello | middle | right | Georgi1 | Georgi2 | Facello ; +shadowingInternal +FROM employees +| KEEP first_name, last_name +| WHERE last_name == "Facello" +| EVAL name = concat(first_name, "1 ", last_name) +| DISSECT name "%{foo} %{foo}" +; + +first_name:keyword | last_name:keyword | name:keyword | foo:keyword +Georgi | Facello | Georgi1 Facello | Facello +; + complexPattern ROW a = "1953-01-23T12:15:00Z - some text - 127.0.0.1;" diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec index fc8c48afdf8cc..2dd792c62ddf9 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec @@ -107,6 +107,17 @@ ROW left = "left", airport = "Zurich Airport ZRH", city = "Zürich", middle = "m left:keyword | city:keyword | middle:keyword | right:keyword | airport:text | region:text | city_boundary:geo_shape ; +shadowingInternal +required_capability: enrich_load +ROW city = "Zürich" +| ENRICH city_names ON city WITH x = airport, x = region +; + +city:keyword | x:text +Zürich | Bezirk Zürich +; + + simple required_capability: enrich_load diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec index 3df3b85e5e3af..667c8f70e7649 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec @@ -33,6 +33,16 @@ left:keyword | middle:keyword | right:keyword | x:integer | y:integer left | middle | right | 9 | 10 ; +shadowingInternal +ROW x = 10000 +| EVAL x = x + 1, x = x - 2 +; + +x:integer +9999 +; + + withMath row a = 1 | eval b = 2 + 3; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec index 9d574eed7be6b..733d8dca86cd9 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec @@ -50,6 +50,17 @@ last_name:keyword | left:keyword | foo:keyword | middle:keyword | ri Facello | left | Georgi1 Georgi2 Facello | middle | right | Georgi1 | Georgi2 | Facello ; +shadowingInternal +FROM employees +| KEEP last_name +| WHERE last_name == "Facello" +| EVAL name = concat("1 ", last_name) +| GROK name "%{WORD:foo} %{WORD:foo}" +; + +last_name:keyword | name:keyword | foo:keyword +Facello | 1 Facello | [1, Facello] +; complexPattern ROW a = "1953-01-23T12:15:00Z 127.0.0.1 some.email@foo.com 42" From cd48514d98b030ad69e65b2c0ab357bc99c2f744 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Fri, 5 Jul 2024 17:23:40 +0200 Subject: [PATCH 05/36] Add tests for keep --- .../src/main/resources/keep.csv-spec | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/keep.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/keep.csv-spec index 14a3807b8729c..57134786c33c1 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/keep.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/keep.csv-spec @@ -539,3 +539,28 @@ c:i 1 1 ; + +shadowingInternal +FROM employees +| SORT emp_no ASC +| KEEP last_name, last_name +| LIMIT 2 +; + +last_name:keyword +Facello +Simmel +; + + +shadowingInternalWildcard +FROM employees +| SORT emp_no ASC +| KEEP last*name, last*name, last*, last_name +| LIMIT 2 +; + +last_name:keyword +Facello +Simmel +; From 3a5dab74665dce3e5f54d3f60508f9de80d06a04 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Fri, 5 Jul 2024 18:13:07 +0200 Subject: [PATCH 06/36] Make row consistent with other plans --- .../testFixtures/src/main/resources/row.csv-spec | 16 ++++++++++++++++ .../xpack/esql/analysis/Verifier.java | 6 ------ .../xpack/esql/parser/LogicalPlanBuilder.java | 4 +++- .../xpack/esql/analysis/VerifierTests.java | 9 --------- 4 files changed, 19 insertions(+), 16 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec index e1ab452bdc27a..78dcd35960cf8 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec @@ -36,6 +36,14 @@ a:integer // end::multivalue-result[] ; +shadowingInternal +//TODO: needs cap to skip on bwc tests +row a = 1, a = 2; + +a:integer + 2 +; + unsignedLongLiteral ROW long_max = 9223372036854775807, ul_start = 9223372036854775808, ul_end = 18446744073709551615, double=18446744073709551616; @@ -69,6 +77,14 @@ a:integer | b:integer | c:null | z:integer 1 | 2 | null | null ; +evalRowWithNull2 +//TODO: needs cap to skip on bwc tests +row a = 1, null, b = 2, c = null, null | eval z = a+b; + +a:integer | b:integer | c:null | null:null | z:integer + 1 | 2 | null | null | 3 +; + evalRowWithNull3 row a = 1, b = 2, x = round(null) | eval z = a+b+x; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java index 93f0913cec3d4..514a53b0933e9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java @@ -46,7 +46,6 @@ import java.util.ArrayList; import java.util.BitSet; import java.util.Collection; -import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; import java.util.Set; @@ -350,15 +349,10 @@ private static void checkRegexExtractOnlyOnStrings(LogicalPlan p, Set f private static void checkRow(LogicalPlan p, Set failures) { if (p instanceof Row row) { - Set outputAttributeNames = new HashSet<>(); - row.fields().forEach(a -> { if (EsqlDataTypes.isRepresentable(a.dataType()) == false) { failures.add(fail(a, "cannot use [{}] directly in a row assignment", a.child().sourceText())); } - if (outputAttributeNames.add(a.name()) == false) { - failures.add(fail(a, "cannot use the name [{}] multiple times in a row assignment", a.name())); - } }); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java index fee51c40a2525..3f3d02c6597be 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java @@ -74,6 +74,7 @@ import static org.elasticsearch.xpack.esql.core.parser.ParserUtils.source; import static org.elasticsearch.xpack.esql.core.parser.ParserUtils.typedParsing; import static org.elasticsearch.xpack.esql.core.parser.ParserUtils.visitList; +import static org.elasticsearch.xpack.esql.expression.NamedExpressions.mergeOutputExpressions; import static org.elasticsearch.xpack.esql.plan.logical.Enrich.Mode; import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.stringToInt; @@ -217,8 +218,9 @@ public Map visitCommandOptions(EsqlBaseParser.CommandOptionsCont } @Override + @SuppressWarnings("unchecked") public LogicalPlan visitRowCommand(EsqlBaseParser.RowCommandContext ctx) { - return new Row(source(ctx), visitFields(ctx.fields())); + return new Row(source(ctx), (List) (List) mergeOutputExpressions(visitFields(ctx.fields()), List.of())); } @Override diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index d27e2b6c242cf..ad08130c5b0d9 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -35,15 +35,6 @@ public class VerifierTests extends ESTestCase { private final Analyzer defaultAnalyzer = AnalyzerTestUtils.expandedDefaultAnalyzer(); private final Analyzer tsdb = AnalyzerTestUtils.analyzer(AnalyzerTestUtils.tsdbIndexResolution()); - public void testRowAllowsOnlyUniqueAttributeNames() { - assertEquals("1:19: cannot use the name [a] multiple times in a row assignment", error("row a = 1, b = 2, a = 3")); - assertEquals("1:11: cannot use the name [1] multiple times in a row assignment", error("row 1, 2, 1")); - assertEquals( - "1:35: cannot use the name [null] multiple times in a row assignment", - error("row a = 1, null, b = 2, c = null, null | eval z = a+b") - ); - } - public void testIncompatibleTypesInMathOperation() { assertEquals( "1:40: second argument of [a + c] must be [datetime or numeric], found value [c] type [keyword]", From f71ef42449f0f0b88efeb3b13f54efa972df3323 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Fri, 5 Jul 2024 18:19:04 +0200 Subject: [PATCH 07/36] Update docs/changelog/110488.yaml --- docs/changelog/110488.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/changelog/110488.yaml b/docs/changelog/110488.yaml index 1561c5e1ea7de..fbb439f20fc96 100644 --- a/docs/changelog/110488.yaml +++ b/docs/changelog/110488.yaml @@ -2,4 +2,5 @@ pr: 110488 summary: "ESQL: Validate unique plan attribute names" area: ES|QL type: bug -issues: [] +issues: + - 110541 From 42be4eb6c724b7c49bf6cf89b718a416d85e2075 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Fri, 5 Jul 2024 18:44:01 +0200 Subject: [PATCH 08/36] Add test for drop, rename and stats --- .../src/main/resources/drop.csv-spec | 25 ++++++++++++++++++ .../src/main/resources/rename.csv-spec | 26 +++++++++++++++++++ .../src/main/resources/stats.csv-spec | 9 +++++++ .../xpack/esql/plan/logical/Rename.java | 7 +++++ 4 files changed, 67 insertions(+) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/drop.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/drop.csv-spec index 35530cf6fdb8e..117026d777c81 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/drop.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/drop.csv-spec @@ -122,3 +122,28 @@ FROM employees | STATS COUNT(*), MIN(salary * 10), MAX(languages)| DROP `COUNT( MIN(salary * 10):i | MAX(languages):i 253240 | 5 ; + +// Not really shadowing, but let's keep the name consistent with the other command's tests +shadowingInternal +FROM employees +| KEEP emp_no, first_name, last_name +| DROP last_name, last_name +| LIMIT 2 +; + +emp_no:integer | first_name:keyword + 10001 | Georgi + 10002 | Bezalel +; + +shadowingInternalWildcard +FROM employees +| KEEP emp_no, first_name, last_name +| DROP last*name, last*name, last*, last_name +| LIMIT 2 +; + +emp_no:integer | first_name:keyword + 10001 | Georgi + 10002 | Bezalel +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/rename.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/rename.csv-spec index 1e830486cc7c7..26a35f77ba267 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/rename.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/rename.csv-spec @@ -174,3 +174,29 @@ avg_worked_seconds:l | birth_date:date | emp_no:i | first_n 341158890 | 1961-10-15T00:00:00.000Z | 10060 | Breannda | M | 1.42 | 1.4199999570846558 | 1.419921875 | 1.42 | 1987-11-02T00:00:00.000Z | [false, false, false, true]| [Business Analyst, Data Scientist, Senior Team Lead] | 2 | 2 | 2 | 2 | Billingsley | 29175 | [-1.76, -0.85] | [-1, 0] | [-0.85, -1.76] | [-1, 0] | true | 29175 246355863 | null | 10042 | Magy | F | 1.44 | 1.440000057220459 | 1.4404296875 | 1.44 | 1993-03-21T00:00:00.000Z | null | [Architect, Business Analyst, Internship, Junior Developer] | 3 | 3 | 3 | 3 | Stamatiou | 30404 | [-9.28, 9.42] | [-9, 9] | [-9.28, 9.42] | [-9, 9] | true | 30404 ; + +shadowing +FROM employees +| SORT emp_no ASC +| KEEP emp_no, first_name, last_name +| RENAME emp_no AS last_name +| LIMIT 2 +; + +last_name:integer | first_name:keyword + 10001 | Georgi + 10002 | Bezalel +; + +shadowingInternal +FROM employees +| SORT emp_no ASC +| KEEP emp_no, last_name +| RENAME emp_no AS x, last_name AS x +| LIMIT 2 +; + +x:keyword +Facello +Simmel +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec index e4fc0580e4ba2..4476a1f5ab78f 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec @@ -1819,3 +1819,12 @@ warning:Line 3:17: java.lang.ArithmeticException: / by zero w_avg:double null ; + +shadowingInternal +FROM employees +| STATS x = MAX(emp_no), x = MIN(emp_no) +; + +x:integer +10001 +; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Rename.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Rename.java index 7d99c566aa0c7..2d816ae117816 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Rename.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Rename.java @@ -8,6 +8,7 @@ package org.elasticsearch.xpack.esql.plan.logical; import org.elasticsearch.xpack.esql.core.expression.Alias; +import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.core.plan.logical.UnaryPlan; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; @@ -30,6 +31,12 @@ public List renamings() { return renamings; } + @Override + public List output() { + // Rename is mapped to a Project during analysis; we do not compute the output here. + throw new IllegalStateException("Should never reach here."); + } + @Override public boolean expressionsResolved() { for (var alias : renamings) { From 11da00c7a72ad77bca300d63f78eeede88840221 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Tue, 9 Jul 2024 11:22:29 +0200 Subject: [PATCH 09/36] Add test dataset with deeper field hierarchy --- .../xpack/esql/CsvTestsDataLoader.java | 2 +- .../src/main/resources/addresses.csv | 4 ++ .../src/main/resources/mapping-addresses.json | 37 +++++++++++++++++++ 3 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/addresses.csv create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-addresses.json diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index 530b2bc01b3d6..54dca3ffb52a7 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -96,8 +96,8 @@ public class CsvTestsDataLoader { "cartesian_multipolygons.csv" ); private static final TestsDataset DISTANCES = new TestsDataset("distances", "mapping-distances.json", "distances.csv"); - private static final TestsDataset K8S = new TestsDataset("k8s", "k8s-mappings.json", "k8s.csv", "k8s-settings.json", true); + private static final TestsDataset ADDRESSES = new TestsDataset("addresses", "mapping-addresses.json", "addresses.csv", null, true); public static final Map CSV_DATASET_MAP = Map.ofEntries( Map.entry(EMPLOYEES.indexName, EMPLOYEES), diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/addresses.csv b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/addresses.csv new file mode 100644 index 0000000000000..b1d8e3cd925d4 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/addresses.csv @@ -0,0 +1,4 @@ +street:keyword,number:integer,zip_code:integer,city.name:keyword,city.country.name:keyword,city.country.continent.name:keyword,city.country.continent.planet:keyword +Keizersgracht,281,1016 ED,Amsterdam,Netherlands,Europe,Earth +Kearny St,88,CA 94108,San Francisco,United States of America,North America,Earth +Marunouchi,2-7-2,100-7014,Tokyo,Japan,Asia,Earth diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-addresses.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-addresses.json new file mode 100644 index 0000000000000..378c9c1ca5db8 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-addresses.json @@ -0,0 +1,37 @@ +{ + "properties" : { + "street" : { + "type": "keyword" + }, + "number" : { + "type": "keyword" + }, + "zip_code": { + "type": "keyword" + }, + "city" : { + "properties": { + "name": { + "type": "keyword" + }, + "country": { + "properties": { + "name": { + "type": "keyword" + }, + "continent": { + "properties": { + "name": { + "type": "keyword" + }, + "planet": { + "type": "keyword" + } + } + } + } + } + } + } + } +} From 0d6758e7b971bb4ba03fe8fd398fabf223686d9b Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Tue, 9 Jul 2024 12:49:08 +0200 Subject: [PATCH 10/36] Add hierarchical shadowing test for eval --- .../xpack/esql/CsvTestsDataLoader.java | 3 ++- .../testFixtures/src/main/resources/addresses.csv | 8 ++++---- .../testFixtures/src/main/resources/eval.csv-spec | 13 +++++++++++++ .../src/main/resources/mapping-addresses.json | 9 ++++++++- 4 files changed, 27 insertions(+), 6 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index 54dca3ffb52a7..f9b768d67d574 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -121,7 +121,8 @@ public class CsvTestsDataLoader { Map.entry(AIRPORT_CITY_BOUNDARIES.indexName, AIRPORT_CITY_BOUNDARIES), Map.entry(CARTESIAN_MULTIPOLYGONS.indexName, CARTESIAN_MULTIPOLYGONS), Map.entry(K8S.indexName, K8S), - Map.entry(DISTANCES.indexName, DISTANCES) + Map.entry(DISTANCES.indexName, DISTANCES), + Map.entry(ADDRESSES.indexName, ADDRESSES) ); private static final EnrichConfig LANGUAGES_ENRICH = new EnrichConfig("languages_policy", "enrich-policy-languages.json"); diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/addresses.csv b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/addresses.csv index b1d8e3cd925d4..0eea102400d60 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/addresses.csv +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/addresses.csv @@ -1,4 +1,4 @@ -street:keyword,number:integer,zip_code:integer,city.name:keyword,city.country.name:keyword,city.country.continent.name:keyword,city.country.continent.planet:keyword -Keizersgracht,281,1016 ED,Amsterdam,Netherlands,Europe,Earth -Kearny St,88,CA 94108,San Francisco,United States of America,North America,Earth -Marunouchi,2-7-2,100-7014,Tokyo,Japan,Asia,Earth +street:keyword,number:keyword,zip_code:keyword,city.name:keyword,city.country.name:keyword,city.country.continent.name:keyword,city.country.continent.planet.name:keyword,city.country.continent.planet.galaxy:keyword +Keizersgracht,281,1016 ED,Amsterdam,Netherlands,Europe,Earth,Milky Way +Kearny St,88,CA 94108,San Francisco,United States of America,North America,Earth,Milky Way +Marunouchi,2-7-2,100-7014,Tokyo,Japan,Asia,Earth,Milky Way diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec index 667c8f70e7649..7758496730cad 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec @@ -15,6 +15,19 @@ left:keyword | right:keyword | x:integer left | right | 1 ; +shadowingHierarchical +FROM addresses +| KEEP city.country.continent.planet.name, city.country.name, city.name +| EVAL city.country.continent.planet.name = to_upper(city.country.continent.planet.name) +| SORT city.name +; + +city.country.name:keyword | city.name:keyword | city.country.continent.planet.name:keyword +Netherlands | Amsterdam | EARTH +United States of America | San Francisco | EARTH +Japan | Tokyo | EARTH +; + shadowingSelf ROW left = "left", x = 10000 , right = "right" | EVAL x = x + 1 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-addresses.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-addresses.json index 378c9c1ca5db8..679efb3c8d38b 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-addresses.json +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-addresses.json @@ -25,7 +25,14 @@ "type": "keyword" }, "planet": { - "type": "keyword" + "properties": { + "name": { + "type": "keyword" + }, + "galaxy": { + "type": "keyword" + } + } } } } From 0e13eafcc4dc27cf969650020249b912b1357f97 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Tue, 9 Jul 2024 13:12:36 +0200 Subject: [PATCH 11/36] Add hierarchical tests for drop, dissect --- .../src/main/resources/dissect.csv-spec | 13 +++++++++++ .../src/main/resources/drop.csv-spec | 23 +++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec index 86593f69752d1..747bc930fbc3f 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec @@ -26,6 +26,19 @@ first_name:keyword | left:keyword | full_name:keyword | right:keyword | last_nam Georgi | left | Georgi Facello | right | Facello ; +shadowingHierarchical +FROM addresses +| KEEP city.country.continent.planet.name, city.country.name, city.name +| DISSECT city.name "%{city.country.continent.planet.name} %{?}" +| SORT city.name +; + +city.country.name:keyword | city.name:keyword | city.country.continent.planet.name:keyword +Netherlands | Amsterdam | null +United States of America | San Francisco | San +Japan | Tokyo | null +; + shadowingSelf FROM employees | KEEP first_name, last_name diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/drop.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/drop.csv-spec index 117026d777c81..1326d4926d225 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/drop.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/drop.csv-spec @@ -147,3 +147,26 @@ emp_no:integer | first_name:keyword 10001 | Georgi 10002 | Bezalel ; + +hierarchical +FROM addresses +| DROP city.country.continent.planet.name, city.country.continent.name, city.country.name, number, street, zip_code, city.country.continent.planet.name +| SORT city.name +; + +city.country.continent.planet.galaxy:keyword | city.name:keyword +Milky Way | Amsterdam +Milky Way | San Francisco +Milky Way | Tokyo +; + +hierarchical +FROM addresses +| DROP *.name, number, street, zip_code, *ame +; + +city.country.continent.planet.galaxy:keyword +Milky Way +Milky Way +Milky Way +; From 878eb9970619a75fee569aaf6a2c0fe516b82085 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Tue, 9 Jul 2024 13:29:56 +0200 Subject: [PATCH 12/36] Add hierarchical tests for enrich --- .../src/main/resources/enrich.csv-spec | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec index 2dd792c62ddf9..d66fe9ac3e739 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec @@ -69,6 +69,34 @@ ROW left = "left", foo = "foo", client_ip = "172.21.0.5", env = "env", right = " left:keyword | client_ip:keyword | env:keyword | right:keyword | foo:keyword ; +shadowingHierarchical +required_capability: enrich_load +FROM addresses +| KEEP city.country.continent.planet.name, city.country.name, city.name +| EVAL city.name = REPLACE(city.name, "San Francisco", "South San Francisco") +| ENRICH city_names ON city.name WITH city.country.continent.planet.name = airport +| SORT city.name +; + +city.country.name:keyword | city.name:keyword | city.country.continent.planet.name:text +Netherlands | Amsterdam | null +United States of America | South San Francisco | San Francisco Int'l +Japan | Tokyo | null +; + +shadowingHierarchicalLimit0 +required_capability: enrich_load +FROM addresses +| KEEP city.country.continent.planet.name, city.country.name, city.name +| EVAL city.name = REPLACE(city.name, "San Francisco", "South San Francisco") +| ENRICH city_names ON city.name WITH city.country.continent.planet.name = airport +| SORT city.name +| LIMIT 0 +; + +city.country.name:keyword | city.name:keyword | city.country.continent.planet.name:text +; + shadowingSelf required_capability: enrich_load ROW left = "left", client_ip = "172.21.0.5", env = "env", right = "right" From e8609f21bd4808730d4fdd8490d450584d33fc23 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Tue, 9 Jul 2024 13:37:59 +0200 Subject: [PATCH 13/36] Add hierarchical tests for grok, keep --- .../src/main/resources/grok.csv-spec | 13 ++++++++++ .../src/main/resources/keep.csv-spec | 24 +++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec index 733d8dca86cd9..bfd6fcd3a28c1 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec @@ -26,6 +26,19 @@ first_name:keyword | left:keyword | full_name:keyword | right:keyword | last_nam Georgi | left | Georgi Facello | right | Facello ; +shadowingHierarchical +FROM addresses +| KEEP city.country.continent.planet.name, city.country.name, city.name +| GROK city.name "%{WORD:city.country.continent.planet.name} %{WORD}" +| SORT city.name +; + +city.country.name:keyword | city.name:keyword | city.country.continent.planet.name:keyword +Netherlands | Amsterdam | null +United States of America | San Francisco | San +Japan | Tokyo | null +; + shadowingSelf FROM employees | KEEP first_name, last_name diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/keep.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/keep.csv-spec index 57134786c33c1..bc6b029f63bcb 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/keep.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/keep.csv-spec @@ -564,3 +564,27 @@ last_name:keyword Facello Simmel ; + +shadowingHierarchical +FROM addresses +| KEEP city.country.continent.planet.name, city.country.continent.name, city.country.name, city.name, city.country.continent.planet.name +| SORT city.name +; + +city.country.continent.name:keyword | city.country.name:keyword | city.name:keyword | city.country.continent.planet.name:keyword +Europe | Netherlands | Amsterdam | Earth +North America | United States of America | San Francisco | Earth +Asia | Japan | Tokyo | Earth +; + +shadowingHierarchicalWildcard +FROM addresses +| KEEP *name, city.country.continent.planet.name +| SORT city.name +; + +city.country.continent.name:keyword | city.country.name:keyword | city.name:keyword | city.country.continent.planet.name:keyword +Europe | Netherlands | Amsterdam | Earth +North America | United States of America | San Francisco | Earth +Asia | Japan | Tokyo | Earth +; From 5b4be7288352be0de94d0372ff4d2ef9b92c0774 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Tue, 9 Jul 2024 13:53:00 +0200 Subject: [PATCH 14/36] Add hierarchical tests for rename, row --- .../testFixtures/src/main/resources/rename.csv-spec | 13 +++++++++++++ .../qa/testFixtures/src/main/resources/row.csv-spec | 11 ++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/rename.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/rename.csv-spec index 26a35f77ba267..75235d19c1f51 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/rename.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/rename.csv-spec @@ -188,6 +188,19 @@ last_name:integer | first_name:keyword 10002 | Bezalel ; +shadowingHierarchical +FROM addresses +| KEEP city.country.continent.planet.name, city.country.continent.name, city.country.name, city.name +| RENAME city.name AS city.country.continent.planet.name, city.country.name AS city.country.continent.name +| SORT city.country.continent.planet.name +; + +city.country.continent.name:keyword | city.country.continent.planet.name:keyword +Netherlands | Amsterdam +United States of America | San Francisco +Japan | Tokyo +; + shadowingInternal FROM employees | SORT emp_no ASC diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec index 78dcd35960cf8..5342e68b4a4ad 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec @@ -38,12 +38,21 @@ a:integer shadowingInternal //TODO: needs cap to skip on bwc tests -row a = 1, a = 2; +ROW a = 1, a = 2; a:integer 2 ; +shadowingInternalHierarchical +// Fun fact: "Sissi" is an actual exoplanet name, after the character from the movie with the same name. A.k.a. HAT-P-14 b. +ROW city.country.continent.planet.name = "Earth", city.country.continent.name = "Netherlands", city.country.continent.planet.name = "Sissi" +; + +city.country.continent.name:keyword | city.country.continent.planet.name:keyword +Netherlands | Sissi +; + unsignedLongLiteral ROW long_max = 9223372036854775807, ul_start = 9223372036854775808, ul_end = 18446744073709551615, double=18446744073709551616; From 2f1778ae3337dc41c5a93f5185f0c0633801cfa1 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Tue, 9 Jul 2024 14:09:30 +0200 Subject: [PATCH 15/36] Add more extreme case for stats --- .../testFixtures/src/main/resources/stats.csv-spec | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec index 4476a1f5ab78f..8128dd55e3947 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec @@ -1828,3 +1828,15 @@ FROM employees x:integer 10001 ; + +shadowingInternalWithGroup +FROM employees +| STATS x = MAX(emp_no), x = MIN(emp_no) BY x = gender +| SORT x ASC +; + +x:keyword +F +M +null +; From 73a973675ce0bfa3473d8393a5109879889de77c Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Tue, 9 Jul 2024 14:42:05 +0200 Subject: [PATCH 16/36] Add new capability for this fix --- .../esql/qa/testFixtures/src/main/resources/row.csv-spec | 4 ++-- .../elasticsearch/xpack/esql/action/EsqlCapabilities.java | 8 +++++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec index 5342e68b4a4ad..2fbe040520ea2 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec @@ -37,7 +37,7 @@ a:integer ; shadowingInternal -//TODO: needs cap to skip on bwc tests +required_capability: enrich_load ROW a = 1, a = 2; a:integer @@ -87,7 +87,7 @@ a:integer | b:integer | c:null | z:integer ; evalRowWithNull2 -//TODO: needs cap to skip on bwc tests +required_capability: enrich_load row a = 1, null, b = 2, c = null, null | eval z = a+b; a:integer | b:integer | c:null | null:null | z:integer diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 07362311d37a5..33423dd62171a 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -106,7 +106,13 @@ public enum Cap { /** * Support for WEIGHTED_AVG function. */ - AGG_WEIGHTED_AVG; + AGG_WEIGHTED_AVG, + + /** + * Fix for non-unique attribute names in ROW. + * https://github.com/elastic/elasticsearch/issues/110541 + */ + UNIQUE_NAMES; private final boolean snapshotOnly; From 2cb23f594dcf0b1a0d1d9f96b5a7386b3e97fcd3 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Tue, 9 Jul 2024 17:53:39 +0200 Subject: [PATCH 17/36] Fix EsRelation.equals, mutation in ResolveUnionTypes --- .../org/elasticsearch/xpack/esql/analysis/Analyzer.java | 3 ++- .../elasticsearch/xpack/esql/plan/logical/EsRelation.java | 7 +++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index 30ffffd4770a9..89f344798731d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -1106,7 +1106,8 @@ protected LogicalPlan doRule(LogicalPlan plan) { // And add generated fields to EsRelation, so these new attributes will appear in the OutputExec of the Fragment // and thereby get used in FieldExtractExec plan = plan.transformDown(EsRelation.class, esr -> { - List output = esr.output(); + // Copy output so we do not accidentally mutate the previous plan + List output = new ArrayList<>(esr.output()); List missing = new ArrayList<>(); for (FieldAttribute fa : unionFieldAttributes) { if (output.stream().noneMatch(a -> a.id().equals(fa.id()))) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/EsRelation.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/EsRelation.java index 382838a5968cc..866385e6c7c28 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/EsRelation.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/EsRelation.java @@ -98,7 +98,7 @@ public boolean expressionsResolved() { @Override public int hashCode() { - return Objects.hash(index, indexMode, frozen); + return Objects.hash(index, indexMode, frozen, attrs); } @Override @@ -112,7 +112,10 @@ public boolean equals(Object obj) { } EsRelation other = (EsRelation) obj; - return Objects.equals(index, other.index) && indexMode == other.indexMode() && frozen == other.frozen; + return Objects.equals(index, other.index) + && indexMode == other.indexMode() + && frozen == other.frozen + && Objects.equals(attrs, other.attrs); } @Override From 1a648e4a05d3c6172d2581b678bfa7ceee6176a3 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Thu, 11 Jul 2024 15:55:22 +0200 Subject: [PATCH 18/36] Make union types use unique attribute names --- .../xpack/esql/analysis/Analyzer.java | 59 +++++++++++++++---- .../optimizer/LocalLogicalPlanOptimizer.java | 6 +- .../planner/EsPhysicalOperationProviders.java | 7 ++- 3 files changed, 57 insertions(+), 15 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index cef4129d6d0b9..1e1e5e7b98202 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -39,6 +39,7 @@ import org.elasticsearch.xpack.esql.core.plan.TableIdentifier; import org.elasticsearch.xpack.esql.core.rule.ParameterizedRule; import org.elasticsearch.xpack.esql.core.rule.ParameterizedRuleExecutor; +import org.elasticsearch.xpack.esql.core.rule.Rule; import org.elasticsearch.xpack.esql.core.rule.RuleExecutor; import org.elasticsearch.xpack.esql.core.session.Configuration; import org.elasticsearch.xpack.esql.core.tree.Source; @@ -60,6 +61,7 @@ import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.DateTimeArithmeticOperation; import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.EsqlArithmeticOperation; import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.In; +import org.elasticsearch.xpack.esql.optimizer.rules.SubstituteSurrogates; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.Drop; import org.elasticsearch.xpack.esql.plan.logical.Enrich; @@ -140,7 +142,13 @@ public class Analyzer extends ParameterizedRuleExecutor("Finish Analysis", Limiter.ONCE, new AddImplicitLimit(), new UnresolveUnionTypes()); + var finish = new Batch<>( + "Finish Analysis", + Limiter.ONCE, + new AddImplicitLimit(), + new UnresolveUnionTypes(), + new DropSyntheticUnionTypeAttributes() + ); rules = List.of(init, resolution, finish); } @@ -1103,24 +1111,21 @@ protected LogicalPlan doRule(LogicalPlan plan) { plan = plan.transformExpressionsOnly(UnresolvedAttribute.class, ua -> resolveAttribute(ua, resolved)); } - // Otherwise drop the converted attributes after the alias function, as they are only needed for this function, and - // the original version of the attribute should still be seen as unconverted. - plan = dropConvertedAttributes(plan, unionFieldAttributes); - // And add generated fields to EsRelation, so these new attributes will appear in the OutputExec of the Fragment // and thereby get used in FieldExtractExec plan = plan.transformDown(EsRelation.class, esr -> { - // Copy output so we do not accidentally mutate the previous plan - List output = new ArrayList<>(esr.output()); List missing = new ArrayList<>(); for (FieldAttribute fa : unionFieldAttributes) { - if (output.stream().noneMatch(a -> a.id().equals(fa.id()))) { + // Using outputSet().contains looks by NameId, resp. uses semanticEquals. + if (esr.outputSet().contains(fa) == false) { missing.add(fa); } } + if (missing.isEmpty() == false) { - output.addAll(missing); - return new EsRelation(esr.source(), esr.index(), output, esr.indexMode(), esr.frozen()); + List newOutput = new ArrayList<>(esr.output()); + newOutput.addAll(missing); + return new EsRelation(esr.source(), esr.index(), newOutput, esr.indexMode(), esr.frozen()); } return esr; }); @@ -1175,7 +1180,13 @@ private Expression createIfDoesNotAlreadyExist( MultiTypeEsField resolvedField, List unionFieldAttributes ) { - var unionFieldAttribute = new FieldAttribute(fa.source(), fa.name(), resolvedField); // Generates new ID for the field + // Generate new ID for the field and suffix it with the data type to maintain unique attribute names. + String unionTypedFieldName = SubstituteSurrogates.rawTemporaryName( + fa.name(), + "converted_to", + resolvedField.getDataType().typeName() + ); + FieldAttribute unionFieldAttribute = new FieldAttribute(fa.source(), unionTypedFieldName, resolvedField); int existingIndex = unionFieldAttributes.indexOf(unionFieldAttribute); if (existingIndex >= 0) { // Do not generate multiple name/type combinations with different IDs @@ -1235,4 +1246,30 @@ static Attribute checkUnresolved(FieldAttribute fa) { return fa; } } + + /** + * {@link ResolveUnionTypes} creates new, synthetic attributes for union types: + * If {@code client_ip} is present in 2 indices, once with type {@code ip} and once with type {@code keyword}, + * using {@code EVAL x = to_ip(client_ip)} will create a single attribute @{code $$client_ip$converted_to$ip}. + * This should not spill into the query output, so we drop such attributes at the end. + */ + private static class DropSyntheticUnionTypeAttributes extends Rule { + public LogicalPlan apply(LogicalPlan plan) { + List output = plan.output(); + List newOutput = new ArrayList<>(output.size()); + + for (Attribute attr : output) { + // TODO: this should really use .isSynthetic + // https://github.com/elastic/elasticsearch/issues/105821 + if (attr.name().contains("$$") == false) { + newOutput.add(attr); + } + } + + if (newOutput.size() != output.size()) { + return new Project(Source.EMPTY, plan, newOutput); + } + return plan; + } + } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java index 9a2ae742c2feb..2b8c5396cd534 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java @@ -140,7 +140,8 @@ else if (plan instanceof Project project) { Map nullLiteral = Maps.newLinkedHashMapWithExpectedSize(DataType.types().size()); for (NamedExpression projection : projections) { - if (projection instanceof FieldAttribute f && stats.exists(f.qualifiedName()) == false) { + // Do not use the attribute name, this can deviate from the field name for union types. + if (projection instanceof FieldAttribute f && stats.exists(f.field().getName()) == false) { DataType dt = f.dataType(); Alias nullAlias = nullLiteral.get(f.dataType()); // save the first field as null (per datatype) @@ -170,7 +171,8 @@ else if (plan instanceof Project project) { || plan instanceof TopN) { plan = plan.transformExpressionsOnlyUp( FieldAttribute.class, - f -> stats.exists(f.qualifiedName()) ? f : Literal.of(f, null) + // Do not use the attribute name, this can deviate from the field name for union types. + f -> stats.exists(f.field().getName()) ? f : Literal.of(f, null) ); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java index 8611d2c6fa9fb..fefe9f538c227 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java @@ -116,7 +116,8 @@ public final PhysicalOperation fieldExtractPhysicalOperation(FieldExtractExec fi DataType dataType = attr.dataType(); MappedFieldType.FieldExtractPreference fieldExtractPreference = PlannerUtils.extractPreference(docValuesAttrs.contains(attr)); ElementType elementType = PlannerUtils.toElementType(dataType, fieldExtractPreference); - String fieldName = attr.name(); + // Do not use the field attribute name, this can deviate from the field name for union types. + String fieldName = attr instanceof FieldAttribute fa ? fa.field().getName() : attr.name(); boolean isUnsupported = dataType == DataType.UNSUPPORTED; IntFunction loader = s -> getBlockLoaderFor(s, fieldName, isUnsupported, fieldExtractPreference, unionTypes); fields.add(new ValuesSourceReaderOperator.FieldInfo(fieldName, elementType, loader)); @@ -234,8 +235,10 @@ public final Operator.OperatorFactory ordinalGroupingOperatorFactory( // Costin: why are they ready and not already exposed in the layout? boolean isUnsupported = attrSource.dataType() == DataType.UNSUPPORTED; var unionTypes = findUnionTypes(attrSource); + // Field attribute names deviate in case of union types. + String fieldName = attrSource instanceof FieldAttribute fa ? fa.field().getName() : attrSource.name(); return new OrdinalsGroupingOperator.OrdinalsGroupingOperatorFactory( - shardIdx -> getBlockLoaderFor(shardIdx, attrSource.name(), isUnsupported, NONE, unionTypes), + shardIdx -> getBlockLoaderFor(shardIdx, fieldName, isUnsupported, NONE, unionTypes), vsShardContexts, groupElementType, docChannel, From 658bebcdc8f802cc034e31c9607150ef76960a7e Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Thu, 11 Jul 2024 16:05:18 +0200 Subject: [PATCH 19/36] Cleanup leftover --- .../elasticsearch/xpack/esql/analysis/Analyzer.java | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index 1e1e5e7b98202..1c8c427a1a40c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -1141,17 +1141,6 @@ private Expression resolveAttribute(UnresolvedAttribute ua, Map unionFieldAttributes) { - List projections = new ArrayList<>(plan.output()); - for (var e : unionFieldAttributes) { - projections.removeIf(p -> p.id().equals(e.id())); - } - if (projections.size() != plan.output().size()) { - return new EsqlProject(plan.source(), plan, projections); - } - return plan; - } - private Expression resolveConvertFunction(AbstractConvertFunction convert, List unionFieldAttributes) { if (convert.field() instanceof FieldAttribute fa && fa.field() instanceof InvalidMappedField imf) { HashMap typeResolutions = new HashMap<>(); From 6ac0fa93054ae256fbaf13dd729c99bf8c86bdd4 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Mon, 15 Jul 2024 15:14:25 +0200 Subject: [PATCH 20/36] Revert "Cleanup leftover" This reverts commit 658bebcdc8f802cc034e31c9607150ef76960a7e. --- .../elasticsearch/xpack/esql/analysis/Analyzer.java | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index a3bb56b1c9011..691eb5640dd47 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -1142,6 +1142,17 @@ private Expression resolveAttribute(UnresolvedAttribute ua, Map unionFieldAttributes) { + List projections = new ArrayList<>(plan.output()); + for (var e : unionFieldAttributes) { + projections.removeIf(p -> p.id().equals(e.id())); + } + if (projections.size() != plan.output().size()) { + return new EsqlProject(plan.source(), plan, projections); + } + return plan; + } + private Expression resolveConvertFunction(AbstractConvertFunction convert, List unionFieldAttributes) { if (convert.field() instanceof FieldAttribute fa && fa.field() instanceof InvalidMappedField imf) { HashMap typeResolutions = new HashMap<>(); From 0f3c274f689f2af3e18d0bc6095c2fe469c5b346 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Mon, 15 Jul 2024 15:14:41 +0200 Subject: [PATCH 21/36] Revert "Make union types use unique attribute names" This reverts commit 1a648e4a05d3c6172d2581b678bfa7ceee6176a3. --- .../xpack/esql/analysis/Analyzer.java | 59 ++++--------------- .../optimizer/LocalLogicalPlanOptimizer.java | 6 +- .../planner/EsPhysicalOperationProviders.java | 7 +-- 3 files changed, 15 insertions(+), 57 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index 691eb5640dd47..7d946c4dab37b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -39,7 +39,6 @@ import org.elasticsearch.xpack.esql.core.plan.TableIdentifier; import org.elasticsearch.xpack.esql.core.rule.ParameterizedRule; import org.elasticsearch.xpack.esql.core.rule.ParameterizedRuleExecutor; -import org.elasticsearch.xpack.esql.core.rule.Rule; import org.elasticsearch.xpack.esql.core.rule.RuleExecutor; import org.elasticsearch.xpack.esql.core.session.Configuration; import org.elasticsearch.xpack.esql.core.tree.Source; @@ -61,7 +60,6 @@ import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.DateTimeArithmeticOperation; import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.EsqlArithmeticOperation; import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.In; -import org.elasticsearch.xpack.esql.optimizer.rules.SubstituteSurrogates; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.Drop; import org.elasticsearch.xpack.esql.plan.logical.Enrich; @@ -141,13 +139,7 @@ public class Analyzer extends ParameterizedRuleExecutor( - "Finish Analysis", - Limiter.ONCE, - new AddImplicitLimit(), - new UnresolveUnionTypes(), - new DropSyntheticUnionTypeAttributes() - ); + var finish = new Batch<>("Finish Analysis", Limiter.ONCE, new AddImplicitLimit(), new UnresolveUnionTypes()); rules = List.of(init, resolution, finish); } @@ -1112,21 +1104,24 @@ protected LogicalPlan doRule(LogicalPlan plan) { plan = plan.transformExpressionsOnly(UnresolvedAttribute.class, ua -> resolveAttribute(ua, resolved)); } + // Otherwise drop the converted attributes after the alias function, as they are only needed for this function, and + // the original version of the attribute should still be seen as unconverted. + plan = dropConvertedAttributes(plan, unionFieldAttributes); + // And add generated fields to EsRelation, so these new attributes will appear in the OutputExec of the Fragment // and thereby get used in FieldExtractExec plan = plan.transformDown(EsRelation.class, esr -> { + // Copy output so we do not accidentally mutate the previous plan + List output = new ArrayList<>(esr.output()); List missing = new ArrayList<>(); for (FieldAttribute fa : unionFieldAttributes) { - // Using outputSet().contains looks by NameId, resp. uses semanticEquals. - if (esr.outputSet().contains(fa) == false) { + if (output.stream().noneMatch(a -> a.id().equals(fa.id()))) { missing.add(fa); } } - if (missing.isEmpty() == false) { - List newOutput = new ArrayList<>(esr.output()); - newOutput.addAll(missing); - return new EsRelation(esr.source(), esr.index(), newOutput, esr.indexMode(), esr.frozen()); + output.addAll(missing); + return new EsRelation(esr.source(), esr.index(), output, esr.indexMode(), esr.frozen()); } return esr; }); @@ -1181,13 +1176,7 @@ private Expression createIfDoesNotAlreadyExist( MultiTypeEsField resolvedField, List unionFieldAttributes ) { - // Generate new ID for the field and suffix it with the data type to maintain unique attribute names. - String unionTypedFieldName = SubstituteSurrogates.rawTemporaryName( - fa.name(), - "converted_to", - resolvedField.getDataType().typeName() - ); - FieldAttribute unionFieldAttribute = new FieldAttribute(fa.source(), unionTypedFieldName, resolvedField); + var unionFieldAttribute = new FieldAttribute(fa.source(), fa.name(), resolvedField); // Generates new ID for the field int existingIndex = unionFieldAttributes.indexOf(unionFieldAttribute); if (existingIndex >= 0) { // Do not generate multiple name/type combinations with different IDs @@ -1247,30 +1236,4 @@ static Attribute checkUnresolved(FieldAttribute fa) { return fa; } } - - /** - * {@link ResolveUnionTypes} creates new, synthetic attributes for union types: - * If {@code client_ip} is present in 2 indices, once with type {@code ip} and once with type {@code keyword}, - * using {@code EVAL x = to_ip(client_ip)} will create a single attribute @{code $$client_ip$converted_to$ip}. - * This should not spill into the query output, so we drop such attributes at the end. - */ - private static class DropSyntheticUnionTypeAttributes extends Rule { - public LogicalPlan apply(LogicalPlan plan) { - List output = plan.output(); - List newOutput = new ArrayList<>(output.size()); - - for (Attribute attr : output) { - // TODO: this should really use .isSynthetic - // https://github.com/elastic/elasticsearch/issues/105821 - if (attr.name().contains("$$") == false) { - newOutput.add(attr); - } - } - - if (newOutput.size() != output.size()) { - return new Project(Source.EMPTY, plan, newOutput); - } - return plan; - } - } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java index 2b8c5396cd534..9a2ae742c2feb 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java @@ -140,8 +140,7 @@ else if (plan instanceof Project project) { Map nullLiteral = Maps.newLinkedHashMapWithExpectedSize(DataType.types().size()); for (NamedExpression projection : projections) { - // Do not use the attribute name, this can deviate from the field name for union types. - if (projection instanceof FieldAttribute f && stats.exists(f.field().getName()) == false) { + if (projection instanceof FieldAttribute f && stats.exists(f.qualifiedName()) == false) { DataType dt = f.dataType(); Alias nullAlias = nullLiteral.get(f.dataType()); // save the first field as null (per datatype) @@ -171,8 +170,7 @@ else if (plan instanceof Project project) { || plan instanceof TopN) { plan = plan.transformExpressionsOnlyUp( FieldAttribute.class, - // Do not use the attribute name, this can deviate from the field name for union types. - f -> stats.exists(f.field().getName()) ? f : Literal.of(f, null) + f -> stats.exists(f.qualifiedName()) ? f : Literal.of(f, null) ); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java index 839a1cc19e181..9386e77691a43 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java @@ -117,8 +117,7 @@ public final PhysicalOperation fieldExtractPhysicalOperation(FieldExtractExec fi DataType dataType = attr.dataType(); MappedFieldType.FieldExtractPreference fieldExtractPreference = PlannerUtils.extractPreference(docValuesAttrs.contains(attr)); ElementType elementType = PlannerUtils.toElementType(dataType, fieldExtractPreference); - // Do not use the field attribute name, this can deviate from the field name for union types. - String fieldName = attr instanceof FieldAttribute fa ? fa.field().getName() : attr.name(); + String fieldName = attr.name(); boolean isUnsupported = dataType == DataType.UNSUPPORTED; IntFunction loader = s -> getBlockLoaderFor(s, fieldName, isUnsupported, fieldExtractPreference, unionTypes); fields.add(new ValuesSourceReaderOperator.FieldInfo(fieldName, elementType, loader)); @@ -236,10 +235,8 @@ public final Operator.OperatorFactory ordinalGroupingOperatorFactory( // Costin: why are they ready and not already exposed in the layout? boolean isUnsupported = attrSource.dataType() == DataType.UNSUPPORTED; var unionTypes = findUnionTypes(attrSource); - // Field attribute names deviate in case of union types. - String fieldName = attrSource instanceof FieldAttribute fa ? fa.field().getName() : attrSource.name(); return new OrdinalsGroupingOperator.OrdinalsGroupingOperatorFactory( - shardIdx -> getBlockLoaderFor(shardIdx, fieldName, isUnsupported, NONE, unionTypes), + shardIdx -> getBlockLoaderFor(shardIdx, attrSource.name(), isUnsupported, NONE, unionTypes), vsShardContexts, groupElementType, docChannel, From 091c0993b4c1f296b4e7eaab0873bf6de6d20663 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Mon, 15 Jul 2024 15:14:55 +0200 Subject: [PATCH 22/36] Revert "Fix EsRelation.equals, mutation in ResolveUnionTypes" This reverts commit 2cb23f594dcf0b1a0d1d9f96b5a7386b3e97fcd3. --- .../org/elasticsearch/xpack/esql/analysis/Analyzer.java | 3 +-- .../elasticsearch/xpack/esql/plan/logical/EsRelation.java | 7 ++----- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index 7d946c4dab37b..5fdd6309ae71e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -1111,8 +1111,7 @@ protected LogicalPlan doRule(LogicalPlan plan) { // And add generated fields to EsRelation, so these new attributes will appear in the OutputExec of the Fragment // and thereby get used in FieldExtractExec plan = plan.transformDown(EsRelation.class, esr -> { - // Copy output so we do not accidentally mutate the previous plan - List output = new ArrayList<>(esr.output()); + List output = esr.output(); List missing = new ArrayList<>(); for (FieldAttribute fa : unionFieldAttributes) { if (output.stream().noneMatch(a -> a.id().equals(fa.id()))) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/EsRelation.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/EsRelation.java index 866385e6c7c28..382838a5968cc 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/EsRelation.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/EsRelation.java @@ -98,7 +98,7 @@ public boolean expressionsResolved() { @Override public int hashCode() { - return Objects.hash(index, indexMode, frozen, attrs); + return Objects.hash(index, indexMode, frozen); } @Override @@ -112,10 +112,7 @@ public boolean equals(Object obj) { } EsRelation other = (EsRelation) obj; - return Objects.equals(index, other.index) - && indexMode == other.indexMode() - && frozen == other.frozen - && Objects.equals(attrs, other.attrs); + return Objects.equals(index, other.index) && indexMode == other.indexMode() && frozen == other.frozen; } @Override From f478d59533f6268ca4727fa00f1e4f66bc97170a Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Mon, 15 Jul 2024 15:49:23 +0200 Subject: [PATCH 23/36] More ENRICH tests with internal shadowing --- .../src/main/resources/enrich.csv-spec | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec index d66fe9ac3e739..8f76b74965567 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec @@ -145,6 +145,35 @@ city:keyword | x:text Zürich | Bezirk Zürich ; +shadowingInternalImplicit +required_capability: enrich_load +ROW city = "Zürich" +| ENRICH city_names ON city WITH airport = region +; + +city:keyword | airport:text +Zürich | Bezirk Zürich +; + +shadowingInternalImplicit2 +required_capability: enrich_load +ROW city = "Zürich" +| ENRICH city_names ON city WITH airport, airport = region +; + +city:keyword | airport:text +Zürich | Bezirk Zürich +; + +shadowingInternalImplicit3 +required_capability: enrich_load +ROW city = "Zürich" +| ENRICH city_names ON city WITH airport = region, airport +; + +city:keyword | airport:text +Zürich | Zurich Int'l +; simple required_capability: enrich_load From 6f005341641e49be2139ef39389015f5d9b9c8f3 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Mon, 15 Jul 2024 16:20:38 +0200 Subject: [PATCH 24/36] More consistent test names --- .../esql/qa/testFixtures/src/main/resources/dissect.csv-spec | 2 +- .../esql/qa/testFixtures/src/main/resources/drop.csv-spec | 4 ++-- .../esql/qa/testFixtures/src/main/resources/enrich.csv-spec | 4 ++-- .../esql/qa/testFixtures/src/main/resources/eval.csv-spec | 2 +- .../esql/qa/testFixtures/src/main/resources/grok.csv-spec | 2 +- .../esql/qa/testFixtures/src/main/resources/keep.csv-spec | 4 ++-- .../esql/qa/testFixtures/src/main/resources/rename.csv-spec | 2 +- .../esql/qa/testFixtures/src/main/resources/row.csv-spec | 2 +- 8 files changed, 11 insertions(+), 11 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec index 747bc930fbc3f..8c4e797b7982d 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec @@ -26,7 +26,7 @@ first_name:keyword | left:keyword | full_name:keyword | right:keyword | last_nam Georgi | left | Georgi Facello | right | Facello ; -shadowingHierarchical +shadowingSubfields FROM addresses | KEEP city.country.continent.planet.name, city.country.name, city.name | DISSECT city.name "%{city.country.continent.planet.name} %{?}" diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/drop.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/drop.csv-spec index 1326d4926d225..fa84d7ba41f6b 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/drop.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/drop.csv-spec @@ -148,7 +148,7 @@ emp_no:integer | first_name:keyword 10002 | Bezalel ; -hierarchical +subfields FROM addresses | DROP city.country.continent.planet.name, city.country.continent.name, city.country.name, number, street, zip_code, city.country.continent.planet.name | SORT city.name @@ -160,7 +160,7 @@ Milky Way | San Francisco Milky Way | Tokyo ; -hierarchical +subfieldsWildcard FROM addresses | DROP *.name, number, street, zip_code, *ame ; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec index 8f76b74965567..cf32e028b23bc 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec @@ -69,7 +69,7 @@ ROW left = "left", foo = "foo", client_ip = "172.21.0.5", env = "env", right = " left:keyword | client_ip:keyword | env:keyword | right:keyword | foo:keyword ; -shadowingHierarchical +shadowingSubfields required_capability: enrich_load FROM addresses | KEEP city.country.continent.planet.name, city.country.name, city.name @@ -84,7 +84,7 @@ United States of America | South San Francisco | San Francisco Int'l Japan | Tokyo | null ; -shadowingHierarchicalLimit0 +shadowingSubfieldsLimit0 required_capability: enrich_load FROM addresses | KEEP city.country.continent.planet.name, city.country.name, city.name diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec index 7758496730cad..87f54fbf0f174 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec @@ -15,7 +15,7 @@ left:keyword | right:keyword | x:integer left | right | 1 ; -shadowingHierarchical +shadowingSubfields FROM addresses | KEEP city.country.continent.planet.name, city.country.name, city.name | EVAL city.country.continent.planet.name = to_upper(city.country.continent.planet.name) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec index bfd6fcd3a28c1..194e177a4ecbf 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec @@ -26,7 +26,7 @@ first_name:keyword | left:keyword | full_name:keyword | right:keyword | last_nam Georgi | left | Georgi Facello | right | Facello ; -shadowingHierarchical +shadowingSubfields FROM addresses | KEEP city.country.continent.planet.name, city.country.name, city.name | GROK city.name "%{WORD:city.country.continent.planet.name} %{WORD}" diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/keep.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/keep.csv-spec index bc6b029f63bcb..e4c4204848147 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/keep.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/keep.csv-spec @@ -565,7 +565,7 @@ Facello Simmel ; -shadowingHierarchical +shadowingSubfields FROM addresses | KEEP city.country.continent.planet.name, city.country.continent.name, city.country.name, city.name, city.country.continent.planet.name | SORT city.name @@ -577,7 +577,7 @@ North America | United States of America | San Francisco Asia | Japan | Tokyo | Earth ; -shadowingHierarchicalWildcard +shadowingSubfieldsWildcard FROM addresses | KEEP *name, city.country.continent.planet.name | SORT city.name diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/rename.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/rename.csv-spec index 75235d19c1f51..ca4c627cae749 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/rename.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/rename.csv-spec @@ -188,7 +188,7 @@ last_name:integer | first_name:keyword 10002 | Bezalel ; -shadowingHierarchical +shadowingSubfields FROM addresses | KEEP city.country.continent.planet.name, city.country.continent.name, city.country.name, city.name | RENAME city.name AS city.country.continent.planet.name, city.country.name AS city.country.continent.name diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec index 2fbe040520ea2..78ee460d64958 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec @@ -44,7 +44,7 @@ a:integer 2 ; -shadowingInternalHierarchical +shadowingInternalSubfields // Fun fact: "Sissi" is an actual exoplanet name, after the character from the movie with the same name. A.k.a. HAT-P-14 b. ROW city.country.continent.planet.name = "Earth", city.country.continent.name = "Netherlands", city.country.continent.planet.name = "Sissi" ; From 535c954580610bb8f1015cfb3b2a2a1ebd1ca7c5 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Mon, 15 Jul 2024 16:35:04 +0200 Subject: [PATCH 25/36] More KEEP tests --- .../src/main/resources/keep.csv-spec | 29 +++++++++++++------ 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/keep.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/keep.csv-spec index e4c4204848147..bcce35eb81e0f 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/keep.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/keep.csv-spec @@ -543,26 +543,37 @@ c:i shadowingInternal FROM employees | SORT emp_no ASC -| KEEP last_name, last_name +| KEEP last_name, emp_no, last_name | LIMIT 2 ; -last_name:keyword -Facello -Simmel +emp_no:integer | last_name:keyword + 10001 | Facello + 10002 | Simmel ; - shadowingInternalWildcard FROM employees | SORT emp_no ASC -| KEEP last*name, last*name, last*, last_name +| KEEP last*name, emp_no, last*name, first_name, last*, gender, last* +| LIMIT 2 +; + +emp_no:integer | first_name:keyword | gender:keyword | last_name:keyword + 10001 | Georgi | M | Facello + 10002 | Bezalel | F | Simmel +; + +shadowingInternalWildcardAndExplicit +FROM employees +| SORT emp_no ASC +| KEEP last*name, emp_no, last_name, first_name, last*, languages, last_name, gender, last*name | LIMIT 2 ; -last_name:keyword -Facello -Simmel +emp_no:integer | first_name:keyword | languages:integer | last_name:keyword | gender:keyword + 10001 | Georgi | 2 | Facello | M + 10002 | Bezalel | 5 | Simmel | F ; shadowingSubfields From fb85e16206a9908482de2c0d64a2ee4145a1feec Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Mon, 15 Jul 2024 17:00:48 +0200 Subject: [PATCH 26/36] Update docs --- docs/reference/esql/processing-commands/dissect.asciidoc | 2 ++ docs/reference/esql/processing-commands/enrich.asciidoc | 7 ++++++- docs/reference/esql/processing-commands/eval.asciidoc | 4 +++- docs/reference/esql/processing-commands/grok.asciidoc | 3 +++ docs/reference/esql/processing-commands/keep.asciidoc | 4 ++++ docs/reference/esql/processing-commands/lookup.asciidoc | 2 ++ docs/reference/esql/processing-commands/rename.asciidoc | 6 +++++- docs/reference/esql/processing-commands/stats.asciidoc | 3 +++ docs/reference/esql/source-commands/row.asciidoc | 1 + 9 files changed, 29 insertions(+), 3 deletions(-) diff --git a/docs/reference/esql/processing-commands/dissect.asciidoc b/docs/reference/esql/processing-commands/dissect.asciidoc index 72c811a318a5d..0f4b63ed4d026 100644 --- a/docs/reference/esql/processing-commands/dissect.asciidoc +++ b/docs/reference/esql/processing-commands/dissect.asciidoc @@ -20,6 +20,8 @@ multiple values, `DISSECT` will process each value. `pattern`:: A <>. +In case a field name coincides with an existing column, the existing column is discarded. +If a field name is used more than once, only the rightmost duplicate creates a column. ``:: A string used as the separator between appended values, when using the <>. diff --git a/docs/reference/esql/processing-commands/enrich.asciidoc b/docs/reference/esql/processing-commands/enrich.asciidoc index f34e77dbf5c23..75e2f2f7b9197 100644 --- a/docs/reference/esql/processing-commands/enrich.asciidoc +++ b/docs/reference/esql/processing-commands/enrich.asciidoc @@ -31,11 +31,16 @@ name as the `match_field` defined in the <>. The enrich fields from the enrich index that are added to the result as new columns. If a column with the same name as the enrich field already exists, the existing column will be replaced by the new column. If not specified, each of -the enrich fields defined in the policy is added +the enrich fields defined in the policy is added. +If a column has the same name as the enrich field, it will be discarded unless +that field is given a new name. `new_nameX`:: Enables you to change the name of the column that's added for each of the enrich fields. Defaults to the enrich field name. +If a column has the same name as the new name, it will be discarded. +If a name (new or original) occurs more than once, only the rightmost duplicate +creates a new column. *Description* diff --git a/docs/reference/esql/processing-commands/eval.asciidoc b/docs/reference/esql/processing-commands/eval.asciidoc index f77249736c1b3..ab35442ec492b 100644 --- a/docs/reference/esql/processing-commands/eval.asciidoc +++ b/docs/reference/esql/processing-commands/eval.asciidoc @@ -16,10 +16,12 @@ EVAL [column1 =] value1[, ..., [columnN =] valueN] `columnX`:: The column name. +In case a column name coincides with an existing column, the existing column is discarded. +If a column name is used more than once, only the rightmost duplicate creates a column. `valueX`:: The value for the column. Can be a literal, an expression, or a -<>. +<>. Can refer to columns defined left of this one. *Description* diff --git a/docs/reference/esql/processing-commands/grok.asciidoc b/docs/reference/esql/processing-commands/grok.asciidoc index d631d17f7a42c..bf1abbb775865 100644 --- a/docs/reference/esql/processing-commands/grok.asciidoc +++ b/docs/reference/esql/processing-commands/grok.asciidoc @@ -20,6 +20,9 @@ multiple values, `GROK` will process each value. `pattern`:: A grok pattern. +In case a field name coincides with an existing column, the existing column is discarded. +If a field name is used more than once, a multi-valued column will be created with one value +per each occurrence of the field name. *Description* diff --git a/docs/reference/esql/processing-commands/keep.asciidoc b/docs/reference/esql/processing-commands/keep.asciidoc index 468f459411640..5d9c612a95684 100644 --- a/docs/reference/esql/processing-commands/keep.asciidoc +++ b/docs/reference/esql/processing-commands/keep.asciidoc @@ -16,6 +16,10 @@ KEEP columns `columns`:: A comma-separated list of columns to keep. Supports wildcards. +In case a column name without wildcards occurs multiple times, the column is placed at the +position of the right-most duplicate. The same is true if a column name matches multiple +wildcards. If a column name matches both a wildcard and also a column name without wildcards, +the position of the latter is used. *Description* diff --git a/docs/reference/esql/processing-commands/lookup.asciidoc b/docs/reference/esql/processing-commands/lookup.asciidoc index 426527bf4d2d6..b9cad78f55aec 100644 --- a/docs/reference/esql/processing-commands/lookup.asciidoc +++ b/docs/reference/esql/processing-commands/lookup.asciidoc @@ -18,6 +18,8 @@ LOOKUP table ON match_field1[, match_field2, ...] `table`:: The name of the `table` provided in the request to match. +If it has column names that conflict with existing columns, the table's columns replace the +existing ones. `match_field`:: The fields in the input to match against the table. diff --git a/docs/reference/esql/processing-commands/rename.asciidoc b/docs/reference/esql/processing-commands/rename.asciidoc index 8507a826f085d..2094d87bb1fdf 100644 --- a/docs/reference/esql/processing-commands/rename.asciidoc +++ b/docs/reference/esql/processing-commands/rename.asciidoc @@ -17,7 +17,11 @@ RENAME old_name1 AS new_name1[, ..., old_nameN AS new_nameN] The name of a column you want to rename. `new_nameX`:: -The new name of the column. +The new name of the column. If it conflicts with an existing column name, +the existing column is replaced by the renamed column. If multiple columns +are renamed to the same name, all but the rightmost column with the same new +name are dropped. If a column is renamed multiple times, all but the rightmost +rename are ignored. *Description* diff --git a/docs/reference/esql/processing-commands/stats.asciidoc b/docs/reference/esql/processing-commands/stats.asciidoc index 34ae81fd5414e..7377522a93201 100644 --- a/docs/reference/esql/processing-commands/stats.asciidoc +++ b/docs/reference/esql/processing-commands/stats.asciidoc @@ -18,12 +18,15 @@ STATS [column1 =] expression1[, ..., [columnN =] expressionN] `columnX`:: The name by which the aggregated value is returned. If omitted, the name is equal to the corresponding expression (`expressionX`). +If multiple columns have the same name, all but the rightmost column with this +name will be ignored. `expressionX`:: An expression that computes an aggregated value. `grouping_expressionX`:: An expression that outputs the values to group by. +If its name coincides with one of the computed columns, that column will be ignored. NOTE: Individual `null` values are skipped when computing aggregations. diff --git a/docs/reference/esql/source-commands/row.asciidoc b/docs/reference/esql/source-commands/row.asciidoc index 5c81d67c4ac22..28a4f29ae9a5b 100644 --- a/docs/reference/esql/source-commands/row.asciidoc +++ b/docs/reference/esql/source-commands/row.asciidoc @@ -16,6 +16,7 @@ ROW column1 = value1[, ..., columnN = valueN] `columnX`:: The column name. +In case of duplicate column names, only the rightmost duplicate creates a column. `valueX`:: The value for the column. Can be a literal, an expression, or a From 6f98cde584296645d7d331dcc5ce99cb3e47a72b Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Mon, 15 Jul 2024 18:02:56 +0200 Subject: [PATCH 27/36] Add more tests --- .../src/main/resources/rename.csv-spec | 41 +++++++++++++++++++ .../src/main/resources/stats.csv-spec | 12 ++++++ 2 files changed, 53 insertions(+) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/rename.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/rename.csv-spec index ca4c627cae749..bec75bbd6096a 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/rename.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/rename.csv-spec @@ -213,3 +213,44 @@ x:keyword Facello Simmel ; + +renameMultipleTimes +FROM employees +| SORT emp_no ASC +| KEEP emp_no +| RENAME emp_no AS x, emp_no AS y, emp_no AS z +| LIMIT 2 +; + +z:keyword +Facello +Simmel +; + + +shadowingInternalAndRenameMultipleTimes +FROM employees +| SORT emp_no ASC +| KEEP emp_no, last_name +| RENAME emp_no AS x, last_name AS x, emp_no AS z +| LIMIT 2 +; + +z:keyword | x:integer +Facello | 10001 +Simmel | 10002 +; + + +shadowingInternalAndRenameMultipleTimes2 +FROM employees +| SORT emp_no ASC +| KEEP emp_no, last_name +| RENAME emp_no AS x, last_name AS x, last_name AS z +| LIMIT 2 +; + +x:integer | z:keyword + 10001 | Facello + 10002 | Simmel +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec index 0e00285442936..bd7015a285855 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec @@ -1879,6 +1879,18 @@ M null ; +shadowingTheGroup +FROM employees +| STATS gender = MAX(emp_no), gender = MIN(emp_no) BY gender +| SORT gender ASC +; + +gender:keyword +F +M +null +; + docsStatsMvGroup // tag::mv-group[] ROW i=1, a=["a", "b"] | STATS MIN(i) BY a | SORT a ASC From fdcc40e1d37cc5325437b2fd30fb0e9390be9cec Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Mon, 15 Jul 2024 18:08:37 +0200 Subject: [PATCH 28/36] Improve doc wording --- docs/reference/esql/processing-commands/eval.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/esql/processing-commands/eval.asciidoc b/docs/reference/esql/processing-commands/eval.asciidoc index ab35442ec492b..80d4ebb60df19 100644 --- a/docs/reference/esql/processing-commands/eval.asciidoc +++ b/docs/reference/esql/processing-commands/eval.asciidoc @@ -21,7 +21,7 @@ If a column name is used more than once, only the rightmost duplicate creates a `valueX`:: The value for the column. Can be a literal, an expression, or a -<>. Can refer to columns defined left of this one. +<>. Can use columns defined left of this one. *Description* From 3779900b8d650f5472695e1ca9d06f7c027f4374 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Mon, 15 Jul 2024 18:47:42 +0200 Subject: [PATCH 29/36] Improve GROK docs --- .../esql/processing-commands/grok.asciidoc | 12 ++++++++++++ .../esql/processing-commands/keep.asciidoc | 4 ++-- .../src/main/resources/docs.csv-spec | 17 +++++++++++++++++ .../src/main/resources/grok.csv-spec | 15 ++++++++------- 4 files changed, 39 insertions(+), 9 deletions(-) diff --git a/docs/reference/esql/processing-commands/grok.asciidoc b/docs/reference/esql/processing-commands/grok.asciidoc index bf1abbb775865..2a24f30740040 100644 --- a/docs/reference/esql/processing-commands/grok.asciidoc +++ b/docs/reference/esql/processing-commands/grok.asciidoc @@ -70,4 +70,16 @@ include::{esql-specs}/docs.csv-spec[tag=grokWithToDatetime] |=== include::{esql-specs}/docs.csv-spec[tag=grokWithToDatetime-result] |=== + +In case a field name is used more than once, `GROK` creates a multi-valued +column: + +[source.merge.styled,esql] +---- +include::{esql-specs}/docs.csv-spec[tag=grokWithDuplicateFieldNames] +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +include::{esql-specs}/docs.csv-spec[tag=grokWithDuplicateFieldNames-result] +|=== // end::examples[] diff --git a/docs/reference/esql/processing-commands/keep.asciidoc b/docs/reference/esql/processing-commands/keep.asciidoc index 5d9c612a95684..1a2e9cc1b6bc6 100644 --- a/docs/reference/esql/processing-commands/keep.asciidoc +++ b/docs/reference/esql/processing-commands/keep.asciidoc @@ -17,7 +17,7 @@ KEEP columns `columns`:: A comma-separated list of columns to keep. Supports wildcards. In case a column name without wildcards occurs multiple times, the column is placed at the -position of the right-most duplicate. The same is true if a column name matches multiple +position of the rightmost duplicate. The same is true if a column name matches multiple wildcards. If a column name matches both a wildcard and also a column name without wildcards, the position of the latter is used. @@ -33,7 +33,7 @@ Fields are added in the order they appear. If one field matches multiple express 2. Partial wildcard expressions (for example: `fieldNam*`) 3. Wildcard only (`*`) -If a field matches two expressions with the same precedence, the right-most expression wins. +If a field matches two expressions with the same precedence, the rightmost expression wins. Refer to the examples for illustrations of these precedence rules. diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec index d34620a9e118d..15fe6853ae491 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec @@ -436,6 +436,23 @@ ROW a = "1.2.3.4 [2023-01-23T12:15:00.000Z] Connected" // end::grokWithEscape-result[] ; +grokWithDuplicateFieldNames +// tag::grokWithDuplicateFieldNames[] +FROM addresses +| KEEP city.name, zip_code +| GROK zip_code "%{WORD:zip_parts} %{WORD:zip_parts}" +// end::grokWithDuplicateFieldNames[] +| SORT city.name +; + +// tag::grokWithDuplicateFieldNames-result[] +city.name:keyword | zip_code:keyword | zip_parts:keyword +Amsterdam | 1016 ED | ["1016", "ED"] +San Francisco | CA 94108 | ["CA", "94108"] +Tokyo | 100-7014 | null +// end::grokWithDuplicateFieldNames-result[] +; + basicDissect // tag::basicDissect[] ROW a = "2023-01-23T12:15:00.000Z - some text - 127.0.0.1" diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec index 194e177a4ecbf..d9857e8c122ef 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec @@ -64,15 +64,16 @@ Facello | left | Georgi1 Georgi2 Facello | middle | ri ; shadowingInternal -FROM employees -| KEEP last_name -| WHERE last_name == "Facello" -| EVAL name = concat("1 ", last_name) -| GROK name "%{WORD:foo} %{WORD:foo}" +FROM addresses +| KEEP city.name, zip_code +| GROK zip_code "%{WORD:zip_parts} %{WORD:zip_parts}" +| SORT city.name ; -last_name:keyword | name:keyword | foo:keyword -Facello | 1 Facello | [1, Facello] +city.name:keyword | zip_code:keyword | zip_parts:keyword +Amsterdam | 1016 ED | ["1016", "ED"] +San Francisco | CA 94108 | ["CA", "94108"] +Tokyo | 100-7014 | null ; complexPattern From 94737ff016a0ed23b0f71d74a82dc992978ff5da Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Tue, 16 Jul 2024 13:42:12 +0200 Subject: [PATCH 30/36] Update RENAME docs and tests --- .../esql/processing-commands/rename.asciidoc | 3 +- .../src/main/resources/rename.csv-spec | 41 ------------------- 2 files changed, 1 insertion(+), 43 deletions(-) diff --git a/docs/reference/esql/processing-commands/rename.asciidoc b/docs/reference/esql/processing-commands/rename.asciidoc index 2094d87bb1fdf..df1cc572f4dbf 100644 --- a/docs/reference/esql/processing-commands/rename.asciidoc +++ b/docs/reference/esql/processing-commands/rename.asciidoc @@ -20,8 +20,7 @@ The name of a column you want to rename. The new name of the column. If it conflicts with an existing column name, the existing column is replaced by the renamed column. If multiple columns are renamed to the same name, all but the rightmost column with the same new -name are dropped. If a column is renamed multiple times, all but the rightmost -rename are ignored. +name are dropped. *Description* diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/rename.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/rename.csv-spec index bec75bbd6096a..ca4c627cae749 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/rename.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/rename.csv-spec @@ -213,44 +213,3 @@ x:keyword Facello Simmel ; - -renameMultipleTimes -FROM employees -| SORT emp_no ASC -| KEEP emp_no -| RENAME emp_no AS x, emp_no AS y, emp_no AS z -| LIMIT 2 -; - -z:keyword -Facello -Simmel -; - - -shadowingInternalAndRenameMultipleTimes -FROM employees -| SORT emp_no ASC -| KEEP emp_no, last_name -| RENAME emp_no AS x, last_name AS x, emp_no AS z -| LIMIT 2 -; - -z:keyword | x:integer -Facello | 10001 -Simmel | 10002 -; - - -shadowingInternalAndRenameMultipleTimes2 -FROM employees -| SORT emp_no ASC -| KEEP emp_no, last_name -| RENAME emp_no AS x, last_name AS x, last_name AS z -| LIMIT 2 -; - -x:integer | z:keyword - 10001 | Facello - 10002 | Simmel -; From ea9b9a9edea2954bc794ec262062d5b21ff71959 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Tue, 16 Jul 2024 15:16:33 +0200 Subject: [PATCH 31/36] Avoid duplicate field attribs from union type res --- .../xpack/esql/analysis/Analyzer.java | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index cc12d3730f495..a691f88f29f99 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -1068,13 +1068,29 @@ public static Expression castStringLiteral(Expression from, DataType target) { * Any fields which could not be resolved by conversion functions will be converted to UnresolvedAttribute instances in a later rule * (See UnresolveUnionTypes below). */ - private static class ResolveUnionTypes extends BaseAnalyzerRule { + private static class ResolveUnionTypes extends Rule { record TypeResolutionKey(String fieldName, DataType fieldType) {} + private List unionFieldAttributes; + @Override - protected LogicalPlan doRule(LogicalPlan plan) { - List unionFieldAttributes = new ArrayList<>(); + public LogicalPlan apply(LogicalPlan plan) { + unionFieldAttributes = new ArrayList<>(); + // Collect field attributes from previous runs + plan.forEachUp(EsRelation.class, rel -> { + for (Attribute attr : rel.output()) { + if (attr instanceof FieldAttribute fa && fa.field() instanceof MultiTypeEsField) { + unionFieldAttributes.add(fa); + } + } + }); + + return plan.transformUp(LogicalPlan.class, p -> p.resolved() || p.childrenResolved() == false ? p : doRule(p)); + } + + private LogicalPlan doRule(LogicalPlan plan) { + int alreadyAddedUnionFieldAttributes = unionFieldAttributes.size(); // See if the eval function has an unresolved MultiTypeEsField field // Replace the entire convert function with a new FieldAttribute (containing type conversion knowledge) plan = plan.transformExpressionsOnly( @@ -1082,7 +1098,7 @@ protected LogicalPlan doRule(LogicalPlan plan) { convert -> resolveConvertFunction(convert, unionFieldAttributes) ); // If no union fields were generated, return the plan as is - if (unionFieldAttributes.isEmpty()) { + if (unionFieldAttributes.size() == alreadyAddedUnionFieldAttributes) { return plan; } From f5d9568eb2bfd6a1a60406fc95852d447b6fc88f Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Tue, 16 Jul 2024 16:54:50 +0200 Subject: [PATCH 32/36] Fix leftovers --- .../esql/qa/testFixtures/src/main/resources/row.csv-spec | 5 +++-- .../elasticsearch/xpack/esql/action/EsqlCapabilities.java | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec index 78ee460d64958..da640b6306299 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec @@ -37,7 +37,7 @@ a:integer ; shadowingInternal -required_capability: enrich_load +required_capability: unique_names ROW a = 1, a = 2; a:integer @@ -45,6 +45,7 @@ a:integer ; shadowingInternalSubfields +required_capability: unique_names // Fun fact: "Sissi" is an actual exoplanet name, after the character from the movie with the same name. A.k.a. HAT-P-14 b. ROW city.country.continent.planet.name = "Earth", city.country.continent.name = "Netherlands", city.country.continent.planet.name = "Sissi" ; @@ -87,7 +88,7 @@ a:integer | b:integer | c:null | z:integer ; evalRowWithNull2 -required_capability: enrich_load +required_capability: unique_names row a = 1, null, b = 2, c = null, null | eval z = a+b; a:integer | b:integer | c:null | null:null | z:integer diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 4461dee6e5e89..98c6d8f4332be 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -162,7 +162,7 @@ public enum Cap { STATS_BY_CONSTANT, /** - * Fix for non-unique attribute names in ROW. + * Fix for non-unique attribute names in ROW and logical plans. * https://github.com/elastic/elasticsearch/issues/110541 */ UNIQUE_NAMES; From a387165dd06574b191078d726d4f082798318f0b Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Tue, 16 Jul 2024 17:00:56 +0200 Subject: [PATCH 33/36] Make tests deterministic --- .../esql/qa/testFixtures/src/main/resources/drop.csv-spec | 2 ++ 1 file changed, 2 insertions(+) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/drop.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/drop.csv-spec index fa84d7ba41f6b..9886d6cce0ca2 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/drop.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/drop.csv-spec @@ -126,6 +126,7 @@ MIN(salary * 10):i | MAX(languages):i // Not really shadowing, but let's keep the name consistent with the other command's tests shadowingInternal FROM employees +| SORT emp_no ASC | KEEP emp_no, first_name, last_name | DROP last_name, last_name | LIMIT 2 @@ -138,6 +139,7 @@ emp_no:integer | first_name:keyword shadowingInternalWildcard FROM employees +| SORT emp_no ASC | KEEP emp_no, first_name, last_name | DROP last*name, last*name, last*, last_name | LIMIT 2 From 233d68de70cb92d5ba0f34ec16d91cab14cf00fa Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Tue, 16 Jul 2024 17:08:29 +0200 Subject: [PATCH 34/36] Fix rename shadowing docs --- docs/reference/esql/processing-commands/rename.asciidoc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/reference/esql/processing-commands/rename.asciidoc b/docs/reference/esql/processing-commands/rename.asciidoc index df1cc572f4dbf..41e2ce9298ae8 100644 --- a/docs/reference/esql/processing-commands/rename.asciidoc +++ b/docs/reference/esql/processing-commands/rename.asciidoc @@ -18,9 +18,8 @@ The name of a column you want to rename. `new_nameX`:: The new name of the column. If it conflicts with an existing column name, -the existing column is replaced by the renamed column. If multiple columns -are renamed to the same name, all but the rightmost column with the same new -name are dropped. +the existing column is dropped. If multiple columns are renamed to the same +name, all but the rightmost column with the same new name are dropped. *Description* From d0723b07ce7a9300993148d295b21d2f0174d3c4 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Tue, 16 Jul 2024 17:16:52 +0200 Subject: [PATCH 35/36] Apply Liam's doc remarks --- docs/reference/esql/processing-commands/dissect.asciidoc | 2 +- docs/reference/esql/processing-commands/enrich.asciidoc | 4 ++-- docs/reference/esql/processing-commands/eval.asciidoc | 2 +- docs/reference/esql/processing-commands/grok.asciidoc | 4 ++-- docs/reference/esql/processing-commands/keep.asciidoc | 2 +- docs/reference/esql/processing-commands/lookup.asciidoc | 3 +-- 6 files changed, 8 insertions(+), 9 deletions(-) diff --git a/docs/reference/esql/processing-commands/dissect.asciidoc b/docs/reference/esql/processing-commands/dissect.asciidoc index 0f4b63ed4d026..82138aa238087 100644 --- a/docs/reference/esql/processing-commands/dissect.asciidoc +++ b/docs/reference/esql/processing-commands/dissect.asciidoc @@ -20,7 +20,7 @@ multiple values, `DISSECT` will process each value. `pattern`:: A <>. -In case a field name coincides with an existing column, the existing column is discarded. +If a field name conflicts with an existing column, the existing column is dropped. If a field name is used more than once, only the rightmost duplicate creates a column. ``:: diff --git a/docs/reference/esql/processing-commands/enrich.asciidoc b/docs/reference/esql/processing-commands/enrich.asciidoc index 75e2f2f7b9197..2ece5a63e7570 100644 --- a/docs/reference/esql/processing-commands/enrich.asciidoc +++ b/docs/reference/esql/processing-commands/enrich.asciidoc @@ -32,8 +32,8 @@ The enrich fields from the enrich index that are added to the result as new columns. If a column with the same name as the enrich field already exists, the existing column will be replaced by the new column. If not specified, each of the enrich fields defined in the policy is added. -If a column has the same name as the enrich field, it will be discarded unless -that field is given a new name. +A column with the same name as the enrich field will be dropped unless the +enrich field is renamed. `new_nameX`:: Enables you to change the name of the column that's added for each of the enrich diff --git a/docs/reference/esql/processing-commands/eval.asciidoc b/docs/reference/esql/processing-commands/eval.asciidoc index 80d4ebb60df19..00a7764d24004 100644 --- a/docs/reference/esql/processing-commands/eval.asciidoc +++ b/docs/reference/esql/processing-commands/eval.asciidoc @@ -16,7 +16,7 @@ EVAL [column1 =] value1[, ..., [columnN =] valueN] `columnX`:: The column name. -In case a column name coincides with an existing column, the existing column is discarded. +If a column with the same name already exists, the existing column is dropped. If a column name is used more than once, only the rightmost duplicate creates a column. `valueX`:: diff --git a/docs/reference/esql/processing-commands/grok.asciidoc b/docs/reference/esql/processing-commands/grok.asciidoc index 2a24f30740040..57c55a5bad53f 100644 --- a/docs/reference/esql/processing-commands/grok.asciidoc +++ b/docs/reference/esql/processing-commands/grok.asciidoc @@ -20,7 +20,7 @@ multiple values, `GROK` will process each value. `pattern`:: A grok pattern. -In case a field name coincides with an existing column, the existing column is discarded. +If a field name conflicts with an existing column, the existing column is discarded. If a field name is used more than once, a multi-valued column will be created with one value per each occurrence of the field name. @@ -71,7 +71,7 @@ include::{esql-specs}/docs.csv-spec[tag=grokWithToDatetime] include::{esql-specs}/docs.csv-spec[tag=grokWithToDatetime-result] |=== -In case a field name is used more than once, `GROK` creates a multi-valued +If a field name is used more than once, `GROK` creates a multi-valued column: [source.merge.styled,esql] diff --git a/docs/reference/esql/processing-commands/keep.asciidoc b/docs/reference/esql/processing-commands/keep.asciidoc index 1a2e9cc1b6bc6..392e58a6d1819 100644 --- a/docs/reference/esql/processing-commands/keep.asciidoc +++ b/docs/reference/esql/processing-commands/keep.asciidoc @@ -16,7 +16,7 @@ KEEP columns `columns`:: A comma-separated list of columns to keep. Supports wildcards. -In case a column name without wildcards occurs multiple times, the column is placed at the +If a column name without wildcards occurs multiple times, the column is placed at the position of the rightmost duplicate. The same is true if a column name matches multiple wildcards. If a column name matches both a wildcard and also a column name without wildcards, the position of the latter is used. diff --git a/docs/reference/esql/processing-commands/lookup.asciidoc b/docs/reference/esql/processing-commands/lookup.asciidoc index b9cad78f55aec..7bb3a5791deef 100644 --- a/docs/reference/esql/processing-commands/lookup.asciidoc +++ b/docs/reference/esql/processing-commands/lookup.asciidoc @@ -18,8 +18,7 @@ LOOKUP table ON match_field1[, match_field2, ...] `table`:: The name of the `table` provided in the request to match. -If it has column names that conflict with existing columns, the table's columns replace the -existing ones. +If the table's column names conflict with existing columns, the existing columns will be dropped. `match_field`:: The fields in the input to match against the table. From fb171264161e21c631e7f9914ef08584615f4218 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Wed, 17 Jul 2024 10:31:09 +0200 Subject: [PATCH 36/36] Don't describe KEEP precedence twice --- docs/reference/esql/processing-commands/keep.asciidoc | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/docs/reference/esql/processing-commands/keep.asciidoc b/docs/reference/esql/processing-commands/keep.asciidoc index 392e58a6d1819..3dbd0c69d8222 100644 --- a/docs/reference/esql/processing-commands/keep.asciidoc +++ b/docs/reference/esql/processing-commands/keep.asciidoc @@ -16,10 +16,8 @@ KEEP columns `columns`:: A comma-separated list of columns to keep. Supports wildcards. -If a column name without wildcards occurs multiple times, the column is placed at the -position of the rightmost duplicate. The same is true if a column name matches multiple -wildcards. If a column name matches both a wildcard and also a column name without wildcards, -the position of the latter is used. +See below for the behavior in case an existing column matches multiple +given wildcards or column names. *Description*