From 3140ebf21c5fc99f8d1fbf1935d8ee4e02aace37 Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Tue, 7 May 2024 13:59:04 +0200 Subject: [PATCH 01/22] Fix escaping issue for mysql --- .../util/V2ExpressionSQLBuilder.java | 5 ++-- .../sql/catalyst/parser/AstBuilder.scala | 20 +++++++++++++ .../org/apache/spark/sql/jdbc/H2Dialect.scala | 7 ----- .../apache/spark/sql/jdbc/JdbcDialects.scala | 2 +- .../apache/spark/sql/jdbc/MySQLDialect.scala | 15 ++++++++++ .../org/apache/spark/sql/CollationSuite.scala | 30 +++++++++++++++++++ 6 files changed, 69 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java index e42d9193ea39..252882263a7a 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java @@ -22,6 +22,8 @@ import java.util.Map; import java.util.StringJoiner; +import org.apache.commons.lang3.StringUtils; + import org.apache.spark.SparkIllegalArgumentException; import org.apache.spark.SparkUnsupportedOperationException; import org.apache.spark.sql.connector.expressions.Cast; @@ -65,7 +67,6 @@ protected String escapeSpecialCharsForLikePattern(String str) { switch (c) { case '_' -> builder.append("\\_"); case '%' -> builder.append("\\%"); - case '\'' -> builder.append("\\\'"); default -> builder.append(c); } } @@ -169,7 +170,7 @@ yield visitBinaryArithmetic( } protected String visitLiteral(Literal literal) { - return literal.toString(); + return StringUtils.replace(literal.toString(), "'", "\\'"); } protected String visitNamedReference(NamedReference namedRef) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 
7d2355b2f08d..58bfcbc4faa6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2235,6 +2235,16 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { */ override def visitCast(ctx: CastContext): Expression = withOrigin(ctx) { val rawDataType = typedVisit[DataType](ctx.dataType()) + if (rawDataType.isInstanceOf[StringType]) { + val typeCtx = ctx.dataType().asInstanceOf[PrimitiveDataTypeContext].`type` + if (typeCtx.children.asScala.toSeq.size == 2) { + val collateClause = typeCtx.collateClause() + throw QueryParsingErrors.dataTypeUnsupportedError( + s"STRING COLLATE ${collateClause.identifier.getText}", + ctx.dataType().asInstanceOf[PrimitiveDataTypeContext]) + } + + } val dataType = CharVarcharUtils.replaceCharVarcharWithStringForCast(rawDataType) ctx.name.getType match { case SqlBaseParser.CAST => @@ -2254,6 +2264,16 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { */ override def visitCastByColon(ctx: CastByColonContext): Expression = withOrigin(ctx) { val rawDataType = typedVisit[DataType](ctx.dataType()) + if (rawDataType.isInstanceOf[StringType]) { + val typeCtx = ctx.dataType().asInstanceOf[PrimitiveDataTypeContext].`type` + if (typeCtx.children.asScala.toSeq.size == 2) { + val collateClause = typeCtx.collateClause() + throw QueryParsingErrors.dataTypeUnsupportedError( + s"STRING COLLATE ${collateClause.identifier.getText}", + ctx.dataType().asInstanceOf[PrimitiveDataTypeContext]) + } + + } val dataType = CharVarcharUtils.replaceCharVarcharWithStringForCast(rawDataType) val cast = Cast(expression(ctx.primaryExpression), dataType) cast.setTagValue(Cast.USER_SPECIFIED_CAST, ()) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala index ebfc6093dc16..949455b248ff 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala @@ -259,13 +259,6 @@ private[sql] case class H2Dialect() extends JdbcDialect { } class H2SQLBuilder extends JDBCSQLBuilder { - override def escapeSpecialCharsForLikePattern(str: String): String = { - str.map { - case '_' => "\\_" - case '%' => "\\%" - case c => c.toString - }.mkString - } override def visitAggregateFunction( funcName: String, isDistinct: Boolean, inputs: Array[String]): String = diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala index 5f69d18cad75..e7a30128c54a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -351,7 +351,7 @@ abstract class JdbcDialect extends Serializable with Logging { */ @Since("2.3.0") protected[jdbc] def escapeSql(value: String): String = - if (value == null) null else StringUtils.replace(value, "'", "''") + if (value == null) null else StringUtils.replace(value, "'", "\\'") /** * Converts value to SQL expression. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala index d98fcdfd0b23..50951042737a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala @@ -66,6 +66,21 @@ private case class MySQLDialect() extends JdbcDialect with SQLConfHelper { } } + override def visitStartsWith(l: String, r: String): String = { + val value = r.substring(1, r.length() - 1) + s"$l LIKE '${escapeSpecialCharsForLikePattern(value)}%' ESCAPE '\\\\'" + } + + override def visitEndsWith(l: String, r: String): String = { + val value = r.substring(1, r.length() - 1) + s"$l LIKE '%${escapeSpecialCharsForLikePattern(value)}' ESCAPE '\\\\'" + } + + override def visitContains(l: String, r: String): String = { + val value = r.substring(1, r.length() - 1) + s"$l LIKE '%${escapeSpecialCharsForLikePattern(value)}%' ESCAPE '\\\\'" + } + override def visitAggregateFunction( funcName: String, isDistinct: Boolean, inputs: Array[String]): String = if (isDistinct && distinctUnsupportedAggregateFunctions.contains(funcName)) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala index fce9ad3cc184..467d703af0d6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala @@ -22,6 +22,7 @@ import scala.jdk.CollectionConverters.MapHasAsJava import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.ExtendedAnalysisException import org.apache.spark.sql.catalyst.expressions.Literal +import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.catalyst.util.CollationFactory import org.apache.spark.sql.connector.{DatasourceV2SQLBase, FakeV2ProviderWithCustomSchema} import 
org.apache.spark.sql.connector.catalog.{Identifier, InMemoryTable} @@ -885,6 +886,35 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { ) } + test("SPARK-47972: Cast expression limitation for collations") { + checkError( + exception = intercept[ParseException] { + sql("SELECT Cast('a' AS STRING COLLATE UNICODE)") + }, + errorClass = "UNSUPPORTED_DATATYPE", + parameters = Map("typeName" -> """"STRING COLLATE UNICODE""""), + context = ExpectedContext( + fragment = "Cast('a' AS STRING COLLATE UNICODE)", + start = 7, + stop = 41) + ) + + checkError( + exception = intercept[ParseException] { + sql("SELECT 1::STRING COLLATE UNICODE") + }, + errorClass = "UNSUPPORTED_DATATYPE", + parameters = Map("typeName" -> """"STRING COLLATE UNICODE""""), + context = ExpectedContext( + fragment = "1::STRING COLLATE UNICODE", + start = 7, + stop = 31) + ) + + checkAnswer(sql("SELECT Cast(1 AS STRING)"), Row("1")) + checkAnswer(sql("SELECT 1::STRING"), Row("1")) + } + test("Aggregation on complex containing collated strings") { val table = "table_agg" // array From 1fda363398a643260361777e0b8de3292b82debb Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Tue, 7 May 2024 14:08:53 +0200 Subject: [PATCH 02/22] Remove unrelated changes --- .../sql/catalyst/parser/AstBuilder.scala | 20 ------------------- 1 file changed, 20 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 58bfcbc4faa6..7d2355b2f08d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2235,16 +2235,6 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { */ override def visitCast(ctx: CastContext): Expression = withOrigin(ctx) { val rawDataType = typedVisit[DataType](ctx.dataType()) - 
if (rawDataType.isInstanceOf[StringType]) { - val typeCtx = ctx.dataType().asInstanceOf[PrimitiveDataTypeContext].`type` - if (typeCtx.children.asScala.toSeq.size == 2) { - val collateClause = typeCtx.collateClause() - throw QueryParsingErrors.dataTypeUnsupportedError( - s"STRING COLLATE ${collateClause.identifier.getText}", - ctx.dataType().asInstanceOf[PrimitiveDataTypeContext]) - } - - } val dataType = CharVarcharUtils.replaceCharVarcharWithStringForCast(rawDataType) ctx.name.getType match { case SqlBaseParser.CAST => @@ -2264,16 +2254,6 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { */ override def visitCastByColon(ctx: CastByColonContext): Expression = withOrigin(ctx) { val rawDataType = typedVisit[DataType](ctx.dataType()) - if (rawDataType.isInstanceOf[StringType]) { - val typeCtx = ctx.dataType().asInstanceOf[PrimitiveDataTypeContext].`type` - if (typeCtx.children.asScala.toSeq.size == 2) { - val collateClause = typeCtx.collateClause() - throw QueryParsingErrors.dataTypeUnsupportedError( - s"STRING COLLATE ${collateClause.identifier.getText}", - ctx.dataType().asInstanceOf[PrimitiveDataTypeContext]) - } - - } val dataType = CharVarcharUtils.replaceCharVarcharWithStringForCast(rawDataType) val cast = Cast(expression(ctx.primaryExpression), dataType) cast.setTagValue(Cast.USER_SPECIFIED_CAST, ()) From 81b6459ad5e6aba4f6b96c17fdee3609000b60b1 Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Tue, 7 May 2024 14:09:48 +0200 Subject: [PATCH 03/22] Remove unrelated changes --- .../org/apache/spark/sql/CollationSuite.scala | 30 ------------------- 1 file changed, 30 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala index 467d703af0d6..fce9ad3cc184 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala @@ -22,7 +22,6 @@ 
import scala.jdk.CollectionConverters.MapHasAsJava import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.ExtendedAnalysisException import org.apache.spark.sql.catalyst.expressions.Literal -import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.catalyst.util.CollationFactory import org.apache.spark.sql.connector.{DatasourceV2SQLBase, FakeV2ProviderWithCustomSchema} import org.apache.spark.sql.connector.catalog.{Identifier, InMemoryTable} @@ -886,35 +885,6 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { ) } - test("SPARK-47972: Cast expression limitation for collations") { - checkError( - exception = intercept[ParseException] { - sql("SELECT Cast('a' AS STRING COLLATE UNICODE)") - }, - errorClass = "UNSUPPORTED_DATATYPE", - parameters = Map("typeName" -> """"STRING COLLATE UNICODE""""), - context = ExpectedContext( - fragment = "Cast('a' AS STRING COLLATE UNICODE)", - start = 7, - stop = 41) - ) - - checkError( - exception = intercept[ParseException] { - sql("SELECT 1::STRING COLLATE UNICODE") - }, - errorClass = "UNSUPPORTED_DATATYPE", - parameters = Map("typeName" -> """"STRING COLLATE UNICODE""""), - context = ExpectedContext( - fragment = "1::STRING COLLATE UNICODE", - start = 7, - stop = 31) - ) - - checkAnswer(sql("SELECT Cast(1 AS STRING)"), Row("1")) - checkAnswer(sql("SELECT 1::STRING"), Row("1")) - } - test("Aggregation on complex containing collated strings") { val table = "table_agg" // array From 339e8a977b9da81e0e8f37de9091e874a3692fe1 Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Wed, 8 May 2024 08:47:06 +0200 Subject: [PATCH 04/22] Fix tests --- .../apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java 
index 252882263a7a..76323574e70b 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java @@ -170,7 +170,7 @@ yield visitBinaryArithmetic( } protected String visitLiteral(Literal literal) { - return StringUtils.replace(literal.toString(), "'", "\\'"); + return literal.toString(); } protected String visitNamedReference(NamedReference namedRef) { From b6ebebed697c1a1f6448958bf634d642d37c53c5 Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Wed, 8 May 2024 09:50:29 +0200 Subject: [PATCH 05/22] Remove unused imports --- .../apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java index 76323574e70b..11f4389245d9 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java @@ -22,8 +22,6 @@ import java.util.Map; import java.util.StringJoiner; -import org.apache.commons.lang3.StringUtils; - import org.apache.spark.SparkIllegalArgumentException; import org.apache.spark.SparkUnsupportedOperationException; import org.apache.spark.sql.connector.expressions.Cast; From 4b7e3f55a2fcc300b88a2eb762e3bf9f95009ade Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Wed, 8 May 2024 12:20:16 +0200 Subject: [PATCH 06/22] Fix ' escaping --- .../spark/sql/connector/util/V2ExpressionSQLBuilder.java | 9 ++++++++- .../scala/org/apache/spark/sql/jdbc/JdbcDialects.scala | 2 +- .../scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala | 6 +++--- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git 
a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java index 11f4389245d9..dc78be68d029 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java @@ -22,6 +22,8 @@ import java.util.Map; import java.util.StringJoiner; +import org.apache.commons.lang3.StringUtils; + import org.apache.spark.SparkIllegalArgumentException; import org.apache.spark.SparkUnsupportedOperationException; import org.apache.spark.sql.connector.expressions.Cast; @@ -43,6 +45,7 @@ import org.apache.spark.sql.connector.expressions.aggregate.Sum; import org.apache.spark.sql.connector.expressions.aggregate.UserDefinedAggregateFunc; import org.apache.spark.sql.types.DataType; +import org.apache.spark.sql.types.StringType; /** * The builder to generate SQL from V2 expressions. 
@@ -168,7 +171,11 @@ yield visitBinaryArithmetic( } protected String visitLiteral(Literal literal) { - return literal.toString(); + String litString = literal.toString(); + if (literal.dataType() instanceof StringType) { + return "'" + StringUtils.replace(litString.substring(1, litString.length() - 1), "'", "''") + "'"; + } + return litString; } protected String visitNamedReference(NamedReference namedRef) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala index e7a30128c54a..5f69d18cad75 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -351,7 +351,7 @@ abstract class JdbcDialect extends Serializable with Logging { */ @Since("2.3.0") protected[jdbc] def escapeSql(value: String): String = - if (value == null) null else StringUtils.replace(value, "'", "\\'") + if (value == null) null else StringUtils.replace(value, "'", "''") /** * Converts value to SQL expression. 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala index 1b3672cdba5a..8e98181a9802 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala @@ -1305,7 +1305,7 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel val df5 = spark.table("h2.test.address").filter($"email".startsWith("abc_'%")) checkFiltersRemoved(df5) checkPushedInfo(df5, - raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE 'abc\_\'\%%' ESCAPE '\']") + raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE 'abc\_''\%%' ESCAPE '\']") checkAnswer(df5, Seq(Row("abc_'%def@gmail.com"))) val df6 = spark.table("h2.test.address").filter($"email".endsWith("_def@gmail.com")) @@ -1336,7 +1336,7 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel val df10 = spark.table("h2.test.address").filter($"email".endsWith("_'%def@gmail.com")) checkFiltersRemoved(df10) checkPushedInfo(df10, - raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE '%\_\'\%def@gmail.com' ESCAPE '\']") + raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE '%\_''\%def@gmail.com' ESCAPE '\']") checkAnswer(df10, Seq(Row("abc_'%def@gmail.com"))) val df11 = spark.table("h2.test.address").filter($"email".contains("c_d")) @@ -1364,7 +1364,7 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel val df15 = spark.table("h2.test.address").filter($"email".contains("c_'%d")) checkFiltersRemoved(df15) checkPushedInfo(df15, - raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE '%c\_\'\%d%' ESCAPE '\']") + raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE '%c\_''\%d%' ESCAPE '\']") checkAnswer(df15, Seq(Row("abc_'%def@gmail.com"))) } From e117da9eeb42ea22f4794443a5c74f753d1a5f28 Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Wed, 8 May 2024 12:48:00 +0200 Subject: [PATCH 07/22] 
Add MySQL tests --- .../sql/jdbc/MySQLIntegrationSuite.scala | 196 ++++++++++++++++++ 1 file changed, 196 insertions(+) diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala index 684cec37c170..ed18b16eadbd 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala @@ -97,6 +97,24 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite { conn.prepareStatement("CREATE TABLE TBL_GEOMETRY (col0 GEOMETRY)").executeUpdate() conn.prepareStatement("INSERT INTO TBL_GEOMETRY VALUES (ST_GeomFromText('POINT(0 0)'))") .executeUpdate() + + conn.prepareStatement( + s"""CREATE TABLE pattern_testing_table ( + |pattern_testing_col LONGTEXT + |) + """.stripMargin + ).executeUpdate() + + conn.prepareStatement( + s""" + |INSERT INTO pattern_testing_table VALUES + |('special_character_quote\\'_present'), + |('special_character_quote_not_present'), + |('special_character_percent%_present'), + |('special_character_percent_not_present'), + |('special_character_underscore_present'), + |('special_character_underscorenot_present') + """.stripMargin).executeUpdate() } def testConnection(): Unit = { @@ -358,6 +376,184 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite { val df = spark.read.jdbc(jdbcUrl, "smallint_round_trip", new Properties) assert(df.schema.fields.head.dataType === ShortType) } + + test("test contains pushdown") { + // this one should map to contains + val df1 = spark.sql( + s""" + |SELECT * FROM pattern_testing_table + |WHERE contains(pattern_testing_col, 'quote\\'')""".stripMargin) + df1.explain("formatted") + + checkAnswer(df1, Row("special_character_quote'_present")) + + val df2 = spark.sql( + s"""SELECT * FROM 
pattern_testing_table + |WHERE contains(pattern_testing_col, 'percent%')""".stripMargin) + checkAnswer(df2, Row("special_character_percent%_present")) + + val df3 = spark. + sql( + s"""SELECT * FROM pattern_testing_table + |WHERE contains(pattern_testing_col, 'underscore_')""".stripMargin) + checkAnswer(df3, Row("special_character_underscore_present")) + + val df4 = spark. + sql( + s"""SELECT * FROM pattern_testing_table + |WHERE contains(pattern_testing_col, 'character') + |ORDER BY pattern_testing_col""".stripMargin) + checkAnswer(df4, Seq( + Row("special_character_percent%_present"), + Row("special_character_percent_not_present"), + Row("special_character_quote'_present"), + Row("special_character_quote_not_present"), + Row("special_character_underscore_present"), + Row("special_character_underscorenot_present"))) + } + + test("endswith pushdown") { + val df9 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE endswith(pattern_testing_col, 'quote\\'_present')""".stripMargin) + checkAnswer(df9, Row("special_character_quote'_present")) + val df10 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE endswith(pattern_testing_col, 'percent%_present')""".stripMargin) + checkAnswer(df10, Row("special_character_percent%_present")) + val df11 = spark. + sql( + s"""SELECT * FROM pattern_testing_table + |WHERE endswith(pattern_testing_col, 'underscore_present')""".stripMargin) + checkAnswer(df11, Row("special_character_underscore_present")) + val df12 = spark. 
+ sql( + s"""SELECT * FROM pattern_testing_table + |WHERE endswith(pattern_testing_col, 'present') + |ORDER BY pattern_testing_col""".stripMargin) + checkAnswer(df12, Seq( + Row("special_character_percent%_present"), + Row("special_character_percent_not_present"), + Row("special_character_quote'_present"), + Row("special_character_quote_not_present"), + Row("special_character_underscore_present"), + Row("special_character_underscorenot_present"))) + } + + test("startswith pushdown") { + val df5 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE startswith(pattern_testing_col, 'special_character_quote\\'')""".stripMargin) + checkAnswer(df5, Row("special_character_quote'_present")) + val df6 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE startswith(pattern_testing_col, 'special_character_percent%')""".stripMargin) + checkAnswer(df6, Row("special_character_percent%_present")) + val df7 = spark. + sql( + s"""SELECT * FROM pattern_testing_table + |WHERE startswith(pattern_testing_col, 'special_character_underscore_')""".stripMargin) + checkAnswer(df7, Row("special_character_underscore_present")) + val df8 = spark. 
+ sql( + s"""SELECT * FROM pattern_testing_table + |WHERE startswith(pattern_testing_col, 'special_character') + |ORDER BY pattern_testing_col""".stripMargin) + checkAnswer(df8, Seq( + Row("special_character_percent%_present"), + Row("special_character_percent_not_present"), + Row("special_character_quote'_present"), + Row("special_character_quote_not_present"), + Row("special_character_underscore_present"), + Row("special_character_underscorenot_present"))) + } + + test("test like pushdown") { + // this one should map to contains + val df1 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%quote\\'%'""".stripMargin) + + checkAnswer(df1, Row("special_character_quote'_present")) + + val df2 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%percent\\%%'""".stripMargin) + checkAnswer(df2, Row("special_character_percent%_present")) + + val df3 = spark. + sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%underscore\\_%'""".stripMargin) + checkAnswer(df3, Row("special_character_underscore_present")) + + val df4 = spark. 
+ sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%character%' + |ORDER BY pattern_testing_col""".stripMargin) + checkAnswer(df4, Seq( + Row("special_character_percent%_present"), + Row("special_character_percent_not_present"), + Row("special_character_quote'_present"), + Row("special_character_quote_not_present"), + Row("special_character_underscore_present"), + Row("special_character_underscorenot_present"))) + + // map to startsWith + // this one should map to contains + val df5 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE 'special_character_quote\\'%'""".stripMargin) + checkAnswer(df5, Row("special_character_quote'_present")) + val df6 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE 'special_character_percent\\%%'""".stripMargin) + checkAnswer(df6, Row("special_character_percent%_present")) + val df7 = spark. + sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE 'special_character_underscore\\_%'""".stripMargin) + checkAnswer(df7, Row("special_character_underscore_present")) + val df8 = spark. 
+ sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE 'special_character%' + |ORDER BY pattern_testing_col""".stripMargin) + checkAnswer(df8, Seq( + Row("special_character_percent%_present"), + Row("special_character_percent_not_present"), + Row("special_character_quote'_present"), + Row("special_character_quote_not_present"), + Row("special_character_underscore_present"), + Row("special_character_underscorenot_present"))) + // map to endsWith + // this one should map to contains + val df9 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%quote\\'_present'""".stripMargin) + checkAnswer(df9, Row("special_character_quote'_present")) + val df10 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%percent\\%_present'""".stripMargin) + checkAnswer(df10, Row("special_character_percent%_present")) + val df11 = spark. + sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%underscore\\_present'""".stripMargin) + checkAnswer(df11, Row("special_character_underscore_present")) + val df12 = spark. 
+ sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%present' ORDER BY pattern_testing_col""".stripMargin) + checkAnswer(df12, Seq( + Row("special_character_percent%_present"), + Row("special_character_percent_not_present"), + Row("special_character_quote'_present"), + Row("special_character_quote_not_present"), + Row("special_character_underscore_present"), + Row("special_character_underscorenot_present"))) + } } From dfbc210a3ea86aaac8cacd3b0fc2627e70e427c1 Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Wed, 8 May 2024 13:42:21 +0200 Subject: [PATCH 08/22] Fix line length --- .../spark/sql/connector/util/V2ExpressionSQLBuilder.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java index dc78be68d029..1ba46313e0a4 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java @@ -173,7 +173,12 @@ yield visitBinaryArithmetic( protected String visitLiteral(Literal literal) { String litString = literal.toString(); if (literal.dataType() instanceof StringType) { - return "'" + StringUtils.replace(litString.substring(1, litString.length() - 1), "'", "''") + "'"; + return "'" + + StringUtils.replace( + litString.substring(1, litString.length() - 1), + "'", + "''") + + "'"; } return litString; } From 9062037b765d37def126b5d6a1a7eaca4aef6e30 Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Thu, 9 May 2024 08:35:51 +0200 Subject: [PATCH 09/22] Add tests for expression pushdown to different JDBCs --- .../spark/sql/jdbc/DB2IntegrationSuite.scala | 196 +++++++++++++++++ .../jdbc/MsSqlServerIntegrationSuite.scala | 196 +++++++++++++++++ .../sql/jdbc/OracleIntegrationSuite.scala | 197 
++++++++++++++++++ .../sql/jdbc/PostgresIntegrationSuite.scala | 196 +++++++++++++++++ .../util/V2ExpressionSQLBuilder.java | 9 +- 5 files changed, 789 insertions(+), 5 deletions(-) diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala index cedb33d491fb..7e6e95d73fa8 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala @@ -64,6 +64,24 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationSuite { .executeUpdate() conn.prepareStatement("INSERT INTO strings VALUES ('the', 'quick', 'brown', BLOB('fox')," + "'Kathy')").executeUpdate() + + conn.prepareStatement( + s"""CREATE TABLE pattern_testing_table ( + |pattern_testing_col LONGTEXT + |) + """.stripMargin + ).executeUpdate() + + conn.prepareStatement( + s""" + |INSERT INTO pattern_testing_table VALUES + |('special_character_quote\\'_present'), + |('special_character_quote_not_present'), + |('special_character_percent%_present'), + |('special_character_percent_not_present'), + |('special_character_underscore_present'), + |('special_character_underscorenot_present') + """.stripMargin).executeUpdate() } test("Basic test") { @@ -224,4 +242,182 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationSuite { assert(actual === expected) } + + test("test contains pushdown") { + // this one should map to contains + val df1 = spark.sql( + s""" + |SELECT * FROM pattern_testing_table + |WHERE contains(pattern_testing_col, 'quote\\'')""".stripMargin) + df1.explain("formatted") + + checkAnswer(df1, Row("special_character_quote'_present")) + + val df2 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE contains(pattern_testing_col, 'percent%')""".stripMargin) + checkAnswer(df2, 
Row("special_character_percent%_present")) + + val df3 = spark. + sql( + s"""SELECT * FROM pattern_testing_table + |WHERE contains(pattern_testing_col, 'underscore_')""".stripMargin) + checkAnswer(df3, Row("special_character_underscore_present")) + + val df4 = spark. + sql( + s"""SELECT * FROM pattern_testing_table + |WHERE contains(pattern_testing_col, 'character') + |ORDER BY pattern_testing_col""".stripMargin) + checkAnswer(df4, Seq( + Row("special_character_percent%_present"), + Row("special_character_percent_not_present"), + Row("special_character_quote'_present"), + Row("special_character_quote_not_present"), + Row("special_character_underscore_present"), + Row("special_character_underscorenot_present"))) + } + + test("endswith pushdown") { + val df9 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE endswith(pattern_testing_col, 'quote\\'_present')""".stripMargin) + checkAnswer(df9, Row("special_character_quote'_present")) + val df10 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE endswith(pattern_testing_col, 'percent%_present')""".stripMargin) + checkAnswer(df10, Row("special_character_percent%_present")) + val df11 = spark. + sql( + s"""SELECT * FROM pattern_testing_table + |WHERE endswith(pattern_testing_col, 'underscore_present')""".stripMargin) + checkAnswer(df11, Row("special_character_underscore_present")) + val df12 = spark. 
+ sql( + s"""SELECT * FROM pattern_testing_table + |WHERE endswith(pattern_testing_col, 'present') + |ORDER BY pattern_testing_col""".stripMargin) + checkAnswer(df12, Seq( + Row("special_character_percent%_present"), + Row("special_character_percent_not_present"), + Row("special_character_quote'_present"), + Row("special_character_quote_not_present"), + Row("special_character_underscore_present"), + Row("special_character_underscorenot_present"))) + } + + test("startswith pushdown") { + val df5 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE startswith(pattern_testing_col, 'special_character_quote\\'')""".stripMargin) + checkAnswer(df5, Row("special_character_quote'_present")) + val df6 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE startswith(pattern_testing_col, 'special_character_percent%')""".stripMargin) + checkAnswer(df6, Row("special_character_percent%_present")) + val df7 = spark. + sql( + s"""SELECT * FROM pattern_testing_table + |WHERE startswith(pattern_testing_col, 'special_character_underscore_')""".stripMargin) + checkAnswer(df7, Row("special_character_underscore_present")) + val df8 = spark. 
+ sql( + s"""SELECT * FROM pattern_testing_table + |WHERE startswith(pattern_testing_col, 'special_character') + |ORDER BY pattern_testing_col""".stripMargin) + checkAnswer(df8, Seq( + Row("special_character_percent%_present"), + Row("special_character_percent_not_present"), + Row("special_character_quote'_present"), + Row("special_character_quote_not_present"), + Row("special_character_underscore_present"), + Row("special_character_underscorenot_present"))) + } + + test("test like pushdown") { + // this one should map to contains + val df1 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%quote\\'%'""".stripMargin) + + checkAnswer(df1, Row("special_character_quote'_present")) + + val df2 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%percent\\%%'""".stripMargin) + checkAnswer(df2, Row("special_character_percent%_present")) + + val df3 = spark. + sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%underscore\\_%'""".stripMargin) + checkAnswer(df3, Row("special_character_underscore_present")) + + val df4 = spark. 
+ sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%character%' + |ORDER BY pattern_testing_col""".stripMargin) + checkAnswer(df4, Seq( + Row("special_character_percent%_present"), + Row("special_character_percent_not_present"), + Row("special_character_quote'_present"), + Row("special_character_quote_not_present"), + Row("special_character_underscore_present"), + Row("special_character_underscorenot_present"))) + + // map to startsWith + // this one should map to contains + val df5 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE 'special_character_quote\\'%'""".stripMargin) + checkAnswer(df5, Row("special_character_quote'_present")) + val df6 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE 'special_character_percent\\%%'""".stripMargin) + checkAnswer(df6, Row("special_character_percent%_present")) + val df7 = spark. + sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE 'special_character_underscore\\_%'""".stripMargin) + checkAnswer(df7, Row("special_character_underscore_present")) + val df8 = spark. 
+ sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE 'special_character%' + |ORDER BY pattern_testing_col""".stripMargin) + checkAnswer(df8, Seq( + Row("special_character_percent%_present"), + Row("special_character_percent_not_present"), + Row("special_character_quote'_present"), + Row("special_character_quote_not_present"), + Row("special_character_underscore_present"), + Row("special_character_underscorenot_present"))) + // map to endsWith + // this one should map to contains + val df9 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%quote\\'_present'""".stripMargin) + checkAnswer(df9, Row("special_character_quote'_present")) + val df10 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%percent\\%_present'""".stripMargin) + checkAnswer(df10, Row("special_character_percent%_present")) + val df11 = spark. + sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%underscore\\_present'""".stripMargin) + checkAnswer(df11, Row("special_character_underscore_present")) + val df12 = spark. 
+ sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%present' ORDER BY pattern_testing_col""".stripMargin) + checkAnswer(df12, Seq( + Row("special_character_percent%_present"), + Row("special_character_percent_not_present"), + Row("special_character_quote'_present"), + Row("special_character_quote_not_present"), + Row("special_character_underscore_present"), + Row("special_character_underscorenot_present"))) + } } diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala index 623f404339e9..5c533c8c4bdc 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala @@ -146,6 +146,24 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite { .executeUpdate() conn.prepareStatement("""INSERT INTO test_rowversion (myKey, myValue) VALUES (1, 0)""") .executeUpdate() + + conn.prepareStatement( + s"""CREATE TABLE pattern_testing_table ( + |pattern_testing_col LONGTEXT + |) + """.stripMargin + ).executeUpdate() + + conn.prepareStatement( + s""" + |INSERT INTO pattern_testing_table VALUES + |('special_character_quote\\'_present'), + |('special_character_quote_not_present'), + |('special_character_percent%_present'), + |('special_character_percent_not_present'), + |('special_character_underscore_present'), + |('special_character_underscorenot_present') + """.stripMargin).executeUpdate() } test("Basic test") { @@ -493,4 +511,182 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite { errorClass = "UNRECOGNIZED_SQL_TYPE", parameters = Map("typeName" -> "sql_variant", "jdbcType" -> "-156")) } + + test("test contains pushdown") { + // this one should map to contains + 
val df1 = spark.sql( + s""" + |SELECT * FROM pattern_testing_table + |WHERE contains(pattern_testing_col, 'quote\\'')""".stripMargin) + df1.explain("formatted") + + checkAnswer(df1, Row("special_character_quote'_present")) + + val df2 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE contains(pattern_testing_col, 'percent%')""".stripMargin) + checkAnswer(df2, Row("special_character_percent%_present")) + + val df3 = spark. + sql( + s"""SELECT * FROM pattern_testing_table + |WHERE contains(pattern_testing_col, 'underscore_')""".stripMargin) + checkAnswer(df3, Row("special_character_underscore_present")) + + val df4 = spark. + sql( + s"""SELECT * FROM pattern_testing_table + |WHERE contains(pattern_testing_col, 'character') + |ORDER BY pattern_testing_col""".stripMargin) + checkAnswer(df4, Seq( + Row("special_character_percent%_present"), + Row("special_character_percent_not_present"), + Row("special_character_quote'_present"), + Row("special_character_quote_not_present"), + Row("special_character_underscore_present"), + Row("special_character_underscorenot_present"))) + } + + test("endswith pushdown") { + val df9 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE endswith(pattern_testing_col, 'quote\\'_present')""".stripMargin) + checkAnswer(df9, Row("special_character_quote'_present")) + val df10 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE endswith(pattern_testing_col, 'percent%_present')""".stripMargin) + checkAnswer(df10, Row("special_character_percent%_present")) + val df11 = spark. + sql( + s"""SELECT * FROM pattern_testing_table + |WHERE endswith(pattern_testing_col, 'underscore_present')""".stripMargin) + checkAnswer(df11, Row("special_character_underscore_present")) + val df12 = spark. 
+ sql( + s"""SELECT * FROM pattern_testing_table + |WHERE endswith(pattern_testing_col, 'present') + |ORDER BY pattern_testing_col""".stripMargin) + checkAnswer(df12, Seq( + Row("special_character_percent%_present"), + Row("special_character_percent_not_present"), + Row("special_character_quote'_present"), + Row("special_character_quote_not_present"), + Row("special_character_underscore_present"), + Row("special_character_underscorenot_present"))) + } + + test("startswith pushdown") { + val df5 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE startswith(pattern_testing_col, 'special_character_quote\\'')""".stripMargin) + checkAnswer(df5, Row("special_character_quote'_present")) + val df6 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE startswith(pattern_testing_col, 'special_character_percent%')""".stripMargin) + checkAnswer(df6, Row("special_character_percent%_present")) + val df7 = spark. + sql( + s"""SELECT * FROM pattern_testing_table + |WHERE startswith(pattern_testing_col, 'special_character_underscore_')""".stripMargin) + checkAnswer(df7, Row("special_character_underscore_present")) + val df8 = spark. 
+ sql( + s"""SELECT * FROM pattern_testing_table + |WHERE startswith(pattern_testing_col, 'special_character') + |ORDER BY pattern_testing_col""".stripMargin) + checkAnswer(df8, Seq( + Row("special_character_percent%_present"), + Row("special_character_percent_not_present"), + Row("special_character_quote'_present"), + Row("special_character_quote_not_present"), + Row("special_character_underscore_present"), + Row("special_character_underscorenot_present"))) + } + + test("test like pushdown") { + // this one should map to contains + val df1 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%quote\\'%'""".stripMargin) + + checkAnswer(df1, Row("special_character_quote'_present")) + + val df2 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%percent\\%%'""".stripMargin) + checkAnswer(df2, Row("special_character_percent%_present")) + + val df3 = spark. + sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%underscore\\_%'""".stripMargin) + checkAnswer(df3, Row("special_character_underscore_present")) + + val df4 = spark. 
+ sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%character%' + |ORDER BY pattern_testing_col""".stripMargin) + checkAnswer(df4, Seq( + Row("special_character_percent%_present"), + Row("special_character_percent_not_present"), + Row("special_character_quote'_present"), + Row("special_character_quote_not_present"), + Row("special_character_underscore_present"), + Row("special_character_underscorenot_present"))) + + // map to startsWith + // this one should map to contains + val df5 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE 'special_character_quote\\'%'""".stripMargin) + checkAnswer(df5, Row("special_character_quote'_present")) + val df6 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE 'special_character_percent\\%%'""".stripMargin) + checkAnswer(df6, Row("special_character_percent%_present")) + val df7 = spark. + sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE 'special_character_underscore\\_%'""".stripMargin) + checkAnswer(df7, Row("special_character_underscore_present")) + val df8 = spark. 
+ sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE 'special_character%' + |ORDER BY pattern_testing_col""".stripMargin) + checkAnswer(df8, Seq( + Row("special_character_percent%_present"), + Row("special_character_percent_not_present"), + Row("special_character_quote'_present"), + Row("special_character_quote_not_present"), + Row("special_character_underscore_present"), + Row("special_character_underscorenot_present"))) + // map to endsWith + // this one should map to contains + val df9 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%quote\\'_present'""".stripMargin) + checkAnswer(df9, Row("special_character_quote'_present")) + val df10 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%percent\\%_present'""".stripMargin) + checkAnswer(df10, Row("special_character_percent%_present")) + val df11 = spark. + sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%underscore\\_present'""".stripMargin) + checkAnswer(df11, Row("special_character_underscore_present")) + val df12 = spark. 
+ sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%present' ORDER BY pattern_testing_col""".stripMargin) + checkAnswer(df12, Seq( + Row("special_character_percent%_present"), + Row("special_character_percent_not_present"), + Row("special_character_quote'_present"), + Row("special_character_quote_not_present"), + Row("special_character_underscore_present"), + Row("special_character_underscorenot_present"))) + } } diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala index 496498e5455b..3fae1c0eba83 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala @@ -153,6 +153,25 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark """.stripMargin.replaceAll("\n", " ")).executeUpdate() conn.commit() + conn.prepareStatement( + s"""CREATE TABLE pattern_testing_table ( + |pattern_testing_col LONGTEXT + |) + """.stripMargin + ).executeUpdate() + + conn.prepareStatement( + s""" + |INSERT INTO pattern_testing_table VALUES + |('special_character_quote\\'_present'), + |('special_character_quote_not_present'), + |('special_character_percent%_present'), + |('special_character_percent_not_present'), + |('special_character_underscore_present'), + |('special_character_underscorenot_present') + """.stripMargin).executeUpdate() + conn.commit() + conn.prepareStatement("CREATE TABLE test_ltz(t TIMESTAMP WITH LOCAL TIME ZONE)") .executeUpdate() conn.prepareStatement( @@ -617,4 +636,182 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark assert(!schema(i).metadata.contains(CharVarcharUtils.CHAR_VARCHAR_TYPE_STRING_METADATA_KEY)) } } + + test("test contains 
pushdown") { + // this one should map to contains + val df1 = spark.sql( + s""" + |SELECT * FROM pattern_testing_table + |WHERE contains(pattern_testing_col, 'quote\\'')""".stripMargin) + df1.explain("formatted") + + checkAnswer(df1, Row("special_character_quote'_present")) + + val df2 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE contains(pattern_testing_col, 'percent%')""".stripMargin) + checkAnswer(df2, Row("special_character_percent%_present")) + + val df3 = spark. + sql( + s"""SELECT * FROM pattern_testing_table + |WHERE contains(pattern_testing_col, 'underscore_')""".stripMargin) + checkAnswer(df3, Row("special_character_underscore_present")) + + val df4 = spark. + sql( + s"""SELECT * FROM pattern_testing_table + |WHERE contains(pattern_testing_col, 'character') + |ORDER BY pattern_testing_col""".stripMargin) + checkAnswer(df4, Seq( + Row("special_character_percent%_present"), + Row("special_character_percent_not_present"), + Row("special_character_quote'_present"), + Row("special_character_quote_not_present"), + Row("special_character_underscore_present"), + Row("special_character_underscorenot_present"))) + } + + test("endswith pushdown") { + val df9 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE endswith(pattern_testing_col, 'quote\\'_present')""".stripMargin) + checkAnswer(df9, Row("special_character_quote'_present")) + val df10 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE endswith(pattern_testing_col, 'percent%_present')""".stripMargin) + checkAnswer(df10, Row("special_character_percent%_present")) + val df11 = spark. + sql( + s"""SELECT * FROM pattern_testing_table + |WHERE endswith(pattern_testing_col, 'underscore_present')""".stripMargin) + checkAnswer(df11, Row("special_character_underscore_present")) + val df12 = spark. 
+ sql( + s"""SELECT * FROM pattern_testing_table + |WHERE endswith(pattern_testing_col, 'present') + |ORDER BY pattern_testing_col""".stripMargin) + checkAnswer(df12, Seq( + Row("special_character_percent%_present"), + Row("special_character_percent_not_present"), + Row("special_character_quote'_present"), + Row("special_character_quote_not_present"), + Row("special_character_underscore_present"), + Row("special_character_underscorenot_present"))) + } + + test("startswith pushdown") { + val df5 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE startswith(pattern_testing_col, 'special_character_quote\\'')""".stripMargin) + checkAnswer(df5, Row("special_character_quote'_present")) + val df6 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE startswith(pattern_testing_col, 'special_character_percent%')""".stripMargin) + checkAnswer(df6, Row("special_character_percent%_present")) + val df7 = spark. + sql( + s"""SELECT * FROM pattern_testing_table + |WHERE startswith(pattern_testing_col, 'special_character_underscore_')""".stripMargin) + checkAnswer(df7, Row("special_character_underscore_present")) + val df8 = spark. 
+ sql( + s"""SELECT * FROM pattern_testing_table + |WHERE startswith(pattern_testing_col, 'special_character') + |ORDER BY pattern_testing_col""".stripMargin) + checkAnswer(df8, Seq( + Row("special_character_percent%_present"), + Row("special_character_percent_not_present"), + Row("special_character_quote'_present"), + Row("special_character_quote_not_present"), + Row("special_character_underscore_present"), + Row("special_character_underscorenot_present"))) + } + + test("test like pushdown") { + // this one should map to contains + val df1 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%quote\\'%'""".stripMargin) + + checkAnswer(df1, Row("special_character_quote'_present")) + + val df2 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%percent\\%%'""".stripMargin) + checkAnswer(df2, Row("special_character_percent%_present")) + + val df3 = spark. + sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%underscore\\_%'""".stripMargin) + checkAnswer(df3, Row("special_character_underscore_present")) + + val df4 = spark. 
+ sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%character%' + |ORDER BY pattern_testing_col""".stripMargin) + checkAnswer(df4, Seq( + Row("special_character_percent%_present"), + Row("special_character_percent_not_present"), + Row("special_character_quote'_present"), + Row("special_character_quote_not_present"), + Row("special_character_underscore_present"), + Row("special_character_underscorenot_present"))) + + // map to startsWith + // this one should map to contains + val df5 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE 'special_character_quote\\'%'""".stripMargin) + checkAnswer(df5, Row("special_character_quote'_present")) + val df6 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE 'special_character_percent\\%%'""".stripMargin) + checkAnswer(df6, Row("special_character_percent%_present")) + val df7 = spark. + sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE 'special_character_underscore\\_%'""".stripMargin) + checkAnswer(df7, Row("special_character_underscore_present")) + val df8 = spark. 
+ sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE 'special_character%' + |ORDER BY pattern_testing_col""".stripMargin) + checkAnswer(df8, Seq( + Row("special_character_percent%_present"), + Row("special_character_percent_not_present"), + Row("special_character_quote'_present"), + Row("special_character_quote_not_present"), + Row("special_character_underscore_present"), + Row("special_character_underscorenot_present"))) + // map to endsWith + // this one should map to contains + val df9 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%quote\\'_present'""".stripMargin) + checkAnswer(df9, Row("special_character_quote'_present")) + val df10 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%percent\\%_present'""".stripMargin) + checkAnswer(df10, Row("special_character_percent%_present")) + val df11 = spark. + sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%underscore\\_present'""".stripMargin) + checkAnswer(df11, Row("special_character_underscore_present")) + val df12 = spark. 
+ sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%present' ORDER BY pattern_testing_col""".stripMargin) + checkAnswer(df12, Seq( + Row("special_character_percent%_present"), + Row("special_character_percent_not_present"), + Row("special_character_quote'_present"), + Row("special_character_quote_not_present"), + Row("special_character_underscore_present"), + Row("special_character_underscorenot_present"))) + } } diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala index 8c0a7c0a809f..59f00536d113 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala @@ -190,6 +190,24 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { conn.prepareStatement("CREATE DOMAIN myint AS integer CHECK (VALUE > 0)").executeUpdate() conn.prepareStatement("CREATE TABLE domain_table (c1 myint)").executeUpdate() conn.prepareStatement("INSERT INTO domain_table VALUES (1)").executeUpdate() + + conn.prepareStatement( + s"""CREATE TABLE pattern_testing_table ( + |pattern_testing_col LONGTEXT + |) + """.stripMargin + ).executeUpdate() + + conn.prepareStatement( + s""" + |INSERT INTO pattern_testing_table VALUES + |('special_character_quote\\'_present'), + |('special_character_quote_not_present'), + |('special_character_percent%_present'), + |('special_character_percent_not_present'), + |('special_character_underscore_present'), + |('special_character_underscorenot_present') + """.stripMargin).executeUpdate() } test("Type mapping for various types") { @@ -580,4 +598,182 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { assert(cause.getSQLState === "22003") } } + + test("test 
contains pushdown") { + // this one should map to contains + val df1 = spark.sql( + s""" + |SELECT * FROM pattern_testing_table + |WHERE contains(pattern_testing_col, 'quote\\'')""".stripMargin) + df1.explain("formatted") + + checkAnswer(df1, Row("special_character_quote'_present")) + + val df2 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE contains(pattern_testing_col, 'percent%')""".stripMargin) + checkAnswer(df2, Row("special_character_percent%_present")) + + val df3 = spark. + sql( + s"""SELECT * FROM pattern_testing_table + |WHERE contains(pattern_testing_col, 'underscore_')""".stripMargin) + checkAnswer(df3, Row("special_character_underscore_present")) + + val df4 = spark. + sql( + s"""SELECT * FROM pattern_testing_table + |WHERE contains(pattern_testing_col, 'character') + |ORDER BY pattern_testing_col""".stripMargin) + checkAnswer(df4, Seq( + Row("special_character_percent%_present"), + Row("special_character_percent_not_present"), + Row("special_character_quote'_present"), + Row("special_character_quote_not_present"), + Row("special_character_underscore_present"), + Row("special_character_underscorenot_present"))) + } + + test("endswith pushdown") { + val df9 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE endswith(pattern_testing_col, 'quote\\'_present')""".stripMargin) + checkAnswer(df9, Row("special_character_quote'_present")) + val df10 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE endswith(pattern_testing_col, 'percent%_present')""".stripMargin) + checkAnswer(df10, Row("special_character_percent%_present")) + val df11 = spark. + sql( + s"""SELECT * FROM pattern_testing_table + |WHERE endswith(pattern_testing_col, 'underscore_present')""".stripMargin) + checkAnswer(df11, Row("special_character_underscore_present")) + val df12 = spark. 
+ sql( + s"""SELECT * FROM pattern_testing_table + |WHERE endswith(pattern_testing_col, 'present') + |ORDER BY pattern_testing_col""".stripMargin) + checkAnswer(df12, Seq( + Row("special_character_percent%_present"), + Row("special_character_percent_not_present"), + Row("special_character_quote'_present"), + Row("special_character_quote_not_present"), + Row("special_character_underscore_present"), + Row("special_character_underscorenot_present"))) + } + + test("startswith pushdown") { + val df5 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE startswith(pattern_testing_col, 'special_character_quote\\'')""".stripMargin) + checkAnswer(df5, Row("special_character_quote'_present")) + val df6 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE startswith(pattern_testing_col, 'special_character_percent%')""".stripMargin) + checkAnswer(df6, Row("special_character_percent%_present")) + val df7 = spark. + sql( + s"""SELECT * FROM pattern_testing_table + |WHERE startswith(pattern_testing_col, 'special_character_underscore_')""".stripMargin) + checkAnswer(df7, Row("special_character_underscore_present")) + val df8 = spark. 
+ sql( + s"""SELECT * FROM pattern_testing_table + |WHERE startswith(pattern_testing_col, 'special_character') + |ORDER BY pattern_testing_col""".stripMargin) + checkAnswer(df8, Seq( + Row("special_character_percent%_present"), + Row("special_character_percent_not_present"), + Row("special_character_quote'_present"), + Row("special_character_quote_not_present"), + Row("special_character_underscore_present"), + Row("special_character_underscorenot_present"))) + } + + test("test like pushdown") { + // this one should map to contains + val df1 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%quote\\'%'""".stripMargin) + + checkAnswer(df1, Row("special_character_quote'_present")) + + val df2 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%percent\\%%'""".stripMargin) + checkAnswer(df2, Row("special_character_percent%_present")) + + val df3 = spark. + sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%underscore\\_%'""".stripMargin) + checkAnswer(df3, Row("special_character_underscore_present")) + + val df4 = spark. 
+ sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%character%' + |ORDER BY pattern_testing_col""".stripMargin) + checkAnswer(df4, Seq( + Row("special_character_percent%_present"), + Row("special_character_percent_not_present"), + Row("special_character_quote'_present"), + Row("special_character_quote_not_present"), + Row("special_character_underscore_present"), + Row("special_character_underscorenot_present"))) + + // map to startsWith + // this one should map to contains + val df5 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE 'special_character_quote\\'%'""".stripMargin) + checkAnswer(df5, Row("special_character_quote'_present")) + val df6 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE 'special_character_percent\\%%'""".stripMargin) + checkAnswer(df6, Row("special_character_percent%_present")) + val df7 = spark. + sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE 'special_character_underscore\\_%'""".stripMargin) + checkAnswer(df7, Row("special_character_underscore_present")) + val df8 = spark. 
+ sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE 'special_character%' + |ORDER BY pattern_testing_col""".stripMargin) + checkAnswer(df8, Seq( + Row("special_character_percent%_present"), + Row("special_character_percent_not_present"), + Row("special_character_quote'_present"), + Row("special_character_quote_not_present"), + Row("special_character_underscore_present"), + Row("special_character_underscorenot_present"))) + // map to endsWith + // this one should map to contains + val df9 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%quote\\'_present'""".stripMargin) + checkAnswer(df9, Row("special_character_quote'_present")) + val df10 = spark.sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%percent\\%_present'""".stripMargin) + checkAnswer(df10, Row("special_character_percent%_present")) + val df11 = spark. + sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%underscore\\_present'""".stripMargin) + checkAnswer(df11, Row("special_character_underscore_present")) + val df12 = spark. 
+ sql( + s"""SELECT * FROM pattern_testing_table + |WHERE pattern_testing_col LIKE '%present' ORDER BY pattern_testing_col""".stripMargin) + checkAnswer(df12, Seq( + Row("special_character_percent%_present"), + Row("special_character_percent_not_present"), + Row("special_character_quote'_present"), + Row("special_character_quote_not_present"), + Row("special_character_underscore_present"), + Row("special_character_underscorenot_present"))) + } } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java index 1ba46313e0a4..a9298d96c47c 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java @@ -174,11 +174,10 @@ protected String visitLiteral(Literal literal) { String litString = literal.toString(); if (literal.dataType() instanceof StringType) { return "'" - + StringUtils.replace( - litString.substring(1, litString.length() - 1), - "'", - "''") - + "'"; + + StringUtils.replace(litString.substring(1, litString.length() - 1), + "'", + "''") + + "'"; } return litString; } From d88fcbbd30a1bebbb74d1b6766e054594ab4ccf1 Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Fri, 10 May 2024 08:59:14 +0200 Subject: [PATCH 10/22] Move ' escaping to LiteralValue --- .../sql/connector/util/V2ExpressionSQLBuilder.java | 10 +--------- .../spark/sql/connector/expressions/expressions.scala | 4 +++- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java index a9298d96c47c..c3f8c4f7e5ad 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java +++ 
b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java @@ -171,15 +171,7 @@ yield visitBinaryArithmetic( } protected String visitLiteral(Literal literal) { - String litString = literal.toString(); - if (literal.dataType() instanceof StringType) { - return "'" - + StringUtils.replace(litString.substring(1, litString.length() - 1), - "'", - "''") - + "'"; - } - return litString; + return literal.toString(); } protected String visitNamedReference(NamedReference namedRef) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/expressions/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/expressions/expressions.scala index fc41d5a98e4a..b43e627c0eec 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/expressions/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/expressions/expressions.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.connector.expressions +import org.apache.commons.lang3.StringUtils + import org.apache.spark.SparkException import org.apache.spark.sql.catalyst import org.apache.spark.sql.catalyst.parser.CatalystSqlParser @@ -388,7 +390,7 @@ private[sql] object HoursTransform { private[sql] final case class LiteralValue[T](value: T, dataType: DataType) extends Literal[T] { override def toString: String = { if (dataType.isInstanceOf[StringType]) { - s"'$value'" + s"'${StringUtils.replace(s"$value", "'", "''")}'" } else { s"$value" } From dc3c91bbe60ab35a8ce5157e7fd9f216563cd511 Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Fri, 10 May 2024 10:17:48 +0200 Subject: [PATCH 11/22] Remove unused imports --- .../spark/sql/connector/util/V2ExpressionSQLBuilder.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java index 
c3f8c4f7e5ad..11f4389245d9 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java @@ -22,8 +22,6 @@ import java.util.Map; import java.util.StringJoiner; -import org.apache.commons.lang3.StringUtils; - import org.apache.spark.SparkIllegalArgumentException; import org.apache.spark.SparkUnsupportedOperationException; import org.apache.spark.sql.connector.expressions.Cast; @@ -45,7 +43,6 @@ import org.apache.spark.sql.connector.expressions.aggregate.Sum; import org.apache.spark.sql.connector.expressions.aggregate.UserDefinedAggregateFunc; import org.apache.spark.sql.types.DataType; -import org.apache.spark.sql.types.StringType; /** * The builder to generate SQL from V2 expressions. From 4be578641478c6222d1c508395293f39db74ecb7 Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Mon, 13 May 2024 12:36:48 +0200 Subject: [PATCH 12/22] Move tests to use v2 push down --- .../spark/sql/jdbc/DB2IntegrationSuite.scala | 196 ----------------- .../jdbc/MsSqlServerIntegrationSuite.scala | 200 +----------------- .../sql/jdbc/MySQLIntegrationSuite.scala | 196 ----------------- .../sql/jdbc/OracleIntegrationSuite.scala | 197 ----------------- .../sql/jdbc/PostgresIntegrationSuite.scala | 196 ----------------- .../sql/jdbc/v2/DB2IntegrationSuite.scala | 6 + .../v2/DockerJDBCIntegrationV2Suite.scala | 11 + .../jdbc/v2/MsSqlServerIntegrationSuite.scala | 6 + .../sql/jdbc/v2/MySQLIntegrationSuite.scala | 6 + .../sql/jdbc/v2/OracleIntegrationSuite.scala | 6 + .../jdbc/v2/PostgresIntegrationSuite.scala | 6 + .../apache/spark/sql/jdbc/v2/V2JDBCTest.scala | 178 ++++++++++++++++ 12 files changed, 221 insertions(+), 983 deletions(-) diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala 
b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala index 7e6e95d73fa8..cedb33d491fb 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala @@ -64,24 +64,6 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationSuite { .executeUpdate() conn.prepareStatement("INSERT INTO strings VALUES ('the', 'quick', 'brown', BLOB('fox')," + "'Kathy')").executeUpdate() - - conn.prepareStatement( - s"""CREATE TABLE pattern_testing_table ( - |pattern_testing_col LONGTEXT - |) - """.stripMargin - ).executeUpdate() - - conn.prepareStatement( - s""" - |INSERT INTO pattern_testing_table VALUES - |('special_character_quote\\'_present'), - |('special_character_quote_not_present'), - |('special_character_percent%_present'), - |('special_character_percent_not_present'), - |('special_character_underscore_present'), - |('special_character_underscorenot_present') - """.stripMargin).executeUpdate() } test("Basic test") { @@ -242,182 +224,4 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationSuite { assert(actual === expected) } - - test("test contains pushdown") { - // this one should map to contains - val df1 = spark.sql( - s""" - |SELECT * FROM pattern_testing_table - |WHERE contains(pattern_testing_col, 'quote\\'')""".stripMargin) - df1.explain("formatted") - - checkAnswer(df1, Row("special_character_quote'_present")) - - val df2 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE contains(pattern_testing_col, 'percent%')""".stripMargin) - checkAnswer(df2, Row("special_character_percent%_present")) - - val df3 = spark. - sql( - s"""SELECT * FROM pattern_testing_table - |WHERE contains(pattern_testing_col, 'underscore_')""".stripMargin) - checkAnswer(df3, Row("special_character_underscore_present")) - - val df4 = spark. 
- sql( - s"""SELECT * FROM pattern_testing_table - |WHERE contains(pattern_testing_col, 'character') - |ORDER BY pattern_testing_col""".stripMargin) - checkAnswer(df4, Seq( - Row("special_character_percent%_present"), - Row("special_character_percent_not_present"), - Row("special_character_quote'_present"), - Row("special_character_quote_not_present"), - Row("special_character_underscore_present"), - Row("special_character_underscorenot_present"))) - } - - test("endswith pushdown") { - val df9 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE endswith(pattern_testing_col, 'quote\\'_present')""".stripMargin) - checkAnswer(df9, Row("special_character_quote'_present")) - val df10 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE endswith(pattern_testing_col, 'percent%_present')""".stripMargin) - checkAnswer(df10, Row("special_character_percent%_present")) - val df11 = spark. - sql( - s"""SELECT * FROM pattern_testing_table - |WHERE endswith(pattern_testing_col, 'underscore_present')""".stripMargin) - checkAnswer(df11, Row("special_character_underscore_present")) - val df12 = spark. 
- sql( - s"""SELECT * FROM pattern_testing_table - |WHERE endswith(pattern_testing_col, 'present') - |ORDER BY pattern_testing_col""".stripMargin) - checkAnswer(df12, Seq( - Row("special_character_percent%_present"), - Row("special_character_percent_not_present"), - Row("special_character_quote'_present"), - Row("special_character_quote_not_present"), - Row("special_character_underscore_present"), - Row("special_character_underscorenot_present"))) - } - - test("startswith pushdown") { - val df5 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE startswith(pattern_testing_col, 'special_character_quote\\'')""".stripMargin) - checkAnswer(df5, Row("special_character_quote'_present")) - val df6 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE startswith(pattern_testing_col, 'special_character_percent%')""".stripMargin) - checkAnswer(df6, Row("special_character_percent%_present")) - val df7 = spark. - sql( - s"""SELECT * FROM pattern_testing_table - |WHERE startswith(pattern_testing_col, 'special_character_underscore_')""".stripMargin) - checkAnswer(df7, Row("special_character_underscore_present")) - val df8 = spark. 
- sql( - s"""SELECT * FROM pattern_testing_table - |WHERE startswith(pattern_testing_col, 'special_character') - |ORDER BY pattern_testing_col""".stripMargin) - checkAnswer(df8, Seq( - Row("special_character_percent%_present"), - Row("special_character_percent_not_present"), - Row("special_character_quote'_present"), - Row("special_character_quote_not_present"), - Row("special_character_underscore_present"), - Row("special_character_underscorenot_present"))) - } - - test("test like pushdown") { - // this one should map to contains - val df1 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%quote\\'%'""".stripMargin) - - checkAnswer(df1, Row("special_character_quote'_present")) - - val df2 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%percent\\%%'""".stripMargin) - checkAnswer(df2, Row("special_character_percent%_present")) - - val df3 = spark. - sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%underscore\\_%'""".stripMargin) - checkAnswer(df3, Row("special_character_underscore_present")) - - val df4 = spark. 
- sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%character%' - |ORDER BY pattern_testing_col""".stripMargin) - checkAnswer(df4, Seq( - Row("special_character_percent%_present"), - Row("special_character_percent_not_present"), - Row("special_character_quote'_present"), - Row("special_character_quote_not_present"), - Row("special_character_underscore_present"), - Row("special_character_underscorenot_present"))) - - // map to startsWith - // this one should map to contains - val df5 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE 'special_character_quote\\'%'""".stripMargin) - checkAnswer(df5, Row("special_character_quote'_present")) - val df6 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE 'special_character_percent\\%%'""".stripMargin) - checkAnswer(df6, Row("special_character_percent%_present")) - val df7 = spark. - sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE 'special_character_underscore\\_%'""".stripMargin) - checkAnswer(df7, Row("special_character_underscore_present")) - val df8 = spark. 
- sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE 'special_character%' - |ORDER BY pattern_testing_col""".stripMargin) - checkAnswer(df8, Seq( - Row("special_character_percent%_present"), - Row("special_character_percent_not_present"), - Row("special_character_quote'_present"), - Row("special_character_quote_not_present"), - Row("special_character_underscore_present"), - Row("special_character_underscorenot_present"))) - // map to endsWith - // this one should map to contains - val df9 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%quote\\'_present'""".stripMargin) - checkAnswer(df9, Row("special_character_quote'_present")) - val df10 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%percent\\%_present'""".stripMargin) - checkAnswer(df10, Row("special_character_percent%_present")) - val df11 = spark. - sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%underscore\\_present'""".stripMargin) - checkAnswer(df11, Row("special_character_underscore_present")) - val df12 = spark. 
- sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%present' ORDER BY pattern_testing_col""".stripMargin) - checkAnswer(df12, Seq( - Row("special_character_percent%_present"), - Row("special_character_percent_not_present"), - Row("special_character_quote'_present"), - Row("special_character_quote_not_present"), - Row("special_character_underscore_present"), - Row("special_character_underscorenot_present"))) - } } diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala index 5c533c8c4bdc..79470b58e62a 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala @@ -146,24 +146,6 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite { .executeUpdate() conn.prepareStatement("""INSERT INTO test_rowversion (myKey, myValue) VALUES (1, 0)""") .executeUpdate() - - conn.prepareStatement( - s"""CREATE TABLE pattern_testing_table ( - |pattern_testing_col LONGTEXT - |) - """.stripMargin - ).executeUpdate() - - conn.prepareStatement( - s""" - |INSERT INTO pattern_testing_table VALUES - |('special_character_quote\\'_present'), - |('special_character_quote_not_present'), - |('special_character_percent%_present'), - |('special_character_percent_not_present'), - |('special_character_underscore_present'), - |('special_character_underscorenot_present') - """.stripMargin).executeUpdate() } test("Basic test") { @@ -228,8 +210,8 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite { assert(row.getDouble(6) == 1.23456788103168E14) // float(24) has 7-digits precision assert(row.getDouble(7) == 1.23456788103168E14) // real = float(24) } else { - assert(row.getFloat(6) == 
1.23456788103168E14) // float(24) has 7-digits precision - assert(row.getFloat(7) == 1.23456788103168E14) // real = float(24) + assert(row.getFloat(6) == 1.23456788103168E14) // float(24) has 7-digits precision + assert(row.getFloat(7) == 1.23456788103168E14) // real = float(24) } assert(row.getAs[BigDecimal](8).equals(new BigDecimal("123.00"))) assert(row.getAs[BigDecimal](9).equals(new BigDecimal("12345.12000"))) @@ -511,182 +493,4 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite { errorClass = "UNRECOGNIZED_SQL_TYPE", parameters = Map("typeName" -> "sql_variant", "jdbcType" -> "-156")) } - - test("test contains pushdown") { - // this one should map to contains - val df1 = spark.sql( - s""" - |SELECT * FROM pattern_testing_table - |WHERE contains(pattern_testing_col, 'quote\\'')""".stripMargin) - df1.explain("formatted") - - checkAnswer(df1, Row("special_character_quote'_present")) - - val df2 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE contains(pattern_testing_col, 'percent%')""".stripMargin) - checkAnswer(df2, Row("special_character_percent%_present")) - - val df3 = spark. - sql( - s"""SELECT * FROM pattern_testing_table - |WHERE contains(pattern_testing_col, 'underscore_')""".stripMargin) - checkAnswer(df3, Row("special_character_underscore_present")) - - val df4 = spark. 
- sql( - s"""SELECT * FROM pattern_testing_table - |WHERE contains(pattern_testing_col, 'character') - |ORDER BY pattern_testing_col""".stripMargin) - checkAnswer(df4, Seq( - Row("special_character_percent%_present"), - Row("special_character_percent_not_present"), - Row("special_character_quote'_present"), - Row("special_character_quote_not_present"), - Row("special_character_underscore_present"), - Row("special_character_underscorenot_present"))) - } - - test("endswith pushdown") { - val df9 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE endswith(pattern_testing_col, 'quote\\'_present')""".stripMargin) - checkAnswer(df9, Row("special_character_quote'_present")) - val df10 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE endswith(pattern_testing_col, 'percent%_present')""".stripMargin) - checkAnswer(df10, Row("special_character_percent%_present")) - val df11 = spark. - sql( - s"""SELECT * FROM pattern_testing_table - |WHERE endswith(pattern_testing_col, 'underscore_present')""".stripMargin) - checkAnswer(df11, Row("special_character_underscore_present")) - val df12 = spark. 
- sql( - s"""SELECT * FROM pattern_testing_table - |WHERE endswith(pattern_testing_col, 'present') - |ORDER BY pattern_testing_col""".stripMargin) - checkAnswer(df12, Seq( - Row("special_character_percent%_present"), - Row("special_character_percent_not_present"), - Row("special_character_quote'_present"), - Row("special_character_quote_not_present"), - Row("special_character_underscore_present"), - Row("special_character_underscorenot_present"))) - } - - test("startswith pushdown") { - val df5 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE startswith(pattern_testing_col, 'special_character_quote\\'')""".stripMargin) - checkAnswer(df5, Row("special_character_quote'_present")) - val df6 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE startswith(pattern_testing_col, 'special_character_percent%')""".stripMargin) - checkAnswer(df6, Row("special_character_percent%_present")) - val df7 = spark. - sql( - s"""SELECT * FROM pattern_testing_table - |WHERE startswith(pattern_testing_col, 'special_character_underscore_')""".stripMargin) - checkAnswer(df7, Row("special_character_underscore_present")) - val df8 = spark. 
- sql( - s"""SELECT * FROM pattern_testing_table - |WHERE startswith(pattern_testing_col, 'special_character') - |ORDER BY pattern_testing_col""".stripMargin) - checkAnswer(df8, Seq( - Row("special_character_percent%_present"), - Row("special_character_percent_not_present"), - Row("special_character_quote'_present"), - Row("special_character_quote_not_present"), - Row("special_character_underscore_present"), - Row("special_character_underscorenot_present"))) - } - - test("test like pushdown") { - // this one should map to contains - val df1 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%quote\\'%'""".stripMargin) - - checkAnswer(df1, Row("special_character_quote'_present")) - - val df2 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%percent\\%%'""".stripMargin) - checkAnswer(df2, Row("special_character_percent%_present")) - - val df3 = spark. - sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%underscore\\_%'""".stripMargin) - checkAnswer(df3, Row("special_character_underscore_present")) - - val df4 = spark. 
- sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%character%' - |ORDER BY pattern_testing_col""".stripMargin) - checkAnswer(df4, Seq( - Row("special_character_percent%_present"), - Row("special_character_percent_not_present"), - Row("special_character_quote'_present"), - Row("special_character_quote_not_present"), - Row("special_character_underscore_present"), - Row("special_character_underscorenot_present"))) - - // map to startsWith - // this one should map to contains - val df5 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE 'special_character_quote\\'%'""".stripMargin) - checkAnswer(df5, Row("special_character_quote'_present")) - val df6 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE 'special_character_percent\\%%'""".stripMargin) - checkAnswer(df6, Row("special_character_percent%_present")) - val df7 = spark. - sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE 'special_character_underscore\\_%'""".stripMargin) - checkAnswer(df7, Row("special_character_underscore_present")) - val df8 = spark. 
- sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE 'special_character%' - |ORDER BY pattern_testing_col""".stripMargin) - checkAnswer(df8, Seq( - Row("special_character_percent%_present"), - Row("special_character_percent_not_present"), - Row("special_character_quote'_present"), - Row("special_character_quote_not_present"), - Row("special_character_underscore_present"), - Row("special_character_underscorenot_present"))) - // map to endsWith - // this one should map to contains - val df9 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%quote\\'_present'""".stripMargin) - checkAnswer(df9, Row("special_character_quote'_present")) - val df10 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%percent\\%_present'""".stripMargin) - checkAnswer(df10, Row("special_character_percent%_present")) - val df11 = spark. - sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%underscore\\_present'""".stripMargin) - checkAnswer(df11, Row("special_character_underscore_present")) - val df12 = spark. 
- sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%present' ORDER BY pattern_testing_col""".stripMargin) - checkAnswer(df12, Seq( - Row("special_character_percent%_present"), - Row("special_character_percent_not_present"), - Row("special_character_quote'_present"), - Row("special_character_quote_not_present"), - Row("special_character_underscore_present"), - Row("special_character_underscorenot_present"))) - } } diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala index ed18b16eadbd..684cec37c170 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala @@ -97,24 +97,6 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite { conn.prepareStatement("CREATE TABLE TBL_GEOMETRY (col0 GEOMETRY)").executeUpdate() conn.prepareStatement("INSERT INTO TBL_GEOMETRY VALUES (ST_GeomFromText('POINT(0 0)'))") .executeUpdate() - - conn.prepareStatement( - s"""CREATE TABLE pattern_testing_table ( - |pattern_testing_col LONGTEXT - |) - """.stripMargin - ).executeUpdate() - - conn.prepareStatement( - s""" - |INSERT INTO pattern_testing_table VALUES - |('special_character_quote\\'_present'), - |('special_character_quote_not_present'), - |('special_character_percent%_present'), - |('special_character_percent_not_present'), - |('special_character_underscore_present'), - |('special_character_underscorenot_present') - """.stripMargin).executeUpdate() } def testConnection(): Unit = { @@ -376,184 +358,6 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite { val df = spark.read.jdbc(jdbcUrl, "smallint_round_trip", new Properties) assert(df.schema.fields.head.dataType === ShortType) } - - test("test 
contains pushdown") { - // this one should map to contains - val df1 = spark.sql( - s""" - |SELECT * FROM pattern_testing_table - |WHERE contains(pattern_testing_col, 'quote\\'')""".stripMargin) - df1.explain("formatted") - - checkAnswer(df1, Row("special_character_quote'_present")) - - val df2 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE contains(pattern_testing_col, 'percent%')""".stripMargin) - checkAnswer(df2, Row("special_character_percent%_present")) - - val df3 = spark. - sql( - s"""SELECT * FROM pattern_testing_table - |WHERE contains(pattern_testing_col, 'underscore_')""".stripMargin) - checkAnswer(df3, Row("special_character_underscore_present")) - - val df4 = spark. - sql( - s"""SELECT * FROM pattern_testing_table - |WHERE contains(pattern_testing_col, 'character') - |ORDER BY pattern_testing_col""".stripMargin) - checkAnswer(df4, Seq( - Row("special_character_percent%_present"), - Row("special_character_percent_not_present"), - Row("special_character_quote'_present"), - Row("special_character_quote_not_present"), - Row("special_character_underscore_present"), - Row("special_character_underscorenot_present"))) - } - - test("endswith pushdown") { - val df9 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE endswith(pattern_testing_col, 'quote\\'_present')""".stripMargin) - checkAnswer(df9, Row("special_character_quote'_present")) - val df10 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE endswith(pattern_testing_col, 'percent%_present')""".stripMargin) - checkAnswer(df10, Row("special_character_percent%_present")) - val df11 = spark. - sql( - s"""SELECT * FROM pattern_testing_table - |WHERE endswith(pattern_testing_col, 'underscore_present')""".stripMargin) - checkAnswer(df11, Row("special_character_underscore_present")) - val df12 = spark. 
- sql( - s"""SELECT * FROM pattern_testing_table - |WHERE endswith(pattern_testing_col, 'present') - |ORDER BY pattern_testing_col""".stripMargin) - checkAnswer(df12, Seq( - Row("special_character_percent%_present"), - Row("special_character_percent_not_present"), - Row("special_character_quote'_present"), - Row("special_character_quote_not_present"), - Row("special_character_underscore_present"), - Row("special_character_underscorenot_present"))) - } - - test("startswith pushdown") { - val df5 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE startswith(pattern_testing_col, 'special_character_quote\\'')""".stripMargin) - checkAnswer(df5, Row("special_character_quote'_present")) - val df6 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE startswith(pattern_testing_col, 'special_character_percent%')""".stripMargin) - checkAnswer(df6, Row("special_character_percent%_present")) - val df7 = spark. - sql( - s"""SELECT * FROM pattern_testing_table - |WHERE startswith(pattern_testing_col, 'special_character_underscore_')""".stripMargin) - checkAnswer(df7, Row("special_character_underscore_present")) - val df8 = spark. 
- sql( - s"""SELECT * FROM pattern_testing_table - |WHERE startswith(pattern_testing_col, 'special_character') - |ORDER BY pattern_testing_col""".stripMargin) - checkAnswer(df8, Seq( - Row("special_character_percent%_present"), - Row("special_character_percent_not_present"), - Row("special_character_quote'_present"), - Row("special_character_quote_not_present"), - Row("special_character_underscore_present"), - Row("special_character_underscorenot_present"))) - } - - test("test like pushdown") { - // this one should map to contains - val df1 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%quote\\'%'""".stripMargin) - - checkAnswer(df1, Row("special_character_quote'_present")) - - val df2 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%percent\\%%'""".stripMargin) - checkAnswer(df2, Row("special_character_percent%_present")) - - val df3 = spark. - sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%underscore\\_%'""".stripMargin) - checkAnswer(df3, Row("special_character_underscore_present")) - - val df4 = spark. 
- sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%character%' - |ORDER BY pattern_testing_col""".stripMargin) - checkAnswer(df4, Seq( - Row("special_character_percent%_present"), - Row("special_character_percent_not_present"), - Row("special_character_quote'_present"), - Row("special_character_quote_not_present"), - Row("special_character_underscore_present"), - Row("special_character_underscorenot_present"))) - - // map to startsWith - // this one should map to contains - val df5 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE 'special_character_quote\\'%'""".stripMargin) - checkAnswer(df5, Row("special_character_quote'_present")) - val df6 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE 'special_character_percent\\%%'""".stripMargin) - checkAnswer(df6, Row("special_character_percent%_present")) - val df7 = spark. - sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE 'special_character_underscore\\_%'""".stripMargin) - checkAnswer(df7, Row("special_character_underscore_present")) - val df8 = spark. 
- sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE 'special_character%' - |ORDER BY pattern_testing_col""".stripMargin) - checkAnswer(df8, Seq( - Row("special_character_percent%_present"), - Row("special_character_percent_not_present"), - Row("special_character_quote'_present"), - Row("special_character_quote_not_present"), - Row("special_character_underscore_present"), - Row("special_character_underscorenot_present"))) - // map to endsWith - // this one should map to contains - val df9 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%quote\\'_present'""".stripMargin) - checkAnswer(df9, Row("special_character_quote'_present")) - val df10 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%percent\\%_present'""".stripMargin) - checkAnswer(df10, Row("special_character_percent%_present")) - val df11 = spark. - sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%underscore\\_present'""".stripMargin) - checkAnswer(df11, Row("special_character_underscore_present")) - val df12 = spark. 
- sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%present' ORDER BY pattern_testing_col""".stripMargin) - checkAnswer(df12, Seq( - Row("special_character_percent%_present"), - Row("special_character_percent_not_present"), - Row("special_character_quote'_present"), - Row("special_character_quote_not_present"), - Row("special_character_underscore_present"), - Row("special_character_underscorenot_present"))) - } } diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala index 3fae1c0eba83..496498e5455b 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala @@ -153,25 +153,6 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark """.stripMargin.replaceAll("\n", " ")).executeUpdate() conn.commit() - conn.prepareStatement( - s"""CREATE TABLE pattern_testing_table ( - |pattern_testing_col LONGTEXT - |) - """.stripMargin - ).executeUpdate() - - conn.prepareStatement( - s""" - |INSERT INTO pattern_testing_table VALUES - |('special_character_quote\\'_present'), - |('special_character_quote_not_present'), - |('special_character_percent%_present'), - |('special_character_percent_not_present'), - |('special_character_underscore_present'), - |('special_character_underscorenot_present') - """.stripMargin).executeUpdate() - conn.commit() - conn.prepareStatement("CREATE TABLE test_ltz(t TIMESTAMP WITH LOCAL TIME ZONE)") .executeUpdate() conn.prepareStatement( @@ -636,182 +617,4 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark assert(!schema(i).metadata.contains(CharVarcharUtils.CHAR_VARCHAR_TYPE_STRING_METADATA_KEY)) } } - - test("test contains 
pushdown") { - // this one should map to contains - val df1 = spark.sql( - s""" - |SELECT * FROM pattern_testing_table - |WHERE contains(pattern_testing_col, 'quote\\'')""".stripMargin) - df1.explain("formatted") - - checkAnswer(df1, Row("special_character_quote'_present")) - - val df2 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE contains(pattern_testing_col, 'percent%')""".stripMargin) - checkAnswer(df2, Row("special_character_percent%_present")) - - val df3 = spark. - sql( - s"""SELECT * FROM pattern_testing_table - |WHERE contains(pattern_testing_col, 'underscore_')""".stripMargin) - checkAnswer(df3, Row("special_character_underscore_present")) - - val df4 = spark. - sql( - s"""SELECT * FROM pattern_testing_table - |WHERE contains(pattern_testing_col, 'character') - |ORDER BY pattern_testing_col""".stripMargin) - checkAnswer(df4, Seq( - Row("special_character_percent%_present"), - Row("special_character_percent_not_present"), - Row("special_character_quote'_present"), - Row("special_character_quote_not_present"), - Row("special_character_underscore_present"), - Row("special_character_underscorenot_present"))) - } - - test("endswith pushdown") { - val df9 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE endswith(pattern_testing_col, 'quote\\'_present')""".stripMargin) - checkAnswer(df9, Row("special_character_quote'_present")) - val df10 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE endswith(pattern_testing_col, 'percent%_present')""".stripMargin) - checkAnswer(df10, Row("special_character_percent%_present")) - val df11 = spark. - sql( - s"""SELECT * FROM pattern_testing_table - |WHERE endswith(pattern_testing_col, 'underscore_present')""".stripMargin) - checkAnswer(df11, Row("special_character_underscore_present")) - val df12 = spark. 
- sql( - s"""SELECT * FROM pattern_testing_table - |WHERE endswith(pattern_testing_col, 'present') - |ORDER BY pattern_testing_col""".stripMargin) - checkAnswer(df12, Seq( - Row("special_character_percent%_present"), - Row("special_character_percent_not_present"), - Row("special_character_quote'_present"), - Row("special_character_quote_not_present"), - Row("special_character_underscore_present"), - Row("special_character_underscorenot_present"))) - } - - test("startswith pushdown") { - val df5 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE startswith(pattern_testing_col, 'special_character_quote\\'')""".stripMargin) - checkAnswer(df5, Row("special_character_quote'_present")) - val df6 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE startswith(pattern_testing_col, 'special_character_percent%')""".stripMargin) - checkAnswer(df6, Row("special_character_percent%_present")) - val df7 = spark. - sql( - s"""SELECT * FROM pattern_testing_table - |WHERE startswith(pattern_testing_col, 'special_character_underscore_')""".stripMargin) - checkAnswer(df7, Row("special_character_underscore_present")) - val df8 = spark. 
- sql( - s"""SELECT * FROM pattern_testing_table - |WHERE startswith(pattern_testing_col, 'special_character') - |ORDER BY pattern_testing_col""".stripMargin) - checkAnswer(df8, Seq( - Row("special_character_percent%_present"), - Row("special_character_percent_not_present"), - Row("special_character_quote'_present"), - Row("special_character_quote_not_present"), - Row("special_character_underscore_present"), - Row("special_character_underscorenot_present"))) - } - - test("test like pushdown") { - // this one should map to contains - val df1 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%quote\\'%'""".stripMargin) - - checkAnswer(df1, Row("special_character_quote'_present")) - - val df2 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%percent\\%%'""".stripMargin) - checkAnswer(df2, Row("special_character_percent%_present")) - - val df3 = spark. - sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%underscore\\_%'""".stripMargin) - checkAnswer(df3, Row("special_character_underscore_present")) - - val df4 = spark. 
- sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%character%' - |ORDER BY pattern_testing_col""".stripMargin) - checkAnswer(df4, Seq( - Row("special_character_percent%_present"), - Row("special_character_percent_not_present"), - Row("special_character_quote'_present"), - Row("special_character_quote_not_present"), - Row("special_character_underscore_present"), - Row("special_character_underscorenot_present"))) - - // map to startsWith - // this one should map to contains - val df5 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE 'special_character_quote\\'%'""".stripMargin) - checkAnswer(df5, Row("special_character_quote'_present")) - val df6 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE 'special_character_percent\\%%'""".stripMargin) - checkAnswer(df6, Row("special_character_percent%_present")) - val df7 = spark. - sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE 'special_character_underscore\\_%'""".stripMargin) - checkAnswer(df7, Row("special_character_underscore_present")) - val df8 = spark. 
- sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE 'special_character%' - |ORDER BY pattern_testing_col""".stripMargin) - checkAnswer(df8, Seq( - Row("special_character_percent%_present"), - Row("special_character_percent_not_present"), - Row("special_character_quote'_present"), - Row("special_character_quote_not_present"), - Row("special_character_underscore_present"), - Row("special_character_underscorenot_present"))) - // map to endsWith - // this one should map to contains - val df9 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%quote\\'_present'""".stripMargin) - checkAnswer(df9, Row("special_character_quote'_present")) - val df10 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%percent\\%_present'""".stripMargin) - checkAnswer(df10, Row("special_character_percent%_present")) - val df11 = spark. - sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%underscore\\_present'""".stripMargin) - checkAnswer(df11, Row("special_character_underscore_present")) - val df12 = spark. 
- sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%present' ORDER BY pattern_testing_col""".stripMargin) - checkAnswer(df12, Seq( - Row("special_character_percent%_present"), - Row("special_character_percent_not_present"), - Row("special_character_quote'_present"), - Row("special_character_quote_not_present"), - Row("special_character_underscore_present"), - Row("special_character_underscorenot_present"))) - } } diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala index 59f00536d113..8c0a7c0a809f 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala @@ -190,24 +190,6 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { conn.prepareStatement("CREATE DOMAIN myint AS integer CHECK (VALUE > 0)").executeUpdate() conn.prepareStatement("CREATE TABLE domain_table (c1 myint)").executeUpdate() conn.prepareStatement("INSERT INTO domain_table VALUES (1)").executeUpdate() - - conn.prepareStatement( - s"""CREATE TABLE pattern_testing_table ( - |pattern_testing_col LONGTEXT - |) - """.stripMargin - ).executeUpdate() - - conn.prepareStatement( - s""" - |INSERT INTO pattern_testing_table VALUES - |('special_character_quote\\'_present'), - |('special_character_quote_not_present'), - |('special_character_percent%_present'), - |('special_character_percent_not_present'), - |('special_character_underscore_present'), - |('special_character_underscorenot_present') - """.stripMargin).executeUpdate() } test("Type mapping for various types") { @@ -598,182 +580,4 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { assert(cause.getSQLState === "22003") } } - - test("test 
contains pushdown") { - // this one should map to contains - val df1 = spark.sql( - s""" - |SELECT * FROM pattern_testing_table - |WHERE contains(pattern_testing_col, 'quote\\'')""".stripMargin) - df1.explain("formatted") - - checkAnswer(df1, Row("special_character_quote'_present")) - - val df2 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE contains(pattern_testing_col, 'percent%')""".stripMargin) - checkAnswer(df2, Row("special_character_percent%_present")) - - val df3 = spark. - sql( - s"""SELECT * FROM pattern_testing_table - |WHERE contains(pattern_testing_col, 'underscore_')""".stripMargin) - checkAnswer(df3, Row("special_character_underscore_present")) - - val df4 = spark. - sql( - s"""SELECT * FROM pattern_testing_table - |WHERE contains(pattern_testing_col, 'character') - |ORDER BY pattern_testing_col""".stripMargin) - checkAnswer(df4, Seq( - Row("special_character_percent%_present"), - Row("special_character_percent_not_present"), - Row("special_character_quote'_present"), - Row("special_character_quote_not_present"), - Row("special_character_underscore_present"), - Row("special_character_underscorenot_present"))) - } - - test("endswith pushdown") { - val df9 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE endswith(pattern_testing_col, 'quote\\'_present')""".stripMargin) - checkAnswer(df9, Row("special_character_quote'_present")) - val df10 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE endswith(pattern_testing_col, 'percent%_present')""".stripMargin) - checkAnswer(df10, Row("special_character_percent%_present")) - val df11 = spark. - sql( - s"""SELECT * FROM pattern_testing_table - |WHERE endswith(pattern_testing_col, 'underscore_present')""".stripMargin) - checkAnswer(df11, Row("special_character_underscore_present")) - val df12 = spark. 
- sql( - s"""SELECT * FROM pattern_testing_table - |WHERE endswith(pattern_testing_col, 'present') - |ORDER BY pattern_testing_col""".stripMargin) - checkAnswer(df12, Seq( - Row("special_character_percent%_present"), - Row("special_character_percent_not_present"), - Row("special_character_quote'_present"), - Row("special_character_quote_not_present"), - Row("special_character_underscore_present"), - Row("special_character_underscorenot_present"))) - } - - test("startswith pushdown") { - val df5 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE startswith(pattern_testing_col, 'special_character_quote\\'')""".stripMargin) - checkAnswer(df5, Row("special_character_quote'_present")) - val df6 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE startswith(pattern_testing_col, 'special_character_percent%')""".stripMargin) - checkAnswer(df6, Row("special_character_percent%_present")) - val df7 = spark. - sql( - s"""SELECT * FROM pattern_testing_table - |WHERE startswith(pattern_testing_col, 'special_character_underscore_')""".stripMargin) - checkAnswer(df7, Row("special_character_underscore_present")) - val df8 = spark. 
- sql( - s"""SELECT * FROM pattern_testing_table - |WHERE startswith(pattern_testing_col, 'special_character') - |ORDER BY pattern_testing_col""".stripMargin) - checkAnswer(df8, Seq( - Row("special_character_percent%_present"), - Row("special_character_percent_not_present"), - Row("special_character_quote'_present"), - Row("special_character_quote_not_present"), - Row("special_character_underscore_present"), - Row("special_character_underscorenot_present"))) - } - - test("test like pushdown") { - // this one should map to contains - val df1 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%quote\\'%'""".stripMargin) - - checkAnswer(df1, Row("special_character_quote'_present")) - - val df2 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%percent\\%%'""".stripMargin) - checkAnswer(df2, Row("special_character_percent%_present")) - - val df3 = spark. - sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%underscore\\_%'""".stripMargin) - checkAnswer(df3, Row("special_character_underscore_present")) - - val df4 = spark. 
- sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%character%' - |ORDER BY pattern_testing_col""".stripMargin) - checkAnswer(df4, Seq( - Row("special_character_percent%_present"), - Row("special_character_percent_not_present"), - Row("special_character_quote'_present"), - Row("special_character_quote_not_present"), - Row("special_character_underscore_present"), - Row("special_character_underscorenot_present"))) - - // map to startsWith - // this one should map to contains - val df5 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE 'special_character_quote\\'%'""".stripMargin) - checkAnswer(df5, Row("special_character_quote'_present")) - val df6 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE 'special_character_percent\\%%'""".stripMargin) - checkAnswer(df6, Row("special_character_percent%_present")) - val df7 = spark. - sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE 'special_character_underscore\\_%'""".stripMargin) - checkAnswer(df7, Row("special_character_underscore_present")) - val df8 = spark. 
- sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE 'special_character%' - |ORDER BY pattern_testing_col""".stripMargin) - checkAnswer(df8, Seq( - Row("special_character_percent%_present"), - Row("special_character_percent_not_present"), - Row("special_character_quote'_present"), - Row("special_character_quote_not_present"), - Row("special_character_underscore_present"), - Row("special_character_underscorenot_present"))) - // map to endsWith - // this one should map to contains - val df9 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%quote\\'_present'""".stripMargin) - checkAnswer(df9, Row("special_character_quote'_present")) - val df10 = spark.sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%percent\\%_present'""".stripMargin) - checkAnswer(df10, Row("special_character_percent%_present")) - val df11 = spark. - sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%underscore\\_present'""".stripMargin) - checkAnswer(df11, Row("special_character_underscore_present")) - val df12 = spark. 
- sql( - s"""SELECT * FROM pattern_testing_table - |WHERE pattern_testing_col LIKE '%present' ORDER BY pattern_testing_col""".stripMargin) - checkAnswer(df12, Seq( - Row("special_character_percent%_present"), - Row("special_character_percent_not_present"), - Row("special_character_quote'_present"), - Row("special_character_quote_not_present"), - Row("special_character_underscore_present"), - Row("special_character_underscorenot_present"))) - } } diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala index 6c1b7fdd1be5..5cdd8fa8fd9f 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala @@ -65,6 +65,12 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTest { connection.prepareStatement( "CREATE TABLE employee (dept INTEGER, name VARCHAR(10), salary DECIMAL(20, 2), bonus DOUBLE)") .executeUpdate() + connection.prepareStatement( + s"""CREATE TABLE pattern_testing_table ( + |pattern_testing_col VARCHAR(50) + |) + """.stripMargin + ).executeUpdate() } override def testUpdateColumnType(tbl: String): Unit = { diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DockerJDBCIntegrationV2Suite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DockerJDBCIntegrationV2Suite.scala index 72edfc9f1bf1..a42caeafe6fe 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DockerJDBCIntegrationV2Suite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DockerJDBCIntegrationV2Suite.scala @@ -38,6 +38,17 @@ abstract class DockerJDBCIntegrationV2Suite extends
DockerJDBCIntegrationSuite { .executeUpdate() connection.prepareStatement("INSERT INTO employee VALUES (6, 'jen', 12000, 1200)") .executeUpdate() + + connection.prepareStatement( + s""" + |INSERT INTO pattern_testing_table VALUES + |('special_character_quote''_present'), + |('special_character_quote_not_present'), + |('special_character_percent%_present'), + |('special_character_percent_not_present'), + |('special_character_underscore_present'), + |('special_character_underscorenot_present') + """.stripMargin).executeUpdate() } def tablePreparation(connection: Connection): Unit diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala index 65f7579de820..8c82e4faa7f4 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala @@ -74,6 +74,12 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JD connection.prepareStatement( "CREATE TABLE employee (dept INT, name VARCHAR(32), salary NUMERIC(20, 2), bonus FLOAT)") .executeUpdate() + connection.prepareStatement( + s"""CREATE TABLE pattern_testing_table ( + |pattern_testing_col VARCHAR(50) + |) + """.stripMargin + ).executeUpdate() } override def notSupportsTableComment: Boolean = true diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala index 4997d335fda6..c7c7f88b8ae3 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala +++
b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala @@ -77,6 +77,12 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTest connection.prepareStatement( "CREATE TABLE employee (dept INT, name VARCHAR(32), salary DECIMAL(20, 2)," + " bonus DOUBLE)").executeUpdate() + connection.prepareStatement( + s"""CREATE TABLE pattern_testing_table ( + |pattern_testing_col LONGTEXT + |) + """.stripMargin + ).executeUpdate() } override def testUpdateColumnType(tbl: String): Unit = { diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala index a011afac1772..a14c765d76ad 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala @@ -97,6 +97,12 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTes connection.prepareStatement( "CREATE TABLE employee (dept NUMBER(32), name VARCHAR2(32), salary NUMBER(20, 2)," + " bonus BINARY_DOUBLE)").executeUpdate() + connection.prepareStatement( + s"""CREATE TABLE pattern_testing_table ( + |pattern_testing_col VARCHAR(50) + |) + """.stripMargin + ).executeUpdate() } override def testUpdateColumnType(tbl: String): Unit = { diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala index 1f09c2fd3fc5..24ad97af6449 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala +++
b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala @@ -59,6 +59,12 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCT connection.prepareStatement( "CREATE TABLE employee (dept INTEGER, name VARCHAR(32), salary NUMERIC(20, 2)," + " bonus double precision)").executeUpdate() + connection.prepareStatement( + s"""CREATE TABLE pattern_testing_table ( + |pattern_testing_col VARCHAR(50) + |) + """.stripMargin + ).executeUpdate() } override def testUpdateColumnType(tbl: String): Unit = { diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala index c80fbfc748dd..cb70e93bd94e 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala @@ -359,6 +359,184 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu assert(scan.schema.names.sameElements(Seq(col))) } + test("SPARK-48172: Test CONTAINS") { + // this one should map to contains + val df1 = spark.sql( + s""" + |SELECT * FROM $catalogName.pattern_testing_table + |WHERE contains(pattern_testing_col, 'quote\\'')""".stripMargin) + df1.explain("formatted") + + checkAnswer(df1, Row("special_character_quote'_present")) + + val df2 = spark.sql( + s"""SELECT * FROM $catalogName.pattern_testing_table + |WHERE contains(pattern_testing_col, 'percent%')""".stripMargin) + checkAnswer(df2, Row("special_character_percent%_present")) + + val df3 = spark. + sql( + s"""SELECT * FROM $catalogName.pattern_testing_table + |WHERE contains(pattern_testing_col, 'underscore_')""".stripMargin) + checkAnswer(df3, Row("special_character_underscore_present")) + + val df4 = spark.
+ sql( + s"""SELECT * FROM $catalogName.pattern_testing_table + |WHERE contains(pattern_testing_col, 'character') + |ORDER BY pattern_testing_col""".stripMargin) + checkAnswer(df4, Seq( + Row("special_character_percent%_present"), + Row("special_character_percent_not_present"), + Row("special_character_quote'_present"), + Row("special_character_quote_not_present"), + Row("special_character_underscore_present"), + Row("special_character_underscorenot_present"))) + } + + test("SPARK-48172: Test ENDSWITH") { + val df1 = spark.sql( + s"""SELECT * FROM $catalogName.pattern_testing_table + |WHERE endswith(pattern_testing_col, 'quote\\'_present')""".stripMargin) + checkAnswer(df1, Row("special_character_quote'_present")) + val df2 = spark.sql( + s"""SELECT * FROM $catalogName.pattern_testing_table + |WHERE endswith(pattern_testing_col, 'percent%_present')""".stripMargin) + checkAnswer(df2, Row("special_character_percent%_present")) + val df3 = spark. + sql( + s"""SELECT * FROM $catalogName.pattern_testing_table + |WHERE endswith(pattern_testing_col, 'underscore_present')""".stripMargin) + checkAnswer(df3, Row("special_character_underscore_present")) + val df4 = spark. 
+ sql( + s"""SELECT * FROM $catalogName.pattern_testing_table + |WHERE endswith(pattern_testing_col, 'present') + |ORDER BY pattern_testing_col""".stripMargin) + checkAnswer(df4, Seq( + Row("special_character_percent%_present"), + Row("special_character_percent_not_present"), + Row("special_character_quote'_present"), + Row("special_character_quote_not_present"), + Row("special_character_underscore_present"), + Row("special_character_underscorenot_present"))) + } + + test("SPARK-48172: Test STARTSWITH") { + val df1 = spark.sql( + s"""SELECT * FROM $catalogName.pattern_testing_table + |WHERE startswith(pattern_testing_col, 'special_character_quote\\'')""".stripMargin) + checkAnswer(df1, Row("special_character_quote'_present")) + val df2 = spark.sql( + s"""SELECT * FROM $catalogName.pattern_testing_table + |WHERE startswith(pattern_testing_col, 'special_character_percent%')""".stripMargin) + checkAnswer(df2, Row("special_character_percent%_present")) + val df3 = spark. + sql( + s"""SELECT * FROM $catalogName.pattern_testing_table + |WHERE startswith(pattern_testing_col, 'special_character_underscore_')""".stripMargin) + checkAnswer(df3, Row("special_character_underscore_present")) + val df4 = spark. 
+ sql( + s"""SELECT * FROM $catalogName.pattern_testing_table + |WHERE startswith(pattern_testing_col, 'special_character') + |ORDER BY pattern_testing_col""".stripMargin) + checkAnswer(df4, Seq( + Row("special_character_percent%_present"), + Row("special_character_percent_not_present"), + Row("special_character_quote'_present"), + Row("special_character_quote_not_present"), + Row("special_character_underscore_present"), + Row("special_character_underscorenot_present"))) + } + + test("SPARK-48172: Test LIKE") { + // this one should map to contains + val df1 = spark.sql( + s"""SELECT * FROM $catalogName.pattern_testing_table + |WHERE pattern_testing_col LIKE '%quote\\'%'""".stripMargin) + + checkAnswer(df1, Row("special_character_quote'_present")) + + val df2 = spark.sql( + s"""SELECT * FROM $catalogName.pattern_testing_table + |WHERE pattern_testing_col LIKE '%percent\\%%'""".stripMargin) + checkAnswer(df2, Row("special_character_percent%_present")) + + val df3 = spark. + sql( + s"""SELECT * FROM $catalogName.pattern_testing_table + |WHERE pattern_testing_col LIKE '%underscore\\_%'""".stripMargin) + checkAnswer(df3, Row("special_character_underscore_present")) + + val df4 = spark. 
+ sql( + s"""SELECT * FROM $catalogName.pattern_testing_table + |WHERE pattern_testing_col LIKE '%character%' + |ORDER BY pattern_testing_col""".stripMargin) + checkAnswer(df4, Seq( + Row("special_character_percent%_present"), + Row("special_character_percent_not_present"), + Row("special_character_quote'_present"), + Row("special_character_quote_not_present"), + Row("special_character_underscore_present"), + Row("special_character_underscorenot_present"))) + + // map to startsWith + // this one should map to contains + val df5 = spark.sql( + s"""SELECT * FROM $catalogName.pattern_testing_table + |WHERE pattern_testing_col LIKE 'special_character_quote\\'%'""".stripMargin) + checkAnswer(df5, Row("special_character_quote'_present")) + val df6 = spark.sql( + s"""SELECT * FROM $catalogName.pattern_testing_table + |WHERE pattern_testing_col LIKE 'special_character_percent\\%%'""".stripMargin) + checkAnswer(df6, Row("special_character_percent%_present")) + val df7 = spark. + sql( + s"""SELECT * FROM $catalogName.pattern_testing_table + |WHERE pattern_testing_col LIKE 'special_character_underscore\\_%'""".stripMargin) + checkAnswer(df7, Row("special_character_underscore_present")) + val df8 = spark. 
+ sql( + s"""SELECT * FROM $catalogName.pattern_testing_table + |WHERE pattern_testing_col LIKE 'special_character%' + |ORDER BY pattern_testing_col""".stripMargin) + checkAnswer(df8, Seq( + Row("special_character_percent%_present"), + Row("special_character_percent_not_present"), + Row("special_character_quote'_present"), + Row("special_character_quote_not_present"), + Row("special_character_underscore_present"), + Row("special_character_underscorenot_present"))) + // map to endsWith + // this one should map to contains + val df9 = spark.sql( + s"""SELECT * FROM $catalogName.pattern_testing_table + |WHERE pattern_testing_col LIKE '%quote\\'_present'""".stripMargin) + checkAnswer(df9, Row("special_character_quote'_present")) + val df10 = spark.sql( + s"""SELECT * FROM $catalogName.pattern_testing_table + |WHERE pattern_testing_col LIKE '%percent\\%_present'""".stripMargin) + checkAnswer(df10, Row("special_character_percent%_present")) + val df11 = spark. + sql( + s"""SELECT * FROM $catalogName.pattern_testing_table + |WHERE pattern_testing_col LIKE '%underscore\\_present'""".stripMargin) + checkAnswer(df11, Row("special_character_underscore_present")) + val df12 = spark. 
+ sql( + s"""SELECT * FROM $catalogName.pattern_testing_table + |WHERE pattern_testing_col LIKE '%present' ORDER BY pattern_testing_col""".stripMargin) + checkAnswer(df12, Seq( + Row("special_character_percent%_present"), + Row("special_character_percent_not_present"), + Row("special_character_quote'_present"), + Row("special_character_quote_not_present"), + Row("special_character_underscore_present"), + Row("special_character_underscorenot_present"))) + } + test("SPARK-37038: Test TABLESAMPLE") { if (supportsTableSample) { withTable(s"$catalogName.new_table") { From 7f70b6797cb3b1648a730e943bebd10eab0134f6 Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Mon, 13 May 2024 12:40:27 +0200 Subject: [PATCH 13/22] Revert unnecessary changes --- .../apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala index 79470b58e62a..623f404339e9 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala @@ -210,8 +210,8 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite { assert(row.getDouble(6) == 1.23456788103168E14) // float(24) has 7-digits precision assert(row.getDouble(7) == 1.23456788103168E14) // real = float(24) } else { - assert(row.getFloat(6) == 1.23456788103168E14) // float(24) has 7-digits precision - assert(row.getFloat(7) == 1.23456788103168E14) // real = float(24) + assert(row.getFloat(6) == 1.23456788103168E14) // float(24) has 7-digits precision + assert(row.getFloat(7) == 1.23456788103168E14) // real = float(24) } assert(row.getAs[BigDecimal](8).equals(new 
BigDecimal("123.00"))) assert(row.getAs[BigDecimal](9).equals(new BigDecimal("12345.12000"))) From 13f6a8130637f1fcd454ddf5e993f7f19f8c2114 Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Mon, 13 May 2024 14:01:09 +0200 Subject: [PATCH 14/22] Fix test --- .../apache/spark/sql/jdbc/v2/V2JDBCTest.scala | 177 +++++++++++------- 1 file changed, 114 insertions(+), 63 deletions(-) diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala index cb70e93bd94e..a031759566d0 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala @@ -360,94 +360,119 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu } test("SPARK-48172: Test CONTAINS") { - // this one should map to contains val df1 = spark.sql( s""" |SELECT * FROM $catalogName.pattern_testing_table |WHERE contains(pattern_testing_col, 'quote\\'')""".stripMargin) df1.explain("formatted") - - checkAnswer(df1, Row("special_character_quote'_present")) + val rows1 = df1.collect() + assert(rows1.length === 1) + assert(rows1(0).getString(0) === "special_character_quote'_present") val df2 = spark.sql( s"""SELECT * FROM $catalogName.pattern_testing_table |WHERE contains(pattern_testing_col, 'percent%')""".stripMargin) - checkAnswer(df2, Row("special_character_percent%_present")) + val rows2 = df2.collect() + assert(rows2.length === 1) + assert(rows2(0).getString(0) === "special_character_percent%_present") val df3 = spark.
sql( s"""SELECT * FROM $catalogName.pattern_testing_table |WHERE contains(pattern_testing_col, 'underscore_')""".stripMargin) - checkAnswer(df3, Row("special_character_underscore_present")) + val rows3 = df3.collect() + assert(rows3.length === 1) + assert(rows3(0).getString(0) === "special_character_underscore_present") val df4 = spark. sql( s"""SELECT * FROM $catalogName.pattern_testing_table |WHERE contains(pattern_testing_col, 'character') |ORDER BY pattern_testing_col""".stripMargin) - checkAnswer(df4, Seq( - Row("special_character_percent%_present"), - Row("special_character_percent_not_present"), - Row("special_character_quote'_present"), - Row("special_character_quote_not_present"), - Row("special_character_underscore_present"), - Row("special_character_underscorenot_present"))) + val rows4 = df4.collect() + assert(rows4.length === 6) + assert(rows4(0).getString(0) === "special_character_percent%_present") + assert(rows4(1).getString(0) === "special_character_percent_not_present") + assert(rows4(2).getString(0) === "special_character_quote'_present") + assert(rows4(3).getString(0) === "special_character_quote_not_present") + assert(rows4(4).getString(0) === "special_character_underscore_present") + assert(rows4(5).getString(0) === "special_character_underscorenot_present") } test("SPARK-48172: Test ENDSWITH") { val df1 = spark.sql( s"""SELECT * FROM $catalogName.pattern_testing_table |WHERE endswith(pattern_testing_col, 'quote\\'_present')""".stripMargin) - checkAnswer(df1, Row("special_character_quote'_present")) + val rows1 = df1.collect() + assert(rows1.length === 1) + assert(rows1(0).getString(0) === "special_character_quote'_present") + val df2 = spark.sql( s"""SELECT * FROM $catalogName.pattern_testing_table |WHERE endswith(pattern_testing_col, 'percent%_present')""".stripMargin) - checkAnswer(df2, Row("special_character_percent%_present")) + val rows2 = df2.collect() + assert(rows2.length === 1) + assert(rows2(0).getString(0) ===
"special_character_percent%_present") + val df3 = spark. sql( s"""SELECT * FROM $catalogName.pattern_testing_table |WHERE endswith(pattern_testing_col, 'underscore_present')""".stripMargin) - checkAnswer(df3, Row("special_character_underscore_present")) + val rows3 = df3.collect() + assert(rows3.length === 1) + assert(rows3(0).getString(0) === "special_character_underscore_present") + val df4 = spark. sql( s"""SELECT * FROM $catalogName.pattern_testing_table |WHERE endswith(pattern_testing_col, 'present') |ORDER BY pattern_testing_col""".stripMargin) - checkAnswer(df4, Seq( - Row("special_character_percent%_present"), - Row("special_character_percent_not_present"), - Row("special_character_quote'_present"), - Row("special_character_quote_not_present"), - Row("special_character_underscore_present"), - Row("special_character_underscorenot_present"))) + val rows4 = df4.collect() + assert(rows4.length === 6) + assert(rows4(0).getString(0) === "special_character_percent%_present") + assert(rows4(1).getString(0) === "special_character_percent_not_present") + assert(rows4(2).getString(0) === "special_character_quote'_present") + assert(rows4(3).getString(0) === "special_character_quote_not_present") + assert(rows4(4).getString(0) === "special_character_underscore_present") + assert(rows4(5).getString(0) === "special_character_underscorenot_present") } test("SPARK-48172: Test STARTSWITH") { val df1 = spark.sql( s"""SELECT * FROM $catalogName.pattern_testing_table |WHERE startswith(pattern_testing_col, 'special_character_quote\\'')""".stripMargin) - checkAnswer(df1, Row("special_character_quote'_present")) + val rows1 = df1.collect() + assert(rows1.length === 1) + assert(rows1(0).getString(0) === "special_character_quote'_present") + val df2 = spark.sql( s"""SELECT * FROM $catalogName.pattern_testing_table |WHERE startswith(pattern_testing_col, 'special_character_percent%')""".stripMargin) - checkAnswer(df2, Row("special_character_percent%_present")) + val rows2 =
df2.collect() + assert(rows2.length === 1) + assert(rows2(0).getString(0) === "special_character_percent%_present") + val df3 = spark. sql( s"""SELECT * FROM $catalogName.pattern_testing_table |WHERE startswith(pattern_testing_col, 'special_character_underscore_')""".stripMargin) - checkAnswer(df3, Row("special_character_underscore_present")) + val rows3 = df3.collect() + assert(rows3.length === 1) + assert(rows3(0).getString(0) === "special_character_underscore_present") + val df4 = spark. sql( s"""SELECT * FROM $catalogName.pattern_testing_table |WHERE startswith(pattern_testing_col, 'special_character') |ORDER BY pattern_testing_col""".stripMargin) - checkAnswer(df4, Seq( - Row("special_character_percent%_present"), - Row("special_character_percent_not_present"), - Row("special_character_quote'_present"), - Row("special_character_quote_not_present"), - Row("special_character_underscore_present"), - Row("special_character_underscorenot_present"))) + val rows4 = df4.collect() + assert(rows4.length === 6) + assert(rows4(0).getString(0) === "special_character_percent%_present") + assert(rows4(1).getString(0) === "special_character_percent_not_present") + assert(rows4(2).getString(0) === "special_character_quote'_present") + assert(rows4(3).getString(0) === "special_character_quote_not_present") + assert(rows4(4).getString(0) === "special_character_underscore_present") + assert(rows4(5).getString(0) === "special_character_underscorenot_present") } test("SPARK-48172: Test LIKE") { @@ -455,86 +480,112 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu val df1 = spark.sql( s"""SELECT * FROM $catalogName.pattern_testing_table |WHERE pattern_testing_col LIKE '%quote\\'%'""".stripMargin) - - checkAnswer(df1, Row("special_character_quote'_present")) + val rows1 = df1.collect() + assert(rows1.length === 1) + assert(rows1(0).getString(0) === "special_character_quote'_present") val df2 = spark.sql( s"""SELECT * FROM
$catalogName.pattern_testing_table |WHERE pattern_testing_col LIKE '%percent\\%%'""".stripMargin) - checkAnswer(df2, Row("special_character_percent%_present")) + val rows2 = df2.collect() + assert(rows2.length === 1) + assert(rows2(0).getString(0) === "special_character_percent%_present") val df3 = spark. sql( s"""SELECT * FROM $catalogName.pattern_testing_table |WHERE pattern_testing_col LIKE '%underscore\\_%'""".stripMargin) - checkAnswer(df3, Row("special_character_underscore_present")) + val rows3 = df3.collect() + assert(rows3.length === 1) + assert(rows3(0).getString(0) === "special_character_underscore_present") val df4 = spark. sql( s"""SELECT * FROM $catalogName.pattern_testing_table |WHERE pattern_testing_col LIKE '%character%' |ORDER BY pattern_testing_col""".stripMargin) - checkAnswer(df4, Seq( - Row("special_character_percent%_present"), - Row("special_character_percent_not_present"), - Row("special_character_quote'_present"), - Row("special_character_quote_not_present"), - Row("special_character_underscore_present"), - Row("special_character_underscorenot_present"))) + val rows4 = df4.collect() + assert(rows4.length === 1) + assert(rows4(0).getString(0) === "special_character_percent%_present") + assert(rows4(1).getString(0) === "special_character_percent_not_present") + assert(rows4(2).getString(0) === "special_character_quote'_present") + assert(rows4(3).getString(0) === "special_character_quote_not_present") + assert(rows4(4).getString(0) === "special_character_underscore_present") + assert(rows4(5).getString(0) === "special_character_underscorenot_present") // map to startsWith // this one should map to contains val df5 = spark.sql( s"""SELECT * FROM $catalogName.pattern_testing_table |WHERE pattern_testing_col LIKE 'special_character_quote\\'%'""".stripMargin) - checkAnswer(df5, Row("special_character_quote'_present")) + val rows5 = df5.collect() + assert(rows5.length === 1) + assert(rows5(0).getString(0) === "special_character_quote'_present") + 
val df6 = spark.sql( s"""SELECT * FROM $catalogName.pattern_testing_table |WHERE pattern_testing_col LIKE 'special_character_percent\\%%'""".stripMargin) - checkAnswer(df6, Row("special_character_percent%_present")) + val rows6 = df6.collect() + assert(rows6.length === 1) + assert(rows6(0).getString(0) === "special_character_percent%_present") + val df7 = spark. sql( s"""SELECT * FROM $catalogName.pattern_testing_table |WHERE pattern_testing_col LIKE 'special_character_underscore\\_%'""".stripMargin) - checkAnswer(df7, Row("special_character_underscore_present")) + val rows7 = df7.collect() + assert(rows7.length === 1) + assert(rows7(0).getString(0) === "special_character_underscore_present") + val df8 = spark. sql( s"""SELECT * FROM $catalogName.pattern_testing_table |WHERE pattern_testing_col LIKE 'special_character%' |ORDER BY pattern_testing_col""".stripMargin) - checkAnswer(df8, Seq( - Row("special_character_percent%_present"), - Row("special_character_percent_not_present"), - Row("special_character_quote'_present"), - Row("special_character_quote_not_present"), - Row("special_character_underscore_present"), - Row("special_character_underscorenot_present"))) + val rows4 = df4.collect() + assert(rows4.length === 1) + assert(rows4(0).getString(0) === "special_character_percent%_present") + assert(rows4(1).getString(0) === "special_character_percent_not_present") + assert(rows4(2).getString(0) === "special_character_quote'_present") + assert(rows4(3).getString(0) === "special_character_quote_not_present") + assert(rows4(4).getString(0) === "special_character_underscore_present") + assert(rows4(5).getString(0) === "special_character_underscorenot_present") // map to endsWith // this one should map to contains val df9 = spark.sql( s"""SELECT * FROM $catalogName.pattern_testing_table |WHERE pattern_testing_col LIKE '%quote\\'_present'""".stripMargin) - checkAnswer(df9, Row("special_character_quote'_present")) + val rows9 = df9.collect() + assert(rows9.length === 1) 
+ assert(rows9(0).getString(0) === "special_character_quote'_present") + val df10 = spark.sql( s"""SELECT * FROM $catalogName.pattern_testing_table |WHERE pattern_testing_col LIKE '%percent\\%_present'""".stripMargin) - checkAnswer(df10, Row("special_character_percent%_present")) + val rows10 = df10.collect() + assert(rows10.length === 1) + assert(rows10(0).getString(0) === "special_character_percent%_present") + val df11 = spark. sql( s"""SELECT * FROM $catalogName.pattern_testing_table |WHERE pattern_testing_col LIKE '%underscore\\_present'""".stripMargin) - checkAnswer(df11, Row("special_character_underscore_present")) + val rows11 = df11.collect() + assert(rows11.length === 1) + assert(rows11(0).getString(0) === "special_character_underscore_present") + val df12 = spark. sql( s"""SELECT * FROM $catalogName.pattern_testing_table |WHERE pattern_testing_col LIKE '%present' ORDER BY pattern_testing_col""".stripMargin) - checkAnswer(df12, Seq( - Row("special_character_percent%_present"), - Row("special_character_percent_not_present"), - Row("special_character_quote'_present"), - Row("special_character_quote_not_present"), - Row("special_character_underscore_present"), - Row("special_character_underscorenot_present"))) + val rows12 = df12.collect() + assert(rows12.length === 1) + assert(rows12(0).getString(0) === "special_character_percent%_present") + assert(rows12(1).getString(0) === "special_character_percent_not_present") + assert(rows12(2).getString(0) === "special_character_quote'_present") + assert(rows12(3).getString(0) === "special_character_quote_not_present") + assert(rows12(4).getString(0) === "special_character_underscore_present") + assert(rows12(5).getString(0) === "special_character_underscorenot_present") } test("SPARK-37038: Test TABLESAMPLE") { From 9d0769841a4c5fbcb65e00cd915052f4fc3ec4be Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Mon, 13 May 2024 14:18:04 +0200 Subject: [PATCH 15/22] Fix closing bracket --- 
.../src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala index a031759566d0..39d86b69ca42 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala @@ -397,6 +397,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu assert(rows4(3).getString(0) === "special_character_quote_not_present") assert(rows4(4).getString(0) === "special_character_underscore_present") assert(rows4(5).getString(0) === "special_character_underscorenot_present") + } test("SPARK-48172: Test ENDSWITH") { val df1 = spark.sql( From b3e32f573d1b209d7fcca51bbdda0321764df897 Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Mon, 13 May 2024 14:38:31 +0200 Subject: [PATCH 16/22] conn -> connection --- .../org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala index c7c7f88b8ae3..4c1d4924a41c 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala @@ -77,7 +77,7 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTest connection.prepareStatement( "CREATE TABLE employee (dept INT, name VARCHAR(32), salary DECIMAL(20, 2)," + " bonus DOUBLE)").executeUpdate() - conn.prepareStatement( + 
connection.prepareStatement( s"""CREATE TABLE pattern_testing_table ( |pattern_testing_col LONGTEXT |) From 45fcb60a9b8d8e45c152973acb19f9cd2734983c Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Tue, 14 May 2024 08:31:07 +0200 Subject: [PATCH 17/22] Rename variable --- .../apache/spark/sql/jdbc/v2/V2JDBCTest.scala | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala index 39d86b69ca42..2b61dd4b5515 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala @@ -542,14 +542,14 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu s"""SELECT * FROM $catalogName.pattern_testing_table |WHERE pattern_testing_col LIKE 'special_character%' |ORDER BY pattern_testing_col""".stripMargin) - val rows4 = df4.collect() - assert(rows4.length === 1) - assert(rows4(0).getString(0) === "special_character_percent%_present") - assert(rows4(1).getString(0) === "special_character_percent_not_present") - assert(rows4(2).getString(0) === "special_character_quote'_present") - assert(rows4(3).getString(0) === "special_character_quote_not_present") - assert(rows4(4).getString(0) === "special_character_underscore_present") - assert(rows4(5).getString(0) === "special_character_underscorenot_present") + val rows8 = df8.collect() + assert(rows8.length === 1) + assert(rows8(0).getString(0) === "special_character_percent%_present") + assert(rows8(1).getString(0) === "special_character_percent_not_present") + assert(rows8(2).getString(0) === "special_character_quote'_present") + assert(rows8(3).getString(0) === "special_character_quote_not_present") + assert(rows8(4).getString(0) === 
"special_character_underscore_present") + assert(rows8(5).getString(0) === "special_character_underscorenot_present") // map to endsWith // this one should map to contains val df9 = spark.sql( From 4d485eedcd5f9b9a388c1515c0298e860a8ba61b Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Wed, 15 May 2024 08:25:29 +0200 Subject: [PATCH 18/22] Fix tests --- .../sql/jdbc/v2/DB2IntegrationSuite.scala | 2 +- .../v2/DockerJDBCIntegrationV2Suite.scala | 2 +- .../jdbc/v2/MsSqlServerIntegrationSuite.scala | 2 +- .../sql/jdbc/v2/OracleIntegrationSuite.scala | 2 +- .../jdbc/v2/PostgresIntegrationSuite.scala | 2 +- .../apache/spark/sql/jdbc/v2/V2JDBCTest.scala | 20 +++++++++---------- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala index 36795747319d..57129e9d846f 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala @@ -64,7 +64,7 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTest { .executeUpdate() connection.prepareStatement( s"""CREATE TABLE pattern_testing_table ( - |pattern_testing_col LONGTEXT + |pattern_testing_col VARCHAR(50) |) """.stripMargin ).executeUpdate() diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DockerJDBCIntegrationV2Suite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DockerJDBCIntegrationV2Suite.scala index a42caeafe6fe..5f4f0b7a3afb 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DockerJDBCIntegrationV2Suite.scala +++ 
b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DockerJDBCIntegrationV2Suite.scala @@ -42,7 +42,7 @@ abstract class DockerJDBCIntegrationV2Suite extends DockerJDBCIntegrationSuite { connection.prepareStatement( s""" |INSERT INTO pattern_testing_table VALUES - |('special_character_quote\\'_present'), + |('special_character_quote''_present'), |('special_character_quote_not_present'), |('special_character_percent%_present'), |('special_character_percent_not_present'), diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala index 46530fe5419a..9ddd79fb257d 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala @@ -72,7 +72,7 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JD .executeUpdate() connection.prepareStatement( s"""CREATE TABLE pattern_testing_table ( - |pattern_testing_col LONGTEXT + |pattern_testing_col VARCHAR(50) |) """.stripMargin ).executeUpdate() diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala index a8e143d1691a..1adfef95998e 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala @@ -95,7 +95,7 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTes " bonus BINARY_DOUBLE)").executeUpdate() connection.prepareStatement( s"""CREATE TABLE 
pattern_testing_table ( - |pattern_testing_col LONGTEXT + |pattern_testing_col VARCHAR(50) |) """.stripMargin ).executeUpdate() diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala index 24ad97af6449..7fef3ccd6b3f 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala @@ -61,7 +61,7 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCT " bonus double precision)").executeUpdate() connection.prepareStatement( s"""CREATE TABLE pattern_testing_table ( - |pattern_testing_col LONGTEXT + |pattern_testing_col VARCHAR(50) |) """.stripMargin ).executeUpdate() diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala index 419f6a61d97e..1dc6f9bda00d 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala @@ -363,7 +363,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu val df1 = spark.sql( s""" |SELECT * FROM $catalogName.pattern_testing_table - |WHERE contains(pattern_testing_col, 'quote\\'')""".stripMargin) + |WHERE contains(pattern_testing_col, 'quote''')""".stripMargin) df1.explain("formatted") val rows1 = df1.collect() assert(rows1.length === 1) @@ -390,7 +390,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu |WHERE contains(pattern_testing_col, 'character') |ORDER BY pattern_testing_col""".stripMargin) val rows4 = 
df4.collect() - assert(rows4.length === 1) + assert(rows4.length === 6) assert(rows4(0).getString(0) === "special_character_percent%_present") assert(rows4(1).getString(0) === "special_character_percent_not_present") assert(rows4(2).getString(0) === "special_character_quote'_present") @@ -402,7 +402,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu test("SPARK-48172: Test ENDSWITH") { val df1 = spark.sql( s"""SELECT * FROM $catalogName.pattern_testing_table - |WHERE endswith(pattern_testing_col, 'quote\\'_present')""".stripMargin) + |WHERE endswith(pattern_testing_col, 'quote''_present')""".stripMargin) val rows1 = df1.collect() assert(rows1.length === 1) assert(rows1(0).getString(0) === "special_character_quote'_present") @@ -428,7 +428,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu |WHERE endswith(pattern_testing_col, 'present') |ORDER BY pattern_testing_col""".stripMargin) val rows4 = df4.collect() - assert(rows4.length === 1) + assert(rows4.length === 6) assert(rows4(0).getString(0) === "special_character_percent%_present") assert(rows4(1).getString(0) === "special_character_percent_not_present") assert(rows4(2).getString(0) === "special_character_quote'_present") @@ -440,7 +440,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu test("SPARK-48172: Test STARTSWITH") { val df1 = spark.sql( s"""SELECT * FROM $catalogName.pattern_testing_table - |WHERE startswith(pattern_testing_col, 'special_character_quote\\'')""".stripMargin) + |WHERE startswith(pattern_testing_col, 'special_character_quote''')""".stripMargin) val rows1 = df1.collect() assert(rows1.length === 1) assert(rows1(0).getString(0) === "special_character_quote'_present") @@ -466,7 +466,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu |WHERE startswith(pattern_testing_col, 'special_character') |ORDER BY pattern_testing_col""".stripMargin) val rows4 = 
df4.collect() - assert(rows4.length === 1) + assert(rows4.length === 6) assert(rows4(0).getString(0) === "special_character_percent%_present") assert(rows4(1).getString(0) === "special_character_percent_not_present") assert(rows4(2).getString(0) === "special_character_quote'_present") @@ -479,7 +479,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu // this one should map to contains val df1 = spark.sql( s"""SELECT * FROM $catalogName.pattern_testing_table - |WHERE pattern_testing_col LIKE '%quote\\'%'""".stripMargin) + |WHERE pattern_testing_col LIKE '%quote''%'""".stripMargin) val rows1 = df1.collect() assert(rows1.length === 1) assert(rows1(0).getString(0) === "special_character_quote'_present") @@ -505,7 +505,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu |WHERE pattern_testing_col LIKE '%character%' |ORDER BY pattern_testing_col""".stripMargin) val rows4 = df4.collect() - assert(rows4.length === 1) + assert(rows4.length === 6) assert(rows4(0).getString(0) === "special_character_percent%_present") assert(rows4(1).getString(0) === "special_character_percent_not_present") assert(rows4(2).getString(0) === "special_character_quote'_present") @@ -517,7 +517,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu // this one should map to contains val df5 = spark.sql( s"""SELECT * FROM $catalogName.pattern_testing_table - |WHERE pattern_testing_col LIKE 'special_character_quote\\'%'""".stripMargin) + |WHERE pattern_testing_col LIKE 'special_character_quote''%'""".stripMargin) val rows5 = df5.collect() assert(rows5.length === 1) assert(rows5(0).getString(0) === "special_character_quote'_present") @@ -554,7 +554,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu // this one should map to contains val df9 = spark.sql( s"""SELECT * FROM $catalogName.pattern_testing_table - |WHERE pattern_testing_col LIKE 
'%quote\\'_present'""".stripMargin) + |WHERE pattern_testing_col LIKE '%quote''_present'""".stripMargin) val rows9 = df9.collect() assert(rows9.length === 1) assert(rows9(0).getString(0) === "special_character_quote'_present") From 36c6f028bf520d19d18a20c610eb038bb3ee82cf Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Wed, 15 May 2024 09:34:19 +0200 Subject: [PATCH 19/22] Fix spark escape of ' --- .../org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala index 1dc6f9bda00d..a4cdff1cd821 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala @@ -363,7 +363,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu val df1 = spark.sql( s""" |SELECT * FROM $catalogName.pattern_testing_table - |WHERE contains(pattern_testing_col, 'quote''')""".stripMargin) + |WHERE contains(pattern_testing_col, 'quote\\'')""".stripMargin) df1.explain("formatted") val rows1 = df1.collect() assert(rows1.length === 1) @@ -402,7 +402,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu test("SPARK-48172: Test ENDSWITH") { val df1 = spark.sql( s"""SELECT * FROM $catalogName.pattern_testing_table - |WHERE endswith(pattern_testing_col, 'quote''_present')""".stripMargin) + |WHERE endswith(pattern_testing_col, 'quote\\'_present')""".stripMargin) val rows1 = df1.collect() assert(rows1.length === 1) assert(rows1(0).getString(0) === "special_character_quote'_present") @@ -440,7 +440,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu test("SPARK-48172: Test STARTSWITH") { val df1 = 
spark.sql( s"""SELECT * FROM $catalogName.pattern_testing_table - |WHERE startswith(pattern_testing_col, 'special_character_quote''')""".stripMargin) + |WHERE startswith(pattern_testing_col, 'special_character_quote\\'')""".stripMargin) val rows1 = df1.collect() assert(rows1.length === 1) assert(rows1(0).getString(0) === "special_character_quote'_present") @@ -479,7 +479,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu // this one should map to contains val df1 = spark.sql( s"""SELECT * FROM $catalogName.pattern_testing_table - |WHERE pattern_testing_col LIKE '%quote''%'""".stripMargin) + |WHERE pattern_testing_col LIKE '%quote\\'%'""".stripMargin) val rows1 = df1.collect() assert(rows1.length === 1) assert(rows1(0).getString(0) === "special_character_quote'_present") @@ -517,7 +517,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu // this one should map to contains val df5 = spark.sql( s"""SELECT * FROM $catalogName.pattern_testing_table - |WHERE pattern_testing_col LIKE 'special_character_quote''%'""".stripMargin) + |WHERE pattern_testing_col LIKE 'special_character_quote\\'%'""".stripMargin) val rows5 = df5.collect() assert(rows5.length === 1) assert(rows5(0).getString(0) === "special_character_quote'_present") @@ -554,7 +554,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu // this one should map to contains val df9 = spark.sql( s"""SELECT * FROM $catalogName.pattern_testing_table - |WHERE pattern_testing_col LIKE '%quote''_present'""".stripMargin) + |WHERE pattern_testing_col LIKE '%quote\\'_present'""".stripMargin) val rows9 = df9.collect() assert(rows9.length === 1) assert(rows9(0).getString(0) === "special_character_quote'_present") From afbeaae6282612ff2a736bff934bbafde3ca8267 Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Wed, 15 May 2024 10:09:41 +0200 Subject: [PATCH 20/22] Fix test --- 
.../test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala index a4cdff1cd821..caa7435ca3d0 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala @@ -543,7 +543,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu |WHERE pattern_testing_col LIKE 'special_character%' |ORDER BY pattern_testing_col""".stripMargin) val rows8 = df8.collect() - assert(rows8.length === 1) + assert(rows8.length === 6) assert(rows8(0).getString(0) === "special_character_percent%_present") assert(rows8(1).getString(0) === "special_character_percent_not_present") assert(rows8(2).getString(0) === "special_character_quote'_present") From 950f9f02b4b2ff4fa499a7216c46e65ef434ca51 Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Wed, 15 May 2024 11:15:29 +0200 Subject: [PATCH 21/22] Fix tests to include namespace --- .../apache/spark/sql/jdbc/v2/V2JDBCTest.scala | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala index caa7435ca3d0..ecb7d7353a1b 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala @@ -401,14 +401,14 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu test("SPARK-48172: Test ENDSWITH") { val df1 = spark.sql( - s"""SELECT 
* FROM $catalogName.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table |WHERE endswith(pattern_testing_col, 'quote\\'_present')""".stripMargin) val rows1 = df1.collect() assert(rows1.length === 1) assert(rows1(0).getString(0) === "special_character_quote'_present") val df2 = spark.sql( - s"""SELECT * FROM $catalogName.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table |WHERE endswith(pattern_testing_col, 'percent%_present')""".stripMargin) val rows2 = df2.collect() assert(rows2.length === 1) @@ -416,7 +416,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu val df3 = spark. sql( - s"""SELECT * FROM $catalogName.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table |WHERE endswith(pattern_testing_col, 'underscore_present')""".stripMargin) val rows3 = df3.collect() assert(rows3.length === 1) @@ -424,7 +424,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu val df4 = spark. 
sql( - s"""SELECT * FROM $catalogName.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table |WHERE endswith(pattern_testing_col, 'present') |ORDER BY pattern_testing_col""".stripMargin) val rows4 = df4.collect() @@ -439,14 +439,14 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu test("SPARK-48172: Test STARTSWITH") { val df1 = spark.sql( - s"""SELECT * FROM $catalogName.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table |WHERE startswith(pattern_testing_col, 'special_character_quote\\'')""".stripMargin) val rows1 = df1.collect() assert(rows1.length === 1) assert(rows1(0).getString(0) === "special_character_quote'_present") val df2 = spark.sql( - s"""SELECT * FROM $catalogName.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table |WHERE startswith(pattern_testing_col, 'special_character_percent%')""".stripMargin) val rows2 = df2.collect() assert(rows2.length === 1) @@ -454,7 +454,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu val df3 = spark. sql( - s"""SELECT * FROM $catalogName.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table |WHERE startswith(pattern_testing_col, 'special_character_underscore_')""".stripMargin) val rows3 = df3.collect() assert(rows3.length === 1) @@ -462,7 +462,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu val df4 = spark. 
sql( - s"""SELECT * FROM $catalogName.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table |WHERE startswith(pattern_testing_col, 'special_character') |ORDER BY pattern_testing_col""".stripMargin) val rows4 = df4.collect() @@ -478,14 +478,14 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu test("SPARK-48172: Test LIKE") { // this one should map to contains val df1 = spark.sql( - s"""SELECT * FROM $catalogName.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table |WHERE pattern_testing_col LIKE '%quote\\'%'""".stripMargin) val rows1 = df1.collect() assert(rows1.length === 1) assert(rows1(0).getString(0) === "special_character_quote'_present") val df2 = spark.sql( - s"""SELECT * FROM $catalogName.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table |WHERE pattern_testing_col LIKE '%percent\\%%'""".stripMargin) val rows2 = df2.collect() assert(rows2.length === 1) @@ -493,7 +493,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu val df3 = spark. sql( - s"""SELECT * FROM $catalogName.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table |WHERE pattern_testing_col LIKE '%underscore\\_%'""".stripMargin) val rows3 = df3.collect() assert(rows3.length === 1) @@ -501,7 +501,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu val df4 = spark. 
sql( - s"""SELECT * FROM $catalogName.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table |WHERE pattern_testing_col LIKE '%character%' |ORDER BY pattern_testing_col""".stripMargin) val rows4 = df4.collect() @@ -516,14 +516,14 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu // map to startsWith // this one should map to contains val df5 = spark.sql( - s"""SELECT * FROM $catalogName.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table |WHERE pattern_testing_col LIKE 'special_character_quote\\'%'""".stripMargin) val rows5 = df5.collect() assert(rows5.length === 1) assert(rows5(0).getString(0) === "special_character_quote'_present") val df6 = spark.sql( - s"""SELECT * FROM $catalogName.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table |WHERE pattern_testing_col LIKE 'special_character_percent\\%%'""".stripMargin) val rows6 = df6.collect() assert(rows6.length === 1) @@ -531,7 +531,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu val df7 = spark. sql( - s"""SELECT * FROM $catalogName.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table |WHERE pattern_testing_col LIKE 'special_character_underscore\\_%'""".stripMargin) val rows7 = df7.collect() assert(rows7.length === 1) @@ -539,7 +539,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu val df8 = spark. 
sql( - s"""SELECT * FROM $catalogName.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table |WHERE pattern_testing_col LIKE 'special_character%' |ORDER BY pattern_testing_col""".stripMargin) val rows8 = df8.collect() @@ -553,14 +553,14 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu // map to endsWith // this one should map to contains val df9 = spark.sql( - s"""SELECT * FROM $catalogName.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table |WHERE pattern_testing_col LIKE '%quote\\'_present'""".stripMargin) val rows9 = df9.collect() assert(rows9.length === 1) assert(rows9(0).getString(0) === "special_character_quote'_present") val df10 = spark.sql( - s"""SELECT * FROM $catalogName.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table |WHERE pattern_testing_col LIKE '%percent\\%_present'""".stripMargin) val rows10 = df10.collect() assert(rows10.length === 1) @@ -568,7 +568,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu val df11 = spark. sql( - s"""SELECT * FROM $catalogName.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table |WHERE pattern_testing_col LIKE '%underscore\\_present'""".stripMargin) val rows11 = df11.collect() assert(rows11.length === 1) @@ -576,10 +576,10 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu val df12 = spark. 
sql( - s"""SELECT * FROM $catalogName.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table |WHERE pattern_testing_col LIKE '%present' ORDER BY pattern_testing_col""".stripMargin) val rows12 = df12.collect() - assert(rows12.length === 1) + assert(rows12.length === 6) assert(rows12(0).getString(0) === "special_character_percent%_present") assert(rows12(1).getString(0) === "special_character_percent_not_present") assert(rows12(2).getString(0) === "special_character_quote'_present") From 1c20c91233c0df6f73ff0c2f6593a2135737fa65 Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Wed, 15 May 2024 12:57:20 +0200 Subject: [PATCH 22/22] Fix caseConvert in tests --- .../apache/spark/sql/jdbc/v2/V2JDBCTest.scala | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala index ecb7d7353a1b..45c4f41ffb77 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala @@ -362,7 +362,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu test("SPARK-48172: Test CONTAINS") { val df1 = spark.sql( s""" - |SELECT * FROM $catalogName.pattern_testing_table + |SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} |WHERE contains(pattern_testing_col, 'quote\\'')""".stripMargin) df1.explain("formatted") val rows1 = df1.collect() @@ -370,7 +370,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu assert(rows1(0).getString(0) === "special_character_quote'_present") val df2 = spark.sql( - s"""SELECT * FROM $catalogName.pattern_testing_table + s"""SELECT * FROM 
$catalogAndNamespace.${caseConvert("pattern_testing_table")} |WHERE contains(pattern_testing_col, 'percent%')""".stripMargin) val rows2 = df2.collect() assert(rows2.length === 1) @@ -378,7 +378,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu val df3 = spark. sql( - s"""SELECT * FROM $catalogName.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} |WHERE contains(pattern_testing_col, 'underscore_')""".stripMargin) val rows3 = df3.collect() assert(rows3.length === 1) @@ -386,7 +386,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu val df4 = spark. sql( - s"""SELECT * FROM $catalogName.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} |WHERE contains(pattern_testing_col, 'character') |ORDER BY pattern_testing_col""".stripMargin) val rows4 = df4.collect() @@ -401,14 +401,14 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu test("SPARK-48172: Test ENDSWITH") { val df1 = spark.sql( - s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} |WHERE endswith(pattern_testing_col, 'quote\\'_present')""".stripMargin) val rows1 = df1.collect() assert(rows1.length === 1) assert(rows1(0).getString(0) === "special_character_quote'_present") val df2 = spark.sql( - s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} |WHERE endswith(pattern_testing_col, 'percent%_present')""".stripMargin) val rows2 = df2.collect() assert(rows2.length === 1) @@ -416,7 +416,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu val df3 = spark. 
sql( - s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} |WHERE endswith(pattern_testing_col, 'underscore_present')""".stripMargin) val rows3 = df3.collect() assert(rows3.length === 1) @@ -424,7 +424,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu val df4 = spark. sql( - s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} |WHERE endswith(pattern_testing_col, 'present') |ORDER BY pattern_testing_col""".stripMargin) val rows4 = df4.collect() @@ -439,14 +439,14 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu test("SPARK-48172: Test STARTSWITH") { val df1 = spark.sql( - s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} |WHERE startswith(pattern_testing_col, 'special_character_quote\\'')""".stripMargin) val rows1 = df1.collect() assert(rows1.length === 1) assert(rows1(0).getString(0) === "special_character_quote'_present") val df2 = spark.sql( - s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} |WHERE startswith(pattern_testing_col, 'special_character_percent%')""".stripMargin) val rows2 = df2.collect() assert(rows2.length === 1) @@ -454,7 +454,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu val df3 = spark. 
sql( - s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} |WHERE startswith(pattern_testing_col, 'special_character_underscore_')""".stripMargin) val rows3 = df3.collect() assert(rows3.length === 1) @@ -462,7 +462,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu val df4 = spark. sql( - s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} |WHERE startswith(pattern_testing_col, 'special_character') |ORDER BY pattern_testing_col""".stripMargin) val rows4 = df4.collect() @@ -478,14 +478,14 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu test("SPARK-48172: Test LIKE") { // this one should map to contains val df1 = spark.sql( - s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} |WHERE pattern_testing_col LIKE '%quote\\'%'""".stripMargin) val rows1 = df1.collect() assert(rows1.length === 1) assert(rows1(0).getString(0) === "special_character_quote'_present") val df2 = spark.sql( - s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} |WHERE pattern_testing_col LIKE '%percent\\%%'""".stripMargin) val rows2 = df2.collect() assert(rows2.length === 1) @@ -493,7 +493,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu val df3 = spark. sql( - s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} |WHERE pattern_testing_col LIKE '%underscore\\_%'""".stripMargin) val rows3 = df3.collect() assert(rows3.length === 1) @@ -501,7 +501,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu val df4 = spark. 
sql( - s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} |WHERE pattern_testing_col LIKE '%character%' |ORDER BY pattern_testing_col""".stripMargin) val rows4 = df4.collect() @@ -516,14 +516,14 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu // map to startsWith // this one should map to contains val df5 = spark.sql( - s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} |WHERE pattern_testing_col LIKE 'special_character_quote\\'%'""".stripMargin) val rows5 = df5.collect() assert(rows5.length === 1) assert(rows5(0).getString(0) === "special_character_quote'_present") val df6 = spark.sql( - s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} |WHERE pattern_testing_col LIKE 'special_character_percent\\%%'""".stripMargin) val rows6 = df6.collect() assert(rows6.length === 1) @@ -531,7 +531,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu val df7 = spark. sql( - s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} |WHERE pattern_testing_col LIKE 'special_character_underscore\\_%'""".stripMargin) val rows7 = df7.collect() assert(rows7.length === 1) @@ -539,7 +539,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu val df8 = spark. 
sql( - s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} |WHERE pattern_testing_col LIKE 'special_character%' |ORDER BY pattern_testing_col""".stripMargin) val rows8 = df8.collect() @@ -553,14 +553,14 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu // map to endsWith // this one should map to contains val df9 = spark.sql( - s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} |WHERE pattern_testing_col LIKE '%quote\\'_present'""".stripMargin) val rows9 = df9.collect() assert(rows9.length === 1) assert(rows9(0).getString(0) === "special_character_quote'_present") val df10 = spark.sql( - s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} |WHERE pattern_testing_col LIKE '%percent\\%_present'""".stripMargin) val rows10 = df10.collect() assert(rows10.length === 1) @@ -568,7 +568,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu val df11 = spark. sql( - s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} |WHERE pattern_testing_col LIKE '%underscore\\_present'""".stripMargin) val rows11 = df11.collect() assert(rows11.length === 1) @@ -576,7 +576,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu val df12 = spark. sql( - s"""SELECT * FROM $catalogAndNamespace.pattern_testing_table + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} |WHERE pattern_testing_col LIKE '%present' ORDER BY pattern_testing_col""".stripMargin) val rows12 = df12.collect() assert(rows12.length === 6)