From 388be6d41b16d6e3c33d7d7e6cdd4405c8a65840 Mon Sep 17 00:00:00 2001
From: Angerszhuuuu
Date: Mon, 17 Jan 2022 18:54:34 +0800
Subject: [PATCH 01/20] [SPARK-27442][SQL] Remove field name check when
 reading data

---
 .../apache/spark/sql/execution/datasources/DataSource.scala      | 2 +-
 .../spark/sql/execution/datasources/DataSourceUtils.scala        | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
index 2bb3d48c1458c..6e6d0ae482d4c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
@@ -434,7 +434,7 @@ case class DataSource(
         hs.partitionSchema,
         "in the partition schema",
         equality)
-      DataSourceUtils.verifySchema(hs.fileFormat, hs.dataSchema)
+      DataSourceUtils.checkFieldType(hs.fileFormat, hs.dataSchema)
     case _ =>
       SchemaUtils.checkSchemaColumnNameDuplication(
         relation.schema,
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala
index 6ceb44ab15020..941fd3f9b16d1 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala
@@ -81,12 +81,16 @@ object DataSourceUtils extends PredicateHelper {
    * in a driver side.
    */
   def verifySchema(format: FileFormat, schema: StructType): Unit = {
+    checkFieldType(format, schema)
+    checkFieldNames(format, schema)
+  }
+
+  def checkFieldType(format: FileFormat, schema: StructType): Unit = {
     schema.foreach { field =>
       if (!format.supportDataType(field.dataType)) {
        throw QueryCompilationErrors.dataTypeUnsupportedByDataSourceError(format.toString, field)
       }
     }
-    checkFieldNames(format, schema)
   }
 
   // SPARK-24626: Metadata files and temporary files should not be

From 4bf6a19e06cbb8a7f51f6fc9d3de7ef728ed0433 Mon Sep 17 00:00:00 2001
From: Angerszhuuuu
Date: Tue, 18 Jan 2022 13:54:22 +0800
Subject: [PATCH 02/20] Add UT

---
 .../field_with_invalid_char.snappy.parquet   | Bin 0 -> 880 bytes
 .../org/apache/spark/sql/SQLQuerySuite.scala |   8 ++++++++
 2 files changed, 8 insertions(+)
 create mode 100644 sql/core/src/test/resources/test-data/field_with_invalid_char.snappy.parquet

diff --git a/sql/core/src/test/resources/test-data/field_with_invalid_char.snappy.parquet b/sql/core/src/test/resources/test-data/field_with_invalid_char.snappy.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..facf0a3a297161d4bf774a16d3b802e98e9191bf
GIT binary patch
literal 880
[base85 binary data omitted]

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+  test("SPARK-27442: Spark support read parquet file with invalid char in field name") {
+    withResourceTempPath("test-data/field_with_invalid_char.snappy.parquet") { dir =>
+      val df = spark.read.parquet(dir.getAbsolutePath)
+      checkAnswer(df, Row(1, 2, 3) :: Nil)
+      assert(df.schema.names.sameElements(Array("max(t)", "a b", "{")))
+    }
+  }
 }
 
 case class Foo(bar: Option[String])
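Taken together, the two patches above change what a parquet read validates: data types are still checked on the driver, but field names are no longer rejected. A minimal, self-contained sketch of the resulting behavior; the session setup and the file path are illustrative, not part of the patch:

    import org.apache.spark.sql.SparkSession

    // Illustrative local session; any existing parquet file with special
    // characters in its field names would do.
    val spark = SparkSession.builder()
      .master("local[1]")
      .appName("SPARK-27442 read sketch")
      .getOrCreate()

    // Before patch 01, this load failed driver-side verification because the
    // schema contains "max(t)", "a b" and "{"; checkFieldType() now validates
    // only the data types, so the read succeeds.
    val df = spark.read.parquet("/path/to/field_with_invalid_char.snappy.parquet")
    df.printSchema()

    // Names with special characters still need backtick quoting when referenced:
    df.select("`max(t)`", "`a b`", "`{`").show()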
From f3f4e4c146b833d410341ec6b9d9d1f69389c9a9 Mon Sep 17 00:00:00 2001
From: Angerszhuuuu
Date: Tue, 18 Jan 2022 15:32:25 +0800
Subject: [PATCH 03/20] Update FileFormat.scala

---
 .../org/apache/spark/sql/execution/datasources/FileFormat.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormat.scala
index c3bcf06b6e5fb..0ced1cee69d6d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormat.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormat.scala
@@ -165,7 +165,7 @@ trait FileFormat {
   def supportDataType(dataType: DataType): Boolean = true
 
   /**
-   * Returns whether this format supports the given filed name in read/write path.
+   * Returns whether this format supports the given filed name in write path.
    * By default all field name is supported.
    */
   def supportFieldName(name: String): Boolean = true

From 3d7114a267759f8696082e5ba9373919285e273f Mon Sep 17 00:00:00 2001
From: Angerszhuuuu
Date: Tue, 18 Jan 2022 18:10:20 +0800
Subject: [PATCH 04/20] follow comment

---
 .../field_with_invalid_char.snappy.parquet   | Bin 880 -> 1304 bytes
 .../org/apache/spark/sql/SQLQuerySuite.scala |   5 +++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/sql/core/src/test/resources/test-data/field_with_invalid_char.snappy.parquet b/sql/core/src/test/resources/test-data/field_with_invalid_char.snappy.parquet
index facf0a3a297161d4bf774a16d3b802e98e9191bf..3ed3a0a5aba71cb12c8747b164ab9802013d0dbb 100644
GIT binary patch
delta 433
[base85 binary data omitted]
delta 129
[base85 binary data omitted]

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index c52b583f275b4..e9729542922f8 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -4247,8 +4247,9 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
   test("SPARK-27442: Spark support read parquet file with invalid char in field name") {
     withResourceTempPath("test-data/field_with_invalid_char.snappy.parquet") { dir =>
       val df = spark.read.parquet(dir.getAbsolutePath)
-      checkAnswer(df, Row(1, 2, 3) :: Nil)
-      assert(df.schema.names.sameElements(Array("max(t)", "a b", "{")))
+      checkAnswer(df, Row(1, 2, 3, 4, 5) :: Nil)
+      assert(df.schema.names.sameElements(Array("max(t)", "a b", "{", ".", "a.b")))
+      checkAnswer(df.select("`max(t)`", "`a b`", "`{`", "`.`", "`a.b`"), Row(1, 2, 3, 4, 5) :: Nil)
     }
   }
 }
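The new "." and "a.b" columns exercise a subtlety of column resolution: an unquoted dotted name is parsed as nested-field access, which is why the test quotes with backticks. An illustrative fragment (df stands for the test's DataFrame):

    // A dotted name is only a single top-level column when backtick-quoted.
    df.select("`a.b`")  // the column literally named "a.b"
    df.select("a.b")    // would instead look for a field "b" inside a column "a"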
From 59e9030b8f91e6e3308b4428eddb8107c6a2c06a Mon Sep 17 00:00:00 2001
From: Angerszhuuuu
Date: Tue, 18 Jan 2022 21:41:16 +0800
Subject: [PATCH 05/20] update

---
 .../field_with_invalid_char.snappy.parquet   | Bin 1304 -> 1540 bytes
 .../org/apache/spark/sql/SQLQuerySuite.scala |   9 ++++++---
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/sql/core/src/test/resources/test-data/field_with_invalid_char.snappy.parquet b/sql/core/src/test/resources/test-data/field_with_invalid_char.snappy.parquet
index 3ed3a0a5aba71cb12c8747b164ab9802013d0dbb..ba07f220039a9af22908fb9aa1756400aa857aa4 100644
GIT binary patch
literal 1540
[base85 binary data omitted]

literal 1304
[base85 binary data omitted]

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index e9729542922f8..bf0efd140d0ca 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -4247,9 +4247,12 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
   test("SPARK-27442: Spark support read parquet file with invalid char in field name") {
     withResourceTempPath("test-data/field_with_invalid_char.snappy.parquet") { dir =>
       val df = spark.read.parquet(dir.getAbsolutePath)
-      checkAnswer(df, Row(1, 2, 3, 4, 5) :: Nil)
-      assert(df.schema.names.sameElements(Array("max(t)", "a b", "{", ".", "a.b")))
-      checkAnswer(df.select("`max(t)`", "`a b`", "`{`", "`.`", "`a.b`"), Row(1, 2, 3, 4, 5) :: Nil)
+      checkAnswer(df, Row(1, 2, 3, 4, 5, 6) :: Row(2, 4, 6, 8, 10, 12) :: Nil)
+      assert(df.schema.names.sameElements(Array("max(t)", "a b", "{", ".", "a.b", "a")))
+      checkAnswer(df.select("`max(t)`", "`a b`", "`{`", "`.`", "`a.b`")
+        , Row(1, 2, 3, 4, 5) :: Row(2, 4, 6, 8, 10) :: Nil)
+      checkAnswer(df.where("`a.b` > 8"),
+        Row(2, 4, 6, 8, 10, 12) :: Nil)
     }
   }
 }

From d09f83d2e6377571f918fa4a89b1141ad00c071a Mon Sep 17 00:00:00 2001
From: Angerszhuuuu
Date: Tue, 18 Jan 2022 21:55:03 +0800
Subject: [PATCH 06/20] Update DataSource.scala

---
 .../apache/spark/sql/execution/datasources/DataSource.scala | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
index 6e6d0ae482d4c..f0590b4a2ebf4 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
@@ -434,7 +434,8 @@ case class DataSource(
         hs.partitionSchema,
         "in the partition schema",
         equality)
-      DataSourceUtils.checkFieldType(hs.fileFormat, hs.dataSchema)
+      DataSourceUtils.verifySchema(hs.fileFormat, hs.dataSchema,
+        !hs.fileFormat.isInstanceOf[ParquetFileFormat])
     case _ =>
       SchemaUtils.checkSchemaColumnNameDuplication(
         relation.schema,

From 5a9d99309f694fe8b9490fdaddc3526783f33d52 Mon Sep 17 00:00:00 2001
From: Angerszhuuuu
Date: Wed, 19 Jan 2022 10:03:04 +0800
Subject: [PATCH 07/20] update

---
 .../sql/execution/datasources/DataSourceUtils.scala | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala
index 941fd3f9b16d1..6c0cd266f7b74 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala
@@ -80,17 +80,18 @@ object DataSourceUtils extends PredicateHelper {
    * Verify if the schema is supported in datasource. This verification should be done
    * in a driver side.
    */
-  def verifySchema(format: FileFormat, schema: StructType): Unit = {
-    checkFieldType(format, schema)
-    checkFieldNames(format, schema)
-  }
-
-  def checkFieldType(format: FileFormat, schema: StructType): Unit = {
+  def verifySchema(
+      format: FileFormat,
+      schema: StructType,
+      shouldCheckFieldName: Boolean = true): Unit = {
     schema.foreach { field =>
       if (!format.supportDataType(field.dataType)) {
        throw QueryCompilationErrors.dataTypeUnsupportedByDataSourceError(format.toString, field)
       }
     }
+    if (shouldCheckFieldName) {
+      checkFieldNames(format, schema)
+    }
   }
 
   // SPARK-24626: Metadata files and temporary files should not be

From 5dc67416303c40ce70f55441c5d76dc7a4b0ffda Mon Sep 17 00:00:00 2001
From: Angerszhuuuu
Date: Wed, 19 Jan 2022 10:13:37 +0800
Subject: [PATCH 08/20] Update FileFormat.scala

---
 .../org/apache/spark/sql/execution/datasources/FileFormat.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormat.scala
index 0ced1cee69d6d..c3bcf06b6e5fb 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormat.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormat.scala
@@ -165,7 +165,7 @@ trait FileFormat {
   def supportDataType(dataType: DataType): Boolean = true
 
   /**
-   * Returns whether this format supports the given filed name in write path.
+   * Returns whether this format supports the given filed name in read/write path.
    * By default all field name is supported.
    */
   def supportFieldName(name: String): Boolean = true
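The defaulted flag keeps every non-parquet format on the old behavior. The next patches revert it and rely instead on the supportFieldName hook that FileFormat already exposes, with parquet simply ceasing to override it. A hedged sketch of how a custom format could use that hook (MyTextFormat is hypothetical, not part of the series):

    import org.apache.spark.sql.execution.datasources.FileFormat

    // Hypothetical format that forbids control characters in field names.
    // Only supportFieldName is shown; the abstract members of FileFormat
    // (inferSchema, prepareWrite) are left unimplemented in this sketch.
    trait MyTextFormat extends FileFormat {
      override def supportFieldName(name: String): Boolean =
        !name.exists(c => c == '\n' || c == '\t')
    }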
From b2d64af0ed10d5fe65c79105f473ebae2686cbdc Mon Sep 17 00:00:00 2001
From: Angerszhuuuu
Date: Wed, 19 Jan 2022 13:36:15 +0800
Subject: [PATCH 09/20] follow comment

---
 .../spark/sql/execution/datasources/DataSource.scala      | 3 +--
 .../sql/execution/datasources/DataSourceUtils.scala       | 9 ++-------
 2 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
index f0590b4a2ebf4..2bb3d48c1458c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
@@ -434,8 +434,7 @@ case class DataSource(
         hs.partitionSchema,
         "in the partition schema",
         equality)
-      DataSourceUtils.verifySchema(hs.fileFormat, hs.dataSchema,
-        !hs.fileFormat.isInstanceOf[ParquetFileFormat])
+      DataSourceUtils.verifySchema(hs.fileFormat, hs.dataSchema)
     case _ =>
       SchemaUtils.checkSchemaColumnNameDuplication(
         relation.schema,
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala
index 6c0cd266f7b74..6ceb44ab15020 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala
@@ -80,18 +80,13 @@ object DataSourceUtils extends PredicateHelper {
    * Verify if the schema is supported in datasource. This verification should be done
    * in a driver side.
    */
-  def verifySchema(
-      format: FileFormat,
-      schema: StructType,
-      shouldCheckFieldName: Boolean = true): Unit = {
+  def verifySchema(format: FileFormat, schema: StructType): Unit = {
     schema.foreach { field =>
       if (!format.supportDataType(field.dataType)) {
        throw QueryCompilationErrors.dataTypeUnsupportedByDataSourceError(format.toString, field)
       }
     }
-    if (shouldCheckFieldName) {
-      checkFieldNames(format, schema)
-    }
+    checkFieldNames(format, schema)
   }
 
   // SPARK-24626: Metadata files and temporary files should not be

From b3f0f09783153a4cdf4009198522c97fd8f317d4 Mon Sep 17 00:00:00 2001
From: Angerszhuuuu
Date: Wed, 19 Jan 2022 13:37:00 +0800
Subject: [PATCH 10/20] Update ParquetFileFormat.scala

---
 .../sql/execution/datasources/parquet/ParquetFileFormat.scala | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
index 4515387bdaa90..b0a168c9a85c7 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
@@ -407,10 +407,6 @@ class ParquetFileFormat
       case _ => false
     }
   }
-
-  override def supportFieldName(name: String): Boolean = {
-    !name.matches(".*[ ,;{}()\n\t=].*")
-  }
 }
 
 object ParquetFileFormat extends Logging {
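The override deleted above was the whole of parquet's name restriction on the Spark side. A standalone snippet showing what that character class rejected; the expected results are noted in comments:

    // The exact pattern removed from ParquetFileFormat.supportFieldName.
    val invalidParquetName = ".*[ ,;{}()\n\t=].*"

    Seq("max(t)", "a b", "{", "a.b", "plain").foreach { name =>
      println(s"'$name' rejected: ${name.matches(invalidParquetName)}")
    }
    // 'max(t)' rejected: true   (parentheses)
    // 'a b'    rejected: true   (space)
    // '{'      rejected: true   (brace)
    // 'a.b'    rejected: false  (dots were never in the class)
    // 'plain'  rejected: false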
From fe3aeb2fda45d683e288051c46183070dcf05ee2 Mon Sep 17 00:00:00 2001
From: Angerszhuuuu
Date: Wed, 19 Jan 2022 14:19:10 +0800
Subject: [PATCH 11/20] Follow comment

---
 .../parquet/ParquetSchemaConverter.scala           |  1 -
 .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 13 ++++++++++---
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala
index 352e5f01172f2..2f3735c9d8beb 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala
@@ -467,7 +467,6 @@ class SparkToParquetSchemaConverter(
   }
 
   private def convertField(field: StructField, repetition: Type.Repetition): Type = {
-    ParquetSchemaConverter.checkFieldName(field.name)
     field.dataType match {
       // ===================
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index bf0efd140d0ca..4bd5148d73cc2 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -4245,10 +4245,17 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
   }
 
   test("SPARK-27442: Spark support read parquet file with invalid char in field name") {
-    withResourceTempPath("test-data/field_with_invalid_char.snappy.parquet") { dir =>
+    withTempDir { dir =>
+      Seq((1, 2, 3, 4, 5, 6, 7, 8, 9, 10), (2, 4, 6, 8, 10, 12, 14, 16, 18, 20))
+        .toDF("max(t)", "max(t", "=", "\n", ";", "a b", "{", ".", "a.b", "a")
+        .repartition(1)
+        .write.mode(SaveMode.Overwrite).parquet(dir.getAbsolutePath)
       val df = spark.read.parquet(dir.getAbsolutePath)
-      checkAnswer(df, Row(1, 2, 3, 4, 5, 6) :: Row(2, 4, 6, 8, 10, 12) :: Nil)
-      assert(df.schema.names.sameElements(Array("max(t)", "a b", "{", ".", "a.b", "a")))
+      checkAnswer(df,
+        Row(1, 2, 3, 4, 5, 6, 7, 8, 9, 10) ::
+          Row(2, 4, 6, 8, 10, 12, 14, 16, 18, 20) :: Nil)
+      assert(df.schema.names.sameElements(
+        Array("max(t)", "max(t", "=", "\n", ";", "a b", "{", ".", "a.b", "a")))
       checkAnswer(df.select("`max(t)`", "`a b`", "`{`", "`.`", "`a.b`")
         , Row(1, 2, 3, 4, 5) :: Row(2, 4, 6, 8, 10) :: Nil)
       checkAnswer(df.where("`a.b` > 8"),
         Row(2, 4, 6, 8, 10, 12) :: Nil)
     }
   }
 }
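With the converter-side check gone, the test can generate its own fixture instead of shipping a binary file. A sketch of the same write/read round trip outside the test harness; the session and output path are illustrative:

    import org.apache.spark.sql.{SaveMode, SparkSession}

    val spark = SparkSession.builder()
      .master("local[1]")
      .appName("SPARK-27442 round trip")
      .getOrCreate()
    import spark.implicits._

    val out = "/tmp/spark-27442-roundtrip"  // illustrative path
    Seq((1, 2, 3), (2, 4, 6))
      .toDF("max(t)", "a b", "a.b")         // names the old check rejected on write
      .write.mode(SaveMode.Overwrite).parquet(out)

    val df = spark.read.parquet(out)
    df.select("`max(t)`", "`a b`", "`a.b`").show()  // backticks keep "a.b" one column
    df.where("`a.b` > 4").show()                    // predicates resolve the same way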
From e11bcdb2ef7096d56714888ecee09cbe825b3752 Mon Sep 17 00:00:00 2001
From: Angerszhuuuu
Date: Wed, 19 Jan 2022 14:20:58 +0800
Subject: [PATCH 12/20] update

---
 .../parquet/ParquetSchemaConverter.scala          | 17 -----------------
 .../datasources/v2/parquet/ParquetWrite.scala     |  1 -
 2 files changed, 18 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala
index 2f3735c9d8beb..cb5d646f85e9e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala
@@ -697,23 +697,6 @@ private[sql] object ParquetSchemaConverter {
   val EMPTY_MESSAGE: MessageType =
     Types.buildMessage().named(ParquetSchemaConverter.SPARK_PARQUET_SCHEMA_NAME)
 
-  def checkFieldName(name: String): Unit = {
-    // ,;{}()\n\t= and space are special characters in Parquet schema
-    if (name.matches(".*[ ,;{}()\n\t=].*")) {
-      throw QueryCompilationErrors.columnNameContainsInvalidCharactersError(name)
-    }
-  }
-
-  def checkFieldNames(schema: StructType): Unit = {
-    schema.foreach { field =>
-      checkFieldName(field.name)
-      field.dataType match {
-        case s: StructType => checkFieldNames(s)
-        case _ =>
-      }
-    }
-  }
-
   def checkConversionRequirement(f: => Boolean, message: String): Unit = {
     if (!f) {
       throw new AnalysisException(message)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetWrite.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetWrite.scala
index b2b6d313e1bcd..0316d91f40732 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetWrite.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetWrite.scala
@@ -72,7 +72,6 @@ case class ParquetWrite(
 
     ParquetOutputFormat.setWriteSupportClass(job, classOf[ParquetWriteSupport])
 
-    ParquetSchemaConverter.checkFieldNames(dataSchema)
     // This metadata is useful for keeping UDTs like Vector/Matrix.
     ParquetWriteSupport.setSchema(dataSchema, conf)

From 5f3430f90c806ff74e0a0f7c0395d8b0e4c05efc Mon Sep 17 00:00:00 2001
From: Angerszhuuuu
Date: Wed, 19 Jan 2022 14:21:52 +0800
Subject: [PATCH 13/20] Delete field_with_invalid_char.snappy.parquet

---
 .../field_with_invalid_char.snappy.parquet | Bin 1540 -> 0 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 sql/core/src/test/resources/test-data/field_with_invalid_char.snappy.parquet

diff --git a/sql/core/src/test/resources/test-data/field_with_invalid_char.snappy.parquet b/sql/core/src/test/resources/test-data/field_with_invalid_char.snappy.parquet
deleted file mode 100644
index ba07f220039a9af22908fb9aa1756400aa857aa4..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 1540
[base85 binary data omitted]

From bd5540e754aa44207002c927ac178eaacbef90a0 Mon Sep 17 00:00:00 2001
From: Angerszhuuuu
Date: Wed, 19 Jan 2022 14:25:30 +0800
Subject: [PATCH 14/20] Update SQLQuerySuite.scala

---
 .../src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 4bd5148d73cc2..733b902368327 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -4256,8 +4256,8 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
         Row(2, 4, 6, 8, 10, 12, 14, 16, 18, 20) :: Nil)
       assert(df.schema.names.sameElements(
         Array("max(t)", "max(t", "=", "\n", ";", "a b", "{", ".", "a.b", "a")))
-      checkAnswer(df.select("`max(t)`", "`a b`", "`{`", "`.`", "`a.b`")
-        , Row(1, 2, 3, 4, 5) :: Row(2, 4, 6, 8, 10) :: Nil)
+      checkAnswer(df.select("`max(t)`", "`a b`", "`{`", "`.`", "`a.b`"),
+        Row(1, 6, 7, 8, 9) :: Row(2, 12, 14, 16, 18) :: Nil)
       checkAnswer(df.where("`a.b` > 8"),
         Row(2, 4, 6, 8, 10, 12) :: Nil)

From 310b00cbf8bc19907c427b0b4d76291072dbaa35 Mon Sep 17 00:00:00 2001
From: Angerszhuuuu
Date: Wed, 19 Jan 2022 14:25:47 +0800
Subject: [PATCH 15/20] Update SQLQuerySuite.scala

---
 .../src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 733b902368327..fa752936c477c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -4244,7 +4244,7 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
     }
   }
 
-  test("SPARK-27442: Spark support read parquet file with invalid char in field name") {
+  test("SPARK-27442: Spark support read/write parquet file with invalid char in field name") {
     withTempDir { dir =>
       Seq((1, 2, 3, 4, 5, 6, 7, 8, 9, 10), (2, 4, 6, 8, 10, 12, 14, 16, 18, 20))
         .toDF("max(t)", "max(t", "=", "\n", ";", "a b", "{", ".", "a.b", "a")

From e6253a2863a13dfaea1550d0a635216fb16d28fd Mon Sep 17 00:00:00 2001
From: Angerszhuuuu
Date: Wed, 19 Jan 2022 14:31:48 +0800
Subject: [PATCH 16/20] Update SQLQuerySuite.scala

---
 .../src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index fa752936c477c..523a8e242e7e8 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -4258,8 +4258,8 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
         Array("max(t)", "max(t", "=", "\n", ";", "a b", "{", ".", "a.b", "a")))
       checkAnswer(df.select("`max(t)`", "`a b`", "`{`", "`.`", "`a.b`"),
         Row(1, 6, 7, 8, 9) :: Row(2, 12, 14, 16, 18) :: Nil)
-      checkAnswer(df.where("`a.b` > 8"),
-        Row(2, 4, 6, 8, 10, 12) :: Nil)
+      checkAnswer(df.where("`a.b` > 10"),
+        Row(2, 4, 6, 8, 10, 12, 14, 16, 18, 20) :: Nil)
     }
   }
 }

From e3efd3f5cac23b8c9670f36e1faf928a5cab60d3 Mon Sep 17 00:00:00 2001
From: Angerszhuuuu
Date: Wed, 19 Jan 2022 23:54:18 +0800
Subject: [PATCH 17/20] remove related UT

---
 .../spark/sql/FileBasedDataSourceSuite.scala      | 22 ------------
 .../sql/hive/HiveParquetSourceSuite.scala         | 17 ---------
 .../sql/hive/execution/HiveDDLSuite.scala         | 14 --------
 .../sql/hive/execution/SQLQuerySuite.scala        | 35 -------------------
 .../ParquetHadoopFsRelationSuite.scala            | 15 --------
 5 files changed, 103 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
index 518090877e633..39b08bd560bb1 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
@@ -991,28 +991,6 @@ class FileBasedDataSourceSuite extends QueryTest
       checkAnswer(df, Row("v1", "v2"))
     }
   }
-
-  test("SPARK-36271: V1 insert should check schema field name too") {
-    withView("v") {
-      spark.range(1).createTempView("v")
-      withTempDir { dir =>
-        val e = intercept[AnalysisException] {
-          sql("SELECT ID, IF(ID=1,1,0) FROM v").write.mode(SaveMode.Overwrite)
-            .format("parquet").save(dir.getCanonicalPath)
-        }.getMessage
-        assert(e.contains("Column name \"(IF((ID = 1), 1, 0))\" contains invalid character(s)."))
-      }
-
-      withTempDir { dir =>
-        val e = intercept[AnalysisException] {
-          sql("SELECT NAMED_STRUCT('(IF((ID = 1), 1, 0))', IF(ID=1,ID,0)) AS col1 FROM v")
-            .write.mode(SaveMode.Overwrite)
-            .format("parquet").save(dir.getCanonicalPath)
-        }.getMessage
-        assert(e.contains("Column name \"(IF((ID = 1), 1, 0))\" contains invalid character(s)."))
-      }
-    }
-  }
 }
 
 object TestingUDT {
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala
index 7690e1e9e1465..5778b259c7d5a 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala
@@ -207,23 +207,6 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest with ParquetTest {
     }
   }
 
-  test("Aggregation attribute names can't contain special chars \" ,;{}()\\n\\t=\"") {
-    withTempDir { tempDir =>
-      val filePath = new File(tempDir, "testParquet").getCanonicalPath
-      val filePath2 = new File(tempDir, "testParquet2").getCanonicalPath
-
-      val df = Seq(1, 2, 3).map(i => (i, i.toString)).toDF("int", "str")
-      val df2 = df.as("x").join(df.as("y"), $"x.str" === $"y.str").groupBy("y.str").max("y.int")
-      intercept[Throwable](df2.write.parquet(filePath))
-
-      val df3 = df2.toDF("str", "max_int")
-      df3.write.parquet(filePath2)
-      val df4 = read.parquet(filePath2)
-      checkAnswer(df4, Row("1", 1) :: Row("2", 2) :: Row("3", 3) :: Nil)
-      assert(df4.columns === Array("str", "max_int"))
-    }
-  }
-
   test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") {
     Seq("true", "false").foreach { parquetConversion =>
       withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> parquetConversion) {
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index 014feb33df5ea..2c047d2da4cc6 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -2926,20 +2926,6 @@ class HiveDDLSuite
     }
   }
 
-  test("SPARK-33844: Insert overwrite directory should check schema too") {
-    withView("v") {
-      spark.range(1).createTempView("v")
-      withTempPath { path =>
-        val e = intercept[AnalysisException] {
-          spark.sql(s"INSERT OVERWRITE LOCAL DIRECTORY '${path.getCanonicalPath}' " +
-            s"STORED AS PARQUET SELECT ID, if(1=1, 1, 0), abs(id), '^-' FROM v")
-        }.getMessage
-        assert(e.contains("Column name \"(IF((1 = 1), 1, 0))\" contains invalid character(s). " +
-          "Please use alias to rename it."))
-      }
-    }
-  }
-
   test("SPARK-36201: Add check for inner field of parquet/orc schema") {
     withView("v") {
       spark.range(1).createTempView("v")
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index 1829f38fe5775..e690d026053d6 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -2212,41 +2212,6 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi
     }
   }
 
-  test("SPARK-21912 Parquet table should not create invalid column names") {
-    Seq(" ", ",", ";", "{", "}", "(", ")", "\n", "\t", "=").foreach { name =>
-      val source = "PARQUET"
-      withTable("t21912") {
-        val m = intercept[AnalysisException] {
-          sql(s"CREATE TABLE t21912(`col$name` INT) USING $source")
-        }.getMessage
-        assert(m.contains(s"contains invalid character(s)"))
-
-        val m1 = intercept[AnalysisException] {
-          sql(s"CREATE TABLE t21912 STORED AS $source AS SELECT 1 `col$name`")
-        }.getMessage
-        assert(m1.contains(s"contains invalid character(s)"))
-
-        val m2 = intercept[AnalysisException] {
-          sql(s"CREATE TABLE t21912 USING $source AS SELECT 1 `col$name`")
-        }.getMessage
-        assert(m2.contains(s"contains invalid character(s)"))
-
-        withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> "false") {
-          val m3 = intercept[AnalysisException] {
-            sql(s"CREATE TABLE t21912(`col$name` INT) USING hive OPTIONS (fileFormat '$source')")
-          }.getMessage
-          assert(m3.contains(s"contains invalid character(s)"))
-        }
-
-        sql(s"CREATE TABLE t21912(`col` INT) USING $source")
-        val m4 = intercept[AnalysisException] {
-          sql(s"ALTER TABLE t21912 ADD COLUMNS(`col$name` INT)")
-        }.getMessage
-        assert(m4.contains(s"contains invalid character(s)"))
-      }
-    }
-  }
-
   test("SPARK-32889: ORC table column name supports special characters") {
     // " " "," is not allowed.
     Seq("$", ";", "{", "}", "(", ")", "\n", "\t", "=").foreach { name =>
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala
index 2e6b86206a631..18e8401ee3d2b 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala
@@ -107,21 +107,6 @@ class ParquetHadoopFsRelationSuite extends HadoopFsRelationTest {
     }
   }
 
-  test("SPARK-8079: Avoid NPE thrown from BaseWriterContainer.abortJob") {
-    withTempPath { dir =>
-      intercept[AnalysisException] {
-        // Parquet doesn't allow field names with spaces. Here we are intentionally making an
-        // exception thrown from the `ParquetRelation2.prepareForWriteJob()` method to trigger
-        // the bug. Please refer to spark-8079 for more details.
-        spark.range(1, 10)
-          .withColumnRenamed("id", "a b")
-          .write
-          .format("parquet")
-          .save(dir.getCanonicalPath)
-      }
-    }
-  }
-
   test("SPARK-8604: Parquet data source should write summary file while doing appending") {
     withSQLConf(
       ParquetOutputFormat.JOB_SUMMARY_LEVEL -> "ALL",

From 8a1dc9148817511553185d423f944d8ce21d42d5 Mon Sep 17 00:00:00 2001
From: Angerszhuuuu
Date: Thu, 20 Jan 2022 10:37:56 +0800
Subject: [PATCH 18/20] Update HiveDDLSuite.scala

---
 .../sql/hive/execution/HiveDDLSuite.scala | 38 -------------------
 1 file changed, 38 deletions(-)

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index 2c047d2da4cc6..41665ce0c540f 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -2926,44 +2926,6 @@ class HiveDDLSuite
     }
   }
 
-  test("SPARK-36201: Add check for inner field of parquet/orc schema") {
-    withView("v") {
-      spark.range(1).createTempView("v")
-      withTempPath { path =>
-        val e = intercept[AnalysisException] {
-          spark.sql(
-            s"""
-               |INSERT OVERWRITE LOCAL DIRECTORY '${path.getCanonicalPath}'
-               |STORED AS PARQUET
-               |SELECT
-               |NAMED_STRUCT('ID', ID, 'IF(ID=1,ID,0)', IF(ID=1,ID,0), 'B', ABS(ID)) AS col1
-               |FROM v
-          """.stripMargin)
-        }.getMessage
-        assert(e.contains("Column name \"IF(ID=1,ID,0)\" contains" +
-          " invalid character(s). Please use alias to rename it."))
-      }
-    }
-  }
-
-  test("SPARK-36312: ParquetWriteSupport should check inner field") {
-    withView("v") {
-      spark.range(1).createTempView("v")
-      withTempPath { path =>
-        val e = intercept[AnalysisException] {
-          spark.sql(
-            """
-              |SELECT
-              |NAMED_STRUCT('ID', ID, 'IF(ID=1,ID,0)', IF(ID=1,ID,0), 'B', ABS(ID)) AS col1
-              |FROM v
-              |""".stripMargin).write.mode(SaveMode.Overwrite).parquet(path.toString)
-        }.getMessage
-        assert(e.contains("Column name \"IF(ID=1,ID,0)\" contains" +
-          " invalid character(s). Please use alias to rename it."))
-      }
-    }
-  }
-
   test("SPARK-34261: Avoid side effect if create exists temporary function") {
     withUserDefinedFunction("f1" -> true) {
       sql("CREATE TEMPORARY FUNCTION f1 AS 'org.apache.hadoop.hive.ql.udf.UDFUUID'")

From 88836342bf366f901c36462da9308c364f61552b Mon Sep 17 00:00:00 2001
From: Angerszhuuuu
Date: Thu, 20 Jan 2022 17:49:42 +0800
Subject: [PATCH 19/20] Update HiveDDLSuite.scala

---
 .../spark/sql/hive/execution/HiveDDLSuite.scala | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index 41665ce0c540f..ed52086b7834c 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -2926,6 +2926,20 @@ class HiveDDLSuite
     }
   }
 
+  test("SPARK-33844: Insert overwrite directory should check schema too") {
+    withView("v") {
+      spark.range(1).createTempView("v")
+      withTempPath { path =>
+        val e = intercept[SparkException] {
+          spark.sql(s"INSERT OVERWRITE LOCAL DIRECTORY '${path.getCanonicalPath}' " +
+            s"STORED AS PARQUET SELECT ID, if(1=1, 1, 0), abs(id), '^-' FROM v")
+        }
+        assert(e.getCause.getCause.getMessage.contains(
+          "field ended by ';': expected ';' but got 'IF' at line 2: optional int32 (IF"))
+      }
+    }
+  }
+
   test("SPARK-34261: Avoid side effect if create exists temporary function") {
     withUserDefinedFunction("f1" -> true) {
       sql("CREATE TEMPORARY FUNCTION f1 AS 'org.apache.hadoop.hive.ql.udf.UDFUUID'")

From 3d4adf9cc585faa08e8edb221a4fff4a33d66fe7 Mon Sep 17 00:00:00 2001
From: Angerszhuuuu
Date: Fri, 21 Jan 2022 10:38:48 +0800
Subject: [PATCH 20/20] Update HiveDDLSuite.scala

---
 .../sql/hive/execution/HiveDDLSuite.scala | 24 +++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index ed52086b7834c..85e3d0b53ba7d 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -2933,13 +2933,33 @@ class HiveDDLSuite
         val e = intercept[SparkException] {
           spark.sql(s"INSERT OVERWRITE LOCAL DIRECTORY '${path.getCanonicalPath}' " +
             s"STORED AS PARQUET SELECT ID, if(1=1, 1, 0), abs(id), '^-' FROM v")
-        }
-        assert(e.getCause.getCause.getMessage.contains(
+        }.getCause.getCause.getMessage
+        assert(e.contains(
           "field ended by ';': expected ';' but got 'IF' at line 2: optional int32 (IF"))
       }
     }
   }
 
+  test("SPARK-36201: Add check for inner field of parquet/orc schema") {
+    withView("v") {
+      spark.range(1).createTempView("v")
+      withTempPath { path =>
+        val e = intercept[SparkException] {
+          spark.sql(
+            s"""
+               |INSERT OVERWRITE LOCAL DIRECTORY '${path.getCanonicalPath}'
+               |STORED AS PARQUET
+               |SELECT
+               |NAMED_STRUCT('ID', ID, 'IF(ID=1,ID,0)', IF(ID=1,ID,0), 'B', ABS(ID)) AS col1
+               |FROM v
+          """.stripMargin)
+        }.getCause.getCause.getMessage
+        assert(e.contains("expected at the position 19 of " +
+          "'struct<ID:bigint,IF(ID=1,ID,0):bigint,B:bigint>' but '(' is found."))
+      }
+    }
+  }
+
   test("SPARK-34261: Avoid side effect if create exists temporary function") {
     withUserDefinedFunction("f1" -> true) {
       sql("CREATE TEMPORARY FUNCTION f1 AS 'org.apache.hadoop.hive.ql.udf.UDFUUID'")
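The net effect of the last two patches: with Spark's eager AnalysisException removed, an invalid field name that reaches Hive's STORED AS PARQUET writer now surfaces as a SparkException wrapping the underlying schema parse error, which is why both tests unwrap two causes. A sketch of that shape, assuming `spark` is a Hive-enabled session; the query and output path are illustrative:

    import org.apache.spark.SparkException

    try {
      spark.sql(
        "INSERT OVERWRITE LOCAL DIRECTORY '/tmp/out' STORED AS PARQUET " +
          "SELECT id, if(1=1, 1, 0) FROM range(1)")
    } catch {
      case e: SparkException =>
        // The Hive/parquet-mr schema error sits two causes down, as the
        // tests above assert (e.g. "expected ';' but got 'IF'").
        println(e.getCause.getCause.getMessage)
    }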