Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ import org.apache.spark.util.{SerializableConfiguration, Utils}
private[sql] object OrcFileFormat {
private def checkFieldName(name: String): Unit = {
try {
TypeDescription.fromString(s"struct<$name:int>")
TypeDescription.fromString(s"struct<`$name`:int>")
} catch {
case _: IllegalArgumentException =>
throw new AnalysisException(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,20 @@ class FileBasedDataSourceSuite extends QueryTest
}
}

Seq("json", "orc").foreach { format =>
  test(s"SPARK-32889: column name supports special characters using $format") {
    // Round-trip a single-column DataFrame whose column name is a special
    // character and verify the name survives a write/read cycle unchanged.
    val specialNames = Seq("$", " ", ",", ";", "{", "}", "(", ")", "\n", "\t", "=")
    for (colName <- specialNames) {
      withTempDir { tempDir =>
        val outputPath = new File(tempDir, "file").getCanonicalPath
        Seq(1).toDF(colName).write.format(format).save(outputPath)
        val readSchema = spark.read.format(format).load(outputPath).schema
        assert(readSchema.size == 1)
        assertResult(colName)(readSchema.head.name)
      }
    }
  }
}

// Text file format only supports string type
test("SPARK-24691 error handling for unsupported types - text") {
withTempDir { dir =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2206,39 +2206,63 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi
}
}

test("SPARK-21912 ORC/Parquet table should not create invalid column names") {
test("SPARK-21912 Parquet table should not create invalid column names") {
Seq(" ", ",", ";", "{", "}", "(", ")", "\n", "\t", "=").foreach { name =>
Seq("ORC", "PARQUET").foreach { source =>
withTable("t21912") {
val m = intercept[AnalysisException] {
sql(s"CREATE TABLE t21912(`col$name` INT) USING $source")
}.getMessage
assert(m.contains(s"contains invalid character(s)"))
val source = "PARQUET"
withTable("t21912") {
val m = intercept[AnalysisException] {
sql(s"CREATE TABLE t21912(`col$name` INT) USING $source")
}.getMessage
assert(m.contains(s"contains invalid character(s)"))

val m1 = intercept[AnalysisException] {
sql(s"CREATE TABLE t21912 STORED AS $source AS SELECT 1 `col$name`")
}.getMessage
assert(m1.contains(s"contains invalid character(s)"))
val m1 = intercept[AnalysisException] {
sql(s"CREATE TABLE t21912 STORED AS $source AS SELECT 1 `col$name`")
}.getMessage
assert(m1.contains(s"contains invalid character(s)"))

val m2 = intercept[AnalysisException] {
sql(s"CREATE TABLE t21912 USING $source AS SELECT 1 `col$name`")
}.getMessage
assert(m2.contains(s"contains invalid character(s)"))

val m2 = intercept[AnalysisException] {
sql(s"CREATE TABLE t21912 USING $source AS SELECT 1 `col$name`")
withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> "false") {
val m3 = intercept[AnalysisException] {
sql(s"CREATE TABLE t21912(`col$name` INT) USING hive OPTIONS (fileFormat '$source')")
}.getMessage
assert(m2.contains(s"contains invalid character(s)"))
assert(m3.contains(s"contains invalid character(s)"))
}

withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> "false") {
val m3 = intercept[AnalysisException] {
sql(s"CREATE TABLE t21912(`col$name` INT) USING hive OPTIONS (fileFormat '$source')")
}.getMessage
assert(m3.contains(s"contains invalid character(s)"))
}
sql(s"CREATE TABLE t21912(`col` INT) USING $source")
val m4 = intercept[AnalysisException] {
sql(s"ALTER TABLE t21912 ADD COLUMNS(`col$name` INT)")
}.getMessage
assert(m4.contains(s"contains invalid character(s)"))
}
}
}

sql(s"CREATE TABLE t21912(`col` INT) USING $source")
val m4 = intercept[AnalysisException] {
sql(s"ALTER TABLE t21912 ADD COLUMNS(`col$name` INT)")
}.getMessage
assert(m4.contains(s"contains invalid character(s)"))
test("SPARK-32889: ORC table column name supports special characters") {
  // Note: " " and "," are still not allowed, so they are absent from this list.
  val source = "ORC"
  Seq("$", ";", "{", "}", "(", ")", "\n", "\t", "=").foreach { name =>
    // Every way of creating an ORC table with a special-character column name
    // should succeed, and the catalog must store the name verbatim.
    val createCommands = Seq(
      s"CREATE TABLE t32889(`$name` INT) USING $source",
      s"CREATE TABLE t32889 STORED AS $source AS SELECT 1 `$name`",
      s"CREATE TABLE t32889 USING $source AS SELECT 1 `$name`",
      s"CREATE TABLE t32889(`$name` INT) USING hive OPTIONS (fileFormat '$source')")
    for (command <- createCommands) {
      withTable("t32889") {
        sql(command)
        val storedName =
          sessionState.catalog.getTableMetadata(TableIdentifier("t32889")).schema.fields(0).name
        assertResult(name)(storedName)
      }
    }

    // Adding a special-character column via ALTER TABLE must work as well.
    withTable("t32889") {
      sql(s"CREATE TABLE t32889(`col` INT) USING $source")
      sql(s"ALTER TABLE t32889 ADD COLUMNS(`$name` INT)")
      val addedName =
        sessionState.catalog.getTableMetadata(TableIdentifier("t32889")).schema.fields(1).name
      assertResult(name)(addedName)
    }
  }
}

Expand Down