Skip to content

Commit 808dfe0

Browse files
committed
[SPARK-21912][SQL] Creating ORC datasource table should check invalid column names
1 parent 9f30d92 commit 808dfe0

File tree

2 files changed

+23
-2
lines changed

2 files changed

+23
-2
lines changed

sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,12 @@ import org.apache.hadoop.mapreduce._
3434
import org.apache.hadoop.mapreduce.lib.input.{FileInputFormat, FileSplit}
3535

3636
import org.apache.spark.TaskContext
37-
import org.apache.spark.sql.{Row, SparkSession}
37+
import org.apache.spark.sql._
3838
import org.apache.spark.sql.catalyst.InternalRow
3939
import org.apache.spark.sql.catalyst.expressions._
4040
import org.apache.spark.sql.execution.datasources._
4141
import org.apache.spark.sql.hive.{HiveInspectors, HiveShim}
42-
import org.apache.spark.sql.sources.{Filter, _}
42+
import org.apache.spark.sql.sources._
4343
import org.apache.spark.sql.types.StructType
4444
import org.apache.spark.util.SerializableConfiguration
4545

@@ -83,6 +83,8 @@ class OrcFileFormat extends FileFormat with DataSourceRegister with Serializable
8383
classOf[MapRedOutputFormat[_, _]])
8484
}
8585

86+
dataSchema.map(_.name).foreach(checkFieldName)
87+
8688
new OutputWriterFactory {
8789
override def newInstance(
8890
path: String,
@@ -169,6 +171,16 @@ class OrcFileFormat extends FileFormat with DataSourceRegister with Serializable
169171
}
170172
}
171173
}
174+
175+
/**
 * Validates that a column name can be used in an ORC schema.
 *
 * @param name the attribute (column) name to validate
 * @throws AnalysisException if `name` contains any of the characters " ,;{}()\n\t="
 */
private def checkFieldName(name: String): Unit = {
  // ,;{}()\n\t= and space are special characters in ORC schema
  val invalidChars = " ,;{}()\n\t="
  // Scan the characters directly instead of `name.matches(".*[...].*")`: `matches` must
  // consume the whole string and `.` does not match line terminators, so names such as
  // "a\nb\nc" or "a b\rc" would slip through the regex-based check.
  if (name.exists(c => invalidChars.indexOf(c) >= 0)) {
    throw new AnalysisException(
      s"""Attribute name "$name" contains invalid character(s) among " ,;{}()\\n\\t=".
         |Please use alias to rename it.
       """.stripMargin.split("\n").mkString(" ").trim)
  }
}
172184
}
173185

174186
private[orc] class OrcSerializer(dataSchema: StructType, conf: Configuration)

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2000,4 +2000,13 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
20002000
assert(setOfPath.size() == pathSizeToDeleteOnExit)
20012001
}
20022002
}
2003+
2004+
test("SPARK-21912 Creating ORC datasource table should check invalid column names") {
  withTable("orc1") {
    // The alias `a b` contains a space, so creating the ORC table is expected to fail.
    val error = intercept[AnalysisException] {
      sql("CREATE TABLE orc1 USING ORC AS SELECT 1 `a b`")
    }
    val expectedFragment = """Attribute name "a b" contains invalid character(s)"""
    assert(error.getMessage.contains(expectedFragment))
  }
}
20032012
}

0 commit comments

Comments
 (0)