@@ -34,12 +34,12 @@ import org.apache.hadoop.mapreduce._
3434import org .apache .hadoop .mapreduce .lib .input .{FileInputFormat , FileSplit }
3535
3636import org .apache .spark .TaskContext
37- import org .apache .spark .sql .{ Row , SparkSession }
37+ import org .apache .spark .sql ._
3838import org .apache .spark .sql .catalyst .InternalRow
3939import org .apache .spark .sql .catalyst .expressions ._
4040import org .apache .spark .sql .execution .datasources ._
4141import org .apache .spark .sql .hive .{HiveInspectors , HiveShim }
42- import org .apache .spark .sql .sources .{ Filter , _ }
42+ import org .apache .spark .sql .sources ._
4343import org .apache .spark .sql .types .StructType
4444import org .apache .spark .util .SerializableConfiguration
4545
@@ -83,6 +83,8 @@ class OrcFileFormat extends FileFormat with DataSourceRegister with Serializable
8383 classOf [MapRedOutputFormat [_, _]])
8484 }
8585
86+ dataSchema.map(_.name).foreach(checkFieldName)
87+
8688 new OutputWriterFactory {
8789 override def newInstance (
8890 path : String ,
@@ -169,6 +171,16 @@ class OrcFileFormat extends FileFormat with DataSourceRegister with Serializable
169171 }
170172 }
171173 }
174+
/**
 * Validates a field name before it is used in an ORC schema.
 *
 * @param name the field name to validate
 * @throws AnalysisException if `name` contains a space or any of `,;{}()\n\t=`
 */
private def checkFieldName(name: String): Unit = {
  // ,;{}()\n\t= and space are special characters in ORC schema
  val invalidChars = " ,;{}()\n\t="
  // Scan the characters directly rather than using `name.matches(".*[...].*")`: `matches` has to
  // consume the whole string and `.` does not match line terminators, so a name such as
  // "a\nb\nc" or "a\rb c" would fail to match the pattern and slip through the check.
  if (name.exists(c => invalidChars.indexOf(c) >= 0)) {
    throw new AnalysisException(
      s"""Attribute name "$name" contains invalid character(s) among " ,;{}()\\n\\t=".
         |Please use alias to rename it.
       """.stripMargin.split("\n").mkString(" ").trim)
  }
}
172184}
173185
174186private [orc] class OrcSerializer (dataSchema : StructType , conf : Configuration )
0 commit comments