Commit 2f10a99

Clean up CSV and enable JSON types for timestamp
1 parent 9b5a1d1 commit 2f10a99

4 files changed, +23 -32 lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala

Lines changed: 0 additions & 12 deletions
@@ -84,7 +84,6 @@ class CSVFileFormat extends TextBasedFileFormat with DataSourceRegister {
       job: Job,
       options: Map[String, String],
       dataSchema: StructType): OutputWriterFactory = {
-    verifySchema(dataSchema)
     val conf = job.getConfiguration
     val csvOptions = new CSVOptions(options)
     csvOptions.compressionCodec.foreach { codec =>
@@ -184,15 +183,4 @@ class CSVFileFormat extends TextBasedFileFormat with DataSourceRegister {
         .mapPartitions(_.map(pair => new String(pair._2.getBytes, 0, pair._2.getLength, charset)))
     }
   }
-
-  private def verifySchema(schema: StructType): Unit = {
-    schema.foreach { field =>
-      field.dataType match {
-        case _: ArrayType | _: MapType | _: StructType =>
-          throw new UnsupportedOperationException(
-            s"CSV data source does not support ${field.dataType.simpleString} data type.")
-        case _ =>
-      }
-    }
-  }
 }
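
With verifySchema gone from prepareWrite, unsupported column types are no longer rejected on the driver at planning time; the check moves into the per-type converter in CSVRelation.scala below, so the failure surfaces only when a write task actually runs. For contrast, a minimal standalone sketch of the eager, driver-side walk this commit deletes (same logic as the removed method, renamed to make clear it is not the Spark source):

import org.apache.spark.sql.types._

// Walk every field up front and reject complex types before any task launches.
def verifySchemaEagerly(schema: StructType): Unit = schema.foreach { field =>
  field.dataType match {
    case _: ArrayType | _: MapType | _: StructType =>
      throw new UnsupportedOperationException(
        s"CSV data source does not support ${field.dataType.simpleString} data type.")
    case _ => // atomic types can be rendered as CSV values
  }
}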

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala

Lines changed: 9 additions & 5 deletions
@@ -227,6 +227,12 @@ private[csv] class CsvOutputWriter(
   }
 
   private def makeConverter(dataType: DataType): ValueConverter = dataType match {
+    case ByteType | ShortType | IntegerType | LongType =>
+      (row: InternalRow, ordinal: Int) => row.get(ordinal, dataType).toString
+
+    case FloatType | DoubleType | _: DecimalType | BooleanType | StringType =>
+      (row: InternalRow, ordinal: Int) => row.get(ordinal, dataType).toString
+
     case DateType =>
       (row: InternalRow, ordinal: Int) =>
         params.dateFormat.format(DateTimeUtils.toJavaDate(row.getInt(ordinal)))
@@ -235,11 +241,9 @@ private[csv] class CsvOutputWriter(
       (row: InternalRow, ordinal: Int) =>
         params.timestampFormat.format(DateTimeUtils.toJavaTimestamp(row.getLong(ordinal)))
 
-    case udt: UserDefinedType[_] => makeConverter(udt.sqlType)
-
-    case dt: DataType =>
-      (row: InternalRow, ordinal: Int) =>
-        row.get(ordinal, dt).toString
+    case _ =>
+      throw new UnsupportedOperationException(
+        s"CSV data source does not support ${dataType.simpleString} data type.")
   }
 
   override def write(row: Row): Unit = throw new UnsupportedOperationException("call writeInternal")
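
The rewritten makeConverter enumerates the supported types explicitly, and the catch-all now fails loudly instead of silently falling back to toString (previously, UserDefinedType columns were also converted via their underlying sqlType; they are rejected now). A minimal standalone sketch of the dispatch pattern, simplified to Any => String and not the Spark source:

import org.apache.spark.sql.types._

// Resolve the converter once from the column's DataType, so the per-row hot
// path is a plain function call; unmatched types fail fast.
def makeConverterSketch(dataType: DataType): Any => String = dataType match {
  case ByteType | ShortType | IntegerType | LongType |
       FloatType | DoubleType | _: DecimalType | BooleanType | StringType =>
    (value: Any) => value.toString
  case _ =>
    throw new UnsupportedOperationException(
      s"CSV data source does not support ${dataType.simpleString} data type.")
}

makeConverterSketch(IntegerType)(42)                     // returns "42"
// makeConverterSketch(MapType(StringType, IntegerType)) // would throw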

sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala

Lines changed: 14 additions & 11 deletions
@@ -665,21 +665,24 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
   }
 
   test("error handling for unsupported data types.") {
-    withTempDir { dir =>
-      val csvDir = new File(dir, "csv").getCanonicalPath
-      var msg = intercept[UnsupportedOperationException] {
-        Seq((1, "Tesla")).toDF("a", "b").selectExpr("struct(a, b)").write.csv(csvDir)
-      }.getMessage
+    withTempPath { path =>
+      val msg = intercept[SparkException] {
+        Seq((1, "Tesla")).toDF("a", "b").selectExpr("struct(a, b)").write.csv(path.getAbsolutePath)
+      }.getCause.getMessage
       assert(msg.contains("CSV data source does not support struct<a:int,b:string> data type"))
+    }
 
-      msg = intercept[UnsupportedOperationException] {
-        Seq((1, Map("Tesla" -> 3))).toDF("id", "cars").write.csv(csvDir)
-      }.getMessage
+    withTempPath { path =>
+      val msg = intercept[SparkException] {
+        Seq((1, Map("Tesla" -> 3))).toDF().write.csv(path.getAbsolutePath)
+      }.getCause.getMessage
       assert(msg.contains("CSV data source does not support map<string,int> data type"))
+    }
 
-      msg = intercept[UnsupportedOperationException] {
-        Seq((1, Array("Tesla", "Chevy", "Ford"))).toDF("id", "brands").write.csv(csvDir)
-      }.getMessage
+    withTempPath { path =>
+      val msg = intercept[SparkException] {
+        Seq((1, Array("Tesla", "Chevy", "Ford"))).toDF().write.csv(path.getAbsolutePath)
+      }.getCause.getMessage
       assert(msg.contains("CSV data source does not support array<string> data type"))
     }
   }
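
Because validation now happens inside the write task, the updated test intercepts a SparkException and inspects its cause instead of catching UnsupportedOperationException directly. A hedged sketch of how that looks from user code (assumes a live SparkSession named spark; the output path is illustrative):

import org.apache.spark.SparkException
import spark.implicits._

try {
  Seq((1, Map("Tesla" -> 3))).toDF("id", "cars").write.csv("/tmp/csv-unsupported")
} catch {
  case e: SparkException =>
    // The task failure wraps the writer's UnsupportedOperationException.
    println(e.getCause.getMessage) // "... does not support map<string,int> data type."
}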

sql/hive/src/test/scala/org/apache/spark/sql/sources/JsonHadoopFsRelationSuite.scala

Lines changed: 0 additions & 4 deletions
@@ -32,10 +32,6 @@ class JsonHadoopFsRelationSuite extends HadoopFsRelationTest {
   override protected def supportsDataType(dataType: DataType): Boolean = dataType match {
     case _: NullType => false
     case _: BinaryType => false
-    // `TimestampType` is disabled because `DatatypeConverter.parseDateTime()`
-    // in `DateTimeUtils` parses the formatted string wrongly when the date is
-    // too early. (e.g. "1600-07-13T08:36:32.847").
-    case _: TimestampType => false
     case _: CalendarIntervalType => false
     case _ => true
   }
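
Dropping the TimestampType exclusion puts timestamp columns back into the JSON round-trip test matrix, including the early dates the deleted comment called out. A hedged round-trip sketch (assumes a live SparkSession named spark; the path is illustrative):

import java.sql.Timestamp
import spark.implicits._

// The problem date from the deleted comment: early timestamps used to be
// parsed incorrectly on the way back in.
val df = Seq((1, Timestamp.valueOf("1600-07-13 08:36:32.847"))).toDF("id", "ts")
df.write.mode("overwrite").json("/tmp/json-ts")

// Read back with an explicit schema so "ts" is interpreted as TimestampType.
val readBack = spark.read.schema(df.schema).json("/tmp/json-ts")
readBack.show(truncate = false)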
