From e15a3722afe780f06c8f7079dbd734b3be2a8b70 Mon Sep 17 00:00:00 2001 From: softmanu <26399543+softmanu@users.noreply.github.com> Date: Tue, 25 Sep 2018 01:08:35 +0530 Subject: [PATCH] detect date type in csv file This fix addresses the following JIRA issue, which I filed a few hours earlier: https://issues.apache.org/jira/browse/SPARK-25517 The issue concerns spark.read.format("csv").option("inferSchema", "true").option("dateFormat", "MM/dd/yyyy").load(/path/to/csvfile). Assume /path/to/csvfile has a date-type column such as an employee joining date, for example 02/22/2018 (22 February 2018). This value is a date, but Spark always reads the joining_date column as a string, whereas schema inference works correctly with timestampFormat. --- .../spark/sql/execution/datasources/csv/CSVInferSchema.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala index a585cbed2551..af105e4d779f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala @@ -149,6 +149,8 @@ private[csv] object CSVInferSchema { // This case infers a custom `dataFormat` is set. if ((allCatch opt options.timestampFormat.parse(field)).isDefined) { TimestampType + } else if ((allCatch opt options.dateFormat.parse(field)).isDefined) { + DateType } else if ((allCatch opt DateTimeUtils.stringToTime(field)).isDefined) { // We keep this for backwards compatibility. TimestampType