From e15a3722afe780f06c8f7079dbd734b3be2a8b70 Mon Sep 17 00:00:00 2001
From: softmanu <26399543+softmanu@users.noreply.github.com>
Date: Tue, 25 Sep 2018 01:08:35 +0530
Subject: [PATCH] detect date type in csv file

This fix addresses the following JIRA issue, which I created a few hours ago:

https://issues.apache.org/jira/browse/SPARK-25517

This concerns schema inference for a call such as spark.read.format("csv").option("inferSchema", "true").option("dateFormat", "MM/dd/yyyy").load("/path/to/csvfile"). Assume the CSV file has a date column such as an employee joining date, for example 02/22/2018 (22 February 2018). Although this value is a date that matches the configured dateFormat, Spark always infers the joining_date column as a string, whereas the equivalent inference works correctly with timestampFormat.
---
 .../spark/sql/execution/datasources/csv/CSVInferSchema.scala    | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala
index a585cbed2551..af105e4d779f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala
@@ -149,6 +149,8 @@ private[csv] object CSVInferSchema {
     // This case infers a custom `dataFormat` is set.
     if ((allCatch opt options.timestampFormat.parse(field)).isDefined) {
       TimestampType
+    } else if ((allCatch opt options.dateFormat.parse(field)).isDefined) {
+      DateType
     } else if ((allCatch opt DateTimeUtils.stringToTime(field)).isDefined) {
       // We keep this for backwards compatibility.
       TimestampType