Skip to content

Commit fa915fd

Browse files
committed
Inferring date type
1 parent a8d27d6 commit fa915fd

File tree

1 file changed

+18
-0
lines changed

1 file changed

+18
-0
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,8 @@
1717

1818
package org.apache.spark.sql.catalyst.csv
1919

20+
import java.text.ParsePosition
21+
2022
import scala.util.control.Exception.allCatch
2123

2224
import org.apache.spark.rdd.RDD
@@ -98,6 +100,7 @@ class CSVInferSchema(options: CSVOptions) extends Serializable {
98100
compatibleType(typeSoFar, tryParseDecimal(field)).getOrElse(StringType)
99101
case DoubleType => tryParseDouble(field)
100102
case TimestampType => tryParseTimestamp(field)
103+
case DateType => tryParseDate(field)
101104
case BooleanType => tryParseBoolean(field)
102105
case StringType => StringType
103106
case other: DataType =>
@@ -159,6 +162,21 @@ class CSVInferSchema(options: CSVOptions) extends Serializable {
159162
} else if ((allCatch opt DateTimeUtils.stringToTime(field)).isDefined) {
160163
// We keep this for backwards compatibility.
161164
TimestampType
165+
} else {
166+
tryParseDate(field)
167+
}
168+
}
169+
170+
private def tryParseDate(field: String): DataType = {
171+
val dateTry = allCatch opt {
172+
val pos = new ParsePosition(0)
173+
options.dateFormat.parse(field, pos)
174+
if (pos.getErrorIndex != -1 || pos.getIndex != field.length) {
175+
throw new IllegalArgumentException(s"${field} cannot be parsed as ${DateType.simpleString}")
176+
}
177+
}
178+
if (dateTry.isDefined) {
179+
DateType
162180
} else {
163181
tryParseBoolean(field)
164182
}

0 commit comments

Comments
 (0)