Skip to content

Commit 97e9080

Browse files
committed
parquet support for date type
1 parent b9fe504 commit 97e9080

File tree

4 files changed

+14
-2
lines changed

4 files changed

+14
-2
lines changed

sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetConverter.scala

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
package org.apache.spark.sql.parquet
1919

20-
import java.sql.Timestamp
20+
import java.sql.{Date, Timestamp}
2121
import java.util.{TimeZone, Calendar}
2222

2323
import scala.collection.mutable.{Buffer, ArrayBuffer, HashMap}
@@ -192,6 +192,9 @@ private[parquet] abstract class CatalystConverter extends GroupConverter {
192192
/** Records a 32-bit integer for the field at `fieldIndex` by delegating to `updateField`. */
protected[parquet] def updateInt(fieldIndex: Int, value: Int): Unit = {
  updateField(fieldIndex, value)
}
194194

195+
/**
 * Records a date for the field at `fieldIndex`.
 *
 * Parquet's DATE annotation on INT32 encodes the number of days since the Unix epoch
 * (1970-01-01), whereas `java.sql.Date` is constructed from milliseconds. The value is
 * therefore widened to Long (to avoid Int overflow) and scaled to milliseconds before
 * the `Date` is built; passing the raw day count would place every date within a few
 * weeks of 1970-01-01.
 *
 * @param fieldIndex index of the field being populated
 * @param value      days since the Unix epoch, as stored in the Parquet file
 */
protected[parquet] def updateDate(fieldIndex: Int, value: Int): Unit = {
  val millisPerDay = 24L * 60L * 60L * 1000L
  val utcMidnight = value.toLong * millisPerDay
  // java.sql.Date renders in the JVM default time zone; shift to local midnight so the
  // calendar day matches the stored day count regardless of the zone offset.
  val localMidnight = utcMidnight - TimeZone.getDefault.getOffset(utcMidnight)
  // NOTE(review): the matching write path must emit days since epoch (not raw millis)
  // for round-trips to be consistent - confirm against the writer.
  updateField(fieldIndex, new Date(localMidnight))
}
197+
195198
/** Records a 64-bit integer for the field at `fieldIndex` by delegating to `updateField`. */
protected[parquet] def updateLong(fieldIndex: Int, value: Long): Unit = {
  updateField(fieldIndex, value)
}
197200

@@ -388,6 +391,9 @@ private[parquet] class CatalystPrimitiveRowConverter(
388391
/** Writes a 32-bit integer straight into the current row through its `setInt` setter. */
override protected[parquet] def updateInt(fieldIndex: Int, value: Int): Unit = {
  current.setInt(fieldIndex, value)
}
390393

394+
/**
 * Writes a date into the current row at `fieldIndex`.
 *
 * Parquet's DATE annotation on INT32 encodes the number of days since the Unix epoch
 * (1970-01-01), whereas `java.sql.Date` is constructed from milliseconds. The value is
 * therefore widened to Long (to avoid Int overflow) and scaled to milliseconds before
 * the `Date` is built; passing the raw day count would place every date within a few
 * weeks of 1970-01-01.
 *
 * @param fieldIndex index of the field being populated
 * @param value      days since the Unix epoch, as stored in the Parquet file
 */
override protected[parquet] def updateDate(fieldIndex: Int, value: Int): Unit = {
  val millisPerDay = 24L * 60L * 60L * 1000L
  val utcMidnight = value.toLong * millisPerDay
  // java.sql.Date renders in the JVM default time zone; shift to local midnight so the
  // calendar day matches the stored day count regardless of the zone offset.
  val localMidnight = utcMidnight - TimeZone.getDefault.getOffset(utcMidnight)
  // NOTE(review): the matching write path must emit days since epoch (not raw millis)
  // for round-trips to be consistent - confirm against the writer.
  current.update(fieldIndex, new Date(localMidnight))
}
396+
391397
/** Writes a 64-bit integer straight into the current row through its `setLong` setter. */
override protected[parquet] def updateLong(fieldIndex: Int, value: Long): Unit = {
  current.setLong(fieldIndex, value)
}
393399

sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableSupport.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@ private[parquet] class RowWriteSupport extends WriteSupport[Row] with Logging {
212212
case DoubleType => writer.addDouble(value.asInstanceOf[Double])
213213
case FloatType => writer.addFloat(value.asInstanceOf[Float])
214214
case BooleanType => writer.addBoolean(value.asInstanceOf[Boolean])
215+
case DateType => writer.addInteger(value.asInstanceOf[java.sql.Date].getTime.toInt)
215216
case d: DecimalType =>
216217
if (d.precisionInfo == None || d.precisionInfo.get.precision > 18) {
217218
sys.error(s"Unsupported datatype $d, cannot write to consumer")

sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ private[parquet] object ParquetTypesConverter extends Logging {
6464
case ParquetPrimitiveTypeName.BOOLEAN => BooleanType
6565
case ParquetPrimitiveTypeName.DOUBLE => DoubleType
6666
case ParquetPrimitiveTypeName.FLOAT => FloatType
67+
case ParquetPrimitiveTypeName.INT32
68+
if originalType == ParquetOriginalType.DATE => DateType
6769
case ParquetPrimitiveTypeName.INT32 => IntegerType
6870
case ParquetPrimitiveTypeName.INT64 => LongType
6971
case ParquetPrimitiveTypeName.INT96 if int96AsTimestamp => TimestampType
@@ -222,6 +224,8 @@ private[parquet] object ParquetTypesConverter extends Logging {
222224
// There is no type for Byte or Short so we promote them to INT32.
223225
case ShortType => Some(ParquetTypeInfo(ParquetPrimitiveTypeName.INT32))
224226
case ByteType => Some(ParquetTypeInfo(ParquetPrimitiveTypeName.INT32))
227+
case DateType => Some(ParquetTypeInfo(
228+
ParquetPrimitiveTypeName.INT32, Some(ParquetOriginalType.DATE)))
225229
case LongType => Some(ParquetTypeInfo(ParquetPrimitiveTypeName.INT64))
226230
case TimestampType => Some(ParquetTypeInfo(ParquetPrimitiveTypeName.INT96))
227231
case DecimalType.Fixed(precision, scale) if precision <= 18 =>

sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,14 +57,15 @@ class ParquetSchemaSuite extends FunSuite with ParquetTest {
5757
|}
5858
""".stripMargin)
5959

60-
testSchema[(Byte, Short, Int, Long)](
60+
testSchema[(Byte, Short, Int, Long, java.sql.Date)](
6161
"logical integral types",
6262
"""
6363
|message root {
6464
| required int32 _1 (INT_8);
6565
| required int32 _2 (INT_16);
6666
| required int32 _3 (INT_32);
6767
| required int64 _4 (INT_64);
68+
| optional int32 _5 (DATE);
6869
|}
6970
""".stripMargin)
7071

0 commit comments

Comments
 (0)