From aae648a5a1f1fdb0a78b316d7a497ee51bbac73f Mon Sep 17 00:00:00 2001
From: Gang Wu
Date: Tue, 27 Sep 2016 11:07:54 -0700
Subject: [PATCH 1/2] [SPARK-17477][SQL] SparkSQL cannot handle schema
 evolution from Int -> Long when parquet files have Int as their type while
 the hive metastore has Long as its type

---
 .../datasources/parquet/ParquetRowConverter.scala | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala
index 9ffc2b5dd8a5..1263ba8aba95 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala
@@ -214,9 +214,22 @@ private[parquet] class ParquetRowConverter(
       updater: ParentContainerUpdater): Converter with HasParentContainerUpdater = {
 
     catalystType match {
-      case BooleanType | IntegerType | LongType | FloatType | DoubleType | BinaryType =>
+      case BooleanType | IntegerType | FloatType | DoubleType | BinaryType =>
         new ParquetPrimitiveConverter(updater)
 
+      /**
+       * When reading a Hive table of Parquet files whose schema has evolved from
+       * Int to Long, the Hive metastore reports Long while the Parquet files still
+       * store Int, so Spark SQL needs to use the actual type recorded in the
+       * Parquet files. Otherwise, reading fails with java.lang.ClassCastException:
+       * [[MutableLong]] cannot be cast to [[MutableInt]].
+       */
+      case LongType =>
+        new ParquetPrimitiveConverter(updater) {
+          override def addInt(value: Int): Unit =
+            updater.setLong(value.toLong)
+        }
+
       case ByteType =>
         new ParquetPrimitiveConverter(updater) {
           override def addInt(value: Int): Unit =

From 1f6ea811cec5b8f4d9ea4df267e8d60b11477044 Mon Sep 17 00:00:00 2001
From: Gang Wu
Date: Tue, 27 Sep 2016 20:55:14 -0700
Subject: [PATCH 2/2] modified comment to pass scalastyle test

---
 .../datasources/parquet/ParquetRowConverter.scala | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala
index 1263ba8aba95..f53a226df999 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala
@@ -218,12 +218,12 @@ private[parquet] class ParquetRowConverter(
         new ParquetPrimitiveConverter(updater)
 
       /**
-       * When reading a Hive table of Parquet files whose schema has evolved from
-       * Int to Long, the Hive metastore reports Long while the Parquet files still
-       * store Int, so Spark SQL needs to use the actual type recorded in the
-       * Parquet files. Otherwise, reading fails with java.lang.ClassCastException:
-       * [[MutableLong]] cannot be cast to [[MutableInt]].
-       */
+       * [SPARK-17477] When reading a Hive table of Parquet files whose schema has
+       * evolved from Int to Long, the Hive metastore reports Long while the Parquet
+       * files still store Int, so Spark SQL needs to use the actual type recorded
+       * in the Parquet files. Otherwise, reading fails with
+       * java.lang.ClassCastException: [[MutableLong]] cannot be cast to [[MutableInt]].
+       */
       case LongType =>
         new ParquetPrimitiveConverter(updater) {
           override def addInt(value: Int): Unit =
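
For readers who want to see the mismatch this patch addresses, below is a minimal sketch of a reproduction (not part of the patch): it writes a Parquet file whose column is physically INT32, then reads it back declaring the column as LongType, as a Hive metastore would after an Int -> Long schema evolution. The object name and output path are illustrative, and the exact failure mode without the fix can depend on reader settings such as spark.sql.parquet.enableVectorizedReader.

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types.{LongType, StructField, StructType}

object Spark17477Repro {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("SPARK-17477 repro")
      .master("local[1]")
      .getOrCreate()
    import spark.implicits._

    // Write a Parquet file whose physical type for `id` is INT32.
    val path = "/tmp/spark-17477-int-parquet" // illustrative path
    Seq(1, 2, 3).toDF("id").write.mode("overwrite").parquet(path)

    // Read the same file back with `id` declared as LongType. Without the
    // LongType converter override added by this patch, the Int-backed column
    // lands in a Long slot and the scan fails with the ClassCastException
    // quoted in the patch comment; with it, each Int value is widened to Long.
    val evolved = StructType(Seq(StructField("id", LongType)))
    spark.read.schema(evolved).parquet(path).show()

    spark.stop()
  }
}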