From ecd25477abd6735514ab48549a4a937bf6d00f42 Mon Sep 17 00:00:00 2001 From: linweizhong Date: Fri, 3 Jul 2015 15:55:00 +0800 Subject: [PATCH 1/7] Change schema for array type from element to array_element --- .../apache/spark/sql/parquet/CatalystSchemaConverter.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/CatalystSchemaConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/CatalystSchemaConverter.scala index 2be7c64612cd..fa1ec8dd381f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/CatalystSchemaConverter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/CatalystSchemaConverter.scala @@ -446,7 +446,7 @@ private[parquet] class CatalystSchemaConverter( field.name, Types .buildGroup(REPEATED) - .addField(convertField(StructField("element", elementType, nullable))) + .addField(convertField(StructField("array_element", elementType, nullable))) .named(CatalystConverter.ARRAY_CONTAINS_NULL_BAG_SCHEMA_NAME)) // Spark 1.4.x and prior versions convert ArrayType with non-nullable elements into a 2-level @@ -459,7 +459,7 @@ private[parquet] class CatalystSchemaConverter( ConversionPatterns.listType( repetition, field.name, - convertField(StructField("element", elementType, nullable), REPEATED)) + convertField(StructField("array_element", elementType, nullable), REPEATED)) // Spark 1.4.x and prior versions convert MapType into a 3-level group annotated by // MAP_KEY_VALUE. This is covered by `convertGroupField(field: GroupType): DataType`. @@ -490,7 +490,7 @@ private[parquet] class CatalystSchemaConverter( .buildGroup(repetition).as(LIST) .addField( Types.repeatedGroup() - .addField(convertField(StructField("element", elementType, containsNull))) + .addField(convertField(StructField("array_element", elementType, containsNull))) .named("list")) .named(field.name) From 3d38a75553a050217af1f7ff8f5b6de7faf3040b Mon Sep 17 00:00:00 2001 From: linweizhong Date: Fri, 3 Jul 2015 17:39:54 +0800 Subject: [PATCH 2/7] Update TestSuite --- .../sql/parquet/ParquetSchemaSuite.scala | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala index 35d3c33f99a0..e47cfac5a6a7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala @@ -174,7 +174,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest { """ |message root { | optional group _1 (LIST) { - | repeated int32 element; + | repeated int32 array_element; | } |} """.stripMargin) @@ -185,7 +185,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest { |message root { | optional group _1 (LIST) { | repeated group list { - | required int32 element; + | required int32 array_element; | } | } |} @@ -198,7 +198,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest { |message root { | optional group _1 (LIST) { | repeated group bag { - | optional int32 element; + | optional int32 array_element; | } | } |} @@ -210,7 +210,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest { |message root { | optional group _1 (LIST) { | repeated group list { - | optional int32 element; + | optional int32 array_element; | } | } |} @@ -267,7 +267,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest { | optional binary _1 (UTF8); | optional group _2 (LIST) { | repeated group bag { - | optional group element { + | optional group array_element { | required int32 _1; | required double _2; | } @@ -290,7 +290,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest { | optional binary _1 (UTF8); | optional group _2 (LIST) { | repeated group list { - | optional group element { + | optional group array_element { | required int32 _1; | required double _2; | } @@ -467,7 +467,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { """message root { | optional group f1 (LIST) { | repeated group list { - | optional int32 element; + | optional int32 array_element; | } | } |} @@ -482,7 +482,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { nullable = true))), """message root { | optional group f1 (LIST) { - | repeated group element { + | repeated group array_element { | optional int32 num; | } | } @@ -496,7 +496,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { """message root { | optional group f1 (LIST) { | repeated group list { - | required int32 element; + | required int32 array_element; | } | } |} @@ -508,7 +508,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { StructField("f1", ArrayType(IntegerType, containsNull = false), nullable = true))), """message root { | optional group f1 (LIST) { - | repeated group element { + | repeated group array_element { | required int32 num; | } | } @@ -521,7 +521,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { StructField("f1", ArrayType(IntegerType, containsNull = false), nullable = true))), """message root { | optional group f1 (LIST) { - | repeated int32 element; + | repeated int32 array_element; | } |} """.stripMargin) @@ -539,7 +539,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { nullable = true))), """message root { | optional group f1 (LIST) { - | repeated group element { + | repeated group array_element { | required binary str (UTF8); | required int32 num; | } @@ -599,7 +599,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { """message root { | optional group f1 (LIST) { | repeated group list { - | optional int32 element; + | optional int32 array_element; | } | } |} @@ -616,7 +616,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { """message root { | optional group f1 (LIST) { | repeated group bag { - | optional int32 element; + | optional int32 array_element; | } | } |} @@ -632,7 +632,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { """message root { | optional group f1 (LIST) { | repeated group list { - | required int32 element; + | required int32 array_element; | } | } |} @@ -648,7 +648,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { nullable = true))), """message root { | optional group f1 (LIST) { - | repeated int32 element; + | repeated int32 array_element; | } |} """.stripMargin) From e8877066af94aed3e1412c8a8c6113a89bd182ca Mon Sep 17 00:00:00 2001 From: linweizhong Date: Tue, 7 Jul 2015 11:45:50 +0800 Subject: [PATCH 3/7] Update, array_element to array --- .../sql/parquet/CatalystSchemaConverter.scala | 6 ++-- .../sql/parquet/ParquetSchemaSuite.scala | 32 +++++++++---------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/CatalystSchemaConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/CatalystSchemaConverter.scala index fa1ec8dd381f..6aa46f7dc63b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/CatalystSchemaConverter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/CatalystSchemaConverter.scala @@ -446,7 +446,7 @@ private[parquet] class CatalystSchemaConverter( field.name, Types .buildGroup(REPEATED) - .addField(convertField(StructField("array_element", elementType, nullable))) + .addField(convertField(StructField("array", elementType, nullable))) .named(CatalystConverter.ARRAY_CONTAINS_NULL_BAG_SCHEMA_NAME)) // Spark 1.4.x and prior versions convert ArrayType with non-nullable elements into a 2-level @@ -459,7 +459,7 @@ private[parquet] class CatalystSchemaConverter( ConversionPatterns.listType( repetition, field.name, - convertField(StructField("array_element", elementType, nullable), REPEATED)) + convertField(StructField("array", elementType, nullable), REPEATED)) // Spark 1.4.x and prior versions convert MapType into a 3-level group annotated by // MAP_KEY_VALUE. This is covered by `convertGroupField(field: GroupType): DataType`. @@ -490,7 +490,7 @@ private[parquet] class CatalystSchemaConverter( .buildGroup(repetition).as(LIST) .addField( Types.repeatedGroup() - .addField(convertField(StructField("array_element", elementType, containsNull))) + .addField(convertField(StructField("array", elementType, containsNull))) .named("list")) .named(field.name) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala index e47cfac5a6a7..7c1994d86bc6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala @@ -174,7 +174,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest { """ |message root { | optional group _1 (LIST) { - | repeated int32 array_element; + | repeated int32 array; | } |} """.stripMargin) @@ -185,7 +185,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest { |message root { | optional group _1 (LIST) { | repeated group list { - | required int32 array_element; + | required int32 array; | } | } |} @@ -198,7 +198,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest { |message root { | optional group _1 (LIST) { | repeated group bag { - | optional int32 array_element; + | optional int32 array; | } | } |} @@ -210,7 +210,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest { |message root { | optional group _1 (LIST) { | repeated group list { - | optional int32 array_element; + | optional int32 array; | } | } |} @@ -267,7 +267,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest { | optional binary _1 (UTF8); | optional group _2 (LIST) { | repeated group bag { - | optional group array_element { + | optional group array { | required int32 _1; | required double _2; | } @@ -290,7 +290,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest { | optional binary _1 (UTF8); | optional group _2 (LIST) { | repeated group list { - | optional group array_element { + | optional group array { | required int32 _1; | required double _2; | } @@ -467,7 +467,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { """message root { | optional group f1 (LIST) { | repeated group list { - | optional int32 array_element; + | optional int32 array; | } | } |} @@ -482,7 +482,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { nullable = true))), """message root { | optional group f1 (LIST) { - | repeated group array_element { + | repeated group array { | optional int32 num; | } | } @@ -496,7 +496,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { """message root { | optional group f1 (LIST) { | repeated group list { - | required int32 array_element; + | required int32 array; | } | } |} @@ -508,7 +508,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { StructField("f1", ArrayType(IntegerType, containsNull = false), nullable = true))), """message root { | optional group f1 (LIST) { - | repeated group array_element { + | repeated group array { | required int32 num; | } | } @@ -521,7 +521,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { StructField("f1", ArrayType(IntegerType, containsNull = false), nullable = true))), """message root { | optional group f1 (LIST) { - | repeated int32 array_element; + | repeated int32 array; | } |} """.stripMargin) @@ -539,7 +539,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { nullable = true))), """message root { | optional group f1 (LIST) { - | repeated group array_element { + | repeated group array { | required binary str (UTF8); | required int32 num; | } @@ -599,7 +599,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { """message root { | optional group f1 (LIST) { | repeated group list { - | optional int32 array_element; + | optional int32 array; | } | } |} @@ -616,7 +616,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { """message root { | optional group f1 (LIST) { | repeated group bag { - | optional int32 array_element; + | optional int32 array; | } | } |} @@ -632,7 +632,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { """message root { | optional group f1 (LIST) { | repeated group list { - | required int32 array_element; + | required int32 array; | } | } |} @@ -648,7 +648,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { nullable = true))), """message root { | optional group f1 (LIST) { - | repeated int32 array_element; + | repeated int32 array; | } |} """.stripMargin) From 49ca11242c6bc91e8e06cc0f848347d26b9d75b0 Mon Sep 17 00:00:00 2001 From: linweizhong Date: Tue, 7 Jul 2015 14:41:20 +0800 Subject: [PATCH 4/7] Update unit test --- .../org/apache/spark/sql/parquet/ParquetSchemaSuite.scala | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala index 7c1994d86bc6..81133a6b67b4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala @@ -478,7 +478,8 @@ class ParquetSchemaSuite extends ParquetSchemaTest { StructType(Seq( StructField( "f1", - ArrayType(IntegerType, containsNull = true), + ArrayType( + StructType(Seq(StructField("num", IntegerType, nullable = false))), containsNull = true), nullable = true))), """message root { | optional group f1 (LIST) { @@ -505,7 +506,9 @@ class ParquetSchemaSuite extends ParquetSchemaTest { testParquetToCatalyst( "Backwards-compatibility: LIST with non-nullable element type - 2", StructType(Seq( - StructField("f1", ArrayType(IntegerType, containsNull = false), nullable = true))), + StructField("f1", + ArrayType(StructType(Seq(StructField("num", IntegerType))), containsNull = false), + nullable = true))), """message root { | optional group f1 (LIST) { | repeated group array { From d9311417d948e3341d17b393db353a38f2196e0b Mon Sep 17 00:00:00 2001 From: linweizhong Date: Tue, 7 Jul 2015 14:44:06 +0800 Subject: [PATCH 5/7] Update unit test --- .../org/apache/spark/sql/parquet/ParquetSchemaSuite.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala index 81133a6b67b4..83931c96c65d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala @@ -479,7 +479,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { StructField( "f1", ArrayType( - StructType(Seq(StructField("num", IntegerType, nullable = false))), containsNull = true), + StructType(Seq(StructField("num", IntegerType))), containsNull = true), nullable = true))), """message root { | optional group f1 (LIST) { @@ -507,7 +507,8 @@ class ParquetSchemaSuite extends ParquetSchemaTest { "Backwards-compatibility: LIST with non-nullable element type - 2", StructType(Seq( StructField("f1", - ArrayType(StructType(Seq(StructField("num", IntegerType))), containsNull = false), + ArrayType(StructType(Seq(StructField("num", IntegerType, nullable = false))), + containsNull = false), nullable = true))), """message root { | optional group f1 (LIST) { From 00698956a6a12fff2fd2576fada559767d6a7784 Mon Sep 17 00:00:00 2001 From: linweizhong Date: Tue, 7 Jul 2015 14:51:40 +0800 Subject: [PATCH 6/7] Update --- .../apache/spark/sql/parquet/CatalystSchemaConverter.scala | 2 +- .../org/apache/spark/sql/parquet/ParquetSchemaSuite.scala | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/CatalystSchemaConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/CatalystSchemaConverter.scala index 6aa46f7dc63b..94cce9c596fc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/CatalystSchemaConverter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/CatalystSchemaConverter.scala @@ -490,7 +490,7 @@ private[parquet] class CatalystSchemaConverter( .buildGroup(repetition).as(LIST) .addField( Types.repeatedGroup() - .addField(convertField(StructField("array", elementType, containsNull))) + .addField(convertField(StructField("element", elementType, containsNull))) .named("list")) .named(field.name) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala index 83931c96c65d..e84a2e828bd0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala @@ -603,7 +603,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { """message root { | optional group f1 (LIST) { | repeated group list { - | optional int32 array; + | optional int32 element; | } | } |} @@ -636,7 +636,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { """message root { | optional group f1 (LIST) { | repeated group list { - | required int32 array; + | required int32 element; | } | } |} From 2480abda8f2bde67107c7ef5f25da940ad117124 Mon Sep 17 00:00:00 2001 From: linweizhong Date: Tue, 7 Jul 2015 16:23:40 +0800 Subject: [PATCH 7/7] Update unit test --- .../org/apache/spark/sql/parquet/ParquetSchemaSuite.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala index e84a2e828bd0..54be9685c868 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala @@ -185,7 +185,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest { |message root { | optional group _1 (LIST) { | repeated group list { - | required int32 array; + | required int32 element; | } | } |} @@ -210,7 +210,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest { |message root { | optional group _1 (LIST) { | repeated group list { - | optional int32 array; + | optional int32 element; | } | } |} @@ -290,7 +290,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest { | optional binary _1 (UTF8); | optional group _2 (LIST) { | repeated group list { - | optional group array { + | optional group element { | required int32 _1; | required double _2; | } @@ -479,7 +479,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { StructField( "f1", ArrayType( - StructType(Seq(StructField("num", IntegerType))), containsNull = true), + StructType(Seq(StructField("num", IntegerType))), containsNull = false), nullable = true))), """message root { | optional group f1 (LIST) {