Skip to content

Commit 2bc5ebc

Browse files
committed
Addresses comments
1 parent f03ef93 commit 2bc5ebc

File tree

4 files changed: +19 additions, −20 deletions

sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -292,10 +292,9 @@ private[spark] object SQLConf {
292292

293293
val PARQUET_WRITE_LEGACY_FORMAT = booleanConf(
294294
key = "spark.sql.parquet.writeLegacyFormat",
295-
defaultValue = Some(true),
295+
defaultValue = Some(false),
296296
doc = "Whether to follow Parquet's format specification when converting Parquet schema to " +
297-
"Spark SQL schema and vice versa.",
298-
isPublic = false)
297+
"Spark SQL schema and vice versa.")
299298

300299
val PARQUET_OUTPUT_COMMITTER_CLASS = stringConf(
301300
key = "spark.sql.parquet.output.committer.class",

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystSchemaConverter.scala

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ import org.apache.spark.sql.{AnalysisException, SQLConf}
4747
* [[StructType]]. Note that Spark SQL [[TimestampType]] is similar to Hive timestamp, which
4848
* has optional nanosecond precision, but different from `TIME_MILLS` and `TIMESTAMP_MILLIS`
4949
* described in Parquet format spec. This argument only affects Parquet read path.
50-
* @param writeLegacyParquetFormat Whether to use legacy Parquet format compatible with Spark 1.5
50+
* @param writeLegacyParquetFormat Whether to use legacy Parquet format compatible with Spark 1.4
5151
* and prior versions when converting a Catalyst [[StructType]] to a Parquet [[MessageType]].
5252
* When set to false, use standard format defined in parquet-format spec. This argument only
5353
* affects Parquet write path.
@@ -356,7 +356,7 @@ private[parquet] class CatalystSchemaConverter(
356356
// `TIMESTAMP_MICROS` which are both logical types annotating `INT64`.
357357
//
358358
// Originally, Spark SQL uses the same nanosecond timestamp type as Impala and Hive. Starting
359-
// from Spark 1.5.0, we resort to a timestamp type with 100 ns precision so that we can store
359+
// from Spark 1.4.0, we resort to a timestamp type with 100 ns precision so that we can store
360360
// a timestamp into a `Long`. This design decision is subject to change though, for example,
361361
// we may resort to microsecond precision in the future.
362362
//
@@ -375,7 +375,7 @@ private[parquet] class CatalystSchemaConverter(
375375
// Decimals (legacy mode)
376376
// ======================
377377

378-
// Spark 1.5.x and prior versions only support decimals with a maximum precision of 18 and
378+
// Spark 1.4.x and prior versions only support decimals with a maximum precision of 18 and
379379
// always store decimals in fixed-length byte arrays. To keep compatibility with these older
380380
// versions, here we convert decimals with all precisions to `FIXED_LEN_BYTE_ARRAY` annotated
381381
// by `DECIMAL`.
@@ -426,7 +426,7 @@ private[parquet] class CatalystSchemaConverter(
426426
// ArrayType and MapType (legacy mode)
427427
// ===================================
428428

429-
// Spark 1.5.x and prior versions convert `ArrayType` with nullable elements into a 3-level
429+
// Spark 1.4.x and prior versions convert `ArrayType` with nullable elements into a 3-level
430430
// `LIST` structure. This behavior is somewhat a hybrid of parquet-hive and parquet-avro
431431
// (1.6.0rc3): the 3-level structure is similar to parquet-hive while the 3rd level element
432432
// field name "array" is borrowed from parquet-avro.
@@ -445,7 +445,7 @@ private[parquet] class CatalystSchemaConverter(
445445
.addField(convertField(StructField("array", elementType, nullable)))
446446
.named("bag"))
447447

448-
// Spark 1.5.x and prior versions convert ArrayType with non-nullable elements into a 2-level
448+
// Spark 1.4.x and prior versions convert ArrayType with non-nullable elements into a 2-level
449449
// LIST structure. This behavior mimics parquet-avro (1.6.0rc3). Note that this case is
450450
// covered by the backwards-compatibility rules implemented in `isElementType()`.
451451
case ArrayType(elementType, nullable @ false) if writeLegacyParquetFormat =>
@@ -458,7 +458,7 @@ private[parquet] class CatalystSchemaConverter(
458458
// "array" is the name chosen by parquet-avro (1.7.0 and prior version)
459459
convertField(StructField("array", elementType, nullable), REPEATED))
460460

461-
// Spark 1.5.x and prior versions convert MapType into a 3-level group annotated by
461+
// Spark 1.4.x and prior versions convert MapType into a 3-level group annotated by
462462
// MAP_KEY_VALUE. This is covered by `convertGroupField(field: GroupType): DataType`.
463463
case MapType(keyType, valueType, valueContainsNull) if writeLegacyParquetFormat =>
464464
// <map-repetition> group <name> (MAP) {

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystWriteSupport.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ import org.apache.spark.sql.types._
4242
* messages. This class can write Parquet data in two modes:
4343
*
4444
* - Standard mode: Parquet data are written in standard format defined in parquet-format spec.
45-
* - Legacy mode: Parquet data are written in legacy format compatible with Spark 1.5 and prior.
45+
* - Legacy mode: Parquet data are written in legacy format compatible with Spark 1.4 and prior.
4646
*
4747
* This behavior can be controlled by SQL option `spark.sql.parquet.writeLegacyParquetFormat`. The
4848
* value of the option is propagated to this class by the `init()` method and its Hadoop
@@ -63,7 +63,7 @@ private[parquet] class CatalystWriteSupport extends WriteSupport[InternalRow] wi
6363
// The Parquet `RecordConsumer` to which all `InternalRow`s are written
6464
private var recordConsumer: RecordConsumer = _
6565

66-
// Whether to write data in legacy Parquet format compatible with Spark 1.5 and prior versions
66+
// Whether to write data in legacy Parquet format compatible with Spark 1.4 and prior versions
6767
private var writeLegacyParquetFormat: Boolean = _
6868

6969
// Reusable byte array used to write timestamps as Parquet INT96 values

sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -665,7 +665,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest {
665665
writeLegacyParquetFormat = false)
666666

667667
testCatalystToParquet(
668-
"Backwards-compatibility: LIST with nullable element type - 2 - prior to 1.5.x",
668+
"Backwards-compatibility: LIST with nullable element type - 2 - prior to 1.4.x",
669669
StructType(Seq(
670670
StructField(
671671
"f1",
@@ -703,7 +703,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest {
703703
writeLegacyParquetFormat = false)
704704

705705
testCatalystToParquet(
706-
"Backwards-compatibility: LIST with non-nullable element type - 2 - prior to 1.5.x",
706+
"Backwards-compatibility: LIST with non-nullable element type - 2 - prior to 1.4.x",
707707
StructType(Seq(
708708
StructField(
709709
"f1",
@@ -764,7 +764,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest {
764764
writeLegacyParquetFormat = true)
765765

766766
testParquetToCatalyst(
767-
"Backwards-compatibility: MAP with non-nullable value type - 3 - prior to 1.5.x",
767+
"Backwards-compatibility: MAP with non-nullable value type - 3 - prior to 1.4.x",
768768
StructType(Seq(
769769
StructField(
770770
"f1",
@@ -868,7 +868,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest {
868868
writeLegacyParquetFormat = false)
869869

870870
testCatalystToParquet(
871-
"Backwards-compatibility: MAP with non-nullable value type - 2 - prior to 1.5.x",
871+
"Backwards-compatibility: MAP with non-nullable value type - 2 - prior to 1.4.x",
872872
StructType(Seq(
873873
StructField(
874874
"f1",
@@ -908,7 +908,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest {
908908
writeLegacyParquetFormat = false)
909909

910910
testCatalystToParquet(
911-
"Backwards-compatibility: MAP with nullable value type - 3 - prior to 1.5.x",
911+
"Backwards-compatibility: MAP with nullable value type - 3 - prior to 1.4.x",
912912
StructType(Seq(
913913
StructField(
914914
"f1",
@@ -987,7 +987,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest {
987987
writeLegacyParquetFormat = false)
988988

989989
testSchema(
990-
"DECIMAL(1, 0) - prior to 1.5.x",
990+
"DECIMAL(1, 0) - prior to 1.4.x",
991991
StructType(Seq(StructField("f1", DecimalType(1, 0)))),
992992
"""message root {
993993
| optional fixed_len_byte_array(1) f1 (DECIMAL(1, 0));
@@ -998,7 +998,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest {
998998
writeLegacyParquetFormat = true)
999999

10001000
testSchema(
1001-
"DECIMAL(8, 3) - prior to 1.5.x",
1001+
"DECIMAL(8, 3) - prior to 1.4.x",
10021002
StructType(Seq(StructField("f1", DecimalType(8, 3)))),
10031003
"""message root {
10041004
| optional fixed_len_byte_array(4) f1 (DECIMAL(8, 3));
@@ -1009,7 +1009,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest {
10091009
writeLegacyParquetFormat = true)
10101010

10111011
testSchema(
1012-
"DECIMAL(9, 3) - prior to 1.5.x",
1012+
"DECIMAL(9, 3) - prior to 1.4.x",
10131013
StructType(Seq(StructField("f1", DecimalType(9, 3)))),
10141014
"""message root {
10151015
| optional fixed_len_byte_array(5) f1 (DECIMAL(9, 3));
@@ -1020,7 +1020,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest {
10201020
writeLegacyParquetFormat = true)
10211021

10221022
testSchema(
1023-
"DECIMAL(18, 3) - prior to 1.5.x",
1023+
"DECIMAL(18, 3) - prior to 1.4.x",
10241024
StructType(Seq(StructField("f1", DecimalType(18, 3)))),
10251025
"""message root {
10261026
| optional fixed_len_byte_array(8) f1 (DECIMAL(18, 3));

Comments (0)