@@ -63,18 +63,17 @@ case class ParquetTableScan(
   // The resolution of Parquet attributes is case sensitive, so we resolve the original attributes
   // by exprId. note: output cannot be transient, see
   // https://issues.apache.org/jira/browse/SPARK-1367
-  val normalOutput =
-    attributes
-      .filterNot(a => relation.partitioningAttributes.map(_.exprId).contains(a.exprId))
-      .flatMap(a => relation.output.find(o => o.exprId == a.exprId))
+  val output = attributes.map(relation.attributeMap)
 
-  val partOutput =
-    attributes.flatMap(a => relation.partitioningAttributes.find(o => o.exprId == a.exprId))
+  // A mapping of ordinals partitionRow -> finalOutput.
+  val requestedPartitionOrdinals = {
+    val partitionAttributeOrdinals = AttributeMap(relation.partitioningAttributes.zipWithIndex)
 
-  def output = partOutput ++ normalOutput
-
-  assert(normalOutput.size + partOutput.size == attributes.size,
-    s"$normalOutput + $partOutput != $attributes, ${relation.output}")
+    attributes.zipWithIndex.flatMap {
+      case (attribute, finalOrdinal) =>
+        partitionAttributeOrdinals.get(attribute).map(_ -> finalOrdinal)
+    }
+  }.toArray
 
   override def execute(): RDD[Row] = {
     import parquet.filter2.compat.FilterCompat.FilterPredicateCompat
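
For intuition, here is a minimal standalone sketch of the ordinal bookkeeping that the new `requestedPartitionOrdinals` performs, with plain strings standing in for Catalyst attributes. The object name, column names, and values are hypothetical, chosen only for illustration:

  object OrdinalMappingSketch extends App {
    // Columns stored in the partition directory path, in partition-row order.
    val partitioningAttributes = Seq("year", "month")
    // Columns the query requested, in final output order.
    val requestedAttributes = Seq("value", "month", "year")

    // Ordinal of each partition column within the partition row.
    val partitionAttributeOrdinals = partitioningAttributes.zipWithIndex.toMap

    // Pairs of (ordinal in partition row, ordinal in final output),
    // mirroring the flatMap over attributes.zipWithIndex in the patch.
    val requestedPartitionOrdinals = requestedAttributes.zipWithIndex.flatMap {
      case (attribute, finalOrdinal) =>
        partitionAttributeOrdinals.get(attribute).map(_ -> finalOrdinal)
    }.toArray

    // "month" is partition ordinal 1 and lands at output ordinal 1;
    // "year" is partition ordinal 0 and lands at output ordinal 2.
    assert(requestedPartitionOrdinals.toSeq == Seq(1 -> 1, 0 -> 2))
    println(requestedPartitionOrdinals.mkString(", "))
  }
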
@@ -96,7 +95,7 @@ case class ParquetTableScan(
     // Store both requested and original schema in `Configuration`
     conf.set(
       RowReadSupport.SPARK_ROW_REQUESTED_SCHEMA,
-      ParquetTypesConverter.convertToString(normalOutput))
+      ParquetTypesConverter.convertToString(output))
     conf.set(
       RowWriteSupport.SPARK_ROW_SCHEMA,
       ParquetTypesConverter.convertToString(relation.output))
@@ -124,7 +123,7 @@ case class ParquetTableScan(
       classOf[Row],
       conf)
 
-    if (partOutput.nonEmpty) {
+    if (requestedPartitionOrdinals.nonEmpty) {
       baseRDD.mapPartitionsWithInputSplit { case (split, iter) =>
         val partValue = "([^=]+)=([^=]+)".r
         val partValues =
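
As an aside, the `partValue` regex above extracts Hive-style `key=value` segments from the input split's path. A self-contained sketch of that parsing step; the path and object name below are made up for illustration:

  object PartitionPathSketch extends App {
    val partValue = "([^=]+)=([^=]+)".r

    // A hypothetical partition directory path, as Hive-style layouts produce.
    val path = "hdfs://nn/warehouse/table/year=2014/month=01/part-00000.parquet"

    // Keep only the path segments that look like key=value pairs.
    val partValues = path.split("/").flatMap {
      case partValue(key, value) => Some(key -> value)
      case _ => None
    }.toMap

    assert(partValues == Map("year" -> "2014", "month" -> "01"))
    println(partValues)
  }
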
@@ -137,15 +136,25 @@ case class ParquetTableScan(
               case _ => None
             }.toMap
 
+        // Convert the partitioning attributes into the correct types
         val partitionRowValues =
-          partOutput.map(a => Cast(Literal(partValues(a.name)), a.dataType).eval(EmptyRow))
+          relation.partitioningAttributes
+            .map(a => Cast(Literal(partValues(a.name)), a.dataType).eval(EmptyRow))
 
         new Iterator[Row] {
-          private[this] val joinedRow = new JoinedRow5(Row(partitionRowValues: _*), null)
-
           def hasNext = iter.hasNext
-
-          def next() = joinedRow.withRight(iter.next()._2)
+          def next() = {
+            val row = iter.next()._2.asInstanceOf[SpecificMutableRow]
+
+            // Parquet will leave partitioning columns empty, so we fill them in here.
+            var i = 0
+            while (i < requestedPartitionOrdinals.size) {
+              row(requestedPartitionOrdinals(i)._2) =
+                partitionRowValues(requestedPartitionOrdinals(i)._1)
+              i += 1
+            }
+            row
+          }
         }
       }
     } else {
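
To see why the in-place fill works, here is a minimal sketch of the new `next()` loop with a plain `Array[Any]` standing in for `SpecificMutableRow`. The object name and all values are hypothetical; the pairs computed earlier route each casted partition value into its slot in the output row:

  object PartitionFillSketch extends App {
    // (ordinal in partition row, ordinal in final output), as computed above.
    val requestedPartitionOrdinals = Array(1 -> 1, 0 -> 2)

    // Casted partition values in partition-row order, e.g. year, month.
    val partitionRowValues = Seq[Any](2014, 1)

    // A row as Parquet returned it: partitioning columns left empty.
    val row: Array[Any] = Array("some-value", null, null)

    var i = 0
    while (i < requestedPartitionOrdinals.size) {
      row(requestedPartitionOrdinals(i)._2) =
        partitionRowValues(requestedPartitionOrdinals(i)._1)
      i += 1
    }

    assert(row.toSeq == Seq("some-value", 1, 2014))
    println(row.mkString(", "))
  }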