Skip to content

Commit 4dec469

Browse files
committed
Handle columns that exist in the Hive table but not in the partition when filling values.
1 parent d3db2fd commit 4dec469

File tree

1 file changed

+56
-37
lines changed

1 file changed

+56
-37
lines changed

sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala

Lines changed: 56 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717

1818
package org.apache.spark.sql.hive
1919

20+
import scala.collection.JavaConversions._
21+
2022
import org.apache.hadoop.conf.Configuration
2123
import org.apache.hadoop.fs.{Path, PathFilter}
2224
import org.apache.hadoop.hive.conf.HiveConf
@@ -332,47 +334,60 @@ private[hive] object HadoopTableReader extends HiveInspectors with Logging {
332334

333335
logDebug(soi.toString)
334336

337+
val allStructFieldNames = soi.getAllStructFieldRefs().toList
338+
.map(fieldRef => fieldRef.getFieldName())
339+
335340
val (fieldRefs, fieldOrdinals) = nonPartitionKeyAttrs.map { case (attr, ordinal) =>
336-
soi.getStructFieldRef(attr.name) -> ordinal
341+
// Check whether the partition contains this attribute
342+
if (allStructFieldNames.contains(attr.name)) {
343+
soi.getStructFieldRef(attr.name) -> ordinal
344+
} else {
345+
(null, ordinal)
346+
}
337347
}.unzip
338348

339349
/**
340350
* Builds specific unwrappers ahead of time according to object inspector
341351
* types to avoid pattern matching and branching costs per row.
342352
*/
343-
val unwrappers: Seq[(Any, MutableRow, Int) => Unit] = fieldRefs.map {
344-
_.getFieldObjectInspector match {
345-
case oi: BooleanObjectInspector =>
346-
(value: Any, row: MutableRow, ordinal: Int) => row.setBoolean(ordinal, oi.get(value))
347-
case oi: ByteObjectInspector =>
348-
(value: Any, row: MutableRow, ordinal: Int) => row.setByte(ordinal, oi.get(value))
349-
case oi: ShortObjectInspector =>
350-
(value: Any, row: MutableRow, ordinal: Int) => row.setShort(ordinal, oi.get(value))
351-
case oi: IntObjectInspector =>
352-
(value: Any, row: MutableRow, ordinal: Int) => row.setInt(ordinal, oi.get(value))
353-
case oi: LongObjectInspector =>
354-
(value: Any, row: MutableRow, ordinal: Int) => row.setLong(ordinal, oi.get(value))
355-
case oi: FloatObjectInspector =>
356-
(value: Any, row: MutableRow, ordinal: Int) => row.setFloat(ordinal, oi.get(value))
357-
case oi: DoubleObjectInspector =>
358-
(value: Any, row: MutableRow, ordinal: Int) => row.setDouble(ordinal, oi.get(value))
359-
case oi: HiveVarcharObjectInspector =>
360-
(value: Any, row: MutableRow, ordinal: Int) =>
361-
row.setString(ordinal, oi.getPrimitiveJavaObject(value).getValue)
362-
case oi: HiveDecimalObjectInspector =>
363-
(value: Any, row: MutableRow, ordinal: Int) =>
364-
row.update(ordinal, HiveShim.toCatalystDecimal(oi, value))
365-
case oi: TimestampObjectInspector =>
366-
(value: Any, row: MutableRow, ordinal: Int) =>
367-
row.update(ordinal, oi.getPrimitiveJavaObject(value).clone())
368-
case oi: DateObjectInspector =>
369-
(value: Any, row: MutableRow, ordinal: Int) =>
370-
row.update(ordinal, DateUtils.fromJavaDate(oi.getPrimitiveJavaObject(value)))
371-
case oi: BinaryObjectInspector =>
372-
(value: Any, row: MutableRow, ordinal: Int) =>
373-
row.update(ordinal, oi.getPrimitiveJavaObject(value))
374-
case oi =>
375-
(value: Any, row: MutableRow, ordinal: Int) => row(ordinal) = unwrap(value, oi)
353+
val unwrappers: Seq[(Any, MutableRow, Int) => Unit] = fieldRefs.map { ref =>
354+
if (ref == null) {
355+
// Placeholder, never invoked: fields with a null ref are set to null before any unwrapper is called
356+
(value: Any, row: MutableRow, ordinal: Int) => row.setNullAt(ordinal)
357+
} else {
358+
ref.getFieldObjectInspector match {
359+
case oi: BooleanObjectInspector =>
360+
(value: Any, row: MutableRow, ordinal: Int) => row.setBoolean(ordinal, oi.get(value))
361+
case oi: ByteObjectInspector =>
362+
(value: Any, row: MutableRow, ordinal: Int) => row.setByte(ordinal, oi.get(value))
363+
case oi: ShortObjectInspector =>
364+
(value: Any, row: MutableRow, ordinal: Int) => row.setShort(ordinal, oi.get(value))
365+
case oi: IntObjectInspector =>
366+
(value: Any, row: MutableRow, ordinal: Int) => row.setInt(ordinal, oi.get(value))
367+
case oi: LongObjectInspector =>
368+
(value: Any, row: MutableRow, ordinal: Int) => row.setLong(ordinal, oi.get(value))
369+
case oi: FloatObjectInspector =>
370+
(value: Any, row: MutableRow, ordinal: Int) => row.setFloat(ordinal, oi.get(value))
371+
case oi: DoubleObjectInspector =>
372+
(value: Any, row: MutableRow, ordinal: Int) => row.setDouble(ordinal, oi.get(value))
373+
case oi: HiveVarcharObjectInspector =>
374+
(value: Any, row: MutableRow, ordinal: Int) =>
375+
row.setString(ordinal, oi.getPrimitiveJavaObject(value).getValue)
376+
case oi: HiveDecimalObjectInspector =>
377+
(value: Any, row: MutableRow, ordinal: Int) =>
378+
row.update(ordinal, HiveShim.toCatalystDecimal(oi, value))
379+
case oi: TimestampObjectInspector =>
380+
(value: Any, row: MutableRow, ordinal: Int) =>
381+
row.update(ordinal, oi.getPrimitiveJavaObject(value).clone())
382+
case oi: DateObjectInspector =>
383+
(value: Any, row: MutableRow, ordinal: Int) =>
384+
row.update(ordinal, DateUtils.fromJavaDate(oi.getPrimitiveJavaObject(value)))
385+
case oi: BinaryObjectInspector =>
386+
(value: Any, row: MutableRow, ordinal: Int) =>
387+
row.update(ordinal, oi.getPrimitiveJavaObject(value))
388+
case oi =>
389+
(value: Any, row: MutableRow, ordinal: Int) => row(ordinal) = unwrap(value, oi)
390+
}
376391
}
377392
}
378393

@@ -383,11 +398,15 @@ private[hive] object HadoopTableReader extends HiveInspectors with Logging {
383398
val raw = converter.convert(rawDeser.deserialize(value))
384399
var i = 0
385400
while (i < fieldRefs.length) {
386-
val fieldValue = soi.getStructFieldData(raw, fieldRefs(i))
387-
if (fieldValue == null) {
401+
if (fieldRefs(i) == null) {
388402
mutableRow.setNullAt(fieldOrdinals(i))
389403
} else {
390-
unwrappers(i)(fieldValue, mutableRow, fieldOrdinals(i))
404+
val fieldValue = soi.getStructFieldData(raw, fieldRefs(i))
405+
if (fieldValue == null) {
406+
mutableRow.setNullAt(fieldOrdinals(i))
407+
} else {
408+
unwrappers(i)(fieldValue, mutableRow, fieldOrdinals(i))
409+
}
391410
}
392411
i += 1
393412
}

0 commit comments

Comments
 (0)