1717
1818package org .apache .spark .sql .hive
1919
20+ import scala .collection .JavaConversions ._
21+
2022import org .apache .hadoop .conf .Configuration
2123import org .apache .hadoop .fs .{Path , PathFilter }
2224import org .apache .hadoop .hive .conf .HiveConf
@@ -332,47 +334,60 @@ private[hive] object HadoopTableReader extends HiveInspectors with Logging {
332334
333335 logDebug(soi.toString)
334336
337+ val allStructFieldNames = soi.getAllStructFieldRefs().toList
338+ .map(fieldRef => fieldRef.getFieldName())
339+
335340 val (fieldRefs, fieldOrdinals) = nonPartitionKeyAttrs.map { case (attr, ordinal) =>
336- soi.getStructFieldRef(attr.name) -> ordinal
341+ // Check whether the partition contains this attribute
342+ if (allStructFieldNames.contains(attr.name)) {
343+ soi.getStructFieldRef(attr.name) -> ordinal
344+ } else {
345+ (null , ordinal)
346+ }
337347 }.unzip
338348
339349 /**
340350 * Builds specific unwrappers ahead of time according to object inspector
341351 * types to avoid pattern matching and branching costs per row.
342352 */
343- val unwrappers : Seq [(Any , MutableRow , Int ) => Unit ] = fieldRefs.map {
344- _.getFieldObjectInspector match {
345- case oi : BooleanObjectInspector =>
346- (value : Any , row : MutableRow , ordinal : Int ) => row.setBoolean(ordinal, oi.get(value))
347- case oi : ByteObjectInspector =>
348- (value : Any , row : MutableRow , ordinal : Int ) => row.setByte(ordinal, oi.get(value))
349- case oi : ShortObjectInspector =>
350- (value : Any , row : MutableRow , ordinal : Int ) => row.setShort(ordinal, oi.get(value))
351- case oi : IntObjectInspector =>
352- (value : Any , row : MutableRow , ordinal : Int ) => row.setInt(ordinal, oi.get(value))
353- case oi : LongObjectInspector =>
354- (value : Any , row : MutableRow , ordinal : Int ) => row.setLong(ordinal, oi.get(value))
355- case oi : FloatObjectInspector =>
356- (value : Any , row : MutableRow , ordinal : Int ) => row.setFloat(ordinal, oi.get(value))
357- case oi : DoubleObjectInspector =>
358- (value : Any , row : MutableRow , ordinal : Int ) => row.setDouble(ordinal, oi.get(value))
359- case oi : HiveVarcharObjectInspector =>
360- (value : Any , row : MutableRow , ordinal : Int ) =>
361- row.setString(ordinal, oi.getPrimitiveJavaObject(value).getValue)
362- case oi : HiveDecimalObjectInspector =>
363- (value : Any , row : MutableRow , ordinal : Int ) =>
364- row.update(ordinal, HiveShim .toCatalystDecimal(oi, value))
365- case oi : TimestampObjectInspector =>
366- (value : Any , row : MutableRow , ordinal : Int ) =>
367- row.update(ordinal, oi.getPrimitiveJavaObject(value).clone())
368- case oi : DateObjectInspector =>
369- (value : Any , row : MutableRow , ordinal : Int ) =>
370- row.update(ordinal, DateUtils .fromJavaDate(oi.getPrimitiveJavaObject(value)))
371- case oi : BinaryObjectInspector =>
372- (value : Any , row : MutableRow , ordinal : Int ) =>
373- row.update(ordinal, oi.getPrimitiveJavaObject(value))
374- case oi =>
375- (value : Any , row : MutableRow , ordinal : Int ) => row(ordinal) = unwrap(value, oi)
353+ val unwrappers : Seq [(Any , MutableRow , Int ) => Unit ] = fieldRefs.map { ref =>
354+ if (ref == null ) {
355+ // Placeholder, never used
356+ (value : Any , row : MutableRow , ordinal : Int ) => row.setNullAt(ordinal)
357+ } else {
358+ ref.getFieldObjectInspector match {
359+ case oi : BooleanObjectInspector =>
360+ (value : Any , row : MutableRow , ordinal : Int ) => row.setBoolean(ordinal, oi.get(value))
361+ case oi : ByteObjectInspector =>
362+ (value : Any , row : MutableRow , ordinal : Int ) => row.setByte(ordinal, oi.get(value))
363+ case oi : ShortObjectInspector =>
364+ (value : Any , row : MutableRow , ordinal : Int ) => row.setShort(ordinal, oi.get(value))
365+ case oi : IntObjectInspector =>
366+ (value : Any , row : MutableRow , ordinal : Int ) => row.setInt(ordinal, oi.get(value))
367+ case oi : LongObjectInspector =>
368+ (value : Any , row : MutableRow , ordinal : Int ) => row.setLong(ordinal, oi.get(value))
369+ case oi : FloatObjectInspector =>
370+ (value : Any , row : MutableRow , ordinal : Int ) => row.setFloat(ordinal, oi.get(value))
371+ case oi : DoubleObjectInspector =>
372+ (value : Any , row : MutableRow , ordinal : Int ) => row.setDouble(ordinal, oi.get(value))
373+ case oi : HiveVarcharObjectInspector =>
374+ (value : Any , row : MutableRow , ordinal : Int ) =>
375+ row.setString(ordinal, oi.getPrimitiveJavaObject(value).getValue)
376+ case oi : HiveDecimalObjectInspector =>
377+ (value : Any , row : MutableRow , ordinal : Int ) =>
378+ row.update(ordinal, HiveShim .toCatalystDecimal(oi, value))
379+ case oi : TimestampObjectInspector =>
380+ (value : Any , row : MutableRow , ordinal : Int ) =>
381+ row.update(ordinal, oi.getPrimitiveJavaObject(value).clone())
382+ case oi : DateObjectInspector =>
383+ (value : Any , row : MutableRow , ordinal : Int ) =>
384+ row.update(ordinal, DateUtils .fromJavaDate(oi.getPrimitiveJavaObject(value)))
385+ case oi : BinaryObjectInspector =>
386+ (value : Any , row : MutableRow , ordinal : Int ) =>
387+ row.update(ordinal, oi.getPrimitiveJavaObject(value))
388+ case oi =>
389+ (value : Any , row : MutableRow , ordinal : Int ) => row(ordinal) = unwrap(value, oi)
390+ }
376391 }
377392 }
378393
@@ -383,11 +398,15 @@ private[hive] object HadoopTableReader extends HiveInspectors with Logging {
383398 val raw = converter.convert(rawDeser.deserialize(value))
384399 var i = 0
385400 while (i < fieldRefs.length) {
386- val fieldValue = soi.getStructFieldData(raw, fieldRefs(i))
387- if (fieldValue == null ) {
401+ if (fieldRefs(i) == null ) {
388402 mutableRow.setNullAt(fieldOrdinals(i))
389403 } else {
390- unwrappers(i)(fieldValue, mutableRow, fieldOrdinals(i))
404+ val fieldValue = soi.getStructFieldData(raw, fieldRefs(i))
405+ if (fieldValue == null ) {
406+ mutableRow.setNullAt(fieldOrdinals(i))
407+ } else {
408+ unwrappers(i)(fieldValue, mutableRow, fieldOrdinals(i))
409+ }
391410 }
392411 i += 1
393412 }
0 commit comments