
Commit ef2123e

Address comments.
1 parent 8ac1acf commit ef2123e

File tree

2 files changed: +6 −29 lines


sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala

Lines changed: 4 additions & 7 deletions
@@ -134,8 +134,8 @@ class OrcFileFormat extends FileFormat with DataSourceRegister with Serializable
       // SPARK-8501: Empty ORC files always have an empty schema stored in their footer. In this
       // case, `OrcFileOperator.readSchema` returns `None`, and we can't read the underlying file
       // using the given physical schema. Instead, we simply return an empty iterator.
-      val maybePhysicalSchema = OrcFileOperator.readSchema(Seq(file.filePath), Some(conf))
-      if (maybePhysicalSchema.isEmpty) {
+      val isEmptyFile = OrcFileOperator.readSchema(Seq(file.filePath), Some(conf)).isEmpty
+      if (isEmptyFile) {
         Iterator.empty
       } else {
         OrcRelation.setRequiredColumns(conf, dataSchema, requiredSchema)
@@ -284,10 +284,7 @@ private[orc] object OrcRelation extends HiveInspectors {
       case (field, ordinal) =>
         var ref = oi.getStructFieldRef(field.name)
         if (ref == null) {
-          val maybeIndex = dataSchema.getFieldIndex(field.name)
-          if (maybeIndex.isDefined) {
-            ref = oi.getStructFieldRef("_col" + maybeIndex.get)
-          }
+          ref = oi.getStructFieldRef("_col" + dataSchema.fieldIndex(field.name))
         }
         ref -> ordinal
     }.unzip
@@ -300,7 +297,7 @@ private[orc] object OrcRelation extends HiveInspectors {
     val length = fieldRefs.length
     while (i < length) {
       val fieldRef = fieldRefs(i)
-      val fieldValue = if (fieldRef == null) null else oi.getStructFieldData(raw, fieldRefs(i))
+      val fieldValue = if (fieldRef == null) null else oi.getStructFieldData(raw, fieldRef)
       if (fieldValue == null) {
         mutableRow.setNullAt(fieldOrdinals(i))
       } else {
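
Note on the OrcFileFormat.scala change: Hive-written ORC files store columns under positional physical names ("_col0", "_col1", ...), so a lookup by logical field name can come back null. The patch replaces the Option-based `dataSchema.getFieldIndex` branch with `dataSchema.fieldIndex`, which builds the positional name directly; unlike the old code, `StructType.fieldIndex` throws IllegalArgumentException if the field is genuinely absent, instead of silently leaving `ref` null. Below is a minimal, self-contained sketch of that fallback outside Spark; `PhysicalOrcStruct` and `resolveField` are hypothetical names for illustration, not Spark API.

object ColFallbackSketch {
  // Stand-in for the ORC ObjectInspector: maps physical column names to field refs.
  final case class PhysicalOrcStruct(refs: Map[String, Int]) {
    def getStructFieldRef(name: String): Option[Int] = refs.get(name)
  }

  def resolveField(oi: PhysicalOrcStruct, dataSchema: Seq[String], name: String): Option[Int] =
    oi.getStructFieldRef(name)
      // Fallback mirroring `"_col" + dataSchema.fieldIndex(field.name)` in the patch:
      // Hive-written files only know positional names. (Here a missing name yields
      // "_col-1", which simply fails the lookup; Spark's fieldIndex would throw.)
      .orElse(oi.getStructFieldRef("_col" + dataSchema.indexOf(name)))

  def main(args: Array[String]): Unit = {
    val hiveWritten = PhysicalOrcStruct(Map("_col0" -> 0, "_col1" -> 1))
    val schema = Seq("click_id", "search_id")
    println(resolveField(hiveWritten, schema, "search_id")) // Some(1), via "_col1"
    println(resolveField(hiveWritten, schema, "missing"))   // None
  }
}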

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala

Lines changed: 2 additions & 22 deletions
@@ -2078,7 +2078,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
         )

         checkAnswer(
-          sql(s"SELECT * FROM $db.t"),
+          sql(s"SELECT click_id, search_id, uid, ts, hour FROM $db.t"),
           Row("12", "2", 12345, "98765", "01"))

         client.runSqlHive(s"ALTER TABLE $db.t ADD COLUMNS (dummy string)")
@@ -2100,30 +2100,10 @@
           Row(null, "12"))

         checkAnswer(
-          sql(s"SELECT * FROM $db.t"),
+          sql(s"SELECT click_id, search_id, uid, dummy, ts, hour FROM $db.t"),
           Row("12", "2", 12345, null, "98765", "01"))
       }
     }
   }
-
-  // This test case is added to prevent regression.
-  test("SPARK-22267 Spark SQL incorrectly reads ORC files when column order is different") {
-    withTempDir { dir =>
-      val path = dir.getCanonicalPath
-
-      Seq(1 -> 2).toDF("c1", "c2").write.format("orc").mode("overwrite").save(path)
-      checkAnswer(spark.read.orc(path), Row(1, 2))
-
-      Seq("true", "false").foreach { value =>
-        withTable("t") {
-          withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> value) {
-            sql(s"CREATE EXTERNAL TABLE t(c2 INT, c1 INT) STORED AS ORC LOCATION '$path'")
-            // The correct answer is Row(2, 1). SPARK-22267 should fix this later.
-            checkAnswer(spark.table("t"), if (value == "true") Row(2, 1) else Row(1, 2))
-          }
-        }
-      }
-    }
-  }
 }
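
For context on the test removed above: it pinned down the SPARK-22267 behavior, where an ORC file written with columns (c1, c2) and read through a metastore table declared as (c2, c1) came back positionally swapped. A standalone sketch of the same scenario follows, assuming a local SparkSession with Hive support on the classpath; the object name, path, and session setup are illustrative, not part of the patch.

import org.apache.spark.sql.SparkSession

object OrcColumnOrderRepro {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .enableHiveSupport()
      .getOrCreate()
    import spark.implicits._

    val path = "/tmp/orc-column-order" // illustrative location
    // Write an ORC file whose physical column order is (c1, c2).
    Seq(1 -> 2).toDF("c1", "c2").write.format("orc").mode("overwrite").save(path)

    // Declare the table with the columns swapped relative to the file layout.
    spark.sql(s"CREATE EXTERNAL TABLE t(c2 INT, c1 INT) STORED AS ORC LOCATION '$path'")

    // Matching by name, the row should come back as Row(2, 1); SPARK-22267
    // tracked readers that instead returned Row(1, 2) by matching positionally.
    spark.table("t").show()
  }
}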
