
Commit ef2123e

Address comments.
1 parent 8ac1acf commit ef2123e

File tree

2 files changed: +6 −29 lines


sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala

Lines changed: 4 additions & 7 deletions
@@ -134,8 +134,8 @@ class OrcFileFormat extends FileFormat with DataSourceRegister with Serializable
       // SPARK-8501: Empty ORC files always have an empty schema stored in their footer. In this
       // case, `OrcFileOperator.readSchema` returns `None`, and we can't read the underlying file
       // using the given physical schema. Instead, we simply return an empty iterator.
-      val maybePhysicalSchema = OrcFileOperator.readSchema(Seq(file.filePath), Some(conf))
-      if (maybePhysicalSchema.isEmpty) {
+      val isEmptyFile = OrcFileOperator.readSchema(Seq(file.filePath), Some(conf)).isEmpty
+      if (isEmptyFile) {
         Iterator.empty
       } else {
         OrcRelation.setRequiredColumns(conf, dataSchema, requiredSchema)
@@ -284,10 +284,7 @@ private[orc] object OrcRelation extends HiveInspectors {
       case (field, ordinal) =>
         var ref = oi.getStructFieldRef(field.name)
         if (ref == null) {
-          val maybeIndex = dataSchema.getFieldIndex(field.name)
-          if (maybeIndex.isDefined) {
-            ref = oi.getStructFieldRef("_col" + maybeIndex.get)
-          }
+          ref = oi.getStructFieldRef("_col" + dataSchema.fieldIndex(field.name))
         }
         ref -> ordinal
     }.unzip
@@ -300,7 +297,7 @@ private[orc] object OrcRelation extends HiveInspectors {
     val length = fieldRefs.length
     while (i < length) {
       val fieldRef = fieldRefs(i)
-      val fieldValue = if (fieldRef == null) null else oi.getStructFieldData(raw, fieldRefs(i))
+      val fieldValue = if (fieldRef == null) null else oi.getStructFieldData(raw, fieldRef)
       if (fieldValue == null) {
         mutableRow.setNullAt(fieldOrdinals(i))
       } else {
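
Note on the OrcFileFormat.scala change: Hive-written ORC files store columns under positional physical names ("_col0", "_col1", ...), so a lookup by logical field name can come back null. The patch replaces the Option-based `dataSchema.getFieldIndex` branch with `dataSchema.fieldIndex`, which builds the positional name directly; unlike the old code, `StructType.fieldIndex` throws IllegalArgumentException if the field is genuinely absent, instead of silently leaving `ref` null. Below is a minimal, self-contained sketch of that fallback outside Spark; `PhysicalOrcStruct` and `resolveField` are hypothetical names for illustration, not Spark API.

object ColFallbackSketch {
  // Stand-in for the ORC ObjectInspector: maps physical column names to field refs.
  final case class PhysicalOrcStruct(refs: Map[String, Int]) {
    def getStructFieldRef(name: String): Option[Int] = refs.get(name)
  }

  def resolveField(oi: PhysicalOrcStruct, dataSchema: Seq[String], name: String): Option[Int] =
    oi.getStructFieldRef(name)
      // Fallback mirroring `"_col" + dataSchema.fieldIndex(field.name)` in the patch:
      // Hive-written files only know positional names. (Here a missing name yields
      // "_col-1", which simply fails the lookup; Spark's fieldIndex would throw.)
      .orElse(oi.getStructFieldRef("_col" + dataSchema.indexOf(name)))

  def main(args: Array[String]): Unit = {
    val hiveWritten = PhysicalOrcStruct(Map("_col0" -> 0, "_col1" -> 1))
    val schema = Seq("click_id", "search_id")
    println(resolveField(hiveWritten, schema, "search_id")) // Some(1), via "_col1"
    println(resolveField(hiveWritten, schema, "missing"))   // None
  }
}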

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala

Lines changed: 2 additions & 22 deletions
@@ -2078,7 +2078,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
         )

         checkAnswer(
-          sql(s"SELECT * FROM $db.t"),
+          sql(s"SELECT click_id, search_id, uid, ts, hour FROM $db.t"),
           Row("12", "2", 12345, "98765", "01"))

         client.runSqlHive(s"ALTER TABLE $db.t ADD COLUMNS (dummy string)")
@@ -2100,30 +2100,10 @@
           Row(null, "12"))

         checkAnswer(
-          sql(s"SELECT * FROM $db.t"),
+          sql(s"SELECT click_id, search_id, uid, dummy, ts, hour FROM $db.t"),
           Row("12", "2", 12345, null, "98765", "01"))
       }
     }
   }
-
-  // This test case is added to prevent regression.
-  test("SPARK-22267 Spark SQL incorrectly reads ORC files when column order is different") {
-    withTempDir { dir =>
-      val path = dir.getCanonicalPath
-
-      Seq(1 -> 2).toDF("c1", "c2").write.format("orc").mode("overwrite").save(path)
-      checkAnswer(spark.read.orc(path), Row(1, 2))
-
-      Seq("true", "false").foreach { value =>
-        withTable("t") {
-          withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> value) {
-            sql(s"CREATE EXTERNAL TABLE t(c2 INT, c1 INT) STORED AS ORC LOCATION '$path'")
-            // The correct answer is Row(2, 1). SPARK-22267 should fix this later.
-            checkAnswer(spark.table("t"), if (value == "true") Row(2, 1) else Row(1, 2))
-          }
-        }
-      }
-    }
-  }
 }
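
For context on the test removed above: it pinned down the SPARK-22267 behavior, where an ORC file written with columns (c1, c2) and read through a metastore table declared as (c2, c1) came back positionally swapped. A standalone sketch of the same scenario follows, assuming a local SparkSession with Hive support on the classpath; the object name, path, and session setup are illustrative, not part of the patch.

import org.apache.spark.sql.SparkSession

object OrcColumnOrderRepro {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .enableHiveSupport()
      .getOrCreate()
    import spark.implicits._

    val path = "/tmp/orc-column-order" // illustrative location
    // Write an ORC file whose physical column order is (c1, c2).
    Seq(1 -> 2).toDF("c1", "c2").write.format("orc").mode("overwrite").save(path)

    // Declare the table with the columns swapped relative to the file layout.
    spark.sql(s"CREATE EXTERNAL TABLE t(c2 INT, c1 INT) STORED AS ORC LOCATION '$path'")

    // Matching by name, the row should come back as Row(2, 1); SPARK-22267
    // tracked readers that instead returned Row(1, 2) by matching positionally.
    spark.table("t").show()
  }
}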
