Skip to content

Commit befb22d

Browse files
cxzl25 authored and gatorsmile committed
[SPARK-23230][SQL] When hive.default.fileformat is set to another file type, creating a textfile table causes a serde error
When hive.default.fileformat is set to another file type (for example, orc), creating a textfile table causes a serde error: the table gets the serde of the default file format instead of the text serde. The default serde for both textfile and sequencefile should be org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.

```
set hive.default.fileformat=orc;
create table tbl( i string ) stored as textfile;
desc formatted tbl;

Serde Library org.apache.hadoop.hive.ql.io.orc.OrcSerde
InputFormat org.apache.hadoop.mapred.TextInputFormat
OutputFormat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
```

Author: sychen <[email protected]>

Closes #20406 from cxzl25/default_serde.

(cherry picked from commit 4104b68)

Signed-off-by: gatorsmile <[email protected]>
1 parent 2b80571 commit befb22d

File tree

2 files changed

+23
-2
lines changed

2 files changed

+23
-2
lines changed

sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@ object HiveSerDe {
3131
"sequencefile" ->
3232
HiveSerDe(
3333
inputFormat = Option("org.apache.hadoop.mapred.SequenceFileInputFormat"),
34-
outputFormat = Option("org.apache.hadoop.mapred.SequenceFileOutputFormat")),
34+
outputFormat = Option("org.apache.hadoop.mapred.SequenceFileOutputFormat"),
35+
serde = Option("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")),
3536

3637
"rcfile" ->
3738
HiveSerDe(
@@ -54,7 +55,8 @@ object HiveSerDe {
5455
"textfile" ->
5556
HiveSerDe(
5657
inputFormat = Option("org.apache.hadoop.mapred.TextInputFormat"),
57-
outputFormat = Option("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat")),
58+
outputFormat = Option("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"),
59+
serde = Option("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")),
5860

5961
"avro" ->
6062
HiveSerDe(

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,25 @@ class HiveSerDeSuite extends HiveComparisonTest with PlanTest with BeforeAndAfte
100100
assert(output == Some("org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"))
101101
assert(serde == Some("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"))
102102
}
103+
104+
withSQLConf("hive.default.fileformat" -> "orc") {
105+
val (desc, exists) = extractTableDesc(
106+
"CREATE TABLE IF NOT EXISTS fileformat_test (id int) STORED AS textfile")
107+
assert(exists)
108+
assert(desc.storage.inputFormat == Some("org.apache.hadoop.mapred.TextInputFormat"))
109+
assert(desc.storage.outputFormat ==
110+
Some("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"))
111+
assert(desc.storage.serde == Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"))
112+
}
113+
114+
withSQLConf("hive.default.fileformat" -> "orc") {
115+
val (desc, exists) = extractTableDesc(
116+
"CREATE TABLE IF NOT EXISTS fileformat_test (id int) STORED AS sequencefile")
117+
assert(exists)
118+
assert(desc.storage.inputFormat == Some("org.apache.hadoop.mapred.SequenceFileInputFormat"))
119+
assert(desc.storage.outputFormat == Some("org.apache.hadoop.mapred.SequenceFileOutputFormat"))
120+
assert(desc.storage.serde == Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"))
121+
}
103122
}
104123

105124
test("create hive serde table with new syntax - basic") {

0 commit comments

Comments
 (0)