diff --git a/dev/deps/spark-deps-hadoop-2.6 b/dev/deps/spark-deps-hadoop-2.6
index f50a0aac0aefc..ff6d5c30c1eb4 100644
--- a/dev/deps/spark-deps-hadoop-2.6
+++ b/dev/deps/spark-deps-hadoop-2.6
@@ -2,7 +2,7 @@ JavaEWAH-0.3.2.jar
RoaringBitmap-0.5.11.jar
ST4-4.0.4.jar
activation-1.1.1.jar
-aircompressor-0.8.jar
+aircompressor-0.10.jar
antlr-2.7.7.jar
antlr-runtime-3.4.jar
antlr4-runtime-4.7.jar
@@ -157,8 +157,9 @@ objenesis-2.1.jar
okhttp-3.8.1.jar
okio-1.13.0.jar
opencsv-2.3.jar
-orc-core-1.4.4-nohive.jar
-orc-mapreduce-1.4.4-nohive.jar
+orc-core-1.5.2-nohive.jar
+orc-mapreduce-1.5.2-nohive.jar
+orc-shims-1.5.2.jar
oro-2.0.8.jar
osgi-resource-locator-1.0.1.jar
paranamer-2.8.jar
diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7
index 774f9dc39ce4d..72a94f8953c6c 100644
--- a/dev/deps/spark-deps-hadoop-2.7
+++ b/dev/deps/spark-deps-hadoop-2.7
@@ -2,7 +2,7 @@ JavaEWAH-0.3.2.jar
RoaringBitmap-0.5.11.jar
ST4-4.0.4.jar
activation-1.1.1.jar
-aircompressor-0.8.jar
+aircompressor-0.10.jar
antlr-2.7.7.jar
antlr-runtime-3.4.jar
antlr4-runtime-4.7.jar
@@ -158,8 +158,9 @@ objenesis-2.1.jar
okhttp-3.8.1.jar
okio-1.13.0.jar
opencsv-2.3.jar
-orc-core-1.4.4-nohive.jar
-orc-mapreduce-1.4.4-nohive.jar
+orc-core-1.5.2-nohive.jar
+orc-mapreduce-1.5.2-nohive.jar
+orc-shims-1.5.2.jar
oro-2.0.8.jar
osgi-resource-locator-1.0.1.jar
paranamer-2.8.jar
diff --git a/dev/deps/spark-deps-hadoop-3.1 b/dev/deps/spark-deps-hadoop-3.1
index 19c05ad1e991f..3409dc4613324 100644
--- a/dev/deps/spark-deps-hadoop-3.1
+++ b/dev/deps/spark-deps-hadoop-3.1
@@ -4,7 +4,7 @@ RoaringBitmap-0.5.11.jar
ST4-4.0.4.jar
accessors-smart-1.2.jar
activation-1.1.1.jar
-aircompressor-0.8.jar
+aircompressor-0.10.jar
antlr-2.7.7.jar
antlr-runtime-3.4.jar
antlr4-runtime-4.7.jar
@@ -176,8 +176,9 @@ okhttp-2.7.5.jar
okhttp-3.8.1.jar
okio-1.13.0.jar
opencsv-2.3.jar
-orc-core-1.4.4-nohive.jar
-orc-mapreduce-1.4.4-nohive.jar
+orc-core-1.5.2-nohive.jar
+orc-mapreduce-1.5.2-nohive.jar
+orc-shims-1.5.2.jar
oro-2.0.8.jar
osgi-resource-locator-1.0.1.jar
paranamer-2.8.jar
diff --git a/pom.xml b/pom.xml
index cd567e227f331..4211dd3594085 100644
--- a/pom.xml
+++ b/pom.xml
@@ -130,7 +130,7 @@
1.2.1
10.12.1.1
1.10.0
- 1.4.4
+ 1.5.2
nohive
1.6.0
9.3.20.v20170531
diff --git a/sql/core/pom.xml b/sql/core/pom.xml
index 18ae314309d7b..8873b00e7117a 100644
--- a/sql/core/pom.xml
+++ b/sql/core/pom.xml
@@ -90,11 +90,39 @@
org.apache.orc
orc-core
${orc.classifier}
+
+
+ org.apache.hadoop
+ hadoop-hdfs
+
+
+
+ org.apache.hive
+ hive-storage-api
+
+
org.apache.orc
orc-mapreduce
${orc.classifier}
+
+
+ org.apache.hadoop
+ hadoop-hdfs
+
+
+
+ org.apache.hive
+ hive-storage-api
+
+
org.apache.parquet
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala
index df488a748e3e5..372043e28d5ff 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala
@@ -59,6 +59,19 @@ private[sql] object OrcFileFormat {
def checkFieldNames(names: Seq[String]): Unit = {
names.foreach(checkFieldName)
}
+
+  /**
+   * Renders `dataType` as an ORC schema string with every struct field name backtick-quoted;
+   * a backtick inside a name is doubled so that it cannot end the quoted name early.
+   */
+  def getQuotedSchemaString(dataType: DataType): String = dataType match {
+    case StructType(fields) =>
+      fields.map(f => s"`${f.name.replace("`", "``")}`:${getQuotedSchemaString(f.dataType)}")
+        .mkString("struct<", ",", ">")
+    case ArrayType(elementType, _) => s"array<${getQuotedSchemaString(elementType)}>"
+    case MapType(k, v, _) => s"map<${getQuotedSchemaString(k)},${getQuotedSchemaString(v)}>"
+    case _ => dataType.catalogString // atomic types, UDTs and others: used as-is
+  }
}
/**
@@ -95,7 +108,7 @@ class OrcFileFormat
val conf = job.getConfiguration
- conf.set(MAPRED_OUTPUT_SCHEMA.getAttribute, dataSchema.catalogString)
+ conf.set(MAPRED_OUTPUT_SCHEMA.getAttribute, OrcFileFormat.getQuotedSchemaString(dataSchema))
conf.set(COMPRESS.getAttribute, orcOptions.compressionCodec)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcSerializer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcSerializer.scala
index 899af0750cadf..90d1268028096 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcSerializer.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcSerializer.scala
@@ -223,6 +223,6 @@ class OrcSerializer(dataSchema: StructType) {
* Return a Orc value object for the given Spark schema.
*/
private def createOrcValue(dataType: DataType) = {
- OrcStruct.createValue(TypeDescription.fromString(dataType.catalogString))
+ OrcStruct.createValue(TypeDescription.fromString(OrcFileFormat.getQuotedSchemaString(dataType)))
}
}