Skip to content

Commit c55b9fd

Browse files
committed
[SPARK-35992][BUILD] Upgrade ORC to 1.6.9
### What changes were proposed in this pull request?
This PR aims to upgrade Apache ORC to 1.6.9.

### Why are the changes needed?
This is required to bring ORC-804 in order to fix an ORC encryption masking bug.

### Does this PR introduce _any_ user-facing change?
No. This is not released yet.

### How was this patch tested?
Pass the newly added test case.

Closes #33189 from dongjoon-hyun/SPARK-35992.

Lead-authored-by: Dongjoon Hyun <[email protected]>
Co-authored-by: Dongjoon Hyun <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent 7769644 commit c55b9fd

File tree

4 files changed

+75
-9
lines changed

4 files changed

+75
-9
lines changed

dev/deps/spark-deps-hadoop-2.7-hive-2.3

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@ JTransforms/3.1//JTransforms-3.1.jar
44
RoaringBitmap/0.9.0//RoaringBitmap-0.9.0.jar
55
ST4/4.0.4//ST4-4.0.4.jar
66
activation/1.1.1//activation-1.1.1.jar
7-
aircompressor/0.16//aircompressor-0.16.jar
7+
aircompressor/0.19//aircompressor-0.19.jar
88
algebra_2.12/2.0.0-M2//algebra_2.12-2.0.0-M2.jar
99
annotations/17.0.0//annotations-17.0.0.jar
1010
antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar
@@ -196,9 +196,9 @@ objenesis/2.6//objenesis-2.6.jar
196196
okhttp/3.12.12//okhttp-3.12.12.jar
197197
okio/1.14.0//okio-1.14.0.jar
198198
opencsv/2.3//opencsv-2.3.jar
199-
orc-core/1.6.8//orc-core-1.6.8.jar
200-
orc-mapreduce/1.6.8//orc-mapreduce-1.6.8.jar
201-
orc-shims/1.6.8//orc-shims-1.6.8.jar
199+
orc-core/1.6.9//orc-core-1.6.9.jar
200+
orc-mapreduce/1.6.9//orc-mapreduce-1.6.9.jar
201+
orc-shims/1.6.9//orc-shims-1.6.9.jar
202202
oro/2.0.8//oro-2.0.8.jar
203203
osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar
204204
paranamer/2.8//paranamer-2.8.jar

dev/deps/spark-deps-hadoop-3.2-hive-2.3

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@ JTransforms/3.1//JTransforms-3.1.jar
44
RoaringBitmap/0.9.0//RoaringBitmap-0.9.0.jar
55
ST4/4.0.4//ST4-4.0.4.jar
66
activation/1.1.1//activation-1.1.1.jar
7-
aircompressor/0.16//aircompressor-0.16.jar
7+
aircompressor/0.19//aircompressor-0.19.jar
88
algebra_2.12/2.0.0-M2//algebra_2.12-2.0.0-M2.jar
99
annotations/17.0.0//annotations-17.0.0.jar
1010
antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar
@@ -167,9 +167,9 @@ objenesis/2.6//objenesis-2.6.jar
167167
okhttp/3.12.12//okhttp-3.12.12.jar
168168
okio/1.14.0//okio-1.14.0.jar
169169
opencsv/2.3//opencsv-2.3.jar
170-
orc-core/1.6.8//orc-core-1.6.8.jar
171-
orc-mapreduce/1.6.8//orc-mapreduce-1.6.8.jar
172-
orc-shims/1.6.8//orc-shims-1.6.8.jar
170+
orc-core/1.6.9//orc-core-1.6.9.jar
171+
orc-mapreduce/1.6.9//orc-mapreduce-1.6.9.jar
172+
orc-shims/1.6.9//orc-shims-1.6.9.jar
173173
oro/2.0.8//oro-2.0.8.jar
174174
osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar
175175
paranamer/2.8//paranamer-2.8.jar

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -137,7 +137,7 @@
137137
<!-- After 10.15.1.3, the minimum required version is JDK9 -->
138138
<derby.version>10.14.2.0</derby.version>
139139
<parquet.version>1.12.0</parquet.version>
140-
<orc.version>1.6.8</orc.version>
140+
<orc.version>1.6.9</orc.version>
141141
<jetty.version>9.4.42.v20210604</jetty.version>
142142
<jakartaservlet.version>4.0.3</jakartaservlet.version>
143143
<chill.version>0.10.0</chill.version>

sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcEncryptionSuite.scala

Lines changed: 66 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -159,4 +159,70 @@ class OrcEncryptionSuite extends OrcTest with SharedSparkSession {
159159
}
160160
}
161161
}
162+
163+
test("SPARK-35992: Write and read fully-encrypted columns with default masking") {
164+
val conf = spark.sessionState.newHadoopConf()
165+
val provider = HadoopShimsFactory.get.getHadoopKeyProvider(conf, new Random)
166+
assume(!provider.getKeyNames.isEmpty,
167+
s"$provider doesn't has the test keys. ORC shim is created with old Hadoop libraries")
168+
169+
val df = originalData.toDF("ssn", "email", "name")
170+
171+
withTempPath { dir =>
172+
val path = dir.getAbsolutePath
173+
withSQLConf(
174+
"hadoop.security.key.provider.path" -> "test:///",
175+
"orc.key.provider" -> "hadoop",
176+
"orc.encrypt" -> "pii:ssn,email,name") {
177+
df.write.mode("overwrite").orc(path)
178+
checkAnswer(spark.read.orc(path), df)
179+
}
180+
181+
withSQLConf(
182+
"orc.key.provider" -> "memory",
183+
"orc.encrypt" -> "pii:ssn,email,name") {
184+
checkAnswer(spark.read.orc(path), Row(null, null, null))
185+
}
186+
}
187+
188+
val originalNestedData = Row(1, Row("123456789", "[email protected]", "Dongjoon"))
189+
190+
withTempDir { dir =>
191+
val path = dir.getAbsolutePath
192+
withTable("encrypted") {
193+
sql(
194+
s"""
195+
|CREATE TABLE encrypted (
196+
| id INT,
197+
| contact struct<ssn:STRING, email:STRING, name:STRING>
198+
|)
199+
|USING ORC
200+
|LOCATION "$path"
201+
|OPTIONS (
202+
| hadoop.security.key.provider.path "test:///",
203+
| orc.key.provider "hadoop",
204+
| orc.encrypt "pii:id,contact"
205+
|)
206+
|""".stripMargin)
207+
sql("INSERT INTO encrypted VALUES(1, ('123456789', '[email protected]', 'Dongjoon'))")
208+
checkAnswer(sql("SELECT * FROM encrypted"), originalNestedData)
209+
}
210+
withTable("normal") {
211+
sql(
212+
s"""
213+
|CREATE TABLE normal (
214+
| id INT,
215+
| contact struct<ssn:STRING, email:STRING, name:STRING>
216+
|)
217+
|USING ORC
218+
|LOCATION "$path"
219+
|OPTIONS (
220+
| orc.key.provider "memory"
221+
|)
222+
|""".stripMargin)
223+
checkAnswer(sql("SELECT * FROM normal"), Row(null, null))
224+
checkAnswer(sql("SELECT id, contact.* FROM normal"), Row(null, null, null, null))
225+
}
226+
}
227+
}
162228
}

0 commit comments

Comments (0)