Skip to content

Commit 13a012f

Browse files
committed
[SPARK-23342][SQL][TEST] Add ORC configuration tests for ORC data source
1 parent 8141c3e commit 13a012f

File tree

1 file changed

+73
-1
lines changed

1 file changed

+73
-1
lines changed

sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala

Lines changed: 73 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ package org.apache.spark.sql.execution.datasources.orc
2020
import java.io.File
2121
import java.util.Locale
2222

23-
import org.apache.orc.OrcConf.COMPRESS
23+
import org.apache.orc.{OrcFile, Reader}
24+
import org.apache.orc.OrcConf.{BUFFER_SIZE, COMPRESS, ROW_INDEX_STRIDE, STRIPE_SIZE}
2425
import org.scalatest.BeforeAndAfterAll
2526

2627
import org.apache.spark.sql.Row
@@ -160,6 +161,77 @@ abstract class OrcSuite extends OrcTest with BeforeAndAfterAll {
160161
}
161162
}
162163
}
164+
165+
/**
 * Opens an ORC [[Reader]] for the single ORC file written under `path`.
 *
 * The tests in this suite repartition to 1 before writing, so exactly one
 * ORC data file is expected; this is asserted to fail fast if a test writes
 * more than one file.
 *
 * @param path directory that a preceding `df.write.format("orc").save(...)` wrote to
 * @return an ORC file reader exposing file-level metadata (stripes, row index
 *         stride, compression buffer size) used by the assertions below
 */
private def getReader(path: String): Reader = {
  val conf = spark.sessionState.newHadoopConf()
  val files = OrcUtils.listOrcFiles(path, conf)
  assert(files.length == 1)
  val file = files.head
  val fs = file.getFileSystem(conf)
  // `OrcFile` is already imported; no need for the fully-qualified name.
  val readerOptions = OrcFile.readerOptions(conf).filesystem(fs)
  OrcFile.createReader(file, readerOptions)
}
174+
175+
test("SPARK-23342 Support orc.stripe.size and hive.exec.orc.default.stripe.size") {
  val df = spark.range(1000000).map(_ => scala.util.Random.nextLong).repartition(1)

  // `STRIPE_SIZE` is imported from OrcConf; use it directly for consistency
  // with the ROW_INDEX_STRIDE and BUFFER_SIZE tests below.
  Seq(STRIPE_SIZE).foreach { conf =>
    // Each ORC configuration is exposed under two names: the native ORC
    // attribute and the legacy Hive configuration name. Both must work.
    Seq(conf.getAttribute, conf.getHiveConfName).foreach { name =>
      // Since the default value of orc.stripe.size is 64MB, there exists only 1 stripe.
      withTempPath { path =>
        val dir = path.getCanonicalPath
        df.write.format("orc").save(dir)
        assert(getReader(dir).getStripes().size === 1)
      }

      // A tiny stripe size forces many stripes for the same data.
      withTempPath { path =>
        val dir = path.getCanonicalPath
        df.write.format("orc").option(name, "10000").save(dir)
        assert(getReader(dir).getStripes().size > 100)
      }
    }
  }
}
195+
196+
test("SPARK-23342 Support orc.row.index.stride and hive.exec.orc.default.row.index.stride") {
  val df = spark.range(1000000).map(_ => scala.util.Random.nextLong).repartition(1)

  Seq(ROW_INDEX_STRIDE).foreach { conf =>
    // Each ORC configuration is exposed under two names: the native ORC
    // attribute and the legacy Hive configuration name. Both must work.
    Seq(conf.getAttribute, conf.getHiveConfName).foreach { name =>
      withTempPath { path =>
        val dir = path.getCanonicalPath
        df.write.format("orc").save(dir)
        // Previously this branch wrote the file but asserted nothing; verify
        // the writer actually uses ORC's default row index stride, mirroring
        // the default-value check in the buffer-size test below.
        assert(getReader(dir).getRowIndexStride === ROW_INDEX_STRIDE.getDefaultValue)
      }

      withTempPath { path =>
        val dir = path.getCanonicalPath
        df.write.format("orc").option(name, "1024").save(dir)
        assert(getReader(dir).getRowIndexStride === 1024)
      }
    }
  }
}
214+
215+
test("SPARK-23342 Support orc.compress.size and hive.exec.orc.default.buffer.size") {
  val df = spark.range(1000000).map(_ => scala.util.Random.nextLong).repartition(1)

  Seq(BUFFER_SIZE).foreach { orcConf =>
    // The same setting is reachable via the native ORC attribute and the
    // legacy Hive configuration name; exercise both spellings.
    val confNames = Seq(orcConf.getAttribute, orcConf.getHiveConfName)
    confNames.foreach { confName =>
      // Without an explicit option, the writer must fall back to ORC's
      // built-in default compression buffer size.
      withTempPath { path =>
        val outputDir = path.getCanonicalPath
        df.write.format("orc").save(outputDir)
        val actualDefault = getReader(outputDir).getCompressionSize
        assert(actualDefault === BUFFER_SIZE.getDefaultValue)
      }

      // An explicitly configured buffer size must be honored by the writer.
      withTempPath { path =>
        val outputDir = path.getCanonicalPath

        df.write.format("orc").option(confName, "1024").save(outputDir)
        assert(getReader(outputDir).getCompressionSize === 1024)
      }
    }
  }
}
163235
}
164236

165237
class OrcSourceSuite extends OrcSuite with SharedSQLContext {

0 commit comments

Comments
 (0)