@@ -20,7 +20,8 @@ package org.apache.spark.sql.execution.datasources.orc
2020import java .io .File
2121import java .util .Locale
2222
23- import org .apache .orc .OrcConf .COMPRESS
23+ import org .apache .orc .{OrcFile , Reader }
24+ import org .apache .orc .OrcConf .{BUFFER_SIZE , COMPRESS , ROW_INDEX_STRIDE , STRIPE_SIZE }
2425import org .scalatest .BeforeAndAfterAll
2526
2627import org .apache .spark .sql .Row
@@ -160,6 +161,77 @@ abstract class OrcSuite extends OrcTest with BeforeAndAfterAll {
160161 }
161162 }
162163 }
164+
/**
 * Opens an ORC `Reader` for the ORC file listed under `path`.
 *
 * The files are obtained from `OrcUtils.listOrcFiles`; this helper asserts that exactly one
 * file is listed and creates the reader on that file's own file system.
 *
 * @param path location that is handed to `OrcUtils.listOrcFiles`
 * @return a reader created by `OrcFile.createReader` for the single listed file
 */
private def getReader(path: String): Reader = {
  val hadoopConf = spark.sessionState.newHadoopConf()
  val orcFiles = OrcUtils.listOrcFiles(path, hadoopConf)
  // The tests below inspect file-level metadata, so more than one file would be ambiguous.
  assert(orcFiles.length == 1)
  val orcFile = orcFiles.head
  val fileSystem = orcFile.getFileSystem(hadoopConf)
  val options = OrcFile.readerOptions(hadoopConf).filesystem(fileSystem)
  OrcFile.createReader(orcFile, options)
}
174+
// Verifies that the stripe size can be set through either option key exposed by
// `OrcConf.STRIPE_SIZE` (its ORC attribute name and its Hive configuration name).
test("SPARK-23342 Support orc.stripe.size and hive.exec.orc.default.stripe.size") {
  // `getReader` asserts that exactly one ORC file is listed, hence the single partition.
  val df = spark.range(1000000).map(_ => scala.util.Random.nextLong).repartition(1)

  // Baseline without any option. It does not depend on the option key, so it is written and
  // checked once instead of once per key.
  // Since the default value of orc.stripe.size is 64MB, there exists only 1 stripe.
  withTempPath { path =>
    val dir = path.getCanonicalPath
    df.write.format("orc").save(dir)
    assert(getReader(dir).getStripes().size === 1)
  }

  Seq(STRIPE_SIZE.getAttribute, STRIPE_SIZE.getHiveConfName).foreach { name =>
    withTempPath { path =>
      val dir = path.getCanonicalPath
      // A much smaller stripe size must split the same data into many stripes.
      df.write.format("orc").option(name, "10000").save(dir)
      assert(getReader(dir).getStripes().size > 100, s"option '$name' was not honored")
    }
  }
}
195+
// Verifies that the row index stride can be set through either option key exposed by
// `OrcConf.ROW_INDEX_STRIDE` (its ORC attribute name and its Hive configuration name).
test("SPARK-23342 Support orc.row.index.stride and hive.exec.orc.default.row.index.stride") {
  // `getReader` asserts that exactly one ORC file is listed, hence the single partition.
  val df = spark.range(1000000).map(_ => scala.util.Random.nextLong).repartition(1)

  // Baseline without any option: the written file should report the ORC default stride.
  // Previously this file was written but never inspected; it is now checked, and only once
  // because it does not depend on the option key.
  // NOTE(review): assumes nothing in the test session overrides the ORC default - confirm.
  withTempPath { path =>
    val dir = path.getCanonicalPath
    df.write.format("orc").save(dir)
    assert(getReader(dir).getRowIndexStride === ROW_INDEX_STRIDE.getDefaultValue)
  }

  Seq(ROW_INDEX_STRIDE.getAttribute, ROW_INDEX_STRIDE.getHiveConfName).foreach { name =>
    withTempPath { path =>
      val dir = path.getCanonicalPath
      df.write.format("orc").option(name, "1024").save(dir)
      assert(getReader(dir).getRowIndexStride === 1024, s"option '$name' was not honored")
    }
  }
}
214+
// Verifies that the compression buffer size can be set through either option key exposed by
// `OrcConf.BUFFER_SIZE` (its ORC attribute name and its Hive configuration name).
test("SPARK-23342 Support orc.compress.size and hive.exec.orc.default.buffer.size") {
  // `getReader` asserts that exactly one ORC file is listed, hence the single partition.
  val df = spark.range(1000000).map(_ => scala.util.Random.nextLong).repartition(1)

  // Baseline without any option: the file reports the ORC default buffer size. It does not
  // depend on the option key, so it is written and checked once instead of once per key.
  withTempPath { path =>
    val dir = path.getCanonicalPath
    df.write.format("orc").save(dir)
    assert(getReader(dir).getCompressionSize === BUFFER_SIZE.getDefaultValue)
  }

  Seq(BUFFER_SIZE.getAttribute, BUFFER_SIZE.getHiveConfName).foreach { name =>
    withTempPath { path =>
      val dir = path.getCanonicalPath
      df.write.format("orc").option(name, "1024").save(dir)
      assert(getReader(dir).getCompressionSize === 1024, s"option '$name' was not honored")
    }
  }
}
163235}
164236
165237class OrcSourceSuite extends OrcSuite with SharedSQLContext {
0 commit comments