@@ -22,14 +22,14 @@ import java.net.URI
2222
2323import org .apache .hadoop .conf .Configuration
2424import org .apache .hadoop .fs .{FileStatus , Path }
25+ import org .apache .hadoop .io .WritableComparable
2526import org .apache .hadoop .mapred .JobConf
2627import org .apache .hadoop .mapreduce ._
2728import org .apache .hadoop .mapreduce .lib .input .FileSplit
2829import org .apache .hadoop .mapreduce .task .TaskAttemptContextImpl
2930import org .apache .orc .{OrcUtils => _ , _ }
3031import org .apache .orc .OrcConf .COMPRESS
3132import org .apache .orc .mapred .OrcStruct
32- import org .apache .orc .mapreduce ._
3333
3434import org .apache .spark .TaskContext
3535import org .apache .spark .sql .SparkSession
@@ -155,7 +155,7 @@ class OrcFileFormat
155155 if (orcFilterPushDown && filters.nonEmpty) {
156156 OrcUtils .readCatalystSchema(filePath, conf, ignoreCorruptFiles).foreach { fileSchema =>
157157 OrcFilters .createFilter(fileSchema, filters).foreach { f =>
158- OrcInputFormat .setSearchArgument(conf, f, fileSchema.fieldNames)
158+ mapreduce. OrcInputFormat .setSearchArgument(conf, f, fileSchema.fieldNames)
159159 }
160160 }
161161 }
@@ -193,8 +193,8 @@ class OrcFileFormat
193193
194194 iter.asInstanceOf [Iterator [InternalRow ]]
195195 } else {
196- val orcRecordReader = new OrcInputFormat [OrcStruct ]
197- . createRecordReader(fileSplit, taskAttemptContext)
196+ val orcRecordReader : mapreduce. OrcMapreduceRecordReader [OrcStruct ] =
197+ createRecordReader[ OrcStruct ] (fileSplit, taskAttemptContext)
198198 val iter = new RecordReaderIterator [OrcStruct ](orcRecordReader)
199199 Option (TaskContext .get()).foreach(_.addTaskCompletionListener[Unit ](_ => iter.close()))
200200
@@ -214,6 +214,19 @@ class OrcFileFormat
214214 }
215215 }
216216
/**
 * Opens the ORC file behind `inputSplit` and wraps it in a mapreduce record reader.
 *
 * The file reader is created with the configured `OrcConf.MAX_FILE_LENGTH` and with its
 * `useUTCTimestamp` option switched on; the row options are derived from the split's start
 * offset and length via `mapred.OrcInputFormat.buildOptions`, with `useSelected` enabled.
 *
 * @param inputSplit the split to read; it is cast to a Hadoop `FileSplit`
 * @param taskAttemptContext supplies the Hadoop configuration used for all reader options
 * @tparam V the value type produced by the returned record reader
 * @return a record reader over the rows of the split
 */
private def createRecordReader[V <: WritableComparable[_]](
    inputSplit: InputSplit,
    taskAttemptContext: TaskAttemptContext): mapreduce.OrcMapreduceRecordReader[V] = {
  val split = inputSplit.asInstanceOf[FileSplit]
  val conf = taskAttemptContext.getConfiguration()
  val readOptions = OrcFile.readerOptions(conf)
    .maxLength(OrcConf.MAX_FILE_LENGTH.getLong(conf))
    .useUTCTimestamp(true)
  val file = OrcFile.createReader(split.getPath(), readOptions)
  try {
    val options = org.apache.orc.mapred.OrcInputFormat.buildOptions(
      conf, file, split.getStart(), split.getLength()).useSelected(true)
    new mapreduce.OrcMapreduceRecordReader[V](file, options)
  } catch {
    // Nothing else holds the file reader yet, so release it here before propagating
    // the failure; otherwise the already-opened reader would be leaked.
    case scala.util.control.NonFatal(cause) =>
      try {
        file.close()
      } catch {
        // Keep the original failure as the primary error.
        case scala.util.control.NonFatal(closeError) => cause.addSuppressed(closeError)
      }
      throw cause
  }
}
229+
217230 override def supportDataType (dataType : DataType ): Boolean = dataType match {
218231 case _ : AtomicType => true
219232
0 commit comments