2020use std:: {
2121 fs:: { remove_file, File } ,
2222 io:: BufReader ,
23- path:: PathBuf ,
23+ path:: { Path , PathBuf } ,
2424 sync:: Arc ,
2525} ;
2626
2727use arrow_array:: { RecordBatch , TimestampMillisecondArray } ;
2828use arrow_ipc:: reader:: FileReader ;
29- use arrow_schema:: Schema ;
29+ use arrow_schema:: { ArrowError , Schema , SchemaRef } ;
3030use itertools:: kmerge_by;
3131use tracing:: error;
3232
@@ -35,28 +35,58 @@ use crate::{
3535 utils:: arrow:: { adapt_batch, reverse} ,
3636} ;
3737
38+ #[ derive( Debug ) ]
39+ pub struct ReverseReader {
40+ inner : FileReader < BufReader < File > > ,
41+ idx : usize ,
42+ }
43+
44+ impl ReverseReader {
45+ fn try_new ( path : impl AsRef < Path > ) -> Result < Self , ArrowError > {
46+ let inner = FileReader :: try_new ( BufReader :: new ( File :: open ( path) . unwrap ( ) ) , None ) ?;
47+ let idx = inner. num_batches ( ) ;
48+
49+ Ok ( Self { inner, idx } )
50+ }
51+
52+ fn schema ( & self ) -> SchemaRef {
53+ self . inner . schema ( )
54+ }
55+ }
56+
57+ impl Iterator for ReverseReader {
58+ type Item = Result < RecordBatch , ArrowError > ;
59+
60+ fn next ( & mut self ) -> Option < Self :: Item > {
61+ if self . idx <= 0 {
62+ return None ;
63+ }
64+
65+ self . idx -= 1 ;
66+ if let Err ( e) = self . inner . set_index ( self . idx ) {
67+ return Some ( Err ( e) ) ;
68+ }
69+
70+ self . inner . next ( )
71+ }
72+ }
73+
3874#[ derive( Debug ) ]
3975pub struct MergedRecordReader {
40- pub readers : Vec < FileReader < BufReader < File > > > ,
76+ pub readers : Vec < ReverseReader > ,
4177}
4278
4379impl MergedRecordReader {
44- pub fn new ( files : & [ PathBuf ] ) -> Self {
45- let mut readers = Vec :: with_capacity ( files . len ( ) ) ;
80+ pub fn new ( paths : & [ PathBuf ] ) -> Self {
81+ let mut readers = Vec :: with_capacity ( paths . len ( ) ) ;
4682
47- for file in files {
83+ for path in paths {
4884 //remove empty files before reading
49- if file . metadata ( ) . unwrap ( ) . len ( ) == 0 {
50- error ! ( "Invalid file detected, removing it: {:?}" , file ) ;
51- remove_file ( file ) . unwrap ( ) ;
85+ if path . metadata ( ) . unwrap ( ) . len ( ) == 0 {
86+ error ! ( "Invalid file detected, removing it: {path :?}" ) ;
87+ remove_file ( path ) . unwrap ( ) ;
5288 } else {
53- let Ok ( reader) =
54- FileReader :: try_new ( BufReader :: new ( File :: open ( file) . unwrap ( ) ) , None )
55- else {
56- error ! ( "Invalid file detected, ignoring it: {:?}" , file) ;
57- continue ;
58- } ;
59-
89+ let reader = ReverseReader :: try_new ( path) . unwrap ( ) ;
6090 readers. push ( reader) ;
6191 }
6292 }
0 commit comments