@@ -22,7 +22,7 @@ use quickwit_common::Progress;
22
22
use quickwit_common:: uri:: Uri ;
23
23
use quickwit_metastore:: checkpoint:: PartitionId ;
24
24
use quickwit_proto:: metastore:: SourceType ;
25
- use quickwit_proto:: types:: Position ;
25
+ use quickwit_proto:: types:: { Offset , Position } ;
26
26
use quickwit_storage:: StorageResolver ;
27
27
use tokio:: io:: { AsyncBufReadExt , AsyncRead , AsyncReadExt , BufReader } ;
28
28
@@ -146,8 +146,13 @@ impl DocFileReader {
146
146
pub struct ObjectUriBatchReader {
147
147
partition_id : PartitionId ,
148
148
reader : DocFileReader ,
149
- current_offset : usize ,
150
- is_eof : bool ,
149
+ current_position : Position ,
150
+ }
151
+
152
+ fn parse_offset ( offset : & Offset ) -> anyhow:: Result < usize > {
153
+ offset
154
+ . as_usize ( )
155
+ . context ( "file offset should be stored as usize" )
151
156
}
152
157
153
158
impl ObjectUriBatchReader {
@@ -157,26 +162,22 @@ impl ObjectUriBatchReader {
157
162
uri : & Uri ,
158
163
position : Position ,
159
164
) -> anyhow:: Result < Self > {
160
- let current_offset = match position {
161
- Position :: Beginning => 0 ,
162
- Position :: Offset ( offset) => offset
163
- . as_usize ( )
164
- . context ( "file offset should be stored as usize" ) ?,
165
+ let current_offset = match & position {
165
166
Position :: Eof ( _) => {
166
167
return Ok ( ObjectUriBatchReader {
167
168
partition_id,
168
169
reader : DocFileReader :: empty ( ) ,
169
- current_offset : 0 ,
170
- is_eof : true ,
170
+ current_position : position,
171
171
} ) ;
172
172
}
173
+ Position :: Beginning => 0 ,
174
+ Position :: Offset ( offset) => parse_offset ( offset) ?,
173
175
} ;
174
176
let reader = DocFileReader :: from_uri ( storage_resolver, uri, current_offset) . await ?;
175
177
Ok ( ObjectUriBatchReader {
176
178
partition_id,
177
179
reader,
178
- current_offset,
179
- is_eof : false ,
180
+ current_position : position,
180
181
} )
181
182
}
182
183
@@ -186,11 +187,14 @@ impl ObjectUriBatchReader {
186
187
source_type : SourceType ,
187
188
) -> anyhow:: Result < BatchBuilder > {
188
189
let mut batch_builder = BatchBuilder :: new ( source_type) ;
189
- if self . is_eof {
190
- return Ok ( batch_builder) ;
191
- }
192
- let limit_num_bytes = self . current_offset + BATCH_NUM_BYTES_LIMIT as usize ;
193
- let mut new_offset = self . current_offset ;
190
+ let current_offset = match & self . current_position {
191
+ Position :: Eof ( _) => return Ok ( batch_builder) ,
192
+ Position :: Beginning => 0 ,
193
+ Position :: Offset ( offset) => parse_offset ( offset) ?,
194
+ } ;
195
+
196
+ let limit_num_bytes = current_offset + BATCH_NUM_BYTES_LIMIT as usize ;
197
+ let mut new_offset = current_offset;
194
198
let mut eof_position: Option < Position > = None ;
195
199
while new_offset < limit_num_bytes {
196
200
if let Some ( record) = source_progress
@@ -200,28 +204,26 @@ impl ObjectUriBatchReader {
200
204
new_offset = record. next_offset as usize ;
201
205
batch_builder. add_doc ( record. doc ) ;
202
206
if record. is_last {
203
- self . is_eof = true ;
204
207
eof_position = Some ( Position :: eof ( new_offset) ) ;
205
208
break ;
206
209
}
207
210
} else {
208
- self . is_eof = true ;
209
211
eof_position = Some ( Position :: eof ( new_offset) ) ;
210
212
break ;
211
213
}
212
214
}
213
215
let to_position = eof_position. unwrap_or ( Position :: offset ( new_offset) ) ;
214
216
batch_builder. checkpoint_delta . record_partition_delta (
215
217
self . partition_id . clone ( ) ,
216
- Position :: offset ( self . current_offset ) ,
217
- to_position,
218
+ self . current_position . clone ( ) ,
219
+ to_position. clone ( ) ,
218
220
) ?;
219
- self . current_offset = new_offset ;
221
+ self . current_position = to_position ;
220
222
Ok ( batch_builder)
221
223
}
222
224
223
225
pub fn is_eof ( & self ) -> bool {
224
- self . is_eof
226
+ self . current_position . is_eof ( )
225
227
}
226
228
}
227
229
0 commit comments