Skip to content

Commit 32424e8

Browse files
ZENOTME authored and shaeqahmed committed
make file scan task serializable (apache#377)
Co-authored-by: ZENOTME <[email protected]>
1 parent d10586a commit 32424e8

File tree

3 files changed

+15
-13
lines changed

3 files changed

+15
-13
lines changed

crates/iceberg/src/arrow/reader.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ impl ArrowReader {
125125
Ok(try_stream! {
126126
while let Some(Ok(task)) = tasks.next().await {
127127
let parquet_file = file_io
128-
.new_input(task.data().data_file().file_path())?;
128+
.new_input(task.data_file_path())?;
129129
let (parquet_metadata, parquet_reader) = try_join!(parquet_file.metadata(), parquet_file.reader())?;
130130
let arrow_file_reader = ArrowFileReader::new(parquet_metadata, parquet_reader);
131131

crates/iceberg/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ pub mod avro;
4545
pub mod io;
4646
pub mod spec;
4747

48-
mod scan;
48+
pub mod scan;
4949

5050
#[allow(dead_code)]
5151
pub mod expr;

crates/iceberg/src/scan.rs

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,16 @@ use crate::expr::visitors::manifest_evaluator::ManifestEvaluator;
2424
use crate::expr::{Bind, BoundPredicate, Predicate};
2525
use crate::io::FileIO;
2626
use crate::spec::{
27-
DataContentType, ManifestContentType, ManifestEntryRef, ManifestFile, Schema, SchemaRef,
28-
SnapshotRef, TableMetadataRef,
27+
DataContentType, ManifestContentType, ManifestFile, Schema, SchemaRef, SnapshotRef,
28+
TableMetadataRef,
2929
};
3030
use crate::table::Table;
3131
use crate::{Error, ErrorKind, Result};
3232
use arrow_array::RecordBatch;
3333
use async_stream::try_stream;
3434
use futures::stream::BoxStream;
3535
use futures::StreamExt;
36+
use serde::{Deserialize, Serialize};
3637
use std::collections::hash_map::Entry;
3738
use std::collections::HashMap;
3839
use std::sync::Arc;
@@ -55,7 +56,7 @@ pub struct TableScanBuilder<'a> {
5556
}
5657

5758
impl<'a> TableScanBuilder<'a> {
58-
pub fn new(table: &'a Table) -> Self {
59+
pub(crate) fn new(table: &'a Table) -> Self {
5960
Self {
6061
table,
6162
column_names: vec![],
@@ -265,7 +266,7 @@ impl TableScan {
265266
}
266267
DataContentType::Data => {
267268
let scan_task: Result<FileScanTask> = Ok(FileScanTask {
268-
data_manifest_entry: manifest_entry.clone(),
269+
data_file_path: manifest_entry.data_file().file_path().to_string(),
269270
start: 0,
270271
length: manifest_entry.file_size_in_bytes(),
271272
});
@@ -463,18 +464,19 @@ impl ManifestEvaluatorCache {
463464
}
464465

465466
/// A task to scan part of file.
466-
#[derive(Debug)]
467+
#[derive(Debug, Clone, Serialize, Deserialize)]
467468
pub struct FileScanTask {
468-
data_manifest_entry: ManifestEntryRef,
469+
data_file_path: String,
469470
#[allow(dead_code)]
470471
start: u64,
471472
#[allow(dead_code)]
472473
length: u64,
473474
}
474475

475476
impl FileScanTask {
476-
pub fn data(&self) -> ManifestEntryRef {
477-
self.data_manifest_entry.clone()
477+
/// Returns the data file path of this file scan task.
478+
pub fn data_file_path(&self) -> &str {
479+
&self.data_file_path
478480
}
479481
}
480482

@@ -794,17 +796,17 @@ mod tests {
794796

795797
assert_eq!(tasks.len(), 2);
796798

797-
tasks.sort_by_key(|t| t.data().data_file().file_path().to_string());
799+
tasks.sort_by_key(|t| t.data_file_path().to_string());
798800

799801
// Check first task is added data file
800802
assert_eq!(
801-
tasks[0].data().data_file().file_path(),
803+
tasks[0].data_file_path(),
802804
format!("{}/1.parquet", &fixture.table_location)
803805
);
804806

805807
// Check second task is existing data file
806808
assert_eq!(
807-
tasks[1].data().data_file().file_path(),
809+
tasks[1].data_file_path(),
808810
format!("{}/3.parquet", &fixture.table_location)
809811
);
810812
}

0 commit comments

Comments
 (0)