From 4403eb392b1638685986e536855ee041688cb0e3 Mon Sep 17 00:00:00 2001
From: Shawn Chang
Date: Sun, 27 Jul 2025 21:29:54 -0700
Subject: [PATCH 1/3] Fix current_written_size for ParquetWriter

---
 crates/iceberg/src/writer/file_writer/parquet_writer.rs | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/crates/iceberg/src/writer/file_writer/parquet_writer.rs b/crates/iceberg/src/writer/file_writer/parquet_writer.rs
index 75b3d9244a..a6c4297a79 100644
--- a/crates/iceberg/src/writer/file_writer/parquet_writer.rs
+++ b/crates/iceberg/src/writer/file_writer/parquet_writer.rs
@@ -611,7 +611,14 @@ impl CurrentFileStatus for ParquetWriter {
     }
 
     fn current_written_size(&self) -> usize {
-        self.written_size.load(std::sync::atomic::Ordering::Relaxed) as usize
+        if let Some(inner) = self.inner_writer.as_ref() {
+            // inner/AsyncArrowWriter contains sync and async writers
+            // written size = bytes flushed to inner's async writer + bytes buffered in the inner's sync writer
+            inner.bytes_written() + inner.in_progress_size()
+        } else {
+            // inner writer is not initialized yet
+            0
+        }
     }
 }
 

From fe4b9be36c34cc983bdfaacc435a102743190d32 Mon Sep 17 00:00:00 2001
From: Shawn Chang
Date: Sun, 27 Jul 2025 21:40:05 -0700
Subject: [PATCH 2/3] minor

---
 crates/iceberg/src/writer/file_writer/parquet_writer.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/crates/iceberg/src/writer/file_writer/parquet_writer.rs b/crates/iceberg/src/writer/file_writer/parquet_writer.rs
index a6c4297a79..5b39ee06cf 100644
--- a/crates/iceberg/src/writer/file_writer/parquet_writer.rs
+++ b/crates/iceberg/src/writer/file_writer/parquet_writer.rs
@@ -229,6 +229,8 @@ pub struct ParquetWriter {
     out_file: OutputFile,
     inner_writer: Option>>,
     writer_properties: WriterProperties,
+    // written_size is only accurate after closing the inner writer,
+    // because the inner writer flushes data asynchronously.
     written_size: Arc,
     current_row_num: usize,
     nan_value_count_visitor: NanValueCountVisitor,

From f394c2269c3fac72b9e3363e63f528fdd25de1c5 Mon Sep 17 00:00:00 2001
From: Shawn Chang
Date: Sun, 27 Jul 2025 21:57:56 -0700
Subject: [PATCH 3/3] Trigger Build