From 54024af65fa3f611bcd12f3967f88a7591a26b3f Mon Sep 17 00:00:00 2001 From: Satyam Singh Date: Fri, 15 Dec 2023 16:47:45 +0530 Subject: [PATCH] Set file_size attribute directly from file metadata. The file size is used to calculate offsets in queries, so a wrong file size can lead to query issues. --- server/src/catalog/manifest.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/server/src/catalog/manifest.rs b/server/src/catalog/manifest.rs index f0696fc89..d29adea4e 100644 --- a/server/src/catalog/manifest.rs +++ b/server/src/catalog/manifest.rs @@ -97,6 +97,8 @@ pub fn create_from_parquet_file( }; let file = std::fs::File::open(fs_file_path)?; + manifest_file.file_size = file.metadata()?.len(); + let file = parquet::file::serialized_reader::SerializedFileReader::new(file)?; let file_meta = file.metadata().file_metadata(); let row_groups = file.metadata().row_groups(); @@ -105,9 +107,6 @@ pub fn create_from_parquet_file( manifest_file.ingestion_size = row_groups .iter() .fold(0, |acc, x| acc + x.total_byte_size() as u64); - manifest_file.file_size = row_groups - .iter() - .fold(0, |acc, x| acc + x.compressed_size() as u64); let columns = column_statistics(row_groups); manifest_file.columns = columns.into_values().collect();