Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 35 additions & 34 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 7 additions & 7 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -90,19 +90,19 @@ ahash = { version = "0.8", default-features = false, features = [
"runtime-rng",
] }
apache-avro = { version = "0.20", default-features = false }
arrow = { version = "56.0.0", features = [
arrow = { version = "56.1.0", features = [
"prettyprint",
"chrono-tz",
] }
arrow-buffer = { version = "56.0.0", default-features = false }
arrow-flight = { version = "56.0.0", features = [
arrow-buffer = { version = "56.1.0", default-features = false }
arrow-flight = { version = "56.1.0", features = [
"flight-sql-experimental",
] }
arrow-ipc = { version = "56.0.0", default-features = false, features = [
arrow-ipc = { version = "56.1.0", default-features = false, features = [
"lz4",
] }
arrow-ord = { version = "56.0.0", default-features = false }
arrow-schema = { version = "56.0.0", default-features = false }
arrow-ord = { version = "56.1.0", default-features = false }
arrow-schema = { version = "56.1.0", default-features = false }
async-trait = "0.1.89"
bigdecimal = "0.4.8"
bytes = "1.10"
Expand Down Expand Up @@ -157,7 +157,7 @@ itertools = "0.14"
log = "^0.4"
object_store = { version = "0.12.3", default-features = false }
parking_lot = "0.12"
parquet = { version = "56.0.0", default-features = false, features = [
parquet = { version = "56.1.0", default-features = false, features = [
"arrow",
"async",
"object_store",
Expand Down
4 changes: 2 additions & 2 deletions datafusion-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -576,7 +576,7 @@ mod tests {
| filename | file_size_bytes | metadata_size_bytes | hits | extra |
+-----------------------------------+-----------------+---------------------+------+------------------+
| alltypes_plain.parquet | 1851 | 10181 | 2 | page_index=false |
| alltypes_tiny_pages.parquet | 454233 | 881634 | 2 | page_index=true |
| alltypes_tiny_pages.parquet | 454233 | 881418 | 2 | page_index=true |
| lz4_raw_compressed_larger.parquet | 380836 | 2939 | 2 | page_index=false |
+-----------------------------------+-----------------+---------------------+------+------------------+
"#);
Expand Down Expand Up @@ -607,7 +607,7 @@ mod tests {
| filename | file_size_bytes | metadata_size_bytes | hits | extra |
+-----------------------------------+-----------------+---------------------+------+------------------+
| alltypes_plain.parquet | 1851 | 10181 | 5 | page_index=false |
| alltypes_tiny_pages.parquet | 454233 | 881634 | 2 | page_index=true |
| alltypes_tiny_pages.parquet | 454233 | 881418 | 2 | page_index=true |
| lz4_raw_compressed_larger.parquet | 380836 | 2939 | 3 | page_index=false |
+-----------------------------------+-----------------+---------------------+------+------------------+
"#);
Expand Down
6 changes: 4 additions & 2 deletions datafusion/datasource-parquet/src/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ use object_store::path::Path;
use object_store::{ObjectMeta, ObjectStore};
use parquet::arrow::arrow_reader::statistics::StatisticsConverter;
use parquet::arrow::parquet_to_arrow_schema;
use parquet::file::metadata::{ParquetMetaData, ParquetMetaDataReader, RowGroupMetaData};
use parquet::file::metadata::{
PageIndexPolicy, ParquetMetaData, ParquetMetaDataReader, RowGroupMetaData,
};
use std::any::Any;
use std::collections::HashMap;
use std::sync::Arc;
Expand Down Expand Up @@ -148,7 +150,7 @@ impl<'a> DFParquetMetadata<'a> {

if cache_metadata && file_metadata_cache.is_some() {
// Need to retrieve the entire metadata for the caching to be effective.
reader = reader.with_page_indexes(true);
reader = reader.with_page_index_policy(PageIndexPolicy::Required);
}

let metadata = Arc::new(
Expand Down
6 changes: 3 additions & 3 deletions datafusion/datasource-parquet/src/opener.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ use log::debug;
use parquet::arrow::arrow_reader::{ArrowReaderMetadata, ArrowReaderOptions};
use parquet::arrow::async_reader::AsyncFileReader;
use parquet::arrow::{ParquetRecordBatchStreamBuilder, ProjectionMask};
use parquet::file::metadata::ParquetMetaDataReader;
use parquet::file::metadata::{PageIndexPolicy, ParquetMetaDataReader};

/// Implements [`FileOpener`] for a parquet file
pub(super) struct ParquetOpener {
Expand Down Expand Up @@ -652,8 +652,8 @@ async fn load_page_index<T: AsyncFileReader>(
if missing_column_index || missing_offset_index {
let m = Arc::try_unwrap(Arc::clone(parquet_metadata))
.unwrap_or_else(|e| e.as_ref().clone());
let mut reader =
ParquetMetaDataReader::new_with_metadata(m).with_page_indexes(true);
let mut reader = ParquetMetaDataReader::new_with_metadata(m)
.with_page_index_policy(PageIndexPolicy::Required);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be exactly the same behavior; there is even a `From<bool>` implementation for `PageIndexPolicy` that does the same thing.

reader.load_page_index(input).await?;
let new_parquet_metadata = reader.finish()?;
let new_arrow_reader =
Expand Down
2 changes: 1 addition & 1 deletion datafusion/physical-plan/src/spill/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -752,7 +752,7 @@ mod tests {
.unwrap();

let size = get_record_batch_memory_size(&batch);
assert_eq!(size, 8320);
assert_eq!(size, 8208);
}

// ==== Spill manager tests ====
Expand Down
8 changes: 4 additions & 4 deletions datafusion/sqllogictest/test_files/explain_tree.slt
Original file line number Diff line number Diff line change
Expand Up @@ -1314,7 +1314,7 @@ physical_plan
11)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
12)│ DataSourceExec ││ DataSourceExec │
13)│ -------------------- ││ -------------------- │
14)│ bytes: 6040 ││ bytes: 6040
14)│ bytes: 5932 ││ bytes: 5932
15)│ format: memory ││ format: memory │
16)│ rows: 1 ││ rows: 1 │
17)└───────────────────────────┘└───────────────────────────┘
Expand Down Expand Up @@ -1798,7 +1798,7 @@ physical_plan
11)┌─────────────┴─────────────┐
12)│ DataSourceExec │
13)│ -------------------- │
14)│ bytes: 2672
14)│ bytes: 2576
15)│ format: memory │
16)│ rows: 1 │
17)└───────────────────────────┘
Expand All @@ -1821,7 +1821,7 @@ physical_plan
11)┌─────────────┴─────────────┐
12)│ DataSourceExec │
13)│ -------------------- │
14)│ bytes: 2672
14)│ bytes: 2576
15)│ format: memory │
16)│ rows: 1 │
17)└───────────────────────────┘
Expand All @@ -1844,7 +1844,7 @@ physical_plan
11)┌─────────────┴─────────────┐
12)│ DataSourceExec │
13)│ -------------------- │
14)│ bytes: 2672
14)│ bytes: 2576
15)│ format: memory │
16)│ rows: 1 │
17)└───────────────────────────┘
Expand Down
Loading