69 changes: 35 additions & 34 deletions Cargo.lock

Some generated files are not rendered by default.

14 changes: 7 additions & 7 deletions Cargo.toml
@@ -90,19 +90,19 @@ ahash = { version = "0.8", default-features = false, features = [
"runtime-rng",
] }
apache-avro = { version = "0.20", default-features = false }
arrow = { version = "56.0.0", features = [
arrow = { version = "56.1.0", features = [
"prettyprint",
"chrono-tz",
] }
arrow-buffer = { version = "56.0.0", default-features = false }
arrow-flight = { version = "56.0.0", features = [
arrow-buffer = { version = "56.1.0", default-features = false }
arrow-flight = { version = "56.1.0", features = [
"flight-sql-experimental",
] }
arrow-ipc = { version = "56.0.0", default-features = false, features = [
arrow-ipc = { version = "56.1.0", default-features = false, features = [
"lz4",
] }
arrow-ord = { version = "56.0.0", default-features = false }
arrow-schema = { version = "56.0.0", default-features = false }
arrow-ord = { version = "56.1.0", default-features = false }
arrow-schema = { version = "56.1.0", default-features = false }
async-trait = "0.1.89"
bigdecimal = "0.4.8"
bytes = "1.10"
@@ -157,7 +157,7 @@ itertools = "0.14"
log = "^0.4"
object_store = { version = "0.12.3", default-features = false }
parking_lot = "0.12"
parquet = { version = "56.0.0", default-features = false, features = [
parquet = { version = "56.1.0", default-features = false, features = [
"arrow",
"async",
"object_store",
12 changes: 6 additions & 6 deletions datafusion-cli/src/main.rs
@@ -571,15 +571,15 @@ mod tests {
let df = ctx.sql(sql).await?;
let rbs = df.collect().await?;

assert_snapshot!(batches_to_string(&rbs),@r#"
assert_snapshot!(batches_to_string(&rbs),@r"
+-----------------------------------+-----------------+---------------------+------+------------------+
| filename | file_size_bytes | metadata_size_bytes | hits | extra |
+-----------------------------------+-----------------+---------------------+------+------------------+
| alltypes_plain.parquet | 1851 | 10181 | 2 | page_index=false |
-| alltypes_tiny_pages.parquet | 454233 | 881634 | 2 | page_index=true |
+| alltypes_tiny_pages.parquet | 454233 | 881418 | 2 | page_index=true |
Contributor Author: I don't really know why the in-memory size of the ParquetMetadata has decreased, but it seems like a good improvement to me

| lz4_raw_compressed_larger.parquet | 380836 | 2939 | 2 | page_index=false |
+-----------------------------------+-----------------+---------------------+------+------------------+
"#);
");

// increase the number of hits
ctx.sql("select * from alltypes_plain")
@@ -602,15 +602,15 @@ mod tests {
let df = ctx.sql(sql).await?;
let rbs = df.collect().await?;

assert_snapshot!(batches_to_string(&rbs),@r#"
assert_snapshot!(batches_to_string(&rbs),@r"
+-----------------------------------+-----------------+---------------------+------+------------------+
| filename | file_size_bytes | metadata_size_bytes | hits | extra |
+-----------------------------------+-----------------+---------------------+------+------------------+
| alltypes_plain.parquet | 1851 | 10181 | 5 | page_index=false |
-| alltypes_tiny_pages.parquet | 454233 | 881634 | 2 | page_index=true |
+| alltypes_tiny_pages.parquet | 454233 | 881418 | 2 | page_index=true |
| lz4_raw_compressed_larger.parquet | 380836 | 2939 | 3 | page_index=false |
+-----------------------------------+-----------------+---------------------+------+------------------+
"#);
");

Ok(())
}
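For context on the snapshot changes above: `metadata_size_bytes` reports the in-memory size of the decoded Parquet metadata, which shrank slightly between arrow-rs 56.0.0 and 56.1.0. A minimal sketch of how such a number can be obtained with the parquet crate's `ParquetMetaData::memory_size()`; the file path is a placeholder, and the exact figure also depends on whether the page index is loaded:

```rust
use std::fs::File;

use parquet::file::reader::{FileReader, SerializedFileReader};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Placeholder path; any local parquet file works.
    let file = File::open("alltypes_tiny_pages.parquet")?;
    let reader = SerializedFileReader::new(file)?;

    // Estimated heap footprint of the decoded metadata. An estimate like
    // this is what moved from 881634 to 881418 bytes in the snapshot above
    // (there, with the page index included).
    println!(
        "in-memory metadata size: {} bytes",
        reader.metadata().memory_size()
    );
    Ok(())
}
```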
8 changes: 8 additions & 0 deletions datafusion/common/src/config.rs
@@ -566,6 +566,14 @@ config_namespace! {
/// (reading) Use any available bloom filters when reading parquet files
pub bloom_filter_on_read: bool, default = true

+/// (reading) The maximum predicate cache size, in bytes. When
+/// `pushdown_filters` is enabled, sets the maximum memory used to cache
+/// the results of predicate evaluation between filter evaluation and
+/// output generation. Decreasing this value will reduce memory usage,
+/// but may increase IO and CPU usage. None means use the default
+/// parquet reader setting. 0 means no caching.
+pub max_predicate_cache_size: Option<usize>, default = None

// The following options affect writing to parquet files
// and map to parquet::file::properties::WriterProperties

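A hedged sketch of how an application might exercise the new knob through the `ConfigOptions` field this hunk adds (the table name, file name, and 1 MiB figure are placeholders, not part of the PR):

```rust
use datafusion::error::Result;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    let mut config = SessionConfig::new();
    // The predicate cache only comes into play when filter pushdown is on.
    config.options_mut().execution.parquet.pushdown_filters = true;
    // Cap the cache at 1 MiB. Per the docs above, Some(0) disables caching
    // and None defers to the parquet reader's built-in default.
    config.options_mut().execution.parquet.max_predicate_cache_size =
        Some(1024 * 1024);

    let ctx = SessionContext::new_with_config(config);
    // Placeholder table and query.
    ctx.register_parquet("t", "data.parquet", ParquetReadOptions::default())
        .await?;
    ctx.sql("SELECT count(*) FROM t WHERE id > 100").await?.show().await?;
    Ok(())
}
```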
13 changes: 8 additions & 5 deletions datafusion/common/src/file_options/parquet_writer.rs
@@ -208,6 +208,7 @@ impl ParquetOptions {
binary_as_string: _, // not used for writer props
coerce_int96: _, // not used for writer props
skip_arrow_metadata: _,
+max_predicate_cache_size: _,
} = self;

let mut builder = WriterProperties::builder()
@@ -400,6 +401,10 @@ pub(crate) fn parse_statistics_string(str_setting: &str) -> Result<EnabledStatis
#[cfg(feature = "parquet")]
#[cfg(test)]
mod tests {
+use super::*;
+use crate::config::{ParquetColumnOptions, ParquetEncryptionOptions, ParquetOptions};
+#[cfg(feature = "parquet_encryption")]
+use crate::encryption::map_encryption_to_config_encryption;
use parquet::{
basic::Compression,
file::properties::{
@@ -409,11 +414,6 @@
};
use std::collections::HashMap;

-use super::*;
-use crate::config::{ParquetColumnOptions, ParquetEncryptionOptions, ParquetOptions};
-#[cfg(feature = "parquet_encryption")]
-use crate::encryption::map_encryption_to_config_encryption;
-
const COL_NAME: &str = "configured";

/// Take the column defaults provided in [`ParquetOptions`], and generate a non-default col config.
@@ -475,6 +475,7 @@ mod tests {
binary_as_string: defaults.binary_as_string,
skip_arrow_metadata: defaults.skip_arrow_metadata,
coerce_int96: None,
+max_predicate_cache_size: defaults.max_predicate_cache_size,
}
}

@@ -581,6 +582,8 @@
maximum_buffered_record_batches_per_stream: global_options_defaults
.maximum_buffered_record_batches_per_stream,
bloom_filter_on_read: global_options_defaults.bloom_filter_on_read,
+max_predicate_cache_size: global_options_defaults
+    .max_predicate_cache_size,
schema_force_view_types: global_options_defaults.schema_force_view_types,
binary_as_string: global_options_defaults.binary_as_string,
skip_arrow_metadata: global_options_defaults.skip_arrow_metadata,
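The reason writer-side code has to mention a reader-only option at all is the exhaustive destructuring used when converting `ParquetOptions` into `WriterProperties`: every field must be named, even ignored ones, so adding a field without deciding how the writer treats it becomes a compile error. A self-contained illustration of the pattern with stand-in types (not DataFusion's actual definitions):

```rust
// Stand-in for a config struct that mixes writer and reader options.
struct Options {
    compression: Option<String>,
    // Reader-only knob, like `max_predicate_cache_size` in this PR.
    max_predicate_cache_size: Option<usize>,
}

fn writer_settings(opts: &Options) -> String {
    // No `..` catch-all: if a new field is added to `Options`, this `let`
    // stops compiling until the field is listed, even if only as `_`.
    let Options {
        compression,
        max_predicate_cache_size: _, // deliberately unused by the writer
    } = opts;
    compression.clone().unwrap_or_else(|| "uncompressed".into())
}

fn main() {
    let opts = Options {
        compression: Some("zstd".into()),
        max_predicate_cache_size: Some(0),
    };
    println!("compression = {}", writer_settings(&opts));
}
```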