Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -694,6 +694,11 @@ config_namespace! {
/// reduce the number of rows decoded. This optimization is sometimes called "late materialization".
pub pushdown_filters: bool, default = false

/// (reading) Number of row groups to prefetch ahead while scanning Parquet files.
/// Set to 0 to disable prefetching. For example, a value of 1 (the default)
/// prefetches the next row group while the current one is being processed.
pub prefetch_row_groups: usize, default = 1

/// (reading) If true, filter expressions evaluated during the parquet decoding operation
/// will be reordered heuristically to minimize the cost of evaluation. If false,
/// the filters are applied in the same order as written in the query
Expand Down
3 changes: 3 additions & 0 deletions datafusion/common/src/file_options/parquet_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@ impl ParquetOptions {
coerce_int96: _, // not used for writer props
skip_arrow_metadata: _,
max_predicate_cache_size: _,
prefetch_row_groups: _,
} = self;

let mut builder = WriterProperties::builder()
Expand Down Expand Up @@ -469,6 +470,7 @@ mod tests {
.maximum_parallel_row_group_writers,
maximum_buffered_record_batches_per_stream: defaults
.maximum_buffered_record_batches_per_stream,
prefetch_row_groups: defaults.prefetch_row_groups,
bloom_filter_on_read: defaults.bloom_filter_on_read,
schema_force_view_types: defaults.schema_force_view_types,
binary_as_string: defaults.binary_as_string,
Expand Down Expand Up @@ -583,6 +585,7 @@ mod tests {
.maximum_parallel_row_group_writers,
maximum_buffered_record_batches_per_stream: global_options_defaults
.maximum_buffered_record_batches_per_stream,
prefetch_row_groups: global_options_defaults.prefetch_row_groups,
bloom_filter_on_read: global_options_defaults.bloom_filter_on_read,
max_predicate_cache_size: global_options_defaults
.max_predicate_cache_size,
Expand Down
Loading
Loading