Add compression option to SpillManager #16268
Changes from all commits: f929847, 8b190b6, 319f413, 732ad57, 0f77d35, 9ce876c
File 1:
@@ -28,6 +28,7 @@ use arrow::compute::SortOptions;
 use arrow::datatypes::{Int32Type, SchemaRef};
 use arrow_schema::{DataType, Field, Schema};
 use datafusion::assert_batches_eq;
+use datafusion::config::SpillCompression;
 use datafusion::datasource::memory::MemorySourceConfig;
 use datafusion::datasource::source::DataSourceExec;
 use datafusion::datasource::{MemTable, TableProvider};
@@ -545,10 +546,11 @@ async fn test_external_sort_zero_merge_reservation() {
 // Tests for disk limit (`max_temp_directory_size` in `DiskManager`)
 // ------------------------------------------------------------------

-// Create a new `SessionContext` with specified disk limit and memory pool limit
+// Create a new `SessionContext` with specified disk limit, memory pool limit, and spill compression codec
 async fn setup_context(
     disk_limit: u64,
     memory_pool_limit: usize,
+    spill_compression: SpillCompression,
 ) -> Result<SessionContext> {
     let disk_manager = DiskManagerBuilder::default()
         .with_mode(DiskManagerMode::OsTmpDirectory)
@@ -570,6 +572,7 @@ async fn setup_context(
     let config = SessionConfig::new()
         .with_sort_spill_reservation_bytes(64 * 1024) // 256KB
         .with_sort_in_place_threshold_bytes(0)
+        .with_spill_compression(spill_compression)
         .with_batch_size(64) // To reduce test memory usage
         .with_target_partitions(1);
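For context, the user-facing setup this enables looks roughly like the following. This is a minimal sketch: only the `with_spill_compression` setter and the `SpillCompression` enum are taken from this PR, the rest is ordinary DataFusion session construction.

use datafusion::config::SpillCompression;
use datafusion::prelude::*;

// Minimal sketch: build a session whose spilling operators (external sort,
// sort-merge join) write zstd-compressed spill files. Assumes only the
// `with_spill_compression` setter added in this PR.
fn zstd_spill_session() -> SessionContext {
    let config = SessionConfig::new().with_spill_compression(SpillCompression::Zstd);
    SessionContext::new_with_config(config)
}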
@@ -580,7 +583,8 @@ async fn setup_context(
 /// (specified by `max_temp_directory_size` in `DiskManager`)
 #[tokio::test]
 async fn test_disk_spill_limit_reached() -> Result<()> {
-    let ctx = setup_context(1024 * 1024, 1024 * 1024).await?; // 1MB disk limit, 1MB memory limit
+    let spill_compression = SpillCompression::Uncompressed;
+    let ctx = setup_context(1024 * 1024, 1024 * 1024, spill_compression).await?; // 1MB disk limit, 1MB memory limit

     let df = ctx
         .sql("select * from generate_series(1, 1000000000000) as t1(v1) order by v1")
@@ -602,7 +606,8 @@ async fn test_disk_spill_limit_reached() -> Result<()> {
 #[tokio::test]
 async fn test_disk_spill_limit_not_reached() -> Result<()> {
     let disk_spill_limit = 1024 * 1024; // 1MB
-    let ctx = setup_context(disk_spill_limit, 128 * 1024).await?; // 1MB disk limit, 128KB memory limit
+    let spill_compression = SpillCompression::Uncompressed;
+    let ctx = setup_context(disk_spill_limit, 128 * 1024, spill_compression).await?; // 1MB disk limit, 128KB memory limit

     let df = ctx
         .sql("select * from generate_series(1, 10000) as t1(v1) order by v1")
@@ -630,6 +635,77 @@ async fn test_disk_spill_limit_not_reached() -> Result<()> {
     Ok(())
 }

+/// External query should succeed using zstd as spill compression codec and
+/// all temporary spill files are properly cleaned up after execution.
+/// Note: This test does not inspect file contents (e.g. magic number),
+/// as spill files are automatically deleted on drop.
+#[tokio::test]
+async fn test_spill_file_compressed_with_zstd() -> Result<()> {
+    let disk_spill_limit = 1024 * 1024; // 1MB
+    let spill_compression = SpillCompression::Zstd;
+    let ctx = setup_context(disk_spill_limit, 128 * 1024, spill_compression).await?; // 1MB disk limit, 128KB memory limit, zstd
+
+    let df = ctx
+        .sql("select * from generate_series(1, 100000) as t1(v1) order by v1")
+        .await
+        .unwrap();
+    let plan = df.create_physical_plan().await.unwrap();
+
+    let task_ctx = ctx.task_ctx();
+    let _ = collect_batches(Arc::clone(&plan), task_ctx)
+        .await
+        .expect("Query execution failed");
+
+    let spill_count = plan.metrics().unwrap().spill_count().unwrap();
+    let spilled_bytes = plan.metrics().unwrap().spilled_bytes().unwrap();
+
+    println!("spill count {spill_count}");
+    assert!(spill_count > 0);
+    assert!((spilled_bytes as u64) < disk_spill_limit);
+
+    // Verify that all temporary files have been properly cleaned up by checking
+    // that the total disk usage tracked by the disk manager is zero
+    let current_disk_usage = ctx.runtime_env().disk_manager.used_disk_space();
+    assert_eq!(current_disk_usage, 0);
+
+    Ok(())
+}
+
+/// External query should succeed using lz4_frame as spill compression codec and
+/// all temporary spill files are properly cleaned up after execution.
+/// Note: This test does not inspect file contents (e.g. magic number),
+/// as spill files are automatically deleted on drop.
+#[tokio::test]
+async fn test_spill_file_compressed_with_lz4_frame() -> Result<()> {
Comment on lines +674 to +679 (Contributor, Author): I added a test here, but unfortunately it is not straightforward to check whether the file is actually compressed with the desired codec in an e2e test. Maybe we can compare
+    let disk_spill_limit = 1024 * 1024; // 1MB
+    let spill_compression = SpillCompression::Lz4Frame;
+    let ctx = setup_context(disk_spill_limit, 128 * 1024, spill_compression).await?; // 1MB disk limit, 128KB memory limit, lz4_frame
+
+    let df = ctx
+        .sql("select * from generate_series(1, 100000) as t1(v1) order by v1")
+        .await
+        .unwrap();
+    let plan = df.create_physical_plan().await.unwrap();
+
+    let task_ctx = ctx.task_ctx();
+    let _ = collect_batches(Arc::clone(&plan), task_ctx)
+        .await
+        .expect("Query execution failed");
+
+    let spill_count = plan.metrics().unwrap().spill_count().unwrap();
+    let spilled_bytes = plan.metrics().unwrap().spilled_bytes().unwrap();
+
+    println!("spill count {spill_count}");
+    assert!(spill_count > 0);
+    assert!((spilled_bytes as u64) < disk_spill_limit);
+
+    // Verify that all temporary files have been properly cleaned up by checking
+    // that the total disk usage tracked by the disk manager is zero
+    let current_disk_usage = ctx.runtime_env().disk_manager.used_disk_space();
+    assert_eq!(current_disk_usage, 0);
+
+    Ok(())
+}
 /// Run the query with the specified memory limit,
 /// and verifies the expected errors are returned
 #[derive(Clone, Debug)]
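Regarding the review comment above about verifying the codec end to end: one hypothetical way to approximate such a check is to run the same query under two codecs and compare the reported spill metrics. This is only a sketch, not part of the PR, and it assumes the `spilled_bytes` metric reflects the on-disk size of the spill files; if it instead tracks in-memory batch size, the assertion would not hold and the files would have to be inspected directly.

// Hypothetical follow-up test (not part of this PR). Reuses the `setup_context`
// and `collect_batches` helpers from this test module. Assumes `spilled_bytes`
// reports on-disk bytes, so a compressed run should report fewer bytes than an
// uncompressed run of the same query.
#[tokio::test]
async fn test_zstd_spill_smaller_than_uncompressed() -> Result<()> {
    let mut spilled_bytes_per_codec = vec![];
    for codec in [SpillCompression::Uncompressed, SpillCompression::Zstd] {
        let ctx = setup_context(1024 * 1024, 128 * 1024, codec).await?;
        let df = ctx
            .sql("select * from generate_series(1, 100000) as t1(v1) order by v1")
            .await?;
        let plan = df.create_physical_plan().await?;
        let _ = collect_batches(Arc::clone(&plan), ctx.task_ctx()).await?;
        spilled_bytes_per_codec.push(plan.metrics().unwrap().spilled_bytes().unwrap());
    }
    // Expect compression to shrink the spill files for this highly regular data.
    assert!(spilled_bytes_per_codec[1] < spilled_bytes_per_codec[0]);
    Ok(())
}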
File 2:
@@ -61,6 +61,7 @@ use arrow::compute::{
 use arrow::datatypes::{DataType, SchemaRef, TimeUnit};
 use arrow::error::ArrowError;
 use arrow::ipc::reader::StreamReader;
+use datafusion_common::config::SpillCompression;
 use datafusion_common::{
     exec_err, internal_err, not_impl_err, plan_err, DataFusionError, HashSet, JoinSide,
     JoinType, NullEquality, Result,
@@ -500,6 +501,7 @@ impl ExecutionPlan for SortMergeJoinExec {

         // create join stream
         Ok(Box::pin(SortMergeJoinStream::try_new(
+            context.session_config().spill_compression(),
             Arc::clone(&self.schema),
             self.sort_options.clone(),
             self.null_equality,
@@ -1324,6 +1326,8 @@ impl Stream for SortMergeJoinStream {
 impl SortMergeJoinStream {
     #[allow(clippy::too_many_arguments)]
     pub fn try_new(
+        // Configured via `datafusion.execution.spill_compression`.
+        spill_compression: SpillCompression,
         schema: SchemaRef,
         sort_options: Vec<SortOptions>,
         null_equality: NullEquality,

Comment on lines +1329 to +1330:
Contributor: Nit: For readability, I suggest adding a comment indicating that this argument is passed through from the configuration xxx.
Contributor (Author): I made the changes. Thank you.
@@ -1344,7 +1348,8 @@ impl SortMergeJoinStream {
             Arc::clone(&runtime_env),
             join_metrics.spill_metrics.clone(),
             Arc::clone(&buffered_schema),
-        );
+        )
+        .with_compression_type(spill_compression);
         Ok(Self {
             state: SortMergeJoinState::Init,
             sort_options,
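Since the new comments name the config key (`datafusion.execution.spill_compression`), the codec can presumably also be switched per session through SQL. A sketch of that usage; the value string 'zstd' is an assumption derived from the enum variant name, not something shown in this diff:

use datafusion::error::Result;
use datafusion::prelude::*;

// Sketch: set the spill codec through the SQL config key mentioned in the new
// comments. The accepted value strings ('zstd', 'lz4_frame', 'uncompressed')
// are assumptions based on the enum variant names. SET statements take effect
// when run through `SessionContext::sql`, so collecting the result isn't needed.
async fn set_spill_codec_via_sql(ctx: &SessionContext) -> Result<()> {
    ctx.sql("SET datafusion.execution.spill_compression = 'zstd'")
        .await?;
    Ok(())
}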
File 3:
@@ -48,6 +48,7 @@ use crate::{
 use arrow::array::{Array, RecordBatch, RecordBatchOptions, StringViewArray};
 use arrow::compute::{concat_batches, lexsort_to_indices, take_arrays};
 use arrow::datatypes::SchemaRef;
+use datafusion_common::config::SpillCompression;
 use datafusion_common::{internal_datafusion_err, internal_err, DataFusionError, Result};
 use datafusion_execution::disk_manager::RefCountedTempFile;
 use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation};
@@ -258,6 +259,8 @@ impl ExternalSorter {
         batch_size: usize,
         sort_spill_reservation_bytes: usize,
         sort_in_place_threshold_bytes: usize,
+        // Configured via `datafusion.execution.spill_compression`.
+        spill_compression: SpillCompression,
         metrics: &ExecutionPlanMetricsSet,
         runtime: Arc<RuntimeEnv>,
     ) -> Result<Self> {

Comment on lines +262 to +263 (Contributor, Author): Here, too. (ding-young marked this conversation as resolved.)
@@ -274,7 +277,8 @@ impl ExternalSorter {
             Arc::clone(&runtime),
             metrics.spill_metrics.clone(),
             Arc::clone(&schema),
-        );
+        )
+        .with_compression_type(spill_compression);

         Ok(Self {
             schema,
@@ -1183,6 +1187,7 @@ impl ExecutionPlan for SortExec {
             context.session_config().batch_size(),
             execution_options.sort_spill_reservation_bytes,
             execution_options.sort_in_place_threshold_bytes,
+            context.session_config().spill_compression(),
             &self.metrics_set,
             context.runtime_env(),
         )?;
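To see the setting reach the external sort path changed above, a query has to actually spill. The following is a self-contained sketch that forces spilling by giving the session a small memory pool; `RuntimeEnvBuilder`, `FairSpillPool`, and `new_with_config_rt` are existing DataFusion APIs, only `with_spill_compression` comes from this PR, and the pool size and row count are arbitrary values that may need tuning.

use std::sync::Arc;

use datafusion::config::SpillCompression;
use datafusion::error::Result;
use datafusion::execution::memory_pool::FairSpillPool;
use datafusion::execution::runtime_env::RuntimeEnvBuilder;
use datafusion::prelude::*;

// Sketch: a memory-limited session whose large sort must spill, so the
// configured codec is exercised. The default disk manager writes the spill
// files to the OS temp directory. Sizes are illustrative only.
#[tokio::main]
async fn main() -> Result<()> {
    let runtime = RuntimeEnvBuilder::new()
        .with_memory_pool(Arc::new(FairSpillPool::new(16 * 1024 * 1024))) // 16MB pool
        .build_arc()?;
    let config = SessionConfig::new()
        .with_spill_compression(SpillCompression::Lz4Frame)
        .with_sort_spill_reservation_bytes(1024 * 1024); // keep the sort's reservation small
    let ctx = SessionContext::new_with_config_rt(config, runtime);

    let df = ctx
        .sql("select * from generate_series(1, 10000000) as t(v) order by v")
        .await?;
    // The sort exceeds the 16MB pool, spills, and writes lz4_frame-compressed files.
    df.collect().await?;
    Ok(())
}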
I'll change the default codec after experiments.
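That comment refers to the default variant of the new enum. For readers without the rest of the diff, its assumed shape is roughly the following; the variant names come from the hunks above, while the derives and the choice of default are guesses, and per the comment the default may still change after benchmarking.

// Assumed shape of the new config enum (variant names taken from the diff;
// the derives and the #[default] choice are assumptions).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum SpillCompression {
    Zstd,
    Lz4Frame,
    #[default]
    Uncompressed,
}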