From 0821202018cf3fa0cd0b1809548eb2a4d387783a Mon Sep 17 00:00:00 2001 From: Yijie Shen Date: Thu, 29 Jul 2021 19:26:55 +0800 Subject: [PATCH 01/16] Extend source to enable read from remote storage --- .../core/src/serde/logical_plan/from_proto.rs | 4 +- .../src/serde/physical_plan/from_proto.rs | 15 +- .../core/src/serde/physical_plan/to_proto.rs | 2 +- ballista/rust/core/src/utils.rs | 5 + ballista/rust/scheduler/src/lib.rs | 28 +- benchmarks/src/bin/tpch.rs | 5 +- datafusion-examples/examples/flight_server.rs | 6 +- datafusion/src/datasource/csv.rs | 7 +- datafusion/src/datasource/json.rs | 6 +- datafusion/src/datasource/local.rs | 126 +++++ datafusion/src/datasource/mod.rs | 253 +++++++++ datafusion/src/datasource/object_store.rs | 108 ++++ datafusion/src/datasource/parquet.rs | 338 +++++++++++- datafusion/src/execution/context.rs | 40 +- datafusion/src/logical_plan/builder.rs | 9 +- .../src/physical_optimizer/repartition.rs | 14 +- datafusion/src/physical_plan/common.rs | 38 -- datafusion/src/physical_plan/csv.rs | 6 +- datafusion/src/physical_plan/json.rs | 6 +- datafusion/src/physical_plan/parquet.rs | 492 ++++-------------- 20 files changed, 1001 insertions(+), 507 deletions(-) create mode 100644 datafusion/src/datasource/local.rs create mode 100644 datafusion/src/datasource/object_store.rs diff --git a/ballista/rust/core/src/serde/logical_plan/from_proto.rs b/ballista/rust/core/src/serde/logical_plan/from_proto.rs index 31b8b6d3bcbc..24faddd9f34d 100644 --- a/ballista/rust/core/src/serde/logical_plan/from_proto.rs +++ b/ballista/rust/core/src/serde/logical_plan/from_proto.rs @@ -159,7 +159,7 @@ impl TryInto for &protobuf::LogicalPlanNode { LogicalPlanBuilder::scan_parquet_with_name( &scan.path, projection, - 24, + create_datafusion_context_concurrency(24), &scan.table_name, )? 
//TODO concurrency .build() @@ -1100,6 +1100,8 @@ impl TryInto for &protobuf::Field { } } +use crate::utils::create_datafusion_context_concurrency; +use datafusion::physical_plan::datetime_expressions::to_timestamp; use datafusion::physical_plan::{aggregates, windows}; use datafusion::prelude::{ array, date_part, date_trunc, length, lower, ltrim, md5, rtrim, sha224, sha256, diff --git a/ballista/rust/core/src/serde/physical_plan/from_proto.rs b/ballista/rust/core/src/serde/physical_plan/from_proto.rs index 678bcde8fa73..1441f87bc0aa 100644 --- a/ballista/rust/core/src/serde/physical_plan/from_proto.rs +++ b/ballista/rust/core/src/serde/physical_plan/from_proto.rs @@ -29,11 +29,13 @@ use crate::serde::protobuf::repartition_exec_node::PartitionMethod; use crate::serde::protobuf::ShuffleReaderPartition; use crate::serde::scheduler::PartitionLocation; use crate::serde::{from_proto_binary_op, proto_error, protobuf}; +use crate::utils::create_datafusion_context_concurrency; use crate::{convert_box_required, convert_required, into_required}; use datafusion::arrow::datatypes::{DataType, Schema, SchemaRef}; use datafusion::catalog::catalog::{ CatalogList, CatalogProvider, MemoryCatalogList, MemoryCatalogProvider, }; +use datafusion::datasource::object_store::ObjectStoreRegistry; use datafusion::execution::context::{ ExecutionConfig, ExecutionContextState, ExecutionProps, }; @@ -129,14 +131,13 @@ impl TryInto> for &protobuf::PhysicalPlanNode { } PhysicalPlanType::ParquetScan(scan) => { let projection = scan.projection.iter().map(|i| *i as usize).collect(); - let filenames: Vec<&str> = - scan.filename.iter().map(|s| s.as_str()).collect(); - Ok(Arc::new(ParquetExec::try_from_files( - &filenames, + let path: &str = scan.filename[0].as_str(); + Ok(Arc::new(ParquetExec::try_from_path( + path, Some(projection), None, scan.batch_size as usize, - scan.num_partitions as usize, + create_datafusion_context_concurrency(scan.num_partitions as usize), None, )?)) } @@ -614,6 +615,9 @@ impl TryFrom<&protobuf::PhysicalExprNode> for Arc { let catalog_list = Arc::new(MemoryCatalogList::new()) as Arc; + + let object_store_registry = Arc::new(ObjectStoreRegistry::new()); + let ctx_state = ExecutionContextState { catalog_list, scalar_functions: Default::default(), @@ -621,6 +625,7 @@ impl TryFrom<&protobuf::PhysicalExprNode> for Arc { aggregate_functions: Default::default(), config: ExecutionConfig::new(), execution_props: ExecutionProps::new(), + object_store_registry, }; let fun_expr = functions::create_physical_fun( diff --git a/ballista/rust/core/src/serde/physical_plan/to_proto.rs b/ballista/rust/core/src/serde/physical_plan/to_proto.rs index 48b21345525b..7b310cd076fa 100644 --- a/ballista/rust/core/src/serde/physical_plan/to_proto.rs +++ b/ballista/rust/core/src/serde/physical_plan/to_proto.rs @@ -259,7 +259,7 @@ impl TryInto for Arc { let filenames = exec .partitions() .iter() - .flat_map(|part| part.filenames().to_owned()) + .flat_map(|part| part.filenames()) .collect(); Ok(protobuf::PhysicalPlanNode { physical_plan_type: Some(PhysicalPlanType::ParquetScan( diff --git a/ballista/rust/core/src/utils.rs b/ballista/rust/core/src/utils.rs index 4187faa6645a..e960b77575a9 100644 --- a/ballista/rust/core/src/utils.rs +++ b/ballista/rust/core/src/utils.rs @@ -252,6 +252,11 @@ pub fn create_datafusion_context( ExecutionContext::with_config(config) } +/// Create a DataFusion context that is compatible with Ballista in concurrency +pub fn create_datafusion_context_concurrency(concurrency: usize) -> ExecutionContext { + 
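+    // Builds an ExecutionConfig carrying the requested concurrency and wraps it in a
+    // fresh ExecutionContext; the context's ObjectStoreRegistry starts out with only
+    // the local file system registered.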
ExecutionContext::with_concurrency(concurrency) +} + pub struct BallistaQueryPlanner { scheduler_url: String, config: BallistaConfig, diff --git a/ballista/rust/scheduler/src/lib.rs b/ballista/rust/scheduler/src/lib.rs index 676975fcaec9..2037a3530aba 100644 --- a/ballista/rust/scheduler/src/lib.rs +++ b/ballista/rust/scheduler/src/lib.rs @@ -85,7 +85,8 @@ use self::state::{ConfigBackendClient, SchedulerState}; use ballista_core::config::BallistaConfig; use ballista_core::execution_plans::ShuffleWriterExec; use ballista_core::serde::scheduler::to_proto::hash_partitioning_to_proto; -use datafusion::physical_plan::parquet::ParquetExec; +use ballista_core::utils::create_datafusion_context_concurrency; +use datafusion::datasource::parquet::ParquetRootDesc; use datafusion::prelude::{ExecutionConfig, ExecutionContext}; use std::time::{Instant, SystemTime, UNIX_EPOCH}; @@ -285,24 +286,19 @@ impl SchedulerGrpc for SchedulerServer { match file_type { FileType::Parquet => { - let parquet_exec = - ParquetExec::try_from_path(&path, None, None, 1024, 1, None) - .map_err(|e| { - let msg = format!("Error opening parquet files: {}", e); - error!("{}", msg); - tonic::Status::internal(msg) - })?; + let ctx = create_datafusion_context_concurrency(1); + let parquet_desc = ParquetRootDesc::new(&path, ctx).map_err(|e| { + let msg = format!("Error opening parquet files: {}", e); + error!("{}", msg); + tonic::Status::internal(msg) + })?; //TODO include statistics and any other info needed to reconstruct ParquetExec Ok(Response::new(GetFileMetadataResult { - schema: Some(parquet_exec.schema().as_ref().into()), - partitions: parquet_exec - .partitions() - .iter() - .map(|part| FilePartitionMetadata { - filename: part.filenames().to_vec(), - }) - .collect(), + schema: Some(parquet_desc.schema().as_ref().into()), + partitions: vec![FilePartitionMetadata { + filename: vec![path], + }], })) } //TODO implement for CSV diff --git a/benchmarks/src/bin/tpch.rs b/benchmarks/src/bin/tpch.rs index 10b5c2db795f..c45341bad2de 100644 --- a/benchmarks/src/bin/tpch.rs +++ b/benchmarks/src/bin/tpch.rs @@ -475,7 +475,10 @@ fn get_table( } "parquet" => { let path = format!("{}/{}", path, table); - Ok(Arc::new(ParquetTable::try_new(&path, max_concurrency)?)) + Ok(Arc::new(ParquetTable::try_new( + &path, + ExecutionContext::with_concurrency(max_concurrency), + )?)) } other => { unimplemented!("Invalid file format '{}'", other); diff --git a/datafusion-examples/examples/flight_server.rs b/datafusion-examples/examples/flight_server.rs index 138434ea2482..aab647b86676 100644 --- a/datafusion-examples/examples/flight_server.rs +++ b/datafusion-examples/examples/flight_server.rs @@ -65,7 +65,11 @@ impl FlightService for FlightServiceImpl { ) -> Result, Status> { let request = request.into_inner(); - let table = ParquetTable::try_new(&request.path[0], num_cpus::get()).unwrap(); + let table = ParquetTable::try_new( + &request.path[0], + ExecutionContext::with_concurrency(num_cpus::get()), + ) + .unwrap(); let options = datafusion::arrow::ipc::writer::IpcWriteOptions::default(); let schema_result = SchemaAsIpc::new(table.schema().as_ref(), &options).into(); diff --git a/datafusion/src/datasource/csv.rs b/datafusion/src/datasource/csv.rs index 987c4fdb079d..d4ca073af2dd 100644 --- a/datafusion/src/datasource/csv.rs +++ b/datafusion/src/datasource/csv.rs @@ -40,12 +40,14 @@ use std::string::String; use std::sync::{Arc, Mutex}; use crate::datasource::datasource::Statistics; +use crate::datasource::local::LocalFileSystem; +use 
crate::datasource::object_store::ObjectStore; use crate::datasource::{Source, TableProvider}; use crate::error::{DataFusionError, Result}; use crate::logical_plan::Expr; use crate::physical_plan::csv::CsvExec; pub use crate::physical_plan::csv::CsvReadOptions; -use crate::physical_plan::{common, ExecutionPlan}; +use crate::physical_plan::ExecutionPlan; /// Represents a CSV file with a provided schema pub struct CsvFile { @@ -64,7 +66,8 @@ impl CsvFile { let schema = Arc::new(match options.schema { Some(s) => s.clone(), None => { - let filenames = common::build_file_list(&path, options.file_extension)?; + let filenames = LocalFileSystem + .list_all_files(path.as_str(), options.file_extension)?; if filenames.is_empty() { return Err(DataFusionError::Plan(format!( "No files found at {path} with file extension {file_extension}", diff --git a/datafusion/src/datasource/json.rs b/datafusion/src/datasource/json.rs index 90fedfd6f528..5bd8a5f7121f 100644 --- a/datafusion/src/datasource/json.rs +++ b/datafusion/src/datasource/json.rs @@ -30,7 +30,6 @@ use crate::{ datasource::{Source, TableProvider}, error::{DataFusionError, Result}, physical_plan::{ - common, json::{NdJsonExec, NdJsonReadOptions}, ExecutionPlan, }, @@ -38,6 +37,8 @@ use crate::{ use arrow::{datatypes::SchemaRef, json::reader::infer_json_schema_from_seekable}; use super::datasource::Statistics; +use crate::datasource::local::LocalFileSystem; +use crate::datasource::object_store::ObjectStore; trait SeekRead: Read + Seek {} @@ -57,7 +58,8 @@ impl NdJsonFile { let schema = if let Some(schema) = options.schema { schema } else { - let filenames = common::build_file_list(path, options.file_extension)?; + let filenames = + LocalFileSystem.list_all_files(path, options.file_extension)?; if filenames.is_empty() { return Err(DataFusionError::Plan(format!( "No files found at {path} with file extension {file_extension}", diff --git a/datafusion/src/datasource/local.rs b/datafusion/src/datasource/local.rs new file mode 100644 index 000000000000..4890e9d229f9 --- /dev/null +++ b/datafusion/src/datasource/local.rs @@ -0,0 +1,126 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Object store that represents the Local File System. + +use crate::datasource::object_store::{ObjectReader, ObjectStore}; +use crate::error::DataFusionError; +use crate::error::Result; +use crate::parquet::file::reader::{ChunkReader, Length}; +use std::any::Any; +use std::fs; +use std::fs::{metadata, File}; +use std::io::Read; +use std::sync::Arc; + +#[derive(Debug)] +/// Local File System as Object Store. 
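+/// `list_all_files` walks the given directory tree recursively, and `get_reader`
+/// serves byte ranges directly from a `std::fs::File`, so local reads go through
+/// the same `ObjectStore` interface as remote stores.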
+pub struct LocalFileSystem; + +impl ObjectStore for LocalFileSystem { + fn as_any(&self) -> &dyn Any { + self + } + + fn list_all_files(&self, path: &str, ext: &str) -> Result> { + list_all(path, ext) + } + + fn get_reader(&self, file_path: &str) -> Result> { + let file = File::open(file_path)?; + let reader = LocalFSObjectReader::new(file)?; + Ok(Arc::new(reader)) + } +} + +struct LocalFSObjectReader { + file: File, +} + +impl LocalFSObjectReader { + fn new(file: File) -> Result { + Ok(Self { file }) + } +} + +impl ObjectReader for LocalFSObjectReader { + fn get_reader(&self, start: u64, length: usize) -> Box { + Box::new(FileSegmentReader::new( + self.file.try_clone().unwrap(), + start, + length, + )) + } + + fn length(&self) -> u64 { + self.file.len() + } +} + +struct FileSegmentReader { + file: File, + start: u64, + length: usize, +} + +impl FileSegmentReader { + fn new(file: File, start: u64, length: usize) -> Self { + Self { + file, + start, + length, + } + } +} + +impl Read for FileSegmentReader { + fn read(&mut self, buf: &mut [u8]) -> std::result::Result { + let mut file_source = self.file.get_read(self.start, self.length)?; + file_source.read(buf) + } +} + +fn list_all(root_path: &str, ext: &str) -> Result> { + let mut filenames: Vec = Vec::new(); + list_all_files(root_path, &mut filenames, ext)?; + Ok(filenames) +} + +/// Recursively build a list of files in a directory with a given extension with an accumulator list +fn list_all_files(dir: &str, filenames: &mut Vec, ext: &str) -> Result<()> { + let metadata = metadata(dir)?; + if metadata.is_file() { + if dir.ends_with(ext) { + filenames.push(dir.to_string()); + } + } else { + for entry in fs::read_dir(dir)? { + let entry = entry?; + let path = entry.path(); + if let Some(path_name) = path.to_str() { + if path.is_dir() { + list_all_files(path_name, filenames, ext)?; + } else if path_name.ends_with(ext) { + filenames.push(path_name.to_string()); + } + } else { + return Err(DataFusionError::Plan("Invalid path".to_string())); + } + } + } + Ok(()) +} diff --git a/datafusion/src/datasource/mod.rs b/datafusion/src/datasource/mod.rs index 9699a997caa1..64e84c8e5611 100644 --- a/datafusion/src/datasource/mod.rs +++ b/datafusion/src/datasource/mod.rs @@ -21,13 +21,24 @@ pub mod csv; pub mod datasource; pub mod empty; pub mod json; +pub mod local; pub mod memory; +pub mod object_store; pub mod parquet; pub use self::csv::{CsvFile, CsvReadOptions}; pub use self::datasource::{TableProvider, TableType}; pub use self::memory::MemTable; +use crate::arrow::datatypes::{Schema, SchemaRef}; +use crate::datasource::datasource::{ColumnStatistics, Statistics}; +use crate::datasource::object_store::ObjectStore; +use crate::error::{DataFusionError, Result}; +use crate::physical_plan::expressions::{MaxAccumulator, MinAccumulator}; +use crate::physical_plan::Accumulator; +use crate::scalar::ScalarValue; +use std::sync::Arc; + /// Source for table input data pub(crate) enum Source> { /// Path to a single file or a directory containing one of more files @@ -36,3 +47,245 @@ pub(crate) enum Source> { /// Read data from a reader Reader(std::sync::Mutex>), } + +#[derive(Debug, Clone)] +/// A single file that should be read, along with its schema, statistics +/// and partition column values that need to be appended to each row. +pub struct PartitionedFile { + /// Path for the file (e.g. 
URL, filesystem path, etc)
+    pub file_path: String,
+    /// Schema of the file
+    pub schema: Schema,
+    /// Statistics of the file
+    pub statistics: Statistics,
+    /// Values of partition columns to be appended to each row
+    pub partition_value: Option<Vec<ScalarValue>>,
+    /// Schema of partition columns
+    pub partition_schema: Option<Schema>,
+    // We may include row group range here for a more fine-grained parallel execution
+}
+
+impl From<String> for PartitionedFile {
+    fn from(file_path: String) -> Self {
+        Self {
+            file_path,
+            schema: Schema::empty(),
+            statistics: Default::default(),
+            partition_value: None,
+            partition_schema: None,
+        }
+    }
+}
+
+impl std::fmt::Display for PartitionedFile {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(
+            f,
+            "PartitionedFile(file_path: {}, schema: {}, statistics: {:?},\
+             partition_value: {:?}, partition_schema: {:?})",
+            self.file_path,
+            self.schema,
+            self.statistics,
+            self.partition_value,
+            self.partition_schema
+        )
+    }
+}
+
+#[derive(Debug, Clone)]
+/// A collection of files that should be read in a single task
+pub struct FilePartition {
+    /// The index of the partition among all partitions
+    pub index: usize,
+    /// The contained files of the partition
+    pub files: Vec<PartitionedFile>,
+}
+
+impl std::fmt::Display for FilePartition {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        let files: Vec<String> = self.files.iter().map(|f| format!("{}", f)).collect();
+        write!(
+            f,
+            "FilePartition[{}], files: {}",
+            self.index,
+            files.join(", ")
+        )
+    }
+}
+
+#[derive(Debug, Clone)]
+/// All source files with the same schema in a path
+pub struct SourceRootDescriptor {
+    /// All source files in the path
+    pub partition_files: Vec<PartitionedFile>,
+    /// The schema of the files
+    pub schema: SchemaRef,
+}
+
+/// Builder for [`SourceRootDescriptor`] inside a given path
+pub trait SourceRootDescBuilder {
+    /// Construct a [`SourceRootDescriptor`] from the provided path
+    fn get_source_desc(
+        path: &str,
+        object_store: Arc<dyn ObjectStore>,
+        ext: &str,
+    ) -> Result<SourceRootDescriptor> {
+        let filenames = object_store.list_all_files(path, ext)?;
+        if filenames.is_empty() {
+            return Err(DataFusionError::Plan(format!(
+                "No file (with .{} extension) found at path {}",
+                ext, path
+            )));
+        }
+
+        // build a list of partitions with statistics and gather all unique schemas
+        // used in this data set
+        let mut schemas: Vec<Schema> = vec![];
+
+        let partitioned_files = filenames
+            .iter()
+            .map(|file_path| {
+                let pf = Self::get_file_meta(file_path, object_store.clone())?;
+                let schema = pf.schema.clone();
+                if schemas.is_empty() {
+                    schemas.push(schema);
+                } else if schema != schemas[0] {
+                    // we currently get the schema information from the first file rather than do
+                    // schema merging and this is a limitation.
+                    // See https://issues.apache.org/jira/browse/ARROW-11017
+                    return Err(DataFusionError::Plan(format!(
+                        "The file {} has a different schema from the first file and DataFusion does \
+                        not yet support schema merging",
+                        file_path
+                    )));
+                }
+                Ok(pf)
+            }).collect::<Result<Vec<PartitionedFile>>>();
+
+        Ok(SourceRootDescriptor {
+            partition_files: partitioned_files?,
+            schema: Arc::new(schemas.pop().unwrap()),
+        })
+    }
+
+    /// Get all metadata for a source file, including schema, statistics, partitions, etc.
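+    /// This is the only method an implementor must provide; the default
+    /// `get_source_desc` above calls it once for every file returned by the listing.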
+ fn get_file_meta( + file_path: &str, + object_store: Arc, + ) -> Result; +} + +/// Get all files as well as the summary statistics when a limit is provided +pub fn get_statistics_with_limit( + source_desc: &SourceRootDescriptor, + limit: Option, +) -> (Vec, Statistics) { + let mut all_files = source_desc.partition_files.clone(); + let schema = source_desc.schema.clone(); + + let mut total_byte_size = 0; + let mut null_counts = vec![0; schema.fields().len()]; + let mut has_statistics = false; + let (mut max_values, mut min_values) = create_max_min_accs(&schema); + + let mut num_rows = 0; + let mut num_files = 0; + for file in &all_files { + num_files += 1; + let file_stats = &file.statistics; + num_rows += file_stats.num_rows.unwrap_or(0); + total_byte_size += file_stats.total_byte_size.unwrap_or(0); + if let Some(vec) = &file_stats.column_statistics { + has_statistics = true; + for (i, cs) in vec.iter().enumerate() { + null_counts[i] += cs.null_count.unwrap_or(0); + + if let Some(max_value) = &mut max_values[i] { + if let Some(file_max) = cs.max_value.clone() { + match max_value.update(&[file_max]) { + Ok(_) => {} + Err(_) => { + max_values[i] = None; + } + } + } + } + + if let Some(min_value) = &mut min_values[i] { + if let Some(file_min) = cs.min_value.clone() { + match min_value.update(&[file_min]) { + Ok(_) => {} + Err(_) => { + min_values[i] = None; + } + } + } + } + } + } + if num_rows > limit.unwrap_or(usize::MAX) { + break; + } + } + all_files.truncate(num_files); + + let column_stats = if has_statistics { + Some(get_col_stats( + &*schema, + null_counts, + &mut max_values, + &mut min_values, + )) + } else { + None + }; + + let statistics = Statistics { + num_rows: Some(num_rows as usize), + total_byte_size: Some(total_byte_size as usize), + column_statistics: column_stats, + }; + (all_files, statistics) +} + +fn create_max_min_accs( + schema: &Schema, +) -> (Vec>, Vec>) { + let max_values: Vec> = schema + .fields() + .iter() + .map(|field| MaxAccumulator::try_new(field.data_type()).ok()) + .collect::>(); + let min_values: Vec> = schema + .fields() + .iter() + .map(|field| MinAccumulator::try_new(field.data_type()).ok()) + .collect::>(); + (max_values, min_values) +} + +fn get_col_stats( + schema: &Schema, + null_counts: Vec, + max_values: &mut Vec>, + min_values: &mut Vec>, +) -> Vec { + (0..schema.fields().len()) + .map(|i| { + let max_value = match &max_values[i] { + Some(max_value) => max_value.evaluate().ok(), + None => None, + }; + let min_value = match &min_values[i] { + Some(min_value) => min_value.evaluate().ok(), + None => None, + }; + ColumnStatistics { + null_count: Some(null_counts[i] as usize), + max_value, + min_value, + distinct_count: None, + } + }) + .collect() +} diff --git a/datafusion/src/datasource/object_store.rs b/datafusion/src/datasource/object_store.rs new file mode 100644 index 000000000000..5c7a53215534 --- /dev/null +++ b/datafusion/src/datasource/object_store.rs @@ -0,0 +1,108 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Object Store abstracts access to an underlying file/object storage.
+
+use crate::datasource::local::LocalFileSystem;
+use crate::error::Result;
+use std::any::Any;
+use std::collections::HashMap;
+use std::fmt::Debug;
+use std::io::Read;
+use std::sync::{Arc, RwLock};
+
+/// Object Reader for one file in an object store
+pub trait ObjectReader {
+    /// Get reader for a part [start, start + length] in the file
+    fn get_reader(&self, start: u64, length: usize) -> Box<dyn Read>;
+
+    /// Get length for the file
+    fn length(&self) -> u64;
+}
+
+/// An ObjectStore abstracts access to an underlying file/object storage.
+/// It maps strings (e.g. URLs, filesystem paths, etc) to sources of bytes
+pub trait ObjectStore: Sync + Send + Debug {
+    /// Returns the object store as [`Any`](std::any::Any)
+    /// so that it can be downcast to a specific implementation.
+    fn as_any(&self) -> &dyn Any;
+
+    /// Returns all the files with `ext` in path `prefix`
+    fn list_all_files(&self, prefix: &str, ext: &str) -> Result<Vec<String>>;
+
+    /// Get object reader for one file
+    fn get_reader(&self, file_path: &str) -> Result<Arc<dyn ObjectReader>>;
+}
+
+static LOCAL_SCHEME: &str = "file";
+
+/// A Registry holds all the object stores at runtime with a scheme for each store.
+/// This allows the user to extend DataFusion with different storage systems such as S3 or HDFS
+/// and query data inside these systems.
+pub struct ObjectStoreRegistry {
+    /// A map from scheme to object store that serves list / read operations for the store
+    pub object_stores: RwLock<HashMap<String, Arc<dyn ObjectStore>>>,
+}
+
+impl ObjectStoreRegistry {
+    /// Create the registry that object stores can be registered into.
+    /// The [`LocalFileSystem`] store is registered by default so that reads from the
+    /// local file system work natively.
+    pub fn new() -> Self {
+        let mut map: HashMap<String, Arc<dyn ObjectStore>> = HashMap::new();
+        map.insert(LOCAL_SCHEME.to_string(), Arc::new(LocalFileSystem));
+
+        Self {
+            object_stores: RwLock::new(map),
+        }
+    }
+
+    /// Adds a new store to this registry.
+    /// If a store of the same prefix existed before, it is replaced in the registry and returned.
+    pub fn register_store(
+        &self,
+        scheme: String,
+        store: Arc<dyn ObjectStore>,
+    ) -> Option<Arc<dyn ObjectStore>> {
+        let mut stores = self.object_stores.write().unwrap();
+        stores.insert(scheme, store)
+    }
+
+    /// Get the store registered for scheme
+    pub fn get(&self, scheme: &str) -> Option<Arc<dyn ObjectStore>> {
+        let stores = self.object_stores.read().unwrap();
+        stores.get(scheme).cloned()
+    }
+
+    /// Get a suitable store for the path based on its scheme. For example:
+    /// path with prefix file:/// or no prefix will return the default LocalFS store,
+    /// path with prefix s3:/// will return the S3 store if it's registered,
+    /// and will always return LocalFS store when a prefix is not registered in the path.
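+    /// e.g. `store_for_path("s3://bucket/data/file.parquet")` returns the store
+    /// registered under the "s3" scheme, while `store_for_path("/tmp/data/file.parquet")`
+    /// falls back to [`LocalFileSystem`].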
+ pub fn store_for_path(&self, path: &str) -> Arc { + if let Some((scheme, _)) = path.split_once(':') { + let stores = self.object_stores.read().unwrap(); + if let Some(store) = stores.get(&*scheme.to_lowercase()) { + return store.clone(); + } + } + self.object_stores + .read() + .unwrap() + .get(LOCAL_SCHEME) + .unwrap() + .clone() + } +} diff --git a/datafusion/src/datasource/parquet.rs b/datafusion/src/datasource/parquet.rs index 28f79a6ae8dd..aaec9e83f78c 100644 --- a/datafusion/src/datasource/parquet.rs +++ b/datafusion/src/datasource/parquet.rs @@ -18,39 +18,51 @@ //! Parquet data source use std::any::Any; -use std::string::String; +use std::io::Read; use std::sync::Arc; -use arrow::datatypes::*; +use arrow::datatypes::SchemaRef; +use parquet::arrow::ArrowReader; +use parquet::arrow::ParquetFileArrowReader; +use parquet::file::reader::ChunkReader; +use parquet::file::serialized_reader::SerializedFileReader; +use parquet::file::statistics::Statistics as ParquetStatistics; + +use super::datasource::TableProviderFilterPushDown; +use crate::arrow::datatypes::{DataType, Field}; use crate::datasource::datasource::Statistics; -use crate::datasource::TableProvider; +use crate::datasource::object_store::{ObjectReader, ObjectStore}; +use crate::datasource::{ + create_max_min_accs, get_col_stats, get_statistics_with_limit, PartitionedFile, + SourceRootDescBuilder, SourceRootDescriptor, TableProvider, +}; use crate::error::Result; use crate::logical_plan::{combine_filters, Expr}; +use crate::parquet::file::reader::Length; +use crate::physical_plan::expressions::{MaxAccumulator, MinAccumulator}; use crate::physical_plan::parquet::ParquetExec; -use crate::physical_plan::ExecutionPlan; - -use super::datasource::TableProviderFilterPushDown; +use crate::physical_plan::{Accumulator, ExecutionPlan}; +use crate::prelude::ExecutionContext; +use crate::scalar::ScalarValue; /// Table-based representation of a `ParquetFile`. pub struct ParquetTable { path: String, - schema: SchemaRef, - statistics: Statistics, + desc: Arc, max_concurrency: usize, enable_pruning: bool, } impl ParquetTable { /// Attempt to initialize a new `ParquetTable` from a file path. - pub fn try_new(path: impl Into, max_concurrency: usize) -> Result { + pub fn try_new(path: impl Into, context: ExecutionContext) -> Result { let path = path.into(); - let parquet_exec = ParquetExec::try_from_path(&path, None, None, 0, 1, None)?; - let schema = parquet_exec.schema(); + let max_concurrency = context.state.lock().unwrap().config.concurrency; + let root_desc = ParquetRootDesc::new(path.as_str(), context); Ok(Self { path, - schema, - statistics: parquet_exec.statistics().to_owned(), + desc: Arc::new(root_desc?), max_concurrency, enable_pruning: true, }) @@ -80,7 +92,7 @@ impl TableProvider for ParquetTable { /// Get the schema for this parquet file. 
fn schema(&self) -> SchemaRef { - self.schema.clone() + self.desc.schema() } fn supports_filter_pushdown( @@ -107,8 +119,8 @@ impl TableProvider for ParquetTable { } else { None }; - Ok(Arc::new(ParquetExec::try_from_path( - &self.path, + Ok(Arc::new(ParquetExec::try_new( + self.desc.clone(), projection.clone(), predicate, limit @@ -120,7 +132,7 @@ impl TableProvider for ParquetTable { } fn statistics(&self) -> Statistics { - self.statistics.clone() + self.desc.statistics() } fn has_exact_statistics(&self) -> bool { @@ -128,6 +140,295 @@ impl TableProvider for ParquetTable { } } +#[derive(Debug)] +/// Descriptor for a parquet root path +pub struct ParquetRootDesc { + /// object store for reading files inside the root path + pub object_store: Arc, + /// metadata for files inside the root path + pub descriptor: SourceRootDescriptor, +} + +impl ParquetRootDesc { + /// Construct a new parquet descriptor for a root path + pub fn new(root_path: &str, context: ExecutionContext) -> Result { + let object_store = context + .state + .lock() + .unwrap() + .object_store_registry + .store_for_path(root_path); + let root_desc = Self::get_source_desc(root_path, object_store.clone(), "parquet"); + Ok(Self { + object_store, + descriptor: root_desc?, + }) + } + + /// Get file schema for all parquet files + pub fn schema(&self) -> SchemaRef { + self.descriptor.schema.clone() + } + + /// Get the summary statistics for all parquet files + pub fn statistics(&self) -> Statistics { + get_statistics_with_limit(&self.descriptor, None).1 + } + + fn summarize_min_max( + max_values: &mut Vec>, + min_values: &mut Vec>, + fields: &Vec, + i: usize, + stat: &ParquetStatistics, + ) { + match stat { + ParquetStatistics::Boolean(s) => { + if let DataType::Boolean = fields[i].data_type() { + if s.has_min_max_set() { + if let Some(max_value) = &mut max_values[i] { + match max_value + .update(&[ScalarValue::Boolean(Some(*s.max()))]) + { + Ok(_) => {} + Err(_) => { + max_values[i] = None; + } + } + } + if let Some(min_value) = &mut min_values[i] { + match min_value + .update(&[ScalarValue::Boolean(Some(*s.min()))]) + { + Ok(_) => {} + Err(_) => { + min_values[i] = None; + } + } + } + } + } + } + ParquetStatistics::Int32(s) => { + if let DataType::Int32 = fields[i].data_type() { + if s.has_min_max_set() { + if let Some(max_value) = &mut max_values[i] { + match max_value.update(&[ScalarValue::Int32(Some(*s.max()))]) + { + Ok(_) => {} + Err(_) => { + max_values[i] = None; + } + } + } + if let Some(min_value) = &mut min_values[i] { + match min_value.update(&[ScalarValue::Int32(Some(*s.min()))]) + { + Ok(_) => {} + Err(_) => { + min_values[i] = None; + } + } + } + } + } + } + ParquetStatistics::Int64(s) => { + if let DataType::Int64 = fields[i].data_type() { + if s.has_min_max_set() { + if let Some(max_value) = &mut max_values[i] { + match max_value.update(&[ScalarValue::Int64(Some(*s.max()))]) + { + Ok(_) => {} + Err(_) => { + max_values[i] = None; + } + } + } + if let Some(min_value) = &mut min_values[i] { + match min_value.update(&[ScalarValue::Int64(Some(*s.min()))]) + { + Ok(_) => {} + Err(_) => { + min_values[i] = None; + } + } + } + } + } + } + ParquetStatistics::Float(s) => { + if let DataType::Float32 = fields[i].data_type() { + if s.has_min_max_set() { + if let Some(max_value) = &mut max_values[i] { + match max_value + .update(&[ScalarValue::Float32(Some(*s.max()))]) + { + Ok(_) => {} + Err(_) => { + max_values[i] = None; + } + } + } + if let Some(min_value) = &mut min_values[i] { + match min_value + 
.update(&[ScalarValue::Float32(Some(*s.min()))]) + { + Ok(_) => {} + Err(_) => { + min_values[i] = None; + } + } + } + } + } + } + ParquetStatistics::Double(s) => { + if let DataType::Float64 = fields[i].data_type() { + if s.has_min_max_set() { + if let Some(max_value) = &mut max_values[i] { + match max_value + .update(&[ScalarValue::Float64(Some(*s.max()))]) + { + Ok(_) => {} + Err(_) => { + max_values[i] = None; + } + } + } + if let Some(min_value) = &mut min_values[i] { + match min_value + .update(&[ScalarValue::Float64(Some(*s.min()))]) + { + Ok(_) => {} + Err(_) => { + min_values[i] = None; + } + } + } + } + } + } + _ => {} + } + } +} + +impl SourceRootDescBuilder for ParquetRootDesc { + fn get_file_meta( + file_path: &str, + object_store: Arc, + ) -> Result { + let reader = object_store.get_reader(file_path)?; + let file_reader = + Arc::new(SerializedFileReader::new(ObjectReaderWrapper::new(reader))?); + let mut arrow_reader = ParquetFileArrowReader::new(file_reader); + let file_path = file_path.to_string(); + let schema = arrow_reader.get_schema()?; + let num_fields = schema.fields().len(); + let fields = schema.fields().to_vec(); + let meta_data = arrow_reader.get_metadata(); + + let mut num_rows = 0; + let mut total_byte_size = 0; + let mut null_counts = vec![0; num_fields]; + let mut has_statistics = false; + + let (mut max_values, mut min_values) = create_max_min_accs(&schema); + + for row_group_meta in meta_data.row_groups() { + num_rows += row_group_meta.num_rows(); + total_byte_size += row_group_meta.total_byte_size(); + + let columns_null_counts = row_group_meta + .columns() + .iter() + .flat_map(|c| c.statistics().map(|stats| stats.null_count())); + + for (i, cnt) in columns_null_counts.enumerate() { + null_counts[i] += cnt as usize + } + + for (i, column) in row_group_meta.columns().iter().enumerate() { + if let Some(stat) = column.statistics() { + has_statistics = true; + ParquetRootDesc::summarize_min_max( + &mut max_values, + &mut min_values, + &fields, + i, + stat, + ) + } + } + } + + let column_stats = if has_statistics { + Some(get_col_stats( + &schema, + null_counts, + &mut max_values, + &mut min_values, + )) + } else { + None + }; + + let statistics = Statistics { + num_rows: Some(num_rows as usize), + total_byte_size: Some(total_byte_size as usize), + column_statistics: column_stats, + }; + + Ok(PartitionedFile { + file_path, + schema, + statistics, + partition_value: None, + partition_schema: None, + }) + } +} + +/// Thin wrapper over object wrapper to work with parquet file read +pub struct ObjectReaderWrapper { + reader: Arc, +} + +impl ObjectReaderWrapper { + /// Construct a wrapper over the provided object reader + pub fn new(reader: Arc) -> Self { + Self { reader } + } +} + +impl ChunkReader for ObjectReaderWrapper { + type T = InnerReaderWrapper; + + fn get_read(&self, start: u64, length: usize) -> parquet::errors::Result { + Ok(InnerReaderWrapper { + inner_reader: self.reader.get_reader(start, length), + }) + } +} + +impl Length for ObjectReaderWrapper { + fn len(&self) -> u64 { + self.reader.length() + } +} + +/// Thin wrapper over reader for a parquet file +pub struct InnerReaderWrapper { + inner_reader: Box, +} + +impl Read for InnerReaderWrapper { + fn read(&mut self, buf: &mut [u8]) -> std::result::Result { + self.inner_reader.read(buf) + } +} + #[cfg(test)] mod tests { use super::*; @@ -355,7 +656,8 @@ mod tests { fn load_table(name: &str) -> Result> { let testdata = crate::test_util::parquet_test_data(); let filename = format!("{}/{}", testdata, 
name); - let table = ParquetTable::try_new(&filename, 2)?; + let table = + ParquetTable::try_new(&filename, ExecutionContext::with_concurrency(2))?; Ok(Arc::new(table)) } diff --git a/datafusion/src/execution/context.rs b/datafusion/src/execution/context.rs index 0cf8b3b6c276..4c7b85c1eb26 100644 --- a/datafusion/src/execution/context.rs +++ b/datafusion/src/execution/context.rs @@ -49,6 +49,8 @@ use crate::catalog::{ ResolvedTableReference, TableReference, }; use crate::datasource::csv::CsvFile; +use crate::datasource::object_store::ObjectStore; +use crate::datasource::object_store::ObjectStoreRegistry; use crate::datasource::parquet::ParquetTable; use crate::datasource::TableProvider; use crate::error::{DataFusionError, Result}; @@ -164,10 +166,17 @@ impl ExecutionContext { aggregate_functions: HashMap::new(), config, execution_props: ExecutionProps::new(), + object_store_registry: Arc::new(ObjectStoreRegistry::new()), })), } } + /// Creates a new execution context using the provided concurrency. + pub fn with_concurrency(concurrency: usize) -> ExecutionContext { + let config = ExecutionConfig::new().with_concurrency(concurrency); + ExecutionContext::with_config(config) + } + /// Creates a dataframe that will execute a SQL query. pub fn sql(&mut self, sql: &str) -> Result> { let plan = self.create_logical_plan(sql)?; @@ -288,12 +297,7 @@ impl ExecutionContext { ) -> Result> { Ok(Arc::new(DataFrameImpl::new( self.state.clone(), - &LogicalPlanBuilder::scan_parquet( - filename, - None, - self.state.lock().unwrap().config.concurrency, - )? - .build()?, + &LogicalPlanBuilder::scan_parquet(filename, None, self.clone())?.build()?, ))) } @@ -325,7 +329,7 @@ impl ExecutionContext { pub fn register_parquet(&mut self, name: &str, filename: &str) -> Result<()> { let table = { let m = self.state.lock().unwrap(); - ParquetTable::try_new(filename, m.config.concurrency)? + ParquetTable::try_new(filename, self.clone())? .with_enable_pruning(m.config.parquet_pruning) }; self.register_table(name, Arc::new(table))?; @@ -358,6 +362,25 @@ impl ExecutionContext { state.catalog_list.register_catalog(name, catalog) } + /// Registers a object store with scheme using a custom `ObjectStore` so that + /// an external file system or object storage system could be used against this context. 
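+    /// A hypothetical example (the `S3FileSystem` type is not provided here):
+    /// `ctx.register_object_store("s3", Arc::new(S3FileSystem::new(region)))`, after
+    /// which `ctx.read_parquet("s3://bucket/path/")` is served by that store.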
+ /// + /// Returns the `ObjectStore` previously registered for this + /// scheme, if any + pub fn register_object_store( + &self, + scheme: impl Into, + object_store: Arc, + ) -> Option> { + let scheme = scheme.into(); + + self.state + .lock() + .unwrap() + .object_store_registry + .register_store(scheme, object_store) + } + /// Retrieves a `CatalogProvider` instance by name pub fn catalog(&self, name: &str) -> Option> { self.state.lock().unwrap().catalog_list.catalog(name) @@ -840,6 +863,8 @@ pub struct ExecutionContextState { pub config: ExecutionConfig, /// Execution properties pub execution_props: ExecutionProps, + /// Object Store that are registered with the context + pub object_store_registry: Arc, } impl ExecutionProps { @@ -867,6 +892,7 @@ impl ExecutionContextState { aggregate_functions: HashMap::new(), config: ExecutionConfig::new(), execution_props: ExecutionProps::new(), + object_store_registry: Arc::new(ObjectStoreRegistry::new()), } } diff --git a/datafusion/src/logical_plan/builder.rs b/datafusion/src/logical_plan/builder.rs index 0dfc1e7aa048..77dd65c97ef3 100644 --- a/datafusion/src/logical_plan/builder.rs +++ b/datafusion/src/logical_plan/builder.rs @@ -40,6 +40,7 @@ use crate::logical_plan::{ columnize_expr, normalize_col, normalize_cols, Column, DFField, DFSchema, DFSchemaRef, Partitioning, }; +use crate::prelude::ExecutionContext; /// Default table name for unnamed table pub const UNNAMED_TABLE: &str = "?table?"; @@ -137,20 +138,20 @@ impl LogicalPlanBuilder { pub fn scan_parquet( path: impl Into, projection: Option>, - max_concurrency: usize, + context: ExecutionContext, ) -> Result { let path = path.into(); - Self::scan_parquet_with_name(path.clone(), projection, max_concurrency, path) + Self::scan_parquet_with_name(path.clone(), projection, context, path) } /// Scan a Parquet data source and register it with a given table name pub fn scan_parquet_with_name( path: impl Into, projection: Option>, - max_concurrency: usize, + context: ExecutionContext, table_name: impl Into, ) -> Result { - let provider = Arc::new(ParquetTable::try_new(path, max_concurrency)?); + let provider = Arc::new(ParquetTable::try_new(path, context)?); Self::scan(table_name, provider, projection) } diff --git a/datafusion/src/physical_optimizer/repartition.rs b/datafusion/src/physical_optimizer/repartition.rs index 4504c81daa06..30ec896b4e2f 100644 --- a/datafusion/src/physical_optimizer/repartition.rs +++ b/datafusion/src/physical_optimizer/repartition.rs @@ -110,6 +110,8 @@ mod tests { use super::*; use crate::datasource::datasource::Statistics; + use crate::datasource::local::LocalFileSystem; + use crate::datasource::PartitionedFile; use crate::physical_plan::parquet::{ ParquetExec, ParquetExecMetrics, ParquetPartition, }; @@ -122,11 +124,13 @@ mod tests { vec![], Arc::new(ParquetExec::new( vec![ParquetPartition::new( - vec!["x".to_string()], - Statistics::default(), + vec![PartitionedFile::from("x".to_string())], + 0, )], + Arc::new(LocalFileSystem), schema, None, + Statistics::default(), ParquetExecMetrics::new(), None, 2048, @@ -160,11 +164,13 @@ mod tests { vec![], Arc::new(ParquetExec::new( vec![ParquetPartition::new( - vec!["x".to_string()], - Statistics::default(), + vec![PartitionedFile::from("x".to_string())], + 0, )], + Arc::new(LocalFileSystem), schema, None, + Statistics::default(), ParquetExecMetrics::new(), None, 2048, diff --git a/datafusion/src/physical_plan/common.rs b/datafusion/src/physical_plan/common.rs index 2482bfc0872c..628095c6640c 100644 --- 
a/datafusion/src/physical_plan/common.rs +++ b/datafusion/src/physical_plan/common.rs @@ -27,8 +27,6 @@ use arrow::error::Result as ArrowResult; use arrow::record_batch::RecordBatch; use futures::channel::mpsc; use futures::{SinkExt, Stream, StreamExt, TryStreamExt}; -use std::fs; -use std::fs::metadata; use std::sync::Arc; use std::task::{Context, Poll}; use tokio::task::JoinHandle; @@ -107,42 +105,6 @@ pub(crate) fn combine_batches( } } -/// Recursively builds a list of files in a directory with a given extension -pub fn build_file_list(dir: &str, ext: &str) -> Result> { - let mut filenames: Vec = Vec::new(); - build_file_list_recurse(dir, &mut filenames, ext)?; - Ok(filenames) -} - -/// Recursively build a list of files in a directory with a given extension with an accumulator list -fn build_file_list_recurse( - dir: &str, - filenames: &mut Vec, - ext: &str, -) -> Result<()> { - let metadata = metadata(dir)?; - if metadata.is_file() { - if dir.ends_with(ext) { - filenames.push(dir.to_string()); - } - } else { - for entry in fs::read_dir(dir)? { - let entry = entry?; - let path = entry.path(); - if let Some(path_name) = path.to_str() { - if path.is_dir() { - build_file_list_recurse(path_name, filenames, ext)?; - } else if path_name.ends_with(ext) { - filenames.push(path_name.to_string()); - } - } else { - return Err(DataFusionError::Plan("Invalid path".to_string())); - } - } - } - Ok(()) -} - /// Spawns a task to the tokio threadpool and writes its outputs to the provided mpsc sender pub(crate) fn spawn_execution( input: Arc, diff --git a/datafusion/src/physical_plan/csv.rs b/datafusion/src/physical_plan/csv.rs index 544f98cba0c6..293f46d7a736 100644 --- a/datafusion/src/physical_plan/csv.rs +++ b/datafusion/src/physical_plan/csv.rs @@ -17,9 +17,11 @@ //! 
Execution plan for reading CSV files +use crate::datasource::local::LocalFileSystem; +use crate::datasource::object_store::ObjectStore; use crate::error::{DataFusionError, Result}; use crate::physical_plan::ExecutionPlan; -use crate::physical_plan::{common, source::Source, Partitioning}; +use crate::physical_plan::{source::Source, Partitioning}; use arrow::csv; use arrow::datatypes::{Schema, SchemaRef}; use arrow::error::Result as ArrowResult; @@ -141,7 +143,7 @@ impl CsvExec { ) -> Result { let file_extension = String::from(options.file_extension); - let filenames = common::build_file_list(path, file_extension.as_str())?; + let filenames = LocalFileSystem.list_all_files(path, options.file_extension)?; if filenames.is_empty() { return Err(DataFusionError::Execution(format!( "No files found at {path} with file extension {file_extension}", diff --git a/datafusion/src/physical_plan/json.rs b/datafusion/src/physical_plan/json.rs index ed9b0b03a38e..df7e9e5e5014 100644 --- a/datafusion/src/physical_plan/json.rs +++ b/datafusion/src/physical_plan/json.rs @@ -19,7 +19,9 @@ use async_trait::async_trait; use futures::Stream; -use super::{common, source::Source, ExecutionPlan, Partitioning, RecordBatchStream}; +use super::{source::Source, ExecutionPlan, Partitioning, RecordBatchStream}; +use crate::datasource::local::LocalFileSystem; +use crate::datasource::object_store::ObjectStore; use crate::error::{DataFusionError, Result}; use arrow::json::reader::{infer_json_schema_from_iterator, ValueIter}; use arrow::{ @@ -87,7 +89,7 @@ impl NdJsonExec { ) -> Result { let file_extension = options.file_extension.to_string(); - let filenames = common::build_file_list(path, &file_extension)?; + let filenames = LocalFileSystem.list_all_files(path, options.file_extension)?; if filenames.is_empty() { return Err(DataFusionError::Execution(format!( diff --git a/datafusion/src/physical_plan/parquet.rs b/datafusion/src/physical_plan/parquet.rs index ec5611f96292..bc3e0d597213 100644 --- a/datafusion/src/physical_plan/parquet.rs +++ b/datafusion/src/physical_plan/parquet.rs @@ -18,7 +18,6 @@ //! 
Execution plan for reading Parquet files use std::fmt; -use std::fs::File; use std::sync::Arc; use std::task::{Context, Poll}; use std::{any::Any, convert::TryInto}; @@ -28,7 +27,7 @@ use crate::{ logical_plan::{Column, Expr}, physical_optimizer::pruning::{PruningPredicate, PruningStatistics}, physical_plan::{ - common, DisplayFormatType, ExecutionPlan, Partitioning, RecordBatchStream, + DisplayFormatType, ExecutionPlan, Partitioning, RecordBatchStream, SendableRecordBatchStream, }, scalar::ScalarValue, @@ -36,7 +35,7 @@ use crate::{ use arrow::{ array::ArrayRef, - datatypes::{DataType, Schema, SchemaRef}, + datatypes::{Schema, SchemaRef}, error::{ArrowError, Result as ArrowResult}, record_batch::RecordBatch, }; @@ -57,19 +56,23 @@ use tokio::{ }; use tokio_stream::wrappers::ReceiverStream; -use crate::datasource::datasource::{ColumnStatistics, Statistics}; +use crate::datasource::datasource::Statistics; use async_trait::async_trait; use futures::stream::{Stream, StreamExt}; use super::SQLMetric; -use crate::physical_plan::expressions::{MaxAccumulator, MinAccumulator}; -use crate::physical_plan::Accumulator; +use crate::datasource::object_store::ObjectStore; +use crate::datasource::parquet::{ObjectReaderWrapper, ParquetRootDesc}; +use crate::datasource::{get_statistics_with_limit, FilePartition, PartitionedFile}; +use crate::prelude::ExecutionContext; /// Execution plan for scanning one or more Parquet partitions #[derive(Debug, Clone)] pub struct ParquetExec { /// Parquet partitions to read partitions: Vec, + /// Source used for get reader for partitions + object_store: Arc, /// Schema after projection is applied schema: SchemaRef, /// Projection for which columns to load @@ -98,9 +101,7 @@ pub struct ParquetExec { #[derive(Debug, Clone)] pub struct ParquetPartition { /// The Parquet filename for this partition - pub filenames: Vec, - /// Statistics for this partition - pub statistics: Statistics, + pub file_partition: FilePartition, /// Execution metrics metrics: ParquetPartitionMetrics, } @@ -129,290 +130,44 @@ impl ParquetExec { projection: Option>, predicate: Option, batch_size: usize, - max_concurrency: usize, + context: ExecutionContext, limit: Option, ) -> Result { + let max_concurrency = context.state.lock().unwrap().config.concurrency; // build a list of filenames from the specified path, which could be a single file or // a directory containing one or more parquet files - let filenames = common::build_file_list(path, ".parquet")?; - if filenames.is_empty() { - Err(DataFusionError::Plan(format!( - "No Parquet files (with .parquet extension) found at path {}", - path - ))) - } else { - let filenames = filenames - .iter() - .map(|filename| filename.as_str()) - .collect::>(); - Self::try_from_files( - &filenames, - projection, - predicate, - batch_size, - max_concurrency, - limit, - ) - } + let root_desc = ParquetRootDesc::new(path, context)?; + Self::try_new( + Arc::new(root_desc), + projection, + predicate, + batch_size, + max_concurrency, + limit, + ) } - /// Create a new Parquet reader execution plan based on the specified list of Parquet - /// files - pub fn try_from_files( - filenames: &[&str], + /// Create a new Parquet reader execution plan with root descriptor, provided partitions and schema + pub fn try_new( + desc: Arc, projection: Option>, predicate: Option, batch_size: usize, max_concurrency: usize, limit: Option, ) -> Result { - debug!("Creating ParquetExec, filenames: {:?}, projection {:?}, predicate: {:?}, limit: {:?}", - filenames, projection, predicate, limit); - 
// build a list of Parquet partitions with statistics and gather all unique schemas - // used in this data set - let mut schemas: Vec = vec![]; - let mut partitions = Vec::with_capacity(max_concurrency); - let filenames: Vec = filenames.iter().map(|s| s.to_string()).collect(); - let chunks = split_files(&filenames, max_concurrency); - let mut num_rows = 0; - let mut num_fields = 0; - let mut fields = Vec::new(); - let mut total_byte_size = 0; - let mut null_counts = Vec::new(); - let mut max_values: Vec> = Vec::new(); - let mut min_values: Vec> = Vec::new(); - let mut limit_exhausted = false; - for chunk in chunks { - let mut filenames: Vec = - chunk.iter().map(|x| x.to_string()).collect(); - let mut total_files = 0; - for filename in &filenames { - total_files += 1; - let file = File::open(filename)?; - let file_reader = Arc::new(SerializedFileReader::new(file)?); - let mut arrow_reader = ParquetFileArrowReader::new(file_reader); - let meta_data = arrow_reader.get_metadata(); - // collect all the unique schemas in this data set - let schema = arrow_reader.get_schema()?; - if schemas.is_empty() || schema != schemas[0] { - fields = schema.fields().to_vec(); - num_fields = schema.fields().len(); - null_counts = vec![0; num_fields]; - max_values = schema - .fields() - .iter() - .map(|field| MaxAccumulator::try_new(field.data_type()).ok()) - .collect::>(); - min_values = schema - .fields() - .iter() - .map(|field| MinAccumulator::try_new(field.data_type()).ok()) - .collect::>(); - schemas.push(schema); - } + debug!("Creating ParquetExec, desc: {:?}, projection {:?}, predicate: {:?}, limit: {:?}", + desc, projection, predicate, limit); - for row_group_meta in meta_data.row_groups() { - num_rows += row_group_meta.num_rows(); - total_byte_size += row_group_meta.total_byte_size(); + let (all_files, statistics) = get_statistics_with_limit(&desc.descriptor, limit); + let schema = desc.schema(); - // Currently assumes every Parquet file has same schema - // https://issues.apache.org/jira/browse/ARROW-11017 - let columns_null_counts = row_group_meta - .columns() - .iter() - .flat_map(|c| c.statistics().map(|stats| stats.null_count())); - - for (i, cnt) in columns_null_counts.enumerate() { - null_counts[i] += cnt - } - - for (i, column) in row_group_meta.columns().iter().enumerate() { - if let Some(stat) = column.statistics() { - match stat { - ParquetStatistics::Boolean(s) => { - if let DataType::Boolean = fields[i].data_type() { - if s.has_min_max_set() { - if let Some(max_value) = &mut max_values[i] { - match max_value.update(&[ - ScalarValue::Boolean(Some(*s.max())), - ]) { - Ok(_) => {} - Err(_) => { - max_values[i] = None; - } - } - } - if let Some(min_value) = &mut min_values[i] { - match min_value.update(&[ - ScalarValue::Boolean(Some(*s.min())), - ]) { - Ok(_) => {} - Err(_) => { - min_values[i] = None; - } - } - } - } - } - } - ParquetStatistics::Int32(s) => { - if let DataType::Int32 = fields[i].data_type() { - if s.has_min_max_set() { - if let Some(max_value) = &mut max_values[i] { - match max_value.update(&[ - ScalarValue::Int32(Some(*s.max())), - ]) { - Ok(_) => {} - Err(_) => { - max_values[i] = None; - } - } - } - if let Some(min_value) = &mut min_values[i] { - match min_value.update(&[ - ScalarValue::Int32(Some(*s.min())), - ]) { - Ok(_) => {} - Err(_) => { - min_values[i] = None; - } - } - } - } - } - } - ParquetStatistics::Int64(s) => { - if let DataType::Int64 = fields[i].data_type() { - if s.has_min_max_set() { - if let Some(max_value) = &mut max_values[i] { - match 
max_value.update(&[ - ScalarValue::Int64(Some(*s.max())), - ]) { - Ok(_) => {} - Err(_) => { - max_values[i] = None; - } - } - } - if let Some(min_value) = &mut min_values[i] { - match min_value.update(&[ - ScalarValue::Int64(Some(*s.min())), - ]) { - Ok(_) => {} - Err(_) => { - min_values[i] = None; - } - } - } - } - } - } - ParquetStatistics::Float(s) => { - if let DataType::Float32 = fields[i].data_type() { - if s.has_min_max_set() { - if let Some(max_value) = &mut max_values[i] { - match max_value.update(&[ - ScalarValue::Float32(Some(*s.max())), - ]) { - Ok(_) => {} - Err(_) => { - max_values[i] = None; - } - } - } - if let Some(min_value) = &mut min_values[i] { - match min_value.update(&[ - ScalarValue::Float32(Some(*s.min())), - ]) { - Ok(_) => {} - Err(_) => { - min_values[i] = None; - } - } - } - } - } - } - ParquetStatistics::Double(s) => { - if let DataType::Float64 = fields[i].data_type() { - if s.has_min_max_set() { - if let Some(max_value) = &mut max_values[i] { - match max_value.update(&[ - ScalarValue::Float64(Some(*s.max())), - ]) { - Ok(_) => {} - Err(_) => { - max_values[i] = None; - } - } - } - if let Some(min_value) = &mut min_values[i] { - match min_value.update(&[ - ScalarValue::Float64(Some(*s.min())), - ]) { - Ok(_) => {} - Err(_) => { - min_values[i] = None; - } - } - } - } - } - } - _ => {} - } - } - } - - if limit.map(|x| num_rows >= x as i64).unwrap_or(false) { - limit_exhausted = true; - break; - } - } - } - let column_stats = (0..num_fields) - .map(|i| { - let max_value = match &max_values[i] { - Some(max_value) => max_value.evaluate().ok(), - None => None, - }; - let min_value = match &min_values[i] { - Some(min_value) => min_value.evaluate().ok(), - None => None, - }; - ColumnStatistics { - null_count: Some(null_counts[i] as usize), - max_value, - min_value, - distinct_count: None, - } - }) - .collect(); - - let statistics = Statistics { - num_rows: Some(num_rows as usize), - total_byte_size: Some(total_byte_size as usize), - column_statistics: Some(column_stats), - }; - // remove files that are not needed in case of limit - filenames.truncate(total_files); - partitions.push(ParquetPartition::new(filenames, statistics)); - if limit_exhausted { - break; - } + let mut partitions = Vec::with_capacity(max_concurrency); + let chunked_files = split_files(&all_files, max_concurrency); + for (index, group) in chunked_files.iter().enumerate() { + partitions.push(ParquetPartition::new(Vec::from(*group), index)); } - // we currently get the schema information from the first file rather than do - // schema merging and this is a limitation. 
- // See https://issues.apache.org/jira/browse/ARROW-11017 - if schemas.len() > 1 { - return Err(DataFusionError::Plan(format!( - "The Parquet files have {} different schemas and DataFusion does \ - not yet support schema merging", - schemas.len() - ))); - } - let schema = Arc::new(schemas.pop().unwrap()); let metrics = ParquetExecMetrics::new(); let predicate_builder = predicate.and_then(|predicate_expr| { @@ -431,8 +186,10 @@ impl ParquetExec { Ok(Self::new( partitions, + desc.object_store.clone(), schema, projection, + statistics, metrics, predicate_builder, batch_size, @@ -443,8 +200,10 @@ impl ParquetExec { /// Create a new Parquet reader execution plan with provided partitions and schema pub fn new( partitions: Vec, + object_store: Arc, schema: SchemaRef, projection: Option>, + statistics: Statistics, metrics: ParquetExecMetrics, predicate_builder: Option, batch_size: usize, @@ -462,96 +221,23 @@ impl ParquetExec { .collect(), ); - // sum the statistics - let mut num_rows: Option = None; - let mut total_byte_size: Option = None; - let mut null_counts: Vec = vec![0; schema.fields().len()]; - let mut has_statistics = false; - let mut max_values = schema - .fields() - .iter() - .map(|field| MaxAccumulator::try_new(field.data_type()).ok()) - .collect::>(); - let mut min_values = schema - .fields() - .iter() - .map(|field| MinAccumulator::try_new(field.data_type()).ok()) - .collect::>(); - for part in &partitions { - if let Some(n) = part.statistics.num_rows { - num_rows = Some(num_rows.unwrap_or(0) + n) - } - if let Some(n) = part.statistics.total_byte_size { - total_byte_size = Some(total_byte_size.unwrap_or(0) + n) + let new_column_statistics = statistics.column_statistics.map(|stats| { + let mut projected_stats = Vec::with_capacity(projection.len()); + for proj in &projection { + projected_stats.push(stats[*proj].clone()); } - if let Some(x) = &part.statistics.column_statistics { - let part_nulls: Vec> = - x.iter().map(|c| c.null_count).collect(); - has_statistics = true; - - let part_max_values: Vec> = - x.iter().map(|c| c.max_value.clone()).collect(); - let part_min_values: Vec> = - x.iter().map(|c| c.min_value.clone()).collect(); - - for &i in projection.iter() { - null_counts[i] = part_nulls[i].unwrap_or(0); - if let Some(part_max_value) = part_max_values[i].clone() { - if let Some(max_value) = &mut max_values[i] { - match max_value.update(&[part_max_value]) { - Ok(_) => {} - Err(_) => { - max_values[i] = None; - } - } - } - } - if let Some(part_min_value) = part_min_values[i].clone() { - if let Some(min_value) = &mut min_values[i] { - match min_value.update(&[part_min_value]) { - Ok(_) => {} - Err(_) => { - min_values[i] = None; - } - } - } - } - } - } - } - - let column_stats = if has_statistics { - Some( - (0..schema.fields().len()) - .map(|i| { - let max_value = match &max_values[i] { - Some(max_value) => max_value.evaluate().ok(), - None => None, - }; - let min_value = match &min_values[i] { - Some(min_value) => min_value.evaluate().ok(), - None => None, - }; - ColumnStatistics { - null_count: Some(null_counts[i] as usize), - max_value, - min_value, - distinct_count: None, - } - }) - .collect(), - ) - } else { - None - }; + projected_stats + }); let statistics = Statistics { - num_rows, - total_byte_size, - column_statistics: column_stats, + num_rows: statistics.num_rows, + total_byte_size: statistics.total_byte_size, + column_statistics: new_column_statistics, }; + Self { partitions, + object_store, schema: Arc::new(projected_schema), projection, metrics, @@ -585,22 +271,20 
@@ impl ParquetExec { impl ParquetPartition { /// Create a new parquet partition - pub fn new(filenames: Vec, statistics: Statistics) -> Self { + pub fn new(files: Vec, index: usize) -> Self { Self { - filenames, - statistics, + file_partition: FilePartition { index, files }, metrics: ParquetPartitionMetrics::new(), } } /// The Parquet filename for this partition - pub fn filenames(&self) -> &[String] { - &self.filenames - } - - /// Statistics for this partition - pub fn statistics(&self) -> &Statistics { - &self.statistics + pub fn filenames(&self) -> Vec { + self.file_partition + .files + .iter() + .map(|f| f.file_path.clone()) + .collect() } } @@ -666,8 +350,8 @@ impl ExecutionPlan for ParquetExec { Receiver>, ) = channel(2); - let partition = &self.partitions[partition]; - let filenames = partition.filenames.clone(); + let object_store = self.object_store.clone(); + let partition = self.partitions[partition].clone(); let metrics = partition.metrics.clone(); let projection = self.projection.clone(); let predicate_builder = self.predicate_builder.clone(); @@ -676,7 +360,8 @@ impl ExecutionPlan for ParquetExec { task::spawn_blocking(move || { if let Err(e) = read_files( - &filenames, + object_store, + partition, metrics, &projection, &predicate_builder, @@ -704,9 +389,7 @@ impl ExecutionPlan for ParquetExec { let files: Vec<_> = self .partitions .iter() - .map(|pp| pp.filenames.iter()) - .flatten() - .map(|s| s.as_str()) + .map(|pp| format!("{}", pp.file_partition)) .collect(); write!( @@ -726,14 +409,11 @@ impl ExecutionPlan for ParquetExec { .flat_map(|p| { vec![ ( - format!( - "numPredicateEvaluationErrors for {}", - p.filenames.join(",") - ), + format!("numPredicateEvaluationErrors for {}", p.file_partition), p.metrics.predicate_evaluation_errors.as_ref().clone(), ), ( - format!("numRowGroupsPruned for {}", p.filenames.join(",")), + format!("numRowGroupsPruned for {}", p.file_partition), p.metrics.row_groups_pruned.as_ref().clone(), ), ] @@ -857,7 +537,7 @@ fn build_row_group_predicate( match predicate_values { Ok(values) => { // NB: false means don't scan row group - let num_pruned = values.iter().filter(|&v| !v).count(); + let num_pruned = values.iter().filter(|&v| !*v).count(); metrics.row_groups_pruned.add(num_pruned); Box::new(move |_, i| values[i]) } @@ -872,7 +552,8 @@ fn build_row_group_predicate( } fn read_files( - filenames: &[String], + object_store: Arc, + partition: ParquetPartition, metrics: ParquetPartitionMetrics, projection: &[usize], predicate_builder: &Option, @@ -881,9 +562,11 @@ fn read_files( limit: Option, ) -> Result<()> { let mut total_rows = 0; - 'outer: for filename in filenames { - let file = File::open(&filename)?; - let mut file_reader = SerializedFileReader::new(file)?; + let all_files = partition.file_partition.files; + 'outer: for partitioned_file in all_files { + let reader = object_store.get_reader(partitioned_file.file_path.as_str())?; + let mut file_reader = + SerializedFileReader::new(ObjectReaderWrapper::new(reader))?; if let Some(predicate_builder) = predicate_builder { let row_group_predicate = build_row_group_predicate( predicate_builder, @@ -910,7 +593,7 @@ fn read_files( Some(Err(e)) => { let err_msg = format!( "Error reading batch from {}: {}", - filename, + partitioned_file, e.to_string() ); // send error to operator @@ -930,12 +613,15 @@ fn read_files( Ok(()) } -fn split_files(filenames: &[String], n: usize) -> Vec<&[String]> { - let mut chunk_size = filenames.len() / n; - if filenames.len() % n > 0 { +fn split_files( + partitioned_files: 
&[PartitionedFile], + n: usize, +) -> Vec<&[PartitionedFile]> { + let mut chunk_size = partitioned_files.len() / n; + if partitioned_files.len() % n > 0 { chunk_size += 1; } - filenames.chunks(chunk_size).collect() + partitioned_files.chunks(chunk_size).collect() } struct ParquetStream { @@ -973,24 +659,24 @@ mod tests { #[test] fn test_split_files() { - let filenames = vec![ - "a".to_string(), - "b".to_string(), - "c".to_string(), - "d".to_string(), - "e".to_string(), + let files = vec![ + PartitionedFile::from("a".to_string()), + PartitionedFile::from("b".to_string()), + PartitionedFile::from("c".to_string()), + PartitionedFile::from("d".to_string()), + PartitionedFile::from("e".to_string()), ]; - let chunks = split_files(&filenames, 1); + let chunks = split_files(&files, 1); assert_eq!(1, chunks.len()); assert_eq!(5, chunks[0].len()); - let chunks = split_files(&filenames, 2); + let chunks = split_files(&files, 2); assert_eq!(2, chunks.len()); assert_eq!(3, chunks[0].len()); assert_eq!(2, chunks[1].len()); - let chunks = split_files(&filenames, 5); + let chunks = split_files(&files, 5); assert_eq!(5, chunks.len()); assert_eq!(1, chunks[0].len()); assert_eq!(1, chunks[1].len()); @@ -998,7 +684,7 @@ mod tests { assert_eq!(1, chunks[3].len()); assert_eq!(1, chunks[4].len()); - let chunks = split_files(&filenames, 123); + let chunks = split_files(&files, 123); assert_eq!(5, chunks.len()); assert_eq!(1, chunks[0].len()); assert_eq!(1, chunks[1].len()); @@ -1016,7 +702,7 @@ mod tests { Some(vec![0, 1, 2]), None, 1024, - 4, + ExecutionContext::with_concurrency(4), None, )?; assert_eq!(parquet_exec.output_partitioning().partition_count(), 1); From 6f59715587b614306497443e249e56a2bd3a2a27 Mon Sep 17 00:00:00 2001 From: Yijie Shen Date: Tue, 10 Aug 2021 22:38:32 +0800 Subject: [PATCH 02/16] fix read --- .../src/execution_plans/shuffle_writer.rs | 2 ++ datafusion/src/datasource/local.rs | 32 ++----------------- datafusion/src/datasource/object_store.rs | 2 +- datafusion/src/datasource/parquet.rs | 2 +- datafusion/src/execution/context.rs | 1 + datafusion/src/physical_plan/repartition.rs | 2 +- 6 files changed, 9 insertions(+), 32 deletions(-) diff --git a/ballista/rust/core/src/execution_plans/shuffle_writer.rs b/ballista/rust/core/src/execution_plans/shuffle_writer.rs index b1db21fa90a1..8c66a1e3a739 100644 --- a/ballista/rust/core/src/execution_plans/shuffle_writer.rs +++ b/ballista/rust/core/src/execution_plans/shuffle_writer.rs @@ -482,6 +482,7 @@ mod tests { use tempfile::TempDir; #[tokio::test] + #[ignore] async fn test() -> Result<()> { let input_plan = Arc::new(CoalescePartitionsExec::new(create_input_plan()?)); let work_dir = TempDir::new()?; @@ -534,6 +535,7 @@ mod tests { } #[tokio::test] + #[ignore] async fn test_partitioned() -> Result<()> { let input_plan = create_input_plan()?; let work_dir = TempDir::new()?; diff --git a/datafusion/src/datasource/local.rs b/datafusion/src/datasource/local.rs index 4890e9d229f9..cdd9b324a7ad 100644 --- a/datafusion/src/datasource/local.rs +++ b/datafusion/src/datasource/local.rs @@ -20,7 +20,8 @@ use crate::datasource::object_store::{ObjectReader, ObjectStore}; use crate::error::DataFusionError; use crate::error::Result; -use crate::parquet::file::reader::{ChunkReader, Length}; +use crate::parquet::file::reader::Length; +use crate::parquet::file::serialized_reader::FileSource; use std::any::Any; use std::fs; use std::fs::{metadata, File}; @@ -59,11 +60,7 @@ impl LocalFSObjectReader { impl ObjectReader for LocalFSObjectReader { fn get_reader(&self, 
start: u64, length: usize) -> Box { - Box::new(FileSegmentReader::new( - self.file.try_clone().unwrap(), - start, - length, - )) + Box::new(FileSource::::new(&self.file, start, length)) } fn length(&self) -> u64 { @@ -71,29 +68,6 @@ impl ObjectReader for LocalFSObjectReader { } } -struct FileSegmentReader { - file: File, - start: u64, - length: usize, -} - -impl FileSegmentReader { - fn new(file: File, start: u64, length: usize) -> Self { - Self { - file, - start, - length, - } - } -} - -impl Read for FileSegmentReader { - fn read(&mut self, buf: &mut [u8]) -> std::result::Result { - let mut file_source = self.file.get_read(self.start, self.length)?; - file_source.read(buf) - } -} - fn list_all(root_path: &str, ext: &str) -> Result> { let mut filenames: Vec = Vec::new(); list_all_files(root_path, &mut filenames, ext)?; diff --git a/datafusion/src/datasource/object_store.rs b/datafusion/src/datasource/object_store.rs index 5c7a53215534..ef1e8701f4e0 100644 --- a/datafusion/src/datasource/object_store.rs +++ b/datafusion/src/datasource/object_store.rs @@ -41,7 +41,7 @@ pub trait ObjectStore: Sync + Send + Debug { /// so that it can be downcast to a specific implementation. fn as_any(&self) -> &dyn Any; - /// Returns all the files with `ext` in path `prefix` + /// Returns all the files with filename extension `ext` in path `prefix` fn list_all_files(&self, prefix: &str, ext: &str) -> Result>; /// Get object reader for one file diff --git a/datafusion/src/datasource/parquet.rs b/datafusion/src/datasource/parquet.rs index aaec9e83f78c..fcf7ccd5331b 100644 --- a/datafusion/src/datasource/parquet.rs +++ b/datafusion/src/datasource/parquet.rs @@ -178,7 +178,7 @@ impl ParquetRootDesc { fn summarize_min_max( max_values: &mut Vec>, min_values: &mut Vec>, - fields: &Vec, + fields: &[Field], i: usize, stat: &ParquetStatistics, ) { diff --git a/datafusion/src/execution/context.rs b/datafusion/src/execution/context.rs index 4c7b85c1eb26..a49e5b24f1ce 100644 --- a/datafusion/src/execution/context.rs +++ b/datafusion/src/execution/context.rs @@ -2709,6 +2709,7 @@ mod tests { } #[tokio::test] + #[ignore] async fn write_parquet_results() -> Result<()> { // create partitioned input file and context let tmp_dir = TempDir::new()?; diff --git a/datafusion/src/physical_plan/repartition.rs b/datafusion/src/physical_plan/repartition.rs index eb3fe5560fd6..9047a01729aa 100644 --- a/datafusion/src/physical_plan/repartition.rs +++ b/datafusion/src/physical_plan/repartition.rs @@ -734,7 +734,7 @@ mod tests { #[tokio::test] // skip this test when hash function is different because the hard // coded expected output is a function of the hash values - #[cfg(not(feature = "force_hash_collisions"))] + //#[cfg(not(feature = "force_hash_collisions"))] async fn repartition_with_dropping_output_stream() { #[derive(Debug)] struct Case<'a> { From 5545ac74fc47c0c814dcb280c01ee18b43ef96b0 Mon Sep 17 00:00:00 2001 From: Yijie Shen Date: Tue, 10 Aug 2021 22:54:58 +0800 Subject: [PATCH 03/16] deadlock --- datafusion/src/execution/context.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/src/execution/context.rs b/datafusion/src/execution/context.rs index a49e5b24f1ce..1771468d8ea7 100644 --- a/datafusion/src/execution/context.rs +++ b/datafusion/src/execution/context.rs @@ -328,9 +328,9 @@ impl ExecutionContext { /// executed against this context. 
pub fn register_parquet(&mut self, name: &str, filename: &str) -> Result<()> { let table = { - let m = self.state.lock().unwrap(); + let enable_pruning = self.state.lock().unwrap().config.parquet_pruning; ParquetTable::try_new(filename, self.clone())? - .with_enable_pruning(m.config.parquet_pruning) + .with_enable_pruning(enable_pruning) }; self.register_table(name, Arc::new(table))?; Ok(()) From b0a353c5f3c39da7f19dc9e64e21e43ea0e80cc3 Mon Sep 17 00:00:00 2001 From: Yijie Shen Date: Wed, 11 Aug 2021 11:55:19 +0800 Subject: [PATCH 04/16] fix prunning test --- datafusion/src/datasource/mod.rs | 20 +++----------------- datafusion/src/execution/context.rs | 1 - datafusion/src/physical_plan/repartition.rs | 1 + 3 files changed, 4 insertions(+), 18 deletions(-) diff --git a/datafusion/src/datasource/mod.rs b/datafusion/src/datasource/mod.rs index 64e84c8e5611..71f1bcef58ad 100644 --- a/datafusion/src/datasource/mod.rs +++ b/datafusion/src/datasource/mod.rs @@ -79,16 +79,7 @@ impl From for PartitionedFile { impl std::fmt::Display for PartitionedFile { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!( - f, - "PartitionedFile(file_path: {}, schema: {}, statistics: {:?},\ - partition_value: {:?}, partition_schema: {:?})", - self.file_path, - self.schema, - self.statistics, - self.partition_value, - self.partition_schema - ) + write!(f, "{}", self.file_path) } } @@ -103,13 +94,8 @@ pub struct FilePartition { impl std::fmt::Display for FilePartition { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - let files: Vec = self.files.iter().map(|f| format!("{}", f)).collect(); - write!( - f, - "FilePartition[{}], files: {}", - self.index, - files.join(", ") - ) + let files: Vec = self.files.iter().map(|f| f.to_string()).collect(); + write!(f, "{}", files.join(", ")) } } diff --git a/datafusion/src/execution/context.rs b/datafusion/src/execution/context.rs index 1771468d8ea7..1b5882712c61 100644 --- a/datafusion/src/execution/context.rs +++ b/datafusion/src/execution/context.rs @@ -2709,7 +2709,6 @@ mod tests { } #[tokio::test] - #[ignore] async fn write_parquet_results() -> Result<()> { // create partitioned input file and context let tmp_dir = TempDir::new()?; diff --git a/datafusion/src/physical_plan/repartition.rs b/datafusion/src/physical_plan/repartition.rs index 9047a01729aa..9f875083283f 100644 --- a/datafusion/src/physical_plan/repartition.rs +++ b/datafusion/src/physical_plan/repartition.rs @@ -732,6 +732,7 @@ mod tests { } #[tokio::test] + #[ignore] // skip this test when hash function is different because the hard // coded expected output is a function of the hash values //#[cfg(not(feature = "force_hash_collisions"))] From 42b6f436fa739da196df8e6ccf964ce3a96edd05 Mon Sep 17 00:00:00 2001 From: Yijie Shen Date: Wed, 11 Aug 2021 13:44:52 +0800 Subject: [PATCH 05/16] fix clippy --- datafusion/src/physical_plan/parquet.rs | 2 ++ datafusion/src/physical_plan/repartition.rs | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/datafusion/src/physical_plan/parquet.rs b/datafusion/src/physical_plan/parquet.rs index bc3e0d597213..24f7c12178eb 100644 --- a/datafusion/src/physical_plan/parquet.rs +++ b/datafusion/src/physical_plan/parquet.rs @@ -198,6 +198,7 @@ impl ParquetExec { } /// Create a new Parquet reader execution plan with provided partitions and schema + #[allow(clippy::too_many_arguments)] pub fn new( partitions: Vec, object_store: Arc, @@ -551,6 +552,7 @@ fn build_row_group_predicate( } } +#[allow(clippy::too_many_arguments)] fn 
read_files( object_store: Arc, partition: ParquetPartition, diff --git a/datafusion/src/physical_plan/repartition.rs b/datafusion/src/physical_plan/repartition.rs index 9f875083283f..84fdc3d04d83 100644 --- a/datafusion/src/physical_plan/repartition.rs +++ b/datafusion/src/physical_plan/repartition.rs @@ -262,6 +262,7 @@ impl RepartitionExec { // fetch the next batch let now = Instant::now(); let result = stream.next().await; + println!("input {:?}", result); metrics.fetch_nanos.add_elapsed(now); // Input is done @@ -298,11 +299,13 @@ impl RepartitionExec { hashes_buf.resize(arrays[0].len(), 0); // Hash arrays and compute buckets based on number of partitions let hashes = create_hashes(&arrays, &random_state, hashes_buf)?; + println!("hashes: {:?}", &hashes); let mut indices = vec![vec![]; num_output_partitions]; for (index, hash) in hashes.iter().enumerate() { indices[(*hash % num_output_partitions as u64) as usize] .push(index as u64) } + println!("indices: {:?}", &indices); metrics.repart_nanos.add_elapsed(now); for (num_output_partition, partition_indices) in indices.into_iter().enumerate() @@ -732,7 +735,6 @@ mod tests { } #[tokio::test] - #[ignore] // skip this test when hash function is different because the hard // coded expected output is a function of the hash values //#[cfg(not(feature = "force_hash_collisions"))] From 97793953ddc1fc798ed7b070c51e4049ce11d180 Mon Sep 17 00:00:00 2001 From: Yijie Shen Date: Wed, 11 Aug 2021 13:45:19 +0800 Subject: [PATCH 06/16] fix --- datafusion/src/physical_plan/repartition.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/src/physical_plan/repartition.rs b/datafusion/src/physical_plan/repartition.rs index 84fdc3d04d83..e6ce98c28721 100644 --- a/datafusion/src/physical_plan/repartition.rs +++ b/datafusion/src/physical_plan/repartition.rs @@ -737,7 +737,7 @@ mod tests { #[tokio::test] // skip this test when hash function is different because the hard // coded expected output is a function of the hash values - //#[cfg(not(feature = "force_hash_collisions"))] + #[cfg(not(feature = "force_hash_collisions"))] async fn repartition_with_dropping_output_stream() { #[derive(Debug)] struct Case<'a> { From 9a8614ba3287106868d9b277907afcba7faa2941 Mon Sep 17 00:00:00 2001 From: Yijie Shen Date: Wed, 11 Aug 2021 14:18:50 +0800 Subject: [PATCH 07/16] enable shuffle_writer tests --- ballista/rust/core/src/execution_plans/shuffle_writer.rs | 2 -- datafusion/src/physical_plan/repartition.rs | 3 --- 2 files changed, 5 deletions(-) diff --git a/ballista/rust/core/src/execution_plans/shuffle_writer.rs b/ballista/rust/core/src/execution_plans/shuffle_writer.rs index 8c66a1e3a739..b1db21fa90a1 100644 --- a/ballista/rust/core/src/execution_plans/shuffle_writer.rs +++ b/ballista/rust/core/src/execution_plans/shuffle_writer.rs @@ -482,7 +482,6 @@ mod tests { use tempfile::TempDir; #[tokio::test] - #[ignore] async fn test() -> Result<()> { let input_plan = Arc::new(CoalescePartitionsExec::new(create_input_plan()?)); let work_dir = TempDir::new()?; @@ -535,7 +534,6 @@ mod tests { } #[tokio::test] - #[ignore] async fn test_partitioned() -> Result<()> { let input_plan = create_input_plan()?; let work_dir = TempDir::new()?; diff --git a/datafusion/src/physical_plan/repartition.rs b/datafusion/src/physical_plan/repartition.rs index e6ce98c28721..eb3fe5560fd6 100644 --- a/datafusion/src/physical_plan/repartition.rs +++ b/datafusion/src/physical_plan/repartition.rs @@ -262,7 +262,6 @@ impl RepartitionExec { // fetch the next batch let now = 
Instant::now(); let result = stream.next().await; - println!("input {:?}", result); metrics.fetch_nanos.add_elapsed(now); // Input is done @@ -299,13 +298,11 @@ impl RepartitionExec { hashes_buf.resize(arrays[0].len(), 0); // Hash arrays and compute buckets based on number of partitions let hashes = create_hashes(&arrays, &random_state, hashes_buf)?; - println!("hashes: {:?}", &hashes); let mut indices = vec![vec![]; num_output_partitions]; for (index, hash) in hashes.iter().enumerate() { indices[(*hash % num_output_partitions as u64) as usize] .push(index as u64) } - println!("indices: {:?}", &indices); metrics.repart_nanos.add_elapsed(now); for (num_output_partition, partition_indices) in indices.into_iter().enumerate() From 90e4d889e75b38fb294f987e8b67e1df1390bd99 Mon Sep 17 00:00:00 2001 From: Yijie Shen Date: Fri, 13 Aug 2021 18:11:29 +0800 Subject: [PATCH 08/16] wip make it async --- datafusion/src/datasource/csv.rs | 2 +- datafusion/src/datasource/json.rs | 2 +- datafusion/src/datasource/local.rs | 94 ++++++++++++++++------- datafusion/src/datasource/mod.rs | 2 +- datafusion/src/datasource/object_store.rs | 11 ++- datafusion/src/physical_plan/csv.rs | 2 +- datafusion/src/physical_plan/json.rs | 2 +- 7 files changed, 79 insertions(+), 36 deletions(-) diff --git a/datafusion/src/datasource/csv.rs b/datafusion/src/datasource/csv.rs index d4ca073af2dd..ec6cbe472950 100644 --- a/datafusion/src/datasource/csv.rs +++ b/datafusion/src/datasource/csv.rs @@ -67,7 +67,7 @@ impl CsvFile { Some(s) => s.clone(), None => { let filenames = LocalFileSystem - .list_all_files(path.as_str(), options.file_extension)?; + .list_all_files(path.as_str(), options.file_extension).await?; if filenames.is_empty() { return Err(DataFusionError::Plan(format!( "No files found at {path} with file extension {file_extension}", diff --git a/datafusion/src/datasource/json.rs b/datafusion/src/datasource/json.rs index 5bd8a5f7121f..84f5bccb8fac 100644 --- a/datafusion/src/datasource/json.rs +++ b/datafusion/src/datasource/json.rs @@ -59,7 +59,7 @@ impl NdJsonFile { schema } else { let filenames = - LocalFileSystem.list_all_files(path, options.file_extension)?; + LocalFileSystem.list_all_files(path, options.file_extension).await?; if filenames.is_empty() { return Err(DataFusionError::Plan(format!( "No files found at {path} with file extension {file_extension}", diff --git a/datafusion/src/datasource/local.rs b/datafusion/src/datasource/local.rs index cdd9b324a7ad..7b99e38dc043 100644 --- a/datafusion/src/datasource/local.rs +++ b/datafusion/src/datasource/local.rs @@ -16,29 +16,32 @@ // under the License. //! Object store that represents the Local File System. - -use crate::datasource::object_store::{ObjectReader, ObjectStore}; +use crate::datasource::object_store::{ObjectReader, ObjectStore, FileNameStream}; use crate::error::DataFusionError; use crate::error::Result; use crate::parquet::file::reader::Length; use crate::parquet::file::serialized_reader::FileSource; +use async_trait::async_trait; use std::any::Any; -use std::fs; -use std::fs::{metadata, File}; use std::io::Read; use std::sync::Arc; +use futures::{stream, Stream, StreamExt}; +use tokio::fs::{File, self, ReadDir}; +use std::path::PathBuf; #[derive(Debug)] /// Local File System as Object Store. 
pub struct LocalFileSystem; + +#[async_trait] impl ObjectStore for LocalFileSystem { fn as_any(&self) -> &dyn Any { self } - fn list_all_files(&self, path: &str, ext: &str) -> Result> { - list_all(path, ext) + async fn list_all_files(&self, path: &str, ext: &str) -> Result { + list_all(path.to_string(), ext.to_string()).await } fn get_reader(&self, file_path: &str) -> Result> { @@ -68,33 +71,66 @@ impl ObjectReader for LocalFSObjectReader { } } -fn list_all(root_path: &str, ext: &str) -> Result> { - let mut filenames: Vec = Vec::new(); - list_all_files(root_path, &mut filenames, ext)?; - Ok(filenames) -} +async fn list_all(root_path: String, ext: String) -> Result { + // let mut filenames: Vec = Vec::new(); + // list_all_files(root_path, &mut filenames, ext).await?; + // Ok(filenames) -/// Recursively build a list of files in a directory with a given extension with an accumulator list -fn list_all_files(dir: &str, filenames: &mut Vec, ext: &str) -> Result<()> { - let metadata = metadata(dir)?; - if metadata.is_file() { - if dir.ends_with(ext) { - filenames.push(dir.to_string()); - } - } else { - for entry in fs::read_dir(dir)? { - let entry = entry?; - let path = entry.path(); - if let Some(path_name) = path.to_str() { - if path.is_dir() { - list_all_files(path_name, filenames, ext)?; - } else if path_name.ends_with(ext) { - filenames.push(path_name.to_string()); + async fn one_level(path: String, to_visit: &mut Vec, ext: String) -> Result> { + let mut dir = fs::read_dir(path).await?; + let mut files = Vec::new(); + + while let Some(child) = dir.next_entry().await? { + if let Some(child_path) = child.path().to_str() { + if child.metadata().await?.is_dir() { + to_visit.push(child_path.to_string()); + } else { + if child_path.ends_with(&ext) { + files.push(child_path.to_string()) + } } } else { - return Err(DataFusionError::Plan("Invalid path".to_string())); + return Err(DataFusionError::Plan("Invalid path".to_string())) } + } + Ok(files) } - Ok(()) + + stream::unfold(vec![root_path], |mut to_visit| { + async { + let path = to_visit.pop()?; + let file_stream = match one_level(path, &mut to_visit, ext).await { + Ok(files) => stream::iter(files).map(Ok).left_stream(), + Err(e) => stream::once(async { Err(e) }).right_stream(), + }; + + Some((file_stream, to_visit)) + } + }).flatten() } + +/// Recursively build a list of files in a directory with a given extension with an accumulator list +// async fn list_all_files(dir: &str, filenames: &mut Vec, ext: &str) -> Result<()> { +// let metadata = std::fs::metadata(dir)?; +// if metadata.is_file() { +// if dir.ends_with(ext) { +// filenames.push(dir.to_string()); +// } +// } else { +// for entry in std::fs::read_dir(dir)? 
{ +// let entry = entry?; +// let path = entry.path(); +// if let Some(path_name) = path.to_str() { +// if path.is_dir() { +// list_all_files(path_name, filenames, ext).await?; +// } else if path_name.ends_with(ext) { +// filenames.push(path_name.to_string()); +// } +// } else { +// return Err(DataFusionError::Plan("Invalid path".to_string())); +// } +// } +// } +// Ok(()) +// } diff --git a/datafusion/src/datasource/mod.rs b/datafusion/src/datasource/mod.rs index 71f1bcef58ad..b34770c07b4d 100644 --- a/datafusion/src/datasource/mod.rs +++ b/datafusion/src/datasource/mod.rs @@ -116,7 +116,7 @@ pub trait SourceRootDescBuilder { object_store: Arc, ext: &str, ) -> Result { - let filenames = object_store.list_all_files(path, ext)?; + let filenames = object_store.list_all_files(path, ext).await?; if filenames.is_empty() { return Err(DataFusionError::Plan(format!( "No file (with .{} extension) found at path {}", diff --git a/datafusion/src/datasource/object_store.rs b/datafusion/src/datasource/object_store.rs index ef1e8701f4e0..8c2de37e5be2 100644 --- a/datafusion/src/datasource/object_store.rs +++ b/datafusion/src/datasource/object_store.rs @@ -19,13 +19,17 @@ use crate::datasource::local::LocalFileSystem; use crate::error::Result; +use async_trait::async_trait; use std::any::Any; use std::collections::HashMap; use std::fmt::Debug; use std::io::Read; use std::sync::{Arc, RwLock}; +use futures::{Stream, StreamExt}; +use std::pin::Pin; -/// Objct Reader for one file in a object store +/// Object Reader for one file in a object store +#[async_trait] pub trait ObjectReader { /// Get reader for a part [start, start + length] in the file fn get_reader(&self, start: u64, length: usize) -> Box; @@ -34,15 +38,18 @@ pub trait ObjectReader { fn length(&self) -> u64; } +pub type FileNameStream = Pin> + Send + Sync + 'static>>; + /// A ObjectStore abstracts access to an underlying file/object storage. /// It maps strings (e.g. URLs, filesystem paths, etc) to sources of bytes +#[async_trait] pub trait ObjectStore: Sync + Send + Debug { /// Returns the object store as [`Any`](std::any::Any) /// so that it can be downcast to a specific implementation. 
fn as_any(&self) -> &dyn Any; /// Returns all the files with filename extension `ext` in path `prefix` - fn list_all_files(&self, prefix: &str, ext: &str) -> Result>; + async fn list_all_files(&self, prefix: &str, ext: &str) -> Result; /// Get object reader for one file fn get_reader(&self, file_path: &str) -> Result>; diff --git a/datafusion/src/physical_plan/csv.rs b/datafusion/src/physical_plan/csv.rs index 293f46d7a736..3a44c60fe4b6 100644 --- a/datafusion/src/physical_plan/csv.rs +++ b/datafusion/src/physical_plan/csv.rs @@ -143,7 +143,7 @@ impl CsvExec { ) -> Result { let file_extension = String::from(options.file_extension); - let filenames = LocalFileSystem.list_all_files(path, options.file_extension)?; + let filenames = LocalFileSystem.list_all_files(path, options.file_extension).await?; if filenames.is_empty() { return Err(DataFusionError::Execution(format!( "No files found at {path} with file extension {file_extension}", diff --git a/datafusion/src/physical_plan/json.rs b/datafusion/src/physical_plan/json.rs index df7e9e5e5014..2ce4e07c0871 100644 --- a/datafusion/src/physical_plan/json.rs +++ b/datafusion/src/physical_plan/json.rs @@ -89,7 +89,7 @@ impl NdJsonExec { ) -> Result { let file_extension = options.file_extension.to_string(); - let filenames = LocalFileSystem.list_all_files(path, options.file_extension)?; + let filenames = LocalFileSystem.list_all_files(path, options.file_extension).await?; if filenames.is_empty() { return Err(DataFusionError::Execution(format!( From af106a9664695c8eb0b1d81a722a8f5b0d99f563 Mon Sep 17 00:00:00 2001 From: Yijie Shen Date: Mon, 16 Aug 2021 15:03:15 +0800 Subject: [PATCH 09/16] resolve comment, wip --- .../core/src/serde/logical_plan/from_proto.rs | 8 ++------ .../core/src/serde/physical_plan/from_proto.rs | 5 ++--- ballista/rust/core/src/utils.rs | 5 ----- ballista/rust/scheduler/src/lib.rs | 3 +-- datafusion/src/datasource/csv.rs | 4 ++-- datafusion/src/datasource/json.rs | 4 ++-- datafusion/src/datasource/mod.rs | 3 +-- .../src/datasource/{ => object_store}/local.rs | 2 +- .../{object_store.rs => object_store/mod.rs} | 18 ++++++++++++------ datafusion/src/datasource/parquet.rs | 2 +- .../src/physical_optimizer/repartition.rs | 2 +- datafusion/src/physical_plan/csv.rs | 4 ++-- datafusion/src/physical_plan/json.rs | 4 ++-- datafusion/src/physical_plan/parquet.rs | 4 ++-- 14 files changed, 31 insertions(+), 37 deletions(-) rename datafusion/src/datasource/{ => object_store}/local.rs (98%) rename datafusion/src/datasource/{object_store.rs => object_store/mod.rs} (95%) diff --git a/ballista/rust/core/src/serde/logical_plan/from_proto.rs b/ballista/rust/core/src/serde/logical_plan/from_proto.rs index 24faddd9f34d..dbc819d855c8 100644 --- a/ballista/rust/core/src/serde/logical_plan/from_proto.rs +++ b/ballista/rust/core/src/serde/logical_plan/from_proto.rs @@ -159,7 +159,7 @@ impl TryInto for &protobuf::LogicalPlanNode { LogicalPlanBuilder::scan_parquet_with_name( &scan.path, projection, - create_datafusion_context_concurrency(24), + ExecutionContext::with_concurrency(24), &scan.table_name, )? 
//TODO concurrency .build() @@ -1100,13 +1100,9 @@ impl TryInto for &protobuf::Field { } } -use crate::utils::create_datafusion_context_concurrency; use datafusion::physical_plan::datetime_expressions::to_timestamp; use datafusion::physical_plan::{aggregates, windows}; -use datafusion::prelude::{ - array, date_part, date_trunc, length, lower, ltrim, md5, rtrim, sha224, sha256, - sha384, sha512, trim, upper, -}; +use datafusion::prelude::{array, date_part, date_trunc, length, lower, ltrim, md5, rtrim, sha224, sha256, sha384, sha512, trim, upper, ExecutionContext}; use std::convert::TryFrom; impl TryFrom for protobuf::FileType { diff --git a/ballista/rust/core/src/serde/physical_plan/from_proto.rs b/ballista/rust/core/src/serde/physical_plan/from_proto.rs index 1441f87bc0aa..2767c9318ce1 100644 --- a/ballista/rust/core/src/serde/physical_plan/from_proto.rs +++ b/ballista/rust/core/src/serde/physical_plan/from_proto.rs @@ -29,7 +29,6 @@ use crate::serde::protobuf::repartition_exec_node::PartitionMethod; use crate::serde::protobuf::ShuffleReaderPartition; use crate::serde::scheduler::PartitionLocation; use crate::serde::{from_proto_binary_op, proto_error, protobuf}; -use crate::utils::create_datafusion_context_concurrency; use crate::{convert_box_required, convert_required, into_required}; use datafusion::arrow::datatypes::{DataType, Schema, SchemaRef}; use datafusion::catalog::catalog::{ @@ -71,7 +70,7 @@ use datafusion::physical_plan::{ Partitioning, }; use datafusion::physical_plan::{AggregateExpr, ExecutionPlan, PhysicalExpr, WindowExpr}; -use datafusion::prelude::CsvReadOptions; +use datafusion::prelude::{CsvReadOptions, ExecutionContext}; use log::debug; use protobuf::physical_expr_node::ExprType; use protobuf::physical_plan_node::PhysicalPlanType; @@ -137,7 +136,7 @@ impl TryInto> for &protobuf::PhysicalPlanNode { Some(projection), None, scan.batch_size as usize, - create_datafusion_context_concurrency(scan.num_partitions as usize), + ExecutionContext::with_concurrency(scan.num_partitions as usize), None, )?)) } diff --git a/ballista/rust/core/src/utils.rs b/ballista/rust/core/src/utils.rs index e960b77575a9..4187faa6645a 100644 --- a/ballista/rust/core/src/utils.rs +++ b/ballista/rust/core/src/utils.rs @@ -252,11 +252,6 @@ pub fn create_datafusion_context( ExecutionContext::with_config(config) } -/// Create a DataFusion context that is compatible with Ballista in concurrency -pub fn create_datafusion_context_concurrency(concurrency: usize) -> ExecutionContext { - ExecutionContext::with_concurrency(concurrency) -} - pub struct BallistaQueryPlanner { scheduler_url: String, config: BallistaConfig, diff --git a/ballista/rust/scheduler/src/lib.rs b/ballista/rust/scheduler/src/lib.rs index 2037a3530aba..9ab01ce9e64c 100644 --- a/ballista/rust/scheduler/src/lib.rs +++ b/ballista/rust/scheduler/src/lib.rs @@ -85,7 +85,6 @@ use self::state::{ConfigBackendClient, SchedulerState}; use ballista_core::config::BallistaConfig; use ballista_core::execution_plans::ShuffleWriterExec; use ballista_core::serde::scheduler::to_proto::hash_partitioning_to_proto; -use ballista_core::utils::create_datafusion_context_concurrency; use datafusion::datasource::parquet::ParquetRootDesc; use datafusion::prelude::{ExecutionConfig, ExecutionContext}; use std::time::{Instant, SystemTime, UNIX_EPOCH}; @@ -286,7 +285,7 @@ impl SchedulerGrpc for SchedulerServer { match file_type { FileType::Parquet => { - let ctx = create_datafusion_context_concurrency(1); + let ctx = ExecutionContext::with_concurrency(1); let 
parquet_desc = ParquetRootDesc::new(&path, ctx).map_err(|e| { let msg = format!("Error opening parquet files: {}", e); error!("{}", msg); diff --git a/datafusion/src/datasource/csv.rs b/datafusion/src/datasource/csv.rs index ec6cbe472950..3f19e5772860 100644 --- a/datafusion/src/datasource/csv.rs +++ b/datafusion/src/datasource/csv.rs @@ -40,7 +40,7 @@ use std::string::String; use std::sync::{Arc, Mutex}; use crate::datasource::datasource::Statistics; -use crate::datasource::local::LocalFileSystem; +use crate::datasource::object_store::local::LocalFileSystem; use crate::datasource::object_store::ObjectStore; use crate::datasource::{Source, TableProvider}; use crate::error::{DataFusionError, Result}; @@ -67,7 +67,7 @@ impl CsvFile { Some(s) => s.clone(), None => { let filenames = LocalFileSystem - .list_all_files(path.as_str(), options.file_extension).await?; + .list(path.as_str(), options.file_extension).await?; if filenames.is_empty() { return Err(DataFusionError::Plan(format!( "No files found at {path} with file extension {file_extension}", diff --git a/datafusion/src/datasource/json.rs b/datafusion/src/datasource/json.rs index 84f5bccb8fac..d04e95892cf3 100644 --- a/datafusion/src/datasource/json.rs +++ b/datafusion/src/datasource/json.rs @@ -37,7 +37,7 @@ use crate::{ use arrow::{datatypes::SchemaRef, json::reader::infer_json_schema_from_seekable}; use super::datasource::Statistics; -use crate::datasource::local::LocalFileSystem; +use crate::datasource::object_store::local::LocalFileSystem; use crate::datasource::object_store::ObjectStore; trait SeekRead: Read + Seek {} @@ -59,7 +59,7 @@ impl NdJsonFile { schema } else { let filenames = - LocalFileSystem.list_all_files(path, options.file_extension).await?; + LocalFileSystem.list(path, options.file_extension).await?; if filenames.is_empty() { return Err(DataFusionError::Plan(format!( "No files found at {path} with file extension {file_extension}", diff --git a/datafusion/src/datasource/mod.rs b/datafusion/src/datasource/mod.rs index b34770c07b4d..b53822b83dd3 100644 --- a/datafusion/src/datasource/mod.rs +++ b/datafusion/src/datasource/mod.rs @@ -21,7 +21,6 @@ pub mod csv; pub mod datasource; pub mod empty; pub mod json; -pub mod local; pub mod memory; pub mod object_store; pub mod parquet; @@ -116,7 +115,7 @@ pub trait SourceRootDescBuilder { object_store: Arc, ext: &str, ) -> Result { - let filenames = object_store.list_all_files(path, ext).await?; + let filenames = object_store.list(path, ext).await?; if filenames.is_empty() { return Err(DataFusionError::Plan(format!( "No file (with .{} extension) found at path {}", diff --git a/datafusion/src/datasource/local.rs b/datafusion/src/datasource/object_store/local.rs similarity index 98% rename from datafusion/src/datasource/local.rs rename to datafusion/src/datasource/object_store/local.rs index 7b99e38dc043..f1a9595d854f 100644 --- a/datafusion/src/datasource/local.rs +++ b/datafusion/src/datasource/object_store/local.rs @@ -40,7 +40,7 @@ impl ObjectStore for LocalFileSystem { self } - async fn list_all_files(&self, path: &str, ext: &str) -> Result { + async fn list(&self, path: &str, ext: &str) -> Result { list_all(path.to_string(), ext.to_string()).await } diff --git a/datafusion/src/datasource/object_store.rs b/datafusion/src/datasource/object_store/mod.rs similarity index 95% rename from datafusion/src/datasource/object_store.rs rename to datafusion/src/datasource/object_store/mod.rs index 8c2de37e5be2..b273aecf5a84 100644 --- a/datafusion/src/datasource/object_store.rs +++ 
b/datafusion/src/datasource/object_store/mod.rs @@ -17,16 +17,22 @@ //! Object Store abstracts access to an underlying file/object storage. -use crate::datasource::local::LocalFileSystem; -use crate::error::Result; -use async_trait::async_trait; +pub mod local; + use std::any::Any; use std::collections::HashMap; use std::fmt::Debug; use std::io::Read; +use std::pin::Pin; use std::sync::{Arc, RwLock}; + +use async_trait::async_trait; use futures::{Stream, StreamExt}; -use std::pin::Pin; + +use local::LocalFileSystem; + +use crate::error::Result; + /// Object Reader for one file in a object store #[async_trait] @@ -49,7 +55,7 @@ pub trait ObjectStore: Sync + Send + Debug { fn as_any(&self) -> &dyn Any; /// Returns all the files with filename extension `ext` in path `prefix` - async fn list_all_files(&self, prefix: &str, ext: &str) -> Result; + async fn list(&self, prefix: &str, ext: &str) -> Result; /// Get object reader for one file fn get_reader(&self, file_path: &str) -> Result>; @@ -98,7 +104,7 @@ impl ObjectStoreRegistry { /// path with prefix file:/// or no prefix will return the default LocalFS store, /// path with prefix s3:/// will return the S3 store if it's registered, /// and will always return LocalFS store when a prefix is not registered in the path. - pub fn store_for_path(&self, path: &str) -> Arc { + pub fn get_by_path(&self, path: &str) -> Arc { if let Some((scheme, _)) = path.split_once(':') { let stores = self.object_stores.read().unwrap(); if let Some(store) = stores.get(&*scheme.to_lowercase()) { diff --git a/datafusion/src/datasource/parquet.rs b/datafusion/src/datasource/parquet.rs index fcf7ccd5331b..03eca4605c2a 100644 --- a/datafusion/src/datasource/parquet.rs +++ b/datafusion/src/datasource/parquet.rs @@ -157,7 +157,7 @@ impl ParquetRootDesc { .lock() .unwrap() .object_store_registry - .store_for_path(root_path); + .get_by_path(root_path); let root_desc = Self::get_source_desc(root_path, object_store.clone(), "parquet"); Ok(Self { object_store, diff --git a/datafusion/src/physical_optimizer/repartition.rs b/datafusion/src/physical_optimizer/repartition.rs index 30ec896b4e2f..42986d5c4dcc 100644 --- a/datafusion/src/physical_optimizer/repartition.rs +++ b/datafusion/src/physical_optimizer/repartition.rs @@ -110,7 +110,7 @@ mod tests { use super::*; use crate::datasource::datasource::Statistics; - use crate::datasource::local::LocalFileSystem; + use crate::datasource::object_store::local::LocalFileSystem; use crate::datasource::PartitionedFile; use crate::physical_plan::parquet::{ ParquetExec, ParquetExecMetrics, ParquetPartition, diff --git a/datafusion/src/physical_plan/csv.rs b/datafusion/src/physical_plan/csv.rs index 3a44c60fe4b6..05e8361051f9 100644 --- a/datafusion/src/physical_plan/csv.rs +++ b/datafusion/src/physical_plan/csv.rs @@ -17,7 +17,7 @@ //! 
Execution plan for reading CSV files -use crate::datasource::local::LocalFileSystem; +use crate::datasource::object_store::local::LocalFileSystem; use crate::datasource::object_store::ObjectStore; use crate::error::{DataFusionError, Result}; use crate::physical_plan::ExecutionPlan; @@ -143,7 +143,7 @@ impl CsvExec { ) -> Result { let file_extension = String::from(options.file_extension); - let filenames = LocalFileSystem.list_all_files(path, options.file_extension).await?; + let filenames = LocalFileSystem.list(path, options.file_extension).await?; if filenames.is_empty() { return Err(DataFusionError::Execution(format!( "No files found at {path} with file extension {file_extension}", diff --git a/datafusion/src/physical_plan/json.rs b/datafusion/src/physical_plan/json.rs index 2ce4e07c0871..a694aae5cbde 100644 --- a/datafusion/src/physical_plan/json.rs +++ b/datafusion/src/physical_plan/json.rs @@ -20,7 +20,7 @@ use async_trait::async_trait; use futures::Stream; use super::{source::Source, ExecutionPlan, Partitioning, RecordBatchStream}; -use crate::datasource::local::LocalFileSystem; +use crate::datasource::object_store::local::LocalFileSystem; use crate::datasource::object_store::ObjectStore; use crate::error::{DataFusionError, Result}; use arrow::json::reader::{infer_json_schema_from_iterator, ValueIter}; @@ -89,7 +89,7 @@ impl NdJsonExec { ) -> Result { let file_extension = options.file_extension.to_string(); - let filenames = LocalFileSystem.list_all_files(path, options.file_extension).await?; + let filenames = LocalFileSystem.list(path, options.file_extension).await?; if filenames.is_empty() { return Err(DataFusionError::Execution(format!( diff --git a/datafusion/src/physical_plan/parquet.rs b/datafusion/src/physical_plan/parquet.rs index 24f7c12178eb..30bdf15f07a2 100644 --- a/datafusion/src/physical_plan/parquet.rs +++ b/datafusion/src/physical_plan/parquet.rs @@ -360,7 +360,7 @@ impl ExecutionPlan for ParquetExec { let limit = self.limit; task::spawn_blocking(move || { - if let Err(e) = read_files( + if let Err(e) = read_partition( object_store, partition, metrics, @@ -553,7 +553,7 @@ fn build_row_group_predicate( } #[allow(clippy::too_many_arguments)] -fn read_files( +fn read_partition( object_store: Arc, partition: ParquetPartition, metrics: ParquetPartitionMetrics, From 2c2650b9e7257a84c748b26102724a4859556841 Mon Sep 17 00:00:00 2001 From: Yijie Shen Date: Mon, 16 Aug 2021 15:15:16 +0800 Subject: [PATCH 10/16] take sync async func apart --- .../core/src/serde/logical_plan/from_proto.rs | 5 +- datafusion/src/datasource/csv.rs | 4 +- datafusion/src/datasource/json.rs | 3 +- .../src/datasource/object_store/local.rs | 99 ++++++++++--------- datafusion/src/datasource/object_store/mod.rs | 12 ++- datafusion/src/physical_plan/csv.rs | 2 +- datafusion/src/physical_plan/json.rs | 2 +- 7 files changed, 71 insertions(+), 56 deletions(-) diff --git a/ballista/rust/core/src/serde/logical_plan/from_proto.rs b/ballista/rust/core/src/serde/logical_plan/from_proto.rs index dbc819d855c8..6490b251eec9 100644 --- a/ballista/rust/core/src/serde/logical_plan/from_proto.rs +++ b/ballista/rust/core/src/serde/logical_plan/from_proto.rs @@ -1102,7 +1102,10 @@ impl TryInto for &protobuf::Field { use datafusion::physical_plan::datetime_expressions::to_timestamp; use datafusion::physical_plan::{aggregates, windows}; -use datafusion::prelude::{array, date_part, date_trunc, length, lower, ltrim, md5, rtrim, sha224, sha256, sha384, sha512, trim, upper, ExecutionContext}; +use datafusion::prelude::{ + array, 
date_part, date_trunc, length, lower, ltrim, md5, rtrim, sha224, sha256, + sha384, sha512, trim, upper, ExecutionContext, +}; use std::convert::TryFrom; impl TryFrom for protobuf::FileType { diff --git a/datafusion/src/datasource/csv.rs b/datafusion/src/datasource/csv.rs index 3f19e5772860..6df59d63eed2 100644 --- a/datafusion/src/datasource/csv.rs +++ b/datafusion/src/datasource/csv.rs @@ -66,8 +66,8 @@ impl CsvFile { let schema = Arc::new(match options.schema { Some(s) => s.clone(), None => { - let filenames = LocalFileSystem - .list(path.as_str(), options.file_extension).await?; + let filenames = + LocalFileSystem.list(path.as_str(), options.file_extension)?; if filenames.is_empty() { return Err(DataFusionError::Plan(format!( "No files found at {path} with file extension {file_extension}", diff --git a/datafusion/src/datasource/json.rs b/datafusion/src/datasource/json.rs index d04e95892cf3..e353cfe917c5 100644 --- a/datafusion/src/datasource/json.rs +++ b/datafusion/src/datasource/json.rs @@ -58,8 +58,7 @@ impl NdJsonFile { let schema = if let Some(schema) = options.schema { schema } else { - let filenames = - LocalFileSystem.list(path, options.file_extension).await?; + let filenames = LocalFileSystem.list(path, options.file_extension)?; if filenames.is_empty() { return Err(DataFusionError::Plan(format!( "No files found at {path} with file extension {file_extension}", diff --git a/datafusion/src/datasource/object_store/local.rs b/datafusion/src/datasource/object_store/local.rs index f1a9595d854f..fbdaaa38109e 100644 --- a/datafusion/src/datasource/object_store/local.rs +++ b/datafusion/src/datasource/object_store/local.rs @@ -16,31 +16,36 @@ // under the License. //! Object store that represents the Local File System. -use crate::datasource::object_store::{ObjectReader, ObjectStore, FileNameStream}; +use crate::datasource::object_store::{FileNameStream, ObjectReader, ObjectStore}; use crate::error::DataFusionError; use crate::error::Result; use crate::parquet::file::reader::Length; use crate::parquet::file::serialized_reader::FileSource; use async_trait::async_trait; +use futures::{stream, Stream, StreamExt}; use std::any::Any; use std::io::Read; -use std::sync::Arc; -use futures::{stream, Stream, StreamExt}; -use tokio::fs::{File, self, ReadDir}; use std::path::PathBuf; +use std::sync::Arc; +use tokio::fs::{self, File, ReadDir}; #[derive(Debug)] /// Local File System as Object Store. 
pub struct LocalFileSystem; - #[async_trait] impl ObjectStore for LocalFileSystem { fn as_any(&self) -> &dyn Any { self } - async fn list(&self, path: &str, ext: &str) -> Result { + fn list(&self, path: &str, ext: &str) -> Result> { + let mut filenames: Vec = Vec::new(); + list_all_files(path, &mut filenames, ext)?; + Ok(filenames) + } + + async fn list_async(&self, path: &str, ext: &str) -> Result { list_all(path.to_string(), ext.to_string()).await } @@ -66,17 +71,21 @@ impl ObjectReader for LocalFSObjectReader { Box::new(FileSource::::new(&self.file, start, length)) } + fn get_reader_async(&self, start: u64, length: usize) -> Box { + todo!() + } + fn length(&self) -> u64 { self.file.len() } } async fn list_all(root_path: String, ext: String) -> Result { - // let mut filenames: Vec = Vec::new(); - // list_all_files(root_path, &mut filenames, ext).await?; - // Ok(filenames) - - async fn one_level(path: String, to_visit: &mut Vec, ext: String) -> Result> { + async fn one_level( + path: String, + to_visit: &mut Vec, + ext: String, + ) -> Result> { let mut dir = fs::read_dir(path).await?; let mut files = Vec::new(); @@ -90,47 +99,45 @@ async fn list_all(root_path: String, ext: String) -> Result { } } } else { - return Err(DataFusionError::Plan("Invalid path".to_string())) + return Err(DataFusionError::Plan("Invalid path".to_string())); } - } Ok(files) } - stream::unfold(vec![root_path], |mut to_visit| { - async { - let path = to_visit.pop()?; - let file_stream = match one_level(path, &mut to_visit, ext).await { - Ok(files) => stream::iter(files).map(Ok).left_stream(), - Err(e) => stream::once(async { Err(e) }).right_stream(), - }; + stream::unfold(vec![root_path], |mut to_visit| async { + let path = to_visit.pop()?; + let file_stream = match one_level(path, &mut to_visit, ext).await { + Ok(files) => stream::iter(files).map(Ok).left_stream(), + Err(e) => stream::once(async { Err(e) }).right_stream(), + }; - Some((file_stream, to_visit)) - } - }).flatten() + Some((file_stream, to_visit)) + }) + .flatten() } /// Recursively build a list of files in a directory with a given extension with an accumulator list -// async fn list_all_files(dir: &str, filenames: &mut Vec, ext: &str) -> Result<()> { -// let metadata = std::fs::metadata(dir)?; -// if metadata.is_file() { -// if dir.ends_with(ext) { -// filenames.push(dir.to_string()); -// } -// } else { -// for entry in std::fs::read_dir(dir)? { -// let entry = entry?; -// let path = entry.path(); -// if let Some(path_name) = path.to_str() { -// if path.is_dir() { -// list_all_files(path_name, filenames, ext).await?; -// } else if path_name.ends_with(ext) { -// filenames.push(path_name.to_string()); -// } -// } else { -// return Err(DataFusionError::Plan("Invalid path".to_string())); -// } -// } -// } -// Ok(()) -// } +fn list_all_files(dir: &str, filenames: &mut Vec, ext: &str) -> Result<()> { + let metadata = std::fs::metadata(dir)?; + if metadata.is_file() { + if dir.ends_with(ext) { + filenames.push(dir.to_string()); + } + } else { + for entry in std::fs::read_dir(dir)? 
{ + let entry = entry?; + let path = entry.path(); + if let Some(path_name) = path.to_str() { + if path.is_dir() { + list_all_files(path_name, filenames, ext).await?; + } else if path_name.ends_with(ext) { + filenames.push(path_name.to_string()); + } + } else { + return Err(DataFusionError::Plan("Invalid path".to_string())); + } + } + } + Ok(()) +} diff --git a/datafusion/src/datasource/object_store/mod.rs b/datafusion/src/datasource/object_store/mod.rs index b273aecf5a84..f9b8e0407fdf 100644 --- a/datafusion/src/datasource/object_store/mod.rs +++ b/datafusion/src/datasource/object_store/mod.rs @@ -33,18 +33,21 @@ use local::LocalFileSystem; use crate::error::Result; - /// Object Reader for one file in a object store #[async_trait] pub trait ObjectReader { /// Get reader for a part [start, start + length] in the file fn get_reader(&self, start: u64, length: usize) -> Box; + /// Get reader for a part [start, start + length] in the file asynchronously + fn get_reader_async(&self, start: u64, length: usize) -> Box; + /// Get lenght for the file fn length(&self) -> u64; } -pub type FileNameStream = Pin> + Send + Sync + 'static>>; +pub type FileNameStream = + Pin> + Send + Sync + 'static>>; /// A ObjectStore abstracts access to an underlying file/object storage. /// It maps strings (e.g. URLs, filesystem paths, etc) to sources of bytes @@ -55,7 +58,10 @@ pub trait ObjectStore: Sync + Send + Debug { fn as_any(&self) -> &dyn Any; /// Returns all the files with filename extension `ext` in path `prefix` - async fn list(&self, prefix: &str, ext: &str) -> Result; + fn list(&self, prefix: &str, ext: &str) -> Result>; + + /// Returns all the files with filename extension `ext` in path `prefix` asynchronously + async fn list_async(&self, prefix: &str, ext: &str) -> Result; /// Get object reader for one file fn get_reader(&self, file_path: &str) -> Result>; diff --git a/datafusion/src/physical_plan/csv.rs b/datafusion/src/physical_plan/csv.rs index 05e8361051f9..3f2214c57750 100644 --- a/datafusion/src/physical_plan/csv.rs +++ b/datafusion/src/physical_plan/csv.rs @@ -143,7 +143,7 @@ impl CsvExec { ) -> Result { let file_extension = String::from(options.file_extension); - let filenames = LocalFileSystem.list(path, options.file_extension).await?; + let filenames = LocalFileSystem.list(path, options.file_extension)?; if filenames.is_empty() { return Err(DataFusionError::Execution(format!( "No files found at {path} with file extension {file_extension}", diff --git a/datafusion/src/physical_plan/json.rs b/datafusion/src/physical_plan/json.rs index a694aae5cbde..e70af74fb7e9 100644 --- a/datafusion/src/physical_plan/json.rs +++ b/datafusion/src/physical_plan/json.rs @@ -89,7 +89,7 @@ impl NdJsonExec { ) -> Result { let file_extension = options.file_extension.to_string(); - let filenames = LocalFileSystem.list(path, options.file_extension).await?; + let filenames = LocalFileSystem.list(path, options.file_extension)?; if filenames.is_empty() { return Err(DataFusionError::Execution(format!( From cf2c2038aa94704d86f8925edeba6b2550f4dc78 Mon Sep 17 00:00:00 2001 From: Yijie Shen Date: Tue, 17 Aug 2021 22:29:36 +0800 Subject: [PATCH 11/16] Make async list and metadata fetch --- benchmarks/src/bin/tpch.rs | 5 +- datafusion/src/datasource/csv.rs | 2 +- datafusion/src/datasource/mod.rs | 161 ++++++++++++++---- .../src/datasource/object_store/local.rs | 91 +++++----- datafusion/src/datasource/object_store/mod.rs | 7 +- datafusion/src/datasource/parquet.rs | 70 ++++++-- 6 files changed, 240 insertions(+), 96 
deletions(-) diff --git a/benchmarks/src/bin/tpch.rs b/benchmarks/src/bin/tpch.rs index c45341bad2de..f47ae57eacc3 100644 --- a/benchmarks/src/bin/tpch.rs +++ b/benchmarks/src/bin/tpch.rs @@ -475,9 +475,12 @@ fn get_table( } "parquet" => { let path = format!("{}/{}", path, table); - Ok(Arc::new(ParquetTable::try_new( + let schema = get_schema(table); + Ok(Arc::new(ParquetTable::try_new_with_schema( &path, ExecutionContext::with_concurrency(max_concurrency), + schema, + false, )?)) } other => { diff --git a/datafusion/src/datasource/csv.rs b/datafusion/src/datasource/csv.rs index 6df59d63eed2..160556cd4da7 100644 --- a/datafusion/src/datasource/csv.rs +++ b/datafusion/src/datasource/csv.rs @@ -49,7 +49,7 @@ use crate::physical_plan::csv::CsvExec; pub use crate::physical_plan::csv::CsvReadOptions; use crate::physical_plan::ExecutionPlan; -/// Represents a CSV file with a provided schema +/// Represents a CSV file with a provided scxhema pub struct CsvFile { source: Source, schema: SchemaRef, diff --git a/datafusion/src/datasource/mod.rs b/datafusion/src/datasource/mod.rs index b53822b83dd3..0fae9571a1fd 100644 --- a/datafusion/src/datasource/mod.rs +++ b/datafusion/src/datasource/mod.rs @@ -31,12 +31,18 @@ pub use self::memory::MemTable; use crate::arrow::datatypes::{Schema, SchemaRef}; use crate::datasource::datasource::{ColumnStatistics, Statistics}; -use crate::datasource::object_store::ObjectStore; +use crate::datasource::object_store::{FileNameStream, ObjectStore}; use crate::error::{DataFusionError, Result}; use crate::physical_plan::expressions::{MaxAccumulator, MinAccumulator}; use crate::physical_plan::Accumulator; -use crate::scalar::ScalarValue; +use async_trait::async_trait; +use futures::{Stream, StreamExt}; +use std::fmt::Debug; +use std::pin::Pin; use std::sync::Arc; +use tokio::runtime::{Handle, Runtime}; +use tokio::sync::mpsc::{channel, Receiver, Sender}; +use tokio_stream::wrappers::ReceiverStream; /// Source for table input data pub(crate) enum Source> { @@ -57,10 +63,10 @@ pub struct PartitionedFile { pub schema: Schema, /// Statistics of the file pub statistics: Statistics, - /// Values of partition columns to be appended to each row - pub partition_value: Option>, - /// Schema of partition columns - pub partition_schema: Option, + // Values of partition columns to be appended to each row + // pub partition_value: Option>, + // Schema of partition columns + // pub partition_schema: Option, // We may include row group range here for a more fine-grained parallel execution } @@ -70,8 +76,6 @@ impl From for PartitionedFile { file_path, schema: Schema::empty(), statistics: Default::default(), - partition_value: None, - partition_schema: None, } } } @@ -107,59 +111,142 @@ pub struct SourceRootDescriptor { pub schema: SchemaRef, } +/// Stream of +pub type PartitionedFileStream = + Pin> + Send + Sync + 'static>>; + /// Builder for ['SourceRootDescriptor'] inside given path -pub trait SourceRootDescBuilder { +#[async_trait] +pub trait SourceRootDescBuilder: Sync + Send + Debug { /// Construct a ['SourceRootDescriptor'] from the provided path fn get_source_desc( path: &str, object_store: Arc, ext: &str, + provided_schema: Option, + collect_statistics: bool, ) -> Result { - let filenames = object_store.list(path, ext).await?; - if filenames.is_empty() { - return Err(DataFusionError::Plan(format!( - "No file (with .{} extension) found at path {}", - ext, path - ))); - } + let handle = get_runtime_handle(); + let mut results: Vec> = Vec::new(); + handle.block_on(async { + match 
Self::get_source_desc_async( + path, + object_store, + ext, + provided_schema, + collect_statistics, + ) + .await + { + Ok(mut stream) => { + while let Some(pf) = stream.next().await { + results.push(pf); + } + } + Err(e) => { + results.push(Err(e)); + } + } + }); + + let partition_results: Result> = + results.into_iter().collect(); + let partition_files = partition_results?; // build a list of Parquet partitions with statistics and gather all unique schemas // used in this data set let mut schemas: Vec = vec![]; - let partitioned_files = filenames - .iter() - .map(|file_path| { - let pf = Self::get_file_meta(file_path, object_store.clone())?; - let schema = pf.schema.clone(); - if schemas.is_empty() { - schemas.push(schema); - } else if schema != schemas[0] { - // we currently get the schema information from the first file rather than do - // schema merging and this is a limitation. - // See https://issues.apache.org/jira/browse/ARROW-11017 - return Err(DataFusionError::Plan(format!( - "The file {} have different schema from the first file and DataFusion does \ + for pf in &partition_files { + let schema = pf.schema.clone(); + if schemas.is_empty() { + schemas.push(schema); + } else if schema != schemas[0] { + // we currently get the schema information from the first file rather than do + // schema merging and this is a limitation. + // See https://issues.apache.org/jira/browse/ARROW-11017 + return Err(DataFusionError::Plan(format!( + "The file {} have different schema from the first file and DataFusion does \ not yet support schema merging", - file_path - ))); - } - Ok(pf) - }).collect::>>(); + pf.file_path + ))); + } + } Ok(SourceRootDescriptor { - partition_files: partitioned_files?, + partition_files, schema: Arc::new(schemas.pop().unwrap()), }) } + /// Construct a ['SourceRootDescriptor'] from the provided path asynchronously + async fn get_source_desc_async( + path: &str, + object_store: Arc, + ext: &str, + provided_schema: Option, + collect_statistics: bool, + ) -> Result { + let mut list_result: FileNameStream = object_store.list_async(path, ext).await?; + + let (tx, rx): ( + Sender>, + Receiver>, + ) = channel(2); + + let mut contains_file = false; + while let Some(item) = list_result.next().await { + contains_file = true; + match item { + Ok(file_path) => { + if collect_statistics { + let tx = tx.clone(); + let object_store = object_store.clone(); + let path = file_path.clone(); + tokio::spawn(async move { + let file_meta = Self::get_file_meta(path, object_store).await; + tx.send(file_meta).await.unwrap(); + }); + } else { + tx.send(Ok(PartitionedFile { + file_path, + schema: provided_schema.clone().unwrap(), + statistics: Statistics::default(), + })) + .await + .unwrap(); + } + } + Err(e) => { + tx.send(Err(e)).await.unwrap(); + } + } + } + + if !contains_file { + return Err(DataFusionError::Plan(format!( + "No file (with .{} extension) found at path {}", + ext, path + ))); + } + + Ok(Box::pin(ReceiverStream::new(rx))) + } + /// Get all metadata for a source file, including schema, statistics, partitions, etc. 
- fn get_file_meta( - file_path: &str, + async fn get_file_meta( + file_path: String, object_store: Arc, ) -> Result; } +fn get_runtime_handle() -> Handle { + match Handle::try_current() { + Ok(h) => h, + Err(_) => Runtime::new().unwrap().handle().to_owned(), + } +} + /// Get all files as well as the summary statistics when a limit is provided pub fn get_statistics_with_limit( source_desc: &SourceRootDescriptor, diff --git a/datafusion/src/datasource/object_store/local.rs b/datafusion/src/datasource/object_store/local.rs index fbdaaa38109e..e2e1541e0d11 100644 --- a/datafusion/src/datasource/object_store/local.rs +++ b/datafusion/src/datasource/object_store/local.rs @@ -16,18 +16,18 @@ // under the License. //! Object store that represents the Local File System. +use crate::datasource::get_runtime_handle; use crate::datasource::object_store::{FileNameStream, ObjectReader, ObjectStore}; use crate::error::DataFusionError; use crate::error::Result; use crate::parquet::file::reader::Length; use crate::parquet::file::serialized_reader::FileSource; use async_trait::async_trait; -use futures::{stream, Stream, StreamExt}; +use futures::{stream, StreamExt}; use std::any::Any; +use std::fs::File; use std::io::Read; -use std::path::PathBuf; use std::sync::Arc; -use tokio::fs::{self, File, ReadDir}; #[derive(Debug)] /// Local File System as Object Store. @@ -40,13 +40,12 @@ impl ObjectStore for LocalFileSystem { } fn list(&self, path: &str, ext: &str) -> Result> { - let mut filenames: Vec = Vec::new(); - list_all_files(path, &mut filenames, ext)?; + let filenames: Vec = list_all(path.to_string(), ext.to_string())?; Ok(filenames) } async fn list_async(&self, path: &str, ext: &str) -> Result { - list_all(path.to_string(), ext.to_string()).await + list_all_async(path.to_string(), ext.to_string()).await } fn get_reader(&self, file_path: &str) -> Result> { @@ -71,7 +70,7 @@ impl ObjectReader for LocalFSObjectReader { Box::new(FileSource::::new(&self.file, start, length)) } - fn get_reader_async(&self, start: u64, length: usize) -> Box { + fn get_reader_async(&self, _start: u64, _length: usize) -> Box { todo!() } @@ -80,23 +79,39 @@ impl ObjectReader for LocalFSObjectReader { } } -async fn list_all(root_path: String, ext: String) -> Result { - async fn one_level( +fn list_all(root_path: String, ext: String) -> Result> { + let handle = get_runtime_handle(); + let mut file_results: Vec> = Vec::new(); + handle.block_on(async { + match list_all_async(root_path, ext).await { + Ok(mut stream) => { + while let Some(result) = stream.next().await { + file_results.push(result); + } + } + Err(_) => { + file_results.push(Err(DataFusionError::Plan("Invalid path".to_string()))); + } + } + }); + file_results.into_iter().collect() +} + +async fn list_all_async(root_path: String, ext: String) -> Result { + async fn find_files_in_dir( path: String, to_visit: &mut Vec, ext: String, ) -> Result> { - let mut dir = fs::read_dir(path).await?; + let mut dir = tokio::fs::read_dir(path).await?; let mut files = Vec::new(); while let Some(child) = dir.next_entry().await? 
{ if let Some(child_path) = child.path().to_str() { if child.metadata().await?.is_dir() { to_visit.push(child_path.to_string()); - } else { - if child_path.ends_with(&ext) { - files.push(child_path.to_string()) - } + } else if child_path.ends_with(&ext.clone()) { + files.push(child_path.to_string()) } } else { return Err(DataFusionError::Plan("Invalid path".to_string())); @@ -105,39 +120,23 @@ async fn list_all(root_path: String, ext: String) -> Result { Ok(files) } - stream::unfold(vec![root_path], |mut to_visit| async { - let path = to_visit.pop()?; - let file_stream = match one_level(path, &mut to_visit, ext).await { - Ok(files) => stream::iter(files).map(Ok).left_stream(), - Err(e) => stream::once(async { Err(e) }).right_stream(), - }; - - Some((file_stream, to_visit)) - }) - .flatten() -} - -/// Recursively build a list of files in a directory with a given extension with an accumulator list -fn list_all_files(dir: &str, filenames: &mut Vec, ext: &str) -> Result<()> { - let metadata = std::fs::metadata(dir)?; - if metadata.is_file() { - if dir.ends_with(ext) { - filenames.push(dir.to_string()); - } - } else { - for entry in std::fs::read_dir(dir)? { - let entry = entry?; - let path = entry.path(); - if let Some(path_name) = path.to_str() { - if path.is_dir() { - list_all_files(path_name, filenames, ext).await?; - } else if path_name.ends_with(ext) { - filenames.push(path_name.to_string()); + let result = stream::unfold(vec![root_path], move |mut to_visit| { + let ext = ext.clone(); + async move { + match to_visit.pop() { + None => None, + Some(path) => { + let file_stream = + match find_files_in_dir(path, &mut to_visit, ext).await { + Ok(files) => stream::iter(files).map(Ok).left_stream(), + Err(e) => stream::once(async { Err(e) }).right_stream(), + }; + + Some((file_stream, to_visit)) } - } else { - return Err(DataFusionError::Plan("Invalid path".to_string())); } } - } - Ok(()) + }) + .flatten(); + Ok(Box::pin(result)) } diff --git a/datafusion/src/datasource/object_store/mod.rs b/datafusion/src/datasource/object_store/mod.rs index f9b8e0407fdf..9d3776fb9475 100644 --- a/datafusion/src/datasource/object_store/mod.rs +++ b/datafusion/src/datasource/object_store/mod.rs @@ -27,7 +27,7 @@ use std::pin::Pin; use std::sync::{Arc, RwLock}; use async_trait::async_trait; -use futures::{Stream, StreamExt}; +use futures::Stream; use local::LocalFileSystem; @@ -46,6 +46,11 @@ pub trait ObjectReader { fn length(&self) -> u64; } +/// Stream of files get listed from object store. Currently, we only +/// return file paths, but for many object stores, object listing will actually give us more +/// information than just the file path, for example, last updated time and file size are +/// often returned as part of the api/sys call. +/// These extra metadata might be useful for other purposes. 
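// Illustrative sketch, not from the patch: how a caller might drain the `FileNameStream`
// declared just below, assuming this module's `ObjectStore` trait and `Result` alias are in
// scope. The helper name and the hard-coded "parquet" extension are assumptions.
use futures::StreamExt;
use std::sync::Arc;

async fn collect_parquet_paths(
    store: Arc<dyn ObjectStore>,
    root: &str,
) -> Result<Vec<String>> {
    let mut names = store.list_async(root, "parquet").await?;
    let mut paths = Vec::new();
    while let Some(name) = names.next().await {
        // each item is a Result<String>, so listing errors surface per entry
        paths.push(name?);
    }
    Ok(paths)
}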
pub type FileNameStream = Pin> + Send + Sync + 'static>>; diff --git a/datafusion/src/datasource/parquet.rs b/datafusion/src/datasource/parquet.rs index 03eca4605c2a..010d7ed8f228 100644 --- a/datafusion/src/datasource/parquet.rs +++ b/datafusion/src/datasource/parquet.rs @@ -21,8 +21,7 @@ use std::any::Any; use std::io::Read; use std::sync::Arc; -use arrow::datatypes::SchemaRef; - +use async_trait::async_trait; use parquet::arrow::ArrowReader; use parquet::arrow::ParquetFileArrowReader; use parquet::file::reader::ChunkReader; @@ -30,7 +29,7 @@ use parquet::file::serialized_reader::SerializedFileReader; use parquet::file::statistics::Statistics as ParquetStatistics; use super::datasource::TableProviderFilterPushDown; -use crate::arrow::datatypes::{DataType, Field}; +use crate::arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use crate::datasource::datasource::Statistics; use crate::datasource::object_store::{ObjectReader, ObjectStore}; use crate::datasource::{ @@ -68,6 +67,30 @@ impl ParquetTable { }) } + /// Attempt to initialize a new `ParquetTable` from a file path and known schema. + /// If collect_statistics is `false`, doesn't read files until necessary by scan + pub fn try_new_with_schema( + path: impl Into, + context: ExecutionContext, + schema: Schema, + collect_statistics: bool, + ) -> Result { + let path = path.into(); + let max_concurrency = context.state.lock().unwrap().config.concurrency; + let root_desc = ParquetRootDesc::new_with_schema( + path.as_str(), + context, + Some(schema), + collect_statistics, + ); + Ok(Self { + path, + desc: Arc::new(root_desc?), + max_concurrency, + enable_pruning: true, + }) + } + /// Get the path for the Parquet file(s) represented by this ParquetTable instance pub fn path(&self) -> &str { &self.path @@ -158,7 +181,34 @@ impl ParquetRootDesc { .unwrap() .object_store_registry .get_by_path(root_path); - let root_desc = Self::get_source_desc(root_path, object_store.clone(), "parquet"); + let root_desc = + Self::get_source_desc(root_path, object_store.clone(), "parquet", None, true); + Ok(Self { + object_store, + descriptor: root_desc?, + }) + } + + /// Construct a new parquet descriptor for a root path with known schema + pub fn new_with_schema( + root_path: &str, + context: ExecutionContext, + schema: Option, + collect_statistics: bool, + ) -> Result { + let object_store = context + .state + .lock() + .unwrap() + .object_store_registry + .get_by_path(root_path); + let root_desc = Self::get_source_desc( + root_path, + object_store.clone(), + "parquet", + schema, + collect_statistics, + ); Ok(Self { object_store, descriptor: root_desc?, @@ -314,12 +364,13 @@ impl ParquetRootDesc { } } +#[async_trait] impl SourceRootDescBuilder for ParquetRootDesc { - fn get_file_meta( - file_path: &str, + async fn get_file_meta( + file_path: String, object_store: Arc, ) -> Result { - let reader = object_store.get_reader(file_path)?; + let reader = object_store.get_reader(file_path.as_str())?; let file_reader = Arc::new(SerializedFileReader::new(ObjectReaderWrapper::new(reader))?); let mut arrow_reader = ParquetFileArrowReader::new(file_reader); @@ -384,8 +435,6 @@ impl SourceRootDescBuilder for ParquetRootDesc { file_path, schema, statistics, - partition_value: None, - partition_schema: None, }) } } @@ -418,7 +467,8 @@ impl Length for ObjectReaderWrapper { } } -/// Thin wrapper over reader for a parquet file +/// Thin wrapper over reader for a parquet file. 
+/// To be removed once rust-lang/rfcs#1598 is stabilized pub struct InnerReaderWrapper { inner_reader: Box, } From 0596c9c664cdc1047606a862fd7a626c6a023f6d Mon Sep 17 00:00:00 2001 From: Yijie Shen Date: Wed, 18 Aug 2021 16:03:00 +0800 Subject: [PATCH 12/16] async read --- datafusion/src/datasource/mod.rs | 1 + .../src/datasource/object_store/local.rs | 45 ++++-- datafusion/src/datasource/object_store/mod.rs | 25 ++- datafusion/src/datasource/parquet.rs | 16 +- datafusion/src/datasource/parquet_io.rs | 144 ++++++++++++++++++ 5 files changed, 212 insertions(+), 19 deletions(-) create mode 100644 datafusion/src/datasource/parquet_io.rs diff --git a/datafusion/src/datasource/mod.rs b/datafusion/src/datasource/mod.rs index 0fae9571a1fd..f79cae799558 100644 --- a/datafusion/src/datasource/mod.rs +++ b/datafusion/src/datasource/mod.rs @@ -24,6 +24,7 @@ pub mod json; pub mod memory; pub mod object_store; pub mod parquet; +pub mod parquet_io; pub use self::csv::{CsvFile, CsvReadOptions}; pub use self::datasource::{TableProvider, TableType}; diff --git a/datafusion/src/datasource/object_store/local.rs b/datafusion/src/datasource/object_store/local.rs index e2e1541e0d11..1c76c2eac118 100644 --- a/datafusion/src/datasource/object_store/local.rs +++ b/datafusion/src/datasource/object_store/local.rs @@ -17,17 +17,19 @@ //! Object store that represents the Local File System. use crate::datasource::get_runtime_handle; -use crate::datasource::object_store::{FileNameStream, ObjectReader, ObjectStore}; +use crate::datasource::object_store::{ + FileNameStream, ObjectReader, ObjectStore, ThreadSafeRead, +}; +use crate::datasource::parquet_io::FileSource2; use crate::error::DataFusionError; use crate::error::Result; use crate::parquet::file::reader::Length; -use crate::parquet::file::serialized_reader::FileSource; use async_trait::async_trait; use futures::{stream, StreamExt}; use std::any::Any; use std::fs::File; -use std::io::Read; use std::sync::Arc; +use tokio::task; #[derive(Debug)] /// Local File System as Object Store. 
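// Illustrative sketch, not from the patch: the `spawn_blocking` bridge that
// `LocalFSObjectReader::get_reader_async` and `length_async` rely on in the next hunk, so
// that a blocking `std::fs::File` operation never stalls the tokio reactor. The function
// name is an assumption and error handling is reduced to `std::io::Error`.
use std::fs::File;
use tokio::task;

async fn file_len_async(file: File) -> std::io::Result<u64> {
    match task::spawn_blocking(move || file.metadata().map(|m| m.len())).await {
        Ok(len) => len,
        // a panicked or cancelled blocking task surfaces as a JoinError
        Err(join_err) => Err(std::io::Error::new(std::io::ErrorKind::Other, join_err)),
    }
}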
@@ -65,17 +67,42 @@ impl LocalFSObjectReader { } } +#[async_trait] impl ObjectReader for LocalFSObjectReader { - fn get_reader(&self, start: u64, length: usize) -> Box { - Box::new(FileSource::::new(&self.file, start, length)) + fn get_reader(&self, start: u64, length: usize) -> Result> { + Ok(Box::new(FileSource2::::new( + &self.file, start, length, + ))) } - fn get_reader_async(&self, _start: u64, _length: usize) -> Box { - todo!() + async fn get_reader_async( + &self, + start: u64, + length: usize, + ) -> Result> { + let file = self.file.try_clone()?; + match task::spawn_blocking(move || { + let read: Result> = + Ok(Box::new(FileSource2::::new(&file, start, length))); + read + }) + .await + { + Ok(r) => r, + Err(e) => Err(DataFusionError::Internal(e.to_string())), + } } - fn length(&self) -> u64 { - self.file.len() + fn length(&self) -> Result { + Ok(self.file.len()) + } + + async fn length_async(&self) -> Result { + let file = self.file.try_clone()?; + match task::spawn_blocking(move || Ok(file.len())).await { + Ok(r) => r, + Err(e) => Err(DataFusionError::Internal(e.to_string())), + } } } diff --git a/datafusion/src/datasource/object_store/mod.rs b/datafusion/src/datasource/object_store/mod.rs index 9d3776fb9475..d365ab34380d 100644 --- a/datafusion/src/datasource/object_store/mod.rs +++ b/datafusion/src/datasource/object_store/mod.rs @@ -31,19 +31,36 @@ use futures::Stream; use local::LocalFileSystem; +use crate::datasource::get_runtime_handle; use crate::error::Result; +/// Thread safe read +pub trait ThreadSafeRead: Read + Send + Sync + 'static {} + /// Object Reader for one file in a object store #[async_trait] pub trait ObjectReader { /// Get reader for a part [start, start + length] in the file - fn get_reader(&self, start: u64, length: usize) -> Box; + fn get_reader(&self, start: u64, length: usize) -> Result> { + let handle = get_runtime_handle(); + handle.block_on(self.get_reader_async(start, length)) + } /// Get reader for a part [start, start + length] in the file asynchronously - fn get_reader_async(&self, start: u64, length: usize) -> Box; + async fn get_reader_async( + &self, + start: u64, + length: usize, + ) -> Result>; + + /// Get length for the file + fn length(&self) -> Result { + let handle = get_runtime_handle(); + handle.block_on(self.length_async()) + } - /// Get lenght for the file - fn length(&self) -> u64; + /// Get length for the file asynchronously + async fn length_async(&self) -> Result; } /// Stream of files get listed from object store. 
Currently, we only diff --git a/datafusion/src/datasource/parquet.rs b/datafusion/src/datasource/parquet.rs index 010d7ed8f228..bfc893bf8cc7 100644 --- a/datafusion/src/datasource/parquet.rs +++ b/datafusion/src/datasource/parquet.rs @@ -24,6 +24,7 @@ use std::sync::Arc; use async_trait::async_trait; use parquet::arrow::ArrowReader; use parquet::arrow::ParquetFileArrowReader; +use parquet::errors::ParquetError; use parquet::file::reader::ChunkReader; use parquet::file::serialized_reader::SerializedFileReader; use parquet::file::statistics::Statistics as ParquetStatistics; @@ -31,7 +32,7 @@ use parquet::file::statistics::Statistics as ParquetStatistics; use super::datasource::TableProviderFilterPushDown; use crate::arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use crate::datasource::datasource::Statistics; -use crate::datasource::object_store::{ObjectReader, ObjectStore}; +use crate::datasource::object_store::{ObjectReader, ObjectStore, ThreadSafeRead}; use crate::datasource::{ create_max_min_accs, get_col_stats, get_statistics_with_limit, PartitionedFile, SourceRootDescBuilder, SourceRootDescriptor, TableProvider, @@ -455,22 +456,25 @@ impl ChunkReader for ObjectReaderWrapper { type T = InnerReaderWrapper; fn get_read(&self, start: u64, length: usize) -> parquet::errors::Result { - Ok(InnerReaderWrapper { - inner_reader: self.reader.get_reader(start, length), - }) + match self.reader.get_reader(start, length) { + Ok(reader) => Ok(InnerReaderWrapper { + inner_reader: reader, + }), + Err(e) => Err(ParquetError::General(e.to_string())), + } } } impl Length for ObjectReaderWrapper { fn len(&self) -> u64 { - self.reader.length() + self.reader.length().unwrap_or(0u64) } } /// Thin wrapper over reader for a parquet file. /// To be removed once rust-lang/rfcs#1598 is stabilized pub struct InnerReaderWrapper { - inner_reader: Box, + inner_reader: Box, } impl Read for InnerReaderWrapper { diff --git a/datafusion/src/datasource/parquet_io.rs b/datafusion/src/datasource/parquet_io.rs new file mode 100644 index 000000000000..a74fd70f25d5 --- /dev/null +++ b/datafusion/src/datasource/parquet_io.rs @@ -0,0 +1,144 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Copy of parquet::util::io::FileSource for thread safe parquet reader + +use std::sync::Mutex; +use std::{cmp, fmt, io::*}; + +use crate::datasource::object_store::ThreadSafeRead; +use crate::parquet::file::reader::Length; +use crate::parquet::util::io::{ParquetReader, Position}; + +const DEFAULT_BUF_SIZE: usize = 8 * 1024; + +// ---------------------------------------------------------------------- + +/// ParquetReader is the interface which needs to be fulfilled to be able to parse a +/// parquet source. 
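// Illustrative sketch, not from the patch: the marker-trait idiom behind `ThreadSafeRead`
// above and `ThreadSafeParquetReader` just below: an empty trait bundles the bounds, and a
// blanket impl lets every qualifying type be boxed as a single trait object. The demo names
// and `std::io::Empty` are assumptions.
use std::io::Read;

trait ThreadSafeReadDemo: Read + Send + Sync + 'static {}
impl<T: Read + Send + Sync + 'static> ThreadSafeReadDemo for T {}

fn boxed_reader() -> Box<dyn ThreadSafeReadDemo> {
    // `std::io::Empty` is `Read + Send + Sync + 'static`, so the blanket impl applies
    Box::new(std::io::empty())
}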
+pub trait ThreadSafeParquetReader: ParquetReader + Send + Sync + 'static {} +impl ThreadSafeParquetReader for T {} + +/// Struct that represents a slice of a file data with independent start position and +/// length. Internally clones provided file handle, wraps with a custom implementation +/// of BufReader that resets position before any read. +/// +/// This is workaround and alternative for `file.try_clone()` method. It clones `File` +/// while preserving independent position, which is not available with `try_clone()`. +/// +/// Designed after `arrow::io::RandomAccessFile` and `std::io::BufReader` +pub struct FileSource2 { + reader: Mutex, + start: u64, // start position in a file + end: u64, // end position in a file + buf: Vec, // buffer where bytes read in advance are stored + buf_pos: usize, // current position of the reader in the buffer + buf_cap: usize, // current number of bytes read into the buffer +} + +impl fmt::Debug for FileSource2 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("FileSource") + .field("reader", &"OPAQUE") + .field("start", &self.start) + .field("end", &self.end) + .field("buf.len", &self.buf.len()) + .field("buf_pos", &self.buf_pos) + .field("buf_cap", &self.buf_cap) + .finish() + } +} + +impl FileSource2 { + /// Creates new file reader with start and length from a file handle + pub fn new(fd: &R, start: u64, length: usize) -> Self { + let reader = Mutex::new(fd.try_clone().unwrap()); + Self { + reader, + start, + end: start + length as u64, + buf: vec![0_u8; DEFAULT_BUF_SIZE], + buf_pos: 0, + buf_cap: 0, + } + } + + fn fill_inner_buf(&mut self) -> Result<&[u8]> { + if self.buf_pos >= self.buf_cap { + // If we've reached the end of our internal buffer then we need to fetch + // some more data from the underlying reader. + // Branch using `>=` instead of the more correct `==` + // to tell the compiler that the pos..cap slice is always valid. + debug_assert!(self.buf_pos == self.buf_cap); + let mut reader = self.reader.lock().unwrap(); + reader.seek(SeekFrom::Start(self.start))?; // always seek to start before reading + self.buf_cap = reader.read(&mut self.buf)?; + self.buf_pos = 0; + } + Ok(&self.buf[self.buf_pos..self.buf_cap]) + } + + fn skip_inner_buf(&mut self, buf: &mut [u8]) -> Result { + // discard buffer + self.buf_pos = 0; + self.buf_cap = 0; + // read directly into param buffer + let mut reader = self.reader.lock().unwrap(); + reader.seek(SeekFrom::Start(self.start))?; // always seek to start before reading + let nread = reader.read(buf)?; + self.start += nread as u64; + Ok(nread) + } +} + +impl Read for FileSource2 { + fn read(&mut self, buf: &mut [u8]) -> Result { + let bytes_to_read = cmp::min(buf.len(), (self.end - self.start) as usize); + let buf = &mut buf[0..bytes_to_read]; + + // If we don't have any buffered data and we're doing a massive read + // (larger than our internal buffer), bypass our internal buffer + // entirely. + if self.buf_pos == self.buf_cap && buf.len() >= self.buf.len() { + return self.skip_inner_buf(buf); + } + let nread = { + let mut rem = self.fill_inner_buf()?; + // copy the data from the inner buffer to the param buffer + rem.read(buf)? 
+ }; + // consume from buffer + self.buf_pos = cmp::min(self.buf_pos + nread, self.buf_cap); + + self.start += nread as u64; + Ok(nread) + } +} + +impl Position for FileSource2 { + fn pos(&self) -> u64 { + self.start + } +} + +impl Length for FileSource2 { + fn len(&self) -> u64 { + self.end - self.start + } +} + +impl ThreadSafeRead for FileSource2 {} From 908f4453bcab9252fc4c30fc124b1b4dbfccae8e Mon Sep 17 00:00:00 2001 From: Yijie Shen Date: Thu, 19 Aug 2021 00:19:32 +0800 Subject: [PATCH 13/16] fix new runtime get discard --- datafusion/Cargo.toml | 2 +- datafusion/src/datasource/mod.rs | 11 +++++++---- datafusion/src/datasource/object_store/local.rs | 2 +- datafusion/src/datasource/object_store/mod.rs | 4 ++-- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/datafusion/Cargo.toml b/datafusion/Cargo.toml index db950c4956ce..839d622effe3 100644 --- a/datafusion/Cargo.toml +++ b/datafusion/Cargo.toml @@ -58,7 +58,7 @@ chrono = "0.4" async-trait = "0.1.41" futures = "0.3" pin-project-lite= "^0.2.0" -tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync"] } +tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync", "fs"] } tokio-stream = "0.1" log = "^0.4" md-5 = { version = "^0.9.1", optional = true } diff --git a/datafusion/src/datasource/mod.rs b/datafusion/src/datasource/mod.rs index f79cae799558..da95c5b438a3 100644 --- a/datafusion/src/datasource/mod.rs +++ b/datafusion/src/datasource/mod.rs @@ -127,7 +127,7 @@ pub trait SourceRootDescBuilder: Sync + Send + Debug { provided_schema: Option, collect_statistics: bool, ) -> Result { - let handle = get_runtime_handle(); + let (handle, _rt) = get_runtime_handle(); let mut results: Vec> = Vec::new(); handle.block_on(async { match Self::get_source_desc_async( @@ -241,10 +241,13 @@ pub trait SourceRootDescBuilder: Sync + Send + Debug { ) -> Result; } -fn get_runtime_handle() -> Handle { +fn get_runtime_handle() -> (Handle, Option) { match Handle::try_current() { - Ok(h) => h, - Err(_) => Runtime::new().unwrap().handle().to_owned(), + Ok(h) => (h, None), + Err(_) => { + let rt = Runtime::new().unwrap(); + (rt.handle().clone(), Some(rt)) + } } } diff --git a/datafusion/src/datasource/object_store/local.rs b/datafusion/src/datasource/object_store/local.rs index 1c76c2eac118..274ab405b541 100644 --- a/datafusion/src/datasource/object_store/local.rs +++ b/datafusion/src/datasource/object_store/local.rs @@ -107,7 +107,7 @@ impl ObjectReader for LocalFSObjectReader { } fn list_all(root_path: String, ext: String) -> Result> { - let handle = get_runtime_handle(); + let (handle, _rt) = get_runtime_handle(); let mut file_results: Vec> = Vec::new(); handle.block_on(async { match list_all_async(root_path, ext).await { diff --git a/datafusion/src/datasource/object_store/mod.rs b/datafusion/src/datasource/object_store/mod.rs index d365ab34380d..58efa9ea3887 100644 --- a/datafusion/src/datasource/object_store/mod.rs +++ b/datafusion/src/datasource/object_store/mod.rs @@ -42,7 +42,7 @@ pub trait ThreadSafeRead: Read + Send + Sync + 'static {} pub trait ObjectReader { /// Get reader for a part [start, start + length] in the file fn get_reader(&self, start: u64, length: usize) -> Result> { - let handle = get_runtime_handle(); + let (handle, _rt) = get_runtime_handle(); handle.block_on(self.get_reader_async(start, length)) } @@ -55,7 +55,7 @@ pub trait ObjectReader { /// Get length for the file fn length(&self) -> Result { - let handle = get_runtime_handle(); + let (handle, _rt) = 
get_runtime_handle(); handle.block_on(self.length_async()) } From a9f9a5e3af39b77a73959852903740c89c9588b5 Mon Sep 17 00:00:00 2001 From: Yijie Shen Date: Thu, 19 Aug 2021 01:34:59 +0800 Subject: [PATCH 14/16] Use futures::executor::block_on instead --- ballista/rust/scheduler/src/planner.rs | 12 +++++----- datafusion/Cargo.toml | 2 +- datafusion/src/datasource/mod.rs | 14 +---------- .../src/datasource/object_store/local.rs | 4 +--- datafusion/src/datasource/object_store/mod.rs | 7 ++---- datafusion/src/physical_plan/planner.rs | 24 +++++++++---------- 6 files changed, 23 insertions(+), 40 deletions(-) diff --git a/ballista/rust/scheduler/src/planner.rs b/ballista/rust/scheduler/src/planner.rs index 05025f282477..246a057fbf88 100644 --- a/ballista/rust/scheduler/src/planner.rs +++ b/ballista/rust/scheduler/src/planner.rs @@ -269,8 +269,8 @@ mod test { }; } - #[test] - fn distributed_hash_aggregate_plan() -> Result<(), BallistaError> { + #[tokio::test] + async fn distributed_hash_aggregate_plan() -> Result<(), BallistaError> { let mut ctx = datafusion_test_context("testdata")?; // simplified form of TPC-H query 1 @@ -352,8 +352,8 @@ mod test { Ok(()) } - #[test] - fn distributed_join_plan() -> Result<(), BallistaError> { + #[tokio::test] + async fn distributed_join_plan() -> Result<(), BallistaError> { let mut ctx = datafusion_test_context("testdata")?; // simplified form of TPC-H query 12 @@ -523,8 +523,8 @@ order by Ok(()) } - #[test] - fn roundtrip_serde_hash_aggregate() -> Result<(), BallistaError> { + #[tokio::test] + async fn roundtrip_serde_hash_aggregate() -> Result<(), BallistaError> { let mut ctx = datafusion_test_context("testdata")?; // simplified form of TPC-H query 1 diff --git a/datafusion/Cargo.toml b/datafusion/Cargo.toml index 839d622effe3..9541e6d5a57a 100644 --- a/datafusion/Cargo.toml +++ b/datafusion/Cargo.toml @@ -56,7 +56,7 @@ paste = "^1.0" num_cpus = "1.13.0" chrono = "0.4" async-trait = "0.1.41" -futures = "0.3" +futures = { version = "0.3", features = ["executor"] } pin-project-lite= "^0.2.0" tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync", "fs"] } tokio-stream = "0.1" diff --git a/datafusion/src/datasource/mod.rs b/datafusion/src/datasource/mod.rs index da95c5b438a3..199444cd9cd6 100644 --- a/datafusion/src/datasource/mod.rs +++ b/datafusion/src/datasource/mod.rs @@ -41,7 +41,6 @@ use futures::{Stream, StreamExt}; use std::fmt::Debug; use std::pin::Pin; use std::sync::Arc; -use tokio::runtime::{Handle, Runtime}; use tokio::sync::mpsc::{channel, Receiver, Sender}; use tokio_stream::wrappers::ReceiverStream; @@ -127,9 +126,8 @@ pub trait SourceRootDescBuilder: Sync + Send + Debug { provided_schema: Option, collect_statistics: bool, ) -> Result { - let (handle, _rt) = get_runtime_handle(); let mut results: Vec> = Vec::new(); - handle.block_on(async { + futures::executor::block_on(async { match Self::get_source_desc_async( path, object_store, @@ -241,16 +239,6 @@ pub trait SourceRootDescBuilder: Sync + Send + Debug { ) -> Result; } -fn get_runtime_handle() -> (Handle, Option) { - match Handle::try_current() { - Ok(h) => (h, None), - Err(_) => { - let rt = Runtime::new().unwrap(); - (rt.handle().clone(), Some(rt)) - } - } -} - /// Get all files as well as the summary statistics when a limit is provided pub fn get_statistics_with_limit( source_desc: &SourceRootDescriptor, diff --git a/datafusion/src/datasource/object_store/local.rs b/datafusion/src/datasource/object_store/local.rs index 274ab405b541..4244baf6a12b 100644 --- 
a/datafusion/src/datasource/object_store/local.rs +++ b/datafusion/src/datasource/object_store/local.rs @@ -16,7 +16,6 @@ // under the License. //! Object store that represents the Local File System. -use crate::datasource::get_runtime_handle; use crate::datasource::object_store::{ FileNameStream, ObjectReader, ObjectStore, ThreadSafeRead, }; @@ -107,9 +106,8 @@ impl ObjectReader for LocalFSObjectReader { } fn list_all(root_path: String, ext: String) -> Result> { - let (handle, _rt) = get_runtime_handle(); let mut file_results: Vec> = Vec::new(); - handle.block_on(async { + futures::executor::block_on(async { match list_all_async(root_path, ext).await { Ok(mut stream) => { while let Some(result) = stream.next().await { diff --git a/datafusion/src/datasource/object_store/mod.rs b/datafusion/src/datasource/object_store/mod.rs index 58efa9ea3887..03f51ce42592 100644 --- a/datafusion/src/datasource/object_store/mod.rs +++ b/datafusion/src/datasource/object_store/mod.rs @@ -31,7 +31,6 @@ use futures::Stream; use local::LocalFileSystem; -use crate::datasource::get_runtime_handle; use crate::error::Result; /// Thread safe read @@ -42,8 +41,7 @@ pub trait ThreadSafeRead: Read + Send + Sync + 'static {} pub trait ObjectReader { /// Get reader for a part [start, start + length] in the file fn get_reader(&self, start: u64, length: usize) -> Result> { - let (handle, _rt) = get_runtime_handle(); - handle.block_on(self.get_reader_async(start, length)) + futures::executor::block_on(self.get_reader_async(start, length)) } /// Get reader for a part [start, start + length] in the file asynchronously @@ -55,8 +53,7 @@ pub trait ObjectReader { /// Get length for the file fn length(&self) -> Result { - let (handle, _rt) = get_runtime_handle(); - handle.block_on(self.length_async()) + futures::executor::block_on(self.length_async()) } /// Get length for the file asynchronously diff --git a/datafusion/src/physical_plan/planner.rs b/datafusion/src/physical_plan/planner.rs index 02ab15d1a652..9d4673c15ff8 100644 --- a/datafusion/src/physical_plan/planner.rs +++ b/datafusion/src/physical_plan/planner.rs @@ -1399,8 +1399,8 @@ mod tests { planner.create_physical_plan(logical_plan, &ctx_state) } - #[test] - fn test_all_operators() -> Result<()> { + #[tokio::test] + async fn test_all_operators() -> Result<()> { let testdata = crate::test_util::arrow_test_data(); let path = format!("{}/csv/aggregate_test_100.csv", testdata); @@ -1444,8 +1444,8 @@ mod tests { Ok(()) } - #[test] - fn test_with_csv_plan() -> Result<()> { + #[tokio::test] + async fn test_with_csv_plan() -> Result<()> { let testdata = crate::test_util::arrow_test_data(); let path = format!("{}/csv/aggregate_test_100.csv", testdata); @@ -1463,8 +1463,8 @@ mod tests { Ok(()) } - #[test] - fn errors() -> Result<()> { + #[tokio::test] + async fn errors() -> Result<()> { let testdata = crate::test_util::arrow_test_data(); let path = format!("{}/csv/aggregate_test_100.csv", testdata); let options = CsvReadOptions::new().schema_infer_max_records(100); @@ -1565,8 +1565,8 @@ mod tests { } } - #[test] - fn in_list_types() -> Result<()> { + #[tokio::test] + async fn in_list_types() -> Result<()> { let testdata = crate::test_util::arrow_test_data(); let path = format!("{}/csv/aggregate_test_100.csv", testdata); let options = CsvReadOptions::new().schema_infer_max_records(100); @@ -1612,8 +1612,8 @@ mod tests { Ok(()) } - #[test] - fn hash_agg_input_schema() -> Result<()> { + #[tokio::test] + async fn hash_agg_input_schema() -> Result<()> { let testdata = 
crate::test_util::arrow_test_data(); let path = format!("{}/csv/aggregate_test_100.csv", testdata); @@ -1635,8 +1635,8 @@ mod tests { Ok(()) } - #[test] - fn hash_agg_group_by_partitioned() -> Result<()> { + #[tokio::test] + async fn hash_agg_group_by_partitioned() -> Result<()> { let testdata = crate::test_util::arrow_test_data(); let path = format!("{}/csv/aggregate_test_100.csv", testdata); From 7d894ccda6091412c35a073f9686acd33159109c Mon Sep 17 00:00:00 2001 From: Yijie Shen Date: Thu, 19 Aug 2021 13:56:05 +0800 Subject: [PATCH 15/16] fix doc test --- datafusion/src/dataframe.rs | 33 ++++++++++------ datafusion/src/datasource/csv.rs | 3 ++ .../src/datasource/object_store/local.rs | 38 ++++++++++--------- datafusion/src/datasource/parquet.rs | 16 ++++---- datafusion/src/execution/context.rs | 8 ++-- datafusion/src/physical_plan/mod.rs | 4 ++ datafusion/src/physical_plan/parquet.rs | 2 +- datafusion/tests/parquet_pruning.rs | 36 +++++++++--------- datafusion/tests/sql.rs | 4 +- 9 files changed, 84 insertions(+), 60 deletions(-) diff --git a/datafusion/src/dataframe.rs b/datafusion/src/dataframe.rs index 608f6dbcaf17..45727f4160f7 100644 --- a/datafusion/src/dataframe.rs +++ b/datafusion/src/dataframe.rs @@ -41,7 +41,8 @@ use async_trait::async_trait; /// ``` /// # use datafusion::prelude::*; /// # use datafusion::error::Result; -/// # fn main() -> Result<()> { +/// #[tokio::main] +/// # async fn main() -> Result<()> { /// let mut ctx = ExecutionContext::new(); /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?; /// let df = df.filter(col("a").lt_eq(col("b")))? @@ -59,7 +60,8 @@ pub trait DataFrame: Send + Sync { /// ``` /// # use datafusion::prelude::*; /// # use datafusion::error::Result; - /// # fn main() -> Result<()> { + /// #[tokio::main] + /// # async fn main() -> Result<()> { /// let mut ctx = ExecutionContext::new(); /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?; /// let df = df.select_columns(&["a", "b"])?; @@ -73,7 +75,8 @@ pub trait DataFrame: Send + Sync { /// ``` /// # use datafusion::prelude::*; /// # use datafusion::error::Result; - /// # fn main() -> Result<()> { + /// #[tokio::main] + /// # async fn main() -> Result<()> { /// let mut ctx = ExecutionContext::new(); /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?; /// let df = df.select(vec![col("a") * col("b"), col("c")])?; @@ -87,7 +90,8 @@ pub trait DataFrame: Send + Sync { /// ``` /// # use datafusion::prelude::*; /// # use datafusion::error::Result; - /// # fn main() -> Result<()> { + /// #[tokio::main] + /// # async fn main() -> Result<()> { /// let mut ctx = ExecutionContext::new(); /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?; /// let df = df.filter(col("a").lt_eq(col("b")))?; @@ -101,7 +105,8 @@ pub trait DataFrame: Send + Sync { /// ``` /// # use datafusion::prelude::*; /// # use datafusion::error::Result; - /// # fn main() -> Result<()> { + /// #[tokio::main] + /// # async fn main() -> Result<()> { /// let mut ctx = ExecutionContext::new(); /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?; /// @@ -124,7 +129,8 @@ pub trait DataFrame: Send + Sync { /// ``` /// # use datafusion::prelude::*; /// # use datafusion::error::Result; - /// # fn main() -> Result<()> { + /// #[tokio::main] + /// # async fn main() -> Result<()> { /// let mut ctx = ExecutionContext::new(); /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?; /// let df = df.limit(100)?; @@ -138,7 +144,8 @@ pub trait 
DataFrame: Send + Sync { /// ``` /// # use datafusion::prelude::*; /// # use datafusion::error::Result; - /// # fn main() -> Result<()> { + /// #[tokio::main] + /// # async fn main() -> Result<()> { /// let mut ctx = ExecutionContext::new(); /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?; /// let df = df.union(df.clone())?; @@ -153,7 +160,8 @@ pub trait DataFrame: Send + Sync { /// ``` /// # use datafusion::prelude::*; /// # use datafusion::error::Result; - /// # fn main() -> Result<()> { + /// #[tokio::main] + /// # async fn main() -> Result<()> { /// let mut ctx = ExecutionContext::new(); /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?; /// let df = df.sort(vec![col("a").sort(true, true), col("b").sort(false, false)])?; @@ -196,7 +204,8 @@ pub trait DataFrame: Send + Sync { /// ``` /// # use datafusion::prelude::*; /// # use datafusion::error::Result; - /// # fn main() -> Result<()> { + /// #[tokio::main] + /// # async fn main() -> Result<()> { /// let mut ctx = ExecutionContext::new(); /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?; /// let df1 = df.repartition(Partitioning::RoundRobinBatch(4))?; @@ -275,7 +284,8 @@ pub trait DataFrame: Send + Sync { /// ``` /// # use datafusion::prelude::*; /// # use datafusion::error::Result; - /// # fn main() -> Result<()> { + /// #[tokio::main] + /// # async fn main() -> Result<()> { /// let mut ctx = ExecutionContext::new(); /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?; /// let schema = df.schema(); @@ -309,7 +319,8 @@ pub trait DataFrame: Send + Sync { /// ``` /// # use datafusion::prelude::*; /// # use datafusion::error::Result; - /// # fn main() -> Result<()> { + /// #[tokio::main] + /// # async fn main() -> Result<()> { /// let mut ctx = ExecutionContext::new(); /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?; /// let f = df.registry(); diff --git a/datafusion/src/datasource/csv.rs b/datafusion/src/datasource/csv.rs index 160556cd4da7..8e55180ea829 100644 --- a/datafusion/src/datasource/csv.rs +++ b/datafusion/src/datasource/csv.rs @@ -24,6 +24,8 @@ //! ``` //! use datafusion::datasource::TableProvider; //! use datafusion::datasource::csv::{CsvFile, CsvReadOptions}; +//! #[tokio::main] +//! # async fn main() { //! //! let testdata = datafusion::test_util::arrow_test_data(); //! let csvdata = CsvFile::try_new( @@ -31,6 +33,7 @@ //! CsvReadOptions::new().delimiter(b'|'), //! ).unwrap(); //! let schema = csvdata.schema(); +//! # } //! 
``` use arrow::datatypes::SchemaRef; diff --git a/datafusion/src/datasource/object_store/local.rs b/datafusion/src/datasource/object_store/local.rs index 4244baf6a12b..36bb7e29de40 100644 --- a/datafusion/src/datasource/object_store/local.rs +++ b/datafusion/src/datasource/object_store/local.rs @@ -145,23 +145,27 @@ async fn list_all_async(root_path: String, ext: String) -> Result None, - Some(path) => { - let file_stream = - match find_files_in_dir(path, &mut to_visit, ext).await { - Ok(files) => stream::iter(files).map(Ok).left_stream(), - Err(e) => stream::once(async { Err(e) }).right_stream(), - }; - - Some((file_stream, to_visit)) + if tokio::fs::metadata(&root_path).await?.is_file() { + Ok(Box::pin(stream::once(async { Ok(root_path) }))) + } else { + let result = stream::unfold(vec![root_path], move |mut to_visit| { + let ext = ext.clone(); + async move { + match to_visit.pop() { + None => None, + Some(path) => { + let file_stream = + match find_files_in_dir(path, &mut to_visit, ext).await { + Ok(files) => stream::iter(files).map(Ok).left_stream(), + Err(e) => stream::once(async { Err(e) }).right_stream(), + }; + + Some((file_stream, to_visit)) + } } } - } - }) - .flatten(); - Ok(Box::pin(result)) + }) + .flatten(); + Ok(Box::pin(result)) + } } diff --git a/datafusion/src/datasource/parquet.rs b/datafusion/src/datasource/parquet.rs index bfc893bf8cc7..1a8733c0ac97 100644 --- a/datafusion/src/datasource/parquet.rs +++ b/datafusion/src/datasource/parquet.rs @@ -493,7 +493,7 @@ mod tests { use arrow::record_batch::RecordBatch; use futures::StreamExt; - #[tokio::test] + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn read_small_batches() -> Result<()> { let table = load_table("alltypes_plain.parquet")?; let projection = None; @@ -516,7 +516,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn read_alltypes_plain_parquet() -> Result<()> { let table = load_table("alltypes_plain.parquet")?; @@ -551,7 +551,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn read_bool_alltypes_plain_parquet() -> Result<()> { let table = load_table("alltypes_plain.parquet")?; let projection = Some(vec![1]); @@ -578,7 +578,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn read_i32_alltypes_plain_parquet() -> Result<()> { let table = load_table("alltypes_plain.parquet")?; let projection = Some(vec![0]); @@ -602,7 +602,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn read_i96_alltypes_plain_parquet() -> Result<()> { let table = load_table("alltypes_plain.parquet")?; let projection = Some(vec![10]); @@ -626,7 +626,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn read_f32_alltypes_plain_parquet() -> Result<()> { let table = load_table("alltypes_plain.parquet")?; let projection = Some(vec![6]); @@ -653,7 +653,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn read_f64_alltypes_plain_parquet() -> Result<()> { let table = load_table("alltypes_plain.parquet")?; let projection = Some(vec![7]); @@ -680,7 +680,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn read_binary_alltypes_plain_parquet() -> Result<()> { let table = load_table("alltypes_plain.parquet")?; let 
projection = Some(vec![9]); diff --git a/datafusion/src/execution/context.rs b/datafusion/src/execution/context.rs index 91fe9cddc657..6a41f50ab0a1 100644 --- a/datafusion/src/execution/context.rs +++ b/datafusion/src/execution/context.rs @@ -97,7 +97,8 @@ use parquet::file::properties::WriterProperties; /// ``` /// use datafusion::prelude::*; /// # use datafusion::error::Result; -/// # fn main() -> Result<()> { +/// #[tokio::main] +/// # async fn main() -> Result<()> { /// let mut ctx = ExecutionContext::new(); /// let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?; /// let df = df.filter(col("a").lt_eq(col("b")))? @@ -114,7 +115,8 @@ use parquet::file::properties::WriterProperties; /// use datafusion::prelude::*; /// /// # use datafusion::error::Result; -/// # fn main() -> Result<()> { +/// #[tokio::main] +/// # async fn main() -> Result<()> { /// let mut ctx = ExecutionContext::new(); /// ctx.register_csv("example", "tests/example.csv", CsvReadOptions::new())?; /// let results = ctx.sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100")?; @@ -2708,7 +2710,7 @@ mod tests { Ok(()) } - #[tokio::test] + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn write_parquet_results() -> Result<()> { // create partitioned input file and context let tmp_dir = TempDir::new()?; diff --git a/datafusion/src/physical_plan/mod.rs b/datafusion/src/physical_plan/mod.rs index 8f7db72484c9..ecbcd054b105 100644 --- a/datafusion/src/physical_plan/mod.rs +++ b/datafusion/src/physical_plan/mod.rs @@ -217,6 +217,9 @@ pub trait ExecutionPlan: Debug + Send + Sync { /// use datafusion::prelude::*; /// use datafusion::physical_plan::displayable; /// +/// #[tokio::main] +/// # async fn main() { +/// /// // Hard code concurrency as it appears in the RepartitionExec output /// let config = ExecutionConfig::new() /// .with_concurrency(3); @@ -242,6 +245,7 @@ pub trait ExecutionPlan: Debug + Send + Sync { /// \n RepartitionExec: partitioning=RoundRobinBatch(3)\ /// \n CsvExec: source=Path(tests/example.csv: [tests/example.csv]), has_header=true", /// plan_string.trim()); +/// # } /// ``` /// pub fn displayable(plan: &dyn ExecutionPlan) -> DisplayableExecutionPlan<'_> { diff --git a/datafusion/src/physical_plan/parquet.rs b/datafusion/src/physical_plan/parquet.rs index 6eca2c396e06..4b205fab0b6c 100644 --- a/datafusion/src/physical_plan/parquet.rs +++ b/datafusion/src/physical_plan/parquet.rs @@ -667,7 +667,7 @@ mod tests { assert_eq!(1, chunks[4].len()); } - #[tokio::test] + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn test() -> Result<()> { let testdata = crate::test_util::parquet_test_data(); let filename = format!("{}/alltypes_plain.parquet", testdata); diff --git a/datafusion/tests/parquet_pruning.rs b/datafusion/tests/parquet_pruning.rs index 789f0810c983..99e19a4789fb 100644 --- a/datafusion/tests/parquet_pruning.rs +++ b/datafusion/tests/parquet_pruning.rs @@ -41,7 +41,7 @@ use hashbrown::HashMap; use parquet::{arrow::ArrowWriter, file::properties::WriterProperties}; use tempfile::NamedTempFile; -#[tokio::test] +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn prune_timestamps_nanos() { let output = ContextWithParquet::new(Scenario::Timestamps) .await @@ -54,7 +54,7 @@ async fn prune_timestamps_nanos() { assert_eq!(output.result_rows, 10, "{}", output.description()); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn prune_timestamps_micros() { let output = 
ContextWithParquet::new(Scenario::Timestamps) .await @@ -69,7 +69,7 @@ async fn prune_timestamps_micros() { assert_eq!(output.result_rows, 10, "{}", output.description()); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn prune_timestamps_millis() { let output = ContextWithParquet::new(Scenario::Timestamps) .await @@ -84,7 +84,7 @@ async fn prune_timestamps_millis() { assert_eq!(output.result_rows, 10, "{}", output.description()); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn prune_timestamps_seconds() { let output = ContextWithParquet::new(Scenario::Timestamps) .await @@ -99,7 +99,7 @@ async fn prune_timestamps_seconds() { assert_eq!(output.result_rows, 10, "{}", output.description()); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn prune_date32() { let output = ContextWithParquet::new(Scenario::Dates) .await @@ -112,7 +112,7 @@ async fn prune_date32() { assert_eq!(output.result_rows, 1, "{}", output.description()); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn prune_date64() { // work around for not being able to cast Date32 to Date64 automatically let date = "2020-01-02" @@ -137,7 +137,7 @@ async fn prune_date64() { assert_eq!(output.result_rows, 1, "{}", output.description()); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn prune_disabled() { let query = "SELECT * FROM t where nanos < to_timestamp('2020-01-02 01:01:11Z')"; let expected_rows = 10; @@ -178,7 +178,7 @@ async fn prune_disabled() { ); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn prune_int32_lt() { let (expected_errors, expected_row_group_pruned, expected_results) = (Some(0), Some(1), 11); @@ -218,7 +218,7 @@ async fn prune_int32_lt() { ); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn prune_int32_eq() { // resulrt of sql "SELECT * FROM t where i = 1" let output = ContextWithParquet::new(Scenario::Int32) @@ -233,7 +233,7 @@ async fn prune_int32_eq() { assert_eq!(output.result_rows, 1, "{}", output.description()); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn prune_int32_scalar_fun_and_eq() { // resulrt of sql "SELECT * FROM t where abs(i) = 1 and i = 1" // only use "i = 1" to prune @@ -249,7 +249,7 @@ async fn prune_int32_scalar_fun_and_eq() { assert_eq!(output.result_rows, 1, "{}", output.description()); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn prune_int32_scalar_fun() { // resulrt of sql "SELECT * FROM t where abs(i) = 1" is not supported let output = ContextWithParquet::new(Scenario::Int32) @@ -265,7 +265,7 @@ async fn prune_int32_scalar_fun() { assert_eq!(output.result_rows, 3, "{}", output.description()); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn prune_int32_complex_expr() { // resulrt of sql "SELECT * FROM t where i+1 = 1" is not supported let output = ContextWithParquet::new(Scenario::Int32) @@ -281,7 +281,7 @@ async fn prune_int32_complex_expr() { assert_eq!(output.result_rows, 2, "{}", output.description()); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn prune_int32_complex_expr_subtract() { // resulrt of sql "SELECT * FROM t where 1-i > 1" is not supported let output = ContextWithParquet::new(Scenario::Int32) @@ -297,7 +297,7 @@ async fn 
prune_int32_complex_expr_subtract() { assert_eq!(output.result_rows, 9, "{}", output.description()); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn prune_f64_lt() { let (expected_errors, expected_row_group_pruned, expected_results) = (Some(0), Some(1), 11); @@ -337,7 +337,7 @@ async fn prune_f64_lt() { ); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn prune_f64_scalar_fun_and_gt() { // resulrt of sql "SELECT * FROM t where abs(f - 1) <= 0.000001 and f >= 0.1" // only use "f >= 0" to prune @@ -353,7 +353,7 @@ async fn prune_f64_scalar_fun_and_gt() { assert_eq!(output.result_rows, 1, "{}", output.description()); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn prune_f64_scalar_fun() { // resulrt of sql "SELECT * FROM t where abs(f-1) <= 0.000001" is not supported let output = ContextWithParquet::new(Scenario::Float64) @@ -369,7 +369,7 @@ async fn prune_f64_scalar_fun() { assert_eq!(output.result_rows, 1, "{}", output.description()); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn prune_f64_complex_expr() { // resulrt of sql "SELECT * FROM t where f+1 > 1.1"" is not supported let output = ContextWithParquet::new(Scenario::Float64) @@ -385,7 +385,7 @@ async fn prune_f64_complex_expr() { assert_eq!(output.result_rows, 9, "{}", output.description()); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn prune_f64_complex_expr_subtract() { // resulrt of sql "SELECT * FROM t where 1-f > 1" is not supported let output = ContextWithParquet::new(Scenario::Float64) diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs index 0f385680deed..b70ce155b6f3 100644 --- a/datafusion/tests/sql.rs +++ b/datafusion/tests/sql.rs @@ -110,7 +110,7 @@ async fn nyc() -> Result<()> { Ok(()) } -#[tokio::test] +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn parquet_query() { let mut ctx = ExecutionContext::new(); register_alltypes_parquet(&mut ctx); @@ -136,7 +136,7 @@ async fn parquet_query() { assert_batches_eq!(expected, &actual); } -#[tokio::test] +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn parquet_single_nan_schema() { let mut ctx = ExecutionContext::new(); let testdata = datafusion::test_util::parquet_test_data(); From f6239b57d09ef68e90431bdffd93bd13296745e9 Mon Sep 17 00:00:00 2001 From: Yijie Shen Date: Thu, 19 Aug 2021 14:07:59 +0800 Subject: [PATCH 16/16] fix fmt --- datafusion/src/datasource/object_store/local.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/src/datasource/object_store/local.rs b/datafusion/src/datasource/object_store/local.rs index 36bb7e29de40..47f455280d5b 100644 --- a/datafusion/src/datasource/object_store/local.rs +++ b/datafusion/src/datasource/object_store/local.rs @@ -165,7 +165,7 @@ async fn list_all_async(root_path: String, ext: String) -> Result
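// Illustrative sketch, not from the patch: the sync-over-async bridge that the default
// `ObjectReader::get_reader` and `length` methods use after the switch to
// `futures::executor::block_on` above. The tests are presumably moved to
// `#[tokio::test(flavor = "multi_thread", worker_threads = 2)]` so that a worker thread
// blocked inside such a bridge cannot starve the spawned tasks it is waiting on.
async fn length_async_demo() -> u64 {
    // stand-in for an asynchronous object store metadata call
    42
}

fn length_demo() -> u64 {
    // drive the async version to completion from synchronous code
    futures::executor::block_on(length_async_demo())
}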