|
18 | 18 |
|
19 | 19 | use std::{any::Any, collections::HashMap, ops::Bound, sync::Arc}; |
20 | 20 |
|
21 | | -use arrow_schema::{DataType, Schema, SchemaRef, SortOptions}; |
| 21 | +use arrow_schema::{Schema, SchemaRef, SortOptions}; |
22 | 22 | use bytes::Bytes; |
23 | 23 | use chrono::{NaiveDateTime, Timelike, Utc}; |
24 | 24 | use datafusion::{ |
@@ -236,57 +236,23 @@ fn partitioned_files( |
236 | 236 | count += num_rows; |
237 | 237 | } |
238 | 238 |
|
239 | | - let mut statistics = vec![]; |
240 | | - |
241 | | - for field in table_schema.fields() { |
242 | | - let Some(stats) = column_statistics |
243 | | - .get(field.name()) |
244 | | - .and_then(|stats| stats.as_ref()) |
245 | | - else { |
246 | | - statistics.push(datafusion::common::ColumnStatistics::default()); |
247 | | - break; |
248 | | - }; |
249 | | - |
250 | | - let datatype = field.data_type(); |
251 | | - |
252 | | - let (min, max) = match (stats, datatype) { |
253 | | - (TypedStatistics::Bool(stats), DataType::Boolean) => ( |
254 | | - ScalarValue::Boolean(Some(stats.min)), |
255 | | - ScalarValue::Boolean(Some(stats.max)), |
256 | | - ), |
257 | | - (TypedStatistics::Int(stats), DataType::Int32) => ( |
258 | | - ScalarValue::Int32(Some(stats.min as i32)), |
259 | | - ScalarValue::Int32(Some(stats.max as i32)), |
260 | | - ), |
261 | | - (TypedStatistics::Int(stats), DataType::Int64) => ( |
262 | | - ScalarValue::Int64(Some(stats.min)), |
263 | | - ScalarValue::Int64(Some(stats.max)), |
264 | | - ), |
265 | | - (TypedStatistics::Float(stats), DataType::Float32) => ( |
266 | | - ScalarValue::Float32(Some(stats.min as f32)), |
267 | | - ScalarValue::Float32(Some(stats.max as f32)), |
268 | | - ), |
269 | | - (TypedStatistics::Float(stats), DataType::Float64) => ( |
270 | | - ScalarValue::Float64(Some(stats.min)), |
271 | | - ScalarValue::Float64(Some(stats.max)), |
272 | | - ), |
273 | | - (TypedStatistics::String(stats), DataType::Utf8) => ( |
274 | | - ScalarValue::Utf8(Some(stats.min.clone())), |
275 | | - ScalarValue::Utf8(Some(stats.max.clone())), |
276 | | - ), |
277 | | - _ => { |
278 | | - statistics.push(datafusion::common::ColumnStatistics::default()); |
279 | | - break; |
280 | | - } |
281 | | - }; |
282 | | - |
283 | | - statistics.push(datafusion::common::ColumnStatistics { |
284 | | - null_count: None, |
285 | | - max_value: Some(max), |
286 | | - min_value: Some(min), |
287 | | - distinct_count: None, |
| 239 | + let statistics = table_schema |
| 240 | + .fields() |
| 241 | + .iter() |
| 242 | + .map(|field| { |
| 243 | + column_statistics |
| 244 | + .get(field.name()) |
| 245 | + .and_then(|stats| stats.as_ref()) |
| 246 | + .and_then(|stats| stats.clone().min_max_as_scalar(field.data_type())) |
| 247 | + .map(|(min, max)| datafusion::common::ColumnStatistics { |
| 248 | + null_count: None, |
| 249 | + max_value: Some(max), |
| 250 | + min_value: Some(min), |
| 251 | + distinct_count: None, |
| 252 | + }) |
| 253 | + .unwrap_or_default() |
288 | 254 | }) |
289 | | - } |
| 255 | + .collect(); |
290 | 256 |
|
291 | 257 | let statistics = datafusion::common::Statistics { |
292 | 258 | num_rows: Some(count as usize), |
|
0 commit comments