4 files changed: +28 -4 lines changed
lines changed Original file line number Diff line number Diff line change @@ -135,6 +135,10 @@ impl FileFormat for AvroFormat {
135135 Ok ( Arc :: new ( merged_schema) )
136136 }
137137
138+ async fn transform_schema ( & self , schema : SchemaRef ) -> Result < SchemaRef > {
139+ Ok ( schema)
140+ }
141+
138142 async fn infer_stats (
139143 & self ,
140144 _state : & dyn Session ,
Original file line number Diff line number Diff line change @@ -393,6 +393,10 @@ impl FileFormat for CsvFormat {
393393 Ok ( Arc :: new ( merged_schema) )
394394 }
395395
396+ async fn transform_schema ( & self , schema : SchemaRef ) -> Result < SchemaRef > {
397+ Ok ( schema)
398+ }
399+
396400 async fn infer_stats (
397401 & self ,
398402 _state : & dyn Session ,
Original file line number Diff line number Diff line change @@ -233,6 +233,10 @@ impl FileFormat for JsonFormat {
233233 Ok ( Arc :: new ( schema) )
234234 }
235235
236+ async fn transform_schema ( & self , schema : SchemaRef ) -> Result < SchemaRef > {
237+ Ok ( schema)
238+ }
239+
236240 async fn infer_stats (
237241 & self ,
238242 _state : & dyn Session ,
Original file line number Diff line number Diff line change @@ -359,19 +359,29 @@ impl FileFormat for ParquetFormat {
359359 Schema :: try_merge ( schemas)
360360 } ?;
361361
362+ self . transform_schema ( Arc :: new ( schema) ) . await
363+ }
364+
365+ /// Transforms a schema for the Parquet format in up to two steps:
366+ ///
367+ /// 1. If `binary_as_string` is enabled, transform the schema so that any
368+ /// binary types are strings. See [transform_binary_to_string] for details.
369+ ///
370+ /// 2. If `force_view_types` is enabled, transform the schema to use view
371+ /// types for Utf8 and Binary. See [transform_schema_to_view] for details.
372+ async fn transform_schema ( & self , schema : SchemaRef ) -> Result < SchemaRef > {
362373 let schema = if self . binary_as_string ( ) {
363- transform_binary_to_string ( & schema)
374+ Arc :: new ( transform_binary_to_string ( schema. as_ref ( ) ) )
364375 } else {
365376 schema
366377 } ;
367378
368379 let schema = if self . force_view_types ( ) {
369- transform_schema_to_view ( & schema)
380+ Arc :: new ( transform_schema_to_view ( schema. as_ref ( ) ) )
370381 } else {
371382 schema
372383 } ;
373-
374- Ok ( Arc :: new ( schema) )
384+ Ok ( schema)
375385 }
376386
377387 async fn infer_stats (
@@ -598,6 +608,8 @@ pub fn transform_schema_to_view(schema: &Schema) -> Schema {
598608}
599609
600610/// Transform a schema so that any binary types are strings
611+ ///
612+ /// See [ParquetFormat::binary_as_string] for details
601613pub fn transform_binary_to_string ( schema : & Schema ) -> Schema {
602614 let transformed_fields: Vec < Arc < Field > > = schema
603615 . fields
You can’t perform that action at this time.
0 commit comments