@@ -28,7 +28,7 @@ use datafusion::arrow::record_batch::RecordBatch;
 use datafusion::datasource::datasource::TableProvider;
 use datafusion::datasource::MemTable;
 use datafusion::execution::context::{SessionConfig, SessionContext};
-use datafusion::prelude::{CsvReadOptions, ParquetReadOptions};
+use datafusion::prelude::{AvroReadOptions, CsvReadOptions, ParquetReadOptions};

 use crate::catalog::{PyCatalog, PyTable};
 use crate::dataframe::PyDataFrame;
@@ -264,4 +264,105 @@ impl PySessionContext {
     fn session_id(&self) -> PyResult<String> {
         Ok(self.ctx.session_id())
     }
+
+    #[allow(clippy::too_many_arguments)]
+    #[args(
+        schema = "None",
+        has_header = "true",
+        delimiter = "\",\"",
+        schema_infer_max_records = "1000",
+        file_extension = "\".csv\"",
+        table_partition_cols = "vec![]"
+    )]
+    fn read_csv(
+        &self,
+        path: PathBuf,
+        schema: Option<Schema>,
+        has_header: bool,
+        delimiter: &str,
+        schema_infer_max_records: usize,
+        file_extension: &str,
+        table_partition_cols: Vec<String>,
+        py: Python,
+    ) -> PyResult<PyDataFrame> {
+        let path = path
+            .to_str()
+            .ok_or_else(|| PyValueError::new_err("Unable to convert path to a string"))?;
+
+        // CsvReadOptions takes the delimiter as a single byte, so reject
+        // multi-byte input with a Python ValueError.
+        let delimiter = delimiter.as_bytes();
+        if delimiter.len() != 1 {
+            return Err(PyValueError::new_err(
+                "Delimiter must be a single character",
+            ));
+        };
+
+        let mut options = CsvReadOptions::new()
+            .has_header(has_header)
+            .delimiter(delimiter[0])
+            .schema_infer_max_records(schema_infer_max_records)
+            .file_extension(file_extension)
+            .table_partition_cols(table_partition_cols);
+        // An explicit schema, when given, overrides inference.
+        options.schema = schema.as_ref();
+
+        // read_csv is async; block on the future so the Python call stays synchronous.
+        let result = self.ctx.read_csv(path, options);
+        let df = PyDataFrame::new(wait_for_future(py, result).map_err(DataFusionError::from)?);
+
+        Ok(df)
+    }
+
+    #[allow(clippy::too_many_arguments)]
+    #[args(
+        parquet_pruning = "true",
+        file_extension = "\".parquet\"",
+        table_partition_cols = "vec![]",
+        skip_metadata = "true"
+    )]
+    fn read_parquet(
+        &self,
+        path: &str,
+        table_partition_cols: Vec<String>,
+        parquet_pruning: bool,
+        file_extension: &str,
+        skip_metadata: bool,
+        py: Python,
+    ) -> PyResult<PyDataFrame> {
+        let mut options = ParquetReadOptions::default()
+            .table_partition_cols(table_partition_cols)
+            .parquet_pruning(parquet_pruning)
+            .skip_metadata(skip_metadata);
+        // Override the default ".parquet" extension directly on the options struct.
+        options.file_extension = file_extension;
+
+        let result = self.ctx.read_parquet(path, options);
+        let df = PyDataFrame::new(wait_for_future(py, result).map_err(DataFusionError::from)?);
+        Ok(df)
+    }
+
+    #[allow(clippy::too_many_arguments)]
+    #[args(
+        schema = "None",
+        file_extension = "\".avro\"",
+        table_partition_cols = "vec![]"
+    )]
+    fn read_avro(
+        &self,
+        path: &str,
+        schema: Option<Schema>,
+        table_partition_cols: Vec<String>,
+        file_extension: &str,
+        py: Python,
+    ) -> PyResult<PyDataFrame> {
+        let mut options = AvroReadOptions::default().table_partition_cols(table_partition_cols);
+        options.file_extension = file_extension;
+        // AvroReadOptions stores an owned schema behind an Arc, unlike the borrowed CSV schema.
+        options.schema = schema.map(Arc::new);
+
+        let result = self.ctx.read_avro(path, options);
+        let df = PyDataFrame::new(wait_for_future(py, result).map_err(DataFusionError::from)?);
+        Ok(df)
+    }
 }
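
For context, here is a minimal sketch of the underlying DataFusion calls these bindings delegate to. It assumes a Tokio runtime and a recent DataFusion release; the file paths and the two-column schema are placeholders for illustration, not part of this change:

// A hedged usage sketch, not part of the commit: exercises read_csv,
// read_parquet, and read_avro on a SessionContext directly from Rust.
use datafusion::arrow::datatypes::{DataType, Field, Schema};
use datafusion::error::Result;
use datafusion::prelude::{AvroReadOptions, CsvReadOptions, ParquetReadOptions, SessionContext};

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();

    // CSV: headerless and semicolon-delimited, with an explicit schema
    // instead of relying on inference.
    let schema = Schema::new(vec![
        Field::new("id", DataType::Int64, false),
        Field::new("name", DataType::Utf8, true),
    ]);
    let csv_opts = CsvReadOptions::new()
        .has_header(false)
        .delimiter(b';')
        .schema(&schema);
    ctx.read_csv("example.csv", csv_opts).await?.show().await?;

    // Parquet: the defaults already enable pruning and skip decoding metadata.
    ctx.read_parquet("example.parquet", ParquetReadOptions::default())
        .await?
        .show()
        .await?;

    // Avro: with no schema supplied, it is read from the file itself.
    ctx.read_avro("example.avro", AvroReadOptions::default())
        .await?
        .show()
        .await?;

    Ok(())
}

The Python-facing methods above are thin synchronous wrappers over these async calls: wait_for_future blocks on each future while holding the GIL token, which is why every binding takes a trailing py: Python argument.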