1414from pandas import DataFrame, MultiIndex, get_option
1515from pandas.core import generic
1616
17- from pandas.io.common import IOHandles, get_handle, is_fsspec_url, stringify_path
17+ from pandas.io.common import (
18+     IOHandles,
19+     get_handle,
20+     is_fsspec_url,
21+     is_url,
22+     stringify_path,
23+ )
1824
1925
2026def get_engine(engine: str) -> "BaseImpl":
@@ -66,8 +72,10 @@ def _get_path_or_handle(
6672         fs, path_or_handle = fsspec.core.url_to_fs(
6773             path_or_handle, **(storage_options or {})
6874         )
69-     elif storage_options:
70-         raise ValueError("storage_options passed with buffer or non-fsspec filepath")
75+     elif storage_options and (not is_url(path_or_handle) or mode != "rb"):
76+         # can't write to a remote url
77+         # without making use of fsspec at the moment
78+         raise ValueError("storage_options passed with buffer, or non-supported URL")
7179
7280 handles = None
7381 if (
@@ -79,7 +87,9 @@ def _get_path_or_handle(
7987         # use get_handle only when we are very certain that it is not a directory
8088         # fsspec resources can also point to directories
8189         # this branch is used for example when reading from non-fsspec URLs
82-         handles = get_handle(path_or_handle, mode, is_text=False)
90+         handles = get_handle(
91+             path_or_handle, mode, is_text=False, storage_options=storage_options
92+         )
8393         fs = None
8494         path_or_handle = handles.handle
8595     return path_or_handle, handles, fs
@@ -307,7 +317,9 @@ def read(
307317             # use get_handle only when we are very certain that it is not a directory
308318             # fsspec resources can also point to directories
309319             # this branch is used for example when reading from non-fsspec URLs
310-             handles = get_handle(path, "rb", is_text=False)
320+             handles = get_handle(
321+                 path, "rb", is_text=False, storage_options=storage_options
322+             )
311323             path = handles.handle
312324         parquet_file = self.api.ParquetFile(path, **parquet_kwargs)
313325
@@ -404,10 +416,12 @@ def to_parquet(
404416 return None
405417
406418
419+ @doc(storage_options=generic._shared_docs["storage_options"])
407420def read_parquet(
408421     path,
409422     engine: str = "auto",
410423     columns=None,
424+     storage_options: StorageOptions = None,
411425     use_nullable_dtypes: bool = False,
412426     **kwargs,
413427):
@@ -432,13 +446,18 @@ def read_parquet(
432446 By file-like object, we refer to objects with a ``read()`` method,
433447 such as a file handle (e.g. via builtin ``open`` function)
434448 or ``StringIO``.
435-     engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto'
449+     engine : {{'auto', 'pyarrow', 'fastparquet'}}, default 'auto'
436450 Parquet library to use. If 'auto', then the option
437451 ``io.parquet.engine`` is used. The default ``io.parquet.engine``
438452 behavior is to try 'pyarrow', falling back to 'fastparquet' if
439453 'pyarrow' is unavailable.
440454 columns : list, default=None
441455 If not None, only these columns will be read from the file.
456+
457+     {storage_options}
458+
459+         .. versionadded:: 1.3.0
460+
442461 use_nullable_dtypes : bool, default False
443462 If True, use dtypes that use ``pd.NA`` as missing value indicator
444463 for the resulting DataFrame (only applicable for ``engine="pyarrow"``).
@@ -448,6 +467,7 @@ def read_parquet(
448467 support dtypes) may change without notice.
449468
450469 .. versionadded:: 1.2.0
470+
451471 **kwargs
452472 Any additional kwargs are passed to the engine.
453473
@@ -456,6 +476,11 @@ def read_parquet(
456476 DataFrame
457477 """
458478     impl = get_engine(engine)
479+
459480     return impl.read(
460-         path, columns=columns, use_nullable_dtypes=use_nullable_dtypes, **kwargs
481+         path,
482+         columns=columns,
483+         storage_options=storage_options,
484+         use_nullable_dtypes=use_nullable_dtypes,
485+         **kwargs,
461486     )
0 commit comments