Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.25.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ MultiIndex
I/O
^^^

-
- Avoid calling ``S3File.s3`` when reading parquet, as this was removed in s3fs version 0.3.0 (:issue:`27756`)
-
-

Expand Down
6 changes: 4 additions & 2 deletions pandas/io/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,12 +184,14 @@ def write(

def read(self, path, columns=None, **kwargs):
if is_s3_url(path):
from pandas.io.s3 import get_file_and_filesystem

# When path is s3:// an S3File is returned.
# We need to retain the original path (str) while also
# passing the S3File().open function to the fastparquet impl.
s3, _, _, should_close = get_filepath_or_buffer(path)
s3, filesystem = get_file_and_filesystem(path)
try:
parquet_file = self.api.ParquetFile(path, open_with=s3.s3.open)
parquet_file = self.api.ParquetFile(path, open_with=filesystem.open)
finally:
s3.close()
else:
Expand Down
34 changes: 25 additions & 9 deletions pandas/io/s3.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
from typing import Optional, Tuple, TYPE_CHECKING
from pandas._typing import FilePathOrBuffer

""" s3 support for remote file interactivity """
from urllib.parse import urlparse as parse_url

from pandas.compat._optional import import_optional_dependency

s3fs = import_optional_dependency(
"s3fs", extra="The s3fs package is required to handle s3 files."
)
if TYPE_CHECKING:
import s3fs
else:
s3fs = import_optional_dependency(
"s3fs", extra="The s3fs package is required to handle s3 files."
)


def _strip_schema(url):
Expand All @@ -14,17 +20,17 @@ def _strip_schema(url):
return result.netloc + result.path


def get_filepath_or_buffer(
filepath_or_buffer, encoding=None, compression=None, mode=None
):
def get_file_and_filesystem(
filepath_or_buffer: FilePathOrBuffer, mode: Optional[str] = None
) -> Tuple[s3fs.S3File, s3fs.S3FileSystem]:
from botocore.exceptions import NoCredentialsError

if mode is None:
mode = "rb"

fs = s3fs.S3FileSystem(anon=False)
try:
filepath_or_buffer = fs.open(_strip_schema(filepath_or_buffer), mode)
file = fs.open(_strip_schema(filepath_or_buffer), mode)
except (FileNotFoundError, NoCredentialsError):
# boto3 has troubles when trying to access a public file
# when credentialed...
Expand All @@ -33,5 +39,15 @@ def get_filepath_or_buffer(
# A NoCredentialsError is raised if you don't have creds
# for that bucket.
fs = s3fs.S3FileSystem(anon=True)
filepath_or_buffer = fs.open(_strip_schema(filepath_or_buffer), mode)
return filepath_or_buffer, None, compression, True
file = fs.open(_strip_schema(filepath_or_buffer), mode)
return file, fs


def get_filepath_or_buffer(
    filepath_or_buffer: FilePathOrBuffer,
    encoding: Optional[str] = None,
    compression: Optional[str] = None,
    mode: Optional[str] = None,
) -> Tuple[s3fs.S3File, Optional[str], Optional[str], bool]:
    """
    Open an S3 location and return it in a 4-tuple.

    This is a thin wrapper around ``get_file_and_filesystem``: the opened
    file is kept and the filesystem object is discarded.

    Parameters
    ----------
    filepath_or_buffer : FilePathOrBuffer
        Location to open; forwarded unchanged to ``get_file_and_filesystem``.
    encoding : str, optional
        Accepted but not used by this function.
    compression : str, optional
        Returned unchanged as the third element of the result.
    mode : str, optional
        Forwarded to ``get_file_and_filesystem`` as ``mode``.

    Returns
    -------
    tuple
        ``(file, None, compression, True)``. The second element is always
        ``None`` and the last is always ``True``.
        NOTE(review): the trailing flag presumably tells the caller that it
        should close the returned file -- confirm against callers.
    """
    # Only the opened file is needed here; the filesystem is dropped.
    s3_file, _unused_fs = get_file_and_filesystem(filepath_or_buffer, mode=mode)
    result = (s3_file, None, compression, True)
    return result