Skip to content

Commit bebbb40

Browse files
authored
Fix s3 path error in Preprocess (#165)
* fix s3 path * bump
1 parent bb03355 commit bebbb40

File tree

2 files changed

+13
-2
lines changed

2 files changed

+13
-2
lines changed

cirro/helpers/preprocess_dataset.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,16 @@
1616
logger = logging.getLogger(__name__)
1717

1818

19+
def _fix_s3_path(path: str) -> str:
    """
    Fix the S3 path to ensure it starts with 's3://'.

    The path is normalized by replacing the platform separator (``os.sep``)
    with '/' and stripping surrounding whitespace. If the normalized value
    is an S3 path (starts with 's3:/'), the normalized form is returned,
    with a collapsed 's3:/' prefix expanded to 's3://'. Any other path is
    returned exactly as it was passed in.

    Args:
        path: A local path or S3 URI, possibly with a collapsed 's3:/' prefix.

    Returns:
        The repaired S3 URI, or the original ``path`` when it is not an S3 path.
    """
    normalized_path = path.replace(os.sep, '/').strip()

    # Not an S3 path: hand the caller's value back untouched so local paths
    # keep their native separators.
    if not normalized_path.startswith("s3:/"):
        return path

    # Already well-formed: still return the normalized form so that stray
    # whitespace or platform separators are cleaned up consistently with
    # the repaired case below.
    if normalized_path.startswith("s3://"):
        return normalized_path

    # Only the leading scheme is repaired; later occurrences are left alone.
    return normalized_path.replace("s3:/", "s3://", 1)
27+
28+
1929
def write_json(dat, local_path: str, indent=4):
2030
"""Write a JSON object to a local file."""
2131
with Path(local_path).open(mode="wt") as handle:
@@ -26,7 +36,7 @@ def read_csv(path: str, required_columns=None) -> 'DataFrame':
2636
"""Read a CSV from the dataset and check for any required columns."""
2737
if required_columns is None:
2838
required_columns = []
29-
39+
path = _fix_s3_path(path)
3040
import pandas as pd
3141
df = pd.read_csv(path)
3242
for col in required_columns:
@@ -36,6 +46,7 @@ def read_csv(path: str, required_columns=None) -> 'DataFrame':
3646

3747
def read_json(path: str):
3848
"""Read a JSON object from a local file or S3 path."""
49+
path = _fix_s3_path(path)
3950
s3_path = S3Path(path)
4051

4152
if s3_path.valid:

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "cirro"
3-
version = "1.7.0"
3+
version = "1.7.1"
44
description = "CLI tool and SDK for interacting with the Cirro platform"
55
authors = ["Cirro Bio <[email protected]>"]
66
license = "MIT"

0 commit comments

Comments
 (0)