From 4536d6ef8a35e49f224ec5e2540abd9e1ab0fe6d Mon Sep 17 00:00:00 2001 From: Nathan Thorpe Date: Thu, 24 Jul 2025 14:28:39 -0700 Subject: [PATCH 1/2] lazy load pandas --- cirro/cli/controller.py | 3 ++- cirro/helpers/preprocess_dataset.py | 9 ++++++--- cirro/sdk/file.py | 8 ++++---- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/cirro/cli/controller.py b/cirro/cli/controller.py index e5ad6ed3..d586e9a4 100644 --- a/cirro/cli/controller.py +++ b/cirro/cli/controller.py @@ -5,7 +5,6 @@ import sys from pathlib import Path -import pandas as pd import requests from cirro_api_client.v1.models import UploadDatasetRequest, Status, Executor @@ -58,6 +57,8 @@ def run_list_datasets(input_params: ListArguments, interactive=False): datasets = cirro.datasets.list(input_params['project']) sorted_datasets = sorted(datasets, key=lambda d: d.created_at, reverse=True) + + import pandas as pd df = pd.DataFrame.from_records([d.to_dict() for d in sorted_datasets]) df = df[['id', 'name', 'description', 'processId', 'status', 'createdBy', 'createdAt']] print(df.to_string()) diff --git a/cirro/helpers/preprocess_dataset.py b/cirro/helpers/preprocess_dataset.py index 7a80e6e6..09d2238c 100644 --- a/cirro/helpers/preprocess_dataset.py +++ b/cirro/helpers/preprocess_dataset.py @@ -2,12 +2,14 @@ import logging import os from pathlib import Path +from typing import TYPE_CHECKING import boto3 -import pandas as pd -from cirro.models.s3_path import S3Path +if TYPE_CHECKING: + from pandas import DataFrame +from cirro.models.s3_path import S3Path def _write_json(dat, local_path: str, indent=4): with Path(local_path).open(mode="wt") as handle: @@ -66,11 +68,12 @@ def log(self): self.logger.info(f"Number of files in dataset: {self.files.shape[0]:,}") self.logger.info(f"Number of samples in dataset: {self.samplesheet.shape[0]:,}") - def _read_csv(self, suffix: str, required_columns=None) -> pd.DataFrame: + def _read_csv(self, suffix: str, required_columns=None) -> 'DataFrame': """Read a CSV from the dataset and check for any required columns.""" if required_columns is None: required_columns = [] + import pandas as pd df = pd.read_csv(f"{self.s3_dataset}/{suffix}") for col in required_columns: assert col in df.columns.values, f"Did not find expected columns {col} in {self.s3_dataset}/{suffix}" diff --git a/cirro/sdk/file.py b/cirro/sdk/file.py index 91b9b27e..35b70e42 100644 --- a/cirro/sdk/file.py +++ b/cirro/sdk/file.py @@ -2,11 +2,10 @@ from io import BytesIO, StringIO from typing import List -import pandas as pd - from typing import TYPE_CHECKING if TYPE_CHECKING: import anndata + from pandas import DataFrame from cirro.cirro_client import CirroApi from cirro.models.file import File @@ -88,7 +87,7 @@ def _get(self) -> bytes: return self._client.file.get_file(self._file) - def read_csv(self, compression='infer', encoding='utf-8', **kwargs) -> pd.DataFrame: + def read_csv(self, compression='infer', encoding='utf-8', **kwargs) -> 'DataFrame': """ Parse the file as a Pandas DataFrame. @@ -100,6 +99,7 @@ def read_csv(self, compression='infer', encoding='utf-8', **kwargs) -> pd.DataFr All other keyword arguments are passed to pandas.read_csv https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html """ + import pandas if compression == 'infer': # If the file appears to be compressed @@ -119,7 +119,7 @@ def read_csv(self, compression='infer', encoding='utf-8', **kwargs) -> pd.DataFr else: handle = StringIO(self._get().decode(encoding)) - df = pd.read_csv( + df = pandas.read_csv( handle, compression=compression, encoding=encoding, From cc7b82027a5619cc6fad2cb8db1d1f237bc48f27 Mon Sep 17 00:00:00 2001 From: Nathan Thorpe Date: Thu, 24 Jul 2025 14:30:43 -0700 Subject: [PATCH 2/2] lint --- cirro/helpers/preprocess_dataset.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cirro/helpers/preprocess_dataset.py b/cirro/helpers/preprocess_dataset.py index 09d2238c..6436eb1c 100644 --- a/cirro/helpers/preprocess_dataset.py +++ b/cirro/helpers/preprocess_dataset.py @@ -11,6 +11,7 @@ from cirro.models.s3_path import S3Path + def _write_json(dat, local_path: str, indent=4): with Path(local_path).open(mode="wt") as handle: return json.dump(dat, handle, indent=indent)