Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion cirro/cli/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import sys
from pathlib import Path

import pandas as pd
import requests
from cirro_api_client.v1.models import UploadDatasetRequest, Status, Executor

Expand Down Expand Up @@ -58,6 +57,8 @@ def run_list_datasets(input_params: ListArguments, interactive=False):
datasets = cirro.datasets.list(input_params['project'])

sorted_datasets = sorted(datasets, key=lambda d: d.created_at, reverse=True)

import pandas as pd
df = pd.DataFrame.from_records([d.to_dict() for d in sorted_datasets])
df = df[['id', 'name', 'description', 'processId', 'status', 'createdBy', 'createdAt']]
print(df.to_string())
Expand Down
8 changes: 6 additions & 2 deletions cirro/helpers/preprocess_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@
import logging
import os
from pathlib import Path
from typing import TYPE_CHECKING

import boto3
import pandas as pd

if TYPE_CHECKING:
from pandas import DataFrame

from cirro.models.s3_path import S3Path

Expand Down Expand Up @@ -66,11 +69,12 @@ def log(self):
self.logger.info(f"Number of files in dataset: {self.files.shape[0]:,}")
self.logger.info(f"Number of samples in dataset: {self.samplesheet.shape[0]:,}")

def _read_csv(self, suffix: str, required_columns=None) -> pd.DataFrame:
def _read_csv(self, suffix: str, required_columns=None) -> 'DataFrame':
"""Read a CSV from the dataset and check for any required columns."""
if required_columns is None:
required_columns = []

import pandas as pd
df = pd.read_csv(f"{self.s3_dataset}/{suffix}")
for col in required_columns:
assert col in df.columns.values, f"Did not find expected columns {col} in {self.s3_dataset}/{suffix}"
Expand Down
8 changes: 4 additions & 4 deletions cirro/sdk/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,10 @@
from io import BytesIO, StringIO
from typing import List

import pandas as pd

from typing import TYPE_CHECKING
if TYPE_CHECKING:
import anndata
from pandas import DataFrame

from cirro.cirro_client import CirroApi
from cirro.models.file import File
Expand Down Expand Up @@ -88,7 +87,7 @@ def _get(self) -> bytes:

return self._client.file.get_file(self._file)

def read_csv(self, compression='infer', encoding='utf-8', **kwargs) -> pd.DataFrame:
def read_csv(self, compression='infer', encoding='utf-8', **kwargs) -> 'DataFrame':
"""
Parse the file as a Pandas DataFrame.

Expand All @@ -100,6 +99,7 @@ def read_csv(self, compression='infer', encoding='utf-8', **kwargs) -> pd.DataFr
All other keyword arguments are passed to pandas.read_csv
https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
"""
import pandas

if compression == 'infer':
# If the file appears to be compressed
Expand All @@ -119,7 +119,7 @@ def read_csv(self, compression='infer', encoding='utf-8', **kwargs) -> pd.DataFr
else:
handle = StringIO(self._get().decode(encoding))

df = pd.read_csv(
df = pandas.read_csv(
handle,
compression=compression,
encoding=encoding,
Expand Down
Loading