From a8d4ea7cdccf68038d487396ddaf10e8d2ed58b7 Mon Sep 17 00:00:00 2001 From: Shannon Osborne <31218961+shanosborne@users.noreply.github.com> Date: Tue, 26 Nov 2024 09:00:25 -0800 Subject: [PATCH 1/6] Add cli option to download only certain files --- README.md | 1 + cirro/cli/cli.py | 3 +++ cirro/cli/controller.py | 10 ++++++++++ 3 files changed, 14 insertions(+) diff --git a/README.md b/README.md index 26c2fda2..9d251da4 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,7 @@ Usage: cirro download [OPTIONS] Options: --project TEXT Name or ID of the project --dataset TEXT ID of the dataset + --files TEXT Comma-separated list of files to download (optional) --data-directory TEXT Directory to store the files -i, --interactive Gather arguments interactively --help Show this message and exit. diff --git a/cirro/cli/cli.py b/cirro/cli/cli.py index db1ea475..6c59dd58 100644 --- a/cirro/cli/cli.py +++ b/cirro/cli/cli.py @@ -36,6 +36,9 @@ def list_datasets(**kwargs): help='Name or ID of the project') @click.option('--dataset', help='ID of the dataset') +@click.option('--files', + help='Comma-separated list of files to download', + default='') @click.option('--data-directory', help='Directory to store the files') @click.option('-i', '--interactive', diff --git a/cirro/cli/controller.py b/cirro/cli/controller.py index cf8be4ba..a74bae6f 100644 --- a/cirro/cli/controller.py +++ b/cirro/cli/controller.py @@ -1,4 +1,5 @@ import logging +import os import sys import pandas as pd @@ -142,6 +143,15 @@ def run_download(input_params: DownloadArguments, interactive=False): datasets = cirro.datasets.list(project_id) dataset_id = get_id_from_name(datasets, input_params['dataset']) + if (input_params['files']): + all_files = cirro.datasets.get_file_listing(project_id, dataset_id) + files_to_download = [] + for filepath in input_params['files'].split(','): + if not filepath.startswith('data/'): + filepath = os.path.join('data/', filepath) + file = [f for f in all_files if f.relative_path == filepath][0] + files_to_download.append(file) + logger.info("Downloading files") if cirro.configuration.enable_additional_checksum: checksum_method = "SHA256" From 52fb1a2616ff33beeffe3f0e5169f6322fd1643b Mon Sep 17 00:00:00 2001 From: Shannon Osborne <31218961+shanosborne@users.noreply.github.com> Date: Tue, 26 Nov 2024 10:39:28 -0800 Subject: [PATCH 2/6] fix flake8 --- cirro/cli/controller.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cirro/cli/controller.py b/cirro/cli/controller.py index a74bae6f..70096396 100644 --- a/cirro/cli/controller.py +++ b/cirro/cli/controller.py @@ -145,9 +145,9 @@ def run_download(input_params: DownloadArguments, interactive=False): if (input_params['files']): all_files = cirro.datasets.get_file_listing(project_id, dataset_id) - files_to_download = [] + files_to_download = [] for filepath in input_params['files'].split(','): - if not filepath.startswith('data/'): + if not filepath.startswith('data/'): filepath = os.path.join('data/', filepath) file = [f for f in all_files if f.relative_path == filepath][0] files_to_download.append(file) From 385280f9dfa71f7821e00892a764b4bba10d8982 Mon Sep 17 00:00:00 2001 From: Shannon Osborne <31218961+shanosborne@users.noreply.github.com> Date: Tue, 26 Nov 2024 12:51:36 -0800 Subject: [PATCH 3/6] requested changes --- cirro/cli/controller.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cirro/cli/controller.py b/cirro/cli/controller.py index 70096396..25c60113 100644 --- a/cirro/cli/controller.py +++ b/cirro/cli/controller.py @@ -143,13 +143,16 @@ def run_download(input_params: DownloadArguments, interactive=False): datasets = cirro.datasets.list(project_id) dataset_id = get_id_from_name(datasets, input_params['dataset']) - if (input_params['files']): + if input_params['files']: all_files = cirro.datasets.get_file_listing(project_id, dataset_id) files_to_download = [] for filepath in input_params['files'].split(','): if not filepath.startswith('data/'): filepath = os.path.join('data/', filepath) - file = [f for f in all_files if f.relative_path == filepath][0] + file = next((f for f in all_files if f.relative_path == filepath), None) + if not file: + logger.warning(f"Could not find file {filepath}. Skipping.") + continue files_to_download.append(file) logger.info("Downloading files") From e7f82086f5cf8731309eb26e1ce46eeba1d060ba Mon Sep 17 00:00:00 2001 From: Shannon Osborne <31218961+shanosborne@users.noreply.github.com> Date: Wed, 27 Nov 2024 08:21:18 -0800 Subject: [PATCH 4/6] swap singular files command for multiple file commands --- README.md | 8 ++++---- cirro/cli/cli.py | 7 ++++--- cirro/cli/controller.py | 5 +++-- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 9d251da4..ebda3baf 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ Usage: cirro download [OPTIONS] Options: --project TEXT Name or ID of the project --dataset TEXT ID of the dataset - --files TEXT Comma-separated list of files to download (optional) + --file... TEXT Name and relative path of the file (optional) --data-directory TEXT Directory to store the files -i, --interactive Gather arguments interactively --help Show this message and exit. @@ -92,7 +92,7 @@ $ cirro upload --interactive See the following set of Jupyter notebooks that contain examples on the following topics: | Jupyter Notebook | Topic | -|--------------------------------------------------------------------|--------------------------------------| +| ------------------------------------------------------------------ | ------------------------------------ | | [Introduction](samples/Getting_started.ipynb) | Installing and authenticating | | [Uploading a dataset](samples/Uploading_a_dataset.ipynb) | Uploading data | | [Downloading a dataset](samples/Downloading_a_dataset.ipynb) | Downloading data | @@ -104,7 +104,7 @@ See the following set of Jupyter notebooks that contain examples on the followin ## R Usage | Jupyter Notebook | Topic | -|-----------------------------------------------------|---------------------| +| --------------------------------------------------- | ------------------- | | [Downloading a dataset in R](samples/Using-R.ipynb) | Reading data with R | ## Advanced Usage @@ -114,7 +114,7 @@ View the API documentation for this library [here](https://cirrobio.github.io/Ci ### Supported environment variables | Name | Description | Default | -|----------------|-------------------------------|----------| +| -------------- | ----------------------------- | -------- | | CIRRO_HOME | Local configuration directory | ~/.cirro | | CIRRO_BASE_URL | Base URL of the data portal | | diff --git a/cirro/cli/cli.py b/cirro/cli/cli.py index 6c59dd58..90d4bf47 100644 --- a/cirro/cli/cli.py +++ b/cirro/cli/cli.py @@ -36,9 +36,10 @@ def list_datasets(**kwargs): help='Name or ID of the project') @click.option('--dataset', help='ID of the dataset') -@click.option('--files', - help='Comma-separated list of files to download', - default='') +@click.option('--file', + help='Name and relative path of the file (optional)', + default=[], + multiple=True ) @click.option('--data-directory', help='Directory to store the files') @click.option('-i', '--interactive', diff --git a/cirro/cli/controller.py b/cirro/cli/controller.py index 25c60113..f6efbd0b 100644 --- a/cirro/cli/controller.py +++ b/cirro/cli/controller.py @@ -143,10 +143,11 @@ def run_download(input_params: DownloadArguments, interactive=False): datasets = cirro.datasets.list(project_id) dataset_id = get_id_from_name(datasets, input_params['dataset']) - if input_params['files']: + if input_params['file']: all_files = cirro.datasets.get_file_listing(project_id, dataset_id) files_to_download = [] - for filepath in input_params['files'].split(','): + + for filepath in input_params['file']: if not filepath.startswith('data/'): filepath = os.path.join('data/', filepath) file = next((f for f in all_files if f.relative_path == filepath), None) From 88ba3e534db3c2c976724383dbffd39ef2b1ecb8 Mon Sep 17 00:00:00 2001 From: Shannon Osborne <31218961+shanosborne@users.noreply.github.com> Date: Wed, 27 Nov 2024 08:23:39 -0800 Subject: [PATCH 5/6] lint --- cirro/cli/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cirro/cli/cli.py b/cirro/cli/cli.py index 90d4bf47..5c719db7 100644 --- a/cirro/cli/cli.py +++ b/cirro/cli/cli.py @@ -39,7 +39,7 @@ def list_datasets(**kwargs): @click.option('--file', help='Name and relative path of the file (optional)', default=[], - multiple=True ) + multiple=True) @click.option('--data-directory', help='Directory to store the files') @click.option('-i', '--interactive', From 91053dade732df0c5e3a91ba50455e5ee61719b8 Mon Sep 17 00:00:00 2001 From: Shannon Osborne <31218961+shanosborne@users.noreply.github.com> Date: Wed, 27 Nov 2024 08:48:44 -0800 Subject: [PATCH 6/6] increase package version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5124f06f..29c18137 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "cirro" -version = "1.2.8" +version = "1.2.9" description = "CLI tool and SDK for interacting with the Cirro platform" authors = ["Cirro Bio "] license = "MIT"