From 688beb684f7768a5c89f4a0bb282ef3fc02f7c59 Mon Sep 17 00:00:00 2001 From: Sam Minot Date: Fri, 3 Oct 2025 13:49:39 -0700 Subject: [PATCH 01/12] Inform the user of options when an item is not matched --- cirro/cli/interactive/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cirro/cli/interactive/utils.py b/cirro/cli/interactive/utils.py index c038fd2..3baad08 100644 --- a/cirro/cli/interactive/utils.py +++ b/cirro/cli/interactive/utils.py @@ -99,7 +99,8 @@ def get_id_from_name(items: List[T], name_or_id: str) -> Optional[str]: matched = get_item_from_name_or_id(items, name_or_id) if not matched: item_type = type(items[0]).__name__ - raise InputError(f"Could not find {item_type} {name_or_id}") + item_names = ", ".join([i.id for i in items]) + raise InputError(f"Could not find {item_type} {name_or_id} - options: {item_names}") return matched.id From 554a82b3e3b6409e0dea8b5901c3a09a8747f119 Mon Sep 17 00:00:00 2001 From: Sam Minot Date: Fri, 3 Oct 2025 15:52:06 -0700 Subject: [PATCH 02/12] Add CLI method to validate that a local folder matches a dataset in Cirro --- README.md | 20 ++++ cirro/cli/__init__.py | 6 +- cirro/cli/cli.py | 18 +++- cirro/cli/controller.py | 52 ++++++++++ cirro/cli/interactive/common_args.py | 44 +++++++- cirro/cli/interactive/download_args.py | 45 +------- cirro/cli/interactive/validate_args.py | 136 +++++++++++++++++++++++++ cirro/cli/models.py | 7 ++ cirro/services/dataset.py | 57 +++++++++++ 9 files changed, 337 insertions(+), 48 deletions(-) create mode 100644 cirro/cli/interactive/validate_args.py diff --git a/README.md b/README.md index 676a21c..e32b2fb 100644 --- a/README.md +++ b/README.md @@ -81,6 +81,26 @@ Options: $ cirro upload --project "Test Project 1" --name "test" --file "sample1.fastq.gz" --file "sample2.fastq.gz" --data-directory "~/data" --data-type "Paired DNAseq (FASTQ)" ``` +#### Validating that a dataset matches a local folder + +```bash +Usage: cirro validate-folder [OPTIONS] + + Validate that the contents of a local folder match those of a dataset in Cirro + +Options: + --name TEXT Name or ID of the dataset + --project TEXT Name or ID of the project + --data-directory TEXT Local directory you wish to validate + -i, --interactive Gather arguments interactively + --help Show this message and exit. + +``` + +```bash +$ cirro validate-folder --project "Test Project 1" --name "test" --data-directory "~/data" +``` + #### Uploading a reference ```bash diff --git a/cirro/cli/__init__.py b/cirro/cli/__init__.py index dc5b06a..cd7004a 100644 --- a/cirro/cli/__init__.py +++ b/cirro/cli/__init__.py @@ -1,9 +1,11 @@ -from cirro.cli.controller import run_ingest, run_download, run_configure, run_list_datasets, run_create_pipeline_config +from cirro.cli.controller import run_ingest, run_download, run_configure, run_list_datasets +from cirro.cli.controller import run_create_pipeline_config, run_validate_folder __all__ = [ 'run_ingest', 'run_download', 'run_configure', 'run_list_datasets', - 'run_create_pipeline_config' + 'run_create_pipeline_config', + 'run_validate_folder' ] diff --git a/cirro/cli/cli.py b/cirro/cli/cli.py index 7554aa0..39ac719 100644 --- a/cirro/cli/cli.py +++ b/cirro/cli/cli.py @@ -4,7 +4,8 @@ import requests from cirro_api_client.v1.errors import CirroException -from cirro.cli import run_ingest, run_download, run_configure, run_list_datasets, run_create_pipeline_config +from cirro.cli import run_ingest, run_download, run_configure, run_list_datasets +from cirro.cli import run_create_pipeline_config, run_validate_folder from cirro.cli.controller import handle_error, run_upload_reference from cirro.cli.interactive.utils import InputError @@ -81,6 +82,21 @@ def upload(**kwargs): run_ingest(kwargs, interactive=kwargs.get('interactive')) +@run.command(help='Validate a dataset exactly matches a local folder', no_args_is_help=True) +@click.option('--dataset', + help='Name or ID of the dataset') +@click.option('--project', + help='Name or ID of the project') +@click.option('--data-directory', + help='Local directory you wish to validate') +@click.option('-i', '--interactive', + help='Gather arguments interactively', + is_flag=True, default=False) +def validate_folder(**kwargs): + check_required_args(kwargs) + run_validate_folder(kwargs, interactive=kwargs.get('interactive')) + + @run.command(help='Upload a reference to a project', no_args_is_help=True) @click.option('--name', help='Name of the reference') diff --git a/cirro/cli/controller.py b/cirro/cli/controller.py index 8f61d5f..0b17330 100644 --- a/cirro/cli/controller.py +++ b/cirro/cli/controller.py @@ -13,6 +13,7 @@ from cirro.cli.interactive.download_args import gather_download_arguments_dataset from cirro.cli.interactive.list_dataset_args import gather_list_arguments from cirro.cli.interactive.upload_args import gather_upload_arguments +from cirro.cli.interactive.validate_args import gather_validate_arguments, gather_validate_arguments_dataset from cirro.cli.interactive.upload_reference_args import gather_reference_upload_arguments from cirro.cli.interactive.utils import get_id_from_name, get_item_from_name_or_id, InputError, validate_files from cirro.cli.models import ListArguments, UploadArguments, DownloadArguments, CreatePipelineConfigArguments, \ @@ -119,6 +120,57 @@ def run_ingest(input_params: UploadArguments, interactive=False): logger.info(f"File content validated by {cirro.configuration.checksum_method_display}") +def run_validate_folder(input_params: UploadArguments, interactive=False): + _check_configure() + cirro = CirroApi() + logger.info(f"Collecting data from {cirro.configuration.base_url}") + + logger.info("Listing available projects") + projects = cirro.projects.list() + + if len(projects) == 0: + raise InputError(NO_PROJECTS) + + if interactive: + input_params = gather_validate_arguments(input_params, projects) + + input_params['project'] = get_id_from_name(projects, input_params['project']) + datasets = list_all_datasets(project_id=input_params['project'], client=cirro) + # Filter out datasets that are not complete + datasets = [d for d in datasets if d.status == Status.COMPLETED] + input_params = gather_validate_arguments_dataset(input_params, datasets) + files = cirro.datasets.get_assets_listing(input_params['project'], input_params['dataset']).files + + if len(files) == 0: + raise InputError('There are no files in this dataset to validate against') + + project_id = input_params['project'] + dataset_id = input_params['dataset'] + + else: + project_id = get_id_from_name(projects, input_params['project']) + datasets = cirro.datasets.list(project_id) + dataset_id = get_id_from_name(datasets, input_params['dataset']) + + logger.info("Validating files") + + validation_results = cirro.datasets.validate_folder( + project_id=project_id, + dataset_id=dataset_id, + local_folder=input_params['data_directory'] + ) + + for file_list, label in [ + [validation_results['ds_files_matching'], "Files exactly matching in Cirro and locally"], + [validation_results['ds_files_notmatching'], "Files with differing checksums in Cirro and locally"], + [validation_results['ds_files_missing'], "Files present in Cirro but not locally"], + [validation_results['local_only_files'], "Files present locally but not in Cirro"] + ]: + logger.info(f"{label}: {len(file_list):,}") + for file in file_list: + logger.info(f" - {file}") + + def run_download(input_params: DownloadArguments, interactive=False): _check_configure() cirro = CirroApi() diff --git a/cirro/cli/interactive/common_args.py b/cirro/cli/interactive/common_args.py index 7c9f07f..64e3ccf 100644 --- a/cirro/cli/interactive/common_args.py +++ b/cirro/cli/interactive/common_args.py @@ -1,8 +1,15 @@ from typing import List -from cirro_api_client.v1.models import Project +from cirro_api_client.v1.models import Project, Dataset +from cirro.cli.interactive.utils import ask, prompt_wrapper, InputError +from cirro.utils import format_date +from cirro.models.dataset import DatasetWithShare -from cirro.cli.interactive.utils import ask + +def _format_share(dataset: Dataset | DatasetWithShare) -> str: + if isinstance(dataset, DatasetWithShare) and dataset.share: + return f'({dataset.share.name})' + return '' def ask_project(projects: List[Project], input_value: str) -> str: @@ -21,3 +28,36 @@ def ask_project(projects: List[Project], input_value: str) -> str: choices=project_names, default=input_value if input_value in project_names else '' ) + + +def ask_dataset(datasets: List[Dataset], input_value: str, msg_action: str) -> str: + if len(datasets) == 0: + raise InputError("No datasets available") + sorted_datasets = sorted(datasets, key=lambda d: d.created_at, reverse=True) + dataset_prompt = { + 'type': 'autocomplete', + 'name': 'dataset', + 'message': f'What dataset would you like to {msg_action}? (Press Tab to see all options)', + 'choices': [f'{dataset.name} - {dataset.id}' for dataset in sorted_datasets], + 'meta_information': { + f'{dataset.name} - {dataset.id}': f'{format_date(dataset.created_at)} {_format_share(dataset)}' + for dataset in datasets + }, + 'ignore_case': True + } + answers = prompt_wrapper(dataset_prompt) + choice = answers['dataset'] + # Map the answer to a dataset + for dataset in datasets: + if f'{dataset.name} - {dataset.id}' == choice: + return dataset.id + + # The user has made a selection which does not match + # any of the options available. + # This is most likely because there was a typo + if ask( + 'confirm', + 'The selection does match an option available - try again?' + ): + return ask_dataset(datasets, input_value) + raise InputError("Exiting - no dataset selected") diff --git a/cirro/cli/interactive/download_args.py b/cirro/cli/interactive/download_args.py index fad0fa3..270b939 100644 --- a/cirro/cli/interactive/download_args.py +++ b/cirro/cli/interactive/download_args.py @@ -4,51 +4,10 @@ from cirro_api_client.v1.models import Dataset, Project -from cirro.cli.interactive.common_args import ask_project +from cirro.cli.interactive.common_args import ask_project, ask_dataset from cirro.cli.interactive.utils import ask, prompt_wrapper, InputError from cirro.cli.models import DownloadArguments -from cirro.models.dataset import DatasetWithShare from cirro.models.file import File -from cirro.utils import format_date - - -def _format_share(dataset: Dataset | DatasetWithShare) -> str: - if isinstance(dataset, DatasetWithShare) and dataset.share: - return f'({dataset.share.name})' - return '' - - -def ask_dataset(datasets: List[Dataset], input_value: str) -> str: - if len(datasets) == 0: - raise InputError("No datasets available") - sorted_datasets = sorted(datasets, key=lambda d: d.created_at, reverse=True) - dataset_prompt = { - 'type': 'autocomplete', - 'name': 'dataset', - 'message': 'What dataset would you like to download? (Press Tab to see all options)', - 'choices': [f'{dataset.name} - {dataset.id}' for dataset in sorted_datasets], - 'meta_information': { - f'{dataset.name} - {dataset.id}': f'{format_date(dataset.created_at)} {_format_share(dataset)}' - for dataset in datasets - }, - 'ignore_case': True - } - answers = prompt_wrapper(dataset_prompt) - choice = answers['dataset'] - # Map the answer to a dataset - for dataset in datasets: - if f'{dataset.name} - {dataset.id}' == choice: - return dataset.id - - # The user has made a selection which does not match - # any of the options available. - # This is most likely because there was a typo - if ask( - 'confirm', - 'The selection does match an option available - try again?' - ): - return ask_dataset(datasets, input_value) - raise InputError("Exiting - no dataset selected") def ask_dataset_files(files: List[File]) -> List[File]: @@ -172,6 +131,6 @@ def gather_download_arguments(input_params: DownloadArguments, projects: List[Pr def gather_download_arguments_dataset(input_params: DownloadArguments, datasets: List[Dataset]): - input_params['dataset'] = ask_dataset(datasets, input_params.get('dataset')) + input_params['dataset'] = ask_dataset(datasets, input_params.get('dataset'), 'download') input_params['data_directory'] = ask_directory(input_params.get('data_directory')) return input_params diff --git a/cirro/cli/interactive/validate_args.py b/cirro/cli/interactive/validate_args.py new file mode 100644 index 0000000..c1c55f2 --- /dev/null +++ b/cirro/cli/interactive/validate_args.py @@ -0,0 +1,136 @@ +from fnmatch import fnmatch +from pathlib import Path +from typing import List + +from cirro_api_client.v1.models import Dataset, Project + +from cirro.cli.interactive.common_args import ask_project, ask_dataset +from cirro.cli.interactive.utils import ask, prompt_wrapper, InputError +from cirro.cli.models import DownloadArguments +from cirro.models.file import File + + +def ask_dataset_files(files: List[File]) -> List[File]: + """Get the list of files which the user would like to download from the dataset.""" + + choices = [ + "Download all files", + "Select files from a list", + "Select files with a naming pattern (glob)" + ] + + selection_mode_prompt = { + 'type': 'select', + 'name': 'mode', + 'message': 'Which files would you like to download from this dataset?', + 'choices': choices + } + + answers = prompt_wrapper(selection_mode_prompt) + + if answers['mode'] == choices[0]: + return files + elif answers['mode'] == choices[1]: + return ask_dataset_files_list(files) + else: + return ask_dataset_files_glob(files) + + +def strip_prefix(fp: str, prefix: str): + assert fp.startswith(prefix), f"Expected {fp} to start with {prefix}" + return fp[len(prefix):] + + +def ask_dataset_files_list(files: List[File]) -> List[File]: + answers = prompt_wrapper({ + 'type': 'checkbox', + 'name': 'files', + 'message': 'Select the files to download', + 'choices': [ + strip_prefix(file.relative_path, "data/") + for file in files + ] + }) + + selected_files = [ + file + for file in files + if strip_prefix(file.relative_path, "data/") in set(answers['files']) + ] + + if len(selected_files) == 0: + if ask( + "confirm", + "No files were selected - try again?" + ): + return ask_dataset_files_list(files) + else: + raise InputError("No files selected") + else: + return selected_files + + +def ask_dataset_files_glob(files: List[File]) -> List[File]: + + confirmed = False + while not confirmed: + selected_files = ask_dataset_files_glob_single(files) + confirmed = ask( + "confirm", + f'Number of files selected: {len(selected_files):} / {len(files):,}' + ) + + if len(selected_files) == 0: + raise InputError("No files selected") + + return selected_files + + +def ask_dataset_files_glob_single(files: List[File]) -> List[File]: + + print("All Files:") + for file in files: + print(f" - {strip_prefix(file.relative_path, 'data/')}") + + answers = prompt_wrapper({ + 'type': 'text', + 'name': 'glob', + 'message': 'Select files by naming pattern (using the * wildcard)', + 'default': '*' + }) + + selected_files = [ + file + for file in files + if fnmatch(strip_prefix(file.relative_path, "data/"), answers['glob']) + ] + + print("Selected Files:") + for file in selected_files: + print(f" - {strip_prefix(file.relative_path, 'data/')}") + + return selected_files + + +def ask_directory(input_value: str) -> str: + directory_prompt = { + 'type': 'path', + 'name': 'directory', + 'only_directories': True, + 'message': 'What local folder would you like to compare data contents for?', + 'default': input_value or str(Path.cwd()) + } + + answers = prompt_wrapper(directory_prompt) + return answers['directory'] + + +def gather_validate_arguments(input_params: DownloadArguments, projects: List[Project]): + input_params['project'] = ask_project(projects, input_params.get('project')) + return input_params + + +def gather_validate_arguments_dataset(input_params: DownloadArguments, datasets: List[Dataset]): + input_params['dataset'] = ask_dataset(datasets, input_params.get('dataset'), 'validate') + input_params['data_directory'] = ask_directory(input_params.get('data_directory')) + return input_params diff --git a/cirro/cli/models.py b/cirro/cli/models.py index dacfd61..bc038fd 100644 --- a/cirro/cli/models.py +++ b/cirro/cli/models.py @@ -20,6 +20,13 @@ class UploadArguments(TypedDict): file: Optional[list[str]] +class ValidateArguments(TypedDict): + name: str + project: str + data_directory: str + interactive: bool + + class ListArguments(TypedDict): project: str interactive: bool diff --git a/cirro/services/dataset.py b/cirro/services/dataset.py index 4407f95..8938ebb 100644 --- a/cirro/services/dataset.py +++ b/cirro/services/dataset.py @@ -1,3 +1,4 @@ +from pathlib import Path from typing import List, Optional, Union, Dict from cirro_api_client.v1.api.datasets import get_datasets, get_dataset, import_public_dataset, upload_dataset, \ @@ -299,6 +300,62 @@ def upload_files(self, file_path_map=file_path_map ) + def validate_folder( + self, + project_id: str, + dataset_id: str, + local_folder: str + ): + """ + Validates that the contents of a dataset match that of a local folder. + """ + ds_files = self.get_assets_listing(project_id, dataset_id).files + + local_folder = Path(local_folder) + if not local_folder.is_dir(): + raise ValueError(f"{local_folder} is not a valid local folder") + + # Keep track of files from the dataset which match by checksum, don't match, or are missing + ds_files_matching = [] + ds_files_notmatching = [] + ds_files_missing = [] + for ds_file in ds_files: + ds_file_path = ds_file.relative_path[len("data/"):] + # Get the corresponding local file + local_file = local_folder / ds_file_path + if not local_file.exists(): + ds_files_missing.append(ds_file_path) + else: + if self.file_is_valid(ds_file, local_file): + ds_files_matching.append(ds_file_path) + else: + ds_files_notmatching.append(ds_file_path) + + # Find local files that are not in the dataset + local_only_files = [ + str(file.relative_to(local_folder)) + for file in local_folder.rglob("*") + if not file.is_dir() + if str(file.relative_to(local_folder)) not in [ + ds_file.relative_path[len("data/"):] + for ds_file in ds_files + ] + ] + + return dict( + ds_files_matching=ds_files_matching, + ds_files_notmatching=ds_files_notmatching, + ds_files_missing=ds_files_missing, + local_only_files=local_only_files + ) + + def file_is_valid(self, ds_file: File, local_file: Path) -> bool: + try: + self._file_service.validate_file(ds_file, local_file) + return True + except ValueError: + return False + def download_files( self, project_id: str, From 24a10d407ef05cb4ea432caacda91fb79102c20c Mon Sep 17 00:00:00 2001 From: Sam Minot Date: Sat, 4 Oct 2025 07:05:03 -0700 Subject: [PATCH 03/12] Use the {} constructor instead of dict() --- cirro/clients/s3.py | 4 ++-- cirro/helpers/form.py | 22 +++++++++++----------- cirro/services/dataset.py | 12 ++++++------ 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/cirro/clients/s3.py b/cirro/clients/s3.py index 62b5e0d..c10a834 100644 --- a/cirro/clients/s3.py +++ b/cirro/clients/s3.py @@ -35,8 +35,8 @@ class S3Client: def __init__(self, creds_getter: Callable[[], AWSCredentials], checksum_method: str = None): self._creds_getter = creds_getter self._client = self._build_session_client() - self._upload_args = dict(ChecksumAlgorithm=checksum_method) - self._download_args = dict(ChecksumMode='ENABLED') if checksum_method else dict() + self._upload_args = {"ChecksumAlgorithm": checksum_method} + self._download_args = {"ChecksumMode": 'ENABLED'} if checksum_method else {} def get_aws_client(self): return self._client diff --git a/cirro/helpers/form.py b/cirro/helpers/form.py index 3e4d680..0d63023 100644 --- a/cirro/helpers/form.py +++ b/cirro/helpers/form.py @@ -11,13 +11,13 @@ class FormBuilder: def __init__(self): # Contents will be written out as the form - self.form = dict( - ui=OrderedDict(), - form=OrderedDict( + self.form = { + "ui": OrderedDict(), + "form": OrderedDict( type="object", properties=OrderedDict() ) - ) + } # Used to make sure that no keys are repeated self.used_keys = set() @@ -28,7 +28,7 @@ def __init__(self): # Store the params which will be populated either by: # a) While building the form, the optional `test_value` field will be used # b) While running non-interactively, it will use the values read from $PW_NOTEBOOK_DATA - self.params = dict() + self.params = {} def add_param( self, @@ -65,7 +65,7 @@ def add_param( assert type in self.PARAM_TYPES, msg # Start building the item in the form - item = dict(type=type) + item = {"type": type} # Populate the test value if test_value is not None: @@ -109,11 +109,11 @@ def add_section(self, title: str = None, description: str = None): self.pointer = section_name # Add the section - self.form['form']['properties'][section_name] = dict( - title=title, - description=description, - properties=OrderedDict() - ) + self.form['form']['properties'][section_name] = { + "title": title, + "description": description, + "properties": OrderedDict() + } def _new_section_name(self): """Internal method to pick a new section name.""" diff --git a/cirro/services/dataset.py b/cirro/services/dataset.py index 8938ebb..03c273a 100644 --- a/cirro/services/dataset.py +++ b/cirro/services/dataset.py @@ -342,12 +342,12 @@ def validate_folder( ] ] - return dict( - ds_files_matching=ds_files_matching, - ds_files_notmatching=ds_files_notmatching, - ds_files_missing=ds_files_missing, - local_only_files=local_only_files - ) + return { + "ds_files_matching": ds_files_matching, + "ds_files_notmatching": ds_files_notmatching, + "ds_files_missing": ds_files_missing, + "local_only_files": local_only_files + } def file_is_valid(self, ds_file: File, local_file: Path) -> bool: try: From 97c93f97b341f24063ca9bda9bb4d34530c0d67c Mon Sep 17 00:00:00 2001 From: Nathan Thorpe Date: Mon, 6 Oct 2025 07:43:59 -0700 Subject: [PATCH 04/12] Revert "Use the {} constructor instead of dict()" This reverts commit 24a10d407ef05cb4ea432caacda91fb79102c20c. --- cirro/clients/s3.py | 4 ++-- cirro/helpers/form.py | 22 +++++++++++----------- cirro/services/dataset.py | 12 ++++++------ 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/cirro/clients/s3.py b/cirro/clients/s3.py index c10a834..62b5e0d 100644 --- a/cirro/clients/s3.py +++ b/cirro/clients/s3.py @@ -35,8 +35,8 @@ class S3Client: def __init__(self, creds_getter: Callable[[], AWSCredentials], checksum_method: str = None): self._creds_getter = creds_getter self._client = self._build_session_client() - self._upload_args = {"ChecksumAlgorithm": checksum_method} - self._download_args = {"ChecksumMode": 'ENABLED'} if checksum_method else {} + self._upload_args = dict(ChecksumAlgorithm=checksum_method) + self._download_args = dict(ChecksumMode='ENABLED') if checksum_method else dict() def get_aws_client(self): return self._client diff --git a/cirro/helpers/form.py b/cirro/helpers/form.py index 0d63023..3e4d680 100644 --- a/cirro/helpers/form.py +++ b/cirro/helpers/form.py @@ -11,13 +11,13 @@ class FormBuilder: def __init__(self): # Contents will be written out as the form - self.form = { - "ui": OrderedDict(), - "form": OrderedDict( + self.form = dict( + ui=OrderedDict(), + form=OrderedDict( type="object", properties=OrderedDict() ) - } + ) # Used to make sure that no keys are repeated self.used_keys = set() @@ -28,7 +28,7 @@ def __init__(self): # Store the params which will be populated either by: # a) While building the form, the optional `test_value` field will be used # b) While running non-interactively, it will use the values read from $PW_NOTEBOOK_DATA - self.params = {} + self.params = dict() def add_param( self, @@ -65,7 +65,7 @@ def add_param( assert type in self.PARAM_TYPES, msg # Start building the item in the form - item = {"type": type} + item = dict(type=type) # Populate the test value if test_value is not None: @@ -109,11 +109,11 @@ def add_section(self, title: str = None, description: str = None): self.pointer = section_name # Add the section - self.form['form']['properties'][section_name] = { - "title": title, - "description": description, - "properties": OrderedDict() - } + self.form['form']['properties'][section_name] = dict( + title=title, + description=description, + properties=OrderedDict() + ) def _new_section_name(self): """Internal method to pick a new section name.""" diff --git a/cirro/services/dataset.py b/cirro/services/dataset.py index 03c273a..8938ebb 100644 --- a/cirro/services/dataset.py +++ b/cirro/services/dataset.py @@ -342,12 +342,12 @@ def validate_folder( ] ] - return { - "ds_files_matching": ds_files_matching, - "ds_files_notmatching": ds_files_notmatching, - "ds_files_missing": ds_files_missing, - "local_only_files": local_only_files - } + return dict( + ds_files_matching=ds_files_matching, + ds_files_notmatching=ds_files_notmatching, + ds_files_missing=ds_files_missing, + local_only_files=local_only_files + ) def file_is_valid(self, ds_file: File, local_file: Path) -> bool: try: From e01d6931c31d0be8b4bb8253088792d689dc325d Mon Sep 17 00:00:00 2001 From: Nathan Thorpe Date: Mon, 6 Oct 2025 07:44:08 -0700 Subject: [PATCH 05/12] readme update --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e32b2fb..497c2c1 100644 --- a/README.md +++ b/README.md @@ -89,7 +89,7 @@ Usage: cirro validate-folder [OPTIONS] Validate that the contents of a local folder match those of a dataset in Cirro Options: - --name TEXT Name or ID of the dataset + --dataset TEXT Name or ID of the dataset --project TEXT Name or ID of the project --data-directory TEXT Local directory you wish to validate -i, --interactive Gather arguments interactively @@ -98,7 +98,7 @@ Options: ``` ```bash -$ cirro validate-folder --project "Test Project 1" --name "test" --data-directory "~/data" +$ cirro validate-folder --project "Test Project 1" --dataset "test" --data-directory "~/data" ``` #### Uploading a reference From fa41f4af07b8105cc05f4de7c69992390a9eb629 Mon Sep 17 00:00:00 2001 From: Nathan Thorpe Date: Mon, 6 Oct 2025 08:52:47 -0700 Subject: [PATCH 06/12] fix argument model, type response of validate folder, move is valid file to file service, don't include hidden files in local-only file check --- cirro/cli/models.py | 2 +- cirro/file_utils.py | 4 ++-- cirro/models/dataset.py | 8 ++++++++ cirro/models/file.py | 5 +++++ cirro/sdk/file.py | 6 +----- cirro/services/dataset.py | 31 +++++++++++++------------------ cirro/services/file.py | 22 ++++++++++++++++++++++ 7 files changed, 52 insertions(+), 26 deletions(-) diff --git a/cirro/cli/models.py b/cirro/cli/models.py index bc038fd..3fd701c 100644 --- a/cirro/cli/models.py +++ b/cirro/cli/models.py @@ -21,7 +21,7 @@ class UploadArguments(TypedDict): class ValidateArguments(TypedDict): - name: str + dataset: str project: str data_directory: str interactive: bool diff --git a/cirro/file_utils.py b/cirro/file_utils.py index d486df6..f636668 100644 --- a/cirro/file_utils.py +++ b/cirro/file_utils.py @@ -53,7 +53,7 @@ def generate_flattened_file_map(files: List[PathLike]) -> Dict[PathLike, str]: } -def _is_hidden_file(file_path: Path): +def is_hidden_file(file_path: Path): # Remove hidden files from listing, desktop.ini .DS_Store, etc. if os.name == 'nt': attributes = win32api.GetFileAttributes(str(file_path)) @@ -86,7 +86,7 @@ def get_files_in_directory( if file_path.is_dir(): continue - if not include_hidden and _is_hidden_file(file_path): + if not include_hidden and is_hidden_file(file_path): continue if not file_path.exists(): diff --git a/cirro/models/dataset.py b/cirro/models/dataset.py index 14d28ca..f252524 100644 --- a/cirro/models/dataset.py +++ b/cirro/models/dataset.py @@ -22,3 +22,11 @@ def from_dataset(cls, dataset: Dataset, share: Share) -> 'DatasetWithShare': updated_at=dataset.updated_at, share=share ) + + +@_attrs_define +class DatasetValidationResponse: + files_matching: list[str] + files_not_matching: list[str] + files_missing: list[str] + local_only_files: list[str] diff --git a/cirro/models/file.py b/cirro/models/file.py index f35ae51..cdd2f12 100644 --- a/cirro/models/file.py +++ b/cirro/models/file.py @@ -139,6 +139,11 @@ def from_file_entry(cls, file: FileEntry, project_id: str, dataset: DatasetDetai access_context=access_context ) + @property + def normalized_path(self) -> str: + """ Without the data prefix """ + return self.relative_path[len("data/"):] + @property def absolute_path(self): return f'{self.access_context.base_url}/{self.relative_path.strip("/")}' diff --git a/cirro/sdk/file.py b/cirro/sdk/file.py index 03acd1e..bb04c00 100644 --- a/cirro/sdk/file.py +++ b/cirro/sdk/file.py @@ -212,11 +212,7 @@ def is_valid(self, local_path: PathLike) -> bool: if not local_path: raise DataPortalInputError("Must provide local path to validate file") - try: - self.validate(local_path) - return True - except ValueError: - return False + return self._client.file.is_valid_file(self._file, local_path) class DataPortalFiles(DataPortalAssets[DataPortalFile]): diff --git a/cirro/services/dataset.py b/cirro/services/dataset.py index 8938ebb..32a0f20 100644 --- a/cirro/services/dataset.py +++ b/cirro/services/dataset.py @@ -7,7 +7,9 @@ from cirro_api_client.v1.models import ImportDataRequest, UploadDatasetRequest, UpdateDatasetRequest, Dataset, \ DatasetDetail, CreateResponse, UploadDatasetCreateResponse, FileEntry +from cirro.file_utils import is_hidden_file from cirro.models.assets import DatasetAssets, Artifact +from cirro.models.dataset import DatasetValidationResponse from cirro.models.file import FileAccessContext, File, PathLike from cirro.services.base import get_all_records from cirro.services.file import FileEnabledService @@ -305,7 +307,7 @@ def validate_folder( project_id: str, dataset_id: str, local_folder: str - ): + ) -> DatasetValidationResponse: """ Validates that the contents of a dataset match that of a local folder. """ @@ -317,45 +319,38 @@ def validate_folder( # Keep track of files from the dataset which match by checksum, don't match, or are missing ds_files_matching = [] - ds_files_notmatching = [] + ds_files_not_matching = [] ds_files_missing = [] for ds_file in ds_files: - ds_file_path = ds_file.relative_path[len("data/"):] + ds_file_path = ds_file.normalized_path # Get the corresponding local file local_file = local_folder / ds_file_path if not local_file.exists(): ds_files_missing.append(ds_file_path) else: - if self.file_is_valid(ds_file, local_file): + if self._file_service.is_valid_file(ds_file, local_file): ds_files_matching.append(ds_file_path) else: - ds_files_notmatching.append(ds_file_path) + ds_files_not_matching.append(ds_file_path) # Find local files that are not in the dataset local_only_files = [ str(file.relative_to(local_folder)) for file in local_folder.rglob("*") - if not file.is_dir() + if not file.is_dir() and not is_hidden_file(file) if str(file.relative_to(local_folder)) not in [ - ds_file.relative_path[len("data/"):] + ds_file.normalized_path for ds_file in ds_files ] ] - return dict( - ds_files_matching=ds_files_matching, - ds_files_notmatching=ds_files_notmatching, - ds_files_missing=ds_files_missing, + return DatasetValidationResponse( + files_matching=ds_files_matching, + files_not_matching=ds_files_not_matching, + files_missing=ds_files_missing, local_only_files=local_only_files ) - def file_is_valid(self, ds_file: File, local_file: Path) -> bool: - try: - self._file_service.validate_file(ds_file, local_file) - return True - except ValueError: - return False - def download_files( self, project_id: str, diff --git a/cirro/services/file.py b/cirro/services/file.py index b10a28e..f2cf6e7 100644 --- a/cirro/services/file.py +++ b/cirro/services/file.py @@ -2,6 +2,7 @@ import threading from datetime import datetime, timezone from functools import partial +from pathlib import Path from typing import List, Dict from botocore.client import BaseClient @@ -180,6 +181,27 @@ def download_files(self, access_context: FileAccessContext, directory: str, file access_context.prefix ) + def is_valid_file(self, file: File, local_file: Path) -> bool: + """ + Validates the checksum of a file against a local file + See ``validate_file`` method for details. + + Args: + file (File): Cirro file to validate + local_file (PathLike): Local file path to compare against + + Returns: + bool: True if file integrity matches, False otherwise + + Raises: + RuntimeWarning: If the remote checksum is not available or not supported + """ + try: + self.validate_file(file, local_file) + return True + except ValueError: + return False + def validate_file(self, file: File, local_file: PathLike): """ Validates the checksum of a file against a local file From 6c20f3a51f64707802442d8431ebcd29507eac0e Mon Sep 17 00:00:00 2001 From: Nathan Thorpe Date: Mon, 6 Oct 2025 09:02:17 -0700 Subject: [PATCH 07/12] ValidateArguments type, pathlike for input, use tuple to get type hints back --- cirro/cli/controller.py | 14 +++++++------- cirro/cli/interactive/download_args.py | 2 +- cirro/cli/interactive/validate_args.py | 8 ++++---- cirro/file_utils.py | 5 ++++- cirro/services/dataset.py | 2 +- 5 files changed, 17 insertions(+), 14 deletions(-) diff --git a/cirro/cli/controller.py b/cirro/cli/controller.py index 0b17330..71b28dc 100644 --- a/cirro/cli/controller.py +++ b/cirro/cli/controller.py @@ -13,11 +13,11 @@ from cirro.cli.interactive.download_args import gather_download_arguments_dataset from cirro.cli.interactive.list_dataset_args import gather_list_arguments from cirro.cli.interactive.upload_args import gather_upload_arguments -from cirro.cli.interactive.validate_args import gather_validate_arguments, gather_validate_arguments_dataset from cirro.cli.interactive.upload_reference_args import gather_reference_upload_arguments from cirro.cli.interactive.utils import get_id_from_name, get_item_from_name_or_id, InputError, validate_files +from cirro.cli.interactive.validate_args import gather_validate_arguments, gather_validate_arguments_dataset from cirro.cli.models import ListArguments, UploadArguments, DownloadArguments, CreatePipelineConfigArguments, \ - UploadReferenceArguments + UploadReferenceArguments, ValidateArguments from cirro.config import UserConfig, save_user_config, load_user_config from cirro.file_utils import get_files_in_directory from cirro.models.process import PipelineDefinition, ConfigAppStatus, CONFIG_APP_URL @@ -120,7 +120,7 @@ def run_ingest(input_params: UploadArguments, interactive=False): logger.info(f"File content validated by {cirro.configuration.checksum_method_display}") -def run_validate_folder(input_params: UploadArguments, interactive=False): +def run_validate_folder(input_params: ValidateArguments, interactive=False): _check_configure() cirro = CirroApi() logger.info(f"Collecting data from {cirro.configuration.base_url}") @@ -161,10 +161,10 @@ def run_validate_folder(input_params: UploadArguments, interactive=False): ) for file_list, label in [ - [validation_results['ds_files_matching'], "Files exactly matching in Cirro and locally"], - [validation_results['ds_files_notmatching'], "Files with differing checksums in Cirro and locally"], - [validation_results['ds_files_missing'], "Files present in Cirro but not locally"], - [validation_results['local_only_files'], "Files present locally but not in Cirro"] + (validation_results.files_matching, "Files exactly matching in Cirro and locally"), + (validation_results.files_not_matching, "Files with differing checksums in Cirro and locally"), + (validation_results.files_missing, "Files present in Cirro but not locally"), + (validation_results.local_only_files, "Files present locally but not in Cirro") ]: logger.info(f"{label}: {len(file_list):,}") for file in file_list: diff --git a/cirro/cli/interactive/download_args.py b/cirro/cli/interactive/download_args.py index 270b939..fe12642 100644 --- a/cirro/cli/interactive/download_args.py +++ b/cirro/cli/interactive/download_args.py @@ -131,6 +131,6 @@ def gather_download_arguments(input_params: DownloadArguments, projects: List[Pr def gather_download_arguments_dataset(input_params: DownloadArguments, datasets: List[Dataset]): - input_params['dataset'] = ask_dataset(datasets, input_params.get('dataset'), 'download') + input_params['dataset'] = ask_dataset(datasets, input_params.get('dataset'), msg_action='download') input_params['data_directory'] = ask_directory(input_params.get('data_directory')) return input_params diff --git a/cirro/cli/interactive/validate_args.py b/cirro/cli/interactive/validate_args.py index c1c55f2..61a209e 100644 --- a/cirro/cli/interactive/validate_args.py +++ b/cirro/cli/interactive/validate_args.py @@ -6,7 +6,7 @@ from cirro.cli.interactive.common_args import ask_project, ask_dataset from cirro.cli.interactive.utils import ask, prompt_wrapper, InputError -from cirro.cli.models import DownloadArguments +from cirro.cli.models import ValidateArguments from cirro.models.file import File @@ -125,12 +125,12 @@ def ask_directory(input_value: str) -> str: return answers['directory'] -def gather_validate_arguments(input_params: DownloadArguments, projects: List[Project]): +def gather_validate_arguments(input_params: ValidateArguments, projects: list[Project]): input_params['project'] = ask_project(projects, input_params.get('project')) return input_params -def gather_validate_arguments_dataset(input_params: DownloadArguments, datasets: List[Dataset]): - input_params['dataset'] = ask_dataset(datasets, input_params.get('dataset'), 'validate') +def gather_validate_arguments_dataset(input_params: ValidateArguments, datasets: list[Dataset]): + input_params['dataset'] = ask_dataset(datasets, input_params.get('dataset'), msg_action='validate') input_params['data_directory'] = ask_directory(input_params.get('data_directory')) return input_params diff --git a/cirro/file_utils.py b/cirro/file_utils.py index f636668..15f6d49 100644 --- a/cirro/file_utils.py +++ b/cirro/file_utils.py @@ -54,7 +54,10 @@ def generate_flattened_file_map(files: List[PathLike]) -> Dict[PathLike, str]: def is_hidden_file(file_path: Path): - # Remove hidden files from listing, desktop.ini .DS_Store, etc. + """ + Check if a file path is hidden + Such as desktop.ini, .DS_Store, etc. + """ if os.name == 'nt': attributes = win32api.GetFileAttributes(str(file_path)) return attributes & (win32con.FILE_ATTRIBUTE_HIDDEN | win32con.FILE_ATTRIBUTE_SYSTEM) diff --git a/cirro/services/dataset.py b/cirro/services/dataset.py index 32a0f20..4a04edb 100644 --- a/cirro/services/dataset.py +++ b/cirro/services/dataset.py @@ -306,7 +306,7 @@ def validate_folder( self, project_id: str, dataset_id: str, - local_folder: str + local_folder: PathLike ) -> DatasetValidationResponse: """ Validates that the contents of a dataset match that of a local folder. From 579604c3cea0efeec26baf8bbfac1fad8ebbaad5 Mon Sep 17 00:00:00 2001 From: Nathan Thorpe Date: Mon, 6 Oct 2025 09:03:47 -0700 Subject: [PATCH 08/12] fix case when ask_dataset has to retry --- cirro/cli/interactive/common_args.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cirro/cli/interactive/common_args.py b/cirro/cli/interactive/common_args.py index 64e3ccf..24896e3 100644 --- a/cirro/cli/interactive/common_args.py +++ b/cirro/cli/interactive/common_args.py @@ -1,9 +1,10 @@ from typing import List from cirro_api_client.v1.models import Project, Dataset + from cirro.cli.interactive.utils import ask, prompt_wrapper, InputError -from cirro.utils import format_date from cirro.models.dataset import DatasetWithShare +from cirro.utils import format_date def _format_share(dataset: Dataset | DatasetWithShare) -> str: @@ -59,5 +60,5 @@ def ask_dataset(datasets: List[Dataset], input_value: str, msg_action: str) -> s 'confirm', 'The selection does match an option available - try again?' ): - return ask_dataset(datasets, input_value) + return ask_dataset(datasets, input_value, msg_action) raise InputError("Exiting - no dataset selected") From b814225033600887665d691225a380fedad9dcad Mon Sep 17 00:00:00 2001 From: Nathan Thorpe Date: Mon, 6 Oct 2025 09:05:40 -0700 Subject: [PATCH 09/12] cleanup --- cirro/cli/interactive/validate_args.py | 107 +------------------------ 1 file changed, 1 insertion(+), 106 deletions(-) diff --git a/cirro/cli/interactive/validate_args.py b/cirro/cli/interactive/validate_args.py index 61a209e..4580bb9 100644 --- a/cirro/cli/interactive/validate_args.py +++ b/cirro/cli/interactive/validate_args.py @@ -1,115 +1,10 @@ -from fnmatch import fnmatch from pathlib import Path -from typing import List from cirro_api_client.v1.models import Dataset, Project from cirro.cli.interactive.common_args import ask_project, ask_dataset -from cirro.cli.interactive.utils import ask, prompt_wrapper, InputError +from cirro.cli.interactive.utils import prompt_wrapper from cirro.cli.models import ValidateArguments -from cirro.models.file import File - - -def ask_dataset_files(files: List[File]) -> List[File]: - """Get the list of files which the user would like to download from the dataset.""" - - choices = [ - "Download all files", - "Select files from a list", - "Select files with a naming pattern (glob)" - ] - - selection_mode_prompt = { - 'type': 'select', - 'name': 'mode', - 'message': 'Which files would you like to download from this dataset?', - 'choices': choices - } - - answers = prompt_wrapper(selection_mode_prompt) - - if answers['mode'] == choices[0]: - return files - elif answers['mode'] == choices[1]: - return ask_dataset_files_list(files) - else: - return ask_dataset_files_glob(files) - - -def strip_prefix(fp: str, prefix: str): - assert fp.startswith(prefix), f"Expected {fp} to start with {prefix}" - return fp[len(prefix):] - - -def ask_dataset_files_list(files: List[File]) -> List[File]: - answers = prompt_wrapper({ - 'type': 'checkbox', - 'name': 'files', - 'message': 'Select the files to download', - 'choices': [ - strip_prefix(file.relative_path, "data/") - for file in files - ] - }) - - selected_files = [ - file - for file in files - if strip_prefix(file.relative_path, "data/") in set(answers['files']) - ] - - if len(selected_files) == 0: - if ask( - "confirm", - "No files were selected - try again?" - ): - return ask_dataset_files_list(files) - else: - raise InputError("No files selected") - else: - return selected_files - - -def ask_dataset_files_glob(files: List[File]) -> List[File]: - - confirmed = False - while not confirmed: - selected_files = ask_dataset_files_glob_single(files) - confirmed = ask( - "confirm", - f'Number of files selected: {len(selected_files):} / {len(files):,}' - ) - - if len(selected_files) == 0: - raise InputError("No files selected") - - return selected_files - - -def ask_dataset_files_glob_single(files: List[File]) -> List[File]: - - print("All Files:") - for file in files: - print(f" - {strip_prefix(file.relative_path, 'data/')}") - - answers = prompt_wrapper({ - 'type': 'text', - 'name': 'glob', - 'message': 'Select files by naming pattern (using the * wildcard)', - 'default': '*' - }) - - selected_files = [ - file - for file in files - if fnmatch(strip_prefix(file.relative_path, "data/"), answers['glob']) - ] - - print("Selected Files:") - for file in selected_files: - print(f" - {strip_prefix(file.relative_path, 'data/')}") - - return selected_files def ask_directory(input_value: str) -> str: From 6770600c119a2a0f12fbb762c0dc8b42a34b0d3d Mon Sep 17 00:00:00 2001 From: Nathan Thorpe Date: Mon, 6 Oct 2025 10:48:24 -0700 Subject: [PATCH 10/12] handle validation failed case, fix issue on windows --- cirro/cli/controller.py | 15 ++++++++------- cirro/models/dataset.py | 1 + cirro/services/dataset.py | 33 ++++++++++++++++++++++----------- 3 files changed, 31 insertions(+), 18 deletions(-) diff --git a/cirro/cli/controller.py b/cirro/cli/controller.py index 71b28dc..bf486ab 100644 --- a/cirro/cli/controller.py +++ b/cirro/cli/controller.py @@ -160,15 +160,16 @@ def run_validate_folder(input_params: ValidateArguments, interactive=False): local_folder=input_params['data_directory'] ) - for file_list, label in [ - (validation_results.files_matching, "Files exactly matching in Cirro and locally"), - (validation_results.files_not_matching, "Files with differing checksums in Cirro and locally"), - (validation_results.files_missing, "Files present in Cirro but not locally"), - (validation_results.local_only_files, "Files present locally but not in Cirro") + for file_list, label, log_level in [ + (validation_results.files_matching, "✅ Matched Files (identical in Cirro and locally)", logging.INFO), + (validation_results.files_not_matching, "⚠️ Checksum Mismatches (same file name, different content)", logging.WARNING), + (validation_results.files_missing, "⚠️ Missing Locally (present in system but not found locally)", logging.WARNING), + (validation_results.local_only_files, "⚠️ Unexpected Local Files (present locally but not in system)", logging.WARNING), + (validation_results.validate_errors, "⚠️ Validation Failed (checksums may not be available)", logging.WARNING) ]: - logger.info(f"{label}: {len(file_list):,}") + logger.log(level=log_level, msg=f"{label}: {len(file_list):,}") for file in file_list: - logger.info(f" - {file}") + logger.log(level=log_level, msg=f" - {file}") def run_download(input_params: DownloadArguments, interactive=False): diff --git a/cirro/models/dataset.py b/cirro/models/dataset.py index f252524..37c5bb6 100644 --- a/cirro/models/dataset.py +++ b/cirro/models/dataset.py @@ -30,3 +30,4 @@ class DatasetValidationResponse: files_not_matching: list[str] files_missing: list[str] local_only_files: list[str] + validate_errors: list[str] diff --git a/cirro/services/dataset.py b/cirro/services/dataset.py index 4a04edb..3bf4793 100644 --- a/cirro/services/dataset.py +++ b/cirro/services/dataset.py @@ -1,3 +1,4 @@ +import logging from pathlib import Path from typing import List, Optional, Union, Dict @@ -14,6 +15,8 @@ from cirro.services.base import get_all_records from cirro.services.file import FileEnabledService +logger = logging.getLogger() + class DatasetService(FileEnabledService): """ @@ -321,6 +324,7 @@ def validate_folder( ds_files_matching = [] ds_files_not_matching = [] ds_files_missing = [] + ds_validate_failed = [] for ds_file in ds_files: ds_file_path = ds_file.normalized_path # Get the corresponding local file @@ -328,27 +332,34 @@ def validate_folder( if not local_file.exists(): ds_files_missing.append(ds_file_path) else: - if self._file_service.is_valid_file(ds_file, local_file): - ds_files_matching.append(ds_file_path) - else: - ds_files_not_matching.append(ds_file_path) + try: + if self._file_service.is_valid_file(ds_file, local_file): + ds_files_matching.append(ds_file_path) + else: + ds_files_not_matching.append(ds_file_path) + except RuntimeWarning as e: + logger.warning(f"File validation failed: {e}") + ds_validate_failed.append(ds_file_path) # Find local files that are not in the dataset - local_only_files = [ - str(file.relative_to(local_folder)) + local_file_paths = [ + file.relative_to(local_folder).as_posix() for file in local_folder.rglob("*") if not file.is_dir() and not is_hidden_file(file) - if str(file.relative_to(local_folder)) not in [ - ds_file.normalized_path - for ds_file in ds_files - ] + ] + dataset_file_paths = [file.normalized_path for file in ds_files] + local_only_files = [ + file + for file in local_file_paths + if file not in dataset_file_paths ] return DatasetValidationResponse( files_matching=ds_files_matching, files_not_matching=ds_files_not_matching, files_missing=ds_files_missing, - local_only_files=local_only_files + local_only_files=local_only_files, + validate_errors=ds_validate_failed, ) def download_files( From 955a90ceaf5bcc3fdfff48500dee212a79f04c55 Mon Sep 17 00:00:00 2001 From: Nathan Thorpe Date: Mon, 6 Oct 2025 11:30:41 -0700 Subject: [PATCH 11/12] validate --- README.md | 2 +- cirro/cli/cli.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 497c2c1..8899c95 100644 --- a/README.md +++ b/README.md @@ -84,7 +84,7 @@ $ cirro upload --project "Test Project 1" --name "test" --file "sample1.fastq.gz #### Validating that a dataset matches a local folder ```bash -Usage: cirro validate-folder [OPTIONS] +Usage: cirro validate [OPTIONS] Validate that the contents of a local folder match those of a dataset in Cirro diff --git a/cirro/cli/cli.py b/cirro/cli/cli.py index 39ac719..91cc6f7 100644 --- a/cirro/cli/cli.py +++ b/cirro/cli/cli.py @@ -4,8 +4,8 @@ import requests from cirro_api_client.v1.errors import CirroException -from cirro.cli import run_ingest, run_download, run_configure, run_list_datasets from cirro.cli import run_create_pipeline_config, run_validate_folder +from cirro.cli import run_ingest, run_download, run_configure, run_list_datasets from cirro.cli.controller import handle_error, run_upload_reference from cirro.cli.interactive.utils import InputError @@ -92,7 +92,7 @@ def upload(**kwargs): @click.option('-i', '--interactive', help='Gather arguments interactively', is_flag=True, default=False) -def validate_folder(**kwargs): +def validate(**kwargs): check_required_args(kwargs) run_validate_folder(kwargs, interactive=kwargs.get('interactive')) From f0e291724bd6970ef9185598c8a0b5dc8ec6f1c3 Mon Sep 17 00:00:00 2001 From: Nathan Thorpe Date: Mon, 6 Oct 2025 11:31:15 -0700 Subject: [PATCH 12/12] validate --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8899c95..b0b07e4 100644 --- a/README.md +++ b/README.md @@ -98,7 +98,7 @@ Options: ``` ```bash -$ cirro validate-folder --project "Test Project 1" --dataset "test" --data-directory "~/data" +$ cirro validate --project "Test Project 1" --dataset "test" --data-directory "~/data" ``` #### Uploading a reference