2 changes: 1 addition & 1 deletion cirro/models/file.py
@@ -85,7 +85,7 @@ def upload_sample_sheet(cls, project_id: str, dataset_id: str, base_url: str):
            access_type=ProjectAccessType.SAMPLESHEET_UPLOAD,
            dataset_id=dataset_id
        ),
        base_url=base_url,
        base_url=f'{base_url}/data',
        project_id=project_id
    )

54 changes: 53 additions & 1 deletion cirro/sdk/dataset.py
@@ -1,11 +1,14 @@
import datetime
from pathlib import Path
from typing import Union, List, Optional

from cirro_api_client.v1.api.processes import validate_file_requirements
from cirro_api_client.v1.models import Dataset, DatasetDetail, RunAnalysisRequest, ProcessDetail, Status, \
RunAnalysisRequestParams, Tag, ArtifactType, NamedItem
RunAnalysisRequestParams, Tag, ArtifactType, NamedItem, Executor, ValidateFileRequirementsRequest

from cirro.cirro_client import CirroApi
from cirro.models.assets import DatasetAssets
from cirro.models.file import PathLike
from cirro.sdk.asset import DataPortalAssets, DataPortalAsset
from cirro.sdk.exceptions import DataPortalAssetNotFound
from cirro.sdk.exceptions import DataPortalInputError
@@ -302,6 +305,55 @@ def run_analysis(
        )
        return resp.id

    def update_samplesheet(self,
                           contents: Optional[str] = None,
                           file_path: Optional[PathLike] = None):
        """
        Updates the samplesheet metadata of a dataset.
        Provide either the contents (as a string) or a file path.
        Both must be in CSV format.

        Args:
            contents (str): Samplesheet contents to update (should be a CSV string)
            file_path (PathLike): Path of file to update (should be a CSV file)

        Example:
        ```python
        dataset.update_samplesheet(
            file_path=Path('~/samplesheet.csv')
        )
        ```
        """
        if contents is None and file_path is None:
            raise DataPortalInputError("Must specify either 'contents' or 'file_path' when updating samplesheet")

        if self.process.executor != Executor.INGEST:
            raise DataPortalInputError("Cannot update a samplesheet on a non-ingest dataset")

        samplesheet_contents = contents
        if file_path is not None:
            samplesheet_contents = Path(file_path).expanduser().read_text()

        # Validate the samplesheet against the process file requirements
        file_names = [f.file_name for f in self.list_files()]
        request = ValidateFileRequirementsRequest(
            file_names=file_names,
            sample_sheet=samplesheet_contents,
        )
        requirements = validate_file_requirements.sync(process_id=self.process_id,
                                                       body=request,
                                                       client=self._client.api_client)
        if error_msg := requirements.error_msg:
            raise DataPortalInputError(error_msg)

        # Update the samplesheet if everything looks ok
        self._client.datasets.update_samplesheet(
            project_id=self.project_id,
            dataset_id=self.id,
            samplesheet=samplesheet_contents
        )


class DataPortalDatasets(DataPortalAssets[DataPortalDataset]):
    """Collection of multiple DataPortalDataset objects."""
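For orientation, a minimal sketch of the new SDK method in use, assuming an authenticated `DataPortal` session and the usual name-based lookup helpers (the project and dataset names here are placeholders):

```python
from cirro import DataPortal
from cirro.sdk.exceptions import DataPortalInputError

# Assumes an authenticated session; project/dataset names are placeholders
portal = DataPortal()
dataset = portal.get_project_by_name('Test Project').get_dataset_by_name('Test dataset')

# Samplesheet contents as a CSV string
samplesheet = 'sample,fastq_1,fastq_2\ntest,test.R1.fastq.gz,test.R2.fastq.gz\n'

try:
    # Contents are validated against the ingest process file
    # requirements before samplesheet.csv is overwritten
    dataset.update_samplesheet(contents=samplesheet)
except DataPortalInputError as e:
    # Raised if neither input is given, the dataset is not an
    # ingest dataset, or the samplesheet fails validation
    print(f'Samplesheet rejected: {e}')
```

Validation failures, missing inputs, and non-ingest datasets all surface as `DataPortalInputError`, per the checks in the method above.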
27 changes: 27 additions & 0 deletions cirro/services/dataset.py
@@ -339,3 +339,30 @@ def download_files(
                                                    base_url=dataset.s3)

        self._file_service.download_files(access_context, download_location, files)

    def update_samplesheet(
        self,
        project_id: str,
        dataset_id: str,
        samplesheet: str
    ):
        """
        Updates a samplesheet on a dataset.

        Args:
            project_id (str): ID of the Project
            dataset_id (str): ID of the Dataset
            samplesheet (str): Samplesheet contents to update (should be a CSV string)
        """
        dataset = self.get(project_id, dataset_id)
        access_context = FileAccessContext.upload_sample_sheet(project_id=project_id,
                                                               dataset_id=dataset_id,
                                                               base_url=dataset.s3)

        samplesheet_key = f'{access_context.prefix}/samplesheet.csv'
        self._file_service.create_file(
            access_context=access_context,
            key=samplesheet_key,
            contents=samplesheet,
            content_type='text/csv'
        )
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "cirro"
version = "1.7.2"
version = "1.8.0"
description = "CLI tool and SDK for interacting with the Cirro platform"
authors = ["Cirro Bio <[email protected]>"]
license = "MIT"
52 changes: 42 additions & 10 deletions samples/Uploading_a_dataset.ipynb
@@ -243,12 +243,12 @@
"evalue": "Files do not match dataset type. Expected file type requirements: \nPaired FASTQ (Illumina Format) *_S*_L???_{I,R}{1,2}_001.fastq.gz",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/var/folders/ck/j40906kx3mj90bcc8qs7gyxm0000gp/T/ipykernel_83747/2225702019.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;31m# Try to upload the data (which will cause an error)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m project.upload_dataset(\n\u001b[0m\u001b[1;32m 8\u001b[0m \u001b[0mname\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'Test dataset'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0mdescription\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m''\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/Documents/GitHub/Cirro-client/cirro/sdk/project.py\u001b[0m in \u001b[0;36mupload_dataset\u001b[0;34m(self, name, description, process, upload_folder, files)\u001b[0m\n\u001b[1;32m 126\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 127\u001b[0m \u001b[0;31m# Make sure that the files match the expected pattern\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 128\u001b[0;31m \u001b[0mcheck_dataset_files\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfiles\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mprocess\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfile_mapping_rules\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mupload_folder\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 129\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 130\u001b[0m \u001b[0;31m# Create the ingest process request\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/Documents/GitHub/Cirro-client/cirro/file_utils.py\u001b[0m in \u001b[0;36mcheck_dataset_files\u001b[0;34m(files, file_mapping_rules, directory)\u001b[0m\n\u001b[1;32m 148\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 149\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0many\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunctools\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpartial\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmatch_pattern\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfiles\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfile_mapping_rules\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 150\u001b[0;31m raise ValueError(\"Files do not match dataset type. Expected file type requirements: \\n\" + \"\\n\".join(\n\u001b[0m\u001b[1;32m 151\u001b[0m [f\"{rule.get('description', '')} {rule.get('glob')}\" for rule in file_mapping_rules]))\n",
"\u001b[0;31mValueError\u001b[0m: Files do not match dataset type. Expected file type requirements: \nPaired FASTQ (Illumina Format) *_S*_L???_{I,R}{1,2}_001.fastq.gz"
"\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
"\u001B[0;31mValueError\u001B[0m Traceback (most recent call last)",
"\u001B[0;32m/var/folders/ck/j40906kx3mj90bcc8qs7gyxm0000gp/T/ipykernel_83747/2225702019.py\u001B[0m in \u001B[0;36m<module>\u001B[0;34m\u001B[0m\n\u001B[1;32m 5\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 6\u001B[0m \u001B[0;31m# Try to upload the data (which will cause an error)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m----> 7\u001B[0;31m project.upload_dataset(\n\u001B[0m\u001B[1;32m 8\u001B[0m \u001B[0mname\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0;34m'Test dataset'\u001B[0m\u001B[0;34m,\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 9\u001B[0m \u001B[0mdescription\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0;34m''\u001B[0m\u001B[0;34m,\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
"\u001B[0;32m~/Documents/GitHub/Cirro-client/cirro/sdk/project.py\u001B[0m in \u001B[0;36mupload_dataset\u001B[0;34m(self, name, description, process, upload_folder, files)\u001B[0m\n\u001B[1;32m 126\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 127\u001B[0m \u001B[0;31m# Make sure that the files match the expected pattern\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 128\u001B[0;31m \u001B[0mcheck_dataset_files\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mfiles\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mprocess\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mfile_mapping_rules\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mupload_folder\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 129\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 130\u001B[0m \u001B[0;31m# Create the ingest process request\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
"\u001B[0;32m~/Documents/GitHub/Cirro-client/cirro/file_utils.py\u001B[0m in \u001B[0;36mcheck_dataset_files\u001B[0;34m(files, file_mapping_rules, directory)\u001B[0m\n\u001B[1;32m 148\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 149\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0;32mnot\u001B[0m \u001B[0many\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mmap\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mfunctools\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mpartial\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mmatch_pattern\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mfiles\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mfile_mapping_rules\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 150\u001B[0;31m raise ValueError(\"Files do not match dataset type. Expected file type requirements: \\n\" + \"\\n\".join(\n\u001B[0m\u001B[1;32m 151\u001B[0m [f\"{rule.get('description', '')} {rule.get('glob')}\" for rule in file_mapping_rules]))\n",
"\u001B[0;31mValueError\u001B[0m: Files do not match dataset type. Expected file type requirements: \nPaired FASTQ (Illumina Format) *_S*_L???_{I,R}{1,2}_001.fastq.gz"
]
}
],
@@ -259,7 +259,7 @@
"print(json.dumps(ingest_10X.file_mapping_rules, indent=3))\n",
"\n",
"# Try to upload the data (which will cause an error)\n",
"project.upload_dataset(\n",
"dataset = project.upload_dataset(\n",
" name = 'Test dataset',\n",
" description = '',\n",
" upload_folder = '/tmp',\n",
@@ -269,11 +269,43 @@
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"cell_type": "markdown",
"source": [
"You can update a sample sheet on an existing dataset by using the `update_samplesheet` method.\n",
"\n",
"You may provide either the CSV contents or a file path."
]
},
{
"metadata": {},
"cell_type": "code",
"outputs": [],
"source": []
"execution_count": null,
"source": [
"from pathlib import Path\n",
"import pandas as pd\n",
"\n",
"samplesheet = pd.DataFrame.from_records([\n",
" {\n",
" 'sample': 'test',\n",
" 'fastq_1': 'test.R1.fastq.gz',\n",
" 'fastq_2': 'test.R2.fastq.gz',\n",
" 'status': 'Normal'\n",
" }\n",
"])\n",
"\n",
"dataset.update_samplesheet(\n",
" contents=samplesheet.to_csv(index=False),\n",
")\n",
"\n",
"\n",
"# OR\n",
"\n",
"dataset.update_samplesheet(\n",
" file_path=Path('~/samplesheet.csv')\n",
")"
]
}
],
"metadata": {