|
| 1 | +import json |
1 | 2 | import logging |
| 3 | +import uuid |
2 | 4 | from typing import List, Union |
3 | 5 |
|
4 | 6 | from pubweb.clients.utils import filter_deleted |
@@ -65,9 +67,14 @@ def find_by_project(self, project_id: str, name: str = None) -> List[Dataset]: |
65 | 67 |
|
66 | 68 | def create(self, create_request: CreateIngestDatasetInput) -> DatasetCreateResponse: |
67 | 69 | """ |
68 | | - Creates an ingest dataset |
| 70 | + Creates an ingest dataset. |
| 71 | + This only registers into the system, does not upload any files |
69 | 72 | """ |
70 | 73 | logger.info(f"Creating dataset {create_request.name}") |
| 74 | + |
| 75 | + if self._api_client.has_iam_creds: |
| 76 | + return self._write_dataset_manifest(create_request) |
| 77 | + |
71 | 78 | query = ''' |
72 | 79 | mutation CreateIngestDataset($input: CreateIngestDatasetInput!) { |
73 | 80 | createIngestDataset(input: $input) { |
@@ -112,3 +119,32 @@ def download_files(self, project_id: str, dataset_id: str, download_location: st |
112 | 119 | files = [file.relative_path for file in files] |
113 | 120 |
|
114 | 121 | self._file_service.download_files(access_context, download_location, files) |
| 122 | + |
def _write_dataset_manifest(self, request: CreateIngestDatasetInput) -> DatasetCreateResponse:
    """
    Register a dataset by writing a manifest file instead of calling the API.

    Intended for machine or service accounts. A new UUID4 string is used
    as the dataset ID, and the manifest is stored as indented JSON at
    ``datasets/<dataset id>/artifacts/manifest.json`` via the file service.

    Returns a dict holding the generated ``datasetId`` and the ``dataPath``
    under the same artifacts prefix.
    """
    # Keys are inserted in the order they should appear in the JSON output.
    payload = {
        'project': request.project_id,
        'process': request.process_id,
        'name': request.name,
        'desc': request.description,
    }
    payload['infoJson'] = {'ingestedBy': self._api_client.current_user}
    payload['files'] = [{'name': entry} for entry in request.files]

    new_id = str(uuid.uuid4())
    # Both the manifest and the data path live under this prefix.
    artifacts_prefix = f'datasets/{new_id}/artifacts'

    serialized = json.dumps(payload, indent=4)
    upload_context = FileAccessContext.upload_dataset(
        dataset_id=new_id,
        project_id=request.project_id,
    )
    self._file_service.create_file(
        upload_context,
        key=f'{artifacts_prefix}/manifest.json',
        contents=serialized,
        content_type='application/json',
    )

    return {
        'datasetId': new_id,
        'dataPath': f'{artifacts_prefix}/data',
    }
0 commit comments