Skip to content

Commit db8b4e9

Browse files
authored
Fetch all entries when listing datasets (#62)
* fetch all entries in list method and improve get by ID/Name methods * put back release condition
1 parent fcb4bff commit db8b4e9

File tree

4 files changed

+64
-8
lines changed

4 files changed

+64
-8
lines changed

.github/workflows/package.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ jobs:
4040
poetry build
4141
4242
- name: Publish python package
43+
if: github.event_name == 'release'
4344
run: |
4445
poetry publish
4546
env:

cirro/api/services/base.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,34 @@
11
from abc import ABC
2+
from typing import Dict
23

34
from cirro.api.clients import ApiClient
45
from cirro.api.config import AppConfig
56

67

8+
def fetch_all_items(client: 'ApiClient', query: str, input_variables: Dict, batch_size=10000, max_items=None):
    """
    Fetches all items from a paginated graphql api

    Runs ``query`` repeatedly, feeding the ``nextToken`` of each response
    into the next request, and concatenates the ``items`` of every page.
    The first key of each response is taken to be the query name.

    :param client: object exposing ``query(query, variables)``
    :param query: graphql query text, expected to accept ``limit`` and ``nextToken`` variables
    :param input_variables: extra variables sent with every request;
        a ``limit`` entry overrides ``batch_size`` and a ``nextToken`` entry
        is used as the starting cursor
    :param batch_size: value sent as ``limit`` on each request
    :param max_items: stop once this many items were collected and return
        at most that many; a falsy value (None or 0) means no cap
    :return: list of the collected items
    """
    # Honor a caller-supplied cursor for the first request only; afterwards the
    # cursor returned by the API must win, otherwise the same page would be
    # requested forever.
    next_token = input_variables.get('nextToken')
    items = []
    while True:
        variables = {
            'limit': batch_size,
            **input_variables,
            'nextToken': next_token
        }
        resp = client.query(query, variables)
        query_name = next(iter(resp.keys()))
        page = resp[query_name]
        items.extend(page['items'])

        # Check the cap before the "last page" exit so the final page
        # cannot push the result past max_items.
        if max_items and len(items) >= max_items:
            return items[:max_items]

        next_token = page.get('nextToken')
        if not next_token:
            return items
30+
31+
732
class BaseService(ABC):
833
_api_client: ApiClient
934
_configuration: AppConfig

cirro/api/services/dataset.py

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
import json
22
import logging
33
import uuid
4-
from typing import List, Union
4+
from typing import List, Union, Optional
55

66
from cirro.api.clients.utils import filter_deleted
77
from cirro.api.models.dataset import CreateIngestDatasetInput, DatasetCreateResponse, Dataset
88
from cirro.api.models.file import FileAccessContext, File
99
from cirro.api.models.status import Status
10+
from cirro.api.services.base import fetch_all_items
1011
from cirro.api.services.file import FileEnabledService
1112

1213
logger = logging.getLogger()
@@ -56,17 +57,40 @@ def find_by_project(self, project_id: str, name: str = None) -> List[Dataset]:
5657
'status': {
5758
'eq': Status.COMPLETED.value
5859
}
59-
},
60-
# TODO: Implement pagination
61-
'limit': 10000
60+
}
6261
}
6362
if name:
6463
variables['filter']['name'] = {'eq': name}
6564

66-
resp = self._api_client.query(query, variables=variables)['datasetsByProject']
67-
not_deleted = filter_deleted(resp['items'])
65+
items = fetch_all_items(self._api_client, query, variables)
66+
not_deleted = filter_deleted(items)
6867
return [Dataset.from_record(item) for item in not_deleted]
6968

69+
def get_from_id(self, _id: str) -> Optional[Dataset]:
    """
    Looks up a single dataset by its ID

    Sends the GetDataset query through the API client and converts the
    returned record with Dataset.from_record.
    Returns None when the response's 'getDataset' entry is empty.
    """
    query = '''
      query GetDataset($id: ID!) {
        getDataset(id: $id) {
          id
          status
          name
          desc
          sourceDatasets
          paramJson
          infoJson
          process
          project
          createdAt
          updatedAt
        }
      }
    '''

    response = self._api_client.query(query, variables={'id': _id})
    record = response['getDataset']
    # A falsy record means the API had nothing for this ID
    return Dataset.from_record(record) if record else None
93+
7094
def create(self, create_request: CreateIngestDatasetInput) -> DatasetCreateResponse:
7195
"""
7296
Creates an ingested dataset.

cirro/sdk/project.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,12 +51,18 @@ def list_datasets(self) -> DataPortalDatasets:
5151
def get_dataset_by_name(self, name: str = None) -> DataPortalDataset:
    """
    Return the first dataset in this project matching the given name.

    The lookup is delegated to the dataset service's find_by_project call
    with the name passed through as a filter.
    Raises DataPortalAssetNotFound when nothing comes back.
    """

    matches = self._client.dataset.find_by_project(self.id, name=name)
    first_match = next(iter(matches), None)
    if first_match is not None:
        return DataPortalDataset(first_match, self._client)

    raise DataPortalAssetNotFound(f'Dataset with name {name} not found')
5558

5659
def get_dataset_by_id(self, _id: str = None) -> DataPortalDataset:
    """
    Return the dataset with the given ID.

    The lookup is delegated to the dataset service's get_from_id call.
    Raises DataPortalAssetNotFound when that call yields None.
    """

    found = self._client.dataset.get_from_id(_id=_id)
    if found is not None:
        return DataPortalDataset(found, self._client)

    raise DataPortalAssetNotFound(f'Dataset with ID {_id} not found')
6066

6167
def list_references(self, reference_type: str = None) -> DataPortalReferences:
6268
"""

0 commit comments

Comments
 (0)