From d9ee1644a751e3d841e40f4f482fdb63b1bfbeed Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Mon, 19 May 2025 12:07:16 -0400 Subject: [PATCH 1/6] Fix grpc async_req=True tests --- tests/integration/data/seed.py | 10 ++++ tests/integration/data/test_delete_future.py | 50 +++++++++++++++----- tests/integration/data/test_fetch_future.py | 23 +++++---- 3 files changed, 63 insertions(+), 20 deletions(-) diff --git a/tests/integration/data/seed.py b/tests/integration/data/seed.py index 2019761e..f606367f 100644 --- a/tests/integration/data/seed.py +++ b/tests/integration/data/seed.py @@ -1,10 +1,16 @@ from ..helpers import poll_fetch_for_ids_in_namespace, embedding_values from pinecone import Vector import itertools +import logging + +logger = logging.getLogger(__name__) def setup_data(idx, target_namespace, wait): # Upsert without metadata + logger.info( + "Upserting 3 vectors as tuples to namespace '%s' without metadata", target_namespace + ) idx.upsert( vectors=[ ("1", embedding_values(2)), @@ -15,6 +21,9 @@ def setup_data(idx, target_namespace, wait): ) # Upsert with metadata + logger.info( + "Upserting 3 vectors as Vector objects to namespace '%s' with metadata", target_namespace + ) idx.upsert( vectors=[ Vector( @@ -29,6 +38,7 @@ def setup_data(idx, target_namespace, wait): ) # Upsert with dict + logger.info("Upserting 3 vectors as dicts to namespace '%s'", target_namespace) idx.upsert( vectors=[ {"id": "7", "values": embedding_values(2)}, diff --git a/tests/integration/data/test_delete_future.py b/tests/integration/data/test_delete_future.py index 3ebea445..0680737a 100644 --- a/tests/integration/data/test_delete_future.py +++ b/tests/integration/data/test_delete_future.py @@ -2,11 +2,27 @@ import pytest from pinecone import Vector from ..helpers import poll_stats_for_namespace, random_string +import logging + +logger = logging.getLogger(__name__) if os.environ.get("USE_GRPC") == "true": from pinecone.grpc import GRPCDeleteResponse +def seed_vectors(idx, namespace): + logger.info("Seeding vectors with ids [id1, id2, id3] to namespace '%s'", namespace) + idx.upsert( + vectors=[ + Vector(id="id1", values=[0.1, 0.2]), + Vector(id="id2", values=[0.1, 0.2]), + Vector(id="id3", values=[0.1, 0.2]), + ], + namespace=namespace, + ) + poll_stats_for_namespace(idx, namespace, 3) + + class TestDeleteFuture: @pytest.mark.skipif( os.getenv("USE_GRPC") != "true", reason="PineconeGrpcFutures only returned from grpc client" @@ -14,21 +30,33 @@ class TestDeleteFuture: def test_delete_future(self, idx): namespace = random_string(10) - idx.upsert( - vectors=[ - Vector(id="id1", values=[0.1, 0.2]), - Vector(id="id2", values=[0.1, 0.2]), - Vector(id="id3", values=[0.1, 0.2]), - ], - namespace=namespace, - ) - poll_stats_for_namespace(idx, namespace, 3) + seed_vectors(idx, namespace) delete_one = idx.delete(ids=["id1"], namespace=namespace, async_req=True) - delete_namespace = idx.delete(namespace=namespace, delete_all=True, async_req=True) + delete_two = idx.delete(ids=["id2"], namespace=namespace, async_req=True) + + from concurrent.futures import as_completed + + for future in as_completed([delete_one, delete_two], timeout=10): + resp = future.result() + assert isinstance(resp, GRPCDeleteResponse) + + @pytest.mark.skipif( + os.getenv("USE_GRPC") != "true", reason="PineconeGrpcFutures only returned from grpc client" + ) + def test_delete_future_by_namespace(self, idx): + namespace = random_string(10) + + ns1 = f"{namespace}-1" + ns2 = f"{namespace}-2" + + seed_vectors(idx, ns1) + seed_vectors(idx, ns2) + 
delete_ns1 = idx.delete(namespace=ns1, delete_all=True, async_req=True) + delete_ns2 = idx.delete(namespace=ns2, delete_all=True, async_req=True) from concurrent.futures import as_completed - for future in as_completed([delete_one, delete_namespace], timeout=10): + for future in as_completed([delete_ns1, delete_ns2], timeout=10): resp = future.result() assert isinstance(resp, GRPCDeleteResponse) diff --git a/tests/integration/data/test_fetch_future.py b/tests/integration/data/test_fetch_future.py index 65f1607f..e100f11a 100644 --- a/tests/integration/data/test_fetch_future.py +++ b/tests/integration/data/test_fetch_future.py @@ -15,22 +15,20 @@ def fetch_namespace_future(): return random_string(10) -@pytest.mark.usefixtures("fetch_namespace_future") -@pytest.fixture(scope="class") -def seed_for_fetch(idx, fetch_namespace_future): +def seed(idx, namespace): # Upsert without metadata - logger.info("Seeding vectors without metadata") + logger.info("Seeding vectors without metadata to namespace '%s'", namespace) idx.upsert( vectors=[ ("1", embedding_values(2)), ("2", embedding_values(2)), ("3", embedding_values(2)), ], - namespace=fetch_namespace_future, + namespace=namespace, ) # Upsert with metadata - logger.info("Seeding vectors with metadata") + logger.info("Seeding vectors with metadata to namespace '%s'", namespace) idx.upsert( vectors=[ Vector( @@ -41,7 +39,7 @@ def seed_for_fetch(idx, fetch_namespace_future): id="6", values=embedding_values(2), metadata={"genre": "romance", "runtime": 240} ), ], - namespace=fetch_namespace_future, + namespace=namespace, ) # Upsert with dict @@ -51,12 +49,19 @@ def seed_for_fetch(idx, fetch_namespace_future): {"id": "8", "values": embedding_values(2)}, {"id": "9", "values": embedding_values(2)}, ], - namespace=fetch_namespace_future, + namespace=namespace, ) poll_fetch_for_ids_in_namespace( - idx, ids=["1", "2", "3", "4", "5", "6", "7", "8", "9"], namespace=fetch_namespace_future + idx, ids=["1", "2", "3", "4", "5", "6", "7", "8", "9"], namespace=namespace ) + + +@pytest.mark.usefixtures("fetch_namespace_future") +@pytest.fixture(scope="class") +def seed_for_fetch(idx, fetch_namespace_future): + seed(idx, fetch_namespace_future) + seed(idx, "") yield From 8086542393bed0521a7e468ee22c8a3d908cfa24 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Mon, 19 May 2025 15:51:58 -0400 Subject: [PATCH 2/6] Add logging in sanity tests --- scripts/create.py | 16 ++++++++++++++++ tests/dependency/asyncio-rest/test_sanity.py | 17 ++++++++++++++--- tests/dependency/grpc/test_sanity.py | 11 +++++++++-- tests/dependency/rest/test_sanity.py | 17 +++++++++++++---- 4 files changed, 52 insertions(+), 9 deletions(-) diff --git a/scripts/create.py b/scripts/create.py index 05a12c36..1e911253 100644 --- a/scripts/create.py +++ b/scripts/create.py @@ -57,6 +57,21 @@ def generate_index_name(test_name: str) -> str: return index_name.lower() +def get_tags(): + github_actor = os.getenv("GITHUB_ACTOR", None) + user = os.getenv("USER", None) + index_owner = github_actor or user or "unknown" + + github_job = os.getenv("GITHUB_JOB", "") + tags = { + "owner": index_owner, + "test-suite": "pinecone-python-client", + "created-at": datetime.now().strftime("%Y-%m-%d"), + "test-job": github_job, + } + return tags + + def main(): pc = Pinecone(api_key=read_env_var("PINECONE_API_KEY")) index_name = generate_index_name(read_env_var("NAME_PREFIX") + random_string(20)) @@ -65,6 +80,7 @@ def main(): metric=read_env_var("METRIC"), dimension=int(read_env_var("DIMENSION")), spec={"serverless": 
{"cloud": read_env_var("CLOUD"), "region": read_env_var("REGION")}}, + tags=get_tags(), ) write_gh_output("index_name", index_name) diff --git a/tests/dependency/asyncio-rest/test_sanity.py b/tests/dependency/asyncio-rest/test_sanity.py index 45b22a64..6c879378 100644 --- a/tests/dependency/asyncio-rest/test_sanity.py +++ b/tests/dependency/asyncio-rest/test_sanity.py @@ -3,6 +3,10 @@ import asyncio from pinecone import PineconeAsyncio +import logging + +logger = logging.getLogger(__name__) + @pytest.fixture def index_name(): @@ -16,7 +20,7 @@ def index_name(): class TestSanityRest: async def test_sanity(self, index_name): async with PineconeAsyncio() as pc: - print("Testing with index name: " + index_name) + logger.info("Testing with index name: " + index_name) assert index_name != "" # Verify index exists with expected properties @@ -25,20 +29,26 @@ async def test_sanity(self, index_name): description = await pc.describe_index(name=index_name) assert description.dimension == 2 + logger.info("Index description: %s", description) idx = pc.IndexAsyncio(host=description.host) - await idx.upsert(vectors=[("1", [1.0, 2.0]), ("2", [3.0, 4.0]), ("3", [5.0, 6.0])]) + resp = await idx.upsert( + vectors=[("1", [1.0, 2.0]), ("2", [3.0, 4.0]), ("3", [5.0, 6.0])] + ) + logger.info("Upsert response: %s", resp) # Wait for index freshness await asyncio.sleep(30) # Check the vector count reflects some data has been upserted description = await idx.describe_index_stats() + logger.info("Index stats: %s", description) assert description.dimension == 2 assert description.total_vector_count >= 3 # Query for results query_results = await idx.query(id="1", top_k=10, include_values=True) + logger.info("Query results: %s", query_results) assert query_results.matches[0].id == "1" assert len(query_results.matches) == 3 @@ -49,10 +59,11 @@ async def test_sanity(self, index_name): # Call an inference method, should not raise an exception from pinecone import EmbedModel - await pc.inference.embed( + resp = await pc.inference.embed( model=EmbedModel.Multilingual_E5_Large, inputs=["Hello, how are you?", "I am doing well, thank you for asking."], parameters={"input_type": "passage", "truncate": "END"}, ) + logger.info("Embed response: %s", resp) await idx.close() diff --git a/tests/dependency/grpc/test_sanity.py b/tests/dependency/grpc/test_sanity.py index 163f4c16..ee253dfd 100644 --- a/tests/dependency/grpc/test_sanity.py +++ b/tests/dependency/grpc/test_sanity.py @@ -2,6 +2,9 @@ import os import time from pinecone.grpc import PineconeGRPC +import logging + +logger = logging.getLogger(__name__) @pytest.fixture @@ -19,26 +22,30 @@ def client(): class TestSanityRest: def test_sanity(self, index_name, client): - print("Testing with index name: " + index_name) + logger.info("Testing with index name: %s", index_name) assert index_name != "" # Verify index exists with expected properties assert index_name in client.list_indexes().names() description = client.describe_index(name=index_name) assert description.dimension == 2 + logger.info("Index description: %s", description) idx = client.Index(index_name) - idx.upsert(vectors=[("1", [1.0, 2.0]), ("2", [3.0, 4.0]), ("3", [5.0, 6.0])]) + resp = idx.upsert(vectors=[("1", [1.0, 2.0]), ("2", [3.0, 4.0]), ("3", [5.0, 6.0])]) + logger.info("Upsert response: %s", resp) # Wait for index freshness time.sleep(30) # Check the vector count reflects some data has been upserted description = idx.describe_index_stats() + logger.info("Index stats: %s", description) assert description.dimension 
== 2 assert description.total_vector_count >= 3 # Query for results query_results = idx.query(id="1", top_k=10, include_values=True) + logger.info("Query results: %s", query_results) assert query_results.matches[0].id == "1" assert len(query_results.matches) == 3 diff --git a/tests/dependency/rest/test_sanity.py b/tests/dependency/rest/test_sanity.py index dcd4ad5f..f8ab56dd 100644 --- a/tests/dependency/rest/test_sanity.py +++ b/tests/dependency/rest/test_sanity.py @@ -2,6 +2,9 @@ import os import time from pinecone import Pinecone +import logging + +logger = logging.getLogger(__name__) @pytest.fixture @@ -19,22 +22,26 @@ def client(): class TestSanityRest: def test_sanity(self, index_name, client): - print("Testing with index name: " + index_name) + logger.info("Testing with index name: " + index_name) assert index_name != "" # Verify index exists with expected properties assert index_name in client.list_indexes().names() description = client.describe_index(name=index_name) assert description.dimension == 2 + logger.info("Index description: %s", description) idx = client.Index(index_name) - idx.upsert(vectors=[("1", [1.0, 2.0]), ("2", [3.0, 4.0]), ("3", [5.0, 6.0])]) + resp = idx.upsert(vectors=[("1", [1.0, 2.0]), ("2", [3.0, 4.0]), ("3", [5.0, 6.0])]) + logger.info("Upsert response: %s", resp) # Wait for index freshness time.sleep(30) # Check the vector count reflects some data has been upserted description = idx.describe_index_stats() + logger.info("Index stats: %s", description) + assert description.dimension == 2 assert description.total_vector_count >= 3 @@ -44,13 +51,15 @@ def test_sanity(self, index_name, client): assert len(query_results.matches) == 3 # Call a bulk import api method, should not raise an exception - idx.list_imports() + for i in idx.list_imports(): + assert i is not None # Call an inference method, should not raise an exception from pinecone import EmbedModel - client.inference.embed( + resp = client.inference.embed( model=EmbedModel.Multilingual_E5_Large, inputs=["Hello, how are you?", "I am doing well, thank you for asking."], parameters={"input_type": "passage", "truncate": "END"}, ) + logger.info("Embed response: %s", resp) From 5b815c1eea8c11a88661b07220153abfd1282793 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Tue, 20 May 2025 09:49:24 -0400 Subject: [PATCH 3/6] Fix sanity tests --- pinecone/__init__.py | 2 +- pinecone/__init__.pyi | 2 +- pinecone/data/__init__.py | 2 - pinecone/data/features/__init__.py | 7 +- .../data/features/bulk_imports/__init__.py | 5 +- pinecone/db_data/__init__.py | 2 +- .../db_data/features/bulk_import/__init__.py | 3 - pinecone/db_data/index.py | 150 +++++++++++++++++- pinecone/db_data/index_asyncio.py | 140 +++++++++++++++- .../asyncio}/bulk_import_asyncio.py | 18 +-- .../sync}/bulk_import.py | 16 +- .../sync}/bulk_import_request_factory.py | 0 pyproject.toml | 1 + tests/dependency/__init__.py | 0 tests/dependency/asyncio-rest/test_sanity.py | 72 ++++----- tests/dependency/conftest.py | 3 + tests/dependency/rest/test_sanity.py | 6 +- 17 files changed, 353 insertions(+), 76 deletions(-) delete mode 100644 pinecone/db_data/features/bulk_import/__init__.py rename pinecone/db_data/{features/bulk_import => resources/asyncio}/bulk_import_asyncio.py (92%) rename pinecone/db_data/{features/bulk_import => resources/sync}/bulk_import.py (94%) rename pinecone/db_data/{features/bulk_import => resources/sync}/bulk_import_request_factory.py (100%) create mode 100644 tests/dependency/__init__.py create mode 100644 tests/dependency/conftest.py 
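
Note on the refactored bulk-import surface (illustrative only): this patch keeps the public methods on `Index` as thin wrappers that delegate to the new `BulkImportResource`, so existing call sites should keep working. The sketch below is based on the docstrings added in this patch; the index name and data URI are placeholders, and it assumes `PINECONE_API_KEY` is set in the environment.

```python
from pinecone import Pinecone, ImportErrorMode

pc = Pinecone()
index = pc.Index("my-index")  # placeholder index name

# start_import delegates to index.bulk_import.start(...) under the hood
op = index.start_import(
    uri="s3://bucket-name/path/to/data.parquet",  # placeholder URI
    error_mode=ImportErrorMode.CONTINUE,
)

# list_imports is a generator that pages through list_imports_paginated on your behalf
for imp in index.list_imports():
    print(imp.id, imp.status)

# describe or cancel a specific operation by id
print(index.describe_import(id=op.id))
index.cancel_import(id=op.id)
```
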
diff --git a/pinecone/__init__.py b/pinecone/__init__.py index 78adbf0e..d4f29bfa 100644 --- a/pinecone/__init__.py +++ b/pinecone/__init__.py @@ -41,7 +41,7 @@ "QueryResponse": ("pinecone.db_data.models", "QueryResponse"), "UpsertResponse": ("pinecone.db_data.models", "UpsertResponse"), "UpdateRequest": ("pinecone.db_data.models", "UpdateRequest"), - "ImportErrorMode": ("pinecone.core.openapi.db_data.models", "ImportErrorMode"), + "ImportErrorMode": ("pinecone.db_data.resources.sync.bulk_import", "ImportErrorMode"), "VectorDictionaryMissingKeysError": ( "pinecone.db_data.errors", "VectorDictionaryMissingKeysError", diff --git a/pinecone/__init__.pyi b/pinecone/__init__.pyi index f6873468..c900e420 100644 --- a/pinecone/__init__.pyi +++ b/pinecone/__init__.pyi @@ -29,7 +29,7 @@ from pinecone.db_data.models import ( UpsertResponse, UpdateRequest, ) -from pinecone.core.openapi.db_data.models import ImportErrorMode +from pinecone.db_data.resources.sync.bulk_import import ImportErrorMode from pinecone.db_data.errors import ( VectorDictionaryMissingKeysError, VectorDictionaryExcessKeysError, diff --git a/pinecone/data/__init__.py b/pinecone/data/__init__.py index 3ea4cd41..6722b22a 100644 --- a/pinecone/data/__init__.py +++ b/pinecone/data/__init__.py @@ -1,7 +1,5 @@ import warnings -from pinecone.db_data import * - warnings.warn( "The module at `pinecone.data` has moved to `pinecone.db_data`. " "Please update your imports. " diff --git a/pinecone/data/features/__init__.py b/pinecone/data/features/__init__.py index e4ff12ee..524e58ae 100644 --- a/pinecone/data/features/__init__.py +++ b/pinecone/data/features/__init__.py @@ -1,10 +1,11 @@ import warnings -from pinecone.db_data.features import * +from .bulk_imports import * +from .inference import * + warnings.warn( - "The module at `pinecone.data.features` has moved to `pinecone.db_data.features`. " - "Please update your imports. " + "The module at `pinecone.data.features` has been removed. Code has been refactored and integrated into other parts of the client. " "This warning will become an error in a future version of the Pinecone Python SDK.", DeprecationWarning, ) diff --git a/pinecone/data/features/bulk_imports/__init__.py b/pinecone/data/features/bulk_imports/__init__.py index 3af0d1f5..b45a4d99 100644 --- a/pinecone/data/features/bulk_imports/__init__.py +++ b/pinecone/data/features/bulk_imports/__init__.py @@ -1,6 +1,9 @@ import warnings -from pinecone.db_data.features.bulk_import import * +from pinecone.db_data.resources.asyncio.bulk_import_asyncio import * +from pinecone.db_data.resources.sync.bulk_import import * +from pinecone.db_data.resources.sync.bulk_import_request_factory import * + warnings.warn( "The module at `pinecone.data.features.bulk_import` has moved to `pinecone.db_data.features.bulk_import`. 
" diff --git a/pinecone/db_data/__init__.py b/pinecone/db_data/__init__.py index f2db9a63..7722ff79 100644 --- a/pinecone/db_data/__init__.py +++ b/pinecone/db_data/__init__.py @@ -25,7 +25,7 @@ MetadataDictionaryExpectedError, ) -from .features.bulk_import import ImportErrorMode +from .resources.sync.bulk_import import ImportErrorMode import warnings diff --git a/pinecone/db_data/features/bulk_import/__init__.py b/pinecone/db_data/features/bulk_import/__init__.py deleted file mode 100644 index ebcf782b..00000000 --- a/pinecone/db_data/features/bulk_import/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .bulk_import_request_factory import ImportErrorMode -from .bulk_import import ImportFeatureMixin -from .bulk_import_asyncio import ImportFeatureMixinAsyncio diff --git a/pinecone/db_data/index.py b/pinecone/db_data/index.py index 6c78b849..8878dee2 100644 --- a/pinecone/db_data/index.py +++ b/pinecone/db_data/index.py @@ -2,7 +2,7 @@ import warnings import logging import json -from typing import Union, List, Optional, Dict, Any, Literal, TYPE_CHECKING +from typing import Union, List, Optional, Dict, Any, Literal, Iterator, TYPE_CHECKING from pinecone.config import ConfigBuilder @@ -19,7 +19,6 @@ from .dataclasses import Vector, SparseValues, FetchResponse, SearchQuery, SearchRerank from .interfaces import IndexInterface from .request_factory import IndexRequestFactory -from .features.bulk_import import ImportFeatureMixin from .types import ( SparseVectorTypedDict, VectorTypedDict, @@ -47,6 +46,15 @@ if TYPE_CHECKING: from pinecone.config import Config, OpenApiConfiguration + from .resources.sync.bulk_import import BulkImportResource + + from pinecone.core.openapi.db_data.models import ( + StartImportResponse, + ListImportsResponse, + ImportModel, + ) + + from .resources.sync.bulk_import import ImportErrorMode logger = logging.getLogger(__name__) """ @private """ @@ -58,12 +66,15 @@ def parse_query_response(response: QueryResponse): return response -class Index(PluginAware, IndexInterface, ImportFeatureMixin): +class Index(PluginAware, IndexInterface): """ A client for interacting with a Pinecone index via REST API. For improved performance, use the Pinecone GRPC index client. """ + _bulk_import_resource: Optional["BulkImportResource"] + """ @private """ + def __init__( self, api_key: str, @@ -101,6 +112,9 @@ def __init__( self._api_client = self._vector_api.api_client + self._bulk_import_resource = None + """ @private """ + # Pass the same api_client to the ImportFeatureMixin super().__init__(api_client=self._api_client) @@ -129,6 +143,15 @@ def pool_threads(self) -> int: ) return self._pool_threads + @property + def bulk_import(self) -> "BulkImportResource": + """@private""" + if self._bulk_import_resource is None: + from .resources.sync.bulk_import import BulkImportResource + + self._bulk_import_resource = BulkImportResource(api_client=self._api_client) + return self._bulk_import_resource + def _openapi_kwargs(self, kwargs: Dict[str, Any]) -> Dict[str, Any]: return filter_dict(kwargs, OPENAPI_ENDPOINT_PARAMS) @@ -457,3 +480,124 @@ def list(self, **kwargs): kwargs.update({"pagination_token": results.pagination.next}) else: done = True + + @validate_and_convert_errors + def start_import( + self, + uri: str, + integration_id: Optional[str] = None, + error_mode: Optional[ + Union["ImportErrorMode", Literal["CONTINUE", "ABORT"], str] + ] = "CONTINUE", + ) -> "StartImportResponse": + """ + Args: + uri (str): The URI of the data to import. 
The URI must start with the scheme of a supported storage provider. + integration_id (Optional[str], optional): If your bucket requires authentication to access, you need to pass the id of your storage integration using this property. Defaults to None. + error_mode: Defaults to "CONTINUE". If set to "CONTINUE", the import operation will continue even if some + records fail to import. Pass "ABORT" to stop the import operation if any records fail to import. + + Returns: + `StartImportResponse`: Contains the id of the import operation. + + Import data from a storage provider into an index. The uri must start with the scheme of a supported + storage provider. For buckets that are not publicly readable, you will also need to separately configure + a storage integration and pass the integration id. + + Examples: + >>> from pinecone import Pinecone + >>> index = Pinecone().Index('my-index') + >>> index.start_import(uri="s3://bucket-name/path/to/data.parquet") + { id: "1" } + """ + return self.bulk_import.start(uri=uri, integration_id=integration_id, error_mode=error_mode) + + @validate_and_convert_errors + def list_imports(self, **kwargs) -> Iterator["ImportModel"]: + """ + Args: + limit (Optional[int]): The maximum number of operations to fetch in each network call. If unspecified, the server will use a default value. [optional] + pagination_token (Optional[str]): When there are multiple pages of results, a pagination token is returned in the response. The token can be used + to fetch the next page of results. [optional] + + Returns: + Returns a generator that yields each import operation. It automatically handles pagination tokens on your behalf so you can + easily iterate over all results. The `list_imports` method accepts all of the same arguments as list_imports_paginated + + ```python + for op in index.list_imports(): + print(op) + ``` + + You can convert the generator into a list by wrapping the generator in a call to the built-in `list` function: + + ```python + operations = list(index.list_imports()) + ``` + + You should be cautious with this approach because it will fetch all operations at once, which could be a large number + of network calls and a lot of memory to hold the results. + """ + for i in self.bulk_import.list(**kwargs): + yield i + + @validate_and_convert_errors + def list_imports_paginated( + self, limit: Optional[int] = None, pagination_token: Optional[str] = None, **kwargs + ) -> "ListImportsResponse": + """ + Args: + limit (Optional[int]): The maximum number of ids to return. If unspecified, the server will use a default value. [optional] + pagination_token (Optional[str]): A token needed to fetch the next page of results. This token is returned + in the response if additional results are available. [optional] + + Returns: ListImportsResponse object which contains the list of operations as ImportModel objects, pagination information, + and usage showing the number of read_units consumed. + + The list_imports_paginated operation returns information about import operations. + It returns operations in a paginated form, with a pagination token to fetch the next page of results. + + Consider using the `list_imports` method to avoid having to handle pagination tokens manually. 
+ + Examples: + >>> results = index.list_imports_paginated(limit=5) + >>> results.pagination.next + eyJza2lwX3Bhc3QiOiI5OTMiLCJwcmVmaXgiOiI5OSJ9 + >>> results.data[0] + { + "id": "6", + "uri": "s3://dev-bulk-import-datasets-pub/10-records-dim-10/", + "status": "Completed", + "percent_complete": 100.0, + "records_imported": 10, + "created_at": "2024-09-06T14:52:02.567776+00:00", + "finished_at": "2024-09-06T14:52:28.130717+00:00" + } + >>> next_results = index.list_imports_paginated(limit=5, pagination_token=results.pagination.next) + """ + return self.bulk_import.list_paginated( + limit=limit, pagination_token=pagination_token, **kwargs + ) + + @validate_and_convert_errors + def describe_import(self, id: str) -> "ImportModel": + """ + Args: + id (str): The id of the import operation. This value is returned when + starting an import, and can be looked up using list_imports. + + Returns: + `ImportModel`: An object containing operation id, status, and other details. + + describe_import is used to get detailed information about a specific import operation. + """ + return self.bulk_import.describe(id=id) + + @validate_and_convert_errors + def cancel_import(self, id: str): + """Cancel an import operation. + + Args: + id (str): The id of the import operation to cancel. + """ + return self.bulk_import.cancel(id=id) diff --git a/pinecone/db_data/index_asyncio.py b/pinecone/db_data/index_asyncio.py index 6962e7f1..dee9d4e7 100644 --- a/pinecone/db_data/index_asyncio.py +++ b/pinecone/db_data/index_asyncio.py @@ -7,7 +7,7 @@ from .index_asyncio_interface import IndexAsyncioInterface from .query_results_aggregator import QueryResultsAggregator -from typing import Union, List, Optional, Dict, Any, Literal +from typing import Union, List, Optional, Dict, Any, Literal, AsyncIterator, TYPE_CHECKING from pinecone.config import ConfigBuilder @@ -47,7 +47,15 @@ from .vector_factory import VectorFactory from .query_results_aggregator import QueryNamespacesResults -from .features.bulk_import import ImportFeatureMixinAsyncio + +if TYPE_CHECKING: + from .resources.asyncio.bulk_import_asyncio import BulkImportResourceAsyncio + + from pinecone.core.openapi.db_data.models import ( + StartImportResponse, + ListImportsResponse, + ImportModel, + ) logger = logging.getLogger(__name__) @@ -75,7 +83,7 @@ def parse_query_response(response: QueryResponse): return response -class _IndexAsyncio(IndexAsyncioInterface, ImportFeatureMixinAsyncio): +class _IndexAsyncio(IndexAsyncioInterface): """ The `IndexAsyncio` class provides an asynchronous interface to interact with Pinecone indexes. @@ -129,6 +137,9 @@ async def main(): Failing to do this may result in error messages appearing from the underlyling aiohttp library. 
""" + _bulk_import_resource: Optional["BulkImportResourceAsyncio"] + """ @private """ + def __init__( self, api_key: str, @@ -159,9 +170,8 @@ def __init__( self._api_client = self._vector_api.api_client """ @private """ - # Pass the same api_client to the ImportFeatureMixinAsyncio - # This is important for async context management to work correctly - super().__init__(api_client=self._api_client) + self._bulk_import_resource = None + """ @private """ async def __aenter__(self): return self @@ -222,6 +232,15 @@ async def main(): """ await self._api_client.close() + @property + def bulk_import(self) -> "BulkImportResourceAsyncio": + """@private""" + if self._bulk_import_resource is None: + from .resources.asyncio.bulk_import_asyncio import BulkImportResourceAsyncio + + self._bulk_import_resource = BulkImportResourceAsyncio(api_client=self._api_client) + return self._bulk_import_resource + @validate_and_convert_errors async def upsert( self, @@ -521,3 +540,112 @@ async def search_records( def _openapi_kwargs(self, kwargs: Dict[str, Any]) -> Dict[str, Any]: return filter_dict(kwargs, OPENAPI_ENDPOINT_PARAMS) + + async def start_import( + self, + uri: str, + integration_id: Optional[str] = None, + error_mode: Optional[Literal["CONTINUE", "ABORT"]] = "CONTINUE", + ) -> "StartImportResponse": + """ + Args: + uri (str): The URI of the data to import. The URI must start with the scheme of a supported storage provider. + integration_id (Optional[str], optional): If your bucket requires authentication to access, you need to pass the id of your storage integration using this property. Defaults to None. + error_mode: Defaults to "CONTINUE". If set to "CONTINUE", the import operation will continue even if some + records fail to import. Pass "ABORT" to stop the import operation if any records fail to import. + + Returns: + `StartImportResponse`: Contains the id of the import operation. + + Import data from a storage provider into an index. The uri must start with the scheme of a supported + storage provider. For buckets that are not publicly readable, you will also need to separately configure + a storage integration and pass the integration id. + + Examples: + >>> from pinecone import Pinecone + >>> index = Pinecone().IndexAsyncio(host="example-index.svc.aped-4627-b74a.pinecone.io") + >>> await index.start_import(uri="s3://bucket-name/path/to/data.parquet") + { id: "1" } + + """ + return await self.bulk_import.start( + uri=uri, integration_id=integration_id, error_mode=error_mode + ) + + async def list_imports(self, **kwargs) -> AsyncIterator["ImportModel"]: + """ + Args: + limit (Optional[int]): The maximum number of operations to fetch in each network call. If unspecified, the server will use a default value. [optional] + pagination_token (Optional[str]): When there are multiple pages of results, a pagination token is returned in the response. The token can be used + to fetch the next page of results. [optional] + + Returns an async generator that yields each import operation. It automatically handles pagination tokens on your behalf so you can + easily iterate over all results. 
The `list_imports` method accepts all of the same arguments as `list_imports_paginated` + + ```python + async for op in index.list_imports(): + print(op) + ``` + """ + async for op in self.bulk_import.list(**kwargs): + yield op + + async def list_imports_paginated( + self, limit: Optional[int] = None, pagination_token: Optional[str] = None, **kwargs + ) -> "ListImportsResponse": + """ + Args: + limit (Optional[int]): The maximum number of ids to return. If unspecified, the server will use a default value. [optional] + pagination_token (Optional[str]): A token needed to fetch the next page of results. This token is returned + in the response if additional results are available. [optional] + + Returns: + `ListImportsResponse` object which contains the list of operations as ImportModel objects, pagination information, + and usage showing the number of read_units consumed. + + The `list_imports_paginated` operation returns information about import operations. + It returns operations in a paginated form, with a pagination token to fetch the next page of results. + + Consider using the `list_imports` method to avoid having to handle pagination tokens manually. + + Examples: + >>> results = await index.list_imports_paginated(limit=5) + >>> results.pagination.next + eyJza2lwX3Bhc3QiOiI5OTMiLCJwcmVmaXgiOiI5OSJ9 + >>> results.data[0] + { + "id": "6", + "uri": "s3://dev-bulk-import-datasets-pub/10-records-dim-10/", + "status": "Completed", + "percent_complete": 100.0, + "records_imported": 10, + "created_at": "2024-09-06T14:52:02.567776+00:00", + "finished_at": "2024-09-06T14:52:28.130717+00:00" + } + >>> next_results = await index.list_imports_paginated(limit=5, pagination_token=results.pagination.next) + + """ + return await self.bulk_import.list_paginated( + limit=limit, pagination_token=pagination_token, **kwargs + ) + + async def describe_import(self, id: str) -> "ImportModel": + """ + Args: + id (str): The id of the import operation. This value is returned when + starting an import, and can be looked up using list_imports. + + Returns: + ImportModel: An object containing operation id, status, and other details. + + `describe_import` is used to get detailed information about a specific import operation. + """ + return await self.bulk_import.describe(id=id) + + async def cancel_import(self, id: str): + """Cancel an import operation. + + Args: + id (str): The id of the import operation to cancel. 
+ """ + return await self.bulk_import.cancel(id=id) diff --git a/pinecone/db_data/features/bulk_import/bulk_import_asyncio.py b/pinecone/db_data/resources/asyncio/bulk_import_asyncio.py similarity index 92% rename from pinecone/db_data/features/bulk_import/bulk_import_asyncio.py rename to pinecone/db_data/resources/asyncio/bulk_import_asyncio.py index 0cd674d5..6d9cf88f 100644 --- a/pinecone/db_data/features/bulk_import/bulk_import_asyncio.py +++ b/pinecone/db_data/resources/asyncio/bulk_import_asyncio.py @@ -1,4 +1,4 @@ -from typing import Optional, Literal, AsyncIterator, List +from typing import Optional, Literal, AsyncIterator from pinecone.core.openapi.db_data.api.bulk_operations_api import AsyncioBulkOperationsApi @@ -10,17 +10,17 @@ ImportModel, ) -from .bulk_import_request_factory import BulkImportRequestFactory +from ..sync.bulk_import_request_factory import BulkImportRequestFactory for m in [StartImportResponse, ListImportsResponse, ImportModel]: install_json_repr_override(m) -class ImportFeatureMixinAsyncio: +class BulkImportResourceAsyncio: def __init__(self, api_client, **kwargs) -> None: self.__import_operations_api = AsyncioBulkOperationsApi(api_client) - async def start_import( + async def start( self, uri: str, integration_id: Optional[str] = None, @@ -52,7 +52,7 @@ async def start_import( ) return await self.__import_operations_api.start_bulk_import(req) - async def list_imports(self, **kwargs) -> AsyncIterator[List[ImportModel]]: + async def list(self, **kwargs) -> AsyncIterator["ImportModel"]: """ Args: limit (Optional[int]): The maximum number of operations to fetch in each network call. If unspecified, the server will use a default value. [optional] @@ -69,7 +69,7 @@ async def list_imports(self, **kwargs) -> AsyncIterator[List[ImportModel]]: """ done = False while not done: - results = await self.list_imports_paginated(**kwargs) + results = await self.list_paginated(**kwargs) if len(results.data) > 0: for op in results.data: yield op @@ -79,7 +79,7 @@ async def list_imports(self, **kwargs) -> AsyncIterator[List[ImportModel]]: else: done = True - async def list_imports_paginated( + async def list_paginated( self, limit: Optional[int] = None, pagination_token: Optional[str] = None, **kwargs ) -> ListImportsResponse: """ @@ -119,7 +119,7 @@ async def list_imports_paginated( ) return await self.__import_operations_api.list_bulk_imports(**args_dict) - async def describe_import(self, id: str) -> ImportModel: + async def describe(self, id: str) -> ImportModel: """ Args: id (str): The id of the import operation. This value is returned when @@ -133,7 +133,7 @@ async def describe_import(self, id: str) -> ImportModel: args = BulkImportRequestFactory.describe_import_args(id=id) return await self.__import_operations_api.describe_bulk_import(**args) - async def cancel_import(self, id: str): + async def cancel(self, id: str): """Cancel an import operation. 
Args: diff --git a/pinecone/db_data/features/bulk_import/bulk_import.py b/pinecone/db_data/resources/sync/bulk_import.py similarity index 94% rename from pinecone/db_data/features/bulk_import/bulk_import.py rename to pinecone/db_data/resources/sync/bulk_import.py index fdad3aa9..35a015d2 100644 --- a/pinecone/db_data/features/bulk_import/bulk_import.py +++ b/pinecone/db_data/resources/sync/bulk_import.py @@ -1,4 +1,4 @@ -from typing import Optional, Literal, Iterator, List, Union +from typing import Optional, Literal, Iterator, Union from pinecone.core.openapi.db_data.api.bulk_operations_api import BulkOperationsApi @@ -16,11 +16,11 @@ install_json_repr_override(m) -class ImportFeatureMixin: +class BulkImportResource: def __init__(self, api_client, **kwargs) -> None: self.__import_operations_api = BulkOperationsApi(api_client) - def start_import( + def start( self, uri: str, integration_id: Optional[str] = None, @@ -53,7 +53,7 @@ def start_import( ) return self.__import_operations_api.start_bulk_import(req) - def list_imports(self, **kwargs) -> Iterator[List[ImportModel]]: + def list(self, **kwargs) -> Iterator[ImportModel]: """ Args: limit (Optional[int]): The maximum number of operations to fetch in each network call. If unspecified, the server will use a default value. [optional] @@ -80,7 +80,7 @@ def list_imports(self, **kwargs) -> Iterator[List[ImportModel]]: """ done = False while not done: - results = self.list_imports_paginated(**kwargs) + results = self.list_paginated(**kwargs) if len(results.data) > 0: for op in results.data: yield op @@ -90,7 +90,7 @@ def list_imports(self, **kwargs) -> Iterator[List[ImportModel]]: else: done = True - def list_imports_paginated( + def list_paginated( self, limit: Optional[int] = None, pagination_token: Optional[str] = None, **kwargs ) -> ListImportsResponse: """ @@ -128,7 +128,7 @@ def list_imports_paginated( ) return self.__import_operations_api.list_bulk_imports(**args_dict) - def describe_import(self, id: str) -> ImportModel: + def describe(self, id: str) -> ImportModel: """ Args: id (str): The id of the import operation. This value is returned when @@ -142,7 +142,7 @@ def describe_import(self, id: str) -> ImportModel: args = BulkImportRequestFactory.describe_import_args(id=id) return self.__import_operations_api.describe_bulk_import(**args) - def cancel_import(self, id: str): + def cancel(self, id: str): """Cancel an import operation. 
Args: diff --git a/pinecone/db_data/features/bulk_import/bulk_import_request_factory.py b/pinecone/db_data/resources/sync/bulk_import_request_factory.py similarity index 100% rename from pinecone/db_data/features/bulk_import/bulk_import_request_factory.py rename to pinecone/db_data/resources/sync/bulk_import_request_factory.py diff --git a/pyproject.toml b/pyproject.toml index 7b987cbe..8691c2e1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -113,6 +113,7 @@ build-backend = "poetry.core.masonry.api" [tool.pytest.ini_options] asyncio_mode = "strict" +addopts = "-s -vv --log-cli-level=DEBUG" [tool.ruff] exclude = [ diff --git a/tests/dependency/__init__.py b/tests/dependency/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/dependency/asyncio-rest/test_sanity.py b/tests/dependency/asyncio-rest/test_sanity.py index 6c879378..98c069aa 100644 --- a/tests/dependency/asyncio-rest/test_sanity.py +++ b/tests/dependency/asyncio-rest/test_sanity.py @@ -17,7 +17,7 @@ def index_name(): @pytest.mark.asyncio -class TestSanityRest: +class TestSanityAsyncioRest: async def test_sanity(self, index_name): async with PineconeAsyncio() as pc: logger.info("Testing with index name: " + index_name) @@ -31,39 +31,37 @@ async def test_sanity(self, index_name): assert description.dimension == 2 logger.info("Index description: %s", description) - idx = pc.IndexAsyncio(host=description.host) - resp = await idx.upsert( - vectors=[("1", [1.0, 2.0]), ("2", [3.0, 4.0]), ("3", [5.0, 6.0])] - ) - logger.info("Upsert response: %s", resp) - - # Wait for index freshness - await asyncio.sleep(30) - - # Check the vector count reflects some data has been upserted - description = await idx.describe_index_stats() - logger.info("Index stats: %s", description) - assert description.dimension == 2 - assert description.total_vector_count >= 3 - - # Query for results - query_results = await idx.query(id="1", top_k=10, include_values=True) - logger.info("Query results: %s", query_results) - assert query_results.matches[0].id == "1" - assert len(query_results.matches) == 3 - - # Call a bulk import api method, should not raise an exception - async for i in idx.list_imports(): - assert i is not None - - # Call an inference method, should not raise an exception - from pinecone import EmbedModel - - resp = await pc.inference.embed( - model=EmbedModel.Multilingual_E5_Large, - inputs=["Hello, how are you?", "I am doing well, thank you for asking."], - parameters={"input_type": "passage", "truncate": "END"}, - ) - logger.info("Embed response: %s", resp) - - await idx.close() + async with pc.IndexAsyncio(host=description.host) as idx: + resp = await idx.upsert( + vectors=[("1", [1.0, 2.0]), ("2", [3.0, 4.0]), ("3", [5.0, 6.0])] + ) + logger.info("Upsert response: %s", resp) + + # Wait for index freshness + await asyncio.sleep(30) + + # Check the vector count reflects some data has been upserted + description = await idx.describe_index_stats() + logger.info("Index stats: %s", description) + assert description.dimension == 2 + assert description.total_vector_count >= 3 + + # Query for results + query_results = await idx.query(id="1", top_k=10, include_values=True) + logger.info("Query results: %s", query_results) + assert query_results.matches[0].id == "1" + assert len(query_results.matches) >= 3 + + # Call a bulk import api method, should not raise an exception + async for i in idx.list_imports(): + assert i is not None + + # Call an inference method, should not raise an exception + from pinecone import EmbedModel + + 
resp = await pc.inference.embed( + model=EmbedModel.Multilingual_E5_Large, + inputs=["Hello, how are you?", "I am doing well, thank you for asking."], + parameters={"input_type": "passage", "truncate": "END"}, + ) + logger.info("Embed response: %s", resp) diff --git a/tests/dependency/conftest.py b/tests/dependency/conftest.py new file mode 100644 index 00000000..76acad39 --- /dev/null +++ b/tests/dependency/conftest.py @@ -0,0 +1,3 @@ +import dotenv + +dotenv.load_dotenv() diff --git a/tests/dependency/rest/test_sanity.py b/tests/dependency/rest/test_sanity.py index f8ab56dd..2c835c51 100644 --- a/tests/dependency/rest/test_sanity.py +++ b/tests/dependency/rest/test_sanity.py @@ -48,7 +48,7 @@ def test_sanity(self, index_name, client): # Query for results query_results = idx.query(id="1", top_k=10, include_values=True) assert query_results.matches[0].id == "1" - assert len(query_results.matches) == 3 + assert len(query_results.matches) >= 3 # Call a bulk import api method, should not raise an exception for i in idx.list_imports(): @@ -63,3 +63,7 @@ def test_sanity(self, index_name, client): parameters={"input_type": "passage", "truncate": "END"}, ) logger.info("Embed response: %s", resp) + + # Call an assistant method, should not raise an exception + for i in client.assistant.list_assistants(): + assert i is not None From 374c8d67168dbd0bc720df964a613717c1e2428f Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Tue, 20 May 2025 10:04:55 -0400 Subject: [PATCH 4/6] Fix unit test --- tests/unit/data/test_bulk_import.py | 32 ++++++++++++++--------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/tests/unit/data/test_bulk_import.py b/tests/unit/data/test_bulk_import.py index c7ad5a14..3561b092 100644 --- a/tests/unit/data/test_bulk_import.py +++ b/tests/unit/data/test_bulk_import.py @@ -6,7 +6,7 @@ ImportErrorMode as ImportErrorModeGeneratedClass, ) -from pinecone.db_data.features.bulk_import import ImportFeatureMixin, ImportErrorMode +from pinecone.db_data.resources.sync.bulk_import import BulkImportResource, ImportErrorMode def build_client_w_faked_response(mocker, body: str, status: int = 200): @@ -19,11 +19,11 @@ def build_client_w_faked_response(mocker, body: str, status: int = 200): mock_request = mocker.patch.object( api_client.rest_client.pool_manager, "request", return_value=response ) - return ImportFeatureMixin(api_client=api_client), mock_request + return BulkImportResource(api_client=api_client), mock_request class TestBulkImportStartImport: - def test_start_import_minimal(self, mocker): + def test_start_minimal(self, mocker): body = """ { "id": "1" @@ -31,7 +31,7 @@ def test_start_import_minimal(self, mocker): """ client, mock_req = build_client_w_faked_response(mocker, body) - my_import = client.start_import("s3://path/to/file.parquet") + my_import = client.start("s3://path/to/file.parquet") # We made some overrides to the print behavior, so we need to # call it to ensure it doesn't raise an exception @@ -42,7 +42,7 @@ def test_start_import_minimal(self, mocker): assert my_import.to_dict() == {"id": "1"} assert my_import.__class__ == StartImportResponse - def test_start_import_with_kwargs(self, mocker): + def test_start_with_kwargs(self, mocker): body = """ { "id": "1" @@ -50,9 +50,7 @@ def test_start_import_with_kwargs(self, mocker): """ client, mock_req = build_client_w_faked_response(mocker, body) - my_import = client.start_import( - uri="s3://path/to/file.parquet", integration_id="123-456-789" - ) + my_import = client.start(uri="s3://path/to/file.parquet", 
integration_id="123-456-789") assert my_import.id == "1" assert my_import["id"] == "1" assert my_import.to_dict() == {"id": "1"} @@ -68,7 +66,7 @@ def test_start_import_with_kwargs(self, mocker): @pytest.mark.parametrize( "error_mode_input", [ImportErrorMode.CONTINUE, "Continue", "continue", "cONTINUE"] ) - def test_start_import_with_explicit_error_mode(self, mocker, error_mode_input): + def test_start_with_explicit_error_mode(self, mocker, error_mode_input): body = """ { "id": "1" @@ -76,14 +74,14 @@ def test_start_import_with_explicit_error_mode(self, mocker, error_mode_input): """ client, mock_req = build_client_w_faked_response(mocker, body) - client.start_import(uri="s3://path/to/file.parquet", error_mode=error_mode_input) + client.start(uri="s3://path/to/file.parquet", error_mode=error_mode_input) _, call_kwargs = mock_req.call_args assert ( call_kwargs["body"] == '{"uri": "s3://path/to/file.parquet", "errorMode": {"onError": "continue"}}' ) - def test_start_import_with_abort_error_mode(self, mocker): + def test_start_with_abort_error_mode(self, mocker): body = """ { "id": "1" @@ -91,14 +89,14 @@ def test_start_import_with_abort_error_mode(self, mocker): """ client, mock_req = build_client_w_faked_response(mocker, body) - client.start_import(uri="s3://path/to/file.parquet", error_mode=ImportErrorMode.ABORT) + client.start(uri="s3://path/to/file.parquet", error_mode=ImportErrorMode.ABORT) _, call_kwargs = mock_req.call_args assert ( call_kwargs["body"] == '{"uri": "s3://path/to/file.parquet", "errorMode": {"onError": "abort"}}' ) - def test_start_import_with_unknown_error_mode(self, mocker): + def test_start_with_unknown_error_mode(self, mocker): body = """ { "id": "1" @@ -107,7 +105,7 @@ def test_start_import_with_unknown_error_mode(self, mocker): client, mock_req = build_client_w_faked_response(mocker, body) with pytest.raises(ValueError) as e: - client.start_import(uri="s3://path/to/file.parquet", error_mode="unknown") + client.start(uri="s3://path/to/file.parquet", error_mode="unknown") assert "Invalid error_mode: unknown" in str(e.value) @@ -122,7 +120,7 @@ def test_start_invalid_uri(self, mocker): client, mock_req = build_client_w_faked_response(mocker, body, 400) with pytest.raises(PineconeApiException) as e: - client.start_import(uri="invalid path") + client.start(uri="invalid path") assert e.value.status == 400 assert e.value.body == body @@ -134,7 +132,7 @@ def test_no_arguments(self, mocker): client, mock_req = build_client_w_faked_response(mocker, "") with pytest.raises(TypeError) as e: - client.start_import() + client.start() assert "missing 1 required positional argument" in str(e.value) @@ -162,7 +160,7 @@ def test_describe_import(self, mocker): """ client, mock_req = build_client_w_faked_response(mocker, body) - my_import = client.describe_import(id="1") + my_import = client.describe(id="1") # We made some overrides to the print behavior, so we need to # call it to ensure it doesn't raise an exception From 8b90e34b7a14d62b8f632dc5a441a2c90bda9042 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Tue, 20 May 2025 11:07:48 -0400 Subject: [PATCH 5/6] Reduce retry count --- .github/actions/test-data-asyncio/action.yaml | 2 +- .github/actions/test-data-plane/action.yaml | 2 +- .github/workflows/testing-integration-asyncio.yaml | 4 ++-- .github/workflows/testing-integration.yaml | 4 ++-- .github/workflows/testing-unit.yaml | 4 ++-- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/actions/test-data-asyncio/action.yaml 
b/.github/actions/test-data-asyncio/action.yaml index e81c7c9a..11ebb605 100644 --- a/.github/actions/test-data-asyncio/action.yaml +++ b/.github/actions/test-data-asyncio/action.yaml @@ -42,7 +42,7 @@ runs: - name: Run data plane tests id: data-plane-asyncio-tests shell: bash - run: poetry run pytest tests/integration/data_asyncio --retries 5 --retry-delay 35 -s -vv --log-cli-level=DEBUG + run: poetry run pytest tests/integration/data_asyncio --retries 2 --retry-delay 35 -s -vv --log-cli-level=DEBUG env: PINECONE_API_KEY: ${{ inputs.PINECONE_API_KEY }} PINECONE_ADDITIONAL_HEADERS: ${{ inputs.PINECONE_ADDITIONAL_HEADERS }} diff --git a/.github/actions/test-data-plane/action.yaml b/.github/actions/test-data-plane/action.yaml index 0cbc3023..fd9f3ee3 100644 --- a/.github/actions/test-data-plane/action.yaml +++ b/.github/actions/test-data-plane/action.yaml @@ -56,7 +56,7 @@ runs: - name: Run data plane tests id: data-plane-tests shell: bash - run: poetry run pytest tests/integration/data --retries 5 --retry-delay 35 -s -vv --log-cli-level=DEBUG + run: poetry run pytest tests/integration/data --retries 2 --retry-delay 35 -s -vv --log-cli-level=DEBUG env: PINECONE_API_KEY: ${{ inputs.PINECONE_API_KEY }} PINECONE_ADDITIONAL_HEADERS: ${{ inputs.PINECONE_ADDITIONAL_HEADERS }} diff --git a/.github/workflows/testing-integration-asyncio.yaml b/.github/workflows/testing-integration-asyncio.yaml index b45f789f..6c3a4efa 100644 --- a/.github/workflows/testing-integration-asyncio.yaml +++ b/.github/workflows/testing-integration-asyncio.yaml @@ -26,7 +26,7 @@ jobs: - name: Run data plane tests id: data-plane-asyncio-tests shell: bash - run: poetry run pytest tests/integration/data_asyncio --retries 5 --retry-delay 35 -s -vv --log-cli-level=DEBUG + run: poetry run pytest tests/integration/data_asyncio --retries 2 --retry-delay 35 -s -vv --log-cli-level=DEBUG env: PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }} PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client", "x-environment": "preprod-aws-0"}' @@ -52,7 +52,7 @@ jobs: include_asyncio: true include_dev: true - name: 'db_control asyncio' - run: poetry run pytest tests/integration/control_asyncio/*.py --retries 5 --retry-delay 35 -s -vv --log-cli-level=DEBUG + run: poetry run pytest tests/integration/control_asyncio/*.py --retries 2 --retry-delay 35 -s -vv --log-cli-level=DEBUG env: PINECONE_API_KEY: '${{ secrets.PINECONE_API_KEY }}' PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client", "x-environment": "preprod-aws-0"}' diff --git a/.github/workflows/testing-integration.yaml b/.github/workflows/testing-integration.yaml index f71ef3a1..a57c6a80 100644 --- a/.github/workflows/testing-integration.yaml +++ b/.github/workflows/testing-integration.yaml @@ -51,7 +51,7 @@ jobs: with: include_asyncio: true - name: 'Run integration tests' - run: poetry run pytest tests/integration/inference --retries 5 --retry-delay 35 -s -vv --log-cli-level=DEBUG + run: poetry run pytest tests/integration/inference --retries 2 --retry-delay 35 -s -vv --log-cli-level=DEBUG env: PINECONE_API_KEY: '${{ secrets.PINECONE_API_KEY }}' PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client", "x-environment": "preprod-aws-0"}' @@ -169,7 +169,7 @@ jobs: - name: Setup Poetry uses: ./.github/actions/setup-poetry - name: 'Run integration tests (REST)' - run: poetry run pytest tests/integration/control/serverless --retries 5 --retry-delay 35 -s -vv --log-cli-level=DEBUG + run: poetry run pytest tests/integration/control/serverless 
--retries 2 --retry-delay 35 -s -vv --log-cli-level=DEBUG env: PINECONE_API_KEY: '${{ secrets.PINECONE_API_KEY }}' PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client", "x-environment": "preprod-aws-0"}' diff --git a/.github/workflows/testing-unit.yaml b/.github/workflows/testing-unit.yaml index f97dfacf..8024b3fe 100644 --- a/.github/workflows/testing-unit.yaml +++ b/.github/workflows/testing-unit.yaml @@ -58,7 +58,7 @@ jobs: include_asyncio: true - name: Run unit tests (REST) - run: poetry run pytest --cov=pinecone --timeout=120 tests/unit --retries 5 --retry-delay 35 -s -vv --log-cli-level=DEBUG + run: poetry run pytest --cov=pinecone --timeout=120 tests/unit --retries 2 --retry-delay 35 -s -vv --log-cli-level=DEBUG - name: Run unit tests (GRPC) if: ${{ matrix.use_grpc == true }} - run: poetry run pytest --cov=pinecone/grpc --timeout=120 tests/unit_grpc --retries 5 --retry-delay 35 -s -vv --log-cli-level=DEBUG + run: poetry run pytest --cov=pinecone/grpc --timeout=120 tests/unit_grpc --retries 2 --retry-delay 35 -s -vv --log-cli-level=DEBUG From 706351ef6973148c2361afee408c56d0944cbbbd Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Tue, 20 May 2025 12:33:40 -0400 Subject: [PATCH 6/6] Run tests on prod --- .github/actions/cleanup-all/action.yml | 2 +- .../actions/create-index-legacy/action.yml | 2 +- .github/actions/create-index/action.yml | 2 +- .github/actions/delete-index/action.yml | 2 +- .github/actions/test-data-asyncio/action.yaml | 2 +- .github/actions/test-data-plane/action.yaml | 2 +- .../test-dependency-asyncio-rest/action.yaml | 2 +- .../actions/test-dependency-grpc/action.yaml | 2 +- .../actions/test-dependency-rest/action.yaml | 2 +- .../workflows/testing-dependency-asyncio.yaml | 2 +- .../workflows/testing-dependency-grpc.yaml | 6 +- .../workflows/testing-dependency-rest.yaml | 6 +- .github/workflows/testing-dependency.yaml | 4 +- .../testing-integration-asyncio.yaml | 4 +- .github/workflows/testing-integration.yaml | 10 ++-- tests/integration/data_asyncio/test_query.py | 55 ++++++++++++------- 16 files changed, 61 insertions(+), 44 deletions(-) diff --git a/.github/actions/cleanup-all/action.yml b/.github/actions/cleanup-all/action.yml index a62da2c8..09703be5 100644 --- a/.github/actions/cleanup-all/action.yml +++ b/.github/actions/cleanup-all/action.yml @@ -12,7 +12,7 @@ inputs: PINECONE_ADDITIONAL_HEADERS: description: 'Additional headers to send with the request' required: false - default: '{"sdk-test-suite": "pinecone-python-client", "x-environment": "preprod-aws-0"}' + default: '{"sdk-test-suite": "pinecone-python-client"}' runs: using: 'composite' diff --git a/.github/actions/create-index-legacy/action.yml b/.github/actions/create-index-legacy/action.yml index a22d5679..9bef1e57 100644 --- a/.github/actions/create-index-legacy/action.yml +++ b/.github/actions/create-index-legacy/action.yml @@ -26,7 +26,7 @@ inputs: PINECONE_ADDITIONAL_HEADERS: description: 'Additional headers to send with the request' required: false - default: '{"sdk-test-suite": "pinecone-python-client", "x-environment": "preprod-aws-0"}' + default: '{"sdk-test-suite": "pinecone-python-client"}' runs: using: 'composite' diff --git a/.github/actions/create-index/action.yml b/.github/actions/create-index/action.yml index 95cfc2be..7511db8e 100644 --- a/.github/actions/create-index/action.yml +++ b/.github/actions/create-index/action.yml @@ -28,7 +28,7 @@ inputs: PINECONE_ADDITIONAL_HEADERS: description: 'Additional headers to send with the request' required: false - default: 
'{"sdk-test-suite": "pinecone-python-client", "x-environment": "preprod-aws-0"}' + default: '{"sdk-test-suite": "pinecone-python-client"}' outputs: diff --git a/.github/actions/delete-index/action.yml b/.github/actions/delete-index/action.yml index 9e35c83a..be62cacc 100644 --- a/.github/actions/delete-index/action.yml +++ b/.github/actions/delete-index/action.yml @@ -11,7 +11,7 @@ inputs: PINECONE_ADDITIONAL_HEADERS: description: 'Additional headers to send with the request' required: false - default: '{"sdk-test-suite": "pinecone-python-client", "x-environment": "preprod-aws-0"}' + default: '{"sdk-test-suite": "pinecone-python-client"}' runs: diff --git a/.github/actions/test-data-asyncio/action.yaml b/.github/actions/test-data-asyncio/action.yaml index 11ebb605..032db268 100644 --- a/.github/actions/test-data-asyncio/action.yaml +++ b/.github/actions/test-data-asyncio/action.yaml @@ -18,7 +18,7 @@ inputs: PINECONE_ADDITIONAL_HEADERS: description: 'Additional headers to send with the request' required: false - default: '{"sdk-test-suite": "pinecone-python-client", "x-environment": "preprod-aws-0"}' + default: '{"sdk-test-suite": "pinecone-python-client"}' python_version: description: 'The version of Python to use' required: false diff --git a/.github/actions/test-data-plane/action.yaml b/.github/actions/test-data-plane/action.yaml index fd9f3ee3..0f21e2b7 100644 --- a/.github/actions/test-data-plane/action.yaml +++ b/.github/actions/test-data-plane/action.yaml @@ -21,7 +21,7 @@ inputs: PINECONE_ADDITIONAL_HEADERS: description: 'Additional headers to send with the request' required: false - default: '{"sdk-test-suite": "pinecone-python-client", "x-environment": "preprod-aws-0"}' + default: '{"sdk-test-suite": "pinecone-python-client"}' python_version: description: 'The version of Python to use' required: false diff --git a/.github/actions/test-dependency-asyncio-rest/action.yaml b/.github/actions/test-dependency-asyncio-rest/action.yaml index 1efe4a8a..45240a7b 100644 --- a/.github/actions/test-dependency-asyncio-rest/action.yaml +++ b/.github/actions/test-dependency-asyncio-rest/action.yaml @@ -8,7 +8,7 @@ inputs: PINECONE_ADDITIONAL_HEADERS: description: 'Additional headers to send with the request' required: false - default: '{"sdk-test-suite": "pinecone-python-client", "x-environment": "preprod-aws-0"}' + default: '{"sdk-test-suite": "pinecone-python-client"}' index_name: description: 'The name of the index' required: true diff --git a/.github/actions/test-dependency-grpc/action.yaml b/.github/actions/test-dependency-grpc/action.yaml index 4ba6d9ac..04083780 100644 --- a/.github/actions/test-dependency-grpc/action.yaml +++ b/.github/actions/test-dependency-grpc/action.yaml @@ -8,7 +8,7 @@ inputs: PINECONE_ADDITIONAL_HEADERS: description: 'Additional headers to send with the request' required: false - default: '{"sdk-test-suite": "pinecone-python-client", "x-environment": "preprod-aws-0"}' + default: '{"sdk-test-suite": "pinecone-python-client"}' index_name: description: 'The name of the index' required: true diff --git a/.github/actions/test-dependency-rest/action.yaml b/.github/actions/test-dependency-rest/action.yaml index a3487cf3..8c319349 100644 --- a/.github/actions/test-dependency-rest/action.yaml +++ b/.github/actions/test-dependency-rest/action.yaml @@ -8,7 +8,7 @@ inputs: PINECONE_ADDITIONAL_HEADERS: description: 'Additional headers to send with the request' required: false - default: '{"sdk-test-suite": "pinecone-python-client", "x-environment": "preprod-aws-0"}' + default: 
'{"sdk-test-suite": "pinecone-python-client"}' index_name: description: 'The name of the index' required: true diff --git a/.github/workflows/testing-dependency-asyncio.yaml b/.github/workflows/testing-dependency-asyncio.yaml index c1b9e757..c3c309af 100644 --- a/.github/workflows/testing-dependency-asyncio.yaml +++ b/.github/workflows/testing-dependency-asyncio.yaml @@ -27,5 +27,5 @@ jobs: python_version: '${{ matrix.python_version }}' index_name: '${{ inputs.index_name }}' PINECONE_API_KEY: '${{ secrets.PINECONE_API_KEY }}' - PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client", "x-environment": "preprod-aws-0"}' + PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client"}' aiohttp_version: '${{ matrix.aiohttp_version }}' diff --git a/.github/workflows/testing-dependency-grpc.yaml b/.github/workflows/testing-dependency-grpc.yaml index 80be0065..e8fb64d8 100644 --- a/.github/workflows/testing-dependency-grpc.yaml +++ b/.github/workflows/testing-dependency-grpc.yaml @@ -53,7 +53,7 @@ jobs: python_version: '${{ matrix.python_version }}' index_name: '${{ inputs.index_name }}' PINECONE_API_KEY: '${{ secrets.PINECONE_API_KEY }}' - PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client", "x-environment": "preprod-aws-0"}' + PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client"}' grpcio_version: '${{ matrix.grpcio_version }}' lz4_version: '${{ matrix.lz4_version }}' protobuf_version: '${{ matrix.protobuf_version }}' @@ -87,7 +87,7 @@ jobs: python_version: '${{ matrix.python_version }}' index_name: '${{ inputs.index_name }}' PINECONE_API_KEY: '${{ secrets.PINECONE_API_KEY }}' - PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client", "x-environment": "preprod-aws-0"}' + PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client"}' grpcio_version: '${{ matrix.grpcio_version }}' lz4_version: '${{ matrix.lz4_version }}' protobuf_version: '${{ matrix.protobuf_version }}' @@ -120,7 +120,7 @@ jobs: python_version: '${{ matrix.python_version }}' index_name: '${{ inputs.index_name }}' PINECONE_API_KEY: '${{ secrets.PINECONE_API_KEY }}' - PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client", "x-environment": "preprod-aws-0"}' + PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client"}' grpcio_version: '${{ matrix.grpcio_version }}' lz4_version: '${{ matrix.lz4_version }}' protobuf_version: '${{ matrix.protobuf_version }}' diff --git a/.github/workflows/testing-dependency-rest.yaml b/.github/workflows/testing-dependency-rest.yaml index 403b6ee8..39a587c7 100644 --- a/.github/workflows/testing-dependency-rest.yaml +++ b/.github/workflows/testing-dependency-rest.yaml @@ -30,7 +30,7 @@ jobs: python_version: '${{ matrix.python_version }}' index_name: '${{ inputs.index_name }}' PINECONE_API_KEY: '${{ secrets.PINECONE_API_KEY }}' - PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client", "x-environment": "preprod-aws-0"}' + PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client"}' urllib3_version: '${{ matrix.urllib3_version }}' @@ -54,7 +54,7 @@ jobs: python_version: '${{ matrix.python_version }}' index_name: '${{ inputs.index_name }}' PINECONE_API_KEY: '${{ secrets.PINECONE_API_KEY }}' - PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client", "x-environment": "preprod-aws-0"}' + PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client"}' urllib3_version: '${{ matrix.urllib3_version }}' 
   dependency-matrix-rest-313:
@@ -77,5 +77,5 @@ jobs:
           python_version: '${{ matrix.python_version }}'
           index_name: '${{ inputs.index_name }}'
           PINECONE_API_KEY: '${{ secrets.PINECONE_API_KEY }}'
-          PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client", "x-environment": "preprod-aws-0"}'
+          PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client"}'
           urllib3_version: '${{ matrix.urllib3_version }}'
diff --git a/.github/workflows/testing-dependency.yaml b/.github/workflows/testing-dependency.yaml
index 21d613b4..5e660f07 100644
--- a/.github/workflows/testing-dependency.yaml
+++ b/.github/workflows/testing-dependency.yaml
@@ -19,7 +19,7 @@ jobs:
       name_prefix: depstest-${{ github.run_number }}
       dimension: 2
       PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
-      PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client", "x-environment": "preprod-aws-0"}'
+      PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client"}'

   dependency-test-rest:
     uses: './.github/workflows/testing-dependency-rest.yaml'
@@ -56,4 +56,4 @@ jobs:
     with:
       index_name: '${{ needs.deps-test-setup.outputs.index_name }}'
       PINECONE_API_KEY: '${{ secrets.PINECONE_API_KEY }}'
-      PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client", "x-environment": "preprod-aws-0"}'
+      PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client"}'
diff --git a/.github/workflows/testing-integration-asyncio.yaml b/.github/workflows/testing-integration-asyncio.yaml
index 6c3a4efa..99f28ab4 100644
--- a/.github/workflows/testing-integration-asyncio.yaml
+++ b/.github/workflows/testing-integration-asyncio.yaml
@@ -29,7 +29,7 @@ jobs:
         run: poetry run pytest tests/integration/data_asyncio --retries 2 --retry-delay 35 -s -vv --log-cli-level=DEBUG
         env:
           PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
-          PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client", "x-environment": "preprod-aws-0"}'
+          PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client"}'

   db-control-asyncio:
     name: db_control asyncio
@@ -55,4 +55,4 @@ jobs:
         run: poetry run pytest tests/integration/control_asyncio/*.py --retries 2 --retry-delay 35 -s -vv --log-cli-level=DEBUG
         env:
           PINECONE_API_KEY: '${{ secrets.PINECONE_API_KEY }}'
-          PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client", "x-environment": "preprod-aws-0"}'
+          PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client"}'
diff --git a/.github/workflows/testing-integration.yaml b/.github/workflows/testing-integration.yaml
index a57c6a80..357ee564 100644
--- a/.github/workflows/testing-integration.yaml
+++ b/.github/workflows/testing-integration.yaml
@@ -8,7 +8,7 @@ jobs:
     runs-on: ubuntu-latest
     env:
       PINECONE_API_KEY: '${{ secrets.PINECONE_API_KEY }}'
-      PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client", "x-environment": "preprod-aws-0"}'
+      PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client"}'
     strategy:
       fail-fast: false
       matrix:
@@ -54,7 +54,7 @@ jobs:
         run: poetry run pytest tests/integration/inference --retries 2 --retry-delay 35 -s -vv --log-cli-level=DEBUG
         env:
           PINECONE_API_KEY: '${{ secrets.PINECONE_API_KEY }}'
-          PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client", "x-environment": "preprod-aws-0"}'
+          PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client"}'

   plugins:
     name: Plugin installation
@@ -76,7 +76,7 @@ jobs:
         run: poetry run pytest tests/integration/plugins --retries 2 --retry-delay 35 -s -vv --log-cli-level=DEBUG
         env:
           PINECONE_API_KEY: '${{ secrets.PINECONE_API_KEY }}'
-          PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client", "x-environment": "preprod-aws-0"}'
+          PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client"}'
@@ -107,7 +107,7 @@ jobs:
           metric: 'cosine'
           spec: '{ "serverless": { "region": "us-west-2", "cloud": "aws" }}'
           PINECONE_API_KEY: '${{ secrets.PINECONE_API_KEY }}'
-          PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client", "x-environment": "preprod-aws-0"}'
+          PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client"}'
           freshness_timeout_seconds: 600
           skip_weird_id_tests: 'true'
@@ -172,6 +172,6 @@ jobs:
         run: poetry run pytest tests/integration/control/serverless --retries 2 --retry-delay 35 -s -vv --log-cli-level=DEBUG
         env:
           PINECONE_API_KEY: '${{ secrets.PINECONE_API_KEY }}'
-          PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client", "x-environment": "preprod-aws-0"}'
+          PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client"}'
           SERVERLESS_CLOUD: '${{ matrix.testConfig.serverless.cloud }}'
           SERVERLESS_REGION: '${{ matrix.testConfig.serverless.region }}'
diff --git a/tests/integration/data_asyncio/test_query.py b/tests/integration/data_asyncio/test_query.py
index 6f3193fc..02f49bf0 100644
--- a/tests/integration/data_asyncio/test_query.py
+++ b/tests/integration/data_asyncio/test_query.py
@@ -4,50 +4,57 @@ from .conftest import build_asyncioindex_client, poll_for_freshness
 from ..helpers import random_string, embedding_values
+import logging
+
+logger = logging.getLogger(__name__)
+

 @pytest.mark.asyncio
 @pytest.mark.parametrize("target_namespace", [random_string(20)])
 async def test_query(index_host, dimension, target_namespace):
     asyncio_idx = build_asyncioindex_client(index_host)
+    logger.info(f"Testing query on index {index_host}")
+    logger.info(f"Target namespace: {target_namespace}")
+    logger.info(f"Dimension: {dimension}")

     def emb():
         return embedding_values(dimension)

     # Upsert with tuples
-    await asyncio_idx.upsert(
-        vectors=[("1", emb()), ("2", emb()), ("3", emb())], namespace=target_namespace
-    )
+    tuple_vectors = [("1", emb()), ("2", emb()), ("3", emb())]
+    logger.info(f"Upserting {len(tuple_vectors)} vectors")
+    await asyncio_idx.upsert(vectors=tuple_vectors, namespace=target_namespace)

     # Upsert with objects
-    await asyncio_idx.upsert(
-        vectors=[
-            Vector(id="4", values=emb(), metadata={"genre": "action"}),
-            Vector(id="5", values=emb(), metadata={"genre": "action"}),
-            Vector(id="6", values=emb(), metadata={"genre": "horror"}),
-        ],
-        namespace=target_namespace,
-    )
+    object_vectors = [
+        Vector(id="4", values=emb(), metadata={"genre": "action"}),
+        Vector(id="5", values=emb(), metadata={"genre": "action"}),
+        Vector(id="6", values=emb(), metadata={"genre": "horror"}),
+    ]
+    logger.info(f"Upserting {len(object_vectors)} vectors")
+    await asyncio_idx.upsert(vectors=object_vectors, namespace=target_namespace)

     # Upsert with dict
-    await asyncio_idx.upsert(
-        vectors=[
-            {"id": "7", "values": emb()},
-            {"id": "8", "values": emb()},
-            {"id": "9", "values": emb()},
-        ],
-        namespace=target_namespace,
-    )
+    dict_vectors = [
+        {"id": "7", "values": emb()},
+        {"id": "8", "values": emb()},
+        {"id": "9", "values": emb()},
+    ]
+    logger.info(f"Upserting {len(dict_vectors)} vectors")
+    await asyncio_idx.upsert(vectors=dict_vectors, namespace=target_namespace)

     await poll_for_freshness(asyncio_idx, target_namespace, 9)

     # Check the vector count reflects some data has been upserted
     stats = await asyncio_idx.describe_index_stats()
+    logger.info(f"Index stats: {stats}")
     assert stats.total_vector_count >= 9
     # default namespace could have other stuff from other tests
     if target_namespace != "":
         assert stats.namespaces[target_namespace].vector_count == 9

     results1 = await asyncio_idx.query(top_k=4, namespace=target_namespace, vector=emb())
+    logger.info(f"Results 1: {results1}")
     assert results1 is not None
     assert len(results1.matches) == 4
     assert results1.namespace == target_namespace
@@ -67,6 +74,7 @@ def emb():
     results2 = await asyncio_idx.query(
         top_k=4, namespace=target_namespace, vector=emb(), include_values=True
     )
+    logger.info(f"Results 2: {results2}")
     assert results2 is not None
     assert len(results2.matches) == 4
     assert results2.namespace == target_namespace
@@ -81,6 +89,7 @@ def emb():
         include_metadata=True,
         include_values=True,
     )
+    logger.info(f"Results 3: {results3}")
     assert results3 is not None
     assert len(results3.matches) == 2
     assert results3.namespace == target_namespace
@@ -97,6 +106,7 @@ def emb():
         include_metadata=True,
         include_values=True,
     )
+    logger.info(f"Results 4: {results4}")
     assert results4 is not None
     assert len(results4.matches) == 1
     assert results4.namespace == target_namespace
@@ -113,6 +123,7 @@ def emb():
         include_metadata=True,
         include_values=True,
     )
+    logger.info(f"Results 5: {results5}")
     assert results5 is not None
     assert len(results5.matches) == 0
     assert results5.namespace == target_namespace
@@ -122,27 +133,32 @@ def emb():
     # Query by id
     results6 = await asyncio_idx.query(top_k=4, id="1", namespace=target_namespace)
+    logger.info(f"Results 6: {results6}")
     assert results6 is not None
     assert len(results6.matches) == 4

     # Query by id, when id doesn't exist gives empty result set
     results7 = await asyncio_idx.query(top_k=10, id="unknown", namespace=target_namespace)
+    logger.info(f"Results 7: {results7}")
     assert results7 is not None
     assert len(results7.matches) == 0

     # When missing required top_k kwarg
     with pytest.raises(TypeError) as e:
         await asyncio_idx.query(id="1", namespace=target_namespace)
+    logger.info(f"Error Msg 1: {e.value}")
     assert "top_k" in str(e.value)

     # When incorrectly passing top_k as a positional argument
     with pytest.raises(TypeError) as e:
         await asyncio_idx.query(4, id="1", namespace=target_namespace)
+    logger.info(f"Error Msg 2: {e.value}")
     assert "top_k" in str(e.value)

     # When trying to pass both id and vector as query params
     with pytest.raises(ValueError) as e:
         await asyncio_idx.query(top_k=10, id="1", vector=emb(), namespace=target_namespace)
+    logger.info(f"Error Msg 3: {e.value}")
     assert "Cannot specify both `id` and `vector`" in str(e.value)

     # When trying to pass sparse vector as query params to dense index
@@ -152,5 +168,6 @@ def emb():
             sparse_vector={"indices": [i for i in range(dimension)], "values": emb()},
             namespace=target_namespace,
         )
+    logger.info(f"Error Msg 4: {e.value}")
     assert "Cannot query index with dense 'vector_type' with only sparse vector" in str(e.value)
     await asyncio_idx.close()