Add DiskANN for Semantic Cache (#42)

fatmelon · Yang Qiao (from Dev Box) · marlenezw · web-flow · commit 6176153e788a · 2025-02-13T14:14:43.000Z
Co-authored-by: Yang Qiao (from Dev Box) <yangqiao@microsoft.com>
Co-authored-by: Marlene <57748216+marlenezw@users.noreply.github.com>
diff --git a/libs/azure-ai/langchain_azure_ai/vectorstores/azure_cosmos_db_mongo_vcore.py b/libs/azure-ai/langchain_azure_ai/vectorstores/azure_cosmos_db_mongo_vcore.py
@@ -476,9 +476,9 @@ def _similarity_search_with_score(
             kind: Type of vector index to create.
                 Possible options are:
                     - vector-ivf
-                    - vector-hnsw: available as a preview feature only,
-                                   to enable visit https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/preview-features
+                    - vector-hnsw
+                    - vector-diskann: available as a preview feature only;
+                                      to enable it, visit https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/preview-features
             pre_filter: Pre-filtering function
             ef_search: The size of the dynamic candidate list for search
                        (40 by default). A higher value provides better
diff --git a/libs/azure-ai/langchain_azure_ai/vectorstores/cache.py b/libs/azure-ai/langchain_azure_ai/vectorstores/cache.py
@@ -152,6 +152,9 @@ def __init__(
         dimensions: int = 1536,
         m: int = 16,
         ef_construction: int = 64,
+        max_degree: int = 32,
+        l_build: int = 50,
+        l_search: int = 40,
         ef_search: int = 40,
         score_threshold: Optional[float] = None,
         application_name: str = "LangChainAzure-CDBMongoVCore-SemanticCache-Python",
@@ -182,7 +185,8 @@ def __init__(
             kind: Type of vector index to create.
                 Possible options are:
                     - vector-ivf
-                    - vector-hnsw: available as a preview feature only,
+                    - vector-hnsw
+                    - vector-diskann: available as a preview feature only,
                                    to enable visit https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/preview-features
             m: The max number of connections per layer (16 by default, minimum
                value is 2, maximum value is 100). Higher m is suitable for datasets
@@ -196,6 +200,15 @@ def __init__(
             ef_search: The size of the dynamic candidate list for search
                        (40 by default). A higher value provides better
                        recall at the cost of speed.
+            max_degree: Maximum number of edges (neighbors) per graph node.
+                Default value is 32, range from 20 to 2048.
+                Only vector-diskann search supports this for now.
+            l_build: Candidate list size used while building the index.
+                Default value is 50, range from 10 to 500.
+                Only vector-diskann search supports this for now.
+            l_search: Size of the dynamic candidate list used while searching.
+                Default value is 40, range from 10 to 10000.
+                Only vector-diskann search supports this for now.
             score_threshold: Maximum score used to filter the vector search documents.
             application_name: Application name for the client for tracking and logging
         """
@@ -216,6 +229,9 @@ def __init__(
         self.kind = kind
         self.m = m
         self.ef_construction = ef_construction
+        self.max_degree = max_degree
+        self.l_build = l_build
+        self.l_search = l_search
         self.ef_search = ef_search
         self.score_threshold = score_threshold
         self._cache_dict: Dict[str, AzureCosmosDBMongoVCoreVectorSearch] = {}
@@ -263,6 +279,8 @@ def _get_llm_cache(self, llm_string: str) -> AzureCosmosDBMongoVCoreVectorSearch
                 self.kind,
                 self.m,
                 self.ef_construction,
+                self.max_degree,
+                self.l_build,
             )
 
         return vectorstore
@@ -277,6 +295,7 @@ def lookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]:
             k=1,
             kind=self.kind,
             ef_search=self.ef_search,
+            l_search=self.l_search,
             score_threshold=self.score_threshold,  # type: ignore[arg-type]
         )
         if results:
diff --git a/libs/azure-ai/tests/integration_tests/cache/test_azure_cosmos_db_mongo_vcore_cache.py b/libs/azure-ai/tests/integration_tests/cache/test_azure_cosmos_db_mongo_vcore_cache.py
@@ -30,12 +30,15 @@
 
 model_name = os.getenv("OPENAI_EMBEDDINGS_MODEL_NAME", "text-embedding-ada-002")
 num_lists = 3
-dimensions = 10
+dimensions = 1536
 similarity_algorithm = CosmosDBSimilarityType.COS
 kind = CosmosDBVectorSearchType.VECTOR_IVF
 m = 16
 ef_construction = 64
 ef_search = 40
+max_degree = 32
+l_build = 50
+l_search = 40
 score_threshold = 0.1
 application_name = "LANGCHAIN_CACHING_PYTHON"
 
@@ -54,6 +57,7 @@ def azure_openai_embeddings() -> Any:
         model=model_name,
         chunk_size=1,
     )
+
     return openai_embeddings
 
 
@@ -76,6 +80,9 @@ def test_azure_cosmos_db_semantic_cache(
             dimensions=dimensions,
             m=m,
             ef_construction=ef_construction,
+            max_degree=max_degree,
+            l_build=l_build,
+            l_search=l_search,
             ef_search=ef_search,
             score_threshold=score_threshold,
             application_name=application_name,
@@ -115,6 +122,9 @@ def test_azure_cosmos_db_semantic_cache_inner_product(
             dimensions=dimensions,
             m=m,
             ef_construction=ef_construction,
+            max_degree=max_degree,
+            l_build=l_build,
+            l_search=l_search,
             ef_search=ef_search,
             score_threshold=score_threshold,
             application_name=application_name,
@@ -154,6 +164,9 @@ def test_azure_cosmos_db_semantic_cache_multi(
             dimensions=dimensions,
             m=m,
             ef_construction=ef_construction,
+            max_degree=max_degree,
+            l_build=l_build,
+            l_search=l_search,
             ef_search=ef_search,
             score_threshold=score_threshold,
             application_name=application_name,
@@ -195,6 +208,9 @@ def test_azure_cosmos_db_semantic_cache_multi_inner_product(
             dimensions=dimensions,
             m=m,
             ef_construction=ef_construction,
+            max_degree=max_degree,
+            l_build=l_build,
+            l_search=l_search,
             ef_search=ef_search,
             score_threshold=score_threshold,
             application_name=application_name,
@@ -236,6 +252,9 @@ def test_azure_cosmos_db_semantic_cache_hnsw(
             dimensions=dimensions,
             m=m,
             ef_construction=ef_construction,
+            max_degree=max_degree,
+            l_build=l_build,
+            l_search=l_search,
             ef_search=ef_search,
             score_threshold=score_threshold,
             application_name=application_name,
@@ -275,6 +294,9 @@ def test_azure_cosmos_db_semantic_cache_inner_product_hnsw(
             dimensions=dimensions,
             m=m,
             ef_construction=ef_construction,
+            max_degree=max_degree,
+            l_build=l_build,
+            l_search=l_search,
             ef_search=ef_search,
             score_threshold=score_threshold,
             application_name=application_name,
@@ -314,6 +336,9 @@ def test_azure_cosmos_db_semantic_cache_multi_hnsw(
             dimensions=dimensions,
             m=m,
             ef_construction=ef_construction,
+            max_degree=max_degree,
+            l_build=l_build,
+            l_search=l_search,
             ef_search=ef_search,
             score_threshold=score_threshold,
             application_name=application_name,
@@ -355,6 +380,181 @@ def test_azure_cosmos_db_semantic_cache_multi_inner_product_hnsw(
             dimensions=dimensions,
             m=m,
             ef_construction=ef_construction,
+            max_degree=max_degree,
+            l_build=l_build,
+            l_search=l_search,
+            ef_search=ef_search,
+            score_threshold=score_threshold,
+            application_name=application_name,
+        )
+    )
+
+    llm = AzureAIChatCompletionsModel()
+    params = llm.dict()
+    params["stop"] = None
+    llm_string = str(sorted([(k, v) for k, v in params.items()]))
+    get_llm_cache().update(
+        "foo", llm_string, [Generation(text="fizz"), Generation(text="Buzz")]
+    )
+
+    # foo and bar will have the same embedding produced by AzureAIEmbeddingsModel
+    cache_output = get_llm_cache().lookup("bar", llm_string)
+    assert cache_output == [Generation(text="fizz"), Generation(text="Buzz")]
+
+    # clear the cache
+    get_llm_cache().clear(llm_string=llm_string)
+
+
+@pytest.mark.requires("pymongo")
+@pytest.mark.skipif(
+    not _has_env_vars(), reason="Missing Azure CosmosDB Mongo vCore env. vars"
+)
+def test_azure_cosmos_db_semantic_cache_diskann(
+    azure_openai_embeddings: OpenAIEmbeddings,
+) -> None:
+    set_llm_cache(
+        AzureCosmosDBMongoVCoreSemanticCache(
+            cosmosdb_connection_string=CONNECTION_STRING,
+            embedding=azure_openai_embeddings,
+            database_name=DB_NAME,
+            collection_name=COLLECTION_NAME,
+            num_lists=num_lists,
+            similarity=similarity_algorithm,
+            kind=CosmosDBVectorSearchType.VECTOR_DISKANN,
+            dimensions=dimensions,
+            m=m,
+            ef_construction=ef_construction,
+            max_degree=max_degree,
+            l_build=l_build,
+            l_search=l_search,
+            ef_search=ef_search,
+            score_threshold=score_threshold,
+            application_name=application_name,
+        )
+    )
+
+    llm = AzureAIChatCompletionsModel()
+    params = llm.dict()
+    params["stop"] = None
+    llm_string = str(sorted([(k, v) for k, v in params.items()]))
+    get_llm_cache().update("foo", llm_string, [Generation(text="fizz")])
+
+    # foo and bar will have the same embedding produced by AzureAIEmbeddingsModel
+    cache_output = get_llm_cache().lookup("bar", llm_string)
+    assert cache_output == [Generation(text="fizz")]
+
+    # clear the cache
+    get_llm_cache().clear(llm_string=llm_string)
+
+
+@pytest.mark.requires("pymongo")
+@pytest.mark.skipif(
+    not _has_env_vars(), reason="Missing Azure CosmosDB Mongo vCore env. vars"
+)
+def test_azure_cosmos_db_semantic_cache_inner_product_diskann(
+    azure_openai_embeddings: OpenAIEmbeddings,
+) -> None:
+    set_llm_cache(
+        AzureCosmosDBMongoVCoreSemanticCache(
+            cosmosdb_connection_string=CONNECTION_STRING,
+            embedding=azure_openai_embeddings,
+            database_name=DB_NAME,
+            collection_name=COLLECTION_NAME,
+            num_lists=num_lists,
+            similarity=CosmosDBSimilarityType.IP,
+            kind=CosmosDBVectorSearchType.VECTOR_DISKANN,
+            dimensions=dimensions,
+            m=m,
+            ef_construction=ef_construction,
+            max_degree=max_degree,
+            l_build=l_build,
+            l_search=l_search,
+            ef_search=ef_search,
+            score_threshold=score_threshold,
+            application_name=application_name,
+        )
+    )
+
+    llm = AzureAIChatCompletionsModel()
+    params = llm.dict()
+    params["stop"] = None
+    llm_string = str(sorted([(k, v) for k, v in params.items()]))
+    get_llm_cache().update("foo", llm_string, [Generation(text="fizz")])
+
+    # foo and bar will have the same embedding produced by AzureAIEmbeddingsModel
+    cache_output = get_llm_cache().lookup("bar", llm_string)
+    assert cache_output == [Generation(text="fizz")]
+
+    # clear the cache
+    get_llm_cache().clear(llm_string=llm_string)
+
+
+@pytest.mark.requires("pymongo")
+@pytest.mark.skipif(
+    not _has_env_vars(), reason="Missing Azure CosmosDB Mongo vCore env. vars"
+)
+def test_azure_cosmos_db_semantic_cache_multi_diskann(
+    azure_openai_embeddings: OpenAIEmbeddings,
+) -> None:
+    set_llm_cache(
+        AzureCosmosDBMongoVCoreSemanticCache(
+            cosmosdb_connection_string=CONNECTION_STRING,
+            embedding=azure_openai_embeddings,
+            database_name=DB_NAME,
+            collection_name=COLLECTION_NAME,
+            num_lists=num_lists,
+            similarity=similarity_algorithm,
+            kind=CosmosDBVectorSearchType.VECTOR_DISKANN,
+            dimensions=dimensions,
+            m=m,
+            ef_construction=ef_construction,
+            max_degree=max_degree,
+            l_build=l_build,
+            l_search=l_search,
+            ef_search=ef_search,
+            score_threshold=score_threshold,
+            application_name=application_name,
+        )
+    )
+
+    llm = AzureAIChatCompletionsModel()
+    params = llm.dict()
+    params["stop"] = None
+    llm_string = str(sorted([(k, v) for k, v in params.items()]))
+    get_llm_cache().update(
+        "foo", llm_string, [Generation(text="fizz"), Generation(text="Buzz")]
+    )
+
+    # foo and bar will have the same embedding produced by AzureAIEmbeddingsModel
+    cache_output = get_llm_cache().lookup("bar", llm_string)
+    assert cache_output == [Generation(text="fizz"), Generation(text="Buzz")]
+
+    # clear the cache
+    get_llm_cache().clear(llm_string=llm_string)
+
+
+@pytest.mark.requires("pymongo")
+@pytest.mark.skipif(
+    not _has_env_vars(), reason="Missing Azure CosmosDB Mongo vCore env. vars"
+)
+def test_azure_cosmos_db_semantic_cache_multi_inner_product_diskann(
+    azure_openai_embeddings: OpenAIEmbeddings,
+) -> None:
+    set_llm_cache(
+        AzureCosmosDBMongoVCoreSemanticCache(
+            cosmosdb_connection_string=CONNECTION_STRING,
+            embedding=azure_openai_embeddings,
+            database_name=DB_NAME,
+            collection_name=COLLECTION_NAME,
+            num_lists=num_lists,
+            similarity=CosmosDBSimilarityType.IP,
+            kind=CosmosDBVectorSearchType.VECTOR_DISKANN,
+            dimensions=dimensions,
+            m=m,
+            ef_construction=ef_construction,
+            max_degree=max_degree,
+            l_build=l_build,
+            l_search=l_search,
             ef_search=ef_search,
             score_threshold=score_threshold,
             application_name=application_name,