Skip to content

Commit 6176153

Browse files
fatmelonYang Qiao (from Dev Box)marlenezw
authored
Add DiskANN for Semantic Cache (#42)
Co-authored-by: Yang Qiao (from Dev Box) <[email protected]> Co-authored-by: Marlene <[email protected]>
1 parent 9c3735c commit 6176153

File tree

3 files changed

+223
-4
lines changed

3 files changed

+223
-4
lines changed

libs/azure-ai/langchain_azure_ai/vectorstores/azure_cosmos_db_mongo_vcore.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -476,9 +476,9 @@ def _similarity_search_with_score(
476476
kind: Type of vector index to create.
477477
Possible options are:
478478
- vector-ivf
479-
- vector-hnsw: available as a preview feature only,
480-
to enable visit https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/preview-features
479+
- vector-hnsw
481480
- vector-diskann: available as a preview feature only,
481+
to enable visit https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/preview-features
482482
pre_filter: Pre-filtering function
483483
ef_search: The size of the dynamic candidate list for search
484484
(40 by default). A higher value provides better

libs/azure-ai/langchain_azure_ai/vectorstores/cache.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,9 @@ def __init__(
152152
dimensions: int = 1536,
153153
m: int = 16,
154154
ef_construction: int = 64,
155+
max_degree: int = 32,
156+
l_build: int = 50,
157+
l_search: int = 40,
155158
ef_search: int = 40,
156159
score_threshold: Optional[float] = None,
157160
application_name: str = "LangChainAzure-CDBMongoVCore-SemanticCache-Python",
@@ -182,7 +185,8 @@ def __init__(
182185
kind: Type of vector index to create.
183186
Possible options are:
184187
- vector-ivf
185-
- vector-hnsw: available as a preview feature only,
188+
- vector-hnsw
189+
- vector-diskann: available as a preview feature only,
186190
to enable visit https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/preview-features
187191
m: The max number of connections per layer (16 by default, minimum
188192
value is 2, maximum value is 100). Higher m is suitable for datasets
@@ -196,6 +200,15 @@ def __init__(
196200
ef_search: The size of the dynamic candidate list for search
197201
(40 by default). A higher value provides better
198202
recall at the cost of speed.
203+
max_degree: Max number of neighbors.
204+
Default value is 32, range from 20 to 2048.
205+
Only vector-diskann search supports this for now.
206+
l_build: Number of candidate neighbors evaluated while building the index.
207+
Default value is 50, range from 10 to 500.
208+
Only vector-diskann search supports this for now.
209+
l_search: Size of the dynamic candidate list used while searching the index.
210+
Default value is 40, range from 10 to 10000.
211+
Only vector-diskann search supports this for now.
199212
score_threshold: Maximum score used to filter the vector search documents.
200213
application_name: Application name for the client for tracking and logging
201214
"""
@@ -216,6 +229,9 @@ def __init__(
216229
self.kind = kind
217230
self.m = m
218231
self.ef_construction = ef_construction
232+
self.max_degree = max_degree
233+
self.l_build = l_build
234+
self.l_search = l_search
219235
self.ef_search = ef_search
220236
self.score_threshold = score_threshold
221237
self._cache_dict: Dict[str, AzureCosmosDBMongoVCoreVectorSearch] = {}
@@ -263,6 +279,8 @@ def _get_llm_cache(self, llm_string: str) -> AzureCosmosDBMongoVCoreVectorSearch
263279
self.kind,
264280
self.m,
265281
self.ef_construction,
282+
self.max_degree,
283+
self.l_build,
266284
)
267285

268286
return vectorstore
@@ -277,6 +295,7 @@ def lookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]:
277295
k=1,
278296
kind=self.kind,
279297
ef_search=self.ef_search,
298+
l_search=self.l_search,
280299
score_threshold=self.score_threshold, # type: ignore[arg-type]
281300
)
282301
if results:

libs/azure-ai/tests/integration_tests/cache/test_azure_cosmos_db_mongo_vcore_cache.py

Lines changed: 201 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,15 @@
3030

3131
model_name = os.getenv("OPENAI_EMBEDDINGS_MODEL_NAME", "text-embedding-ada-002")
3232
num_lists = 3
33-
dimensions = 10
33+
dimensions = 1536
3434
similarity_algorithm = CosmosDBSimilarityType.COS
3535
kind = CosmosDBVectorSearchType.VECTOR_IVF
3636
m = 16
3737
ef_construction = 64
3838
ef_search = 40
39+
max_degree = 32
40+
l_build = 50
41+
l_search = 40
3942
score_threshold = 0.1
4043
application_name = "LANGCHAIN_CACHING_PYTHON"
4144

@@ -54,6 +57,7 @@ def azure_openai_embeddings() -> Any:
5457
model=model_name,
5558
chunk_size=1,
5659
)
60+
5761
return openai_embeddings
5862

5963

@@ -76,6 +80,9 @@ def test_azure_cosmos_db_semantic_cache(
7680
dimensions=dimensions,
7781
m=m,
7882
ef_construction=ef_construction,
83+
max_degree=max_degree,
84+
l_build=l_build,
85+
l_search=l_search,
7986
ef_search=ef_search,
8087
score_threshold=score_threshold,
8188
application_name=application_name,
@@ -115,6 +122,9 @@ def test_azure_cosmos_db_semantic_cache_inner_product(
115122
dimensions=dimensions,
116123
m=m,
117124
ef_construction=ef_construction,
125+
max_degree=max_degree,
126+
l_build=l_build,
127+
l_search=l_search,
118128
ef_search=ef_search,
119129
score_threshold=score_threshold,
120130
application_name=application_name,
@@ -154,6 +164,9 @@ def test_azure_cosmos_db_semantic_cache_multi(
154164
dimensions=dimensions,
155165
m=m,
156166
ef_construction=ef_construction,
167+
max_degree=max_degree,
168+
l_build=l_build,
169+
l_search=l_search,
157170
ef_search=ef_search,
158171
score_threshold=score_threshold,
159172
application_name=application_name,
@@ -195,6 +208,9 @@ def test_azure_cosmos_db_semantic_cache_multi_inner_product(
195208
dimensions=dimensions,
196209
m=m,
197210
ef_construction=ef_construction,
211+
max_degree=max_degree,
212+
l_build=l_build,
213+
l_search=l_search,
198214
ef_search=ef_search,
199215
score_threshold=score_threshold,
200216
application_name=application_name,
@@ -236,6 +252,9 @@ def test_azure_cosmos_db_semantic_cache_hnsw(
236252
dimensions=dimensions,
237253
m=m,
238254
ef_construction=ef_construction,
255+
max_degree=max_degree,
256+
l_build=l_build,
257+
l_search=l_search,
239258
ef_search=ef_search,
240259
score_threshold=score_threshold,
241260
application_name=application_name,
@@ -275,6 +294,9 @@ def test_azure_cosmos_db_semantic_cache_inner_product_hnsw(
275294
dimensions=dimensions,
276295
m=m,
277296
ef_construction=ef_construction,
297+
max_degree=max_degree,
298+
l_build=l_build,
299+
l_search=l_search,
278300
ef_search=ef_search,
279301
score_threshold=score_threshold,
280302
application_name=application_name,
@@ -314,6 +336,9 @@ def test_azure_cosmos_db_semantic_cache_multi_hnsw(
314336
dimensions=dimensions,
315337
m=m,
316338
ef_construction=ef_construction,
339+
max_degree=max_degree,
340+
l_build=l_build,
341+
l_search=l_search,
317342
ef_search=ef_search,
318343
score_threshold=score_threshold,
319344
application_name=application_name,
@@ -355,6 +380,181 @@ def test_azure_cosmos_db_semantic_cache_multi_inner_product_hnsw(
355380
dimensions=dimensions,
356381
m=m,
357382
ef_construction=ef_construction,
383+
max_degree=max_degree,
384+
l_build=l_build,
385+
l_search=l_search,
386+
ef_search=ef_search,
387+
score_threshold=score_threshold,
388+
application_name=application_name,
389+
)
390+
)
391+
392+
llm = AzureAIChatCompletionsModel()
393+
params = llm.dict()
394+
params["stop"] = None
395+
llm_string = str(sorted([(k, v) for k, v in params.items()]))
396+
get_llm_cache().update(
397+
"foo", llm_string, [Generation(text="fizz"), Generation(text="Buzz")]
398+
)
399+
400+
# foo and bar will have the same embedding produced by AzureAIEmbeddingsModel
401+
cache_output = get_llm_cache().lookup("bar", llm_string)
402+
assert cache_output == [Generation(text="fizz"), Generation(text="Buzz")]
403+
404+
# clear the cache
405+
get_llm_cache().clear(llm_string=llm_string)
406+
407+
408+
@pytest.mark.requires("pymongo")
409+
@pytest.mark.skipif(
410+
not _has_env_vars(), reason="Missing Azure CosmosDB Mongo vCore env. vars"
411+
)
412+
def test_azure_cosmos_db_semantic_cache_diskann(
413+
azure_openai_embeddings: OpenAIEmbeddings,
414+
) -> None:
415+
set_llm_cache(
416+
AzureCosmosDBMongoVCoreSemanticCache(
417+
cosmosdb_connection_string=CONNECTION_STRING,
418+
embedding=azure_openai_embeddings,
419+
database_name=DB_NAME,
420+
collection_name=COLLECTION_NAME,
421+
num_lists=num_lists,
422+
similarity=similarity_algorithm,
423+
kind=CosmosDBVectorSearchType.VECTOR_DISKANN,
424+
dimensions=dimensions,
425+
m=m,
426+
ef_construction=ef_construction,
427+
max_degree=max_degree,
428+
l_build=l_build,
429+
l_search=l_search,
430+
ef_search=ef_search,
431+
score_threshold=score_threshold,
432+
application_name=application_name,
433+
)
434+
)
435+
436+
llm = AzureAIChatCompletionsModel()
437+
params = llm.dict()
438+
params["stop"] = None
439+
llm_string = str(sorted([(k, v) for k, v in params.items()]))
440+
get_llm_cache().update("foo", llm_string, [Generation(text="fizz")])
441+
442+
# foo and bar will have the same embedding produced by AzureAIEmbeddingsModel
443+
cache_output = get_llm_cache().lookup("bar", llm_string)
444+
assert cache_output == [Generation(text="fizz")]
445+
446+
# clear the cache
447+
get_llm_cache().clear(llm_string=llm_string)
448+
449+
450+
@pytest.mark.requires("pymongo")
451+
@pytest.mark.skipif(
452+
not _has_env_vars(), reason="Missing Azure CosmosDB Mongo vCore env. vars"
453+
)
454+
def test_azure_cosmos_db_semantic_cache_inner_product_diskann(
455+
azure_openai_embeddings: OpenAIEmbeddings,
456+
) -> None:
457+
set_llm_cache(
458+
AzureCosmosDBMongoVCoreSemanticCache(
459+
cosmosdb_connection_string=CONNECTION_STRING,
460+
embedding=azure_openai_embeddings,
461+
database_name=DB_NAME,
462+
collection_name=COLLECTION_NAME,
463+
num_lists=num_lists,
464+
similarity=CosmosDBSimilarityType.IP,
465+
kind=CosmosDBVectorSearchType.VECTOR_DISKANN,
466+
dimensions=dimensions,
467+
m=m,
468+
ef_construction=ef_construction,
469+
max_degree=max_degree,
470+
l_build=l_build,
471+
l_search=l_search,
472+
ef_search=ef_search,
473+
score_threshold=score_threshold,
474+
application_name=application_name,
475+
)
476+
)
477+
478+
llm = AzureAIChatCompletionsModel()
479+
params = llm.dict()
480+
params["stop"] = None
481+
llm_string = str(sorted([(k, v) for k, v in params.items()]))
482+
get_llm_cache().update("foo", llm_string, [Generation(text="fizz")])
483+
484+
# foo and bar will have the same embedding produced by AzureAIEmbeddingsModel
485+
cache_output = get_llm_cache().lookup("bar", llm_string)
486+
assert cache_output == [Generation(text="fizz")]
487+
488+
# clear the cache
489+
get_llm_cache().clear(llm_string=llm_string)
490+
491+
492+
@pytest.mark.requires("pymongo")
493+
@pytest.mark.skipif(
494+
not _has_env_vars(), reason="Missing Azure CosmosDB Mongo vCore env. vars"
495+
)
496+
def test_azure_cosmos_db_semantic_cache_multi_diskann(
497+
azure_openai_embeddings: OpenAIEmbeddings,
498+
) -> None:
499+
set_llm_cache(
500+
AzureCosmosDBMongoVCoreSemanticCache(
501+
cosmosdb_connection_string=CONNECTION_STRING,
502+
embedding=azure_openai_embeddings,
503+
database_name=DB_NAME,
504+
collection_name=COLLECTION_NAME,
505+
num_lists=num_lists,
506+
similarity=similarity_algorithm,
507+
kind=CosmosDBVectorSearchType.VECTOR_DISKANN,
508+
dimensions=dimensions,
509+
m=m,
510+
ef_construction=ef_construction,
511+
max_degree=max_degree,
512+
l_build=l_build,
513+
l_search=l_search,
514+
ef_search=ef_search,
515+
score_threshold=score_threshold,
516+
application_name=application_name,
517+
)
518+
)
519+
520+
llm = AzureAIChatCompletionsModel()
521+
params = llm.dict()
522+
params["stop"] = None
523+
llm_string = str(sorted([(k, v) for k, v in params.items()]))
524+
get_llm_cache().update(
525+
"foo", llm_string, [Generation(text="fizz"), Generation(text="Buzz")]
526+
)
527+
528+
# foo and bar will have the same embedding produced by AzureAIEmbeddingsModel
529+
cache_output = get_llm_cache().lookup("bar", llm_string)
530+
assert cache_output == [Generation(text="fizz"), Generation(text="Buzz")]
531+
532+
# clear the cache
533+
get_llm_cache().clear(llm_string=llm_string)
534+
535+
536+
@pytest.mark.requires("pymongo")
537+
@pytest.mark.skipif(
538+
not _has_env_vars(), reason="Missing Azure CosmosDB Mongo vCore env. vars"
539+
)
540+
def test_azure_cosmos_db_semantic_cache_multi_inner_product_diskann(
541+
azure_openai_embeddings: OpenAIEmbeddings,
542+
) -> None:
543+
set_llm_cache(
544+
AzureCosmosDBMongoVCoreSemanticCache(
545+
cosmosdb_connection_string=CONNECTION_STRING,
546+
embedding=azure_openai_embeddings,
547+
database_name=DB_NAME,
548+
collection_name=COLLECTION_NAME,
549+
num_lists=num_lists,
550+
similarity=CosmosDBSimilarityType.IP,
551+
kind=CosmosDBVectorSearchType.VECTOR_DISKANN,
552+
dimensions=dimensions,
553+
m=m,
554+
ef_construction=ef_construction,
555+
max_degree=max_degree,
556+
l_build=l_build,
557+
l_search=l_search,
358558
ef_search=ef_search,
359559
score_threshold=score_threshold,
360560
application_name=application_name,

0 commit comments

Comments
 (0)