Skip to content
Merged
6 changes: 2 additions & 4 deletions libs/community/langchain_community/utils/math.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,8 @@ def cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray:

X = np.array(X, dtype=np.float32)
Y = np.array(Y, dtype=np.float32)
Z = 1 - simd.cdist(X, Y, metric="cosine")
if isinstance(Z, float):
return np.array([Z])
return np.array(Z)
Z = 1 - np.array(simd.cdist(X, Y, metric="cosine"))
return Z
except ImportError:
logger.debug(
"Unable to import simsimd, defaulting to NumPy implementation. If you want "
Expand Down
165 changes: 154 additions & 11 deletions libs/community/poetry.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions libs/community/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ httpx-sse = {version = "^0.4.0", optional = true}
pyjwt = {version = "^2.8.0", optional = true}
oracledb = {version = "^2.2.0", optional = true}
cloudpathlib = { version = "^0.18", optional = true }
simsimd = {version = "^4.3.1", optional = true}


[tool.poetry.group.test]
Expand Down Expand Up @@ -289,6 +290,7 @@ extended_testing = [
"httpx-sse",
"pyjwt",
"oracledb",
"simsimd",
]

[tool.ruff]
Expand Down
19 changes: 18 additions & 1 deletion libs/community/tests/unit_tests/utils/test_math.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Test math utility functions."""
import importlib
from typing import List

import numpy as np
Expand Down Expand Up @@ -68,11 +69,27 @@ def test_cosine_similarity_score_threshold(
assert np.allclose(expected_scores, actual_scores)


def test_cosine_similarity_top_k_and_score_threshold(
def invoke_cosine_similarity_top_k_score_threshold(
    X: List[List[float]], Y: List[List[float]]
) -> None:
    """Assert the top-k result of ``cosine_similarity_top_k`` at threshold 0.8.

    Calls ``cosine_similarity_top_k(X, Y, score_threshold=0.8)`` and checks
    both the returned index pairs and the returned scores against fixed
    expectations. Called by the test variants that run with and without
    simsimd.

    Args:
        X: First collection of vectors passed to ``cosine_similarity_top_k``.
        Y: Second collection of vectors passed to ``cosine_similarity_top_k``.
    """
    got_idxs, got_scores = cosine_similarity_top_k(X, Y, score_threshold=0.8)

    want_idxs = [(0, 0), (2, 2), (1, 2), (0, 2)]
    want_scores = [1.0, 0.93419873, 0.87038828, 0.83743579]

    # Index pairs must match exactly; scores are compared with a tolerance.
    assert got_idxs == want_idxs
    assert np.allclose(want_scores, got_scores)


def test_cosine_similarity_top_k_and_score_threshold(
    X: List[List[float]], Y: List[List[float]]
) -> None:
    """Run the top-k/score-threshold check when simsimd is not installed.

    Args:
        X: First collection of vectors forwarded to the shared check.
        Y: Second collection of vectors forwarded to the shared check.

    Raises:
        ValueError: If simsimd can be found in the current environment,
            because this variant is meant to run without it.
    """
    # A bare ``import importlib`` does not guarantee that the
    # ``importlib.util`` submodule has been loaded, so import it explicitly
    # instead of relying on some other module having done so.
    import importlib.util

    if importlib.util.find_spec("simsimd"):
        raise ValueError("test should be run without simsimd installed.")
    invoke_cosine_similarity_top_k_score_threshold(X, Y)


@pytest.mark.requires("simsimd")
def test_cosine_similarity_top_k_and_score_threshold_with_simsimd(
    X: List[List[float]], Y: List[List[float]]
) -> None:
    """Repeat the top-k/score-threshold check with simsimd required.

    Same assertions as the variant without simsimd; the ``requires`` marker
    declares that simsimd must be available for this test.

    Args:
        X: First collection of vectors forwarded to the shared check.
        Y: Second collection of vectors forwarded to the shared check.
    """
    invoke_cosine_similarity_top_k_score_threshold(X=X, Y=Y)
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,8 @@ def cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray:

X = np.array(X, dtype=np.float32)
Y = np.array(Y, dtype=np.float32)
Z = 1 - simd.cdist(X, Y, metric="cosine")
if isinstance(Z, float):
return np.array([Z])
return np.array(Z)
Z = 1 - np.array(simd.cdist(X, Y, metric="cosine"))
return Z
except ImportError:
logger.debug(
"Unable to import simsimd, defaulting to NumPy implementation. If you want "
Expand Down
6 changes: 2 additions & 4 deletions libs/partners/mongodb/langchain_mongodb/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,8 @@ def cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray:

X = np.array(X, dtype=np.float32)
Y = np.array(Y, dtype=np.float32)
Z = 1 - simd.cdist(X, Y, metric="cosine")
if isinstance(Z, float):
return np.array([Z])
return np.array(Z)
Z = 1 - np.array(simd.cdist(X, Y, metric="cosine"))
return Z
except ImportError:
logger.debug(
"Unable to import simsimd, defaulting to NumPy implementation. If you want "
Expand Down
6 changes: 2 additions & 4 deletions libs/partners/pinecone/langchain_pinecone/_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,8 @@ def cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray:

X = np.array(X, dtype=np.float32)
Y = np.array(Y, dtype=np.float32)
Z = 1 - simd.cdist(X, Y, metric="cosine")
if isinstance(Z, float):
return np.array([Z])
return np.array(Z)
Z = 1 - np.array(simd.cdist(X, Y, metric="cosine"))
return Z
except ImportError:
X_norm = np.linalg.norm(X, axis=1)
Y_norm = np.linalg.norm(Y, axis=1)
Expand Down
6 changes: 2 additions & 4 deletions libs/partners/qdrant/langchain_qdrant/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,8 @@ def cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray:

X = np.array(X, dtype=np.float32)
Y = np.array(Y, dtype=np.float32)
Z = 1 - simd.cdist(X, Y, metric="cosine")
if isinstance(Z, float):
return np.array([Z])
return np.array(Z)
Z = 1 - np.array(simd.cdist(X, Y, metric="cosine"))
return Z
except ImportError:
X_norm = np.linalg.norm(X, axis=1)
Y_norm = np.linalg.norm(Y, axis=1)
Expand Down