Skip to content

Commit ba4428b

Browse files
CXmartinsAD101\z004nm6m
authored and committed
Fixed support for VectorStore search filters with multiple $like/$ilike/$in/$nin on same column (langchain-ai#217)
Fixed issue langchain-ai#216. This pull request adds a filter test case that was failing, together with the fix for the internal function _handle_field_filter. The fix appends a unique identifier to the bind parameter names generated for the $in, $nin, $like and $ilike operators, so that several filters on the same column no longer collide. --------- Co-authored-by: AD101\z004nm6m <[email protected]>
1 parent fe554f6 commit ba4428b

File tree

2 files changed

+38
-31
lines changed

2 files changed

+38
-31
lines changed

langchain_postgres/v2/async_vectorstore.py

Lines changed: 32 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1095,46 +1095,48 @@ def _handle_field_filter(
10951095
operator = "$eq"
10961096
filter_value = value
10971097

1098+
suffix_id = str(uuid.uuid4()).split("-")[0]
10981099
if operator in COMPARISONS_TO_NATIVE:
10991100
# Then we implement an equality filter
11001101
# native is trusted input
11011102
native = COMPARISONS_TO_NATIVE[operator]
1102-
id = str(uuid.uuid4()).split("-")[0]
1103-
return f"{field} {native} :{field}_{id}", {f"{field}_{id}": filter_value}
1103+
param_name = f"{field}_{suffix_id}"
1104+
return f"{field} {native} :{param_name}", {f"{param_name}": filter_value}
11041105
elif operator == "$between":
11051106
# Use AND with two comparisons
11061107
low, high = filter_value
1107-
1108-
return f"({field} BETWEEN :{field}_low AND :{field}_high)", {
1109-
f"{field}_low": low,
1110-
f"{field}_high": high,
1108+
low_param_name = f"{field}_low_{suffix_id}"
1109+
high_param_name = f"{field}_high_{suffix_id}"
1110+
return f"({field} BETWEEN :{low_param_name} AND :{high_param_name})", {
1111+
f"{low_param_name}": low,
1112+
f"{high_param_name}": high,
11111113
}
1112-
elif operator in {"$in", "$nin", "$like", "$ilike"}:
1114+
elif operator in {"$in", "$nin"}:
11131115
# We'll do force coercion to text
1114-
if operator in {"$in", "$nin"}:
1115-
for val in filter_value:
1116-
if not isinstance(val, (str, int, float)):
1117-
raise NotImplementedError(
1118-
f"Unsupported type: {type(val)} for value: {val}"
1119-
)
1120-
1121-
if isinstance(val, bool): # b/c bool is an instance of int
1122-
raise NotImplementedError(
1123-
f"Unsupported type: {type(val)} for value: {val}"
1124-
)
1125-
1126-
if operator in {"$in"}:
1127-
return f"{field} = ANY(:{field}_in)", {f"{field}_in": filter_value}
1128-
elif operator in {"$nin"}:
1129-
return f"{field} <> ALL (:{field}_nin)", {f"{field}_nin": filter_value}
1130-
elif operator in {"$like"}:
1131-
return f"({field} LIKE :{field}_like)", {f"{field}_like": filter_value}
1132-
elif operator in {"$ilike"}:
1133-
return f"({field} ILIKE :{field}_ilike)", {
1134-
f"{field}_ilike": filter_value
1116+
for val in filter_value:
1117+
if not isinstance(val, (str, int, float)):
1118+
raise NotImplementedError(
1119+
f"Unsupported type: {type(val)} for value: {val}"
1120+
)
1121+
1122+
if isinstance(val, bool): # b/c bool is an instance of int
1123+
raise NotImplementedError(
1124+
f"Unsupported type: {type(val)} for value: {val}"
1125+
)
1126+
param_name = f"{field}_{operator.replace('$', '')}_{suffix_id}"
1127+
if operator == "$in":
1128+
return f"{field} = ANY(:{param_name})", {f"{param_name}": filter_value}
1129+
else: # i.e. $nin
1130+
return f"{field} <> ALL (:{param_name})", {
1131+
f"{param_name}": filter_value
11351132
}
1136-
else:
1137-
raise NotImplementedError()
1133+
1134+
elif operator in {"$like", "$ilike"}:
1135+
param_name = f"{field}_{operator.replace('$', '')}_{suffix_id}"
1136+
if operator == "$like":
1137+
return f"({field} LIKE :{param_name})", {f"{param_name}": filter_value}
1138+
else: # i.e. $ilike
1139+
return f"({field} ILIKE :{param_name})", {f"{param_name}": filter_value}
11381140
elif operator == "$exists":
11391141
if not isinstance(filter_value, bool):
11401142
raise ValueError(

tests/unit_tests/fixtures/metadata_filtering_data.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@
210210
{"name": {"$nin": ["Smart Fitness Tracker", "Stainless Steel Water Bottle"]}},
211211
["WH001", "EC002"],
212212
),
213-
## with numeric fields
213+
# with numeric fields
214214
(
215215
{"available_quantity": {"$nin": [50, 0, 10]}},
216216
["FT004"],
@@ -225,6 +225,11 @@
225225
{"name": {"$like": "%less%"}}, # adam and jane
226226
["WH001", "WB003"],
227227
),
228+
# Test combination of $like and $or
229+
(
230+
{"$or": [{"code": {"$like": "WH00%"}}, {"code": {"$like": "EC00%"}}]},
231+
["WH001", "EC002"],
232+
),
228233
# These involve the special operator $exists
229234
(
230235
{"tags": {"$exists": False}},

0 commit comments

Comments
 (0)