Skip to content

Commit dd663f4

Browse files
committed
Aligned the inserts with the way metadata is handled during uploads
1 parent 4f051cd commit dd663f4

File tree

2 files changed

+38
-10
lines changed

2 files changed

+38
-10
lines changed

engine/base_client/search.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ def _insert_one(cls, query):
7575
doc_id = next(cls._doc_id_counter)
7676

7777
# Debug logging to verify inserts are happening
78-
print(f"DEBUG: Inserting vector with doc_id={doc_id}")
78+
#print(f"DEBUG: Inserting vector with doc_id={doc_id}")
7979

8080
cls.insert_one(str(doc_id), query.vector, query.meta_conditions)
8181
end = time.perf_counter()
@@ -272,7 +272,7 @@ def process_chunk(chunk, search_one, insert_one, insert_fraction):
272272
insert_count = 0
273273
search_count = 0
274274

275-
print(f"DEBUG: Processing chunk of {len(chunk)} queries with insert_fraction={insert_fraction}")
275+
#print(f"DEBUG: Processing chunk of {len(chunk)} queries with insert_fraction={insert_fraction}")
276276

277277
for i, query in enumerate(chunk):
278278
if random.random() < insert_fraction:
@@ -283,5 +283,5 @@ def process_chunk(chunk, search_one, insert_one, insert_fraction):
283283
search_count += 1
284284
results.append(result)
285285

286-
print(f"DEBUG: Chunk complete - {search_count} searches, {insert_count} inserts")
286+
#print(f"DEBUG: Chunk complete - {search_count} searches, {insert_count} inserts")
287287
return results

engine/clients/redis/search.py

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -112,11 +112,39 @@ def insert_one(cls, doc_id: int, vector, meta_conditions):
112112
else:
113113
vec_param = vector
114114

115-
doc = {"vector": vec_param}
116-
if meta_conditions:
117-
for k, v in meta_conditions.items():
118-
doc[k] = str(v)
115+
# Process metadata exactly like upload_batch does
116+
meta = meta_conditions if meta_conditions else {}
117+
geopoints = {}
118+
payload = {}
119+
120+
if meta is not None:
121+
for k, v in meta.items():
122+
# This is a patch for arxiv-titles dataset where we have a list of "labels", and
123+
# we want to index all of them under the same TAG field (whose separator is ';').
124+
if k == "labels":
125+
payload[k] = ";".join(v)
126+
if (
127+
v is not None
128+
and not isinstance(v, dict)
129+
and not isinstance(v, list)
130+
):
131+
payload[k] = v
132+
# Redis treats geopoints differently and requires putting them as
133+
# a comma-separated string with lat and lon coordinates
134+
from engine.clients.redis.helper import convert_to_redis_coords
135+
geopoints = {
136+
k: ",".join(map(str, convert_to_redis_coords(v["lon"], v["lat"])))
137+
for k, v in meta.items()
138+
if isinstance(v, dict)
139+
}
119140

120-
print(f"DEBUG: Redis inserting doc_id={doc_id}, vector_size={len(vec_param)} bytes")
121-
cls.client.hset(str(doc_id), mapping=doc)
122-
print(f"DEBUG: Redis insert complete for doc_id={doc_id}")
141+
#print(f"DEBUG: Redis inserting doc_id={doc_id}, vector_size={len(vec_param)} bytes")
142+
cls.client.hset(
143+
str(doc_id),
144+
mapping={
145+
"vector": vec_param,
146+
**payload,
147+
**geopoints,
148+
},
149+
)
150+
#print(f"DEBUG: Redis insert complete for doc_id={doc_id}")

0 commit comments

Comments
 (0)