From 36e4f053709812be648999ac42b38fe96a0ac854 Mon Sep 17 00:00:00 2001
From: Justin Cechmanek <justin.cechmanek@redis.com>
Date: Wed, 27 Aug 2025 16:07:19 -0700
Subject: [PATCH 01/12] wip: adding multi vector query class and tests

---
 redisvl/query/aggregate.py           | 160 +++++++++++++++++++++++++++
 tests/unit/test_aggregation_types.py | 149 ++++++++++++++++++++++++-
 2 files changed, 308 insertions(+), 1 deletion(-)

diff --git a/redisvl/query/aggregate.py b/redisvl/query/aggregate.py
index fd066bce..0c970402 100644
--- a/redisvl/query/aggregate.py
+++ b/redisvl/query/aggregate.py
@@ -227,3 +227,163 @@ def _build_query_string(self) -> str:
     def __str__(self) -> str:
         """Return the string representation of the query."""
         return " ".join([str(x) for x in self.build_args()])
+
+
+class MultiVectorQuery(AggregationQuery):
+    """
+        MultiVectorQuery allows for search over multiple vector fields in a document simulateously.
+        The final score will be a weighted combination of the individual vector similarity scores
+        following the formula:
+
+        score = (w_1 * score_1 + w_2 * score_2 + w_3 * score_3 + ... ) / (w_1 + w_2 + w_3 + ...)
+
+        Vectors may be of different size and datatype.
+
+        .. code-block:: python
+
+            from redisvl.query import MultiVectorQuery
+            from redisvl.index import SearchIndex
+
+            index = SearchIndex.from_yaml("path/to/index.yaml")
+
+            query = MultiVectorQuery(
+                vectors=[[0.1, 0.2, 0.3], [0.5, 0.5], [0.1, 0.1, 0.1, 0.1]],
+                vector_field_names=["text_vector", "image_vector", "feature_vector"]
+                filter_expression=None,
+                weights=[0.7],
+                dtypes=["float32", "float32", "float32"],
+                num_results=10,
+                return_fields=["field1", "field2"],
+                dialect=2,
+            )
+
+            results = index.query(query)
+
+
+    FT.AGGREGATE 'idx:characters'
+     "@embedding1:[VECTOR_RANGE .7 $vector1]=>{$YIELD_DISTANCE_AS: vector_distance1} | @embedding2:[VECTOR_RANGE 1.0 $vector2]=>{$YIELD_DISTANCE_AS: vector_distance2} | @embedding3:[VECTOR_RANGE 1.7 $vector3]=>{$YIELD_DISTANCE_AS: vector_distance3}  | @name:(James)"
+     ADDSCORES
+     SCORER BM25STD.NORM
+     LOAD 2 created_at @embedding
+     APPLY 'case(exists(@vector_distance1), @vector_distance1, 0.0)' as v1
+     APPLY 'case(exists(@vector_distance2), @vector_distance2, 0.0)' as v2
+     APPLY 'case(exists(@vector_distance3), @vector_distance3, 0.0)' as v3
+     APPLY '(@__score * 0.3 + (@v1 * 0.3) + (@v2 * 1.2) + (@v3 * 0.1))' AS final_score
+     PARAMS 6 vector1 "\xe4\xd6..." vector2 "\x89\xa0..." vector3 "\x3c\x19..."
+     SORTBY 2 @final_score DESC
+     DIALECT 2
+     LIMIT 0 100
+
+
+    """
+
+    DISTANCE_ID: str = "vector_distance"
+    VECTOR_PARAM: str = "vector"
+
+    def __init__(
+        self,
+        vectors: Union[bytes, List[bytes], List[float], List[List[float]]],
+        vector_field_names: Union[str, List[str]],
+        filter_expression: Optional[Union[str, FilterExpression]] = None,
+        weights: Union[float, List[float]] = 1.0,
+        dtypes: Union[str, List[str]] = "float32",
+        num_results: int = 10,
+        return_fields: Optional[List[str]] = None,
+        dialect: int = 2,
+    ):
+        """
+        Instantiates a MultiVectorQuery object.
+
+        Args:
+            vectors (Union[bytes, List[bytes], List[float], List[List[float]]): The vectors to perform vector similarity search.
+            vector_field_names (str): The vector field names to search in.
+            filter_expression (Optional[FilterExpression], optional): The filter expression to use.
+                Defaults to None.
+            weights (Union[float, List[float]], optional): The weights of the vector similarity.
+                Documents will be scored as:
+                score = (w1) * score1 + (w2) * score2 + (w3) * score3 + ...
+                Defaults to 1.0, which corresponds to equal weighting
+            dtype (Union[str, List[str]] optional): The data types of the vectors. Defaults to "float32" for all vectors.
+            num_results (int, optional): The number of results to return. Defaults to 10.
+            return_fields (Optional[List[str]], optional): The fields to return. Defaults to None.
+            dialect (int, optional): The Redis dialect version. Defaults to 2.
+
+        Raises:
+            ValueError: The number of vectors, vector field names, and weights do not agree.
+            TypeError: If the stopwords are not a set, list, or tuple of strings.
+        """
+
+        self._vectors = vectors
+        self._vector_fields = vector_field_names
+        self._filter_expression = filter_expression
+        self._weights = weights
+        self._dtypes = dtypes
+        self._num_results = num_results
+
+        query_string = self._build_query_string()
+        super().__init__(query_string)
+
+        self.scorer(text_scorer)
+        self.add_scores()
+        self.apply(
+            vector_similarity=f"(2 - @{self.DISTANCE_ID})/2", text_score="@__score"
+        )
+        self.apply(hybrid_score=f"{1-alpha}*@text_score + {alpha}*@vector_similarity")
+        self.sort_by(Desc("@hybrid_score"), max=num_results)  # type: ignore
+        self.dialect(dialect)
+        if return_fields:
+            self.load(*return_fields)  # type: ignore[arg-type]
+
+    @property
+    def params(self) -> Dict[str, Any]:
+        """Return the parameters for the aggregation.
+
+        Returns:
+            Dict[str, Any]: The parameters for the aggregation.
+        """
+        if isinstance(self._vector, list):
+            vector = array_to_buffer(self._vector, dtype=self._dtype)
+        else:
+            vector = self._vector
+
+        params = {self.VECTOR_PARAM: vector}
+
+        return params
+
+    def _tokenize_and_escape_query(self, user_query: str) -> str:
+        """Convert a raw user query to a redis full text query joined by ORs
+        Args:
+            user_query (str): The user query to tokenize and escape.
+
+        Returns:
+            str: The tokenized and escaped query string.
+        Raises:
+            ValueError: If the text string becomes empty after stopwords are removed.
+        """
+        escaper = TokenEscaper()
+
+        tokens = [
+            escaper.escape(
+                token.strip().strip(",").replace("“", "").replace("”", "").lower()
+            )
+            for token in user_query.split()
+        ]
+        tokenized = " | ".join(
+            [token for token in tokens if token and token not in self._stopwords]
+        )
+
+        if not tokenized:
+            raise ValueError("text string cannot be empty after removing stopwords")
+        return tokenized
+
+    def _build_query_string(self) -> str:
+        """Build the full query string for text search with optional filtering."""
+        if isinstance(self._filter_expression, FilterExpression):
+            filter_expression = str(self._filter_expression)
+        else:
+            filter_expression = ""
+
+        # base KNN query
+        knn_query = f"KNN {self._num_results} @{self._vector_field} ${self.VECTOR_PARAM} AS {self.DISTANCE_ID}"
+
+        return f"{filter_expression})=>[{knn_query}]"
diff --git a/tests/unit/test_aggregation_types.py b/tests/unit/test_aggregation_types.py
index a13e87f5..5e2f2493 100644
--- a/tests/unit/test_aggregation_types.py
+++ b/tests/unit/test_aggregation_types.py
@@ -4,13 +4,17 @@
 from redis.commands.search.result import Result
 
 from redisvl.index.index import process_results
-from redisvl.query.aggregate import HybridQuery
+from redisvl.query.aggregate import HybridQuery, MultiVectorQuery
 from redisvl.query.filter import Tag
 
 # Sample data for testing
 sample_vector = [0.1, 0.2, 0.3, 0.4]
 sample_text = "the toon squad play basketball against a gang of aliens"
 
+sample_vector_2 = [0.1, 0.2, 0.3, 0.4]
+sample_vector_3 = [0.5, 0.5]
+sample_vector_4 = [0.1, 0.1, 0.1]
+
 
 # Test Cases
 def test_aggregate_hybrid_query():
@@ -190,3 +194,146 @@ def test_hybrid_query_with_string_filter():
     query_string_wildcard = str(hybrid_query_wildcard)
     assert f"@{text_field_name}:(search | document | 12345)" in query_string_wildcard
     assert "AND" not in query_string_wildcard
+
+
+def test_aggregate_multi_vector_query():
+    # test we require vectors and field names
+    with pytest.raises(ValueError):
+        _ = MultiVectorQuery()
+
+    with pytest.raises(ValueError):
+        _ = MultiVectorQuery(vectors=[sample_vector], vector_field_names=[])
+
+    with pytest.raises(ValueError):
+        _ = MultiVectorQuery(vectors=[], vector_field_names=["field 1"])
+
+    # test we can initialize with a single vector and single field name
+    multivector_query = MultiVectorQuery(
+        vectors=[sample_vector], vector_field_names=["field 1"]
+    )
+    assert query.query is not None
+
+    # check default properties
+    assert multivector_query._vectors == [sample_vector]
+    assert multivector_query._vector_field_names == ["field 1"]
+    assert multivector_query._filter_expression == None
+    assert multivector_query._weights == 1.0
+    assert multivector_query._num_results == 10
+    assert multivector_query._loadfields == []
+    assert multivector_query._dialect == 2
+
+    # test we can initialize with mutliple vectors and field names
+    multivector_query = MultiVectorQuery(
+        vectors=[sample_vector, sample_vector_2, sample_vector_3, sample_vector_4],
+        vector_field_names=["field 1", "field 2", "field 3", "field 4"],
+        weights=[0.2, 0.5, 0.6, 0, 1],
+        dtypes=[],
+    )
+
+    assert len(multivector_query._vectors) == 4
+    assert len(multivector_query._vector_field_names) == 4
+    assert len(multivector_query._weights) == 4
+
+    # test defaults can be overwritten
+    multivector_query = MultiVectorQuery(
+        vectors=[sample_vector, sample_vector_2, sample_vector_3, sample_vector_4],
+        vector_field_names=["field 1", "field 2", "field 3", "field 4"],
+        filter_expression=(Tag("user group") == ["group A", "group C"]),
+        weights=[0.2, 0.5, 0.6, 0, 1],
+        dtypes=["float32", "float32", "float64", "bfloat16"],
+        num_results=5,
+        return_fields=["field 1", "user name", "address"],
+        dialect=4,
+    )
+
+    assert multivector_query._vectors == [
+        sample_vector,
+        sample_vector_2,
+        sample_vector_3,
+        sample_vector_4,
+    ]
+    assert multivector_query._vector_field_names == [
+        "field 1",
+        "field 2",
+        "field 3",
+        "field 4",
+    ]
+    assert multivector_query._weights == [0.2, 0.5, 0.6, 0, 1]
+    assert multivector_query._filter_expression == Tag("user group")
+    assert multivector_query._num_results == 5
+    assert multivector_query._loadfields == ["field 1", "user name", "address"]
+    assert multivector_query._dialect == 4
+
+
+def test_aggregate_multi_vector_query_broadcasting():
+    # if a single vector and multiple fields is passed we search with the same vector over all fields
+    multivector_query = MultiVectorQuery(
+        vectors=[sample_vector],
+        vector_field_names=["text embedding", "image embedding"],
+    )
+    assert multi_vector_query.query == "<raw text here>"
+
+    # vector being broadcast doesn't need to be in a list
+    multivector_query = MultiVectorQuery(
+        vectors=sample_vector, vector_field_names=["text embedding", "image embedding"]
+    )
+    assert multi_vector_query.query == "<raw text here>"
+
+    # if multiple vectors are passed and a single field name we search with all vectors on that field
+    multivector_query = MultiVectorQuery(
+        vectors=[sample_vector_2, sample_vector_3],
+        vector_field_names=["text embedding"],
+    )
+    assert multi_vector_query.query == "<raw text here>"
+
+    # vector field name does not need to be in a list if only one is provided
+    multivector_query = MultiVectorQuery(
+        vectors=[sample_vector_2, sample_vector_3], vector_field_names="text embedding"
+    )
+    assert multi_vector_query.query == "<raw text here>"
+
+    # if a single weight is passed it is applied to all similarity scores
+    multivector_query = MultiVectorQuery(
+        vectors=[sample_vector_2, sample_vector_3],
+        vector_field_names=["text embedding", "image embedding"],
+        weights=[0.2],
+    )
+    assert multi_vector_query.query == "<raw text here>"
+
+    # weight does not need to be in a list if only one is provided
+    multivector_query = MultiVectorQuery(
+        vectors=[sample_vector_2, sample_vector_3],
+        vector_field_names=["text embedding", "image embedding"],
+        weights=0.2,
+    )
+    assert multi_vector_query.query == "<raw text here>"
+
+
+def test_aggregate_multi_vector_query_errors():
+    # test an error is raised if the number of vectors and number of fields don't match
+    with pytest.raises(ValueError):
+        _ = MultiVectorQuery(
+            vectors=[sample_vector, sample_vector_2, sample_vector_3],
+            vector_field_names=["text embedding", "image embedding"],
+        )
+
+    with pytest.raises(ValueError):
+        _ = MultiVectorQuery(
+            vectors=[sample_vector, sample_vector_2],
+            vector_field_names=["text embedding", "image embedding", "features"],
+        )
+
+    # test an error is raised if the number of weights is incorrect
+    with pytest.raises(ValueError):
+        _ = MultiVectorQuery(
+            vectors=[sample_vector, sample_vector_2],
+            vector_field_names=["text embedding", "image embedding"],
+            weights=[0.1, 0.2, 0.3],
+        )
+
+    # test an error is raised if none of the field names are present
+    with pytest.raises(ValueError):
+        _ = MultiVectorQuery(
+            vectors=[],
+            vector_field_names=[],
+        )

From d0dff0bb0f6472d647e3f066e7bead036f965a7c Mon Sep 17 00:00:00 2001
From: Justin Cechmanek <justin.cechmanek@redis.com>
Date: Wed, 24 Sep 2025 15:09:04 -0700
Subject: [PATCH 02/12] wip: working multivector query

---
 redisvl/query/aggregate.py | 193 ++++++++++++++++++++++++-------------
 1 file changed, 126 insertions(+), 67 deletions(-)

diff --git a/redisvl/query/aggregate.py b/redisvl/query/aggregate.py
index 0c970402..3131b28d 100644
--- a/redisvl/query/aggregate.py
+++ b/redisvl/query/aggregate.py
@@ -260,35 +260,57 @@ class MultiVectorQuery(AggregationQuery):
             results = index.query(query)
 
 
+
+        FT.AGGREGATE multi_vector_test 
+        "@user_embedding:[VECTOR_RANGE 2.0 $vector_0]=>{$YIELD_DISTANCE_AS: distance_0}
+        | @image_embedding:[VECTOR_RANGE 2.0 $vector_1]=>{$YIELD_DISTANCE_AS: distance_1}" 
+        PARAMS 4
+        vector_0 "\xcd\xcc\xcc=\xcd\xcc\xcc=\x00\x00\x00?" 
+        vector_1 "\x9a\x99\x99\x99\x99\x99\xb9?\x9a\x99\x99\x99\x99\x99\xb9?\x9a\x99\x99\x99\x99\x99\xb9?\x9a\x99\x99\x99\x99\x99\xb9?\x9a\x99\x99\x99\x99\x99\xb9?" 
+        APPLY "(2 - @distance_0)/2" AS score_0
+        APPLY "(2 - @distance_1)/2" AS score_1 
+        DIALECT 2
+        APPLY "(@score_0 + @score_1)" AS combined_score
+        SORTBY 2 @combined_score 
+        ASC 
+        MAX 10 
+        LOAD 2 score_0 score_1
+
+
+
+
+
     FT.AGGREGATE 'idx:characters'
-     "@embedding1:[VECTOR_RANGE .7 $vector1]=>{$YIELD_DISTANCE_AS: vector_distance1} | @embedding2:[VECTOR_RANGE 1.0 $vector2]=>{$YIELD_DISTANCE_AS: vector_distance2} | @embedding3:[VECTOR_RANGE 1.7 $vector3]=>{$YIELD_DISTANCE_AS: vector_distance3}  | @name:(James)"
-     ADDSCORES
-     SCORER BM25STD.NORM
-     LOAD 2 created_at @embedding
-     APPLY 'case(exists(@vector_distance1), @vector_distance1, 0.0)' as v1
-     APPLY 'case(exists(@vector_distance2), @vector_distance2, 0.0)' as v2
-     APPLY 'case(exists(@vector_distance3), @vector_distance3, 0.0)' as v3
+     "@embedding1:[VECTOR_RANGE .7 $vector1]=>{$YIELD_DISTANCE_AS: vector_distance1}
+     | @embedding2:[VECTOR_RANGE 1.0 $vector2]=>{$YIELD_DISTANCE_AS: vector_distance2}
+     | @embedding3:[VECTOR_RANGE 1.7 $vector3]=>{$YIELD_DISTANCE_AS: vector_distance3}
+     | @name:(James)"
+     ### ADDSCORES
+     ### SCORER BM25STD.NORM
+     ### LOAD 2 created_at @embedding
+     APPLY '(2 - @vector_distance1)/2' as v1
+     APPLY '(2 - @vector_distance2)/2' as v2
+     APPLY '(2 - @vector_distance3)/2' as v3
      APPLY '(@__score * 0.3 + (@v1 * 0.3) + (@v2 * 1.2) + (@v3 * 0.1))' AS final_score
      PARAMS 6 vector1 "\xe4\xd6..." vector2 "\x89\xa0..." vector3 "\x3c\x19..."
      SORTBY 2 @final_score DESC
      DIALECT 2
      LIMIT 0 100
 
-
     """
 
     DISTANCE_ID: str = "vector_distance"
-    VECTOR_PARAM: str = "vector"
 
     def __init__(
         self,
         vectors: Union[bytes, List[bytes], List[float], List[List[float]]],
         vector_field_names: Union[str, List[str]],
+        weights: List[float] = [1.0],
+        return_fields: Optional[List[str]] = None,
         filter_expression: Optional[Union[str, FilterExpression]] = None,
-        weights: Union[float, List[float]] = 1.0,
-        dtypes: Union[str, List[str]] = "float32",
+        dtypes: List[str] = ["float32"],
         num_results: int = 10,
-        return_fields: Optional[List[str]] = None,
+        return_score: bool = False,
         dialect: int = 2,
     ):
         """
@@ -296,40 +318,81 @@ def __init__(
 
         Args:
             vectors (Union[bytes, List[bytes], List[float], List[List[float]]): The vectors to perform vector similarity search.
-            vector_field_names (str): The vector field names to search in.
-            filter_expression (Optional[FilterExpression], optional): The filter expression to use.
-                Defaults to None.
-            weights (Union[float, List[float]], optional): The weights of the vector similarity.
+            vector_field_names (Union[str, List[str]]): The vector field names to search in.
+            weights (List[float]): The weights of the vector similarity.
                 Documents will be scored as:
                 score = (w1) * score1 + (w2) * score2 + (w3) * score3 + ...
-                Defaults to 1.0, which corresponds to equal weighting
-            dtype (Union[str, List[str]] optional): The data types of the vectors. Defaults to "float32" for all vectors.
-            num_results (int, optional): The number of results to return. Defaults to 10.
+                Defaults to [1.0], which corresponds to equal weighting
             return_fields (Optional[List[str]], optional): The fields to return. Defaults to None.
+            filter_expression (Optional[Union[str, FilterExpression]]): The filter expression to use.
+                Defaults to None.
+            dtypes (List[str]): The data types of the vectors. Defaults to ["float32"] for all vectors.
+            num_results (int, optional): The number of results to return. Defaults to 10.
+            return_score (bool): Whether to return the combined vector similarity score.
+                Defaults to False.
             dialect (int, optional): The Redis dialect version. Defaults to 2.
 
         Raises:
             ValueError: The number of vectors, vector field names, and weights do not agree.
-            TypeError: If the stopwords are not a set, list, or tuple of strings.
         """
 
-        self._vectors = vectors
-        self._vector_fields = vector_field_names
         self._filter_expression = filter_expression
-        self._weights = weights
         self._dtypes = dtypes
         self._num_results = num_results
 
+        if len(vectors) == 0 or len(vector_field_names) == 0 or len(weights) == 0:
+            raise ValueError(
+                f"""The number of vectors and vector field names must be equal.
+                             If weights are specified their number must match the number of vectors and vector field names also.
+                            Length of vectors list: {len(vectors) = }
+                            Length of vector_field_names list: {len(vector_field_names) = }
+                            Length of weights list: {len(weights) = }
+                            """
+            )
+
+        if isinstance(vectors, bytes) or isinstance(vectors[0], float):
+            self._vectors = [vectors]
+        else:
+            self._vectors = vectors
+        if isinstance(vector_field_names, str):
+            self._vector_field_names = [vector_field_names]
+        else:
+            self._vector_field_names = vector_field_names
+        if len(weights) == 1:
+            self._weights = weights * len(vectors)
+        else:
+            self._weights = weights
+        if len(dtypes) == 1:
+            self._dtypes = dtypes * len(vectors)
+        else:
+            self._dtypes = dtypes
+
+        if (len(self._vectors) != len(self._vector_field_names)) or (
+            len(self._vectors) != len(self._weights)
+        ):
+            raise ValueError(
+                f"""The number of vectors and vector field names must be equal.
+                             If weights are specified their number must match the number of vectors and vector field names also.
+                            Length of vectors list: {len(self._vectors) = }
+                            Length of vector_field_names list: {len(self._vector_field_names) = }
+                            Length of weights list: {len(self._weights) = }
+                            """
+            )
+
         query_string = self._build_query_string()
         super().__init__(query_string)
 
-        self.scorer(text_scorer)
-        self.add_scores()
-        self.apply(
-            vector_similarity=f"(2 - @{self.DISTANCE_ID})/2", text_score="@__score"
-        )
-        self.apply(hybrid_score=f"{1-alpha}*@text_score + {alpha}*@vector_similarity")
-        self.sort_by(Desc("@hybrid_score"), max=num_results)  # type: ignore
+        # construct the scoring string based on the vector similarity scores and weights
+        combined_scores = []
+        for i, w in enumerate(self._weights):
+            combined_scores.append(f"@score_{i} * {w}")
+        combined_score_string = " + ".join(combined_scores)
+        combined_score_string = f"'({combined_score_string})'"
+
+        self.apply(combined_score=combined_score_string)
+
+        # self.add_scores()
+        self.sort_by(Desc("@combined_score"), max=num_results)  # type: ignore
         self.dialect(dialect)
         if return_fields:
             self.load(*return_fields)  # type: ignore[arg-type]
@@ -341,49 +404,45 @@ def params(self) -> Dict[str, Any]:
         Returns:
             Dict[str, Any]: The parameters for the aggregation.
         """
-        if isinstance(self._vector, list):
-            vector = array_to_buffer(self._vector, dtype=self._dtype)
-        else:
-            vector = self._vector
-
-        params = {self.VECTOR_PARAM: vector}
-
+        params = {}
+        for i, (vector, vector_field, dtype) in enumerate(zip(
+            self._vectors, self._vector_field_names, self._dtypes
+        )):
+            if isinstance(vector, list):
+                vector = array_to_buffer(vector, dtype=dtype)
+            params[f"vector_{i}"] = vector
         return params
 
-    def _tokenize_and_escape_query(self, user_query: str) -> str:
-        """Convert a raw user query to a redis full text query joined by ORs
-        Args:
-            user_query (str): The user query to tokenize and escape.
-
-        Returns:
-            str: The tokenized and escaped query string.
-        Raises:
-            ValueError: If the text string becomes empty after stopwords are removed.
-        """
-        escaper = TokenEscaper()
-
-        tokens = [
-            escaper.escape(
-                token.strip().strip(",").replace("“", "").replace("”", "").lower()
-            )
-            for token in user_query.split()
-        ]
-        tokenized = " | ".join(
-            [token for token in tokens if token and token not in self._stopwords]
-        )
-
-        if not tokenized:
-            raise ValueError("text string cannot be empty after removing stopwords")
-        return tokenized
-
     def _build_query_string(self) -> str:
         """Build the full query string for text search with optional filtering."""
+
+        filter_expression = self._filter_expression
         if isinstance(self._filter_expression, FilterExpression):
             filter_expression = str(self._filter_expression)
-        else:
-            filter_expression = ""
 
         # base KNN query
-        knn_query = f"KNN {self._num_results} @{self._vector_field} ${self.VECTOR_PARAM} AS {self.DISTANCE_ID}"
+        knn_queries = []
+        range_queries = []
+        for i, (vector, field) in enumerate(zip(self._vectors, self._vector_field_names)):
+            knn_queries.append(f"[KNN {self._num_results} @{field} $vector_{i} AS distance_{i}]")
+            range_queries.append(f"@{field}:[VECTOR_RANGE 2.0 $vector_{i}]=>{{$YIELD_DISTANCE_AS: distance_{i}}}")
+
+        knn_query = " | ".join(knn_queries) ## knn_queries format doesn't work
+        knn_query = " | ".join(range_queries)
+
+        # calculate the respective vector similarities
+        apply_string = ""
+        for i, (vector, field_name, weight) in enumerate(
+            zip(self._vectors, self._vector_field_names, self._weights)
+        ):
+            apply_string += f'APPLY "(2 - @distance_{i})/2" AS score_{i} '
 
-        return f"{filter_expression})=>[{knn_query}]"
+        return (
+            f"{knn_query} {filter_expression} {apply_string}"
+            if filter_expression
+            else f"{knn_query} {apply_string}"
+        )
+
+    def __str__(self) -> str:
+        """Return the string representation of the query."""
+        return " ".join([str(x) for x in self.build_args()])

From 7275f708cbdbcab98b04f60de566339b7aafc4a7 Mon Sep 17 00:00:00 2001
From: Justin Cechmanek <justin.cechmanek@redis.com>
Date: Fri, 26 Sep 2025 16:58:55 -0700
Subject: [PATCH 03/12] working multivector query class and tests

---
 redisvl/query/__init__.py             |   3 +-
 redisvl/query/aggregate.py            | 165 ++++++--------
 tests/conftest.py                     | 173 ++++++++++++++
 tests/integration/test_aggregation.py | 309 +++++++++++++++++++++++++-
 tests/unit/test_aggregation_types.py  |  42 ++--
 5 files changed, 559 insertions(+), 133 deletions(-)

diff --git a/redisvl/query/__init__.py b/redisvl/query/__init__.py
index 30d35562..67c29d2b 100644
--- a/redisvl/query/__init__.py
+++ b/redisvl/query/__init__.py
@@ -1,4 +1,4 @@
-from redisvl.query.aggregate import AggregationQuery, HybridQuery
+from redisvl.query.aggregate import AggregationQuery, HybridQuery, MultiVectorQuery
 from redisvl.query.query import (
     BaseQuery,
     BaseVectorQuery,
@@ -21,4 +21,5 @@
     "TextQuery",
     "AggregationQuery",
     "HybridQuery",
+    "MultiVectorQuery",
 ]
diff --git a/redisvl/query/aggregate.py b/redisvl/query/aggregate.py
index 3131b28d..d0a4273b 100644
--- a/redisvl/query/aggregate.py
+++ b/redisvl/query/aggregate.py
@@ -231,76 +231,35 @@ def __str__(self) -> str:
 
 class MultiVectorQuery(AggregationQuery):
     """
-        MultiVectorQuery allows for search over multiple vector fields in a document simulateously.
-        The final score will be a weighted combination of the individual vector similarity scores
-        following the formula:
+    MultiVectorQuery allows for search over multiple vector fields in a document simultaneously.
+    The final score will be a weighted combination of the individual vector similarity scores
+    following the formula:
 
-        score = (w_1 * score_1 + w_2 * score_2 + w_3 * score_3 + ... ) / (w_1 + w_2 + w_3 + ...)
-
-        Vectors may be of different size and datatype.
-
-        .. code-block:: python
-
-            from redisvl.query import MultiVectorQuery
-            from redisvl.index import SearchIndex
-
-            index = SearchIndex.from_yaml("path/to/index.yaml")
-
-            query = MultiVectorQuery(
-                vectors=[[0.1, 0.2, 0.3], [0.5, 0.5], [0.1, 0.1, 0.1, 0.1]],
-                vector_field_names=["text_vector", "image_vector", "feature_vector"]
-                filter_expression=None,
-                weights=[0.7],
-                dtypes=["float32", "float32", "float32"],
-                num_results=10,
-                return_fields=["field1", "field2"],
-                dialect=2,
-            )
-
-            results = index.query(query)
-
-
-
-        FT.AGGREGATE multi_vector_test 
-        "@user_embedding:[VECTOR_RANGE 2.0 $vector_0]=>{$YIELD_DISTANCE_AS: distance_0}
-        | @image_embedding:[VECTOR_RANGE 2.0 $vector_1]=>{$YIELD_DISTANCE_AS: distance_1}" 
-        PARAMS 4
-        vector_0 "\xcd\xcc\xcc=\xcd\xcc\xcc=\x00\x00\x00?" 
-        vector_1 "\x9a\x99\x99\x99\x99\x99\xb9?\x9a\x99\x99\x99\x99\x99\xb9?\x9a\x99\x99\x99\x99\x99\xb9?\x9a\x99\x99\x99\x99\x99\xb9?\x9a\x99\x99\x99\x99\x99\xb9?" 
-        APPLY "(2 - @distance_0)/2" AS score_0
-        APPLY "(2 - @distance_1)/2" AS score_1 
-        DIALECT 2
-        APPLY "(@score_0 + @score_1)" AS combined_score
-        SORTBY 2 @combined_score 
-        ASC 
-        MAX 10 
-        LOAD 2 score_0 score_1
+    score = (w_1 * score_1 + w_2 * score_2 + w_3 * score_3 + ... )
 
+    Vectors may be of different size and datatype, but must be indexed using the 'cosine' distance_metric.
 
+    .. code-block:: python
 
+        from redisvl.query import MultiVectorQuery
+        from redisvl.index import SearchIndex
 
+        index = SearchIndex.from_yaml("path/to/index.yaml")
 
-    FT.AGGREGATE 'idx:characters'
-     "@embedding1:[VECTOR_RANGE .7 $vector1]=>{$YIELD_DISTANCE_AS: vector_distance1}
-     | @embedding2:[VECTOR_RANGE 1.0 $vector2]=>{$YIELD_DISTANCE_AS: vector_distance2}
-     | @embedding3:[VECTOR_RANGE 1.7 $vector3]=>{$YIELD_DISTANCE_AS: vector_distance3}
-     | @name:(James)"
-     ### ADDSCORES
-     ### SCORER BM25STD.NORM
-     ### LOAD 2 created_at @embedding
-     APPLY '(2 - @vector_distance1)/2' as v1
-     APPLY '(2 - @vector_distance2)/2' as v2
-     APPLY '(2 - @vector_distance3)/2' as v3
-     APPLY '(@__score * 0.3 + (@v1 * 0.3) + (@v2 * 1.2) + (@v3 * 0.1))' AS final_score
-     PARAMS 6 vector1 "\xe4\xd6..." vector2 "\x89\xa0..." vector3 "\x3c\x19..."
-     SORTBY 2 @final_score DESC
-     DIALECT 2
-     LIMIT 0 100
+        query = MultiVectorQuery(
+            vectors=[[0.1, 0.2, 0.3], [0.5, 0.5], [0.1, 0.1, 0.1, 0.1]],
+            vector_field_names=["text_vector", "image_vector", "feature_vector"],
+            filter_expression=None,
+            weights=[0.7, 0.2, 0.5],
+            dtypes=["float32", "bfloat16", "float64"],
+            num_results=10,
+            return_fields=["field1", "field2"],
+            dialect=2,
+        )
 
+        results = index.query(query)
     """
 
-    DISTANCE_ID: str = "vector_distance"
-
     def __init__(
         self,
         vectors: Union[bytes, List[bytes], List[float], List[List[float]]],
@@ -340,58 +299,69 @@ def __init__(
         self._dtypes = dtypes
         self._num_results = num_results
 
-        if len(vectors) == 0 or len(vector_field_names) == 0 or len(weights) == 0:
+        if any([len(x) == 0 for x in [vectors, vector_field_names, weights, dtypes]]):
             raise ValueError(
                 f"""The number of vectors and vector field names must be equal.
-                             If weights are specified their number must match the number of vectors and vector field names also.
-                            Length of vectors list: {len(vectors) = }
-                            Length of vector_field_names list: {len(vector_field_names) = }
-                            Length of weights list: {len(weights) = }
-                            """
+                    If weights or dtypes are specified their number must match the number of vectors and vector field names also.
+                    Length of vectors list: {len(vectors) = }
+                    Length of vector_field_names list: {len(vector_field_names) = }
+                    Length of weights list: {len(weights) = }
+                    length of dtypes list: {len(dtypes) = }
+                    """
             )
 
         if isinstance(vectors, bytes) or isinstance(vectors[0], float):
             self._vectors = [vectors]
         else:
-            self._vectors = vectors
+            self._vectors = vectors  # type: ignore
+
         if isinstance(vector_field_names, str):
             self._vector_field_names = [vector_field_names]
         else:
             self._vector_field_names = vector_field_names
+
         if len(weights) == 1:
             self._weights = weights * len(vectors)
         else:
             self._weights = weights
+
         if len(dtypes) == 1:
             self._dtypes = dtypes * len(vectors)
         else:
             self._dtypes = dtypes
 
-        if (len(self._vectors) != len(self._vector_field_names)) or (
-            len(self._vectors) != len(self._weights)
+        num_vectors = len(self._vectors)
+        if any(
+            [
+                len(x) != num_vectors  # type: ignore
+                for x in [self._vector_field_names, self._weights, self._dtypes]
+            ]
         ):
             raise ValueError(
                 f"""The number of vectors and vector field names must be equal.
-                             If weights are specified their number must match the number of vectors and vector field names also.
-                            Length of vectors list: {len(self._vectors) = }
-                            Length of vector_field_names list: {len(self._vector_field_names) = }
-                            Length of weights list: {len(self._weights) = }
-                            """
+                    If weights or dtypes are specified their number must match the number of vectors and vector field names also.
+                    Length of vectors list: {len(self._vectors) = }
+                    Length of vector_field_names list: {len(self._vector_field_names) = }
+                    Length of weights list: {len(self._weights) = }
+                    Length of dtypes list: {len(self._dtypes) = }
+                    """
             )
 
         query_string = self._build_query_string()
         super().__init__(query_string)
 
+        # calculate the respective vector similarities
+        for i in range(len(vectors)):
+            self.apply(**{f"score_{i}": f"(2 - @distance_{i})/2"})
+
         # construct the scoring string based on the vector similarity scores and weights
         combined_scores = []
         for i, w in enumerate(self._weights):
             combined_scores.append(f"@score_{i} * {w}")
         combined_score_string = " + ".join(combined_scores)
-        combined_score_string = f"'({combined_score_string})'"
 
         self.apply(combined_score=combined_score_string)
 
-        # self.add_scores()
         self.sort_by(Desc("@combined_score"), max=num_results)  # type: ignore
         self.dialect(dialect)
         if return_fields:
@@ -405,43 +375,34 @@ def params(self) -> Dict[str, Any]:
             Dict[str, Any]: The parameters for the aggregation.
         """
         params = {}
-        for i, (vector, vector_field, dtype) in enumerate(zip(
-            self._vectors, self._vector_field_names, self._dtypes
-        )):
+        for i, (vector, dtype) in enumerate(zip(self._vectors, self._dtypes)):
             if isinstance(vector, list):
-                vector = array_to_buffer(vector, dtype=dtype)
+                vector = array_to_buffer(vector, dtype=dtype)  # type: ignore
             params[f"vector_{i}"] = vector
         return params
 
     def _build_query_string(self) -> str:
         """Build the full query string for text search with optional filtering."""
 
-        filter_expression = self._filter_expression
-        if isinstance(self._filter_expression, FilterExpression):
-            filter_expression = str(self._filter_expression)
-
         # base KNN query
-        knn_queries = []
         range_queries = []
-        for i, (vector, field) in enumerate(zip(self._vectors, self._vector_field_names)):
-            knn_queries.append(f"[KNN {self._num_results} @{field} $vector_{i} AS distance_{i}]")
-            range_queries.append(f"@{field}:[VECTOR_RANGE 2.0 $vector_{i}]=>{{$YIELD_DISTANCE_AS: distance_{i}}}")
+        for i, (vector, field) in enumerate(
+            zip(self._vectors, self._vector_field_names)
+        ):
+            range_queries.append(
+                f"@{field}:[VECTOR_RANGE 2.0 $vector_{i}]=>{{$YIELD_DISTANCE_AS: distance_{i}}}"
+            )
 
-        knn_query = " | ".join(knn_queries) ## knn_queries format doesn't work
-        knn_query = " | ".join(range_queries)
+        range_query = " | ".join(range_queries)
 
-        # calculate the respective vector similarities
-        apply_string = ""
-        for i, (vector, field_name, weight) in enumerate(
-            zip(self._vectors, self._vector_field_names, self._weights)
-        ):
-            apply_string += f'APPLY "(2 - @distance_{i})/2" AS score_{i} '
+        filter_expression = self._filter_expression
+        if isinstance(self._filter_expression, FilterExpression):
+            filter_expression = str(self._filter_expression)
 
-        return (
-            f"{knn_query} {filter_expression} {apply_string}"
-            if filter_expression
-            else f"{knn_query} {apply_string}"
-        )
+        if filter_expression:
+            return f"({range_query}) AND ({filter_expression})"
+        else:
+            return f"{range_query}"
 
     def __str__(self) -> str:
         """Return the string representation of the query."""
diff --git a/tests/conftest.py b/tests/conftest.py
index b6b27746..7fc9dd9c 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -233,6 +233,89 @@ def sample_datetimes():
     }
 
 
+@pytest.fixture
+def OG(sample_datetimes):
+    return [
+        {
+            "user": "john",
+            "age": 18,
+            "job": "engineer",
+            "description": "engineers conduct trains that ride on train tracks",
+            "last_updated": sample_datetimes["low"].timestamp(),
+            "credit_score": "high",
+            "location": "-122.4194,37.7749",
+            "user_embedding": [0.1, 0.1, 0.5],
+            "image_embedding": [0.1, 0.1, 0.1, 0.1, 0.1],
+        },
+        {
+            "user": "mary",
+            "age": 14,
+            "job": "doctor",
+            "description": "a medical professional who treats diseases and helps people stay healthy",
+            "last_updated": sample_datetimes["low"].timestamp(),
+            "credit_score": "low",
+            "location": "-122.4194,37.7749",
+            "user_embedding": [0.1, 0.1, 0.5],
+            "image_embedding": [0.1, 0.2, 0.3, 0.4, 0.5],
+        },
+        {
+            "user": "nancy",
+            "age": 94,
+            "job": "doctor",
+            "description": "a research scientist specializing in cancers and diseases of the lungs",
+            "last_updated": sample_datetimes["mid"].timestamp(),
+            "credit_score": "high",
+            "location": "-122.4194,37.7749",
+            "user_embedding": [0.7, 0.1, 0.5],
+            "image_embedding": [0.1, 0.1, 0.3, 0.3, 0.5],
+        },
+        {
+            "user": "tyler",
+            "age": 100,
+            "job": "engineer",
+            "description": "a software developer with expertise in mathematics and computer science",
+            "last_updated": sample_datetimes["mid"].timestamp(),
+            "credit_score": "high",
+            "location": "-110.0839,37.3861",
+            "user_embedding": [0.1, 0.4, 0.5],
+            "image_embedding": [-0.1, -0.2, -0.3, -0.4, -0.5],
+        },
+        {
+            "user": "tim",
+            "age": 12,
+            "job": "dermatologist",
+            "description": "a medical professional specializing in diseases of the skin",
+            "last_updated": sample_datetimes["mid"].timestamp(),
+            "credit_score": "high",
+            "location": "-110.0839,37.3861",
+            "user_embedding": [0.4, 0.4, 0.5],
+            "image_embedding": [-0.1, 0.0, 0.6, 0.0, -0.9],
+        },
+        {
+            "user": "taimur",
+            "age": 15,
+            "job": "CEO",
+            "description": "high stress, but financially rewarding position at the head of a company",
+            "last_updated": sample_datetimes["high"].timestamp(),
+            "credit_score": "low",
+            "location": "-110.0839,37.3861",
+            "user_embedding": [0.6, 0.1, 0.5],
+            "image_embedding": [1.1, 1.2, -0.3, -4.1, 5.0],
+        },
+        {
+            "user": "joe",
+            "age": 35,
+            "job": "dentist",
+            "description": "like the tooth fairy because they'll take your teeth, but you have to pay them!",
+            "last_updated": sample_datetimes["high"].timestamp(),
+            "credit_score": "medium",
+            "location": "-110.0839,37.3861",
+            "user_embedding": [-0.1, -0.1, -0.5],
+            "image_embedding": [-0.8, 2.0, 3.1, 1.5, -1.6],
+        },
+    ]
+
+
 @pytest.fixture
 def sample_data(sample_datetimes):
     return [
@@ -309,6 +392,96 @@ def sample_data(sample_datetimes):
     ]
 
 
+@pytest.fixture
+def multi_vector_data(sample_datetimes):
+    return [
+        {
+            "user": "john",
+            "age": 18,
+            "job": "engineer",
+            "description": "engineers conduct trains that ride on train tracks",
+            "last_updated": sample_datetimes["low"].timestamp(),
+            "credit_score": "high",
+            "location": "-122.4194,37.7749",
+            "user_embedding": [0.1, 0.1, 0.5],
+            "image_embedding": [0.1, 0.1, 0.1, 0.1, 0.1],
+            "audio_embedding": [34, 18.5, -6.0, -12, 115, 96.5],
+        },
+        {
+            "user": "mary",
+            "age": 14,
+            "job": "doctor",
+            "description": "a medical professional who treats diseases and helps people stay healthy",
+            "last_updated": sample_datetimes["low"].timestamp(),
+            "credit_score": "low",
+            "location": "-122.4194,37.7749",
+            "user_embedding": [0.1, 0.1, 0.5],
+            "image_embedding": [0.1, 0.2, 0.3, 0.4, 0.5],
+            "audio_embedding": [0.0, -1.06, 4.55, -1.93, 0.0, 1.53],
+        },
+        {
+            "user": "nancy",
+            "age": 94,
+            "job": "doctor",
+            "description": "a research scientist specializing in cancers and diseases of the lungs",
+            "last_updated": sample_datetimes["mid"].timestamp(),
+            "credit_score": "high",
+            "location": "-122.4194,37.7749",
+            "user_embedding": [0.7, 0.1, 0.5],
+            "image_embedding": [0.1, 0.1, 0.3, 0.3, 0.5],
+            "audio_embedding": [2.75, -0.33, -3.01, -0.52, 5.59, -2.30],
+        },
+        {
+            "user": "tyler",
+            "age": 100,
+            "job": "engineer",
+            "description": "a software developer with expertise in mathematics and computer science",
+            "last_updated": sample_datetimes["mid"].timestamp(),
+            "credit_score": "high",
+            "location": "-110.0839,37.3861",
+            "user_embedding": [0.1, 0.4, 0.5],
+            "image_embedding": [-0.1, -0.2, -0.3, -0.4, -0.5],
+            "audio_embedding": [1.11, -6.73, 5.41, 1.04, 3.92, 0.73],
+        },
+        {
+            "user": "tim",
+            "age": 12,
+            "job": "dermatologist",
+            "description": "a medical professional specializing in diseases of the skin",
+            "last_updated": sample_datetimes["mid"].timestamp(),
+            "credit_score": "high",
+            "location": "-110.0839,37.3861",
+            "user_embedding": [0.4, 0.4, 0.5],
+            "image_embedding": [-0.1, 0.0, 0.6, 0.0, -0.9],
+            "audio_embedding": [0.03, -2.67, -2.08, 4.57, -2.33, 0.0],
+        },
+        {
+            "user": "taimur",
+            "age": 15,
+            "job": "CEO",
+            "description": "high stress, but financially rewarding position at the head of a company",
+            "last_updated": sample_datetimes["high"].timestamp(),
+            "credit_score": "low",
+            "location": "-110.0839,37.3861",
+            "user_embedding": [0.6, 0.1, 0.5],
+            "image_embedding": [1.1, 1.2, -0.3, -4.1, 5.0],
+            "audio_embedding": [0.68, 0.26, 2.08, 2.96, 0.01, 5.13],
+        },
+        {
+            "user": "joe",
+            "age": 35,
+            "job": "dentist",
+            "description": "like the tooth fairy because they'll take your teeth, but you have to pay them!",
+            "last_updated": sample_datetimes["high"].timestamp(),
+            "credit_score": "medium",
+            "location": "-110.0839,37.3861",
+            "user_embedding": [-0.1, -0.1, -0.5],
+            "image_embedding": [-0.8, 2.0, 3.1, 1.5, -1.6],
+            "audio_embedding": [0.91, 7.10, -2.14, -0.52, -6.08, -5.53],
+        },
+    ]
+
+
 def pytest_addoption(parser: pytest.Parser) -> None:
     parser.addoption(
         "--run-api-tests",
diff --git a/tests/integration/test_aggregation.py b/tests/integration/test_aggregation.py
index 3561b1de..f1ff7d0b 100644
--- a/tests/integration/test_aggregation.py
+++ b/tests/integration/test_aggregation.py
@@ -1,14 +1,15 @@
 import pytest
 
 from redisvl.index import SearchIndex
-from redisvl.query import HybridQuery
+from redisvl.query import HybridQuery, MultiVectorQuery
 from redisvl.query.filter import FilterExpression, Geo, GeoRadius, Num, Tag, Text
 from redisvl.redis.utils import array_to_buffer
 from tests.conftest import skip_if_redis_version_below
 
 
 @pytest.fixture
-def index(sample_data, redis_url, worker_id):
+def index(multi_vector_data, redis_url, worker_id):
+
     index = SearchIndex.from_dict(
         {
             "index": {
@@ -33,6 +34,26 @@ def index(sample_data, redis_url, worker_id):
                         "datatype": "float32",
                     },
                 },
+                {
+                    "name": "image_embedding",
+                    "type": "vector",
+                    "attrs": {
+                        "dims": 5,
+                        "distance_metric": "cosine",
+                        "algorithm": "flat",
+                        "datatype": "float32",
+                    },
+                },
+                {
+                    "name": "audio_embedding",
+                    "type": "vector",
+                    "attrs": {
+                        "dims": 6,
+                        "distance_metric": "cosine",
+                        "algorithm": "hnsw",
+                        "datatype": "bfloat16",
+                    },
+                },
             ],
         },
         redis_url=redis_url,
@@ -46,9 +67,12 @@ def hash_preprocess(item: dict) -> dict:
         return {
             **item,
             "user_embedding": array_to_buffer(item["user_embedding"], "float32"),
+            "image_embedding": array_to_buffer(item["image_embedding"], "float32"),
+            "audio_embedding": array_to_buffer(item["audio_embedding"], "bfloat16"),
         }
 
-    index.load(sample_data, preprocess=hash_preprocess)
+    # load records that carry the user, image and audio embeddings
+    index.load(multi_vector_data, preprocess=hash_preprocess)
 
     # run the test
     yield index
@@ -57,7 +81,7 @@ def hash_preprocess(item: dict) -> dict:
     index.delete(drop=True)
 
 
-def test_aggregation_query(index):
+def test_hybrid_query(index):
     skip_if_redis_version_below(index.client, "7.2.0")
 
     text = "a medical professional with expertise in lung cancer"
@@ -136,7 +160,7 @@ def test_empty_query_string():
         )
 
 
-def test_aggregation_query_with_filter(index):
+def test_hybrid_query_with_filter(index):
     skip_if_redis_version_below(index.client, "7.2.0")
 
     text = "a medical professional with expertise in lung cancer"
@@ -162,7 +186,7 @@ def test_aggregation_query_with_filter(index):
         assert int(result["age"]) > 30
 
 
-def test_aggregation_query_with_geo_filter(index):
+def test_hybrid_query_with_geo_filter(index):
     skip_if_redis_version_below(index.client, "7.2.0")
 
     text = "a medical professional with expertise in lung cancer"
@@ -188,7 +212,7 @@ def test_aggregation_query_with_geo_filter(index):
 
 
 @pytest.mark.parametrize("alpha", [0.1, 0.5, 0.9])
-def test_aggregate_query_alpha(index, alpha):
+def test_hybrid_query_alpha(index, alpha):
     skip_if_redis_version_below(index.client, "7.2.0")
 
     text = "a medical professional with expertise in lung cancer"
@@ -215,7 +239,7 @@ def test_aggregate_query_alpha(index, alpha):
         )  # allow for small floating point error
 
 
-def test_aggregate_query_stopwords(index):
+def test_hybrid_query_stopwords(index):
     skip_if_redis_version_below(index.client, "7.2.0")
 
     text = "a medical professional with expertise in lung cancer"
@@ -249,7 +273,7 @@ def test_aggregate_query_stopwords(index):
         )  # allow for small floating point error
 
 
-def test_aggregate_query_with_text_filter(index):
+def test_hybrid_query_with_text_filter(index):
     skip_if_redis_version_below(index.client, "7.2.0")
 
     text = "a medical professional with expertise in lung cancer"
@@ -292,3 +316,270 @@ def test_aggregate_query_with_text_filter(index):
     for result in results:
         assert "medical" in result[text_field].lower()
         assert "research" not in result[text_field].lower()
+
+
+def test_multivector_query(index):
+    """Query two vector fields at once; results honor num_results and score order."""
+    skip_if_redis_version_below(index.client, "7.2.0")
+
+    vectors = [[0.1, 0.1, 0.5], [0.3, 0.4, 0.7, 0.2, -0.3]]
+    vector_fields = ["user_embedding", "image_embedding"]
+    return_fields = ["user", "credit_score", "age", "job", "location", "description"]
+
+    multi_query = MultiVectorQuery(
+        vectors=vectors,
+        vector_field_names=vector_fields,
+        return_fields=return_fields,
+    )
+
+    results = index.query(multi_query)
+    assert isinstance(results, list)
+    assert len(results) == 7
+    for doc in results:
+        assert doc["user"] in [
+            "john",
+            "nancy",
+            "tyler",
+            "tim",
+            "taimur",
+            "joe",
+            "mary",
+        ]
+        assert int(doc["age"]) in [18, 14, 94, 100, 12, 15, 35]
+        assert doc["job"] in ["engineer", "doctor", "dermatologist", "CEO", "dentist"]
+        assert doc["credit_score"] in ["high", "low", "medium"]
+
+    multi_query = MultiVectorQuery(
+        vectors=vectors,
+        vector_field_names=vector_fields,
+        num_results=3,
+    )
+
+    results = index.query(multi_query)
+    assert len(results) == 3
+    assert (  # compare numerically rather than relying on the raw value type
+        float(results[0]["combined_score"])
+        >= float(results[1]["combined_score"])
+        >= float(results[2]["combined_score"])
+    )
+
+
+def test_multivector_query_with_filter(index):
+    skip_if_redis_version_below(index.client, "7.2.0")
+
+    text_field = "description"
+    vectors = [[0.1, 0.1, 0.5], [0.3, 0.4, 0.7, 0.2, -0.3]]
+    vector_fields = ["user_embedding", "image_embedding"]
+    filter_expression = Text(text_field) == ("medical")
+
+    # make sure a text filter can be combined with the multi-vector query
+    multi_query = MultiVectorQuery(
+        vectors=vectors,
+        vector_field_names=vector_fields,
+        filter_expression=filter_expression,
+        return_fields=["job", "description"],
+    )
+
+    results = index.query(multi_query)
+    assert len(results) == 2
+    for result in results:
+        assert "medical" in result[text_field].lower()
+
+    filter_expression = (Text(text_field) == ("medical")) & (
+        (Text(text_field) != ("research"))
+    )
+    multi_query = MultiVectorQuery(
+        vectors=vectors,
+        vector_field_names=vector_fields,
+        filter_expression=filter_expression,
+        return_fields=["description"],
+    )
+
+    results = index.query(multi_query)
+    assert len(results) == 2
+    for result in results:
+        assert "medical" in result[text_field].lower()
+        assert "research" not in result[text_field].lower()
+
+    filter_expression = (Num("age") > 30) & ((Num("age") < 30))
+    multi_query = MultiVectorQuery(
+        vectors=vectors,
+        vector_field_names=vector_fields,
+        filter_expression=filter_expression,
+        return_fields=["description"],
+    )
+
+    results = index.query(multi_query)
+    assert len(results) == 0
+
+
+def test_multivector_query_with_geo_filter(index):
+    skip_if_redis_version_below(index.client, "7.2.0")
+
+    vectors = [[0.2, 0.4, 0.1], [0.1, 0.8, 0.3, -0.2, 0.3]]
+    vector_fields = ["user_embedding", "image_embedding"]
+    return_fields = ["user", "credit_score", "age", "job", "location", "description"]
+    filter_expression = Geo("location") == GeoRadius(-122.4194, 37.7749, 1000, "m")
+
+    multi_query = MultiVectorQuery(
+        vectors=vectors,
+        vector_field_names=vector_fields,
+        filter_expression=filter_expression,
+        return_fields=return_fields,
+    )
+
+    results = index.query(multi_query)
+    assert len(results) == 3
+    for result in results:
+        assert result["location"] is not None
+
+
+def test_multivector_query_weights(index):
+    """Weights reorder results and combined_score is the weighted sum of scores."""
+    # TODO figure out min version for 'case()'
+    skip_if_redis_version_below(index.client, "7.2.0")
+
+    vectors = [[0.1, 0.2, 0.5], [0.3, 0.4, 0.7, 0.2, -0.3]]
+    vector_fields = ["user_embedding", "image_embedding"]
+    return_fields = [
+        "distance_0",
+        "distance_1",
+        "score_0",
+        "score_1",
+        "user_embedding",
+        "image_embedding",
+    ]
+
+    # changing the weights does indeed change the result order
+    multi_query_1 = MultiVectorQuery(
+        vectors=vectors,
+        vector_field_names=vector_fields,
+        return_fields=return_fields,
+        weights=[0.2, 0.9],
+    )
+    results_1 = index.query(multi_query_1)
+
+    multi_query_2 = MultiVectorQuery(
+        vectors=vectors,
+        vector_field_names=vector_fields,
+        return_fields=return_fields,
+        weights=[0.5, 0.1],
+    )
+    results_2 = index.query(multi_query_2)
+
+    assert results_1 != results_2
+
+    # both result sets must be ordered by descending combined score; compare
+    # numerically, since the values need float() conversion (as done below)
+    for ranked in (results_1, results_2):
+        scores = [float(r["combined_score"]) for r in ranked]
+        assert scores == sorted(scores, reverse=True)
+
+    # weights can be negative, 0.0, or greater than 1.0
+    weights = [-5.2, 0.0]
+    multi_query = MultiVectorQuery(
+        vectors=vectors,
+        vector_field_names=vector_fields,
+        return_fields=return_fields,
+        weights=weights,
+    )
+
+    results = index.query(multi_query)
+    assert results
+    for r in results:
+        score = float(r["score_0"]) * weights[0]
+        assert (
+            abs(float(r["combined_score"]) - score) <= 0.0001
+        )  # allow for small floating point error
+
+    # verify we're doing the combined score math correctly
+    weights = [-1.322, 0.851]
+    multi_query = MultiVectorQuery(
+        vectors=vectors,
+        vector_field_names=vector_fields,
+        return_fields=return_fields,
+        weights=weights,
+    )
+
+    results = index.query(multi_query)
+    assert results
+    for r in results:
+        score = float(r["score_0"]) * weights[0] + float(r["score_1"]) * weights[1]
+        assert (
+            abs(float(r["combined_score"]) - score) <= 0.0001
+        )  # allow for small floating point error
+
+    # raise error if wrong number of weights are passed
+    with pytest.raises(ValueError):
+        _ = MultiVectorQuery(
+            vectors=vectors,
+            vector_field_names=vector_fields,
+            return_fields=return_fields,
+            weights=[],
+        )
+
+    with pytest.raises(ValueError):
+        _ = MultiVectorQuery(
+            vectors=vectors,
+            vector_field_names=vector_fields,
+            return_fields=return_fields,
+            weights=[1.2, 0.23, 0.52],
+        )
+
+
+def test_multivector_query_datatypes(index):
+    """Vector fields with different datatypes can be scored in a single query."""
+    skip_if_redis_version_below(index.client, "7.2.0")
+
+    vectors = [[0.1, 0.2, 0.5], [1.2, 0.3, -0.4, 0.7, 0.2, -0.3]]
+    vector_fields = ["user_embedding", "audio_embedding"]
+    return_fields = [
+        "distance_0",
+        "distance_1",
+        "score_0",
+        "score_1",
+        "user_embedding",
+        "audio_embedding",
+    ]
+
+    # query a float32 field and a bfloat16 field together
+    multi_query = MultiVectorQuery(
+        vectors=vectors,
+        vector_field_names=vector_fields,
+        return_fields=return_fields,
+        dtypes=["float32", "bfloat16"],
+    )
+    results = index.query(multi_query)
+
+    scores = [float(r["combined_score"]) for r in results]
+    assert scores == sorted(scores, reverse=True)
+
+    # verify we're doing the combined score math correctly
+    weights = [-1.322, 0.851]
+    multi_query = MultiVectorQuery(
+        vectors=vectors,
+        vector_field_names=vector_fields,
+        return_fields=return_fields,
+        dtypes=["float32", "bfloat16"],
+        weights=weights,
+    )
+
+    results = index.query(multi_query)
+    assert results
+    for r in results:
+        score = float(r["score_0"]) * weights[0] + float(r["score_1"]) * weights[1]
+        # allow for small floating point error
+        assert abs(float(r["combined_score"]) - score) <= 0.0001
+
+    # raise error if wrong number of datatypes are passed
+    with pytest.raises(ValueError):
+        _ = MultiVectorQuery(
+            vectors=vectors,
+            vector_field_names=vector_fields,
+            return_fields=return_fields,
+            dtypes=["float32", "float32", "float64"],
+        )
+
+
+def test_multivector_query_broadcasting(index):
+    pass
diff --git a/tests/unit/test_aggregation_types.py b/tests/unit/test_aggregation_types.py
index 5e2f2493..b684e9d0 100644
--- a/tests/unit/test_aggregation_types.py
+++ b/tests/unit/test_aggregation_types.py
@@ -141,7 +141,6 @@ def test_hybrid_query_with_string_filter():
     )
 
     # Check that filter is stored correctly
-    print("hybrid_query.filter ===", hybrid_query.filter)
     assert hybrid_query._filter_expression == string_filter
 
     # Check that the generated query string includes both text search and filter
@@ -198,26 +197,25 @@ def test_hybrid_query_with_string_filter():
 
 def test_aggregate_multi_vector_query():
     # test we require vectors and field names
-    with pytest.raises(ValueError):
+    with pytest.raises(TypeError):
         _ = MultiVectorQuery()
 
     with pytest.raises(ValueError):
         _ = MultiVectorQuery(vectors=[sample_vector], vector_field_names=[])
 
     with pytest.raises(ValueError):
-        _ = MultiVectorQuery(vectors=[], vector_field_names=["field 1"])
+        _ = MultiVectorQuery(vectors=[], vector_field_names=["field_1"])
 
     # test we can initialize with a single vector and single field name
     multivector_query = MultiVectorQuery(
-        vectors=[sample_vector], vector_field_names=["field 1"]
+        vectors=[sample_vector], vector_field_names=["field_1"]
     )
-    assert query.query is not None
 
     # check default properties
     assert multivector_query._vectors == [sample_vector]
-    assert multivector_query._vector_field_names == ["field 1"]
+    assert multivector_query._vector_field_names == ["field_1"]
     assert multivector_query._filter_expression == None
-    assert multivector_query._weights == 1.0
+    assert multivector_query._weights == [1.0]
     assert multivector_query._num_results == 10
     assert multivector_query._loadfields == []
     assert multivector_query._dialect == 2
@@ -225,24 +223,26 @@ def test_aggregate_multi_vector_query():
     # test we can initialize with mutliple vectors and field names
     multivector_query = MultiVectorQuery(
         vectors=[sample_vector, sample_vector_2, sample_vector_3, sample_vector_4],
-        vector_field_names=["field 1", "field 2", "field 3", "field 4"],
-        weights=[0.2, 0.5, 0.6, 0, 1],
-        dtypes=[],
+        vector_field_names=["field_1", "field_2", "field_3", "field_4"],
+        weights=[0.2, 0.5, 0.6, 0.1],
+        dtypes=["float32", "float32", "float32", "float32"],
     )
 
     assert len(multivector_query._vectors) == 4
     assert len(multivector_query._vector_field_names) == 4
     assert len(multivector_query._weights) == 4
+    assert len(multivector_query._dtypes) == 4
 
     # test defaults can be overwritten
+    filter_expression = Tag("user group") == ["group A", "group C"]
     multivector_query = MultiVectorQuery(
         vectors=[sample_vector, sample_vector_2, sample_vector_3, sample_vector_4],
-        vector_field_names=["field 1", "field 2", "field 3", "field 4"],
-        filter_expression=(Tag("user group") == ["group A", "group C"]),
-        weights=[0.2, 0.5, 0.6, 0, 1],
+        vector_field_names=["field_1", "field_2", "field_3", "field_4"],
+        filter_expression=filter_expression,
+        weights=[0.2, 0.5, 0.6, 0.1],
         dtypes=["float32", "float32", "float64", "bfloat16"],
         num_results=5,
-        return_fields=["field 1", "user name", "address"],
+        return_fields=["field_1", "user name", "address"],
         dialect=4,
     )
 
@@ -253,15 +253,15 @@ def test_aggregate_multi_vector_query():
         sample_vector_4,
     ]
     assert multivector_query._vector_field_names == [
-        "field 1",
-        "field 2",
-        "field 3",
-        "field 4",
+        "field_1",
+        "field_2",
+        "field_3",
+        "field_4",
     ]
-    assert multivector_query._weights == [0.2, 0.5, 0.6, 0, 1]
-    assert multivector_query._filter_expression == Tag("user group")
+    assert multivector_query._weights == [0.2, 0.5, 0.6, 0.1]
+    assert multivector_query._filter_expression == filter_expression
     assert multivector_query._num_results == 5
-    assert multivector_query._loadfields == ["field 1", "user name", "address"]
+    assert multivector_query._loadfields == ["field_1", "user name", "address"]
     assert multivector_query._dialect == 4
 
 

From 0518dc40d6e6de4e0a3921befd64a55e569cbd8a Mon Sep 17 00:00:00 2001
From: Justin Cechmanek <justin.cechmanek@redis.com>
Date: Thu, 9 Oct 2025 14:18:18 -0700
Subject: [PATCH 04/12] cleans up unit test

---
 tests/unit/test_aggregation_types.py | 55 +++++++++++-----------------
 1 file changed, 21 insertions(+), 34 deletions(-)

diff --git a/tests/unit/test_aggregation_types.py b/tests/unit/test_aggregation_types.py
index b684e9d0..442a338c 100644
--- a/tests/unit/test_aggregation_types.py
+++ b/tests/unit/test_aggregation_types.py
@@ -91,6 +91,7 @@ def test_aggregate_hybrid_query():
         stopwords=["the", "a", "of"],
     )
     assert hybrid_query.stopwords == set(["the", "a", "of"])
+
     hybrid_query = HybridQuery(
         sample_text,
         text_field_name,
@@ -195,7 +196,7 @@ def test_hybrid_query_with_string_filter():
     assert "AND" not in query_string_wildcard
 
 
-def test_aggregate_multi_vector_query():
+def test_multi_vector_query():
     # test we require vectors and field names
     with pytest.raises(TypeError):
         _ = MultiVectorQuery()
@@ -265,51 +266,37 @@ def test_aggregate_multi_vector_query():
     assert multivector_query._dialect == 4
 
 
-def test_aggregate_multi_vector_query_broadcasting():
-    # if a single vector and multiple fields is passed we search with the same vector over all fields
-    multivector_query = MultiVectorQuery(
-        vectors=[sample_vector],
-        vector_field_names=["text embedding", "image embedding"],
-    )
-    assert multi_vector_query.query == "<raw text here>"
-
-    # vector being broadcast doesn't need to be in a list
-    multivector_query = MultiVectorQuery(
-        vectors=sample_vector, vector_field_names=["text embedding", "image embedding"]
-    )
-    assert multi_vector_query.query == "<raw text here>"
-
-    # if multiple vectors are passed and a single field name we search with all vectors on that field
-    multivector_query = MultiVectorQuery(
+def test_multi_vector_query_broadcasting():
+    # if a single weight is passed it is applied to all similarity scores
+    field_1 = "text embedding"
+    field_2 = "image embedding"
+    weight = 0.2
+    multi_vector_query = MultiVectorQuery(
         vectors=[sample_vector_2, sample_vector_3],
-        vector_field_names=["text embedding"],
+        vector_field_names=[field_1, field_2],
+        weights=[weight],
     )
-    assert multi_vector_query.query == "<raw text here>"
 
-    # vector field name does not need to be in a list if only one is provided
-    multivector_query = MultiVectorQuery(
-        vectors=[sample_vector_2, sample_vector_3], vector_field_names="text embedding"
+    assert (
+        str(multi_vector_query)
+        == f"@{field_1}:[VECTOR_RANGE 2.0 $vector_0]=>{{$YIELD_DISTANCE_AS: distance_0}} | @{field_2}:[VECTOR_RANGE 2.0 $vector_1]=>{{$YIELD_DISTANCE_AS: distance_1}} SCORER TFIDF DIALECT 2 APPLY (2 - @distance_0)/2 AS score_0 APPLY (2 - @distance_1)/2 AS score_1 APPLY @score_0 * {weight} + @score_1 * {weight} AS combined_score SORTBY 2 @combined_score DESC MAX 10"
     )
-    assert multi_vector_query.query == "<raw text here>"
 
-    # if a single weight is passed it is applied to all similarity scores
-    multivector_query = MultiVectorQuery(
+    # if a single dtype is passed it is applied to all vectors
+    multi_vector_query = MultiVectorQuery(
         vectors=[sample_vector_2, sample_vector_3],
         vector_field_names=["text embedding", "image embedding"],
-        weights=[0.2],
+        dtypes=["float16"],
     )
-    assert multi_vector_query.query == "<raw text here>"
 
-    # weight does not need to be in a list if only one is provided
-    multivector_query = MultiVectorQuery(
-        vectors=[sample_vector_2, sample_vector_3],
-        vector_field_names=["text embedding", "image embedding"],
-        weights=0.2,
+    assert multi_vector_query._dtypes == ["float16", "float16"]
+    assert (
+        str(multi_vector_query)
+        == f"@{field_1}:[VECTOR_RANGE 2.0 $vector_0]=>{{$YIELD_DISTANCE_AS: distance_0}} | @{field_2}:[VECTOR_RANGE 2.0 $vector_1]=>{{$YIELD_DISTANCE_AS: distance_1}} SCORER TFIDF DIALECT 2 APPLY (2 - @distance_0)/2 AS score_0 APPLY (2 - @distance_1)/2 AS score_1 APPLY @score_0 * 1.0 + @score_1 * 1.0 AS combined_score SORTBY 2 @combined_score DESC MAX 10"
     )
-    assert multi_vector_query.query == "<raw text here>"
 
 
-def test_aggregate_multi_vector_query_errors():
+def test_multi_vector_query_errors():
     # test an error is raised if the number of vectors and number of fields don't match
     with pytest.raises(ValueError):
         _ = MultiVectorQuery(

From 7c85122deea2076755613218011d3f69ba090532 Mon Sep 17 00:00:00 2001
From: Justin Cechmanek <justin.cechmanek@redis.com>
Date: Thu, 9 Oct 2025 15:14:38 -0700
Subject: [PATCH 05/12] skips test if redis version not new enough

---
 tests/integration/test_aggregation.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/integration/test_aggregation.py b/tests/integration/test_aggregation.py
index f1ff7d0b..d4658660 100644
--- a/tests/integration/test_aggregation.py
+++ b/tests/integration/test_aggregation.py
@@ -582,4 +582,5 @@ def test_multivector_query_datatypes(index):
 
 
 def test_multivector_query_broadcasting(index):
+    skip_if_redis_version_below(index.client, "7.2.0")
     pass

From 6493304e93fd5e31a5a4f2988003d9a733410388 Mon Sep 17 00:00:00 2001
From: Justin Cechmanek <justin.cechmanek@redis.com>
Date: Thu, 9 Oct 2025 16:06:57 -0700
Subject: [PATCH 06/12] tests hnsw multi vector indices only on supported
 search module versions

---
 tests/conftest.py                     | 83 ---------------------------
 tests/integration/test_aggregation.py | 64 ++++++++++++++++++++-
 2 files changed, 61 insertions(+), 86 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 99baf110..692ce77d 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -232,89 +232,6 @@ def sample_datetimes():
     }
 
 
-@pytest.fixture
-def OG(sample_datetimes):
-    return [
-        {
-            "user": "john",
-            "age": 18,
-            "job": "engineer",
-            "description": "engineers conduct trains that ride on train tracks",
-            "last_updated": sample_datetimes["low"].timestamp(),
-            "credit_score": "high",
-            "location": "-122.4194,37.7749",
-            "user_embedding": [0.1, 0.1, 0.5],
-            "image_embedding": [0.1, 0.1, 0.1, 0.1, 0.1],
-        },
-        {
-            "user": "mary",
-            "age": 14,
-            "job": "doctor",
-            "description": "a medical professional who treats diseases and helps people stay healthy",
-            "last_updated": sample_datetimes["low"].timestamp(),
-            "credit_score": "low",
-            "location": "-122.4194,37.7749",
-            "user_embedding": [0.1, 0.1, 0.5],
-            "image_embedding": [0.1, 0.2, 0.3, 0.4, 0.5],
-        },
-        {
-            "user": "nancy",
-            "age": 94,
-            "job": "doctor",
-            "description": "a research scientist specializing in cancers and diseases of the lungs",
-            "last_updated": sample_datetimes["mid"].timestamp(),
-            "credit_score": "high",
-            "location": "-122.4194,37.7749",
-            "user_embedding": [0.7, 0.1, 0.5],
-            "image_embedding": [0.1, 0.1, 0.3, 0.3, 0.5],
-        },
-        {
-            "user": "tyler",
-            "age": 100,
-            "job": "engineer",
-            "description": "a software developer with expertise in mathematics and computer science",
-            "last_updated": sample_datetimes["mid"].timestamp(),
-            "credit_score": "high",
-            "location": "-110.0839,37.3861",
-            "user_embedding": [0.1, 0.4, 0.5],
-            "image_embedding": [-0.1, -0.2, -0.3, -0.4, -0.5],
-        },
-        {
-            "user": "tim",
-            "age": 12,
-            "job": "dermatologist",
-            "description": "a medical professional specializing in diseases of the skin",
-            "last_updated": sample_datetimes["mid"].timestamp(),
-            "credit_score": "high",
-            "location": "-110.0839,37.3861",
-            "user_embedding": [0.4, 0.4, 0.5],
-            "image_embedding": [-0.1, 0.0, 0.6, 0.0, -0.9],
-        },
-        {
-            "user": "taimur",
-            "age": 15,
-            "job": "CEO",
-            "description": "high stress, but financially rewarding position at the head of a company",
-            "last_updated": sample_datetimes["high"].timestamp(),
-            "credit_score": "low",
-            "location": "-110.0839,37.3861",
-            "user_embedding": [0.6, 0.1, 0.5],
-            "image_embedding": [1.1, 1.2, -0.3, -4.1, 5.0],
-        },
-        {
-            "user": "joe",
-            "age": 35,
-            "job": "dentist",
-            "description": "like the tooth fairy because they'll take your teeth, but you have to pay them!",
-            "last_updated": sample_datetimes["high"].timestamp(),
-            "credit_score": "medium",
-            "location": "-110.0839,37.3861",
-            "user_embedding": [-0.1, -0.1, -0.5],
-            "image_embedding": [-0.8, 2.0, 3.1, 1.5, -1.6],
-        },
-    ]
-
-
 @pytest.fixture
 def sample_data(sample_datetimes):
     return [
diff --git a/tests/integration/test_aggregation.py b/tests/integration/test_aggregation.py
index d4658660..32782457 100644
--- a/tests/integration/test_aggregation.py
+++ b/tests/integration/test_aggregation.py
@@ -50,7 +50,7 @@ def index(multi_vector_data, redis_url, worker_id):
                     "attrs": {
                         "dims": 6,
                         "distance_metric": "cosine",
-                        "algorithm": "hnsw",
+                        "algorithm": "flat",
                         "datatype": "bfloat16",
                     },
                 },
@@ -581,6 +581,64 @@ def test_multivector_query_datatypes(index):
         )
 
 
-def test_multivector_query_broadcasting(index):
+def test_multivector_query_mixed_index(index):
+    # test that we can do multi vector queries on indices with both a 'flat' and 'hnsw' index
     skip_if_redis_version_below(index.client, "7.2.0")
-    pass
+    try:
+        index.schema.remove_field("audio_embedding")
+        index.schema.add_field(
+            {
+                "name": "audio_embedding",
+                "type": "vector",
+                "attrs": {
+                    "dims": 6,
+                    "distance_metric": "cosine",
+                    "algorithm": "hnsw",
+                    "datatype": "bfloat16",
+                },
+            },
+        )
+
+    except:
+        pytest.skip("Required Redis modules not available or version too low")
+
+    vectors = [[0.1, 0.2, 0.5], [1.2, 0.3, -0.4, 0.7, 0.2, -0.3]]
+    vector_fields = ["user_embedding", "audio_embedding"]
+    return_fields = [
+        "distance_0",
+        "distance_1",
+        "score_0",
+        "score_1",
+        "user_embedding",
+        "audio_embedding",
+    ]
+
+    # changing the weights does indeed change the result order
+    multi_query = MultiVectorQuery(
+        vectors=vectors,
+        vector_field_names=vector_fields,
+        return_fields=return_fields,
+        dtypes=["float32", "bfloat16"],
+    )
+    results = index.query(multi_query)
+
+    for i in range(1, len(results)):
+        assert results[i]["combined_score"] <= results[i - 1]["combined_score"]
+
+    # verify we're doing the combined score math correctly
+    weights = [-1.322, 0.851]
+    multi_query = MultiVectorQuery(
+        vectors=vectors,
+        vector_field_names=vector_fields,
+        return_fields=return_fields,
+        dtypes=["float32", "bfloat16"],
+        weights=weights,
+    )
+
+    results = index.query(multi_query)
+    assert results
+    for r in results:
+        score = float(r["score_0"]) * weights[0] + float(r["score_1"]) * weights[1]
+        assert (
+            float(r["combined_score"]) - score <= 0.0001
+        )  # allow for small floating point error

From 406f4202e5fb739a9251322e3ed5e866983de2de Mon Sep 17 00:00:00 2001
From: Justin Cechmanek <justin.cechmanek@redis.com>
Date: Thu, 9 Oct 2025 17:02:53 -0700
Subject: [PATCH 07/12] switches test to float64

---
 tests/integration/test_aggregation.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tests/integration/test_aggregation.py b/tests/integration/test_aggregation.py
index 32782457..38f1f5fb 100644
--- a/tests/integration/test_aggregation.py
+++ b/tests/integration/test_aggregation.py
@@ -51,7 +51,7 @@ def index(multi_vector_data, redis_url, worker_id):
                         "dims": 6,
                         "distance_metric": "cosine",
                         "algorithm": "flat",
-                        "datatype": "bfloat16",
+                        "datatype": "float64",
                     },
                 },
             ],
@@ -68,7 +68,7 @@ def hash_preprocess(item: dict) -> dict:
             **item,
             "user_embedding": array_to_buffer(item["user_embedding"], "float32"),
             "image_embedding": array_to_buffer(item["image_embedding"], "float32"),
-            "audio_embedding": array_to_buffer(item["audio_embedding"], "bfloat16"),
+            "audio_embedding": array_to_buffer(item["audio_embedding"], "float64"),
         }
 
     ### TODO get sample data that has two vector fields
@@ -546,7 +546,7 @@ def test_multivector_query_datatypes(index):
         vectors=vectors,
         vector_field_names=vector_fields,
         return_fields=return_fields,
-        dtypes=["float32", "bfloat16"],
+        dtypes=["float32", "float64"],
     )
     results = index.query(multi_query)
 
@@ -559,7 +559,7 @@ def test_multivector_query_datatypes(index):
         vectors=vectors,
         vector_field_names=vector_fields,
         return_fields=return_fields,
-        dtypes=["float32", "bfloat16"],
+        dtypes=["float32", "float64"],
         weights=weights,
     )
 
@@ -594,7 +594,7 @@ def test_multivector_query_mixed_index(index):
                     "dims": 6,
                     "distance_metric": "cosine",
                     "algorithm": "hnsw",
-                    "datatype": "bfloat16",
+                    "datatype": "float64",
                 },
             },
         )
@@ -618,7 +618,7 @@ def test_multivector_query_mixed_index(index):
         vectors=vectors,
         vector_field_names=vector_fields,
         return_fields=return_fields,
-        dtypes=["float32", "bfloat16"],
+        dtypes=["float32", "float64"],
     )
     results = index.query(multi_query)
 
@@ -631,7 +631,7 @@ def test_multivector_query_mixed_index(index):
         vectors=vectors,
         vector_field_names=vector_fields,
         return_fields=return_fields,
-        dtypes=["float32", "bfloat16"],
+        dtypes=["float32", "float64"],
         weights=weights,
     )
 

From e23b652c974a8c97669eeddc2dab2814422873db Mon Sep 17 00:00:00 2001
From: Justin Cechmanek <justin.cechmanek@redis.com>
Date: Fri, 10 Oct 2025 17:45:08 -0700
Subject: [PATCH 08/12] refactors MultiVectorQuery to accept Vector objects

---
 redisvl/query/__init__.py             |   8 +-
 redisvl/query/aggregate.py            | 126 ++++++++++++------------
 tests/integration/test_aggregation.py | 126 ++++++++++++------------
 tests/unit/test_aggregation_types.py  | 136 ++++++++++++--------------
 4 files changed, 193 insertions(+), 203 deletions(-)

diff --git a/redisvl/query/__init__.py b/redisvl/query/__init__.py
index 67c29d2b..8cae93b2 100644
--- a/redisvl/query/__init__.py
+++ b/redisvl/query/__init__.py
@@ -1,4 +1,9 @@
-from redisvl.query.aggregate import AggregationQuery, HybridQuery, MultiVectorQuery
+from redisvl.query.aggregate import (
+    AggregationQuery,
+    HybridQuery,
+    MultiVectorQuery,
+    Vector,
+)
 from redisvl.query.query import (
     BaseQuery,
     BaseVectorQuery,
@@ -22,4 +27,5 @@
     "AggregationQuery",
     "HybridQuery",
     "MultiVectorQuery",
+    "Vector",
 ]
diff --git a/redisvl/query/aggregate.py b/redisvl/query/aggregate.py
index d0a4273b..d3e89a25 100644
--- a/redisvl/query/aggregate.py
+++ b/redisvl/query/aggregate.py
@@ -1,9 +1,11 @@
 from typing import Any, Dict, List, Optional, Set, Tuple, Union
 
+from pydantic import BaseModel, field_validator
 from redis.commands.search.aggregation import AggregateRequest, Desc
 
 from redisvl.query.filter import FilterExpression
 from redisvl.redis.utils import array_to_buffer
+from redisvl.schema.fields import VectorDataType
 from redisvl.utils.token_escaper import TokenEscaper
 from redisvl.utils.utils import lazy_import
 
@@ -11,6 +13,29 @@
 nltk_stopwords = lazy_import("nltk.corpus.stopwords")
 
 
+class Vector(BaseModel):
+    """
+    Simple object containing the necessary arguments to perform a multi vector query.
+    """
+
+    vector: Union[List[float], bytes]
+    field_name: str
+    dtype: str = "float32"
+    weight: float = 1.0
+
+    @field_validator("dtype")
+    @classmethod
+    def validate_dtype(cls, dtype: str) -> str:
+        try:
+            VectorDataType(dtype.upper())
+        except ValueError:
+            raise ValueError(
+                f"Invalid data type: {dtype}. Supported types are: {[t.lower() for t in VectorDataType]}"
+            )
+
+        return dtype
+
+
 class AggregationQuery(AggregateRequest):
     """
     Base class for aggregation queries used to create aggregation queries for Redis.
@@ -241,17 +266,33 @@ class MultiVectorQuery(AggregationQuery):
 
     .. code-block:: python
 
-        from redisvl.query import MultiVectorQuery
+        from redisvl.query import MultiVectorQuery, Vector
         from redisvl.index import SearchIndex
 
         index = SearchIndex.from_yaml("path/to/index.yaml")
 
+        vector_1 = Vector(
+            vector=[0.1, 0.2, 0.3],
+            field_name="text_vector",
+            dtype="float32",
+            weight=0.7,
+        )
+        vector_2 = Vector(
+            vector=[0.5, 0.5],
+            field_name="image_vector",
+            dtype="bfloat16",
+            weight=0.2,
+        )
+        vector_3 = Vector(
+            vector=[0.1, 0.2, 0.3],
+            field_name="text_vector",
+            dtype="float64",
+            weight=0.5,
+        )
+
         query = MultiVectorQuery(
-            vectors=[[0.1, 0.2, 0.3], [0.5, 0.5], [0.1, 0.1, 0.1, 0.1]],
-            vector_field_names=["text_vector", "image_vector", "feature_vector"]
+            vectors=[vector_1, vector_2, vector_3],
             filter_expression=None,
-            weights=[0.7, 0.2, 0.5],
-            dtypes=["float32", "bfloat16", "float64"],
             num_results=10,
             return_fields=["field1", "field2"],
             dialect=2,
@@ -260,14 +301,13 @@ class MultiVectorQuery(AggregationQuery):
         results = index.query(query)
     """
 
+    _vectors: List[Vector]
+
     def __init__(
         self,
-        vectors: Union[bytes, List[bytes], List[float], List[List[float]]],
-        vector_field_names: Union[str, List[str]],
-        weights: List[float] = [1.0],
+        vectors: Union[Vector, List[Vector]],
         return_fields: Optional[List[str]] = None,
         filter_expression: Optional[Union[str, FilterExpression]] = None,
-        dtypes: List[str] = ["float32"],
         num_results: int = 10,
         return_score: bool = False,
         dialect: int = 2,
@@ -276,87 +316,39 @@ def __init__(
         Instantiates a MultiVectorQuery object.
 
         Args:
-            vectors (Union[bytes, List[bytes], List[float], List[List[float]]): The vectors to perform vector similarity search.
-            vector_field_names (Union[str, List[str]]): The vector field names to search in.
-            weights (List[float]): The weights of the vector similarity.
-                Documents will be scored as:
-                score = (w1) * score1 + (w2) * score2 + (w3) * score3 + ...
-                Defaults to [1.0], which corresponds to equal weighting
+            vectors (Union[Vector, List[Vector]]): The Vectors to perform vector similarity search.
             return_fields (Optional[List[str]], optional): The fields to return. Defaults to None.
             filter_expression (Optional[Union[str, FilterExpression]]): The filter expression to use.
                 Defaults to None.
-            dtypes (List[str]): The data types of the vectors. Defaults to ["float32"] for all vectors.
             num_results (int, optional): The number of results to return. Defaults to 10.
             return_score (bool): Whether to return the combined vector similarity score.
                 Defaults to False.
             dialect (int, optional): The Redis dialect version. Defaults to 2.
-
-        Raises:
-            ValueError: The number of vectors, vector field names, and weights do not agree.
         """
 
         self._filter_expression = filter_expression
-        self._dtypes = dtypes
         self._num_results = num_results
 
-        if any([len(x) == 0 for x in [vectors, vector_field_names, weights, dtypes]]):
-            raise ValueError(
-                f"""The number of vectors and vector field names must be equal.
-                    If weights or dtypes are specified their number must match the number of vectors and vector field names also.
-                    Length of vectors list: {len(vectors) = }
-                    Length of vector_field_names list: {len(vector_field_names) = }
-                    Length of weights list: {len(weights) = }
-                    length of dtypes list: {len(dtypes) = }
-                    """
-            )
-
-        if isinstance(vectors, bytes) or isinstance(vectors[0], float):
+        if isinstance(vectors, Vector):
             self._vectors = [vectors]
         else:
             self._vectors = vectors  # type: ignore
 
-        if isinstance(vector_field_names, str):
-            self._vector_field_names = [vector_field_names]
-        else:
-            self._vector_field_names = vector_field_names
-
-        if len(weights) == 1:
-            self._weights = weights * len(vectors)
-        else:
-            self._weights = weights
-
-        if len(dtypes) == 1:
-            self._dtypes = dtypes * len(vectors)
-        else:
-            self._dtypes = dtypes
-
-        num_vectors = len(self._vectors)
-        if any(
-            [
-                len(x) != num_vectors  # type: ignore
-                for x in [self._vector_field_names, self._weights, self._dtypes]
-            ]
-        ):
-            raise ValueError(
-                f"""The number of vectors and vector field names must be equal.
-                    If weights or dtypes are specified their number must match the number of vectors and vector field names also.
-                    Length of vectors list: {len(self._vectors) = }
-                    Length of vector_field_names list: {len(self._vector_field_names) = }
-                    Length of weights list: {len(self._weights) = }
-                    Length of dtypes list: {len(self._dtypes) = }
-                    """
+        if not all([isinstance(v, Vector) for v in self._vectors]):
+            raise TypeError(
+                "vector argument must be a Vector object or list of Vector objects."
             )
 
         query_string = self._build_query_string()
         super().__init__(query_string)
 
         # calculate the respective vector similarities
-        for i in range(len(vectors)):
+        for i in range(len(self._vectors)):
             self.apply(**{f"score_{i}": f"(2 - @distance_{i})/2"})
 
         # construct the scoring string based on the vector similarity scores and weights
         combined_scores = []
-        for i, w in enumerate(self._weights):
+        for i, w in enumerate([v.weight for v in self._vectors]):
             combined_scores.append(f"@score_{i} * {w}")
         combined_score_string = " + ".join(combined_scores)
 
@@ -375,7 +367,9 @@ def params(self) -> Dict[str, Any]:
             Dict[str, Any]: The parameters for the aggregation.
         """
         params = {}
-        for i, (vector, dtype) in enumerate(zip(self._vectors, self._dtypes)):
+        for i, (vector, dtype) in enumerate(
+            [(v.vector, v.dtype) for v in self._vectors]
+        ):
             if isinstance(vector, list):
                 vector = array_to_buffer(vector, dtype=dtype)  # type: ignore
             params[f"vector_{i}"] = vector
@@ -387,7 +381,7 @@ def _build_query_string(self) -> str:
         # base KNN query
         range_queries = []
         for i, (vector, field) in enumerate(
-            zip(self._vectors, self._vector_field_names)
+            [(v.vector, v.field_name) for v in self._vectors]
         ):
             range_queries.append(
                 f"@{field}:[VECTOR_RANGE 2.0 $vector_{i}]=>{{$YIELD_DISTANCE_AS: distance_{i}}}"
diff --git a/tests/integration/test_aggregation.py b/tests/integration/test_aggregation.py
index 38f1f5fb..f08815a6 100644
--- a/tests/integration/test_aggregation.py
+++ b/tests/integration/test_aggregation.py
@@ -1,7 +1,7 @@
 import pytest
 
 from redisvl.index import SearchIndex
-from redisvl.query import HybridQuery, MultiVectorQuery
+from redisvl.query import HybridQuery, MultiVectorQuery, Vector
 from redisvl.query.filter import FilterExpression, Geo, GeoRadius, Num, Tag, Text
 from redisvl.redis.utils import array_to_buffer
 from tests.conftest import skip_if_redis_version_below
@@ -71,7 +71,6 @@ def hash_preprocess(item: dict) -> dict:
             "audio_embedding": array_to_buffer(item["audio_embedding"], "float64"),
         }
 
-    ### TODO get sample data that has two vector fields
     index.load(multi_vector_data, preprocess=hash_preprocess)
 
     # run the test
@@ -321,13 +320,16 @@ def test_hybrid_query_with_text_filter(index):
 def test_multivector_query(index):
     skip_if_redis_version_below(index.client, "7.2.0")
 
-    vectors = [[0.1, 0.1, 0.5], [0.3, 0.4, 0.7, 0.2, -0.3]]
+    vector_vals = [[0.1, 0.1, 0.5], [0.3, 0.4, 0.7, 0.2, -0.3]]
     vector_fields = ["user_embedding", "image_embedding"]
+    vectors = []
+    for vector, field in zip(vector_vals, vector_fields):
+        vectors.append(Vector(vector=vector, field_name=field))
+
     return_fields = ["user", "credit_score", "age", "job", "location", "description"]
 
     multi_query = MultiVectorQuery(
         vectors=vectors,
-        vector_field_names=vector_fields,
         return_fields=return_fields,
     )
 
@@ -351,7 +353,6 @@ def test_multivector_query(index):
 
     multi_query = MultiVectorQuery(
         vectors=vectors,
-        vector_field_names=vector_fields,
         num_results=3,
     )
 
@@ -368,14 +369,17 @@ def test_multivector_query_with_filter(index):
     skip_if_redis_version_below(index.client, "7.2.0")
 
     text_field = "description"
-    vectors = [[0.1, 0.1, 0.5], [0.3, 0.4, 0.7, 0.2, -0.3]]
+    vector_vals = [[0.1, 0.1, 0.5], [0.3, 0.4, 0.7, 0.2, -0.3]]
     vector_fields = ["user_embedding", "image_embedding"]
     filter_expression = Text(text_field) == ("medical")
 
+    vectors = []
+    for vector, field in zip(vector_vals, vector_fields):
+        vectors.append(Vector(vector=vector, field_name=field))
+
     # make sure we can still apply filters to the same text field we are querying
     multi_query = MultiVectorQuery(
         vectors=vectors,
-        vector_field_names=vector_fields,
         filter_expression=filter_expression,
         return_fields=["job", "description"],
     )
@@ -390,7 +394,6 @@ def test_multivector_query_with_filter(index):
     )
     multi_query = MultiVectorQuery(
         vectors=vectors,
-        vector_field_names=vector_fields,
         filter_expression=filter_expression,
         return_fields=["description"],
     )
@@ -404,7 +407,6 @@ def test_multivector_query_with_filter(index):
     filter_expression = (Num("age") > 30) & ((Num("age") < 30))
     multi_query = MultiVectorQuery(
         vectors=vectors,
-        vector_field_names=vector_fields,
         filter_expression=filter_expression,
         return_fields=["description"],
     )
@@ -416,14 +418,17 @@ def test_multivector_query_with_filter(index):
 def test_multivector_query_with_geo_filter(index):
     skip_if_redis_version_below(index.client, "7.2.0")
 
-    vectors = [[0.2, 0.4, 0.1], [0.1, 0.8, 0.3, -0.2, 0.3]]
+    vector_vals = [[0.2, 0.4, 0.1], [0.1, 0.8, 0.3, -0.2, 0.3]]
     vector_fields = ["user_embedding", "image_embedding"]
     return_fields = ["user", "credit_score", "age", "job", "location", "description"]
     filter_expression = Geo("location") == GeoRadius(-122.4194, 37.7749, 1000, "m")
 
+    vectors = []
+    for vector, field in zip(vector_vals, vector_fields):
+        vectors.append(Vector(vector=vector, field_name=field))
+
     multi_query = MultiVectorQuery(
         vectors=vectors,
-        vector_field_names=vector_fields,
         filter_expression=filter_expression,
         return_fields=return_fields,
     )
@@ -435,11 +440,9 @@ def test_multivector_query_with_geo_filter(index):
 
 
 def test_multivector_query_weights(index):
-    skip_if_redis_version_below(
-        index.client, "7.2.0"
-    )  ## TODO figure out min version for 'case()'
+    skip_if_redis_version_below(index.client, "7.2.0")
 
-    vectors = [[0.1, 0.2, 0.5], [0.3, 0.4, 0.7, 0.2, -0.3]]
+    vector_vals = [[0.1, 0.2, 0.5], [0.3, 0.4, 0.7, 0.2, -0.3]]
     vector_fields = ["user_embedding", "image_embedding"]
     return_fields = [
         "distance_0",
@@ -450,20 +453,25 @@ def test_multivector_query_weights(index):
         "image_embedding",
     ]
 
+    vectors = []
+    for vector, field in zip(vector_vals, vector_fields):
+        vectors.append(Vector(vector=vector, field_name=field))
+
     # changing the weights does indeed change the result order
     multi_query_1 = MultiVectorQuery(
         vectors=vectors,
-        vector_field_names=vector_fields,
         return_fields=return_fields,
-        weights=[0.2, 0.9],
     )
     results_1 = index.query(multi_query_1)
 
+    weights = [0.2, 0.9]
+    vectors = []
+    for vector, field, weight in zip(vector_vals, vector_fields, weights):
+        vectors.append(Vector(vector=vector, field_name=field, weight=weight))
+
     multi_query_2 = MultiVectorQuery(
         vectors=vectors,
-        vector_field_names=vector_fields,
         return_fields=return_fields,
-        weights=[0.5, 0.1],
     )
     results_2 = index.query(multi_query_2)
 
@@ -477,11 +485,13 @@ def test_multivector_query_weights(index):
 
     # weights can be negative, 0.0, or greater than 1.0
     weights = [-5.2, 0.0]
+    vectors = []
+    for vector, field, weight in zip(vector_vals, vector_fields, weights):
+        vectors.append(Vector(vector=vector, field_name=field, weight=weight))
+
     multi_query = MultiVectorQuery(
         vectors=vectors,
-        vector_field_names=vector_fields,
         return_fields=return_fields,
-        weights=weights,
     )
 
     results = index.query(multi_query)
@@ -494,11 +504,13 @@ def test_multivector_query_weights(index):
 
     # verify we're doing the combined score math correctly
     weights = [-1.322, 0.851]
+    vectors = []
+    for vector, field, weight in zip(vector_vals, vector_fields, weights):
+        vectors.append(Vector(vector=vector, field_name=field, weight=weight))
+
     multi_query = MultiVectorQuery(
         vectors=vectors,
-        vector_field_names=vector_fields,
         return_fields=return_fields,
-        weights=weights,
     )
 
     results = index.query(multi_query)
@@ -509,29 +521,13 @@ def test_multivector_query_weights(index):
             float(r["combined_score"]) - score <= 0.0001
         )  # allow for small floating point error
 
-    # raise error if wrong number of weights are passed
-    with pytest.raises(ValueError):
-        _ = MultiVectorQuery(
-            vectors=vectors,
-            vector_field_names=vector_fields,
-            return_fields=return_fields,
-            weights=[],
-        )
-
-    with pytest.raises(ValueError):
-        _ = MultiVectorQuery(
-            vectors=vectors,
-            vector_field_names=vector_fields,
-            return_fields=return_fields,
-            weights=[1.2, 0.23, 0.52],
-        )
-
 
 def test_multivector_query_datatypes(index):
     skip_if_redis_version_below(index.client, "7.2.0")
 
-    vectors = [[0.1, 0.2, 0.5], [1.2, 0.3, -0.4, 0.7, 0.2, -0.3]]
+    vector_vals = [[0.1, 0.2, 0.5], [1.2, 0.3, -0.4, 0.7, 0.2, -0.3]]
     vector_fields = ["user_embedding", "audio_embedding"]
+    dtypes = ["float32", "float64"]
     return_fields = [
         "distance_0",
         "distance_1",
@@ -541,12 +537,13 @@ def test_multivector_query_datatypes(index):
         "audio_embedding",
     ]
 
-    # changing the weights does indeed change the result order
+    vectors = []
+    for vector, field, dtype in zip(vector_vals, vector_fields, dtypes):
+        vectors.append(Vector(vector=vector, field_name=field, dtype=dtype))
+
     multi_query = MultiVectorQuery(
         vectors=vectors,
-        vector_field_names=vector_fields,
         return_fields=return_fields,
-        dtypes=["float32", "float64"],
     )
     results = index.query(multi_query)
 
@@ -555,12 +552,17 @@ def test_multivector_query_datatypes(index):
 
     # verify we're doing the combined score math correctly
     weights = [-1.322, 0.851]
+    vectors = []
+    for vector, field, weight, dtype in zip(
+        vector_vals, vector_fields, weights, dtypes
+    ):
+        vectors.append(
+            Vector(vector=vector, field_name=field, weight=weight, dtype=dtype)
+        )
+
     multi_query = MultiVectorQuery(
         vectors=vectors,
-        vector_field_names=vector_fields,
         return_fields=return_fields,
-        dtypes=["float32", "float64"],
-        weights=weights,
     )
 
     results = index.query(multi_query)
@@ -571,15 +573,6 @@ def test_multivector_query_datatypes(index):
             float(r["combined_score"]) - score <= 0.0001
         )  # allow for small floating point error
 
-    # raise error if wrong number of datatypes are passed
-    with pytest.raises(ValueError):
-        _ = MultiVectorQuery(
-            vectors=vectors,
-            vector_field_names=vector_fields,
-            return_fields=return_fields,
-            dtypes=["float32", "float32", "float64"],
-        )
-
 
 def test_multivector_query_mixed_index(index):
     # test that we can do multi vector queries on indices with both a 'flat' and 'hnsw' index
@@ -602,8 +595,9 @@ def test_multivector_query_mixed_index(index):
     except:
         pytest.skip("Required Redis modules not available or version too low")
 
-    vectors = [[0.1, 0.2, 0.5], [1.2, 0.3, -0.4, 0.7, 0.2, -0.3]]
+    vector_vals = [[0.1, 0.2, 0.5], [1.2, 0.3, -0.4, 0.7, 0.2, -0.3]]
     vector_fields = ["user_embedding", "audio_embedding"]
+    dtypes = ["float32", "float64"]
     return_fields = [
         "distance_0",
         "distance_1",
@@ -613,12 +607,13 @@ def test_multivector_query_mixed_index(index):
         "audio_embedding",
     ]
 
-    # changing the weights does indeed change the result order
+    vectors = []
+    for vector, field, dtype in zip(vector_vals, vector_fields, dtypes):
+        vectors.append(Vector(vector=vector, field_name=field, dtype=dtype))
+
     multi_query = MultiVectorQuery(
         vectors=vectors,
-        vector_field_names=vector_fields,
         return_fields=return_fields,
-        dtypes=["float32", "float64"],
     )
     results = index.query(multi_query)
 
@@ -627,12 +622,17 @@ def test_multivector_query_mixed_index(index):
 
     # verify we're doing the combined score math correctly
     weights = [-1.322, 0.851]
+    vectors = []
+    for vector, field, dtype, weight in zip(
+        vector_vals, vector_fields, dtypes, weights
+    ):
+        vectors.append(
+            Vector(vector=vector, field_name=field, dtype=dtype, weight=weight)
+        )
+
     multi_query = MultiVectorQuery(
         vectors=vectors,
-        vector_field_names=vector_fields,
         return_fields=return_fields,
-        dtypes=["float32", "float64"],
-        weights=weights,
     )
 
     results = index.query(multi_query)
diff --git a/tests/unit/test_aggregation_types.py b/tests/unit/test_aggregation_types.py
index 442a338c..4d3b18e2 100644
--- a/tests/unit/test_aggregation_types.py
+++ b/tests/unit/test_aggregation_types.py
@@ -4,7 +4,7 @@
 from redis.commands.search.result import Result
 
 from redisvl.index.index import process_results
-from redisvl.query.aggregate import HybridQuery, MultiVectorQuery
+from redisvl.query.aggregate import HybridQuery, MultiVectorQuery, Vector
 from redisvl.query.filter import Tag
 
 # Sample data for testing
@@ -197,130 +197,120 @@ def test_hybrid_query_with_string_filter():
 
 
 def test_multi_vector_query():
-    # test we require vectors and field names
+    # test we require Vector objects
     with pytest.raises(TypeError):
         _ = MultiVectorQuery()
 
-    with pytest.raises(ValueError):
-        _ = MultiVectorQuery(vectors=[sample_vector], vector_field_names=[])
+    with pytest.raises(TypeError):
+        _ = MultiVectorQuery(vector=[sample_vector])
 
-    with pytest.raises(ValueError):
-        _ = MultiVectorQuery(vectors=[], vector_field_names=["field_1"])
+    with pytest.raises(TypeError):
+        _ = MultiVectorQuery(vectors=[[0.1, 0.1, 0.1], "field_1"])
 
     # test we can initialize with a single vector and single field name
     multivector_query = MultiVectorQuery(
-        vectors=[sample_vector], vector_field_names=["field_1"]
+        Vector(vector=sample_vector, field_name="field_1")
     )
 
     # check default properties
-    assert multivector_query._vectors == [sample_vector]
-    assert multivector_query._vector_field_names == ["field_1"]
+    assert multivector_query._vectors == [
+        Vector(vector=sample_vector, field_name="field_1")
+    ]
+    assert multivector_query._vectors[0].field_name == "field_1"
+    assert multivector_query._vectors[0].weight == 1.0
+    assert multivector_query._vectors[0].dtype == "float32"
     assert multivector_query._filter_expression == None
-    assert multivector_query._weights == [1.0]
     assert multivector_query._num_results == 10
     assert multivector_query._loadfields == []
     assert multivector_query._dialect == 2
 
-    # test we can initialize with mutliple vectors and field names
-    multivector_query = MultiVectorQuery(
-        vectors=[sample_vector, sample_vector_2, sample_vector_3, sample_vector_4],
-        vector_field_names=["field_1", "field_2", "field_3", "field_4"],
-        weights=[0.2, 0.5, 0.6, 0.1],
-        dtypes=["float32", "float32", "float32", "float32"],
-    )
+    # test we can initialize with mutliple Vectors
+    vectors = [sample_vector, sample_vector_2, sample_vector_3, sample_vector_4]
+    vector_field_names = ["field_1", "field_2", "field_3", "field_4"]
+    weights = [0.2, 0.5, 0.6, 0.1]
+    dtypes = ["float32", "float32", "float32", "float32"]
+
+    args = []
+    for vec, field, weight, dtype in zip(vectors, vector_field_names, weights, dtypes):
+        args.append(Vector(vector=vec, field_name=field, weight=weight, dtype=dtype))
+
+    multivector_query = MultiVectorQuery(vectors=args)
 
     assert len(multivector_query._vectors) == 4
-    assert len(multivector_query._vector_field_names) == 4
-    assert len(multivector_query._weights) == 4
-    assert len(multivector_query._dtypes) == 4
+    assert multivector_query._vectors == args
 
     # test defaults can be overwritten
     filter_expression = Tag("user group") == ["group A", "group C"]
+
     multivector_query = MultiVectorQuery(
-        vectors=[sample_vector, sample_vector_2, sample_vector_3, sample_vector_4],
-        vector_field_names=["field_1", "field_2", "field_3", "field_4"],
+        vectors=args,
         filter_expression=filter_expression,
-        weights=[0.2, 0.5, 0.6, 0.1],
-        dtypes=["float32", "float32", "float64", "bfloat16"],
         num_results=5,
         return_fields=["field_1", "user name", "address"],
         dialect=4,
     )
 
-    assert multivector_query._vectors == [
-        sample_vector,
-        sample_vector_2,
-        sample_vector_3,
-        sample_vector_4,
-    ]
-    assert multivector_query._vector_field_names == [
-        "field_1",
-        "field_2",
-        "field_3",
-        "field_4",
-    ]
-    assert multivector_query._weights == [0.2, 0.5, 0.6, 0.1]
     assert multivector_query._filter_expression == filter_expression
     assert multivector_query._num_results == 5
     assert multivector_query._loadfields == ["field_1", "user name", "address"]
     assert multivector_query._dialect == 4
 
 
-def test_multi_vector_query_broadcasting():
+def test_multi_vector_query_string():
     # if a single weight is passed it is applied to all similarity scores
     field_1 = "text embedding"
     field_2 = "image embedding"
-    weight = 0.2
+    weight_1 = 0.2
+    weight_2 = 0.7
     multi_vector_query = MultiVectorQuery(
-        vectors=[sample_vector_2, sample_vector_3],
-        vector_field_names=[field_1, field_2],
-        weights=[weight],
+        vectors=[
+            Vector(vector=sample_vector_2, field_name=field_1, weight=weight_1),
+            Vector(vector=sample_vector_3, field_name=field_2, weight=weight_2),
+        ]
     )
 
     assert (
         str(multi_vector_query)
-        == f"@{field_1}:[VECTOR_RANGE 2.0 $vector_0]=>{{$YIELD_DISTANCE_AS: distance_0}} | @{field_2}:[VECTOR_RANGE 2.0 $vector_1]=>{{$YIELD_DISTANCE_AS: distance_1}} SCORER TFIDF DIALECT 2 APPLY (2 - @distance_0)/2 AS score_0 APPLY (2 - @distance_1)/2 AS score_1 APPLY @score_0 * {weight} + @score_1 * {weight} AS combined_score SORTBY 2 @combined_score DESC MAX 10"
+        == f"@{field_1}:[VECTOR_RANGE 2.0 $vector_0]=>{{$YIELD_DISTANCE_AS: distance_0}} | @{field_2}:[VECTOR_RANGE 2.0 $vector_1]=>{{$YIELD_DISTANCE_AS: distance_1}} SCORER TFIDF DIALECT 2 APPLY (2 - @distance_0)/2 AS score_0 APPLY (2 - @distance_1)/2 AS score_1 APPLY @score_0 * {weight_1} + @score_1 * {weight_2} AS combined_score SORTBY 2 @combined_score DESC MAX 10"
     )
 
-    # if a single dtype is passed it is applied to all vectors
-    multi_vector_query = MultiVectorQuery(
-        vectors=[sample_vector_2, sample_vector_3],
-        vector_field_names=["text embedding", "image embedding"],
-        dtypes=["float16"],
-    )
-
-    assert multi_vector_query._dtypes == ["float16", "float16"]
-    assert (
-        str(multi_vector_query)
-        == f"@{field_1}:[VECTOR_RANGE 2.0 $vector_0]=>{{$YIELD_DISTANCE_AS: distance_0}} | @{field_2}:[VECTOR_RANGE 2.0 $vector_1]=>{{$YIELD_DISTANCE_AS: distance_1}} SCORER TFIDF DIALECT 2 APPLY (2 - @distance_0)/2 AS score_0 APPLY (2 - @distance_1)/2 AS score_1 APPLY @score_0 * 1.0 + @score_1 * 1.0 AS combined_score SORTBY 2 @combined_score DESC MAX 10"
-    )
 
+def test_vector_object_validation():
+    # test an error is raised if the required vector and field_name are missing
+    with pytest.raises(ValueError):
+        _ = Vector()
 
-def test_multi_vector_query_errors():
-    # test an error is raised if the number of vectors and number of fields don't match
     with pytest.raises(ValueError):
-        _ = MultiVectorQuery(
-            vectors=[sample_vector, sample_vector_2, sample_vector_3],
-            vector_field_names=["text embedding", "image embedding"],
+        _ = Vector(
+            vector=[],
+            field_name=[],
         )
 
+    # test an error is raised if the type of vector or fields are incorrect
+    # vector must be a flat list of floats, not a list of lists
     with pytest.raises(ValueError):
-        _ = MultiVectorQuery(
-            vectors=[sample_vector, sample_vector_2],
-            vector_field_names=["text embedding", "image embedding", "features"],
+        _ = Vector(
+            vector=[sample_vector, sample_vector_2, sample_vector_3],
+            field_name="text embedding",
         )
 
-    # test an error is raised if the number of weights is incorrect
+    # field_name must be a single string, not a list of strings
     with pytest.raises(ValueError):
-        _ = MultiVectorQuery(
-            vectors=[sample_vector, sample_vector_2],
-            vector_field_names=["text embedding", "image embedding"],
-            weights=[0.1, 0.2, 0.3],
+        _ = Vector(
+            vector=sample_vector,
+            field_name=["text embedding", "image embedding", "features"],
         )
 
-    # test an error is raised if none of the field names are present
+    # dtype must be one of the supported values
     with pytest.raises(ValueError):
-        _ = MultiVectorQuery(
-            vectors=[],
-            vector_field_names=[],
-        )
+        _ = Vector(vector=sample_vector, field_name="text embedding", dtype="float")
+
+    with pytest.raises(ValueError):
+        _ = Vector(vector=sample_vector, field_name="text embedding", dtype="normal")
+
+    with pytest.raises(ValueError):
+        _ = Vector(vector=sample_vector, field_name="text embedding", dtype="")
+
+    for dtype in ["bfloat16", "float16", "float32", "float64", "int8", "uint8"]:
+        vec = Vector(vector=sample_vector, field_name="text embedding", dtype=dtype)
+        assert isinstance(vec, Vector)

From 2d58f6a3c06e1425c3b40673a00055de04ffa4d5 Mon Sep 17 00:00:00 2001
From: Justin Cechmanek <justin.cechmanek@redis.com>
Date: Tue, 14 Oct 2025 14:35:28 -0700
Subject: [PATCH 09/12] updates Sphinx docs to include Vector class

---
 docs/api/index.md   |  1 +
 docs/api/query.rst  | 14 ++++++++++++++
 docs/api/vector.rst | 17 +++++++++++++++++
 3 files changed, 32 insertions(+)
 create mode 100644 docs/api/vector.rst

diff --git a/docs/api/index.md b/docs/api/index.md
index 5b7b6261..f7c1c661 100644
--- a/docs/api/index.md
+++ b/docs/api/index.md
@@ -15,6 +15,7 @@ Reference documentation for the RedisVL API.
 
 schema
 searchindex
+vector
 query
 filter
 vectorizer
diff --git a/docs/api/query.rst b/docs/api/query.rst
index fa92230e..c2ba04f9 100644
--- a/docs/api/query.rst
+++ b/docs/api/query.rst
@@ -88,3 +88,17 @@ CountQuery
    :inherited-members:
    :show-inheritance:
    :exclude-members: add_filter,get_args,highlight,return_field,summarize
+
+
+
+MultiVectorQuery
+================
+
+.. currentmodule:: redisvl.query
+
+
+.. autoclass:: MultiVectorQuery
+   :members:
+   :inherited-members:
+   :show-inheritance:
+   :exclude-members: add_filter,get_args,highlight,return_field,summarize
diff --git a/docs/api/vector.rst b/docs/api/vector.rst
new file mode 100644
index 00000000..9d28d9cc
--- /dev/null
+++ b/docs/api/vector.rst
@@ -0,0 +1,17 @@
+
+******
+Vector
+******
+
+The Vector class in RedisVL is a container that encapsulates a numerical vector, its datatype, its corresponding index field name, and an optional importance weight. It is used when constructing multi-vector queries using the MultiVectorQuery class.
+
+
+Vector
+===========
+
+.. currentmodule:: redisvl.query
+
+
+.. autoclass:: Vector
+   :members:
+   :exclude-members: 

From f29c8f8d89b3f406416c5e9021977584e03f05bb Mon Sep 17 00:00:00 2001
From: Justin Cechmanek <justin.cechmanek@redis.com>
Date: Tue, 14 Oct 2025 16:23:54 -0700
Subject: [PATCH 10/12] fixes typo

---
 redisvl/query/aggregate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/redisvl/query/aggregate.py b/redisvl/query/aggregate.py
index d3e89a25..77f78b3f 100644
--- a/redisvl/query/aggregate.py
+++ b/redisvl/query/aggregate.py
@@ -336,7 +336,7 @@ def __init__(
 
         if not all([isinstance(v, Vector) for v in self._vectors]):
             raise TypeError(
-                "vector arugment must be a Vector object or list of Vector objects."
+                "vector argument must be a Vector object or list of Vector objects."
             )
 
         query_string = self._build_query_string()

From a544f397ac24b617f5f33f467b38432e48e0c12d Mon Sep 17 00:00:00 2001
From: Justin Cechmanek <justin.cechmanek@redis.com>
Date: Tue, 14 Oct 2025 16:49:00 -0700
Subject: [PATCH 11/12] removes unused parameter

---
 redisvl/query/aggregate.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/redisvl/query/aggregate.py b/redisvl/query/aggregate.py
index 77f78b3f..a3a31e05 100644
--- a/redisvl/query/aggregate.py
+++ b/redisvl/query/aggregate.py
@@ -309,7 +309,6 @@ def __init__(
         return_fields: Optional[List[str]] = None,
         filter_expression: Optional[Union[str, FilterExpression]] = None,
         num_results: int = 10,
-        return_score: bool = False,
         dialect: int = 2,
     ):
         """
@@ -321,8 +320,6 @@ def __init__(
             filter_expression (Optional[Union[str, FilterExpression]]): The filter expression to use.
                 Defaults to None.
             num_results (int, optional): The number of results to return. Defaults to 10.
-            return_score (bool): Whether to return the combined vector similarity score.
-                Defaults to False.
             dialect (int, optional): The Redis dialect version. Defaults to 2.
         """
 

From 951b075c4caeee49ed10ebbdf2772434fb208b15 Mon Sep 17 00:00:00 2001
From: Justin Cechmanek <165097110+justin-cechmanek@users.noreply.github.com>
Date: Tue, 14 Oct 2025 16:50:43 -0700
Subject: [PATCH 12/12] fixes typo

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 tests/unit/test_aggregation_types.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unit/test_aggregation_types.py b/tests/unit/test_aggregation_types.py
index 4d3b18e2..f2b6be86 100644
--- a/tests/unit/test_aggregation_types.py
+++ b/tests/unit/test_aggregation_types.py
@@ -224,7 +224,7 @@ def test_multi_vector_query():
     assert multivector_query._loadfields == []
     assert multivector_query._dialect == 2
 
-    # test we can initialize with mutliple Vectors
+    # test we can initialize with multiple Vectors
     vectors = [sample_vector, sample_vector_2, sample_vector_3, sample_vector_4]
     vector_field_names = ["field_1", "field_2", "field_3", "field_4"]
     weights = [0.2, 0.5, 0.6, 0.1]