feat: add role filtering to message history get_recent and get_relevant methods (#349)

bsbodden · bsbodden · commit 81f7e85bfdfc · 2025-09-26T15:08:31.000-07:00
Add a role parameter to get_recent() in both MessageHistory and
SemanticMessageHistory, and to SemanticMessageHistory.get_relevant(),
to enable filtering messages by role type.

Features:
- Support single role filtering: role="system"
- Support multiple role filtering: role=["system", "user"]
- Valid roles: "system", "user", "llm", "tool"
- Backward compatible: role=None returns all messages
- Works with existing parameters (top_k, session_tag, raw, etc.)
- Comprehensive validation with clear error messages

The implementation maintains full backward compatibility while enabling
users to retrieve only specific message types like system prompts.
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -48,6 +48,12 @@ index = SearchIndex(schema, redis_url="redis://localhost:6379")
   token.strip().strip(",").replace("“", "").replace("”", "").lower()
   ```
 
+### Git Operations
+**CRITICAL**: NEVER use `git push` or attempt to push to remote repositories. The user will handle all git push operations.
+
+### Code Quality
+**IMPORTANT**: Always run `make format` before committing code to ensure proper formatting and linting compliance.
+
 ### README.md Maintenance
 **IMPORTANT**: DO NOT modify README.md unless explicitly requested.
 
diff --git a/redisvl/extensions/message_history/base_history.py b/redisvl/extensions/message_history/base_history.py
@@ -60,6 +60,7 @@ def get_recent(
         as_text: bool = False,
         raw: bool = False,
         session_tag: Optional[str] = None,
+        role: Optional[Union[str, List[str]]] = None,
     ) -> Union[List[str], List[Dict[str, str]]]:
         """Retrieve the recent conversation history in sequential order.
 
@@ -72,16 +73,60 @@ def get_recent(
                 prompt and response
             session_tag (str): Tag to be added to entries to link to a specific
                 conversation session. Defaults to instance ULID.
+            role (Optional[Union[str, List[str]]]): Filter messages by role(s).
+                Can be a single role string ("system", "user", "llm", "tool") or
+                a list of roles. If None, all roles are returned.
 
         Returns:
             Union[str, List[str]]: A single string transcription of the messages
                                    or list of strings if as_text is false.
 
         Raises:
-            ValueError: If top_k is not an integer greater than or equal to 0.
+            ValueError: If top_k is not an integer greater than or equal to 0,
+                or if role contains invalid values.
         """
         raise NotImplementedError
 
+    def _validate_roles(
+        self, role: Optional[Union[str, List[str]]]
+    ) -> Optional[List[str]]:
+        """Validate and normalize role parameter.
+
+        Args:
+            role: A single role string, list of roles, or None.
+
+        Returns:
+            List of valid role strings if role is provided, None otherwise.
+
+        Raises:
+            ValueError: If role contains invalid values.
+        """
+        if role is None:
+            return None
+
+        valid_roles = {"system", "user", "llm", "tool"}
+
+        # Handle single role string
+        if isinstance(role, str):
+            if role not in valid_roles:
+                raise ValueError(
+                    f"Invalid role '{role}'. Valid roles are: {valid_roles}"
+                )
+            return [role]
+
+        # Handle list of roles
+        if isinstance(role, list):
+            if not role:  # Empty list
+                raise ValueError("roles cannot be empty")
+            for r in role:
+                if r not in valid_roles:
+                    raise ValueError(
+                        f"Invalid role '{r}'. Valid roles are: {valid_roles}"
+                    )
+            return role
+
+        raise ValueError("role must be a string or list of strings")
+
     def _format_context(
         self, messages: List[Dict[str, Any]], as_text: bool
     ) -> Union[List[str], List[Dict[str, str]]]:
diff --git a/redisvl/extensions/message_history/message_history.py b/redisvl/extensions/message_history/message_history.py
@@ -119,6 +119,7 @@ def get_recent(
         as_text: bool = False,
         raw: bool = False,
         session_tag: Optional[str] = None,
+        role: Optional[Union[str, List[str]]] = None,
     ) -> Union[List[str], List[Dict[str, str]]]:
         """Retrieve the recent message history in sequential order.
 
@@ -130,17 +131,24 @@ def get_recent(
                 prompt and response.
             session_tag (Optional[str]): Tag of the entries linked to a specific
                 conversation session. Defaults to instance ULID.
+            role (Optional[Union[str, List[str]]]): Filter messages by role(s).
+                Can be a single role string ("system", "user", "llm", "tool") or
+                a list of roles. If None, all roles are returned.
 
         Returns:
             Union[str, List[str]]: A single string transcription of the messages
                 or list of strings if as_text is false.
 
         Raises:
-            ValueError: if top_k is not an integer greater than or equal to 0.
+            ValueError: if top_k is not an integer greater than or equal to 0,
+                or if role contains invalid values.
         """
         if type(top_k) != int or top_k < 0:
             raise ValueError("top_k must be an integer greater than or equal to 0")
 
+        # Validate and normalize role parameter
+        roles_to_filter = self._validate_roles(role)
+
         return_fields = [
             ID_FIELD_NAME,
             SESSION_FIELD_NAME,
@@ -157,8 +165,22 @@ def get_recent(
             else self._default_session_filter
         )
 
+        # Combine session filter with role filter if provided
+        filter_expression = session_filter
+        if roles_to_filter is not None:
+            if len(roles_to_filter) == 1:
+                role_filter = Tag(ROLE_FIELD_NAME) == roles_to_filter[0]
+            else:
+                # Multiple roles - use OR logic
+                role_filters = [Tag(ROLE_FIELD_NAME) == r for r in roles_to_filter]
+                role_filter = role_filters[0]
+                for rf in role_filters[1:]:
+                    role_filter = role_filter | rf
+
+            filter_expression = session_filter & role_filter
+
         query = FilterQuery(
-            filter_expression=session_filter,
+            filter_expression=filter_expression,
             return_fields=return_fields,
             num_results=top_k,
         )
diff --git a/redisvl/extensions/message_history/semantic_history.py b/redisvl/extensions/message_history/semantic_history.py
@@ -173,6 +173,7 @@ def get_relevant(
         session_tag: Optional[str] = None,
         raw: bool = False,
         distance_threshold: Optional[float] = None,
+        role: Optional[Union[str, List[str]]] = None,
     ) -> Union[List[str], List[Dict[str, str]]]:
         """Searches the message history for information semantically related to
         the specified prompt.
@@ -195,18 +196,25 @@ def get_relevant(
                 if no relevant context is found.
             raw (bool): Whether to return the full Redis hash entry or just the
                 message.
+            role (Optional[Union[str, List[str]]]): Filter messages by role(s).
+                Can be a single role string ("system", "user", "llm", "tool") or
+                a list of roles. If None, all roles are returned.
 
         Returns:
             Union[List[str], List[Dict[str,str]]: Either a list of strings, or a
             list of prompts and responses in JSON containing the most relevant.
 
-        Raises ValueError: if top_k is not an integer greater or equal to 0.
+        Raises ValueError: if top_k is not an integer greater or equal to 0,
+            or if role contains invalid values.
         """
         if type(top_k) != int or top_k < 0:
             raise ValueError("top_k must be an integer greater than or equal to -1")
         if top_k == 0:
             return []
 
+        # Validate and normalize role parameter
+        roles_to_filter = self._validate_roles(role)
+
         # override distance threshold
         distance_threshold = distance_threshold or self._distance_threshold
 
@@ -225,21 +233,35 @@ def get_relevant(
             else self._default_session_filter
         )
 
+        # Combine session filter with role filter if provided
+        filter_expression = session_filter
+        if roles_to_filter is not None:
+            if len(roles_to_filter) == 1:
+                role_filter = Tag(ROLE_FIELD_NAME) == roles_to_filter[0]
+            else:
+                # Multiple roles - use OR logic
+                role_filters = [Tag(ROLE_FIELD_NAME) == r for r in roles_to_filter]
+                role_filter = role_filters[0]
+                for rf in role_filters[1:]:
+                    role_filter = role_filter | rf
+
+            filter_expression = session_filter & role_filter
+
         query = RangeQuery(
             vector=self._vectorizer.embed(prompt),
             vector_field_name=MESSAGE_VECTOR_FIELD_NAME,
             return_fields=return_fields,
             distance_threshold=distance_threshold,
             num_results=top_k,
             return_score=True,
-            filter_expression=session_filter,
+            filter_expression=filter_expression,
             dtype=self._vectorizer.dtype,
         )
         messages = self._index.query(query)
 
         # if we don't find semantic matches fallback to returning recent context
         if not messages and fall_back:
-            return self.get_recent(as_text=as_text, top_k=top_k, raw=raw)
+            return self.get_recent(as_text=as_text, top_k=top_k, raw=raw, role=role)
         if raw:
             return messages
         return self._format_context(messages, as_text)
@@ -250,6 +272,7 @@ def get_recent(
         as_text: bool = False,
         raw: bool = False,
         session_tag: Optional[str] = None,
+        role: Optional[Union[str, List[str]]] = None,
     ) -> Union[List[str], List[Dict[str, str]]]:
         """Retrieve the recent message history in sequential order.
 
@@ -261,17 +284,24 @@ def get_recent(
                 prompt and response
             session_tag (Optional[str]): Tag of the entries linked to a specific
                 conversation session. Defaults to instance ULID.
+            role (Optional[Union[str, List[str]]]): Filter messages by role(s).
+                Can be a single role string ("system", "user", "llm", "tool") or
+                a list of roles. If None, all roles are returned.
 
         Returns:
             Union[str, List[str]]: A single string transcription of the session
                 or list of strings if as_text is false.
 
         Raises:
-            ValueError: if top_k is not an integer greater than or equal to 0.
+            ValueError: if top_k is not an integer greater than or equal to 0,
+                or if role contains invalid values.
         """
         if type(top_k) != int or top_k < 0:
             raise ValueError("top_k must be an integer greater than or equal to 0")
 
+        # Validate and normalize role parameter
+        roles_to_filter = self._validate_roles(role)
+
         return_fields = [
             ID_FIELD_NAME,
             SESSION_FIELD_NAME,
@@ -288,8 +318,22 @@ def get_recent(
             else self._default_session_filter
         )
 
+        # Combine session filter with role filter if provided
+        filter_expression = session_filter
+        if roles_to_filter is not None:
+            if len(roles_to_filter) == 1:
+                role_filter = Tag(ROLE_FIELD_NAME) == roles_to_filter[0]
+            else:
+                # Multiple roles - use OR logic
+                role_filters = [Tag(ROLE_FIELD_NAME) == r for r in roles_to_filter]
+                role_filter = role_filters[0]
+                for rf in role_filters[1:]:
+                    role_filter = role_filter | rf
+
+            filter_expression = session_filter & role_filter
+
         query = FilterQuery(
-            filter_expression=session_filter,
+            filter_expression=filter_expression,
             return_fields=return_fields,
             num_results=top_k,
         )
diff --git a/tests/integration/test_role_filter_get_recent.py b/tests/integration/test_role_filter_get_recent.py