From 0b36685e1b1d4cb89f16bbc8dc3fc6610f6f0eda Mon Sep 17 00:00:00 2001 From: matdev83 <211248003+matdev83@users.noreply.github.com> Date: Mon, 13 Oct 2025 00:37:28 +0200 Subject: [PATCH] Fix ZAI fallback message content normalization --- src/core/app/controllers/chat_controller.py | 68 +++++++++++++++++-- .../test_chat_controller_content.py | 36 ++++++++++ 2 files changed, 98 insertions(+), 6 deletions(-) create mode 100644 tests/unit/core/app/controllers/test_chat_controller_content.py diff --git a/src/core/app/controllers/chat_controller.py b/src/core/app/controllers/chat_controller.py index a844e0c66..ce72be582 100644 --- a/src/core/app/controllers/chat_controller.py +++ b/src/core/app/controllers/chat_controller.py @@ -1,11 +1,9 @@ -""" -Chat Controller - -Handles all chat completion related API endpoints. -""" +"""Chat Controller handling OpenAI-compatible chat endpoints.""" import asyncio +import json import logging +from collections.abc import Sequence from typing import Any, cast from fastapi import HTTPException, Request, Response @@ -108,6 +106,62 @@ def _try_get( return TranslationService() + @staticmethod + def _coerce_message_content_to_text(content: Any) -> str: + """Flatten ChatMessage content into a plain text payload for Anthropic.""" + + if content is None: + return "" + + if isinstance(content, str): + return content + + if isinstance(content, bytes | bytearray): + return content.decode("utf-8", errors="ignore") + + if hasattr(content, "model_dump"): + try: + dumped = content.model_dump() + except Exception: # pragma: no cover - defensive + dumped = None + if dumped is not None: + return ChatController._coerce_message_content_to_text(dumped) + + if isinstance(content, dict): + text_value = content.get("text") + if isinstance(text_value, str): + return text_value + if isinstance(text_value, (bytes, bytearray)): + return text_value.decode("utf-8", errors="ignore") + + if content.get("type") == "image_url": + image_payload = content.get("image_url") + if isinstance(image_payload, dict): + url_value = image_payload.get("url") + if isinstance(url_value, str): + return url_value + + return json.dumps(content, ensure_ascii=False) + + if isinstance(content, Sequence) and not isinstance( + content, (str, bytes, bytearray) + ): + parts: list[str] = [] + for part in content: + text_part = ChatController._coerce_message_content_to_text(part) + if text_part: + parts.append(text_part) + return "\n\n".join(parts) + + if hasattr(content, "text"): + text_attr = getattr(content, "text") + if isinstance(text_attr, str): + return text_attr + if isinstance(text_attr, (bytes, bytearray)): + return text_attr.decode("utf-8", errors="ignore") + + return str(content) + async def handle_chat_completion( self, request: Request, @@ -160,7 +214,9 @@ async def handle_chat_completion( # Normalize message content to str for AnthropicMessage anth_messages = [] for m in domain_request.messages: - content_str = m.content if isinstance(m.content, str) else "" + content_str = self._coerce_message_content_to_text( + getattr(m, "content", None) + ) anth_messages.append( AnthropicMessage(role=m.role, content=content_str) ) diff --git a/tests/unit/core/app/controllers/test_chat_controller_content.py b/tests/unit/core/app/controllers/test_chat_controller_content.py new file mode 100644 index 000000000..3ce507866 --- /dev/null +++ b/tests/unit/core/app/controllers/test_chat_controller_content.py @@ -0,0 +1,36 @@ +from src.core.app.controllers.chat_controller import ChatController +from src.core.domain.chat import MessageContentPartText + + +def test_coerce_message_content_to_text_handles_sequence_parts() -> None: + """Ensure multimodal sequences retain textual payloads when flattened.""" + + content = [ + MessageContentPartText(text="First"), + {"type": "text", "text": "Second"}, + "Third", + ] + + result = ChatController._coerce_message_content_to_text(content) + + assert result == "First\n\nSecond\n\nThird" + + +def test_coerce_message_content_to_text_decodes_bytes() -> None: + """Byte content should be decoded instead of being dropped.""" + + payload = b"binary-text" + + result = ChatController._coerce_message_content_to_text(payload) + + assert result == "binary-text" + + +def test_coerce_message_content_to_text_handles_nested_model_dump() -> None: + """Domain models using model_dump should still surface their text.""" + + part = MessageContentPartText(text="Nested") + + result = ChatController._coerce_message_content_to_text(part) + + assert result == "Nested"