 import json
 import os
-from typing import Callable
+from typing import cast
 
 from litellm.types.utils import (
     ChatCompletionMessageToolCall,
 
 from openhands.core.context import EnvContext, PromptManager
 from openhands.core.conversation import ConversationCallbackType, ConversationState
+from openhands.core.event import ActionEvent, AgentErrorEvent, LLMConvertibleEvent, MessageEvent, ObservationEvent, SystemPromptEvent
 from openhands.core.llm import LLM, Message, TextContent, get_llm_metadata
 from openhands.core.logger import get_logger
 from openhands.core.tool import BUILT_IN_TOOLS, ActionBase, FinishTool, ObservationBase, Tool
 
 logger = get_logger(__name__)
 
 
-
 class CodeActAgent(AgentBase):
     def __init__(
         self,
@@ -45,43 +45,25 @@ def __init__(
     def init_state(
         self,
         state: ConversationState,
-        initial_user_message: Message | None = None,
-        on_event: ConversationCallbackType | None = None,
+        on_event: ConversationCallbackType,
     ) -> None:
         # TODO(openhands): we should add a test verifying that init_state actually modifies state in-place
-        messages = state.history.messages
+        messages = [e.to_llm_message() for e in state.events]
         if len(messages) == 0:
             # Prepare system message
-            sys_msg = Message(role="system", content=[self.system_message])
-            messages.append(sys_msg)
-            if on_event:
-                on_event(sys_msg)
-            if initial_user_message is None:
-                raise ValueError("initial_user_message must be provided in init_state for CodeActAgent")
-
-            # Prepare user message
-            content = initial_user_message.content
-            # TODO: think about this - we might want to handle this outside Agent but inside Conversation (e.g., in send_messages)
-            # downside of handling them inside Conversation would be: conversation don't have access
-            # to *any* action execution runtime information
-            if self.env_context:
-                initial_env_context: list[TextContent] = self.env_context.render(self.prompt_manager)
-                content += initial_env_context
-            user_msg = Message(role="user", content=content)
-            messages.append(user_msg)
-            if on_event:
-                on_event(user_msg)
-            if self.env_context and self.env_context.activated_microagents:
-                for microagent in self.env_context.activated_microagents:
-                    state.history.microagent_activations.append((microagent.name, len(messages) - 1))
+            event = SystemPromptEvent(source="agent", system_prompt=self.system_message, tools=[t.to_openai_tool() for t in self.tools.values()])
+            # TODO: maybe we should combine this into on_event?
+            state.events.append(event)
+            on_event(event)
 
     def step(
         self,
         state: ConversationState,
-        on_event: ConversationCallbackType | None = None,
+        on_event: ConversationCallbackType,
     ) -> None:
         # Get LLM Response (Action)
-        _messages = self.llm.format_messages_for_llm(state.history.messages)
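+        # Rebuild the LLM message history from the event log, keeping only events that can be converted to LLM messages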
+        llm_convertible_events = cast(list[LLMConvertibleEvent], [e for e in state.events if isinstance(e, LLMConvertibleEvent)])
+        _messages = self.llm.format_messages_for_llm(LLMConvertibleEvent.events_to_messages(llm_convertible_events))
         logger.debug(f"Sending messages to LLM: {json.dumps(_messages, indent=2)}")
         response: ModelResponse = self.llm.completion(
             messages=_messages,
@@ -90,30 +72,54 @@ def step(
         )
         assert len(response.choices) == 1 and isinstance(response.choices[0], Choices)
         llm_message: LiteLLMMessage = response.choices[0].message  # type: ignore
-
         message = Message.from_litellm_message(llm_message)
-        state.history.messages.append(message)
-        if on_event:
-            on_event(message)
 
         if message.tool_calls and len(message.tool_calls) > 0:
             tool_call: ChatCompletionMessageToolCall
+            if any(tc.type != "function" for tc in message.tool_calls):
+                logger.warning("LLM returned tool calls but some are not of type 'function' - ignoring those")
+
             tool_calls = [tool_call for tool_call in message.tool_calls if tool_call.type == "function"]
             assert len(tool_calls) > 0, "LLM returned tool calls but none are of type 'function'"
-            for tool_call in tool_calls:
-                self._handle_tool_call(tool_call, state, on_event)
+            if not all(isinstance(c, TextContent) for c in message.content):
+                logger.warning("LLM returned tool calls but message content is not all TextContent - ignoring non-text content")
+
+            # response.id serves as the batch ID shared by all actions from this LLM response; the message's text content becomes the agent's thought
+            thought_content = [c for c in message.content if isinstance(c, TextContent)]
+
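+            # First pass: validate each tool call into an ActionEvent (invalid ones are reported and skipped)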
+            action_events = []
+            for i, tool_call in enumerate(tool_calls):
+                action_event = self._get_action_events(
+                    state,
+                    tool_call,
+                    llm_response_id=response.id,
+                    on_event=on_event,
+                    thought=thought_content if i == 0 else [],  # Only the first action gets the thought
+                )
+                if action_event is None:
+                    continue
+                action_events.append(action_event)
+                state.events.append(action_event)
+
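+            # Second pass: execute the validated actions in order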
+            for action_event in action_events:
+                self._execute_action_events(state, action_event, on_event=on_event)
         else:
             logger.info("LLM produced a message response - awaits user input")
             state.agent_finished = True
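+            # No tool calls: record the assistant's reply as a MessageEvent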
+            msg_event = MessageEvent(source="agent", llm_message=message)
+            state.events.append(msg_event)
+            on_event(msg_event)
 
-    def _handle_tool_call(
+    def _get_action_events(
         self,
-        tool_call: ChatCompletionMessageToolCall,
         state: ConversationState,
-        on_event: Callable[[Message | ActionBase | ObservationBase], None] | None = None,
-    ) -> None:
+        tool_call: ChatCompletionMessageToolCall,
+        llm_response_id: str,
+        on_event: ConversationCallbackType,
+        thought: list[TextContent] = [],
+    ) -> ActionEvent | None:
         """Handle tool calls from the LLM.
-
+
         NOTE: state will be mutated in-place.
         """
         assert tool_call.type == "function"
@@ -124,35 +130,47 @@ def _handle_tool_call(
         if tool is None:
             err = f"Tool '{tool_name}' not found. Available: {list(self.tools.keys())}"
             logger.error(err)
-            state.history.messages.append(Message(role="user", content=[TextContent(text=err)]))
+            event = AgentErrorEvent(error=err)
+            state.events.append(event)
+            on_event(event)
             state.agent_finished = True
             return
 
         # Validate arguments
         try:
             action: ActionBase = tool.action_type.model_validate(json.loads(tool_call.function.arguments))
-            if on_event:
-                on_event(action)
         except (json.JSONDecodeError, ValidationError) as e:
             err = f"Error validating args {tool_call.function.arguments} for tool '{tool.name}': {e}"
-            logger.error(err)
-            state.history.messages.append(Message(role="tool", name=tool.name, tool_call_id=tool_call.id, content=[TextContent(text=err)]))
+            event = AgentErrorEvent(error=err)
+            state.events.append(event)
+            on_event(event)
             return
 
+        # Create one ActionEvent per action
+        action_event = ActionEvent(action=action, thought=thought, tool_name=tool.name, tool_call_id=tool_call.id, tool_call=tool_call, llm_response_id=llm_response_id)
+        on_event(action_event)
+        return action_event
+
+    def _execute_action_events(self, state: ConversationState, action_event: ActionEvent, on_event: ConversationCallbackType):
155+ """Execute action events and update the conversation state.
156+
157+ It will call the tool's executor and update the state & call callback fn with the observation.
158+ """
+        tool = self.tools.get(action_event.tool_name, None)
+        if tool is None:
+            raise RuntimeError(f"Tool '{action_event.tool_name}' not found. This should not happen as it was checked earlier.")
+
         # Execute actions!
         if tool.executor is None:
             raise RuntimeError(f"Tool '{tool.name}' has no executor")
-        observation: ObservationBase = tool.executor(action)
-        tool_msg = Message(
-            role="tool",
-            name=tool.name,
-            tool_call_id=tool_call.id,
-            content=[TextContent(text=observation.agent_observation)],
-        )
-        state.history.messages.append(tool_msg)
-        if on_event:
-            on_event(observation)
+        observation: ObservationBase = tool.executor(action_event.action)
+        assert isinstance(observation, ObservationBase), f"Tool '{tool.name}' executor must return an ObservationBase"
+
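+        # Wrap the tool result in an ObservationEvent linked back to the originating ActionEvent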
+        obs_event = ObservationEvent(observation=observation, action_id=action_event.id, tool_name=tool.name, tool_call_id=action_event.tool_call.id)
+        on_event(obs_event)
 
         # Set conversation state
         if tool.name == FinishTool.name:
             state.agent_finished = True
+        state.events.append(obs_event)
+        return obs_event
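

# Illustrative sketch (not part of the diff above): one way the new event-based API could be
# driven. It relies only on the `init_state`/`step` signatures and the `state.events` /
# `state.agent_finished` fields shown in this diff; how `CodeActAgent` and `ConversationState`
# are constructed is an assumption left to the caller, so the agent type is referenced by name only.

from openhands.core.conversation import ConversationState
from openhands.core.event import LLMConvertibleEvent


def run_until_finished(agent: "CodeActAgent", state: ConversationState) -> None:
    """Drive the agent step by step, logging every event it emits."""

    def on_event(event) -> None:
        # ConversationCallbackType-style callback: here we just print each event's type,
        # marking whether it would be sent back to the LLM on the next step.
        prefix = "llm" if isinstance(event, LLMConvertibleEvent) else "other"
        print(f"[{prefix}] {type(event).__name__}")

    agent.init_state(state, on_event=on_event)  # appends the SystemPromptEvent
    while not state.agent_finished:
        agent.step(state, on_event=on_event)    # appends Action/Observation/Message events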