# CAMEL agent using Terminal Toolkit
"""Terminal Bench agents backed by a CAMEL ``ChatAgent`` with a terminal toolkit.

``CamelTerminalAgent`` runs a single instruction inside the benchmark's Docker
container and writes a markdown summary of the run.
``CamelTerminalAgentWithContext`` additionally preloads the newest summary
found under ``agent_summary/`` into the agent's memory before the run.
"""
import os
import logging
from pathlib import Path
from typing import List, Tuple
from terminal_bench.agents.base_agent import BaseAgent, AgentResult
from terminal_bench.agents.failure_mode import FailureMode
from terminal_bench.terminal.tmux_session import TmuxSession

from camel.models import ModelFactory
from camel.logger import enable_logging, set_log_file, set_log_level
from camel.types import ModelPlatformType, ModelType

logger = logging.getLogger(__name__)

MODEL_TYPE = ModelType.GPT_5
DEFAULT_SUMMARY_DIR = Path("agent_summary")
DEFAULT_CONTEXT_FILENAME = "terminal_task.md"
_SUMMARY_PROMPT_RELATIVE_PATH = Path("prompts/summary_prompt_simple.txt")


def _find_latest_context_file(base_dir: Path, filename: str) -> Path | None:
    """Return the newest ``<base_dir>/<child>/<filename>``, or ``None``.

    Only the immediate children of ``base_dir`` are inspected. "Newest" means
    the largest modification time reported by ``stat()``.

    Args:
        base_dir: Directory whose children are searched.
        filename: Name of the file expected inside each child directory.

    Returns:
        The path of the most recently modified match, or ``None`` when
        ``base_dir`` is missing, is not a directory, or holds no match.
    """
    try:
        # ``list`` forces the directory scan to happen inside the ``try``.
        entries = list(base_dir.iterdir())
    except (FileNotFoundError, NotADirectoryError):
        return None

    newest_path: Path | None = None
    newest_mtime = float("-inf")

    for entry in entries:
        candidate = entry / filename
        try:
            # A single ``stat`` covers every skip case: the entry is not a
            # directory, the file is absent, or it vanished mid-scan.
            mtime = candidate.stat().st_mtime
        except OSError:
            continue

        if mtime > newest_mtime:
            newest_mtime = mtime
            newest_path = candidate

    return newest_path


def _resolve_default_context_file() -> Path | None:
    """Return the newest default context file under ``DEFAULT_SUMMARY_DIR``."""
    return _find_latest_context_file(DEFAULT_SUMMARY_DIR, DEFAULT_CONTEXT_FILENAME)


def _load_default_summary_prompt() -> str:
    """Read the summary prompt shipped in ``prompts/``.

    The path relative to the current working directory is tried first (the
    historical behaviour). If that is missing, the path is resolved against
    the repository root, taken to be the parent of this module's ``agents/``
    directory, so importing the module from another directory still works.

    Raises:
        FileNotFoundError: If the prompt exists at neither location.
    """
    candidates = (
        _SUMMARY_PROMPT_RELATIVE_PATH,
        Path(__file__).resolve().parent.parent / _SUMMARY_PROMPT_RELATIVE_PATH,
    )
    for candidate in candidates:
        try:
            return candidate.read_text(encoding="utf-8")
        except FileNotFoundError:
            continue
    raise FileNotFoundError(
        f"Summary prompt not found; looked in: "
        f"{', '.join(str(c) for c in candidates)}"
    )


def _timestamped_markers_from_memory(
    records: List[dict],
) -> List[Tuple[float, str]]:
    """Build ``(timestamp, description)`` markers for recorded tool calls.

    A record counts as a tool call when its ``"message"`` mapping has a
    ``"func_name"`` key. The description carries the function name and the
    ``"command"`` argument (empty string when absent).
    """
    markers: List[Tuple[float, str]] = []
    for record in records:
        message = record["message"]
        if "func_name" not in message:
            continue
        args = message.get("args") or {}
        command = args.get("command", "")
        markers.append(
            (
                record["timestamp"],
                f"Called tool: {message['func_name']} with args: {command}",
            )
        )
    return markers


# Import-time snapshot, kept for callers that read this constant directly.
DEFAULT_CONTEXT_FILE = _resolve_default_context_file()
DEFAULT_SUMMARY_PROMPT = _load_default_summary_prompt()


class CamelTerminalAgent(BaseAgent):
    """Terminal Bench agent that delegates the task to a CAMEL developer agent."""

    @staticmethod
    def name() -> str:
        return "CamelTerminalAgent"

    def __init__(
        self,
        *,
        load_context: bool = False,
        context_working_dir: str | Path | None = None,
        context_filename: str | Path | None = None,
        summary_working_dir: str | Path | None = DEFAULT_SUMMARY_DIR,
        summary_filename: str | None = None,
        task_id: str = "terminal_task",
        **kwargs,
    ) -> None:
        """Configure context loading and summary output.

        Args:
            load_context: Load a markdown context file into the agent's
                memory before running the task.
            context_working_dir: Directory handed to ``ContextUtility``;
                required when ``load_context`` is true.
            context_filename: Context file to load; a relative path is
                joined onto ``context_working_dir``. Required when
                ``load_context`` is true.
            summary_working_dir: Directory passed to ``summarize`` as
                ``working_directory``; ``None`` omits the argument.
            summary_filename: Summary file name; defaults to
                ``"<task_id>.md"``.
            task_id: Identifier forwarded to the agent factory.
            **kwargs: Forwarded to ``BaseAgent.__init__``.
        """
        super().__init__(**kwargs)
        self.load_context = load_context
        self.context_working_dir = (
            Path(context_working_dir) if context_working_dir is not None else None
        )
        self.context_filename = (
            Path(context_filename) if context_filename is not None else None
        )
        self.summary_working_dir = (
            Path(summary_working_dir) if summary_working_dir is not None else None
        )
        self.summary_filename = summary_filename or f"{task_id}.md"
        self.task_id = task_id

    def _load_context_into(self, camel_agent) -> None:
        """Load the configured markdown context file into ``camel_agent``.

        Raises:
            ValueError: If the working directory or file name is missing.
        """
        if self.context_working_dir is None:
            raise ValueError(
                "context_working_dir must be provided when load_context is True"
            )
        if self.context_filename is None:
            raise ValueError(
                "context_filename must be provided when load_context is True"
            )

        from camel.utils.context_utils import ContextUtility

        context_utility = ContextUtility(
            working_directory=str(self.context_working_dir)
        )
        context_path = self.context_filename
        if not context_path.is_absolute():
            context_path = self.context_working_dir / context_path
        context_utility.load_markdown_context_to_memory(
            camel_agent,
            filename=str(context_path),
        )

    def perform_task(
        self,
        instruction: str,
        session: TmuxSession,
        logging_dir: str | Path | None = "logs",
    ) -> AgentResult:
        """Execute a task using the Terminal Bench harness.

        Args:
            instruction: Task description sent to the agent as the user
                message.
            session: Harness session; only its container name is used, so
                that commands run inside that Docker container.
            logging_dir: Root directory for CAMEL logs, the CAMEL working
                directory and the terminal session logs.

        Returns:
            An ``AgentResult`` holding the token usage reported for the
            agent step and one timestamped marker per recorded tool call.

        Raises:
            ValueError: If ``logging_dir`` is ``None``, the container has no
                name, or context loading is enabled but not fully configured.
        """
        if logging_dir is None:
            raise ValueError("logging_dir is required for CamelTerminalAgent")

        logging_dir = Path(logging_dir)
        logging_dir.mkdir(parents=True, exist_ok=True)

        camel_logs_dir = logging_dir / "camel"
        camel_logs_dir.mkdir(parents=True, exist_ok=True)

        enable_logging()
        set_log_level(logging.DEBUG)
        set_log_file(str(camel_logs_dir / "camel_chat_agent.log"))

        container_name = session.container.name
        if not container_name:
            raise ValueError("Container name is required for DockerExecutor")

        camel_workdir = logging_dir / "CAMEL_WORKDIR"
        camel_workdir.mkdir(parents=True, exist_ok=True)
        os.environ["CAMEL_WORKDIR"] = str(camel_workdir.resolve())

        session_logs_dir = logging_dir / "session" / "logs"
        session_logs_dir.mkdir(parents=True, exist_ok=True)

        # Imported here, after CAMEL_WORKDIR is set, because
        # util.agent_factory reads that variable when it is first imported.
        from util.agent_factory import developer_agent_factory

        terminal_toolkit_kwargs = {
            "working_directory": "/app",
            "use_docker_backend": True,
            "docker_container_name": container_name,
            "session_logs_dir": str(session_logs_dir),
            "safe_mode": False,
            "timeout": 120,
        }

        model_backend_reason = ModelFactory.create(
            model_platform=ModelPlatformType.OPENAI,
            model_type=MODEL_TYPE,
            model_config_dict={
                "stream": False,
            },
        )

        camel_agent = developer_agent_factory(
            model_backend_reason,
            self.task_id,
            terminal_toolkit_kwargs,
        )
        camel_agent.reset()

        if self.load_context:
            self._load_context_into(camel_agent)

        usr_msg = f"{instruction}\n"
        response = camel_agent.step(usr_msg)

        summary_kwargs: dict[str, str] = {"summary_prompt": DEFAULT_SUMMARY_PROMPT}
        if self.summary_working_dir is not None:
            summary_kwargs["working_directory"] = str(self.summary_working_dir)
        if self.summary_filename is not None:
            summary_kwargs["filename"] = self.summary_filename
        camel_agent.summarize(**summary_kwargs)

        # A response without usage data should not discard a finished run.
        usage = (response.info or {}).get("usage") or {}
        if not usage:
            logger.warning("Agent response carried no token usage; reporting 0.")
        total_input_tokens = usage.get("prompt_tokens", 0)
        total_output_tokens = usage.get("completion_tokens", 0)

        # NOTE(review): this reaches into private ChatAgent attributes and may
        # break when camel-ai changes its memory layout.
        memory_list = camel_agent._memory._chat_history_block.storage.memory_list
        timestamped_markers = _timestamped_markers_from_memory(memory_list)

        return AgentResult(
            total_input_tokens=total_input_tokens,
            total_output_tokens=total_output_tokens,
            failure_mode=FailureMode.NONE,
            timestamped_markers=timestamped_markers,
        )


class CamelTerminalAgentWithContext(CamelTerminalAgent):
    """Variant that preloads the newest summary under ``agent_summary/``."""

    @staticmethod
    def name() -> str:
        return "CamelTerminalAgentWithContext"

    def __init__(self, **kwargs) -> None:
        """Select the newest context file and enable context loading.

        Raises:
            FileNotFoundError: If no context file exists under
                ``agent_summary``.
        """
        # Look again at construction time so summaries written after this
        # module was imported are picked up; fall back to the import-time
        # snapshot otherwise.
        default_context = _resolve_default_context_file() or DEFAULT_CONTEXT_FILE
        if default_context is None:
            raise FileNotFoundError(
                "No context file found under agent_summary with name "
                f"{DEFAULT_CONTEXT_FILENAME}."
            )
        super().__init__(
            load_context=True,
            context_working_dir=DEFAULT_SUMMARY_DIR,
            # The discovered path already starts with DEFAULT_SUMMARY_DIR.
            # Passing it absolute stops perform_task from prefixing the
            # working directory a second time
            # (agent_summary/agent_summary/...).
            context_filename=default_context.resolve(),
            summary_working_dir=DEFAULT_SUMMARY_DIR,
            summary_filename="terminal_task.md",
            task_id="terminal_task",
            **kwargs,
        )
+2) **Commands & Outputs** – table with columns: Time | Action | Command/Tool | Output (first lines) | Exit/Reason. +3) **File Changes** +4) **Environment Notes** +5) **Errors & Retries** +6) **Artifacts** +7) **Next Steps** + +Only include facts from the logs; omit general discussion or speculation. diff --git a/prompts/summary_prompt_simple.txt b/prompts/summary_prompt_simple.txt new file mode 100644 index 0000000..3915d09 --- /dev/null +++ b/prompts/summary_prompt_simple.txt @@ -0,0 +1,15 @@ +Summarize this conversation as a simple run log. Focus only on what actually happened. + +Include: +- Commands that were executed +- Files that were created/modified +- Any errors or warnings +- Final results + +Format as markdown with: +1. **What was done** (bullets) +2. **Commands run** (list with outputs) +3. **Files changed** (if any) +4. **Result** (success/failure) + +Be factual - only describe what actually occurred in the logs. \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index f0bc01c..99fd5a9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,5 +7,5 @@ requires-python = "==3.12.*" dependencies = [ "terminal-bench>=0.2.16", "openai>=1.0.0", - "camel-ai @ git+https://github.com/camel-ai/camel.git@agent-summarize", + "camel-ai @ git+https://github.com/camel-ai/camel.git@master", ] diff --git a/test.sh b/test.sh index 2e317de..656d05d 100755 --- a/test.sh +++ b/test.sh @@ -10,19 +10,5 @@ uv run tb run \ # Test camel agent with terminal bench uv run tb run \ --dataset terminal-bench-core==head \ - --agent-import-path agents.camel_agent:CamelTerminus \ - --task-id hello-world - -# Test termius 1 agent with terminal bench -uv run tb run \ - --model-name openai/gpt-4o-mini \ - --dataset terminal-bench-core==head \ - --agent-import-path agents.termius_1:Terminus \ - --task-id hello-world - -# # Test termius 2 agent with terminal bench -uv run tb run \ - --model-name openai/gpt-4o-mini \ - --dataset terminal-bench-core==head \ - 
--agent-import-path agents.termius_2:Terminus2 \ - --task-id hello-world \ No newline at end of file + --agent-import-path agents.camel_terminal_agent:CamelTerminalAgent \ + --task-id hello-world diff --git a/util/agent_factory.py b/util/agent_factory.py new file mode 100644 index 0000000..5496842 --- /dev/null +++ b/util/agent_factory.py @@ -0,0 +1,181 @@ +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. 
import asyncio
import datetime
import os
import platform
import uuid

from camel.agents.chat_agent import ChatAgent
from camel.logger import get_logger
from camel.messages.base import BaseMessage
from camel.models import BaseModelBackend, ModelFactory
from camel.societies.workforce import Workforce
from camel.tasks.task import Task
from camel.toolkits import (
    AgentCommunicationToolkit,
    NoteTakingToolkit,
    TerminalToolkit,
    ToolkitMessageIntegration,
)
from camel.types import ModelPlatformType, ModelType
from camel.utils.commons import api_keys_required

logger = get_logger(__name__)

# Snapshot taken when this module is first imported. developer_agent_factory
# re-reads the environment on each call and uses this only as a fallback.
WORKING_DIRECTORY = os.environ.get("CAMEL_WORKDIR")

print(f"Using working directory: {WORKING_DIRECTORY}")


def send_message_to_user(
    message_title: str,
    message_description: str,
    message_attachment: str = "",
) -> str:
    r"""Use this tool to send a tidy message to the user, including a
    short title, a one-sentence description, and an optional attachment.

    This one-way tool keeps the user informed about your progress,
    decisions, or actions. It does not require a response.
    You should use it to:
    - Announce what you are about to do.
      For example:
      message_title="Starting Task"
      message_description="Searching for papers on GUI Agents."
    - Report the result of an action.
      For example:
      message_title="Search Complete"
      message_description="Found 15 relevant papers."
    - Report a created file.
      For example:
      message_title="File Ready"
      message_description="The report is ready for your review."
      message_attachment="report.pdf"
    - State a decision.
      For example:
      message_title="Next Step"
      message_description="Analyzing the top 10 papers."
    - Give a status update during a long-running task.

    Args:
        message_title (str): The title of the message.
        message_description (str): The short description.
        message_attachment (str): The attachment of the message,
            which can be a file path or a URL.

    Returns:
        str: Confirmation that the message was successfully sent.
    """
    # The message goes to stdout for the operator and to the CAMEL logger.
    print(f"\nAgent Message:\n{message_title} " f"\n{message_description}\n")
    if message_attachment:
        print(message_attachment)
    logger.info(
        f"\nAgent Message:\n{message_title} "
        f"{message_description} {message_attachment}"
    )
    return (
        f"Message successfully sent to user: '{message_title} "
        f"{message_description} {message_attachment}'"
    )


def developer_agent_factory(
    model: BaseModelBackend,
    task_id: str,
    terminal_toolkit_kwargs: dict | None = None,
) -> ChatAgent:
    r"""Factory for creating a developer agent.

    Args:
        model (BaseModelBackend): Model backend driving the agent.
        task_id (str): Task identifier. Currently unused; kept so existing
            callers keep working.
        terminal_toolkit_kwargs (dict | None): Keyword arguments forwarded
            to ``TerminalToolkit``. ``None`` is treated as an empty dict.
            When ``use_docker_backend`` is truthy the system prompt
            describes a Docker environment rooted at ``working_directory``
            (default ``/app``).

    Returns:
        ChatAgent: Agent equipped with the terminal and note-taking tools,
        limited to 10 iterations.
    """
    # The default is None; unpacking None with ``**`` would raise TypeError.
    terminal_toolkit_kwargs = dict(terminal_toolkit_kwargs or {})
    uses_docker = bool(terminal_toolkit_kwargs.get('use_docker_backend'))

    # Callers may change CAMEL_WORKDIR between tasks in one process, so the
    # environment is read at call time, not only at import time.
    working_directory = os.environ.get("CAMEL_WORKDIR") or WORKING_DIRECTORY

    # Initialize message integration
    message_integration = ToolkitMessageIntegration(
        message_handler=send_message_to_user
    )

    # Initialize toolkits
    terminal_toolkit = TerminalToolkit(**terminal_toolkit_kwargs)
    note_toolkit = NoteTakingToolkit(working_directory=working_directory)

    # Add messaging to toolkits
    terminal_toolkit = message_integration.register_toolkits(terminal_toolkit)
    note_toolkit = message_integration.register_toolkits(note_toolkit)

    # Get enhanced tools
    tools = [
        *terminal_toolkit.get_tools(),
        *note_toolkit.get_tools(),
    ]

    # Determine environment info based on Docker usage
    if uses_docker:
        # Use Docker container environment
        system_info = "Linux (Docker Container)"
        working_dir = terminal_toolkit_kwargs.get('working_directory', '/app')
        env_note = (
            "You are running inside a Docker container. "
            "All commands execute within the containerized environment."
        )
        file_ops_note = (
            "Create files directly in the working directory using simple "
            "paths like './filename' or 'filename'."
        )
    else:
        # Use host system environment
        system_info = f"{platform.system()} ({platform.machine()})"
        working_dir = working_directory
        env_note = "You are running on the host system."
        file_ops_note = "You can access files from any place in the file system."

    system_message = f"""

You are a Lead Software Engineer, a master-level coding assistant with a
powerful terminal. Your role is to solve technical tasks by writing and
executing code, installing necessary libraries, interacting with the operating
system, and deploying applications.



- **System**: {system_info}
- **Working Directory**: `{working_dir}`. {env_note}
- **Current Date**: {datetime.date.today()}.
- **IMPORTANT**: When working with files, use paths relative to the working directory above.
  Do NOT use host system paths like /Users/... when in a Docker container.



- When you complete your task, provide a clear summary of what you accomplished.
- Focus on creating files in the correct location as specified by the task.



- **Code Execution**: You can write and execute code in any language to solve tasks.
- **Terminal Control**: You have access to the terminal and can run command-line tools,
  manage files, and interact with the OS. Install missing tools with package managers
  like `pip3`, `uv`, or `apt-get`.
- **File Operations**: {file_ops_note}
- **Verification**: Test and verify your solutions by executing them.




- Take action to solve problems. Don't just suggest solutions—implement them.
- Use the terminal effectively to execute commands and manage files.
- Verify your solutions by testing them.

    """

    return ChatAgent(
        system_message=BaseMessage.make_assistant_message(
            role_name="Developer Agent",
            content=system_message,
        ),
        model=model,
        tools=tools,
        max_iteration=10,
    )