GitHubSecurityLab · kevinbackhouse · Oct 15, 2025 · Oct 17, 2025 · Oct 17, 2025
@@ -0,0 +1,46 @@
+name: Basic test - run the examples to check for errors
+
+on:
+  pull_request
+
+permissions:
+  contents: read
+
+jobs:
+  Linux:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup Python venv
+        run: |
+          python -m venv .venv
+          source .venv/bin/activate
+          python -m pip install -r requirements.txt
+
+      - name: Run tests
+        env:
+          COPILOT_TOKEN: ${{ secrets.COPILOT_TOKEN }}
+        run: |
+          # Every `run` step starts a fresh shell, so the venv activated in the
+          # previous step is no longer active; re-activate it so that `python`
+          # resolves to the interpreter that has requirements.txt installed.
+          source .venv/bin/activate
+          python main.py -p GitHubSecurityLab/seclab-taskflow-agent/personalities/assistant 'explain modems to me please'
+          python main.py -p GitHubSecurityLab/seclab-taskflow-agent/personalities/c_auditer 'explain modems to me please'
+          python main.py -p GitHubSecurityLab/seclab-taskflow-agent/personalities/examples/echo 'explain modems to me please'
+          python main.py -t GitHubSecurityLab/seclab-taskflow-agent/taskflows/CVE-2023-2283/CVE-2023-2283
+          python main.py -t GitHubSecurityLab/seclab-taskflow-agent/taskflows/examples/echo
+          python main.py -t GitHubSecurityLab/seclab-taskflow-agent/taskflows/examples/example
+          python main.py -t GitHubSecurityLab/seclab-taskflow-agent/taskflows/examples/example_globals
+          python main.py -t GitHubSecurityLab/seclab-taskflow-agent/taskflows/examples/example_inputs
+          python main.py -t GitHubSecurityLab/seclab-taskflow-agent/taskflows/examples/example_large_list_result_iter
+          python main.py -t GitHubSecurityLab/seclab-taskflow-agent/taskflows/examples/example_repeat_prompt
+          python main.py -t GitHubSecurityLab/seclab-taskflow-agent/taskflows/examples/example_repeat_prompt_async
+          python main.py -t GitHubSecurityLab/seclab-taskflow-agent/taskflows/examples/example_repeat_prompt_dictionary
+          python main.py -t GitHubSecurityLab/seclab-taskflow-agent/taskflows/examples/example_reusable_prompt
+          python main.py -t GitHubSecurityLab/seclab-taskflow-agent/taskflows/examples/example_reusable_taskflows
+          python main.py -t GitHubSecurityLab/seclab-taskflow-agent/taskflows/examples/example_triage_taskflow
+          python main.py -t GitHubSecurityLab/seclab-taskflow-agent/taskflows/examples/single_step_taskflow
@@ -149,6 +149,11 @@ Example:
 
 ```yaml
 # personalities define the system prompt level directives for this Agent
+seclab-taskflow-agent:
+  version: 1
+  filetype: personality
+  filekey: personalities/examples/echo
+
 personality: |
   You are a simple echo bot. You use echo tools to echo things.
 
@@ -157,7 +162,7 @@ task: |
 
 # personality toolboxes map to mcp servers made available to this Agent
 toolboxes:
-  - echo
+  - toolboxes/echo
 ```
 
 ## Toolboxes
@@ -168,6 +173,11 @@ Example stdio config:
 
 ```yaml
 # stdio mcp server configuration
+seclab-taskflow-agent:
+  version: 1
+  filetype: toolbox
+  filekey: toolboxes/echo
+
 server_params:
   kind: stdio
   command: python
@@ -184,6 +194,11 @@ A sequence of interdependent tasks performed by a set of Agents. Configured thro
 Example:
 
 ```yaml
+seclab-taskflow-agent:
+  version: 1
+  filetype: taskflow
+  filekey: taskflows/examples/example.yaml
+
 taskflow:
   - task:
       # taskflows can optionally choose any of the support CAPI models for a task
@@ -194,18 +209,14 @@ taskflow:
       must_complete: true
       # taskflows can set a primary (first entry) and handoff (additional entries) agent
       agents:
-        - c_auditer
-        - fruit_expert
+        - personalities/c_auditer.yaml
+        - personalities/examples/fruit_expert.yaml
       user_prompt: |
         Store an example vulnerable C program that uses `strcpy` in the
         `vulnerable_c_example` memory key and explain why `strcpy`
         is insecure in the C programming language. Do this before handing off
         to any other agent.
 
-        Then provide a summary of a high impact CVE ID that involved a `strcpy`
-        based buffer overflow based on your GHSA knowledge as an additional
-        example.
-
         Finally, why are apples and oranges healthy to eat?
 
       # taskflows can set temporary environment variables, these support the general
@@ -217,16 +228,16 @@ taskflow:
         MEMCACHE_STATE_DIR: "example_taskflow/"
         MEMCACHE_BACKEND: "dictionary_file"
       # taskflows can optionally override personality toolboxes, in this example
-      # kevin normally only has the memcache toolbox, but we extend it here with
+      # this agent normally only has the memcache toolbox, but we extend it here with
       # the GHSA toolbox
       toolboxes:
-        - ghsa
-        - memcache
+        - toolboxes/memcache.yaml
+        - toolboxes/codeql.yaml
   - task:
       must_complete: true
       model: gpt-4.1
       agents:
-        - c_auditer
+        - personalities/c_auditer.yaml
       user_prompt: |
         Retrieve C code for security review from the `vulnerable_c_example`
         memory key and perform a review.
@@ -236,13 +247,58 @@ taskflow:
         MEMCACHE_STATE_DIR: "example_taskflow/"
         MEMCACHE_BACKEND: "dictionary_file"
       toolboxes:
-        - memcache
+        - toolboxes/memcache.yaml
+      # headless mode does not prompt for tool call confirms configured for a server
+      # note: this will auto-allow, if you want control over potentially dangerous
+      # tool calls, then you should NOT run a task in headless mode (default: false)
+      headless: true
+  - task:
+      # tasks can also run shell scripts that return e.g. json output for repeat prompt iterable
+      must_complete: true
+      run: |
+        echo '["apple", "banana", "orange"]'
+  - task:
+      repeat_prompt: true
+      agents:
+        - personalities/assistant.yaml
+      user_prompt: |
+        What kind of fruit is {{ RESULT }}?
 ```
 
 Taskflows support [Agent handoffs](https://openai.github.io/openai-agents-python/handoffs/). Handoffs are useful for implementing triage patterns where the primary Agent can decide to handoff a task to any subsequent Agents in the `Agents` list.
 
 See the [taskflow examples](taskflows/examples) for other useful Taskflow patterns such as repeatable and asynchronous templated prompts.
 
+## Notes about the yaml syntax
+
+Every personality, toolbox, and taskflow is defined by a YAML file, which
+should always include a header like this:
+
+```yaml
+seclab-taskflow-agent:
+  version: 1
+  filetype: taskflow
+  filekey: taskflows/examples/example
+```
+
+The "filetype" determines whether the file defines a personality, toolbox, or
+taskflow. This means that different types of files can be stored in the same directory.
+
+The "filekey" is a unique name for the file. It is used to allow
+cross-referencing between files. For example, a taskflow can reference
+a personality by its filekey. Because filekeys are used for
+cross-referencing (rather than file paths), it means that you can move
+a file to a different directory without breaking the links. This also
+means that you can easily import new files by dropping them into a sub-directory.
+We recommend including something like your
+GitHub "username/reponame" in your filekeys to make them globally unique.
+
+The "version" number in the header should always be 1. It means that the
+file uses version 1 of the seclab-taskflow-agent syntax. If we ever need
+to make a major change to the syntax, then we'll update the version number.
+This will hopefully enable us to make changes without breaking backwards
+compatibility.
+
 ## License
 
 This project is licensed under the terms of the MIT open source license. Please refer to the [LICENSE](./LICENSE) file for the full terms.

@@ -3,9 +3,18 @@
 class VersionException(Exception):
     pass
 
+class FileIDException(Exception):
+    pass
+
 class FileTypeException(Exception):
     pass
 
+def add_yaml_to_dict(table, key, yaml):
+    """Add the yaml to the table, but raise an error if the id isn't unique """
+    if key in table:
+        raise FileIDException(str(key))
+    table.update({key: yaml})
+
 class AvailableTools:
     """
     This class is used for storing dictionaries of all the available
@@ -30,20 +39,23 @@ def __init__(self, yamls: dict):
                 version = header['version']
                 if version != 1:
                     raise VersionException(str(version))
-                filetype = header['type']
+                filekey = header['filekey']
+                filetype = header['filetype']
                 if filetype == 'personality':
-                    self.personalities.update({path: yaml})
+                    add_yaml_to_dict(self.personalities, filekey, yaml)
                 elif filetype == 'taskflow':
-                    self.taskflows.update({path: yaml})
+                    add_yaml_to_dict(self.taskflows, filekey, yaml)
                 elif filetype == 'prompt':
-                    self.prompts.update({path: yaml})
+                    add_yaml_to_dict(self.prompts, filekey, yaml)
                 elif filetype == 'toolbox':
-                    self.toolboxes.update({path: yaml})
+                    add_yaml_to_dict(self.toolboxes, filekey, yaml)
                 else:
                     raise FileTypeException(str(filetype))
             except KeyError as err:
                 logging.error(f'{path} does not contain the key {err.args[0]}')
             except VersionException as err:
                 logging.error(f'{path}: seclab-taskflow-agent version {err.args[0]} is not supported')
+            except FileIDException as err:
+                logging.error(f'{path}: file ID {err.args[0]} is not unique')
             except FileTypeException as err:
                 logging.error(f'{path}: seclab-taskflow-agent file type {err.args[0]} is not supported')
@@ -10,6 +10,7 @@
 import re
 import json
 import uuid
+import pathlib
 
 from agent import DEFAULT_MODEL, TaskRunHooks, TaskAgentHooks
 #from agents.run import DEFAULT_MAX_TURNS # XXX: this is 10, we need more than that
@@ -23,7 +24,7 @@
 from typing import Any
 
 from shell_utils import shell_tool_call
-from mcp_utils import DEFAULT_MCP_CLIENT_SESSION_TIMEOUT, ReconnectingMCPServerStdio, AsyncDebugMCPServerStdio, MCPNamespaceWrap, mcp_client_params, mcp_system_prompt, StreamableMCPThread
+from mcp_utils import DEFAULT_MCP_CLIENT_SESSION_TIMEOUT, ReconnectingMCPServerStdio, AsyncDebugMCPServerStdio, MCPNamespaceWrap, mcp_client_params, mcp_system_prompt, StreamableMCPThread, compress_name
 from render_utils import render_model_output, flush_async_output
 from env_utils import TmpEnv
 from yaml_parser import YamlParser
@@ -255,7 +256,7 @@ async def mcp_session_task(
         for handoff_agent in list(agents.keys())[1:]:
             handoffs.append(TaskAgent(
                 # XXX: name has to be descriptive for an effective handoff
-                name=handoff_agent,
+                name=compress_name(handoff_agent),
                 instructions=prompt_with_handoff_instructions(
                     mcp_system_prompt(
                         agents[handoff_agent]['personality'],
@@ -399,7 +400,7 @@ async def on_handoff_hook(
     if p:
         personality = available_tools.personalities.get(p)
         if personality is None:
-            raise ValueError("No such personality!")
+            raise ValueError(f"No such personality: {p}")
 
         await deploy_task_agents(
             available_tools,
@@ -413,7 +414,7 @@ async def on_handoff_hook(
 
         taskflow = available_tools.taskflows.get(t)
         if taskflow is None:
-            raise ValueError("No such taskflow!")
+            raise ValueError(f"No such taskflow: {t}")
 
         await render_model_output(f"** 🤖💪 Running Task Flow: {t}\n")
 
@@ -628,11 +629,12 @@ async def _deploy_task_agents(resolved_agents, prompt):
                     break
 
 if __name__ == '__main__':
+    cwd = pathlib.Path.cwd()
     available_tools = AvailableTools(
-        YamlParser('personalities').get_yaml_dict() |
-        YamlParser('taskflows').get_yaml_dict() |
-        YamlParser('prompts').get_yaml_dict(dir_namespace=True) |
-        YamlParser('toolboxes').get_yaml_dict(recurse=True))
+        YamlParser(cwd).get_yaml_dict((cwd/'personalities').rglob('*')) |
+        YamlParser(cwd).get_yaml_dict((cwd/'taskflows').rglob('*')) |
+        YamlParser(cwd).get_yaml_dict((cwd/'prompts').rglob('*')) |
+        YamlParser(cwd).get_yaml_dict((cwd/'toolboxes').rglob('*')))
 
     p, t, l, user_prompt, help_msg = parse_prompt_args(available_tools)
 

@@ -9,6 +9,7 @@
 import os
 import socket
 import signal
+import hashlib
 from urllib.parse import urlparse
 
 from mcp.types import CallToolResult, TextContent
@@ -18,6 +19,16 @@
 
 DEFAULT_MCP_CLIENT_SESSION_TIMEOUT = 120
 
+# The openai API complains if the name of a tool is longer than 64
+# chars. But we're encouraging people to use long descriptive
+# filekeys to avoid accidental collisions, so it's very easy to go
+# over the limit. So this function converts a name to a 12 character
+# hash.
+def compress_name(name):
+    m = hashlib.sha256()
+    m.update(name.encode('utf-8'))
+    return m.hexdigest()[:12]
+
 # A process management class for running in-process MCP streamable servers
 class StreamableMCPThread(Thread):
     """Process management for local streamable MCP servers"""
@@ -221,7 +232,7 @@ class MCPNamespaceWrap:
     def __init__(self, confirms, obj):
         self.confirms = confirms
         self._obj = obj
-        self.namespace = f"{obj.name.upper().replace(' ', '_')}_"
+        self.namespace = compress_name(obj.name)
 
     def __getattr__(self, name):
         attr = getattr(self._obj, name)

@@ -1,6 +1,7 @@
 seclab-taskflow-agent:
-  type: personality
   version: 1
+  filetype: personality
+  filekey: GitHubSecurityLab/seclab-taskflow-agent/personalities/assistant
 
 personality: |
   You are a helpful assistant.

@@ -1,6 +1,7 @@
 seclab-taskflow-agent:
-  type: personality
   version: 1
+  filetype: personality
+  filekey: GitHubSecurityLab/seclab-taskflow-agent/personalities/c_auditer
 
 personality: |
   Your name is Ronald. You are a C programming language security expert.
@@ -14,5 +15,5 @@ task: |
   your findings where possible.
 
 toolboxes:
-  - memcache
-  - codeql
+  - GitHubSecurityLab/seclab-taskflow-agent/toolboxes/memcache
+  - GitHubSecurityLab/seclab-taskflow-agent/toolboxes/codeql
@@ -1,6 +1,7 @@
 seclab-taskflow-agent:
-  type: personality
   version: 1
+  filetype: personality
+  filekey: GitHubSecurityLab/seclab-taskflow-agent/personalities/examples/apple_expert
 
 personality: |
   You are an apples expert.

@@ -1,6 +1,7 @@
 seclab-taskflow-agent:
-  type: personality
   version: 1
+  filetype: personality
+  filekey: GitHubSecurityLab/seclab-taskflow-agent/personalities/examples/banana_expert
 
 personality: |
   You are a bananas expert.

@@ -1,6 +1,7 @@
 seclab-taskflow-agent:
-  type: personality
   version: 1
+  filetype: personality
+  filekey: GitHubSecurityLab/seclab-taskflow-agent/personalities/examples/echo
 
 personality: |
   You are a simple echo bot. You use echo tools to echo things.
@@ -9,5 +10,5 @@ task: |
   Echo user inputs using the echo tools.
 
 toolboxes:
-  - echo
+  - GitHubSecurityLab/seclab-taskflow-agent/toolboxes/echo