diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 84de157cd..dda3206c1 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 5.0.0a1
+current_version = 4.5.1a1
 commit = False
 tag = False
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(rc(?P<rc>\d+))?
diff --git a/docs/getting-started/agentic.md b/docs/getting-started/agentic.md
new file mode 100644
index 000000000..88e6c4896
--- /dev/null
+++ b/docs/getting-started/agentic.md
@@ -0,0 +1,134 @@
+# Agentic application
+!!! danger
+    The features and API described in this section are under heavy development and therefore subject to change.
+    Be prepared for them to break.
+
+    **However, if an unstable API does not put you off, the features in this mode
+    of the orchestrator will unlock quite a bit of potential.**
+
+
+The agentic mode of the orchestrator can be unlocked by following the steps below.
+
+### Prerequisites
+- The `pgvector` extension installed in your PostgreSQL database
+- At minimum an API key to talk to OpenAI (or a compatible provider)
+- The UI configured with the LLM integration branch (still WIP): https://github.com/workfloworchestrator/example-orchestrator-ui/pull/72/files
+
+### Step 1 - Install the package:
+
+Create a virtualenv and install the core, including the LLM dependencies.
+
+ +```shell +python -m venv .venv +source .venv/bin/activate +pip install orchestrator-core[llm] +``` + +
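+As a quick sanity check, you can verify that the LLM dependencies are importable. When running in agentic mode the orchestrator checks for the `pydantic_ai` module at startup and exits if it is missing:
+
+```shell
+python -c "import pydantic_ai; print('LLM dependencies installed')"
+```
+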
+
+### Step 2 - Set up the database:
+
+Create a PostgreSQL database and make sure your PostgreSQL installation has the `pgvector` extension available:
+
+ +```shell +createuser -sP nwa +createdb orchestrator-core -O nwa +``` + +
+
+Choose a password and remember it for later steps.
+
+As an example, you can run these docker commands in separate shells to start a temporary PostgreSQL instance. Note that the stock `postgres` image does not ship with the `pgvector` extension, so an image that includes it (such as `pgvector/pgvector`) is used here:
+
+```shell
+docker run --rm --name temp-orch-db -e POSTGRES_PASSWORD=rootpassword -p 5432:5432 pgvector/pgvector:pg15
+
+docker exec -it temp-orch-db su - postgres -c 'createuser -sP nwa && createdb orchestrator-core -O nwa'
+```
+
+### Step 3 - Create the main.py:
+
+Create a `main.py` file.
+
+```python
+from orchestrator import AgenticOrchestratorCore
+from orchestrator.cli.main import app as core_cli
+from orchestrator.settings import app_settings
+from orchestrator.llm_settings import llm_settings
+
+llm_settings.LLM_ENABLED = True
+llm_settings.AGENT_MODEL = 'gpt-4o-mini'
+llm_settings.OPENAI_API_KEY = 'xxxxx'
+
+
+app = AgenticOrchestratorCore(
+    base_settings=app_settings,
+    llm_settings=llm_settings,
+    llm_model=llm_settings.AGENT_MODEL,
+    agent_tools=[]
+)
+
+if __name__ == "__main__":
+    core_cli()
+```
+
+### Step 4 - Run the database migrations:
+
+Initialize the migration environment and database tables.
+
+ +```shell +export DATABASE_URI=postgresql://nwa:PASSWORD_FROM_STEP_2@localhost:5432/orchestrator-core + +python main.py db init +python main.py db upgrade heads +``` + +
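+The `ai_search_index` table created by these migrations uses a `pgvector` embedding column, so the `vector` extension must be available in the database. If the upgrade fails because the extension is missing, you can enable it manually; a sketch, assuming the superuser `nwa` account from step 2:
+
+```shell
+psql -U nwa orchestrator-core -c 'CREATE EXTENSION IF NOT EXISTS vector;'
+```
+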
+ +### Step 5 - Run the app + +
+ +```shell +export DATABASE_URI=postgresql://nwa:PASSWORD_FROM_STEP_2@localhost:5432/orchestrator-core +export OAUTH2_ACTIVE=False + +uvicorn --reload --host 127.0.0.1 --port 8080 main:app +``` + +
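+To verify that the backend came up, you can hit the root endpoint, which returns a short identification string (see `_index()` in `app.py`):
+
+```shell
+curl http://127.0.0.1:8080/
+# should print "Orchestrator Core"
+```
+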
+
+### Step 6 - Index all your current subscriptions, processes, workflows and products:
+
+!!! warning
+    This will call out to external LLM services and may incur costs.
+
+
+ +```shell +python main.py index subscriptions +python main.py index products +python main.py index processes +python main.py index workflows +``` + +
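+Indexing generates embeddings for each entity through LiteLLM, using the `EMBEDDING_MODEL` configured in `LLMSettings`. If you run a local embedding server instead of an external service, setting `OPENAI_BASE_URL` makes the indexer apply the local-model batch limits. A sketch of the relevant environment variables; the base URL is a hypothetical local OpenAI-compatible endpoint and the values are only examples:
+
+```shell
+export OPENAI_BASE_URL=http://localhost:8000/v1  # hypothetical local endpoint
+export EMBEDDING_MODEL=openai/text-embedding-3-small  # must be in 'vendor/model' format
+export EMBEDDING_MAX_BATCH_SIZE=32
+export EMBEDDING_FALLBACK_MAX_TOKENS=512
+```
+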
+
+### Step 7 - Profit :boom: :grin:
+
+Visit the [ReDoc](http://127.0.0.1:8080/api/redoc) or [OpenAPI](http://127.0.0.1:8080/api/docs) documentation to view and interact with the API.
+
+
+### Next:
+
+- [Create a product.](../workshops/advanced/domain-models.md)
+- [Create a workflow for a product.](./workflows.md)
+- [Generate products and workflows.](../reference-docs/cli.md#generate)
diff --git a/docs/getting-started/base.md b/docs/getting-started/base.md
index c8b2189d8..2a647314c 100644
--- a/docs/getting-started/base.md
+++ b/docs/getting-started/base.md
@@ -58,9 +58,9 @@ Create a `main.py` file.
 ```python
 from orchestrator import OrchestratorCore
 from orchestrator.cli.main import app as core_cli
-from orchestrator.settings import AppSettings
+from orchestrator.settings import app_settings
 
-app = OrchestratorCore(base_settings=AppSettings())
+app = OrchestratorCore(base_settings=app_settings)
 
 if __name__ == "__main__":
     core_cli()
diff --git a/docs/index.md b/docs/index.md
index 91d3d4574..c4a0205eb 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -76,7 +76,7 @@ There are a number of options for getting started:
 
 - For those who are more adventurous, follow the guide on the [next page](getting-started/base.md) to start coding right away.
 
- 
+
 [//]: # (- If you would like to see the workflow engine in action, click [here](https://demo.workfloworchestrator.org) this )
 [//]: # (will take you to our demo environment, where you can see some of our examples in action.)
diff --git a/docs/reference-docs/app/agentic-app.md b/docs/reference-docs/app/agentic-app.md
new file mode 100644
index 000000000..7d3d862de
--- /dev/null
+++ b/docs/reference-docs/app/agentic-app.md
@@ -0,0 +1,11 @@
+# agentic_app.py
+
+The `agentic_app.py` module is used in `orchestrator-core` to run the entire Agentic WFO FastAPI backend and the CLI.
+
+## FastAPI Backend
+
+The code for the WFO's FastAPI backend is well documented; look through the functions used in this module here:
+
+::: orchestrator.agentic_app
+    options:
+      heading_level: 3
diff --git a/mkdocs.yml b/mkdocs.yml
index 824ad640c..99a9baa63 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -148,9 +148,10 @@ nav:
       - Backfilling Existing Subscriptions: architecture/product_modelling/backfilling.md
   - Getting Started:
       - Prerequisites: getting-started/versions.md
-      - Base Application:
+      - Application:
          - Preparing source folder: getting-started/prepare-source-folder.md
          - Base application: getting-started/base.md
+         - Agentic application: getting-started/agentic.md
       - Workflows:
          - Creating a workflow: getting-started/workflows.md
          - Registering a workflow: getting-started/workflows#register-workflows
@@ -183,6 +184,7 @@ nav:
       - Forms: reference-docs/forms.md
       - Running the App:
          - App.py: reference-docs/app/app.md
+         - Agentic App.py: reference-docs/app/agentic-app.md
       - Python Version: reference-docs/python.md
       - Scaling: reference-docs/app/scaling.md
       - Settings: reference-docs/app/settings-overview.md
diff --git a/orchestrator/__init__.py b/orchestrator/__init__.py
index 18a346ff6..ba4ffdb25 100644
--- a/orchestrator/__init__.py
+++ b/orchestrator/__init__.py
@@ -13,15 +13,39 @@
 
 """This is the orchestrator workflow engine."""
 
-__version__ = "5.0.0a1"
+__version__ = "4.5.1a1"
 
-from orchestrator.app import OrchestratorCore
+
+from structlog import get_logger
+
+logger = get_logger(__name__)
+
+logger.info("Starting the orchestrator", version=__version__)
+
+from orchestrator.llm_settings import llm_settings
 from orchestrator.settings import app_settings
+
+if llm_settings.LLM_ENABLED:
+    try:
+        from importlib import import_module
+
+        import_module("pydantic_ai")
+        from orchestrator.agentic_app import AgenticOrchestratorCore as OrchestratorCore
+
+    except ImportError:
+        logger.error(
+            "Unable to import the 'pydantic_ai' module, please install the orchestrator with the llm dependencies: `pip install orchestrator-core[llm]`",
+        )
+        exit(1)
+else:
+    from orchestrator.app import OrchestratorCore  # type: ignore[assignment]
+
 from orchestrator.workflow import begin, conditional, done, focussteps, inputstep, retrystep, step, steplens, workflow
 
 __all__ = [
     "OrchestratorCore",
     "app_settings",
+    "llm_settings",
     "step",
     "inputstep",
     "workflow",
diff --git a/orchestrator/agentic_app.py b/orchestrator/agentic_app.py
new file mode 100644
index 000000000..67c39beb3
--- /dev/null
+++ b/orchestrator/agentic_app.py
@@ -0,0 +1,84 @@
+#!/usr/bin/env python3
+"""The main application module.
+
+This module contains the main `AgenticOrchestratorCore` class for the `FastAPI` backend and
+provides the ability to run the CLI.
+"""
+# Copyright 2019-2025 SURF
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Any
+
+import typer
+from pydantic_ai.models.openai import OpenAIModel
+from pydantic_ai.toolsets import FunctionToolset
+from structlog import get_logger
+
+from orchestrator.app import OrchestratorCore
+from orchestrator.cli.main import app as cli_app
+from orchestrator.llm_settings import LLMSettings, llm_settings
+
+logger = get_logger(__name__)
+
+
+class AgenticOrchestratorCore(OrchestratorCore):
+    def __init__(
+        self,
+        *args: Any,
+        llm_model: OpenAIModel | str = "gpt-4o-mini",
+        llm_settings: LLMSettings = llm_settings,
+        agent_tools: list[FunctionToolset] | None = None,
+        **kwargs: Any,
+    ) -> None:
+        """Initialize the `AgenticOrchestratorCore` class.
+
+        This class takes the same arguments as the `OrchestratorCore` class.
+
+        Args:
+            *args: All the normal arguments passed to the `OrchestratorCore` class.
+            llm_model: An `OpenAIModel` instance or model name string; despite the type name,
+                this is not limited to OpenAI-hosted models (e.g. `gpt-4o-mini`).
+            llm_settings: The settings object for the LLM integration.
+            agent_tools: A list of toolsets that can be used by the agent.
+            **kwargs: Additional arguments passed to the `OrchestratorCore` class.
+
+        Returns:
+            None
+        """
+        self.llm_model = llm_model
+        self.agent_tools = agent_tools
+        self.llm_settings = llm_settings
+
+        super().__init__(*args, **kwargs)
+
+        logger.info("Mounting the agent")
+        self.register_llm_integration()
+
+    def register_llm_integration(self) -> None:
+        """Mount the agent endpoint.
+
+        This helper mounts the agent endpoint on the application.
+
+        Returns:
+            None
+
+        """
+        from orchestrator.search.agent import build_agent_app
+
+        agent_app = build_agent_app(self.llm_model, self.agent_tools)
+        self.mount("/agent", agent_app)
+
+
+main_typer_app = typer.Typer()
+main_typer_app.add_typer(cli_app, name="orchestrator", help="The orchestrator CLI commands")
+
+if __name__ == "__main__":
+    main_typer_app()
diff --git a/orchestrator/api/api_v1/api.py b/orchestrator/api/api_v1/api.py
index 24be5a2e6..e6be129d4 100644
--- a/orchestrator/api/api_v1/api.py
+++ b/orchestrator/api/api_v1/api.py
@@ -22,7 +22,6 @@
     product_blocks,
     products,
     resource_types,
-    search,
     settings,
     subscription_customer_descriptions,
     subscriptions,
@@ -31,6 +30,7 @@
     workflows,
     ws,
 )
+from orchestrator.llm_settings import llm_settings
 from orchestrator.security import authorize
 
 api_router = APIRouter()
@@ -85,8 +85,11 @@
 )
 api_router.include_router(ws.router, prefix="/ws", tags=["Core", "Events"])
 
-api_router.include_router(
-    search.router,
-    prefix="/search",
-    tags=["Core", "Search"],
-)
+if llm_settings.LLM_ENABLED:
+    from orchestrator.api.api_v1.endpoints import search
+
+    api_router.include_router(
+        search.router,
+        prefix="/search",
+        tags=["Core", "Search"],
+    )
diff --git a/orchestrator/app.py b/orchestrator/app.py
index 03606c515..c4dc89fd8 100644
--- a/orchestrator/app.py
+++ b/orchestrator/app.py
@@ -41,7 +41,6 @@
 from oauth2_lib.fastapi import AuthManager, Authorization, GraphqlAuthorization, OIDCAuth
 from orchestrator import __version__
 from orchestrator.api.api_v1.api import api_router
-from orchestrator.api.api_v1.endpoints.agent import build_agent_app
 from orchestrator.api.error_handling import ProblemDetailException
 from orchestrator.cli.main import app as cli_app
 from orchestrator.db import db, init_database
@@ -91,6 +90,22 @@ def __init__(
         base_settings: AppSettings = app_settings,
         **kwargs: Any,
     ) -> None:
+        """Initialize the Orchestrator.
+
+        Args:
+            title: Name of the application.
+            description: Description of the application.
+            openapi_url: Location of the OpenAPI endpoint.
+            docs_url: Location of the docs endpoint.
+            redoc_url: Location of the redoc endpoint.
+            version: Version of the application.
+            default_response_class: Override the default response class.
+            base_settings: Settings for the application.
+            **kwargs: Any additional keyword arguments are passed on to the underlying `FastAPI` class.
+
+        Returns:
+            None
+        """
         initialise_logging(LOGGER_OVERRIDES)
         init_model_loaders()
         if base_settings.ENABLE_GRAPHQL_STATS_EXTENSION:
@@ -151,9 +166,6 @@ def __init__(
         metrics_app = make_asgi_app(registry=ORCHESTRATOR_METRICS_REGISTRY)
         self.mount("/api/metrics", metrics_app)
 
-        agent_app = build_agent_app()
-        self.mount("/agent", agent_app)
-
         @self.router.get("/", response_model=str, response_class=JSONResponse, include_in_schema=False)
         def _index() -> str:
             return "Orchestrator Core"
@@ -167,6 +179,22 @@ def add_sentry(
         release: str | None = GIT_COMMIT_HASH,
         **sentry_kwargs: Any,
     ) -> None:
+        """Register Sentry with your application.
+
+        Sentry is an application monitoring toolkit.
+
+        Args:
+            sentry_dsn: The location where Sentry traces are posted.
+            trace_sample_rate: The trace sample rate.
+            server_name: The name of the application.
+            environment: The environment, e.g. production or development.
+            release: The version of the application.
+            **sentry_kwargs: Any additional Sentry keyword arguments.
+
+        Returns:
+            None
+
+        """
         logger.info("Adding Sentry middleware to app", app=self.title)
         if self.base_settings.EXECUTOR == ExecutorType.WORKER:
             from sentry_sdk.integrations.celery import CeleryIntegration
diff --git a/orchestrator/cli/main.py b/orchestrator/cli/main.py
index d0e7b33ef..f2810c1c4 100644
--- a/orchestrator/cli/main.py
+++ b/orchestrator/cli/main.py
@@ -16,29 +16,30 @@
 from orchestrator.cli import (
     database,
     generate,
-    index_llm,
-    resize_embedding,
     scheduler,
-    search_explore,
-    speedtest,
 )
+from orchestrator.llm_settings import llm_settings
 
 app = typer.Typer()
 app.add_typer(scheduler.app, name="scheduler", help="Access all the scheduler functions")
 app.add_typer(database.app, name="db", help="Interact with the application database")
 app.add_typer(generate.app, name="generate", help="Generate products, workflows and other artifacts")
-app.add_typer(index_llm.app, name="index", help="(Re-)Index the search table.")
-app.add_typer(search_explore.app, name="search", help="Try out different search types.")
-app.add_typer(
-    resize_embedding.app,
-    name="embedding",
-    help="Resize the vector dimension of the embedding column in the search table.",
-)
-app.add_typer(
-    speedtest.app,
-    name="speedtest",
-    help="Search performance testing and analysis.",
-)
+
+if llm_settings.LLM_ENABLED:
+    from orchestrator.cli import index_llm, resize_embedding, search_explore, speedtest
+
+    app.add_typer(index_llm.app, name="index", help="(Re-)Index the search table.")
+    app.add_typer(search_explore.app, name="search", help="Try out different search types.")
+    app.add_typer(
+        resize_embedding.app,
+        name="embedding",
+        help="Resize the vector dimension of the embedding column in the search table.",
+    )
+    app.add_typer(
+        speedtest.app,
+        name="speedtest",
+        help="Search performance testing and analysis.",
+    )
 
 
 if __name__ == "__main__":
diff --git a/orchestrator/cli/resize_embedding.py b/orchestrator/cli/resize_embedding.py
index 69bd27f62..8d944e3cf 100644
--- a/orchestrator/cli/resize_embedding.py
+++ b/orchestrator/cli/resize_embedding.py
@@ -5,7 +5,7 @@
 
 from orchestrator.db import db
 from orchestrator.db.models import AiSearchIndex
-from orchestrator.settings import app_settings
+from orchestrator.llm_settings import llm_settings
 
 logger = structlog.get_logger(__name__)
@@ -85,10 +85,10 @@ def resize_embeddings_command() -> None:
     """Resize vector dimensions of the ai_search_index embedding column.
 
     Compares the current embedding dimension in the database with the configured
-    dimension in app_settings. If they differ, drops all records and alters the
+    dimension in llm_settings. If they differ, drops all records and alters the
     column to match the new dimension.
     """
-    new_dimension = app_settings.EMBEDDING_DIMENSION
+    new_dimension = llm_settings.EMBEDDING_DIMENSION
 
     logger.info("Starting embedding dimension resize", new_dimension=new_dimension)
diff --git a/orchestrator/db/models.py b/orchestrator/db/models.py
index 8307020c6..2d7f43b3d 100644
--- a/orchestrator/db/models.py
+++ b/orchestrator/db/models.py
@@ -52,8 +52,8 @@
 
 from orchestrator.config.assignee import Assignee
 from orchestrator.db.database import BaseModel, SearchQuery
+from orchestrator.llm_settings import llm_settings
 from orchestrator.search.core.types import FieldType
-from orchestrator.settings import app_settings
 from orchestrator.targets import Target
 from orchestrator.utils.datetime import nowtz
 from orchestrator.version import GIT_COMMIT_HASH
@@ -715,7 +715,7 @@ class AiSearchIndex(BaseModel):
     )
 
     # Embedding
-    embedding = mapped_column(Vector(app_settings.EMBEDDING_DIMENSION), nullable=True)
+    embedding = mapped_column(Vector(llm_settings.EMBEDDING_DIMENSION), nullable=True)
 
     # SHA-256
     content_hash = mapped_column(String(64), nullable=False, index=True)
diff --git a/orchestrator/llm_settings.py b/orchestrator/llm_settings.py
new file mode 100644
index 000000000..7552ef0dd
--- /dev/null
+++ b/orchestrator/llm_settings.py
@@ -0,0 +1,51 @@
+# Copyright 2019-2025 SURF
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from pydantic import Field, field_validator
+from pydantic_settings import BaseSettings
+from structlog import get_logger
+
+logger = get_logger(__name__)
+
+
+class LLMSettings(BaseSettings):
+    LLM_ENABLED: bool = False  # Default to false
+    # Pydantic-ai Agent settings
+    AGENT_MODEL: str = "gpt-4o-mini"  # See pydantic-ai docs for supported models.
+    AGENT_MODEL_VERSION: str = "2025-01-01-preview"
+    OPENAI_API_KEY: str = ""  # Change per provider (Azure, etc).
+    # Embedding settings
+    EMBEDDING_DIMENSION: int = 1536
+    EMBEDDING_MODEL: str = "openai/text-embedding-3-small"  # See litellm docs for supported models.
+    EMBEDDING_SAFE_MARGIN_PERCENT: float = Field(
+        0.1, description="Safety margin as a percentage (e.g., 0.1 for 10%) for token budgeting.", ge=0, le=1
+    )
+
+    # The following settings are only needed for local models.
+    # By default, they are set conservatively, assuming a small model like All-MiniLM-L6-V2.
+ OPENAI_BASE_URL: str | None = None + EMBEDDING_FALLBACK_MAX_TOKENS: int | None = 512 + EMBEDDING_MAX_BATCH_SIZE: int | None = 32 + + # General LiteLLM settings + LLM_MAX_RETRIES: int = 3 + LLM_TIMEOUT: int = 30 + + @field_validator("EMBEDDING_MODEL") + def validate_embedding_model_format(cls, v: str) -> str: + """Validate that embedding model is in 'vendor/model' format.""" + if "/" not in v: + raise ValueError("EMBEDDING_MODEL must be in format 'vendor/model'") + return v + + +llm_settings = LLMSettings() diff --git a/orchestrator/search/__init__.py b/orchestrator/search/__init__.py index e69de29bb..42093915b 100644 --- a/orchestrator/search/__init__.py +++ b/orchestrator/search/__init__.py @@ -0,0 +1,12 @@ +# Copyright 2019-2025 SURF. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/orchestrator/search/agent/__init__.py b/orchestrator/search/agent/__init__.py index 0464af7fb..5bb0abe7e 100644 --- a/orchestrator/search/agent/__init__.py +++ b/orchestrator/search/agent/__init__.py @@ -1 +1,8 @@ # This module requires: pydantic-ai==0.7.0, ag-ui-protocol>=0.1.8 + + +from orchestrator.search.agent.agent import build_agent_app + +__all__ = [ + "build_agent_app", +] diff --git a/orchestrator/api/api_v1/endpoints/agent.py b/orchestrator/search/agent/agent.py similarity index 76% rename from orchestrator/api/api_v1/endpoints/agent.py rename to orchestrator/search/agent/agent.py index e18d36c4f..06d145fca 100644 --- a/orchestrator/api/api_v1/endpoints/agent.py +++ b/orchestrator/search/agent/agent.py @@ -1,14 +1,17 @@ +from typing import Any + import structlog from fastapi import FastAPI, HTTPException from pydantic_ai.ag_ui import StateDeps from pydantic_ai.agent import Agent +from pydantic_ai.models.openai import OpenAIModel from pydantic_ai.settings import ModelSettings +from pydantic_ai.toolsets import FunctionToolset from starlette.types import ASGIApp from orchestrator.search.agent.prompts import get_base_instructions, get_dynamic_instructions from orchestrator.search.agent.state import SearchState from orchestrator.search.agent.tools import search_toolset -from orchestrator.settings import app_settings logger = structlog.get_logger(__name__) @@ -23,19 +26,17 @@ async def _disabled(path: str) -> None: return app -def build_agent_app() -> ASGIApp: - if not app_settings.AGENT_MODEL or not app_settings.OPENAI_API_KEY: - logger.warning("Agent route disabled: missing model or OPENAI_API_KEY") - return _disabled_agent_app("missing configuration") - +def build_agent_app(model: str | OpenAIModel, toolsets: list[FunctionToolset[Any]] | None = None) -> ASGIApp: try: + toolsets = toolsets + [search_toolset] if toolsets else [search_toolset] + agent = Agent( - model=app_settings.AGENT_MODEL, + model=model, deps_type=StateDeps[SearchState], model_settings=ModelSettings( - parallel_tool_calls=False + parallel_tool_calls=False, ), # https://github.com/pydantic/pydantic-ai/issues/562 - toolsets=[search_toolset], + toolsets=toolsets, ) agent.instructions(get_base_instructions) 
agent.instructions(get_dynamic_instructions) diff --git a/orchestrator/search/agent/prompts.py b/orchestrator/search/agent/prompts.py index 20058c2cb..ef1555a38 100644 --- a/orchestrator/search/agent/prompts.py +++ b/orchestrator/search/agent/prompts.py @@ -5,10 +5,9 @@ from pydantic_ai import RunContext from pydantic_ai.ag_ui import StateDeps +from orchestrator.search.agent.state import SearchState from orchestrator.search.retrieval.validation import get_structured_filter_schema -from .state import SearchState - logger = structlog.get_logger(__name__) @@ -42,6 +41,7 @@ async def get_base_instructions() -> str: 3) `set_filter_tree(filters=)`. 4) `execute_search()`. 5) Summarize the results for the user. + """ ) diff --git a/orchestrator/search/core/embedding.py b/orchestrator/search/core/embedding.py index 0751c3ca2..5b1f4f1de 100644 --- a/orchestrator/search/core/embedding.py +++ b/orchestrator/search/core/embedding.py @@ -5,7 +5,7 @@ from litellm import embedding as llm_embedding from litellm import exceptions as llm_exc -from orchestrator.settings import app_settings +from orchestrator.llm_settings import llm_settings logger = structlog.get_logger(__name__) @@ -25,12 +25,12 @@ def get_embeddings_from_api_batch(cls, texts: list[str], dry_run: bool) -> list[ try: resp = llm_embedding( - model=app_settings.EMBEDDING_MODEL, + model=llm_settings.EMBEDDING_MODEL, input=[t.lower() for t in texts], - api_key=app_settings.OPENAI_API_KEY, - base_url=app_settings.OPENAI_BASE_URL, - timeout=app_settings.LLM_TIMEOUT, - max_retries=app_settings.LLM_MAX_RETRIES, + api_key=llm_settings.OPENAI_API_KEY, + api_base=llm_settings.OPENAI_BASE_URL, + timeout=llm_settings.LLM_TIMEOUT, + max_retries=llm_settings.LLM_MAX_RETRIES, ) data = sorted(resp.data, key=lambda e: e["index"]) return [row["embedding"] for row in data] @@ -51,10 +51,10 @@ async def generate_for_text_async(cls, text: str) -> list[float]: return [] try: resp = await llm_aembedding( - model=app_settings.EMBEDDING_MODEL, + model=llm_settings.EMBEDDING_MODEL, input=[text.lower()], - api_key=app_settings.OPENAI_API_KEY, - base_url=app_settings.OPENAI_BASE_URL, + api_key=llm_settings.OPENAI_API_KEY, + api_base=llm_settings.OPENAI_BASE_URL, timeout=5.0, max_retries=0, # No retries, prioritize speed. 
) diff --git a/orchestrator/search/indexing/indexer.py b/orchestrator/search/indexing/indexer.py index a1636364d..ea9866406 100644 --- a/orchestrator/search/indexing/indexer.py +++ b/orchestrator/search/indexing/indexer.py @@ -14,11 +14,11 @@ from orchestrator.db import db from orchestrator.db.models import AiSearchIndex +from orchestrator.llm_settings import llm_settings from orchestrator.search.core.embedding import EmbeddingIndexer from orchestrator.search.core.types import ExtractedField, IndexableRecord from orchestrator.search.indexing.registry import EntityConfig from orchestrator.search.indexing.traverse import DatabaseEntity -from orchestrator.settings import app_settings logger = structlog.get_logger(__name__) @@ -81,7 +81,7 @@ def __init__(self, config: EntityConfig, dry_run: bool, force_index: bool, chunk self.dry_run = dry_run self.force_index = force_index self.chunk_size = chunk_size - self.embedding_model = app_settings.EMBEDDING_MODEL + self.embedding_model = llm_settings.EMBEDDING_MODEL self.logger = logger.bind(entity_kind=config.entity_kind.value) def run(self, entities: Iterable[DatabaseEntity]) -> int: @@ -210,12 +210,12 @@ def _generate_upsert_batches( current_tokens = 0 max_ctx = self._get_max_tokens() - safe_margin = int(max_ctx * app_settings.EMBEDDING_SAFE_MARGIN_PERCENT) + safe_margin = int(max_ctx * llm_settings.EMBEDDING_SAFE_MARGIN_PERCENT) token_budget = max(1, max_ctx - safe_margin) max_batch_size = None - if app_settings.OPENAI_BASE_URL: # We are using a local model - max_batch_size = app_settings.EMBEDDING_MAX_BATCH_SIZE + if llm_settings.OPENAI_BASE_URL: # We are using a local model + max_batch_size = llm_settings.EMBEDDING_MAX_BATCH_SIZE for entity_id, field in fields_to_upsert: if field.value_type.is_embeddable(field.value): @@ -279,7 +279,7 @@ def _get_max_tokens(self) -> int: # Allow local(unknown) models to fall back. self.logger.warning("Could not auto-detect max tokens.", model=self.embedding_model) - max_ctx = app_settings.EMBEDDING_FALLBACK_MAX_TOKENS + max_ctx = llm_settings.EMBEDDING_FALLBACK_MAX_TOKENS if not isinstance(max_ctx, int): raise RuntimeError("Model not recognized and EMBEDDING_FALLBACK_MAX_TOKENS not set.") self.logger.warning("Using configured fallback token limit.", fallback=max_ctx) diff --git a/orchestrator/settings.py b/orchestrator/settings.py index c8d12221b..86c3493df 100644 --- a/orchestrator/settings.py +++ b/orchestrator/settings.py @@ -1,4 +1,4 @@ -# Copyright 2019-2020 SURF, GÉANT. +# Copyright 2019-2025 SURF, GÉANT. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -16,7 +16,7 @@ from pathlib import Path from typing import Literal -from pydantic import Field, NonNegativeInt, PostgresDsn, RedisDsn, field_validator +from pydantic import Field, NonNegativeInt, PostgresDsn, RedisDsn from pydantic_settings import BaseSettings from oauth2_lib.settings import oauth2lib_settings @@ -92,34 +92,6 @@ class AppSettings(BaseSettings): EXPOSE_SETTINGS: bool = False EXPOSE_OAUTH_SETTINGS: bool = False - # Pydantic-ai Agent settings - AGENT_MODEL: str = "openai:gpt-4o-mini" # See pydantic-ai docs for supported models. - OPENAI_API_KEY: str = "OPENAI_API_KEY" # Change per provider (Azure, etc). - - # Embedding settings - EMBEDDING_DIMENSION: int = 1536 - EMBEDDING_MODEL: str = "openai/text-embedding-3-small" # See litellm docs for supported models. 
-    EMBEDDING_SAFE_MARGIN_PERCENT: float = Field(
-        0.1, description="Safety margin as a percentage (e.g., 0.1 for 10%) for token budgeting.", ge=0, le=1
-    )
-
-    # The following settings are only needed for local models.
-    # By default, they are set conservative assuming a small model like All-MiniLM-L6-V2.
-    OPENAI_BASE_URL: str | None = None
-    EMBEDDING_FALLBACK_MAX_TOKENS: int | None = 512
-    EMBEDDING_MAX_BATCH_SIZE: int | None = 32
-
-    # General LiteLLM settings
-    LLM_MAX_RETRIES: int = 3
-    LLM_TIMEOUT: int = 30
-
-    @field_validator("EMBEDDING_MODEL")
-    def validate_embedding_model_format(cls, v: str) -> str:
-        """Validate that embedding model is in 'vendor/model' format."""
-        if "/" not in v:
-            raise ValueError("EMBEDDING_MODEL must be in format 'vendor/model'")
-        return v
-
 
 app_settings = AppSettings()
diff --git a/pyproject.toml b/pyproject.toml
index 2a50003d0..b95d30855 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -44,11 +44,8 @@ dependencies = [
     "fastapi-etag==0.4.0",
    "itsdangerous>=2.2.0",
    "jinja2==3.1.6",
-    "litellm>=1.75.7",
    "more-itertools~=10.7.0",
    "nwa-stdlib~=1.9.0",
-    "pydantic-ai-slim==0.7.0",
-    "ag-ui-protocol>=0.1.8",
    "oauth2-lib>=2.4.1",
    "orjson==3.10.18",
    "pgvector>=0.4.1",
@@ -82,6 +79,11 @@ Source = "https://github.com/workfloworchestrator/orchestrator-core"
 celery = [
     "celery~=5.5.1",
 ]
+llm = [
+    "pydantic-ai-slim==0.7.0",
+    "ag-ui-protocol>=0.1.8",
+    "litellm>=1.75.7",
+]
 
 # Local dependencies for development
 [dependency-groups]
diff --git a/uv.lock b/uv.lock
index fbb005389..1c569738f 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1875,7 +1875,6 @@
 name = "orchestrator-core"
 source = { editable = "." }
 dependencies = [
-    { name = "ag-ui-protocol" },
     { name = "alembic" },
     { name = "anyio" },
     { name = "apscheduler" },
@@ -1886,7 +1885,6 @@
     { name = "fastapi-etag" },
     { name = "itsdangerous" },
     { name = "jinja2" },
-    { name = "litellm" },
     { name = "more-itertools" },
     { name = "nwa-stdlib" },
     { name = "oauth2-lib" },
@@ -1895,7 +1893,6 @@
     { name = "prometheus-client" },
     { name = "psycopg2-binary" },
     { name = "pydantic", extra = ["email"] },
-    { name = "pydantic-ai-slim" },
     { name = "pydantic-forms" },
     { name = "pydantic-settings" },
     { name = "python-dateutil" },
@@ -1917,6 +1914,11 @@
 celery = [
     { name = "celery" },
 ]
+llm = [
+    { name = "ag-ui-protocol" },
+    { name = "litellm" },
+    { name = "pydantic-ai-slim" },
+]
 
 [package.dev-dependencies]
 dev = [
@@ -1974,7 +1976,7 @@ docs = [
 
 [package.metadata]
 requires-dist = [
-    { name = "ag-ui-protocol", specifier = ">=0.1.8" },
+    { name = "ag-ui-protocol", marker = "extra == 'llm'", specifier = ">=0.1.8" },
     { name = "alembic", specifier = "==1.16.1" },
     { name = "anyio", specifier = ">=3.7.0" },
     { name = "apscheduler", specifier = ">=3.11.0" },
@@ -1986,7 +1988,7 @@
     { name = "fastapi-etag", specifier = "==0.4.0" },
     { name = "itsdangerous", specifier = ">=2.2.0" },
     { name = "jinja2", specifier = "==3.1.6" },
-    { name = "litellm", specifier = ">=1.75.7" },
+    { name = "litellm", marker = "extra == 'llm'", specifier = ">=1.75.7" },
     { name = "more-itertools", specifier = "~=10.7.0" },
     { name = "nwa-stdlib", specifier = "~=1.9.0" },
     { name = "oauth2-lib", specifier = ">=2.4.1" },
@@ -1995,7 +1997,7 @@
     { name = "prometheus-client", specifier = "==0.22.1" },
     { name = "psycopg2-binary", specifier = "==2.9.10" },
     { name = "pydantic", extras = ["email"], specifier = "~=2.11.0" },
-    { name = "pydantic-ai-slim", specifier = "==0.7.0" },
+    { name = "pydantic-ai-slim", marker = "extra == 'llm'", specifier = "==0.7.0" },
     { name = "pydantic-forms", specifier = ">=1.4.0,<=2.1.0" },
     { name = "pydantic-settings", specifier = "~=2.9.1" },
     { name = "python-dateutil", specifier = "==2.8.2" },
@@ -2012,7 +2014,7 @@
     { name = "typer", specifier = "==0.15.4" },
     { name = "uvicorn", extras = ["standard"], specifier = "~=0.34.0" },
 ]
-provides-extras = ["celery"]
+provides-extras = ["celery", "llm"]
 
 [package.metadata.requires-dev]
 dev = [