From eadb6f152e9f1edd07a13862cf66905cf695a787 Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Thu, 1 May 2025 18:00:24 +0000 Subject: [PATCH] Port to GitHub Models --- .env.sample | 13 ++++++++++--- evals/evaluate.py | 4 ++++ evals/generate_ground_truth.py | 4 +++- infra/main.bicep | 2 +- src/backend/fastapi_app/dependencies.py | 8 ++++++++ src/backend/fastapi_app/openai_clients.py | 18 ++++++++++++++++++ src/backend/fastapi_app/routes/api_routes.py | 2 +- src/backend/fastapi_app/update_embeddings.py | 2 ++ tests/conftest.py | 2 +- 9 files changed, 48 insertions(+), 7 deletions(-) diff --git a/.env.sample b/.env.sample index 080cf73c..ee58dcf2 100644 --- a/.env.sample +++ b/.env.sample @@ -5,9 +5,9 @@ POSTGRES_PASSWORD=postgres POSTGRES_DATABASE=postgres POSTGRES_SSL=disable -# OPENAI_CHAT_HOST can be either azure, openai, or ollama: +# OPENAI_CHAT_HOST can be either azure, openai, ollama, or github: OPENAI_CHAT_HOST=azure -# OPENAI_EMBED_HOST can be either azure or openai: +# OPENAI_EMBED_HOST can be either azure, openai, ollama, or github: OPENAI_EMBED_HOST=azure # Needed for Azure: # You also need to `azd auth login` if running this locally @@ -28,10 +28,17 @@ AZURE_OPENAI_KEY= OPENAICOM_KEY=YOUR-OPENAI-API-KEY OPENAICOM_CHAT_MODEL=gpt-3.5-turbo OPENAICOM_EMBED_MODEL=text-embedding-3-large -OPENAICOM_EMBED_MODEL_DIMENSIONS=1024 +OPENAICOM_EMBED_DIMENSIONS=1024 OPENAICOM_EMBEDDING_COLUMN=embedding_3l # Needed for Ollama: OLLAMA_ENDPOINT=http://host.docker.internal:11434/v1 OLLAMA_CHAT_MODEL=llama3.1 OLLAMA_EMBED_MODEL=nomic-embed-text OLLAMA_EMBEDDING_COLUMN=embedding_nomic +# Needed for GitHub Models: +GITHUB_TOKEN=YOUR-GITHUB-TOKEN +GITHUB_BASE_URL=https://models.inference.ai.azure.com +GITHUB_MODEL=gpt-4o +GITHUB_EMBED_MODEL=text-embedding-3-large +GITHUB_EMBED_DIMENSIONS=1024 +GITHUB_EMBEDDING_COLUMN=embedding_3l diff --git a/evals/evaluate.py b/evals/evaluate.py index c4075479..efb7f8f2 100644 --- a/evals/evaluate.py +++ b/evals/evaluate.py @@ -66,6 +66,10 
@@ def get_openai_config() -> dict: # azure-ai-evaluate will call DefaultAzureCredential behind the scenes, # so we must be logged in to Azure CLI with the correct tenant openai_config["model"] = os.environ["AZURE_OPENAI_EVAL_MODEL"] + elif os.environ.get("OPENAI_CHAT_HOST") == "ollama": + raise NotImplementedError("Ollama is not supported. Switch to Azure or OpenAI.com") + elif os.environ.get("OPENAI_CHAT_HOST") == "github": + raise NotImplementedError("GitHub Models is not supported. Switch to Azure or OpenAI.com") else: logger.info("Using OpenAI Service with API Key from OPENAICOM_KEY") openai_config = {"api_key": os.environ["OPENAICOM_KEY"], "model": "gpt-4"} diff --git a/evals/generate_ground_truth.py b/evals/generate_ground_truth.py index f5807a7f..44410506 100644 --- a/evals/generate_ground_truth.py +++ b/evals/generate_ground_truth.py @@ -101,7 +101,9 @@ def get_openai_client() -> tuple[Union[AzureOpenAI, OpenAI], str]: ) model = os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT"] elif OPENAI_CHAT_HOST == "ollama": - raise NotImplementedError("Ollama OpenAI Service is not supported. Switch to Azure or OpenAI.com") + raise NotImplementedError("Ollama is not supported. Switch to Azure or OpenAI.com") + elif OPENAI_CHAT_HOST == "github": + raise NotImplementedError("GitHub Models is not supported. Switch to Azure or OpenAI.com") else: logger.info("Using OpenAI Service with API Key from OPENAICOM_KEY") openai_client = OpenAI(api_key=os.environ["OPENAICOM_KEY"]) diff --git a/infra/main.bicep b/infra/main.bicep index a55c5c8b..34b8b6e8 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -302,7 +302,7 @@ var webAppEnv = union(azureOpenAIKeyEnv, openAIComKeyEnv, [ value: openAIEmbedHost } { - name: 'OPENAICOM_EMBED_MODEL_DIMENSIONS' + name: 'OPENAICOM_EMBED_DIMENSIONS' value: openAIEmbedHost == 'openaicom' ? 
'1024' : '' } { diff --git a/src/backend/fastapi_app/dependencies.py b/src/backend/fastapi_app/dependencies.py index de916576..bd7bc4b4 100644 --- a/src/backend/fastapi_app/dependencies.py +++ b/src/backend/fastapi_app/dependencies.py @@ -51,6 +51,11 @@ async def common_parameters(): openai_embed_model = os.getenv("OLLAMA_EMBED_MODEL") or "nomic-embed-text" openai_embed_dimensions = None embedding_column = os.getenv("OLLAMA_EMBEDDING_COLUMN") or "embedding_nomic" + elif OPENAI_EMBED_HOST == "github": + openai_embed_deployment = None + openai_embed_model = os.getenv("GITHUB_EMBED_MODEL") or "text-embedding-3-large" + openai_embed_dimensions = int(os.getenv("GITHUB_EMBED_DIMENSIONS", 1024)) + embedding_column = os.getenv("GITHUB_EMBEDDING_COLUMN") or "embedding_3l" else: openai_embed_deployment = None openai_embed_model = os.getenv("OPENAICOM_EMBED_MODEL") or "text-embedding-3-large" @@ -63,6 +68,9 @@ async def common_parameters(): openai_chat_deployment = None openai_chat_model = os.getenv("OLLAMA_CHAT_MODEL") or "phi3:3.8b" openai_embed_model = os.getenv("OLLAMA_EMBED_MODEL") or "nomic-embed-text" + elif OPENAI_CHAT_HOST == "github": + openai_chat_deployment = None + openai_chat_model = os.getenv("GITHUB_MODEL") or "gpt-4o" else: openai_chat_deployment = None openai_chat_model = os.getenv("OPENAICOM_CHAT_MODEL") or "gpt-3.5-turbo" diff --git a/src/backend/fastapi_app/openai_clients.py b/src/backend/fastapi_app/openai_clients.py index f0c1e6e5..e83e0c41 100644 --- a/src/backend/fastapi_app/openai_clients.py +++ b/src/backend/fastapi_app/openai_clients.py @@ -50,6 +50,15 @@ async def create_openai_chat_client( base_url=os.getenv("OLLAMA_ENDPOINT"), api_key="nokeyneeded", ) + elif OPENAI_CHAT_HOST == "github": + logger.info("Setting up OpenAI client for chat completions using GitHub Models") + github_base_url = os.getenv("GITHUB_BASE_URL", "https://models.inference.ai.azure.com") + github_model = os.getenv("GITHUB_MODEL", "gpt-4o") + logger.info(f"Using GitHub Models 
with base URL: {github_base_url}, model: {github_model}") + openai_chat_client = openai.AsyncOpenAI( + base_url=github_base_url, + api_key=os.getenv("GITHUB_TOKEN"), + ) else: logger.info("Setting up OpenAI client for chat completions using OpenAI.com API key") openai_chat_client = openai.AsyncOpenAI(api_key=os.getenv("OPENAICOM_KEY")) @@ -99,6 +108,15 @@ async def create_openai_embed_client( base_url=os.getenv("OLLAMA_ENDPOINT"), api_key="nokeyneeded", ) + elif OPENAI_EMBED_HOST == "github": + logger.info("Setting up OpenAI client for embeddings using GitHub Models") + github_base_url = os.getenv("GITHUB_BASE_URL", "https://models.inference.ai.azure.com") + github_embed_model = os.getenv("GITHUB_EMBED_MODEL", "text-embedding-3-large") + logger.info(f"Using GitHub Models with base URL: {github_base_url}, embedding model: {github_embed_model}") + openai_embed_client = openai.AsyncOpenAI( + base_url=github_base_url, + api_key=os.getenv("GITHUB_TOKEN"), + ) else: logger.info("Setting up OpenAI client for embeddings using OpenAI.com API key") openai_embed_client = openai.AsyncOpenAI(api_key=os.getenv("OPENAICOM_KEY")) diff --git a/src/backend/fastapi_app/routes/api_routes.py b/src/backend/fastapi_app/routes/api_routes.py index 54e7e3b1..d7486730 100644 --- a/src/backend/fastapi_app/routes/api_routes.py +++ b/src/backend/fastapi_app/routes/api_routes.py @@ -68,7 +68,7 @@ async def similar_handler( f"SELECT *, {context.embedding_column} <=> :embedding as DISTANCE FROM {Item.__tablename__} " "WHERE id <> :item_id ORDER BY distance LIMIT :n" ), - {"embedding": item.embedding_3l, "n": n, "item_id": id}, + {"embedding": getattr(item, context.embedding_column), "n": n, "item_id": id}, ) ).fetchall() diff --git a/src/backend/fastapi_app/update_embeddings.py b/src/backend/fastapi_app/update_embeddings.py index 83744ead..b36113b2 100644 --- a/src/backend/fastapi_app/update_embeddings.py +++ b/src/backend/fastapi_app/update_embeddings.py @@ -29,6 +29,8 @@ async def 
update_embeddings(in_seed_data=False): embedding_column = os.getenv("AZURE_OPENAI_EMBEDDING_COLUMN", "embedding_3l") elif OPENAI_EMBED_HOST == "ollama": embedding_column = os.getenv("OLLAMA_EMBEDDING_COLUMN", "embedding_nomic") + elif OPENAI_EMBED_HOST == "github": + embedding_column = os.getenv("GITHUB_EMBEDDING_COLUMN", "embedding_3l") else: embedding_column = os.getenv("OPENAICOM_EMBEDDING_COLUMN", "embedding_3l") logger.info(f"Updating embeddings in column: {embedding_column}") diff --git a/tests/conftest.py b/tests/conftest.py index 0af0c2c1..5bbff0f6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -95,7 +95,7 @@ def mock_session_env_openai(monkeypatch_session): monkeypatch_session.setenv("OPENAICOM_KEY", "fakekey") monkeypatch_session.setenv("OPENAICOM_CHAT_MODEL", "gpt-3.5-turbo") monkeypatch_session.setenv("OPENAICOM_EMBED_MODEL", "text-embedding-3-large") - monkeypatch_session.setenv("OPENAICOM_EMBED_MODEL_DIMENSIONS", "1024") + monkeypatch_session.setenv("OPENAICOM_EMBED_DIMENSIONS", "1024") monkeypatch_session.setenv("OPENAICOM_EMBEDDING_COLUMN", "embedding_3l") yield