From 43e1152c772002c7df54ad7f9a63163f7e704f6d Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Tue, 30 Sep 2025 09:56:27 +0800 Subject: [PATCH 1/2] Add offline Docker build workflows and Dockerfile Introduces new GitHub Actions workflows to build and publish offline Docker images for AMD64 and ARM64 architectures. Adds Dockerfile.offline to support fully offline operation with pre-downloaded spaCy models. Updates the multi-arch manifest workflow to include offline images. --- .github/workflows/publish-docker-manifest.yml | 38 ++++++++--- .../publish-docker-offline-amd64.yml | 52 ++++++++++++++ .../publish-docker-offline-arm64.yml | 55 +++++++++++++++ Dockerfile.offline | 67 +++++++++++++++++++ 4 files changed, 204 insertions(+), 8 deletions(-) create mode 100644 .github/workflows/publish-docker-offline-amd64.yml create mode 100644 .github/workflows/publish-docker-offline-arm64.yml create mode 100644 Dockerfile.offline diff --git a/.github/workflows/publish-docker-manifest.yml b/.github/workflows/publish-docker-manifest.yml index 89b778ec..e83eefa8 100644 --- a/.github/workflows/publish-docker-manifest.yml +++ b/.github/workflows/publish-docker-manifest.yml @@ -1,11 +1,13 @@ name: Docker Multi-arch Manifests on: workflow_run: - workflows: + workflows: - "Docker Proxy AMD64" - "Docker Proxy ARM64" - "Docker Full AMD64" - "Docker Full ARM64" + - "Docker Offline AMD64" + - "Docker Offline ARM64" types: [completed] jobs: @@ -23,9 +25,11 @@ jobs: script: | const workflows = [ "Docker Proxy AMD64", - "Docker Proxy ARM64", + "Docker Proxy ARM64", "Docker Full AMD64", - "Docker Full ARM64" + "Docker Full ARM64", + "Docker Offline AMD64", + "Docker Offline ARM64" ]; const runId = context.payload.workflow_run.id; @@ -107,17 +111,35 @@ jobs: - name: Create full multi-arch manifest run: | VERSION="${{ needs.check-builds.outputs.version }}" - + # Create versioned full manifest docker manifest create ghcr.io/${{ github.repository }}:${VERSION} \ ghcr.io/${{ github.repository 
}}:${VERSION}-amd64 \ ghcr.io/${{ github.repository }}:${VERSION}-arm64 - + docker manifest push ghcr.io/${{ github.repository }}:${VERSION} - + # Create latest full manifest docker manifest create ghcr.io/${{ github.repository }}:latest \ ghcr.io/${{ github.repository }}:latest-amd64 \ ghcr.io/${{ github.repository }}:latest-arm64 - - docker manifest push ghcr.io/${{ github.repository }}:latest \ No newline at end of file + + docker manifest push ghcr.io/${{ github.repository }}:latest + + - name: Create offline multi-arch manifest + run: | + VERSION="${{ needs.check-builds.outputs.version }}" + + # Create versioned offline manifest + docker manifest create ghcr.io/${{ github.repository }}:${VERSION}-offline \ + ghcr.io/${{ github.repository }}:${VERSION}-offline-amd64 \ + ghcr.io/${{ github.repository }}:${VERSION}-offline-arm64 + + docker manifest push ghcr.io/${{ github.repository }}:${VERSION}-offline + + # Create latest offline manifest + docker manifest create ghcr.io/${{ github.repository }}:latest-offline \ + ghcr.io/${{ github.repository }}:latest-offline-amd64 \ + ghcr.io/${{ github.repository }}:latest-offline-arm64 + + docker manifest push ghcr.io/${{ github.repository }}:latest-offline \ No newline at end of file diff --git a/.github/workflows/publish-docker-offline-amd64.yml b/.github/workflows/publish-docker-offline-amd64.yml new file mode 100644 index 00000000..e8f078f7 --- /dev/null +++ b/.github/workflows/publish-docker-offline-amd64.yml @@ -0,0 +1,52 @@ +name: Docker Offline AMD64 +on: + release: + types: [created] + +jobs: + build: + name: Build offline Docker image for AMD64 + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + steps: + - uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: 
Extract version from tag + id: version + run: | + VERSION=${GITHUB_REF#refs/tags/} + if [ -z "$VERSION" ] || [ "$VERSION" = "$GITHUB_REF" ]; then + VERSION="latest" + fi + echo "VERSION=$VERSION" >> $GITHUB_OUTPUT + + - name: Build and push offline AMD64 image + uses: docker/build-push-action@v5 + with: + context: . + file: Dockerfile.offline + push: true + platforms: linux/amd64 + provenance: false + tags: | + ghcr.io/${{ github.repository }}:${{ steps.version.outputs.VERSION }}-offline-amd64 + ghcr.io/${{ github.repository }}:latest-offline-amd64 + labels: | + org.opencontainers.image.source=https://github.com/${{ github.repository }} + org.opencontainers.image.description=OptiLLM offline image with pre-downloaded models for fully offline operation (AMD64) + org.opencontainers.image.licenses=Apache-2.0 + org.opencontainers.image.version=${{ steps.version.outputs.VERSION }} + cache-from: type=gha,scope=offline-amd64 + cache-to: type=gha,scope=offline-amd64,mode=max \ No newline at end of file diff --git a/.github/workflows/publish-docker-offline-arm64.yml b/.github/workflows/publish-docker-offline-arm64.yml new file mode 100644 index 00000000..70068662 --- /dev/null +++ b/.github/workflows/publish-docker-offline-arm64.yml @@ -0,0 +1,55 @@ +name: Docker Offline ARM64 +on: + release: + types: [created] + +jobs: + build: + name: Build offline Docker image for ARM64 + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + steps: + - uses: actions/checkout@v4 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract version from tag + id: version + run: | + VERSION=${GITHUB_REF#refs/tags/} + if [ -z "$VERSION" ] || [ "$VERSION" = "$GITHUB_REF" ]; then + VERSION="latest" + fi + echo 
"VERSION=$VERSION" >> $GITHUB_OUTPUT + + - name: Build and push offline ARM64 image + uses: docker/build-push-action@v5 + with: + context: . + file: Dockerfile.offline + push: true + platforms: linux/arm64 + provenance: false + tags: | + ghcr.io/${{ github.repository }}:${{ steps.version.outputs.VERSION }}-offline-arm64 + ghcr.io/${{ github.repository }}:latest-offline-arm64 + labels: | + org.opencontainers.image.source=https://github.com/${{ github.repository }} + org.opencontainers.image.description=OptiLLM offline image with pre-downloaded models for fully offline operation (ARM64) + org.opencontainers.image.licenses=Apache-2.0 + org.opencontainers.image.version=${{ steps.version.outputs.VERSION }} + cache-from: type=gha,scope=offline-arm64 + cache-to: type=gha,scope=offline-arm64,mode=max \ No newline at end of file diff --git a/Dockerfile.offline b/Dockerfile.offline new file mode 100644 index 00000000..e93ed24c --- /dev/null +++ b/Dockerfile.offline @@ -0,0 +1,67 @@ +# Build stage +FROM python:3.12-slim-bookworm AS builder + +# Define build argument with default value +ARG PORT=8000 +# Make it available as env variable at runtime +ENV OPTILLM_PORT=$PORT + +# Set working directory +WORKDIR /app + +# Install system dependencies +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + build-essential \ + python3-dev \ + gcc \ + g++ \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Copy only the requirements file first to leverage Docker cache +COPY requirements.txt . 
+
+# Install Python dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Download spaCy model for offline use
+RUN python -m spacy download en_core_web_lg
+
+# Final stage
+FROM python:3.12-slim-bookworm
+
+# Add labels for the final image
+LABEL org.opencontainers.image.source="https://github.com/codelion/optillm"
+LABEL org.opencontainers.image.description="OptiLLM offline image with pre-downloaded models for fully offline operation"
+LABEL org.opencontainers.image.licenses="Apache-2.0"
+
+# Install curl for the healthcheck
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    curl \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# Set working directory
+WORKDIR /app
+
+# Copy installed dependencies from builder stage
+COPY --from=builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages
+COPY --from=builder /usr/local/bin /usr/local/bin
+
+# Copy application code
+COPY . .
+
+# Create a non-root user and switch to it
+RUN useradd -m appuser
+USER appuser
+
+# Set environment variables (builder-stage ENV is not inherited by this stage)
+ENV PYTHONUNBUFFERED=1 OPTILLM_PORT=8000
+# ARG does not cross stage boundaries; re-declare it so EXPOSE gets a value
+ARG PORT=8000
+EXPOSE ${PORT}
+
+# Run the application
+ENTRYPOINT ["python", "optillm.py"]
\ No newline at end of file

From def7f1bb7804153a7ed19edeec561b55552c3904 Mon Sep 17 00:00:00 2001
From: Asankhaya Sharma
Date: Tue, 30 Sep 2025 11:08:53 +0800
Subject: [PATCH 2/2] Pre-warm analyzer engine to prevent recognizer reloads

Added a warm-up call to AnalyzerEngine during initialization to ensure
recognizers are loaded only once, preventing repeated reloads on each
analyze() call. Also added a test to verify recognizers are not reloaded,
improving performance and avoiding unnecessary registry calls.
--- optillm/__init__.py | 2 +- optillm/plugins/privacy_plugin.py | 3 ++ pyproject.toml | 2 +- tests/test_privacy_plugin_performance.py | 64 ++++++++++++++++++++++++ 4 files changed, 69 insertions(+), 2 deletions(-) diff --git a/optillm/__init__.py b/optillm/__init__.py index ef07b022..f22e9889 100644 --- a/optillm/__init__.py +++ b/optillm/__init__.py @@ -1,5 +1,5 @@ # Version information -__version__ = "0.3.0" +__version__ = "0.3.1" # Import from server module from .server import ( diff --git a/optillm/plugins/privacy_plugin.py b/optillm/plugins/privacy_plugin.py index f7a10bcf..5ba2228a 100644 --- a/optillm/plugins/privacy_plugin.py +++ b/optillm/plugins/privacy_plugin.py @@ -105,6 +105,9 @@ def get_analyzer_engine() -> AnalyzerEngine: global _analyzer_engine if _analyzer_engine is None: _analyzer_engine = AnalyzerEngine() + # Pre-warm the analyzer to load all recognizers once during initialization + # This prevents recognizers from being reloaded on each analyze() call + _analyzer_engine.analyze(text="warm up", language="en") return _analyzer_engine def get_anonymizer_engine() -> AnonymizerEngine: diff --git a/pyproject.toml b/pyproject.toml index 77f9895f..02dd0824 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "optillm" -version = "0.3.0" +version = "0.3.1" description = "An optimizing inference proxy for LLMs." readme = "README.md" license = "Apache-2.0" diff --git a/tests/test_privacy_plugin_performance.py b/tests/test_privacy_plugin_performance.py index 6e6fdfbf..5b543f8d 100644 --- a/tests/test_privacy_plugin_performance.py +++ b/tests/test_privacy_plugin_performance.py @@ -181,6 +181,64 @@ def test_singleton_instances_are_reused(): print(f"❌ Singleton test failed: {e}") raise +def test_recognizers_not_reloaded(): + """ + Test that recognizers are not fetched/reloaded on each analyze() call. 
+ This prevents the performance regression where "Fetching all recognizers for language en" + appears in logs on every request. + """ + print("\nTesting that recognizers are not reloaded on each call...") + + # Reset module state + if 'optillm.plugins.privacy_plugin' in sys.modules: + del sys.modules['optillm.plugins.privacy_plugin'] + + try: + # Mock at the presidio level to track registry calls + with patch('presidio_analyzer.AnalyzerEngine') as MockAnalyzerEngine, \ + patch('spacy.util.is_package', return_value=True): + + # Create a mock analyzer instance + mock_analyzer_instance = MagicMock() + mock_registry = MagicMock() + + # Track calls to get_recognizers + mock_registry.get_recognizers = MagicMock(return_value=[]) + mock_analyzer_instance.registry = mock_registry + mock_analyzer_instance.analyze = MagicMock(return_value=[]) + + MockAnalyzerEngine.return_value = mock_analyzer_instance + + # Import module with mocks + import optillm.plugins.privacy_plugin as privacy_plugin + + # First call to get_analyzer_engine - should create and warm up + analyzer1 = privacy_plugin.get_analyzer_engine() + initial_analyze_calls = mock_analyzer_instance.analyze.call_count + + print(f"Warm-up analyze calls: {initial_analyze_calls}") + assert initial_analyze_calls == 1, f"Expected 1 warm-up analyze call, got {initial_analyze_calls}" + + # Second call - should return cached instance without additional analyze + analyzer2 = privacy_plugin.get_analyzer_engine() + second_analyze_calls = mock_analyzer_instance.analyze.call_count + + print(f"Total analyze calls after second get_analyzer_engine: {second_analyze_calls}") + assert second_analyze_calls == 1, f"Analyzer should not call analyze() again on cached retrieval, got {second_analyze_calls} calls" + + # Verify it's the same instance + assert analyzer1 is analyzer2, "Should return the same cached analyzer instance" + + print("✅ Recognizer reload test PASSED - Recognizers are pre-warmed and not reloaded!") + return True + + except 
ImportError as e: + print(f"⚠️ Skipping recognizer reload test - dependencies not installed: {e}") + return True + except Exception as e: + print(f"❌ Recognizer reload test failed: {e}") + raise + if __name__ == "__main__": print("=" * 60) print("Privacy Plugin Performance & Caching Tests") @@ -200,6 +258,12 @@ def test_singleton_instances_are_reused(): all_passed = False print(f"❌ Singleton instance test failed: {e}") + try: + test_recognizers_not_reloaded() + except Exception as e: + all_passed = False + print(f"❌ Recognizer reload test failed: {e}") + try: test_privacy_plugin_performance() except Exception as e: