2 changes: 1 addition & 1 deletion optillm/__init__.py
@@ -1,5 +1,5 @@
# Version information
__version__ = "0.2.9"
__version__ = "0.2.10"

# Import from server module
from .server import (
43 changes: 33 additions & 10 deletions optillm/plugins/privacy_plugin.py
@@ -3,10 +3,15 @@
from presidio_anonymizer import AnonymizerEngine, DeanonymizeEngine, OperatorConfig
from presidio_anonymizer.operators import Operator, OperatorType

from typing import Dict, Tuple
from typing import Dict, Tuple, Optional

SLUG = "privacy"

# Singleton instances for expensive resources
_analyzer_engine: Optional[AnalyzerEngine] = None
_anonymizer_engine: Optional[AnonymizerEngine] = None
_model_downloaded: bool = False

class InstanceCounterAnonymizer(Operator):
"""
Anonymizer which replaces the entity value
Expand Down Expand Up @@ -67,11 +72,14 @@ def operator_type(self) -> OperatorType:
return OperatorType.Anonymize

def download_model(model_name):
    if not spacy.util.is_package(model_name):
        print(f"Downloading {model_name} model...")
        spacy.cli.download(model_name)
    else:
        print(f"{model_name} model already downloaded.")
    global _model_downloaded
    if not _model_downloaded:
        if not spacy.util.is_package(model_name):
            print(f"Downloading {model_name} model...")
            spacy.cli.download(model_name)
        else:
            print(f"{model_name} model already downloaded.")
        _model_downloaded = True

def replace_entities(entity_map, text):
    # Create a reverse mapping of placeholders to entity names
@@ -92,17 +100,32 @@ def replace_placeholder(match):

    return replaced_text

def get_analyzer_engine() -> AnalyzerEngine:
    """Get or create singleton AnalyzerEngine instance."""
    global _analyzer_engine
    if _analyzer_engine is None:
        _analyzer_engine = AnalyzerEngine()
    return _analyzer_engine

def get_anonymizer_engine() -> AnonymizerEngine:
    """Get or create singleton AnonymizerEngine instance."""
    global _anonymizer_engine
    if _anonymizer_engine is None:
        _anonymizer_engine = AnonymizerEngine()
        _anonymizer_engine.add_anonymizer(InstanceCounterAnonymizer)
    return _anonymizer_engine

def run(system_prompt: str, initial_query: str, client, model: str) -> Tuple[str, int]:
    # Use the function
    model_name = "en_core_web_lg"
    download_model(model_name)

    analyzer = AnalyzerEngine()
    # Use singleton instances
    analyzer = get_analyzer_engine()
    analyzer_results = analyzer.analyze(text=initial_query, language="en")

    # Create Anonymizer engine and add the custom anonymizer
    anonymizer_engine = AnonymizerEngine()
    anonymizer_engine.add_anonymizer(InstanceCounterAnonymizer)
    # Use singleton anonymizer engine
    anonymizer_engine = get_anonymizer_engine()

    # Create a mapping between entity types and counters
    entity_mapping = dict()
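Side note on the caching pattern: the plugin now builds each engine lazily and stores it in a module-level global. A roughly equivalent, more compact variant (not what this PR ships) memoizes zero-argument factories with functools.lru_cache; the sketch below assumes presidio-analyzer and presidio-anonymizer are installed, and it leaves the custom-operator registration as a comment so the snippet stays self-contained.

# Sketch only: an lru_cache-based alternative to the module-level globals above.
from functools import lru_cache

from presidio_analyzer import AnalyzerEngine
from presidio_anonymizer import AnonymizerEngine

@lru_cache(maxsize=None)
def get_analyzer_engine() -> AnalyzerEngine:
    # Constructed on the first call; every later call returns the cached instance.
    return AnalyzerEngine()

@lru_cache(maxsize=None)
def get_anonymizer_engine() -> AnonymizerEngine:
    engine = AnonymizerEngine()
    # The custom operator would still be registered exactly once, as in the plugin:
    # engine.add_anonymizer(InstanceCounterAnonymizer)
    return engine

# Both getters hand back the same object on repeated calls.
assert get_analyzer_engine() is get_analyzer_engine()
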
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "optillm"
version = "0.2.9"
version = "0.2.10"
description = "An optimizing inference proxy for LLMs."
readme = "README.md"
license = "Apache-2.0"
217 changes: 217 additions & 0 deletions tests/test_privacy_plugin_performance.py
@@ -0,0 +1,217 @@
#!/usr/bin/env python3
"""
Test to ensure privacy plugin resources are properly cached and not reloaded on each request.
This test will fail if resources are being recreated on every call, preventing performance regressions.
"""

import time
import sys
import os
from unittest.mock import Mock, patch, MagicMock
import importlib

# Add parent directory to path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

def test_privacy_plugin_resource_caching():
"""
Test that expensive resources (AnalyzerEngine, AnonymizerEngine) are created only once
and reused across multiple plugin invocations.
"""
print("Testing privacy plugin resource caching...")

# Need to reset the module state before testing
if 'optillm.plugins.privacy_plugin' in sys.modules:
del sys.modules['optillm.plugins.privacy_plugin']

# Mock the expensive AnalyzerEngine and AnonymizerEngine at the module level before import
with patch('presidio_analyzer.AnalyzerEngine') as MockAnalyzerEngine, \
patch('presidio_anonymizer.AnonymizerEngine') as MockAnonymizerEngine, \
patch('spacy.util.is_package', return_value=True):

# Set up mock instances
mock_analyzer_instance = MagicMock()
mock_analyzer_instance.analyze.return_value = []
MockAnalyzerEngine.return_value = mock_analyzer_instance

mock_anonymizer_instance = MagicMock()
mock_anonymizer_instance.anonymize.return_value = MagicMock(text="anonymized text")
mock_anonymizer_instance.add_anonymizer = MagicMock()
MockAnonymizerEngine.return_value = mock_anonymizer_instance

# Import the module with mocks in place
import optillm.plugins.privacy_plugin as privacy_plugin

# Mock client for the run function
mock_client = Mock()
mock_response = Mock()
mock_response.choices = [Mock(message=Mock(content="response"))]
mock_response.usage.completion_tokens = 10
mock_client.chat.completions.create.return_value = mock_response

# First invocation
print("First invocation...")
result1, tokens1 = privacy_plugin.run("system", "query 1", mock_client, "model")

# Check that resources were created once
assert MockAnalyzerEngine.call_count == 1, f"AnalyzerEngine created {MockAnalyzerEngine.call_count} times, expected 1"
assert MockAnonymizerEngine.call_count == 1, f"AnonymizerEngine created {MockAnonymizerEngine.call_count} times, expected 1"

# Second invocation
print("Second invocation...")
result2, tokens2 = privacy_plugin.run("system", "query 2", mock_client, "model")

# Check that resources were NOT created again
assert MockAnalyzerEngine.call_count == 1, f"AnalyzerEngine created {MockAnalyzerEngine.call_count} times after 2nd call, expected 1"
assert MockAnonymizerEngine.call_count == 1, f"AnonymizerEngine created {MockAnonymizerEngine.call_count} times after 2nd call, expected 1"

# Third invocation to be extra sure
print("Third invocation...")
result3, tokens3 = privacy_plugin.run("system", "query 3", mock_client, "model")

# Still should be 1
assert MockAnalyzerEngine.call_count == 1, f"AnalyzerEngine created {MockAnalyzerEngine.call_count} times after 3rd call, expected 1"
assert MockAnonymizerEngine.call_count == 1, f"AnonymizerEngine created {MockAnonymizerEngine.call_count} times after 3rd call, expected 1"

print("✅ Privacy plugin resource caching test PASSED - Resources are properly cached!")
return True

def test_privacy_plugin_performance():
"""
Test that multiple invocations of the privacy plugin don't have degraded performance.
This catches the actual performance issue even without mocking.
"""
print("\nTesting privacy plugin performance (real execution)...")

try:
# Try to import the actual plugin
import optillm.plugins.privacy_plugin as privacy_plugin

# Check if required dependencies are available
try:
import spacy
from presidio_analyzer import AnalyzerEngine
from presidio_anonymizer import AnonymizerEngine
except ImportError as e:
print(f"⚠️ Skipping performance test - dependencies not installed: {e}")
return True

# Mock client
mock_client = Mock()
mock_response = Mock()
mock_response.choices = [Mock(message=Mock(content="response"))]
mock_response.usage.completion_tokens = 10
mock_client.chat.completions.create.return_value = mock_response

# Warm-up call (might include model download)
print("Warm-up call...")
start = time.time()
privacy_plugin.run("system", "warm up query", mock_client, "model")
warmup_time = time.time() - start
print(f"Warm-up time: {warmup_time:.2f}s")

# First real measurement
print("First measurement call...")
start = time.time()
privacy_plugin.run("system", "test query 1", mock_client, "model")
first_time = time.time() - start
print(f"First call time: {first_time:.2f}s")

# Second measurement - should be fast if caching works
print("Second measurement call...")
start = time.time()
privacy_plugin.run("system", "test query 2", mock_client, "model")
second_time = time.time() - start
print(f"Second call time: {second_time:.2f}s")

# Third measurement
print("Third measurement call...")
start = time.time()
privacy_plugin.run("system", "test query 3", mock_client, "model")
third_time = time.time() - start
print(f"Third call time: {third_time:.2f}s")

# Performance assertions
# Second and third calls should be much faster than first (at least 10x faster)
# Allow some tolerance for the first call as it might still be initializing
max_acceptable_time = 2.0 # 2 seconds max for subsequent calls

if second_time > max_acceptable_time:
raise AssertionError(f"Second call took {second_time:.2f}s, expected < {max_acceptable_time}s. Resources might not be cached!")

if third_time > max_acceptable_time:
raise AssertionError(f"Third call took {third_time:.2f}s, expected < {max_acceptable_time}s. Resources might not be cached!")

print(f"✅ Privacy plugin performance test PASSED - Subsequent calls are fast ({second_time:.2f}s, {third_time:.2f}s)!")
return True

except Exception as e:
print(f"❌ Performance test failed: {e}")
raise

def test_singleton_instances_are_reused():
"""
Direct test that singleton instances are the same object across calls.
"""
print("\nTesting singleton instance reuse...")

try:
import optillm.plugins.privacy_plugin as privacy_plugin
importlib.reload(privacy_plugin)

# Get first instances
analyzer1 = privacy_plugin.get_analyzer_engine()
anonymizer1 = privacy_plugin.get_anonymizer_engine()

# Get second instances
analyzer2 = privacy_plugin.get_analyzer_engine()
anonymizer2 = privacy_plugin.get_anonymizer_engine()

# They should be the exact same object
assert analyzer1 is analyzer2, "AnalyzerEngine instances are not the same object!"
assert anonymizer1 is anonymizer2, "AnonymizerEngine instances are not the same object!"

print("✅ Singleton instance test PASSED - Same objects are reused!")
return True

except ImportError as e:
print(f"⚠️ Skipping singleton test - dependencies not installed: {e}")
return True
except Exception as e:
print(f"❌ Singleton test failed: {e}")
raise

if __name__ == "__main__":
print("=" * 60)
print("Privacy Plugin Performance & Caching Tests")
print("=" * 60)

all_passed = True

try:
test_privacy_plugin_resource_caching()
except Exception as e:
all_passed = False
print(f"❌ Resource caching test failed: {e}")

try:
test_singleton_instances_are_reused()
except Exception as e:
all_passed = False
print(f"❌ Singleton instance test failed: {e}")

try:
test_privacy_plugin_performance()
except Exception as e:
all_passed = False
print(f"❌ Performance test failed: {e}")

print("\n" + "=" * 60)
if all_passed:
print("✅ ALL TESTS PASSED!")
print("Privacy plugin resources are properly cached.")
sys.exit(0)
else:
print("❌ SOME TESTS FAILED!")
print("Privacy plugin may have performance issues.")
sys.exit(1)
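
The new test module is self-contained: its __main__ block runs all three checks when executed directly (python tests/test_privacy_plugin_performance.py). A minimal sketch for driving the same file through pytest instead, assuming pytest is available in the environment:

# Sketch only: run just this module's tests under pytest (pytest is assumed installed).
# Equivalent CLI form: pytest -q tests/test_privacy_plugin_performance.py
import sys
import pytest

sys.exit(pytest.main(["-q", "tests/test_privacy_plugin_performance.py"]))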