diff --git a/optillm/__init__.py b/optillm/__init__.py index 6fe64d92..8c142f98 100644 --- a/optillm/__init__.py +++ b/optillm/__init__.py @@ -1,5 +1,5 @@ # Version information -__version__ = "0.2.9" +__version__ = "0.2.10" # Import from server module from .server import ( diff --git a/optillm/plugins/privacy_plugin.py b/optillm/plugins/privacy_plugin.py index bd12d74a..f7a10bcf 100644 --- a/optillm/plugins/privacy_plugin.py +++ b/optillm/plugins/privacy_plugin.py @@ -3,10 +3,15 @@ from presidio_anonymizer import AnonymizerEngine, DeanonymizeEngine, OperatorConfig from presidio_anonymizer.operators import Operator, OperatorType -from typing import Dict, Tuple +from typing import Dict, Tuple, Optional SLUG = "privacy" +# Singleton instances for expensive resources +_analyzer_engine: Optional[AnalyzerEngine] = None +_anonymizer_engine: Optional[AnonymizerEngine] = None +_model_downloaded: bool = False + class InstanceCounterAnonymizer(Operator): """ Anonymizer which replaces the entity value @@ -67,11 +72,14 @@ def operator_type(self) -> OperatorType: return OperatorType.Anonymize def download_model(model_name): - if not spacy.util.is_package(model_name): - print(f"Downloading {model_name} model...") - spacy.cli.download(model_name) - else: - print(f"{model_name} model already downloaded.") + global _model_downloaded + if not _model_downloaded: + if not spacy.util.is_package(model_name): + print(f"Downloading {model_name} model...") + spacy.cli.download(model_name) + else: + print(f"{model_name} model already downloaded.") + _model_downloaded = True def replace_entities(entity_map, text): # Create a reverse mapping of placeholders to entity names @@ -92,17 +100,32 @@ def replace_placeholder(match): return replaced_text +def get_analyzer_engine() -> AnalyzerEngine: + """Get or create singleton AnalyzerEngine instance.""" + global _analyzer_engine + if _analyzer_engine is None: + _analyzer_engine = AnalyzerEngine() + return _analyzer_engine + +def get_anonymizer_engine() -> AnonymizerEngine: + """Get or create singleton AnonymizerEngine instance.""" + global _anonymizer_engine + if _anonymizer_engine is None: + _anonymizer_engine = AnonymizerEngine() + _anonymizer_engine.add_anonymizer(InstanceCounterAnonymizer) + return _anonymizer_engine + def run(system_prompt: str, initial_query: str, client, model: str) -> Tuple[str, int]: # Use the function model_name = "en_core_web_lg" download_model(model_name) - analyzer = AnalyzerEngine() + # Use singleton instances + analyzer = get_analyzer_engine() analyzer_results = analyzer.analyze(text=initial_query, language="en") - # Create Anonymizer engine and add the custom anonymizer - anonymizer_engine = AnonymizerEngine() - anonymizer_engine.add_anonymizer(InstanceCounterAnonymizer) + # Use singleton anonymizer engine + anonymizer_engine = get_anonymizer_engine() # Create a mapping between entity types and counters entity_mapping = dict() diff --git a/pyproject.toml b/pyproject.toml index d1049c79..81f03ffd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "optillm" -version = "0.2.9" +version = "0.2.10" description = "An optimizing inference proxy for LLMs." readme = "README.md" license = "Apache-2.0" diff --git a/tests/test_privacy_plugin_performance.py b/tests/test_privacy_plugin_performance.py new file mode 100644 index 00000000..6e6fdfbf --- /dev/null +++ b/tests/test_privacy_plugin_performance.py @@ -0,0 +1,217 @@ +#!/usr/bin/env python3 +""" +Test to ensure privacy plugin resources are properly cached and not reloaded on each request. +This test will fail if resources are being recreated on every call, preventing performance regressions. +""" + +import time +import sys +import os +from unittest.mock import Mock, patch, MagicMock +import importlib + +# Add parent directory to path +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +def test_privacy_plugin_resource_caching(): + """ + Test that expensive resources (AnalyzerEngine, AnonymizerEngine) are created only once + and reused across multiple plugin invocations. + """ + print("Testing privacy plugin resource caching...") + + # Need to reset the module state before testing + if 'optillm.plugins.privacy_plugin' in sys.modules: + del sys.modules['optillm.plugins.privacy_plugin'] + + # Mock the expensive AnalyzerEngine and AnonymizerEngine at the module level before import + with patch('presidio_analyzer.AnalyzerEngine') as MockAnalyzerEngine, \ + patch('presidio_anonymizer.AnonymizerEngine') as MockAnonymizerEngine, \ + patch('spacy.util.is_package', return_value=True): + + # Set up mock instances + mock_analyzer_instance = MagicMock() + mock_analyzer_instance.analyze.return_value = [] + MockAnalyzerEngine.return_value = mock_analyzer_instance + + mock_anonymizer_instance = MagicMock() + mock_anonymizer_instance.anonymize.return_value = MagicMock(text="anonymized text") + mock_anonymizer_instance.add_anonymizer = MagicMock() + MockAnonymizerEngine.return_value = mock_anonymizer_instance + + # Import the module with mocks in place + import optillm.plugins.privacy_plugin as privacy_plugin + + # Mock client for the run function + mock_client = Mock() + mock_response = Mock() + mock_response.choices = [Mock(message=Mock(content="response"))] + mock_response.usage.completion_tokens = 10 + mock_client.chat.completions.create.return_value = mock_response + + # First invocation + print("First invocation...") + result1, tokens1 = privacy_plugin.run("system", "query 1", mock_client, "model") + + # Check that resources were created once + assert MockAnalyzerEngine.call_count == 1, f"AnalyzerEngine created {MockAnalyzerEngine.call_count} times, expected 1" + assert MockAnonymizerEngine.call_count == 1, f"AnonymizerEngine created {MockAnonymizerEngine.call_count} times, expected 1" + + # Second invocation + print("Second invocation...") + result2, tokens2 = privacy_plugin.run("system", "query 2", mock_client, "model") + + # Check that resources were NOT created again + assert MockAnalyzerEngine.call_count == 1, f"AnalyzerEngine created {MockAnalyzerEngine.call_count} times after 2nd call, expected 1" + assert MockAnonymizerEngine.call_count == 1, f"AnonymizerEngine created {MockAnonymizerEngine.call_count} times after 2nd call, expected 1" + + # Third invocation to be extra sure + print("Third invocation...") + result3, tokens3 = privacy_plugin.run("system", "query 3", mock_client, "model") + + # Still should be 1 + assert MockAnalyzerEngine.call_count == 1, f"AnalyzerEngine created {MockAnalyzerEngine.call_count} times after 3rd call, expected 1" + assert MockAnonymizerEngine.call_count == 1, f"AnonymizerEngine created {MockAnonymizerEngine.call_count} times after 3rd call, expected 1" + + print("✅ Privacy plugin resource caching test PASSED - Resources are properly cached!") + return True + +def test_privacy_plugin_performance(): + """ + Test that multiple invocations of the privacy plugin don't have degraded performance. + This catches the actual performance issue even without mocking. + """ + print("\nTesting privacy plugin performance (real execution)...") + + try: + # Try to import the actual plugin + import optillm.plugins.privacy_plugin as privacy_plugin + + # Check if required dependencies are available + try: + import spacy + from presidio_analyzer import AnalyzerEngine + from presidio_anonymizer import AnonymizerEngine + except ImportError as e: + print(f"⚠️ Skipping performance test - dependencies not installed: {e}") + return True + + # Mock client + mock_client = Mock() + mock_response = Mock() + mock_response.choices = [Mock(message=Mock(content="response"))] + mock_response.usage.completion_tokens = 10 + mock_client.chat.completions.create.return_value = mock_response + + # Warm-up call (might include model download) + print("Warm-up call...") + start = time.time() + privacy_plugin.run("system", "warm up query", mock_client, "model") + warmup_time = time.time() - start + print(f"Warm-up time: {warmup_time:.2f}s") + + # First real measurement + print("First measurement call...") + start = time.time() + privacy_plugin.run("system", "test query 1", mock_client, "model") + first_time = time.time() - start + print(f"First call time: {first_time:.2f}s") + + # Second measurement - should be fast if caching works + print("Second measurement call...") + start = time.time() + privacy_plugin.run("system", "test query 2", mock_client, "model") + second_time = time.time() - start + print(f"Second call time: {second_time:.2f}s") + + # Third measurement + print("Third measurement call...") + start = time.time() + privacy_plugin.run("system", "test query 3", mock_client, "model") + third_time = time.time() - start + print(f"Third call time: {third_time:.2f}s") + + # Performance assertions + # Second and third calls should be much faster than first (at least 10x faster) + # Allow some tolerance for the first call as it might still be initializing + max_acceptable_time = 2.0 # 2 seconds max for subsequent calls + + if second_time > max_acceptable_time: + raise AssertionError(f"Second call took {second_time:.2f}s, expected < {max_acceptable_time}s. Resources might not be cached!") + + if third_time > max_acceptable_time: + raise AssertionError(f"Third call took {third_time:.2f}s, expected < {max_acceptable_time}s. Resources might not be cached!") + + print(f"✅ Privacy plugin performance test PASSED - Subsequent calls are fast ({second_time:.2f}s, {third_time:.2f}s)!") + return True + + except Exception as e: + print(f"❌ Performance test failed: {e}") + raise + +def test_singleton_instances_are_reused(): + """ + Direct test that singleton instances are the same object across calls. + """ + print("\nTesting singleton instance reuse...") + + try: + import optillm.plugins.privacy_plugin as privacy_plugin + importlib.reload(privacy_plugin) + + # Get first instances + analyzer1 = privacy_plugin.get_analyzer_engine() + anonymizer1 = privacy_plugin.get_anonymizer_engine() + + # Get second instances + analyzer2 = privacy_plugin.get_analyzer_engine() + anonymizer2 = privacy_plugin.get_anonymizer_engine() + + # They should be the exact same object + assert analyzer1 is analyzer2, "AnalyzerEngine instances are not the same object!" + assert anonymizer1 is anonymizer2, "AnonymizerEngine instances are not the same object!" + + print("✅ Singleton instance test PASSED - Same objects are reused!") + return True + + except ImportError as e: + print(f"⚠️ Skipping singleton test - dependencies not installed: {e}") + return True + except Exception as e: + print(f"❌ Singleton test failed: {e}") + raise + +if __name__ == "__main__": + print("=" * 60) + print("Privacy Plugin Performance & Caching Tests") + print("=" * 60) + + all_passed = True + + try: + test_privacy_plugin_resource_caching() + except Exception as e: + all_passed = False + print(f"❌ Resource caching test failed: {e}") + + try: + test_singleton_instances_are_reused() + except Exception as e: + all_passed = False + print(f"❌ Singleton instance test failed: {e}") + + try: + test_privacy_plugin_performance() + except Exception as e: + all_passed = False + print(f"❌ Performance test failed: {e}") + + print("\n" + "=" * 60) + if all_passed: + print("✅ ALL TESTS PASSED!") + print("Privacy plugin resources are properly cached.") + sys.exit(0) + else: + print("❌ SOME TESTS FAILED!") + print("Privacy plugin may have performance issues.") + sys.exit(1) \ No newline at end of file