diff --git a/optillm/__init__.py b/optillm/__init__.py
index 6fe64d92..8c142f98 100644
--- a/optillm/__init__.py
+++ b/optillm/__init__.py
@@ -1,5 +1,5 @@
 # Version information
-__version__ = "0.2.9"
+__version__ = "0.2.10"
 
 # Import from server module
 from .server import (
diff --git a/optillm/plugins/privacy_plugin.py b/optillm/plugins/privacy_plugin.py
index bd12d74a..f7a10bcf 100644
--- a/optillm/plugins/privacy_plugin.py
+++ b/optillm/plugins/privacy_plugin.py
@@ -3,10 +3,15 @@
 from presidio_anonymizer import AnonymizerEngine, DeanonymizeEngine, OperatorConfig
 from presidio_anonymizer.operators import Operator, OperatorType
 
-from typing import Dict, Tuple
+from typing import Dict, Tuple, Optional
 
 SLUG = "privacy"
 
+# Process-wide caches: engines are built lazily, model checks run once per name
+_analyzer_engine: Optional[AnalyzerEngine] = None
+_anonymizer_engine: Optional[AnonymizerEngine] = None
+_downloaded_models: set = set()  # spaCy model names already verified/downloaded
+
 class InstanceCounterAnonymizer(Operator):
     """
     Anonymizer which replaces the entity value
@@ -67,11 +72,14 @@ def operator_type(self) -> OperatorType:
         return OperatorType.Anonymize
 
 def download_model(model_name):
-    if not spacy.util.is_package(model_name):
-        print(f"Downloading {model_name} model...")
-        spacy.cli.download(model_name)
-    else:
-        print(f"{model_name} model already downloaded.")
+    if model_name in _downloaded_models:
+        return  # this model name was already checked in this process
+    if not spacy.util.is_package(model_name):
+        print(f"Downloading {model_name} model...")
+        spacy.cli.download(model_name)
+    else:
+        print(f"{model_name} model already downloaded.")
+    _downloaded_models.add(model_name)
 
 def replace_entities(entity_map, text):
     # Create a reverse mapping of placeholders to entity names
@@ -92,17 +100,43 @@ def replace_placeholder(match):
     
     return replaced_text
 
+def get_analyzer_engine() -> AnalyzerEngine:
+    """Return the process-wide AnalyzerEngine, creating it on first use.
+
+    The engine is cached in the module-level ``_analyzer_engine`` so later
+    calls return the same object instead of constructing a new one.
+    """
+    global _analyzer_engine
+    if _analyzer_engine is None:
+        _analyzer_engine = AnalyzerEngine()
+    return _analyzer_engine
+
+def get_anonymizer_engine() -> AnonymizerEngine:
+    """Return the process-wide AnonymizerEngine, creating it on first use.
+
+    The engine is registered with ``InstanceCounterAnonymizer`` before it is
+    stored in ``_anonymizer_engine``, so a failure in ``add_anonymizer`` (or a
+    concurrent caller) can never observe a cached engine that lacks it.
+    """
+    global _anonymizer_engine
+    if _anonymizer_engine is None:
+        engine = AnonymizerEngine()
+        engine.add_anonymizer(InstanceCounterAnonymizer)
+        # Publish only after configuration succeeded.
+        _anonymizer_engine = engine
+    return _anonymizer_engine
+
 def run(system_prompt: str, initial_query: str, client, model: str) -> Tuple[str, int]:
     # Use the function
     model_name = "en_core_web_lg"
     download_model(model_name)
 
-    analyzer = AnalyzerEngine() 
+    # Use singleton instances
+    analyzer = get_analyzer_engine()
     analyzer_results = analyzer.analyze(text=initial_query, language="en")
 
-    # Create Anonymizer engine and add the custom anonymizer
-    anonymizer_engine = AnonymizerEngine()
-    anonymizer_engine.add_anonymizer(InstanceCounterAnonymizer)
+    # Use singleton anonymizer engine
+    anonymizer_engine = get_anonymizer_engine()
 
     # Create a mapping between entity types and counters
     entity_mapping = dict()
diff --git a/pyproject.toml b/pyproject.toml
index d1049c79..81f03ffd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "optillm"
-version = "0.2.9"
+version = "0.2.10"
 description = "An optimizing inference proxy for LLMs."
 readme = "README.md"
 license = "Apache-2.0"
diff --git a/tests/test_privacy_plugin_performance.py b/tests/test_privacy_plugin_performance.py
new file mode 100644
index 00000000..6e6fdfbf
--- /dev/null
+++ b/tests/test_privacy_plugin_performance.py
@@ -0,0 +1,217 @@
+#!/usr/bin/env python3
+"""
+Test to ensure privacy plugin resources are properly cached and not reloaded on each request.
+This test will fail if resources are being recreated on every call, preventing performance regressions.
+"""
+
+import time
+import sys
+import os
+from unittest.mock import Mock, patch, MagicMock
+import importlib
+
+# Add parent directory to path
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+def test_privacy_plugin_resource_caching():
+    """
+    Test that expensive resources (AnalyzerEngine, AnonymizerEngine) are created only once
+    and reused across multiple plugin invocations.
+    """
+    print("Testing privacy plugin resource caching...")
+
+    # Drop any cached copy so the plugin is re-imported against the mocks below
+    sys.modules.pop('optillm.plugins.privacy_plugin', None)
+    try:
+        # Mock the expensive AnalyzerEngine and AnonymizerEngine at the module level before import
+        with patch('presidio_analyzer.AnalyzerEngine') as MockAnalyzerEngine, \
+             patch('presidio_anonymizer.AnonymizerEngine') as MockAnonymizerEngine, \
+             patch('spacy.util.is_package', return_value=True):
+            # Set up mock instances
+            mock_analyzer_instance = MagicMock()
+            mock_analyzer_instance.analyze.return_value = []
+            MockAnalyzerEngine.return_value = mock_analyzer_instance
+
+            mock_anonymizer_instance = MagicMock()
+            mock_anonymizer_instance.anonymize.return_value = MagicMock(text="anonymized text")
+            mock_anonymizer_instance.add_anonymizer = MagicMock()
+            MockAnonymizerEngine.return_value = mock_anonymizer_instance
+
+            # Import the module with mocks in place
+            import optillm.plugins.privacy_plugin as privacy_plugin
+
+            # Mock client for the run function
+            mock_client = Mock()
+            mock_response = Mock()
+            mock_response.choices = [Mock(message=Mock(content="response"))]
+            mock_response.usage.completion_tokens = 10
+            mock_client.chat.completions.create.return_value = mock_response
+
+            # First invocation
+            print("First invocation...")
+            result1, tokens1 = privacy_plugin.run("system", "query 1", mock_client, "model")
+
+            # Check that resources were created once
+            assert MockAnalyzerEngine.call_count == 1, f"AnalyzerEngine created {MockAnalyzerEngine.call_count} times, expected 1"
+            assert MockAnonymizerEngine.call_count == 1, f"AnonymizerEngine created {MockAnonymizerEngine.call_count} times, expected 1"
+
+            # Second invocation
+            print("Second invocation...")
+            result2, tokens2 = privacy_plugin.run("system", "query 2", mock_client, "model")
+
+            # Check that resources were NOT created again
+            assert MockAnalyzerEngine.call_count == 1, f"AnalyzerEngine created {MockAnalyzerEngine.call_count} times after 2nd call, expected 1"
+            assert MockAnonymizerEngine.call_count == 1, f"AnonymizerEngine created {MockAnonymizerEngine.call_count} times after 2nd call, expected 1"
+
+            # Third invocation to be extra sure
+            print("Third invocation...")
+            result3, tokens3 = privacy_plugin.run("system", "query 3", mock_client, "model")
+
+            # Still should be 1
+            assert MockAnalyzerEngine.call_count == 1, f"AnalyzerEngine created {MockAnalyzerEngine.call_count} times after 3rd call, expected 1"
+            assert MockAnonymizerEngine.call_count == 1, f"AnonymizerEngine created {MockAnonymizerEngine.call_count} times after 3rd call, expected 1"
+
+            print("✅ Privacy plugin resource caching test PASSED - Resources are properly cached!")
+    finally:
+        # The copy imported above is bound to the mocks; evict it so later tests import the real plugin
+        sys.modules.pop('optillm.plugins.privacy_plugin', None)
+
+def test_privacy_plugin_performance():
+    """
+    Test that multiple invocations of the privacy plugin don't have degraded performance.
+    This catches the actual performance issue even without mocking.
+    """
+    print("\nTesting privacy plugin performance (real execution)...")
+
+    try:
+        # Probe the optional dependencies first: importing the plugin itself requires them
+        try:
+            import spacy
+            from presidio_analyzer import AnalyzerEngine
+            from presidio_anonymizer import AnonymizerEngine
+        except ImportError as e:
+            print(f"⚠️  Skipping performance test - dependencies not installed: {e}")
+            return
+
+        # Import the actual plugin; reload it so state left by a mocked import is discarded
+        import optillm.plugins.privacy_plugin as privacy_plugin
+        importlib.reload(privacy_plugin)
+
+        # Mock client
+        mock_client = Mock()
+        mock_response = Mock()
+        mock_response.choices = [Mock(message=Mock(content="response"))]
+        mock_response.usage.completion_tokens = 10
+        mock_client.chat.completions.create.return_value = mock_response
+
+        # Warm-up call (might include model download)
+        print("Warm-up call...")
+        start = time.perf_counter()
+        privacy_plugin.run("system", "warm up query", mock_client, "model")
+        warmup_time = time.perf_counter() - start
+        print(f"Warm-up time: {warmup_time:.2f}s")
+
+        # First real measurement
+        print("First measurement call...")
+        start = time.perf_counter()
+        privacy_plugin.run("system", "test query 1", mock_client, "model")
+        first_time = time.perf_counter() - start
+        print(f"First call time: {first_time:.2f}s")
+
+        # Second measurement - should be fast if caching works
+        print("Second measurement call...")
+        start = time.perf_counter()
+        privacy_plugin.run("system", "test query 2", mock_client, "model")
+        second_time = time.perf_counter() - start
+        print(f"Second call time: {second_time:.2f}s")
+
+        # Third measurement
+        print("Third measurement call...")
+        start = time.perf_counter()
+        privacy_plugin.run("system", "test query 3", mock_client, "model")
+        third_time = time.perf_counter() - start
+        print(f"Third call time: {third_time:.2f}s")
+
+        # Performance assertions
+        # Subsequent calls must finish within a fixed wall-clock budget. This is an
+        # absolute limit (not a ratio against the warm-up), so slow hosts may need tuning.
+        max_acceptable_time = 2.0  # 2 seconds max for subsequent calls
+
+        if second_time > max_acceptable_time:
+            raise AssertionError(f"Second call took {second_time:.2f}s, expected < {max_acceptable_time}s. Resources might not be cached!")
+
+        if third_time > max_acceptable_time:
+            raise AssertionError(f"Third call took {third_time:.2f}s, expected < {max_acceptable_time}s. Resources might not be cached!")
+
+        print(f"✅ Privacy plugin performance test PASSED - Subsequent calls are fast ({second_time:.2f}s, {third_time:.2f}s)!")
+
+    except Exception as e:
+        print(f"❌ Performance test failed: {e}")
+        raise
+
+def test_singleton_instances_are_reused():
+    """
+    Direct test that singleton instances are the same object across calls.
+    """
+    print("\nTesting singleton instance reuse...")
+
+    try:
+        import optillm.plugins.privacy_plugin as privacy_plugin
+        # Re-execute the module so its cached singletons start out as None again
+        importlib.reload(privacy_plugin)
+
+        # Get first instances
+        analyzer1 = privacy_plugin.get_analyzer_engine()
+        anonymizer1 = privacy_plugin.get_anonymizer_engine()
+
+        # Get second instances
+        analyzer2 = privacy_plugin.get_analyzer_engine()
+        anonymizer2 = privacy_plugin.get_anonymizer_engine()
+
+        # They should be the exact same object
+        assert analyzer1 is analyzer2, "AnalyzerEngine instances are not the same object!"
+        assert anonymizer1 is anonymizer2, "AnonymizerEngine instances are not the same object!"
+
+        print("✅ Singleton instance test PASSED - Same objects are reused!")
+
+    except ImportError as e:
+        print(f"⚠️  Skipping singleton test - dependencies not installed: {e}")
+        return
+    except Exception as e:
+        print(f"❌ Singleton test failed: {e}")
+        raise
+
+if __name__ == "__main__":
+    # Standalone runner: executes every test in a fixed order, reports each
+    # failure, and exits 0 only when no test raised.
+    banner = "=" * 60
+    print(banner)
+    print("Privacy Plugin Performance & Caching Tests")
+    print(banner)
+
+    # (label used in the failure message, test callable)
+    suite = (
+        ("Resource caching", test_privacy_plugin_resource_caching),
+        ("Singleton instance", test_singleton_instances_are_reused),
+        ("Performance", test_privacy_plugin_performance),
+    )
+
+    failed = []
+    for label, test_fn in suite:
+        try:
+            test_fn()
+        except Exception as e:
+            failed.append(label)
+            print(f"❌ {label} test failed: {e}")
+
+    print("\n" + banner)
+    if failed:
+        print("❌ SOME TESTS FAILED!")
+        print("Privacy plugin may have performance issues.")
+        exit_code = 1
+    else:
+        print("✅ ALL TESTS PASSED!")
+        print("Privacy plugin resources are properly cached.")
+        exit_code = 0
+
+    sys.exit(exit_code)
\ No newline at end of file