
Commit 4cd6bae

Tweak optimization trials and summary logs for anomaly detection.
1 parent a493d0a commit 4cd6bae

3 files changed, +16 -8 lines changed


domains/anomaly-detection/tunedAnomalyDetectionExplained.py

Lines changed: 10 additions & 4 deletions
@@ -384,10 +384,11 @@ def no_anomalies(cls):
 
 def tune_anomaly_detection_models(
     feature_matrix: np.ndarray,
+    parameters: Parameters,
     contamination: float | typing.Literal["auto"] = 0.05,
     random_seed: int = 42,
     number_of_trials: int = 25,
-    optimization_timeout_in_seconds: int = 60
+    optimization_timeout_in_seconds: int = 50
 ) -> AnomalyDetectionResults:
     """
     Tunes both Isolation Forest and a proxy Random Forest using Optuna, maximizing the F1 score
@@ -464,7 +465,7 @@ def objective(trial) -> float:
 
     # Print the number of samples and features in the feature matrix
     n_samples = feature_matrix.shape[0]
-    print(f"tunedAnomalyDetectionExplained: Tuned Anomaly Detection: Number of samples: {n_samples}, Number of features: {feature_matrix.shape[1]}, Number of trials: {number_of_trials}")
+    print(f"tunedAnomalyDetectionExplained: Tuning Anomaly Detection: Number of samples: {n_samples}, Number of features: {feature_matrix.shape[1]}, Number of trials: {number_of_trials}")
 
     # Run Optuna optimization
     study = create_study(direction="maximize", sampler=TPESampler(seed=random_seed), study_name="AnomalyDetection_Tuning")
@@ -480,7 +481,12 @@ def objective(trial) -> float:
     study.enqueue_trial({'isolation_max_samples': 0.10015063610944819, 'isolation_n_estimators': 329, 'proxy_n_estimators': 314, 'proxy_max_depth': 8})
 
     study.optimize(objective, n_trials=number_of_trials, timeout=optimization_timeout_in_seconds)
-    output_optuna_tuning_results(study, study.study_name)
+
+    # Output tuning results
+    print(f"Best Isolation & Random Forest parameters for {parameters.get_plot_prefix()} after {len(study.trials)}/{number_of_trials} trials with best #{study.best_trial.number} (Optuna):", study.best_params)
+
+    if parameters.is_verbose():
+        output_optuna_tuning_results(study, study.study_name)
 
     if np.isclose(study.best_value, 0.0, rtol=1e-09, atol=1e-09):
         red = "\x1b[31;20m"
@@ -869,7 +875,7 @@ def add_top_shap_features_to_anomalies(
     features_prepared = np.hstack([features_standardized, node_embeddings_reduced])
     feature_names = list(features_to_standardize) + [f'nodeEmbeddingPCA_{i}' for i in range(node_embeddings_reduced.shape[1])]
 
-    anomaly_detection_results = tune_anomaly_detection_models(features_prepared)
+    anomaly_detection_results = tune_anomaly_detection_models(features_prepared, parameters)
     if anomaly_detection_results.is_empty():
         sys.exit(0)
 
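Note on the pattern introduced above: the compact summary line relies only on standard Optuna study attributes (trials, best_trial.number, best_params), while the detailed per-trial report stays behind the verbose flag. A minimal, self-contained sketch of that pattern follows; the SimpleNamespace stand-in for the repository's Parameters object and the toy objective are illustrative assumptions, and output_optuna_tuning_results is not reproduced here.

from types import SimpleNamespace

from optuna import create_study
from optuna.samplers import TPESampler

# Stand-in for the repository's Parameters object (illustrative assumption only).
parameters = SimpleNamespace(get_plot_prefix=lambda: "demo", is_verbose=lambda: False)


def objective(trial) -> float:
    # Toy objective; the real one maximizes the F1 score of the tuned detectors.
    contamination = trial.suggest_float("contamination", 0.01, 0.10)
    return 1.0 - contamination


study = create_study(direction="maximize", sampler=TPESampler(seed=42), study_name="AnomalyDetection_Tuning")
study.optimize(objective, n_trials=25, timeout=50)

# Always print the compact summary; keep the detailed report behind the verbose flag.
print(f"Best parameters for {parameters.get_plot_prefix()} after {len(study.trials)}/25 trials with best #{study.best_trial.number} (Optuna):", study.best_params)
if parameters.is_verbose():
    print(study.trials_dataframe())  # stand-in for output_optuna_tuning_results(study, study.study_name)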

domains/anomaly-detection/tunedLeidenCommunityDetection.py

Lines changed: 2 additions & 2 deletions
@@ -359,10 +359,10 @@ def objective(trial):
     study.enqueue_trial({'gamma': 1.14, 'theta': 0.001, 'max_levels': 10})
 
     # Execute the hyperparameter tuning
-    study.optimize(objective, n_trials=20, timeout=30)
+    study.optimize(objective, n_trials=20, timeout=20)
 
     # Output tuning results
-    print(f"Best Leiden Community Detection parameters for {parameters.get_projection_name()} (Optuna):", study.best_params)
+    print(f"Best Leiden Community Detection parameters for {parameters.get_projection_name()} after {len(study.trials)}/20 trials with best #{study.best_trial.number} (Optuna):", study.best_params)
     if parameters.is_verbose():
         output_detailed_optuna_tuning_results(study)
 
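Reporting {len(study.trials)}/20 in the new log line is useful because Optuna's study.optimize() stops at whichever of n_trials or timeout is reached first, so fewer trials than requested may have completed. A minimal sketch illustrating this behavior, with a placeholder slow objective rather than the Leiden tuning objective:

import time

from optuna import create_study


def slow_objective(trial) -> float:
    # Placeholder objective that takes about one second per trial.
    time.sleep(1)
    return trial.suggest_float("gamma", 0.5, 2.0)


study = create_study(direction="maximize")
# Requests 20 trials but stops after roughly 5 seconds, whichever comes first.
study.optimize(slow_objective, n_trials=20, timeout=5)
print(f"Completed {len(study.trials)}/20 trials; best #{study.best_trial.number}:", study.best_params)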

domains/anomaly-detection/tunedNodeEmbeddingClustering.py

Lines changed: 4 additions & 2 deletions
@@ -308,7 +308,7 @@ def objective(trial):
 
     # Start the hyperparameter tuning
     study.optimize(objective, n_trials=20, timeout=10)
-    print(f"Best HDBSCAN parameters (Optuna):", study.best_params)
+    print(f"Best HDBSCAN parameters after {len(study.trials)}/20 trials with best #{study.best_trial.number} (Optuna):", study.best_params)
     if parameters.is_verbose():
         output_detailed_optuna_tuning_results(study, 'HDBSCAN')
 
@@ -709,10 +709,12 @@ def objective(trial):
     study.enqueue_trial({'embedding_dimension': 128, 'forth_iteration_weight': 1.0, 'normalization_strength': 0.5})
     study.enqueue_trial({'embedding_dimension': 256, 'forth_iteration_weight': 0.5, 'normalization_strength': 0.3})
     study.enqueue_trial({'embedding_dimension': 256, 'forth_iteration_weight': 1.0, 'normalization_strength': 0.3})
+    study.enqueue_trial({'embedding_dimension': 64, 'normalization_strength': -0.4, 'forth_iteration_weight': 1.4})
+    study.enqueue_trial({'embedding_dimension': 256, 'normalization_strength': 0.3, 'forth_iteration_weight': 1.0})
 
     # Start the hyperparameter tuning
     study.optimize(objective, n_trials=80, timeout=40)
-    print(f"Best Fast Random Projection (FastRP) parameters for {parameters.get_projection_name()} (Optuna):", study.best_params)
+    print(f"Best Fast Random Projection (FastRP) parameters for {parameters.get_projection_name()} after {len(study.trials)}/80 trials with best #{study.best_trial.number} (Optuna):", study.best_params)
     if parameters.is_verbose():
         output_detailed_optuna_tuning_results(study, 'Fast Random Projection (FastRP)')
 
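The two newly enqueued parameter sets act as warm starts: Optuna evaluates enqueued trials before sampler-suggested ones, so known-good FastRP configurations are tried first and the TPE sampler continues from there. A minimal sketch of the mechanism, with a placeholder objective and an arbitrary score in place of the real embedding evaluation:

from optuna import create_study
from optuna.samplers import TPESampler


def objective(trial) -> float:
    # Placeholder objective over FastRP-like parameters; the real one scores node embeddings.
    dimension = trial.suggest_categorical("embedding_dimension", [64, 128, 256])
    strength = trial.suggest_float("normalization_strength", -1.0, 1.0)
    weight = trial.suggest_float("forth_iteration_weight", 0.0, 2.0)
    return float(dimension) * (1.0 - abs(strength)) + weight  # arbitrary score


study = create_study(direction="maximize", sampler=TPESampler(seed=42))
# Enqueued trials are evaluated first, before sampler-suggested ones.
study.enqueue_trial({'embedding_dimension': 64, 'normalization_strength': -0.4, 'forth_iteration_weight': 1.4})
study.enqueue_trial({'embedding_dimension': 256, 'normalization_strength': 0.3, 'forth_iteration_weight': 1.0})
study.optimize(objective, n_trials=10, timeout=40)
print(f"Best parameters after {len(study.trials)}/10 trials with best #{study.best_trial.number} (Optuna):", study.best_params)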
