
Commit 4cd6bae

Tweak optimization trials and summary logs for anomaly detection.
1 parent a493d0a commit 4cd6bae

3 files changed, +16 -8 lines changed


domains/anomaly-detection/tunedAnomalyDetectionExplained.py

Lines changed: 10 additions & 4 deletions
@@ -384,10 +384,11 @@ def no_anomalies(cls):
 
 def tune_anomaly_detection_models(
     feature_matrix: np.ndarray,
+    parameters: Parameters,
     contamination: float | typing.Literal["auto"] = 0.05,
     random_seed: int = 42,
     number_of_trials: int = 25,
-    optimization_timeout_in_seconds: int = 60
+    optimization_timeout_in_seconds: int = 50
 ) -> AnomalyDetectionResults:
     """
     Tunes both Isolation Forest and a proxy Random Forest using Optuna, maximizing the F1 score
@@ -464,7 +465,7 @@ def objective(trial) -> float:
 
     # Print the number of samples and features in the feature matrix
     n_samples = feature_matrix.shape[0]
-    print(f"tunedAnomalyDetectionExplained: Tuned Anomaly Detection: Number of samples: {n_samples}, Number of features: {feature_matrix.shape[1]}, Number of trials: {number_of_trials}")
+    print(f"tunedAnomalyDetectionExplained: Tuning Anomaly Detection: Number of samples: {n_samples}, Number of features: {feature_matrix.shape[1]}, Number of trials: {number_of_trials}")
 
     # Run Optuna optimization
     study = create_study(direction="maximize", sampler=TPESampler(seed=random_seed), study_name="AnomalyDetection_Tuning")
@@ -480,7 +481,12 @@ def objective(trial) -> float:
     study.enqueue_trial({'isolation_max_samples': 0.10015063610944819, 'isolation_n_estimators': 329, 'proxy_n_estimators': 314, 'proxy_max_depth': 8})
 
     study.optimize(objective, n_trials=number_of_trials, timeout=optimization_timeout_in_seconds)
-    output_optuna_tuning_results(study, study.study_name)
+
+    # Output tuning results
+    print(f"Best Isolation & Random Forest parameters for {parameters.get_plot_prefix()} after {len(study.trials)}/{number_of_trials} trials with best #{study.best_trial.number} (Optuna):", study.best_params)
+
+    if parameters.is_verbose():
+        output_optuna_tuning_results(study, study.study_name)
 
     if np.isclose(study.best_value, 0.0, rtol=1e-09, atol=1e-09):
         red = "\x1b[31;20m"
@@ -869,7 +875,7 @@ def add_top_shap_features_to_anomalies(
     features_prepared = np.hstack([features_standardized, node_embeddings_reduced])
     feature_names = list(features_to_standardize) + [f'nodeEmbeddingPCA_{i}' for i in range(node_embeddings_reduced.shape[1])]
 
-    anomaly_detection_results = tune_anomaly_detection_models(features_prepared)
+    anomaly_detection_results = tune_anomaly_detection_models(features_prepared, parameters)
     if anomaly_detection_results.is_empty():
         sys.exit(0)
 
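Note on the pattern introduced above: the compact summary line relies only on standard Optuna study attributes (trials, best_trial.number, best_params), while the detailed per-trial report stays behind the verbose flag. A minimal, self-contained sketch of that pattern follows; the SimpleNamespace stand-in for the repository's Parameters object and the toy objective are illustrative assumptions, and output_optuna_tuning_results is not reproduced here.

from types import SimpleNamespace

from optuna import create_study
from optuna.samplers import TPESampler

# Stand-in for the repository's Parameters object (illustrative assumption only).
parameters = SimpleNamespace(get_plot_prefix=lambda: "demo", is_verbose=lambda: False)


def objective(trial) -> float:
    # Toy objective; the real one maximizes the F1 score of the tuned detectors.
    contamination = trial.suggest_float("contamination", 0.01, 0.10)
    return 1.0 - contamination


study = create_study(direction="maximize", sampler=TPESampler(seed=42), study_name="AnomalyDetection_Tuning")
study.optimize(objective, n_trials=25, timeout=50)

# Always print the compact summary; keep the detailed report behind the verbose flag.
print(f"Best parameters for {parameters.get_plot_prefix()} after {len(study.trials)}/25 trials with best #{study.best_trial.number} (Optuna):", study.best_params)
if parameters.is_verbose():
    print(study.trials_dataframe())  # stand-in for output_optuna_tuning_results(study, study.study_name)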

domains/anomaly-detection/tunedLeidenCommunityDetection.py

Lines changed: 2 additions & 2 deletions
@@ -359,10 +359,10 @@ def objective(trial):
     study.enqueue_trial({'gamma': 1.14, 'theta': 0.001, 'max_levels': 10})
 
     # Execute the hyperparameter tuning
-    study.optimize(objective, n_trials=20, timeout=30)
+    study.optimize(objective, n_trials=20, timeout=20)
 
     # Output tuning results
-    print(f"Best Leiden Community Detection parameters for {parameters.get_projection_name()} (Optuna):", study.best_params)
+    print(f"Best Leiden Community Detection parameters for {parameters.get_projection_name()} after {len(study.trials)}/20 trials with best #{study.best_trial.number} (Optuna):", study.best_params)
     if parameters.is_verbose():
         output_detailed_optuna_tuning_results(study)
 
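Reporting {len(study.trials)}/20 in the new log line is useful because Optuna's study.optimize() stops at whichever of n_trials or timeout is reached first, so fewer trials than requested may have completed. A minimal sketch illustrating this behavior, with a placeholder slow objective rather than the Leiden tuning objective:

import time

from optuna import create_study


def slow_objective(trial) -> float:
    # Placeholder objective that takes about one second per trial.
    time.sleep(1)
    return trial.suggest_float("gamma", 0.5, 2.0)


study = create_study(direction="maximize")
# Requests 20 trials but stops after roughly 5 seconds, whichever comes first.
study.optimize(slow_objective, n_trials=20, timeout=5)
print(f"Completed {len(study.trials)}/20 trials; best #{study.best_trial.number}:", study.best_params)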

domains/anomaly-detection/tunedNodeEmbeddingClustering.py

Lines changed: 4 additions & 2 deletions
@@ -308,7 +308,7 @@ def objective(trial):
 
     # Start the hyperparameter tuning
     study.optimize(objective, n_trials=20, timeout=10)
-    print(f"Best HDBSCAN parameters (Optuna):", study.best_params)
+    print(f"Best HDBSCAN parameters after {len(study.trials)}/20 trials with best #{study.best_trial.number} (Optuna):", study.best_params)
     if parameters.is_verbose():
         output_detailed_optuna_tuning_results(study, 'HDBSCAN')
 
@@ -709,10 +709,12 @@ def objective(trial):
     study.enqueue_trial({'embedding_dimension': 128, 'forth_iteration_weight': 1.0, 'normalization_strength': 0.5})
     study.enqueue_trial({'embedding_dimension': 256, 'forth_iteration_weight': 0.5, 'normalization_strength': 0.3})
     study.enqueue_trial({'embedding_dimension': 256, 'forth_iteration_weight': 1.0, 'normalization_strength': 0.3})
+    study.enqueue_trial({'embedding_dimension': 64, 'normalization_strength': -0.4, 'forth_iteration_weight': 1.4})
+    study.enqueue_trial({'embedding_dimension': 256, 'normalization_strength': 0.3, 'forth_iteration_weight': 1.0})
 
     # Start the hyperparameter tuning
     study.optimize(objective, n_trials=80, timeout=40)
-    print(f"Best Fast Random Projection (FastRP) parameters for {parameters.get_projection_name()} (Optuna):", study.best_params)
+    print(f"Best Fast Random Projection (FastRP) parameters for {parameters.get_projection_name()} after {len(study.trials)}/80 trials with best #{study.best_trial.number} (Optuna):", study.best_params)
     if parameters.is_verbose():
         output_detailed_optuna_tuning_results(study, 'Fast Random Projection (FastRP)')
 
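The two newly enqueued parameter sets act as warm starts: Optuna evaluates enqueued trials before sampler-suggested ones, so known-good FastRP configurations are tried first and the TPE sampler continues from there. A minimal sketch of the mechanism, with a placeholder objective and an arbitrary score in place of the real embedding evaluation:

from optuna import create_study
from optuna.samplers import TPESampler


def objective(trial) -> float:
    # Placeholder objective over FastRP-like parameters; the real one scores node embeddings.
    dimension = trial.suggest_categorical("embedding_dimension", [64, 128, 256])
    strength = trial.suggest_float("normalization_strength", -1.0, 1.0)
    weight = trial.suggest_float("forth_iteration_weight", 0.0, 2.0)
    return float(dimension) * (1.0 - abs(strength)) + weight  # arbitrary score


study = create_study(direction="maximize", sampler=TPESampler(seed=42))
# Enqueued trials are evaluated first, before sampler-suggested ones.
study.enqueue_trial({'embedding_dimension': 64, 'normalization_strength': -0.4, 'forth_iteration_weight': 1.4})
study.enqueue_trial({'embedding_dimension': 256, 'normalization_strength': 0.3, 'forth_iteration_weight': 1.0})
study.optimize(objective, n_trials=10, timeout=40)
print(f"Best parameters after {len(study.trials)}/10 trials with best #{study.best_trial.number} (Optuna):", study.best_params)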
