Skip to content

Commit 1a35e05

Browse files
committed
Output Markdown table with Shapley-explained top features
1 parent 1733c48 commit 1a35e05

File tree

1 file changed

+59
-0
lines changed

1 file changed

+59
-0
lines changed

domains/anomaly-detection/tunedAnomalyDetectionExplained.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
# - Provide the password for Neo4j in the environment variable "NEO4J_INITIAL_PASSWORD".
1111
# - Requires "tunedLeidenCommunityDetection.py", "tunedNodeEmbeddingClustering.py" and "umap2dNodeEmbedding.py" to be executed before this script to provide the necessary data.
1212

13+
from re import M
1314
import typing
1415
import numpy.typing as numpy_typing
1516

@@ -947,6 +948,58 @@ def add_node_embedding_shap_sum(
947948
return anomaly_detected_features
948949

949950

951+
def output_top_shap_explained_global_features_as_markdown_table(
    shap_anomaly_values: np.ndarray,
    feature_names: list[str],
    output_file_path: str,
    top_n_features: int = 10
):
    """
    Write a Markdown table of the globally most important features, ranked by
    mean absolute SHAP value, to ``output_file_path``.

    All individual "nodeEmbedding*" features are additionally summed into one
    appended row ("*Node embeddings aggregated*") so the combined influence of
    the embedding dimensions is visible alongside the plain features. The
    individual embedding dimensions are kept in the ranking as well.

    Parameters
    ----------
    shap_anomaly_values : np.ndarray
        SHAP values; one row per sample, one column per feature.
    feature_names : list[str]
        Feature names matching the columns of ``shap_anomaly_values``.
    output_file_path : str
        Path of the Markdown file to write (overwritten if it exists).
    top_n_features : int, optional
        Number of top-ranked features to include (default 10). One extra row
        is kept to compensate for the appended aggregated-embedding row.
    """
    # Global importance = mean absolute SHAP value across all samples per feature.
    mean_absolute_shap_values = np.abs(shap_anomaly_values).mean(axis=0)

    # Create DataFrame with feature names and mean SHAP values.
    feature_importance = pd.DataFrame({
        "Feature": feature_names,
        "Mean absolute SHAP value": mean_absolute_shap_values
    })

    # Sum all nodeEmbedding* dimensions into a single aggregated importance value.
    mask = feature_importance["Feature"].str.startswith("nodeEmbedding")
    node_embedding_sum = feature_importance.loc[mask, "Mean absolute SHAP value"].sum()

    # Append the aggregated feature row.
    feature_importance = pd.concat([
        feature_importance,
        pd.DataFrame([{
            "Feature": "*Node embeddings aggregated*",
            "Mean absolute SHAP value": node_embedding_sum
        }])
    ])

    # Sort by importance; "+ 1" compensates for the appended aggregated row.
    top_features = feature_importance.sort_values("Mean absolute SHAP value", ascending=False).head(top_n_features + 1)

    headers = list(top_features.columns)

    # Build the Markdown table via a row list + join instead of repeated
    # string concatenation (avoids the quadratic += pattern).
    table_lines = [
        "| " + " | ".join(headers) + " |",
        "| " + " | ".join(["---"] * len(headers)) + " |",
    ]
    for feature_name, shap_value in top_features.itertuples(index=False):
        table_lines.append(f"| {feature_name} | {shap_value:.6f} |")
    markdown_table = "\n".join(table_lines) + "\n"

    # Explicit encoding so the output is identical across platforms
    # (default locale encoding differs e.g. on Windows).
    with open(output_file_path, "w", encoding="utf-8") as f:
        f.write(markdown_table)

    print(f"tunedAnomalyDetectionExplained: Markdown table with top {top_n_features} SHAP explained features saved to {output_file_path}")
1001+
1002+
9501003
# ------------------------------------------------------------------------------------------------------------
9511004
# MAIN
9521005
# ------------------------------------------------------------------------------------------------------------
@@ -1051,6 +1104,12 @@ def add_node_embedding_shap_sum(
10511104
anomaly_detected_features=features
10521105
)
10531106

1107+
output_top_shap_explained_global_features_as_markdown_table(
1108+
shap_anomaly_values=explanation_results.shap_anomaly_values,
1109+
feature_names=feature_names,
1110+
output_file_path=get_file_path(f"{plot_prefix}_Top_anomaly_features", parameters, 'md')
1111+
)
1112+
10541113
if parameters.is_verbose():
10551114
print("tunedAnomalyDetectionExplained: Features with added anomaly score explanation columns:")
10561115
print(features[features["anomalyLabel"] == 1].sort_values(by='anomalyScore', ascending=False).head(10))

0 commit comments

Comments
 (0)