Skip to content

Commit 429c75c

Browse files
committed
Add anomaly detection Markdown summary report
1 parent 4d361d1 commit 429c75c

File tree

2 files changed

+89
-0
lines changed

2 files changed

+89
-0
lines changed

domains/anomaly-detection/anomalyDetectionCsv.sh

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,32 @@ source "${SCRIPTS_DIR}/executeQueryFunctions.sh"
3333
# Define functions to create and delete Graph Projections like "createUndirectedDependencyProjection"
3434
source "${SCRIPTS_DIR}/projectionFunctions.sh"
3535

36+
# Appends a Markdown table to an existing file and
37+
# removes redundant header + separator rows.
38+
#
39+
# Usage:
40+
# cat newTable.md | append_to_markdown_table myMarkdownFile.md
41+
#
42+
# append_to_markdown_table myMarkdownFile.md <<'EOF'
43+
# | Name | Score | Archetype |
44+
# | --- | --- | --- |
45+
# | Bar | 0.9 | Something |
46+
# EOF
47+
#
48+
# Behavior:
49+
# - Keeps the first header row and its following separator row.
50+
# - Removes all subsequent duplicate header + separator pairs.
51+
# - Leaves all data rows untouched.
52+
append_to_markdown_table() {
    # Arguments: $1 - path of the Markdown file that stdin is appended to.
    # Returns:   0 on success, non-zero if the argument is missing or a step fails.
    local file="${1:-}"
    local deduplicatedFile="${file}.tmp"

    if [ -z "${file}" ]; then
        echo "append_to_markdown_table: Missing target Markdown file argument" >&2
        return 1
    fi

    # Append stdin to the target file
    cat >> "${file}" || return

    # Clean up duplicate headers (header row + --- row).
    # Line 1 is taken as the header row and line 2 as its separator row.
    # A later line equal to the header is held back for one line: it is only
    # dropped when the separator row follows directly. Everything else,
    # including repeated data rows, is written out unchanged.
    if awk '
        NR == 1 { header = $0; print; next }
        NR == 2 { separator = $0; print; next }
        heldHeader {
            heldHeader = 0
            if ($0 == separator) { next }
            print header
        }
        $0 == header { heldHeader = 1; next }
        { print }
        END { if (heldHeader) { print header } }
    ' "${file}" > "${deduplicatedFile}"; then
        mv "${deduplicatedFile}" "${file}"
    else
        # Leave the appended file as it is and do not keep a partial temporary file.
        rm -f "${deduplicatedFile}"
        return 1
    fi
}
61+
3662
# Query or recalculate features.
3763
#
3864
# Required Parameters:
@@ -67,6 +93,8 @@ anomaly_detection_features() {
6793
# Required Parameters:
6894
# - projection_node_label=...
6995
# Label of the nodes that will be used for the projection. Example: "Package"
96+
# - projection_language=...
97+
# Name of the associated programming language. Default: "Java". Example: "Typescript"
7098
anomaly_detection_queries() {
7199
local nodeLabel
72100
nodeLabel=$( extractQueryParameter "projection_node_label" "${@}" )
@@ -92,6 +120,8 @@ anomaly_detection_queries() {
92120
# Required Parameters:
93121
# - projection_node_label=...
94122
# Label of the nodes that will be used for the projection. Example: "Package"
123+
# - projection_language=...
124+
# Name of the associated programming language. Examples: "Java", "Typescript"
95125
anomaly_detection_labels() {
96126
local nodeLabel
97127
nodeLabel=$( extractQueryParameter "projection_node_label" "${@}" )
@@ -109,6 +139,33 @@ anomaly_detection_labels() {
109139
# execute_cypher "${ANOMALY_DETECTION_LABEL_CYPHER_DIR}/AnomalyDetectionArchetypeOutlier.cypher" "${@}"
110140
}
111141

142+
# Initialize anomaly summary (Markdown) report.
143+
# Intended to be run before the first call of "anomaly_detection_summary_report".
144+
initialize_anomaly_detection_summary_report() {
    # Delete the Markdown summary report of a previous run.
    # "rm -f" does not complain when the file does not exist.
    local summaryReportFile="${FULL_REPORT_DIRECTORY}/AnomalySummaryByArchetype.md"
    rm -f "${summaryReportFile}"
}
148+
149+
# Summarize all results in a Markdown report.
150+
# Note: Call "initialize_anomaly_detection_summary_report" before the first call of this function.
151+
#
152+
# Required Parameters:
153+
# - projection_node_label=...
154+
# Label of the nodes that will be used for the projection. Example: "Package"
155+
# - projection_language=...
156+
# Name of the associated programming language. Examples: "Java", "Typescript"
157+
anomaly_detection_summary_report() {
    # Both values are only used for the progress message below.
    # The query itself receives all parameters unchanged.
    local nodeLabel language
    nodeLabel=$( extractQueryParameter "projection_node_label" "${@}" )
    language=$( extractQueryParameter "projection_language" "${@}" )

    echo "anomalyDetectionCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Updating ${language} ${nodeLabel} anomaly summary Markdown report..."

    # Run the summary query as Markdown table and add its rows to the shared summary report.
    execute_cypher "${ANOMALY_DETECTION_LABEL_CYPHER_DIR}/AnomalyDetectionArchetypeSummary.cypher" "${@}" --output-markdown-table \
        | append_to_markdown_table "${FULL_REPORT_DIRECTORY}/AnomalySummaryByArchetype.md"
}
168+
112169
# Run the anomaly detection pipeline.
113170
#
114171
# Required Parameters:
@@ -118,10 +175,13 @@ anomaly_detection_labels() {
118175
# Label of the nodes that will be used for the projection. Example: "Package"
119176
# - projection_weight_property=...
120177
# Name of the node property that contains the dependency weight. Example: "weight"
178+
# - projection_language=...
179+
# Name of the associated programming language. Examples: "Java", "Typescript"
121180
anomaly_detection_csv_reports() {
    # Pipeline steps in execution order. Every step gets all given parameters
    # and is measured with "time".
    local pipelineStep
    for pipelineStep in \
        anomaly_detection_features \
        anomaly_detection_queries \
        anomaly_detection_labels \
        anomaly_detection_summary_report
    do
        time "${pipelineStep}" "${@}"
    done
}
126186

127187
# Create report directory
@@ -146,6 +206,8 @@ ALGORITHM_LANGUAGE="projection_language"
146206
COMMUNITY_PROPERTY="community_property=communityLeidenIdTuned"
147207
EMBEDDING_PROPERTY="embedding_property=embeddingsFastRandomProjectionTunedForClustering"
148208

209+
initialize_anomaly_detection_summary_report
210+
149211
# -- Java Artifact Node Embeddings -------------------------------
150212

151213
if createUndirectedDependencyProjection "${PROJECTION_NAME}=artifact-anomaly-detection" "${PROJECTION_NODE}=Artifact" "${PROJECTION_WEIGHT}=weight" "${PROJECTION_LANGUAGE}=Java"; then
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// Anomaly Detection Labels: Summarizes all labelled archetypes by their anomaly score including their archetype rank. For code units with more than one archetype, the one with the higher rank is shown. Requires all other labels/*.cypher queries to run first. Variables: projection_language, projection_node_label
2+
3+
// Select every node that carries the label given by $projection_node_label.
MATCH (codeUnit)
4+
WHERE $projection_node_label IN labels(codeUnit)
5+
// Produce one row per property key of each selected node.
UNWIND keys(codeUnit) AS codeUnitProperty
6+
WITH *
7+
// Keep only property keys that start with "anomaly" and end with "Rank".
WHERE codeUnitProperty starts with 'anomaly'
8+
AND codeUnitProperty ends with 'Rank'
9+
WITH *
10+
// Display name: the first non-null value of the listed name properties.
,coalesce(codeUnit.fqn, codeUnit.globalFqn, codeUnit.fileName, codeUnit.signature, codeUnit.name) AS codeUnitName
11+
// Archetype name: the part of the property key between "anomaly" and "Rank".
,split(split(codeUnitProperty, 'anomaly')[1], 'Rank')[0] AS archetype
12+
// Rank value stored in that property.
,codeUnit[codeUnitProperty] AS archetypeRank
13+
,codeUnit.anomalyScore AS anomalyScore
14+
// Highest anomaly score first; per code unit the lowest rank value first.
// NOTE(review): the collect(...)[0] columns below presumably rely on collect() keeping this row order - confirm.
ORDER BY codeUnit.anomalyScore DESC, archetypeRank ASC, codeUnitName ASC, archetype ASC
15+
// The non-aggregated columns act as grouping keys, the collect(...) columns aggregate per group.
RETURN $projection_language + ' ' + $projection_node_label AS `Code Unit`
16+
,codeUnitName AS `Name`
17+
,round(anomalyScore, 4, 'HALF_UP') AS `Score`
18+
// First collected archetype and its rank of the group.
,collect(archetype)[0] AS `Archetype`
19+
,collect(archetypeRank)[0] AS `Archetype Rank`
20+
// Missing top feature names are shown as empty text, missing SHAP values as 0.0.
,coalesce(codeUnit.anomalyTopFeature1, "") AS `Top Feature 1`
21+
,coalesce(round(codeUnit.anomalyTopFeatureSHAPValue1, 4, 'HALF_UP'), 0.0) AS `Top Feature 1 SHAP`
22+
,coalesce(codeUnit.anomalyTopFeature2, "") AS `Top Feature 2`
23+
,coalesce(round(codeUnit.anomalyTopFeatureSHAPValue2, 4, 'HALF_UP'), 0.0) AS `Top Feature 2 SHAP`
24+
,coalesce(codeUnit.anomalyTopFeature3, "") AS `Top Feature 3`
25+
,coalesce(round(codeUnit.anomalyTopFeatureSHAPValue3, 4, 'HALF_UP'), 0.0) AS `Top Feature 3 SHAP`
26+
// Disabled columns for the second collected archetype and its rank:
//,collect(archetype)[1] AS secondaryArchetype
27+
//,collect(archetypeRank)[1] AS secondaryArchetypeRank

0 commit comments

Comments
 (0)