Skip to content

Commit f9a7d6d

Browse files
authored
Merge pull request #114 from JohT/fix/113-skip-empty-projection-analysis
Skip analysis for empty projections
2 parents 8d489c1 + 3d84246 commit f9a7d6d

File tree

6 files changed

+226
-86
lines changed

6 files changed

+226
-86
lines changed

scripts/parseCsvFunctions.sh

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
#!/usr/bin/env bash
2+
3+
# Provides functions to parse strings in CSV format.
4+
5+
# Note: This script was generated by Chat-GPT after some messages back and forth:
6+
# https://chat.openai.com/share/0bd3cde7-32d0-460d-830c-79b7d00a2492
7+
8+
# Fail on any error ("-e" = exit on first error, "-o pipefail" exits on errors within piped commands)
9+
set -o errexit -o pipefail
10+
11+
# Function to get the value of a specific column in a CSV string
# that only consists of a header line with the column names and a second line of values.
#
# All double quotes, spaces and carriage returns are stripped from the input before
# it is split at commas, so this is meant for simple tokens (e.g. counts), not free text.
# Empty lines are ignored. If more than two lines remain, the last one is used as values line.
# If only the header line is given, an empty value is printed.
#
# Parameters:
# $1: CSV string (two lines)
# $2: Column name
#
# Outputs:
# stdout: the value of the column
# stderr: an error message if the column doesn't exist
#
# Returns:
# 0 on success, 1 if the column name is not part of the header line.
get_csv_column_value() {
  local csv_string="$1"
  local column_name="$2"
  local line header values index i
  local -a lines=() header_array=() values_array=()

  # Remove all double quotes, spaces and carriage returns (CRLF line endings)
  csv_string=${csv_string//\"/}
  csv_string=${csv_string// /}
  csv_string=${csv_string//$'\r'/}

  # Collect the non-empty lines (a here-string always ends with a newline)
  while IFS= read -r line; do
    if [[ -n "${line}" ]]; then
      lines+=("${line}")
    fi
  done <<< "${csv_string}"

  # Extract header and values. Without a values line the value stays empty
  # instead of accidentally returning the header line as value.
  header="${lines[0]}"
  values=""
  if (( ${#lines[@]} > 1 )); then
    values="${lines[${#lines[@]}-1]}"
  fi

  # Convert comma-separated strings into arrays
  IFS=',' read -r -a header_array <<< "${header}"
  IFS=',' read -r -a values_array <<< "${values}"

  # Find the index of the column
  index=-1
  for i in "${!header_array[@]}"; do
    if [[ "${header_array[i]}" == "${column_name}" ]]; then
      index="${i}"
      break
    fi
  done

  if (( index < 0 )); then
    # Report on stderr so that callers capturing stdout don't get the message as value.
    # Use "return" instead of "exit" since this function is sourced into other scripts.
    echo "Error: Column '${column_name}' not found" >&2
    return 1
  fi

  # Print the value at the corresponding index
  printf '%s\n' "${values_array[index]}"
}
49+
50+
# Function to get the value of a specific column in a CSV string and check if it's greater than zero.
#
# The value is validated with a pattern instead of being passed to an arithmetic
# comparison, so non-numeric content is never evaluated as an arithmetic expression.
#
# Parameters:
# $1: CSV string (two lines)
# $2: Column name with a numeric value
#
# Returns:
# 0 if the column value is an integer greater than zero,
# 1 otherwise (zero, negative, empty, non-numeric or column lookup failed).
is_csv_column_greater_zero() {
  local columnValue
  # Optional plus sign, optional leading zeros, at least one non-zero leading digit.
  local -r positiveInteger='^[+]?0*[1-9][0-9]*$'

  # Separate declaration and assignment so that a failed lookup isn't masked by "local".
  columnValue=$(get_csv_column_value "${@}") || return 1

  [[ "${columnValue}" =~ ${positiveInteger} ]]
}

scripts/reports/CentralityCsv.sh

Lines changed: 34 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# The reports (csv files) will be written into the sub directory reports/centrality-csv.
66
# Note that "scripts/prepareAnalysis.sh" is required to run prior to this script.
77

8-
# Requires executeQueryFunctions.sh
8+
# Requires executeQueryFunctions.sh, parseCsvFunctions.sh
99

1010
# Overrideable Constants (defaults also defined in sub scripts)
1111
REPORTS_DIRECTORY=${REPORTS_DIRECTORY:-"reports"}
@@ -31,6 +31,9 @@ echo "centralityCsv: CYPHER_DIR=$CYPHER_DIR"
3131
# Define functions to execute a cypher query from within the given file (first and only argument)
3232
source "${SCRIPTS_DIR}/executeQueryFunctions.sh"
3333

34+
# Define function(s) (e.g. is_csv_column_greater_zero) to parse CSV format strings from Cypher query results.
35+
source "${SCRIPTS_DIR}/parseCsvFunctions.sh"
36+
3437
# Create report directory
3538
REPORT_NAME="centrality-csv"
3639
FULL_REPORT_DIRECTORY="${REPORTS_DIRECTORY}/${REPORT_NAME}"
@@ -49,11 +52,13 @@ mkdir -p "${FULL_REPORT_DIRECTORY}"
4952
# Name of the node property that contains the dependency weight. Example: "weight"
5053
createDependencyProjection() {
5154
local PROJECTION_CYPHER_DIR="$CYPHER_DIR/Dependencies_Projection"
55+
local projectionResult
5256

5357
execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_1_Delete_Projection.cypher" "${@}"
5458
execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_2_Delete_Subgraph.cypher" "${@}"
5559
execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_3_Create_Projection.cypher" "${@}"
56-
execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_5_Create_Subgraph.cypher" "${@}"
60+
projectionResult=$( execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_5_Create_Subgraph.cypher" "${@}")
61+
is_csv_column_greater_zero "${projectionResult}" "relationshipCount"
5762
}
5863

5964
# Centrality preparation for Type nodes
@@ -65,9 +70,11 @@ createDependencyProjection() {
6570
# Name prefix for the in-memory projection name for dependencies. Example: "package"
6671
createTypeProjection() {
6772
local PROJECTION_CYPHER_DIR="$CYPHER_DIR/Dependencies_Projection"
73+
local projectionResult
6874

6975
execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_2_Delete_Subgraph.cypher" "${@}"
70-
execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_3c_Create_Type_Projection.cypher" "${@}"
76+
projectionResult=$( execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_3c_Create_Type_Projection.cypher" "${@}")
77+
is_csv_column_greater_zero "${projectionResult}" "relationshipCount"
7178
}
7279

7380
# Centrality preparation for method calls
@@ -79,9 +86,11 @@ createTypeProjection() {
7986
# Name prefix for the in-memory projection name for dependencies. Example: "package"
8087
createMethodProjection() {
8188
local PROJECTION_CYPHER_DIR="$CYPHER_DIR/Method_Projection"
89+
local projectionResult
8290

8391
execute_cypher "${PROJECTION_CYPHER_DIR}/Methods_1_Delete_Projection.cypher" "${@}"
84-
execute_cypher "${PROJECTION_CYPHER_DIR}/Methods_2_Create_Projection.cypher" "${@}"
92+
projectionResult=$( execute_cypher "${PROJECTION_CYPHER_DIR}/Methods_2_Create_Projection.cypher" "${@}")
93+
is_csv_column_greater_zero "${projectionResult}" "relationshipCount"
8594
}
8695

8796
# Apply the centrality algorithm "Page Rank".
@@ -388,9 +397,11 @@ ARTIFACT_WEIGHT="dependencies_projection_weight_property=weight"
388397

389398
# Artifact Centrality
390399
echo "centralityCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Processing artifact dependencies..."
391-
createDependencyProjection "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}"
392-
runCentralityAlgorithms "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}"
393-
400+
if createDependencyProjection "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}"; then
401+
runCentralityAlgorithms "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}"
402+
else
403+
echo "centralityCsv: No data. Artifacts analysis skipped."
404+
fi
394405
# ---------------------------------------------------------------
395406

396407
# Package Query Parameters
@@ -400,9 +411,11 @@ PACKAGE_WEIGHT="dependencies_projection_weight_property=weight25PercentInterface
400411

401412
# Package Centrality
402413
echo "centralityCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Processing package dependencies..."
403-
createDependencyProjection "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}"
404-
runCentralityAlgorithms "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}"
405-
414+
if createDependencyProjection "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}"; then
415+
runCentralityAlgorithms "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}"
416+
else
417+
echo "centralityCsv: No data. Package analysis skipped."
418+
fi
406419
# ---------------------------------------------------------------
407420

408421
# Type Query Parameters
@@ -412,9 +425,11 @@ TYPE_WEIGHT="dependencies_projection_weight_property=weight"
412425

413426
# Type Centrality
414427
echo "centralityCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Processing type dependencies..."
415-
createTypeProjection "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}"
416-
runCentralityAlgorithms "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}"
417-
428+
if createTypeProjection "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}"; then
429+
runCentralityAlgorithms "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}"
430+
else
431+
echo "centralityCsv: No data. Type analysis skipped."
432+
fi
418433
# ---------------------------------------------------------------
419434

420435
# Method Query Parameters
@@ -424,9 +439,11 @@ METHOD_WEIGHT="dependencies_projection_weight_property="
424439

425440
# Method Centrality
426441
echo "centralityCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Processing method dependencies..."
427-
createMethodProjection "${METHOD_PROJECTION}"
428-
runCentralityAlgorithms "${METHOD_PROJECTION}" "${METHOD_NODE}" "${METHOD_WEIGHT}"
429-
442+
if createMethodProjection "${METHOD_PROJECTION}"; then
443+
runCentralityAlgorithms "${METHOD_PROJECTION}" "${METHOD_NODE}" "${METHOD_WEIGHT}"
444+
else
445+
echo "centralityCsv: No data. Method analysis skipped."
446+
fi
430447
# ---------------------------------------------------------------
431448

432-
echo "centralityCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Successfully finished"
449+
echo "centralityCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Successfully finished."

scripts/reports/CommunityCsv.sh

Lines changed: 37 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
# Note that "scripts/prepareAnalysis.sh" is required to run prior to this script.
88

9-
# Requires executeQueryFunctions.sh
9+
# Requires executeQueryFunctions.sh, parseCsvFunctions.sh
1010

1111
# Fail on any error ("-e" = exit on first error, "-o pipefail" exits on errors within piped commands)
1212
set -o errexit -o pipefail
@@ -32,6 +32,9 @@ echo "communityCsv: CYPHER_DIR=${CYPHER_DIR}"
3232
# Define functions to execute a cypher query from within the given file (first and only argument)
3333
source "${SCRIPTS_DIR}/executeQueryFunctions.sh"
3434

35+
# Define function(s) (e.g. is_csv_column_greater_zero) to parse CSV format strings from Cypher query results.
36+
source "${SCRIPTS_DIR}/parseCsvFunctions.sh"
37+
3538
# Create report directory
3639
REPORT_NAME="community-csv"
3740
FULL_REPORT_DIRECTORY="${REPORTS_DIRECTORY}/${REPORT_NAME}"
@@ -50,11 +53,13 @@ mkdir -p "${FULL_REPORT_DIRECTORY}"
5053
# Name of the node property that contains the dependency weight. Example: "weight"
5154
createProjection() {
5255
local PROJECTION_CYPHER_DIR="${CYPHER_DIR}/Dependencies_Projection"
56+
local projectionResult
5357

5458
execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_1_Delete_Projection.cypher" "${@}"
5559
execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_2_Delete_Subgraph.cypher" "${@}"
5660
execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_4_Create_Undirected_Projection.cypher" "${@}"
57-
execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_5_Create_Subgraph.cypher" "${@}"
61+
projectionResult=$( execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_5_Create_Subgraph.cypher" "${@}")
62+
is_csv_column_greater_zero "${projectionResult}" "relationshipCount"
5863
}
5964

6065
# Community Detection Preparation for Types
@@ -66,9 +71,11 @@ createProjection() {
6671
# Name prefix for the in-memory projection name for dependencies. Example: "package"
6772
createTypeProjection() {
6873
local PROJECTION_CYPHER_DIR="${CYPHER_DIR}/Dependencies_Projection"
74+
local projectionResult
6975

7076
execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_2_Delete_Subgraph.cypher" "${@}"
71-
execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_4c_Create_Undirected_Type_Projection.cypher" "${@}"
77+
projectionResult=$( execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_4c_Create_Undirected_Type_Projection.cypher" "${@}")
78+
is_csv_column_greater_zero "${projectionResult}" "relationshipCount"
7279
}
7380

7481
# Community Detection using the Louvain Algorithm
@@ -411,10 +418,12 @@ ARTIFACT_KCUT="dependencies_maxkcut=5" # default = 2
411418

412419
# Artifact Community Detection
413420
echo "communityCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Processing artifact dependencies..."
414-
createProjection "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}"
415-
detectCommunities "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}" "${ARTIFACT_GAMMA}" "${ARTIFACT_KCUT}"
416-
writeLeidenModularity "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}"
417-
421+
if createProjection "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}"; then
422+
detectCommunities "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}" "${ARTIFACT_GAMMA}" "${ARTIFACT_KCUT}"
423+
writeLeidenModularity "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}"
424+
else
425+
echo "communityCsv: No data. Artifact analysis skipped."
426+
fi
418427
# ---------------------------------------------------------------
419428

420429
# Package Query Parameters
@@ -426,13 +435,15 @@ PACKAGE_KCUT="dependencies_maxkcut=20" # default = 2
426435

427436
# Package Community Detection
428437
echo "communityCsv: $(date +'%Y-%m-%dT%H:%M:%S%z'): Processing package dependencies..."
429-
createProjection "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}"
430-
detectCommunities "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}" "${PACKAGE_GAMMA}" "${PACKAGE_KCUT}"
431-
writeLeidenModularity "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}"
432-
433-
# Package Community Detection - Special CSV Queries after update
434-
execute_cypher "${CYPHER_DIR}/Community_Detection/Which_package_community_spans_several_artifacts_and_how_are_the_packages_distributed.cypher" > "${FULL_REPORT_DIRECTORY}/Package_Communities_Leiden_That_Span_Multiple_Artifacts.csv"
435-
438+
if createProjection "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}"; then
439+
detectCommunities "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}" "${PACKAGE_GAMMA}" "${PACKAGE_KCUT}"
440+
writeLeidenModularity "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}"
441+
442+
# Package Community Detection - Special CSV Queries after update
443+
execute_cypher "${CYPHER_DIR}/Community_Detection/Which_package_community_spans_several_artifacts_and_how_are_the_packages_distributed.cypher" > "${FULL_REPORT_DIRECTORY}/Package_Communities_Leiden_That_Span_Multiple_Artifacts.csv"
444+
else
445+
echo "communityCsv: No data. Package analysis skipped."
446+
fi
436447
# ---------------------------------------------------------------
437448

438449
# Type Query Parameters
@@ -444,12 +455,16 @@ TYPE_KCUT="dependencies_maxkcut=100" # default = 2
444455

445456
# Type Community Detection
446457
echo "communityCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Processing type dependencies..."
447-
createTypeProjection "${TYPE_PROJECTION}"
448-
detectCommunities "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}" "${TYPE_GAMMA}" "${TYPE_KCUT}"
449-
450-
# Type Community Detection - Special CSV Queries after update
451-
execute_cypher "${CYPHER_DIR}/Community_Detection/Which_type_community_spans_several_artifacts_and_how_are_the_types_distributed.cypher" > "${FULL_REPORT_DIRECTORY}/Type_Communities_Leiden_That_Span_Multiple_Artifacts.csv"
452-
execute_cypher "${CYPHER_DIR}/Community_Detection/Type_communities_with_few_members_in_foreign_packages.cypher" > "${FULL_REPORT_DIRECTORY}/Type_communities_with_few_members_in_foreign_packages.csv"
453-
execute_cypher "${CYPHER_DIR}/Community_Detection/Type_communities_that_span_the_most_packages_with_type_statistics.cypher" > "${FULL_REPORT_DIRECTORY}/Type_communities_that_span_the_most_packages_with_type_statistics.csv"
458+
if createTypeProjection "${TYPE_PROJECTION}"; then
459+
detectCommunities "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}" "${TYPE_GAMMA}" "${TYPE_KCUT}"
460+
461+
# Type Community Detection - Special CSV Queries after update
462+
execute_cypher "${CYPHER_DIR}/Community_Detection/Which_type_community_spans_several_artifacts_and_how_are_the_types_distributed.cypher" > "${FULL_REPORT_DIRECTORY}/Type_Communities_Leiden_That_Span_Multiple_Artifacts.csv"
463+
execute_cypher "${CYPHER_DIR}/Community_Detection/Type_communities_with_few_members_in_foreign_packages.cypher" > "${FULL_REPORT_DIRECTORY}/Type_communities_with_few_members_in_foreign_packages.csv"
464+
execute_cypher "${CYPHER_DIR}/Community_Detection/Type_communities_that_span_the_most_packages_with_type_statistics.cypher" > "${FULL_REPORT_DIRECTORY}/Type_communities_that_span_the_most_packages_with_type_statistics.csv"
465+
else
466+
echo "communityCsv: No data. Type analysis skipped."
467+
fi
468+
# ---------------------------------------------------------------
454469

455-
echo "communityCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Successfully finished"
470+
echo "communityCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Successfully finished."

0 commit comments

Comments
 (0)