Skip to content

Commit 74a98ef

Browse files
committed
Integrate git log import into analysis preparation
1 parent 5b4ec7c commit 74a98ef

File tree

9 files changed

+70
-20
lines changed

9 files changed

+70
-20
lines changed

COMMANDS.md

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
- [Setup jQAssistant Java Code Analyzer](#setup-jqassistant-java-code-analyzer)
2525
- [Download Maven Artifacts to analyze](#download-maven-artifacts-to-analyze)
2626
- [Reset the database and scan the java artifacts](#reset-the-database-and-scan-the-java-artifacts)
27+
- [Import git log](#import-git-log)
2728
- [Database Queries](#database-queries)
2829
- [Cypher Shell](#cypher-shell)
2930
- [HTTP API](#http-api)
@@ -70,7 +71,7 @@ a profile, the newest versions will be used. Profiles are scripts that can be fo
7071
### Notes
7172

7273
- Be sure to use Java 17 for Neo4j v5 and Java 11 for Neo4j v4
73-
- Use your own initial Neo4j password
74+
- Use your own initial Neo4j password with `export NEO4J_INITIAL_PASSWORD=my_own_password`
7475
- For more details have a look at the script [analyze.sh](./scripts/analysis/analyze.sh)
7576

7677
### Examples
@@ -214,6 +215,18 @@ enhance the data further with relationships between artifacts and packages.
214215

215216
Be aware that this script deletes all previous relationships and nodes in the local Neo4j Graph database.
216217

218+
### Import git log
219+
220+
Use [importGitLog.sh](./scripts/importGitLog.sh) to import git log data into the Graph.
221+
It uses `git log` to extract commits, their authors and the changed filenames into an intermediate CSV file that is then imported into Neo4j with the following schema:
222+
223+
```Cypher
224+
(Git:Author)-[:AUTHORED]->(Git:Commit)-[:CHANGED]->(Git:File)
225+
```
226+
The optional parameter `--repository directory-path-to-a-git-repository` can be used to select a different directory for the repository. By default, the `source` directory within the analysis workspace directory is used. This command only needs the git history to be present so a `git clone --bare` is sufficient. If the `source` directory is also used for the analysis then a full git clone is of course needed (like for Typescript).
227+
228+
👉**Note:** Commit messages containing `[bot]` are filtered out to ignore changes made by bots.
229+
217230
## Database Queries
218231

219232
### Cypher Shell
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
// Create index for the file name
2+
3+
CREATE INDEX INDEX_FILE_NAME IF NOT EXISTS FOR (t:File) ON (t.fileName)

cypher/Imports/Import_git_log_csv_data.cypher

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,15 @@ LOAD CSV WITH HEADERS FROM "file:///gitLog.csv" AS row
44
CALL { WITH row
55
MERGE (git_author:Git:Author {name: row.author, email: row.email})
66
MERGE (git_commit:Git:Commit {
7-
hash: row.hash,
8-
message: row.message,
9-
timestamp: datetime(row.timestamp),
7+
hash: row.hash,
8+
message: row.message,
9+
timestamp: datetime(row.timestamp),
1010
timestamp_unix: toInteger(row.timestamp_unix)
1111
})
1212
MERGE (git_file:Git:File {fileName: row.filename})
1313
MERGE (git_author)-[:AUTHORED]->(git_commit)
1414
MERGE (git_commit)-[:CHANGED]->(git_file)
1515
} IN TRANSACTIONS OF 1000 ROWS
16-
RETURN count(DISTINCT row.author) AS numberOfAuthors
17-
,count(DISTINCT row.filename) AS numberOfFiles
18-
,count(DISTINCT row.hash) AS numberOfCommits
16+
RETURN count(DISTINCT row.author) AS numberOfAuthors
17+
,count(DISTINCT row.filename) AS numberOfFiles
18+
,count(DISTINCT row.hash) AS numberOfCommits
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// Connect git files to Java files with a RESOLVES_TO relationship if their names match
2+
3+
MATCH (file:File&!Git)
4+
WITH file
5+
,replace(file.fileName, '.class', '.java') AS fileName
6+
MATCH (git_file:File&Git)
7+
WHERE git_file.fileName ENDS WITH fileName
8+
MERGE (git_file)-[:RESOLVES_TO]->(file)
9+
SET git_file.resolved = true
10+
RETURN labels(file)[0..4] AS labels
11+
,count(DISTINCT fileName) AS numberOfFileNames
12+
,collect(DISTINCT fileName)[0..4] AS examples
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// Connect git files to Typescript files with a RESOLVES_TO relationship if their names match
2+
3+
MATCH (file:File&!Git)
4+
WITH file
5+
,coalesce(file.absoluteFileName, file.globalFqn, file.fileName) AS fileName
6+
MATCH (git_file:File&Git)
7+
WHERE fileName ENDS WITH git_file.fileName
8+
MERGE (git_file)-[:RESOLVES_TO]->(file)
9+
SET git_file.resolved = true
10+
RETURN labels(file)[0..4] AS labels
11+
,count(DISTINCT fileName) AS numberOfFileNames
12+
,collect(DISTINCT fileName)[0..4] AS examples

scripts/analysis/analyze.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@
22

33
# Coordinates the end-to-end analysis process, encompassing tool installation, graph generation, and report generation.
44
# - Download and setup Neo4j and JQAssistant
5-
# - Scan and analyze the contents of the artifacts directory to create the graph
5+
# - Scan and analyze the contents of the artifacts and source directories to create the graph
66
# - Trigger all requested reports
77

88
# Note: Everything is done in the current (=working) directory and one directory above (shared downloads).
9-
# It is recommended to create an empty directory (preferrable "temp") and
9+
# It is recommended to create an empty directory (preferably "temp") and
1010
# within that another one for the analysis (e.g. "MyCodebaseName-Version")
1111
# and change into it prior to starting this script.
1212

scripts/downloader/downloadAxonFramework.sh

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,15 @@ SCRIPT_FILE_NAME="$(basename -- "${BASH_SOURCE[0]}")"
1515
SCRIPT_FILE_NAME_WITHOUT_EXTENSION="${SCRIPT_FILE_NAME%%.*}"
1616
SCRIPT_FILE_NAME_WITHOUT_PREFIX_AND_EXTENSION="${SCRIPT_FILE_NAME_WITHOUT_EXTENSION##download}"
1717
ANALYSIS_NAME="${SCRIPT_FILE_NAME_WITHOUT_PREFIX_AND_EXTENSION}"
18+
SOURCE_DIRECTORY=${SOURCE_DIRECTORY:-"source"} # Get the source repository directory (defaults to "source")
1819

1920
echo "download${ANALYSIS_NAME}: SCRIPT_FILE_NAME=${SCRIPT_FILE_NAME}"
2021
echo "download${ANALYSIS_NAME}: SCRIPT_FILE_NAME_WITHOUT_EXTENSION=${SCRIPT_FILE_NAME_WITHOUT_EXTENSION}"
2122
echo "download${ANALYSIS_NAME}: ANALYSIS_NAME=${ANALYSIS_NAME}"
2223

2324
# Read the first input argument containing the version(s) of the artifact(s)
2425
if [ "$#" -ne 1 ]; then
25-
echo "Error (download${ANALYSIS_NAME}): Usage: $0 <version>" >&2
26+
echo "Error (download${ANALYSIS_NAME}): Usage: $0 <version> (e.g. 4.9.3)" >&2
2627
exit 1
2728
fi
2829
ARTIFACTS_VERSION=$1
@@ -41,12 +42,15 @@ echo "download${ANALYSIS_NAME}: SCRIPTS_DIR=${SCRIPTS_DIR}"
4142

4243
################################################################
4344
# Download Artifacts that will be analyzed
44-
################################################################
4545
ARTIFACTS_GROUP="org.axonframework"
4646
source "${SCRIPTS_DIR}/downloadMavenArtifact.sh" -g ${ARTIFACTS_GROUP} -a axon-configuration -v ${ARTIFACTS_VERSION} || exit 2
4747
source "${SCRIPTS_DIR}/downloadMavenArtifact.sh" -g ${ARTIFACTS_GROUP} -a axon-disruptor -v ${ARTIFACTS_VERSION} || exit 2
4848
source "${SCRIPTS_DIR}/downloadMavenArtifact.sh" -g ${ARTIFACTS_GROUP} -a axon-eventsourcing -v ${ARTIFACTS_VERSION} || exit 2
4949
source "${SCRIPTS_DIR}/downloadMavenArtifact.sh" -g ${ARTIFACTS_GROUP} -a axon-messaging -v ${ARTIFACTS_VERSION} || exit 2
5050
source "${SCRIPTS_DIR}/downloadMavenArtifact.sh" -g ${ARTIFACTS_GROUP} -a axon-modelling -v ${ARTIFACTS_VERSION} || exit 2
5151
source "${SCRIPTS_DIR}/downloadMavenArtifact.sh" -g ${ARTIFACTS_GROUP} -a axon-test -v ${ARTIFACTS_VERSION} || exit 2
52+
53+
# Download the git history (bare clone without working tree) into the "source" folder.
54+
# This makes it possible to additionally import the git log into the graph
55+
git clone --bare https://github.com/AxonFramework/AxonFramework.git --branch "axon-${ARTIFACTS_VERSION}" "${SOURCE_DIRECTORY}/.git"
5256
################################################################

scripts/downloader/downloadReactRouter.sh

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,6 @@
1111

1212
# Note: react-router uses pnpm as package manager which needs to be installed
1313

14-
# Requires downloadMavenArtifact.sh
15-
1614
# Fail on any error (errexit = exit on first error, errtrace = error inherited from sub-shell, pipefail = exit on errors within piped commands)
1715
set -o errexit -o errtrace -o pipefail
1816

@@ -21,6 +19,7 @@ SCRIPT_FILE_NAME="$(basename -- "${BASH_SOURCE[0]}")"
2119
SCRIPT_FILE_NAME_WITHOUT_EXTENSION="${SCRIPT_FILE_NAME%%.*}"
2220
SCRIPT_FILE_NAME_WITHOUT_PREFIX_AND_EXTENSION="${SCRIPT_FILE_NAME_WITHOUT_EXTENSION##download}"
2321
ANALYSIS_NAME="${SCRIPT_FILE_NAME_WITHOUT_PREFIX_AND_EXTENSION}"
22+
SOURCE_DIRECTORY=${SOURCE_DIRECTORY:-"source"} # Get the source repository directory (defaults to "source")
2423

2524
echo "download${ANALYSIS_NAME}: SCRIPT_FILE_NAME=${SCRIPT_FILE_NAME}"
2625
echo "download${ANALYSIS_NAME}: SCRIPT_FILE_NAME_WITHOUT_EXTENSION=${SCRIPT_FILE_NAME_WITHOUT_EXTENSION}"
@@ -40,13 +39,13 @@ mkdir -p ./runtime/logs
4039
################################################################
4140
# Download react-router source files to be analyzed
4241
################################################################
43-
git clone https://github.com/remix-run/react-router.git source
42+
git clone https://github.com/remix-run/react-router.git "${SOURCE_DIRECTORY}"
4443
(
45-
cd source || exit
44+
cd "${SOURCE_DIRECTORY}" || exit
4645
git checkout "react-router@${PROJECT_VERSION}" || exit
4746
pnpm install --frozen-lockfile || exit
4847
npx --yes @jqassistant/ts-lce >./../runtime/logs/jqassostant-typescript-scan.log 2>&1 || exit
4948
)
5049
mkdir -p artifacts
51-
mv -nv "source/.reports/jqa/ts-output.json" "artifacts/ts-react-router-${PROJECT_VERSION}.json"
50+
mv -nv "${SOURCE_DIRECTORY}/.reports/jqa/ts-output.json" "artifacts/ts-react-router-${PROJECT_VERSION}.json"
5251
################################################################

scripts/prepareAnalysis.sh

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
# Prepares and validates the graph database before analysis
44

5-
# Requires executeQueryFunctions.sh, parseCsvFunctions.sh
5+
# Requires executeQueryFunctions.sh, parseCsvFunctions.sh, importGitLog.sh
66

77
# Fail on any error ("-e" = exit on first error, "-o pipefail" = exit on errors within piped commands)
88
set -o errexit -o pipefail
@@ -24,10 +24,10 @@ fi
2424
CYPHER_DIR=${CYPHER_DIR:-"${SCRIPTS_DIR}/../cypher"} # Repository directory containing the cypher queries
2525
echo "prepareAnalysis: CYPHER_DIR=${CYPHER_DIR}"
2626

27-
# Define functions to execute a cypher query from within the given file (first and only argument)
27+
# Define functions (like execute_cypher) to execute a cypher query from within the given file (first and only argument)
2828
source "${SCRIPTS_DIR}/executeQueryFunctions.sh"
2929

30-
# Define function(s) (e.g. is_csv_column_greater_zero) to parse CSV format strings from Cypher query results.
30+
# Define functions (like is_csv_column_greater_zero) to parse CSV format strings from Cypher query results.
3131
source "${SCRIPTS_DIR}/parseCsvFunctions.sh"
3232

3333
# Local Constants
@@ -37,17 +37,24 @@ EXTERNAL_DEPENDENCIES_CYPHER_DIR="$CYPHER_DIR/External_Dependencies"
3737
ARTIFACT_DEPENDENCIES_CYPHER_DIR="$CYPHER_DIR/Artifact_Dependencies"
3838
TYPES_CYPHER_DIR="$CYPHER_DIR/Types"
3939
TYPESCRIPT_CYPHER_DIR="$CYPHER_DIR/Typescript_Enrichment"
40+
JAVA_CYPHER_DIR="$CYPHER_DIR/Java_Enrichment"
4041

41-
# Preparation - Data verification: DEPENDS_ON releationships
42+
# Preparation - Data verification: DEPENDS_ON relationships
4243
dataVerificationResult=$( execute_cypher "${CYPHER_DIR}/Data_verification_DEPENDS_ON_relationships.cypher" "${@}")
4344
if ! is_csv_column_greater_zero "${dataVerificationResult}" "sourceNodeCount"; then
4445
echo "prepareAnalysis: Error: Data verification failed. At least one DEPENDS_ON relationship required. Check if the artifacts directory is empty or if the scan failed."
4546
exit 1
4647
fi
4748

49+
# Preparation - Import git log if source or history is available
50+
source "${SCRIPTS_DIR}/importGitLog.sh"
51+
execute_cypher "${JAVA_CYPHER_DIR}/Add_RESOLVES_TO_relationships_to_git_files.cypher"
52+
execute_cypher "${TYPESCRIPT_CYPHER_DIR}/Add_RESOLVES_TO_relationships_to_git_files.cypher"
53+
4854
# Preparation - Create indices
4955
execute_cypher "${CYPHER_DIR}/Create_Java_Type_index_for_full_qualified_name.cypher"
5056
execute_cypher "${CYPHER_DIR}/Create_Typescript_index_for_full_qualified_name.cypher"
57+
execute_cypher "${CYPHER_DIR}/Create_File_index_for_name.cypher"
5158

5259
# Preparation - Enrich Graph for Typescript by adding "module" and "name" properties
5360
execute_cypher "${TYPESCRIPT_CYPHER_DIR}/Add_name_and_module_properties.cypher"

0 commit comments

Comments
 (0)