From cdd58248211e1d97e744ec4cbc8399808aa691a6 Mon Sep 17 00:00:00 2001 From: JohT <7671054+JohT@users.noreply.github.com> Date: Sun, 16 Jun 2024 09:29:46 +0200 Subject: [PATCH 1/3] Add parent git commit nodes and connect them --- COMMANDS.md | 1 + .../Add_HAS_PARENT_relationships_to_commits.cypher | 6 ++++++ cypher/GitLog/Import_git_log_csv_data.cypher | 1 + cypher/GitLog/Index_commit_parent.cypher | 3 +++ scripts/importGitLog.sh | 13 +++++++++---- 5 files changed, 20 insertions(+), 4 deletions(-) create mode 100644 cypher/GitLog/Add_HAS_PARENT_relationships_to_commits.cypher create mode 100644 cypher/GitLog/Index_commit_parent.cypher diff --git a/COMMANDS.md b/COMMANDS.md index a1f01bd33..27049c48b 100644 --- a/COMMANDS.md +++ b/COMMANDS.md @@ -234,6 +234,7 @@ It uses `git log` to extract commits, their authors and the names of the files c ```Cypher (Git:Log:Author)-[:AUTHORED]->(Git:Log:Commit)->[:CONTAINS]->(Git:Log:File) +(Git:Log:Commit)-[:HAS_PARENT]->(Git:Log:Commit) ``` 👉**Note:** Commit messages containing `[bot]` are filtered out to ignore changes made by bots. diff --git a/cypher/GitLog/Add_HAS_PARENT_relationships_to_commits.cypher b/cypher/GitLog/Add_HAS_PARENT_relationships_to_commits.cypher new file mode 100644 index 000000000..c8ad57730 --- /dev/null +++ b/cypher/GitLog/Add_HAS_PARENT_relationships_to_commits.cypher @@ -0,0 +1,6 @@ +// Creates a HAS_PARENT relationship between Git Commit nodes and their parent. 
+ +MATCH (git_commit:Git:Commit) +MATCH (parent_commit:Git:Commit{hash: git_commit.parent}) +MERGE (git_commit)-[:HAS_PARENT]->(parent_commit) +RETURN count(DISTINCT git_commit.hash) AS numberOfCommitsWithParent \ No newline at end of file diff --git a/cypher/GitLog/Import_git_log_csv_data.cypher b/cypher/GitLog/Import_git_log_csv_data.cypher index 113ed8bca..e7961e604 100644 --- a/cypher/GitLog/Import_git_log_csv_data.cypher +++ b/cypher/GitLog/Import_git_log_csv_data.cypher @@ -5,6 +5,7 @@ CALL { WITH row MERGE (git_author:Git:Log:Author {name: row.author, email: row.email}) MERGE (git_commit:Git:Log:Commit { hash: row.hash, + parent: coalesce(row.parent, ''), message: row.message, timestamp: datetime(row.timestamp), timestamp_unix: toInteger(row.timestamp_unix) diff --git a/cypher/GitLog/Index_commit_parent.cypher b/cypher/GitLog/Index_commit_parent.cypher new file mode 100644 index 000000000..37da0cd4d --- /dev/null +++ b/cypher/GitLog/Index_commit_parent.cypher @@ -0,0 +1,3 @@ +// Create index for parent commit hash (git data) + +CREATE INDEX INDEX_COMMIT_PARENT IF NOT EXISTS FOR (n:Commit) ON (n.parent) \ No newline at end of file diff --git a/scripts/importGitLog.sh b/scripts/importGitLog.sh index 93add3ef7..548928d92 100755 --- a/scripts/importGitLog.sh +++ b/scripts/importGitLog.sh @@ -78,17 +78,18 @@ echo "importGitLog: Creating ${OUTPUT_CSV_FILENAME} from git log..." cd "${repository}" || exit # Prints the header line of the CSV file with the names of the columns. - echo "hash,author,email,timestamp,timestamp_unix,message,filename" > "${OUTPUT_CSV_FILENAME}" + echo "hash,parent,author,email,timestamp,timestamp_unix,message,filename" > "${OUTPUT_CSV_FILENAME}" # Prints the git log in CSV format including the changed files. # Includes quoted strings, double quote escaping and supports commas in strings. 
- git log --no-merges --pretty=format:' %h,,,%an,,,%ae,,,%aI,,,%ct,,,%s' --name-only | \ - awk 'BEGIN { COMMA=",";QUOTE="\"" } /^ / { split($0, a, ",,,"); gsub(/^ /, "", a[1]); gsub(/"/, "\"\"", a[2]); gsub(/"/, "\"\"", a[3]); gsub(/"/, "\"\"", a[6]); gsub(/\\/, " ", a[6]); commit=a[1] COMMA QUOTE a[2] QUOTE COMMA QUOTE a[3] QUOTE COMMA a[4] COMMA a[5] COMMA QUOTE a[6] QUOTE } NF && !/^\ / { print commit ",\""$0"\"" }' | \ + git log --no-merges --pretty=format:' %h,,,%p,,,%an,,,%ae,,,%aI,,,%ct,,,%s' --name-only | \ + awk 'BEGIN { COMMA=",";QUOTE="\"" } /^ / { split($0, a, ",,,"); gsub(/^ /, "", a[1]); gsub(/"/, "\"\"", a[3]); gsub(/"/, "\"\"", a[4]); gsub(/"/, "\"\"", a[7]); gsub(/\\/, " ", a[7]); commit=a[1] COMMA a[2] COMMA QUOTE a[3] QUOTE COMMA QUOTE a[4] QUOTE COMMA a[5] COMMA a[6] COMMA QUOTE a[7] QUOTE } NF && !/^\ / { print commit ",\""$0"\"" }' | \ grep -v -F '[bot]' >> "${OUTPUT_CSV_FILENAME}" # Explanation: # # - --no-merges: Excludes merge commits from the log. # - %h: Abbreviated commit hash + # - %p: Abbreviated parent commit hash # - %an: Author name # - %ae: Author email # - %aI: Author date, ISO 8601 format @@ -125,6 +126,7 @@ GIT_LOG_CYPHER_DIR="${CYPHER_DIR}/GitLog" echo "importGitLog: Prepare import by creating indexes..." execute_cypher "${GIT_LOG_CYPHER_DIR}/Index_author_name.cypher" execute_cypher "${GIT_LOG_CYPHER_DIR}/Index_commit_hash.cypher" +execute_cypher "${GIT_LOG_CYPHER_DIR}/Index_commit_parent.cypher" execute_cypher "${GIT_LOG_CYPHER_DIR}/Index_file_name.cypher" echo "importGitLog: Deleting all existing git data in the Graph..." @@ -133,7 +135,10 @@ execute_cypher "${GIT_LOG_CYPHER_DIR}/Delete_git_log_data.cypher" echo "importGitLog: Importing git log data into the Graph..." time execute_cypher "${GIT_LOG_CYPHER_DIR}/Import_git_log_csv_data.cypher" -echo "importGitLog: Creating connections to nodes with matching file names..." +echo "importGitLog: Creating relationships for parent commits..." 
+execute_cypher "${GIT_LOG_CYPHER_DIR}/Add_HAS_PARENT_relationships_to_commits.cypher" + +echo "importGitLog: Creating relationships to nodes with matching file names..." execute_cypher "${GIT_LOG_CYPHER_DIR}/Add_RESOLVES_TO_relationships_to_git_files_for_Java.cypher" execute_cypher "${GIT_LOG_CYPHER_DIR}/Add_RESOLVES_TO_relationships_to_git_files_for_Typescript.cypher" execute_cypher "${GIT_LOG_CYPHER_DIR}/Set_number_of_git_commits.cypher" From 56d2c3bb3076847f19d3c386ba858aade0872d36 Mon Sep 17 00:00:00 2001 From: JohT <7671054+JohT@users.noreply.github.com> Date: Sun, 16 Jun 2024 10:39:58 +0200 Subject: [PATCH 2/3] Document and tune markdown link checking --- README.md | 1 + markdown-lint-check-config.json | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/README.md b/README.md index dac7a07bc..a4a856489 100644 --- a/README.md +++ b/README.md @@ -125,6 +125,7 @@ The [Code Structure Analysis Pipeline](./.github/workflows/java-code-analysis.ym - [openTSNE](https://github.com/pavlin-policar/openTSNE) - [wordcloud](https://github.com/amueller/word_cloud) - [Graph Visualization](./graph-visualization/README.md) uses [node.js](https://nodejs.org/de) and the dependencies listed in [package.json](./graph-visualization/package.json). +- [Check links in markdown documentation (GitHub workflow)](./.github/workflows/check-links-in-documentation.yml) uses [markdown-link-check](https://github.com/tcort/markdown-link-check). **Big shout-out** 📣 to all the creators and contributors of these great libraries 👍. Projects like this wouldn't be possible without them. Feel free to [create an issue](https://github.com/JohT/code-graph-analysis-pipeline/issues/new/choose) if something is missing or wrong in the list. 
diff --git a/markdown-lint-check-config.json b/markdown-lint-check-config.json index e56bbcac3..3d67e393b 100644 --- a/markdown-lint-check-config.json +++ b/markdown-lint-check-config.json @@ -4,5 +4,9 @@ "pattern": "^http://localhost" } ], + "timeout": "30s", + "retryOn429": true, + "retryCount": 3, + "fallbackRetryDelay": "60s", "aliveStatusCodes": [200, 202, 206] } \ No newline at end of file From c5f8e445017a781fa314c5be205a095424252495 Mon Sep 17 00:00:00 2001 From: JohT <7671054+JohT@users.noreply.github.com> Date: Sun, 16 Jun 2024 10:49:11 +0200 Subject: [PATCH 3/3] Add template for new pull requests --- .github/pull_request_template.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 .github/pull_request_template.md diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 000000000..22efefa16 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,15 @@ +### 🚀 Feature + +- + +### ⚙️ Optimization + +- + +### 🛠 Fix + +- + +### 📖 Documentation + +- \ No newline at end of file