Skip to content

Commit 3210af3

Browse files
authored
Merge pull request #159 from JohT/feature/add-commit-parent-from-git-log
Add parent git commit nodes and connect them
2 parents b8c1f57 + c5f8e44 commit 3210af3

File tree

8 files changed

+40
-4
lines changed

8 files changed

+40
-4
lines changed

.github/pull_request_template.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
### 🚀 Feature
2+
3+
-
4+
5+
### ⚙️ Optimization
6+
7+
-
8+
9+
### 🛠 Fix
10+
11+
-
12+
13+
### 📖 Documentation
14+
15+
-

COMMANDS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,7 @@ It uses `git log` to extract commits, their authors and the names of the files c
234234

235235
```Cypher
236236
(Git:Log:Author)-[:AUTHORED]->(Git:Log:Commit)-[:CONTAINS]->(Git:Log:File)
237+
(Git:Log:Commit)-[:HAS_PARENT]->(Git:Log:Commit)
237238
```
238239

239240
👉**Note:** Commits whose log entry contains `[bot]` (for example in the author name or the commit message) are filtered out to ignore changes made by bots.

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ The [Code Structure Analysis Pipeline](./.github/workflows/java-code-analysis.ym
125125
- [openTSNE](https://github.com/pavlin-policar/openTSNE)
126126
- [wordcloud](https://github.com/amueller/word_cloud)
127127
- [Graph Visualization](./graph-visualization/README.md) uses [node.js](https://nodejs.org/de) and the dependencies listed in [package.json](./graph-visualization/package.json).
128+
- [Check links in markdown documentation (GitHub workflow)](./.github/workflows/check-links-in-documentation.yml) uses [markdown-link-check](https://github.com/tcort/markdown-link-check).
128129

129130
**Big shout-out** 📣 to all the creators and contributors of these great libraries 👍. Projects like this wouldn't be possible without them. Feel free to [create an issue](https://github.com/JohT/code-graph-analysis-pipeline/issues/new/choose) if something is missing or wrong in the list.
130131

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
// Creates a HAS_PARENT relationship between Git Commit nodes and their parent.
2+
3+
MATCH (git_commit:Git:Commit)
4+
MATCH (parent_commit:Git:Commit{hash: git_commit.parent})
5+
MERGE (git_commit)-[:HAS_PARENT]->(parent_commit)
6+
RETURN count(DISTINCT git_commit.hash) AS numberOfCommitsWithParent

cypher/GitLog/Import_git_log_csv_data.cypher

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ CALL { WITH row
55
MERGE (git_author:Git:Log:Author {name: row.author, email: row.email})
66
MERGE (git_commit:Git:Log:Commit {
77
hash: row.hash,
8+
parent: coalesce(row.parent, ''),
89
message: row.message,
910
timestamp: datetime(row.timestamp),
1011
timestamp_unix: toInteger(row.timestamp_unix)
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
// Create index for parent commit hash (git data)
2+
3+
CREATE INDEX INDEX_COMMIT_PARENT IF NOT EXISTS FOR (n:Commit) ON (n.parent)

markdown-lint-check-config.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,9 @@
44
"pattern": "^http://localhost"
55
}
66
],
7+
"timeout": "30s",
8+
"retryOn429": true,
9+
"retryCount": 3,
10+
"fallbackRetryDelay": "60s",
711
"aliveStatusCodes": [200, 202, 206]
812
}

scripts/importGitLog.sh

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -78,17 +78,18 @@ echo "importGitLog: Creating ${OUTPUT_CSV_FILENAME} from git log..."
7878
cd "${repository}" || exit
7979

8080
# Prints the header line of the CSV file with the names of the columns.
81-
echo "hash,author,email,timestamp,timestamp_unix,message,filename" > "${OUTPUT_CSV_FILENAME}"
81+
echo "hash,parent,author,email,timestamp,timestamp_unix,message,filename" > "${OUTPUT_CSV_FILENAME}"
8282

8383
# Prints the git log in CSV format including the changed files.
8484
# Includes quoted strings, double quote escaping and supports commas in strings.
85-
git log --no-merges --pretty=format:' %h,,,%an,,,%ae,,,%aI,,,%ct,,,%s' --name-only | \
86-
awk 'BEGIN { COMMA=",";QUOTE="\"" } /^ / { split($0, a, ",,,"); gsub(/^ /, "", a[1]); gsub(/"/, "\"\"", a[2]); gsub(/"/, "\"\"", a[3]); gsub(/"/, "\"\"", a[6]); gsub(/\\/, " ", a[6]); commit=a[1] COMMA QUOTE a[2] QUOTE COMMA QUOTE a[3] QUOTE COMMA a[4] COMMA a[5] COMMA QUOTE a[6] QUOTE } NF && !/^\ / { print commit ",\""$0"\"" }' | \
85+
git log --no-merges --pretty=format:' %h,,,%p,,,%an,,,%ae,,,%aI,,,%ct,,,%s' --name-only | \
86+
awk 'BEGIN { COMMA=",";QUOTE="\"" } /^ / { split($0, a, ",,,"); gsub(/^ /, "", a[1]); gsub(/"/, "\"\"", a[3]); gsub(/"/, "\"\"", a[4]); gsub(/"/, "\"\"", a[7]); gsub(/\\/, " ", a[7]); commit=a[1] COMMA a[2] COMMA QUOTE a[3] QUOTE COMMA QUOTE a[4] QUOTE COMMA a[5] COMMA a[6] COMMA QUOTE a[7] QUOTE } NF && !/^\ / { print commit ",\""$0"\"" }' | \
8787
grep -v -F '[bot]' >> "${OUTPUT_CSV_FILENAME}"
8888
# Explanation:
8989
#
9090
# - --no-merges: Excludes merge commits from the log.
9191
# - %h: Abbreviated commit hash
92+
# - %p: Abbreviated parent commit hash
9293
# - %an: Author name
9394
# - %ae: Author email
9495
# - %aI: Author date, ISO 8601 format
@@ -125,6 +126,7 @@ GIT_LOG_CYPHER_DIR="${CYPHER_DIR}/GitLog"
125126
echo "importGitLog: Prepare import by creating indexes..."
126127
execute_cypher "${GIT_LOG_CYPHER_DIR}/Index_author_name.cypher"
127128
execute_cypher "${GIT_LOG_CYPHER_DIR}/Index_commit_hash.cypher"
129+
execute_cypher "${GIT_LOG_CYPHER_DIR}/Index_commit_parent.cypher"
128130
execute_cypher "${GIT_LOG_CYPHER_DIR}/Index_file_name.cypher"
129131

130132
echo "importGitLog: Deleting all existing git data in the Graph..."
@@ -133,7 +135,10 @@ execute_cypher "${GIT_LOG_CYPHER_DIR}/Delete_git_log_data.cypher"
133135
echo "importGitLog: Importing git log data into the Graph..."
134136
time execute_cypher "${GIT_LOG_CYPHER_DIR}/Import_git_log_csv_data.cypher"
135137

136-
echo "importGitLog: Creating connections to nodes with matching file names..."
138+
echo "importGitLog: Creating relationships for parent commits..."
139+
execute_cypher "${GIT_LOG_CYPHER_DIR}/Add_HAS_PARENT_relationships_to_commits.cypher"
140+
141+
echo "importGitLog: Creating relationships to nodes with matching file names..."
137142
execute_cypher "${GIT_LOG_CYPHER_DIR}/Add_RESOLVES_TO_relationships_to_git_files_for_Java.cypher"
138143
execute_cypher "${GIT_LOG_CYPHER_DIR}/Add_RESOLVES_TO_relationships_to_git_files_for_Typescript.cypher"
139144
execute_cypher "${GIT_LOG_CYPHER_DIR}/Set_number_of_git_commits.cypher"

0 commit comments

Comments
 (0)