Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions .github/pull_request_template.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
### 🚀 Feature

-

### ⚙️ Optimization

-

### 🛠 Fix

-

### 📖 Documentation

-
1 change: 1 addition & 0 deletions COMMANDS.md
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ It uses `git log` to extract commits, their authors and the names of the files c

```Cypher
(Git:Log:Author)-[:AUTHORED]->(Git:Log:Commit)-[:CONTAINS]->(Git:Log:File)
(Git:Log:Commit)-[:HAS_PARENT]->(Git:Log:Commit)
```

👉**Note:** Commit messages containing `[bot]` are filtered out to ignore changes made by bots.
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ The [Code Structure Analysis Pipeline](./.github/workflows/java-code-analysis.ym
- [openTSNE](https://github.com/pavlin-policar/openTSNE)
- [wordcloud](https://github.com/amueller/word_cloud)
- [Graph Visualization](./graph-visualization/README.md) uses [node.js](https://nodejs.org/de) and the dependencies listed in [package.json](./graph-visualization/package.json).
- [Check links in markdown documentation (GitHub workflow)](./.github/workflows/check-links-in-documentation.yml) uses [markdown-link-check](https://github.com/tcort/markdown-link-check).

**Big shout-out** 📣 to all the creators and contributors of these great libraries 👍. Projects like this wouldn't be possible without them. Feel free to [create an issue](https://github.com/JohT/code-graph-analysis-pipeline/issues/new/choose) if something is missing or wrong in the list.

Expand Down
6 changes: 6 additions & 0 deletions cypher/GitLog/Add_HAS_PARENT_relationships_to_commits.cypher
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
// Links every Git Commit node to its parent commit with a HAS_PARENT relationship.
// A parent is found by comparing the "parent" property of a commit with the "hash" property of the Git Commit nodes.
// Commits whose "parent" value matches no commit hash are left without a HAS_PARENT relationship.
// Returns the number of distinct commits (by hash) for which a parent was found.

MATCH (child:Git:Commit)
MATCH (parent:Git:Commit)
WHERE parent.hash = child.parent
MERGE (child)-[:HAS_PARENT]->(parent)
RETURN count(DISTINCT child.hash) AS numberOfCommitsWithParent
1 change: 1 addition & 0 deletions cypher/GitLog/Import_git_log_csv_data.cypher
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ CALL { WITH row
MERGE (git_author:Git:Log:Author {name: row.author, email: row.email})
MERGE (git_commit:Git:Log:Commit {
hash: row.hash,
parent: coalesce(row.parent, ''),
message: row.message,
timestamp: datetime(row.timestamp),
timestamp_unix: toInteger(row.timestamp_unix)
Expand Down
3 changes: 3 additions & 0 deletions cypher/GitLog/Index_commit_parent.cypher
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Creates an index on the "parent" property (parent commit hash) of Commit nodes from the git data.
// The statement is idempotent: nothing happens if the index already exists.

CREATE INDEX INDEX_COMMIT_PARENT IF NOT EXISTS
FOR (commit:Commit)
ON (commit.parent)
4 changes: 4 additions & 0 deletions markdown-lint-check-config.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,9 @@
"pattern": "^http://localhost"
}
],
"timeout": "30s",
"retryOn429": true,
"retryCount": 3,
"fallbackRetryDelay": "60s",
"aliveStatusCodes": [200, 202, 206]
}
13 changes: 9 additions & 4 deletions scripts/importGitLog.sh
Original file line number Diff line number Diff line change
Expand Up @@ -78,17 +78,18 @@ echo "importGitLog: Creating ${OUTPUT_CSV_FILENAME} from git log..."
cd "${repository}" || exit

# Prints the header line of the CSV file with the names of the columns.
echo "hash,author,email,timestamp,timestamp_unix,message,filename" > "${OUTPUT_CSV_FILENAME}"
echo "hash,parent,author,email,timestamp,timestamp_unix,message,filename" > "${OUTPUT_CSV_FILENAME}"

# Prints the git log in CSV format including the changed files.
# Includes quoted strings, double quote escaping and supports commas in strings.
git log --no-merges --pretty=format:' %h,,,%an,,,%ae,,,%aI,,,%ct,,,%s' --name-only | \
awk 'BEGIN { COMMA=",";QUOTE="\"" } /^ / { split($0, a, ",,,"); gsub(/^ /, "", a[1]); gsub(/"/, "\"\"", a[2]); gsub(/"/, "\"\"", a[3]); gsub(/"/, "\"\"", a[6]); gsub(/\\/, " ", a[6]); commit=a[1] COMMA QUOTE a[2] QUOTE COMMA QUOTE a[3] QUOTE COMMA a[4] COMMA a[5] COMMA QUOTE a[6] QUOTE } NF && !/^\ / { print commit ",\""$0"\"" }' | \
git log --no-merges --pretty=format:' %h,,,%p,,,%an,,,%ae,,,%aI,,,%ct,,,%s' --name-only | \
awk 'BEGIN { COMMA=",";QUOTE="\"" } /^ / { split($0, a, ",,,"); gsub(/^ /, "", a[1]); gsub(/"/, "\"\"", a[3]); gsub(/"/, "\"\"", a[4]); gsub(/"/, "\"\"", a[7]); gsub(/\\/, " ", a[7]); commit=a[1] COMMA a[2] COMMA QUOTE a[3] QUOTE COMMA QUOTE a[4] QUOTE COMMA a[5] COMMA a[6] COMMA QUOTE a[7] QUOTE } NF && !/^\ / { print commit ",\""$0"\"" }' | \
grep -v -F '[bot]' >> "${OUTPUT_CSV_FILENAME}"
# Explanation:
#
# - --no-merges: Excludes merge commits from the log.
# - %h: Abbreviated commit hash
# - %p: Abbreviated parent commit hash
# - %an: Author name
# - %ae: Author email
# - %aI: Author date, ISO 8601 format
Expand Down Expand Up @@ -125,6 +126,7 @@ GIT_LOG_CYPHER_DIR="${CYPHER_DIR}/GitLog"
echo "importGitLog: Prepare import by creating indexes..."
execute_cypher "${GIT_LOG_CYPHER_DIR}/Index_author_name.cypher"
execute_cypher "${GIT_LOG_CYPHER_DIR}/Index_commit_hash.cypher"
execute_cypher "${GIT_LOG_CYPHER_DIR}/Index_commit_parent.cypher"
execute_cypher "${GIT_LOG_CYPHER_DIR}/Index_file_name.cypher"

echo "importGitLog: Deleting all existing git data in the Graph..."
Expand All @@ -133,7 +135,10 @@ execute_cypher "${GIT_LOG_CYPHER_DIR}/Delete_git_log_data.cypher"
echo "importGitLog: Importing git log data into the Graph..."
time execute_cypher "${GIT_LOG_CYPHER_DIR}/Import_git_log_csv_data.cypher"

echo "importGitLog: Creating connections to nodes with matching file names..."
echo "importGitLog: Creating relationships for parent commits..."
execute_cypher "${GIT_LOG_CYPHER_DIR}/Add_HAS_PARENT_relationships_to_commits.cypher"

echo "importGitLog: Creating relationships to nodes with matching file names..."
execute_cypher "${GIT_LOG_CYPHER_DIR}/Add_RESOLVES_TO_relationships_to_git_files_for_Java.cypher"
execute_cypher "${GIT_LOG_CYPHER_DIR}/Add_RESOLVES_TO_relationships_to_git_files_for_Typescript.cypher"
execute_cypher "${GIT_LOG_CYPHER_DIR}/Set_number_of_git_commits.cypher"
Expand Down