Skip to content

Commit 178e50b

Browse files
committed
Add git repository node and connect it to the corresponding git log nodes
1 parent 699d753 commit 178e50b

File tree

5 files changed

+68
-9
lines changed

5 files changed

+68
-9
lines changed

COMMANDS.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,10 @@ It uses `git log` to extract commits, their authors and the names of the files c
235235

236236
```Cypher
237237
(Git:Log:Author)-[:AUTHORED]->(Git:Log:Commit)-[:CONTAINS_CHANGED]->(Git:Log:File)
238-
(Git:Log:Commit)->[:HAS_PARENT]-(Git:Log:Commit)
238+
(Git:Log:Commit)-[:HAS_PARENT]->(Git:Log:Commit)
239+
(Git:Repository)-[:HAS_COMMIT]->(Git:Log:Commit)
240+
(Git:Repository)-[:HAS_AUTHOR]->(Git:Log:Author)
241+
(Git:Repository)-[:HAS_FILE]->(Git:Log:File)
239242
```
240243

241244
👉**Note:** Commit messages containing `[bot]` are filtered out to ignore changes made by bots.
@@ -252,6 +255,9 @@ Here is the resulting schema:
252255

253256
```Cypher
254257
(Git:Log:Author)-[:AUTHORED]->(Git:Log:ChangeSpan)-[:CONTAINS_CHANGED]->(Git:Log:File)
258+
(Git:Repository)-[:HAS_CHANGE_SPAN]->(Git:Log:ChangeSpan)
259+
(Git:Repository)-[:HAS_AUTHOR]->(Git:Log:Author)
260+
(Git:Repository)-[:HAS_FILE]->(Git:Log:File)
255261
```
256262

257263
#### Parameter
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// Create git repository information node
// Merges a single node labelled Git and Repository whose properties are all taken from query parameters.
// Parameters:
//   git_repository_origin                  -> name
//   git_repository_current_tags            -> tags (comma-separated string, split into a list)
//   git_repository_current_branch          -> branch (empty string when the parameter is null)
//   git_repository_current_commit          -> commit (empty string when the parameter is null)
//   git_repository_directory_name          -> fileName
//   git_repository_absolute_directory_name -> absoluteFileName
// The git log import queries look this node up by its absoluteFileName property.
// NOTE(review): every property is part of the MERGE pattern, so a changed branch, commit or tag value
// presumably yields an additional node instead of updating an existing one - confirm this is intended.
2+
3+
MERGE (git_repository:Git:Repository {
4+
name: $git_repository_origin,
5+
tags: split($git_repository_current_tags, ','),
6+
branch: coalesce($git_repository_current_branch, ''),
7+
commit: coalesce($git_repository_current_commit, ''),
8+
fileName: $git_repository_directory_name,
9+
absoluteFileName: $git_repository_absolute_directory_name
10+
})

cypher/GitLog/Import_aggregated_git_log_csv_data.cypher

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
// Import aggregated git log CSV data with the following schema: (Git:Log:Author)-[:AUTHORED]->(Git:Log:ChangeSpan)-[:CONTAINS]->(Git:Log:File)
1+
// Import aggregated git log CSV data with the following schema: (Git:Log:Author)-[:AUTHORED]->(Git:Log:ChangeSpan)-[:CONTAINS_CHANGED]->(Git:Log:File) , (Git:Repository)-[:HAS_CHANGE_SPAN]->(Git:Log:ChangeSpan) , (Git:Repository)-[:HAS_AUTHOR]->(Git:Log:Author) , (Git:Repository)-[:HAS_FILE]->(Git:Log:File). Variables: git_repository_absolute_directory_name
22

33
LOAD CSV WITH HEADERS FROM "file:///aggregatedGitLog.csv" AS row
44
CALL { WITH row
5+
MATCH (git_repository:Git:Repository{absoluteFileName: $git_repository_absolute_directory_name})
56
MERGE (git_author:Git:Log:Author {name: row.author, email: row.email})
67
MERGE (git_change_span:Git:Log:ChangeSpan {
78
year: toInteger(row.year),
@@ -11,6 +12,9 @@ CALL { WITH row
1112
MERGE (git_file:Git:Log:File {fileName: row.filename})
1213
MERGE (git_author)-[:AUTHORED]->(git_change_span)
1314
MERGE (git_change_span)-[:CONTAINS_CHANGED]->(git_file)
15+
MERGE (git_repository)-[:HAS_CHANGE_SPAN]->(git_change_span)
16+
MERGE (git_repository)-[:HAS_AUTHOR]->(git_author)
17+
MERGE (git_repository)-[:HAS_FILE]->(git_file)
1418
} IN TRANSACTIONS OF 1000 ROWS
1519
RETURN count(DISTINCT row.author) AS numberOfAuthors
1620
,count(DISTINCT row.filename) AS numberOfFiles

cypher/GitLog/Import_git_log_csv_data.cypher

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
// Import git log CSV data with the following schema: (Git:Log:Author)-[:AUTHORED]->(Git:Log:Commit)-[:CONTAINS]->(Git:Log:File)
1+
// Import git log CSV data with the following schema: (Git:Log:Author)-[:AUTHORED]->(Git:Log:Commit)-[:CONTAINS_CHANGED]->(Git:Log:File) , (Git:Repository)-[:HAS_COMMIT]->(Git:Log:Commit) , (Git:Repository)-[:HAS_AUTHOR]->(Git:Log:Author) , (Git:Repository)-[:HAS_FILE]->(Git:Log:File). Variables: git_repository_absolute_directory_name
22

33
LOAD CSV WITH HEADERS FROM "file:///gitLog.csv" AS row
44
CALL { WITH row
5+
MATCH (git_repository:Git:Repository{absoluteFileName: $git_repository_absolute_directory_name})
56
MERGE (git_author:Git:Log:Author {name: row.author, email: row.email})
67
MERGE (git_commit:Git:Log:Commit {
78
hash: row.hash,
@@ -13,6 +14,9 @@ CALL { WITH row
1314
MERGE (git_file:Git:Log:File {fileName: row.filename})
1415
MERGE (git_author)-[:AUTHORED]->(git_commit)
1516
MERGE (git_commit)-[:CONTAINS_CHANGED]->(git_file)
17+
MERGE (git_repository)-[:HAS_COMMIT]->(git_commit)
18+
MERGE (git_repository)-[:HAS_AUTHOR]->(git_author)
19+
MERGE (git_repository)-[:HAS_FILE]->(git_file)
1620
} IN TRANSACTIONS OF 1000 ROWS
1721
RETURN count(DISTINCT row.author) AS numberOfAuthors
1822
,count(DISTINCT row.filename) AS numberOfFiles

scripts/importGit.sh

Lines changed: 41 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,37 @@ deleteExistingGitData() {
6565
execute_cypher "${GIT_LOG_CYPHER_DIR}/Delete_git_log_data.cypher"
6666
}
6767

68+
# Creates one (Git:Repository) node with information about the repository.
#
# Globals:
#   GIT_LOG_CYPHER_DIR (read) - directory that contains "Create_git_repository_node.cypher"
# Arguments:
#   $1 - the absolute/full repository directory path (first and only parameter)
# Outputs:
#   Writes every detected repository property to stdout before running the query.
create_git_repository_node() {
  local full_local_repository_path=${1}
  # Declared local so the detected values do not leak into (or overwrite)
  # identically named variables of the calling script.
  local local_repository remote_origin current_tags current_branch current_commit

  echo "importGit: - full_local_repository_path=${full_local_repository_path}"

  local_repository=$(basename "${full_local_repository_path}")
  echo "importGit: - local_repository= ${local_repository}"

  # "git -C <dir>" is used instead of "cd <dir> ; git ..." so that an inaccessible
  # directory makes git fail instead of silently inspecting the caller's working directory.
  # The "|| true" fallbacks keep the lookups best-effort: a missing remote, tag,
  # branch or commit results in an empty value rather than aborting the import.
  remote_origin=$(git -C "${full_local_repository_path}" config --get remote.origin.url || true)
  # Reduce the remote URL to the bare repository name (without a trailing ".git").
  remote_origin=$(basename -s .git "${remote_origin}" || true)
  echo "importGit: - remote_origin= ${remote_origin}"

  # All tags that point at the checked out commit, joined by commas.
  current_tags=$(git -C "${full_local_repository_path}" tag --points-at HEAD | paste -sd "," - || true)
  echo "importGit: - current_tags= ${current_tags}"

  current_branch=$(git -C "${full_local_repository_path}" rev-parse --abbrev-ref HEAD 2>/dev/null || true)
  echo "importGit: - current_branch= ${current_branch}"

  current_commit=$(git -C "${full_local_repository_path}" rev-parse HEAD || true)
  echo "importGit: - current_commit= ${current_commit}"

  execute_cypher "${GIT_LOG_CYPHER_DIR}/Create_git_repository_node.cypher" \
    "git_repository_origin=${remote_origin}" \
    "git_repository_current_tags=${current_tags}" \
    "git_repository_current_branch=${current_branch}" \
    "git_repository_current_commit=${current_commit}" \
    "git_repository_directory_name=${local_repository}" \
    "git_repository_absolute_directory_name=${full_local_repository_path}"
}
98+
6899
importGitLog() {
69100
echo "importGit: Preparing import by creating indexes for the full git log..."
70101
execute_cypher "${GIT_LOG_CYPHER_DIR}/Index_author_name.cypher"
@@ -73,7 +104,7 @@ importGitLog() {
73104
execute_cypher "${GIT_LOG_CYPHER_DIR}/Index_file_name.cypher"
74105

75106
echo "importGit: Importing full git log data into the Graph..."
76-
time execute_cypher "${GIT_LOG_CYPHER_DIR}/Import_git_log_csv_data.cypher"
107+
time execute_cypher "${GIT_LOG_CYPHER_DIR}/Import_git_log_csv_data.cypher" "${@}"
77108

78109
echo "importGit: Creating relationships for parent commits..."
79110
execute_cypher "${GIT_LOG_CYPHER_DIR}/Add_HAS_PARENT_relationships_to_commits.cypher"
@@ -86,7 +117,7 @@ importAggregatedGitLog() {
86117
execute_cypher "${GIT_LOG_CYPHER_DIR}/Index_file_name.cypher"
87118

88119
echo "importGit: Importing aggregated git log data into the Graph..."
89-
time execute_cypher "${GIT_LOG_CYPHER_DIR}/Import_aggregated_git_log_csv_data.cypher"
120+
time execute_cypher "${GIT_LOG_CYPHER_DIR}/Import_aggregated_git_log_csv_data.cypher" "${@}"
90121
}
91122

92123
commonPostGitLogImport() {
@@ -125,16 +156,20 @@ if [[ ! ${IMPORT_GIT_LOG_DATA_IF_SOURCE_IS_PRESENT} == "none" ]]; then
125156
fi
126157

127158
echo "importGit: Importing git repository ${repository}"
159+
full_repository_path=$(cd "${repository}"; pwd)
160+
161+
create_git_repository_node "${full_repository_path}"
162+
128163
if [[ ${IMPORT_GIT_LOG_DATA_IF_SOURCE_IS_PRESENT} == "aggregated" ]]; then
129164
# Import pre-aggregated git log data (no single commits) when IMPORT_GIT_LOG_DATA_IF_SOURCE_IS_PRESENT = "aggregated"
130165
(cd "${repository}" && source "${SCRIPTS_DIR}/createAggregatedGitLogCsv.sh" "${NEO4J_FULL_IMPORT_DIRECTORY}/aggregatedGitLog.csv")
131-
importAggregatedGitLog
132-
postAggregatedGitLogImport
166+
importAggregatedGitLog "git_repository_absolute_directory_name=${full_repository_path}"
167+
postAggregatedGitLogImport
133168
else
134169
# Import git log data with every commit when IMPORT_GIT_LOG_DATA_IF_SOURCE_IS_PRESENT = "full" (default)
135170
(cd "${repository}" && source "${SCRIPTS_DIR}/createGitLogCsv.sh" "${NEO4J_FULL_IMPORT_DIRECTORY}/gitLog.csv")
136-
importGitLog
137-
postGitLogImport
171+
importGitLog "git_repository_absolute_directory_name=${full_repository_path}"
172+
postGitLogImport
138173
fi
139174
done
140175
fi

0 commit comments

Comments
 (0)