You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
# Uses git log to create a comma separated values (CSV) file containing aggregated changes, their author name and email address, year and month for all the files that were changed. The CSV is then imported into Neo4j.
4
+
5
+
# Note: This script needs the path to a git repository directory. It defaults to SOURCE_DIRECTORY ("source").
6
+
# Note: Import will be skipped without an error if the directory is not a git repository.
7
+
# Note: This script needs git to be installed.
8
+
9
+
# Fail on any error ("-o errexit", same as "-e" = exit on first error, "-o pipefail" = exit on errors within piped commands)
10
+
set -e -o pipefail # "-e" is the short form of "-o errexit"
11
+
12
+
# Overrideable Defaults
13
+
NEO4J_EDITION=${NEO4J_EDITION:-"community"} # Choose "community" or "enterprise"
14
+
# Keep a caller-provided NEO4J_VERSION; otherwise fall back to 5.16.0
: "${NEO4J_VERSION:=5.16.0}"
15
+
TOOLS_DIRECTORY=${TOOLS_DIRECTORY:-"tools"} # Get the tools directory (defaults to "tools")
16
+
SOURCE_DIRECTORY=${SOURCE_DIRECTORY:-"source"} # Get the source repository directory (defaults to "source")
17
+
18
+
# Default and initial values for command line options
# - --no-merges: Excludes merge commits from the log.
95
+
# - %ad: Author date (formatted as specified later)
96
+
# - %an: Author name
97
+
# - %ae: Author email
98
+
# - %ct: Commit date, Unix timestamp
99
+
# - %s: Subject of the commit
100
+
# - --date=format:'%Y,%m': Takes the year and the month of the date separated by a comma for example 2024,06
101
+
# - --name-only: Lists the files affected by each commit.
102
+
# - --pretty=format starts with a space that is needed to detect the start of a line.
103
+
# - The chosen delimiters ,,, are used to separate these fields to make parsing easier.
104
+
# It is very unlikely that they appear in the contents and will be used as an intermediate step before escaping.
105
+
#
106
+
# - BEGIN { COMMA=","; QUOTE="\"" }: Initializes the variables COMMA and QUOTE to hold a comma and a double-quote character respectively.
107
+
# - /^ / { ... }: Processes lines that start with a space (indicating the commit information line written by --pretty=format).
108
+
# - gsub(/^ /, "", a[1]): Removes leading spaces from the first field (commit hash) that was used to indicate a new commit.
109
+
# - gsub(/"/, "\"\"", a[2]) escapes double quotes with two double quotes (CSV standard).
110
+
# a[2] is the commit author. Double quote escaping is done for every string column
111
+
# - commit=...: Constructs the commit information in CSV format, including the year-month of the change, quoted author name, and email.
112
+
# - NF && !/^\ / { print "\""$0"\"," commit }: For non-empty lines that do not start with a space (indicating a file name from --name-only),
113
+
# it prints the file name enclosed in quotes, followed by the commit information.
114
+
#
115
+
# - grep -v -F '[bot]': Filters out commits where the commit message includes [bot]
116
+
# Used to identify commits made by automated systems or bots.
117
+
#
118
+
# - sort | uniq -c: Sorts the lines by their content (order of columns essential for that), removes duplicate lines and adds the number of duplicates at the beginning of each line
119
+
# - sed -E 's/^ *([0-9]+) (.+)/\2,\1/g': Reformats each line so that the commit count becomes the last column, delimited by a comma.
# Uses git log to create a comma separated values (CSV) file containing all commits, their author, email address, date and all the file names that were changed with it. The CSV is then imported into Neo4j.
4
+
5
+
# Note: This script needs the path to a git repository directory. It defaults to SOURCE_DIRECTORY ("source").
6
+
# Note: Import will be skipped without an error if the directory is not a git repository.
7
+
# Note: This script needs git to be installed.
8
+
9
+
# Fail on any error ("-o errexit", same as "-e" = exit on first error, "-o pipefail" = exit on errors within piped commands)
10
+
set -e -o pipefail # "-e" is the short form of "-o errexit"
11
+
12
+
# Overrideable Defaults
13
+
NEO4J_EDITION=${NEO4J_EDITION:-"community"} # Choose "community" or "enterprise"
14
+
# Keep a caller-provided NEO4J_VERSION; otherwise fall back to 5.16.0
: "${NEO4J_VERSION:=5.16.0}"
15
+
TOOLS_DIRECTORY=${TOOLS_DIRECTORY:-"tools"} # Get the tools directory (defaults to "tools")
16
+
SOURCE_DIRECTORY=${SOURCE_DIRECTORY:-"source"} # Get the source repository directory (defaults to "source")
17
+
18
+
# Default and initial values for command line options
# - --no-merges: Excludes merge commits from the log.
92
+
# - %h: Abbreviated commit hash
93
+
# - %an: Author name
94
+
# - %ae: Author email
95
+
# - %aI: Author date, ISO 8601 format
96
+
# - %ct: Commit date, Unix timestamp
97
+
# - %s: Subject of the commit
98
+
# - --name-only: Lists the files affected by each commit.
99
+
# - --pretty=format starts with a space that is needed to detect the start of a line.
100
+
# - The chosen delimiters ,,, are used to separate these fields to make parsing easier.
101
+
# It is very unlikely that they appear in the contents and will be used as an intermediate step before escaping.
102
+
#
103
+
# - BEGIN { COMMA=","; QUOTE="\"" }: Initializes the variables COMMA and QUOTE to hold a comma and a double-quote character respectively.
104
+
# - /^ / { ... }: Processes lines that start with a space (indicating the commit information line written by --pretty=format).
105
+
# - gsub(/^ /, "", a[1]): Removes leading spaces from the first field (commit hash) that was used to indicate a new commit.
106
+
# - gsub(/"/, "\"\"", a[6]) escapes double quotes with two double quotes (CSV standard).
107
+
# a[6] is the commit message column. Double quote escaping is done for every string column
108
+
# - gsub(/\\/, " ", a[6]): Replaces backslashes in the commit message with spaces.
109
+
# Otherwise, \" would lead to an error since it would be seen as a non-escaped double quote.
110
+
# - commit=...: Constructs the commit information in CSV format, including the quoted author name, author email, and commit message except for the file name.
111
+
# - NF && !/^\ / { print commit ",\""$0"\"" }: For non-empty lines that do not start with a space (indicating a file name from --name-only),
112
+
# it prints the commit information followed by the file name(s), enclosed in quotes.
113
+
#
114
+
# - grep -v -F '[bot]': Filters out commits where the commit message includes [bot]
115
+
# Used to identify commits made by automated systems or bots.
0 commit comments