Skip to content

Commit 4203f10

Browse files
committed
Add file distance to DEPENDS_ON relationships
1 parent 7bfd6a5 commit 4203f10

File tree

5 files changed

+128
-4
lines changed

5 files changed

+128
-4
lines changed
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// Get file distance distribution for dependencies (intuitively the fewest number of change directory commands needed)
2+
3+
MATCH (source:File)-[dependency:DEPENDS_ON]->(target:File)
4+
WHERE dependency.fileDistanceAsFewestChangeDirectoryCommands IS NOT NULL
5+
RETURN dependency.fileDistanceAsFewestChangeDirectoryCommands
6+
,count(*) AS numberOfDependencies
7+
,count(DISTINCT source) AS numberOfDependencyUsers
8+
,count(DISTINCT target) AS numberOfDependencyProviders
9+
,collect(source.fileName + ' uses ' + target.fileName)[0..4] AS examples
10+
ORDER BY dependency.fileDistanceAsFewestChangeDirectoryCommands
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// Set file distance for dependencies as the shortest path of CONTAINS relationships (intuitively the fewest number of change directory commands needed)
2+
3+
MATCH (source:File)-[dependency:DEPENDS_ON]->(target:File)
4+
MATCH changeDirectoryPath = shortestPath((source)-[:CONTAINS*1..15]-(target))
5+
WHERE ALL ( file IN nodes(changeDirectoryPath) WHERE "File" IN labels(file) )
6+
OPTIONAL MATCH (source)<-[:CONTAINS]-(commonDirectory:Directory)-[:CONTAINS]->(target)
7+
WITH *, CASE commonDirectory
8+
WHEN IS NOT NULL THEN 0
9+
ELSE length(changeDirectoryPath)
10+
END AS fileDistanceAsFewestChangeDirectoryCommands
11+
SET dependency.fileDistanceAsFewestChangeDirectoryCommands
12+
=fileDistanceAsFewestChangeDirectoryCommands
13+
RETURN fileDistanceAsFewestChangeDirectoryCommands
14+
,count(*) AS numberOfDependencies
15+
,count(DISTINCT source) AS numberOfDependencyUsers
16+
,count(DISTINCT target) AS numberOfDependencyProviders
17+
,collect(source.fileName + ' uses ' + target.fileName)[0..4] AS examples
18+
ORDER BY fileDistanceAsFewestChangeDirectoryCommands

jupyter/InternalDependenciesJava.ipynb

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,27 @@
7070
" return pd.DataFrame([r.values() for r in records], columns=keys)"
7171
]
7272
},
73+
{
74+
"cell_type": "code",
75+
"execution_count": null,
76+
"id": "c09da482",
77+
"metadata": {},
78+
"outputs": [],
79+
"source": [
80+
"def query_first_non_empty_cypher_to_data_frame(*filenames : str, limit: int = 10_000):\n",
81+
" \"\"\"\n",
82+
" Executes the Cypher queries of the given files and returns the first result that is not empty.\n",
83+
" If all given file names result in empty results, the last (empty) result will be returned.\n",
84+
" By additionally specifying \"limit=\" the \"LIMIT\" keyword will be appended to the query so that only the first results get returned.\n",
85+
" \"\"\" \n",
86+
" result=pd.DataFrame()\n",
87+
" for filename in filenames:\n",
88+
" result=query_cypher_to_data_frame(filename, limit)\n",
89+
" if not result.empty:\n",
90+
" return result\n",
91+
" return result"
92+
]
93+
},
7394
{
7495
"cell_type": "code",
7596
"execution_count": null,
@@ -562,6 +583,27 @@
562583
"annotated_elements=query_cypher_to_data_frame(\"../cypher/Java/Annotated_code_elements.cypher\", limit=30)\n",
563584
"annotated_elements"
564585
]
586+
},
587+
{
588+
"cell_type": "markdown",
589+
"id": "7bf903e2",
590+
"metadata": {},
591+
"source": [
592+
"### Table 10 - Distance distribution between dependent files\n",
593+
"\n",
594+
"This table shows the file directory distance distribution between dependent files. Intuitively, the distance is given by the fewest number of change directory commands needed to navigate between a file and a dependency it uses. These distances are aggregated to show how many dependent files are in the same directory, how many are just one change directory command apart, and so on."
595+
]
596+
},
597+
{
598+
"cell_type": "code",
599+
"execution_count": null,
600+
"id": "5b86a804",
601+
"metadata": {},
602+
"outputs": [],
603+
"source": [
604+
"query_first_non_empty_cypher_to_data_frame(\"../cypher/Internal_Dependencies/Get_file_distance_as_shortest_contains_path_for_dependencies.cypher\",\n",
605+
" \"../cypher/Internal_Dependencies/Set_file_distance_as_shortest_contains_path_for_dependencies.cypher\", limit=20)"
606+
]
565607
}
566608
],
567609
"metadata": {

jupyter/InternalDependenciesTypescript.ipynb

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,27 @@
7070
" return pd.DataFrame([r.values() for r in records], columns=keys)"
7171
]
7272
},
73+
{
74+
"cell_type": "code",
75+
"execution_count": null,
76+
"id": "bb3646d7",
77+
"metadata": {},
78+
"outputs": [],
79+
"source": [
80+
"def query_first_non_empty_cypher_to_data_frame(*filenames : str, limit: int = 10_000):\n",
81+
" \"\"\"\n",
82+
" Executes the Cypher queries of the given files and returns the first result that is not empty.\n",
83+
" If all given file names result in empty results, the last (empty) result will be returned.\n",
84+
" By additionally specifying \"limit=\" the \"LIMIT\" keyword will be appended to the query so that only the first results get returned.\n",
85+
" \"\"\" \n",
86+
" result=pd.DataFrame()\n",
87+
" for filename in filenames:\n",
88+
" result=query_cypher_to_data_frame(filename, limit)\n",
89+
" if not result.empty:\n",
90+
" return result\n",
91+
" return result"
92+
]
93+
},
7394
{
7495
"cell_type": "code",
7596
"execution_count": null,
@@ -404,6 +425,27 @@
404425
"used_packages_of_dependent_artifact=query_cypher_to_data_frame(\"../cypher/Internal_Dependencies/How_many_elements_compared_to_all_existing_are_used_by_dependent_modules_for_Typescript.cypher\",limit=30)\n",
405426
"used_packages_of_dependent_artifact"
406427
]
428+
},
429+
{
430+
"cell_type": "markdown",
431+
"id": "d06d91b7",
432+
"metadata": {},
433+
"source": [
434+
"### Table 3c - Distance distribution between dependent files\n",
435+
"\n",
436+
"This table shows the file directory distance distribution between dependent files. Intuitively, the distance is given by the fewest number of change directory commands needed to navigate between a file and a dependency it uses. These distances are aggregated to show how many dependent files are in the same directory, how many are just one change directory command apart, and so on."
437+
]
438+
},
439+
{
440+
"cell_type": "code",
441+
"execution_count": null,
442+
"id": "80166282",
443+
"metadata": {},
444+
"outputs": [],
445+
"source": [
446+
"query_first_non_empty_cypher_to_data_frame(\"../cypher/Internal_Dependencies/Get_file_distance_as_shortest_contains_path_for_dependencies.cypher\",\n",
447+
" \"../cypher/Internal_Dependencies/Set_file_distance_as_shortest_contains_path_for_dependencies.cypher\", limit=20)"
448+
]
407449
}
408450
],
409451
"metadata": {

scripts/reports/InternalDependenciesCsv.sh

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,11 @@ echo "InternalDependenciesCsv: REPORTS_SCRIPT_DIR=${REPORTS_SCRIPT_DIR}"
2121

2222
# Get the "scripts" directory by taking the path of this script and going one directory up.
2323
SCRIPTS_DIR=${SCRIPTS_DIR:-"${REPORTS_SCRIPT_DIR}/.."} # Repository directory containing the shell scripts
24-
echo "InternalDependenciesCsv SCRIPTS_DIR=${SCRIPTS_DIR}"
24+
echo "InternalDependenciesCsv: SCRIPTS_DIR=${SCRIPTS_DIR}"
2525

2626
# Get the "cypher" directory by taking the path of this script and going two directories up and then to "cypher".
2727
CYPHER_DIR=${CYPHER_DIR:-"${REPORTS_SCRIPT_DIR}/../../cypher"}
28-
echo "InternalDependenciesCsv CYPHER_DIR=${CYPHER_DIR}"
28+
echo "InternalDependenciesCsv: CYPHER_DIR=${CYPHER_DIR}"
2929

3030
# Define functions to execute cypher queries from within a given file
3131
source "${SCRIPTS_DIR}/executeQueryFunctions.sh"
@@ -39,7 +39,15 @@ mkdir -p "${FULL_REPORT_DIRECTORY}"
3939
CYCLIC_DEPENDENCIES_CYPHER_DIR="${CYPHER_DIR}/Cyclic_Dependencies"
4040
INTERNAL_DEPENDENCIES_CYPHER_DIR="${CYPHER_DIR}/Internal_Dependencies"
4141

42-
# Cyclic Dependencies Java
42+
# Calculate the fewest number of change directory commands needed between dependent files as a distance metric
43+
echo "InternalDependenciesCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Calculating distance between dependent files..."
44+
execute_cypher_queries_until_results "${INTERNAL_DEPENDENCIES_CYPHER_DIR}/Get_file_distance_as_shortest_contains_path_for_dependencies.cypher" \
45+
"${INTERNAL_DEPENDENCIES_CYPHER_DIR}/Set_file_distance_as_shortest_contains_path_for_dependencies.cypher" \
46+
> "${FULL_REPORT_DIRECTORY}/Distance_distribution_between_dependent_files.csv"
47+
48+
# Internal Dependencies for Java
49+
echo "InternalDependenciesCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Processing internal dependencies for Java..."
50+
4351
execute_cypher "${CYCLIC_DEPENDENCIES_CYPHER_DIR}/Cyclic_Dependencies.cypher" > "${FULL_REPORT_DIRECTORY}/Cyclic_Dependencies.csv"
4452
execute_cypher "${CYCLIC_DEPENDENCIES_CYPHER_DIR}/Cyclic_Dependencies_Breakdown.cypher" > "${FULL_REPORT_DIRECTORY}/Cyclic_Dependencies_Breakdown.csv"
4553
execute_cypher "${CYCLIC_DEPENDENCIES_CYPHER_DIR}/Cyclic_Dependencies_Breakdown_Backward_Only.cypher" > "${FULL_REPORT_DIRECTORY}/Cyclic_Dependencies_Breakdown_Backward_Only.csv"
@@ -53,6 +61,8 @@ execute_cypher "${INTERNAL_DEPENDENCIES_CYPHER_DIR}/How_many_packages_compared_t
5361
execute_cypher "${INTERNAL_DEPENDENCIES_CYPHER_DIR}/How_many_classes_compared_to_all_existing_in_the_same_package_are_used_by_dependent_packages_across_different_artifacts.cypher" > "${FULL_REPORT_DIRECTORY}/ClassesPerPackageUsageAcrossArtifacts.csv"
5462

5563
# Internal Dependencies for TypeScript
64+
echo "InternalDependenciesCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Processing internal dependencies for TypeScript..."
65+
5666
execute_cypher "${CYCLIC_DEPENDENCIES_CYPHER_DIR}/Cyclic_Dependencies_for_Typescript.cypher" > "${FULL_REPORT_DIRECTORY}/Cyclic_Dependencies_for_Typescript.csv"
5767
execute_cypher "${CYCLIC_DEPENDENCIES_CYPHER_DIR}/Cyclic_Dependencies_Breakdown_for_Typescript.cypher" > "${FULL_REPORT_DIRECTORY}/Cyclic_Dependencies_Breakdown_for_Typescript.csv"
5868
execute_cypher "${CYCLIC_DEPENDENCIES_CYPHER_DIR}/Cyclic_Dependencies_Breakdown_Backward_Only_for_Typescript.cypher" > "${FULL_REPORT_DIRECTORY}/Cyclic_Dependencies_Breakdown_Backward_Only_for_Typescript.csv"
@@ -62,4 +72,6 @@ execute_cypher "${INTERNAL_DEPENDENCIES_CYPHER_DIR}/List_elements_that_are_used_
6272
execute_cypher "${INTERNAL_DEPENDENCIES_CYPHER_DIR}/How_many_elements_compared_to_all_existing_are_used_by_dependent_modules_for_Typescript.cypher" > "${FULL_REPORT_DIRECTORY}/ModuleElementsUsageTypescript.csv"
6373

6474
# Clean-up after report generation. Empty reports will be deleted.
65-
source "${SCRIPTS_DIR}/cleanupAfterReportGeneration.sh" "${FULL_REPORT_DIRECTORY}"
75+
source "${SCRIPTS_DIR}/cleanupAfterReportGeneration.sh" "${FULL_REPORT_DIRECTORY}"
76+
77+
echo "InternalDependenciesCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Successfully finished."

0 commit comments

Comments
 (0)