Skip to content

Commit de7480e

Browse files
committed
Optimize object oriented metrics report
1 parent 6ef4b4c commit de7480e

File tree

1 file changed

+72
-50
lines changed

1 file changed

+72
-50
lines changed

jupyter/ObjectOrientedDesignMetrics.ipynb

Lines changed: 72 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
"- [jqassistant](https://jqassistant.org)\n",
1616
"- [notebook walks through examples for integrating various packages with Neo4j](https://nicolewhite.github.io/neo4j-jupyter/hello-world.html)\n",
1717
"- [OO Design Quality Metrics](https://api.semanticscholar.org/CorpusID:18246616)\n",
18-
"- [py2neo](https://py2neo.org/2021.1/)"
18+
"- [Neo4j Python Driver](https://neo4j.com/docs/api/python-driver/current)"
1919
]
2020
},
2121
{
@@ -100,29 +100,6 @@
100100
"</style>"
101101
]
102102
},
103-
{
104-
"attachments": {},
105-
"cell_type": "markdown",
106-
"id": "91d80bf7",
107-
"metadata": {},
108-
"source": [
109-
"## Artifacts\n",
110-
"\n",
111-
"#### Table 1\n",
112-
"\n",
113-
"- List all the artifacts this notebook is based on"
114-
]
115-
},
116-
{
117-
"cell_type": "code",
118-
"execution_count": null,
119-
"id": "dc682db6",
120-
"metadata": {},
121-
"outputs": [],
122-
"source": [
123-
"query_cypher_to_data_frame(\"../cypher/List_all_existing_artifacts.cypher\")"
124-
]
125-
},
126103
{
127104
"attachments": {},
128105
"cell_type": "markdown",
@@ -256,7 +233,7 @@
256233
"\n",
257234
"#### Table 6\n",
258235
"\n",
259-
"- Show the top 20 packages with the highest distance from the \"main sequence\""
236+
"- Show the top 30 packages with the highest distance from the \"main sequence\""
260237
]
261238
},
262239
{
@@ -267,7 +244,7 @@
267244
"outputs": [],
268245
"source": [
269246
"instabilityPerAbstractness = query_cypher_to_data_frame(\"../cypher/Metrics/Calculate_distance_between_abstractness_and_instability.cypher\")\n",
270-
"instabilityPerAbstractness.head(20)"
247+
"instabilityPerAbstractness.head(30)"
271248
]
272249
},
273250
{
@@ -295,14 +272,24 @@
295272
"# Function that returns the number of past (index smaller than given index) rows \n",
296273
"# with the same value in columnName1 and columnName2\n",
297274
"# (returns 0 if there is no earlier row with the same columnName1 and columnName2 values)\n",
298-
"def countPastEntriesWithSameValues(dataFrame, index, columnName1, columnName2):\n",
299-
" columnValue1 = dataFrame[columnName1][index]\n",
300-
" columnValue2 = dataFrame[columnName2][index]\n",
301-
" return len(dataFrame[\n",
302-
" (dataFrame.index.isin(range(0, index + 1))) & \n",
303-
" (dataFrame[columnName1]==columnValue1) & \n",
304-
" (dataFrame[columnName2]==columnValue2)\n",
305-
" ]) - 1"
275+
"# def countPastEntriesWithSameValues(dataFrame, index, columnName1, columnName2):\n",
276+
"# columnValue1 = dataFrame[columnName1][index]\n",
277+
"# columnValue2 = dataFrame[columnName2][index]\n",
278+
"# return len(dataFrame[\n",
279+
"# (dataFrame.index.isin(range(0, index + 1))) & \n",
280+
"# (dataFrame[columnName1]==columnValue1) & \n",
281+
"# (dataFrame[columnName2]==columnValue2)\n",
282+
"# ]) - 1"
283+
]
284+
},
285+
{
286+
"cell_type": "code",
287+
"execution_count": null,
288+
"id": "36d8cf50",
289+
"metadata": {},
290+
"outputs": [],
291+
"source": [
292+
"instabilityPerAbstractness.packageName[0]"
306293
]
307294
},
308295
{
@@ -312,6 +299,37 @@
312299
"metadata": {},
313300
"outputs": [],
314301
"source": [
302+
"def annotate_plot(data_frame: pd.DataFrame, index: int):\n",
303+
" \"\"\"\n",
304+
" Annotates the data point identified by \"index\" with its artifact and package name in the plot of the \"data_frame\"\n",
305+
" \"\"\"\n",
306+
" x_position = data_frame.abstractness[index].item()\n",
307+
" y_position = data_frame.instability[index].item()\n",
308+
" artifact_name = data_frame.artifactName[index].item()\n",
309+
" package_name = data_frame.packageName[index].item()\n",
310+
"\n",
311+
" label_box=dict(boxstyle=\"round4,pad=0.5\", fc=\"w\", alpha=0.8)\n",
312+
" plot.annotate(artifact_name + '\\n' + package_name\n",
313+
" ,xy=(x_position, y_position)\n",
314+
" ,xycoords='data'\n",
315+
" ,xytext=(20, 0)\n",
316+
" ,textcoords='offset points'\n",
317+
" ,size=6\n",
318+
" ,bbox=label_box\n",
319+
" ,arrowprops=dict(arrowstyle=\"-|>\", mutation_scale=10, color=\"black\")\n",
320+
" )\n",
321+
"\n",
322+
"def index_of_sorted(data_frame: pd.DataFrame, highest: list[str] = []):\n",
323+
" \"\"\"\n",
324+
" Sorts the \"data_frame\" by columns 'abstractness','instability','typesInPackage', 'artifactName'\n",
325+
" and returns the index of the first row.\n",
326+
" Columns that are contained in the list of strings parameter \"highest\" will be sorted descending.\n",
327+
" \"\"\"\n",
328+
" by = ['abstractness','instability','typesInPackage', 'artifactName']\n",
329+
" ascending = [('abstractness' not in highest), ('instability' not in highest), False, True]\n",
330+
" return data_frame.sort_values(by=by, ascending=ascending).head(1).index\n",
331+
"\n",
332+
"\n",
315333
"# data points scaled by the number of types and colored by the distance to the \"main sequence\"\n",
316334
"plot.scatter(\n",
317335
" instabilityPerAbstractness.abstractness, # x axis shows abstractness\n",
@@ -323,22 +341,26 @@
323341
"# green \"main sequence\" line\n",
324342
"plot.plot([0,1], [1,0], c='lightgreen', linestyle='dashed') \n",
325343
"\n",
326-
"# add the packagenames to the those with the 15 highest distance values\n",
327-
"distanceAnnotationThreshold = instabilityPerAbstractness.distance.nlargest(15).iloc[-1]\n",
328-
"# (variant) highest 15% (quantile) of all distance values\n",
329-
"# distanceAnnotationThreshold = instabilityPerAbstractness.distance.quantile(0.85)\n",
330-
"for i, name in enumerate(instabilityPerAbstractness.packageName):\n",
331-
" if (instabilityPerAbstractness.distance[i] >= distanceAnnotationThreshold):\n",
332-
" x_position = instabilityPerAbstractness.abstractness[i]\n",
333-
" y_position = instabilityPerAbstractness.instability[i]\n",
334-
" # To overcome overlapping text annotations for multiple data points on the same position, \n",
335-
" # entries with same position values in the past indizes are count and used to offset the y-position\n",
336-
" # so that multiple names are written underneath each other.\n",
337-
" alreadyExistingPositions = countPastEntriesWithSameValues(instabilityPerAbstractness, i, 'abstractness', 'instability')\n",
338-
" y_position = y_position - alreadyExistingPositions / len(instabilityPerAbstractness) * 2\n",
339-
" \n",
340-
" plot.annotate(name, (x_position, y_position), size=6)\n",
341-
" \n",
344+
"# Annotate largest package with the highest abstractness and instability\n",
345+
"annotation_index = index_of_sorted(highest=['abstractness','instability'], data_frame=instabilityPerAbstractness)\n",
346+
"annotate_plot(instabilityPerAbstractness, annotation_index)\n",
347+
"\n",
348+
"# Annotate largest package with the lowest abstractness and highest instability\n",
349+
"annotation_index = index_of_sorted(highest=['instability'], data_frame=instabilityPerAbstractness)\n",
350+
"annotate_plot(instabilityPerAbstractness, annotation_index)\n",
351+
"\n",
352+
"# Annotate largest package with the lowest abstractness and lowest instability\n",
353+
"annotation_index = index_of_sorted(highest=[], data_frame=instabilityPerAbstractness)\n",
354+
"annotate_plot(instabilityPerAbstractness, annotation_index)\n",
355+
"\n",
356+
"# Annotate largest package with the highest abstractness and lowest instability\n",
357+
"annotation_index = index_of_sorted(highest=['abstractness'], data_frame=instabilityPerAbstractness)\n",
358+
"annotate_plot(instabilityPerAbstractness, annotation_index)\n",
359+
"\n",
360+
"# Annotate largest package with the highest abstractness and instability that are both at most 0.5\n",
361+
"annotation_index = index_of_sorted(highest=['abstractness', 'instability'], data_frame=instabilityPerAbstractness.query('abstractness <= 0.5 & instability <= 0.5'))\n",
362+
"annotate_plot(instabilityPerAbstractness, annotation_index)\n",
363+
"\n",
342364
"plot.title('Abstractness vs. Instability (\"Main Sequence\")')\n",
343365
"plot.xlabel('Abstractness')\n",
344366
"plot.ylabel('Instability')\n",

0 commit comments

Comments
 (0)