Optimize object-oriented metrics report

JohT · JohT · commit be6330d30d20 · 2023-09-13T08:12:31.000+02:00
diff --git a/jupyter/ObjectOrientedDesignMetrics.ipynb b/jupyter/ObjectOrientedDesignMetrics.ipynb
@@ -15,7 +15,7 @@
     "- [jqassistant](https://jqassistant.org)\n",
     "- [notebook walks through examples for integrating various packages with Neo4j](https://nicolewhite.github.io/neo4j-jupyter/hello-world.html)\n",
     "- [OO Design Quality Metrics](https://api.semanticscholar.org/CorpusID:18246616)\n",
-    "- [py2neo](https://py2neo.org/2021.1/)"
+    "- [Neo4j Python Driver](https://neo4j.com/docs/api/python-driver/current)"
    ]
   },
   {
@@ -100,29 +100,6 @@
     "</style>"
    ]
   },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "id": "91d80bf7",
-   "metadata": {},
-   "source": [
-    "## Artifacts\n",
-    "\n",
-    "#### Table 1\n",
-    "\n",
-    "- List all the artifacts this notebook is based on"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "dc682db6",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "query_cypher_to_data_frame(\"../cypher/List_all_existing_artifacts.cypher\")"
-   ]
-  },
   {
    "attachments": {},
    "cell_type": "markdown",
@@ -256,7 +233,7 @@
     "\n",
     "#### Table 6\n",
     "\n",
-    "- Show the top 20 packages with the highest distance from the \"main sequence\""
+    "- Show the top 30 packages with the highest distance from the \"main sequence\""
    ]
   },
   {
@@ -267,7 +244,7 @@
    "outputs": [],
    "source": [
     "instabilityPerAbstractness = query_cypher_to_data_frame(\"../cypher/Metrics/Calculate_distance_between_abstractness_and_instability.cypher\")\n",
-    "instabilityPerAbstractness.head(20)"
+    "instabilityPerAbstractness.head(30)"
    ]
   },
   {
@@ -295,14 +272,24 @@
     "# Function that returns the number of past (index smaller than given index) rows \n",
     "# with the same value in columnName1 and columnName2\n",
     "# If there was a row with the same columnName1 and columnName2 values\n",
-    "def countPastEntriesWithSameValues(dataFrame, index, columnName1, columnName2):\n",
-    "    columnValue1 = dataFrame[columnName1][index]\n",
-    "    columnValue2 = dataFrame[columnName2][index]\n",
-    "    return len(dataFrame[\n",
-    "        (dataFrame.index.isin(range(0, index + 1))) & \n",
-    "        (dataFrame[columnName1]==columnValue1) & \n",
-    "        (dataFrame[columnName2]==columnValue2)\n",
-    "    ]) - 1"
+    "# def countPastEntriesWithSameValues(dataFrame, index, columnName1, columnName2):\n",
+    "#     columnValue1 = dataFrame[columnName1][index]\n",
+    "#     columnValue2 = dataFrame[columnName2][index]\n",
+    "#     return len(dataFrame[\n",
+    "#         (dataFrame.index.isin(range(0, index + 1))) & \n",
+    "#         (dataFrame[columnName1]==columnValue1) & \n",
+    "#         (dataFrame[columnName2]==columnValue2)\n",
+    "#     ]) - 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "36d8cf50",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "instabilityPerAbstractness.packageName[0]"
    ]
   },
   {
@@ -312,6 +299,37 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "def annotate_plot(data_frame: pd.DataFrame, index: int):\n",
+    "    \"\"\"\n",
+    "    Annotates the data point identified by \"index\" in the plot of the \"data_frame\".\n",
+    "    \"\"\"\n",
+    "    x_position = data_frame.abstractness[index].item()\n",
+    "    y_position = data_frame.instability[index].item()\n",
+    "    artifact_name = data_frame.artifactName[index].item()\n",
+    "    package_name = data_frame.packageName[index].item()\n",
+    "\n",
+    "    label_box=dict(boxstyle=\"round4,pad=0.5\", fc=\"w\", alpha=0.8)\n",
+    "    plot.annotate(artifact_name + '\\n' + package_name\n",
+    "        ,xy=(x_position, y_position)\n",
+    "        ,xycoords='data'\n",
+    "        ,xytext=(20, 0)\n",
+    "        ,textcoords='offset points'\n",
+    "        ,size=6\n",
+    "        ,bbox=label_box\n",
+    "        ,arrowprops=dict(arrowstyle=\"-|>\", mutation_scale=10, color=\"black\")\n",
+    "    )\n",
+    "\n",
+    "def index_of_sorted(data_frame: pd.DataFrame, highest: list[str] = []):\n",
+    "    \"\"\"\n",
+    "    Sorts the \"data_frame\" by columns 'abstractness','instability','typesInPackage', 'artifactName'\n",
+    "    and returns the index of the first row.\n",
+    "    Columns that are contained in the list of strings parameter \"highest\" will be sorted descending.\n",
+    "    \"\"\"\n",
+    "    by = ['abstractness','instability','typesInPackage', 'artifactName']\n",
+    "    ascending = [('abstractness' not in highest), ('instability' not in highest), False, True]\n",
+    "    return data_frame.sort_values(by=by, ascending=ascending).head(1).index\n",
+    "\n",
+    "\n",
     "# data points scaled by the number of types and colored by the distance to the \"main sequence\"\n",
     "plot.scatter(\n",
     "    instabilityPerAbstractness.abstractness, # x axis shows abstractness\n",
@@ -323,22 +341,26 @@
     "# green \"main sequence\" line\n",
     "plot.plot([0,1], [1,0], c='lightgreen', linestyle='dashed') \n",
     "\n",
-    "# add the packagenames to the those with the 15 highest distance values\n",
-    "distanceAnnotationThreshold = instabilityPerAbstractness.distance.nlargest(15).iloc[-1]\n",
-    "# (variant) highest 15% (quantile) of all distance values\n",
-    "# distanceAnnotationThreshold = instabilityPerAbstractness.distance.quantile(0.85)\n",
-    "for i, name in enumerate(instabilityPerAbstractness.packageName):\n",
-    "    if (instabilityPerAbstractness.distance[i] >= distanceAnnotationThreshold):\n",
-    "        x_position = instabilityPerAbstractness.abstractness[i]\n",
-    "        y_position = instabilityPerAbstractness.instability[i]\n",
-    "        # To overcome overlapping text annotations for multiple data points on the same position, \n",
-    "        # entries with same position values in the past indizes are count and used to offset the y-position\n",
-    "        # so that multiple names are written underneath each other.\n",
-    "        alreadyExistingPositions = countPastEntriesWithSameValues(instabilityPerAbstractness, i, 'abstractness', 'instability')\n",
-    "        y_position = y_position - alreadyExistingPositions / len(instabilityPerAbstractness) * 2\n",
-    "        \n",
-    "        plot.annotate(name, (x_position, y_position), size=6)\n",
-    "            \n",
+    "# Annotate largest package with the highest abstractness and instability\n",
+    "annotation_index = index_of_sorted(highest=['abstractness','instability'], data_frame=instabilityPerAbstractness)\n",
+    "annotate_plot(instabilityPerAbstractness, annotation_index)\n",
+    "\n",
+    "# Annotate largest package with the lowest abstractness and highest instability\n",
+    "annotation_index = index_of_sorted(highest=['instability'], data_frame=instabilityPerAbstractness)\n",
+    "annotate_plot(instabilityPerAbstractness, annotation_index)\n",
+    "\n",
+    "# Annotate largest package with the lowest abstractness and lowest instability\n",
+    "annotation_index = index_of_sorted(highest=[], data_frame=instabilityPerAbstractness)\n",
+    "annotate_plot(instabilityPerAbstractness, annotation_index)\n",
+    "\n",
+    "# Annotate largest package with the highest abstractness and lowest instability\n",
+    "annotation_index = index_of_sorted(highest=['abstractness'], data_frame=instabilityPerAbstractness)\n",
+    "annotate_plot(instabilityPerAbstractness, annotation_index)\n",
+    "\n",
+    "# Annotate largest package with the highest abstractness and instability that are both at most 0.5\n",
+    "annotation_index = index_of_sorted(highest=['abstractness', 'instability'], data_frame=instabilityPerAbstractness.query('abstractness <= 0.5 & instability <= 0.5'))\n",
+    "annotate_plot(instabilityPerAbstractness, annotation_index)\n",
+    "\n",
     "plot.title('Abstractness vs. Instability (\"Main Sequence\")')\n",
     "plot.xlabel('Abstractness')\n",
     "plot.ylabel('Instability')\n",