diff --git a/cypher/GitLog/List_pairwise_changed_files_with_dependencies.cypher b/cypher/GitLog/List_pairwise_changed_files_with_dependencies.cypher index ee752248a..b4c66604e 100644 --- a/cypher/GitLog/List_pairwise_changed_files_with_dependencies.cypher +++ b/cypher/GitLog/List_pairwise_changed_files_with_dependencies.cypher @@ -2,7 +2,8 @@ MATCH (firstCodeFile:File)-[dependency:DEPENDS_ON]->(secondCodeFile:File) MATCH (firstCodeFile)-[pairwiseChange:CHANGED_TOGETHER_WITH]-(secondCodeFile) -WHERE elementId(firstCodeFile) < elementId(secondCodeFile) +//De-duplicating the pairs of files isn't necessary, because the dependency relation is directed. +//WHERE elementId(firstCodeFile) < elementId(secondCodeFile) WITH firstCodeFile.fileName AS firstFileName ,secondCodeFile.fileName AS secondFileName ,coalesce(dependency.weight, dependency.cardinality) AS dependencyWeight diff --git a/jupyter/GitHistoryGeneral.ipynb b/jupyter/GitHistoryGeneral.ipynb index 4010bb5b0..6192d4ce3 100644 --- a/jupyter/GitHistoryGeneral.ipynb +++ b/jupyter/GitHistoryGeneral.ipynb @@ -1338,15 +1338,26 @@ "display(pairwise_changed_git_files_with_dependencies.corr(method='pearson'))\n", "\n", "display(\"Pairwise changed git files compared to dependency weights - Spearman Correlation\")\n", - "display(pairwise_changed_git_files_with_dependencies.corr(method='spearman'))\n", - "\n", - "from scipy.stats import pearsonr, spearmanr\n", + "display(pairwise_changed_git_files_with_dependencies.corr(method='spearman'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a4ae651", + "metadata": {}, + "outputs": [], + "source": [ + "if pairwise_changed_git_files_with_dependencies.shape[0] < 5:\n", + " print(\"Less than 5 samples are not enough to calculate p-values\")\n", + "else:\n", + " from scipy.stats import pearsonr, spearmanr\n", "\n", - "display(\"Pearson Correlation with p-value for commitCount and dependencyWeight\")\n", - "display(pearsonr(pairwise_changed_git_files_with_dependencies['commitCount'], pairwise_changed_git_files_with_dependencies['dependencyWeight']))\n", + " display(\"Pearson Correlation with p-value for commitCount and dependencyWeight\")\n", + " display(pearsonr(pairwise_changed_git_files_with_dependencies['commitCount'], pairwise_changed_git_files_with_dependencies['dependencyWeight']))\n", "\n", - "display(\"Spearman Correlation with p-value for commitCount and dependencyWeight\")\n", - "display(spearmanr(pairwise_changed_git_files_with_dependencies['commitCount'], pairwise_changed_git_files_with_dependencies['dependencyWeight']))" + " display(\"Spearman Correlation with p-value for commitCount and dependencyWeight\")\n", + " display(spearmanr(pairwise_changed_git_files_with_dependencies['commitCount'], pairwise_changed_git_files_with_dependencies['dependencyWeight']))" ] }, {