|
91 | 91 | "source": [ |
92 | 92 | "def get_cypher_query_from_file(cypher_file_name : str):\n", |
93 | 93 | " with open(cypher_file_name) as file:\n", |
94 | | - " return ' '.join(file.readlines())" |
95 | | - ] |
96 | | - }, |
97 | | - { |
98 | | - "cell_type": "code", |
99 | | - "execution_count": null, |
100 | | - "id": "59310f6f", |
101 | | - "metadata": {}, |
102 | | - "outputs": [], |
103 | | - "source": [ |
| 94 | + " return ' '.join(file.readlines())\n", |
| 95 | + "\n", |
| 96 | + "\n", |
104 | 97 | "def query_cypher_to_data_frame(filename : str, limit: int = -1):\n", |
105 | 98 | " \"\"\"\n", |
106 | 99 | " Executes the Cypher query of the given file and returns the result.\n", |
|
111 | 104 | " if limit > 0:\n", |
112 | 105 | " cypher_query = \"{query}\\nLIMIT {row_limit}\".format(query = cypher_query, row_limit = limit)\n", |
113 | 106 | " records, summary, keys = driver.execute_query(cypher_query)\n", |
114 | | - " return pd.DataFrame([r.values() for r in records], columns=keys)" |
115 | | - ] |
116 | | - }, |
117 | | - { |
118 | | - "cell_type": "code", |
119 | | - "execution_count": null, |
120 | | - "id": "c09da482", |
121 | | - "metadata": {}, |
122 | | - "outputs": [], |
123 | | - "source": [ |
| 107 | + " return pd.DataFrame([r.values() for r in records], columns=keys)\n", |
| 108 | + "\n", |
| 109 | + "\n", |
124 | 110 | "def query_first_non_empty_cypher_to_data_frame(*filenames : str, limit: int = -1):\n", |
125 | 111 | " \"\"\"\n", |
126 | 112 | " Executes the Cypher queries of the given files and returns the first result that is not empty.\n", |
|
632 | 618 | "### Main author per directory" |
633 | 619 | ] |
634 | 620 | }, |
635 | | - { |
636 | | - "cell_type": "code", |
637 | | - "execution_count": null, |
638 | | - "id": "29069753", |
639 | | - "metadata": {}, |
640 | | - "outputs": [], |
641 | | - "source": [ |
642 | | - "# TODO delete unused code" |
643 | | - ] |
644 | | - }, |
645 | | - { |
646 | | - "cell_type": "raw", |
647 | | - "id": "7ccca44e", |
648 | | - "metadata": {}, |
649 | | - "source": [ |
650 | | - "# TODO experiment again with plotly express\n", |
651 | | - "\n", |
652 | | - "import plotly.express as plotly_express\n", |
653 | | - "\n", |
654 | | - "plotly_treemap_color_settings = dict(\n", |
655 | | - " color_continuous_scale='Hot_r', # Hot_r, amp, Reds, Blackbody_r, RdGy_r, RdBu_r\n", |
656 | | - " color_discrete_sequence=plotly_express.colors.qualitative.Vivid,\n", |
657 | | - ")\n", |
658 | | - "plotly_treemap_commit_statistics_custom_data= dict(\n", |
659 | | - " custom_data=['fileCount', 'commitCount', 'authorCount', 'mainAuthor', 'lastCommitDate', 'daysSinceLastCommit', 'lastCreationDate', 'daysSinceLastCreation', 'lastModificationDate', 'daysSinceLastModification', 'directoryPath'],\n", |
660 | | - ")\n", |
661 | | - "plotly_treemap_traces_base_settings = dict(\n", |
662 | | - " root_color=\"lightgrey\",\n", |
663 | | - " textinfo=\"label+value\",\n", |
664 | | - " marker=dict(cornerradius=5),\n", |
665 | | - ")\n", |
666 | | - "plotly_treemap_traces_commit_statistics_settings = dict(\n", |
667 | | - " **plotly_treemap_traces_base_settings,\n", |
668 | | - " hovertemplate='<b>%{label}</b><br>Files: %{customdata[0]}<br>Commits: %{customdata[1]}<br>Authors: %{customdata[2]}<br>Main Author: %{customdata[3]}<br>Last Commit: %{customdata[4]} (%{customdata[5]} days ago)<br>Last Created: %{customdata[6]} (%{customdata[7]} days ago)<br>Last Modified: %{customdata[8]} (%{customdata[9]} days ago)<br>Path: %{customdata[10]}',\n", |
669 | | - ")\n", |
670 | | - "plotly_treemap_layout_base_settings = dict(\n", |
671 | | - " margin=dict(t=50, l=15, r=15, b=15),\n", |
672 | | - ")\n", |
673 | | - "\n", |
674 | | - "# Extract unique authors for category orders\n", |
675 | | - "#unique_authors = git_files_with_commit_statistics['mainAuthor'].unique()\n", |
676 | | - "\n", |
677 | | - "figure = plotly_express.treemap(\n", |
678 | | - " git_files_with_commit_statistics,\n", |
679 | | - " **plotly_treemap_color_settings,\n", |
680 | | - " **plotly_treemap_commit_statistics_custom_data,\n", |
681 | | - " ids='directoryPath',\n", |
682 | | - " names='directoryName',\n", |
683 | | - " parents='directoryParentPath',\n", |
684 | | - " # Without values, much more squares are shown which gives a much better overview\n", |
685 | | - " # values='fileCount', \n", |
686 | | - " color='mainAuthor',\n", |
687 | | - " title='Directories and their main author (discrete coloring, no legend?)',\n", |
688 | | - ")\n", |
689 | | - "figure.update_traces(\n", |
690 | | - " **plotly_treemap_traces_commit_statistics_settings,\n", |
691 | | - ")\n", |
692 | | - "figure.update_layout(\n", |
693 | | - " **plotly_treemap_layout_base_settings,\n", |
694 | | - " # coloraxis_colorbar=dict(title=\"Author\"),\n", |
695 | | - " legend_title_text='Main Author',\n", |
696 | | - " showlegend=True,\n", |
697 | | - " legend_visible=True,\n", |
698 | | - ") \n", |
699 | | - "\n", |
700 | | - "figure.show(**plotly_treemap_figure_show_settings)" |
701 | | - ] |
702 | | - }, |
703 | 621 | { |
704 | 622 | "cell_type": "code", |
705 | 623 | "execution_count": null, |
|
994 | 912 | ")\n", |
995 | 913 | "figure.show(**plotly_treemap_figure_show_settings)" |
996 | 914 | ] |
| 915 | + }, |
| 916 | + { |
| 917 | + "cell_type": "markdown", |
| 918 | + "id": "14e87aff", |
| 919 | + "metadata": {}, |
| 920 | + "source": [ |
| 921 | + "## Wordcloud of git authors" |
| 922 | + ] |
| 923 | + }, |
| 924 | + { |
| 925 | + "cell_type": "code", |
| 926 | + "execution_count": null, |
| 927 | + "id": "e2f68f02", |
| 928 | + "metadata": {}, |
| 929 | + "outputs": [], |
| 930 | + "source": [ |
| 931 | + "# Query data from graph database\n", |
| 932 | + "git_author_words_with_frequency = query_cypher_to_data_frame(\"../cypher/Overview/Words_for_git_author_Wordcloud_with_frequency.cypher\")\n", |
| 933 | + "# Debug \n", |
| 934 | + "# display(git_author_words_with_frequency.head(10))" |
| 935 | + ] |
| 936 | + }, |
| 937 | + { |
| 938 | + "cell_type": "code", |
| 939 | + "execution_count": null, |
| 940 | + "id": "d83ce5f4", |
| 941 | + "metadata": {}, |
| 942 | + "outputs": [], |
| 943 | + "source": [ |
| 944 | + "from wordcloud import WordCloud\n", |
| 945 | + "import matplotlib.pyplot as plot\n", |
| 946 | + "\n", |
| 947 | + "if not git_author_words_with_frequency.empty:\n", |
| 948 | + " # Expects the first column of the DataFrame to contain the words/text and the second column to contain the count/frequency.\n", |
| 949 | + " words_with_frequency_dict=git_author_words_with_frequency.set_index(git_author_words_with_frequency.columns[0]).to_dict()[git_author_words_with_frequency.columns[1]]\n", |
| 950 | + " wordcloud = WordCloud(\n", |
| 951 | + " width=800, \n", |
| 952 | + " height=800,\n", |
| 953 | + " max_words=600, \n", |
| 954 | + " collocations=False,\n", |
| 955 | + " background_color='white', \n", |
| 956 | + " colormap='viridis'\n", |
| 957 | + " ).generate_from_frequencies(words_with_frequency_dict)\n", |
| 958 | + "\n", |
| 959 | + " # Plot the word cloud\n", |
| 960 | + " plot.figure(figsize=(15,15))\n", |
| 961 | + " plot.imshow(wordcloud, interpolation='bilinear')\n", |
| 962 | + " plot.axis(\"off\")\n", |
| 963 | + " plot.title('Wordcloud of git authors')\n", |
| 964 | + " plot.show()" |
| 965 | + ] |
997 | 966 | } |
998 | 967 | ], |
999 | 968 | "metadata": { |
|
0 commit comments