3030
3131from visualization import plot_annotation_style , annotate_each , annotate_each_with_index , scale_marker_sizes , zoom_into_center , zoom_into_center_while_preserving_scores_above_threshold , zoom_into_center_while_preserving_top_scores
3232
33+
3334class Parameters :
3435 required_parameters_ = ["projection_node_label" ]
3536
@@ -88,9 +89,20 @@ def example(cls):
def get_query_parameters(self) -> typing.Dict[str, str]:
    """Return a shallow copy of the stored query parameters.

    A copy is handed out so that changes made by the caller to the
    returned dictionary do not affect the parameters kept by this object.
    """
    parameters_copy = self.query_parameters_.copy()
    return parameters_copy
9091
def __get_projection_node_label(self) -> str:
    """Return the value of the required "projection_node_label" query parameter.

    Raises:
        KeyError: If the parameter is missing from the query parameters.
    """
    node_label = self.query_parameters_["projection_node_label"]
    return node_label
9394
def __is_code_language_available(self) -> bool:
    """Tell whether a "projection_language" query parameter was provided."""
    language_key = "projection_language"
    return language_key in self.query_parameters_
97+
def __get_projection_language(self) -> str:
    """Return the "projection_language" query parameter, or "" if it was not provided."""
    # A single lookup with a default replaces the separate membership check.
    return self.query_parameters_.get("projection_language", "")
100+
def get_plot_prefix(self) -> str:
    """Return the prefix used for plot titles and plot file names.

    The prefix is "<language> <node label>" when a non-empty projection
    language is available, otherwise just the node label.

    Returns:
        The plot prefix without any leading whitespace.
    """
    node_label = self.__get_projection_node_label()
    # The language accessor already yields "" when no language was given,
    # so one truthiness check covers both "missing" and "present but empty".
    # This avoids a prefix with a leading space ending up in file names.
    language = self.__get_projection_language()
    if not language:
        return node_label
    return f"{language} {node_label}"
105+
def get_report_directory(self) -> str:
    """Return the report directory stored on this object."""
    directory = self.report_directory
    return directory
96108
@@ -451,7 +463,7 @@ def plot_clustering_coefficient_vs_page_rank(
451463
452464 common_column_names_for_annotations = {
453465 "name_column" : 'shortName' ,
454- "x_position_column" : 'clusteringCoefficient' ,
466+ "x_position_column" : 'clusteringCoefficient' ,
455467 "y_position_column" : 'pageRank'
456468 }
457469
@@ -461,8 +473,8 @@ def plot_clustering_coefficient_vs_page_rank(
461473 threshold_page_rank = mean_page_rank + 1.5 * standard_deviation_page_rank
462474 significant_points = combined_data [combined_data ['pageRank' ] > threshold_page_rank ].sort_values (by = 'pageRank' , ascending = False ).reset_index (drop = True ).head (10 )
463475 annotate_each_with_index (
464- significant_points ,
465- using = plot .annotate ,
476+ significant_points ,
477+ using = plot .annotate ,
466478 value_column = 'pageRank' ,
467479 ** common_column_names_for_annotations
468480 )
@@ -473,8 +485,8 @@ def plot_clustering_coefficient_vs_page_rank(
473485 top_clustering_coefficients = combined_data .sort_values (by = 'clusteringCoefficient' , ascending = False ).reset_index (drop = True ).head (20 )
474486 top_clustering_coefficients = top_clustering_coefficients .sort_values (by = 'pageRank' , ascending = True ).reset_index (drop = True ).head (5 )
475487 annotate_each_with_index (
476- top_clustering_coefficients ,
477- using = plot .annotate ,
488+ top_clustering_coefficients ,
489+ using = plot .annotate ,
478490 value_column = 'clusteringCoefficient' ,
479491 ** common_column_names_for_annotations
480492 )
@@ -508,9 +520,9 @@ def plot_clusters(
508520 # Setup columns
509521 node_size_column = centrality_column_name
510522
511- clustering_visualization_dataframe_zoomed = zoom_into_center (
512- clustering_visualization_dataframe ,
513- x_position_column ,
523+ clustering_visualization_dataframe_zoomed = zoom_into_center (
524+ clustering_visualization_dataframe ,
525+ x_position_column ,
514526 y_position_column
515527 )
516528
@@ -617,10 +629,10 @@ def plot_clusters_probabilities(
617629 print ("No projected data to plot available" )
618630 return
619631
620- clustering_visualization_dataframe_zoomed = zoom_into_center_while_preserving_top_scores (
621- clustering_visualization_dataframe ,
622- x_position_column ,
623- y_position_column ,
632+ clustering_visualization_dataframe_zoomed = zoom_into_center_while_preserving_top_scores (
633+ clustering_visualization_dataframe ,
634+ x_position_column ,
635+ y_position_column ,
624636 cluster_probability_column ,
625637 annotate_n_lowest_probabilities ,
626638 lowest_scores = True
@@ -636,7 +648,7 @@ def get_common_plot_parameters(data: pd.DataFrame) -> dict:
636648 "y" : data [y_position_column ],
637649 "s" : data [size_column + '_scaled' ],
638650 }
639-
651+
640652 cluster_noise = clustering_visualization_dataframe_zoomed [clustering_visualization_dataframe_zoomed [cluster_label_column ] == - 1 ]
641653 cluster_non_noise = clustering_visualization_dataframe_zoomed [clustering_visualization_dataframe_zoomed [cluster_label_column ] != - 1 ]
642654 cluster_even_labels = clustering_visualization_dataframe_zoomed [clustering_visualization_dataframe_zoomed [cluster_label_column ] % 2 == 0 ]
@@ -800,7 +812,7 @@ def get_common_plot_parameters(data: pd.DataFrame) -> dict:
800812# ------------------------------------------------------------------------------------------------------------
801813
802814parameters = parse_input_parameters ()
803- plot_type = parameters .get_projection_node_label ()
815+ plot_prefix = parameters .get_plot_prefix ()
804816report_directory = parameters .get_report_directory ()
805817
806818driver = get_graph_database_driver ()
@@ -813,31 +825,31 @@ def get_common_plot_parameters(data: pd.DataFrame) -> dict:
813825 data ['pageRank' ],
814826 data ['articleRank' ],
815827 data ['shortCodeUnitName' ],
816- title = f"{ plot_type } distribution of PageRank - ArticleRank differences" ,
817- plot_file_path = get_file_path (f"{ plot_type } _PageRank_Minus_ArticleRank_Distribution" , parameters )
828+ title = f"{ plot_prefix } distribution of PageRank - ArticleRank differences" ,
829+ plot_file_path = get_file_path (f"{ plot_prefix } _PageRank_Minus_ArticleRank_Distribution" , parameters )
818830)
819831
820832plot_clustering_coefficient_distribution (
821833 data ['clusteringCoefficient' ],
822- title = f"{ plot_type } distribution of clustering coefficients" ,
823- plot_file_path = get_file_path (f"{ plot_type } _ClusteringCoefficient_distribution" , parameters )
834+ title = f"{ plot_prefix } distribution of clustering coefficients" ,
835+ plot_file_path = get_file_path (f"{ plot_prefix } _ClusteringCoefficient_distribution" , parameters )
824836)
825837
826838plot_clustering_coefficient_vs_page_rank (
827839 data ['clusteringCoefficient' ],
828840 data ['pageRank' ],
829841 data ['shortCodeUnitName' ],
830842 data ['clusterNoise' ],
831- title = f"{ plot_type } clustering coefficient versus PageRank" ,
832- plot_file_path = get_file_path (f"{ plot_type } _ClusteringCoefficient_versus_PageRank" , parameters )
843+ title = f"{ plot_prefix } clustering coefficient versus PageRank" ,
844+ plot_file_path = get_file_path (f"{ plot_prefix } _ClusteringCoefficient_versus_PageRank" , parameters )
833845)
834846
835847if (overall_cluster_count < 20 ):
836848 print (f"anomalyDetectionFeaturePlots: Less than 20 clusters: { overall_cluster_count } . Only one plot containing all clusters will be created." )
837849 plot_clusters (
838850 clustering_visualization_dataframe = data ,
839- title = f"{ plot_type } all clusters overall (less than 20)" ,
840- plot_file_path = get_file_path (f"{ plot_type } _Clusters_Overall" , parameters )
851+ title = f"{ plot_prefix } all clusters overall (less than 20)" ,
852+ plot_file_path = get_file_path (f"{ plot_prefix } _Clusters_Overall" , parameters )
841853 )
842854else :
843855 print (f"anomalyDetectionFeaturePlots: More than 20 clusters: { overall_cluster_count } . Different plots focussing on different features like cluster size will be created." )
@@ -846,57 +858,57 @@ def get_common_plot_parameters(data: pd.DataFrame) -> dict:
846858 )
847859 plot_clusters (
848860 clustering_visualization_dataframe = clusters_by_largest_size ,
849- title = f"{ plot_type } clusters with the largest size" ,
850- plot_file_path = get_file_path (f"{ plot_type } _Clusters_largest_size" , parameters )
861+ title = f"{ plot_prefix } clusters with the largest size" ,
862+ plot_file_path = get_file_path (f"{ plot_prefix } _Clusters_largest_size" , parameters )
851863 )
852864
853865 clusters_by_largest_max_radius = get_clusters_by_criteria (
854866 data , by = 'clusterRadiusMax' , ascending = False , cluster_count = 20
855867 )
856868 plot_clusters (
857869 clustering_visualization_dataframe = clusters_by_largest_max_radius ,
858- title = f"{ plot_type } clusters with the largest max radius" ,
859- plot_file_path = get_file_path (f"{ plot_type } _Clusters_largest_max_radius" , parameters )
870+ title = f"{ plot_prefix } clusters with the largest max radius" ,
871+ plot_file_path = get_file_path (f"{ plot_prefix } _Clusters_largest_max_radius" , parameters )
860872 )
861873
862874 clusters_by_largest_average_radius = get_clusters_by_criteria (
863875 data , by = 'clusterRadiusAverage' , ascending = False , cluster_count = 20
864876 )
865877 plot_clusters (
866878 clustering_visualization_dataframe = clusters_by_largest_average_radius ,
867- title = f"{ plot_type } clusters with the largest average radius" ,
868- plot_file_path = get_file_path (f"{ plot_type } _Clusters_largest_average_radius" , parameters )
879+ title = f"{ plot_prefix } clusters with the largest average radius" ,
880+ plot_file_path = get_file_path (f"{ plot_prefix } _Clusters_largest_average_radius" , parameters )
869881 )
870882
871883plot_clusters_probabilities (
872- clustering_visualization_dataframe = data ,
873- title = f"{ plot_type } clustering probabilities (red=high uncertainty)" ,
874- plot_file_path = get_file_path (f"{ plot_type } _Cluster_probabilities" , parameters )
884+ clustering_visualization_dataframe = data ,
885+ title = f"{ plot_prefix } clustering probabilities (red=high uncertainty)" ,
886+ plot_file_path = get_file_path (f"{ plot_prefix } _Cluster_probabilities" , parameters )
875887)
876888
877889plot_cluster_noise (
878890 clustering_visualization_dataframe = data ,
879- title = f"{ plot_type } clustering noise points that are surprisingly central (red) or popular (size)" ,
891+ title = f"{ plot_prefix } clustering noise points that are surprisingly central (red) or popular (size)" ,
880892 size_column_name = 'degree' ,
881893 color_column_name = 'pageRank' ,
882- plot_file_path = get_file_path (f"{ plot_type } _ClusterNoise_highly_central_and_popular" , parameters )
894+ plot_file_path = get_file_path (f"{ plot_prefix } _ClusterNoise_highly_central_and_popular" , parameters )
883895)
884896
885897plot_cluster_noise (
886898 clustering_visualization_dataframe = data ,
887- title = f"{ plot_type } clustering noise points that bridge flow (red) and are poorly integrated (size)" ,
899+ title = f"{ plot_prefix } clustering noise points that bridge flow (red) and are poorly integrated (size)" ,
888900 size_column_name = 'inverseClusteringCoefficient' ,
889901 color_column_name = 'betweenness' ,
890- plot_file_path = get_file_path (f"{ plot_type } _ClusterNoise_poorly_integrated_bridges" , parameters ),
902+ plot_file_path = get_file_path (f"{ plot_prefix } _ClusterNoise_poorly_integrated_bridges" , parameters ),
891903 downscale_normal_sizes = 0.4
892904)
893905
894906plot_cluster_noise (
895907 clustering_visualization_dataframe = data ,
896- title = f"{ plot_type } clustering noise points with role inversion (size) possibly violating layering or dependency direction (red)" ,
908+ title = f"{ plot_prefix } clustering noise points with role inversion (size) possibly violating layering or dependency direction (red)" ,
897909 size_column_name = 'pageToArticleRankDifference' ,
898910 color_column_name = 'betweenness' ,
899- plot_file_path = get_file_path (f"{ plot_type } _ClusterNoise_role_inverted_bridges" , parameters )
911+ plot_file_path = get_file_path (f"{ plot_prefix } _ClusterNoise_role_inverted_bridges" , parameters )
900912)
901913
902914driver .close ()
0 commit comments