"""Streamlit dashboard for interactively exploring MLOS experiment results."""

import streamlit as st
from mlos_analyzer.core.storage import storage
from mlos_analyzer.visualization.plots import plot_whisker_plots
from mlos_analyzer.visualization.correlation import plot_heatmap, plot_correlation_table_target
from mlos_analyzer.visualization.failure_metrics import (
    plot_success_failure_distribution,
    plot_failure_rate_by_config,
)
from mlos_analyzer.visualization.statistical import (
    run_pairwise_stat_tests,
    compare_score_distributions,
)
from mlos_analyzer.visualization.timeseries import plot_metric_over_time, plot_moving_average
from mlos_analyzer.visualization.distributions import (
    plot_metric_distribution,
    plot_violin_comparison,
)
from mlos_analyzer.visualization.performance import (
    plot_parallel_coordinates,
    plot_performance_radar,
)


def _render_overview(df):
    """Overview tab: dataset summary statistics and per-config trial counts."""
    st.header("Experiment Overview")
    col1, col2 = st.columns(2)
    with col1:
        st.subheader("Dataset Info")
        st.write(df.describe())
    with col2:
        st.subheader("Configuration Distribution")
        st.bar_chart(df["tunable_config_id"].value_counts())


def _render_performance(df, metrics):
    """Performance tab: whisker/heatmap plus optional multi-metric views."""
    st.header("Performance Analysis")
    selected_metric = st.selectbox("Select Metric", metrics, key="perf_metric")

    col1, col2 = st.columns(2)
    with col1:
        st.plotly_chart(plot_whisker_plots(df, selected_metric))
    with col2:
        st.plotly_chart(plot_heatmap(df))

    chosen = st.multiselect("Select Metrics for Advanced Analysis", metrics, default=metrics[:3])
    if chosen:
        col3, col4 = st.columns(2)
        with col3:
            st.plotly_chart(plot_parallel_coordinates(df, chosen))
        with col4:
            st.plotly_chart(plot_performance_radar(df, chosen))


def _render_timeseries(df, metrics):
    """Time-series tab: raw metric over time and its moving average."""
    st.header("Time Series Analysis")
    metric = st.selectbox("Select Metric", metrics, key="ts_metric")
    window = st.slider("Moving Average Window", 2, 20, 5)

    col1, col2 = st.columns(2)
    with col1:
        st.plotly_chart(plot_metric_over_time(df, metric))
    with col2:
        st.plotly_chart(plot_moving_average(df, metric, window))


def _render_distributions(df, metrics):
    """Distribution tab: histogram-style and violin comparisons of one metric."""
    st.header("Distribution Analysis")
    metric = st.selectbox("Select Metric", metrics, key="dist_metric")

    col1, col2 = st.columns(2)
    with col1:
        st.plotly_chart(plot_metric_distribution(df, metric))
    with col2:
        st.plotly_chart(plot_violin_comparison(df, metric))


def _render_failures(df):
    """Failure tab: success/failure split and per-config failure rates."""
    st.header("Failure Analysis")
    col1, col2 = st.columns(2)
    with col1:
        st.plotly_chart(plot_success_failure_distribution(df))
    with col2:
        st.plotly_chart(plot_failure_rate_by_config(df))


def _render_statistics(df, metrics):
    """Statistics tab: pairwise tests and two-config distribution comparison."""
    st.header("Statistical Analysis")
    test_metric = st.selectbox("Select Test Metric", metrics)
    alpha = st.slider("Significance Level (α)", 0.01, 0.10, 0.05)

    st.dataframe(run_pairwise_stat_tests(df, test_metric, alpha=alpha))

    st.subheader("Configuration Comparison")
    config1, config2 = st.columns(2)
    with config1:
        cfg1 = st.selectbox("First Configuration", df["tunable_config_id"].unique())
    with config2:
        cfg2 = st.selectbox("Second Configuration", df["tunable_config_id"].unique())

    # Only draw the comparison when two distinct configurations are selected.
    if cfg1 != cfg2:
        st.plotly_chart(compare_score_distributions(df, test_metric, cfg1, cfg2))


def main():
    """Entry point: page chrome, experiment selection, and the six analysis tabs."""
    st.set_page_config(page_title="MLOS Analyzer Dashboard", layout="wide")
    st.title("MLOS Experiment Analysis Dashboard")

    st.sidebar.header("Settings")
    experiment_ids = list(storage.experiments.keys())
    selected_experiment = st.sidebar.selectbox("Select Experiment", experiment_ids)
    if not selected_experiment:
        return

    df = storage.experiments[selected_experiment].results_df
    # Metric columns are identified by the "result" prefix in the results frame.
    metrics = [col for col in df.columns if col.startswith("result")]

    tabs = st.tabs(
        ["Overview", "Performance", "Time Series", "Distributions", "Failures", "Statistics"]
    )
    with tabs[0]:
        _render_overview(df)
    with tabs[1]:
        _render_performance(df, metrics)
    with tabs[2]:
        _render_timeseries(df, metrics)
    with tabs[3]:
        _render_distributions(df, metrics)
    with tabs[4]:
        _render_failures(df)
    with tabs[5]:
        _render_statistics(df, metrics)


if __name__ == "__main__":
    main()


# ---------------------------------------------------------------------------
# NOTE(review): the original span also contained the opening of a notebook
# diff (mlos_demo_mysql.ipynb) whose first code cell defines the column
# helpers used by the later interactive cells:
#
#   from ipywidgets import interact, widgets, HBox, VBox
#   config_columns = [col for col in df.columns if col.startswith("config.")]
#   result_columns = [col for col in df.columns if col.startswith("result.")]
#   status_options = tuple(df["status"].unique())  # tuple for SelectMultiple
#
# That cell belongs to the notebook, not to dashboard.py.
# ---------------------------------------------------------------------------
# Define a function for plotting relationships interactively with filtering.
def enhanced_plot_relationship(x_axis, y_axis, status_filter, show_grid, save_plot):
    """Scatter-plot one result metric against one config parameter.

    Parameters
    ----------
    x_axis, y_axis : str
        Column names to plot (config.* on x, result.* on y).
    status_filter : sequence of str
        Trial statuses to keep; rows with other statuses are dropped.
    show_grid : bool
        Whether to draw the plot grid.
    save_plot : bool
        If True, also write the figure to "<x>_vs_<y>.png".
    """
    # Filter data based on status
    filtered_df = df[df["status"].isin(status_filter)]

    # Create the plot
    plt.figure(figsize=(12, 8))
    sns.scatterplot(data=filtered_df, x=x_axis, y=y_axis, hue="status")
    plt.title(f"Relationship: {y_axis} vs {x_axis}")
    plt.xlabel(x_axis)
    plt.ylabel(y_axis)
    plt.grid(show_grid)
    plt.legend(title="Status")

    # BUG FIX: save BEFORE plt.show(). show() finalizes/releases the current
    # figure, so the original savefig-after-show wrote out a blank image.
    if save_plot:
        plt.savefig(f"{x_axis}_vs_{y_axis}.png", dpi=300)
        print(f"Plot saved as {x_axis}_vs_{y_axis}.png")

    plt.show()

# Widgets for filtering and plot settings
x_axis_dropdown = widgets.Dropdown(options=config_columns, description="X-axis:")
y_axis_dropdown = widgets.Dropdown(options=result_columns, description="Y-axis:")
status_filter_multi = widgets.SelectMultiple(
    options=status_options, description="Status:", value=status_options
)
show_grid_toggle = widgets.Checkbox(value=True, description="Show Grid")
save_plot_toggle = widgets.Checkbox(value=False, description="Save Plot")

# Display widgets in a neat layout
ui = VBox([
    HBox([x_axis_dropdown, y_axis_dropdown]),
    HBox([status_filter_multi, show_grid_toggle, save_plot_toggle]),
])

# Connect widgets to the enhanced plotting function
out = widgets.interactive_output(
    enhanced_plot_relationship,
    {
        "x_axis": x_axis_dropdown,
        "y_axis": y_axis_dropdown,
        "status_filter": status_filter_multi,
        "show_grid": show_grid_toggle,
        "save_plot": save_plot_toggle,
    },
)
"display(ui, out)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "46d463f7", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "8f2dac0852c5496bb018b6a949d5cc8a", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "interactive(children=(Dropdown(description='Result Column:', options=('result.errors', 'result.events', 'resul…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def plot_histogram(result_column, bins):\n", + " plt.figure(figsize=(10, 6))\n", + " sns.histplot(df[result_column].dropna(), bins=bins, kde=True)\n", + " plt.title(f\"Histogram of {result_column}\")\n", + " plt.xlabel(result_column)\n", + " plt.ylabel(\"Frequency\")\n", + " plt.grid(True)\n", + " plt.show()\n", + "\n", + "# Interactive widget for histogram\n", + "widgets.interact(\n", + " plot_histogram,\n", + " result_column=widgets.Dropdown(options=result_columns, description=\"Result Column:\"),\n", + " bins=widgets.IntSlider(value=20, min=5, max=50, step=5, description=\"Bins:\")\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "3f5921c6", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "adc57aae2f05412b83f298a6277c5846", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "interactive(children=(Dropdown(description='Group By:', options=('config.innodb_buffer_pool_dump_pct', 'config…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def plot_whisker(selected_config, selected_result):\n", + " \"\"\"\n", + " Create a box-and-whisker plot to show the 
distribution of a result metric\n", + " grouped by a configuration parameter or status.\n", + " \"\"\"\n", + " plt.figure(figsize=(12, 6))\n", + " sns.boxplot(\n", + " data=df,\n", + " x=selected_config,\n", + " y=selected_result,\n", + " hue=\"status\", # Color-code by status for additional insights\n", + " showfliers=True, # Show outliers\n", + " palette=\"Set2\"\n", + " )\n", + " plt.title(f\"Distribution of {selected_result} by {selected_config} (Grouped by Status)\")\n", + " plt.xlabel(selected_config)\n", + " plt.ylabel(selected_result)\n", + " plt.xticks(rotation=45)\n", + " plt.legend(title=\"Status\", loc=\"upper right\")\n", + " plt.grid(True)\n", + " plt.tight_layout()\n", + " plt.show()\n", + "\n", + "# Interactive widget for whisker plot\n", + "widgets.interact(\n", + " plot_whisker,\n", + " selected_config=widgets.Dropdown(\n", + " options=config_columns + [\"status\"], # Add 'status' for grouping by trial outcome\n", + " description=\"Group By:\"\n", + " ),\n", + " selected_result=widgets.Dropdown(\n", + " options=result_columns,\n", + " description=\"Metric:\"\n", + " )\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "4dfe14bb", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "3346432cb6824a3f80b62585b46882aa", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "interactive(children=(Dropdown(description='Correlation Method:', options=('pearson', 'spearman', 'kendall'), …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def plot_heatmap(corr_method):\n", + " # Select only numeric columns from config and result columns\n", + " numeric_columns = df[config_columns + result_columns].select_dtypes(include=[\"number\"])\n", + " \n", + " if numeric_columns.empty:\n", + " print(\"No 
numeric columns available for correlation.\")\n", + " return\n", + " \n", + " corr_matrix = numeric_columns.corr(method=corr_method)\n", + " \n", + " plt.figure(figsize=(12, 8))\n", + " sns.heatmap(corr_matrix, annot=True, cmap=\"coolwarm\", fmt=\".2f\")\n", + " plt.title(f\"Correlation Heatmap ({corr_method.capitalize()})\")\n", + " plt.show()\n", + "\n", + "# Interactive widget for heatmap\n", + "widgets.interact(\n", + " plot_heatmap,\n", + " corr_method=widgets.Dropdown(\n", + " options=[\"pearson\", \"spearman\", \"kendall\"],\n", + " description=\"Correlation Method:\",\n", + " value=\"pearson\"\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "7086c477", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "002afb14e8674a37a5d29c04438a3826", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "interactive(children=(Dropdown(description='Metric:', options=('result.errors', 'result.events', 'result.laten…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def find_optimal_config(metric, percentile):\n", + " \"\"\"\n", + " Find configurations with a result metric below the specified percentile.\n", + " \"\"\"\n", + " if metric not in result_columns:\n", + " print(f\"Invalid metric selected. 
Please choose from: {result_columns}\")\n", + " return\n", + " \n", + " # Compute the specified percentile threshold\n", + " threshold = df[metric].quantile(percentile / 100)\n", + "\n", + " # Filter configurations meeting the threshold\n", + " optimal_configs = df[df[metric] <= threshold]\n", + "\n", + " # Display the results\n", + " print(f\"Optimal Configurations (Top {percentile}% based on {metric} ≤ {threshold:.2f}):\")\n", + " display(optimal_configs[config_columns + [metric]].sort_values(by=metric))\n", + "\n", + " # Highlight the best configuration\n", + " if not optimal_configs.empty:\n", + " best_config = optimal_configs.loc[optimal_configs[metric].idxmin()]\n", + " print(\"\\nBest Configuration:\")\n", + " display(best_config[config_columns + [metric]])\n", + " else:\n", + " print(\"No configurations meet the criteria.\")\n", + "\n", + "# Interactive widget for percentile analysis\n", + "widgets.interact(\n", + " find_optimal_config,\n", + " metric=widgets.Dropdown(\n", + " options=result_columns,\n", + " description=\"Metric:\"\n", + " ),\n", + " percentile=widgets.IntSlider(\n", + " value=10,\n", + " min=1,\n", + " max=100,\n", + " step=1,\n", + " description=\"Percentile:\"\n", + " )\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "1f9bf98d", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "551ce55d380b431894bd99cd3eb10925", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "interactive(children=(Dropdown(description='X-axis:', options=('config.innodb_buffer_pool_dump_pct', 'config.i…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def plot_pairplots(selected_x, selected_y, hue_col):\n", + " \"\"\"\n", + " Create a pair plot with the given x, y, and hue for 
visualization.\n", + " \"\"\"\n", + " # Filter relevant columns\n", + " data_subset = df[[selected_x, selected_y, hue_col]].dropna()\n", + "\n", + " # Create the pair plot\n", + " plt.figure(figsize=(10, 6))\n", + " sns.scatterplot(\n", + " data=data_subset, x=selected_x, y=selected_y, hue=hue_col, alpha=0.7\n", + " )\n", + " plt.title(f\"{selected_y} vs {selected_x} (Hue: {hue_col})\")\n", + " plt.xlabel(selected_x)\n", + " plt.ylabel(selected_y)\n", + " plt.grid(True)\n", + " plt.legend(title=hue_col, loc=\"upper right\")\n", + " plt.show()\n", + "\n", + "# Interactive widget for pair plot exploration\n", + "widgets.interact(\n", + " plot_pairplots,\n", + " selected_x=widgets.Dropdown(\n", + " options=config_columns,\n", + " description=\"X-axis:\"\n", + " ),\n", + " selected_y=widgets.Dropdown(\n", + " options=result_columns,\n", + " description=\"Y-axis:\"\n", + " ),\n", + " hue_col=widgets.Dropdown(\n", + " options=[\"status\"] + config_columns + result_columns,\n", + " description=\"Hue:\"\n", + " )\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "a587a2be", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "566f635a9eee4bba883cdb947a85ffd1", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "interactive(children=(Dropdown(description='Config ID 1:', options=(1088, 1089, 1090, 1125, 1126, 1127, 1128, …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "# Function to compare two configurations\n", + "def compare_two_configs(config_id_1, config_id_2, metric):\n", + " \"\"\"\n", + " Compare results of two configuration IDs graphically.\n", + "\n", + " Parameters:\n", + " - config_id_1: First configuration ID\n", + " - config_id_2: Second configuration ID\n", + " - metric: 
Performance metric to compare\n", + " \"\"\"\n", + " config_1_data = df[df[\"tunable_config_id\"] == config_id_1]\n", + " config_2_data = df[df[\"tunable_config_id\"] == config_id_2]\n", + "\n", + " plt.figure(figsize=(12, 6))\n", + "\n", + " # Plot results for config 1\n", + " sns.lineplot(\n", + " data=config_1_data,\n", + " x=\"trial_id\",\n", + " y=metric,\n", + " marker=\"o\",\n", + " label=f\"Config {config_id_1}\",\n", + " color=\"blue\"\n", + " )\n", + "\n", + " # Plot results for config 2\n", + " sns.lineplot(\n", + " data=config_2_data,\n", + " x=\"trial_id\",\n", + " y=metric,\n", + " marker=\"o\",\n", + " label=f\"Config {config_id_2}\",\n", + " color=\"orange\"\n", + " )\n", + "\n", + " plt.title(f\"Comparison of {metric} for Config {config_id_1} vs Config {config_id_2}\")\n", + " plt.xlabel(\"Trial ID\")\n", + " plt.ylabel(metric)\n", + " plt.legend()\n", + " plt.grid(True)\n", + " plt.tight_layout()\n", + " plt.show()\n", + "\n", + "# Interactive widgets for selecting configurations and metric\n", + "interact(\n", + " compare_two_configs,\n", + " config_id_1=widgets.Dropdown(\n", + " options=df[\"tunable_config_id\"].unique(),\n", + " description=\"Config ID 1:\"\n", + " ),\n", + " config_id_2=widgets.Dropdown(\n", + " options=df[\"tunable_config_id\"].unique(),\n", + " description=\"Config ID 2:\"\n", + " ),\n", + " metric=widgets.Dropdown(\n", + " options=result_columns,\n", + " description=\"Metric:\"\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "03f87a86", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "2a4eab45ef2741cabc1fd01d0a11bb1a", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "interactive(children=(Dropdown(description='Config 1 ID:', options=(1088, 1089, 1090, 1125, 1126, 1127, 1128, …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "" + ] + }, + 
"execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def compare_configs(config1, config2):\n", + " # Filter rows for the selected configurations\n", + " config1_row = df.loc[df['tunable_config_id'] == config1]\n", + " config2_row = df.loc[df['tunable_config_id'] == config2]\n", + "\n", + " # Ensure both configurations are found\n", + " if config1_row.empty or config2_row.empty:\n", + " print(\"One or both of the selected configurations do not exist.\")\n", + " return\n", + "\n", + " # Combine configuration and result columns for display\n", + " compare_columns = config_columns + result_columns\n", + " comparison_df = pd.DataFrame({\n", + " \"Parameter\": compare_columns,\n", + " f\"Config {config1}\": config1_row[compare_columns].iloc[0].values,\n", + " f\"Config {config2}\": config2_row[compare_columns].iloc[0].values\n", + " })\n", + "\n", + " # Display the comparison as a table\n", + " display(comparison_df)\n", + "\n", + "# Interactive widget for configuration comparison\n", + "widgets.interact(\n", + " compare_configs,\n", + " config1=widgets.Dropdown(\n", + " options=df[\"tunable_config_id\"].unique(),\n", + " description=\"Config 1 ID:\"\n", + " ),\n", + " config2=widgets.Dropdown(\n", + " options=df[\"tunable_config_id\"].unique(),\n", + " description=\"Config 2 ID:\"\n", + " )\n", + ")" + ] + }, { "cell_type": "markdown", "id": "7cb4794f", "metadata": {}, "source": [ - "### First automatically with mlos_viz" + "### Also automatically with mlos_viz" ] }, {