From 3ca44c7c6c0125c64acdbc2253076a36fbd55729 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Mon, 26 May 2025 09:48:54 +0200 Subject: [PATCH 1/9] add highligh utils --- doc/utils/style_tables.py | 143 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100644 doc/utils/style_tables.py diff --git a/doc/utils/style_tables.py b/doc/utils/style_tables.py new file mode 100644 index 0000000..3f5750c --- /dev/null +++ b/doc/utils/style_tables.py @@ -0,0 +1,143 @@ +import numpy as np +import pandas as pd +from pandas.io.formats.style import Styler + + +# Define highlighting tiers as a list of dictionaries or tuples +# Each element defines: dist, props. Applied in order (later rules can override). +# Order: from least specific (largest dist) to most specific (smallest dist) +# or ensure the _apply_highlight_range logic correctly handles overlaps if props are different. +# Current logic: more specific (smaller dist) rules are applied last and override. +HIGHLIGHT_TIERS = [ + {"dist": 1.0, "props": "color:black;background-color:red;"}, + {"dist": 0.1, "props": "color:black;background-color:yellow;"}, + {"dist": 0.05, "props": "color:white;background-color:darkgreen;"}, +] + + +def _apply_highlight_range( + s_col: pd.Series, level: float, dist: float, props: str +) -> np.ndarray: + """ + Helper function for Styler.apply. Applies CSS properties based on a numeric range. + Returns an array of CSS strings. + """ + s_numeric = pd.to_numeric( + s_col, errors="coerce" + ) # Convert to numeric, non-convertibles become NaN + # Apply style ONLY if value is WITHIN the current dist from level + # This means for tiered styling, the order of applying styles in the calling function matters. + # If a value falls into multiple dist categories, the LAST applied style for that dist will win. 
+ condition = (s_numeric >= level - dist) & (s_numeric <= level + dist) + return np.where(condition, props, "") + + +def color_coverage_columns( + styler: Styler, level: float, coverage_cols: list[str] = ["Coverage"] +) -> Styler: + """ + Applies tiered highlighting to specified coverage columns of a Styler object. + The order of application matters: more specific (narrower dist) rules are applied last to override. + """ + if not isinstance(styler, Styler): + raise TypeError("Expected a pandas Styler object.") + + # Ensure coverage_cols is a list + if isinstance(coverage_cols, str): + coverage_cols = [coverage_cols] + + # Filter for columns that actually exist in the DataFrame being styled + valid_coverage_cols = [col for col in coverage_cols if col in styler.data.columns] + + if not valid_coverage_cols: + return styler # No valid columns to style + + # Apply highlighting rules from the defined tiers + # The order in HIGHLIGHT_TIERS is important if props are meant to override. + # Pandas Styler.apply applies styles sequentially. If a cell matches multiple + # conditions from different .apply calls, the styles from later calls typically override + # or merge with earlier ones, depending on the CSS properties. + # For background-color, later calls will override. 
+ current_styler = styler + for tier in HIGHLIGHT_TIERS: + current_styler = current_styler.apply( + _apply_highlight_range, + level=level, + dist=tier["dist"], + props=tier["props"], + subset=valid_coverage_cols, + ) + + # Set font to bold for the coverage columns + current_styler = current_styler.set_properties( + **{"font-weight": "bold"}, subset=valid_coverage_cols + ) + return current_styler + + +def create_styled_table( + df: pd.DataFrame, + level: float, + n_rep: int, # Or Union[int, str] if "N/A" is possible + caption_prefix: str = "Coverage", + coverage_cols: list[str] = ["Coverage"], + float_precision: str = "{:.3f}", +) -> Styler: + """ + Creates a styled pandas DataFrame (Styler object) for display. + - Hides the DataFrame index. + - Formats float columns to a specified precision. + - Applies conditional highlighting to coverage columns. + - Sets a descriptive caption. + """ + if not isinstance(df, pd.DataFrame): + return pd.DataFrame({"Error": ["Input is not a DataFrame."]}).style.hide( + axis="index" + ) + + if df.empty: + empty_df_cols = df.columns if df.columns.tolist() else ["Info"] + message_val = ( + ["No data to display."] + if not df.columns.tolist() + else [None] * len(empty_df_cols) + ) + return ( + pd.DataFrame( + ( + dict(zip(empty_df_cols, [[v] for v in message_val])) + if not df.columns.tolist() + else [] + ), + columns=empty_df_cols, + ) + .style.hide(axis="index") + .set_caption("No data to display.") + ) + + # Prepare float formatting dictionary + float_cols = df.select_dtypes(include=["float"]).columns + format_dict = {col: float_precision for col in float_cols if col in df.columns} + + # Create and set the caption text + level_percent = level * 100 + if abs(level_percent - round(level_percent)) < 1e-9: + level_display = f"{int(round(level_percent))}" + else: + level_display = f"{level_percent:.1f}" + + n_rep_display = str(n_rep) # Ensure n_rep is a string for the caption + + caption_text = f"{caption_prefix} for 
{level_display}%-Confidence Interval over {n_rep_display} Repetitions" + + # Chain Styler methods + styled_df = ( + df.style.hide(axis="index") + .format( + format_dict if format_dict else None + ) # Pass None if no float cols to format + .pipe(color_coverage_columns, level=level, coverage_cols=coverage_cols) + .set_caption(caption_text) + ) + + return styled_df From 106c288554d5167c8818db1e6277636d65023798 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Mon, 26 May 2025 10:11:13 +0200 Subject: [PATCH 2/9] update plr page highlighting --- doc/plm/plr.qmd | 245 +++++++++++++++++++----------------------------- 1 file changed, 95 insertions(+), 150 deletions(-) diff --git a/doc/plm/plr.qmd b/doc/plm/plr.qmd index 31300f5..226716a 100644 --- a/doc/plm/plr.qmd +++ b/doc/plm/plr.qmd @@ -10,54 +10,16 @@ jupyter: python3 import numpy as np import pandas as pd from itables import init_notebook_mode, show, options +import os +import sys -init_notebook_mode(all_interactive=True) +doc_dir = os.path.abspath(os.path.join(os.getcwd(), "..")) +if doc_dir not in sys.path: + sys.path.append(doc_dir) + +from utils.style_tables import create_styled_table -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage"]) - return styled_df - - -def make_pretty(df, 
level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) +init_notebook_mode(all_interactive=True) ``` ## ATE Coverage @@ -79,34 +41,41 @@ print(metadata_df.T.to_string(header=False)) #| echo: false # set up data and rename columns -df = pd.read_csv("../../results/plm/plr_ate_coverage.csv", index_col=None) +df_coverage = pd.read_csv("../../results/plm/plr_ate_coverage.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +if "repetition" in df_coverage.columns and df_coverage["repetition"].nunique() == 1: + n_rep_coverage = df_coverage["repetition"].unique()[0] +elif "n_rep" in df_coverage.columns and df_coverage["n_rep"].nunique() == 1: + n_rep_coverage = df_coverage["n_rep"].unique()[0] +else: + n_rep_coverage = "N/A" # Fallback if n_rep cannot be determined -display_columns = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage"] +display_columns_coverage = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage"] ``` ### Partialling out ```{python} # | echo: false -score = "partialling out" -level = 0.95 +score_po = "partialling out" +level_95 = 0.95 -df_ate_95 = df[(df["level"] == level) & (df["score"] == score)][display_columns] -df_ate_95.rename(columns={"Learner g": "Learner l"}, inplace=True) -make_pretty(df_ate_95, level, n_rep) +df_po_95 = df_coverage[(df_coverage["level"] == level_95) & (df_coverage["score"] == score_po)][display_columns_coverage].copy() +df_po_95.rename(columns={"Learner g": "Learner l"}, inplace=True) + +styled_table_po_95 = create_styled_table(df_po_95, level_95, n_rep_coverage, 
coverage_cols=["Coverage"]) +show(styled_table_po_95, allow_html=True) ``` ```{python} #| echo: false -score = "partialling out" -level = 0.9 +level_90 = 0.9 + +df_po_90 = df_coverage[(df_coverage['level'] == level_90) & (df_coverage["score"] == score_po)][display_columns_coverage].copy() +df_po_90.rename(columns={"Learner g": "Learner l"}, inplace=True) -df_ate_9 = df[(df['level'] == level) & (df["score"] == score)][display_columns] -df_ate_9.rename(columns={"Learner g": "Learner l"}, inplace=True) -make_pretty(df_ate_9, level, n_rep) +styled_table_po_90 = create_styled_table(df_po_90, level_90, n_rep_coverage, coverage_cols=["Coverage"]) +show(styled_table_po_90, allow_html=True) ``` ### IV-type @@ -115,20 +84,23 @@ For the IV-type score, the learners `ml_l` and `ml_g` are both set to the same t ```{python} #| echo: false -score = "IV-type" -level = 0.95 +score_iv = "IV-type" +# level_95 is already defined + +df_iv_95 = df_coverage[(df_coverage['level'] == level_95) & (df_coverage["score"] == score_iv)][display_columns_coverage] -df_ate_95 = df[(df['level'] == level) & (df["score"] == score)][display_columns] -make_pretty(df_ate_95, level, n_rep) +styled_table_iv_95 = create_styled_table(df_iv_95, level_95, n_rep_coverage, coverage_cols=["Coverage"]) +show(styled_table_iv_95, allow_html=True) ``` ```{python} #| echo: false -score = "IV-type" -level = 0.9 +# level_90 is already defined -df_ate_9 = df[(df['level'] == level) & (df["score"] == score)][display_columns] -make_pretty(df_ate_9, level, n_rep) +df_iv_90 = df_coverage[(df_coverage['level'] == level_90) & (df_coverage["score"] == score_iv)][display_columns_coverage] + +styled_table_iv_90 = create_styled_table(df_iv_90, level_90, n_rep_coverage, coverage_cols=["Coverage"]) +show(styled_table_iv_90, allow_html=True) ``` ## ATE Sensitivity @@ -142,9 +114,9 @@ Further, the corresponding confidence intervals are one-sided (since the directi ```{python} #| echo: false -metadata_file = 
'../../results/plm/plr_ate_sensitivity_metadata.csv' -metadata_df = pd.read_csv(metadata_file) -print(metadata_df.T.to_string(header=False)) +metadata_file_sens = '../../results/plm/plr_ate_sensitivity_metadata.csv' +metadata_df_sens = pd.read_csv(metadata_file_sens) +print(metadata_df_sens.T.to_string(header=False)) ``` ::: @@ -153,91 +125,52 @@ print(metadata_df.T.to_string(header=False)) #| echo: false # set up data and rename columns -df = pd.read_csv("../../results/plm/plr_ate_sensitivity.csv", index_col=None) +df_sensitivity = pd.read_csv("../../results/plm/plr_ate_sensitivity.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +if "repetition" in df_sensitivity.columns and df_sensitivity["repetition"].nunique() == 1: + n_rep_sensitivity = df_sensitivity["repetition"].unique()[0] +elif "n_rep" in df_sensitivity.columns and df_sensitivity["n_rep"].nunique() == 1: + n_rep_sensitivity = df_sensitivity["n_rep"].unique()[0] +else: + n_rep_sensitivity = "N/A" -display_columns = [ +display_columns_sensitivity = [ "Learner g", "Learner m", "Bias", "Bias (Lower)", "Bias (Upper)", "Coverage", "Coverage (Lower)", "Coverage (Upper)", "RV", "RVa"] ``` -```{python} -#| echo: false - -import numpy as np -import pandas as pd -from itables import init_notebook_mode, show, options - -init_notebook_mode(all_interactive=True) - -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage", "Coverage (Upper)"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage", "Coverage (Upper)"]) - styled_df = styled_df.apply( - highlight_range, - 
level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage", "Coverage (Upper)"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage", "Coverage (Upper)"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) -``` ### Partialling out ```{python} #| echo: false -score = "partialling out" -level = 0.95 - -df_ate_95 = df[(df['level'] == level) & (df["score"] == score)][display_columns] -df_ate_95.rename(columns={"Learner g": "Learner l"}, inplace=True) -make_pretty(df_ate_95, level, n_rep) +# score_po and level_95 are defined above + +df_sens_po_95 = df_sensitivity[(df_sensitivity['level'] == level_95) & (df_sensitivity["score"] == score_po)][display_columns_sensitivity].copy() +df_sens_po_95.rename(columns={"Learner g": "Learner l"}, inplace=True) + +styled_sens_po_95 = create_styled_table( + df_sens_po_95, + level_95, + n_rep_sensitivity, + coverage_cols=["Coverage", "Coverage (Upper)"] # Highlight both +) +show(styled_sens_po_95, allow_html=True) ``` ```{python} #| echo: false -score = "partialling out" -level = 0.9 - -df_ate_9 = df[(df['level'] == level) & (df["score"] == score)][display_columns] -df_ate_9.rename(columns={"Learner g": "Learner l"}, inplace=True) -make_pretty(df_ate_9, level, n_rep) +# score_po and level_90 are defined above + +df_sens_po_90 = df_sensitivity[(df_sensitivity['level'] == level_90) & (df_sensitivity["score"] == score_po)][display_columns_sensitivity].copy() 
+df_sens_po_90.rename(columns={"Learner g": "Learner l"}, inplace=True) + +styled_sens_po_90 = create_styled_table( + df_sens_po_90, + level_90, + n_rep_sensitivity, + coverage_cols=["Coverage", "Coverage (Upper)"] +) +show(styled_sens_po_90, allow_html=True) ``` ### IV-type @@ -246,18 +179,30 @@ For the IV-type score, the learners `ml_l` and `ml_g` are both set to the same t ```{python} #| echo: false -score = "IV-type" -level = 0.95 +# score_iv and level_95 are defined above -df_ate_95 = df[(df['level'] == level) & (df["score"] == score)][display_columns] -make_pretty(df_ate_95, level, n_rep) +df_sens_iv_95 = df_sensitivity[(df_sensitivity['level'] == level_95) & (df_sensitivity["score"] == score_iv)][display_columns_sensitivity] + +styled_sens_iv_95 = create_styled_table( + df_sens_iv_95, + level_95, + n_rep_sensitivity, + coverage_cols=["Coverage", "Coverage (Upper)"] +) +show(styled_sens_iv_95, allow_html=True) ``` ```{python} #| echo: false -score = "IV-type" -level = 0.9 +# score_iv and level_90 are defined above + +df_sens_iv_90 = df_sensitivity[(df_sensitivity['level'] == level_90) & (df_sensitivity["score"] == score_iv)][display_columns_sensitivity] -df_ate_9 = df[(df['level'] == level) & (df["score"] == score)][display_columns] -make_pretty(df_ate_9, level, n_rep) +styled_sens_iv_90 = create_styled_table( + df_sens_iv_90, + level_90, + n_rep_sensitivity, + coverage_cols=["Coverage", "Coverage (Upper)"] +) +show(styled_sens_iv_90, allow_html=True) ``` From ad0b2970c20c7ba8ffe115ff9d67df834903df83 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Mon, 26 May 2025 12:24:35 +0200 Subject: [PATCH 3/9] extend utils for tables --- doc/utils/style_tables.py | 116 +++++++++++++++++++++++++++++++++----- 1 file changed, 102 insertions(+), 14 deletions(-) diff --git a/doc/utils/style_tables.py b/doc/utils/style_tables.py index 3f5750c..5d73466 100644 --- a/doc/utils/style_tables.py +++ b/doc/utils/style_tables.py @@ -1,6 +1,8 @@ import numpy as np import pandas as 
pd from pandas.io.formats.style import Styler +from typing import Union, Optional, List, Any +from itables import show # Define highlighting tiers as a list of dictionaries or tuples @@ -78,9 +80,9 @@ def color_coverage_columns( def create_styled_table( df: pd.DataFrame, level: float, - n_rep: int, # Or Union[int, str] if "N/A" is possible + n_rep: Union[int, str], caption_prefix: str = "Coverage", - coverage_cols: list[str] = ["Coverage"], + coverage_cols: List[str] = ["Coverage"], float_precision: str = "{:.3f}", ) -> Styler: """ @@ -102,21 +104,18 @@ def create_styled_table( if not df.columns.tolist() else [None] * len(empty_df_cols) ) - return ( - pd.DataFrame( - ( - dict(zip(empty_df_cols, [[v] for v in message_val])) - if not df.columns.tolist() - else [] - ), - columns=empty_df_cols, - ) - .style.hide(axis="index") - .set_caption("No data to display.") + df_to_style = pd.DataFrame( + ( + dict(zip(empty_df_cols, [[v] for v in message_val])) + if not df.columns.tolist() + else {} # Pass empty dict for empty DataFrame with columns + ), + columns=empty_df_cols, ) + return df_to_style.style.hide(axis="index").set_caption("No data to display.") # Prepare float formatting dictionary - float_cols = df.select_dtypes(include=["float"]).columns + float_cols = df.select_dtypes(include=["float", "float64", "float32"]).columns format_dict = {col: float_precision for col in float_cols if col in df.columns} # Create and set the caption text @@ -141,3 +140,92 @@ def create_styled_table( ) return styled_df + + +def generate_and_show_styled_table( + main_df: pd.DataFrame, + filters: dict[str, Any], + display_cols: List[str], + n_rep: Union[int, str], + level_col: str = "level", + rename_map: Optional[dict[str, str]] = None, + caption_prefix: str = "Coverage", + coverage_highlight_cols: List[str] = ["Coverage"], + float_precision: str = "{:.3f}", +): + """ + Filters a DataFrame based on a dictionary of conditions, + creates a styled table, and displays it. 
+ """ + if main_df.empty: + print("Warning: Input DataFrame is empty.") + # Optionally, show an empty table or a message + empty_styled_df = ( + pd.DataFrame(columns=display_cols) + .style.hide(axis="index") + .set_caption("No data available (input empty).") + ) + show(empty_styled_df, allow_html=True) + return + + # Build filter condition + current_df = main_df + filter_conditions = [] + filter_description_parts = [] + + for col, value in filters.items(): + if col not in current_df.columns: + print( + f"Warning: Filter column '{col}' not found in DataFrame. Skipping this filter." + ) + continue + current_df = current_df[current_df[col] == value] + filter_conditions.append(f"{col} == {value}") + filter_description_parts.append(f"{col}='{value}'") + + filter_description = " & ".join(filter_description_parts) + + if current_df.empty: + level_val = filters.get(level_col, "N/A") + level_percent_display = ( + f"{level_val*100}%" if isinstance(level_val, (int, float)) else level_val + ) + caption_msg = f"No data after filtering for {filter_description} at {level_percent_display} level." + print(f"Warning: {caption_msg}") + empty_styled_df = ( + pd.DataFrame(columns=display_cols) + .style.hide(axis="index") + .set_caption(caption_msg) + ) + show(empty_styled_df, allow_html=True) + return + + df_filtered = current_df[ + display_cols + ].copy() # Select display columns after filtering + + if rename_map: + df_filtered.rename(columns=rename_map, inplace=True) + + # Determine the level for styling from the filters, if present + styling_level = filters.get(level_col) + if styling_level is None or not isinstance(styling_level, (float, int)): + print( + f"Warning: '{level_col}' not found in filters or is not numeric. Cannot determine styling level for highlighting." 
+ ) + # Fallback or raise error, for now, we'll proceed without level-specific caption part if it's missing + # Or you could try to infer it if there's only one unique level in the filtered data + if level_col in df_filtered.columns and df_filtered[level_col].nunique() == 1: + styling_level = df_filtered[level_col].iloc[0] + else: # Default to a common value or skip styling that depends on 'level' + styling_level = 0.95 # Default, or handle error + + styled_table = create_styled_table( + df_filtered, + styling_level, # Use the level from filters for styling + n_rep, + caption_prefix=caption_prefix, + coverage_cols=coverage_highlight_cols, + float_precision=float_precision, + ) + show(styled_table, allow_html=True) From d7a54448510c61edf0c5412ff70b759a02179087 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Mon, 26 May 2025 12:57:21 +0200 Subject: [PATCH 4/9] update plm websites --- doc/plm/pliv.qmd | 120 +++++++++++++++++---------------------- doc/plm/plr.qmd | 132 ++++++++++++++++++++++--------------------- doc/plm/plr_cate.qmd | 96 ++++++++++++------------------- doc/plm/plr_gate.qmd | 95 ++++++++++++------------------- 4 files changed, 191 insertions(+), 252 deletions(-) diff --git a/doc/plm/pliv.qmd b/doc/plm/pliv.qmd index 6c4d2a9..65d87f2 100644 --- a/doc/plm/pliv.qmd +++ b/doc/plm/pliv.qmd @@ -9,55 +9,17 @@ jupyter: python3 import numpy as np import pandas as pd -from itables import init_notebook_mode, show, options +from itables import init_notebook_mode +import os +import sys -init_notebook_mode(all_interactive=True) +doc_dir = os.path.abspath(os.path.join(os.getcwd(), "..")) +if doc_dir not in sys.path: + sys.path.append(doc_dir) + +from utils.style_tables import generate_and_show_styled_table -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - 
highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) +init_notebook_mode(all_interactive=True) ``` ## LATE Coverage @@ -79,34 +41,46 @@ print(metadata_df.T.to_string(header=False)) #| echo: false # set up data and rename columns -df = pd.read_csv("../../results/plm/pliv_late_coverage.csv", index_col=None) +df_coverage_pliv = pd.read_csv("../../results/plm/pliv_late_coverage.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +if "repetition" in df_coverage_pliv.columns and df_coverage_pliv["repetition"].nunique() == 1: + n_rep_pliv = df_coverage_pliv["repetition"].unique()[0] +elif "n_rep" in df_coverage_pliv.columns and df_coverage_pliv["n_rep"].nunique() == 1: + n_rep_pliv = df_coverage_pliv["n_rep"].unique()[0] +else: + n_rep_pliv = "N/A" -display_columns = ["Learner g", "Learner m", "Learner r", "Bias", "CI Length", "Coverage"] +display_columns_pliv = ["Learner g", "Learner m", "Learner r", "Bias", "CI Length", "Coverage"] ``` ### Partialling out 
```{python} #| echo: false -score = "partialling out" -level = 0.95 -df_ate_95 = df[(df['level'] == level) & (df["score"] == score)][display_columns] -df_ate_95.rename(columns={"Learner g": "Learner l"}, inplace=True) -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_coverage_pliv, + filters={"level": 0.95, "score": "partialling out"}, + display_cols=display_columns_pliv, + n_rep=n_rep_pliv, + level_col="level", + rename_map={"Learner g": "Learner l"}, + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -score = "partialling out" -level = 0.9 -df_ate_9 = df[(df['level'] == level) & (df["score"] == score)][display_columns] -df_ate_9.rename(columns={"Learner g": "Learner l"}, inplace=True) -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_coverage_pliv, + filters={"level": 0.90, "score": "partialling out"}, + display_cols=display_columns_pliv, + n_rep=n_rep_pliv, + level_col="level", + rename_map={"Learner g": "Learner l"}, + coverage_highlight_cols=["Coverage"] +) ``` ### IV-type @@ -115,18 +89,26 @@ For the IV-type score, the learners `ml_l` and `ml_g` are both set to the same t ```{python} #| echo: false -score = "IV-type" -level = 0.95 -df_ate_95 = df[(df['level'] == level) & (df["score"] == score)][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_coverage_pliv, + filters={"level": 0.95, "score": "IV-type"}, + display_cols=display_columns_pliv, + n_rep=n_rep_pliv, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -score = "IV-type" -level = 0.9 -df_ate_9 = df[(df['level'] == level) & (df["score"] == score)][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_coverage_pliv, + filters={"level": 0.9, "score": "IV-type"}, + display_cols=display_columns_pliv, + n_rep=n_rep_pliv, + level_col="level", + coverage_highlight_cols=["Coverage"] +) 
``` diff --git a/doc/plm/plr.qmd b/doc/plm/plr.qmd index 226716a..a5ba041 100644 --- a/doc/plm/plr.qmd +++ b/doc/plm/plr.qmd @@ -9,7 +9,7 @@ jupyter: python3 import numpy as np import pandas as pd -from itables import init_notebook_mode, show, options +from itables import init_notebook_mode import os import sys @@ -17,7 +17,7 @@ doc_dir = os.path.abspath(os.path.join(os.getcwd(), "..")) if doc_dir not in sys.path: sys.path.append(doc_dir) -from utils.style_tables import create_styled_table +from utils.style_tables import generate_and_show_styled_table init_notebook_mode(all_interactive=True) ``` @@ -57,25 +57,30 @@ display_columns_coverage = ["Learner g", "Learner m", "Bias", "CI Length", "Cove ```{python} # | echo: false -score_po = "partialling out" -level_95 = 0.95 -df_po_95 = df_coverage[(df_coverage["level"] == level_95) & (df_coverage["score"] == score_po)][display_columns_coverage].copy() -df_po_95.rename(columns={"Learner g": "Learner l"}, inplace=True) - -styled_table_po_95 = create_styled_table(df_po_95, level_95, n_rep_coverage, coverage_cols=["Coverage"]) -show(styled_table_po_95, allow_html=True) +generate_and_show_styled_table( + main_df=df_coverage, + filters={"level": 0.95, "score": "partialling out"}, + display_cols=display_columns_coverage, + n_rep=n_rep_coverage, + level_col="level", + rename_map={"Learner g": "Learner l"}, + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -level_90 = 0.9 -df_po_90 = df_coverage[(df_coverage['level'] == level_90) & (df_coverage["score"] == score_po)][display_columns_coverage].copy() -df_po_90.rename(columns={"Learner g": "Learner l"}, inplace=True) - -styled_table_po_90 = create_styled_table(df_po_90, level_90, n_rep_coverage, coverage_cols=["Coverage"]) -show(styled_table_po_90, allow_html=True) +generate_and_show_styled_table( + main_df=df_coverage, + filters={"level": 0.9, "score": "partialling out"}, + display_cols=display_columns_coverage, + n_rep=n_rep_coverage, + level_col="level", + 
rename_map={"Learner g": "Learner l"}, + coverage_highlight_cols=["Coverage"] +) ``` ### IV-type @@ -84,23 +89,28 @@ For the IV-type score, the learners `ml_l` and `ml_g` are both set to the same t ```{python} #| echo: false -score_iv = "IV-type" -# level_95 is already defined -df_iv_95 = df_coverage[(df_coverage['level'] == level_95) & (df_coverage["score"] == score_iv)][display_columns_coverage] - -styled_table_iv_95 = create_styled_table(df_iv_95, level_95, n_rep_coverage, coverage_cols=["Coverage"]) -show(styled_table_iv_95, allow_html=True) +generate_and_show_styled_table( + main_df=df_coverage, + filters={"level": 0.95, "score": "IV-type"}, + display_cols=display_columns_coverage, + n_rep=n_rep_coverage, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -# level_90 is already defined - -df_iv_90 = df_coverage[(df_coverage['level'] == level_90) & (df_coverage["score"] == score_iv)][display_columns_coverage] -styled_table_iv_90 = create_styled_table(df_iv_90, level_90, n_rep_coverage, coverage_cols=["Coverage"]) -show(styled_table_iv_90, allow_html=True) +generate_and_show_styled_table( + main_df=df_coverage, + filters={"level": 0.9, "score": "IV-type"}, + display_cols=display_columns_coverage, + n_rep=n_rep_coverage, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ## ATE Sensitivity @@ -143,34 +153,30 @@ display_columns_sensitivity = [ ```{python} #| echo: false -# score_po and level_95 are defined above - -df_sens_po_95 = df_sensitivity[(df_sensitivity['level'] == level_95) & (df_sensitivity["score"] == score_po)][display_columns_sensitivity].copy() -df_sens_po_95.rename(columns={"Learner g": "Learner l"}, inplace=True) -styled_sens_po_95 = create_styled_table( - df_sens_po_95, - level_95, - n_rep_sensitivity, - coverage_cols=["Coverage", "Coverage (Upper)"] # Highlight both +generate_and_show_styled_table( + main_df=df_sensitivity, + filters={"level": 0.95, "score": "partialling out"}, + 
display_cols=display_columns_sensitivity, + n_rep=n_rep_sensitivity, + level_col="level", + rename_map={"Learner g": "Learner l"}, + coverage_highlight_cols=["Coverage", "Coverage (Upper)"] ) -show(styled_sens_po_95, allow_html=True) ``` ```{python} #| echo: false -# score_po and level_90 are defined above - -df_sens_po_90 = df_sensitivity[(df_sensitivity['level'] == level_90) & (df_sensitivity["score"] == score_po)][display_columns_sensitivity].copy() -df_sens_po_90.rename(columns={"Learner g": "Learner l"}, inplace=True) - -styled_sens_po_90 = create_styled_table( - df_sens_po_90, - level_90, - n_rep_sensitivity, - coverage_cols=["Coverage", "Coverage (Upper)"] +#| +generate_and_show_styled_table( + main_df=df_sensitivity, + filters={"level": 0.9, "score": "partialling out"}, + display_cols=display_columns_sensitivity, + n_rep=n_rep_sensitivity, + level_col="level", + rename_map={"Learner g": "Learner l"}, + coverage_highlight_cols=["Coverage", "Coverage (Upper)"] ) -show(styled_sens_po_90, allow_html=True) ``` ### IV-type @@ -179,30 +185,26 @@ For the IV-type score, the learners `ml_l` and `ml_g` are both set to the same t ```{python} #| echo: false -# score_iv and level_95 are defined above - -df_sens_iv_95 = df_sensitivity[(df_sensitivity['level'] == level_95) & (df_sensitivity["score"] == score_iv)][display_columns_sensitivity] -styled_sens_iv_95 = create_styled_table( - df_sens_iv_95, - level_95, - n_rep_sensitivity, - coverage_cols=["Coverage", "Coverage (Upper)"] +generate_and_show_styled_table( + main_df=df_sensitivity, + filters={"level": 0.95, "score": "IV-type"}, + display_cols=display_columns_sensitivity, + n_rep=n_rep_sensitivity, + level_col="level", + coverage_highlight_cols=["Coverage", "Coverage (Upper)"] ) -show(styled_sens_iv_95, allow_html=True) ``` ```{python} #| echo: false -# score_iv and level_90 are defined above - -df_sens_iv_90 = df_sensitivity[(df_sensitivity['level'] == level_90) & (df_sensitivity["score"] == 
score_iv)][display_columns_sensitivity] -styled_sens_iv_90 = create_styled_table( - df_sens_iv_90, - level_90, - n_rep_sensitivity, - coverage_cols=["Coverage", "Coverage (Upper)"] +generate_and_show_styled_table( + main_df=df_sensitivity, + filters={"level": 0.9, "score": "IV-type"}, + display_cols=display_columns_sensitivity, + n_rep=n_rep_sensitivity, + level_col="level", + coverage_highlight_cols=["Coverage", "Coverage (Upper)"] ) -show(styled_sens_iv_90, allow_html=True) ``` diff --git a/doc/plm/plr_cate.qmd b/doc/plm/plr_cate.qmd index 57be897..f62fbaf 100644 --- a/doc/plm/plr_cate.qmd +++ b/doc/plm/plr_cate.qmd @@ -9,55 +9,17 @@ jupyter: python3 import numpy as np import pandas as pd -from itables import init_notebook_mode, show, options +from itables import init_notebook_mode +import os +import sys -init_notebook_mode(all_interactive=True) +doc_dir = os.path.abspath(os.path.join(os.getcwd(), "..")) +if doc_dir not in sys.path: + sys.path.append(doc_dir) + +from utils.style_tables import generate_and_show_styled_table -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage", "Uniform Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage", "Uniform Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage", "Uniform Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage", "Uniform Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = 
df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) +init_notebook_mode(all_interactive=True) ``` ## CATE Coverage @@ -81,28 +43,44 @@ print(metadata_df.T.to_string(header=False)) #| echo: false # set up data and rename columns -df = pd.read_csv("../../results/plm/plr_cate_coverage.csv", index_col=None) +df_cate = pd.read_csv("../../results/plm/plr_cate_coverage.csv", index_col=None) # Renamed to df_cate + +# Read the repetition count from "repetition" or "n_rep"; fall back to "N/A" if neither column holds a single value +if "repetition" in df_cate.columns and df_cate["repetition"].nunique() == 1: + n_rep_cate = df_cate["repetition"].unique()[0] +elif "n_rep" in df_cate.columns and df_cate["n_rep"].nunique() == 1: + n_rep_cate = df_cate["n_rep"].unique()[0] +else: + n_rep_cate = "N/A" -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] -display_columns = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"] +display_columns_cate = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"] ``` ```{python} #| echo: false -level = 0.95 -df_ate_95 = df[df['level'] == level][display_columns] -df_ate_95.rename(columns={"Learner g": "Learner l"}, inplace=True) -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_cate, + filters={"level": 0.95}, + display_cols=display_columns_cate, + n_rep=n_rep_cate, + level_col="level", + rename_map={"Learner g": "Learner l"}, + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_ate_9 = df[df['level'] ==
level][display_columns] -df_ate_9.rename(columns={"Learner g": "Learner l"}, inplace=True) -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_cate, + filters={"level": 0.9}, + display_cols=display_columns_cate, + n_rep=n_rep_cate, + level_col="level", + rename_map={"Learner g": "Learner l"}, + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` diff --git a/doc/plm/plr_gate.qmd b/doc/plm/plr_gate.qmd index 8f5490d..0f0c105 100644 --- a/doc/plm/plr_gate.qmd +++ b/doc/plm/plr_gate.qmd @@ -9,55 +9,17 @@ jupyter: python3 import numpy as np import pandas as pd -from itables import init_notebook_mode, show, options +from itables import init_notebook_mode +import os +import sys -init_notebook_mode(all_interactive=True) +doc_dir = os.path.abspath(os.path.join(os.getcwd(), "..")) +if doc_dir not in sys.path: + sys.path.append(doc_dir) + +from utils.style_tables import generate_and_show_styled_table -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage", "Uniform Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage", "Uniform Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage", "Uniform Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage", "Uniform Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = 
df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) +init_notebook_mode(all_interactive=True) ``` ## GATE Coverage @@ -81,28 +43,43 @@ print(metadata_df.T.to_string(header=False)) #| echo: false # set up data and rename columns -df = pd.read_csv("../../results/plm/plr_gate_coverage.csv", index_col=None) +df_gate = pd.read_csv("../../results/plm/plr_gate_coverage.csv", index_col=None) # Renamed to df_gate for clarity + +if "repetition" in df_gate.columns and df_gate["repetition"].nunique() == 1: + n_rep_gate = df_gate["repetition"].unique()[0] +elif "n_rep" in df_gate.columns and df_gate["n_rep"].nunique() == 1: # Check for n_rep as well + n_rep_gate = df_gate["n_rep"].unique()[0] +else: + n_rep_gate = "N/A" # Fallback if n_rep cannot be determined -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] -display_columns = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"] +display_columns_gate = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"] ``` ```{python} #| echo: false -level = 0.95 -df_ate_95 = df[df['level'] == level][display_columns] -df_ate_95.rename(columns={"Learner g": "Learner l"}, inplace=True) -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_gate, + filters={"level": 0.95}, + display_cols=display_columns_gate, + n_rep=n_rep_gate, + level_col="level", + rename_map={"Learner g": "Learner l"}, + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_ate_9 = df[df['level'] == level][display_columns] -df_ate_9.rename(columns={"Learner g": "Learner 
l"}, inplace=True) -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_gate, + filters={"level": 0.9}, + display_cols=display_columns_gate, + n_rep=n_rep_gate, + level_col="level", + rename_map={"Learner g": "Learner l"}, + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` From 0d6a61d2ff7aa23ce2a868f100db01bd9c641b21 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Mon, 26 May 2025 13:20:38 +0200 Subject: [PATCH 5/9] add site-url --- doc/_quarto-dev.yml | 1 + doc/_website.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/doc/_quarto-dev.yml b/doc/_quarto-dev.yml index 11c7caf..94b8c12 100644 --- a/doc/_quarto-dev.yml +++ b/doc/_quarto-dev.yml @@ -6,6 +6,7 @@ metadata-files: - _website.yml website: + site-url: https://docs.doubleml.org/doubleml-coverage/dev/ drafts: - index.qmd # IRM diff --git a/doc/_website.yml b/doc/_website.yml index 4beb151..d6c79d3 100644 --- a/doc/_website.yml +++ b/doc/_website.yml @@ -1,6 +1,7 @@ website: title: "DoubleML Coverage" favicon: _static/favicon.ico + site-url: https://docs.doubleml.org/doubleml-coverage/ search: location: sidebar sidebar: From 70d2f7aad631402c867556422d66d0929e02d6e4 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Mon, 26 May 2025 13:55:06 +0200 Subject: [PATCH 6/9] update ssm qmd files --- doc/ssm/ssm_mar.qmd | 81 +++++++++++------------------------- doc/ssm/ssm_nonignorable.qmd | 80 +++++++++++------------------------ 2 files changed, 50 insertions(+), 111 deletions(-) diff --git a/doc/ssm/ssm_mar.qmd b/doc/ssm/ssm_mar.qmd index 1ffa222..65a1871 100644 --- a/doc/ssm/ssm_mar.qmd +++ b/doc/ssm/ssm_mar.qmd @@ -9,55 +9,17 @@ jupyter: python3 import numpy as np import pandas as pd -from itables import init_notebook_mode, show, options +from itables import init_notebook_mode +import os +import sys -init_notebook_mode(all_interactive=True) +doc_dir = os.path.abspath(os.path.join(os.getcwd(), "..")) +if doc_dir not in sys.path: + sys.path.append(doc_dir) + +from 
utils.style_tables import generate_and_show_styled_table -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) +init_notebook_mode(all_interactive=True) ``` ## ATE Coverage @@ -90,17 +52,24 @@ display_columns = ["Learner g", "Learner m", "Learner pi", "Bias", "CI Length", ```{python} #| echo: false -level = 0.95 - -df_ate_95 = df[(df['level'] == level)][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.95}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -score = "partialling out" -level = 0.9 - -df_ate_9 = df[(df['level'] == level)][display_columns] -make_pretty(df_ate_9, 
level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.9}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` diff --git a/doc/ssm/ssm_nonignorable.qmd b/doc/ssm/ssm_nonignorable.qmd index 3afb9b9..f0a807d 100644 --- a/doc/ssm/ssm_nonignorable.qmd +++ b/doc/ssm/ssm_nonignorable.qmd @@ -9,55 +9,17 @@ jupyter: python3 import numpy as np import pandas as pd -from itables import init_notebook_mode, show, options +from itables import init_notebook_mode +import os +import sys -init_notebook_mode(all_interactive=True) +doc_dir = os.path.abspath(os.path.join(os.getcwd(), "..")) +if doc_dir not in sys.path: + sys.path.append(doc_dir) + +from utils.style_tables import generate_and_show_styled_table -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence 
Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) +init_notebook_mode(all_interactive=True) ``` ## ATE Coverage @@ -90,16 +52,24 @@ display_columns = ["Learner g", "Learner m", "Learner pi", "Bias", "CI Length", ```{python} #| echo: false -level = 0.95 - -df_ate_95 = df[(df['level'] == level)][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.95}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 - -df_ate_9 = df[(df['level'] == level)][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.9}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` From 8e46589792c587d4272479007420056240bd6509 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Mon, 26 May 2025 14:20:26 +0200 Subject: [PATCH 7/9] update rdd qmd files --- doc/rdd/rdd.qmd | 120 ++++++++++++++++++++---------------------------- 1 file changed, 49 insertions(+), 71 deletions(-) diff --git a/doc/rdd/rdd.qmd b/doc/rdd/rdd.qmd index 7e083f5..4a74fb7 100644 --- a/doc/rdd/rdd.qmd +++ b/doc/rdd/rdd.qmd @@ -10,55 +10,17 @@ jupyter: python3 import numpy as np import pandas as pd -from itables import init_notebook_mode, show, options +from itables import init_notebook_mode +import os +import sys -init_notebook_mode(all_interactive=True) +doc_dir = os.path.abspath(os.path.join(os.getcwd(), "..")) +if doc_dir not in sys.path: + sys.path.append(doc_dir) + +from utils.style_tables import generate_and_show_styled_table -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - 
highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) +init_notebook_mode(all_interactive=True) ``` ## Sharp Design @@ -80,28 +42,36 @@ print(metadata_df.T.to_string(header=False)) # | echo: false # set up data and rename columns -df = pd.read_csv("../../results/rdd/rdd_sharp_coverage.csv", index_col=None) +df_sharp = pd.read_csv("../../results/rdd/rdd_sharp_coverage.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_sharp["repetition"].nunique() == 1 +n_rep_sharp = df_sharp["repetition"].unique()[0] -display_columns = ["Method", "Learner g", "fs specification", "Bias", "CI Length", "Coverage"] +display_columns_sharp = ["Method", "Learner g", "fs specification", "Bias", "CI Length", "Coverage"] ``` ```{python} #| echo: false -level = 0.95 - -df_ate_95 = df[(df['level'] == level)][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_sharp, + filters={"level": 0.95}, + 
display_cols=display_columns_sharp, + n_rep=n_rep_sharp, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 - -df_ate_9 = df[(df['level'] == level)][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_sharp, + filters={"level": 0.9}, + display_cols=display_columns_sharp, + n_rep=n_rep_sharp, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` @@ -124,26 +94,34 @@ print(metadata_df.T.to_string(header=False)) # | echo: false # set up data and rename columns -df = pd.read_csv("../../results/rdd/rdd_fuzzy_coverage.csv", index_col=None) +df_fuzzy = pd.read_csv("../../results/rdd/rdd_fuzzy_coverage.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_fuzzy["repetition"].nunique() == 1 +n_rep_fuzzy = df_fuzzy["repetition"].unique()[0] -display_columns = ["Method", "Learner g", "Learner m", "fs specification", "Bias", "CI Length", "Coverage"] +display_columns_fuzzy = ["Method", "Learner g", "Learner m", "fs specification", "Bias", "CI Length", "Coverage"] ``` ```{python} #| echo: false -level = 0.95 - -df_ate_95 = df[(df['level'] == level)][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_fuzzy, + filters={"level": 0.95}, + display_cols=display_columns_fuzzy, + n_rep=n_rep_fuzzy, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 - -df_ate_9 = df[(df['level'] == level)][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_fuzzy, + filters={"level": 0.9}, + display_cols=display_columns_fuzzy, + n_rep=n_rep_fuzzy, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` From f0827bf7930ac74bb6461dc2eb19a6ecc43b6751 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Mon, 26 May 2025 14:32:47 +0200 Subject: [PATCH 8/9] update did qmd files 
--- doc/did/did_cs.qmd | 107 ++++++---------- doc/did/did_multi.qmd | 282 ++++++++++++++++++++++-------------------- doc/did/did_pa.qmd | 104 ++++++---------- 3 files changed, 228 insertions(+), 265 deletions(-) diff --git a/doc/did/did_cs.qmd b/doc/did/did_cs.qmd index 90055cc..eab72ec 100644 --- a/doc/did/did_cs.qmd +++ b/doc/did/did_cs.qmd @@ -9,55 +9,17 @@ jupyter: python3 import numpy as np import pandas as pd -from itables import init_notebook_mode, show, options +from itables import init_notebook_mode +import os +import sys -init_notebook_mode(all_interactive=True) +doc_dir = os.path.abspath(os.path.join(os.getcwd(), "..")) +if doc_dir not in sys.path: + sys.path.append(doc_dir) + +from utils.style_tables import generate_and_show_styled_table -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" 
- - return show(styled_df, caption=caption, allow_html=True) +init_notebook_mode(all_interactive=True) ``` ## ATTE Coverage @@ -91,20 +53,27 @@ display_columns = ["Learner g", "Learner m", "DGP", "In-sample-norm.", "Bias", " ```{python} #| echo: false -score = "observational" -level = 0.95 -df_ate_95 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.95, "Score": "observational"}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -score = "observational" -level = 0.9 - -df_ate_9 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.9, "Score": "observational"}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ### Experimental Score @@ -113,18 +82,24 @@ Remark that the only two valid DGPs are DGP $5$ and DGP $6$. 
All other DGPs are ```{python} #| echo: false -score = "experimental" -level = 0.95 - -df_ate_95 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.95, "Score": "experimental"}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -score = "experimental" -level = 0.9 - -df_ate_9 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.9, "Score": "experimental"}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` diff --git a/doc/did/did_multi.qmd b/doc/did/did_multi.qmd index fc5131e..2751420 100644 --- a/doc/did/did_multi.qmd +++ b/doc/did/did_multi.qmd @@ -9,55 +9,17 @@ jupyter: python3 import numpy as np import pandas as pd -from itables import init_notebook_mode, show, options +from itables import init_notebook_mode +import os +import sys -init_notebook_mode(all_interactive=True) +doc_dir = os.path.abspath(os.path.join(os.getcwd(), "..")) +if doc_dir not in sys.path: + sys.path.append(doc_dir) + +from utils.style_tables import generate_and_show_styled_table -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage", "Uniform Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage", "Uniform Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, 
- dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage", "Uniform Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage", "Uniform Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) +init_notebook_mode(all_interactive=True) ``` ## ATTE Coverage @@ -97,20 +59,26 @@ display_columns = ["Learner g", "Learner m", "DGP", "In-sample-norm.", "Bias", " ```{python} #| echo: false -score = "observational" -level = 0.95 - -df_ate_95 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.95, "Score": "observational"}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ```{python} #| echo: false -score = "observational" -level = 0.9 - -df_ate_9 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.9, "Score": "observational"}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` @@ -120,20 +88,26 @@ The results are only valid for the DGP 6, as the experimental score assumes a ra ```{python} #| echo: false -score = "experimental" -level = 0.95 - -df_ate_95 = df[(df['level'] == level) & (df["Score"] == 
score)][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.95, "Score": "experimental"}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ```{python} #| echo: false -score = "experimental" -level = 0.9 - -df_ate_9 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.9, "Score": "experimental"}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ## Aggregated Effects @@ -148,10 +122,10 @@ The non-uniform results (coverage, ci length and bias) refer to averaged values #| echo: false # set up data -df = pd.read_csv("../../results/did/did_multi_group.csv", index_col=None) +df_group = pd.read_csv("../../results/did/did_multi_group.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_group["repetition"].nunique() == 1 +n_rep_group = df_group["repetition"].unique()[0] display_columns = ["Learner g", "Learner m", "DGP", "In-sample-norm.", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"] ``` @@ -160,20 +134,26 @@ display_columns = ["Learner g", "Learner m", "DGP", "In-sample-norm.", "Bias", " ```{python} #| echo: false -score = "observational" -level = 0.95 - -df_ate_95 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_group, + filters={"level": 0.95, "Score": "observational"}, + display_cols=display_columns, + n_rep=n_rep_group, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ```{python} #| echo: false -score = "observational" -level = 0.9 - -df_ate_9 = df[(df['level'] == level) & 
(df["Score"] == score)][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_group, + filters={"level": 0.9, "Score": "observational"}, + display_cols=display_columns, + n_rep=n_rep_group, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` #### Experimental Score @@ -182,20 +162,26 @@ The results are only valid for the DGP 6, as the experimental score assumes a ra ```{python} #| echo: false -score = "experimental" -level = 0.95 - -df_ate_95 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_group, + filters={"level": 0.95, "Score": "experimental"}, + display_cols=display_columns, + n_rep=n_rep_group, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ```{python} #| echo: false -score = "experimental" -level = 0.9 - -df_ate_9 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_group, + filters={"level": 0.9, "Score": "experimental"}, + display_cols=display_columns, + n_rep=n_rep_group, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ### Time Effects @@ -204,10 +190,10 @@ make_pretty(df_ate_9, level, n_rep) #| echo: false # set up data -df = pd.read_csv("../../results/did/did_multi_time.csv", index_col=None) +df_time = pd.read_csv("../../results/did/did_multi_time.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_time["repetition"].nunique() == 1 +n_rep_time = df_time["repetition"].unique()[0] display_columns = ["Learner g", "Learner m", "DGP", "In-sample-norm.", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"] ``` @@ -216,20 +202,26 @@ display_columns = ["Learner g", "Learner m", "DGP", "In-sample-norm.", "Bias", " 
```{python} #| echo: false -score = "observational" -level = 0.95 - -df_ate_95 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_time, + filters={"level": 0.95, "Score": "observational"}, + display_cols=display_columns, + n_rep=n_rep_time, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ```{python} #| echo: false -score = "observational" -level = 0.9 - -df_ate_9 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_time, + filters={"level": 0.9, "Score": "observational"}, + display_cols=display_columns, + n_rep=n_rep_time, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` #### Experimental Score @@ -238,20 +230,26 @@ The results are only valid for the DGP 6, as the experimental score assumes a ra ```{python} #| echo: false -score = "experimental" -level = 0.95 - -df_ate_95 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_time, + filters={"level": 0.95, "Score": "experimental"}, + display_cols=display_columns, + n_rep=n_rep_time, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ```{python} #| echo: false -score = "experimental" -level = 0.9 - -df_ate_9 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_time, + filters={"level": 0.9, "Score": "experimental"}, + display_cols=display_columns, + n_rep=n_rep_time, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ### Event Study Aggregation @@ -260,10 +258,10 @@ make_pretty(df_ate_9, level, n_rep) #| echo: false # set up data -df = 
pd.read_csv("../../results/did/did_multi_eventstudy.csv", index_col=None) +df_es = pd.read_csv("../../results/did/did_multi_eventstudy.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_es["repetition"].nunique() == 1 +n_rep_es = df_es["repetition"].unique()[0] display_columns = ["Learner g", "Learner m", "DGP", "In-sample-norm.", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"] ``` @@ -272,20 +270,26 @@ display_columns = ["Learner g", "Learner m", "DGP", "In-sample-norm.", "Bias", " ```{python} #| echo: false -score = "observational" -level = 0.95 - -df_ate_95 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_es, + filters={"level": 0.95, "Score": "observational"}, + display_cols=display_columns, + n_rep=n_rep_es, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ```{python} #| echo: false -score = "observational" -level = 0.9 - -df_ate_9 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_es, + filters={"level": 0.9, "Score": "observational"}, + display_cols=display_columns, + n_rep=n_rep_es, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` #### Experimental Score @@ -295,18 +299,24 @@ The results are only valid for the DGP 6, as the experimental score assumes a ra ```{python} #| echo: false -score = "experimental" -level = 0.95 - -df_ate_95 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_es, + filters={"level": 0.95, "Score": "experimental"}, + display_cols=display_columns, + n_rep=n_rep_es, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ```{python} #| echo: 
false -score = "experimental" -level = 0.9 - -df_ate_9 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_es, + filters={"level": 0.9, "Score": "experimental"}, + display_cols=display_columns, + n_rep=n_rep_es, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` diff --git a/doc/did/did_pa.qmd b/doc/did/did_pa.qmd index 7435731..94f16ed 100644 --- a/doc/did/did_pa.qmd +++ b/doc/did/did_pa.qmd @@ -9,55 +9,17 @@ jupyter: python3 import numpy as np import pandas as pd -from itables import init_notebook_mode, show, options +from itables import init_notebook_mode +import os +import sys -init_notebook_mode(all_interactive=True) +doc_dir = os.path.abspath(os.path.join(os.getcwd(), "..")) +if doc_dir not in sys.path: + sys.path.append(doc_dir) + +from utils.style_tables import generate_and_show_styled_table -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in 
float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) +init_notebook_mode(all_interactive=True) ``` ## ATTE Coverage @@ -91,20 +53,28 @@ display_columns = ["Learner g", "Learner m", "DGP", "In-sample-norm.", "Bias", " ```{python} #| echo: false -score = "observational" -level = 0.95 -df_ate_95 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.95, "Score": "observational"}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -score = "observational" -level = 0.9 -df_ate_9 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.9, "Score": "observational"}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` @@ -114,18 +84,26 @@ Remark that the only two valid DGPs are DGP $5$ and DGP $6$. 
All other DGPs are ```{python} #| echo: false -score = "experimental" -level = 0.95 -df_ate_95 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.95, "Score": "experimental"}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -score = "experimental" -level = 0.9 -df_ate_9 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.9, "Score": "experimental"}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` From ffd55cc2390908f637002becc70e2bfae5feeb6b Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Mon, 26 May 2025 16:08:50 +0200 Subject: [PATCH 9/9] update irm qmd files --- doc/_quarto-dev.yml | 1 + doc/_website.yml | 1 + doc/irm/apo.qmd | 202 ++++++--------- doc/irm/iivm.qmd | 78 ++---- doc/irm/irm.qmd | 256 ++++++++----------- doc/irm/irm_cate.qmd | 78 ++---- doc/irm/irm_gate.qmd | 78 ++---- doc/irm/qte.qmd | 573 +++++++++++++++---------------------------- 8 files changed, 444 insertions(+), 823 deletions(-) diff --git a/doc/_quarto-dev.yml b/doc/_quarto-dev.yml index 94b8c12..5c3587a 100644 --- a/doc/_quarto-dev.yml +++ b/doc/_quarto-dev.yml @@ -15,6 +15,7 @@ website: - irm/irm_cate.qmd - irm/apo.qmd - irm/qte.qmd + - irm/iivm.qmd # PLM - plm/plr.qmd - plm/plr_gate.qmd diff --git a/doc/_website.yml b/doc/_website.yml index d6c79d3..e400e91 100644 --- a/doc/_website.yml +++ b/doc/_website.yml @@ -18,6 +18,7 @@ website: - irm/irm_cate.qmd - irm/apo.qmd - irm/qte.qmd + - irm/iivm.qmd - text: "PLM" menu: - plm/plr.qmd diff --git a/doc/irm/apo.qmd b/doc/irm/apo.qmd index 9e1787a..3b8607a 100644 --- a/doc/irm/apo.qmd +++ b/doc/irm/apo.qmd @@ -9,55 +9,17 @@ jupyter: 
python3 import numpy as np import pandas as pd -from itables import init_notebook_mode, show, options +from itables import init_notebook_mode +import os +import sys -init_notebook_mode(all_interactive=True) +doc_dir = os.path.abspath(os.path.join(os.getcwd(), "..")) +if doc_dir not in sys.path: + sys.path.append(doc_dir) + +from utils.style_tables import generate_and_show_styled_table -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) +init_notebook_mode(all_interactive=True) ``` ## APO Pointwise Coverage @@ -78,31 +40,41 @@ print(metadata_df.T.to_string(header=False)) ```{python} #| echo: false -# set up data and rename columns -df = pd.read_csv("../../results/irm/irm_apo_coverage_apo.csv", index_col=None) +# set up data +df_apo = 
pd.read_csv("../../results/irm/irm_apo_coverage_apo.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_apo["repetition"].nunique() == 1 +n_rep_apo = df_apo["repetition"].unique()[0] -display_columns = ["Learner g", "Learner m", "Treatment Level", "Bias", "CI Length", "Coverage"] +display_columns_apo = ["Learner g", "Learner m", "Treatment Level", "Bias", "CI Length", "Coverage"] ``` ```{python} #| echo: false -level = 0.95 -df_ate_95 = df[df['level'] == level][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_apo, + filters={"level": 0.95}, + display_cols=display_columns_apo, + n_rep=n_rep_apo, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_ate_9 = df[df['level'] == level][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_apo, + filters={"level": 0.9}, + display_cols=display_columns_apo, + n_rep=n_rep_apo, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` @@ -126,80 +98,40 @@ print(metadata_df.T.to_string(header=False)) ```{python} #| echo: false -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage", "Uniform Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage", "Uniform Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage", "Uniform Coverage"]) - - # set all coverage values to bold - styled_df = 
styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage", "Uniform Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) -``` - -```{python} -#| echo: false - -# set up data and rename columns -df = pd.read_csv("../../results/irm/irm_apo_coverage_apos.csv", index_col=None) +# set up data +df_apos = pd.read_csv("../../results/irm/irm_apo_coverage_apos.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_apos["repetition"].nunique() == 1 +n_rep_apos = df_apos["repetition"].unique()[0] -display_columns = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"] +display_columns_apos = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"] ``` ```{python} #| echo: false -level = 0.95 -df_ate_95 = df[df['level'] == level][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_apos, + filters={"level": 0.95}, + display_cols=display_columns_apos, + n_rep=n_rep_apos, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_ate_9 = df[df['level'] == level][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_apos, + filters={"level": 0.9}, + display_cols=display_columns_apos, + n_rep=n_rep_apos, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ## Causal 
Contrast Coverage @@ -222,28 +154,38 @@ print(metadata_df.T.to_string(header=False)) ```{python} #| echo: false -# set up data and rename columns -df = pd.read_csv("../../results/irm/irm_apo_coverage_apos_contrast.csv", index_col=None) +# set up data +df_contrast = pd.read_csv("../../results/irm/irm_apo_coverage_apos_contrast.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_contrast["repetition"].nunique() == 1 +n_rep_contrast = df_contrast["repetition"].unique()[0] -display_columns = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"] +display_columns_contrast = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"] ``` ```{python} #| echo: false -level = 0.95 -df_ate_95 = df[df['level'] == level][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_contrast, + filters={"level": 0.95}, + display_cols=display_columns_contrast, + n_rep=n_rep_contrast, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_ate_9 = df[df['level'] == level][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_contrast, + filters={"level": 0.9}, + display_cols=display_columns_contrast, + n_rep=n_rep_contrast, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` diff --git a/doc/irm/iivm.qmd b/doc/irm/iivm.qmd index c74277b..00f4184 100644 --- a/doc/irm/iivm.qmd +++ b/doc/irm/iivm.qmd @@ -9,55 +9,17 @@ jupyter: python3 import numpy as np import pandas as pd -from itables import init_notebook_mode, show, options +from itables import init_notebook_mode +import os +import sys -init_notebook_mode(all_interactive=True) +doc_dir = os.path.abspath(os.path.join(os.getcwd(), "..")) +if doc_dir not in sys.path: + sys.path.append(doc_dir) + 
+from utils.style_tables import generate_and_show_styled_table -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) +init_notebook_mode(all_interactive=True) ``` ## LATE Coverage @@ -91,16 +53,26 @@ display_columns = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage"] ```{python} #| echo: false -level = 0.95 -df_ate_95 = df[df['level'] == level][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.95}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_ate_9 = df[df['level'] == level][display_columns] -make_pretty(df_ate_9, level, n_rep) 
+generate_and_show_styled_table( + main_df=df, + filters={"level": 0.9}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` diff --git a/doc/irm/irm.qmd b/doc/irm/irm.qmd index a62fff5..01fae21 100644 --- a/doc/irm/irm.qmd +++ b/doc/irm/irm.qmd @@ -9,55 +9,17 @@ jupyter: python3 import numpy as np import pandas as pd -from itables import init_notebook_mode, show, options +from itables import init_notebook_mode +import os +import sys -init_notebook_mode(all_interactive=True) +doc_dir = os.path.abspath(os.path.join(os.getcwd(), "..")) +if doc_dir not in sys.path: + sys.path.append(doc_dir) + +from utils.style_tables import generate_and_show_styled_table -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, 
caption=caption, allow_html=True) +init_notebook_mode(all_interactive=True) ``` ## ATE Coverage @@ -78,31 +40,41 @@ print(metadata_df.T.to_string(header=False)) ```{python} #| echo: false -# set up data and rename columns -df = pd.read_csv("../../results/irm/irm_ate_coverage.csv", index_col=None) +# set up data +df_ate_cov = pd.read_csv("../../results/irm/irm_ate_coverage.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_ate_cov["repetition"].nunique() == 1 +n_rep_ate_cov = df_ate_cov["repetition"].unique()[0] -display_columns = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage"] +display_columns_ate_cov = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage"] ``` ```{python} #| echo: false -level = 0.95 -df_ate_95 = df[df['level'] == level][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_ate_cov, + filters={"level": 0.95}, + display_cols=display_columns_ate_cov, + n_rep=n_rep_ate_cov, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_ate_9 = df[df['level'] == level][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_ate_cov, + filters={"level": 0.9}, + display_cols=display_columns_ate_cov, + n_rep=n_rep_ate_cov, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` @@ -124,31 +96,41 @@ print(metadata_df.T.to_string(header=False)) ```{python} #| echo: false -# set up data and rename columns -df = pd.read_csv("../../results/irm/irm_atte_coverage.csv", index_col=None) +# set up data +df_atte_cov = pd.read_csv("../../results/irm/irm_atte_coverage.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_atte_cov["repetition"].nunique() == 1 +n_rep_atte_cov = df_atte_cov["repetition"].unique()[0] -display_columns = ["Learner g", "Learner m", "Bias", "CI Length", 
"Coverage"] +display_columns_atte_cov = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage"] ``` ```{python} #| echo: false -level = 0.95 -df_atte_95 = df[df['level'] == level][display_columns] -make_pretty(df_atte_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_atte_cov, + filters={"level": 0.95}, + display_cols=display_columns_atte_cov, + n_rep=n_rep_atte_cov, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_atte_9 = df[df['level'] == level][display_columns] -make_pretty(df_atte_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_atte_cov, + filters={"level": 0.9}, + display_cols=display_columns_atte_cov, + n_rep=n_rep_atte_cov, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ## Sensitivity @@ -158,62 +140,6 @@ The simulations are based on the the ADD-DGP with $10,000$ observations. As the The confounding is set such that both sensitivity parameters are approximately $cf_y=cf_d=0.1$, such that the robustness value $RV$ should be approximately $10\%$. Further, the corresponding confidence intervals are one-sided (since the direction of the bias is unkown), such that only one side should approximate the corresponding coverage level (here only the lower coverage is relevant since the bias is positive). Remark that for the coverage level the value of $\rho$ has to be correctly specified, such that the coverage level will be generally (significantly) larger than the nominal level under the conservative choice of $|\rho|=1$. 
-```{python} -#| echo: false - -import numpy as np -import pandas as pd -from itables import init_notebook_mode, show, options - -init_notebook_mode(all_interactive=True) - -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage", "Coverage (Lower)"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage", "Coverage (Lower)"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage", "Coverage (Lower)"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage", "Coverage (Lower)"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) -``` - ### ATE ::: {.callout-note title="Metadata" collapse="true"} @@ -231,33 +157,41 @@ print(metadata_df.T.to_string(header=False)) #| echo: false # set up data and rename columns -df = pd.read_csv("../../results/irm/irm_ate_sensitivity.csv", index_col=None) +df_ate_sens = pd.read_csv("../../results/irm/irm_ate_sensitivity.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert 
df_ate_sens["repetition"].nunique() == 1 +n_rep_ate_sens = df_ate_sens["repetition"].unique()[0] -display_columns = [ +display_columns_ate_sens = [ "Learner g", "Learner m", "Bias", "Bias (Lower)", "Bias (Upper)", "Coverage", "Coverage (Lower)", "Coverage (Upper)", "RV", "RVa"] +rename_map_sens = {"Learner g": "Learner l"} +coverage_highlight_cols_sens = ["Coverage", "Coverage (Lower)"] ``` ```{python} #| echo: false -score = "partialling out" -level = 0.95 - -df_ate_95 = df[(df['level'] == level)][display_columns] -df_ate_95.rename(columns={"Learner g": "Learner l"}, inplace=True) -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_ate_sens, + filters={"level": 0.95}, + display_cols=display_columns_ate_sens, + n_rep=n_rep_ate_sens, + level_col="level", + rename_map=rename_map_sens, + coverage_highlight_cols=coverage_highlight_cols_sens +) ``` ```{python} #| echo: false -score = "partialling out" -level = 0.9 - -df_ate_9 = df[(df['level'] == level)][display_columns] -df_ate_9.rename(columns={"Learner g": "Learner l"}, inplace=True) -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_ate_sens, + filters={"level": 0.9}, + display_cols=display_columns_ate_sens, + n_rep=n_rep_ate_sens, + level_col="level", + rename_map=rename_map_sens, + coverage_highlight_cols=coverage_highlight_cols_sens +) ``` ### ATTE @@ -276,32 +210,38 @@ print(metadata_df.T.to_string(header=False)) ```{python} #| echo: false -# set up data and rename columns -df = pd.read_csv("../../results/irm/irm_atte_sensitivity.csv", index_col=None) +# set up data +df_atte_sens = pd.read_csv("../../results/irm/irm_atte_sensitivity.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_atte_sens["repetition"].nunique() == 1 +n_rep_atte_sens = df_atte_sens["repetition"].unique()[0] -display_columns = [ +display_columns_atte_sens = [ "Learner g", "Learner m", "Bias", "Bias (Lower)", "Bias 
(Upper)", "Coverage", "Coverage (Lower)", "Coverage (Upper)", "RV", "RVa"] ``` ```{python} #| echo: false -score = "partialling out" -level = 0.95 - -df_ate_95 = df[(df['level'] == level)][display_columns] -df_ate_95.rename(columns={"Learner g": "Learner l"}, inplace=True) -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_atte_sens, + filters={"level": 0.95}, + display_cols=display_columns_atte_sens, + n_rep=n_rep_atte_sens, + level_col="level", + rename_map=rename_map_sens, + coverage_highlight_cols=coverage_highlight_cols_sens +) ``` ```{python} #| echo: false -score = "partialling out" -level = 0.9 - -df_ate_9 = df[(df['level'] == level)][display_columns] -df_ate_9.rename(columns={"Learner g": "Learner l"}, inplace=True) -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_atte_sens, + filters={"level": 0.9}, + display_cols=display_columns_atte_sens, + n_rep=n_rep_atte_sens, + level_col="level", + rename_map=rename_map_sens, + coverage_highlight_cols=coverage_highlight_cols_sens +) ``` diff --git a/doc/irm/irm_cate.qmd b/doc/irm/irm_cate.qmd index e9c4c49..52b5abb 100644 --- a/doc/irm/irm_cate.qmd +++ b/doc/irm/irm_cate.qmd @@ -9,55 +9,17 @@ jupyter: python3 import numpy as np import pandas as pd -from itables import init_notebook_mode, show, options +from itables import init_notebook_mode +import os +import sys -init_notebook_mode(all_interactive=True) +doc_dir = os.path.abspath(os.path.join(os.getcwd(), "..")) +if doc_dir not in sys.path: + sys.path.append(doc_dir) + +from utils.style_tables import generate_and_show_styled_table -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage", 
"Uniform Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage", "Uniform Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage", "Uniform Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage", "Uniform Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) +init_notebook_mode(all_interactive=True) ``` ## CATE Coverage @@ -93,15 +55,25 @@ display_columns = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage", "U ```{python} #| echo: false -level = 0.95 -df_ate_95 = df[df['level'] == level][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.95}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_ate_9 = df[df['level'] == level][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.9}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` diff --git a/doc/irm/irm_gate.qmd b/doc/irm/irm_gate.qmd index d4e6f36..c552771 100644 --- a/doc/irm/irm_gate.qmd +++ b/doc/irm/irm_gate.qmd @@ 
-9,55 +9,17 @@ jupyter: python3 import numpy as np import pandas as pd -from itables import init_notebook_mode, show, options +from itables import init_notebook_mode +import os +import sys -init_notebook_mode(all_interactive=True) +doc_dir = os.path.abspath(os.path.join(os.getcwd(), "..")) +if doc_dir not in sys.path: + sys.path.append(doc_dir) + +from utils.style_tables import generate_and_show_styled_table -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage", "Uniform Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage", "Uniform Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage", "Uniform Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage", "Uniform Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) +init_notebook_mode(all_interactive=True) ``` ## GATE Coverage @@ -92,15 +54,25 @@ display_columns = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage", "U ```{python} #| echo: false -level = 0.95 
-df_ate_95 = df[df['level'] == level][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.95}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_ate_9 = df[df['level'] == level][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.9}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` diff --git a/doc/irm/qte.qmd b/doc/irm/qte.qmd index 1974942..afce285 100644 --- a/doc/irm/qte.qmd +++ b/doc/irm/qte.qmd @@ -8,55 +8,17 @@ jupyter: python3 import numpy as np import pandas as pd -from itables import init_notebook_mode, show, options +from itables import init_notebook_mode +import os +import sys -init_notebook_mode(all_interactive=True) +doc_dir = os.path.abspath(os.path.join(os.getcwd(), "..")) +if doc_dir not in sys.path: + sys.path.append(doc_dir) + +from utils.style_tables import generate_and_show_styled_table -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage", "Uniform Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage", "Uniform Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage", "Uniform Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - 
**{'font-weight': 'bold'}, - subset=["Coverage", "Uniform Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) +init_notebook_mode(all_interactive=True) ``` ## QTE @@ -79,105 +41,82 @@ print(metadata_df.T.to_string(header=False)) ```{python} #| echo: false -# set up data and rename columns -df = pd.read_csv("../../results/irm/pq_coverage_qte.csv", index_col=None) +# set up data +df_qte = pd.read_csv("../../results/irm/pq_coverage_qte.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_qte["repetition"].nunique() == 1 +n_rep_qte = df_qte["repetition"].unique()[0] -display_columns = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"] +display_columns_qte = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"] ``` ```{python} #| echo: false -level = 0.95 -df_ate_95 = df[df['level'] == level][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_qte, + filters={"level": 0.95}, + display_cols=display_columns_qte, + n_rep=n_rep_qte, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_ate_9 = df[df['level'] == level][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_qte, + filters={"level": 0.9}, + display_cols=display_columns_qte, + n_rep=n_rep_qte, + level_col="level", + 
coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ## Potential Quantiles -```{python} -#| echo: false - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) -``` ### Y(0) - Quantile ```{python} #| echo: false -# set up data and rename columns -df = pd.read_csv("../../results/irm/pq_coverage_pq0.csv", index_col=None) +# set up data +df_pq0 = pd.read_csv("../../results/irm/pq_coverage_pq0.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_pq0["repetition"].nunique() == 1 +n_rep_pq0 = df_pq0["repetition"].unique()[0] -display_columns = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage"] +display_columns_pq = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage"] ``` ```{python} #| echo: false -level = 0.95 -df_ate_95 = df[df['level'] == level][display_columns] -make_pretty(df_ate_95, level, n_rep) 
+generate_and_show_styled_table( + main_df=df_pq0, + filters={"level": 0.95}, + display_cols=display_columns_pq, + n_rep=n_rep_pq0, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_ate_9 = df[df['level'] == level][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_pq0, + filters={"level": 0.9}, + display_cols=display_columns_pq, + n_rep=n_rep_pq0, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ### Y(1) - Quantile @@ -186,82 +125,42 @@ make_pretty(df_ate_9, level, n_rep) #| echo: false # set up data and rename columns -df = pd.read_csv("../../results/irm/pq_coverage_pq1.csv", index_col=None) +df_pq1 = pd.read_csv("../../results/irm/pq_coverage_pq1.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_pq1["repetition"].nunique() == 1 +n_rep_pq1 = df_pq1["repetition"].unique()[0] -display_columns = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage"] +# display_columns_pq is the same as for Y(0) ``` ```{python} #| echo: false -level = 0.95 -df_ate_95 = df[df['level'] == level][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_pq1, + filters={"level": 0.95}, + display_cols=display_columns_pq, + n_rep=n_rep_pq1, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_ate_9 = df[df['level'] == level][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_pq1, + filters={"level": 0.9}, + display_cols=display_columns_pq, + n_rep=n_rep_pq1, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ## LQTE -```{python} -#| echo: false - -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # 
color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage", "Uniform Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage", "Uniform Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage", "Uniform Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage", "Uniform Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) -``` - The results are based on a location-scale model as described the corresponding [Example](https://docs.doubleml.org/stable/examples/py_double_ml_pq.html#Local-Potential-Quantiles-(LPQs)) with $10,000$ observations. The non-uniform results (coverage, ci length and bias) refer to averaged values over all quantiles (point-wise confidende intervals). 
@@ -280,105 +179,81 @@ print(metadata_df.T.to_string(header=False)) ```{python} #| echo: false -# set up data and rename columns -df = pd.read_csv("../../results/irm/lpq_coverage_lqte.csv", index_col=None) +# set up data +df_lqte = pd.read_csv("../../results/irm/lpq_coverage_lqte.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_lqte["repetition"].nunique() == 1 +n_rep_lqte = df_lqte["repetition"].unique()[0] -display_columns = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"] +display_columns_lqte = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"] ``` ```{python} #| echo: false -level = 0.95 -df_ate_95 = df[df['level'] == level][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_lqte, + filters={"level": 0.95}, + display_cols=display_columns_lqte, + n_rep=n_rep_lqte, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_ate_9 = df[df['level'] == level][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_lqte, + filters={"level": 0.9}, + display_cols=display_columns_lqte, + n_rep=n_rep_lqte, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ## Local Potential Quantiles -```{python} -#| echo: false - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - 
subset=["Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) -``` - ### Local Y(0) - Quantile ```{python} #| echo: false -# set up data and rename columns -df = pd.read_csv("../../results/irm/lpq_coverage_lpq0.csv", index_col=None) +# set up data +df_lpq0 = pd.read_csv("../../results/irm/lpq_coverage_lpq0.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_lpq0["repetition"].nunique() == 1 +n_rep_lpq0 = df_lpq0["repetition"].unique()[0] -display_columns = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage"] +display_columns_lpq = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage"] ``` ```{python} #| echo: false -level = 0.95 -df_ate_95 = df[df['level'] == level][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_lpq0, + filters={"level": 0.95}, + display_cols=display_columns_lpq, + n_rep=n_rep_lpq0, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_ate_9 = df[df['level'] == level][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_lpq0, + filters={"level": 0.9}, + display_cols=display_columns_lpq, + n_rep=n_rep_lpq0, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ### Local Y(1) - Quantile @@ -386,83 +261,43 @@ make_pretty(df_ate_9, 
level, n_rep) ```{python} #| echo: false -# set up data and rename columns -df = pd.read_csv("../../results/irm/lpq_coverage_lpq1.csv", index_col=None) +# set up data +df_lpq1 = pd.read_csv("../../results/irm/lpq_coverage_lpq1.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_lpq1["repetition"].nunique() == 1 +n_rep_lpq1 = df_lpq1["repetition"].unique()[0] -display_columns = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage"] +# display_columns_lpq is the same as for Local Y(0) ``` ```{python} #| echo: false -level = 0.95 -df_ate_95 = df[df['level'] == level][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_lpq1, + filters={"level": 0.95}, + display_cols=display_columns_lpq, + n_rep=n_rep_lpq1, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_ate_9 = df[df['level'] == level][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_lpq1, + filters={"level": 0.9}, + display_cols=display_columns_lpq, + n_rep=n_rep_lpq1, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ## CVaR Effects -```{python} -#| echo: false - -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage", "Uniform Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage", "Uniform Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage", "Uniform 
Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage", "Uniform Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) -``` - The results are based on a location-scale model as described the corresponding [Example](https://docs.doubleml.org/stable/examples/py_double_ml_cvar.html) with $5,000$ observations. Remark that the process is not linear. The non-uniform results (coverage, ci length and bias) refer to averaged values over all quantiles (point-wise confidende intervals). 
@@ -481,105 +316,81 @@ print(metadata_df.T.to_string(header=False)) ```{python} #| echo: false -# set up data and rename columns -df = pd.read_csv("../../results/irm/cvar_coverage_qte.csv", index_col=None) +# set up data +df_cvar_qte = pd.read_csv("../../results/irm/cvar_coverage_qte.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_cvar_qte["repetition"].nunique() == 1 +n_rep_cvar_qte = df_cvar_qte["repetition"].unique()[0] -display_columns = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"] +display_columns_cvar_qte = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"] ``` ```{python} #| echo: false -level = 0.95 -df_ate_95 = df[df['level'] == level][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_cvar_qte, + filters={"level": 0.95}, + display_cols=display_columns_cvar_qte, + n_rep=n_rep_cvar_qte, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_ate_9 = df[df['level'] == level][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_cvar_qte, + filters={"level": 0.9}, + display_cols=display_columns_cvar_qte, + n_rep=n_rep_cvar_qte, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ## CVaR Potential Quantiles -```{python} -#| echo: false - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - 
props='color:white;background-color:darkgreen', - subset=["Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) -``` - ### CVaR Y(0) ```{python} #| echo: false -# set up data and rename columns -df = pd.read_csv("../../results/irm/cvar_coverage_pq0.csv", index_col=None) +# set up data +df_cvar_pq0 = pd.read_csv("../../results/irm/cvar_coverage_pq0.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_cvar_pq0["repetition"].nunique() == 1 +n_rep_cvar_pq0 = df_cvar_pq0["repetition"].unique()[0] -display_columns = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage"] +display_columns_cvar_pq = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage"] ``` ```{python} #| echo: false -level = 0.95 -df_ate_95 = df[df['level'] == level][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_cvar_pq0, + filters={"level": 0.95}, + display_cols=display_columns_cvar_pq, + n_rep=n_rep_cvar_pq0, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_ate_9 = df[df['level'] == level][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_cvar_pq0, + filters={"level": 0.9}, + display_cols=display_columns_cvar_pq, + n_rep=n_rep_cvar_pq0, + level_col="level", + coverage_highlight_cols=["Coverage"] 
+) ``` ### CVaR Y(1) @@ -587,27 +398,37 @@ make_pretty(df_ate_9, level, n_rep) ```{python} #| echo: false -# set up data and rename columns -df = pd.read_csv("../../results/irm/cvar_coverage_pq1.csv", index_col=None) +# set up data +df_cvar_pq1 = pd.read_csv("../../results/irm/cvar_coverage_pq1.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_cvar_pq1["repetition"].nunique() == 1 +n_rep_cvar_pq1 = df_cvar_pq1["repetition"].unique()[0] -display_columns = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage"] +# display_columns_cvar_pq is the same as for CVaR Y(0) ``` ```{python} #| echo: false -level = 0.95 -df_ate_95 = df[df['level'] == level][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_cvar_pq1, + filters={"level": 0.95}, + display_cols=display_columns_cvar_pq, + n_rep=n_rep_cvar_pq1, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_ate_9 = df[df['level'] == level][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_cvar_pq1, + filters={"level": 0.9}, + display_cols=display_columns_cvar_pq, + n_rep=n_rep_cvar_pq1, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ```