diff --git a/doc/_quarto-dev.yml b/doc/_quarto-dev.yml index 11c7caf..5c3587a 100644 --- a/doc/_quarto-dev.yml +++ b/doc/_quarto-dev.yml @@ -6,6 +6,7 @@ metadata-files: - _website.yml website: + site-url: https://docs.doubleml.org/doubleml-coverage/dev/ drafts: - index.qmd # IRM @@ -14,6 +15,7 @@ website: - irm/irm_cate.qmd - irm/apo.qmd - irm/qte.qmd + - irm/iivm.qmd # PLM - plm/plr.qmd - plm/plr_gate.qmd diff --git a/doc/_website.yml b/doc/_website.yml index 4beb151..e400e91 100644 --- a/doc/_website.yml +++ b/doc/_website.yml @@ -1,6 +1,7 @@ website: title: "DoubleML Coverage" favicon: _static/favicon.ico + site-url: https://docs.doubleml.org/doubleml-coverage/ search: location: sidebar sidebar: @@ -17,6 +18,7 @@ website: - irm/irm_cate.qmd - irm/apo.qmd - irm/qte.qmd + - irm/iivm.qmd - text: "PLM" menu: - plm/plr.qmd diff --git a/doc/did/did_cs.qmd b/doc/did/did_cs.qmd index 90055cc..eab72ec 100644 --- a/doc/did/did_cs.qmd +++ b/doc/did/did_cs.qmd @@ -9,55 +9,17 @@ jupyter: python3 import numpy as np import pandas as pd -from itables import init_notebook_mode, show, options +from itables import init_notebook_mode +import os +import sys -init_notebook_mode(all_interactive=True) +doc_dir = os.path.abspath(os.path.join(os.getcwd(), "..")) +if doc_dir not in sys.path: + sys.path.append(doc_dir) + +from utils.style_tables import generate_and_show_styled_table -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - 
props='color:white;background-color:darkgreen', - subset=["Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) +init_notebook_mode(all_interactive=True) ``` ## ATTE Coverage @@ -91,20 +53,27 @@ display_columns = ["Learner g", "Learner m", "DGP", "In-sample-norm.", "Bias", " ```{python} #| echo: false -score = "observational" -level = 0.95 -df_ate_95 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.95, "Score": "observational"}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -score = "observational" -level = 0.9 - -df_ate_9 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.9, "Score": "observational"}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ### Experimental Score @@ -113,18 +82,24 @@ Remark that the only two valid DGPs are DGP $5$ and DGP $6$. 
All other DGPs are ```{python} #| echo: false -score = "experimental" -level = 0.95 - -df_ate_95 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.95, "Score": "experimental"}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -score = "experimental" -level = 0.9 - -df_ate_9 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.9, "Score": "experimental"}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` diff --git a/doc/did/did_multi.qmd b/doc/did/did_multi.qmd index fc5131e..2751420 100644 --- a/doc/did/did_multi.qmd +++ b/doc/did/did_multi.qmd @@ -9,55 +9,17 @@ jupyter: python3 import numpy as np import pandas as pd -from itables import init_notebook_mode, show, options +from itables import init_notebook_mode +import os +import sys -init_notebook_mode(all_interactive=True) +doc_dir = os.path.abspath(os.path.join(os.getcwd(), "..")) +if doc_dir not in sys.path: + sys.path.append(doc_dir) + +from utils.style_tables import generate_and_show_styled_table -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage", "Uniform Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage", "Uniform Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, 
- dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage", "Uniform Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage", "Uniform Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) +init_notebook_mode(all_interactive=True) ``` ## ATTE Coverage @@ -97,20 +59,26 @@ display_columns = ["Learner g", "Learner m", "DGP", "In-sample-norm.", "Bias", " ```{python} #| echo: false -score = "observational" -level = 0.95 - -df_ate_95 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.95, "Score": "observational"}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ```{python} #| echo: false -score = "observational" -level = 0.9 - -df_ate_9 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.9, "Score": "observational"}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` @@ -120,20 +88,26 @@ The results are only valid for the DGP 6, as the experimental score assumes a ra ```{python} #| echo: false -score = "experimental" -level = 0.95 - -df_ate_95 = df[(df['level'] == level) & (df["Score"] == 
score)][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.95, "Score": "experimental"}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ```{python} #| echo: false -score = "experimental" -level = 0.9 - -df_ate_9 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.9, "Score": "experimental"}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ## Aggregated Effects @@ -148,10 +122,10 @@ The non-uniform results (coverage, ci length and bias) refer to averaged values #| echo: false # set up data -df = pd.read_csv("../../results/did/did_multi_group.csv", index_col=None) +df_group = pd.read_csv("../../results/did/did_multi_group.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_group["repetition"].nunique() == 1 +n_rep_group = df_group["repetition"].unique()[0] display_columns = ["Learner g", "Learner m", "DGP", "In-sample-norm.", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"] ``` @@ -160,20 +134,26 @@ display_columns = ["Learner g", "Learner m", "DGP", "In-sample-norm.", "Bias", " ```{python} #| echo: false -score = "observational" -level = 0.95 - -df_ate_95 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_group, + filters={"level": 0.95, "Score": "observational"}, + display_cols=display_columns, + n_rep=n_rep_group, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ```{python} #| echo: false -score = "observational" -level = 0.9 - -df_ate_9 = df[(df['level'] == level) & 
(df["Score"] == score)][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_group, + filters={"level": 0.9, "Score": "observational"}, + display_cols=display_columns, + n_rep=n_rep_group, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` #### Experimental Score @@ -182,20 +162,26 @@ The results are only valid for the DGP 6, as the experimental score assumes a ra ```{python} #| echo: false -score = "experimental" -level = 0.95 - -df_ate_95 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_group, + filters={"level": 0.95, "Score": "experimental"}, + display_cols=display_columns, + n_rep=n_rep_group, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ```{python} #| echo: false -score = "experimental" -level = 0.9 - -df_ate_9 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_group, + filters={"level": 0.9, "Score": "experimental"}, + display_cols=display_columns, + n_rep=n_rep_group, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ### Time Effects @@ -204,10 +190,10 @@ make_pretty(df_ate_9, level, n_rep) #| echo: false # set up data -df = pd.read_csv("../../results/did/did_multi_time.csv", index_col=None) +df_time = pd.read_csv("../../results/did/did_multi_time.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_time["repetition"].nunique() == 1 +n_rep_time = df_time["repetition"].unique()[0] display_columns = ["Learner g", "Learner m", "DGP", "In-sample-norm.", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"] ``` @@ -216,20 +202,26 @@ display_columns = ["Learner g", "Learner m", "DGP", "In-sample-norm.", "Bias", " 
```{python} #| echo: false -score = "observational" -level = 0.95 - -df_ate_95 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_time, + filters={"level": 0.95, "Score": "observational"}, + display_cols=display_columns, + n_rep=n_rep_time, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ```{python} #| echo: false -score = "observational" -level = 0.9 - -df_ate_9 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_time, + filters={"level": 0.9, "Score": "observational"}, + display_cols=display_columns, + n_rep=n_rep_time, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` #### Experimental Score @@ -238,20 +230,26 @@ The results are only valid for the DGP 6, as the experimental score assumes a ra ```{python} #| echo: false -score = "experimental" -level = 0.95 - -df_ate_95 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_time, + filters={"level": 0.95, "Score": "experimental"}, + display_cols=display_columns, + n_rep=n_rep_time, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ```{python} #| echo: false -score = "experimental" -level = 0.9 - -df_ate_9 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_time, + filters={"level": 0.9, "Score": "experimental"}, + display_cols=display_columns, + n_rep=n_rep_time, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ### Event Study Aggregation @@ -260,10 +258,10 @@ make_pretty(df_ate_9, level, n_rep) #| echo: false # set up data -df = 
pd.read_csv("../../results/did/did_multi_eventstudy.csv", index_col=None) +df_es = pd.read_csv("../../results/did/did_multi_eventstudy.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_es["repetition"].nunique() == 1 +n_rep_es = df_es["repetition"].unique()[0] display_columns = ["Learner g", "Learner m", "DGP", "In-sample-norm.", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"] ``` @@ -272,20 +270,26 @@ display_columns = ["Learner g", "Learner m", "DGP", "In-sample-norm.", "Bias", " ```{python} #| echo: false -score = "observational" -level = 0.95 - -df_ate_95 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_es, + filters={"level": 0.95, "Score": "observational"}, + display_cols=display_columns, + n_rep=n_rep_es, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ```{python} #| echo: false -score = "observational" -level = 0.9 - -df_ate_9 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_es, + filters={"level": 0.9, "Score": "observational"}, + display_cols=display_columns, + n_rep=n_rep_es, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` #### Experimental Score @@ -295,18 +299,24 @@ The results are only valid for the DGP 6, as the experimental score assumes a ra ```{python} #| echo: false -score = "experimental" -level = 0.95 - -df_ate_95 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_es, + filters={"level": 0.95, "Score": "experimental"}, + display_cols=display_columns, + n_rep=n_rep_es, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ```{python} #| echo: 
false -score = "experimental" -level = 0.9 - -df_ate_9 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_es, + filters={"level": 0.9, "Score": "experimental"}, + display_cols=display_columns, + n_rep=n_rep_es, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` diff --git a/doc/did/did_pa.qmd b/doc/did/did_pa.qmd index 7435731..94f16ed 100644 --- a/doc/did/did_pa.qmd +++ b/doc/did/did_pa.qmd @@ -9,55 +9,17 @@ jupyter: python3 import numpy as np import pandas as pd -from itables import init_notebook_mode, show, options +from itables import init_notebook_mode +import os +import sys -init_notebook_mode(all_interactive=True) +doc_dir = os.path.abspath(os.path.join(os.getcwd(), "..")) +if doc_dir not in sys.path: + sys.path.append(doc_dir) + +from utils.style_tables import generate_and_show_styled_table -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in 
float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) +init_notebook_mode(all_interactive=True) ``` ## ATTE Coverage @@ -91,20 +53,28 @@ display_columns = ["Learner g", "Learner m", "DGP", "In-sample-norm.", "Bias", " ```{python} #| echo: false -score = "observational" -level = 0.95 -df_ate_95 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.95, "Score": "observational"}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -score = "observational" -level = 0.9 -df_ate_9 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.9, "Score": "observational"}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` @@ -114,18 +84,26 @@ Remark that the only two valid DGPs are DGP $5$ and DGP $6$. 
All other DGPs are ```{python} #| echo: false -score = "experimental" -level = 0.95 -df_ate_95 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.95, "Score": "experimental"}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -score = "experimental" -level = 0.9 -df_ate_9 = df[(df['level'] == level) & (df["Score"] == score)][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.9, "Score": "experimental"}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` diff --git a/doc/irm/apo.qmd b/doc/irm/apo.qmd index 9e1787a..3b8607a 100644 --- a/doc/irm/apo.qmd +++ b/doc/irm/apo.qmd @@ -9,55 +9,17 @@ jupyter: python3 import numpy as np import pandas as pd -from itables import init_notebook_mode, show, options +from itables import init_notebook_mode +import os +import sys -init_notebook_mode(all_interactive=True) +doc_dir = os.path.abspath(os.path.join(os.getcwd(), "..")) +if doc_dir not in sys.path: + sys.path.append(doc_dir) + +from utils.style_tables import generate_and_show_styled_table -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - 
subset=["Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) +init_notebook_mode(all_interactive=True) ``` ## APO Pointwise Coverage @@ -78,31 +40,41 @@ print(metadata_df.T.to_string(header=False)) ```{python} #| echo: false -# set up data and rename columns -df = pd.read_csv("../../results/irm/irm_apo_coverage_apo.csv", index_col=None) +# set up data +df_apo = pd.read_csv("../../results/irm/irm_apo_coverage_apo.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_apo["repetition"].nunique() == 1 +n_rep_apo = df_apo["repetition"].unique()[0] -display_columns = ["Learner g", "Learner m", "Treatment Level", "Bias", "CI Length", "Coverage"] +display_columns_apo = ["Learner g", "Learner m", "Treatment Level", "Bias", "CI Length", "Coverage"] ``` ```{python} #| echo: false -level = 0.95 -df_ate_95 = df[df['level'] == level][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_apo, + filters={"level": 0.95}, + display_cols=display_columns_apo, + n_rep=n_rep_apo, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_ate_9 = df[df['level'] == level][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_apo, + filters={"level": 0.9}, + display_cols=display_columns_apo, + n_rep=n_rep_apo, + 
level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` @@ -126,80 +98,40 @@ print(metadata_df.T.to_string(header=False)) ```{python} #| echo: false -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage", "Uniform Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage", "Uniform Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage", "Uniform Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage", "Uniform Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) -``` - -```{python} -#| echo: false - -# set up data and rename columns -df = pd.read_csv("../../results/irm/irm_apo_coverage_apos.csv", index_col=None) +# set up data +df_apos = pd.read_csv("../../results/irm/irm_apo_coverage_apos.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_apos["repetition"].nunique() == 1 +n_rep_apos = df_apos["repetition"].unique()[0] -display_columns = ["Learner g", 
"Learner m", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"] +display_columns_apos = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"] ``` ```{python} #| echo: false -level = 0.95 -df_ate_95 = df[df['level'] == level][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_apos, + filters={"level": 0.95}, + display_cols=display_columns_apos, + n_rep=n_rep_apos, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_ate_9 = df[df['level'] == level][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_apos, + filters={"level": 0.9}, + display_cols=display_columns_apos, + n_rep=n_rep_apos, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ## Causal Contrast Coverage @@ -222,28 +154,38 @@ print(metadata_df.T.to_string(header=False)) ```{python} #| echo: false -# set up data and rename columns -df = pd.read_csv("../../results/irm/irm_apo_coverage_apos_contrast.csv", index_col=None) +# set up data +df_contrast = pd.read_csv("../../results/irm/irm_apo_coverage_apos_contrast.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_contrast["repetition"].nunique() == 1 +n_rep_contrast = df_contrast["repetition"].unique()[0] -display_columns = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"] +display_columns_contrast = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"] ``` ```{python} #| echo: false -level = 0.95 -df_ate_95 = df[df['level'] == level][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_contrast, + filters={"level": 0.95}, + display_cols=display_columns_contrast, + 
n_rep=n_rep_contrast, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_ate_9 = df[df['level'] == level][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_contrast, + filters={"level": 0.9}, + display_cols=display_columns_contrast, + n_rep=n_rep_contrast, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` diff --git a/doc/irm/iivm.qmd b/doc/irm/iivm.qmd index c74277b..00f4184 100644 --- a/doc/irm/iivm.qmd +++ b/doc/irm/iivm.qmd @@ -9,55 +9,17 @@ jupyter: python3 import numpy as np import pandas as pd -from itables import init_notebook_mode, show, options +from itables import init_notebook_mode +import os +import sys -init_notebook_mode(all_interactive=True) +doc_dir = os.path.abspath(os.path.join(os.getcwd(), "..")) +if doc_dir not in sys.path: + sys.path.append(doc_dir) + +from utils.style_tables import generate_and_show_styled_table -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = 
df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) +init_notebook_mode(all_interactive=True) ``` ## LATE Coverage @@ -91,16 +53,26 @@ display_columns = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage"] ```{python} #| echo: false -level = 0.95 -df_ate_95 = df[df['level'] == level][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.95}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_ate_9 = df[df['level'] == level][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.9}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` diff --git a/doc/irm/irm.qmd b/doc/irm/irm.qmd index a62fff5..01fae21 100644 --- a/doc/irm/irm.qmd +++ b/doc/irm/irm.qmd @@ -9,55 +9,17 @@ jupyter: python3 import numpy as np import pandas as pd -from itables import init_notebook_mode, show, options +from itables import init_notebook_mode +import os +import sys -init_notebook_mode(all_interactive=True) +doc_dir = os.path.abspath(os.path.join(os.getcwd(), "..")) +if doc_dir not in sys.path: + sys.path.append(doc_dir) + +from utils.style_tables import generate_and_show_styled_table -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - 
props='color:black;background-color:red', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) +init_notebook_mode(all_interactive=True) ``` ## ATE Coverage @@ -78,31 +40,41 @@ print(metadata_df.T.to_string(header=False)) ```{python} #| echo: false -# set up data and rename columns -df = pd.read_csv("../../results/irm/irm_ate_coverage.csv", index_col=None) +# set up data +df_ate_cov = pd.read_csv("../../results/irm/irm_ate_coverage.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_ate_cov["repetition"].nunique() == 1 +n_rep_ate_cov = df_ate_cov["repetition"].unique()[0] -display_columns = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage"] +display_columns_ate_cov = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage"] ``` ```{python} #| echo: false -level = 0.95 -df_ate_95 = df[df['level'] == level][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_ate_cov, + filters={"level": 0.95}, + display_cols=display_columns_ate_cov, + n_rep=n_rep_ate_cov, + 
level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_ate_9 = df[df['level'] == level][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_ate_cov, + filters={"level": 0.9}, + display_cols=display_columns_ate_cov, + n_rep=n_rep_ate_cov, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` @@ -124,31 +96,41 @@ print(metadata_df.T.to_string(header=False)) ```{python} #| echo: false -# set up data and rename columns -df = pd.read_csv("../../results/irm/irm_atte_coverage.csv", index_col=None) +# set up data +df_atte_cov = pd.read_csv("../../results/irm/irm_atte_coverage.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_atte_cov["repetition"].nunique() == 1 +n_rep_atte_cov = df_atte_cov["repetition"].unique()[0] -display_columns = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage"] +display_columns_atte_cov = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage"] ``` ```{python} #| echo: false -level = 0.95 -df_atte_95 = df[df['level'] == level][display_columns] -make_pretty(df_atte_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_atte_cov, + filters={"level": 0.95}, + display_cols=display_columns_atte_cov, + n_rep=n_rep_atte_cov, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_atte_9 = df[df['level'] == level][display_columns] -make_pretty(df_atte_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_atte_cov, + filters={"level": 0.9}, + display_cols=display_columns_atte_cov, + n_rep=n_rep_atte_cov, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ## Sensitivity @@ -158,62 +140,6 @@ The simulations are based on the the ADD-DGP with $10,000$ observations. 
As the The confounding is set such that both sensitivity parameters are approximately $cf_y=cf_d=0.1$, such that the robustness value $RV$ should be approximately $10\%$. Further, the corresponding confidence intervals are one-sided (since the direction of the bias is unknown), such that only one side should approximate the corresponding coverage level (here only the lower coverage is relevant since the bias is positive). Remark that for the coverage level the value of $\rho$ has to be correctly specified, such that the coverage level will be generally (significantly) larger than the nominal level under the conservative choice of $|\rho|=1$. -```{python} -#| echo: false - -import numpy as np -import pandas as pd -from itables import init_notebook_mode, show, options - -init_notebook_mode(all_interactive=True) - -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage", "Coverage (Lower)"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage", "Coverage (Lower)"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage", "Coverage (Lower)"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage", "Coverage (Lower)"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is 
important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) -``` - ### ATE ::: {.callout-note title="Metadata" collapse="true"} @@ -231,33 +157,41 @@ print(metadata_df.T.to_string(header=False)) #| echo: false # set up data and rename columns -df = pd.read_csv("../../results/irm/irm_ate_sensitivity.csv", index_col=None) +df_ate_sens = pd.read_csv("../../results/irm/irm_ate_sensitivity.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_ate_sens["repetition"].nunique() == 1 +n_rep_ate_sens = df_ate_sens["repetition"].unique()[0] -display_columns = [ +display_columns_ate_sens = [ "Learner g", "Learner m", "Bias", "Bias (Lower)", "Bias (Upper)", "Coverage", "Coverage (Lower)", "Coverage (Upper)", "RV", "RVa"] +rename_map_sens = {"Learner g": "Learner l"} +coverage_highlight_cols_sens = ["Coverage", "Coverage (Lower)"] ``` ```{python} #| echo: false -score = "partialling out" -level = 0.95 - -df_ate_95 = df[(df['level'] == level)][display_columns] -df_ate_95.rename(columns={"Learner g": "Learner l"}, inplace=True) -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_ate_sens, + filters={"level": 0.95}, + display_cols=display_columns_ate_sens, + n_rep=n_rep_ate_sens, + level_col="level", + rename_map=rename_map_sens, + coverage_highlight_cols=coverage_highlight_cols_sens +) ``` ```{python} #| echo: false -score = "partialling out" -level = 0.9 - -df_ate_9 = df[(df['level'] == level)][display_columns] -df_ate_9.rename(columns={"Learner g": "Learner l"}, inplace=True) -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_ate_sens, + filters={"level": 0.9}, + display_cols=display_columns_ate_sens, + n_rep=n_rep_ate_sens, + level_col="level", + rename_map=rename_map_sens, + 
coverage_highlight_cols=coverage_highlight_cols_sens +) ``` ### ATTE @@ -276,32 +210,38 @@ print(metadata_df.T.to_string(header=False)) ```{python} #| echo: false -# set up data and rename columns -df = pd.read_csv("../../results/irm/irm_atte_sensitivity.csv", index_col=None) +# set up data +df_atte_sens = pd.read_csv("../../results/irm/irm_atte_sensitivity.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_atte_sens["repetition"].nunique() == 1 +n_rep_atte_sens = df_atte_sens["repetition"].unique()[0] -display_columns = [ +display_columns_atte_sens = [ "Learner g", "Learner m", "Bias", "Bias (Lower)", "Bias (Upper)", "Coverage", "Coverage (Lower)", "Coverage (Upper)", "RV", "RVa"] ``` ```{python} #| echo: false -score = "partialling out" -level = 0.95 - -df_ate_95 = df[(df['level'] == level)][display_columns] -df_ate_95.rename(columns={"Learner g": "Learner l"}, inplace=True) -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_atte_sens, + filters={"level": 0.95}, + display_cols=display_columns_atte_sens, + n_rep=n_rep_atte_sens, + level_col="level", + rename_map=rename_map_sens, + coverage_highlight_cols=coverage_highlight_cols_sens +) ``` ```{python} #| echo: false -score = "partialling out" -level = 0.9 - -df_ate_9 = df[(df['level'] == level)][display_columns] -df_ate_9.rename(columns={"Learner g": "Learner l"}, inplace=True) -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_atte_sens, + filters={"level": 0.9}, + display_cols=display_columns_atte_sens, + n_rep=n_rep_atte_sens, + level_col="level", + rename_map=rename_map_sens, + coverage_highlight_cols=coverage_highlight_cols_sens +) ``` diff --git a/doc/irm/irm_cate.qmd b/doc/irm/irm_cate.qmd index e9c4c49..52b5abb 100644 --- a/doc/irm/irm_cate.qmd +++ b/doc/irm/irm_cate.qmd @@ -9,55 +9,17 @@ jupyter: python3 import numpy as np import pandas as pd -from itables import init_notebook_mode, 
show, options +from itables import init_notebook_mode +import os +import sys -init_notebook_mode(all_interactive=True) +doc_dir = os.path.abspath(os.path.join(os.getcwd(), "..")) +if doc_dir not in sys.path: + sys.path.append(doc_dir) + +from utils.style_tables import generate_and_show_styled_table -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage", "Uniform Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage", "Uniform Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage", "Uniform Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage", "Uniform Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) +init_notebook_mode(all_interactive=True) ``` ## CATE Coverage @@ -93,15 +55,25 @@ display_columns = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage", "U ```{python} #| echo: false -level = 0.95 -df_ate_95 = df[df['level'] == level][display_columns] -make_pretty(df_ate_95, level, n_rep) 
+generate_and_show_styled_table( + main_df=df, + filters={"level": 0.95}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_ate_9 = df[df['level'] == level][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.9}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` diff --git a/doc/irm/irm_gate.qmd b/doc/irm/irm_gate.qmd index d4e6f36..c552771 100644 --- a/doc/irm/irm_gate.qmd +++ b/doc/irm/irm_gate.qmd @@ -9,55 +9,17 @@ jupyter: python3 import numpy as np import pandas as pd -from itables import init_notebook_mode, show, options +from itables import init_notebook_mode +import os +import sys -init_notebook_mode(all_interactive=True) +doc_dir = os.path.abspath(os.path.join(os.getcwd(), "..")) +if doc_dir not in sys.path: + sys.path.append(doc_dir) + +from utils.style_tables import generate_and_show_styled_table -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage", "Uniform Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage", "Uniform Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage", "Uniform Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage", "Uniform Coverage"]) - return 
styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) +init_notebook_mode(all_interactive=True) ``` ## GATE Coverage @@ -92,15 +54,25 @@ display_columns = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage", "U ```{python} #| echo: false -level = 0.95 -df_ate_95 = df[df['level'] == level][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.95}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_ate_9 = df[df['level'] == level][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.9}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` diff --git a/doc/irm/qte.qmd b/doc/irm/qte.qmd index 1974942..afce285 100644 --- a/doc/irm/qte.qmd +++ b/doc/irm/qte.qmd @@ -8,55 +8,17 @@ jupyter: python3 import numpy as np import pandas as pd -from itables import init_notebook_mode, show, options +from itables import init_notebook_mode +import os +import sys -init_notebook_mode(all_interactive=True) +doc_dir = os.path.abspath(os.path.join(os.getcwd(), "..")) +if doc_dir not in sys.path: + sys.path.append(doc_dir) + +from utils.style_tables import generate_and_show_styled_table -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), 
props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage", "Uniform Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage", "Uniform Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage", "Uniform Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage", "Uniform Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) +init_notebook_mode(all_interactive=True) ``` ## QTE @@ -79,105 +41,82 @@ print(metadata_df.T.to_string(header=False)) ```{python} #| echo: false -# set up data and rename columns -df = pd.read_csv("../../results/irm/pq_coverage_qte.csv", index_col=None) +# set up data +df_qte = pd.read_csv("../../results/irm/pq_coverage_qte.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_qte["repetition"].nunique() == 1 +n_rep_qte = df_qte["repetition"].unique()[0] -display_columns = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"] +display_columns_qte = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage", "Uniform CI Length", 
"Uniform Coverage"] ``` ```{python} #| echo: false -level = 0.95 -df_ate_95 = df[df['level'] == level][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_qte, + filters={"level": 0.95}, + display_cols=display_columns_qte, + n_rep=n_rep_qte, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_ate_9 = df[df['level'] == level][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_qte, + filters={"level": 0.9}, + display_cols=display_columns_qte, + n_rep=n_rep_qte, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ## Potential Quantiles -```{python} -#| echo: false - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) -``` ### Y(0) - Quantile ```{python} #| echo: false -# set up data and 
rename columns -df = pd.read_csv("../../results/irm/pq_coverage_pq0.csv", index_col=None) +# set up data +df_pq0 = pd.read_csv("../../results/irm/pq_coverage_pq0.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_pq0["repetition"].nunique() == 1 +n_rep_pq0 = df_pq0["repetition"].unique()[0] -display_columns = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage"] +display_columns_pq = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage"] ``` ```{python} #| echo: false -level = 0.95 -df_ate_95 = df[df['level'] == level][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_pq0, + filters={"level": 0.95}, + display_cols=display_columns_pq, + n_rep=n_rep_pq0, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_ate_9 = df[df['level'] == level][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_pq0, + filters={"level": 0.9}, + display_cols=display_columns_pq, + n_rep=n_rep_pq0, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ### Y(1) - Quantile @@ -186,82 +125,42 @@ make_pretty(df_ate_9, level, n_rep) #| echo: false # set up data and rename columns -df = pd.read_csv("../../results/irm/pq_coverage_pq1.csv", index_col=None) +df_pq1 = pd.read_csv("../../results/irm/pq_coverage_pq1.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_pq1["repetition"].nunique() == 1 +n_rep_pq1 = df_pq1["repetition"].unique()[0] -display_columns = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage"] +# display_columns_pq is the same as for Y(0) ``` ```{python} #| echo: false -level = 0.95 -df_ate_95 = df[df['level'] == level][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_pq1, + filters={"level": 0.95}, + 
display_cols=display_columns_pq, + n_rep=n_rep_pq1, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_ate_9 = df[df['level'] == level][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_pq1, + filters={"level": 0.9}, + display_cols=display_columns_pq, + n_rep=n_rep_pq1, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ## LQTE -```{python} -#| echo: false - -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage", "Uniform Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage", "Uniform Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage", "Uniform Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage", "Uniform Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) -``` - The results are based on a location-scale model as described the corresponding 
[Example](https://docs.doubleml.org/stable/examples/py_double_ml_pq.html#Local-Potential-Quantiles-(LPQs)) with $10,000$ observations. The non-uniform results (coverage, ci length and bias) refer to averaged values over all quantiles (point-wise confidence intervals). @@ -280,105 +179,81 @@ print(metadata_df.T.to_string(header=False)) ```{python} #| echo: false -# set up data and rename columns -df = pd.read_csv("../../results/irm/lpq_coverage_lqte.csv", index_col=None) +# set up data +df_lqte = pd.read_csv("../../results/irm/lpq_coverage_lqte.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_lqte["repetition"].nunique() == 1 +n_rep_lqte = df_lqte["repetition"].unique()[0] -display_columns = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"] +display_columns_lqte = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"] ``` ```{python} #| echo: false -level = 0.95 -df_ate_95 = df[df['level'] == level][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_lqte, + filters={"level": 0.95}, + display_cols=display_columns_lqte, + n_rep=n_rep_lqte, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_ate_9 = df[df['level'] == level][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_lqte, + filters={"level": 0.9}, + display_cols=display_columns_lqte, + n_rep=n_rep_lqte, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ## Local Potential Quantiles -```{python} -#| echo: false - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage"]) - styled_df = 
styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) -``` - ### Local Y(0) - Quantile ```{python} #| echo: false -# set up data and rename columns -df = pd.read_csv("../../results/irm/lpq_coverage_lpq0.csv", index_col=None) +# set up data +df_lpq0 = pd.read_csv("../../results/irm/lpq_coverage_lpq0.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_lpq0["repetition"].nunique() == 1 +n_rep_lpq0 = df_lpq0["repetition"].unique()[0] -display_columns = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage"] +display_columns_lpq = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage"] ``` ```{python} #| echo: false -level = 0.95 -df_ate_95 = df[df['level'] == level][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_lpq0, + filters={"level": 0.95}, + display_cols=display_columns_lpq, + n_rep=n_rep_lpq0, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_ate_9 = df[df['level'] == level][display_columns] -make_pretty(df_ate_9, level, n_rep) 
+generate_and_show_styled_table( + main_df=df_lpq0, + filters={"level": 0.9}, + display_cols=display_columns_lpq, + n_rep=n_rep_lpq0, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ### Local Y(1) - Quantile @@ -386,83 +261,43 @@ make_pretty(df_ate_9, level, n_rep) ```{python} #| echo: false -# set up data and rename columns -df = pd.read_csv("../../results/irm/lpq_coverage_lpq1.csv", index_col=None) +# set up data +df_lpq1 = pd.read_csv("../../results/irm/lpq_coverage_lpq1.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_lpq1["repetition"].nunique() == 1 +n_rep_lpq1 = df_lpq1["repetition"].unique()[0] -display_columns = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage"] +# display_columns_lpq is the same as for Local Y(0) ``` ```{python} #| echo: false -level = 0.95 -df_ate_95 = df[df['level'] == level][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_lpq1, + filters={"level": 0.95}, + display_cols=display_columns_lpq, + n_rep=n_rep_lpq1, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_ate_9 = df[df['level'] == level][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_lpq1, + filters={"level": 0.9}, + display_cols=display_columns_lpq, + n_rep=n_rep_lpq1, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ## CVaR Effects -```{python} -#| echo: false - -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage", "Uniform Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, 
- dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage", "Uniform Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage", "Uniform Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage", "Uniform Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) -``` - The results are based on a location-scale model as described in the corresponding [Example](https://docs.doubleml.org/stable/examples/py_double_ml_cvar.html) with $5,000$ observations. Remark that the process is not linear. The non-uniform results (coverage, ci length and bias) refer to averaged values over all quantiles (point-wise confidence intervals). 
@@ -481,105 +316,81 @@ print(metadata_df.T.to_string(header=False)) ```{python} #| echo: false -# set up data and rename columns -df = pd.read_csv("../../results/irm/cvar_coverage_qte.csv", index_col=None) +# set up data +df_cvar_qte = pd.read_csv("../../results/irm/cvar_coverage_qte.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_cvar_qte["repetition"].nunique() == 1 +n_rep_cvar_qte = df_cvar_qte["repetition"].unique()[0] -display_columns = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"] +display_columns_cvar_qte = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"] ``` ```{python} #| echo: false -level = 0.95 -df_ate_95 = df[df['level'] == level][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_cvar_qte, + filters={"level": 0.95}, + display_cols=display_columns_cvar_qte, + n_rep=n_rep_cvar_qte, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_ate_9 = df[df['level'] == level][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_cvar_qte, + filters={"level": 0.9}, + display_cols=display_columns_cvar_qte, + n_rep=n_rep_cvar_qte, + level_col="level", + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ## CVaR Potential Quantiles -```{python} -#| echo: false - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - 
props='color:white;background-color:darkgreen', - subset=["Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) -``` - ### CVaR Y(0) ```{python} #| echo: false -# set up data and rename columns -df = pd.read_csv("../../results/irm/cvar_coverage_pq0.csv", index_col=None) +# set up data +df_cvar_pq0 = pd.read_csv("../../results/irm/cvar_coverage_pq0.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_cvar_pq0["repetition"].nunique() == 1 +n_rep_cvar_pq0 = df_cvar_pq0["repetition"].unique()[0] -display_columns = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage"] +display_columns_cvar_pq = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage"] ``` ```{python} #| echo: false -level = 0.95 -df_ate_95 = df[df['level'] == level][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_cvar_pq0, + filters={"level": 0.95}, + display_cols=display_columns_cvar_pq, + n_rep=n_rep_cvar_pq0, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_ate_9 = df[df['level'] == level][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_cvar_pq0, + filters={"level": 0.9}, + display_cols=display_columns_cvar_pq, + n_rep=n_rep_cvar_pq0, + level_col="level", + coverage_highlight_cols=["Coverage"] 
+) ``` ### CVaR Y(1) @@ -587,27 +398,37 @@ make_pretty(df_ate_9, level, n_rep) ```{python} #| echo: false -# set up data and rename columns -df = pd.read_csv("../../results/irm/cvar_coverage_pq1.csv", index_col=None) +# set up data +df_cvar_pq1 = pd.read_csv("../../results/irm/cvar_coverage_pq1.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_cvar_pq1["repetition"].nunique() == 1 +n_rep_cvar_pq1 = df_cvar_pq1["repetition"].unique()[0] -display_columns = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage"] +# display_columns_cvar_pq is the same as for CVaR Y(0) ``` ```{python} #| echo: false -level = 0.95 -df_ate_95 = df[df['level'] == level][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_cvar_pq1, + filters={"level": 0.95}, + display_cols=display_columns_cvar_pq, + n_rep=n_rep_cvar_pq1, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_ate_9 = df[df['level'] == level][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_cvar_pq1, + filters={"level": 0.9}, + display_cols=display_columns_cvar_pq, + n_rep=n_rep_cvar_pq1, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` diff --git a/doc/plm/pliv.qmd b/doc/plm/pliv.qmd index 6c4d2a9..65d87f2 100644 --- a/doc/plm/pliv.qmd +++ b/doc/plm/pliv.qmd @@ -9,55 +9,17 @@ jupyter: python3 import numpy as np import pandas as pd -from itables import init_notebook_mode, show, options +from itables import init_notebook_mode +import os +import sys -init_notebook_mode(all_interactive=True) +doc_dir = os.path.abspath(os.path.join(os.getcwd(), "..")) +if doc_dir not in sys.path: + sys.path.append(doc_dir) + +from utils.style_tables import generate_and_show_styled_table -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), 
props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) +init_notebook_mode(all_interactive=True) ``` ## LATE Coverage @@ -79,34 +41,46 @@ print(metadata_df.T.to_string(header=False)) #| echo: false # set up data and rename columns -df = pd.read_csv("../../results/plm/pliv_late_coverage.csv", index_col=None) +df_coverage_pliv = pd.read_csv("../../results/plm/pliv_late_coverage.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +if "repetition" in df_coverage_pliv.columns and df_coverage_pliv["repetition"].nunique() == 1: + n_rep_pliv = df_coverage_pliv["repetition"].unique()[0] +elif "n_rep" in df_coverage_pliv.columns and df_coverage_pliv["n_rep"].nunique() == 1: + n_rep_pliv = df_coverage_pliv["n_rep"].unique()[0] +else: + n_rep_pliv = "N/A" -display_columns = ["Learner g", "Learner m", "Learner r", "Bias", "CI 
Length", "Coverage"] +display_columns_pliv = ["Learner g", "Learner m", "Learner r", "Bias", "CI Length", "Coverage"] ``` ### Partialling out ```{python} #| echo: false -score = "partialling out" -level = 0.95 -df_ate_95 = df[(df['level'] == level) & (df["score"] == score)][display_columns] -df_ate_95.rename(columns={"Learner g": "Learner l"}, inplace=True) -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_coverage_pliv, + filters={"level": 0.95, "score": "partialling out"}, + display_cols=display_columns_pliv, + n_rep=n_rep_pliv, + level_col="level", + rename_map={"Learner g": "Learner l"}, + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -score = "partialling out" -level = 0.9 -df_ate_9 = df[(df['level'] == level) & (df["score"] == score)][display_columns] -df_ate_9.rename(columns={"Learner g": "Learner l"}, inplace=True) -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_coverage_pliv, + filters={"level": 0.90, "score": "partialling out"}, + display_cols=display_columns_pliv, + n_rep=n_rep_pliv, + level_col="level", + rename_map={"Learner g": "Learner l"}, + coverage_highlight_cols=["Coverage"] +) ``` ### IV-type @@ -115,18 +89,26 @@ For the IV-type score, the learners `ml_l` and `ml_g` are both set to the same t ```{python} #| echo: false -score = "IV-type" -level = 0.95 -df_ate_95 = df[(df['level'] == level) & (df["score"] == score)][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_coverage_pliv, + filters={"level": 0.95, "score": "IV-type"}, + display_cols=display_columns_pliv, + n_rep=n_rep_pliv, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -score = "IV-type" -level = 0.9 -df_ate_9 = df[(df['level'] == level) & (df["score"] == score)][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_coverage_pliv, + filters={"level": 
0.9, "score": "IV-type"}, + display_cols=display_columns_pliv, + n_rep=n_rep_pliv, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` diff --git a/doc/plm/plr.qmd b/doc/plm/plr.qmd index 31300f5..a5ba041 100644 --- a/doc/plm/plr.qmd +++ b/doc/plm/plr.qmd @@ -9,55 +9,17 @@ jupyter: python3 import numpy as np import pandas as pd -from itables import init_notebook_mode, show, options +from itables import init_notebook_mode +import os +import sys -init_notebook_mode(all_interactive=True) +doc_dir = os.path.abspath(os.path.join(os.getcwd(), "..")) +if doc_dir not in sys.path: + sys.path.append(doc_dir) + +from utils.style_tables import generate_and_show_styled_table -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) 
+init_notebook_mode(all_interactive=True) ``` ## ATE Coverage @@ -79,34 +41,46 @@ print(metadata_df.T.to_string(header=False)) #| echo: false # set up data and rename columns -df = pd.read_csv("../../results/plm/plr_ate_coverage.csv", index_col=None) +df_coverage = pd.read_csv("../../results/plm/plr_ate_coverage.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +if "repetition" in df_coverage.columns and df_coverage["repetition"].nunique() == 1: + n_rep_coverage = df_coverage["repetition"].unique()[0] +elif "n_rep" in df_coverage.columns and df_coverage["n_rep"].nunique() == 1: + n_rep_coverage = df_coverage["n_rep"].unique()[0] +else: + n_rep_coverage = "N/A" # Fallback if n_rep cannot be determined -display_columns = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage"] +display_columns_coverage = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage"] ``` ### Partialling out ```{python} # | echo: false -score = "partialling out" -level = 0.95 -df_ate_95 = df[(df["level"] == level) & (df["score"] == score)][display_columns] -df_ate_95.rename(columns={"Learner g": "Learner l"}, inplace=True) -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_coverage, + filters={"level": 0.95, "score": "partialling out"}, + display_cols=display_columns_coverage, + n_rep=n_rep_coverage, + level_col="level", + rename_map={"Learner g": "Learner l"}, + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -score = "partialling out" -level = 0.9 -df_ate_9 = df[(df['level'] == level) & (df["score"] == score)][display_columns] -df_ate_9.rename(columns={"Learner g": "Learner l"}, inplace=True) -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_coverage, + filters={"level": 0.9, "score": "partialling out"}, + display_cols=display_columns_coverage, + n_rep=n_rep_coverage, + level_col="level", + rename_map={"Learner g": "Learner l"}, + 
coverage_highlight_cols=["Coverage"] +) ``` ### IV-type @@ -115,20 +89,28 @@ For the IV-type score, the learners `ml_l` and `ml_g` are both set to the same t ```{python} #| echo: false -score = "IV-type" -level = 0.95 -df_ate_95 = df[(df['level'] == level) & (df["score"] == score)][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_coverage, + filters={"level": 0.95, "score": "IV-type"}, + display_cols=display_columns_coverage, + n_rep=n_rep_coverage, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -score = "IV-type" -level = 0.9 -df_ate_9 = df[(df['level'] == level) & (df["score"] == score)][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_coverage, + filters={"level": 0.9, "score": "IV-type"}, + display_cols=display_columns_coverage, + n_rep=n_rep_coverage, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ## ATE Sensitivity @@ -142,9 +124,9 @@ Further, the corresponding confidence intervals are one-sided (since the directi ```{python} #| echo: false -metadata_file = '../../results/plm/plr_ate_sensitivity_metadata.csv' -metadata_df = pd.read_csv(metadata_file) -print(metadata_df.T.to_string(header=False)) +metadata_file_sens = '../../results/plm/plr_ate_sensitivity_metadata.csv' +metadata_df_sens = pd.read_csv(metadata_file_sens) +print(metadata_df_sens.T.to_string(header=False)) ``` ::: @@ -153,91 +135,48 @@ print(metadata_df.T.to_string(header=False)) #| echo: false # set up data and rename columns -df = pd.read_csv("../../results/plm/plr_ate_sensitivity.csv", index_col=None) +df_sensitivity = pd.read_csv("../../results/plm/plr_ate_sensitivity.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +if "repetition" in df_sensitivity.columns and df_sensitivity["repetition"].nunique() == 1: + n_rep_sensitivity = df_sensitivity["repetition"].unique()[0] +elif 
"n_rep" in df_sensitivity.columns and df_sensitivity["n_rep"].nunique() == 1: + n_rep_sensitivity = df_sensitivity["n_rep"].unique()[0] +else: + n_rep_sensitivity = "N/A" -display_columns = [ +display_columns_sensitivity = [ "Learner g", "Learner m", "Bias", "Bias (Lower)", "Bias (Upper)", "Coverage", "Coverage (Lower)", "Coverage (Upper)", "RV", "RVa"] ``` -```{python} -#| echo: false - -import numpy as np -import pandas as pd -from itables import init_notebook_mode, show, options - -init_notebook_mode(all_interactive=True) - -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage", "Coverage (Upper)"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage", "Coverage (Upper)"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage", "Coverage (Upper)"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage", "Coverage (Upper)"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) -``` ### Partialling out ```{python} #| echo: false -score = "partialling out" -level 
= 0.95 -df_ate_95 = df[(df['level'] == level) & (df["score"] == score)][display_columns] -df_ate_95.rename(columns={"Learner g": "Learner l"}, inplace=True) -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_sensitivity, + filters={"level": 0.95, "score": "partialling out"}, + display_cols=display_columns_sensitivity, + n_rep=n_rep_sensitivity, + level_col="level", + rename_map={"Learner g": "Learner l"}, + coverage_highlight_cols=["Coverage", "Coverage (Upper)"] +) ``` ```{python} #| echo: false -score = "partialling out" -level = 0.9 - -df_ate_9 = df[(df['level'] == level) & (df["score"] == score)][display_columns] -df_ate_9.rename(columns={"Learner g": "Learner l"}, inplace=True) -make_pretty(df_ate_9, level, n_rep) +#| +generate_and_show_styled_table( + main_df=df_sensitivity, + filters={"level": 0.9, "score": "partialling out"}, + display_cols=display_columns_sensitivity, + n_rep=n_rep_sensitivity, + level_col="level", + rename_map={"Learner g": "Learner l"}, + coverage_highlight_cols=["Coverage", "Coverage (Upper)"] +) ``` ### IV-type @@ -246,18 +185,26 @@ For the IV-type score, the learners `ml_l` and `ml_g` are both set to the same t ```{python} #| echo: false -score = "IV-type" -level = 0.95 -df_ate_95 = df[(df['level'] == level) & (df["score"] == score)][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_sensitivity, + filters={"level": 0.95, "score": "IV-type"}, + display_cols=display_columns_sensitivity, + n_rep=n_rep_sensitivity, + level_col="level", + coverage_highlight_cols=["Coverage", "Coverage (Upper)"] +) ``` ```{python} #| echo: false -score = "IV-type" -level = 0.9 -df_ate_9 = df[(df['level'] == level) & (df["score"] == score)][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_sensitivity, + filters={"level": 0.9, "score": "IV-type"}, + display_cols=display_columns_sensitivity, + n_rep=n_rep_sensitivity, + 
level_col="level", + coverage_highlight_cols=["Coverage", "Coverage (Upper)"] +) ``` diff --git a/doc/plm/plr_cate.qmd b/doc/plm/plr_cate.qmd index 57be897..f62fbaf 100644 --- a/doc/plm/plr_cate.qmd +++ b/doc/plm/plr_cate.qmd @@ -9,55 +9,17 @@ jupyter: python3 import numpy as np import pandas as pd -from itables import init_notebook_mode, show, options +from itables import init_notebook_mode +import os +import sys -init_notebook_mode(all_interactive=True) +doc_dir = os.path.abspath(os.path.join(os.getcwd(), "..")) +if doc_dir not in sys.path: + sys.path.append(doc_dir) + +from utils.style_tables import generate_and_show_styled_table -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage", "Uniform Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage", "Uniform Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage", "Uniform Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage", "Uniform Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, 
caption=caption, allow_html=True) +init_notebook_mode(all_interactive=True) ``` ## CATE Coverage @@ -81,28 +43,44 @@ print(metadata_df.T.to_string(header=False)) #| echo: false # set up data and rename columns -df = pd.read_csv("../../results/plm/plr_cate_coverage.csv", index_col=None) +df_cate = pd.read_csv("../../results/plm/plr_cate_coverage.csv", index_col=None) # Renamed to df_cate + +# Determine the number of repetitions; fall back to "N/A" if it cannot be determined +if "repetition" in df_cate.columns and df_cate["repetition"].nunique() == 1: + n_rep_cate = df_cate["repetition"].unique()[0] +elif "n_rep" in df_cate.columns and df_cate["n_rep"].nunique() == 1: + n_rep_cate = df_cate["n_rep"].unique()[0] +else: + n_rep_cate = "N/A" -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] -display_columns = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"] +display_columns_cate = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"] ``` ```{python} #| echo: false -level = 0.95 -df_ate_95 = df[df['level'] == level][display_columns] -df_ate_95.rename(columns={"Learner g": "Learner l"}, inplace=True) -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_cate, + filters={"level": 0.95}, + display_cols=display_columns_cate, + n_rep=n_rep_cate, + level_col="level", + rename_map={"Learner g": "Learner l"}, + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_ate_9 = df[df['level'] == level][display_columns] -df_ate_9.rename(columns={"Learner g": "Learner l"}, inplace=True) -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_cate, + filters={"level": 0.9}, + display_cols=display_columns_cate, + n_rep=n_rep_cate, + level_col="level", + rename_map={"Learner g": "Learner l"}, + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` diff --git 
a/doc/plm/plr_gate.qmd b/doc/plm/plr_gate.qmd index 8f5490d..0f0c105 100644 --- a/doc/plm/plr_gate.qmd +++ b/doc/plm/plr_gate.qmd @@ -9,55 +9,17 @@ jupyter: python3 import numpy as np import pandas as pd -from itables import init_notebook_mode, show, options +from itables import init_notebook_mode +import os +import sys -init_notebook_mode(all_interactive=True) +doc_dir = os.path.abspath(os.path.join(os.getcwd(), "..")) +if doc_dir not in sys.path: + sys.path.append(doc_dir) + +from utils.style_tables import generate_and_show_styled_table -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage", "Uniform Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage", "Uniform Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage", "Uniform Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage", "Uniform Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) +init_notebook_mode(all_interactive=True) ``` ## GATE Coverage @@ -81,28 
+43,43 @@ print(metadata_df.T.to_string(header=False)) #| echo: false # set up data and rename columns -df = pd.read_csv("../../results/plm/plr_gate_coverage.csv", index_col=None) +df_gate = pd.read_csv("../../results/plm/plr_gate_coverage.csv", index_col=None) # Renamed to df_gate for clarity + +if "repetition" in df_gate.columns and df_gate["repetition"].nunique() == 1: + n_rep_gate = df_gate["repetition"].unique()[0] +elif "n_rep" in df_gate.columns and df_gate["n_rep"].nunique() == 1: # Check for n_rep as well + n_rep_gate = df_gate["n_rep"].unique()[0] +else: + n_rep_gate = "N/A" # Fallback if n_rep cannot be determined -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] -display_columns = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"] +display_columns_gate = ["Learner g", "Learner m", "Bias", "CI Length", "Coverage", "Uniform CI Length", "Uniform Coverage"] ``` ```{python} #| echo: false -level = 0.95 -df_ate_95 = df[df['level'] == level][display_columns] -df_ate_95.rename(columns={"Learner g": "Learner l"}, inplace=True) -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_gate, + filters={"level": 0.95}, + display_cols=display_columns_gate, + n_rep=n_rep_gate, + level_col="level", + rename_map={"Learner g": "Learner l"}, + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 -df_ate_9 = df[df['level'] == level][display_columns] -df_ate_9.rename(columns={"Learner g": "Learner l"}, inplace=True) -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_gate, + filters={"level": 0.9}, + display_cols=display_columns_gate, + n_rep=n_rep_gate, + level_col="level", + rename_map={"Learner g": "Learner l"}, + coverage_highlight_cols=["Coverage", "Uniform Coverage"] +) ``` diff --git a/doc/rdd/rdd.qmd b/doc/rdd/rdd.qmd index 7e083f5..4a74fb7 100644 --- a/doc/rdd/rdd.qmd +++ 
b/doc/rdd/rdd.qmd @@ -10,55 +10,17 @@ jupyter: python3 import numpy as np import pandas as pd -from itables import init_notebook_mode, show, options +from itables import init_notebook_mode +import os +import sys -init_notebook_mode(all_interactive=True) +doc_dir = os.path.abspath(os.path.join(os.getcwd(), "..")) +if doc_dir not in sys.path: + sys.path.append(doc_dir) + +from utils.style_tables import generate_and_show_styled_table -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) +init_notebook_mode(all_interactive=True) ``` ## Sharp Design @@ -80,28 +42,36 @@ print(metadata_df.T.to_string(header=False)) # | echo: false # set up data and rename columns -df = pd.read_csv("../../results/rdd/rdd_sharp_coverage.csv", index_col=None) +df_sharp 
= pd.read_csv("../../results/rdd/rdd_sharp_coverage.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_sharp["repetition"].nunique() == 1 +n_rep_sharp = df_sharp["repetition"].unique()[0] -display_columns = ["Method", "Learner g", "fs specification", "Bias", "CI Length", "Coverage"] +display_columns_sharp = ["Method", "Learner g", "fs specification", "Bias", "CI Length", "Coverage"] ``` ```{python} #| echo: false -level = 0.95 - -df_ate_95 = df[(df['level'] == level)][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df_sharp, + filters={"level": 0.95}, + display_cols=display_columns_sharp, + n_rep=n_rep_sharp, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 - -df_ate_9 = df[(df['level'] == level)][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_sharp, + filters={"level": 0.9}, + display_cols=display_columns_sharp, + n_rep=n_rep_sharp, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` @@ -124,26 +94,34 @@ print(metadata_df.T.to_string(header=False)) # | echo: false # set up data and rename columns -df = pd.read_csv("../../results/rdd/rdd_fuzzy_coverage.csv", index_col=None) +df_fuzzy = pd.read_csv("../../results/rdd/rdd_fuzzy_coverage.csv", index_col=None) -assert df["repetition"].nunique() == 1 -n_rep = df["repetition"].unique()[0] +assert df_fuzzy["repetition"].nunique() == 1 +n_rep_fuzzy = df_fuzzy["repetition"].unique()[0] -display_columns = ["Method", "Learner g", "Learner m", "fs specification", "Bias", "CI Length", "Coverage"] +display_columns_fuzzy = ["Method", "Learner g", "Learner m", "fs specification", "Bias", "CI Length", "Coverage"] ``` ```{python} #| echo: false -level = 0.95 - -df_ate_95 = df[(df['level'] == level)][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + 
main_df=df_fuzzy, + filters={"level": 0.95}, + display_cols=display_columns_fuzzy, + n_rep=n_rep_fuzzy, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 - -df_ate_9 = df[(df['level'] == level)][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df_fuzzy, + filters={"level": 0.9}, + display_cols=display_columns_fuzzy, + n_rep=n_rep_fuzzy, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` diff --git a/doc/ssm/ssm_mar.qmd b/doc/ssm/ssm_mar.qmd index 1ffa222..65a1871 100644 --- a/doc/ssm/ssm_mar.qmd +++ b/doc/ssm/ssm_mar.qmd @@ -9,55 +9,17 @@ jupyter: python3 import numpy as np import pandas as pd -from itables import init_notebook_mode, show, options +from itables import init_notebook_mode +import os +import sys -init_notebook_mode(all_interactive=True) +doc_dir = os.path.abspath(os.path.join(os.getcwd(), "..")) +if doc_dir not in sys.path: + sys.path.append(doc_dir) + +from utils.style_tables import generate_and_show_styled_table -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - 
float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) +init_notebook_mode(all_interactive=True) ``` ## ATE Coverage @@ -90,17 +52,24 @@ display_columns = ["Learner g", "Learner m", "Learner pi", "Bias", "CI Length", ```{python} #| echo: false -level = 0.95 - -df_ate_95 = df[(df['level'] == level)][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.95}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -score = "partialling out" -level = 0.9 - -df_ate_9 = df[(df['level'] == level)][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.9}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` diff --git a/doc/ssm/ssm_nonignorable.qmd b/doc/ssm/ssm_nonignorable.qmd index 3afb9b9..f0a807d 100644 --- a/doc/ssm/ssm_nonignorable.qmd +++ b/doc/ssm/ssm_nonignorable.qmd @@ -9,55 +9,17 @@ jupyter: python3 import numpy as np import pandas as pd -from itables import init_notebook_mode, show, options +from itables import init_notebook_mode +import os +import sys -init_notebook_mode(all_interactive=True) +doc_dir = os.path.abspath(os.path.join(os.getcwd(), "..")) +if doc_dir not in sys.path: + sys.path.append(doc_dir) + +from utils.style_tables import generate_and_show_styled_table -def highlight_range(s, level=0.95, dist=0.05, props=''): - color_grid = np.where((s >= level-dist) & - (s <= level+dist), props, '') - return color_grid - - -def color_coverage(df, level): - # color 
coverage column order is important - styled_df = df.apply( - highlight_range, - level=level, - dist=1.0, - props='color:black;background-color:red', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.1, - props='color:black;background-color:yellow', - subset=["Coverage"]) - styled_df = styled_df.apply( - highlight_range, - level=level, - dist=0.05, - props='color:white;background-color:darkgreen', - subset=["Coverage"]) - - # set all coverage values to bold - styled_df = styled_df.set_properties( - **{'font-weight': 'bold'}, - subset=["Coverage"]) - return styled_df - - -def make_pretty(df, level, n_rep): - styled_df = df.style.hide(axis="index") - # Format only float columns - float_cols = df.select_dtypes(include=['float']).columns - styled_df = styled_df.format({col: "{:.3f}" for col in float_cols}) - - # color coverage column order is important - styled_df = color_coverage(styled_df, level) - caption = f"Coverage for {level*100}%-Confidence Interval over {n_rep} Repetitions" - - return show(styled_df, caption=caption, allow_html=True) +init_notebook_mode(all_interactive=True) ``` ## ATE Coverage @@ -90,16 +52,24 @@ display_columns = ["Learner g", "Learner m", "Learner pi", "Bias", "CI Length", ```{python} #| echo: false -level = 0.95 - -df_ate_95 = df[(df['level'] == level)][display_columns] -make_pretty(df_ate_95, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.95}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` ```{python} #| echo: false -level = 0.9 - -df_ate_9 = df[(df['level'] == level)][display_columns] -make_pretty(df_ate_9, level, n_rep) +generate_and_show_styled_table( + main_df=df, + filters={"level": 0.9}, + display_cols=display_columns, + n_rep=n_rep, + level_col="level", + coverage_highlight_cols=["Coverage"] +) ``` diff --git a/doc/utils/style_tables.py b/doc/utils/style_tables.py new file mode 100644 
index 0000000..5d73466 --- /dev/null +++ b/doc/utils/style_tables.py @@ -0,0 +1,231 @@ +import numpy as np +import pandas as pd +from pandas.io.formats.style import Styler +from typing import Union, Optional, List, Any +from itables import show + + +# Define highlighting tiers as a list of dictionaries or tuples +# Each element defines: dist, props. Applied in order (later rules can override). +# Order: from least specific (largest dist) to most specific (smallest dist) +# or ensure the _apply_highlight_range logic correctly handles overlaps if props are different. +# Current logic: more specific (smaller dist) rules are applied last and override. +HIGHLIGHT_TIERS = [ + {"dist": 1.0, "props": "color:black;background-color:red;"}, + {"dist": 0.1, "props": "color:black;background-color:yellow;"}, + {"dist": 0.05, "props": "color:white;background-color:darkgreen;"}, +] + + +def _apply_highlight_range( + s_col: pd.Series, level: float, dist: float, props: str +) -> np.ndarray: + """ + Helper function for Styler.apply. Applies CSS properties based on a numeric range. + Returns an array of CSS strings. + """ + s_numeric = pd.to_numeric( + s_col, errors="coerce" + ) # Convert to numeric, non-convertibles become NaN + # Apply style ONLY if value is WITHIN the current dist from level + # This means for tiered styling, the order of applying styles in the calling function matters. + # If a value falls into multiple dist categories, the LAST applied style for that dist will win. + condition = (s_numeric >= level - dist) & (s_numeric <= level + dist) + return np.where(condition, props, "") + + +def color_coverage_columns( + styler: Styler, level: float, coverage_cols: list[str] = ["Coverage"] +) -> Styler: + """ + Applies tiered highlighting to specified coverage columns of a Styler object. + The order of application matters: more specific (narrower dist) rules are applied last to override. 
+ """ + if not isinstance(styler, Styler): + raise TypeError("Expected a pandas Styler object.") + + # Ensure coverage_cols is a list + if isinstance(coverage_cols, str): + coverage_cols = [coverage_cols] + + # Filter for columns that actually exist in the DataFrame being styled + valid_coverage_cols = [col for col in coverage_cols if col in styler.data.columns] + + if not valid_coverage_cols: + return styler # No valid columns to style + + # Apply highlighting rules from the defined tiers + # The order in HIGHLIGHT_TIERS is important if props are meant to override. + # Pandas Styler.apply applies styles sequentially. If a cell matches multiple + # conditions from different .apply calls, the styles from later calls typically override + # or merge with earlier ones, depending on the CSS properties. + # For background-color, later calls will override. + current_styler = styler + for tier in HIGHLIGHT_TIERS: + current_styler = current_styler.apply( + _apply_highlight_range, + level=level, + dist=tier["dist"], + props=tier["props"], + subset=valid_coverage_cols, + ) + + # Set font to bold for the coverage columns + current_styler = current_styler.set_properties( + **{"font-weight": "bold"}, subset=valid_coverage_cols + ) + return current_styler + + +def create_styled_table( + df: pd.DataFrame, + level: float, + n_rep: Union[int, str], + caption_prefix: str = "Coverage", + coverage_cols: List[str] = ["Coverage"], + float_precision: str = "{:.3f}", +) -> Styler: + """ + Creates a styled pandas DataFrame (Styler object) for display. + - Hides the DataFrame index. + - Formats float columns to a specified precision. + - Applies conditional highlighting to coverage columns. + - Sets a descriptive caption. 
+ """ + if not isinstance(df, pd.DataFrame): + return pd.DataFrame({"Error": ["Input is not a DataFrame."]}).style.hide( + axis="index" + ) + + if df.empty: + empty_df_cols = df.columns if df.columns.tolist() else ["Info"] + message_val = ( + ["No data to display."] + if not df.columns.tolist() + else [None] * len(empty_df_cols) + ) + df_to_style = pd.DataFrame( + ( + dict(zip(empty_df_cols, [[v] for v in message_val])) + if not df.columns.tolist() + else {} # Pass empty dict for empty DataFrame with columns + ), + columns=empty_df_cols, + ) + return df_to_style.style.hide(axis="index").set_caption("No data to display.") + + # Prepare float formatting dictionary + float_cols = df.select_dtypes(include=["float", "float64", "float32"]).columns + format_dict = {col: float_precision for col in float_cols if col in df.columns} + + # Create and set the caption text + level_percent = level * 100 + if abs(level_percent - round(level_percent)) < 1e-9: + level_display = f"{int(round(level_percent))}" + else: + level_display = f"{level_percent:.1f}" + + n_rep_display = str(n_rep) # Ensure n_rep is a string for the caption + + caption_text = f"{caption_prefix} for {level_display}%-Confidence Interval over {n_rep_display} Repetitions" + + # Chain Styler methods + styled_df = ( + df.style.hide(axis="index") + .format( + format_dict if format_dict else None + ) # Pass None if no float cols to format + .pipe(color_coverage_columns, level=level, coverage_cols=coverage_cols) + .set_caption(caption_text) + ) + + return styled_df + + +def generate_and_show_styled_table( + main_df: pd.DataFrame, + filters: dict[str, Any], + display_cols: List[str], + n_rep: Union[int, str], + level_col: str = "level", + rename_map: Optional[dict[str, str]] = None, + caption_prefix: str = "Coverage", + coverage_highlight_cols: List[str] = ["Coverage"], + float_precision: str = "{:.3f}", +): + """ + Filters a DataFrame based on a dictionary of conditions, + creates a styled table, and displays it. 
+ """ + if main_df.empty: + print("Warning: Input DataFrame is empty.") + # Optionally, show an empty table or a message + empty_styled_df = ( + pd.DataFrame(columns=display_cols) + .style.hide(axis="index") + .set_caption("No data available (input empty).") + ) + show(empty_styled_df, allow_html=True) + return + + # Build filter condition + current_df = main_df + filter_conditions = [] + filter_description_parts = [] + + for col, value in filters.items(): + if col not in current_df.columns: + print( + f"Warning: Filter column '{col}' not found in DataFrame. Skipping this filter." + ) + continue + current_df = current_df[current_df[col] == value] + filter_conditions.append(f"{col} == {value}") + filter_description_parts.append(f"{col}='{value}'") + + filter_description = " & ".join(filter_description_parts) + + if current_df.empty: + level_val = filters.get(level_col, "N/A") + level_percent_display = ( + f"{level_val*100}%" if isinstance(level_val, (int, float)) else level_val + ) + caption_msg = f"No data after filtering for {filter_description} at {level_percent_display} level." + print(f"Warning: {caption_msg}") + empty_styled_df = ( + pd.DataFrame(columns=display_cols) + .style.hide(axis="index") + .set_caption(caption_msg) + ) + show(empty_styled_df, allow_html=True) + return + + df_filtered = current_df[ + display_cols + ].copy() # Select display columns after filtering + + if rename_map: + df_filtered.rename(columns=rename_map, inplace=True) + + # Determine the level for styling from the filters, if present + styling_level = filters.get(level_col) + if styling_level is None or not isinstance(styling_level, (float, int)): + print( + f"Warning: '{level_col}' not found in filters or is not numeric. Cannot determine styling level for highlighting." 
+ ) + # Fallback or raise error, for now, we'll proceed without level-specific caption part if it's missing + # Or you could try to infer it if there's only one unique level in the filtered data + if level_col in df_filtered.columns and df_filtered[level_col].nunique() == 1: + styling_level = df_filtered[level_col].iloc[0] + else: # Default to a common value or skip styling that depends on 'level' + styling_level = 0.95 # Default, or handle error + + styled_table = create_styled_table( + df_filtered, + styling_level, # Use the level from filters for styling + n_rep, + caption_prefix=caption_prefix, + coverage_cols=coverage_highlight_cols, + float_precision=float_precision, + ) + show(styled_table, allow_html=True)