From ac858cdc4b8e0b6a86d70e9db4980934912b6e26 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Mon, 2 Jun 2025 14:20:28 +0200 Subject: [PATCH 01/41] add a cross-sectional dgp --- doubleml/did/datasets/dgp_did_cs_CS2021.py | 190 +++++++++++++++++++++ 1 file changed, 190 insertions(+) create mode 100644 doubleml/did/datasets/dgp_did_cs_CS2021.py diff --git a/doubleml/did/datasets/dgp_did_cs_CS2021.py b/doubleml/did/datasets/dgp_did_cs_CS2021.py new file mode 100644 index 00000000..95119b94 --- /dev/null +++ b/doubleml/did/datasets/dgp_did_cs_CS2021.py @@ -0,0 +1,190 @@ +import numpy as np + +from doubleml.did.datasets.dgp_did_CS2021 import make_did_CS2021 + +# Based on https://doi.org/10.1016/j.jeconom.2020.12.001 (see Appendix SC) +# and https://d2cml-ai.github.io/csdid/examples/csdid_basic.html#Examples-with-simulated-data +# Cross-sectional version of the data generating process (DGP) for Callaway and Sant'Anna (2021) + + +def make_did_cs_CS2021(n_obs=1000, dgp_type=1, include_never_treated=True, lambda_t=0.5, time_type="datetime", **kwargs): + """ + Generate synthetic repeated cross-sectional data for difference-in-differences analysis based on + Callaway and Sant'Anna (2021). + + This function creates repeated cross-sectional data with heterogeneous treatment effects across time periods and groups. + The data includes pre-treatment periods, multiple treatment groups that receive treatment at different times, + and optionally a never-treated group that serves as a control. The true average treatment effect on the + treated (ATT) has a heterogeneous structure dependent on covariates and exposure time. + + The data generating process offers six variations (``dgp_type`` 1-6) that differ in how the regression features + and propensity score features are derived: + + - DGP 1: Outcome and propensity score are linear (in Z) + - DGP 2: Outcome is linear, propensity score is nonlinear + - DGP 3: Outcome is nonlinear, propensity score is linear + - DGP 4: Outcome and propensity score are nonlinear + - DGP 5: Outcome is linear, propensity score is constant (experimental setting) + - DGP 6: Outcome is nonlinear, propensity score is constant (experimental setting) + + Let :math:`X= (X_1, X_2, X_3, X_4)^T \\sim \\mathcal{N}(0, \\Sigma)`, where :math:`\\Sigma` is a matrix with entries + :math:`\\Sigma_{kj} = c^{|j-k|}`. The default value is :math:`c = 0`, corresponding to the identity matrix. + + Further, define :math:`Z_j = (\\tilde{Z_j} - \\mathbb{E}[\\tilde{Z}_j]) / \\sqrt{\\text{Var}(\\tilde{Z}_j)}`, + where :math:`\\tilde{Z}_1 = \\exp(0.5 \\cdot X_1)`, :math:`\\tilde{Z}_2 = 10 + X_2/(1 + \\exp(X_1))`, + :math:`\\tilde{Z}_3 = (0.6 + X_1 \\cdot X_3 / 25)^3` and :math:`\\tilde{Z}_4 = (20 + X_2 + X_4)^2`. + + For a feature vector :math:`W=(W_1, W_2, W_3, W_4)^T` (either X or Z based on ``dgp_type``), the core functions are: + + 1. Time-varying outcome regression function for each time period :math:`t`: + + .. math:: + + f_{reg,t}(W) = 210 + \\frac{t}{T} \\cdot (27.4 \\cdot W_1 + 13.7 \\cdot W_2 + 13.7 \\cdot W_3 + 13.7 \\cdot W_4) + + 2. Group-specific propensity function for each treatment group :math:`g`: + + .. math:: + + f_{ps,g}(W) = \\xi \\cdot \\left(1-\\frac{g}{G}\\right) \\cdot + (-W_1 + 0.5 \\cdot W_2 - 0.25 \\cdot W_3 - 0.2\\cdot W_4) + + where :math:`T` is the number of time periods, :math:`G` is the number of treatment groups, and :math:`\\xi` is a + scale parameter (default: 0.9). + + The panel data model is defined with the following components: + + 1. 
Time effects: :math:`\\delta_t = t` for time period :math:`t`
+
+    2. Individual effects: :math:`\\eta_i \\sim \\mathcal{N}(g_i, 1)` where :math:`g_i` is unit :math:`i`'s treatment group
+
+    3. Treatment effects: For a unit in treatment group :math:`g`, the effect in period :math:`t` is:
+
+       .. math::
+
+           \\theta_{i,t,g} = \\max(t - t_g + 1, 0) + 0.1 \\cdot X_{i,1} \\cdot \\max(t - t_g + 1, 0)
+
+       where :math:`t_g` is the first treatment period for group :math:`g`, :math:`X_{i,1}` is the first covariate for unit
+       :math:`i`, and :math:`\\max(t - t_g + 1, 0)` represents the exposure time (0 for pre-treatment periods).
+
+    4. Potential outcomes for unit :math:`i` in period :math:`t`:
+
+       .. math::
+
+           Y_{i,t}(0) &= f_{reg,t}(W_{reg}) + \\delta_t + \\eta_i + \\varepsilon_{i,0,t}
+
+           Y_{i,t}(1) &= Y_{i,t}(0) + \\theta_{i,t,g} + (\\varepsilon_{i,1,t} - \\varepsilon_{i,0,t})
+
+       where :math:`\\varepsilon_{i,0,t}, \\varepsilon_{i,1,t} \\sim \\mathcal{N}(0, 1)`.
+
+    5. Observed outcomes:
+
+       .. math::
+
+           Y_{i,t} = Y_{i,t}(1) \\cdot 1\\{t \\geq t_g\\} + Y_{i,t}(0) \\cdot 1\\{t < t_g\\}
+
+    6. Treatment assignment:
+
+       For non-experimental settings (DGP 1-4), the probability of being in treatment group :math:`g` is:
+
+       .. math::
+
+           P(G_i = g) = \\frac{\\exp(f_{ps,g}(W_{ps}))}{\\sum_{g'} \\exp(f_{ps,g'}(W_{ps}))}
+
+       For experimental settings (DGP 5-6), each treatment group (including never-treated) has equal probability:
+
+       .. math::
+
+           P(G_i = g) = \\frac{1}{G} \\text{ for all } g
+
+    7. Steps 1-6 generate panel data. To obtain repeated cross-sectional data, the number of generated individuals is
+       increased to ``n_obs/lambda_t``, where ``lambda_t`` denotes the (time-constant) probability of observing a unit
+       at each time period.
+
+    The variables :math:`W_{reg}` and :math:`W_{ps}` are selected based on the DGP type:
+
+    .. math::
+
+        DGP1:\\quad W_{reg} &= Z \\quad W_{ps} = Z
+
+        DGP2:\\quad W_{reg} &= Z \\quad W_{ps} = X
+
+        DGP3:\\quad W_{reg} &= X \\quad W_{ps} = Z
+
+        DGP4:\\quad W_{reg} &= X \\quad W_{ps} = X
+
+        DGP5:\\quad W_{reg} &= Z \\quad W_{ps} = 0
+
+        DGP6:\\quad W_{reg} &= X \\quad W_{ps} = 0
+
+    where settings 5-6 correspond to experimental designs with equal probability across treatment groups.
+
+
+    Parameters
+    ----------
+    n_obs : int, default=1000
+        The number of observations to simulate.
+
+    dgp_type : int, default=1
+        The data generating process to be used (1-6).
+
+    include_never_treated : bool, default=True
+        Whether to include units that are never treated.
+
+    lambda_t : float, default=0.5
+        Probability of observing a unit at each time period.
+
+    time_type : str, default="datetime"
+        Type of time variable. Either "datetime" or "float".
+
+    **kwargs
+        Additional keyword arguments. Accepts the following parameters:
+
+        `c` (float, default=0.0):
+            Parameter for correlation structure in X.
+
+        `dim_x` (int, default=4):
+            Dimension of feature vectors.
+
+        `xi` (float, default=0.9):
+            Scale parameter for the propensity score function.
+
+        `n_periods` (int, default=5):
+            Number of time periods.
+
+        `anticipation_periods` (int, default=0):
+            Number of periods before treatment where anticipation effects occur.
+
+        `n_pre_treat_periods` (int, default=2):
+            Number of pre-treatment periods.
+
+        `start_date` (str, default="2025-01"):
+            Start date for datetime time variables.
+
+    Returns
+    -------
+    pandas.DataFrame
+        DataFrame containing the simulated repeated cross-sectional data.
+
+    References
+    ----------
+    Callaway, B. and Sant’Anna, P. H. (2021),
+    Difference-in-Differences with multiple time periods. Journal of Econometrics, 225(2), 200-230.
+    doi:`10.1016/j.jeconom.2020.12.001 <https://doi.org/10.1016/j.jeconom.2020.12.001>`_.
+    """
+
+    n_obs_panel = int(np.ceil(n_obs / lambda_t))
+    df_panel = make_did_CS2021(
+        n_obs=n_obs_panel,
+        dgp_type=dgp_type,
+        include_never_treated=include_never_treated,
+        time_type=time_type,
+        **kwargs,
+    )
+
+    # randomly draw, for each unit-time row, whether it is observed (Bernoulli with probability lambda_t)
+    observed_units = np.random.binomial(1, lambda_t, size=(len(df_panel.index)))
+    df_repeated_cs = df_panel[observed_units == 1].copy()
+
+    return df_repeated_cs
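
A minimal usage sketch of the new generator (illustration only, not part of the patch; the
returned column layout follows ``make_did_CS2021``, and the exact row count is random because
each generated unit-time row is kept with probability ``lambda_t``):

    import numpy as np
    from doubleml.did.datasets import make_did_cs_CS2021

    np.random.seed(42)
    df = make_did_cs_CS2021(n_obs=1000, dgp_type=1, lambda_t=0.5)
    # roughly n_obs rows survive out of ~n_obs / lambda_t generated unit-time rows
    print(df.shape)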
From 10e532e79600cced091cf471c729269ba7b7b983 Mon Sep 17 00:00:00 2001
From: SvenKlaassen
Date: Mon, 2 Jun 2025 14:21:04 +0200
Subject: [PATCH 02/41] add simple test cases for cross-sectional dgp

---
 doubleml/did/datasets/__init__.py   |  2 ++
 doubleml/did/tests/test_datasets.py | 54 ++++++++++++++++++++++++++++-
 2 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/doubleml/did/datasets/__init__.py b/doubleml/did/datasets/__init__.py
index aaa5fc0a..306e7b10 100644
--- a/doubleml/did/datasets/__init__.py
+++ b/doubleml/did/datasets/__init__.py
@@ -3,9 +3,11 @@
 """

 from .dgp_did_CS2021 import make_did_CS2021
+from .dgp_did_cs_CS2021 import make_did_cs_CS2021
 from .dgp_did_SZ2020 import make_did_SZ2020

 __all__ = [
     "make_did_SZ2020",
     "make_did_CS2021",
+    "make_did_cs_CS2021",
 ]

diff --git a/doubleml/did/tests/test_datasets.py b/doubleml/did/tests/test_datasets.py
index 0e323ec9..54eb4074 100644
--- a/doubleml/did/tests/test_datasets.py
+++ b/doubleml/did/tests/test_datasets.py
@@ -3,7 +3,7 @@
 import pytest

 from doubleml import DoubleMLData
-from doubleml.did.datasets import make_did_CS2021, make_did_SZ2020
+from doubleml.did.datasets import make_did_CS2021, make_did_cs_CS2021, make_did_SZ2020

 msg_inv_return_type = "Invalid return_type."

@@ -77,3 +77,55 @@ def test_make_did_CS2021_exceptions():
     msg = r"time_type must be one of \('datetime', 'float'\). Got 2."
with pytest.raises(ValueError, match=msg):
         _ = make_did_CS2021(n_obs=100, time_type=2)
+
+
+@pytest.fixture(scope="function", params=[0.5, 0.1])
+def lambda_t(request):
+    return request.param
+
+
+@pytest.mark.ci
+def test_make_did_cs_CS2021_return_types(dgp_type, include_never_treated, lambda_t, time_type, anticipation_periods):
+    np.random.seed(3141)
+    df = make_did_cs_CS2021(
+        n_obs=100,
+        dgp_type=dgp_type,
+        include_never_treated=include_never_treated,
+        lambda_t=lambda_t,
+        time_type=time_type,
+        anticipation_periods=anticipation_periods,
+    )
+    assert isinstance(df, pd.DataFrame)
+
+
+@pytest.mark.ci
+def test_panel_vs_cs_make_did_CS2021(dgp_type, include_never_treated, time_type, anticipation_periods):
+    np.random.seed(3141)
+    df_cs = make_did_cs_CS2021(
+        n_obs=100,
+        dgp_type=dgp_type,
+        include_never_treated=include_never_treated,
+        lambda_t=1.0,
+        time_type=time_type,
+        anticipation_periods=anticipation_periods,
+    )
+
+    np.random.seed(3141)
+    df_panel = make_did_CS2021(
+        n_obs=100,
+        dgp_type=dgp_type,
+        include_never_treated=include_never_treated,
+        time_type=time_type,
+        anticipation_periods=anticipation_periods,
+    )
+
+    # check that df_cs is close to df_panel
+    assert df_cs.shape[0] == df_panel.shape[0]
+    # Select numerical columns
+    df_cs_numeric = df_cs.select_dtypes(include=np.number)
+    df_panel_numeric = df_panel.select_dtypes(include=np.number)
+
+    # Ensure the same numerical columns are being compared, in the same order
+    pd.testing.assert_index_equal(df_cs_numeric.columns, df_panel_numeric.columns)
+
+    assert np.allclose(df_cs_numeric.values, df_panel_numeric.values, atol=1e-5, rtol=1e-5)

From c96605d28b6628c7d1bcf32c8fee0e0f8609b171 Mon Sep 17 00:00:00 2001
From: SvenKlaassen
Date: Tue, 3 Jun 2025 13:28:48 +0200
Subject: [PATCH 03/41] reset index in panel data

---
 doubleml/data/panel_data.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/doubleml/data/panel_data.py b/doubleml/data/panel_data.py
index f548ae6a..4e416183 100644
--- a/doubleml/data/panel_data.py
+++ b/doubleml/data/panel_data.py
@@ -106,6 +106,8 @@ def __init__(
             force_all_x_finite=force_all_x_finite,
             force_all_d_finite=False,
         )
+        # reset index to ensure a simple RangeIndex
+        self.data.reset_index(drop=True, inplace=True)

         if self.n_treat != 1:
             raise ValueError("Only one treatment column is allowed for panel data.")

From 61dbf11470ca1f97be57196ac8c2b03e83ed94f6 Mon Sep 17 00:00:00 2001
From: SvenKlaassen
Date: Tue, 3 Jun 2025 13:29:30 +0200
Subject: [PATCH 04/41] add basic did_cs_binary version with simple tests

---
 doubleml/did/__init__.py                      |   2 +
 doubleml/did/did_cs_binary.py                 | 592 ++++++++++++++++++
 ...test_did_cs_binary_external_predictions.py |  92 +++
 ...test_did_cs_binary_vs_did_cs_two_period.py | 163 +++++
 4 files changed, 849 insertions(+)
 create mode 100644 doubleml/did/did_cs_binary.py
 create mode 100644 doubleml/did/tests/test_did_cs_binary_external_predictions.py
 create mode 100644 doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py

diff --git a/doubleml/did/__init__.py b/doubleml/did/__init__.py
index 354ffaa5..369353ef 100644
--- a/doubleml/did/__init__.py
+++ b/doubleml/did/__init__.py
@@ -6,6 +6,7 @@
 from .did_aggregation import DoubleMLDIDAggregation
 from .did_binary import DoubleMLDIDBinary
 from .did_cs import DoubleMLDIDCS
+from .did_cs_binary import DoubleMLDIDCSBinary
 from .did_multi import DoubleMLDIDMulti

 __all__ = [
@@ -13,5 +14,6 @@
     "DoubleMLDID",
     "DoubleMLDIDCS",
     "DoubleMLDIDBinary",
+    "DoubleMLDIDCSBinary",
     "DoubleMLDIDMulti",
 ]

diff --git
a/doubleml/did/did_cs_binary.py b/doubleml/did/did_cs_binary.py new file mode 100644 index 00000000..ce57384c --- /dev/null +++ b/doubleml/did/did_cs_binary.py @@ -0,0 +1,592 @@ +import warnings + +import numpy as np +from sklearn.utils import check_X_y + +from doubleml.data.panel_data import DoubleMLPanelData +from doubleml.did.utils._did_utils import ( + _check_anticipation_periods, + _check_control_group, + _check_gt_combination, + _check_gt_values, + _get_id_positions, + _get_never_treated_value, + _is_never_treated, + _set_id_positions, +) +from doubleml.double_ml import DoubleML +from doubleml.double_ml_score_mixins import LinearScoreMixin +from doubleml.utils._checks import ( + _check_bool, + _check_finite_predictions, + _check_is_propensity, + _check_score, + _check_trimming, +) +from doubleml.utils._estimation import _dml_cv_predict, _get_cond_smpls_2d +from doubleml.utils._propensity_score import _trimm + + +class DoubleMLDIDCSBinary(LinearScoreMixin, DoubleML): + + def __init__( + self, + obj_dml_data, + g_value, + t_value_pre, + t_value_eval, + ml_g, + ml_m=None, + control_group="never_treated", + anticipation_periods=0, + n_folds=5, + n_rep=1, + score="observational", + in_sample_normalization=True, + trimming_rule="truncate", + trimming_threshold=1e-2, + draw_sample_splitting=True, + print_periods=False, + ): + super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting=False) + + self._check_data(self._dml_data) + g_values = self._dml_data.g_values + t_values = self._dml_data.t_values + + _check_bool(print_periods, "print_periods") + self._print_periods = print_periods + self._control_group = _check_control_group(control_group) + self._never_treated_value = _get_never_treated_value(g_values) + self._anticipation_periods = _check_anticipation_periods(anticipation_periods) + + _check_gt_combination( + (g_value, t_value_pre, t_value_eval), g_values, t_values, self.never_treated_value, self.anticipation_periods + ) + self._g_value = g_value + self._t_value_pre = t_value_pre + self._t_value_eval = t_value_eval + + # check if post_treatment evaluation + if g_value <= t_value_eval: + post_treatment = True + else: + post_treatment = False + + self._post_treatment = post_treatment + + if self._print_periods: + print( + f"Evaluation of ATT({g_value}, {t_value_eval}), with pre-treatment period {t_value_pre},\n" + + f"post-treatment: {post_treatment}. Control group: {control_group}.\n" + ) + + # Preprocess data + self._data_subset = self._preprocess_data(self._g_value, self._t_value_pre, self._t_value_eval) + + # Handling id values to match pairwise evaluation & simultaneous inference + if not np.all(np.isin(self.data_subset.index, self._dml_data.data.index)): + raise ValueError("The index values in the data subset are not a subset of the original index values.") + + # Find position of data subset in original data + # These entries should be replaced by nuisance predictions, all others should be set to 0. 
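+        # (the psi arrays keep the length of the full data set, with zeros outside the
+        # subset, so that scores from different (g, t) combinations stay aligned)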
+        self._id_positions = self.data_subset.index
+
+        # Numeric values for positions of the entries in id_panel_data inside id_original
+        # np.nonzero(np.isin(id_original, id_panel_data))
+        self._n_subset = self.data_subset.shape[0]
+        self._n_obs = self._n_subset  # Effective sample size used for resampling
+
+        # Save x and y for later ML estimation
+        self._x_data = self.data_subset.loc[:, self._dml_data.x_cols].values
+        self._y_data = self.data_subset.loc[:, self._dml_data.y_col].values
+        self._g_data = self.data_subset.loc[:, "G_indicator"].values
+        self._t_data = self.data_subset.loc[:, "t_indicator"].values
+
+        valid_scores = ["observational", "experimental"]
+        _check_score(self.score, valid_scores, allow_callable=False)
+
+        self._in_sample_normalization = in_sample_normalization
+        if not isinstance(self.in_sample_normalization, bool):
+            raise TypeError(
+                "in_sample_normalization indicator has to be boolean. "
+                + f"Object of type {str(type(self.in_sample_normalization))} passed."
+            )
+
+        # set stratification for resampling
+        self._strata = self.data_subset["G_indicator"] + 2 * self.data_subset["t_indicator"]
+        if draw_sample_splitting:
+            self.draw_sample_splitting()
+
+        # check learners
+        ml_g_is_classifier = self._check_learner(ml_g, "ml_g", regressor=True, classifier=True)
+        if self.score == "observational":
+            _ = self._check_learner(ml_m, "ml_m", regressor=False, classifier=True)
+            self._learner = {"ml_g": ml_g, "ml_m": ml_m}
+        else:
+            assert self.score == "experimental"
+            if ml_m is not None:
+                warnings.warn(
+                    (
+                        'A learner ml_m has been provided for score = "experimental" but will be ignored. '
+                        "A learner ml_m is not required for estimation."
+                    )
+                )
+            self._learner = {"ml_g": ml_g}
+
+        if ml_g_is_classifier:
+            if obj_dml_data.binary_outcome:
+                self._predict_method = {"ml_g": "predict_proba"}
+            else:
+                raise ValueError(
+                    f"The ml_g learner {str(ml_g)} was identified as classifier "
+                    "but the outcome variable is not binary with values 0 and 1."
+                )
+        else:
+            self._predict_method = {"ml_g": "predict"}
+
+        if "ml_m" in self._learner:
+            self._predict_method["ml_m"] = "predict_proba"
+        self._initialize_ml_nuisance_params()
+
+        self._trimming_rule = trimming_rule
+        self._trimming_threshold = trimming_threshold
+        _check_trimming(self._trimming_rule, self._trimming_threshold)
+
+        self._sensitivity_implemented = False
+        self._external_predictions_implemented = True
+
+    @property
+    def g_value(self):
+        """
+        The value indicating the treatment group (first period with treatment).
+        """
+        return self._g_value
+
+    @property
+    def t_value_eval(self):
+        """
+        The value indicating the evaluation period.
+        """
+        return self._t_value_eval
+
+    @property
+    def t_value_pre(self):
+        """
+        The value indicating the pre-treatment period.
+        """
+        return self._t_value_pre
+
+    @property
+    def never_treated_value(self):
+        """
+        The value indicating that a unit was never treated.
+        """
+        return self._never_treated_value
+
+    @property
+    def post_treatment(self):
+        """
+        Indicates whether the evaluation period is after the treatment period.
+        """
+        return self._post_treatment
+
+    @property
+    def control_group(self):
+        """
+        The control group.
+        """
+        return self._control_group
+
+    @property
+    def anticipation_periods(self):
+        """
+        The number of anticipation periods.
+        """
+        return self._anticipation_periods
+
+    @property
+    def data_subset(self):
+        """
+        The preprocessed data subset.
+ """ + return self._data_subset + + @property + def id_positions(self): + """ + The positions of the id values in the original data. + """ + return self._id_positions + + @property + def in_sample_normalization(self): + """ + Indicates whether the in sample normalization of weights are used. + """ + return self._in_sample_normalization + + @property + def trimming_rule(self): + """ + Specifies the used trimming rule. + """ + return self._trimming_rule + + @property + def trimming_threshold(self): + """ + Specifies the used trimming threshold. + """ + return self._trimming_threshold + + @property + def n_obs(self): + """ + The number of observations used for estimation. + """ + return self._n_subset + + def _initialize_ml_nuisance_params(self): + if self.score == "observational": + valid_learner = ["ml_g_d0_t0", "ml_g_d0_t1", "ml_g_d1_t0", "ml_g_d1_t1", "ml_m"] + else: + assert self.score == "experimental" + valid_learner = ["ml_g_d0_t0", "ml_g_d0_t1", "ml_g_d1_t0", "ml_g_d1_t1"] + self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in valid_learner} + + def _check_data(self, obj_dml_data): + if not isinstance(obj_dml_data, DoubleMLPanelData): + raise TypeError( + "For repeated outcomes the data must be of DoubleMLPanelData type. " + f"{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed." + ) + if obj_dml_data.z_cols is not None: + raise NotImplementedError( + "Incompatible data. " + " and ".join(obj_dml_data.z_cols) + " have been set as instrumental variable(s). " + "At the moment there are not DiD models with instruments implemented." + ) + + one_treat = obj_dml_data.n_treat == 1 + if not (one_treat): + raise ValueError( + "Incompatible data. " + "To fit an DID model with DML " + "exactly one variable needs to be specified as treatment variable." 
+ ) + _check_gt_values(obj_dml_data.g_values, obj_dml_data.t_values) + return + + def _preprocess_data(self, g_value, pre_t, eval_t): + data = self._dml_data.data + + t_col = self._dml_data.t_col + id_col = self._dml_data.id_col + g_col = self._dml_data.g_col + + # relevant data subset + data_subset_indicator = data[t_col].isin([pre_t, eval_t]) + data_subset = data[data_subset_indicator].sort_values(by=[id_col, t_col]) + + # Construct G (treatment group) indicating treatment period in g + G_indicator = (data_subset[g_col] == g_value).astype(int) + + # Construct C (control group) indicating never treated or not yet treated + never_treated = _is_never_treated(data_subset[g_col], self.never_treated_value).reshape(-1) + if self.control_group == "never_treated": + C_indicator = never_treated.astype(int) + + elif self.control_group == "not_yet_treated": + # adjust max_g_value for anticipation periods + t_values = self._dml_data.t_values + max_g_value = t_values[min(np.where(t_values == eval_t)[0][0] + self.anticipation_periods, len(t_values) - 1)] + # not in G just as a additional check + later_treated = (data_subset[g_col] > max_g_value) & (G_indicator == 0) + not_yet_treated = never_treated | later_treated + C_indicator = not_yet_treated.astype(int) + + if np.sum(C_indicator) == 0: + raise ValueError("No observations in the control group.") + + data_subset = data_subset.assign(C_indicator=C_indicator, G_indicator=G_indicator) + # reduce to relevant subset + data_subset = data_subset[(data_subset["G_indicator"] == 1) | (data_subset["C_indicator"] == 1)] + # check if G and C are disjoint + assert sum(G_indicator & C_indicator) == 0 + + # add time indicator + data_subset = data_subset.assign(t_indicator=data_subset[t_col] == eval_t) + return data_subset + + def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): + + # Here: d is a binary treatment indicator + x, y = check_X_y(X=self._x_data, y=self._y_data, force_all_finite=False) + _, d = check_X_y(x, self._g_data, force_all_finite=False) # (d is the G_indicator) + _, t = check_X_y(x, self._t_data, force_all_finite=False) + + # THIS DIFFERS FROM THE PAPER due to stratified splitting this should be the same for each fold + # nuisance estimates of the uncond. treatment prob. 
+ p_hat = np.full_like(d, d.mean(), dtype="float64") + lambda_hat = np.full_like(t, t.mean(), dtype="float64") + + # nuisance g + smpls_d0_t0, smpls_d0_t1, smpls_d1_t0, smpls_d1_t1 = _get_cond_smpls_2d(smpls, d, t) + + # nuisance g for d==0 & t==0 + if external_predictions["ml_g_d0_t0"] is not None: + ml_g_d0_t0_targets = np.full_like(y, np.nan, dtype="float64") + ml_g_d0_t0_targets[((d == 0) & (t == 0))] = y[((d == 0) & (t == 0))] + ml_d0_t0_pred = _get_id_positions(external_predictions["ml_g_d0_t0"], self.id_positions) + g_hat_d0_t0 = {"preds": ml_d0_t0_pred, "targets": ml_g_d0_t0_targets, "models": None} + else: + g_hat_d0_t0 = _dml_cv_predict( + self._learner["ml_g"], + x, + y, + smpls_d0_t0, + n_jobs=n_jobs_cv, + est_params=self._get_params("ml_g_d0_t0"), + method=self._predict_method["ml_g"], + return_models=return_models, + ) + + _check_finite_predictions(g_hat_d0_t0["preds"], self._learner["ml_g"], "ml_g", smpls) + # adjust target values to consider only compatible subsamples + g_hat_d0_t0["targets"] = g_hat_d0_t0["targets"].astype(float) + g_hat_d0_t0["targets"][np.invert((d == 0) & (t == 0))] = np.nan + + # nuisance g for d==0 & t==1 + if external_predictions["ml_g_d0_t1"] is not None: + ml_g_d0_t1_targets = np.full_like(y, np.nan, dtype="float64") + ml_g_d0_t1_targets[((d == 0) & (t == 1))] = y[((d == 0) & (t == 1))] + ml_d0_t1_pred = _get_id_positions(external_predictions["ml_g_d0_t1"], self.id_positions) + g_hat_d0_t1 = {"preds": ml_d0_t1_pred, "targets": ml_g_d0_t1_targets, "models": None} + else: + g_hat_d0_t1 = _dml_cv_predict( + self._learner["ml_g"], + x, + y, + smpls_d0_t1, + n_jobs=n_jobs_cv, + est_params=self._get_params("ml_g_d0_t1"), + method=self._predict_method["ml_g"], + return_models=return_models, + ) + + _check_finite_predictions(g_hat_d0_t1["preds"], self._learner["ml_g"], "ml_g", smpls) + # adjust target values to consider only compatible subsamples + g_hat_d0_t1["targets"] = g_hat_d0_t1["targets"].astype(float) + g_hat_d0_t1["targets"][np.invert((d == 0) & (t == 1))] = np.nan + + # nuisance g for d==1 & t==0 + if external_predictions["ml_g_d1_t0"] is not None: + ml_g_d1_t0_targets = np.full_like(y, np.nan, dtype="float64") + ml_g_d1_t0_targets[((d == 1) & (t == 0))] = y[((d == 1) & (t == 0))] + ml_d1_t0_pred = _get_id_positions(external_predictions["ml_g_d1_t0"], self.id_positions) + g_hat_d1_t0 = {"preds": ml_d1_t0_pred, "targets": ml_g_d1_t0_targets, "models": None} + else: + g_hat_d1_t0 = _dml_cv_predict( + self._learner["ml_g"], + x, + y, + smpls_d1_t0, + n_jobs=n_jobs_cv, + est_params=self._get_params("ml_g_d1_t0"), + method=self._predict_method["ml_g"], + return_models=return_models, + ) + + _check_finite_predictions(g_hat_d1_t0["preds"], self._learner["ml_g"], "ml_g", smpls) + # adjust target values to consider only compatible subsamples + g_hat_d1_t0["targets"] = g_hat_d1_t0["targets"].astype(float) + g_hat_d1_t0["targets"][np.invert((d == 1) & (t == 0))] = np.nan + + # nuisance g for d==1 & t==1 + if external_predictions["ml_g_d1_t1"] is not None: + ml_g_d1_t1_targets = np.full_like(y, np.nan, dtype="float64") + ml_g_d1_t1_targets[((d == 1) & (t == 1))] = y[((d == 1) & (t == 1))] + ml_d1_t1_pred = _get_id_positions(external_predictions["ml_g_d1_t1"], self.id_positions) + g_hat_d1_t1 = {"preds": ml_d1_t1_pred, "targets": ml_g_d1_t1_targets, "models": None} + else: + g_hat_d1_t1 = _dml_cv_predict( + self._learner["ml_g"], + x, + y, + smpls_d1_t1, + n_jobs=n_jobs_cv, + est_params=self._get_params("ml_g_d1_t1"), + method=self._predict_method["ml_g"], + 
return_models=return_models, + ) + + _check_finite_predictions(g_hat_d1_t1["preds"], self._learner["ml_g"], "ml_g", smpls) + # adjust target values to consider only compatible subsamples + g_hat_d1_t1["targets"] = g_hat_d1_t1["targets"].astype(float) + g_hat_d1_t1["targets"][np.invert((d == 1) & (t == 1))] = np.nan + + # only relevant for observational setting + m_hat = {"preds": None, "targets": None, "models": None} + if self.score == "observational": + # nuisance m + if external_predictions["ml_m"] is not None: + ml_m_pred = _get_id_positions(external_predictions["ml_m"], self.id_positions) + m_hat = {"preds": ml_m_pred, "targets": d, "models": None} + else: + m_hat = _dml_cv_predict( + self._learner["ml_m"], + x, + d, + smpls=smpls, + n_jobs=n_jobs_cv, + est_params=self._get_params("ml_m"), + method=self._predict_method["ml_m"], + return_models=return_models, + ) + + _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls) + _check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12) + m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold) + + psi_a, psi_b = self._score_elements( + y, + d, + t, + g_hat_d0_t0["preds"], + g_hat_d0_t1["preds"], + g_hat_d1_t0["preds"], + g_hat_d1_t1["preds"], + m_hat["preds"], + p_hat, + lambda_hat, + ) + + extend_kwargs = { + "n_obs": self._dml_data.data.shape[0], + "id_positions": self.id_positions, + } + psi_elements = { + "psi_a": _set_id_positions(psi_a, fill_value=0.0, **extend_kwargs), + "psi_b": _set_id_positions(psi_b, fill_value=0.0, **extend_kwargs), + } + preds = { + "predictions": { + "ml_g_d0_t0": _set_id_positions(g_hat_d0_t0["preds"], fill_value=np.nan, **extend_kwargs), + "ml_g_d0_t1": _set_id_positions(g_hat_d0_t1["preds"], fill_value=np.nan, **extend_kwargs), + "ml_g_d1_t0": _set_id_positions(g_hat_d1_t0["preds"], fill_value=np.nan, **extend_kwargs), + "ml_g_d1_t1": _set_id_positions(g_hat_d1_t1["preds"], fill_value=np.nan, **extend_kwargs), + "ml_m": _set_id_positions(m_hat["preds"], fill_value=np.nan, **extend_kwargs), + }, + "targets": { + "ml_g_d0_t0": _set_id_positions(g_hat_d0_t0["targets"], fill_value=np.nan, **extend_kwargs), + "ml_g_d0_t1": _set_id_positions(g_hat_d0_t1["targets"], fill_value=np.nan, **extend_kwargs), + "ml_g_d1_t0": _set_id_positions(g_hat_d1_t0["targets"], fill_value=np.nan, **extend_kwargs), + "ml_g_d1_t1": _set_id_positions(g_hat_d1_t1["targets"], fill_value=np.nan, **extend_kwargs), + "ml_m": _set_id_positions(m_hat["targets"], fill_value=np.nan, **extend_kwargs), + }, + "models": { + "ml_g_d0_t0": g_hat_d0_t0["models"], + "ml_g_d0_t1": g_hat_d0_t1["models"], + "ml_g_d1_t0": g_hat_d1_t0["models"], + "ml_g_d1_t1": g_hat_d1_t1["models"], + "ml_m": m_hat["models"], + }, + } + + return psi_elements, preds + + def _score_elements(self, y, d, t, g_hat_d0_t0, g_hat_d0_t1, g_hat_d1_t0, g_hat_d1_t1, m_hat, p_hat, lambda_hat): + # calculate residuals + resid_d0_t0 = y - g_hat_d0_t0 + resid_d0_t1 = y - g_hat_d0_t1 + resid_d1_t0 = y - g_hat_d1_t0 + resid_d1_t1 = y - g_hat_d1_t1 + + d1t1 = np.multiply(d, t) + d1t0 = np.multiply(d, 1.0 - t) + d0t1 = np.multiply(1.0 - d, t) + d0t0 = np.multiply(1.0 - d, 1.0 - t) + + if self.score == "observational": + if self.in_sample_normalization: + weight_psi_a = np.divide(d, np.mean(d)) + weight_g_d1_t1 = weight_psi_a + weight_g_d1_t0 = -1.0 * weight_psi_a + weight_g_d0_t1 = -1.0 * weight_psi_a + weight_g_d0_t0 = weight_psi_a + + weight_resid_d1_t1 = np.divide(d1t1, np.mean(d1t1)) + weight_resid_d1_t0 = 
-1.0 * np.divide(d1t0, np.mean(d1t0)) + + prop_weighting = np.divide(m_hat, 1.0 - m_hat) + unscaled_d0_t1 = np.multiply(d0t1, prop_weighting) + weight_resid_d0_t1 = -1.0 * np.divide(unscaled_d0_t1, np.mean(unscaled_d0_t1)) + + unscaled_d0_t0 = np.multiply(d0t0, prop_weighting) + weight_resid_d0_t0 = np.divide(unscaled_d0_t0, np.mean(unscaled_d0_t0)) + else: + weight_psi_a = np.divide(d, p_hat) + weight_g_d1_t1 = weight_psi_a + weight_g_d1_t0 = -1.0 * weight_psi_a + weight_g_d0_t1 = -1.0 * weight_psi_a + weight_g_d0_t0 = weight_psi_a + + weight_resid_d1_t1 = np.divide(d1t1, np.multiply(p_hat, lambda_hat)) + weight_resid_d1_t0 = -1.0 * np.divide(d1t0, np.multiply(p_hat, 1.0 - lambda_hat)) + + prop_weighting = np.divide(m_hat, 1.0 - m_hat) + weight_resid_d0_t1 = -1.0 * np.multiply(np.divide(d0t1, np.multiply(p_hat, lambda_hat)), prop_weighting) + weight_resid_d0_t0 = np.multiply(np.divide(d0t0, np.multiply(p_hat, 1.0 - lambda_hat)), prop_weighting) + else: + assert self.score == "experimental" + if self.in_sample_normalization: + weight_psi_a = np.ones_like(y) + weight_g_d1_t1 = weight_psi_a + weight_g_d1_t0 = -1.0 * weight_psi_a + weight_g_d0_t1 = -1.0 * weight_psi_a + weight_g_d0_t0 = weight_psi_a + + weight_resid_d1_t1 = np.divide(d1t1, np.mean(d1t1)) + weight_resid_d1_t0 = -1.0 * np.divide(d1t0, np.mean(d1t0)) + weight_resid_d0_t1 = -1.0 * np.divide(d0t1, np.mean(d0t1)) + weight_resid_d0_t0 = np.divide(d0t0, np.mean(d0t0)) + else: + weight_psi_a = np.ones_like(y) + weight_g_d1_t1 = weight_psi_a + weight_g_d1_t0 = -1.0 * weight_psi_a + weight_g_d0_t1 = -1.0 * weight_psi_a + weight_g_d0_t0 = weight_psi_a + + weight_resid_d1_t1 = np.divide(d1t1, np.multiply(p_hat, lambda_hat)) + weight_resid_d1_t0 = -1.0 * np.divide(d1t0, np.multiply(p_hat, 1.0 - lambda_hat)) + weight_resid_d0_t1 = -1.0 * np.divide(d0t1, np.multiply(1.0 - p_hat, lambda_hat)) + weight_resid_d0_t0 = np.divide(d0t0, np.multiply(1.0 - p_hat, 1.0 - lambda_hat)) + + # set score elements + psi_a = -1.0 * weight_psi_a + + # psi_b + psi_b_1 = ( + np.multiply(weight_g_d1_t1, g_hat_d1_t1) + + np.multiply(weight_g_d1_t0, g_hat_d1_t0) + + np.multiply(weight_g_d0_t0, g_hat_d0_t0) + + np.multiply(weight_g_d0_t1, g_hat_d0_t1) + ) + psi_b_2 = ( + np.multiply(weight_resid_d1_t1, resid_d1_t1) + + np.multiply(weight_resid_d1_t0, resid_d1_t0) + + np.multiply(weight_resid_d0_t0, resid_d0_t0) + + np.multiply(weight_resid_d0_t1, resid_d0_t1) + ) + + psi_b = psi_b_1 + psi_b_2 + + return psi_a, psi_b + + def _nuisance_tuning( + self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search + ): + pass + + def _sensitivity_element_est(self, preds): + pass diff --git a/doubleml/did/tests/test_did_cs_binary_external_predictions.py b/doubleml/did/tests/test_did_cs_binary_external_predictions.py new file mode 100644 index 00000000..4e09dfe0 --- /dev/null +++ b/doubleml/did/tests/test_did_cs_binary_external_predictions.py @@ -0,0 +1,92 @@ +import math + +import numpy as np +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression + +from doubleml.did import DoubleMLDIDCSBinary +from doubleml.did.datasets import make_did_SZ2020 +from doubleml.tests._utils import draw_smpls +from doubleml.utils import DMLDummyClassifier, DMLDummyRegressor + + +@pytest.fixture(scope="module", params=["observational", "experimental"]) +def did_score(request): + return request.param + + +@pytest.fixture(scope="module", params=[1, 3]) +def n_rep(request): + return request.param + + 
+@pytest.fixture(scope="module") +def doubleml_did_cs_fixture(did_score, n_rep): + n_obs = 500 + n_folds = 5 + + ext_predictions = {"d": {}} + dml_data = make_did_SZ2020(n_obs=n_obs, return_type="DoubleMLPanelData") + + kwargs = { + "obj_dml_data": dml_data, + "g_value": 1, + "t_value_pre": 0, + "t_value_eval": 1, + "score": did_score, + "n_rep": n_rep, + "draw_sample_splitting": False, + } + + dml_did = DoubleMLDIDCSBinary(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) + strata = dml_did.data_subset["G_indicator"] + 2 * dml_did.data_subset["t_indicator"] + all_smpls = draw_smpls(2 * n_obs, n_folds, n_rep=n_rep, groups=strata) + dml_did.set_sample_splitting(all_smpls) + + np.random.seed(3141) + dml_did.fit(store_predictions=True) + + all_keys = ["ml_g_d0_t0", "ml_g_d0_t1", "ml_g_d1_t0", "ml_g_d1_t1"] + for key in all_keys: + ext_predictions["d"][key] = dml_did.predictions[key][:, :, 0] + if did_score == "observational": + ext_predictions["d"]["ml_m"] = dml_did.predictions["ml_m"][:, :, 0] + + dml_did_ext = DoubleMLDIDCSBinary(ml_g=DMLDummyRegressor(), ml_m=DMLDummyClassifier(), **kwargs) + dml_did_ext.set_sample_splitting(all_smpls) + np.random.seed(3141) + dml_did_ext.fit(external_predictions=ext_predictions) + + res_dict = { + "coef": dml_did.coef[0], + "coef_ext": dml_did_ext.coef[0], + "se": dml_did.se[0], + "se_ext": dml_did_ext.se[0], + "score": dml_did.psi, + "score_ext": dml_did_ext.psi, + "dml_did_nuisance_loss": dml_did.nuisance_loss, + "dml_did_ext_nuisance_loss": dml_did_ext.nuisance_loss, + } + + return res_dict + + +@pytest.mark.ci +def test_coef(doubleml_did_cs_fixture): + assert math.isclose(doubleml_did_cs_fixture["coef"], doubleml_did_cs_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-3) + + +@pytest.mark.ci +def test_se(doubleml_did_cs_fixture): + assert math.isclose(doubleml_did_cs_fixture["se"], doubleml_did_cs_fixture["se_ext"], rel_tol=1e-9, abs_tol=1e-3) + + +@pytest.mark.ci +def test_score(doubleml_did_cs_fixture): + assert np.allclose(doubleml_did_cs_fixture["score"], doubleml_did_cs_fixture["score_ext"], rtol=1e-9, atol=1e-3) + + +@pytest.mark.ci +def test_nuisance_loss(doubleml_did_cs_fixture): + for key, value in doubleml_did_cs_fixture["dml_did_nuisance_loss"].items(): + assert np.allclose(value, doubleml_did_cs_fixture["dml_did_ext_nuisance_loss"][key], rtol=1e-9, atol=1e-3) diff --git a/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py b/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py new file mode 100644 index 00000000..2c8c34f3 --- /dev/null +++ b/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py @@ -0,0 +1,163 @@ +import math + +import numpy as np +import pytest +from sklearn.base import clone +from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor +from sklearn.linear_model import LinearRegression, LogisticRegression + +import doubleml as dml + +from ...tests._utils import draw_smpls +from ._utils_did_cs_manual import fit_did_cs +from ._utils_did_manual import boot_did + + +@pytest.fixture( + scope="module", + params=[ + [LinearRegression(), LogisticRegression(solver="lbfgs", max_iter=250)], + [ + RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42), + RandomForestClassifier(max_depth=5, n_estimators=10, random_state=42), + ], + ], +) +def learner(request): + return request.param + + +@pytest.fixture(scope="module", params=["observational", "experimental"]) +def score(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def 
in_sample_normalization(request):
+    return request.param
+
+
+@pytest.fixture(scope="module", params=[0.1])
+def trimming_threshold(request):
+    return request.param
+
+
+@pytest.fixture(scope="module")
+def dml_did_cs_binary_vs_did_cs_fixture(generate_data_did_binary, learner, score, in_sample_normalization, trimming_threshold):
+    boot_methods = ["normal"]
+    n_folds = 2
+    n_rep_boot = 499
+
+    # collect data
+    dml_panel_data = generate_data_did_binary
+    df = dml_panel_data._data.sort_values(by=["id", "t"])
+
+    n_obs = df.shape[0]
+    all_smpls = draw_smpls(n_obs, n_folds)
+    obj_dml_data = dml.DoubleMLData(df, y_col="y", d_cols="d", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"])
+
+    # Set machine learning methods for m & g
+    ml_g = clone(learner[0])
+    ml_m = clone(learner[1])
+
+    dml_args = {
+        "ml_g": ml_g,
+        "ml_m": ml_m,
+        "n_folds": n_folds,
+        "score": score,
+        "in_sample_normalization": in_sample_normalization,
+        "trimming_threshold": trimming_threshold,
+        "draw_sample_splitting": False,
+    }
+
+    dml_did_binary_obj = dml.did.DoubleMLDIDCSBinary(
+        dml_panel_data,
+        g_value=1,
+        t_value_pre=0,
+        t_value_eval=1,
+        **dml_args,
+    )
+
+    dml_did_obj = dml.DoubleMLDIDCS(
+        obj_dml_data,
+        **dml_args,
+    )
+
+    # synchronize the sample splitting
+    dml_did_obj.set_sample_splitting(all_smpls=all_smpls)
+    dml_did_binary_obj.set_sample_splitting(all_smpls=all_smpls)
+
+    dml_did_obj.fit()
+    dml_did_binary_obj.fit()
+
+    # manual fit
+    y = df["y"].values
+    d = df["d"].values
+    x = df[["Z1", "Z2", "Z3", "Z4"]].values
+    t = df["t"].values
+
+    np.random.seed(3141)
+    res_manual = fit_did_cs(
+        y,
+        x,
+        d,
+        t,
+        clone(learner[0]),
+        clone(learner[1]),
+        all_smpls,
+        score,
+        in_sample_normalization,
+        trimming_threshold=trimming_threshold,
+    )
+
+    res_dict = {
+        "coef": dml_did_obj.coef,
+        "coef_binary": dml_did_binary_obj.coef,
+        "coef_manual": res_manual["theta"],
+        "se": dml_did_obj.se,
+        "se_binary": dml_did_binary_obj.se,
+        "se_manual": res_manual["se"],
+        "nuisance_loss": dml_did_obj.nuisance_loss,
+        "nuisance_loss_binary": dml_did_binary_obj.nuisance_loss,
+        "boot_methods": boot_methods,
+    }
+
+    for bootstrap in boot_methods:
+        np.random.seed(3141)
+        boot_t_stat = boot_did(
+            y,
+            res_manual["thetas"],
+            res_manual["ses"],
+            res_manual["all_psi_a"],
+            res_manual["all_psi_b"],
+            all_smpls,
+            bootstrap,
+            n_rep_boot,
+        )
+
+        np.random.seed(3141)
+        dml_did_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
+        np.random.seed(3141)
+        dml_did_binary_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
+
+        res_dict["boot_t_stat" + bootstrap] = dml_did_obj.boot_t_stat
+        res_dict["boot_t_stat" + bootstrap + "_binary"] = dml_did_binary_obj.boot_t_stat
+        res_dict["boot_t_stat" + bootstrap + "_manual"] = boot_t_stat.reshape(-1, 1, 1)
+
+    return res_dict
+
+
+@pytest.mark.ci
+def test_coefs(dml_did_cs_binary_vs_did_cs_fixture):
+    assert math.isclose(
+        dml_did_cs_binary_vs_did_cs_fixture["coef"][0],
+        dml_did_cs_binary_vs_did_cs_fixture["coef_manual"],
+        rel_tol=1e-9,
+        abs_tol=1e-4,
+    )
+    assert math.isclose(
+        dml_did_cs_binary_vs_did_cs_fixture["coef_binary"][0],
+        dml_did_cs_binary_vs_did_cs_fixture["coef"][0],
+        rel_tol=1e-9,
+        abs_tol=1e-4,
+    )

From ceebc6ee5d462016f8ddaab3e8d0c2f9325665be Mon Sep 17 00:00:00 2001
From: SvenKlaassen
Date: Tue, 3 Jun 2025 14:19:28 +0200
Subject: [PATCH 05/41] add internal attribute _score_dim to DoubleML class

---
 doubleml/double_ml.py | 33 ++++++++++++++-------------------
 1 file changed, 14 insertions(+), 19 deletions(-)

diff --git a/doubleml/double_ml.py
b/doubleml/double_ml.py index 764865a4..0ab80cfa 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -101,6 +101,7 @@ def __init__(self, obj_dml_data, n_folds, n_rep, score, draw_sample_splitting): if draw_sample_splitting: self.draw_sample_splitting() + self._score_dim = (self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs) # initialize arrays according to obj_dml_data and the resampling settings ( self._psi, @@ -1021,9 +1022,7 @@ def _initalize_fit(self, store_predictions, store_models): self._initialize_models() if self._sensitivity_implemented: - self._sensitivity_elements = self._initialize_sensitivity_elements( - (self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs) - ) + self._sensitivity_elements = self._initialize_sensitivity_elements(self._score_dim) def _fit_nuisance_and_score_elements(self, n_jobs_cv, store_predictions, external_predictions, store_models): ext_prediction_dict = _set_external_predictions( @@ -1076,30 +1075,26 @@ def _fit_sensitivity_elements(self, nuisance_predictions): def _initialize_arrays(self): # scores - psi = np.full((self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs), np.nan) - psi_deriv = np.full((self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs), np.nan) - psi_elements = self._initialize_score_elements((self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs)) + psi = np.full(self._score_dim, np.nan) + psi_deriv = np.full(self._score_dim, np.nan) + psi_elements = self._initialize_score_elements(self._score_dim) - var_scaling_factors = np.full(self._dml_data.n_treat, np.nan) + n_rep = self._score_dim[1] + n_thetas = self._score_dim[2] + var_scaling_factors = np.full(n_thetas, np.nan) # coefficients and ses - coef = np.full(self._dml_data.n_coefs, np.nan) - se = np.full(self._dml_data.n_coefs, np.nan) + coef = np.full(n_thetas, np.nan) + se = np.full(n_thetas, np.nan) - all_coef = np.full((self._dml_data.n_coefs, self.n_rep), np.nan) - all_se = np.full((self._dml_data.n_coefs, self.n_rep), np.nan) + all_coef = np.full((n_thetas, n_rep), np.nan) + all_se = np.full((n_thetas, n_rep), np.nan) return psi, psi_deriv, psi_elements, var_scaling_factors, coef, se, all_coef, all_se def _initialize_predictions_and_targets(self): - self._predictions = { - learner: np.full((self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs), np.nan) - for learner in self.params_names - } - self._nuisance_targets = { - learner: np.full((self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs), np.nan) - for learner in self.params_names - } + self._predictions = {learner: np.full(self._score_dim, np.nan) for learner in self.params_names} + self._nuisance_targets = {learner: np.full(self._score_dim, np.nan) for learner in self.params_names} def _initialize_nuisance_loss(self): self._nuisance_loss = {learner: np.full((self.n_rep, self._dml_data.n_coefs), np.nan) for learner in self.params_names} From ade3b9a451bb0cb20367661773d9a00eb3f9968e Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Tue, 3 Jun 2025 14:25:12 +0200 Subject: [PATCH 06/41] check prediction size based on internal n_obs --- doubleml/double_ml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 0ab80cfa..911487a3 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -1005,7 +1005,7 @@ def _check_fit(self, n_jobs_cv, store_predictions, external_predictions, store_m external_predictions=external_predictions, valid_treatments=self._dml_data.d_cols, valid_learners=self.params_names, - 
n_obs=self._dml_data.n_obs, + n_obs=self.n_obs, n_rep=self.n_rep, ) elif not self._external_predictions_implemented and external_predictions is not None: From f113e61e1375f807c039f20b94f777d73e0c6504 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Tue, 3 Jun 2025 14:26:28 +0200 Subject: [PATCH 07/41] update score dimensions init in the cs object --- doubleml/did/did_cs_binary.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/doubleml/did/did_cs_binary.py b/doubleml/did/did_cs_binary.py index ce57384c..e550eb60 100644 --- a/doubleml/did/did_cs_binary.py +++ b/doubleml/did/did_cs_binary.py @@ -50,6 +50,20 @@ def __init__( ): super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting=False) + self._n_obs = obj_dml_data.data.shape[0] + self._score_dim = (self._n_obs, self.n_rep, self._dml_data.n_treat) + # reinitialze arrays + ( + self._psi, + self._psi_deriv, + self._psi_elements, + self._var_scaling_factors, + self._coef, + self._se, + self._all_coef, + self._all_se, + ) = self._initialize_arrays() + self._check_data(self._dml_data) g_values = self._dml_data.g_values t_values = self._dml_data.t_values From 9ef4e53f975c5feb577aeab59ce67f63530640cd Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Thu, 5 Jun 2025 06:57:05 +0200 Subject: [PATCH 08/41] update lambda and p calculation in did_cs --- doubleml/did/did_cs.py | 8 ++------ doubleml/did/tests/_utils_did_cs_manual.py | 8 ++++---- doubleml/did/tests/_utils_did_manual.py | 2 +- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/doubleml/did/did_cs.py b/doubleml/did/did_cs.py index ab2af5b9..5984399c 100644 --- a/doubleml/did/did_cs.py +++ b/doubleml/did/did_cs.py @@ -219,14 +219,10 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa # THIS DIFFERS FROM THE PAPER due to stratified splitting this should be the same for each fold # nuisance estimates of the uncond. treatment prob. - p_hat = np.full_like(d, np.nan, dtype="float64") - for train_index, test_index in smpls: - p_hat[test_index] = np.mean(d[train_index]) + p_hat = np.full_like(d, d.mean(), dtype="float64") # nuisance estimates of the uncond. time prob. 
- lambda_hat = np.full_like(t, np.nan, dtype="float64") - for train_index, test_index in smpls: - lambda_hat[test_index] = np.mean(t[train_index]) + lambda_hat = np.full_like(t, t.mean(), dtype="float64") # nuisance g smpls_d0_t0, smpls_d0_t1, smpls_d1_t0, smpls_d1_t1 = _get_cond_smpls_2d(smpls, d, t) diff --git a/doubleml/did/tests/_utils_did_cs_manual.py b/doubleml/did/tests/_utils_did_cs_manual.py index f14a52a0..ce6f8870 100644 --- a/doubleml/did/tests/_utils_did_cs_manual.py +++ b/doubleml/did/tests/_utils_did_cs_manual.py @@ -178,12 +178,12 @@ def fit_nuisance_did_cs( m_hat_list.append(np.zeros_like(g_hat_d1_t1_list[idx], dtype="float64")) p_hat_list = [] - for train_index, _ in smpls: - p_hat_list.append(np.mean(d[train_index])) + for _ in smpls: + p_hat_list.append(np.mean(d)) lambda_hat_list = [] - for train_index, _ in smpls: - lambda_hat_list.append(np.mean(t[train_index])) + for _ in smpls: + lambda_hat_list.append(np.mean(t)) return g_hat_d0_t0_list, g_hat_d0_t1_list, g_hat_d1_t0_list, g_hat_d1_t1_list, m_hat_list, p_hat_list, lambda_hat_list diff --git a/doubleml/did/tests/_utils_did_manual.py b/doubleml/did/tests/_utils_did_manual.py index e314c301..b067e44d 100644 --- a/doubleml/did/tests/_utils_did_manual.py +++ b/doubleml/did/tests/_utils_did_manual.py @@ -104,7 +104,7 @@ def fit_nuisance_did( m_hat_list = fit_predict_proba(d, x, ml_m, m_params, smpls, trimming_threshold=trimming_threshold) p_hat_list = [] - for train_index, _ in smpls: + for _ in smpls: p_hat_list.append(np.mean(d)) return g_hat0_list, g_hat1_list, m_hat_list, p_hat_list From e90441b9366f2c46daecd01809575635a56faeb8 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Thu, 5 Jun 2025 11:18:11 +0200 Subject: [PATCH 09/41] add _score_dim property to doubleml class --- doubleml/double_ml.py | 57 +++++++++++++++---------------------------- 1 file changed, 20 insertions(+), 37 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 911487a3..c2d3727b 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -103,16 +103,7 @@ def __init__(self, obj_dml_data, n_folds, n_rep, score, draw_sample_splitting): self._score_dim = (self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs) # initialize arrays according to obj_dml_data and the resampling settings - ( - self._psi, - self._psi_deriv, - self._psi_elements, - self._var_scaling_factors, - self._coef, - self._se, - self._all_coef, - self._all_se, - ) = self._initialize_arrays() + self._initialize_arrays() # initialize instance attributes which are later used for iterating self._i_rep = None @@ -1075,22 +1066,20 @@ def _fit_sensitivity_elements(self, nuisance_predictions): def _initialize_arrays(self): # scores - psi = np.full(self._score_dim, np.nan) - psi_deriv = np.full(self._score_dim, np.nan) - psi_elements = self._initialize_score_elements(self._score_dim) + self._psi = np.full(self._score_dim, np.nan) + self._psi_deriv = np.full(self._score_dim, np.nan) + self._psi_elements = self._initialize_score_elements(self._score_dim) n_rep = self._score_dim[1] n_thetas = self._score_dim[2] - var_scaling_factors = np.full(n_thetas, np.nan) + self._var_scaling_factors = np.full(n_thetas, np.nan) # coefficients and ses - coef = np.full(n_thetas, np.nan) - se = np.full(n_thetas, np.nan) + self._coef = np.full(n_thetas, np.nan) + self._se = np.full(n_thetas, np.nan) - all_coef = np.full((n_thetas, n_rep), np.nan) - all_se = np.full((n_thetas, n_rep), np.nan) - - return psi, psi_deriv, psi_elements, var_scaling_factors, coef, se, all_coef, 
all_se + self._all_coef = np.full((n_thetas, n_rep), np.nan) + self._all_se = np.full((n_thetas, n_rep), np.nan) def _initialize_predictions_and_targets(self): self._predictions = {learner: np.full(self._score_dim, np.nan) for learner in self.params_names} @@ -1211,7 +1200,7 @@ def evaluate_learners(self, learners=None, metric=_rmse): f"The learners have to be a subset of {str(self.params_names)}. Learners {str(learners)} provided." ) - def draw_sample_splitting(self): + def draw_sample_splitting(self, n_obs=None): """ Draw sample splitting for DoubleML models. @@ -1221,26 +1210,27 @@ def draw_sample_splitting(self): Parameters ---------- n_obs : int or None - The number of observations. If ``None``, the number of observations is set to the number of observations in - the data set. + The number of observations to resample. If ``None``, the number of observations is set to the number + of observations in the data set. Returns ------- self : object """ + if n_obs is None: + n_obs = self.n_obs + if self._is_cluster_data: obj_dml_resampling = DoubleMLClusterResampling( n_folds=self._n_folds_per_cluster, n_rep=self.n_rep, - n_obs=self.n_obs, + n_obs=n_obs, n_cluster_vars=self._dml_data.n_cluster_vars, cluster_vars=self._dml_data.cluster_vars, ) self._smpls, self._smpls_cluster = obj_dml_resampling.split_samples() else: - obj_dml_resampling = DoubleMLResampling( - n_folds=self.n_folds, n_rep=self.n_rep, n_obs=self.n_obs, stratify=self._strata - ) + obj_dml_resampling = DoubleMLResampling(n_folds=self.n_folds, n_rep=self.n_rep, n_obs=n_obs, stratify=self._strata) self._smpls = obj_dml_resampling.split_samples() return self @@ -1309,16 +1299,9 @@ def set_sample_splitting(self, all_smpls, all_smpls_cluster=None): all_smpls, all_smpls_cluster, self._dml_data, self._is_cluster_data, n_obs=self.n_obs ) - ( - self._psi, - self._psi_deriv, - self._psi_elements, - self._var_scaling_factors, - self._coef, - self._se, - self._all_coef, - self._all_se, - ) = self._initialize_arrays() + # set sample splitting can update the number of repetitions + self._score_dim = (self._score_dim[0], self._n_rep, self._score_dim[2]) + self._initialize_arrays() self._initialize_ml_nuisance_params() return self From 9f6f5d432a9c259344cab0dcc80fc19a7af5ac35 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Thu, 5 Jun 2025 16:10:57 +0200 Subject: [PATCH 10/41] add _n_obs_sample_splitting property to doubleml class --- doubleml/double_ml.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index c2d3727b..58b8692a 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -98,6 +98,7 @@ def __init__(self, obj_dml_data, n_folds, n_rep, score, draw_sample_splitting): # perform sample splitting self._smpls = None self._smpls_cluster = None + self._n_obs_sample_splitting = self.n_obs if draw_sample_splitting: self.draw_sample_splitting() @@ -1200,37 +1201,30 @@ def evaluate_learners(self, learners=None, metric=_rmse): f"The learners have to be a subset of {str(self.params_names)}. Learners {str(learners)} provided." ) - def draw_sample_splitting(self, n_obs=None): + def draw_sample_splitting(self): """ Draw sample splitting for DoubleML models. The samples are drawn according to the attributes ``n_folds`` and ``n_rep``. - Parameters - ---------- - n_obs : int or None - The number of observations to resample. If ``None``, the number of observations is set to the number - of observations in the data set. 
-
         Returns
         -------
         self : object
         """
-        if n_obs is None:
-            n_obs = self.n_obs
-
         if self._is_cluster_data:
             obj_dml_resampling = DoubleMLClusterResampling(
                 n_folds=self._n_folds_per_cluster,
                 n_rep=self.n_rep,
-                n_obs=n_obs,
+                n_obs=self._n_obs_sample_splitting,
                 n_cluster_vars=self._dml_data.n_cluster_vars,
                 cluster_vars=self._dml_data.cluster_vars,
             )
             self._smpls, self._smpls_cluster = obj_dml_resampling.split_samples()
         else:
-            obj_dml_resampling = DoubleMLResampling(n_folds=self.n_folds, n_rep=self.n_rep, n_obs=n_obs, stratify=self._strata)
+            obj_dml_resampling = DoubleMLResampling(
+                n_folds=self.n_folds, n_rep=self.n_rep, n_obs=self._n_obs_sample_splitting, stratify=self._strata
+            )
             self._smpls = obj_dml_resampling.split_samples()
 
         return self

From eb951c40ba6c6ed58854a1f6bede79411e2c7efb Mon Sep 17 00:00:00 2001
From: SvenKlaassen
Date: Thu, 5 Jun 2025 16:49:51 +0200
Subject: [PATCH 11/41] update check_resampling input

---
 doubleml/double_ml.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py
index 58b8692a..d2a7a641 100644
--- a/doubleml/double_ml.py
+++ b/doubleml/double_ml.py
@@ -1290,7 +1290,7 @@ def set_sample_splitting(self, all_smpls, all_smpls_cluster=None):
         >>> dml_plr_obj.set_sample_splitting(smpls)
         """
         self._smpls, self._smpls_cluster, self._n_rep, self._n_folds = _check_sample_splitting(
-            all_smpls, all_smpls_cluster, self._dml_data, self._is_cluster_data, n_obs=self.n_obs
+            all_smpls, all_smpls_cluster, self._dml_data, self._is_cluster_data, n_obs=self._n_obs_sample_splitting
         )
 
         # set sample splitting can update the number of repetitions

From a6c6507fabe396ac084c1d9825b2fdf6a7850e33 Mon Sep 17 00:00:00 2001
From: SvenKlaassen
Date: Thu, 5 Jun 2025 16:50:01 +0200
Subject: [PATCH 12/41] update did binary classes with n_obs_subset and
 n_obs_sample_splitting

---
 doubleml/did/did_binary.py                    | 12 ++++++------
 doubleml/did/did_cs_binary.py                 | 19 +++++--------------
 .../tests/test_did_binary_control_groups.py   |  2 +-
 .../test_did_binary_external_predictions.py   |  2 +-
 .../did/tests/test_did_binary_vs_did_panel.py |  2 +-
 5 files changed, 14 insertions(+), 23 deletions(-)

diff --git a/doubleml/did/did_binary.py b/doubleml/did/did_binary.py
index e4d309db..a4876f74 100644
--- a/doubleml/did/did_binary.py
+++ b/doubleml/did/did_binary.py
@@ -171,8 +171,7 @@ def __init__(
 
         # Numeric values for positions of the entries in id_panel_data inside id_original
         # np.nonzero(np.isin(id_original, id_panel_data))
-        self._n_subset = self._panel_data_wide.shape[0]
-        self._n_obs = self._n_subset  # Effective sample size used for resampling
+        self._n_obs_subset = self._panel_data_wide.shape[0]  # Effective sample size used for resampling
         self._n_treated_subset = self._panel_data_wide["G_indicator"].sum()
 
         # Save x and y for later ML estimation
@@ -192,6 +191,7 @@ def __init__(
         # set stratication for resampling
         self._strata = self._panel_data_wide["G_indicator"]
 
+        self._n_obs_sample_splitting = self.n_obs_subset
         if draw_sample_splitting:
             self.draw_sample_splitting()
 
@@ -244,7 +244,7 @@ def __str__(self):
             f"Evaluation period: {str(self.t_value_eval)}\n"
             f"Control group: {str(self.control_group)}\n"
             f"Anticipation periods: {str(self.anticipation_periods)}\n"
-            f"Effective sample size: {str(self.n_obs)}\n"
+            f"Effective sample size: {str(self.n_obs_subset)}\n"
         )
         learner_info = ""
         for key, value in self.learner.items():
@@ -371,11 +371,11 @@ def trimming_threshold(self):
         return self._trimming_threshold
 
     @property
-    def n_obs(self):
+    def n_obs_subset(self):
         """
         The number of observations used for estimation.
         """
-        return self._n_subset
+        return self._n_obs_subset
 
     def _initialize_ml_nuisance_params(self):
         if self.score == "observational":
@@ -713,7 +713,7 @@ def _sensitivity_element_est(self, preds):
         }
 
         # add scaling to make variance estimation consistent (sample size difference)
-        scaling = self._dml_data.n_obs / self._n_subset
+        scaling = self._dml_data.n_obs / self._n_obs_subset
         element_dict = {
             "sigma2": sigma2,
             "nu2": nu2,
diff --git a/doubleml/did/did_cs_binary.py b/doubleml/did/did_cs_binary.py
index e550eb60..d571e107 100644
--- a/doubleml/did/did_cs_binary.py
+++ b/doubleml/did/did_cs_binary.py
@@ -53,16 +53,7 @@ def __init__(
         self._n_obs = obj_dml_data.data.shape[0]
         self._score_dim = (self._n_obs, self.n_rep, self._dml_data.n_treat)
         # reinitialze arrays
-        (
-            self._psi,
-            self._psi_deriv,
-            self._psi_elements,
-            self._var_scaling_factors,
-            self._coef,
-            self._se,
-            self._all_coef,
-            self._all_se,
-        ) = self._initialize_arrays()
+        self._initialize_arrays()
 
         self._check_data(self._dml_data)
         g_values = self._dml_data.g_values
@@ -108,8 +99,7 @@ def __init__(
         # Numeric values for positions of the entries in id_panel_data inside id_original
         # np.nonzero(np.isin(id_original, id_panel_data))
-        self._n_subset = self.data_subset.shape[0]
-        self._n_obs = self._n_subset  # Effective sample size used for resampling
+        self._n_obs_subset = self.data_subset.shape[0]  # Effective sample size used for resampling
 
         # Save x and y for later ML estimation
         self._x_data = self.data_subset.loc[:, self._dml_data.x_cols].values
@@ -129,6 +119,7 @@ def __init__(
         # set stratication for resampling
         self._strata = self.data_subset["G_indicator"] + 2 * self.data_subset["t_indicator"]
 
+        self._n_obs_sample_splitting = self.n_obs_subset
         if draw_sample_splitting:
             self.draw_sample_splitting()
 
@@ -255,11 +246,11 @@ def trimming_threshold(self):
         return self._trimming_threshold
 
     @property
-    def n_obs(self):
+    def n_obs_subset(self):
         """
         The number of observations used for estimation.
         """
-        return self._n_subset
+        return self._n_obs_subset
 
     def _initialize_ml_nuisance_params(self):
         if self.score == "observational":
diff --git a/doubleml/did/tests/test_did_binary_control_groups.py b/doubleml/did/tests/test_did_binary_control_groups.py
index b8406b15..627cf50a 100644
--- a/doubleml/did/tests/test_did_binary_control_groups.py
+++ b/doubleml/did/tests/test_did_binary_control_groups.py
@@ -21,7 +21,7 @@ def test_control_groups_different():
     dml_did_never_treated = dml.did.DoubleMLDIDBinary(control_group="never_treated", **args)
     dml_did_not_yet_treated = dml.did.DoubleMLDIDBinary(control_group="not_yet_treated", **args)
 
-    assert dml_did_never_treated._n_subset != dml_did_not_yet_treated._n_subset
+    assert dml_did_never_treated.n_obs_subset != dml_did_not_yet_treated.n_obs_subset
     # same treatment group
     assert dml_did_never_treated._n_treated_subset == dml_did_not_yet_treated._n_treated_subset
 
diff --git a/doubleml/did/tests/test_did_binary_external_predictions.py b/doubleml/did/tests/test_did_binary_external_predictions.py
index ccc136d0..0cb3e055 100644
--- a/doubleml/did/tests/test_did_binary_external_predictions.py
+++ b/doubleml/did/tests/test_did_binary_external_predictions.py
@@ -112,7 +112,7 @@ def doubleml_did_panel_fixture(did_score, n_rep):
     }
 
     dml_did = DoubleMLDIDBinary(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs)
-    all_smpls = draw_smpls(n_obs=dml_did._n_subset, n_folds=n_folds, n_rep=n_rep, groups=dml_did._g_panel)
+    all_smpls = draw_smpls(n_obs=dml_did.n_obs_subset, n_folds=n_folds, n_rep=n_rep, groups=dml_did._g_panel)
     dml_did.set_sample_splitting(all_smpls)
 
     np.random.seed(3141)
diff --git a/doubleml/did/tests/test_did_binary_vs_did_panel.py b/doubleml/did/tests/test_did_binary_vs_did_panel.py
index 1eacdf6a..7d1dc947 100644
--- a/doubleml/did/tests/test_did_binary_vs_did_panel.py
+++ b/doubleml/did/tests/test_did_binary_vs_did_panel.py
@@ -178,7 +178,7 @@ def test_sensitivity_elements(dml_did_binary_vs_did_fixture):
     )
     for sensitivity_element in ["psi_sigma2", "psi_nu2", "riesz_rep"]:
         dml_binary_obj = dml_did_binary_vs_did_fixture["dml_did_binary_obj"]
-        scaling = dml_binary_obj._n_subset / dml_binary_obj._dml_data.n_obs
+        scaling = dml_binary_obj.n_obs_subset / dml_binary_obj._dml_data.n_obs
         binary_sensitivity_element = scaling * _get_id_positions(
             dml_did_binary_vs_did_fixture["sensitivity_elements_binary"][sensitivity_element], dml_binary_obj._id_positions
         )

From d54b272235261a090792990b180c1b74b4e861da Mon Sep 17 00:00:00 2001
From: SvenKlaassen
Date: Fri, 6 Jun 2025 09:19:43 +0200
Subject: [PATCH 13/41] update tune without folds to n_obs of doubleml obj

---
 doubleml/double_ml.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py
index d2a7a641..88f677ef 100644
--- a/doubleml/double_ml.py
+++ b/doubleml/double_ml.py
@@ -848,7 +848,7 @@ def tune(
                     self.set_ml_nuisance_params(nuisance_model, self._dml_data.d_cols[i_d], params)
 
         else:
-            smpls = [(np.arange(self._dml_data.n_obs), np.arange(self._dml_data.n_obs))]
+            smpls = [(np.arange(self.n_obs), np.arange(self.n_obs))]
             # tune hyperparameters
             res = self._nuisance_tuning(
                 smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search

From 693e109bd65d6cb0987c9de2363266cf48c61d32 Mon Sep 17 00:00:00 2001
From: SvenKlaassen
Date: Fri, 6 Jun 2025 09:20:31 +0200
Subject: [PATCH 14/41] change n_obs for panel data add n_ids for did_binary
 obj

---
 doubleml/data/panel_data.py                   |  4 +--
 doubleml/data/tests/test_panel_data.py        |  5 ++--
 doubleml/did/did_binary.py                    | 12 ++++++--
 doubleml/did/did_cs_binary.py                 |  5 ----
 .../did/tests/test_did_binary_vs_did_panel.py |  2 +-
 .../test_did_multi_external_predictions.py    |  7 +++++
 .../did/tests/test_did_multi_return_types.py  |  5 ++--
 doubleml/did/tests/test_return_types.py       | 13 ++++++--
 doubleml/utils/_check_return_types.py         | 30 ++++++-------------
 9 files changed, 45 insertions(+), 38 deletions(-)

diff --git a/doubleml/data/panel_data.py b/doubleml/data/panel_data.py
index 4e416183..59ad531c 100644
--- a/doubleml/data/panel_data.py
+++ b/doubleml/data/panel_data.py
@@ -215,9 +215,9 @@ def id_var_unique(self):
         return self._id_var_unique
 
     @property
-    def n_obs(self):
+    def n_ids(self):
         """
-        The number of observations. For panel data, the number of unique values for id_col.
+        The number of unique values for id_col.
         """
         return len(self._id_var_unique)
 
diff --git a/doubleml/data/tests/test_panel_data.py b/doubleml/data/tests/test_panel_data.py
index 2f2250ba..e1a7c925 100644
--- a/doubleml/data/tests/test_panel_data.py
+++ b/doubleml/data/tests/test_panel_data.py
@@ -56,7 +56,7 @@ def test_id_col_setter():
     dml_data.id_col = "id_new"
     assert np.array_equal(dml_data.id_var, id_comp)
     assert dml_data._id_var_unique == np.unique(id_comp)
-    assert dml_data.n_obs == 1
+    assert dml_data.n_ids == 1
 
     msg = "Invalid id variable id_col. a13 is no data column."
     with pytest.raises(ValueError, match=msg):
@@ -169,7 +169,8 @@ def test_panel_data_properties():
 
     assert np.array_equal(dml_data.id_var, df["id"].values)
     assert np.array_equal(dml_data.id_var_unique, np.unique(df["id"].values))
-    assert dml_data.n_obs == len(np.unique(df["id"].values))
+    assert dml_data.n_obs == df.shape[0]
+    assert dml_data.n_ids == len(np.unique(df["id"].values))
     assert dml_data.g_col == "d"
     assert np.array_equal(dml_data.g_values, np.sort(np.unique(df["d"].values)))
     assert dml_data.n_groups == len(np.unique(df["d"].values))
diff --git a/doubleml/did/did_binary.py b/doubleml/did/did_binary.py
index a4876f74..a9939c97 100644
--- a/doubleml/did/did_binary.py
+++ b/doubleml/did/did_binary.py
@@ -124,6 +124,12 @@ def __init__(
         super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting=False)
 
         self._check_data(self._dml_data)
+        # for did panel data the scores are based on the number of unique ids
+        self._n_obs = obj_dml_data.n_ids
+        self._score_dim = (self._n_obs, self.n_rep, self._dml_data.n_treat)
+        # reinitialize arrays
+        self._initialize_arrays()
+
         g_values = self._dml_data.g_values
         t_values = self._dml_data.t_values
 
@@ -542,7 +548,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
         psi_a, psi_b = self._score_elements(y, d, g_hat0["preds"], g_hat1["preds"], m_hat["preds"], p_hat)
 
         extend_kwargs = {
-            "n_obs": self._dml_data.n_obs,
+            "n_obs": self._dml_data.n_ids,
             "id_positions": self.id_positions,
         }
         psi_elements = {
@@ -707,13 +713,13 @@ def _sensitivity_element_est(self, preds):
         psi_nu2 = nu2_score_element - nu2
 
         extend_kwargs = {
-            "n_obs": self._dml_data.n_obs,
+            "n_obs": self._dml_data.n_ids,
             "id_positions": self.id_positions,
             "fill_value": 0.0,
         }
 
         # add scaling to make variance estimation consistent (sample size difference)
-        scaling = self._dml_data.n_obs / self._n_obs_subset
+        scaling = self._dml_data.n_ids / self._n_obs_subset
         element_dict = {
             "sigma2": sigma2,
             "nu2": nu2,
diff --git a/doubleml/did/did_cs_binary.py b/doubleml/did/did_cs_binary.py
index d571e107..e1786242 100644
--- a/doubleml/did/did_cs_binary.py
+++ b/doubleml/did/did_cs_binary.py
@@ -50,11 +50,6 @@ def __init__(
     ):
         super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting=False)
 
-        self._n_obs = obj_dml_data.data.shape[0]
-        self._score_dim = (self._n_obs, self.n_rep, self._dml_data.n_treat)
-        # reinitialze arrays
-        self._initialize_arrays()
-
         self._check_data(self._dml_data)
         g_values = self._dml_data.g_values
         t_values = self._dml_data.t_values
diff --git a/doubleml/did/tests/test_did_binary_vs_did_panel.py b/doubleml/did/tests/test_did_binary_vs_did_panel.py
index 7d1dc947..9da81739 100644
--- a/doubleml/did/tests/test_did_binary_vs_did_panel.py
+++ b/doubleml/did/tests/test_did_binary_vs_did_panel.py
@@ -178,7 +178,7 @@ def test_sensitivity_elements(dml_did_binary_vs_did_fixture):
     )
     for sensitivity_element in ["psi_sigma2", "psi_nu2", "riesz_rep"]:
         dml_binary_obj = dml_did_binary_vs_did_fixture["dml_did_binary_obj"]
-        scaling = dml_binary_obj.n_obs_subset / dml_binary_obj._dml_data.n_obs
+        scaling = dml_binary_obj.n_obs_subset / dml_binary_obj._dml_data.n_ids
         binary_sensitivity_element = scaling * _get_id_positions(
             dml_did_binary_vs_did_fixture["sensitivity_elements_binary"][sensitivity_element], dml_binary_obj._id_positions
         )
diff --git a/doubleml/did/tests/test_did_multi_external_predictions.py b/doubleml/did/tests/test_did_multi_external_predictions.py
index 2e7003f9..e336487d 100644
--- a/doubleml/did/tests/test_did_multi_external_predictions.py
+++ b/doubleml/did/tests/test_did_multi_external_predictions.py
@@ -100,3 +100,10 @@ def test_coef(doubleml_did_multi_ext_fixture):
     assert math.isclose(
         doubleml_did_multi_ext_fixture["coef"], doubleml_did_multi_ext_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-3
     )
+
+
+@pytest.mark.ci
+def test_se(doubleml_did_multi_ext_fixture):
+    assert math.isclose(
+        doubleml_did_multi_ext_fixture["se"], doubleml_did_multi_ext_fixture["se_ext"], rel_tol=1e-9, abs_tol=1e-3
+    )
diff --git a/doubleml/did/tests/test_did_multi_return_types.py b/doubleml/did/tests/test_did_multi_return_types.py
index 2e12ce10..c11544ed 100644
--- a/doubleml/did/tests/test_did_multi_return_types.py
+++ b/doubleml/did/tests/test_did_multi_return_types.py
@@ -17,6 +17,7 @@
 N_REP = 1
 N_FOLDS = 3
 N_REP_BOOT = 314
+N_PERIODS = 5
 
 dml_args = {
     "n_rep": N_REP,
@@ -30,7 +31,7 @@
 datasets = {}
 
 # panel data
-df_panel = make_did_CS2021(n_obs=N_OBS, dgp_type=1, n_pre_treat_periods=2, n_periods=5, time_type="float")
+df_panel = make_did_CS2021(n_obs=N_OBS, dgp_type=1, n_pre_treat_periods=2, n_periods=N_PERIODS, time_type="float")
 df_panel["y_binary"] = np.random.binomial(n=1, p=0.5, size=df_panel.shape[0])
 datasets["did_panel"] = DoubleMLPanelData(
     df_panel, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"]
@@ -89,7 +90,7 @@ def test_panel_property_types_and_shapes(fitted_dml_obj):
     assert dml_obj.n_gt_atts == n_treat
     assert dml_obj.n_rep == N_REP
     assert dml_obj.n_folds == N_FOLDS
-    assert dml_obj._dml_data.n_obs == N_OBS
+    assert dml_obj._dml_data.n_obs == N_OBS * N_PERIODS
     assert dml_obj.n_rep_boot == N_REP_BOOT
 
     assert isinstance(dml_obj.all_coef, np.ndarray)
diff --git a/doubleml/did/tests/test_return_types.py b/doubleml/did/tests/test_return_types.py
index a59cec6c..1b6fa736 100644
--- a/doubleml/did/tests/test_return_types.py
+++ b/doubleml/did/tests/test_return_types.py
@@ -79,7 +79,8 @@ def test_sensitivity_return_types(fitted_dml_obj):
 
 # panel data
-df_panel = make_did_CS2021(n_obs=N_OBS, dgp_type=1, n_pre_treat_periods=2, n_periods=5, time_type="float")
+N_PERIODS = 5
+df_panel = make_did_CS2021(n_obs=N_OBS, dgp_type=1, n_pre_treat_periods=2, n_periods=N_PERIODS, time_type="float")
 df_panel["y_binary"] = np.random.binomial(n=1, p=0.5, size=df_panel.shape[0])
 datasets["did_panel"] = DoubleMLPanelData(
     df_panel, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"]
@@ -160,7 +161,15 @@ def fitted_panel_dml_obj(request):
 
 @pytest.mark.ci
 def test_panel_property_types_and_shapes(fitted_panel_dml_obj):
-    check_basic_property_types_and_shapes(fitted_panel_dml_obj, N_OBS, N_TREAT, N_REP, N_FOLDS, N_REP_BOOT)
+    check_basic_property_types_and_shapes(
+        fitted_panel_dml_obj,
+        n_obs=N_PERIODS * N_OBS,
+        n_treat=N_TREAT,
+        n_rep=N_REP,
+        n_folds=N_FOLDS,
+        n_rep_boot=N_REP_BOOT,
+        score_dim=(N_OBS, N_REP, N_TREAT),
+    )
     check_basic_predictions_and_targets(fitted_panel_dml_obj, N_OBS, N_TREAT, N_REP)
 
diff --git a/doubleml/utils/_check_return_types.py b/doubleml/utils/_check_return_types.py
index 54462059..54e72833 100644
--- a/doubleml/utils/_check_return_types.py
+++ b/doubleml/utils/_check_return_types.py
@@ -31,10 +31,14 @@ def check_basic_return_types(dml_obj, cls):
     assert isinstance(dml_obj._dml_data.__str__(), str)
 
 
-def check_basic_property_types_and_shapes(dml_obj, n_obs, n_treat, n_rep, n_folds, n_rep_boot):
+def check_basic_property_types_and_shapes(dml_obj, n_obs, n_treat, n_rep, n_folds, n_rep_boot, score_dim=None):
     # not checked: learner, learner_names, params, params_names, score
     # already checked: summary
 
+    # use the default score dimension
+    if score_dim is None:
+        score_dim = (n_obs, n_rep, n_treat)
+
     # check that the setting is still in line with the hard-coded values
     assert dml_obj._dml_data.n_treat == n_treat
     assert dml_obj.n_rep == n_rep
@@ -55,35 +59,19 @@ def check_basic_property_types_and_shapes(dml_obj, n_obs, n_treat, n_rep, n_fold
     assert dml_obj.coef.shape == (n_treat,)
 
     assert isinstance(dml_obj.psi, np.ndarray)
-    assert dml_obj.psi.shape == (
-        n_obs,
-        n_rep,
-        n_treat,
-    )
+    assert dml_obj.psi.shape == score_dim
 
     is_nonlinear = isinstance(dml_obj, NonLinearScoreMixin)
     if is_nonlinear:
         for score_element in dml_obj._score_element_names:
             assert isinstance(dml_obj.psi_elements[score_element], np.ndarray)
-            assert dml_obj.psi_elements[score_element].shape == (
-                n_obs,
-                n_rep,
-                n_treat,
-            )
+            assert dml_obj.psi_elements[score_element].shape == score_dim
     else:
         assert isinstance(dml_obj.psi_elements["psi_a"], np.ndarray)
-        assert dml_obj.psi_elements["psi_a"].shape == (
-            n_obs,
-            n_rep,
-            n_treat,
-        )
+        assert dml_obj.psi_elements["psi_a"].shape == score_dim
 
         assert isinstance(dml_obj.psi_elements["psi_b"], np.ndarray)
-        assert dml_obj.psi_elements["psi_b"].shape == (
-            n_obs,
-            n_rep,
-            n_treat,
-        )
+        assert dml_obj.psi_elements["psi_b"].shape == score_dim
 
     assert isinstance(dml_obj.framework, DoubleMLFramework)
     assert isinstance(dml_obj.pval, np.ndarray)

From 7d6ef350f5116241a84017e49ae5e9dd59f56895 Mon Sep 17 00:00:00 2001
From: SvenKlaassen
Date: Fri, 6 Jun 2025 12:18:04 +0200
Subject: [PATCH 15/41] fix order test

---
 doubleml/did/did_cs_binary.py                 |  2 +-
 ...test_did_cs_binary_vs_did_cs_two_period.py | 47 ++++++++++++++++++-
 2 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/doubleml/did/did_cs_binary.py b/doubleml/did/did_cs_binary.py
index e1786242..5d6e3638 100644
--- a/doubleml/did/did_cs_binary.py
+++ b/doubleml/did/did_cs_binary.py
@@ -462,7 +462,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
         )
 
         extend_kwargs = {
-            "n_obs": self._dml_data.data.shape[0],
+            "n_obs": self._dml_data.n_obs,
            "id_positions": self.id_positions,
         }
         psi_elements = {
diff --git a/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py b/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py
index 2c8c34f3..a0a25718 100644
--- a/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py
+++ b/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py
@@ -51,10 +51,14 @@ def dml_did_cs_binary_vs_did_cs_fixture(generate_data_did_binary, learner, score
     # collect data
     dml_panel_data = generate_data_did_binary
     df = dml_panel_data._data.sort_values(by=["id", "t"])
+    # Reorder data beforehand to make both approaches compatible
+    dml_panel_data = dml.data.DoubleMLPanelData(
+        df, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"]
+    )
+
+    obj_dml_data = dml.DoubleMLData(df, y_col="y", d_cols="d", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"])
 
     n_obs = df.shape[0]
     all_smpls = draw_smpls(n_obs, n_folds)
-    obj_dml_data = dml.DoubleMLData(df, y_col="y", d_cols="d", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"])
 
     # Set machine learning methods for m & g
     ml_g = clone(learner[0])
@@ -161,3 +165,44 @@ def test_coefs(dml_did_cs_binary_vs_did_cs_fixture):
         rel_tol=1e-9,
         abs_tol=1e-4,
     )
+
+
+@pytest.mark.ci
+def test_ses(dml_did_cs_binary_vs_did_cs_fixture):
+    assert math.isclose(
+        dml_did_cs_binary_vs_did_cs_fixture["se"][0],
+        dml_did_cs_binary_vs_did_cs_fixture["se_manual"],
+        rel_tol=1e-9,
+        abs_tol=1e-4,
+    )
+    assert math.isclose(
+        dml_did_cs_binary_vs_did_cs_fixture["se_binary"][0],
+        dml_did_cs_binary_vs_did_cs_fixture["se"][0],
+        rel_tol=1e-9,
+        abs_tol=1e-4,
+    )
+
+
+@pytest.mark.ci
+def test_boot(dml_did_cs_binary_vs_did_cs_fixture):
+    for bootstrap in dml_did_cs_binary_vs_did_cs_fixture["boot_methods"]:
+        assert np.allclose(
+            dml_did_cs_binary_vs_did_cs_fixture["boot_t_stat" + bootstrap],
+            dml_did_cs_binary_vs_did_cs_fixture["boot_t_stat" + bootstrap + "_manual"],
+            atol=1e-4,
+        )
+        assert np.allclose(
+            dml_did_cs_binary_vs_did_cs_fixture["boot_t_stat" + bootstrap],
+            dml_did_cs_binary_vs_did_cs_fixture["boot_t_stat" + bootstrap + "_binary"],
+            atol=1e-4,
+        )
+
+
+@pytest.mark.ci
+def test_nuisance_loss(dml_did_cs_binary_vs_did_cs_fixture):
+    assert (
+        dml_did_cs_binary_vs_did_cs_fixture["nuisance_loss"].keys()
+        == dml_did_cs_binary_vs_did_cs_fixture["nuisance_loss_binary"].keys()
+    )
+    for key, value in dml_did_cs_binary_vs_did_cs_fixture["nuisance_loss"].items():
+        assert np.allclose(value, dml_did_cs_binary_vs_did_cs_fixture["nuisance_loss_binary"][key], rtol=1e-9, atol=1e-3)

From 18c38445220a723dc6935fa3c9f788aea4e54e48 Mon Sep 17 00:00:00 2001
From: SvenKlaassen
Date: Fri, 6 Jun 2025 12:30:57 +0200
Subject: [PATCH 16/41] add sensitivity estimation to did_cs_binary

---
 doubleml/did/did_cs_binary.py                 | 101 +++++++++++++++++-
 ...test_did_cs_binary_vs_did_cs_two_period.py |  76 ++++++++++++-
 2 files changed, 174 insertions(+), 3 deletions(-)

diff --git a/doubleml/did/did_cs_binary.py b/doubleml/did/did_cs_binary.py
index 5d6e3638..479cba93 100644
--- a/doubleml/did/did_cs_binary.py
+++ b/doubleml/did/did_cs_binary.py
@@ -153,7 +153,7 @@ def __init__(
         self._trimming_threshold = trimming_threshold
         _check_trimming(self._trimming_rule, self._trimming_threshold)
 
-        self._sensitivity_implemented = False
+        self._sensitivity_implemented = True
         self._external_predictions_implemented = True
 
     @property
@@ -589,4 +589,101 @@ def _nuisance_tuning(
         self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search
     ):
         pass
 
     def _sensitivity_element_est(self, preds):
-        pass
+        y = self._y_data
+        d = self._g_data
+        t = self._t_data
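+
+        # the cross-fitted predictions are stored on the full sample index, so
+        # they are first realigned with the rows of the (g, t) subset used here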
+        m_hat = _get_id_positions(preds["predictions"]["ml_m"], self.id_positions)
+        g_hat_d0_t0 = _get_id_positions(preds["predictions"]["ml_g_d0_t0"], self.id_positions)
+        g_hat_d0_t1 = _get_id_positions(preds["predictions"]["ml_g_d0_t1"], self.id_positions)
+        g_hat_d1_t0 = _get_id_positions(preds["predictions"]["ml_g_d1_t0"], self.id_positions)
+        g_hat_d1_t1 = _get_id_positions(preds["predictions"]["ml_g_d1_t1"], self.id_positions)
+
+        d0t0 = np.multiply(1.0 - d, 1.0 - t)
+        d0t1 = np.multiply(1.0 - d, t)
+        d1t0 = np.multiply(d, 1.0 - t)
+        d1t1 = np.multiply(d, t)
+
+        g_hat = (
+            np.multiply(d0t0, g_hat_d0_t0)
+            + np.multiply(d0t1, g_hat_d0_t1)
+            + np.multiply(d1t0, g_hat_d1_t0)
+            + np.multiply(d1t1, g_hat_d1_t1)
+        )
+        sigma2_score_element = np.square(y - g_hat)
+        sigma2 = np.mean(sigma2_score_element)
+        psi_sigma2 = sigma2_score_element - sigma2
+
+        # calc m(W,alpha) and Riesz representer
+        p_hat = np.mean(d)
+        lambda_hat = np.mean(t)
+        if self.score == "observational":
+            propensity_weight_d0 = np.divide(m_hat, 1.0 - m_hat)
+            if self.in_sample_normalization:
+                weight_d0t1 = np.multiply(d0t1, propensity_weight_d0)
+                weight_d0t0 = np.multiply(d0t0, propensity_weight_d0)
+                mean_weight_d0t1 = np.mean(weight_d0t1)
+                mean_weight_d0t0 = np.mean(weight_d0t0)
+
+                m_alpha = np.multiply(
+                    np.divide(d, p_hat),
+                    np.divide(1.0, np.mean(d1t1))
+                    + np.divide(1.0, np.mean(d1t0))
+                    + np.divide(propensity_weight_d0, mean_weight_d0t1)
+                    + np.divide(propensity_weight_d0, mean_weight_d0t0),
+                )
+
+                rr = (
+                    np.divide(d1t1, np.mean(d1t1))
+                    - np.divide(d1t0, np.mean(d1t0))
+                    - np.divide(weight_d0t1, mean_weight_d0t1)
+                    + np.divide(weight_d0t0, mean_weight_d0t0)
+                )
+            else:
+                m_alpha_1 = np.divide(1.0, lambda_hat) + np.divide(1.0, 1.0 - lambda_hat)
+                m_alpha = np.multiply(np.divide(d, np.square(p_hat)), np.multiply(m_alpha_1, 1.0 + propensity_weight_d0))
+
+                rr_1 = np.divide(t, np.multiply(p_hat, lambda_hat)) + np.divide(1.0 - t, np.multiply(p_hat, 1.0 - lambda_hat))
+                rr_2 = d + np.multiply(1.0 - d, propensity_weight_d0)
+                rr = np.multiply(rr_1, rr_2)
+        else:
+            assert self.score == "experimental"
+            if self.in_sample_normalization:
+                m_alpha = (
+                    np.divide(1.0, np.mean(d1t1))
+                    + np.divide(1.0, np.mean(d1t0))
+                    + np.divide(1.0, np.mean(d0t1))
+                    + np.divide(1.0, np.mean(d0t0))
+                )
+                rr = (
+                    np.divide(d1t1, np.mean(d1t1))
+                    - np.divide(d1t0, np.mean(d1t0))
+                    - np.divide(d0t1, np.mean(d0t1))
+                    + np.divide(d0t0, np.mean(d0t0))
+                )
+            else:
+                m_alpha = (
+                    np.divide(1.0, np.multiply(p_hat, lambda_hat))
+                    + np.divide(1.0, np.multiply(p_hat, 1.0 - lambda_hat))
+                    + np.divide(1.0, np.multiply(1.0 - p_hat, lambda_hat))
+                    + np.divide(1.0, np.multiply(1.0 - p_hat, 1.0 - lambda_hat))
+                )
+                rr = (
+                    np.divide(d1t1, np.multiply(p_hat, lambda_hat))
+                    - np.divide(d1t0, np.multiply(p_hat, 1.0 - lambda_hat))
+                    - np.divide(d0t1, np.multiply(1.0 - p_hat, lambda_hat))
+                    + np.divide(d0t0, np.multiply(1.0 - p_hat, 1.0 - lambda_hat))
+                )
+
+        nu2_score_element = np.multiply(2.0, m_alpha) - np.square(rr)
+        nu2 = np.mean(nu2_score_element)
+        psi_nu2 = nu2_score_element - nu2
+
+        element_dict = {
+            "sigma2": sigma2,
+            "nu2": nu2,
+            "psi_sigma2": psi_sigma2,
+            "psi_nu2": psi_nu2,
+            "riesz_rep": rr,
+        }
+        return element_dict
diff --git a/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py b/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py
index a0a25718..73e6b827 100644
--- a/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py
+++ b/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py
@@ -9,7 +9,7 @@
 import doubleml as dml
 
 from ...tests._utils import draw_smpls
-from ._utils_did_cs_manual import fit_did_cs
+from ._utils_did_cs_manual import fit_did_cs, fit_sensitivity_elements_did_cs
 from ._utils_did_manual import boot_did
 
@@ -148,6 +148,26 @@ def dml_did_cs_binary_vs_did_cs_fixture(generate_data_did_binary, learner, score
         res_dict["boot_t_stat" + bootstrap + "_binary"] = dml_did_binary_obj.boot_t_stat
         res_dict["boot_t_stat" + bootstrap + "_manual"] = boot_t_stat.reshape(-1, 1, 1)
 
+    # sensitivity tests
+    res_dict["sensitivity_elements"] = dml_did_obj.sensitivity_elements
+    res_dict["sensitivity_elements_binary"] = dml_did_binary_obj.sensitivity_elements
+    res_dict["sensitivity_elements_manual"] = fit_sensitivity_elements_did_cs(
+        y,
+        d,
+        t,
+        all_coef=dml_did_obj.all_coef,
+        predictions=dml_did_obj.predictions,
+        score=score,
+        in_sample_normalization=in_sample_normalization,
+        n_rep=1,
+    )
+
+    dml_did_obj.sensitivity_analysis()
+    dml_did_binary_obj.sensitivity_analysis()
+
+    res_dict["sensitivity_params"] = dml_did_obj.sensitivity_params
+    res_dict["sensitivity_params_binary"] = dml_did_binary_obj.sensitivity_params
+
     return res_dict
 
@@ -206,3 +226,53 @@ def test_nuisance_loss(dml_did_cs_binary_vs_did_cs_fixture):
     )
     for key, value in dml_did_cs_binary_vs_did_cs_fixture["nuisance_loss"].items():
         assert np.allclose(value, dml_did_cs_binary_vs_did_cs_fixture["nuisance_loss_binary"][key], rtol=1e-9, atol=1e-3)
+
+
+@pytest.mark.ci
+def test_sensitivity_elements(dml_did_cs_binary_vs_did_cs_fixture):
+    sensitivity_element_names = ["sigma2", "nu2", "psi_sigma2", "psi_nu2"]
+    for sensitivity_element in sensitivity_element_names:
+        assert np.allclose(
+            dml_did_cs_binary_vs_did_cs_fixture["sensitivity_elements"][sensitivity_element],
+            dml_did_cs_binary_vs_did_cs_fixture["sensitivity_elements_manual"][sensitivity_element],
+            rtol=1e-9,
+            atol=1e-4,
+        )
+        assert np.allclose(
+            dml_did_cs_binary_vs_did_cs_fixture["sensitivity_elements"][sensitivity_element],
+            dml_did_cs_binary_vs_did_cs_fixture["sensitivity_elements_binary"][sensitivity_element],
+            rtol=1e-9,
+            atol=1e-4,
+        )
+    for sensitivity_element in ["riesz_rep"]:
+        assert np.allclose(
+            dml_did_cs_binary_vs_did_cs_fixture["sensitivity_elements"][sensitivity_element],
+            dml_did_cs_binary_vs_did_cs_fixture["sensitivity_elements_binary"][sensitivity_element],
+            rtol=1e-9,
+            atol=1e-4,
+        )
+
+
+@pytest.mark.ci
+def test_sensitivity_params(dml_did_cs_binary_vs_did_cs_fixture):
+    for key in ["theta", "se", "ci"]:
+        assert np.allclose(
+            dml_did_cs_binary_vs_did_cs_fixture["sensitivity_params"][key]["lower"],
+            dml_did_cs_binary_vs_did_cs_fixture["sensitivity_params_binary"][key]["lower"],
+            rtol=1e-9,
+            atol=1e-4,
+        )
+        assert np.allclose(
+            dml_did_cs_binary_vs_did_cs_fixture["sensitivity_params"][key]["upper"],
+            dml_did_cs_binary_vs_did_cs_fixture["sensitivity_params_binary"][key]["upper"],
+            rtol=1e-9,
+            atol=1e-4,
+        )
+
+    for key in ["rv", "rva"]:
+        assert np.allclose(
+            dml_did_cs_binary_vs_did_cs_fixture["sensitivity_params"][key],
+            dml_did_cs_binary_vs_did_cs_fixture["sensitivity_params_binary"][key],
+            rtol=1e-9,
+            atol=1e-4,
+        )

From 5d2232b455bede3866f0c2b7626f682f4cbad14d Mon Sep 17 00:00:00 2001
From: SvenKlaassen
Date: Fri, 6 Jun 2025 13:00:20 +0200
Subject: [PATCH 17/41] fix id positions and scaling for sensitivity

---
 doubleml/did/did_cs_binary.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/doubleml/did/did_cs_binary.py b/doubleml/did/did_cs_binary.py
index 479cba93..6b2206a3 100644
--- a/doubleml/did/did_cs_binary.py
+++ b/doubleml/did/did_cs_binary.py
@@ -679,11 +679,19 @@ def _sensitivity_element_est(self, preds):
         nu2 = np.mean(nu2_score_element)
         psi_nu2 = nu2_score_element - nu2
 
+        extend_kwargs = {
+            "n_obs": self._dml_data.n_obs,
+            "id_positions": self.id_positions,
+            "fill_value": 0.0,
+        }
+
+        # add scaling to make variance estimation consistent (sample size difference)
+        scaling = self._dml_data.n_obs / self._n_obs_subset
         element_dict = {
             "sigma2": sigma2,
             "nu2": nu2,
-            "psi_sigma2": psi_sigma2,
-            "psi_nu2": psi_nu2,
-            "riesz_rep": rr,
+            "psi_sigma2": scaling * _set_id_positions(psi_sigma2, **extend_kwargs),
+            "psi_nu2": scaling * _set_id_positions(psi_nu2, **extend_kwargs),
+            "riesz_rep": scaling * _set_id_positions(rr, **extend_kwargs),
         }
         return element_dict

From 7f01b6b5accc1293fba5435ae81129cca4b5f630 Mon Sep 17 00:00:00 2001
From: SvenKlaassen
Date: Fri, 6 Jun 2025 13:00:33 +0200
Subject: [PATCH 18/41] add placebo test for did_cs_binary

---
 .../did/tests/test_did_cs_binary_placebo.py   | 58 +++++++++++++++++++
 1 file changed, 58 insertions(+)
 create mode 100644 doubleml/did/tests/test_did_cs_binary_placebo.py

diff --git a/doubleml/did/tests/test_did_cs_binary_placebo.py b/doubleml/did/tests/test_did_cs_binary_placebo.py
new file mode 100644
index 00000000..61def691
--- /dev/null
+++ b/doubleml/did/tests/test_did_cs_binary_placebo.py
@@ -0,0 +1,58 @@
+import numpy as np
+import pytest
+from lightgbm import LGBMClassifier, LGBMRegressor
+
+from doubleml.data import DoubleMLPanelData
+from doubleml.did import DoubleMLDIDCSBinary
+from doubleml.did.datasets import make_did_CS2021
+
+
+@pytest.fixture(scope="module", params=["observational", "experimental"])
+def did_score(request):
+    return request.param
+
+
+@pytest.fixture(scope="module", params=[1, 3])
+def n_rep(request):
+    return request.param
+
+
+@pytest.fixture(scope="module")
+def doubleml_did_fixture(did_score, n_rep):
+    n_obs = 500
+    dgp = 5  # has to be experimental (for experimental score to be valid)
+    df = make_did_CS2021(n_obs=n_obs, dgp_type=dgp, n_pre_treat_periods=3)
+    dml_data = DoubleMLPanelData(df, y_col="y", d_cols="d", t_col="t", id_col="id", x_cols=["Z1", "Z2", "Z3", "Z4"])
+
+    kwargs = {
+        "obj_dml_data": dml_data,
+        "g_value": dml_data.g_values[0],
+        "t_value_pre": dml_data.t_values[0],
+        "t_value_eval": dml_data.t_values[1],
+        "ml_g": LGBMRegressor(verbose=-1),
+        "ml_m": LGBMClassifier(verbose=-1),
+        "score": did_score,
+        "n_rep": n_rep,
+        "n_folds": 5,
+        "draw_sample_splitting": True,
+    }
+
+    dml_did = DoubleMLDIDCSBinary(**kwargs)
+
+    np.random.seed(3141)
+    dml_did.fit()
+    ci = dml_did.confint(level=0.99)
+
+    res_dict = {
+        "coef": dml_did.coef[0],
+        "ci_lower": ci.iloc[0, 0],
+        "ci_upper": ci.iloc[0, 1],
+    }
+
+    return res_dict
+
+
+@pytest.mark.ci
+def test_zero(doubleml_did_fixture):
+    assert doubleml_did_fixture["ci_lower"] <= 0.0
+    assert doubleml_did_fixture["ci_upper"] >= 0.0

From 3fafccc2cddb36397880ea9dfed993efe8c8d0ad Mon Sep 17 00:00:00 2001
From: SvenKlaassen
Date: Fri, 6 Jun 2025 13:08:45 +0200
Subject: [PATCH 19/41] extend ext prediction tests for did_cs_binary

---
 ...test_did_cs_binary_external_predictions.py | 81 ++++++++++++++++++-
 1 file changed, 80 insertions(+), 1 deletion(-)

diff --git a/doubleml/did/tests/test_did_cs_binary_external_predictions.py b/doubleml/did/tests/test_did_cs_binary_external_predictions.py
index 4e09dfe0..477c6dc7 100644
--- a/doubleml/did/tests/test_did_cs_binary_external_predictions.py
+++ b/doubleml/did/tests/test_did_cs_binary_external_predictions.py
@@ -4,8 +4,9 @@
 import pytest
 from sklearn.linear_model import LinearRegression, LogisticRegression
 
+from doubleml.data import DoubleMLPanelData
 from doubleml.did import DoubleMLDIDCSBinary
-from doubleml.did.datasets import make_did_SZ2020
+from doubleml.did.datasets import make_did_cs_CS2021, make_did_SZ2020
 from doubleml.tests._utils import draw_smpls
 from doubleml.utils import DMLDummyClassifier, DMLDummyRegressor
 
@@ -90,3 +91,81 @@ def test_score(doubleml_did_cs_fixture):
 def test_nuisance_loss(doubleml_did_cs_fixture):
     for key, value in doubleml_did_cs_fixture["dml_did_nuisance_loss"].items():
         assert np.allclose(value, doubleml_did_cs_fixture["dml_did_ext_nuisance_loss"][key], rtol=1e-9, atol=1e-3)
+
+
+@pytest.fixture(scope="module")
+def doubleml_did_cs_panel_fixture(did_score, n_rep):
+    n_obs = 500
+    n_folds = 5
+    dgp = 1
+
+    ext_predictions = {"d": {}}
+    df = make_did_cs_CS2021(n_obs=n_obs, dgp_type=dgp, time_type="float")
+    dml_panel_data = DoubleMLPanelData(df, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"])
+
+    kwargs = {
+        "obj_dml_data": dml_panel_data,
+        "g_value": 2,
+        "t_value_pre": 0,
+        "t_value_eval": 1,
+        "score": did_score,
+        "n_rep": n_rep,
+        "draw_sample_splitting": False,
+    }
+
+    dml_did = DoubleMLDIDCSBinary(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs)
+    all_smpls = draw_smpls(n_obs=dml_did.n_obs_subset, n_folds=n_folds, n_rep=n_rep, groups=dml_did._g_data)
+    dml_did.set_sample_splitting(all_smpls)
+
+    np.random.seed(3141)
+    dml_did.fit(store_predictions=True)
+
+    all_keys = ["ml_g_d0_t0", "ml_g_d0_t1", "ml_g_d1_t0", "ml_g_d1_t1"]
+    for key in all_keys:
+        ext_predictions["d"][key] = dml_did.predictions[key][:, :, 0]
+    if did_score == "observational":
+        ext_predictions["d"]["ml_m"] = dml_did.predictions["ml_m"][:, :, 0]
+    dml_did_ext = DoubleMLDIDCSBinary(ml_g=DMLDummyRegressor(), ml_m=DMLDummyClassifier(), **kwargs)
+    dml_did_ext.set_sample_splitting(all_smpls)
+    np.random.seed(3141)
+    dml_did_ext.fit(external_predictions=ext_predictions)
+
+    res_dict = {
+        "coef": dml_did.coef[0],
+        "coef_ext": dml_did_ext.coef[0],
+        "se": dml_did.se[0],
+        "se_ext": dml_did_ext.se[0],
+        "score": dml_did.psi,
+        "score_ext": dml_did_ext.psi,
+        "dml_did_nuisance_loss": dml_did.nuisance_loss,
+        "dml_did_ext_nuisance_loss": dml_did_ext.nuisance_loss,
+    }
+
+    return res_dict
+
+
+@pytest.mark.ci
+def test_panel_coef(doubleml_did_cs_panel_fixture):
+    assert math.isclose(
+        doubleml_did_cs_panel_fixture["coef"], doubleml_did_cs_panel_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-3
+    )
+
+
+@pytest.mark.ci
+def test_panel_se(doubleml_did_cs_panel_fixture):
+    assert math.isclose(
+        doubleml_did_cs_panel_fixture["se"], doubleml_did_cs_panel_fixture["se_ext"], rel_tol=1e-9, abs_tol=1e-3
+    )
+
+
+@pytest.mark.ci
+def test_panel_score(doubleml_did_cs_panel_fixture):
+    assert np.allclose(
+        doubleml_did_cs_panel_fixture["score"], doubleml_did_cs_panel_fixture["score_ext"], rtol=1e-9, atol=1e-3
+    )
+
+
+@pytest.mark.ci
+def test_panel_nuisance_loss(doubleml_did_cs_panel_fixture):
+    for key, value in doubleml_did_cs_panel_fixture["dml_did_nuisance_loss"].items():
+        assert np.allclose(value, doubleml_did_cs_panel_fixture["dml_did_ext_nuisance_loss"][key], rtol=1e-9, atol=1e-3)

From 9e378518109c15529782025e646825f071790d1c Mon Sep 17 00:00:00 2001
From: SvenKlaassen
Date: Fri, 6 Jun 2025 13:11:44 +0200
Subject: [PATCH 20/41] add control group test for did_cs_binary

---
 .../test_did_cs_binary_control_groups.py      | 31 +++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 doubleml/did/tests/test_did_cs_binary_control_groups.py

diff --git a/doubleml/did/tests/test_did_cs_binary_control_groups.py b/doubleml/did/tests/test_did_cs_binary_control_groups.py
new file mode 100644
index 00000000..ea4f2933
--- /dev/null
+++ b/doubleml/did/tests/test_did_cs_binary_control_groups.py
@@ -0,0 +1,31 @@
+from sklearn.linear_model import LinearRegression, LogisticRegression
+
+import doubleml as dml
+
+df = dml.did.datasets.make_did_cs_CS2021(n_obs=500, dgp_type=1, n_pre_treat_periods=2, n_periods=4, time_type="float")
+dml_data = dml.data.DoubleMLPanelData(df, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"])
+
+args = {
+    "obj_dml_data": dml_data,
+    "ml_g": LinearRegression(),
+    "ml_m": LogisticRegression(),
+    "g_value": 2,
+    "t_value_pre": 0,
+    "t_value_eval": 1,
+    "score": "observational",
+    "n_rep": 1,
+}
+
+
+def test_control_groups_different():
+    dml_did_never_treated = dml.did.DoubleMLDIDCSBinary(control_group="never_treated", **args)
+    dml_did_not_yet_treated = dml.did.DoubleMLDIDCSBinary(control_group="not_yet_treated", **args)
+
+    assert dml_did_never_treated.n_obs_subset != dml_did_not_yet_treated.n_obs_subset
+    # same treatment group
+    assert dml_did_never_treated.data_subset["G_indicator"].sum() == dml_did_not_yet_treated.data_subset["G_indicator"].sum()
+
+    dml_did_never_treated.fit()
+    dml_did_not_yet_treated.fit()
+
+    assert dml_did_never_treated.coef != dml_did_not_yet_treated.coef

From 810eade37a837a3f8c3f0dbae8728c51fc8fea79 Mon Sep 17 00:00:00 2001
From: SvenKlaassen
Date: Fri, 6 Jun 2025 13:40:50 +0200
Subject: [PATCH 21/41] add tune to did_cs_binary

---
 doubleml/did/did_cs_binary.py                 | 115 ++++++++-
 doubleml/did/tests/test_did_cs_binary_tune.py | 221 ++++++++++++++++++
 2 files changed, 334 insertions(+), 2 deletions(-)
 create mode 100644 doubleml/did/tests/test_did_cs_binary_tune.py

diff --git a/doubleml/did/did_cs_binary.py b/doubleml/did/did_cs_binary.py
index 6b2206a3..161a31c3 100644
--- a/doubleml/did/did_cs_binary.py
+++ b/doubleml/did/did_cs_binary.py
@@ -23,7 +23,7 @@
     _check_score,
     _check_trimming,
 )
-from doubleml.utils._estimation import _dml_cv_predict, _get_cond_smpls_2d
+from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls_2d
 from doubleml.utils._propensity_score import _trimm
 
@@ -586,7 +586,118 @@ def _score_elements(self, y, d, t, g_hat_d0_t0, g_hat_d0_t1, g_hat_d1_t0, g_hat_
     def _nuisance_tuning(
         self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search
     ):
-        pass
+        x, y = check_X_y(X=self._x_data, y=self._y_data, force_all_finite=False)
+        _, d = check_X_y(x, self._g_data, force_all_finite=False)  # (d is the G_indicator)
+        _, t = check_X_y(x, self._t_data, force_all_finite=False)
+
+        if scoring_methods is None:
+            scoring_methods = {"ml_g": None, "ml_m": None}
+
+        # nuisance training sets conditional on d and t
+        smpls_d0_t0, smpls_d0_t1, smpls_d1_t0, smpls_d1_t1 = _get_cond_smpls_2d(smpls, d, t)
+        train_inds = [train_index for (train_index, _) in smpls]
+        train_inds_d0_t0 = [train_index for (train_index, _) in smpls_d0_t0]
+        train_inds_d0_t1 = [train_index for (train_index, _) in smpls_d0_t1]
+        train_inds_d1_t0 = [train_index for (train_index, _) in smpls_d1_t0]
+        train_inds_d1_t1 = [train_index for (train_index, _) in smpls_d1_t1]
+
+        tune_args = {
+            "n_folds_tune": n_folds_tune,
+            "n_jobs_cv": n_jobs_cv,
+            "search_mode": search_mode,
+            "n_iter_randomized_search": n_iter_randomized_search,
+        }
+
+        g_d0_t0_tune_res = _dml_tune(
+            y,
+            x,
+            train_inds_d0_t0,
+            self._learner["ml_g"],
+            param_grids["ml_g"],
+            scoring_methods["ml_g"],
+            **tune_args,
+        )
+
+        g_d0_t1_tune_res = _dml_tune(
+            y,
+            x,
+            train_inds_d0_t1,
+            self._learner["ml_g"],
+            param_grids["ml_g"],
+            scoring_methods["ml_g"],
+            **tune_args,
+        )
+
+        g_d1_t0_tune_res = _dml_tune(
+            y,
+            x,
+            train_inds_d1_t0,
+            self._learner["ml_g"],
+            param_grids["ml_g"],
+            scoring_methods["ml_g"],
+            **tune_args,
+        )
+
+        g_d1_t1_tune_res = _dml_tune(
+            y,
+            x,
+            train_inds_d1_t1,
+            self._learner["ml_g"],
+            param_grids["ml_g"],
+            scoring_methods["ml_g"],
+            **tune_args,
+        )
+
+        m_tune_res = list()
+        if self.score == "observational":
+            m_tune_res = _dml_tune(
+                d,
+                x,
+                train_inds,
+                self._learner["ml_m"],
+                param_grids["ml_m"],
+                scoring_methods["ml_m"],
+                **tune_args,
+            )
+
+        g_d0_t0_best_params = [xx.best_params_ for xx in g_d0_t0_tune_res]
+        g_d0_t1_best_params = [xx.best_params_ for xx in g_d0_t1_tune_res]
+        g_d1_t0_best_params = [xx.best_params_ for xx in g_d1_t0_tune_res]
+        g_d1_t1_best_params = [xx.best_params_ for xx in g_d1_t1_tune_res]
+
+        if self.score == "observational":
+            m_best_params = [xx.best_params_ for xx in m_tune_res]
+            params = {
+                "ml_g_d0_t0": g_d0_t0_best_params,
+                "ml_g_d0_t1": g_d0_t1_best_params,
+                "ml_g_d1_t0": g_d1_t0_best_params,
+                "ml_g_d1_t1": g_d1_t1_best_params,
+                "ml_m": m_best_params,
+            }
+            tune_res = {
+                "g_d0_t0_tune": g_d0_t0_tune_res,
+                "g_d0_t1_tune": g_d0_t1_tune_res,
+                "g_d1_t0_tune": g_d1_t0_tune_res,
+                "g_d1_t1_tune": g_d1_t1_tune_res,
+                "m_tune": m_tune_res,
+            }
+        else:
+            params = {
+                "ml_g_d0_t0": g_d0_t0_best_params,
+                "ml_g_d0_t1": g_d0_t1_best_params,
+                "ml_g_d1_t0": g_d1_t0_best_params,
+                "ml_g_d1_t1": g_d1_t1_best_params,
+            }
+            tune_res = {
+                "g_d0_t0_tune": g_d0_t0_tune_res,
+                "g_d0_t1_tune": g_d0_t1_tune_res,
+                "g_d1_t0_tune": g_d1_t0_tune_res,
+                "g_d1_t1_tune": g_d1_t1_tune_res,
+            }
+
+        res = {"params": params, "tune_res": tune_res}
+
+        return res
 
     def _sensitivity_element_est(self, preds):
         y = self._y_data
diff --git a/doubleml/did/tests/test_did_cs_binary_tune.py b/doubleml/did/tests/test_did_cs_binary_tune.py
new file mode 100644
index 00000000..0bd2c6ab
--- /dev/null
+++ b/doubleml/did/tests/test_did_cs_binary_tune.py
@@ -0,0 +1,221 @@
+import math
+
+import numpy as np
+import pytest
+from sklearn.base import clone
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.linear_model import LogisticRegression
+
+import doubleml as dml
+
+from ...tests._utils import draw_smpls
+from ._utils_did_cs_manual import fit_did_cs, tune_nuisance_did_cs
+from ._utils_did_manual import boot_did
+
+
+@pytest.fixture(scope="module", params=[RandomForestRegressor(random_state=42)])
+def learner_g(request):
+    return request.param
+
+
+@pytest.fixture(scope="module", params=[LogisticRegression()])
+def learner_m(request):
+    return request.param
+
+
+@pytest.fixture(scope="module", params=["observational", "experimental"])
+def score(request):
+    return request.param
+
+
+@pytest.fixture(scope="module", params=[True, False])
+def in_sample_normalization(request):
+    return request.param
+
+
+@pytest.fixture(scope="module", params=[True, False])
+def tune_on_folds(request):
+    return request.param
+
+
+def get_par_grid(learner):
+    if learner.__class__ in [RandomForestRegressor]:
+        par_grid = {"n_estimators": [5, 10, 20]}
+    else:
+        assert learner.__class__ in [LogisticRegression]
+        par_grid = {"C": np.logspace(-4, 2, 10)}
+    return par_grid
+
+
+@pytest.fixture(scope="module")
+def dml_did_fixture(generate_data_did_binary, learner_g, learner_m, score, in_sample_normalization, tune_on_folds):
+    par_grid = {"ml_g": get_par_grid(learner_g), "ml_m": get_par_grid(learner_m)}
+    n_folds_tune = 4
+
+    boot_methods = ["normal"]
+    n_folds = 2
+    n_rep_boot = 499
+
+    # collect data
+    dml_panel_data = generate_data_did_binary
+    df = dml_panel_data._data.sort_values(by=["id", "t"])
+    # Reorder data beforehand to make both approaches compatible
+    dml_panel_data = dml.data.DoubleMLPanelData(
+        df, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"]
+    )
+    obj_dml_data = dml.DoubleMLData(df, y_col="y", d_cols="d", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"])
+
+    n_obs = df.shape[0]
+    strata = df["d"] + 2 * df["t"]  # only valid since the d and t values are binary
+    all_smpls = draw_smpls(n_obs, n_folds, n_rep=1, groups=strata)
+
+    # Set machine learning methods for m & g
+    ml_g = clone(learner_g)
+    ml_m = clone(learner_m)
+
+    dml_args = {
+        "ml_g": ml_g,
+        "ml_m": ml_m,
+        "n_folds": n_folds,
+        "score": score,
+        "in_sample_normalization": in_sample_normalization,
+        "draw_sample_splitting": False,
+    }
+
+    dml_did_binary_obj = dml.did.DoubleMLDIDCSBinary(
+        dml_panel_data,
+        g_value=1,
+        t_value_pre=0,
+        t_value_eval=1,
+        **dml_args,
+    )
+
+    dml_did_obj = dml.DoubleMLDIDCS(
+        obj_dml_data,
+        **dml_args,
+    )
+
+    # synchronize the sample splitting
+    dml_did_obj.set_sample_splitting(all_smpls=all_smpls)
+    dml_did_binary_obj.set_sample_splitting(all_smpls=all_smpls)
+
+    # tune hyperparameters
+    np.random.seed(3141)
+    tune_res = dml_did_obj.tune(par_grid, tune_on_folds=tune_on_folds, n_folds_tune=n_folds_tune, return_tune_res=False)
+    assert isinstance(tune_res, dml.DoubleMLDIDCS)
+    np.random.seed(3141)
+    tune_res_binary = dml_did_binary_obj.tune(
+        par_grid, tune_on_folds=tune_on_folds, n_folds_tune=n_folds_tune, return_tune_res=False
+    )
+    assert isinstance(tune_res_binary, dml.did.DoubleMLDIDCSBinary)
+
+    dml_did_obj.fit()
+    dml_did_binary_obj.fit()
+
+    # manual fit
+    y = df["y"].values
+    d = df["d"].values
+    x = df[["Z1", "Z2", "Z3", "Z4"]].values
+    t = df["t"].values
+    np.random.seed(3141)
+    smpls = all_smpls[0]
+
+    if tune_on_folds:
+        g_d0_t0_params, g_d0_t1_params, g_d1_t0_params, g_d1_t1_params, m_params = tune_nuisance_did_cs(
+            y, x, d, t, clone(learner_g), clone(learner_m), smpls, score, n_folds_tune, par_grid["ml_g"], par_grid["ml_m"]
+        )
+    else:
+        xx = [(np.arange(len(y)), np.array([]))]
+        g_d0_t0_params, g_d0_t1_params, g_d1_t0_params, g_d1_t1_params, m_params = tune_nuisance_did_cs(
+            y, x, d, t, clone(learner_g), clone(learner_m), xx, score, n_folds_tune, par_grid["ml_g"], par_grid["ml_m"]
+        )
+        g_d0_t0_params = g_d0_t0_params * n_folds
+        g_d0_t1_params = g_d0_t1_params * n_folds
+        g_d1_t0_params = g_d1_t0_params * n_folds
+        g_d1_t1_params = g_d1_t1_params * n_folds
+        if score == "observational":
+            m_params = m_params * n_folds
+        else:
+            assert score == "experimental"
+            m_params = None
+
+    res_manual = fit_did_cs(
+        y,
+        x,
+        d,
+        t,
+        clone(learner_g),
+        clone(learner_m),
+        all_smpls,
+        score,
+        in_sample_normalization,
+        g_d0_t0_params=g_d0_t0_params,
+        g_d0_t1_params=g_d0_t1_params,
+        g_d1_t0_params=g_d1_t0_params,
+        g_d1_t1_params=g_d1_t1_params,
+        m_params=m_params,
+    )
+
+    res_dict = {
+        "coef": dml_did_obj.coef,
+        "coef_binary": dml_did_binary_obj.coef,
+        "coef_manual": res_manual["theta"],
+        "se": dml_did_obj.se,
+        "se_binary": dml_did_binary_obj.se,
+        "se_manual": res_manual["se"],
+        "boot_methods": boot_methods,
+    }
+
+    for bootstrap in boot_methods:
+        np.random.seed(3141)
+        boot_t_stat = boot_did(
+            y,
+            res_manual["thetas"],
+            res_manual["ses"],
+            res_manual["all_psi_a"],
+            res_manual["all_psi_b"],
+            all_smpls,
+            bootstrap,
+            n_rep_boot,
+        )
+
+        np.random.seed(3141)
+        dml_did_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
+        np.random.seed(3141)
+        dml_did_binary_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
+
+        res_dict["boot_t_stat" + bootstrap] = dml_did_obj.boot_t_stat
+        res_dict["boot_t_stat" + bootstrap + "_binary"] = dml_did_binary_obj.boot_t_stat
+        res_dict["boot_t_stat" + bootstrap + "_manual"] = boot_t_stat.reshape(-1, 1, 1)
+
+    return res_dict
+
+
+@pytest.mark.ci
+def test_dml_did_coef(dml_did_fixture):
+    assert math.isclose(dml_did_fixture["coef"][0], dml_did_fixture["coef_manual"], rel_tol=1e-9, abs_tol=1e-4)
+    assert math.isclose(dml_did_fixture["coef_binary"][0], dml_did_fixture["coef"][0], rel_tol=1e-9, abs_tol=1e-4)
+
+
+@pytest.mark.ci
+def test_dml_did_se(dml_did_fixture):
+    assert math.isclose(dml_did_fixture["se"][0], dml_did_fixture["se_manual"], rel_tol=1e-9, abs_tol=1e-4)
+    assert math.isclose(dml_did_fixture["se_binary"][0], dml_did_fixture["se"][0], rel_tol=1e-9, abs_tol=1e-4)
+
+
+@pytest.mark.ci
+def test_boot(dml_did_fixture):
+    for bootstrap in dml_did_fixture["boot_methods"]:
+        assert np.allclose(
+            dml_did_fixture["boot_t_stat" + bootstrap],
+            dml_did_fixture["boot_t_stat" + bootstrap + "_manual"],
+            rtol=1e-9,
+            atol=1e-4,
+        )
+
+        assert np.allclose(
+            dml_did_fixture["boot_t_stat" + bootstrap],
+            dml_did_fixture["boot_t_stat" + bootstrap + "_binary"],
+            rtol=1e-9,
+            atol=1e-4,
+        )

From 6b6116cb608414a3c9313447d89c51a0c04c3651 Mon Sep 17 00:00:00 2001
From: SvenKlaassen
Date: Fri, 6 Jun 2025 13:53:03 +0200
Subject: [PATCH 22/41] update did_cs_binary stdout test

---
 .../did/tests/test_did_cs_binary_stdout.py    | 49 +++++++++++++++++++
 1 file changed, 49 insertions(+)
 create mode 100644 doubleml/did/tests/test_did_cs_binary_stdout.py

diff --git a/doubleml/did/tests/test_did_cs_binary_stdout.py b/doubleml/did/tests/test_did_cs_binary_stdout.py
new file mode 100644
index 00000000..16135636
--- /dev/null
+++ b/doubleml/did/tests/test_did_cs_binary_stdout.py
@@ -0,0 +1,49 @@
+import io
+from contextlib import redirect_stdout
+
+import pytest
+from sklearn.linear_model import LinearRegression, LogisticRegression
+
+import doubleml as dml
+
+dml_data = dml.did.datasets.make_did_SZ2020(n_obs=500, dgp_type=1, return_type="DoubleMLPanelData")
+
+
+@pytest.mark.ci
+def test_print_periods():
+    """Test that print_periods parameter correctly controls output printing."""
+
+    # Create test data
+    dml_data = dml.did.datasets.make_did_SZ2020(n_obs=100, return_type="DoubleMLPanelData")
+
+    # Test 1: Default case (print_periods=False) - should not print anything
+    f = io.StringIO()
+    with redirect_stdout(f):
+        _ = dml.did.DoubleMLDIDCSBinary(
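+            # with print_periods=False the constructor must run silently;
+            # the captured stdout is asserted to be empty below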
+            obj_dml_data=dml_data,
+            ml_g=LinearRegression(),
+            ml_m=LogisticRegression(),
+            g_value=1,
+            t_value_pre=0,
+            t_value_eval=1,
+            print_periods=False,  # Default
+        )
+    output_default = f.getvalue()
+    assert output_default.strip() == "", "Expected no output with print_periods=False"
+
+    # Test 2: With print_periods=True - should print information
+    f = io.StringIO()
+    with redirect_stdout(f):
+        _ = dml.did.DoubleMLDIDCSBinary(
+            obj_dml_data=dml_data,
+            ml_g=LinearRegression(),
+            ml_m=LogisticRegression(),
+            g_value=1,
+            t_value_pre=0,
+            t_value_eval=1,
+            print_periods=True,
+        )
+    output_print = f.getvalue()
+    assert "Evaluation of ATT(1, 1), with pre-treatment period 0" in output_print
+    assert "post-treatment: True" in output_print
+    assert "Control group: never_treated" in output_print

From de324cfe102bc466e571f298e0be2f499911b0f0 Mon Sep 17 00:00:00 2001
From: SvenKlaassen
Date: Fri, 6 Jun 2025 13:59:54 +0200
Subject: [PATCH 23/41] add exceptions and tests

---
 doubleml/did/did_cs_binary.py                 |  28 ++++
 .../tests/test_did_cs_binary_exceptions.py    | 152 ++++++++++++++++++
 2 files changed, 180 insertions(+)
 create mode 100644 doubleml/did/tests/test_did_cs_binary_exceptions.py

diff --git a/doubleml/did/did_cs_binary.py b/doubleml/did/did_cs_binary.py
index 161a31c3..a34dbf2a 100644
--- a/doubleml/did/did_cs_binary.py
+++ b/doubleml/did/did_cs_binary.py
@@ -806,3 +806,31 @@ def _sensitivity_element_est(self, preds):
             "riesz_rep": scaling * _set_id_positions(rr, **extend_kwargs),
         }
         return element_dict
+
+    def sensitivity_benchmark(self, benchmarking_set, fit_args=None):
+        """
+        Computes a benchmark for a given set of features.
+        Returns a DataFrame containing the corresponding values for cf_y, cf_d, rho and the change in estimates.
+
+        Parameters
+        ----------
+        benchmarking_set : list
+            List of features to be used for benchmarking.
+
+        fit_args : dict, optional
+            Additional arguments for the fit method.
+            Default is None.
+
+        Returns
+        -------
+        benchmark_results : pandas.DataFrame
+            Benchmark results.
+        """
+        if self.score == "experimental":
+            warnings.warn(
+                "Sensitivity benchmarking for experimental score may not be meaningful. "
+                "Consider using score='observational' for conditional treatment assignment.",
+                UserWarning,
+            )
+
+        return super().sensitivity_benchmark(benchmarking_set, fit_args)
diff --git a/doubleml/did/tests/test_did_cs_binary_exceptions.py b/doubleml/did/tests/test_did_cs_binary_exceptions.py
new file mode 100644
index 00000000..b506da2d
--- /dev/null
+++ b/doubleml/did/tests/test_did_cs_binary_exceptions.py
@@ -0,0 +1,152 @@
+from unittest.mock import patch
+
+import numpy as np
+import pandas as pd
+import pytest
+from sklearn.linear_model import LinearRegression, LogisticRegression
+
+import doubleml as dml
+
+dml_data = dml.did.datasets.make_did_SZ2020(n_obs=500, dgp_type=1, return_type="DoubleMLPanelData")
+
+valid_arguments = {
+    "obj_dml_data": dml_data,
+    "ml_g": LinearRegression(),
+    "ml_m": LogisticRegression(),
+    "g_value": 1,
+    "t_value_pre": 0,
+    "t_value_eval": 1,
+    "score": "observational",
+    "n_rep": 1,
+    "draw_sample_splitting": True,
+}
+
+
+@pytest.mark.ci
+def test_input():
+    # control group
+    msg = r"The control group has to be one of \['never_treated', 'not_yet_treated'\]. 0 was passed."
+    with pytest.raises(ValueError, match=msg):
+        invalid_arguments = {"control_group": 0}
+        _ = dml.did.DoubleMLDIDCSBinary(**(valid_arguments | invalid_arguments))
+
+    # g value
+    msg = r"The value test is not in the set of treatment group values \[0 1\]."
+    with pytest.raises(ValueError, match=msg):
+        invalid_arguments = {"g_value": "test"}
+        _ = dml.did.DoubleMLDIDCSBinary(**(valid_arguments | invalid_arguments))
+
+    msg = r"The never treated group is not allowed as treatment group \(g_value=0\)."
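+    # g_value=0 encodes the never-treated group; the next two blocks check that
+    # both the int (0) and the float (0.0) representation are rejected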
+    with pytest.raises(ValueError, match=msg):
+        invalid_arguments = {"g_value": 0}
+        _ = dml.did.DoubleMLDIDCSBinary(**(valid_arguments | invalid_arguments))
+
+    msg = r"The never treated group is not allowed as treatment group \(g_value=0\)."
+    with pytest.raises(ValueError, match=msg):
+        invalid_arguments = {"g_value": 0.0}
+        _ = dml.did.DoubleMLDIDCSBinary(**(valid_arguments | invalid_arguments))
+
+    # t values
+    msg = r"The value test is not in the set of evaluation period values \[0 1\]."
+    with pytest.raises(ValueError, match=msg):
+        invalid_arguments = {"t_value_pre": "test"}
+        _ = dml.did.DoubleMLDIDCSBinary(**(valid_arguments | invalid_arguments))
+    with pytest.raises(ValueError, match=msg):
+        invalid_arguments = {"t_value_eval": "test"}
+        _ = dml.did.DoubleMLDIDCSBinary(**(valid_arguments | invalid_arguments))
+
+    # in-sample normalization
+    msg = "in_sample_normalization indicator has to be boolean. Object of type <class 'str'> passed."
+    with pytest.raises(TypeError, match=msg):
+        invalid_arguments = {"in_sample_normalization": "test"}
+        _ = dml.did.DoubleMLDIDCSBinary(**(valid_arguments | invalid_arguments))
+
+    # ml_g classifier
+    msg = r"The ml_g learner LogisticRegression\(\) was identified as"
+    with pytest.raises(ValueError, match=msg):
+        invalid_arguments = {"ml_g": LogisticRegression()}
+        _ = dml.did.DoubleMLDIDCSBinary(**(valid_arguments | invalid_arguments))
+
+
+@pytest.mark.ci
+def test_no_control_group_exception():
+    msg = "No observations in the control group."
+    with pytest.raises(ValueError, match=msg):
+        invalid_data = dml.did.datasets.make_did_SZ2020(n_obs=500, dgp_type=1, return_type="DoubleMLPanelData")
+        invalid_data.data["d"] = 1.0
+        invalid_arguments = {"obj_dml_data": invalid_data, "control_group": "not_yet_treated"}
+        _ = dml.did.DoubleMLDIDCSBinary(**(valid_arguments | invalid_arguments))
+
+
+@pytest.mark.ci
+def test_check_data_exceptions():
+    """Test exception handling for _check_data method in DoubleMLDIDCSBinary"""
+    df = pd.DataFrame(np.random.normal(size=(10, 5)), columns=[f"Col_{i}" for i in range(5)])
+
+    # Test 1: Data has to be DoubleMLPanelData
+    invalid_data_types = [
+        dml.data.DoubleMLData(df, y_col="Col_0", d_cols="Col_1"),
+    ]
+
+    for invalid_data in invalid_data_types:
+        msg = r"For repeated outcomes the data must be of DoubleMLPanelData type\."
+        with pytest.raises(TypeError, match=msg):
+            _ = dml.did.DoubleMLDIDCSBinary(
+                obj_dml_data=invalid_data,
+                ml_g=LinearRegression(),
+                ml_m=LogisticRegression(),
+                g_value=1,
+                t_value_pre=0,
+                t_value_eval=1,
+            )
+
+    # Test 2: Data cannot have instrumental variables
+    df_with_z = dml_data.data.copy()
+    dml_data_with_z = dml.data.DoubleMLPanelData(
+        df_with_z, y_col="y", d_cols="d", id_col="id", t_col="t", z_cols=["Z1"], x_cols=["Z2", "Z3", "Z4"]
+    )
+
+    msg = r"Incompatible data. Z1 have been set as instrumental variable\(s\)."
+    with pytest.raises(NotImplementedError, match=msg):
+        _ = dml.did.DoubleMLDIDCSBinary(
+            obj_dml_data=dml_data_with_z,
+            ml_g=LinearRegression(),
+            ml_m=LogisticRegression(),
+            g_value=1,
+            t_value_pre=0,
+            t_value_eval=1,
+        )
+
+    # Test 3: Data must have exactly one treatment variable (using mock)
+    with patch.object(dml_data.__class__, "n_treat", property(lambda self: 2)):
+        msg = (
+            "Incompatible data. To fit an DID model with DML exactly one variable needs to be specified as treatment variable."
+        )
+        with pytest.raises(ValueError, match=msg):
+            _ = dml.did.DoubleMLDIDCSBinary(
+                obj_dml_data=dml_data,
+                ml_g=LinearRegression(),
+                ml_m=LogisticRegression(),
+                g_value=1,
+                t_value_pre=0,
+                t_value_eval=1,
+            )
+
+
+@pytest.mark.ci
+def test_benchmark_warning():
+    """Test warning when sensitivity_benchmark is called with experimental score"""
+    args = {
+        "obj_dml_data": dml_data,
+        "ml_g": LinearRegression(),
+        "ml_m": LogisticRegression(),
+        "g_value": 1,
+        "t_value_pre": 0,
+        "t_value_eval": 1,
+        "n_rep": 1,
+    }
+    # Create a DID model with experimental score
+    did_model = dml.did.DoubleMLDIDCSBinary(**args, score="experimental")
+    did_model.fit()
+    with pytest.warns(UserWarning, match="Sensitivity benchmarking for experimental score may not be meaningful"):
+        did_model.sensitivity_benchmark(["Z1", "Z2"])

From 8d0c52c54405a089e9171a5380185fc7ebff272a Mon Sep 17 00:00:00 2001
From: SvenKlaassen
Date: Fri, 6 Jun 2025 15:02:54 +0200
Subject: [PATCH 24/41] simplify did_cs_binary nuisance estimation

---
 doubleml/did/did_cs_binary.py | 131 +++++++++++-----------------------
 1 file changed, 40 insertions(+), 91 deletions(-)

diff --git a/doubleml/did/did_cs_binary.py b/doubleml/did/did_cs_binary.py
index a34dbf2a..fafcecf4 100644
--- a/doubleml/did/did_cs_binary.py
+++ b/doubleml/did/did_cs_binary.py
@@ -318,6 +318,34 @@ def _preprocess_data(self, g_value, pre_t, eval_t):
         data_subset = data_subset.assign(t_indicator=data_subset[t_col] == eval_t)
         return data_subset
 
+    def _estimate_conditional_g(
+        self, x, y, d_val, t_val, d_arr, t_arr, smpls_cond, external_prediction, learner_param_key, n_jobs_cv, return_models
+    ):
+        """Helper function to estimate conditional g_hat for fixed d and t."""
+        g_hat_cond = {}
+        condition = (d_arr == d_val) & (t_arr == t_val)
+
+        if external_prediction is not None:
+            ml_g_targets = np.full_like(y, np.nan, dtype="float64")
+            ml_g_targets[condition] = y[condition]
+            ml_pred = _get_id_positions(external_prediction, self.id_positions)
+            g_hat_cond = {"preds": ml_pred, "targets": ml_g_targets, "models": None}
+        else:
+            g_hat_cond = _dml_cv_predict(
+                self._learner["ml_g"],
+                x,
+                y,
+                smpls_cond,
+                n_jobs=n_jobs_cv,
+                est_params=self._get_params(learner_param_key),
+                method=self._predict_method["ml_g"],
+                return_models=return_models,
+            )
+            _check_finite_predictions(g_hat_cond["preds"], self._learner["ml_g"], "ml_g", smpls_cond)
+        g_hat_cond["targets"] = g_hat_cond["targets"].astype(float)
+        g_hat_cond["targets"][~condition] = np.nan
+        return g_hat_cond
+
     def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False):
         # Here: d is a binary treatment indicator
@@ -333,97 +361,18 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
 
         # nuisance g
         smpls_d0_t0, smpls_d0_t1, smpls_d1_t0, smpls_d1_t1 = _get_cond_smpls_2d(smpls, d, t)
 
-        # nuisance g for d==0 & t==0
-        if external_predictions["ml_g_d0_t0"] is not None:
-            ml_g_d0_t0_targets = np.full_like(y, np.nan, dtype="float64")
-            ml_g_d0_t0_targets[((d == 0) & (t == 0))] = y[((d == 0) & (t == 0))]
-            ml_d0_t0_pred = _get_id_positions(external_predictions["ml_g_d0_t0"], self.id_positions)
-            g_hat_d0_t0 = {"preds": ml_d0_t0_pred, "targets": ml_g_d0_t0_targets, "models": None}
-        else:
-            g_hat_d0_t0 = _dml_cv_predict(
-                self._learner["ml_g"],
-                x,
-                y,
-                smpls_d0_t0,
-                n_jobs=n_jobs_cv,
-                est_params=self._get_params("ml_g_d0_t0"),
-                method=self._predict_method["ml_g"],
-                return_models=return_models,
-            )
-
-            _check_finite_predictions(g_hat_d0_t0["preds"], self._learner["ml_g"], "ml_g", smpls)
-        # adjust target values to consider only compatible subsamples
-        g_hat_d0_t0["targets"] = g_hat_d0_t0["targets"].astype(float)
-        g_hat_d0_t0["targets"][np.invert((d == 0) & (t == 0))] = np.nan
-
-        # nuisance g for d==0 & t==1
-        if external_predictions["ml_g_d0_t1"] is not None:
-            ml_g_d0_t1_targets = np.full_like(y, np.nan, dtype="float64")
-            ml_g_d0_t1_targets[((d == 0) & (t == 1))] = y[((d == 0) & (t == 1))]
-            ml_d0_t1_pred = _get_id_positions(external_predictions["ml_g_d0_t1"], self.id_positions)
-            g_hat_d0_t1 = {"preds": ml_d0_t1_pred, "targets": ml_g_d0_t1_targets, "models": None}
-        else:
-            g_hat_d0_t1 = _dml_cv_predict(
-                self._learner["ml_g"],
-                x,
-                y,
-                smpls_d0_t1,
-                n_jobs=n_jobs_cv,
-                est_params=self._get_params("ml_g_d0_t1"),
-                method=self._predict_method["ml_g"],
-                return_models=return_models,
-            )
-
-            _check_finite_predictions(g_hat_d0_t1["preds"], self._learner["ml_g"], "ml_g", smpls)
-        # adjust target values to consider only compatible subsamples
-        g_hat_d0_t1["targets"] = g_hat_d0_t1["targets"].astype(float)
-        g_hat_d0_t1["targets"][np.invert((d == 0) & (t == 1))] = np.nan
-
-        # nuisance g for d==1 & t==0
-        if external_predictions["ml_g_d1_t0"] is not None:
-            ml_g_d1_t0_targets = np.full_like(y, np.nan, dtype="float64")
-            ml_g_d1_t0_targets[((d == 1) & (t == 0))] = y[((d == 1) & (t == 0))]
-            ml_d1_t0_pred = _get_id_positions(external_predictions["ml_g_d1_t0"], self.id_positions)
-            g_hat_d1_t0 = {"preds": ml_d1_t0_pred, "targets": ml_g_d1_t0_targets, "models": None}
-        else:
-            g_hat_d1_t0 = _dml_cv_predict(
-                self._learner["ml_g"],
-                x,
-                y,
-                smpls_d1_t0,
-                n_jobs=n_jobs_cv,
-                est_params=self._get_params("ml_g_d1_t0"),
-                method=self._predict_method["ml_g"],
-                return_models=return_models,
-            )
-
-            _check_finite_predictions(g_hat_d1_t0["preds"], self._learner["ml_g"], "ml_g", smpls)
-        # adjust target values to consider only compatible subsamples
-        g_hat_d1_t0["targets"] = g_hat_d1_t0["targets"].astype(float)
-        g_hat_d1_t0["targets"][np.invert((d == 1) & (t == 0))] = np.nan
-
-        # nuisance g for d==1 & t==1
-        if external_predictions["ml_g_d1_t1"] is not None:
-            ml_g_d1_t1_targets = np.full_like(y, np.nan, dtype="float64")
-            ml_g_d1_t1_targets[((d == 1) & (t == 1))] = y[((d == 1) & (t == 1))]
-            ml_d1_t1_pred = _get_id_positions(external_predictions["ml_g_d1_t1"], self.id_positions)
-            g_hat_d1_t1 = {"preds": ml_d1_t1_pred, "targets": ml_g_d1_t1_targets, "models": None}
-        else:
-            g_hat_d1_t1 = _dml_cv_predict(
-                self._learner["ml_g"],
-                x,
-                y,
-                smpls_d1_t1,
-                n_jobs=n_jobs_cv,
-                est_params=self._get_params("ml_g_d1_t1"),
-                method=self._predict_method["ml_g"],
-                return_models=return_models,
-            )
-
-            _check_finite_predictions(g_hat_d1_t1["preds"], self._learner["ml_g"], "ml_g", smpls)
-        # adjust target values to consider only compatible subsamples
-        g_hat_d1_t1["targets"] = g_hat_d1_t1["targets"].astype(float)
-        g_hat_d1_t1["targets"][np.invert((d == 1) & (t == 1))] = np.nan
+        g_hat_d0_t0 = self._estimate_conditional_g(
+            x, y, 0, 0, d, t, smpls_d0_t0, external_predictions["ml_g_d0_t0"], "ml_g_d0_t0", n_jobs_cv, return_models
+        )
+        g_hat_d0_t1 = self._estimate_conditional_g(
+            x, y, 0, 1, d, t, smpls_d0_t1, external_predictions["ml_g_d0_t1"], "ml_g_d0_t1", n_jobs_cv, return_models
+        )
+        g_hat_d1_t0 = self._estimate_conditional_g(
+            x, y, 1, 0, d, t, smpls_d1_t0, external_predictions["ml_g_d1_t0"], "ml_g_d1_t0", n_jobs_cv, return_models
+        )
+        g_hat_d1_t1 = self._estimate_conditional_g(
+            x, y, 1, 1, d, t, smpls_d1_t1, external_predictions["ml_g_d1_t1"], "ml_g_d1_t1",
n_jobs_cv, return_models + ) # only relevant for observational setting m_hat = {"preds": None, "targets": None, "models": None} From af45f7fb8b7dee34a480b7843054163a37d47efc Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Wed, 11 Jun 2025 08:34:48 +0200 Subject: [PATCH 25/41] add __str__ method to did_cs_binary --- doubleml/did/did_cs_binary.py | 53 +++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/doubleml/did/did_cs_binary.py b/doubleml/did/did_cs_binary.py index fafcecf4..7788f4b3 100644 --- a/doubleml/did/did_cs_binary.py +++ b/doubleml/did/did_cs_binary.py @@ -156,6 +156,59 @@ def __init__( self._sensitivity_implemented = True self._external_predictions_implemented = True + def __str__(self): + class_name = self.__class__.__name__ + header = f"================== {class_name} Object ==================\n" + data_summary = self._dml_data._data_summary_str() + score_info = ( + f"Score function: {str(self.score)}\n" + f"Treatment group: {str(self.g_value)}\n" + f"Pre-treatment period: {str(self.t_value_pre)}\n" + f"Evaluation period: {str(self.t_value_eval)}\n" + f"Control group: {str(self.control_group)}\n" + f"Anticipation periods: {str(self.anticipation_periods)}\n" + f"Effective sample size: {str(self.n_obs_subset)}\n" + ) + learner_info = "" + for key, value in self.learner.items(): + learner_info += f"Learner {key}: {str(value)}\n" + if self.nuisance_loss is not None: + learner_info += "Out-of-sample Performance:\n" + is_classifier = [value for value in self._is_classifier.values()] + is_regressor = [not value for value in is_classifier] + if any(is_regressor): + learner_info += "Regression:\n" + for learner in [key for key, value in self._is_classifier.items() if value is False]: + learner_info += f"Learner {learner} RMSE: {self.nuisance_loss[learner]}\n" + if any(is_classifier): + learner_info += "Classification:\n" + for learner in [key for key, value in self._is_classifier.items() if value is True]: + learner_info += f"Learner {learner} Log Loss: {self.nuisance_loss[learner]}\n" + + if self._is_cluster_data: + resampling_info = ( + f"No. folds per cluster: {self._n_folds_per_cluster}\n" + f"No. folds: {self.n_folds}\n" + f"No. repeated sample splits: {self.n_rep}\n" + ) + else: + resampling_info = f"No. folds: {self.n_folds}\nNo. 
repeated sample splits: {self.n_rep}\n" + fit_summary = str(self.summary) + res = ( + header + + "\n------------------ Data summary ------------------\n" + + data_summary + + "\n------------------ Score & algorithm ------------------\n" + + score_info + + "\n------------------ Machine learner ------------------\n" + + learner_info + + "\n------------------ Resampling ------------------\n" + + resampling_info + + "\n------------------ Fit summary ------------------\n" + + fit_summary + ) + return res + @property def g_value(self): """ From 698f161945dadb75a4d2637e311c811c9542338a Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Wed, 11 Jun 2025 09:31:54 +0200 Subject: [PATCH 26/41] add test on panel data to did_cs binary --- doubleml/did/did_cs.py | 18 +- .../test_did_cs_binary_vs_did_cs_panel.py | 202 ++++++++++++++++++ 2 files changed, 215 insertions(+), 5 deletions(-) create mode 100644 doubleml/did/tests/test_did_cs_binary_vs_did_cs_panel.py diff --git a/doubleml/did/did_cs.py b/doubleml/did/did_cs.py index 5984399c..8136f60c 100644 --- a/doubleml/did/did_cs.py +++ b/doubleml/did/did_cs.py @@ -227,7 +227,9 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa # nuisance g smpls_d0_t0, smpls_d0_t1, smpls_d1_t0, smpls_d1_t1 = _get_cond_smpls_2d(smpls, d, t) if external_predictions["ml_g_d0_t0"] is not None: - g_hat_d0_t0 = {"preds": external_predictions["ml_g_d0_t0"], "targets": None, "models": None} + g_hat_d0_t0_targets = np.full_like(y, np.nan, dtype="float64") + g_hat_d0_t0_targets[(d == 0) & (t == 0)] = y[(d == 0) & (t == 0)] + g_hat_d0_t0 = {"preds": external_predictions["ml_g_d0_t0"], "targets": g_hat_d0_t0_targets, "models": None} else: g_hat_d0_t0 = _dml_cv_predict( self._learner["ml_g"], @@ -243,7 +245,9 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa g_hat_d0_t0["targets"] = g_hat_d0_t0["targets"].astype(float) g_hat_d0_t0["targets"][np.invert((d == 0) & (t == 0))] = np.nan if external_predictions["ml_g_d0_t1"] is not None: - g_hat_d0_t1 = {"preds": external_predictions["ml_g_d0_t1"], "targets": None, "models": None} + g_hat_d0_t1_targets = np.full_like(y, np.nan, dtype="float64") + g_hat_d0_t1_targets[(d == 0) & (t == 1)] = y[(d == 0) & (t == 1)] + g_hat_d0_t1 = {"preds": external_predictions["ml_g_d0_t1"], "targets": g_hat_d0_t1_targets, "models": None} else: g_hat_d0_t1 = _dml_cv_predict( self._learner["ml_g"], @@ -258,7 +262,9 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa g_hat_d0_t1["targets"] = g_hat_d0_t1["targets"].astype(float) g_hat_d0_t1["targets"][np.invert((d == 0) & (t == 1))] = np.nan if external_predictions["ml_g_d1_t0"] is not None: - g_hat_d1_t0 = {"preds": external_predictions["ml_g_d1_t0"], "targets": None, "models": None} + g_hat_d1_t0_targets = np.full_like(y, np.nan, dtype="float64") + g_hat_d1_t0_targets[(d == 1) & (t == 0)] = y[(d == 1) & (t == 0)] + g_hat_d1_t0 = {"preds": external_predictions["ml_g_d1_t0"], "targets": g_hat_d1_t0_targets, "models": None} else: g_hat_d1_t0 = _dml_cv_predict( self._learner["ml_g"], @@ -273,7 +279,9 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa g_hat_d1_t0["targets"] = g_hat_d1_t0["targets"].astype(float) g_hat_d1_t0["targets"][np.invert((d == 1) & (t == 0))] = np.nan if external_predictions["ml_g_d1_t1"] is not None: - g_hat_d1_t1 = {"preds": external_predictions["ml_g_d1_t1"], "targets": None, "models": None} + g_hat_d1_t1_targets = np.full_like(y, np.nan, dtype="float64") + 
g_hat_d1_t1_targets[(d == 1) & (t == 1)] = y[(d == 1) & (t == 1)] + g_hat_d1_t1 = {"preds": external_predictions["ml_g_d1_t1"], "targets": g_hat_d1_t1_targets, "models": None} else: g_hat_d1_t1 = _dml_cv_predict( self._learner["ml_g"], @@ -293,7 +301,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa if self.score == "observational": # nuisance m if external_predictions["ml_m"] is not None: - m_hat = {"preds": external_predictions["ml_m"], "targets": None, "models": None} + m_hat = {"preds": external_predictions["ml_m"], "targets": d, "models": None} else: m_hat = _dml_cv_predict( self._learner["ml_m"], diff --git a/doubleml/did/tests/test_did_cs_binary_vs_did_cs_panel.py b/doubleml/did/tests/test_did_cs_binary_vs_did_cs_panel.py new file mode 100644 index 00000000..8fab2615 --- /dev/null +++ b/doubleml/did/tests/test_did_cs_binary_vs_did_cs_panel.py @@ -0,0 +1,202 @@ +import math + +import numpy as np +import pytest +from sklearn.base import clone +from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor +from sklearn.linear_model import LinearRegression, LogisticRegression + +import doubleml as dml +from doubleml.did.datasets import make_did_CS2021 +from doubleml.did.utils._did_utils import _get_id_positions + + +@pytest.fixture( + scope="module", + params=[ + [LinearRegression(), LogisticRegression(solver="lbfgs", max_iter=250)], + [ + RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42), + RandomForestClassifier(max_depth=5, n_estimators=10, random_state=42), + ], + ], +) +def learner(request): + return request.param + + +@pytest.fixture(scope="module", params=["observational", "experimental"]) +def score(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def in_sample_normalization(request): + return request.param + + +@pytest.fixture(scope="module", params=[0.1]) +def trimming_threshold(request): + return request.param + + +@pytest.fixture(scope="module", params=["datetime", "float"]) +def time_type(request): + return request.param + + +@pytest.fixture(scope="module") +def dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normalization, trimming_threshold): + n_obs = 500 + dpg = 1 + + # collect data + df = make_did_CS2021(n_obs=n_obs, dgp_type=dpg, time_type=time_type) + dml_panel_data = dml.data.DoubleMLPanelData( + df, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"] + ) + + dml_args = { + "ml_g": clone(learner[0]), + "ml_m": clone(learner[1]), + "n_folds": 3, + "score": score, + "in_sample_normalization": in_sample_normalization, + "trimming_threshold": trimming_threshold, + "draw_sample_splitting": True, + } + + dml_did_binary_obj = dml.did.DoubleMLDIDCSBinary( + dml_panel_data, + g_value=dml_panel_data.g_values[0], + t_value_pre=dml_panel_data.t_values[0], + t_value_eval=dml_panel_data.t_values[1], + **dml_args, + ) + dml_did_binary_obj.fit() + + df_subset = dml_did_binary_obj.data_subset.copy() + dml_data = dml.data.DoubleMLData( + df_subset, y_col="y", d_cols="G_indicator", x_cols=["Z1", "Z2", "Z3", "Z4"], t_col="t_indicator" + ) + dml_did_obj = dml.DoubleMLDIDCS( + dml_data, + **dml_args, + ) + + # use external predictions (sample splitting is hard to synchronize) + ext_predictions = {"G_indicator": {}} + ext_predictions["G_indicator"]["ml_g_d0_t0"] = _get_id_positions( + dml_did_binary_obj.predictions["ml_g_d0_t0"][:, :, 0], dml_did_binary_obj._id_positions + ) + ext_predictions["G_indicator"]["ml_g_d0_t1"] = 
_get_id_positions(
+        dml_did_binary_obj.predictions["ml_g_d0_t1"][:, :, 0], dml_did_binary_obj._id_positions
+    )
+    ext_predictions["G_indicator"]["ml_g_d1_t0"] = _get_id_positions(
+        dml_did_binary_obj.predictions["ml_g_d1_t0"][:, :, 0], dml_did_binary_obj._id_positions
+    )
+    ext_predictions["G_indicator"]["ml_g_d1_t1"] = _get_id_positions(
+        dml_did_binary_obj.predictions["ml_g_d1_t1"][:, :, 0], dml_did_binary_obj._id_positions
+    )
+    if score == "observational":
+        ext_predictions["G_indicator"]["ml_m"] = _get_id_positions(
+            dml_did_binary_obj.predictions["ml_m"][:, :, 0], dml_did_binary_obj._id_positions
+        )
+    dml_did_obj.fit(external_predictions=ext_predictions)
+
+    res_dict = {
+        "coef": dml_did_obj.coef,
+        "coef_binary": dml_did_binary_obj.coef,
+        "se": dml_did_obj.se,
+        "se_binary": dml_did_binary_obj.se,
+        "nuisance_loss": dml_did_obj.nuisance_loss,
+        "nuisance_loss_binary": dml_did_binary_obj.nuisance_loss,
+        "dml_did_binary_obj": dml_did_binary_obj,
+    }
+
+    # sensitivity tests
+    res_dict["sensitivity_elements"] = dml_did_obj.sensitivity_elements
+    res_dict["sensitivity_elements_binary"] = dml_did_binary_obj.sensitivity_elements
+
+    dml_did_obj.sensitivity_analysis()
+    dml_did_binary_obj.sensitivity_analysis()
+
+    res_dict["sensitivity_params"] = dml_did_obj.sensitivity_params
+    res_dict["sensitivity_params_binary"] = dml_did_binary_obj.sensitivity_params
+
+    return res_dict
+
+
+@pytest.mark.ci
+def test_coefs(dml_did_binary_vs_did_fixture):
+    assert math.isclose(
+        dml_did_binary_vs_did_fixture["coef_binary"][0], dml_did_binary_vs_did_fixture["coef"][0], rel_tol=1e-9, abs_tol=1e-4
+    )
+
+
+@pytest.mark.ci
+def test_ses(dml_did_binary_vs_did_fixture):
+    assert math.isclose(
+        dml_did_binary_vs_did_fixture["se_binary"][0], dml_did_binary_vs_did_fixture["se"][0], rel_tol=1e-9, abs_tol=1e-4
+    )
+
+
+# No bootstrap tests as the observations are not ordered in the same way
+
+
+@pytest.mark.ci
+def test_nuisance_loss(dml_did_binary_vs_did_fixture):
+    assert (
+        dml_did_binary_vs_did_fixture["nuisance_loss"].keys() == dml_did_binary_vs_did_fixture["nuisance_loss_binary"].keys()
+    )
+    for key, value in dml_did_binary_vs_did_fixture["nuisance_loss"].items():
+        assert np.allclose(value, dml_did_binary_vs_did_fixture["nuisance_loss_binary"][key], rtol=1e-9, atol=1e-3)
+
+
+@pytest.mark.ci
+def test_sensitivity_elements(dml_did_binary_vs_did_fixture):
+    sensitivity_element_names = ["sigma2", "nu2"]
+    for sensitivity_element in sensitivity_element_names:
+        assert np.allclose(
+            dml_did_binary_vs_did_fixture["sensitivity_elements"][sensitivity_element],
+            dml_did_binary_vs_did_fixture["sensitivity_elements_binary"][sensitivity_element],
+            rtol=1e-9,
+            atol=1e-4,
+        )
+    for sensitivity_element in ["psi_sigma2", "psi_nu2", "riesz_rep"]:
+        dml_binary_obj = dml_did_binary_vs_did_fixture["dml_did_binary_obj"]
+        scaling = dml_binary_obj.n_obs_subset / dml_binary_obj._dml_data.n_obs
+        binary_sensitivity_element = scaling * _get_id_positions(
+            dml_did_binary_vs_did_fixture["sensitivity_elements_binary"][sensitivity_element], dml_binary_obj._id_positions
+        )
+        assert np.allclose(
+            dml_did_binary_vs_did_fixture["sensitivity_elements"][sensitivity_element],
+            binary_sensitivity_element,
+            rtol=1e-9,
+            atol=1e-4,
+        )
+
+
+@pytest.mark.ci
+def test_sensitivity_params(dml_did_binary_vs_did_fixture):
+    for key in ["theta", "se", "ci"]:
+        assert np.allclose(
+            dml_did_binary_vs_did_fixture["sensitivity_params"][key]["lower"],
+            dml_did_binary_vs_did_fixture["sensitivity_params_binary"][key]["lower"],
+            
rtol=1e-9, + atol=1e-4, + ) + assert np.allclose( + dml_did_binary_vs_did_fixture["sensitivity_params"][key]["upper"], + dml_did_binary_vs_did_fixture["sensitivity_params_binary"][key]["upper"], + rtol=1e-9, + atol=1e-4, + ) + + for key in ["rv", "rva"]: + assert np.allclose( + dml_did_binary_vs_did_fixture["sensitivity_params"][key], + dml_did_binary_vs_did_fixture["sensitivity_params_binary"][key], + rtol=1e-9, + atol=1e-4, + ) From 0a46b5966a993111b66405a069af64b6969d019e Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Wed, 11 Jun 2025 11:00:54 +0200 Subject: [PATCH 27/41] add panel type to did multi --- doubleml/did/did_multi.py | 38 +++- .../did/tests/test_did_multi_vs_binary.py | 2 +- .../did/tests/test_did_multi_vs_cs_binary.py | 208 ++++++++++++++++++ 3 files changed, 238 insertions(+), 10 deletions(-) create mode 100644 doubleml/did/tests/test_did_multi_vs_cs_binary.py diff --git a/doubleml/did/did_multi.py b/doubleml/did/did_multi.py index 8c5d5163..c8f54313 100644 --- a/doubleml/did/did_multi.py +++ b/doubleml/did/did_multi.py @@ -12,6 +12,7 @@ from doubleml.data import DoubleMLPanelData from doubleml.did.did_aggregation import DoubleMLDIDAggregation from doubleml.did.did_binary import DoubleMLDIDBinary +from doubleml.did.did_cs_binary import DoubleMLDIDCSBinary from doubleml.did.utils._aggregation import ( _check_did_aggregation_dict, _compute_did_eventstudy_aggregation_weights, @@ -31,7 +32,7 @@ from doubleml.did.utils._plot import add_jitter from doubleml.double_ml import DoubleML from doubleml.double_ml_framework import concat -from doubleml.utils._checks import _check_score, _check_trimming +from doubleml.utils._checks import _check_bool, _check_score, _check_trimming from doubleml.utils._descriptive import generate_summary from doubleml.utils.gain_statistics import gain_statistics @@ -80,6 +81,10 @@ class DoubleMLDIDMulti: from the pretreatment covariates. Default is ``'observational'``. + panel : bool + Indicates whether to rely on panel data structure (``True``) or repeated cross sections (``False``). + Default is ``True``. + in_sample_normalization : bool Indicates whether to use in-sample normalization of weights. Default is ``True``. @@ -140,6 +145,7 @@ def __init__( n_folds=5, n_rep=1, score="observational", + panel=True, in_sample_normalization=True, trimming_rule="truncate", trimming_threshold=1e-2, @@ -179,6 +185,9 @@ def __init__( valid_scores = ["observational", "experimental"] _check_score(self.score, valid_scores, allow_callable=False) + _check_bool(panel, "panel") + self._panel = panel + # initialize framework which is constructed after the fit method is called self._framework = None @@ -332,6 +341,13 @@ def never_treated_value(self): """ return self._never_treated_value + @property + def panel(self): + """ + Indicates whether to rely on panel data structure (``True``) or repeated cross sections (``False``). + """ + return self._panel + @property def in_sample_normalization(self): """ @@ -1250,7 +1266,10 @@ def _check_external_predictions(self, external_predictions): + f"Passed keys: {set(external_predictions.keys())}." 
) - expected_learner_keys = ["ml_g0", "ml_g1", "ml_m"] + if self.panel: + expected_learner_keys = ["ml_g0", "ml_g1", "ml_m"] + else: + expected_learner_keys = ["ml_g_d0_t0", "ml_g_d0_t1", "ml_g_d1_t0", "ml_g_d1_t1", "ml_m"] for key, value in external_predictions.items(): if not isinstance(value, dict): raise TypeError( @@ -1268,12 +1287,7 @@ def _rename_external_predictions(self, external_predictions): d_col = self._dml_data.d_cols[0] ext_pred_dict = {gt_combination: {d_col: {}} for gt_combination in self.gt_labels} for gt_combination in self.gt_labels: - if "ml_g0" in external_predictions[gt_combination]: - ext_pred_dict[gt_combination][d_col]["ml_g0"] = external_predictions[gt_combination]["ml_g0"] - if "ml_g1" in external_predictions[gt_combination]: - ext_pred_dict[gt_combination][d_col]["ml_g1"] = external_predictions[gt_combination]["ml_g1"] - if "ml_m" in external_predictions[gt_combination]: - ext_pred_dict[gt_combination][d_col]["ml_m"] = external_predictions[gt_combination]["ml_m"] + ext_pred_dict[gt_combination][d_col].update(external_predictions[gt_combination]) return ext_pred_dict @@ -1304,9 +1318,15 @@ def _initialize_models(self): "draw_sample_splitting": True, "print_periods": self._print_periods, } + if self.panel: + ModelClass = DoubleMLDIDBinary + else: + ModelClass = DoubleMLDIDCSBinary + + # iterate over all group-time combinations for i_model, (g_value, t_value_pre, t_value_eval) in enumerate(self.gt_combinations): # initialize models for all levels - model = DoubleMLDIDBinary(g_value=g_value, t_value_pre=t_value_pre, t_value_eval=t_value_eval, **kwargs) + model = ModelClass(g_value=g_value, t_value_pre=t_value_pre, t_value_eval=t_value_eval, **kwargs) modellist[i_model] = model diff --git a/doubleml/did/tests/test_did_multi_vs_binary.py b/doubleml/did/tests/test_did_multi_vs_binary.py index 40b877b2..15d3fd0c 100644 --- a/doubleml/did/tests/test_did_multi_vs_binary.py +++ b/doubleml/did/tests/test_did_multi_vs_binary.py @@ -49,7 +49,7 @@ def dml_did_binary_vs_did_multi_fixture(time_type, learner, score, in_sample_nor n_obs = 500 dpg = 1 boot_methods = ["normal"] - n_rep_boot = 50000 + n_rep_boot = 500 # collect data df = make_did_CS2021(n_obs=n_obs, dgp_type=dpg, time_type=time_type) diff --git a/doubleml/did/tests/test_did_multi_vs_cs_binary.py b/doubleml/did/tests/test_did_multi_vs_cs_binary.py new file mode 100644 index 00000000..59886854 --- /dev/null +++ b/doubleml/did/tests/test_did_multi_vs_cs_binary.py @@ -0,0 +1,208 @@ +import math + +import numpy as np +import pytest +from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor +from sklearn.linear_model import LinearRegression, LogisticRegression + +import doubleml as dml +from doubleml.did.datasets import make_did_CS2021 +from doubleml.utils import DMLDummyClassifier, DMLDummyRegressor + + +@pytest.fixture( + scope="module", + params=[ + [LinearRegression(), LogisticRegression(solver="lbfgs", max_iter=250)], + [ + RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42), + RandomForestClassifier(max_depth=5, n_estimators=10, random_state=42), + ], + ], +) +def learner(request): + return request.param + + +@pytest.fixture(scope="module", params=["observational", "experimental"]) +def score(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def in_sample_normalization(request): + return request.param + + +@pytest.fixture(scope="module", params=[0.1]) +def trimming_threshold(request): + return request.param + + 
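+# The fixture dml_did_binary_vs_did_multi_fixture below fits a single group-time ATT
+# with DoubleMLDIDMulti(panel=False) and then re-scores the identical nuisance
+# predictions in a stand-alone DoubleMLDIDCSBinary via external predictions (the
+# sample splitting of the two estimators is hard to synchronize otherwise), so the
+# coefficients and standard errors of both models should coincide.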
+@pytest.fixture(scope="module", params=["datetime", "float"])
+def time_type(request):
+    return request.param
+
+
+@pytest.fixture(scope="module")
+def dml_did_binary_vs_did_multi_fixture(time_type, learner, score, in_sample_normalization, trimming_threshold):
+    n_obs = 500
+    dpg = 1
+    boot_methods = ["normal"]
+    n_rep_boot = 500
+
+    # collect data
+    df = make_did_CS2021(n_obs=n_obs, dgp_type=dpg, time_type=time_type)
+    dml_panel_data = dml.data.DoubleMLPanelData(
+        df, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"]
+    )
+
+    dml_args = {
+        "n_folds": 3,
+        "score": score,
+        "in_sample_normalization": in_sample_normalization,
+        "trimming_threshold": trimming_threshold,
+        "draw_sample_splitting": True,
+    }
+    gt_combination = [(dml_panel_data.g_values[0], dml_panel_data.t_values[0], dml_panel_data.t_values[1])]
+    dml_did_multi_obj = dml.did.DoubleMLDIDMulti(
+        dml_panel_data,
+        ml_g=learner[0],
+        ml_m=learner[1],
+        gt_combinations=gt_combination,
+        panel=False,
+        **dml_args,
+    )
+    dml_did_multi_obj.fit()
+
+    treatment_col = dml_panel_data.d_cols[0]
+    ext_pred_dict = {treatment_col: {}}
+    all_keys = ["ml_g_d0_t0", "ml_g_d0_t1", "ml_g_d1_t0", "ml_g_d1_t1"]
+    for key in all_keys:
+        ext_pred_dict[treatment_col][key] = dml_did_multi_obj.modellist[0].predictions[key][:, :, 0]
+    if score == "observational":
+        ext_pred_dict[treatment_col]["ml_m"] = dml_did_multi_obj.modellist[0].predictions["ml_m"][:, :, 0]
+
+    dml_did_binary_obj = dml.did.DoubleMLDIDCSBinary(
+        dml_panel_data,
+        g_value=gt_combination[0][0],
+        t_value_pre=gt_combination[0][1],
+        t_value_eval=gt_combination[0][2],
+        ml_g=DMLDummyRegressor(),
+        ml_m=DMLDummyClassifier(),
+        **dml_args,
+    )
+    dml_did_binary_obj.fit(external_predictions=ext_pred_dict)
+
+    res_dict = {
+        "coef_multi": dml_did_multi_obj.coef,
+        "coef_binary": dml_did_binary_obj.coef,
+        "se_multi": dml_did_multi_obj.se,
+        "se_binary": dml_did_binary_obj.se,
+        "boot_methods": boot_methods,
+        "nuisance_loss_multi": dml_did_multi_obj.nuisance_loss,
+        "nuisance_loss_binary": dml_did_binary_obj.nuisance_loss,
+    }
+
+    for bootstrap in boot_methods:
+        np.random.seed(3141)
+        dml_did_multi_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
+        np.random.seed(3141)
+        dml_did_binary_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
+
+        # approximately same ci (bootstrap not identical due to size of score)
+        res_dict["boot_ci" + bootstrap + "_multi"] = dml_did_multi_obj.confint(joint=True)
+        res_dict["boot_ci" + bootstrap + "_binary"] = dml_did_binary_obj.confint(joint=True)
+
+    # sensitivity tests
+    res_dict["sensitivity_elements_multi"] = dml_did_multi_obj.sensitivity_elements
+    res_dict["sensitivity_elements_binary"] = dml_did_binary_obj.framework.sensitivity_elements
+
+    dml_did_multi_obj.sensitivity_analysis()
+    dml_did_binary_obj.sensitivity_analysis()
+
+    res_dict["sensitivity_params_multi"] = dml_did_multi_obj.sensitivity_params
+    res_dict["sensitivity_params_binary"] = dml_did_binary_obj.sensitivity_params
+
+    return res_dict
+
+
+@pytest.mark.ci
+def test_coefs(dml_did_binary_vs_did_multi_fixture):
+    assert math.isclose(
+        dml_did_binary_vs_did_multi_fixture["coef_binary"][0],
+        dml_did_binary_vs_did_multi_fixture["coef_multi"][0],
+        rel_tol=1e-9,
+        abs_tol=1e-4,
+    )
+
+
+@pytest.mark.ci
+def test_se(dml_did_binary_vs_did_multi_fixture):
+    assert math.isclose(
+        dml_did_binary_vs_did_multi_fixture["se_binary"][0],
+        dml_did_binary_vs_did_multi_fixture["se_multi"][0],
+        rel_tol=1e-9,
+        abs_tol=1e-4,
+    )
+
+
+@pytest.mark.ci
+def 
test_boot(dml_did_binary_vs_did_multi_fixture): + for bootstrap in dml_did_binary_vs_did_multi_fixture["boot_methods"]: + assert np.allclose( + dml_did_binary_vs_did_multi_fixture["boot_ci" + bootstrap + "_multi"].values, + dml_did_binary_vs_did_multi_fixture["boot_ci" + bootstrap + "_binary"].values, + atol=1e-2, + ) + + +@pytest.mark.ci +def test_nuisance_loss(dml_did_binary_vs_did_multi_fixture): + assert ( + dml_did_binary_vs_did_multi_fixture["nuisance_loss_multi"].keys() + == dml_did_binary_vs_did_multi_fixture["nuisance_loss_binary"].keys() + ) + for key, value in dml_did_binary_vs_did_multi_fixture["nuisance_loss_multi"].items(): + assert np.allclose(value, dml_did_binary_vs_did_multi_fixture["nuisance_loss_binary"][key], rtol=1e-9, atol=1e-3) + + +@pytest.mark.ci +def test_sensitivity_elements(dml_did_binary_vs_did_multi_fixture): + elements_multi = dml_did_binary_vs_did_multi_fixture["sensitivity_elements_multi"] + elements_binary = dml_did_binary_vs_did_multi_fixture["sensitivity_elements_binary"] + sensitivity_element_names = ["max_bias", "psi_max_bias", "sigma2", "nu2"] + for sensitivity_element in sensitivity_element_names: + assert np.allclose( + elements_multi[sensitivity_element], + elements_binary[sensitivity_element], + rtol=1e-9, + atol=1e-4, + ) + + +@pytest.mark.ci +def test_sensitivity_params(dml_did_binary_vs_did_multi_fixture): + multi_params = dml_did_binary_vs_did_multi_fixture["sensitivity_params_multi"] + binary_params = dml_did_binary_vs_did_multi_fixture["sensitivity_params_binary"] + for key in ["theta", "se", "ci"]: + assert np.allclose( + multi_params[key]["lower"], + binary_params[key]["lower"], + rtol=1e-9, + atol=1e-4, + ) + assert np.allclose( + multi_params[key]["upper"], + binary_params[key]["upper"], + rtol=1e-9, + atol=1e-4, + ) + + for key in ["rv", "rva"]: + assert np.allclose( + multi_params[key], + binary_params[key], + rtol=1e-9, + atol=1e-4, + ) From 45dfcf5a7fe98b1d700a21426ce27e81159b3985 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Wed, 11 Jun 2025 11:47:11 +0200 Subject: [PATCH 28/41] update single gt tests for did_cs --- .../did/tests/test_did_multi_aggregation_single_gt.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/doubleml/did/tests/test_did_multi_aggregation_single_gt.py b/doubleml/did/tests/test_did_multi_aggregation_single_gt.py index 0f71d91b..a6ffcd49 100644 --- a/doubleml/did/tests/test_did_multi_aggregation_single_gt.py +++ b/doubleml/did/tests/test_did_multi_aggregation_single_gt.py @@ -27,6 +27,11 @@ def score(request): return request.param +@pytest.fixture(scope="module", params=[True, False]) +def panel(request): + return request.param + + @pytest.fixture(scope="module", params=[True, False]) def in_sample_normalization(request): return request.param @@ -43,7 +48,7 @@ def time_type(request): @pytest.fixture(scope="module") -def dml_single_gt_aggregation(aggregation, time_type, learner, score, in_sample_normalization, trimming_threshold): +def dml_single_gt_aggregation(aggregation, time_type, learner, score, panel, in_sample_normalization, trimming_threshold): n_obs = 500 dpg = 1 @@ -56,6 +61,7 @@ def dml_single_gt_aggregation(aggregation, time_type, learner, score, in_sample_ dml_args = { "n_folds": 3, "score": score, + "panel": panel, "in_sample_normalization": in_sample_normalization, "trimming_threshold": trimming_threshold, "draw_sample_splitting": True, From 29b0ee7114c7d1a99c5c22558c84f6a62e0d3403 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Wed, 11 Jun 2025 11:47:35 +0200 
Subject: [PATCH 29/41] update exception tests for did cs

---
 doubleml/did/tests/test_did_multi_exceptions.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/doubleml/did/tests/test_did_multi_exceptions.py b/doubleml/did/tests/test_did_multi_exceptions.py
index aead8e48..88d373e3 100644
--- a/doubleml/did/tests/test_did_multi_exceptions.py
+++ b/doubleml/did/tests/test_did_multi_exceptions.py
@@ -18,6 +18,7 @@
     "ml_g": LinearRegression(),
     "ml_m": LogisticRegression(),
     "gt_combinations": [(1, 0, 1)],
+    "panel": True,
 }
 
 
@@ -43,6 +44,12 @@ def test_input():
         invalid_arguments = {"control_group": 0}
         _ = dml.did.DoubleMLDIDMulti(**(valid_arguments | invalid_arguments))
 
+    # non-boolean panel
+    msg = "panel has to be boolean. test of type <class 'str'> was passed."
+    with pytest.raises(TypeError, match=msg):
+        invalid_arguments = {"panel": "test"}
+        _ = dml.did.DoubleMLDIDMulti(**(valid_arguments | invalid_arguments))
+
     # propensity score adjustments
     msg = "in_sample_normalization indicator has to be boolean. Object of type passed."
     with pytest.raises(TypeError, match=msg):
@@ -170,6 +177,12 @@ def test_check_external_predictions():
     valid_pred = {model.gt_labels[0]: {"ml_g0": None, "ml_g1": None, "ml_m": None}}
     model._check_external_predictions(valid_pred)
 
+    model_cs = dml.did.DoubleMLDIDMulti(**valid_arguments | {"panel": False})
+    valid_pred = {
+        model.gt_labels[0]: {"ml_g_d0_t0": None, "ml_g_d0_t1": None, "ml_g_d1_t0": None, "ml_g_d1_t1": None, "ml_m": None}
+    }
+    model_cs._check_external_predictions(valid_pred)
+
 
 @pytest.mark.ci
 def test_exceptions_before_fit():

From 895a7627d2d3c444cb900cacfee5a0f475514556 Mon Sep 17 00:00:00 2001
From: SvenKlaassen
Date: Wed, 11 Jun 2025 11:47:50 +0200
Subject: [PATCH 30/41] update external prediction tests for did cs

---
 .../tests/test_did_multi_external_predictions.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/doubleml/did/tests/test_did_multi_external_predictions.py b/doubleml/did/tests/test_did_multi_external_predictions.py
index e336487d..9bafdc6f 100644
--- a/doubleml/did/tests/test_did_multi_external_predictions.py
+++ b/doubleml/did/tests/test_did_multi_external_predictions.py
@@ -14,6 +14,11 @@ def did_score(request):
     return request.param
 
 
+@pytest.fixture(scope="module", params=[True, False])
+def panel(request):
+    return request.param
+
+
 @pytest.fixture(scope="module", params=[1, 3])
 def n_rep(request):
     return request.param
@@ -30,7 +35,7 @@ def set_ml_g_ext(request):
 
 
 @pytest.fixture(scope="module")
-def doubleml_did_multi_ext_fixture(did_score, n_rep, set_ml_m_ext, set_ml_g_ext):
+def doubleml_did_multi_ext_fixture(did_score, panel, n_rep, set_ml_m_ext, set_ml_g_ext):
     n_obs = 500
     n_folds = 5
     dgp = 1
@@ -47,6 +52,7 @@
         "obj_dml_data": dml_panel_data,
         "gt_combinations": [(2, 0, 1)],
         "score": did_score,
+        "panel": panel,
        "n_rep": n_rep,
         "n_folds": n_folds,
     }
@@ -69,9 +75,12 @@
         ml_m_ext = ml_m
 
     if set_ml_g_ext:
+        g_keys = ["ml_g0", "ml_g1"] if panel else ["ml_g_d0_t0", "ml_g_d0_t1", "ml_g_d1_t0", "ml_g_d1_t1"]
         for i_gt_combination, gt_label in enumerate(dml_obj.gt_labels):
-            ext_pred_dict[gt_label]["ml_g0"] = dml_obj.modellist[i_gt_combination].predictions["ml_g0"][:, :, 0]
-            ext_pred_dict[gt_label]["ml_g1"] = dml_obj.modellist[i_gt_combination].predictions["ml_g1"][:, :, 0]
+            predictions = dml_obj.modellist[i_gt_combination].predictions
+            for key in 
g_keys: + ext_pred_dict[gt_label][key] = predictions[key][:, :, 0] + ml_g_ext = DMLDummyRegressor() else: ml_g_ext = ml_g From b6ace7dae151340d2b2462e9a0d64af6d0b7ce0e Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Wed, 11 Jun 2025 11:48:02 +0200 Subject: [PATCH 31/41] update placebo tests for did cs multi --- doubleml/did/tests/test_did_multi_placebo.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/doubleml/did/tests/test_did_multi_placebo.py b/doubleml/did/tests/test_did_multi_placebo.py index 8f01d426..12435871 100644 --- a/doubleml/did/tests/test_did_multi_placebo.py +++ b/doubleml/did/tests/test_did_multi_placebo.py @@ -12,13 +12,18 @@ def did_score(request): return request.param +@pytest.fixture(scope="module", params=[True, False]) +def panel(request): + return request.param + + @pytest.fixture(scope="module", params=[1, 3]) def n_rep(request): return request.param @pytest.fixture(scope="module") -def doubleml_did_fixture(did_score, n_rep): +def doubleml_did_fixture(did_score, panel, n_rep): n_obs = 1000 dgp = 5 # has to be experimental (for experimental score to be valid) np.random.seed(42) @@ -36,6 +41,7 @@ def doubleml_did_fixture(did_score, n_rep): "ml_m": LogisticRegression(), "gt_combinations": gt_combinations, "score": did_score, + "panel": panel, "n_rep": n_rep, "n_folds": 5, "draw_sample_splitting": True, From 176a99d8b3ac2dd8728a5a08368be3d974f23e86 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Wed, 11 Jun 2025 13:34:06 +0200 Subject: [PATCH 32/41] update plot and return type tests for did multi --- doubleml/did/did_multi.py | 5 ++ doubleml/did/tests/test_did_multi_plot.py | 8 +++- .../did/tests/test_did_multi_return_types.py | 47 ++++++++++++++----- 3 files changed, 47 insertions(+), 13 deletions(-) diff --git a/doubleml/did/did_multi.py b/doubleml/did/did_multi.py index c8f54313..646ad41d 100644 --- a/doubleml/did/did_multi.py +++ b/doubleml/did/did_multi.py @@ -187,6 +187,11 @@ def __init__( _check_bool(panel, "panel") self._panel = panel + # set score dim (n_elements, n_thetas, n_rep), just for checking purposes + if self.panel: + self._score_dim = (self._dml_data.n_ids, self.n_gt_atts, self.n_rep) + else: + self._score_dim = (self._dml_data.n_obs, self.n_gt_atts, self.n_rep) # initialize framework which is constructed after the fit method is called self._framework = None diff --git a/doubleml/did/tests/test_did_multi_plot.py b/doubleml/did/tests/test_did_multi_plot.py index 2eb15dcc..bcb8b786 100644 --- a/doubleml/did/tests/test_did_multi_plot.py +++ b/doubleml/did/tests/test_did_multi_plot.py @@ -13,13 +13,18 @@ def did_score(request): return request.param +@pytest.fixture(scope="module", params=[True, False]) +def panel(request): + return request.param + + @pytest.fixture(scope="module", params=[1, 3]) def n_rep(request): return request.param @pytest.fixture(scope="module") -def doubleml_did_fixture(did_score, n_rep): +def doubleml_did_fixture(did_score, panel, n_rep): n_obs = 1000 dgp = 5 # has to be experimental (for experimental score to be valid) np.random.seed(42) @@ -32,6 +37,7 @@ def doubleml_did_fixture(did_score, n_rep): "ml_m": LogisticRegression(), "gt_combinations": "all", "score": did_score, + "panel": panel, "n_rep": n_rep, "n_folds": 2, "draw_sample_splitting": True, diff --git a/doubleml/did/tests/test_did_multi_return_types.py b/doubleml/did/tests/test_did_multi_return_types.py index c11544ed..d797230e 100644 --- a/doubleml/did/tests/test_did_multi_return_types.py +++ 
b/doubleml/did/tests/test_did_multi_return_types.py @@ -13,7 +13,7 @@ from doubleml.double_ml_framework import DoubleMLFramework # Test constants -N_OBS = 200 +N_IDS = 200 N_REP = 1 N_FOLDS = 3 N_REP_BOOT = 314 @@ -31,7 +31,7 @@ datasets = {} # panel data -df_panel = make_did_CS2021(n_obs=N_OBS, dgp_type=1, n_pre_treat_periods=2, n_periods=N_PERIODS, time_type="float") +df_panel = make_did_CS2021(n_obs=N_IDS, dgp_type=1, n_pre_treat_periods=2, n_periods=N_PERIODS, time_type="float") df_panel["y_binary"] = np.random.binomial(n=1, p=0.5, size=df_panel.shape[0]) datasets["did_panel"] = DoubleMLPanelData( df_panel, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"] @@ -42,10 +42,23 @@ dml_objs = [ - (DoubleMLDIDMulti(datasets["did_panel"], ml_g=Lasso(), ml_m=LogisticRegression(), **dml_args), DoubleMLDIDMulti), + ( + DoubleMLDIDMulti(datasets["did_panel"], panel=True, ml_g=Lasso(), ml_m=LogisticRegression(), **dml_args), + DoubleMLDIDMulti, + ), + ( + DoubleMLDIDMulti(datasets["did_panel"], panel=False, ml_g=Lasso(), ml_m=LogisticRegression(), **dml_args), + DoubleMLDIDMulti, + ), + ( + DoubleMLDIDMulti( + datasets["did_panel_binary_outcome"], panel=True, ml_g=LogisticRegression(), ml_m=LogisticRegression(), **dml_args + ), + DoubleMLDIDMulti, + ), ( DoubleMLDIDMulti( - datasets["did_panel_binary_outcome"], ml_g=LogisticRegression(), ml_m=LogisticRegression(), **dml_args + datasets["did_panel_binary_outcome"], panel=False, ml_g=LogisticRegression(), ml_m=LogisticRegression(), **dml_args ), DoubleMLDIDMulti, ), @@ -84,13 +97,20 @@ def test_panel_property_types_and_shapes(fitted_dml_obj): n_treat = len(fitted_dml_obj.gt_combinations) dml_obj = fitted_dml_obj + if dml_obj.panel: + score_dim = (N_IDS, n_treat, N_REP) + else: + score_dim = (df_panel.shape[0], n_treat, N_REP) + + assert dml_obj._score_dim == score_dim + # check_basic_property_types_and_shapes # check that the setting is still in line with the hard-coded values assert dml_obj._dml_data.n_treat == 1 assert dml_obj.n_gt_atts == n_treat assert dml_obj.n_rep == N_REP assert dml_obj.n_folds == N_FOLDS - assert dml_obj._dml_data.n_obs == N_OBS * N_PERIODS + assert dml_obj._dml_data.n_obs == df_panel.shape[0] assert dml_obj.n_rep_boot == N_REP_BOOT assert isinstance(dml_obj.all_coef, np.ndarray) @@ -112,11 +132,7 @@ def test_panel_property_types_and_shapes(fitted_dml_obj): assert dml_obj.t_stat.shape == (n_treat,) assert isinstance(dml_obj.framework.scaled_psi, np.ndarray) - assert dml_obj.framework.scaled_psi.shape == ( - N_OBS, - n_treat, - N_REP, - ) + assert dml_obj.framework.scaled_psi.shape == score_dim assert isinstance(dml_obj.framework, DoubleMLFramework) assert isinstance(dml_obj.pval, np.ndarray) @@ -126,7 +142,10 @@ def test_panel_property_types_and_shapes(fitted_dml_obj): assert len(dml_obj._dml_data.binary_treats) == 1 # check_basic_predictions_and_targets - expected_keys = ["ml_g0", "ml_g1", "ml_m"] + if dml_obj.panel: + expected_keys = ["ml_g0", "ml_g1", "ml_m"] + else: + expected_keys = ["ml_g_d0_t0", "ml_g_d0_t1", "ml_g_d1_t0", "ml_g_d1_t1", "ml_m"] for key in expected_keys: assert isinstance(dml_obj.nuisance_loss[key], np.ndarray) assert dml_obj.nuisance_loss[key].shape == (N_REP, n_treat) @@ -137,6 +156,10 @@ def test_panel_sensitivity_return_types(fitted_dml_obj): n_treat = len(fitted_dml_obj.gt_combinations) benchmarking_set = [fitted_dml_obj._dml_data.x_cols[0]] dml_obj = fitted_dml_obj + if dml_obj.panel: + score_dim = (N_IDS, n_treat, N_REP) + else: + score_dim = (df_panel.shape[0], 
n_treat, N_REP) assert isinstance(dml_obj.sensitivity_elements, dict) for key in ["sigma2", "nu2", "max_bias"]: @@ -144,7 +167,7 @@ def test_panel_sensitivity_return_types(fitted_dml_obj): assert dml_obj.sensitivity_elements[key].shape == (1, n_treat, N_REP) for key in ["psi_max_bias"]: assert isinstance(dml_obj.sensitivity_elements[key], np.ndarray) - assert dml_obj.sensitivity_elements[key].shape == (N_OBS, n_treat, N_REP) + assert dml_obj.sensitivity_elements[key].shape == score_dim assert isinstance(dml_obj.sensitivity_summary, str) dml_obj.sensitivity_analysis() From 9d59e5b1e378a2aa9c573b92aad02267cf3da586 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Wed, 11 Jun 2025 13:53:45 +0200 Subject: [PATCH 33/41] add additional did multi aggregation test --- ...st_did_multi_aggregation_manual_weights.py | 199 +++++++++++++++++- ...test_did_multi_aggregation_weight_index.py | 1 - 2 files changed, 198 insertions(+), 2 deletions(-) delete mode 100644 doubleml/did/tests/test_did_multi_aggregation_weight_index.py diff --git a/doubleml/did/tests/test_did_multi_aggregation_manual_weights.py b/doubleml/did/tests/test_did_multi_aggregation_manual_weights.py index 35512d8f..57b00b31 100644 --- a/doubleml/did/tests/test_did_multi_aggregation_manual_weights.py +++ b/doubleml/did/tests/test_did_multi_aggregation_manual_weights.py @@ -1 +1,198 @@ -# TODO: For each aggregation method check if the manual weights equal the string aggregation method. +import math + +import numpy as np +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression + +import doubleml as dml +from doubleml.did.datasets import make_did_CS2021 +from doubleml.did.utils._aggregation import ( + _compute_did_eventstudy_aggregation_weights, + _compute_did_group_aggregation_weights, + _compute_did_time_aggregation_weights, +) + + +@pytest.fixture(scope="module", params=["group", "time", "eventstudy"]) +def aggregation_method(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def panel(request): + return request.param + + +@pytest.fixture(scope="module", params=["observational", "experimental"]) +def score(request): + return request.param + + +@pytest.fixture(scope="module") +def dml_fitted_obj(panel, score): + """Create a fitted DML object for testing.""" + n_obs = 200 + + # Create data + df = make_did_CS2021(n_obs=n_obs, dgp_type=1, time_type="float") + dml_data = dml.data.DoubleMLPanelData(df, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"]) + + # Create and fit model + ml_g = LinearRegression() + ml_m = LogisticRegression(solver="lbfgs", max_iter=250) + + dml_obj = dml.did.DoubleMLDIDMulti( + obj_dml_data=dml_data, + ml_g=ml_g, + ml_m=ml_m, + gt_combinations="standard", + panel=panel, + score=score, + n_folds=3, + n_rep=1, + ) + dml_obj.fit() + + return dml_obj + + +def _extract_manual_weights(dml_obj, aggregation_method): + """Extract manual weights from the aggregation method.""" + # Get the mask for non-masked values + selected_gt_mask = ~dml_obj.gt_index.mask + + if aggregation_method == "group": + # Exclude pre-treatment combinations for group aggregation + selected_gt_mask = selected_gt_mask & dml_obj._post_treatment_mask + aggregation_dict = _compute_did_group_aggregation_weights( + gt_index=dml_obj.gt_index, + g_values=dml_obj.g_values, + d_values=dml_obj._dml_data.d, + selected_gt_mask=selected_gt_mask, + ) + aggregation_dict["method"] = "Group" + elif aggregation_method == "time": + # Exclude pre-treatment combinations for 
time aggregation + selected_gt_mask = selected_gt_mask & dml_obj._post_treatment_mask + aggregation_dict = _compute_did_time_aggregation_weights( + gt_index=dml_obj.gt_index, + g_values=dml_obj.g_values, + t_values=dml_obj.t_values, + d_values=dml_obj._dml_data.d, + selected_gt_mask=selected_gt_mask, + ) + aggregation_dict["method"] = "Time" + else: + assert aggregation_method == "eventstudy" + aggregation_dict = _compute_did_eventstudy_aggregation_weights( + gt_index=dml_obj.gt_index, + g_values=dml_obj.g_values, + t_values=dml_obj.t_values, + d_values=dml_obj._dml_data.d, + time_values=dml_obj._dml_data.t, + selected_gt_mask=selected_gt_mask, + ) + aggregation_dict["method"] = "Event Study" + return aggregation_dict + + +@pytest.mark.ci +def test_string_vs_manual_weights_aggregation(dml_fitted_obj, aggregation_method): + """Test that string aggregation methods produce identical results to manual weights.""" + + # Get string-based aggregation result + agg_string = dml_fitted_obj.aggregate(aggregation=aggregation_method) + + # Extract manual weights + manual_weights_dict = _extract_manual_weights(dml_fitted_obj, aggregation_method) + + # Get manual aggregation result + agg_manual = dml_fitted_obj.aggregate(aggregation=manual_weights_dict) + + # Compare aggregated frameworks - coefficients + np.testing.assert_allclose( + agg_string.aggregated_frameworks.thetas, + agg_manual.aggregated_frameworks.thetas, + rtol=1e-9, + atol=1e-12, + ) + + # Compare aggregated frameworks - standard errors + np.testing.assert_allclose( + agg_string.aggregated_frameworks.ses, + agg_manual.aggregated_frameworks.ses, + rtol=1e-9, + atol=1e-12, + ) + + # Compare overall aggregated framework - coefficients + np.testing.assert_allclose( + agg_string.overall_aggregated_framework.thetas, + agg_manual.overall_aggregated_framework.thetas, + rtol=1e-9, + atol=1e-12, + ) + + # Compare overall aggregated framework - standard errors + np.testing.assert_allclose( + agg_string.overall_aggregated_framework.ses, + agg_manual.overall_aggregated_framework.ses, + rtol=1e-9, + atol=1e-12, + ) + + # Compare aggregation weights + np.testing.assert_allclose( + agg_string.aggregation_weights, + agg_manual.aggregation_weights, + rtol=1e-9, + atol=1e-12, + ) + + # Compare overall aggregation weights + np.testing.assert_allclose( + agg_string.overall_aggregation_weights, + agg_manual.overall_aggregation_weights, + rtol=1e-9, + atol=1e-12, + ) + + # Compare aggregation names + assert agg_string.aggregation_names == agg_manual.aggregation_names + + # Compare number of aggregations + assert agg_string.n_aggregations == agg_manual.n_aggregations + + +@pytest.mark.ci +def test_manual_weights_properties(dml_fitted_obj, aggregation_method): + """Test that manual weights have the expected properties.""" + + manual_weights_dict = _extract_manual_weights(dml_fitted_obj, aggregation_method) + + # Check that required keys are present + assert "weight_masks" in manual_weights_dict + assert "agg_names" in manual_weights_dict + assert "agg_weights" in manual_weights_dict + + weight_masks = manual_weights_dict["weight_masks"] + agg_weights = manual_weights_dict["agg_weights"] + + # Check weight masks properties + assert isinstance(weight_masks, np.ma.MaskedArray) + assert weight_masks.ndim == 4 + assert weight_masks.shape[:-1] == dml_fitted_obj.gt_index.shape + + # Check that aggregation weights sum to 1 + assert math.isclose(np.sum(agg_weights), 1.0, rel_tol=1e-9, abs_tol=1e-12) + + # Check that individual weight masks sum to 1 (for non-masked elements) 
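+    # (compressed() returns only the non-masked weights, so each sum runs over the
+    # active group-time combinations of that aggregation column)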
+    n_aggregations = weight_masks.shape[-1]
+    for i in range(n_aggregations):
+        weights = weight_masks[..., i].compressed()
+        if len(weights) > 0:
+            assert math.isclose(np.sum(weights), 1.0, rel_tol=1e-9, abs_tol=1e-12)
+
+    # Check that weight masks have the same mask as gt_index
+    for i in range(n_aggregations):
+        np.testing.assert_array_equal(weight_masks[..., i].mask, dml_fitted_obj.gt_index.mask)
diff --git a/doubleml/did/tests/test_did_multi_aggregation_weight_index.py b/doubleml/did/tests/test_did_multi_aggregation_weight_index.py
deleted file mode 100644
index d001a4a8..00000000
--- a/doubleml/did/tests/test_did_multi_aggregation_weight_index.py
+++ /dev/null
@@ -1 +0,0 @@
-# TODO: For each aggregation method check if the aggregated weights correspond to certain gt_combinations (group, time etc.)

From 9e3e6d62a00a9cf3b3394476cd7fbf71e30ea31f Mon Sep 17 00:00:00 2001
From: SvenKlaassen
Date: Thu, 12 Jun 2025 09:25:55 +0200
Subject: [PATCH 34/41] update did cs multi test for cs data

---
 doubleml/did/tests/test_did_multi_vs_cs_binary.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/doubleml/did/tests/test_did_multi_vs_cs_binary.py b/doubleml/did/tests/test_did_multi_vs_cs_binary.py
index 59886854..7af8d74d 100644
--- a/doubleml/did/tests/test_did_multi_vs_cs_binary.py
+++ b/doubleml/did/tests/test_did_multi_vs_cs_binary.py
@@ -6,7 +6,7 @@
 from sklearn.linear_model import LinearRegression, LogisticRegression
 
 import doubleml as dml
-from doubleml.did.datasets import make_did_CS2021
+from doubleml.did.datasets import make_did_cs_CS2021
 from doubleml.utils import DMLDummyClassifier, DMLDummyRegressor
 
 
@@ -44,15 +44,20 @@ def time_type(request):
     return request.param
 
 
+@pytest.fixture(scope="module", params=[0.5, 0.1])
+def lambda_t(request):
+    return request.param
+
+
 @pytest.fixture(scope="module")
-def dml_did_binary_vs_did_multi_fixture(time_type, learner, score, in_sample_normalization, trimming_threshold):
+def dml_did_binary_vs_did_multi_fixture(time_type, lambda_t, learner, score, in_sample_normalization, trimming_threshold):
     n_obs = 500
     dpg = 1
     boot_methods = ["normal"]
     n_rep_boot = 500
 
     # collect data
-    df = make_did_CS2021(n_obs=n_obs, dgp_type=dpg, time_type=time_type)
+    df = make_did_cs_CS2021(n_obs=n_obs, dgp_type=dpg, time_type=time_type, lambda_t=lambda_t)
     dml_panel_data = dml.data.DoubleMLPanelData(
         df, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"]
     )

From 5c4d1e25a2c0e9560e6af3f01ac287e933367f81 Mon Sep 17 00:00:00 2001
From: SvenKlaassen
Date: Thu, 12 Jun 2025 12:57:17 +0200
Subject: [PATCH 35/41] update did binary to work with unbalanced panels

---
 doubleml/did/did_binary.py | 7 +-
 ..._binary_external_predictions_unbalanced.py | 93 +++++++++++++++++++
 2 files changed, 97 insertions(+), 3 deletions(-)
 create mode 100644 doubleml/did/tests/test_did_binary_external_predictions_unbalanced.py

diff --git a/doubleml/did/did_binary.py b/doubleml/did/did_binary.py
index a9939c97..6fa19e0d 100644
--- a/doubleml/did/did_binary.py
+++ b/doubleml/did/did_binary.py
@@ -421,9 +421,10 @@ def _preprocess_data(self, g_value, pre_t, eval_t):
         id_col = self._dml_data.id_col
         g_col = self._dml_data.g_col
 
-        # relevent data subset
-        data_subset_indicator = data[t_col].isin([pre_t, eval_t])
-        data_subset = data[data_subset_indicator].sort_values(by=[id_col, t_col])
+        # relevant data subset: only include units which are observed in both periods
+        relevant_time_data = data[data[t_col].isin([pre_t, eval_t])]
+        ids_with_both_periods_filter = 
relevant_time_data.groupby(id_col)[t_col].transform("nunique") == 2 + data_subset = relevant_time_data[ids_with_both_periods_filter].sort_values(by=[id_col, t_col]) # Construct G (treatment group) indicating treatment period in g G_indicator = (data_subset[g_col] == g_value).astype(int) diff --git a/doubleml/did/tests/test_did_binary_external_predictions_unbalanced.py b/doubleml/did/tests/test_did_binary_external_predictions_unbalanced.py new file mode 100644 index 00000000..ffeadb51 --- /dev/null +++ b/doubleml/did/tests/test_did_binary_external_predictions_unbalanced.py @@ -0,0 +1,93 @@ +import math + +import numpy as np +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression + +from doubleml.data import DoubleMLPanelData +from doubleml.did import DoubleMLDIDBinary +from doubleml.did.datasets import make_did_cs_CS2021 +from doubleml.tests._utils import draw_smpls +from doubleml.utils import DMLDummyClassifier, DMLDummyRegressor + + +@pytest.fixture(scope="module", params=["observational", "experimental"]) +def did_score(request): + return request.param + + +@pytest.fixture(scope="module", params=[1, 3]) +def n_rep(request): + return request.param + + +@pytest.fixture(scope="module") +def doubleml_did_panel_fixture(did_score, n_rep): + n_obs = 500 + n_folds = 5 + dgp = 1 + + ext_predictions = {"d": {}} + df = make_did_cs_CS2021(n_obs=n_obs, dgp_type=dgp, time_type="float") + dml_panel_data = DoubleMLPanelData(df, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"]) + + kwargs = { + "obj_dml_data": dml_panel_data, + "g_value": 2, + "t_value_pre": 0, + "t_value_eval": 1, + "score": did_score, + "n_rep": n_rep, + "draw_sample_splitting": False, + } + + dml_did = DoubleMLDIDBinary(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) + all_smpls = draw_smpls(n_obs=dml_did.n_obs_subset, n_folds=n_folds, n_rep=n_rep, groups=dml_did._g_panel) + dml_did.set_sample_splitting(all_smpls) + + np.random.seed(3141) + dml_did.fit(store_predictions=True) + + all_keys = ["ml_g0", "ml_g1"] + for key in all_keys: + ext_predictions["d"][key] = dml_did.predictions[key][:, :, 0] + if did_score == "observational": + ext_predictions["d"]["ml_m"] = dml_did.predictions["ml_m"][:, :, 0] + dml_did_ext = DoubleMLDIDBinary(ml_g=DMLDummyRegressor(), ml_m=DMLDummyClassifier(), **kwargs) + dml_did_ext.set_sample_splitting(all_smpls) + np.random.seed(3141) + dml_did_ext.fit(external_predictions=ext_predictions) + + res_dict = { + "coef": dml_did.coef[0], + "coef_ext": dml_did_ext.coef[0], + "se": dml_did.se[0], + "se_ext": dml_did_ext.se[0], + "score": dml_did.psi, + "score_ext": dml_did_ext.psi, + "dml_did_nuisance_loss": dml_did.nuisance_loss, + "dml_did_ext_nuisance_loss": dml_did_ext.nuisance_loss, + } + + return res_dict + + +@pytest.mark.ci +def test_panel_coef(doubleml_did_panel_fixture): + assert math.isclose(doubleml_did_panel_fixture["coef"], doubleml_did_panel_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-3) + + +@pytest.mark.ci +def test_panel_se(doubleml_did_panel_fixture): + assert math.isclose(doubleml_did_panel_fixture["se"], doubleml_did_panel_fixture["se_ext"], rel_tol=1e-9, abs_tol=1e-3) + + +@pytest.mark.ci +def test_panel_score(doubleml_did_panel_fixture): + assert np.allclose(doubleml_did_panel_fixture["score"], doubleml_did_panel_fixture["score_ext"], rtol=1e-9, atol=1e-3) + + +@pytest.mark.ci +def test_panel_nuisance_loss(doubleml_did_panel_fixture): + for key, value in doubleml_did_panel_fixture["dml_did_nuisance_loss"].items(): + 
assert np.allclose(value, doubleml_did_panel_fixture["dml_did_ext_nuisance_loss"][key], rtol=1e-9, atol=1e-3) From 3fe83ff6683cd6d609cb841dda453ce24a7a2d5c Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Thu, 12 Jun 2025 15:21:07 +0200 Subject: [PATCH 36/41] align subset naming in did binary and cs version --- doubleml/did/did_binary.py | 32 +++++++++---------- doubleml/did/did_cs_binary.py | 26 +++++++-------- .../test_did_binary_external_predictions.py | 4 +-- ..._binary_external_predictions_unbalanced.py | 2 +- .../did/tests/test_did_binary_vs_did_panel.py | 2 +- ...test_did_cs_binary_external_predictions.py | 2 +- doubleml/did/tests/test_return_types.py | 12 +++---- 7 files changed, 40 insertions(+), 40 deletions(-) diff --git a/doubleml/did/did_binary.py b/doubleml/did/did_binary.py index 6fa19e0d..99e18e28 100644 --- a/doubleml/did/did_binary.py +++ b/doubleml/did/did_binary.py @@ -163,10 +163,10 @@ def __init__( # Preprocess data # Y1, Y0 might be needed if we want to support custom estimators and scores; currently only output y_diff - self._panel_data_wide = self._preprocess_data(self._g_value, self._t_value_pre, self._t_value_eval) + self._data_subset = self._preprocess_data(self._g_value, self._t_value_pre, self._t_value_eval) # Handling id values to match pairwise evaluation & simultaneous inference - id_panel_data = self._panel_data_wide[self._dml_data.id_col].values + id_panel_data = self._data_subset[self._dml_data.id_col].values id_original = self._dml_data.id_var_unique if not np.all(np.isin(id_panel_data, id_original)): raise ValueError("The id values in the panel data are not a subset of the original id values.") @@ -177,13 +177,13 @@ def __init__( # Numeric values for positions of the entries in id_panel_data inside id_original # np.nonzero(np.isin(id_original, id_panel_data)) - self._n_obs_subset = self._panel_data_wide.shape[0] # Effective sample size used for resampling - self._n_treated_subset = self._panel_data_wide["G_indicator"].sum() + self._n_obs_subset = self._data_subset.shape[0] # Effective sample size used for resampling + self._n_treated_subset = self._data_subset["G_indicator"].sum() # Save x and y for later ML estimation - self._x_panel = self._panel_data_wide.loc[:, self._dml_data.x_cols].values - self._y_panel = self._panel_data_wide.loc[:, "y_diff"].values - self._g_panel = self._panel_data_wide.loc[:, "G_indicator"].values + self._x_data_subset = self._data_subset.loc[:, self._dml_data.x_cols].values + self._y_data_subset = self._data_subset.loc[:, "y_diff"].values + self._g_data_subset = self._data_subset.loc[:, "G_indicator"].values valid_scores = ["observational", "experimental"] _check_score(self.score, valid_scores, allow_callable=False) @@ -196,7 +196,7 @@ def __init__( ) # set stratication for resampling - self._strata = self._panel_data_wide["G_indicator"] + self._strata = self._data_subset["G_indicator"] self._n_obs_sample_splitting = self.n_obs_subset if draw_sample_splitting: self.draw_sample_splitting() @@ -342,11 +342,11 @@ def anticipation_periods(self): return self._anticipation_periods @property - def panel_data_wide(self): + def data_subset(self): """ The preprocessed panel data in wide format. 
""" - return self._panel_data_wide + return self._data_subset @property def id_positions(self): @@ -470,8 +470,8 @@ def _preprocess_data(self, g_value, pre_t, eval_t): def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): # Here: d is a binary treatment indicator - x, y = check_X_y(self._x_panel, self._y_panel, force_all_finite=False) - x, d = check_X_y(x, self._g_panel, force_all_finite=False) + x, y = check_X_y(self._x_data_subset, self._y_data_subset, force_all_finite=False) + x, d = check_X_y(x, self._g_data_subset, force_all_finite=False) # nuisance g # get train indices for d == 0 smpls_d0, smpls_d1 = _get_cond_smpls(smpls, d) @@ -611,8 +611,8 @@ def _score_elements(self, y, d, g_hat0, g_hat1, m_hat, p_hat): def _nuisance_tuning( self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search ): - x, y = check_X_y(self._x_panel, self._y_panel, force_all_finite=False) - x, d = check_X_y(x, self._g_panel, force_all_finite=False) + x, y = check_X_y(self._x_data_subset, self._y_data_subset, force_all_finite=False) + x, d = check_X_y(x, self._g_data_subset, force_all_finite=False) # get train indices for d == 0 and d == 1 smpls_d0, smpls_d1 = _get_cond_smpls(smpls, d) @@ -676,8 +676,8 @@ def _nuisance_tuning( return res def _sensitivity_element_est(self, preds): - y = self._y_panel - d = self._g_panel + y = self._y_data_subset + d = self._g_data_subset m_hat = _get_id_positions(preds["predictions"]["ml_m"], self.id_positions) g_hat0 = _get_id_positions(preds["predictions"]["ml_g0"], self.id_positions) diff --git a/doubleml/did/did_cs_binary.py b/doubleml/did/did_cs_binary.py index 7788f4b3..9e5ee6c2 100644 --- a/doubleml/did/did_cs_binary.py +++ b/doubleml/did/did_cs_binary.py @@ -97,10 +97,10 @@ def __init__( self._n_obs_subset = self.data_subset.shape[0] # Effective sample size used for resampling # Save x and y for later ML estimation - self._x_data = self.data_subset.loc[:, self._dml_data.x_cols].values - self._y_data = self.data_subset.loc[:, self._dml_data.y_col].values - self._g_data = self.data_subset.loc[:, "G_indicator"].values - self._t_data = self.data_subset.loc[:, "t_indicator"].values + self._x_data_subset = self.data_subset.loc[:, self._dml_data.x_cols].values + self._y_data_subset = self.data_subset.loc[:, self._dml_data.y_col].values + self._g_data_subset = self.data_subset.loc[:, "G_indicator"].values + self._t_data_subset = self.data_subset.loc[:, "t_indicator"].values valid_scores = ["observational", "experimental"] _check_score(self.score, valid_scores, allow_callable=False) @@ -402,9 +402,9 @@ def _estimate_conditional_g( def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): # Here: d is a binary treatment indicator - x, y = check_X_y(X=self._x_data, y=self._y_data, force_all_finite=False) - _, d = check_X_y(x, self._g_data, force_all_finite=False) # (d is the G_indicator) - _, t = check_X_y(x, self._t_data, force_all_finite=False) + x, y = check_X_y(X=self._x_data_subset, y=self._y_data_subset, force_all_finite=False) + _, d = check_X_y(x, self._g_data_subset, force_all_finite=False) # (d is the G_indicator) + _, t = check_X_y(x, self._t_data_subset, force_all_finite=False) # THIS DIFFERS FROM THE PAPER due to stratified splitting this should be the same for each fold # nuisance estimates of the uncond. treatment prob. 
@@ -588,9 +588,9 @@ def _score_elements(self, y, d, t, g_hat_d0_t0, g_hat_d0_t1, g_hat_d1_t0, g_hat_ def _nuisance_tuning( self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search ): - x, y = check_X_y(X=self._x_data, y=self._y_data, force_all_finite=False) - _, d = check_X_y(x, self._g_data, force_all_finite=False) # (d is the G_indicator) - _, t = check_X_y(x, self._t_data, force_all_finite=False) + x, y = check_X_y(X=self._x_data_subset, y=self._y_data_subset, force_all_finite=False) + _, d = check_X_y(x, self._g_data_subset, force_all_finite=False) # (d is the G_indicator) + _, t = check_X_y(x, self._t_data_subset, force_all_finite=False) if scoring_methods is None: scoring_methods = {"ml_g": None, "ml_m": None} @@ -702,9 +702,9 @@ def _nuisance_tuning( return res def _sensitivity_element_est(self, preds): - y = self._y_data - d = self._g_data - t = self._t_data + y = self._y_data_subset + d = self._g_data_subset + t = self._t_data_subset m_hat = _get_id_positions(preds["predictions"]["ml_m"], self.id_positions) g_hat_d0_t0 = _get_id_positions(preds["predictions"]["ml_g_d0_t0"], self.id_positions) diff --git a/doubleml/did/tests/test_did_binary_external_predictions.py b/doubleml/did/tests/test_did_binary_external_predictions.py index 0cb3e055..0a6cf2f0 100644 --- a/doubleml/did/tests/test_did_binary_external_predictions.py +++ b/doubleml/did/tests/test_did_binary_external_predictions.py @@ -40,7 +40,7 @@ def doubleml_did_fixture(did_score, n_rep): } dml_did = DoubleMLDIDBinary(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) - all_smpls = draw_smpls(n_obs, n_folds, n_rep=n_rep, groups=dml_did._g_panel) + all_smpls = draw_smpls(n_obs, n_folds, n_rep=n_rep, groups=dml_did._g_data_subset) dml_did.set_sample_splitting(all_smpls) np.random.seed(3141) @@ -112,7 +112,7 @@ def doubleml_did_panel_fixture(did_score, n_rep): } dml_did = DoubleMLDIDBinary(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) - all_smpls = draw_smpls(n_obs=dml_did.n_obs_subset, n_folds=n_folds, n_rep=n_rep, groups=dml_did._g_panel) + all_smpls = draw_smpls(n_obs=dml_did.n_obs_subset, n_folds=n_folds, n_rep=n_rep, groups=dml_did._g_data_subset) dml_did.set_sample_splitting(all_smpls) np.random.seed(3141) diff --git a/doubleml/did/tests/test_did_binary_external_predictions_unbalanced.py b/doubleml/did/tests/test_did_binary_external_predictions_unbalanced.py index ffeadb51..a921efee 100644 --- a/doubleml/did/tests/test_did_binary_external_predictions_unbalanced.py +++ b/doubleml/did/tests/test_did_binary_external_predictions_unbalanced.py @@ -42,7 +42,7 @@ def doubleml_did_panel_fixture(did_score, n_rep): } dml_did = DoubleMLDIDBinary(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) - all_smpls = draw_smpls(n_obs=dml_did.n_obs_subset, n_folds=n_folds, n_rep=n_rep, groups=dml_did._g_panel) + all_smpls = draw_smpls(n_obs=dml_did.n_obs_subset, n_folds=n_folds, n_rep=n_rep, groups=dml_did._g_data_subset) dml_did.set_sample_splitting(all_smpls) np.random.seed(3141) diff --git a/doubleml/did/tests/test_did_binary_vs_did_panel.py b/doubleml/did/tests/test_did_binary_vs_did_panel.py index 9da81739..426b413c 100644 --- a/doubleml/did/tests/test_did_binary_vs_did_panel.py +++ b/doubleml/did/tests/test_did_binary_vs_did_panel.py @@ -78,7 +78,7 @@ def dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normaliza ) dml_did_binary_obj.fit() - df_wide = dml_did_binary_obj._panel_data_wide.copy() + df_wide = 
dml_did_binary_obj.data_subset.copy() dml_data = dml.data.DoubleMLData(df_wide, y_col="y_diff", d_cols="G_indicator", x_cols=["Z1", "Z2", "Z3", "Z4"]) dml_did_obj = dml.DoubleMLDID( dml_data, diff --git a/doubleml/did/tests/test_did_cs_binary_external_predictions.py b/doubleml/did/tests/test_did_cs_binary_external_predictions.py index 477c6dc7..f6b77f0b 100644 --- a/doubleml/did/tests/test_did_cs_binary_external_predictions.py +++ b/doubleml/did/tests/test_did_cs_binary_external_predictions.py @@ -114,7 +114,7 @@ def doubleml_did_cs_panel_fixture(did_score, n_rep): } dml_did = DoubleMLDIDCSBinary(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) - all_smpls = draw_smpls(n_obs=dml_did.n_obs_subset, n_folds=n_folds, n_rep=n_rep, groups=dml_did._g_data) + all_smpls = draw_smpls(n_obs=dml_did.n_obs_subset, n_folds=n_folds, n_rep=n_rep, groups=dml_did._g_data_subset) dml_did.set_sample_splitting(all_smpls) np.random.seed(3141) diff --git a/doubleml/did/tests/test_return_types.py b/doubleml/did/tests/test_return_types.py index 1b6fa736..683b1dc1 100644 --- a/doubleml/did/tests/test_return_types.py +++ b/doubleml/did/tests/test_return_types.py @@ -122,12 +122,12 @@ def test_panel_return_types(dml_obj, cls): assert isinstance(dml_obj.t_value_pre, (int, np.integer, float, np.floating)) assert isinstance(dml_obj.post_treatment, bool) - # Test panel_data_wide property - assert isinstance(dml_obj.panel_data_wide, pd.DataFrame) - assert dml_obj.panel_data_wide.shape[0] <= N_OBS - assert "G_indicator" in dml_obj.panel_data_wide.columns - assert "C_indicator" in dml_obj.panel_data_wide.columns - assert "y_diff" in dml_obj.panel_data_wide.columns + # Test data_subset property + assert isinstance(dml_obj.data_subset, pd.DataFrame) + assert dml_obj.data_subset.shape[0] <= N_OBS + assert "G_indicator" in dml_obj.data_subset.columns + assert "C_indicator" in dml_obj.data_subset.columns + assert "y_diff" in dml_obj.data_subset.columns # Test id_positions property assert isinstance(dml_obj.id_positions, np.ndarray) From e7a9f5c75e0fd7fc5aff78998012052e7e993f51 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Thu, 12 Jun 2025 16:39:36 +0200 Subject: [PATCH 37/41] update return type tests for did cs binary --- doubleml/did/did_cs_binary.py | 2 +- doubleml/did/tests/test_return_types.py | 58 +++++++++++++++++++++---- 2 files changed, 50 insertions(+), 10 deletions(-) diff --git a/doubleml/did/did_cs_binary.py b/doubleml/did/did_cs_binary.py index 9e5ee6c2..a6005d53 100644 --- a/doubleml/did/did_cs_binary.py +++ b/doubleml/did/did_cs_binary.py @@ -90,7 +90,7 @@ def __init__( # Find position of data subset in original data # These entries should be replaced by nuisance predictions, all others should be set to 0. 
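        # .index.values yields a plain numpy.ndarray (test_return_types.py checks isinstance(dml_obj.id_positions, np.ndarray))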
- self._id_positions = self.data_subset.index + self._id_positions = self.data_subset.index.values # Numeric values for positions of the entries in id_panel_data inside id_original # np.nonzero(np.isin(id_original, id_panel_data)) diff --git a/doubleml/did/tests/test_return_types.py b/doubleml/did/tests/test_return_types.py index 683b1dc1..37105c3e 100644 --- a/doubleml/did/tests/test_return_types.py +++ b/doubleml/did/tests/test_return_types.py @@ -4,8 +4,8 @@ from sklearn.linear_model import Lasso, LogisticRegression from doubleml.data import DoubleMLData, DoubleMLPanelData -from doubleml.did import DoubleMLDID, DoubleMLDIDBinary, DoubleMLDIDCS -from doubleml.did.datasets import make_did_CS2021, make_did_SZ2020 +from doubleml.did import DoubleMLDID, DoubleMLDIDBinary, DoubleMLDIDCS, DoubleMLDIDCSBinary +from doubleml.did.datasets import make_did_CS2021, make_did_cs_CS2021, make_did_SZ2020 from doubleml.utils._check_return_types import ( check_basic_predictions_and_targets, check_basic_property_types_and_shapes, @@ -89,6 +89,17 @@ def test_sensitivity_return_types(fitted_dml_obj): df_panel, y_col="y_binary", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"] ) +# Create a dataset for DoubleMLDIDCSBinary +df_panel_cs = make_did_cs_CS2021(n_obs=N_OBS, dgp_type=1, n_pre_treat_periods=2, n_periods=N_PERIODS, time_type="float") +df_panel_cs["y_binary"] = np.random.binomial(n=1, p=0.5, size=df_panel_cs.shape[0]) +datasets["did_panel_cs"] = DoubleMLPanelData( + df_panel_cs, y_col="y", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"] +) +datasets["did_panel_cs_binary_outcome"] = DoubleMLPanelData( + df_panel_cs, y_col="y_binary", d_cols="d", id_col="id", t_col="t", x_cols=["Z1", "Z2", "Z3", "Z4"] +) + + dml_panel_binary_args = dml_args | { "g_value": 2, "t_value_pre": 0, @@ -106,6 +117,19 @@ def test_sensitivity_return_types(fitted_dml_obj): ), DoubleMLDIDBinary, ), + ( + DoubleMLDIDCSBinary(datasets["did_panel_cs"], ml_g=Lasso(), ml_m=LogisticRegression(), **dml_panel_binary_args), + DoubleMLDIDCSBinary, + ), + ( + DoubleMLDIDCSBinary( + datasets["did_panel_cs_binary_outcome"], + ml_g=LogisticRegression(), + ml_m=LogisticRegression(), + **dml_panel_binary_args, + ), + DoubleMLDIDCSBinary, + ), ] @@ -124,10 +148,14 @@ def test_panel_return_types(dml_obj, cls): # Test data_subset property assert isinstance(dml_obj.data_subset, pd.DataFrame) - assert dml_obj.data_subset.shape[0] <= N_OBS + if isinstance(dml_obj, DoubleMLDIDBinary): + assert dml_obj.data_subset.shape[0] <= N_OBS + assert "y_diff" in dml_obj.data_subset.columns + elif isinstance(dml_obj, DoubleMLDIDCSBinary): + assert dml_obj.data_subset.shape[0] <= N_OBS * 2 + assert "t_indicator" in dml_obj.data_subset.columns assert "G_indicator" in dml_obj.data_subset.columns assert "C_indicator" in dml_obj.data_subset.columns - assert "y_diff" in dml_obj.data_subset.columns # Test id_positions property assert isinstance(dml_obj.id_positions, np.ndarray) @@ -142,7 +170,10 @@ def test_panel_return_types(dml_obj, cls): # Test n_obs property assert isinstance(dml_obj.n_obs, (int, np.integer)) - assert dml_obj.n_obs <= N_OBS + if isinstance(dml_obj, DoubleMLDIDBinary): + assert dml_obj.n_obs <= N_OBS + elif isinstance(dml_obj, DoubleMLDIDCSBinary): + assert dml_obj.n_obs <= N_OBS * N_PERIODS # Test consistency between properties if dml_obj.post_treatment: @@ -161,20 +192,29 @@ def fitted_panel_dml_obj(request): @pytest.mark.ci def test_panel_property_types_and_shapes(fitted_panel_dml_obj): + # n_obs for psi, psi_a, 
psi_b checks within check_basic_property_types_and_shapes + # This should be the number of observations used for the score calculation. + # For DIDBinary, it's n_ids. For DIDCSBinary, it's _n_obs_subset. + # Both are consistently available as fitted_panel_dml_obj.n_obs. + actual_score_dim = (fitted_panel_dml_obj.n_obs, N_REP, N_TREAT) + check_basic_property_types_and_shapes( fitted_panel_dml_obj, - n_obs=N_PERIODS * N_OBS, + n_obs=fitted_panel_dml_obj._dml_data.n_obs, n_treat=N_TREAT, n_rep=N_REP, n_folds=N_FOLDS, n_rep_boot=N_REP_BOOT, - score_dim=(N_OBS, N_REP, N_TREAT), + score_dim=actual_score_dim, # Used for psi shape ) - check_basic_predictions_and_targets(fitted_panel_dml_obj, N_OBS, N_TREAT, N_REP) + + check_basic_predictions_and_targets(fitted_panel_dml_obj, fitted_panel_dml_obj.n_obs, N_TREAT, N_REP) @pytest.mark.ci def test_panel_sensitivity_return_types(fitted_panel_dml_obj): if fitted_panel_dml_obj._sensitivity_implemented: benchmarking_set = [fitted_panel_dml_obj._dml_data.x_cols[0]] - check_sensitivity_return_types(fitted_panel_dml_obj, N_OBS, N_REP, N_TREAT, benchmarking_set=benchmarking_set) + check_sensitivity_return_types( + fitted_panel_dml_obj, fitted_panel_dml_obj.n_obs, N_REP, N_TREAT, benchmarking_set=benchmarking_set + ) From 6bac76e99959d092125abced8caa32ee44bd7c5a Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Fri, 13 Jun 2025 08:34:27 +0200 Subject: [PATCH 38/41] enhance did_multi plotting with anticipation periods and update color palette handling --- doubleml/data/utils/panel_data_utils.py | 50 +++++++++ doubleml/did/did_multi.py | 123 +++++++++++++--------- doubleml/did/tests/test_did_multi_plot.py | 2 +- 3 files changed, 123 insertions(+), 52 deletions(-) diff --git a/doubleml/data/utils/panel_data_utils.py b/doubleml/data/utils/panel_data_utils.py index abd365eb..cc94d39f 100644 --- a/doubleml/data/utils/panel_data_utils.py +++ b/doubleml/data/utils/panel_data_utils.py @@ -1,8 +1,58 @@ +import pandas as pd + valid_datetime_units = {"Y", "M", "D", "h", "m", "s", "ms", "us", "ns"} +# Units that can be used with pd.Timedelta (unambiguous) +timedelta_compatible_units = {"D", "h", "m", "s", "ms", "us", "ns"} + +# Units that require period arithmetic (ambiguous) +period_only_units = {"Y", "M"} + def _is_valid_datetime_unit(unit): if unit not in valid_datetime_units: raise ValueError("Invalid datetime unit.") else: return unit + + +def _is_timedelta_compatible(unit): + """Check if a datetime unit can be used with pd.Timedelta.""" + return unit in timedelta_compatible_units + + +def _subtract_periods_safe(datetime_values, reference_datetime, periods, unit): + """ + Safely subtract periods from datetime values, handling both timedelta-compatible + and period-only units. 
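+
+    For example, "2025-03" minus 2 monthly periods is "2025-01"; pd.Timedelta cannot
+    represent months or years (the units "M" and "Y" are ambiguous), so pd.Period
+    arithmetic is used for those units.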
+ + Parameters + ---------- + datetime_values : pandas.Series or numpy.array + Array of datetime values to compare + reference_datetime : datetime-like + Reference datetime to subtract periods from + periods : int + Number of periods to subtract + unit : str + Datetime unit + + Returns + ------- + numpy.array + Boolean array indicating which datetime_values are >= (reference_datetime - periods) + """ + if periods == 0: + # No anticipation periods, so no datetime arithmetic needed + return datetime_values >= reference_datetime + + if _is_timedelta_compatible(unit): + # Use Timedelta for unambiguous units + period_offset = pd.Timedelta(periods, unit=unit) + return datetime_values >= (reference_datetime - period_offset) + else: + # Use Period arithmetic for ambiguous units like 'M' and 'Y' + ref_period = pd.Period(reference_datetime, freq=unit) + ref_minus_periods = ref_period - periods + datetime_periods = pd.PeriodIndex(datetime_values, freq=unit) + return datetime_periods >= ref_minus_periods diff --git a/doubleml/did/did_multi.py b/doubleml/did/did_multi.py index 646ad41d..cdfe0756 100644 --- a/doubleml/did/did_multi.py +++ b/doubleml/did/did_multi.py @@ -10,6 +10,7 @@ from sklearn.base import clone from doubleml.data import DoubleMLPanelData +from doubleml.data.utils.panel_data_utils import _subtract_periods_safe from doubleml.did.did_aggregation import DoubleMLDIDAggregation from doubleml.did.did_binary import DoubleMLDIDBinary from doubleml.did.did_cs_binary import DoubleMLDIDCSBinary @@ -989,8 +990,9 @@ def plot_effects( first_treated_periods = sorted(df["First Treated"].unique()) n_periods = len(first_treated_periods) - # Set up colors - colors = dict(zip(["pre", "post"], sns.color_palette(color_palette)[:2])) + # Set up colors - ensure 'post' always gets the second color + palette_colors = sns.color_palette(color_palette) + colors = {"pre": palette_colors[0], "post": palette_colors[1], "anticipation": palette_colors[2]} # Check if x-axis is datetime or convert to float is_datetime = pd.api.types.is_datetime64_any_dtype(df["Evaluation Period"]) @@ -1034,9 +1036,20 @@ def plot_effects( Line2D([0], [0], color="red", linestyle=":", alpha=0.7, label="Treatment start"), Line2D([0], [0], color="black", linestyle="--", alpha=0.5, label="Zero effect"), Line2D([0], [0], marker="o", color=colors["pre"], linestyle="None", label="Pre-treatment", markersize=5), - Line2D([0], [0], marker="o", color=colors["post"], linestyle="None", label="Post-treatment", markersize=5), ] - legend_ax.legend(handles=legend_elements, loc="center", ncol=4, mode="expand", borderaxespad=0.0) + + if self.anticipation_periods > 0: + legend_elements.append( + Line2D( + [0], [0], marker="o", color=colors["anticipation"], linestyle="None", label="Anticipation", markersize=5 + ) + ) + + legend_elements.append( + Line2D([0], [0], marker="o", color=colors["post"], linestyle="None", label="Post-treatment", markersize=5) + ) + + legend_ax.legend(handles=legend_elements, loc="center", ncol=len(legend_elements), mode="expand", borderaxespad=0.0) # Set title and layout plt.suptitle(title, y=1.02) @@ -1057,7 +1070,7 @@ def _plot_single_group(self, ax, period_df, period, colors, is_datetime, jitter_ period : int or datetime Treatment period for this group. colors : dict - Dictionary with 'pre' and 'post' color values. + Dictionary with 'pre', 'anticipation' (if applicable), and 'post' color values. is_datetime : bool Whether the x-axis represents datetime values. 
jitter_value : float @@ -1074,56 +1087,64 @@ def _plot_single_group(self, ax, period_df, period, colors, is_datetime, jitter_ ax.axvline(x=period, color="red", linestyle=":", alpha=0.7) ax.axhline(y=0, color="black", linestyle="--", alpha=0.5) - # Split and jitter data - pre_treatment = add_jitter( - period_df[period_df["Pre-Treatment"]], - "Evaluation Period", - is_datetime=is_datetime, - jitter_value=jitter_value, - ) - post_treatment = add_jitter( - period_df[~period_df["Pre-Treatment"]], - "Evaluation Period", - is_datetime=is_datetime, - jitter_value=jitter_value, - ) - - # Plot pre-treatment points - if not pre_treatment.empty: - ax.scatter(pre_treatment["jittered_x"], pre_treatment["Estimate"], color=colors["pre"], alpha=0.8, s=30) - ax.errorbar( - pre_treatment["jittered_x"], - pre_treatment["Estimate"], - yerr=[ - pre_treatment["Estimate"] - pre_treatment["CI Lower"], - pre_treatment["CI Upper"] - pre_treatment["Estimate"], - ], - fmt="o", - capsize=3, - color=colors["pre"], - markersize=4, - markeredgewidth=1, - linewidth=1, + # Categorize periods + if is_datetime: + # For datetime, use safe period arithmetic that handles both timedelta-compatible and period-only units + anticipation_ge_mask = _subtract_periods_safe( + period_df["Evaluation Period"], period, self.anticipation_periods, self._dml_data.datetime_unit ) + anticipation_mask = ( + (self.anticipation_periods > 0) + & period_df["Pre-Treatment"] + & anticipation_ge_mask + & (period_df["Evaluation Period"] < period) + ) + else: + # For numeric periods, simple arithmetic works + anticipation_mask = ( + (self.anticipation_periods > 0) + & period_df["Pre-Treatment"] + & (period_df["Evaluation Period"] >= period - self.anticipation_periods) + & (period_df["Evaluation Period"] < period) + ) + + pre_treatment_mask = period_df["Pre-Treatment"] & ~anticipation_mask + post_treatment_mask = ~period_df["Pre-Treatment"] + + # Define category mappings + categories = [("pre", pre_treatment_mask), ("anticipation", anticipation_mask), ("post", post_treatment_mask)] - # Plot post-treatment points - if not post_treatment.empty: - ax.scatter(post_treatment["jittered_x"], post_treatment["Estimate"], color=colors["post"], alpha=0.8, s=30) - ax.errorbar( - post_treatment["jittered_x"], - post_treatment["Estimate"], - yerr=[ - post_treatment["Estimate"] - post_treatment["CI Lower"], - post_treatment["CI Upper"] - post_treatment["Estimate"], - ], - fmt="o", - capsize=3, - color=colors["post"], - markersize=4, - markeredgewidth=1, - linewidth=1, + # Plot each category + for category_name, mask in categories: + if not mask.any(): + continue + + category_data = add_jitter( + period_df[mask], + "Evaluation Period", + is_datetime=is_datetime, + jitter_value=jitter_value, ) + if not category_data.empty: + ax.scatter( + category_data["jittered_x"], category_data["Estimate"], color=colors[category_name], alpha=0.8, s=30 + ) + ax.errorbar( + category_data["jittered_x"], + category_data["Estimate"], + yerr=[ + category_data["Estimate"] - category_data["CI Lower"], + category_data["CI Upper"] - category_data["Estimate"], + ], + fmt="o", + capsize=3, + color=colors[category_name], + markersize=4, + markeredgewidth=1, + linewidth=1, + ) + # Format axes if is_datetime: period_str = np.datetime64(period, self._dml_data.datetime_unit) diff --git a/doubleml/did/tests/test_did_multi_plot.py b/doubleml/did/tests/test_did_multi_plot.py index bcb8b786..5bcd0aae 100644 --- a/doubleml/did/tests/test_did_multi_plot.py +++ b/doubleml/did/tests/test_did_multi_plot.py @@ 
-130,7 +130,7 @@ def test_plot_effects_color_palette(doubleml_did_fixture): assert isinstance(fig, plt.Figure) # Test with a custom color list - custom_colors = [(1, 0, 0), (0, 1, 0)] # Red and green + custom_colors = [(1, 0, 0), (0, 1, 0), (0, 0, 1)] # Red, Green, Blue fig, _ = dml_obj.plot_effects(color_palette=custom_colors) assert isinstance(fig, plt.Figure) From 77b1a6b53634841e90bb6f2fe848bddad2c038cd Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Fri, 13 Jun 2025 10:19:35 +0200 Subject: [PATCH 39/41] update data summary to include unique IDs count in DoubleMLPanelData --- doubleml/data/panel_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/data/panel_data.py b/doubleml/data/panel_data.py index 59ad531c..4ba659ce 100644 --- a/doubleml/data/panel_data.py +++ b/doubleml/data/panel_data.py @@ -141,7 +141,7 @@ def _data_summary_str(self): f"Id variable: {self.id_col}\n" ) - data_summary += f"No. Observations: {self.n_obs}\n" + data_summary += f"No. Unique Ids: {self.n_ids}\n" f"No. Observations: {self.n_obs}\n" return data_summary @classmethod From e52122f348e222b06b581df323c306825e3fb108 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Mon, 16 Jun 2025 11:04:59 +0200 Subject: [PATCH 40/41] add flexible summary with multiple formats --- doubleml/did/did_binary.py | 63 ++++---------------- doubleml/did/did_cs_binary.py | 65 ++++---------------- doubleml/double_ml.py | 109 +++++++++++++++++++++++----------- doubleml/irm/iivm.py | 15 +---- 4 files changed, 100 insertions(+), 152 deletions(-) diff --git a/doubleml/did/did_binary.py b/doubleml/did/did_binary.py index 99e18e28..99ce7ef9 100644 --- a/doubleml/did/did_binary.py +++ b/doubleml/did/did_binary.py @@ -239,58 +239,17 @@ def __init__( self._sensitivity_implemented = True self._external_predictions_implemented = True - def __str__(self): - class_name = self.__class__.__name__ - header = f"================== {class_name} Object ==================\n" - data_summary = self._dml_data._data_summary_str() - score_info = ( - f"Score function: {str(self.score)}\n" - f"Treatment group: {str(self.g_value)}\n" - f"Pre-treatment period: {str(self.t_value_pre)}\n" - f"Evaluation period: {str(self.t_value_eval)}\n" - f"Control group: {str(self.control_group)}\n" - f"Anticipation periods: {str(self.anticipation_periods)}\n" - f"Effective sample size: {str(self.n_obs_subset)}\n" - ) - learner_info = "" - for key, value in self.learner.items(): - learner_info += f"Learner {key}: {str(value)}\n" - if self.nuisance_loss is not None: - learner_info += "Out-of-sample Performance:\n" - is_classifier = [value for value in self._is_classifier.values()] - is_regressor = [not value for value in is_classifier] - if any(is_regressor): - learner_info += "Regression:\n" - for learner in [key for key, value in self._is_classifier.items() if value is False]: - learner_info += f"Learner {learner} RMSE: {self.nuisance_loss[learner]}\n" - if any(is_classifier): - learner_info += "Classification:\n" - for learner in [key for key, value in self._is_classifier.items() if value is True]: - learner_info += f"Learner {learner} Log Loss: {self.nuisance_loss[learner]}\n" - - if self._is_cluster_data: - resampling_info = ( - f"No. folds per cluster: {self._n_folds_per_cluster}\n" - f"No. folds: {self.n_folds}\n" - f"No. repeated sample splits: {self.n_rep}\n" - ) - else: - resampling_info = f"No. folds: {self.n_folds}\nNo. 
repeated sample splits: {self.n_rep}\n"
-        fit_summary = str(self.summary)
-        res = (
-            header
-            + "\n------------------ Data summary ------------------\n"
-            + data_summary
-            + "\n------------------ Score & algorithm ------------------\n"
-            + score_info
-            + "\n------------------ Machine learner ------------------\n"
-            + learner_info
-            + "\n------------------ Resampling ------------------\n"
-            + resampling_info
-            + "\n------------------ Fit summary ------------------\n"
-            + fit_summary
-        )
-        return res
+    def _format_score_info_str(self):
+        lines = [
+            f"Score function: {str(self.score)}",
+            f"Treatment group: {str(self.g_value)}",
+            f"Pre-treatment period: {str(self.t_value_pre)}",
+            f"Evaluation period: {str(self.t_value_eval)}",
+            f"Control group: {str(self.control_group)}",
+            f"Anticipation periods: {str(self.anticipation_periods)}",
+            f"Effective sample size: {str(self.n_obs_subset)}",
+        ]
+        return "\n".join(lines)

     @property
     def g_value(self):
diff --git a/doubleml/did/did_cs_binary.py b/doubleml/did/did_cs_binary.py
index 7788f4b3..9e5ee6c2 100644
--- a/doubleml/did/did_cs_binary.py
+++ b/doubleml/did/did_cs_binary.py
@@ -156,58 +156,19 @@ def __init__(
         self._sensitivity_implemented = True
         self._external_predictions_implemented = True

-    def __str__(self):
-        class_name = self.__class__.__name__
-        header = f"================== {class_name} Object ==================\n"
-        data_summary = self._dml_data._data_summary_str()
-        score_info = (
-            f"Score function: {str(self.score)}\n"
-            f"Treatment group: {str(self.g_value)}\n"
-            f"Pre-treatment period: {str(self.t_value_pre)}\n"
-            f"Evaluation period: {str(self.t_value_eval)}\n"
-            f"Control group: {str(self.control_group)}\n"
-            f"Anticipation periods: {str(self.anticipation_periods)}\n"
-            f"Effective sample size: {str(self.n_obs_subset)}\n"
-        )
-        learner_info = ""
-        for key, value in self.learner.items():
-            learner_info += f"Learner {key}: {str(value)}\n"
-        if self.nuisance_loss is not None:
-            learner_info += "Out-of-sample Performance:\n"
-            is_classifier = [value for value in self._is_classifier.values()]
-            is_regressor = [not value for value in is_classifier]
-            if any(is_regressor):
-                learner_info += "Regression:\n"
-                for learner in [key for key, value in self._is_classifier.items() if value is False]:
-                    learner_info += f"Learner {learner} RMSE: {self.nuisance_loss[learner]}\n"
-            if any(is_classifier):
-                learner_info += "Classification:\n"
-                for learner in [key for key, value in self._is_classifier.items() if value is True]:
-                    learner_info += f"Learner {learner} Log Loss: {self.nuisance_loss[learner]}\n"
-
-        if self._is_cluster_data:
-            resampling_info = (
-                f"No. folds per cluster: {self._n_folds_per_cluster}\n"
-                f"No. folds: {self.n_folds}\n"
-                f"No. repeated sample splits: {self.n_rep}\n"
-            )
-        else:
-            resampling_info = f"No. folds: {self.n_folds}\nNo. 
repeated sample splits: {self.n_rep}\n" - fit_summary = str(self.summary) - res = ( - header - + "\n------------------ Data summary ------------------\n" - + data_summary - + "\n------------------ Score & algorithm ------------------\n" - + score_info - + "\n------------------ Machine learner ------------------\n" - + learner_info - + "\n------------------ Resampling ------------------\n" - + resampling_info - + "\n------------------ Fit summary ------------------\n" - + fit_summary - ) - return res + def _format_score_info_str(self): + lines = [ + f"Score function: {str(self.score)}", + f"Treatment group: {str(self.g_value)}", + f"Pre-treatment period: {str(self.t_value_pre)}", + f"Evaluation period: {str(self.t_value_eval)}", + f"Control group: {str(self.control_group)}", + f"Anticipation periods: {str(self.anticipation_periods)}", + f"Effective sample size: {str(self.n_obs_subset)}", + ] + return "\n".join(lines) + + # _format_learner_info_str method is inherited from DoubleML base class. @property def g_value(self): diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 88f677ef..72f3b44a 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -110,50 +110,87 @@ def __init__(self, obj_dml_data, n_folds, n_rep, score, draw_sample_splitting): self._i_rep = None self._i_treat = None - def __str__(self): + def _format_header_str(self): class_name = self.__class__.__name__ - header = f"================== {class_name} Object ==================\n" - data_summary = self._dml_data._data_summary_str() - score_info = f"Score function: {str(self.score)}\n" + return f"================== {class_name} Object ==================" + + def _format_score_info_str(self): + return f"Score function: {str(self.score)}" + + def _format_learner_info_str(self): learner_info = "" - for key, value in self.learner.items(): - learner_info += f"Learner {key}: {str(value)}\n" + if self.learner is not None: + for key, value in self.learner.items(): + learner_info += f"Learner {key}: {str(value)}\\n" if self.nuisance_loss is not None: - learner_info += "Out-of-sample Performance:\n" - is_classifier = [value for value in self._is_classifier.values()] - is_regressor = [not value for value in is_classifier] - if any(is_regressor): - learner_info += "Regression:\n" - for learner in [key for key, value in self._is_classifier.items() if value is False]: - learner_info += f"Learner {learner} RMSE: {self.nuisance_loss[learner]}\n" - if any(is_classifier): - learner_info += "Classification:\n" - for learner in [key for key, value in self._is_classifier.items() if value is True]: - learner_info += f"Learner {learner} Log Loss: {self.nuisance_loss[learner]}\n" + learner_info += "Out-of-sample Performance:\\n" + # Check if _is_classifier is populated, otherwise, it might be called before fit + if self._is_classifier: + is_classifier_any = any(self._is_classifier.values()) + is_regressor_any = any(not v for v in self._is_classifier.values()) + + if is_regressor_any: + learner_info += "Regression:\\n" + for learner_name in self.params_names: # Iterate through known learners + if not self._is_classifier.get(learner_name, True): # Default to not regressor if not found + loss_val = self.nuisance_loss.get(learner_name, "N/A") + learner_info += f"Learner {learner_name} RMSE: {loss_val}\\n" + if is_classifier_any: + learner_info += "Classification:\\n" + for learner_name in self.params_names: # Iterate through known learners + if self._is_classifier.get(learner_name, False): # Default to not classifier if not found + 
loss_val = self.nuisance_loss.get(learner_name, "N/A") + learner_info += f"Learner {learner_name} Log Loss: {loss_val}\\n" + else: + learner_info += " (Run .fit() to see out-of-sample performance)\\n" + return learner_info.strip() + def _format_resampling_info_str(self): if self._is_cluster_data: - resampling_info = ( - f"No. folds per cluster: {self._n_folds_per_cluster}\n" - f"No. folds: {self.n_folds}\n" - f"No. repeated sample splits: {self.n_rep}\n" + return ( + f"No. folds per cluster: {self._n_folds_per_cluster}\\\\n" + f"No. folds: {self.n_folds}\\\\n" + f"No. repeated sample splits: {self.n_rep}" ) else: - resampling_info = f"No. folds: {self.n_folds}\nNo. repeated sample splits: {self.n_rep}\n" - fit_summary = str(self.summary) - res = ( - header - + "\n------------------ Data summary ------------------\n" - + data_summary - + "\n------------------ Score & algorithm ------------------\n" - + score_info - + "\n------------------ Machine learner ------------------\n" - + learner_info - + "\n------------------ Resampling ------------------\n" - + resampling_info - + "\n------------------ Fit summary ------------------\n" - + fit_summary + return f"No. folds: {self.n_folds}\\\\nNo. repeated sample splits: {self.n_rep}" + + def _format_additional_info_str(self): + """ + Hook for subclasses to add additional information to the string representation. + Returns an empty string by default. + Subclasses should override this method to provide content. + The content should not include the 'Additional Information' header itself. + """ + return "" + + def __str__(self): + header = self._format_header_str() + # Assumes self._dml_data._data_summary_str() exists and is well-formed + data_summary = self._dml_data._data_summary_str() + score_info = self._format_score_info_str() + learner_info = self._format_learner_info_str() + resampling_info = self._format_resampling_info_str() + fit_summary = str(self.summary) # Assumes self.summary is well-formed + + representation = ( + f"{header}\\n" + f"\\n------------------ Data Summary ------------------\\n" + f"{data_summary}\\n" + f"\\n------------------ Score & Algorithm ------------------\\n" + f"{score_info}\\n" + f"\\n------------------ Machine Learner ------------------\\n" + f"{learner_info}\\n" + f"\\n------------------ Resampling ------------------\\n" + f"{resampling_info}\\n" + f"\\n------------------ Fit Summary ------------------\\n" + f"{fit_summary}" ) - return res + + additional_info = self._format_additional_info_str() + if additional_info: + representation += f"\\n\\n------------------ Additional Information ------------------\\n" f"{additional_info}" + return representation @property def n_folds(self): diff --git a/doubleml/irm/iivm.py b/doubleml/irm/iivm.py index a43c0a03..b3cc11e7 100644 --- a/doubleml/irm/iivm.py +++ b/doubleml/irm/iivm.py @@ -197,22 +197,13 @@ def __init__( self.subgroups = subgroups self._external_predictions_implemented = True - def __str__(self): - parent_str = super().__str__() - - # add robust confset + def _format_additional_info_str(self): if self.framework is None: - confset_str = "" + return "" else: confset = self.robust_confset() formatted_confset = ", ".join([f"[{lower:.4f}, {upper:.4f}]" for lower, upper in confset]) - confset_str = ( - "\n\n--------------- Additional Information ----------------\n" - + f"Robust Confidence Set: {formatted_confset}\n" - ) - - res = parent_str + confset_str - return res + return f"Robust Confidence Set: {formatted_confset}" @property def normalize_ipw(self): From 
bf7e16af8a6b3dde11f7fd80c76549659b1e11a7 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Mon, 16 Jun 2025 12:09:09 +0200 Subject: [PATCH 41/41] fix format --- doubleml/double_ml.py | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 72f3b44a..694968bc 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -121,39 +121,39 @@ def _format_learner_info_str(self): learner_info = "" if self.learner is not None: for key, value in self.learner.items(): - learner_info += f"Learner {key}: {str(value)}\\n" + learner_info += f"Learner {key}: {str(value)}\n" if self.nuisance_loss is not None: - learner_info += "Out-of-sample Performance:\\n" + learner_info += "Out-of-sample Performance:\n" # Check if _is_classifier is populated, otherwise, it might be called before fit if self._is_classifier: is_classifier_any = any(self._is_classifier.values()) is_regressor_any = any(not v for v in self._is_classifier.values()) if is_regressor_any: - learner_info += "Regression:\\n" + learner_info += "Regression:\n" for learner_name in self.params_names: # Iterate through known learners if not self._is_classifier.get(learner_name, True): # Default to not regressor if not found loss_val = self.nuisance_loss.get(learner_name, "N/A") - learner_info += f"Learner {learner_name} RMSE: {loss_val}\\n" + learner_info += f"Learner {learner_name} RMSE: {loss_val}\n" if is_classifier_any: - learner_info += "Classification:\\n" + learner_info += "Classification:\n" for learner_name in self.params_names: # Iterate through known learners if self._is_classifier.get(learner_name, False): # Default to not classifier if not found loss_val = self.nuisance_loss.get(learner_name, "N/A") - learner_info += f"Learner {learner_name} Log Loss: {loss_val}\\n" + learner_info += f"Learner {learner_name} Log Loss: {loss_val}\n" else: - learner_info += " (Run .fit() to see out-of-sample performance)\\n" + learner_info += " (Run .fit() to see out-of-sample performance)\n" return learner_info.strip() def _format_resampling_info_str(self): if self._is_cluster_data: return ( - f"No. folds per cluster: {self._n_folds_per_cluster}\\\\n" - f"No. folds: {self.n_folds}\\\\n" + f"No. folds per cluster: {self._n_folds_per_cluster}\n" + f"No. folds: {self.n_folds}\n" f"No. repeated sample splits: {self.n_rep}" ) else: - return f"No. folds: {self.n_folds}\\\\nNo. repeated sample splits: {self.n_rep}" + return f"No. folds: {self.n_folds}\nNo. 
repeated sample splits: {self.n_rep}" def _format_additional_info_str(self): """ @@ -174,22 +174,22 @@ def __str__(self): fit_summary = str(self.summary) # Assumes self.summary is well-formed representation = ( - f"{header}\\n" - f"\\n------------------ Data Summary ------------------\\n" - f"{data_summary}\\n" - f"\\n------------------ Score & Algorithm ------------------\\n" - f"{score_info}\\n" - f"\\n------------------ Machine Learner ------------------\\n" - f"{learner_info}\\n" - f"\\n------------------ Resampling ------------------\\n" - f"{resampling_info}\\n" - f"\\n------------------ Fit Summary ------------------\\n" + f"{header}\n" + f"\n------------------ Data Summary ------------------\n" + f"{data_summary}\n" + f"\n------------------ Score & Algorithm ------------------\n" + f"{score_info}\n" + f"\n------------------ Machine Learner ------------------\n" + f"{learner_info}\n" + f"\n------------------ Resampling ------------------\n" + f"{resampling_info}\n" + f"\n------------------ Fit Summary ------------------\n" f"{fit_summary}" ) additional_info = self._format_additional_info_str() if additional_info: - representation += f"\\n\\n------------------ Additional Information ------------------\\n" f"{additional_info}" + representation += f"\n\n------------------ Additional Information ------------------\n" f"{additional_info}" return representation @property
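# Note: _format_additional_info_str is the designated extension hook; a subclass that
# returns a non-empty string (as DoubleMLIIVM does above with its robust confidence set)
# gets an "Additional Information" section appended to its __str__ output.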