diff --git a/doubleml/double_ml_iivm.py b/doubleml/double_ml_iivm.py
index 29313bbd..b95fd069 100644
--- a/doubleml/double_ml_iivm.py
+++ b/doubleml/double_ml_iivm.py
@@ -308,7 +308,9 @@ def _score_elements(self, y, z, d, g_hat0, g_hat1, m_hat, r_hat0, r_hat1, smpls)
                             - np.divide(np.multiply(1.0-z, w_hat0), 1.0 - m_hat))
         else:
             assert callable(self.score)
-            psi_a, psi_b = self.score(y, z, d, g_hat0, g_hat1, m_hat, r_hat0, r_hat1, smpls)
+            psi_a, psi_b = self.score(y=y, z=z, d=d,
+                                      g_hat0=g_hat0, g_hat1=g_hat1, m_hat=m_hat, r_hat0=r_hat0, r_hat1=r_hat1,
+                                      smpls=smpls)
 
         return psi_a, psi_b
diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py
index ccdb2683..4ecd2b78 100644
--- a/doubleml/double_ml_irm.py
+++ b/doubleml/double_ml_irm.py
@@ -254,7 +254,9 @@ def _score_elements(self, y, d, g_hat0, g_hat1, m_hat, smpls):
             psi_a = - np.divide(d, p_hat)
         else:
             assert callable(self.score)
-            psi_a, psi_b = self.score(y, d, g_hat0, g_hat1, m_hat, smpls)
+            psi_a, psi_b = self.score(y=y, d=d,
+                                      g_hat0=g_hat0, g_hat1=g_hat1, m_hat=m_hat,
+                                      smpls=smpls)
 
         return psi_a, psi_b
diff --git a/doubleml/double_ml_pliv.py b/doubleml/double_ml_pliv.py
index b5da0564..8d66d240 100644
--- a/doubleml/double_ml_pliv.py
+++ b/doubleml/double_ml_pliv.py
@@ -5,10 +5,29 @@
 from sklearn.linear_model import LinearRegression
 from sklearn.dummy import DummyRegressor
 
+import warnings
+from functools import wraps
+
 from .double_ml import DoubleML
 from ._utils import _dml_cv_predict, _dml_tune, _check_finite_predictions
 
 
+# To be removed in version 0.6.0
+def changed_api_decorator(f):
+    @wraps(f)
+    def wrapper(*args, **kwds):
+        ml_l_missing = (len(set(kwds).intersection({'obj_dml_data', 'ml_l', 'ml_m', 'ml_r'})) + len(args)) < 5
+        if ml_l_missing & ('ml_g' in kwds):
+            warnings.warn(("The required positional argument ml_g was renamed to ml_l. "
+                           "Please adapt the argument name accordingly. "
+                           "ml_g is redirected to ml_l. "
+                           "The redirection will be removed in a future version."),
+                          DeprecationWarning, stacklevel=2)
+            kwds['ml_l'] = kwds.pop('ml_g')
+        return f(*args, **kwds)
+    return wrapper
+
+
 class DoubleMLPLIV(DoubleML):
     """Double machine learning for partially linear IV regression models
 
@@ -17,9 +36,9 @@ class DoubleMLPLIV(DoubleML):
     obj_dml_data : :class:`DoubleMLData` object
         The :class:`DoubleMLData` object providing the data and specifying the variables for the causal model.
 
-    ml_g : estimator implementing ``fit()`` and ``predict()``
+    ml_l : estimator implementing ``fit()`` and ``predict()``
         A machine learner implementing ``fit()`` and ``predict()`` methods (e.g.
-        :py:class:`sklearn.ensemble.RandomForestRegressor`) for the nuisance function :math:`g_0(X) = E[Y|X]`.
+        :py:class:`sklearn.ensemble.RandomForestRegressor`) for the nuisance function :math:`\\ell_0(X) = E[Y|X]`.
 
     ml_m : estimator implementing ``fit()`` and ``predict()``
         A machine learner implementing ``fit()`` and ``predict()`` methods (e.g.
@@ -29,6 +48,13 @@ class DoubleMLPLIV(DoubleML):
         A machine learner implementing ``fit()`` and ``predict()`` methods (e.g.
         :py:class:`sklearn.ensemble.RandomForestRegressor`) for the nuisance function :math:`r_0(X) = E[D|X]`.
 
+    ml_g : estimator implementing ``fit()`` and ``predict()``
+        A machine learner implementing ``fit()`` and ``predict()`` methods (e.g.
+        :py:class:`sklearn.ensemble.RandomForestRegressor`) for the nuisance function
+        :math:`g_0(X) = E[Y - D \\theta_0|X]`.
+        Note: The learner `ml_g` is only required for the score ``'IV-type'``.
Optionally, it can be specified and + estimated for callable scores. + n_folds : int Number of folds. Default is ``5``. @@ -38,8 +64,9 @@ class DoubleMLPLIV(DoubleML): Default is ``1``. score : str or callable - A str (``'partialling out'`` is the only choice) specifying the score function - or a callable object / function with signature ``psi_a, psi_b = score(y, z, d, g_hat, m_hat, r_hat, smpls)``. + A str (``'partialling out'`` or ``'IV-type'``) specifying the score function + or a callable object / function with signature + ``psi_a, psi_b = score(y, z, d, l_hat, m_hat, r_hat, g_hat, smpls)``. Default is ``'partialling out'``. dml_procedure : str @@ -63,12 +90,12 @@ class DoubleMLPLIV(DoubleML): >>> from sklearn.base import clone >>> np.random.seed(3141) >>> learner = RandomForestRegressor(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2) - >>> ml_g = clone(learner) + >>> ml_l = clone(learner) >>> ml_m = clone(learner) >>> ml_r = clone(learner) >>> data = make_pliv_CHS2015(alpha=0.5, n_obs=500, dim_x=20, dim_z=1, return_type='DataFrame') >>> obj_dml_data = dml.DoubleMLData(data, 'y', 'd', z_cols='Z1') - >>> dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_data, ml_g, ml_m, ml_r) + >>> dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_data, ml_l, ml_m, ml_r) >>> dml_pliv_obj.fit().summary coef std err t P>|t| 2.5 % 97.5 % d 0.522753 0.082263 6.354688 2.088504e-10 0.361521 0.683984 @@ -88,11 +115,13 @@ class DoubleMLPLIV(DoubleML): :math:`X = (X_1, \\ldots, X_p)` consists of other confounding covariates, and :math:`\\zeta` and :math:`V` are stochastic errors. """ + @changed_api_decorator def __init__(self, obj_dml_data, - ml_g, + ml_l, ml_m, ml_r, + ml_g=None, n_folds=5, n_rep=1, score='partialling out', @@ -108,22 +137,35 @@ def __init__(self, apply_cross_fitting) self._check_data(self._dml_data) - self._check_score(self.score) self.partialX = True self.partialZ = False - _ = self._check_learner(ml_g, 'ml_g', regressor=True, classifier=False) + self._check_score(self.score) + _ = self._check_learner(ml_l, 'ml_l', regressor=True, classifier=False) _ = self._check_learner(ml_m, 'ml_m', regressor=True, classifier=False) _ = self._check_learner(ml_r, 'ml_r', regressor=True, classifier=False) - self._learner = {'ml_g': ml_g, 'ml_m': ml_m, 'ml_r': ml_r} - self._predict_method = {'ml_g': 'predict', 'ml_m': 'predict', 'ml_r': 'predict'} + self._learner = {'ml_l': ml_l, 'ml_m': ml_m, 'ml_r': ml_r} + if ml_g is not None: + if (isinstance(self.score, str) & (self.score == 'IV-type')) | callable(self.score): + _ = self._check_learner(ml_g, 'ml_g', regressor=True, classifier=False) + self._learner['ml_g'] = ml_g + else: + assert (isinstance(self.score, str) & (self.score == 'partialling out')) + warnings.warn(('A learner ml_g has been provided for score = "partialling out" but will be ignored. 
"' + 'A learner ml_g is not required for estimation.')) + elif isinstance(self.score, str) & (self.score == 'IV-type'): + raise ValueError("For score = 'IV-type', learners ml_l, ml_m, ml_r and ml_g need to be specified.") + self._predict_method = {'ml_l': 'predict', 'ml_m': 'predict', 'ml_r': 'predict'} + if 'ml_g' in self._learner: + self._predict_method['ml_g'] = 'predict' self._initialize_ml_nuisance_params() @classmethod def _partialX(cls, obj_dml_data, - ml_g, + ml_l, ml_m, ml_r, + ml_g=None, n_folds=5, n_rep=1, score='partialling out', @@ -131,9 +173,10 @@ def _partialX(cls, draw_sample_splitting=True, apply_cross_fitting=True): obj = cls(obj_dml_data, - ml_g, + ml_l, ml_m, ml_r, + ml_g, n_folds, n_rep, score, @@ -141,14 +184,14 @@ def _partialX(cls, draw_sample_splitting, apply_cross_fitting) obj._check_data(obj._dml_data) - obj._check_score(obj.score) obj.partialX = True obj.partialZ = False - _ = obj._check_learner(ml_g, 'ml_g', regressor=True, classifier=False) + obj._check_score(obj.score) + _ = obj._check_learner(ml_l, 'ml_l', regressor=True, classifier=False) _ = obj._check_learner(ml_m, 'ml_m', regressor=True, classifier=False) _ = obj._check_learner(ml_r, 'ml_r', regressor=True, classifier=False) - obj._learner = {'ml_g': ml_g, 'ml_m': ml_m, 'ml_r': ml_r} - obj._predict_method = {'ml_g': 'predict', 'ml_m': 'predict', 'ml_r': 'predict'} + obj._learner = {'ml_l': ml_l, 'ml_m': ml_m, 'ml_r': ml_r} + obj._predict_method = {'ml_l': 'predict', 'ml_m': 'predict', 'ml_r': 'predict'} obj._initialize_ml_nuisance_params() return obj @@ -162,11 +205,12 @@ def _partialZ(cls, dml_procedure='dml2', draw_sample_splitting=True, apply_cross_fitting=True): - # to pass the checks for the learners, we temporarily set ml_g and ml_m to DummyRegressor() + # to pass the checks for the learners, we temporarily set ml_l and ml_m to DummyRegressor() obj = cls(obj_dml_data, DummyRegressor(), DummyRegressor(), ml_r, + None, n_folds, n_rep, score, @@ -174,9 +218,9 @@ def _partialZ(cls, draw_sample_splitting, apply_cross_fitting) obj._check_data(obj._dml_data) - obj._check_score(obj.score) obj.partialX = False obj.partialZ = True + obj._check_score(obj.score) _ = obj._check_learner(ml_r, 'ml_r', regressor=True, classifier=False) obj._learner = {'ml_r': ml_r} obj._predict_method = {'ml_r': 'predict'} @@ -186,7 +230,7 @@ def _partialZ(cls, @classmethod def _partialXZ(cls, obj_dml_data, - ml_g, + ml_l, ml_m, ml_r, n_folds=5, @@ -196,9 +240,10 @@ def _partialXZ(cls, draw_sample_splitting=True, apply_cross_fitting=True): obj = cls(obj_dml_data, - ml_g, + ml_l, ml_m, ml_r, + None, n_folds, n_rep, score, @@ -206,41 +251,34 @@ def _partialXZ(cls, draw_sample_splitting, apply_cross_fitting) obj._check_data(obj._dml_data) - obj._check_score(obj.score) obj.partialX = True obj.partialZ = True - _ = obj._check_learner(ml_g, 'ml_g', regressor=True, classifier=False) + obj._check_score(obj.score) + _ = obj._check_learner(ml_l, 'ml_l', regressor=True, classifier=False) _ = obj._check_learner(ml_m, 'ml_m', regressor=True, classifier=False) _ = obj._check_learner(ml_r, 'ml_r', regressor=True, classifier=False) - obj._learner = {'ml_g': ml_g, 'ml_m': ml_m, 'ml_r': ml_r} - obj._predict_method = {'ml_g': 'predict', 'ml_m': 'predict', 'ml_r': 'predict'} + obj._learner = {'ml_l': ml_l, 'ml_m': ml_m, 'ml_r': ml_r} + obj._predict_method = {'ml_l': 'predict', 'ml_m': 'predict', 'ml_r': 'predict'} obj._initialize_ml_nuisance_params() return obj def _initialize_ml_nuisance_params(self): - if self.partialX & (not self.partialZ): - if 
self._dml_data.n_instr == 1: - valid_learner = ['ml_g', 'ml_m', 'ml_r'] - else: - valid_learner = ['ml_g', 'ml_r'] + ['ml_m_' + z_col for z_col in self._dml_data.z_cols] - elif (not self.partialX) & self.partialZ: - valid_learner = ['ml_r'] + if self.partialX & (not self.partialZ) & (self._dml_data.n_instr > 1): + param_names = ['ml_l', 'ml_r'] + ['ml_m_' + z_col for z_col in self._dml_data.z_cols] else: - assert (self.partialX & self.partialZ) - valid_learner = ['ml_g', 'ml_m', 'ml_r'] + param_names = self._learner.keys() self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} - for learner in valid_learner} + for learner in param_names} def _check_score(self, score): if isinstance(score, str): - valid_score = ['partialling out'] - # check whether its worth implementing the IV_type as well - # In CCDHNR equation (4.7) a score of this type is provided; - # however in the following paragraph it is explained that one might - # still need to estimate the partialling out type first + if self.partialX & (not self.partialZ) & (self._dml_data.n_instr == 1): + valid_score = ['partialling out', 'IV-type'] + else: + valid_score = ['partialling out'] if score not in valid_score: raise ValueError('Invalid score ' + score + '. ' + - 'Valid score ' + 'partialling out.') + 'Valid score ' + ' or '.join(valid_score) + '.') else: if not callable(score): raise TypeError('score should be either a string or a callable. ' @@ -255,6 +293,17 @@ def _check_data(self, obj_dml_data): 'use DoubleMLPLR instead of DoubleMLPLIV.') return + # To be removed in version 0.6.0 + def set_ml_nuisance_params(self, learner, treat_var, params): + if isinstance(self.score, str) & (self.score == 'partialling out') & (learner == 'ml_g'): + warnings.warn(("Learner ml_g was renamed to ml_l. " + "Please adapt the argument learner accordingly. " + "The provided parameters are set for ml_l. 
" + "The redirection will be removed in a future version."), + DeprecationWarning, stacklevel=2) + learner = 'ml_l' + super(DoubleMLPLIV, self).set_ml_nuisance_params(learner, treat_var, params) + def _nuisance_est(self, smpls, n_jobs_cv): if self.partialX & (not self.partialZ): psi_a, psi_b, preds = self._nuisance_est_partial_x(smpls, n_jobs_cv) @@ -287,10 +336,10 @@ def _nuisance_est_partial_x(self, smpls, n_jobs_cv): x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) - # nuisance g - g_hat = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g'), method=self._predict_method['ml_g']) - _check_finite_predictions(g_hat, self._learner['ml_g'], 'ml_g', smpls) + # nuisance l + l_hat = _dml_cv_predict(self._learner['ml_l'], x, y, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_l'), method=self._predict_method['ml_l']) + _check_finite_predictions(l_hat, self._learner['ml_l'], 'ml_l', smpls) # nuisance m if self._dml_data.n_instr == 1: @@ -316,16 +365,29 @@ def _nuisance_est_partial_x(self, smpls, n_jobs_cv): est_params=self._get_params('ml_r'), method=self._predict_method['ml_r']) _check_finite_predictions(r_hat, self._learner['ml_r'], 'ml_r', smpls) - psi_a, psi_b = self._score_elements(y, z, d, g_hat, m_hat, r_hat, smpls) - preds = {'ml_g': g_hat, + g_hat = None + if (self._dml_data.n_instr == 1) & ('ml_g' in self._learner): + # an estimate of g is obtained for the IV-type score and callable scores + # get an initial estimate for theta using the partialling out score + psi_a = -np.multiply(d - r_hat, z - m_hat) + psi_b = np.multiply(z - m_hat, y - l_hat) + theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a) + # nuisance g + g_hat = _dml_cv_predict(self._learner['ml_g'], x, y - theta_initial * d, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_g'), method=self._predict_method['ml_g']) + _check_finite_predictions(g_hat, self._learner['ml_g'], 'ml_g', smpls) + + psi_a, psi_b = self._score_elements(y, z, d, l_hat, m_hat, r_hat, g_hat, smpls) + preds = {'ml_l': l_hat, 'ml_m': m_hat, - 'ml_r': r_hat} + 'ml_r': r_hat, + 'ml_g': g_hat} return psi_a, psi_b, preds - def _score_elements(self, y, z, d, g_hat, m_hat, r_hat, smpls): + def _score_elements(self, y, z, d, l_hat, m_hat, r_hat, g_hat, smpls): # compute residuals - u_hat = y - g_hat + u_hat = y - l_hat w_hat = d - r_hat v_hat = z - m_hat @@ -338,11 +400,16 @@ def _score_elements(self, y, z, d, g_hat, m_hat, r_hat, smpls): r_hat_tilde = reg.predict(v_hat) if isinstance(self.score, str): - assert self.score == 'partialling out' if self._dml_data.n_instr == 1: - psi_a = -np.multiply(w_hat, v_hat) - psi_b = np.multiply(v_hat, u_hat) + if self.score == 'partialling out': + psi_a = -np.multiply(w_hat, v_hat) + psi_b = np.multiply(v_hat, u_hat) + else: + assert self.score == 'IV-type' + psi_a = -np.multiply(v_hat, d) + psi_b = np.multiply(v_hat, y - g_hat) else: + assert self.score == 'partialling out' psi_a = -np.multiply(w_hat, r_hat_tilde) psi_b = np.multiply(r_hat_tilde, u_hat) else: @@ -352,8 +419,9 @@ def _score_elements(self, y, z, d, g_hat, m_hat, r_hat, smpls): 'with several instruments.') else: assert self._dml_data.n_instr == 1 - psi_a, psi_b = self.score(y, z, d, - g_hat, m_hat, r_hat, smpls) + psi_a, psi_b = self.score(y=y, z=z, d=d, + l_hat=l_hat, m_hat=m_hat, r_hat=r_hat, g_hat=g_hat, + smpls=smpls) return psi_a, psi_b @@ -389,10 +457,10 @@ def _nuisance_est_partial_xz(self, smpls, n_jobs_cv): x, d = check_X_y(x, self._dml_data.d, 
force_all_finite=False) - # nuisance g - g_hat = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g'), method=self._predict_method['ml_g']) - _check_finite_predictions(g_hat, self._learner['ml_g'], 'ml_g', smpls) + # nuisance l + l_hat = _dml_cv_predict(self._learner['ml_l'], x, y, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_l'), method=self._predict_method['ml_l']) + _check_finite_predictions(l_hat, self._learner['ml_l'], 'ml_l', smpls) # nuisance m m_hat, m_hat_on_train = _dml_cv_predict(self._learner['ml_m'], xz, d, smpls=smpls, n_jobs=n_jobs_cv, @@ -406,7 +474,7 @@ def _nuisance_est_partial_xz(self, smpls, n_jobs_cv): _check_finite_predictions(m_hat_tilde, self._learner['ml_r'], 'ml_r', smpls) # compute residuals - u_hat = y - g_hat + u_hat = y - l_hat w_hat = d - m_hat_tilde if isinstance(self.score, str): @@ -417,12 +485,45 @@ def _nuisance_est_partial_xz(self, smpls, n_jobs_cv): assert callable(self.score) raise NotImplementedError('Callable score not implemented for DoubleMLPLIV.partialXZ.') - preds = {'ml_g': g_hat, + preds = {'ml_l': l_hat, 'ml_m': m_hat, 'ml_r': m_hat_tilde} return psi_a, psi_b, preds + # To be removed in version 0.6.0 + def tune(self, + param_grids, + tune_on_folds=False, + scoring_methods=None, # if None the estimator's score method is used + n_folds_tune=5, + search_mode='grid_search', + n_iter_randomized_search=100, + n_jobs_cv=None, + set_as_params=True, + return_tune_res=False): + + if isinstance(self.score, str) and (self.score == 'partialling out') and (param_grids is not None) and \ + ('ml_g' in param_grids) and ('ml_l' not in param_grids): + warnings.warn(("Learner ml_g was renamed to ml_l. " + "Please adapt the key of param_grids accordingly. " + "The provided param_grids for ml_g are set for ml_l. " + "The redirection will be removed in a future version."), + DeprecationWarning, stacklevel=2) + param_grids['ml_l'] = param_grids.pop('ml_g') + + if isinstance(self.score, str) and (self.score == 'partialling out') and (scoring_methods is not None) and \ + ('ml_g' in scoring_methods) and ('ml_l' not in scoring_methods): + warnings.warn(("Learner ml_g was renamed to ml_l. " + "Please adapt the key of scoring_methods accordingly. " + "The provided scoring_methods for ml_g are set for ml_l. 
" + "The redirection will be removed in a future version."), + DeprecationWarning, stacklevel=2) + scoring_methods['ml_l'] = scoring_methods.pop('ml_g') + + super(DoubleMLPLIV, self).tune(param_grids, tune_on_folds, scoring_methods, n_folds_tune, search_mode, + n_iter_randomized_search, n_jobs_cv, set_as_params, return_tune_res) + def _nuisance_tuning_partial_x(self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search): x, y = check_X_y(self._dml_data.x, self._dml_data.y, @@ -431,13 +532,14 @@ def _nuisance_tuning_partial_x(self, smpls, param_grids, scoring_methods, n_fold force_all_finite=False) if scoring_methods is None: - scoring_methods = {'ml_g': None, + scoring_methods = {'ml_l': None, 'ml_m': None, - 'ml_r': None} + 'ml_r': None, + 'ml_g': None} train_inds = [train_index for (train_index, _) in smpls] - g_tune_res = _dml_tune(y, x, train_inds, - self._learner['ml_g'], param_grids['ml_g'], scoring_methods['ml_g'], + l_tune_res = _dml_tune(y, x, train_inds, + self._learner['ml_l'], param_grids['ml_l'], scoring_methods['ml_l'], n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) if self._dml_data.n_instr > 1: @@ -464,22 +566,51 @@ def _nuisance_tuning_partial_x(self, smpls, param_grids, scoring_methods, n_fold self._learner['ml_r'], param_grids['ml_r'], scoring_methods['ml_r'], n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) - g_best_params = [xx.best_params_ for xx in g_tune_res] + l_best_params = [xx.best_params_ for xx in l_tune_res] r_best_params = [xx.best_params_ for xx in r_tune_res] if self._dml_data.n_instr > 1: - params = {'ml_g': g_best_params, + params = {'ml_l': l_best_params, 'ml_r': r_best_params} for instr_var in self._dml_data.z_cols: params['ml_m_' + instr_var] = [xx.best_params_ for xx in m_tune_res[instr_var]] + tune_res = {'l_tune': l_tune_res, + 'm_tune': m_tune_res, + 'r_tune': r_tune_res} else: m_best_params = [xx.best_params_ for xx in m_tune_res] - params = {'ml_g': g_best_params, - 'ml_m': m_best_params, - 'ml_r': r_best_params} - - tune_res = {'g_tune': g_tune_res, - 'm_tune': m_tune_res, - 'r_tune': r_tune_res} + # an ML model for g is obtained for the IV-type score and callable scores + if 'ml_g' in self._learner: + # construct an initial theta estimate from the tuned models using the partialling out score + l_hat = np.full_like(y, np.nan) + m_hat = np.full_like(z, np.nan) + r_hat = np.full_like(d, np.nan) + for idx, (train_index, _) in enumerate(smpls): + l_hat[train_index] = l_tune_res[idx].predict(x[train_index, :]) + m_hat[train_index] = m_tune_res[idx].predict(x[train_index, :]) + r_hat[train_index] = r_tune_res[idx].predict(x[train_index, :]) + psi_a = -np.multiply(d - r_hat, z - m_hat) + psi_b = np.multiply(z - m_hat, y - l_hat) + theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a) + g_tune_res = _dml_tune(y - theta_initial * d, x, train_inds, + self._learner['ml_g'], param_grids['ml_g'], scoring_methods['ml_g'], + n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) + g_best_params = [xx.best_params_ for xx in g_tune_res] + + params = {'ml_l': l_best_params, + 'ml_m': m_best_params, + 'ml_r': r_best_params, + 'ml_g': g_best_params} + tune_res = {'l_tune': l_tune_res, + 'm_tune': m_tune_res, + 'r_tune': r_tune_res, + 'g_tune': g_tune_res} + else: + params = {'ml_l': l_best_params, + 'ml_m': m_best_params, + 'ml_r': r_best_params} + tune_res = {'l_tune': l_tune_res, + 'm_tune': m_tune_res, + 'r_tune': r_tune_res} res = {'params': params, 'tune_res': tune_res} 
@@ -522,13 +653,13 @@ def _nuisance_tuning_partial_xz(self, smpls, param_grids, scoring_methods, n_fol force_all_finite=False) if scoring_methods is None: - scoring_methods = {'ml_g': None, + scoring_methods = {'ml_l': None, 'ml_m': None, 'ml_r': None} train_inds = [train_index for (train_index, _) in smpls] - g_tune_res = _dml_tune(y, x, train_inds, - self._learner['ml_g'], param_grids['ml_g'], scoring_methods['ml_g'], + l_tune_res = _dml_tune(y, x, train_inds, + self._learner['ml_l'], param_grids['ml_l'], scoring_methods['ml_l'], n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) m_tune_res = _dml_tune(d, xz, train_inds, self._learner['ml_m'], param_grids['ml_m'], scoring_methods['ml_m'], @@ -550,15 +681,15 @@ def _nuisance_tuning_partial_xz(self, smpls, param_grids, scoring_methods, n_fol n_iter=n_iter_randomized_search) r_tune_res.append(r_grid_search.fit(x[train_index, :], m_hat)) - g_best_params = [xx.best_params_ for xx in g_tune_res] + l_best_params = [xx.best_params_ for xx in l_tune_res] m_best_params = [xx.best_params_ for xx in m_tune_res] r_best_params = [xx.best_params_ for xx in r_tune_res] - params = {'ml_g': g_best_params, + params = {'ml_l': l_best_params, 'ml_m': m_best_params, 'ml_r': r_best_params} - tune_res = {'g_tune': g_tune_res, + tune_res = {'l_tune': l_tune_res, 'm_tune': m_tune_res, 'r_tune': r_tune_res} diff --git a/doubleml/double_ml_plr.py b/doubleml/double_ml_plr.py index c5d4432c..5d25600e 100644 --- a/doubleml/double_ml_plr.py +++ b/doubleml/double_ml_plr.py @@ -1,11 +1,31 @@ import numpy as np from sklearn.utils import check_X_y from sklearn.utils.multiclass import type_of_target +from sklearn.base import clone + +import warnings +from functools import wraps from .double_ml import DoubleML from ._utils import _dml_cv_predict, _dml_tune, _check_finite_predictions +# To be removed in version 0.6.0 +def changed_api_decorator(f): + @wraps(f) + def wrapper(*args, **kwds): + ml_l_missing = (len(set(kwds).intersection({'obj_dml_data', 'ml_l', 'ml_m'})) + len(args)) < 4 + if ml_l_missing & ('ml_g' in kwds): + warnings.warn(("The required positional argument ml_g was renamed to ml_l. " + "Please adapt the argument name accordingly. " + "ml_g is redirected to ml_l. " + "The redirection will be removed in a future version."), + DeprecationWarning, stacklevel=2) + kwds['ml_l'] = kwds.pop('ml_g') + return f(*args, **kwds) + return wrapper + + class DoubleMLPLR(DoubleML): """Double machine learning for partially linear regression models @@ -14,9 +34,9 @@ class DoubleMLPLR(DoubleML): obj_dml_data : :class:`DoubleMLData` object The :class:`DoubleMLData` object providing the data and specifying the variables for the causal model. - ml_g : estimator implementing ``fit()`` and ``predict()`` + ml_l : estimator implementing ``fit()`` and ``predict()`` A machine learner implementing ``fit()`` and ``predict()`` methods (e.g. - :py:class:`sklearn.ensemble.RandomForestRegressor`) for the nuisance function :math:`g_0(X) = E[Y|X]`. + :py:class:`sklearn.ensemble.RandomForestRegressor`) for the nuisance function :math:`\\ell_0(X) = E[Y|X]`. ml_m : estimator implementing ``fit()`` and ``predict()`` A machine learner implementing ``fit()`` and ``predict()`` methods (e.g. @@ -25,6 +45,13 @@ class DoubleMLPLR(DoubleML): ``predict_proba()`` can also be specified. If :py:func:`sklearn.base.is_classifier` returns ``True``, ``predict_proba()`` is used otherwise ``predict()``. 
+    ml_g : estimator implementing ``fit()`` and ``predict()``
+        A machine learner implementing ``fit()`` and ``predict()`` methods (e.g.
+        :py:class:`sklearn.ensemble.RandomForestRegressor`) for the nuisance function
+        :math:`g_0(X) = E[Y - D \\theta_0|X]`.
+        Note: The learner `ml_g` is only required for the score ``'IV-type'``. Optionally, it can be specified and
+        estimated for callable scores.
+
     n_folds : int
         Number of folds.
         Default is ``5``.
@@ -35,7 +62,7 @@ class DoubleMLPLR(DoubleML):
 
     score : str or callable
         A str (``'partialling out'`` or ``'IV-type'``) specifying the score function
-        or a callable object / function with signature ``psi_a, psi_b = score(y, d, g_hat, m_hat, smpls)``.
+        or a callable object / function with signature ``psi_a, psi_b = score(y, d, l_hat, m_hat, g_hat, smpls)``.
         Default is ``'partialling out'``.
 
     dml_procedure : str
@@ -81,10 +108,12 @@ class DoubleMLPLR(DoubleML):
     The high-dimensional vector :math:`X = (X_1, \\ldots, X_p)` consists of other confounding covariates,
     and :math:`\\zeta` and :math:`V` are stochastic errors.
     """
+    @changed_api_decorator
     def __init__(self,
                  obj_dml_data,
-                 ml_g,
+                 ml_l,
                  ml_m,
+                 ml_g=None,
                  n_folds=5,
                  n_rep=1,
                  score='partialling out',
@@ -101,22 +130,41 @@ def __init__(self,
 
         self._check_data(self._dml_data)
         self._check_score(self.score)
-        _ = self._check_learner(ml_g, 'ml_g', regressor=True, classifier=False)
+
+        _ = self._check_learner(ml_l, 'ml_l', regressor=True, classifier=False)
         ml_m_is_classifier = self._check_learner(ml_m, 'ml_m', regressor=True, classifier=True)
-        self._learner = {'ml_g': ml_g, 'ml_m': ml_m}
+        self._learner = {'ml_l': ml_l, 'ml_m': ml_m}
+
+        if ml_g is not None:
+            if (isinstance(self.score, str) & (self.score == 'IV-type')) | callable(self.score):
+                _ = self._check_learner(ml_g, 'ml_g', regressor=True, classifier=False)
+                self._learner['ml_g'] = ml_g
+            else:
+                assert (isinstance(self.score, str) & (self.score == 'partialling out'))
+                warnings.warn(('A learner ml_g has been provided for score = "partialling out" but will be ignored. '
+                               'A learner ml_g is not required for estimation.'))
+        elif isinstance(self.score, str) & (self.score == 'IV-type'):
+            warnings.warn(("For score = 'IV-type', learners ml_l and ml_g should be specified. 
" + "Set ml_g = clone(ml_l).")) + self._learner['ml_g'] = clone(ml_l) + + self._predict_method = {'ml_l': 'predict'} + if 'ml_g' in self._learner: + self._predict_method['ml_g'] = 'predict' if ml_m_is_classifier: - if obj_dml_data.binary_treats.all(): - self._predict_method = {'ml_g': 'predict', 'ml_m': 'predict_proba'} + if self._dml_data.binary_treats.all(): + self._predict_method['ml_m'] = 'predict_proba' else: raise ValueError(f'The ml_m learner {str(ml_m)} was identified as classifier ' 'but at least one treatment variable is not binary with values 0 and 1.') else: - self._predict_method = {'ml_g': 'predict', 'ml_m': 'predict'} + self._predict_method['ml_m'] = 'predict' self._initialize_ml_nuisance_params() def _initialize_ml_nuisance_params(self): - self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in ['ml_g', 'ml_m']} + self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} + for learner in self._learner} def _check_score(self, score): if isinstance(score, str): @@ -138,16 +186,27 @@ def _check_data(self, obj_dml_data): 'To fit a partially linear IV regression model use DoubleMLPLIV instead of DoubleMLPLR.') return + # To be removed in version 0.6.0 + def set_ml_nuisance_params(self, learner, treat_var, params): + if isinstance(self.score, str) & (self.score == 'partialling out') & (learner == 'ml_g'): + warnings.warn(("Learner ml_g was renamed to ml_l. " + "Please adapt the argument learner accordingly. " + "The provided parameters are set for ml_l. " + "The redirection will be removed in a future version."), + DeprecationWarning, stacklevel=2) + learner = 'ml_l' + super(DoubleMLPLR, self).set_ml_nuisance_params(learner, treat_var, params) + def _nuisance_est(self, smpls, n_jobs_cv): x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) - # nuisance g - g_hat = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g'), method=self._predict_method['ml_g']) - _check_finite_predictions(g_hat, self._learner['ml_g'], 'ml_g', smpls) + # nuisance l + l_hat = _dml_cv_predict(self._learner['ml_l'], x, y, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_l'), method=self._predict_method['ml_l']) + _check_finite_predictions(l_hat, self._learner['ml_l'], 'ml_l', smpls) # nuisance m m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv, @@ -163,31 +222,79 @@ def _nuisance_est(self, smpls, n_jobs_cv): 'observed to be binary with values 0 and 1. 
Make sure that for classifiers ' 'probabilities and not labels are predicted.') - psi_a, psi_b = self._score_elements(y, d, g_hat, m_hat, smpls) - preds = {'ml_g': g_hat, - 'ml_m': m_hat} + # an estimate of g is obtained for the IV-type score and callable scores + g_hat = None + if 'ml_g' in self._learner: + # get an initial estimate for theta using the partialling out score + psi_a = -np.multiply(d - m_hat, d - m_hat) + psi_b = np.multiply(d - m_hat, y - l_hat) + theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a) + # nuisance g + g_hat = _dml_cv_predict(self._learner['ml_g'], x, y - theta_initial*d, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_g'), method=self._predict_method['ml_g']) + _check_finite_predictions(g_hat, self._learner['ml_g'], 'ml_g', smpls) + + psi_a, psi_b = self._score_elements(y, d, l_hat, m_hat, g_hat, smpls) + preds = {'ml_l': l_hat, + 'ml_m': m_hat, + 'ml_g': g_hat} return psi_a, psi_b, preds - def _score_elements(self, y, d, g_hat, m_hat, smpls): + def _score_elements(self, y, d, l_hat, m_hat, g_hat, smpls): # compute residuals - u_hat = y - g_hat + u_hat = y - l_hat v_hat = d - m_hat - v_hatd = np.multiply(v_hat, d) if isinstance(self.score, str): if self.score == 'IV-type': - psi_a = -v_hatd + psi_a = - np.multiply(v_hat, d) + psi_b = np.multiply(v_hat, y - g_hat) else: assert self.score == 'partialling out' psi_a = -np.multiply(v_hat, v_hat) - psi_b = np.multiply(v_hat, u_hat) + psi_b = np.multiply(v_hat, u_hat) else: assert callable(self.score) - psi_a, psi_b = self.score(y, d, g_hat, m_hat, smpls) + psi_a, psi_b = self.score(y=y, d=d, + l_hat=l_hat, m_hat=m_hat, g_hat=g_hat, + smpls=smpls) return psi_a, psi_b + # To be removed in version 0.6.0 + def tune(self, + param_grids, + tune_on_folds=False, + scoring_methods=None, # if None the estimator's score method is used + n_folds_tune=5, + search_mode='grid_search', + n_iter_randomized_search=100, + n_jobs_cv=None, + set_as_params=True, + return_tune_res=False): + + if isinstance(self.score, str) and (self.score == 'partialling out') and (param_grids is not None) and \ + ('ml_g' in param_grids) and ('ml_l' not in param_grids): + warnings.warn(("Learner ml_g was renamed to ml_l. " + "Please adapt the key of param_grids accordingly. " + "The provided param_grids for ml_g are set for ml_l. " + "The redirection will be removed in a future version."), + DeprecationWarning, stacklevel=2) + param_grids['ml_l'] = param_grids.pop('ml_g') + + if isinstance(self.score, str) and (self.score == 'partialling out') and (scoring_methods is not None) and \ + ('ml_g' in scoring_methods) and ('ml_l' not in scoring_methods): + warnings.warn(("Learner ml_g was renamed to ml_l. " + "Please adapt the key of scoring_methods accordingly. " + "The provided scoring_methods for ml_g are set for ml_l. 
" + "The redirection will be removed in a future version."), + DeprecationWarning, stacklevel=2) + scoring_methods['ml_l'] = scoring_methods.pop('ml_g') + + super(DoubleMLPLR, self).tune(param_grids, tune_on_folds, scoring_methods, n_folds_tune, search_mode, + n_iter_randomized_search, n_jobs_cv, set_as_params, return_tune_res) + def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search): x, y = check_X_y(self._dml_data.x, self._dml_data.y, @@ -196,25 +303,48 @@ def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_ force_all_finite=False) if scoring_methods is None: - scoring_methods = {'ml_g': None, - 'ml_m': None} + scoring_methods = {'ml_l': None, + 'ml_m': None, + 'ml_g': None} train_inds = [train_index for (train_index, _) in smpls] - g_tune_res = _dml_tune(y, x, train_inds, - self._learner['ml_g'], param_grids['ml_g'], scoring_methods['ml_g'], + l_tune_res = _dml_tune(y, x, train_inds, + self._learner['ml_l'], param_grids['ml_l'], scoring_methods['ml_l'], n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) m_tune_res = _dml_tune(d, x, train_inds, self._learner['ml_m'], param_grids['ml_m'], scoring_methods['ml_m'], n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) - g_best_params = [xx.best_params_ for xx in g_tune_res] + l_best_params = [xx.best_params_ for xx in l_tune_res] m_best_params = [xx.best_params_ for xx in m_tune_res] - params = {'ml_g': g_best_params, - 'ml_m': m_best_params} - - tune_res = {'g_tune': g_tune_res, - 'm_tune': m_tune_res} + # an ML model for g is obtained for the IV-type score and callable scores + if 'ml_g' in self._learner: + # construct an initial theta estimate from the tuned models using the partialling out score + l_hat = np.full_like(y, np.nan) + m_hat = np.full_like(d, np.nan) + for idx, (train_index, _) in enumerate(smpls): + l_hat[train_index] = l_tune_res[idx].predict(x[train_index, :]) + m_hat[train_index] = m_tune_res[idx].predict(x[train_index, :]) + psi_a = -np.multiply(d - m_hat, d - m_hat) + psi_b = np.multiply(d - m_hat, y - l_hat) + theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a) + g_tune_res = _dml_tune(y - theta_initial*d, x, train_inds, + self._learner['ml_g'], param_grids['ml_g'], scoring_methods['ml_g'], + n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) + + g_best_params = [xx.best_params_ for xx in g_tune_res] + params = {'ml_l': l_best_params, + 'ml_m': m_best_params, + 'ml_g': g_best_params} + tune_res = {'l_tune': l_tune_res, + 'm_tune': m_tune_res, + 'g_tune': g_tune_res} + else: + params = {'ml_l': l_best_params, + 'ml_m': m_best_params} + tune_res = {'l_tune': l_tune_res, + 'm_tune': m_tune_res} res = {'params': params, 'tune_res': tune_res} diff --git a/doubleml/tests/_utils.py b/doubleml/tests/_utils.py index 3f318701..d9152bcb 100644 --- a/doubleml/tests/_utils.py +++ b/doubleml/tests/_utils.py @@ -1,5 +1,6 @@ import numpy as np from sklearn.model_selection import KFold, GridSearchCV +from sklearn.base import clone def draw_smpls(n_obs, n_folds, n_rep=1): @@ -58,3 +59,11 @@ def tune_grid_search(y, x, ml_model, smpls, param_grid, n_folds_tune, train_cond tune_res[idx] = g_grid_search.fit(x[train_index_cond, :], y[train_index_cond]) return tune_res + + +def _clone(learner): + if learner is None: + res = None + else: + res = clone(learner) + return res diff --git a/doubleml/tests/_utils_pliv_manual.py b/doubleml/tests/_utils_pliv_manual.py index 75e1d707..35ee8618 100644 --- 
a/doubleml/tests/_utils_pliv_manual.py +++ b/doubleml/tests/_utils_pliv_manual.py @@ -1,41 +1,45 @@ import numpy as np +from sklearn.base import clone from ._utils_boot import boot_manual, draw_weights from ._utils import fit_predict, tune_grid_search def fit_pliv(y, x, d, z, - learner_g, learner_m, learner_r, all_smpls, dml_procedure, score, - n_rep=1, g_params=None, m_params=None, r_params=None): + learner_l, learner_m, learner_r, learner_g, all_smpls, dml_procedure, score, + n_rep=1, l_params=None, m_params=None, r_params=None, g_params=None): n_obs = len(y) thetas = np.zeros(n_rep) ses = np.zeros(n_rep) - all_g_hat = list() + all_l_hat = list() all_m_hat = list() all_r_hat = list() + all_g_hat = list() for i_rep in range(n_rep): smpls = all_smpls[i_rep] - g_hat, m_hat, r_hat = fit_nuisance_pliv(y, x, d, z, - learner_g, learner_m, learner_r, - smpls, - g_params, m_params, r_params) + fit_g = (score == 'IV-type') | callable(score) + l_hat, m_hat, r_hat, g_hat = fit_nuisance_pliv(y, x, d, z, + learner_l, learner_m, learner_r, learner_g, + smpls, fit_g, + l_params, m_params, r_params, g_params) - all_g_hat.append(g_hat) + all_l_hat.append(l_hat) all_m_hat.append(m_hat) all_r_hat.append(r_hat) + all_g_hat.append(g_hat) if dml_procedure == 'dml1': thetas[i_rep], ses[i_rep] = pliv_dml1(y, x, d, z, - g_hat, m_hat, r_hat, + l_hat, m_hat, r_hat, g_hat, smpls, score) else: assert dml_procedure == 'dml2' thetas[i_rep], ses[i_rep] = pliv_dml2(y, x, d, z, - g_hat, m_hat, r_hat, + l_hat, m_hat, r_hat, g_hat, smpls, score) theta = np.median(thetas) @@ -43,94 +47,140 @@ def fit_pliv(y, x, d, z, res = {'theta': theta, 'se': se, 'thetas': thetas, 'ses': ses, - 'all_g_hat': all_g_hat, 'all_m_hat': all_m_hat, 'all_r_hat': all_r_hat} + 'all_l_hat': all_l_hat, 'all_m_hat': all_m_hat, 'all_r_hat': all_r_hat, 'all_g_hat': all_g_hat} return res -def fit_nuisance_pliv(y, x, d, z, ml_g, ml_m, ml_r, smpls, g_params=None, m_params=None, r_params=None): - g_hat = fit_predict(y, x, ml_g, g_params, smpls) +def fit_nuisance_pliv(y, x, d, z, ml_l, ml_m, ml_r, ml_g, smpls, fit_g=True, + l_params=None, m_params=None, r_params=None, g_params=None): + l_hat = fit_predict(y, x, ml_l, l_params, smpls) m_hat = fit_predict(z, x, ml_m, m_params, smpls) r_hat = fit_predict(d, x, ml_r, r_params, smpls) - return g_hat, m_hat, r_hat + if fit_g: + y_minus_l_hat, z_minus_m_hat, d_minus_r_hat, _ = compute_pliv_residuals( + y, d, z, l_hat, m_hat, r_hat, [], smpls) + psi_a = -np.multiply(d_minus_r_hat, z_minus_m_hat) + psi_b = np.multiply(z_minus_m_hat, y_minus_l_hat) + theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a) + + ml_g = clone(ml_g) + g_hat = fit_predict(y - theta_initial * d, x, ml_g, g_params, smpls) + else: + g_hat = [] + + return l_hat, m_hat, r_hat, g_hat -def tune_nuisance_pliv(y, x, d, z, ml_g, ml_m, ml_r, smpls, n_folds_tune, param_grid_g, param_grid_m, param_grid_r): - g_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune) +def tune_nuisance_pliv(y, x, d, z, ml_l, ml_m, ml_r, ml_g, smpls, n_folds_tune, + param_grid_l, param_grid_m, param_grid_r, param_grid_g, tune_g=True): + l_tune_res = tune_grid_search(y, x, ml_l, smpls, param_grid_l, n_folds_tune) m_tune_res = tune_grid_search(z, x, ml_m, smpls, param_grid_m, n_folds_tune) r_tune_res = tune_grid_search(d, x, ml_r, smpls, param_grid_r, n_folds_tune) - g_best_params = [xx.best_params_ for xx in g_tune_res] + if tune_g: + l_hat = np.full_like(y, np.nan) + m_hat = np.full_like(z, np.nan) + r_hat = np.full_like(d, np.nan) + for idx, 
(train_index, _) in enumerate(smpls): + l_hat[train_index] = l_tune_res[idx].predict(x[train_index, :]) + m_hat[train_index] = m_tune_res[idx].predict(x[train_index, :]) + r_hat[train_index] = r_tune_res[idx].predict(x[train_index, :]) + psi_a = -np.multiply(d - m_hat, d - m_hat) + psi_b = np.multiply(d - m_hat, y - l_hat) + theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a) + + g_tune_res = tune_grid_search(y - theta_initial*d, x, ml_g, smpls, param_grid_g, n_folds_tune) + g_best_params = [xx.best_params_ for xx in g_tune_res] + else: + g_best_params = [] + + l_best_params = [xx.best_params_ for xx in l_tune_res] m_best_params = [xx.best_params_ for xx in m_tune_res] r_best_params = [xx.best_params_ for xx in r_tune_res] - return g_best_params, m_best_params, r_best_params + return l_best_params, m_best_params, r_best_params, g_best_params -def compute_pliv_residuals(y, d, z, g_hat, m_hat, r_hat, smpls): - u_hat = np.full_like(y, np.nan, dtype='float64') - v_hat = np.full_like(z, np.nan, dtype='float64') - w_hat = np.full_like(d, np.nan, dtype='float64') +def compute_pliv_residuals(y, d, z, l_hat, m_hat, r_hat, g_hat, smpls): + y_minus_l_hat = np.full_like(y, np.nan, dtype='float64') + z_minus_m_hat = np.full_like(y, np.nan, dtype='float64') + d_minus_r_hat = np.full_like(d, np.nan, dtype='float64') + y_minus_g_hat = np.full_like(y, np.nan, dtype='float64') for idx, (_, test_index) in enumerate(smpls): - u_hat[test_index] = y[test_index] - g_hat[idx] - v_hat[test_index] = z[test_index] - m_hat[idx] - w_hat[test_index] = d[test_index] - r_hat[idx] + y_minus_l_hat[test_index] = y[test_index] - l_hat[idx] + z_minus_m_hat[test_index] = z[test_index] - m_hat[idx] + d_minus_r_hat[test_index] = d[test_index] - r_hat[idx] + if len(g_hat) > 0: + y_minus_g_hat[test_index] = y[test_index] - g_hat[idx] - return u_hat, v_hat, w_hat + return y_minus_l_hat, z_minus_m_hat, d_minus_r_hat, y_minus_g_hat -def pliv_dml1(y, x, d, z, g_hat, m_hat, r_hat, smpls, score): +def pliv_dml1(y, x, d, z, l_hat, m_hat, r_hat, g_hat, smpls, score): thetas = np.zeros(len(smpls)) n_obs = len(y) - u_hat, v_hat, w_hat = compute_pliv_residuals(y, d, z, g_hat, m_hat, r_hat, smpls) + y_minus_l_hat, z_minus_m_hat, d_minus_r_hat, y_minus_g_hat = compute_pliv_residuals( + y, d, z, l_hat, m_hat, r_hat, g_hat, smpls) for idx, (_, test_index) in enumerate(smpls): - thetas[idx] = pliv_orth(u_hat[test_index], v_hat[test_index], w_hat[test_index], d[test_index], score) + thetas[idx] = pliv_orth(y_minus_l_hat[test_index], z_minus_m_hat[test_index], + d_minus_r_hat[test_index], y_minus_g_hat[test_index], + d[test_index], score) theta_hat = np.mean(thetas) if len(smpls) > 1: - se = np.sqrt(var_pliv(theta_hat, d, u_hat, v_hat, w_hat, score, n_obs)) + se = np.sqrt(var_pliv(theta_hat, d, y_minus_l_hat, z_minus_m_hat, d_minus_r_hat, y_minus_g_hat, score, n_obs)) else: assert len(smpls) == 1 test_index = smpls[0][1] n_obs = len(test_index) se = np.sqrt(var_pliv(theta_hat, d[test_index], - u_hat[test_index], v_hat[test_index], w_hat[test_index], + y_minus_l_hat[test_index], z_minus_m_hat[test_index], + d_minus_r_hat[test_index], y_minus_g_hat[test_index], score, n_obs)) return theta_hat, se -def pliv_dml2(y, x, d, z, g_hat, m_hat, r_hat, smpls, score): +def pliv_dml2(y, x, d, z, l_hat, m_hat, r_hat, g_hat, smpls, score): n_obs = len(y) - u_hat, v_hat, w_hat = compute_pliv_residuals(y, d, z, g_hat, m_hat, r_hat, smpls) - theta_hat = pliv_orth(u_hat, v_hat, w_hat, d, score) - se = np.sqrt(var_pliv(theta_hat, d, u_hat, v_hat, w_hat, score, 
n_obs)) + y_minus_l_hat, z_minus_m_hat, d_minus_r_hat, y_minus_g_hat = compute_pliv_residuals( + y, d, z, l_hat, m_hat, r_hat, g_hat, smpls) + theta_hat = pliv_orth(y_minus_l_hat, z_minus_m_hat, d_minus_r_hat, y_minus_g_hat, d, score) + se = np.sqrt(var_pliv(theta_hat, d, y_minus_l_hat, z_minus_m_hat, d_minus_r_hat, y_minus_g_hat, score, n_obs)) return theta_hat, se -def var_pliv(theta, d, u_hat, v_hat, w_hat, score, n_obs): - assert score == 'partialling out' - var = 1/n_obs * 1/np.power(np.mean(np.multiply(v_hat, w_hat)), 2) * \ - np.mean(np.power(np.multiply(u_hat - w_hat*theta, v_hat), 2)) +def var_pliv(theta, d, y_minus_l_hat, z_minus_m_hat, d_minus_r_hat, y_minus_g_hat, score, n_obs): + if score == 'partialling out': + var = 1/n_obs * 1/np.power(np.mean(np.multiply(z_minus_m_hat, d_minus_r_hat)), 2) * \ + np.mean(np.power(np.multiply(y_minus_l_hat - d_minus_r_hat*theta, z_minus_m_hat), 2)) + else: + assert score == 'IV-type' + var = 1/n_obs * 1/np.power(np.mean(np.multiply(z_minus_m_hat, d)), 2) * \ + np.mean(np.power(np.multiply(y_minus_g_hat - d*theta, z_minus_m_hat), 2)) return var -def pliv_orth(u_hat, v_hat, w_hat, d, score): - assert score == 'partialling out' - res = np.mean(np.multiply(v_hat, u_hat))/np.mean(np.multiply(v_hat, w_hat)) +def pliv_orth(y_minus_l_hat, z_minus_m_hat, d_minus_r_hat, y_minus_g_hat, d, score): + if score == 'partialling out': + res = np.mean(np.multiply(z_minus_m_hat, y_minus_l_hat))/np.mean(np.multiply(z_minus_m_hat, d_minus_r_hat)) + else: + assert score == 'IV-type' + res = np.mean(np.multiply(z_minus_m_hat, y_minus_g_hat))/np.mean(np.multiply(z_minus_m_hat, d)) return res -def boot_pliv(y, d, z, thetas, ses, all_g_hat, all_m_hat, all_r_hat, +def boot_pliv(y, d, z, thetas, ses, all_l_hat, all_m_hat, all_r_hat, all_g_hat, all_smpls, score, bootstrap, n_rep_boot, n_rep=1, apply_cross_fitting=True): all_boot_theta = list() @@ -144,7 +194,7 @@ def boot_pliv(y, d, z, thetas, ses, all_g_hat, all_m_hat, all_r_hat, n_obs = len(test_index) weights = draw_weights(bootstrap, n_rep_boot, n_obs) boot_theta, boot_t_stat = boot_pliv_single_split( - thetas[i_rep], y, d, z, all_g_hat[i_rep], all_m_hat[i_rep], all_r_hat[i_rep], smpls, + thetas[i_rep], y, d, z, all_l_hat[i_rep], all_m_hat[i_rep], all_r_hat[i_rep], all_g_hat[i_rep], smpls, score, ses[i_rep], weights, n_rep_boot, apply_cross_fitting) all_boot_theta.append(boot_theta) all_boot_t_stat.append(boot_t_stat) @@ -155,18 +205,30 @@ def boot_pliv(y, d, z, thetas, ses, all_g_hat, all_m_hat, all_r_hat, return boot_theta, boot_t_stat -def boot_pliv_single_split(theta, y, d, z, g_hat, m_hat, r_hat, +def boot_pliv_single_split(theta, y, d, z, l_hat, m_hat, r_hat, g_hat, smpls, score, se, weights, n_rep_boot, apply_cross_fitting): - assert score == 'partialling out' - u_hat, v_hat, w_hat = compute_pliv_residuals(y, d, z, g_hat, m_hat, r_hat, smpls) + y_minus_l_hat, z_minus_m_hat, d_minus_r_hat, y_minus_g_hat = compute_pliv_residuals( + y, d, z, l_hat, m_hat, r_hat, g_hat, smpls) if apply_cross_fitting: - J = np.mean(-np.multiply(v_hat, w_hat)) + if score == 'partialling out': + J = np.mean(-np.multiply(z_minus_m_hat, d_minus_r_hat)) + else: + assert score == 'IV-type' + J = np.mean(-np.multiply(z_minus_m_hat, d)) else: test_index = smpls[0][1] - J = np.mean(-np.multiply(v_hat[test_index], w_hat[test_index])) + if score == 'partialling out': + J = np.mean(-np.multiply(z_minus_m_hat[test_index], d_minus_r_hat[test_index])) + else: + assert score == 'IV-type' + J = np.mean(-np.multiply(z_minus_m_hat[test_index], 
d[test_index])) - psi = np.multiply(u_hat - w_hat*theta, v_hat) + if score == 'partialling out': + psi = np.multiply(y_minus_l_hat - d_minus_r_hat*theta, z_minus_m_hat) + else: + assert score == 'IV-type' + psi = np.multiply(y_minus_g_hat - d*theta, z_minus_m_hat) boot_theta, boot_t_stat = boot_manual(psi, J, smpls, se, weights, n_rep_boot, apply_cross_fitting) diff --git a/doubleml/tests/_utils_pliv_partial_x_manual.py b/doubleml/tests/_utils_pliv_partial_x_manual.py index 53716e01..e83d7e83 100644 --- a/doubleml/tests/_utils_pliv_partial_x_manual.py +++ b/doubleml/tests/_utils_pliv_partial_x_manual.py @@ -6,37 +6,37 @@ def fit_pliv_partial_x(y, x, d, z, - learner_g, learner_m, learner_r, all_smpls, dml_procedure, score, - n_rep=1, g_params=None, m_params=None, r_params=None): + learner_l, learner_m, learner_r, all_smpls, dml_procedure, score, + n_rep=1, l_params=None, m_params=None, r_params=None): n_obs = len(y) thetas = np.zeros(n_rep) ses = np.zeros(n_rep) - all_g_hat = list() + all_l_hat = list() all_m_hat = list() all_r_hat = list() for i_rep in range(n_rep): smpls = all_smpls[i_rep] - g_hat, m_hat, r_hat = fit_nuisance_pliv_partial_x(y, x, d, z, - learner_g, learner_m, learner_r, + l_hat, m_hat, r_hat = fit_nuisance_pliv_partial_x(y, x, d, z, + learner_l, learner_m, learner_r, smpls, - g_params, m_params, r_params) + l_params, m_params, r_params) - all_g_hat.append(g_hat) + all_l_hat.append(l_hat) all_m_hat.append(m_hat) all_r_hat.append(r_hat) if dml_procedure == 'dml1': thetas[i_rep], ses[i_rep] = pliv_partial_x_dml1(y, x, d, z, - g_hat, m_hat, r_hat, + l_hat, m_hat, r_hat, smpls, score) else: assert dml_procedure == 'dml2' thetas[i_rep], ses[i_rep] = pliv_partial_x_dml2(y, x, d, z, - g_hat, m_hat, r_hat, + l_hat, m_hat, r_hat, smpls, score) theta = np.median(thetas) @@ -44,14 +44,14 @@ def fit_pliv_partial_x(y, x, d, z, res = {'theta': theta, 'se': se, 'thetas': thetas, 'ses': ses, - 'all_g_hat': all_g_hat, 'all_m_hat': all_m_hat, 'all_r_hat': all_r_hat} + 'all_l_hat': all_l_hat, 'all_m_hat': all_m_hat, 'all_r_hat': all_r_hat} return res -def fit_nuisance_pliv_partial_x(y, x, d, z, ml_g, ml_m, ml_r, smpls, g_params=None, m_params=None, r_params=None): +def fit_nuisance_pliv_partial_x(y, x, d, z, ml_l, ml_m, ml_r, smpls, l_params=None, m_params=None, r_params=None): assert z.ndim == 2 - g_hat = fit_predict(y, x, ml_g, g_params, smpls) + l_hat = fit_predict(y, x, ml_l, l_params, smpls) m_hat = list() for i_instr in range(z.shape[1]): @@ -71,12 +71,12 @@ def fit_nuisance_pliv_partial_x(y, x, d, z, ml_g, ml_m, ml_r, smpls, g_params=No r_hat_tilde = LinearRegression(fit_intercept=True).fit(z - m_hat_array, d - r_hat_array).predict(z - m_hat_array) - return g_hat, r_hat, r_hat_tilde + return l_hat, r_hat, r_hat_tilde -def tune_nuisance_pliv_partial_x(y, x, d, z, ml_g, ml_m, ml_r, smpls, n_folds_tune, - param_grid_g, param_grid_m, param_grid_r): - g_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune) +def tune_nuisance_pliv_partial_x(y, x, d, z, ml_l, ml_m, ml_r, smpls, n_folds_tune, + param_grid_l, param_grid_m, param_grid_r): + l_tune_res = tune_grid_search(y, x, ml_l, smpls, param_grid_l, n_folds_tune) m_tune_res = list() for i_instr in range(z.shape[1]): @@ -84,27 +84,27 @@ def tune_nuisance_pliv_partial_x(y, x, d, z, ml_g, ml_m, ml_r, smpls, n_folds_tu r_tune_res = tune_grid_search(d, x, ml_r, smpls, param_grid_r, n_folds_tune) - g_best_params = [xx.best_params_ for xx in g_tune_res] + l_best_params = [xx.best_params_ for xx in l_tune_res] m_best_params = 
[[xx.best_params_ for xx in m_tune_res[i_instr]] for i_instr in range(z.shape[1])] r_best_params = [xx.best_params_ for xx in r_tune_res] - return g_best_params, m_best_params, r_best_params + return l_best_params, m_best_params, r_best_params -def compute_pliv_partial_x_residuals(y, d, g_hat, r_hat, smpls): +def compute_pliv_partial_x_residuals(y, d, l_hat, r_hat, smpls): u_hat = np.full_like(y, np.nan, dtype='float64') w_hat = np.full_like(y, np.nan, dtype='float64') for idx, (_, test_index) in enumerate(smpls): - u_hat[test_index] = y[test_index] - g_hat[idx] + u_hat[test_index] = y[test_index] - l_hat[idx] w_hat[test_index] = d[test_index] - r_hat[idx] return u_hat, w_hat -def pliv_partial_x_dml1(y, x, d, z, g_hat, r_hat, r_hat_tilde, smpls, score): +def pliv_partial_x_dml1(y, x, d, z, l_hat, r_hat, r_hat_tilde, smpls, score): thetas = np.zeros(len(smpls)) n_obs = len(y) - u_hat, w_hat = compute_pliv_partial_x_residuals(y, d, g_hat, r_hat, smpls) + u_hat, w_hat = compute_pliv_partial_x_residuals(y, d, l_hat, r_hat, smpls) for idx, (_, test_index) in enumerate(smpls): thetas[idx] = pliv_partial_x_orth(u_hat[test_index], w_hat[test_index], r_hat_tilde[test_index], @@ -116,9 +116,9 @@ def pliv_partial_x_dml1(y, x, d, z, g_hat, r_hat, r_hat_tilde, smpls, score): return theta_hat, se -def pliv_partial_x_dml2(y, x, d, z, g_hat, r_hat, r_hat_tilde, smpls, score): +def pliv_partial_x_dml2(y, x, d, z, l_hat, r_hat, r_hat_tilde, smpls, score): n_obs = len(y) - u_hat, w_hat = compute_pliv_partial_x_residuals(y, d, g_hat, r_hat, smpls) + u_hat, w_hat = compute_pliv_partial_x_residuals(y, d, l_hat, r_hat, smpls) theta_hat = pliv_partial_x_orth(u_hat, w_hat, r_hat_tilde, d, score) se = np.sqrt(var_pliv_partial_x(theta_hat, d, u_hat, w_hat, r_hat_tilde, score, n_obs)) @@ -140,7 +140,7 @@ def pliv_partial_x_orth(u_hat, w_hat, r_hat_tilde, d, score): return res -def boot_pliv_partial_x(y, d, z, thetas, ses, all_g_hat, all_m_hat, all_r_hat, +def boot_pliv_partial_x(y, d, z, thetas, ses, all_l_hat, all_m_hat, all_r_hat, all_smpls, score, bootstrap, n_rep_boot, n_rep=1): all_boot_theta = list() @@ -149,7 +149,7 @@ def boot_pliv_partial_x(y, d, z, thetas, ses, all_g_hat, all_m_hat, all_r_hat, n_obs = len(y) weights = draw_weights(bootstrap, n_rep_boot, n_obs) boot_theta, boot_t_stat = boot_pliv_partial_x_single_split( - thetas[i_rep], y, d, z, all_g_hat[i_rep], all_m_hat[i_rep], all_r_hat[i_rep], all_smpls[i_rep], + thetas[i_rep], y, d, z, all_l_hat[i_rep], all_m_hat[i_rep], all_r_hat[i_rep], all_smpls[i_rep], score, ses[i_rep], weights, n_rep_boot) all_boot_theta.append(boot_theta) all_boot_t_stat.append(boot_t_stat) @@ -160,10 +160,10 @@ def boot_pliv_partial_x(y, d, z, thetas, ses, all_g_hat, all_m_hat, all_r_hat, return boot_theta, boot_t_stat -def boot_pliv_partial_x_single_split(theta, y, d, z, g_hat, r_hat, r_hat_tilde, +def boot_pliv_partial_x_single_split(theta, y, d, z, l_hat, r_hat, r_hat_tilde, smpls, score, se, weights, n_rep_boot): assert score == 'partialling out' - u_hat, w_hat = compute_pliv_partial_x_residuals(y, d, g_hat, r_hat, smpls) + u_hat, w_hat = compute_pliv_partial_x_residuals(y, d, l_hat, r_hat, smpls) J = np.mean(-np.multiply(r_hat_tilde, w_hat)) diff --git a/doubleml/tests/_utils_pliv_partial_xz_manual.py b/doubleml/tests/_utils_pliv_partial_xz_manual.py index 515d373b..ba28ee06 100644 --- a/doubleml/tests/_utils_pliv_partial_xz_manual.py +++ b/doubleml/tests/_utils_pliv_partial_xz_manual.py @@ -6,37 +6,37 @@ def fit_pliv_partial_xz(y, x, d, z, - learner_g, learner_m, 
learner_r, all_smpls, dml_procedure, score, - n_rep=1, g_params=None, m_params=None, r_params=None): + learner_l, learner_m, learner_r, all_smpls, dml_procedure, score, + n_rep=1, l_params=None, m_params=None, r_params=None): n_obs = len(y) thetas = np.zeros(n_rep) ses = np.zeros(n_rep) - all_g_hat = list() + all_l_hat = list() all_m_hat = list() all_r_hat = list() for i_rep in range(n_rep): smpls = all_smpls[i_rep] - g_hat, m_hat, r_hat = fit_nuisance_pliv_partial_xz(y, x, d, z, - learner_g, learner_m, learner_r, + l_hat, m_hat, r_hat = fit_nuisance_pliv_partial_xz(y, x, d, z, + learner_l, learner_m, learner_r, smpls, - g_params, m_params, r_params) + l_params, m_params, r_params) - all_g_hat.append(g_hat) + all_l_hat.append(l_hat) all_m_hat.append(m_hat) all_r_hat.append(r_hat) if dml_procedure == 'dml1': thetas[i_rep], ses[i_rep] = pliv_partial_xz_dml1(y, x, d, z, - g_hat, m_hat, r_hat, + l_hat, m_hat, r_hat, smpls, score) else: assert dml_procedure == 'dml2' thetas[i_rep], ses[i_rep] = pliv_partial_xz_dml2(y, x, d, z, - g_hat, m_hat, r_hat, + l_hat, m_hat, r_hat, smpls, score) theta = np.median(thetas) @@ -44,13 +44,13 @@ def fit_pliv_partial_xz(y, x, d, z, res = {'theta': theta, 'se': se, 'thetas': thetas, 'ses': ses, - 'all_g_hat': all_g_hat, 'all_m_hat': all_m_hat, 'all_r_hat': all_r_hat} + 'all_l_hat': all_l_hat, 'all_m_hat': all_m_hat, 'all_r_hat': all_r_hat} return res -def fit_nuisance_pliv_partial_xz(y, x, d, z, ml_g, ml_m, ml_r, smpls, g_params=None, m_params=None, r_params=None): - g_hat = fit_predict(y, x, ml_g, g_params, smpls) +def fit_nuisance_pliv_partial_xz(y, x, d, z, ml_l, ml_m, ml_r, smpls, l_params=None, m_params=None, r_params=None): + l_hat = fit_predict(y, x, ml_l, l_params, smpls) xz = np.hstack((x, z)) m_hat = [] @@ -68,12 +68,12 @@ def fit_nuisance_pliv_partial_xz(y, x, d, z, ml_g, ml_m, ml_r, smpls, g_params=N ml_r.set_params(**r_params[idx]) m_hat_tilde.append(ml_r.fit(x[train_index], m_hat_train[idx]).predict(x[test_index])) - return g_hat, m_hat, m_hat_tilde + return l_hat, m_hat, m_hat_tilde -def tune_nuisance_pliv_partial_xz(y, x, d, z, ml_g, ml_m, ml_r, smpls, n_folds_tune, - param_grid_g, param_grid_m, param_grid_r): - g_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune) +def tune_nuisance_pliv_partial_xz(y, x, d, z, ml_l, ml_m, ml_r, smpls, n_folds_tune, + param_grid_l, param_grid_m, param_grid_r): + l_tune_res = tune_grid_search(y, x, ml_l, smpls, param_grid_l, n_folds_tune) xz = np.hstack((x, z)) m_tune_res = tune_grid_search(d, xz, ml_m, smpls, param_grid_m, n_folds_tune) @@ -86,29 +86,29 @@ def tune_nuisance_pliv_partial_xz(y, x, d, z, ml_g, ml_m, ml_r, smpls, n_folds_t cv=r_tune_resampling) r_tune_res[idx] = r_grid_search.fit(x[train_index, :], m_hat) - g_best_params = [xx.best_params_ for xx in g_tune_res] + l_best_params = [xx.best_params_ for xx in l_tune_res] m_best_params = [xx.best_params_ for xx in m_tune_res] r_best_params = [xx.best_params_ for xx in r_tune_res] - return g_best_params, m_best_params, r_best_params + return l_best_params, m_best_params, r_best_params -def compute_pliv_partial_xz_residuals(y, d, g_hat, m_hat, m_hat_tilde, smpls): +def compute_pliv_partial_xz_residuals(y, d, l_hat, m_hat, m_hat_tilde, smpls): u_hat = np.full_like(y, np.nan, dtype='float64') v_hat = np.full_like(y, np.nan, dtype='float64') w_hat = np.full_like(y, np.nan, dtype='float64') for idx, (_, test_index) in enumerate(smpls): - u_hat[test_index] = y[test_index] - g_hat[idx] + u_hat[test_index] = y[test_index] - l_hat[idx] 
v_hat[test_index] = m_hat[idx] - m_hat_tilde[idx] w_hat[test_index] = d[test_index] - m_hat_tilde[idx] return u_hat, v_hat, w_hat -def pliv_partial_xz_dml1(y, x, d, z, g_hat, m_hat, m_hat_tilde, smpls, score): +def pliv_partial_xz_dml1(y, x, d, z, l_hat, m_hat, m_hat_tilde, smpls, score): thetas = np.zeros(len(smpls)) n_obs = len(y) - u_hat, v_hat, w_hat = compute_pliv_partial_xz_residuals(y, d, g_hat, m_hat, m_hat_tilde, smpls) + u_hat, v_hat, w_hat = compute_pliv_partial_xz_residuals(y, d, l_hat, m_hat, m_hat_tilde, smpls) for idx, (_, test_index) in enumerate(smpls): thetas[idx] = pliv_partial_xz_orth(u_hat[test_index], v_hat[test_index], w_hat[test_index], @@ -120,9 +120,9 @@ def pliv_partial_xz_dml1(y, x, d, z, g_hat, m_hat, m_hat_tilde, smpls, score): return theta_hat, se -def pliv_partial_xz_dml2(y, x, d, z, g_hat, m_hat, m_hat_tilde, smpls, score): +def pliv_partial_xz_dml2(y, x, d, z, l_hat, m_hat, m_hat_tilde, smpls, score): n_obs = len(y) - u_hat, v_hat, w_hat = compute_pliv_partial_xz_residuals(y, d, g_hat, m_hat, m_hat_tilde, smpls) + u_hat, v_hat, w_hat = compute_pliv_partial_xz_residuals(y, d, l_hat, m_hat, m_hat_tilde, smpls) theta_hat = pliv_partial_xz_orth(u_hat, v_hat, w_hat, d, score) se = np.sqrt(var_pliv_partial_xz(theta_hat, d, u_hat, v_hat, w_hat, score, n_obs)) @@ -144,7 +144,7 @@ def pliv_partial_xz_orth(u_hat, v_hat, w_hat, d, score): return res -def boot_pliv_partial_xz(y, d, z, thetas, ses, all_g_hat, all_m_hat, all_r_hat, +def boot_pliv_partial_xz(y, d, z, thetas, ses, all_l_hat, all_m_hat, all_r_hat, all_smpls, score, bootstrap, n_rep_boot, n_rep=1): all_boot_theta = list() @@ -153,7 +153,7 @@ def boot_pliv_partial_xz(y, d, z, thetas, ses, all_g_hat, all_m_hat, all_r_hat, n_obs = len(y) weights = draw_weights(bootstrap, n_rep_boot, n_obs) boot_theta, boot_t_stat = boot_pliv_partial_xz_single_split( - thetas[i_rep], y, d, z, all_g_hat[i_rep], all_m_hat[i_rep], all_r_hat[i_rep], all_smpls[i_rep], + thetas[i_rep], y, d, z, all_l_hat[i_rep], all_m_hat[i_rep], all_r_hat[i_rep], all_smpls[i_rep], score, ses[i_rep], weights, n_rep_boot) all_boot_theta.append(boot_theta) all_boot_t_stat.append(boot_t_stat) @@ -164,10 +164,10 @@ def boot_pliv_partial_xz(y, d, z, thetas, ses, all_g_hat, all_m_hat, all_r_hat, return boot_theta, boot_t_stat -def boot_pliv_partial_xz_single_split(theta, y, d, z, g_hat, m_hat, m_hat_tilde, +def boot_pliv_partial_xz_single_split(theta, y, d, z, l_hat, m_hat, m_hat_tilde, smpls, score, se, weights, n_rep_boot): assert score == 'partialling out' - u_hat, v_hat, w_hat = compute_pliv_partial_xz_residuals(y, d, g_hat, m_hat, m_hat_tilde, smpls) + u_hat, v_hat, w_hat = compute_pliv_partial_xz_residuals(y, d, l_hat, m_hat, m_hat_tilde, smpls) J = np.mean(-np.multiply(v_hat, w_hat)) diff --git a/doubleml/tests/_utils_plr_manual.py b/doubleml/tests/_utils_plr_manual.py index 4f1ba7cf..6b569062 100644 --- a/doubleml/tests/_utils_plr_manual.py +++ b/doubleml/tests/_utils_plr_manual.py @@ -6,22 +6,24 @@ from ._utils import fit_predict, fit_predict_proba, tune_grid_search -def fit_plr_multitreat(y, x, d, learner_g, learner_m, all_smpls, dml_procedure, score, - n_rep=1, g_params=None, m_params=None, +def fit_plr_multitreat(y, x, d, learner_l, learner_m, learner_g, all_smpls, dml_procedure, score, + n_rep=1, l_params=None, m_params=None, g_params=None, use_other_treat_as_covariate=True): n_obs = len(y) n_d = d.shape[1] thetas = list() ses = list() - all_g_hat = list() + all_l_hat = list() all_m_hat = list() + all_g_hat = list() for i_rep in range(n_rep): 
smpls = all_smpls[i_rep] thetas_this_rep = np.full(n_d, np.nan) ses_this_rep = np.full(n_d, np.nan) - all_g_hat_this_rep = list() + all_l_hat_this_rep = list() all_m_hat_this_rep = list() + all_g_hat_this_rep = list() for i_d in range(n_d): if use_other_treat_as_covariate: @@ -29,15 +31,20 @@ def fit_plr_multitreat(y, x, d, learner_g, learner_m, all_smpls, dml_procedure, else: xd = x - g_hat, m_hat, thetas_this_rep[i_d], ses_this_rep[i_d] = fit_plr_single_split( - y, xd, d[:, i_d], learner_g, learner_m, smpls, dml_procedure, score, g_params, m_params) - all_g_hat_this_rep.append(g_hat) + l_hat, m_hat, g_hat, thetas_this_rep[i_d], ses_this_rep[i_d] = fit_plr_single_split( + y, xd, d[:, i_d], + learner_l, learner_m, learner_g, + smpls, dml_procedure, score, + l_params, m_params, g_params) + all_l_hat_this_rep.append(l_hat) all_m_hat_this_rep.append(m_hat) + all_g_hat_this_rep.append(g_hat) thetas.append(thetas_this_rep) ses.append(ses_this_rep) - all_g_hat.append(all_g_hat_this_rep) + all_l_hat.append(all_l_hat_this_rep) all_m_hat.append(all_m_hat_this_rep) + all_g_hat.append(all_g_hat_this_rep) theta = np.full(n_d, np.nan) se = np.full(n_d, np.nan) @@ -49,151 +56,205 @@ def fit_plr_multitreat(y, x, d, learner_g, learner_m, all_smpls, dml_procedure, res = {'theta': theta, 'se': se, 'thetas': thetas, 'ses': ses, - 'all_g_hat': all_g_hat, 'all_m_hat': all_m_hat} + 'all_l_hat': all_l_hat, 'all_m_hat': all_m_hat, 'all_g_hat': all_g_hat} return res -def fit_plr(y, x, d, learner_g, learner_m, all_smpls, dml_procedure, score, - n_rep=1, g_params=None, m_params=None): +def fit_plr(y, x, d, learner_l, learner_m, learner_g, all_smpls, dml_procedure, score, + n_rep=1, l_params=None, m_params=None, g_params=None): n_obs = len(y) thetas = np.zeros(n_rep) ses = np.zeros(n_rep) - all_g_hat = list() + all_l_hat = list() all_m_hat = list() + all_g_hat = list() for i_rep in range(n_rep): smpls = all_smpls[i_rep] - g_hat, m_hat, thetas[i_rep], ses[i_rep] = fit_plr_single_split( - y, x, d, learner_g, learner_m, smpls, dml_procedure, score, g_params, m_params) - all_g_hat.append(g_hat) + l_hat, m_hat, g_hat, thetas[i_rep], ses[i_rep] = fit_plr_single_split( + y, x, d, + learner_l, learner_m, learner_g, + smpls, dml_procedure, score, + l_params, m_params, g_params) + all_l_hat.append(l_hat) all_m_hat.append(m_hat) + all_g_hat.append(g_hat) theta = np.median(thetas) se = np.sqrt(np.median(np.power(ses, 2) * n_obs + np.power(thetas - theta, 2)) / n_obs) res = {'theta': theta, 'se': se, 'thetas': thetas, 'ses': ses, - 'all_g_hat': all_g_hat, 'all_m_hat': all_m_hat} + 'all_l_hat': all_l_hat, 'all_m_hat': all_m_hat, 'all_g_hat': all_g_hat} return res -def fit_plr_single_split(y, x, d, learner_g, learner_m, smpls, dml_procedure, score, g_params=None, m_params=None): +def fit_plr_single_split(y, x, d, learner_l, learner_m, learner_g, smpls, dml_procedure, score, + l_params=None, m_params=None, g_params=None): + fit_g = (score == 'IV-type') | callable(score) if is_classifier(learner_m): - g_hat, m_hat = fit_nuisance_plr_classifier(y, x, d, - learner_g, learner_m, smpls, - g_params, m_params) + l_hat, m_hat, g_hat = fit_nuisance_plr_classifier(y, x, d, + learner_l, learner_m, learner_g, + smpls, fit_g, + l_params, m_params, g_params) else: - g_hat, m_hat = fit_nuisance_plr(y, x, d, - learner_g, learner_m, smpls, - g_params, m_params) + l_hat, m_hat, g_hat = fit_nuisance_plr(y, x, d, + learner_l, learner_m, learner_g, + smpls, fit_g, + l_params, m_params, g_params) if dml_procedure == 'dml1': theta, se = plr_dml1(y, x, d, 
- g_hat, m_hat, + l_hat, m_hat, g_hat, smpls, score) else: assert dml_procedure == 'dml2' theta, se = plr_dml2(y, x, d, - g_hat, m_hat, + l_hat, m_hat, g_hat, smpls, score) - return g_hat, m_hat, theta, se + return l_hat, m_hat, g_hat, theta, se -def fit_nuisance_plr(y, x, d, learner_g, learner_m, smpls, g_params=None, m_params=None): - ml_g = clone(learner_g) - g_hat = fit_predict(y, x, ml_g, g_params, smpls) +def fit_nuisance_plr(y, x, d, learner_l, learner_m, learner_g, smpls, fit_g=True, + l_params=None, m_params=None, g_params=None): + ml_l = clone(learner_l) + l_hat = fit_predict(y, x, ml_l, l_params, smpls) ml_m = clone(learner_m) m_hat = fit_predict(d, x, ml_m, m_params, smpls) - return g_hat, m_hat + if fit_g: + y_minus_l_hat, d_minus_m_hat, _ = compute_plr_residuals(y, d, l_hat, m_hat, [], smpls) + psi_a = -np.multiply(d_minus_m_hat, d_minus_m_hat) + psi_b = np.multiply(d_minus_m_hat, y_minus_l_hat) + theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a) + + ml_g = clone(learner_g) + g_hat = fit_predict(y - theta_initial*d, x, ml_g, g_params, smpls) + else: + g_hat = [] + + return l_hat, m_hat, g_hat -def fit_nuisance_plr_classifier(y, x, d, learner_g, learner_m, smpls, g_params=None, m_params=None): - ml_g = clone(learner_g) - g_hat = fit_predict(y, x, ml_g, g_params, smpls) +def fit_nuisance_plr_classifier(y, x, d, learner_l, learner_m, learner_g, smpls, fit_g=True, + l_params=None, m_params=None, g_params=None): + ml_l = clone(learner_l) + l_hat = fit_predict(y, x, ml_l, l_params, smpls) ml_m = clone(learner_m) m_hat = fit_predict_proba(d, x, ml_m, m_params, smpls) - return g_hat, m_hat + if fit_g: + y_minus_l_hat, d_minus_m_hat, _ = compute_plr_residuals(y, d, l_hat, m_hat, [], smpls) + psi_a = -np.multiply(d_minus_m_hat, d_minus_m_hat) + psi_b = np.multiply(d_minus_m_hat, y_minus_l_hat) + theta_initial = -np.mean(psi_b) / np.mean(psi_a) + + ml_g = clone(learner_g) + g_hat = fit_predict(y - theta_initial*d, x, ml_g, g_params, smpls) + else: + g_hat = [] + + return l_hat, m_hat, g_hat -def tune_nuisance_plr(y, x, d, ml_g, ml_m, smpls, n_folds_tune, param_grid_g, param_grid_m): - g_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune) +def tune_nuisance_plr(y, x, d, ml_l, ml_m, ml_g, smpls, n_folds_tune, param_grid_l, param_grid_m, param_grid_g, tune_g=True): + l_tune_res = tune_grid_search(y, x, ml_l, smpls, param_grid_l, n_folds_tune) m_tune_res = tune_grid_search(d, x, ml_m, smpls, param_grid_m, n_folds_tune) - g_best_params = [xx.best_params_ for xx in g_tune_res] + if tune_g: + l_hat = np.full_like(y, np.nan) + m_hat = np.full_like(d, np.nan) + for idx, (train_index, _) in enumerate(smpls): + l_hat[train_index] = l_tune_res[idx].predict(x[train_index, :]) + m_hat[train_index] = m_tune_res[idx].predict(x[train_index, :]) + psi_a = -np.multiply(d - m_hat, d - m_hat) + psi_b = np.multiply(d - m_hat, y - l_hat) + theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a) + + g_tune_res = tune_grid_search(y - theta_initial*d, x, ml_g, smpls, param_grid_g, n_folds_tune) + g_best_params = [xx.best_params_ for xx in g_tune_res] + else: + g_best_params = [] + + l_best_params = [xx.best_params_ for xx in l_tune_res] m_best_params = [xx.best_params_ for xx in m_tune_res] - return g_best_params, m_best_params + return l_best_params, m_best_params, g_best_params -def compute_plr_residuals(y, d, g_hat, m_hat, smpls): - u_hat = np.full_like(y, np.nan, dtype='float64') - v_hat = np.full_like(d, np.nan, dtype='float64') +def compute_plr_residuals(y, d, l_hat, m_hat, 
g_hat, smpls): + y_minus_l_hat = np.full_like(y, np.nan, dtype='float64') + d_minus_m_hat = np.full_like(d, np.nan, dtype='float64') + y_minus_g_hat = np.full_like(y, np.nan, dtype='float64') for idx, (_, test_index) in enumerate(smpls): - u_hat[test_index] = y[test_index] - g_hat[idx] - v_hat[test_index] = d[test_index] - m_hat[idx] - return u_hat, v_hat + y_minus_l_hat[test_index] = y[test_index] - l_hat[idx] + if len(g_hat) > 0: + y_minus_g_hat[test_index] = y[test_index] - g_hat[idx] + d_minus_m_hat[test_index] = d[test_index] - m_hat[idx] + return y_minus_l_hat, d_minus_m_hat, y_minus_g_hat -def plr_dml1(y, x, d, g_hat, m_hat, smpls, score): +def plr_dml1(y, x, d, l_hat, m_hat, g_hat, smpls, score): thetas = np.zeros(len(smpls)) n_obs = len(y) - u_hat, v_hat = compute_plr_residuals(y, d, g_hat, m_hat, smpls) + y_minus_l_hat, d_minus_m_hat, y_minus_g_hat = compute_plr_residuals(y, d, l_hat, m_hat, g_hat, smpls) for idx, (_, test_index) in enumerate(smpls): - thetas[idx] = plr_orth(v_hat[test_index], u_hat[test_index], d[test_index], score) + thetas[idx] = plr_orth(y_minus_l_hat[test_index], d_minus_m_hat[test_index], y_minus_g_hat[test_index], + d[test_index], score) theta_hat = np.mean(thetas) if len(smpls) > 1: - se = np.sqrt(var_plr(theta_hat, d, u_hat, v_hat, score, n_obs)) + se = np.sqrt(var_plr(theta_hat, d, y_minus_l_hat, d_minus_m_hat, y_minus_g_hat, score, n_obs)) else: assert len(smpls) == 1 test_index = smpls[0][1] n_obs = len(test_index) - se = np.sqrt(var_plr(theta_hat, d[test_index], u_hat[test_index], v_hat[test_index], score, n_obs)) + se = np.sqrt(var_plr(theta_hat, d[test_index], + y_minus_l_hat[test_index], d_minus_m_hat[test_index], y_minus_g_hat[test_index], + score, n_obs)) return theta_hat, se -def plr_dml2(y, x, d, g_hat, m_hat, smpls, score): +def plr_dml2(y, x, d, l_hat, m_hat, g_hat, smpls, score): n_obs = len(y) - u_hat, v_hat = compute_plr_residuals(y, d, g_hat, m_hat, smpls) - theta_hat = plr_orth(v_hat, u_hat, d, score) - se = np.sqrt(var_plr(theta_hat, d, u_hat, v_hat, score, n_obs)) + y_minus_l_hat, d_minus_m_hat, y_minus_g_hat = compute_plr_residuals(y, d, l_hat, m_hat, g_hat, smpls) + theta_hat = plr_orth(y_minus_l_hat, d_minus_m_hat, y_minus_g_hat, d, score) + se = np.sqrt(var_plr(theta_hat, d, y_minus_l_hat, d_minus_m_hat, y_minus_g_hat, score, n_obs)) return theta_hat, se -def var_plr(theta, d, u_hat, v_hat, score, n_obs): +def var_plr(theta, d, y_minus_l_hat, d_minus_m_hat, y_minus_g_hat, score, n_obs): if score == 'partialling out': - var = 1/n_obs * 1/np.power(np.mean(np.multiply(v_hat, v_hat)), 2) * \ - np.mean(np.power(np.multiply(u_hat - v_hat*theta, v_hat), 2)) + var = 1/n_obs * 1/np.power(np.mean(np.multiply(d_minus_m_hat, d_minus_m_hat)), 2) * \ + np.mean(np.power(np.multiply(y_minus_l_hat - d_minus_m_hat*theta, d_minus_m_hat), 2)) else: assert score == 'IV-type' - var = 1/n_obs * 1/np.power(np.mean(np.multiply(v_hat, d)), 2) * \ - np.mean(np.power(np.multiply(u_hat - d*theta, v_hat), 2)) + var = 1/n_obs * 1/np.power(np.mean(np.multiply(d_minus_m_hat, d)), 2) * \ + np.mean(np.power(np.multiply(y_minus_g_hat - d*theta, d_minus_m_hat), 2)) return var -def plr_orth(v_hat, u_hat, d, score): +def plr_orth(y_minus_l_hat, d_minus_m_hat, y_minus_g_hat, d, score): if score == 'IV-type': - res = np.mean(np.multiply(v_hat, u_hat))/np.mean(np.multiply(v_hat, d)) + res = np.mean(np.multiply(d_minus_m_hat, y_minus_g_hat))/np.mean(np.multiply(d_minus_m_hat, d)) else: assert score == 'partialling out' - res = scipy.linalg.lstsq(v_hat.reshape(-1, 1), 
u_hat)[0] + res = scipy.linalg.lstsq(d_minus_m_hat.reshape(-1, 1), y_minus_l_hat)[0] return res -def boot_plr(y, d, thetas, ses, all_g_hat, all_m_hat, +def boot_plr(y, d, thetas, ses, all_l_hat, all_m_hat, all_g_hat, all_smpls, score, bootstrap, n_rep_boot, n_rep=1, apply_cross_fitting=True): all_boot_theta = list() @@ -208,7 +269,7 @@ def boot_plr(y, d, thetas, ses, all_g_hat, all_m_hat, weights = draw_weights(bootstrap, n_rep_boot, n_obs) boot_theta, boot_t_stat = boot_plr_single_split( - thetas[i_rep], y, d, all_g_hat[i_rep], all_m_hat[i_rep], smpls, + thetas[i_rep], y, d, all_l_hat[i_rep], all_m_hat[i_rep], all_g_hat[i_rep], smpls, score, ses[i_rep], weights, n_rep_boot, apply_cross_fitting) all_boot_theta.append(boot_theta) @@ -220,7 +281,7 @@ def boot_plr(y, d, thetas, ses, all_g_hat, all_m_hat, return boot_theta, boot_t_stat -def boot_plr_multitreat(y, d, thetas, ses, all_g_hat, all_m_hat, +def boot_plr_multitreat(y, d, thetas, ses, all_l_hat, all_m_hat, all_g_hat, all_smpls, score, bootstrap, n_rep_boot, n_rep=1, apply_cross_fitting=True): n_d = d.shape[1] @@ -240,7 +301,7 @@ def boot_plr_multitreat(y, d, thetas, ses, all_g_hat, all_m_hat, for i_d in range(n_d): boot_theta[i_d, :], boot_t_stat[i_d, :] = boot_plr_single_split( thetas[i_rep][i_d], y, d[:, i_d], - all_g_hat[i_rep][i_d], all_m_hat[i_rep][i_d], + all_l_hat[i_rep][i_d], all_m_hat[i_rep][i_d], all_g_hat[i_rep][i_d], smpls, score, ses[i_rep][i_d], weights, n_rep_boot, apply_cross_fitting) all_boot_theta.append(boot_theta) @@ -252,29 +313,29 @@ def boot_plr_multitreat(y, d, thetas, ses, all_g_hat, all_m_hat, return boot_theta, boot_t_stat -def boot_plr_single_split(theta, y, d, g_hat, m_hat, +def boot_plr_single_split(theta, y, d, l_hat, m_hat, g_hat, smpls, score, se, weights, n_rep, apply_cross_fitting): - u_hat, v_hat = compute_plr_residuals(y, d, g_hat, m_hat, smpls) + y_minus_l_hat, d_minus_m_hat, y_minus_g_hat = compute_plr_residuals(y, d, l_hat, m_hat, g_hat, smpls) if apply_cross_fitting: if score == 'partialling out': - J = np.mean(-np.multiply(v_hat, v_hat)) + J = np.mean(-np.multiply(d_minus_m_hat, d_minus_m_hat)) else: assert score == 'IV-type' - J = np.mean(-np.multiply(v_hat, d)) + J = np.mean(-np.multiply(d_minus_m_hat, d)) else: test_index = smpls[0][1] if score == 'partialling out': - J = np.mean(-np.multiply(v_hat[test_index], v_hat[test_index])) + J = np.mean(-np.multiply(d_minus_m_hat[test_index], d_minus_m_hat[test_index])) else: assert score == 'IV-type' - J = np.mean(-np.multiply(v_hat[test_index], d[test_index])) + J = np.mean(-np.multiply(d_minus_m_hat[test_index], d[test_index])) if score == 'partialling out': - psi = np.multiply(u_hat - v_hat * theta, v_hat) + psi = np.multiply(y_minus_l_hat - d_minus_m_hat * theta, d_minus_m_hat) else: assert score == 'IV-type' - psi = np.multiply(u_hat - d * theta, v_hat) + psi = np.multiply(y_minus_g_hat - d * theta, d_minus_m_hat) boot_theta, boot_t_stat = boot_manual(psi, J, smpls, se, weights, n_rep, apply_cross_fitting) diff --git a/doubleml/tests/test_doubleml_exceptions.py b/doubleml/tests/test_doubleml_exceptions.py index f281fc6a..875902d1 100644 --- a/doubleml/tests/test_doubleml_exceptions.py +++ b/doubleml/tests/test_doubleml_exceptions.py @@ -3,24 +3,28 @@ import numpy as np from doubleml import DoubleMLPLR, DoubleMLIRM, DoubleMLIIVM, DoubleMLPLIV, DoubleMLData, DoubleMLClusterData -from doubleml.datasets import make_plr_CCDDHNR2018, make_irm_data, make_pliv_CHS2015, make_iivm_data,\ +from doubleml.datasets import make_plr_CCDDHNR2018, 
make_irm_data, make_pliv_CHS2015, make_iivm_data, \ make_pliv_multiway_cluster_CKMS2021 from sklearn.linear_model import Lasso, LogisticRegression from sklearn.base import BaseEstimator np.random.seed(3141) -dml_data = make_plr_CCDDHNR2018(n_obs=10) -ml_g = Lasso() +dml_data = make_plr_CCDDHNR2018(n_obs=50) +ml_l = Lasso() ml_m = Lasso() +ml_g = Lasso() ml_r = Lasso() -dml_plr = DoubleMLPLR(dml_data, ml_g, ml_m) +dml_plr = DoubleMLPLR(dml_data, ml_l, ml_m) +dml_plr_iv_type = DoubleMLPLR(dml_data, ml_l, ml_m, ml_g, score='IV-type') + +dml_data_pliv = make_pliv_CHS2015(n_obs=50, dim_z=1) +dml_pliv = DoubleMLPLIV(dml_data_pliv, ml_l, ml_m, ml_r) -dml_data_irm = make_irm_data(n_obs=10) -dml_data_iivm = make_iivm_data(n_obs=10) -dml_data_pliv = make_pliv_CHS2015(n_obs=10, dim_z=1) +dml_data_irm = make_irm_data(n_obs=50) +dml_data_iivm = make_iivm_data(n_obs=50) dml_cluster_data_pliv = make_pliv_multiway_cluster_CKMS2021(N=10, M=10) -(x, y, d, z) = make_iivm_data(n_obs=30, return_type="array") +(x, y, d, z) = make_iivm_data(n_obs=50, return_type="array") y[y > 0] = 1 y[y < 0] = 0 dml_data_irm_binary_outcome = DoubleMLData.from_arrays(x, y, d) @@ -31,13 +35,13 @@ def test_doubleml_exception_data(): msg = 'The data must be of DoubleMLData type.' with pytest.raises(TypeError, match=msg): - _ = DoubleMLPLR(pd.DataFrame(), ml_g, ml_m) + _ = DoubleMLPLR(pd.DataFrame(), ml_l, ml_m) # PLR with IV msg = (r'Incompatible data. Z1 have been set as instrumental variable\(s\). ' 'To fit a partially linear IV regression model use DoubleMLPLIV instead of DoubleMLPLR.') with pytest.raises(ValueError, match=msg): - _ = DoubleMLPLR(dml_data_pliv, ml_g, ml_m) + _ = DoubleMLPLR(dml_data_pliv, ml_l, ml_m) # PLIV without IV msg = ('Incompatible data. ' @@ -55,7 +59,7 @@ def test_doubleml_exception_data(): msg = ('Incompatible data. To fit an IRM model with DML exactly one binary variable with values 0 and 1 ' 'needs to be specified as treatment variable.') df_irm = dml_data_irm.data.copy() - df_irm['d'] = df_irm['d']*2 + df_irm['d'] = df_irm['d'] * 2 with pytest.raises(ValueError, match=msg): # non-binary D for IRM _ = DoubleMLIRM(DoubleMLData(df_irm, 'y', 'd'), @@ -103,10 +107,10 @@ def test_doubleml_exception_data(): def test_doubleml_exception_scores(): msg = 'Invalid score IV. Valid score IV-type or partialling out.' with pytest.raises(ValueError, match=msg): - _ = DoubleMLPLR(dml_data, ml_g, ml_m, score='IV') + _ = DoubleMLPLR(dml_data, ml_l, ml_m, score='IV') msg = 'score should be either a string or a callable. 0 was passed.' with pytest.raises(TypeError, match=msg): - _ = DoubleMLPLR(dml_data, ml_g, ml_m, score=0) + _ = DoubleMLPLR(dml_data, ml_l, ml_m, score=0) msg = 'Invalid score IV. Valid score ATE or ATTE.' with pytest.raises(ValueError, match=msg): @@ -173,65 +177,71 @@ def test_doubleml_exception_subgroups(): def test_doubleml_exception_resampling(): msg = "The number of folds must be of int type. 1.5 of type was passed." with pytest.raises(TypeError, match=msg): - _ = DoubleMLPLR(dml_data, ml_g, ml_m, n_folds=1.5) + _ = DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=1.5) msg = ('The number of repetitions for the sample splitting must be of int type. ' "1.5 of type was passed.") with pytest.raises(TypeError, match=msg): - _ = DoubleMLPLR(dml_data, ml_g, ml_m, n_rep=1.5) + _ = DoubleMLPLR(dml_data, ml_l, ml_m, n_rep=1.5) msg = 'The number of folds must be positive. 0 was passed.' 
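The manual PLR helpers above encode the core difference between the two scores: 'partialling out' only needs l_hat and m_hat, while 'IV-type' additionally fits ml_g on y - theta_initial*d, where theta_initial is a preliminary partialling-out estimate. A compact sketch of the score elements with cross-fitting omitted (y, d and the nuisance predictions are taken as given 1d arrays):

# Score elements for the PLR model, mirroring the manual helpers above (no fold handling).
import numpy as np

def plr_score_elements(y, d, l_hat, m_hat, g_hat=None, score='partialling out'):
    d_minus_m = d - m_hat
    if score == 'partialling out':
        psi_a = -d_minus_m * d_minus_m
        psi_b = d_minus_m * (y - l_hat)
    else:
        assert score == 'IV-type' and g_hat is not None
        psi_a = -d_minus_m * d
        psi_b = d_minus_m * (y - g_hat)
    return psi_a, psi_b

def plr_preliminary_theta(y, d, l_hat, m_hat):
    # preliminary estimate that defines the regression target y - theta_initial * d for ml_g
    psi_a, psi_b = plr_score_elements(y, d, l_hat, m_hat, score='partialling out')
    return -np.mean(psi_b) / np.mean(psi_a)

# In both cases the point estimate solves np.mean(psi_a) * theta + np.mean(psi_b) = 0,
# i.e. theta_hat = -np.mean(psi_b) / np.mean(psi_a), as in plr_orth above.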
with pytest.raises(ValueError, match=msg): - _ = DoubleMLPLR(dml_data, ml_g, ml_m, n_folds=0) + _ = DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=0) msg = 'The number of repetitions for the sample splitting must be positive. 0 was passed.' with pytest.raises(ValueError, match=msg): - _ = DoubleMLPLR(dml_data, ml_g, ml_m, n_rep=0) + _ = DoubleMLPLR(dml_data, ml_l, ml_m, n_rep=0) msg = 'apply_cross_fitting must be True or False. Got 1.' with pytest.raises(TypeError, match=msg): - _ = DoubleMLPLR(dml_data, ml_g, ml_m, apply_cross_fitting=1) + _ = DoubleMLPLR(dml_data, ml_l, ml_m, apply_cross_fitting=1) msg = 'draw_sample_splitting must be True or False. Got true.' with pytest.raises(TypeError, match=msg): - _ = DoubleMLPLR(dml_data, ml_g, ml_m, draw_sample_splitting='true') + _ = DoubleMLPLR(dml_data, ml_l, ml_m, draw_sample_splitting='true') @pytest.mark.ci def test_doubleml_exception_dml_procedure(): msg = 'dml_procedure must be "dml1" or "dml2". Got 1.' with pytest.raises(ValueError, match=msg): - _ = DoubleMLPLR(dml_data, ml_g, ml_m, dml_procedure='1') + _ = DoubleMLPLR(dml_data, ml_l, ml_m, dml_procedure='1') msg = 'dml_procedure must be "dml1" or "dml2". Got dml.' with pytest.raises(ValueError, match=msg): - _ = DoubleMLPLR(dml_data, ml_g, ml_m, dml_procedure='dml') + _ = DoubleMLPLR(dml_data, ml_l, ml_m, dml_procedure='dml') @pytest.mark.ci def test_doubleml_warning_crossfitting_onefold(): msg = 'apply_cross_fitting is set to False. Cross-fitting is not supported for n_folds = 1.' with pytest.warns(UserWarning, match=msg): - _ = DoubleMLPLR(dml_data, ml_g, ml_m, apply_cross_fitting=True, n_folds=1) + _ = DoubleMLPLR(dml_data, ml_l, ml_m, apply_cross_fitting=True, n_folds=1) @pytest.mark.ci def test_doubleml_exception_no_cross_fit(): msg = 'Estimation without cross-fitting not supported for n_folds > 2.' with pytest.raises(AssertionError, match=msg): - _ = DoubleMLPLR(dml_data, ml_g, ml_m, apply_cross_fitting=False) + _ = DoubleMLPLR(dml_data, ml_l, ml_m, apply_cross_fitting=False) @pytest.mark.ci def test_doubleml_exception_get_params(): - msg = 'Invalid nuisance learner ml_r. Valid nuisance learner ml_g or ml_m.' + msg = 'Invalid nuisance learner ml_r. Valid nuisance learner ml_l or ml_m.' with pytest.raises(ValueError, match=msg): dml_plr.get_params('ml_r') + msg = 'Invalid nuisance learner ml_g. Valid nuisance learner ml_l or ml_m.' + with pytest.raises(ValueError, match=msg): + dml_plr.get_params('ml_g') + msg = 'Invalid nuisance learner ml_r. Valid nuisance learner ml_l or ml_m or ml_g.' + with pytest.raises(ValueError, match=msg): + dml_plr_iv_type.get_params('ml_r') @pytest.mark.ci def test_doubleml_exception_smpls(): msg = ('Sample splitting not specified. ' r'Either draw samples via .draw_sample splitting\(\) or set external samples via .set_sample_splitting\(\).') - dml_plr_no_smpls = DoubleMLPLR(dml_data, ml_g, ml_m, draw_sample_splitting=False) + dml_plr_no_smpls = DoubleMLPLR(dml_data, ml_l, ml_m, draw_sample_splitting=False) with pytest.raises(ValueError, match=msg): _ = dml_plr_no_smpls.smpls msg = 'Sample splitting not specified. Draw samples via .draw_sample splitting().' 
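boot_plr_single_split above only assembles the two ingredients the multiplier bootstrap needs, the score values psi and the Jacobian term J (a mean over psi_a); the aggregation itself happens in boot_manual, which is not part of this diff. Assuming the usual multiplier-bootstrap form, one batch of bootstrapped t-statistics looks roughly like this sketch:

# Hedged sketch of the multiplier bootstrap aggregation; boot_manual is not shown in this diff,
# so the formula below is an assumption about its general form.
import numpy as np

def multiplier_boot_t_stat(psi, J, se, weights):
    # psi: (n_obs,) score values, J: scalar Jacobian estimate, se: standard error of theta_hat,
    # weights: (n_rep_boot, n_obs) bootstrap multipliers, e.g. standard normal draws
    n_obs = psi.shape[0]
    return weights @ psi / (n_obs * se * J)

rng = np.random.default_rng(42)
psi = rng.normal(size=500)                      # placeholder score values
J = -1.0                                        # e.g. np.mean(-d_minus_m_hat * d_minus_m_hat)
se = 0.05
weights = rng.normal(size=(499, 500))
boot_t_stat = multiplier_boot_t_stat(psi, J, se, weights)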
- dml_pliv_cluster_no_smpls = DoubleMLPLIV(dml_cluster_data_pliv, ml_g, ml_m, ml_r, draw_sample_splitting=False) + dml_pliv_cluster_no_smpls = DoubleMLPLIV(dml_cluster_data_pliv, ml_l, ml_m, ml_r, draw_sample_splitting=False) with pytest.raises(ValueError, match=msg): _ = dml_pliv_cluster_no_smpls.smpls_cluster with pytest.raises(ValueError, match=msg): @@ -253,7 +263,7 @@ def test_doubleml_exception_fit(): @pytest.mark.ci def test_doubleml_exception_bootstrap(): - dml_plr_boot = DoubleMLPLR(dml_data, ml_g, ml_m) + dml_plr_boot = DoubleMLPLR(dml_data, ml_l, ml_m) msg = r'Apply fit\(\) before bootstrap\(\).' with pytest.raises(ValueError, match=msg): dml_plr_boot.bootstrap() @@ -272,7 +282,7 @@ def test_doubleml_exception_bootstrap(): @pytest.mark.ci def test_doubleml_exception_confint(): - dml_plr_confint = DoubleMLPLR(dml_data, ml_g, ml_m) + dml_plr_confint = DoubleMLPLR(dml_data, ml_l, ml_m) msg = 'joint must be True or False. Got 1.' with pytest.raises(TypeError, match=msg): @@ -300,7 +310,7 @@ def test_doubleml_exception_confint(): @pytest.mark.ci def test_doubleml_exception_p_adjust(): - dml_plr_p_adjust = DoubleMLPLR(dml_data, ml_g, ml_m) + dml_plr_p_adjust = DoubleMLPLR(dml_data, ml_l, ml_m) msg = r'Apply fit\(\) before p_adjust\(\).' with pytest.raises(ValueError, match=msg): @@ -320,19 +330,50 @@ def test_doubleml_exception_p_adjust(): @pytest.mark.ci def test_doubleml_exception_tune(): - - msg = r'Invalid param_grids \[0.05, 0.5\]. param_grids must be a dictionary with keys ml_g and ml_m' + msg = r'Invalid param_grids \[0.05, 0.5\]. param_grids must be a dictionary with keys ml_l and ml_m' with pytest.raises(ValueError, match=msg): dml_plr.tune([0.05, 0.5]) - msg = (r"Invalid param_grids {'ml_g': {'alpha': \[0.05, 0.5\]}}. " - "param_grids must be a dictionary with keys ml_g and ml_m.") - with pytest.raises(ValueError, match=msg): - dml_plr.tune({'ml_g': {'alpha': [0.05, 0.5]}}) - - param_grids = {'ml_g': {'alpha': [0.05, 0.5]}, 'ml_m': {'alpha': [0.05, 0.5]}} + msg = (r"Invalid param_grids {'ml_r': {'alpha': \[0.05, 0.5\]}}. " + "param_grids must be a dictionary with keys ml_l and ml_m.") + with pytest.raises(ValueError, match=msg): + dml_plr.tune({'ml_r': {'alpha': [0.05, 0.5]}}) + + msg = r'Invalid param_grids \[0.05, 0.5\]. param_grids must be a dictionary with keys ml_l and ml_m and ml_g' + with pytest.raises(ValueError, match=msg): + dml_plr_iv_type.tune([0.05, 0.5]) + msg = (r"Invalid param_grids {'ml_g': {'alpha': \[0.05, 0.5\]}, 'ml_m': {'alpha': \[0.05, 0.5\]}}. " + "param_grids must be a dictionary with keys ml_l and ml_m and ml_g.") + with pytest.raises(ValueError, match=msg): + dml_plr_iv_type.tune({'ml_g': {'alpha': [0.05, 0.5]}, + 'ml_m': {'alpha': [0.05, 0.5]}}) + + msg = 'Learner ml_g was renamed to ml_l. ' + with pytest.warns(DeprecationWarning, match=msg): + dml_plr.tune({'ml_g': {'alpha': [0.05, 0.5]}, + 'ml_m': {'alpha': [0.05, 0.5]}}) + with pytest.warns(DeprecationWarning, match=msg): + dml_plr.tune({'ml_l': {'alpha': [0.05, 0.5]}, + 'ml_m': {'alpha': [0.05, 0.5]}}, + scoring_methods={'ml_g': 'explained_variance', + 'ml_m': 'explained_variance'}) + + msg = 'Learner ml_g was renamed to ml_l. 
' + with pytest.warns(DeprecationWarning, match=msg): + dml_pliv.tune({'ml_g': {'alpha': [0.05, 0.5]}, + 'ml_m': {'alpha': [0.05, 0.5]}, + 'ml_r': {'alpha': [0.05, 0.5]}}) + with pytest.warns(DeprecationWarning, match=msg): + dml_pliv.tune({'ml_l': {'alpha': [0.05, 0.5]}, + 'ml_m': {'alpha': [0.05, 0.5]}, + 'ml_r': {'alpha': [0.05, 0.5]}}, + scoring_methods={'ml_g': 'explained_variance', + 'ml_m': 'explained_variance', + 'ml_r': 'explained_variance'}) + + param_grids = {'ml_l': {'alpha': [0.05, 0.5]}, 'ml_m': {'alpha': [0.05, 0.5]}} msg = ('Invalid scoring_methods neg_mean_absolute_error. ' 'scoring_methods must be a dictionary. ' - 'Valid keys are ml_g and ml_m.') + 'Valid keys are ml_l and ml_m.') with pytest.raises(ValueError, match=msg): dml_plr.tune(param_grids, scoring_methods='neg_mean_absolute_error') @@ -374,13 +415,12 @@ def test_doubleml_exception_tune(): @pytest.mark.ci def test_doubleml_exception_set_ml_nuisance_params(): - - msg = 'Invalid nuisance learner g. Valid nuisance learner ml_g or ml_m.' + msg = 'Invalid nuisance learner g. Valid nuisance learner ml_l or ml_m.' with pytest.raises(ValueError, match=msg): dml_plr.set_ml_nuisance_params('g', 'd', {'alpha': 0.1}) msg = 'Invalid treatment variable y. Valid treatment variable d.' with pytest.raises(ValueError, match=msg): - dml_plr.set_ml_nuisance_params('ml_g', 'y', {'alpha': 0.1}) + dml_plr.set_ml_nuisance_params('ml_l', 'y', {'alpha': 0.1}) class _DummyNoSetParams: @@ -412,8 +452,8 @@ def predict(self, X): @pytest.mark.ci def test_doubleml_exception_learner(): - err_msg_prefix = 'Invalid learner provided for ml_g: ' - warn_msg_prefix = 'Learner provided for ml_g is probably invalid: ' + err_msg_prefix = 'Invalid learner provided for ml_l: ' + warn_msg_prefix = 'Learner provided for ml_l is probably invalid: ' msg = err_msg_prefix + 'provide an instance of a learner instead of a class.' with pytest.raises(TypeError, match=msg): @@ -432,7 +472,7 @@ def test_doubleml_exception_learner(): with pytest.warns(UserWarning): _ = DoubleMLIRM(dml_data_irm, Lasso(), _DummyNoClassifier()) - # ToDo: Currently for ml_g (and others) we only check whether the learner can be identified as regressor. However, + # ToDo: Currently for ml_l (and others) we only check whether the learner can be identified as regressor. However, # we do not check whether it can instead be identified as classifier, which could be used to throw an error. msg = warn_msg_prefix + r'LogisticRegression\(\) is \(probably\) no regressor.' with pytest.warns(UserWarning, match=msg): @@ -444,6 +484,31 @@ def test_doubleml_exception_learner(): with pytest.raises(ValueError, match=msg): _ = DoubleMLPLR(dml_data, Lasso(), LogisticRegression()) + msg = 'ml_g was renamed to ml_l' + with pytest.warns(DeprecationWarning, match=msg): + _ = DoubleMLPLR(dml_data, ml_g=Lasso(), ml_m=ml_m) # pylint: disable=no-value-for-parameter + + msg = r"For score = 'IV-type', learners ml_l and ml_g should be specified. Set ml_g = clone\(ml_l\)." + with pytest.warns(UserWarning, match=msg): + _ = DoubleMLPLR(dml_data, ml_l=Lasso(), ml_m=ml_m, score='IV-type') + + msg = 'A learner ml_g has been provided for score = "partialling out" but will be ignored.' 
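On the user side, the renamed keys mean that param_grids for a PLR model are now keyed by ml_l and ml_m, plus ml_g when score='IV-type'; passing the old ml_g key under the partialling-out score still works but triggers the DeprecationWarning tested above. A usage sketch (data generator, sample size and grids are purely illustrative):

# Illustrative tuning calls with the renamed nuisance keys.
import doubleml as dml
from doubleml.datasets import make_plr_CCDDHNR2018
from sklearn.linear_model import Lasso

dml_data_demo = make_plr_CCDDHNR2018(n_obs=200)

plr = dml.DoubleMLPLR(dml_data_demo, ml_l=Lasso(), ml_m=Lasso())
plr.tune({'ml_l': {'alpha': [0.05, 0.5]},
          'ml_m': {'alpha': [0.05, 0.5]}})
plr.fit()

plr_iv = dml.DoubleMLPLR(dml_data_demo, ml_l=Lasso(), ml_m=Lasso(), ml_g=Lasso(), score='IV-type')
plr_iv.tune({'ml_l': {'alpha': [0.05, 0.5]},
             'ml_m': {'alpha': [0.05, 0.5]},
             'ml_g': {'alpha': [0.05, 0.5]}})
plr_iv.fit()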
+ with pytest.warns(UserWarning, match=msg): + _ = DoubleMLPLR(dml_data, ml_l=Lasso(), ml_m=Lasso(), ml_g=Lasso(), score='partialling out') + + msg = 'ml_g was renamed to ml_l' + with pytest.warns(DeprecationWarning, match=msg): + _ = DoubleMLPLIV(dml_data_pliv, ml_g=Lasso(), ml_m=ml_m, ml_r=ml_r) # pylint: disable=no-value-for-parameter + + msg = "For score = 'IV-type', learners ml_l, ml_m, ml_r and ml_g need to be specified." + with pytest.raises(ValueError, match=msg): + _ = DoubleMLPLIV(dml_data_pliv, ml_l=ml_l, ml_m=ml_m, ml_r=ml_r, + score='IV-type') + + msg = 'A learner ml_g has been provided for score = "partialling out" but will be ignored.' + with pytest.warns(UserWarning, match=msg): + _ = DoubleMLPLIV(dml_data_pliv, ml_l=Lasso(), ml_m=Lasso(), ml_r=Lasso(), ml_g=Lasso(), score='partialling out') + # we allow classifiers for ml_g for binary treatment variables in IRM msg = (r'The ml_g learner LogisticRegression\(\) was identified as classifier ' 'but the outcome variable is not binary with values 0 and 1.') @@ -503,6 +568,14 @@ def test_doubleml_exception_learner(): dml_iivm_hidden_classifier.set_ml_nuisance_params('ml_g0', 'd', {'max_iter': 314}) dml_iivm_hidden_classifier.fit() + msg = 'Learner ml_g was renamed to ml_l. ' + with pytest.warns(DeprecationWarning, match=msg): + dml_plr.set_ml_nuisance_params('ml_g', 'd', {'max_iter': 314}) + + msg = 'Learner ml_g was renamed to ml_l. ' + with pytest.warns(DeprecationWarning, match=msg): + dml_pliv.set_ml_nuisance_params('ml_g', 'd', {'max_iter': 314}) + @pytest.mark.ci @pytest.mark.filterwarnings("ignore:Learner provided for") @@ -566,17 +639,16 @@ def predict(self, X): @pytest.mark.ci def test_doubleml_nan_prediction(): - - msg = r'Predictions from learner LassoWithNanPred\(\) for ml_g are not finite.' + msg = r'Predictions from learner LassoWithNanPred\(\) for ml_l are not finite.' with pytest.raises(ValueError, match=msg): _ = DoubleMLPLR(dml_data, LassoWithNanPred(), ml_m).fit() - msg = r'Predictions from learner LassoWithInfPred\(\) for ml_g are not finite.' + msg = r'Predictions from learner LassoWithInfPred\(\) for ml_l are not finite.' with pytest.raises(ValueError, match=msg): _ = DoubleMLPLR(dml_data, LassoWithInfPred(), ml_m).fit() msg = r'Predictions from learner LassoWithNanPred\(\) for ml_m are not finite.' with pytest.raises(ValueError, match=msg): - _ = DoubleMLPLR(dml_data, ml_g, LassoWithNanPred()).fit() + _ = DoubleMLPLR(dml_data, ml_l, LassoWithNanPred()).fit() msg = r'Predictions from learner LassoWithInfPred\(\) for ml_m are not finite.' 
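The score tests further below exercise the new callable-score signature for PLR, which now receives l_hat, m_hat and g_hat (g_hat is only populated when a learner ml_g is supplied). A minimal sketch of plugging in a custom score; the score body here is just the partialling-out moment written out by hand, and the data generator is illustrative:

# User-supplied callable score for DoubleMLPLR; it receives the cross-fitted nuisance
# predictions and must return the linear score parts (psi_a, psi_b).
import numpy as np
import doubleml as dml
from doubleml.datasets import make_plr_CCDDHNR2018
from sklearn.linear_model import Lasso

def my_partialling_out_score(y, d, l_hat, m_hat, g_hat, smpls):
    # g_hat and smpls are unused here; g_hat is only filled when ml_g is provided
    psi_a = -np.multiply(d - m_hat, d - m_hat)
    psi_b = np.multiply(d - m_hat, y - l_hat)
    return psi_a, psi_b

dml_data_demo = make_plr_CCDDHNR2018(n_obs=200)
dml_plr_custom = dml.DoubleMLPLR(dml_data_demo, Lasso(), Lasso(), score=my_partialling_out_score)
dml_plr_custom.fit()
print(dml_plr_custom.summary)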
with pytest.raises(ValueError, match=msg): - _ = DoubleMLPLR(dml_data, ml_g, LassoWithInfPred()).fit() + _ = DoubleMLPLR(dml_data, ml_l, LassoWithInfPred()).fit() diff --git a/doubleml/tests/test_doubleml_scores.py b/doubleml/tests/test_doubleml_scores.py index 3a4a26ad..8d0a04fb 100644 --- a/doubleml/tests/test_doubleml_scores.py +++ b/doubleml/tests/test_doubleml_scores.py @@ -14,6 +14,8 @@ dml_plr = DoubleMLPLR(dml_data_plr, Lasso(), Lasso()) dml_plr.fit() +dml_plr_iv_type = DoubleMLPLR(dml_data_plr, Lasso(), Lasso(), Lasso(), score='IV-type') +dml_plr_iv_type.fit() dml_pliv = DoubleMLPLIV(dml_data_pliv, Lasso(), Lasso(), Lasso()) dml_pliv.fit() dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression()) @@ -28,6 +30,12 @@ dml_plr_callable_score.set_sample_splitting(dml_plr.smpls) dml_plr_callable_score.fit(store_predictions=True) +plr_iv_type_score = dml_plr_iv_type._score_elements +dml_plr_iv_type_callable_score = DoubleMLPLR(dml_data_plr, Lasso(), Lasso(), Lasso(), + score=plr_iv_type_score, draw_sample_splitting=False) +dml_plr_iv_type_callable_score.set_sample_splitting(dml_plr_iv_type.smpls) +dml_plr_iv_type_callable_score.fit(store_predictions=True) + irm_score = dml_irm._score_elements dml_irm_callable_score = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), score=irm_score, draw_sample_splitting=False) @@ -41,9 +49,37 @@ dml_iivm_callable_score.fit(store_predictions=True) +def non_orth_score_w_g(y, d, l_hat, m_hat, g_hat, smpls): + u_hat = y - g_hat + psi_a = -np.multiply(d, d) + psi_b = np.multiply(d, u_hat) + return psi_a, psi_b + + +def non_orth_score_w_l(y, d, l_hat, m_hat, g_hat, smpls): + p_a = -np.multiply(d - m_hat, d - m_hat) + p_b = np.multiply(d - m_hat, y - l_hat) + theta_initial = -np.nanmean(p_b) / np.nanmean(p_a) + g_hat = l_hat - theta_initial * np.multiply(d, m_hat) + + u_hat = y - g_hat + psi_a = -np.multiply(d, d) + psi_b = np.multiply(d, u_hat) + return psi_a, psi_b + + +dml_plr_non_orth_score_w_g = DoubleMLPLR(dml_data_plr, Lasso(), Lasso(), Lasso(), + score=non_orth_score_w_g) +dml_plr_non_orth_score_w_g.fit(store_predictions=True) + +dml_plr_non_orth_score_w_l = DoubleMLPLR(dml_data_plr, Lasso(), Lasso(), + score=non_orth_score_w_l) +dml_plr_non_orth_score_w_l.fit(store_predictions=True) + + @pytest.mark.ci @pytest.mark.parametrize('dml_obj', - [dml_plr, dml_pliv, dml_irm, dml_iivm]) + [dml_plr, dml_plr_iv_type, dml_pliv, dml_irm, dml_iivm]) def test_linear_score(dml_obj): assert np.allclose(dml_obj.psi, dml_obj.psi_a * dml_obj.coef + dml_obj.psi_b, @@ -63,10 +99,11 @@ def test_plr_callable_vs_str_score(): @pytest.mark.ci def test_plr_callable_vs_pred_export(): preds = dml_plr_callable_score.predictions - g_hat = preds['ml_g'].squeeze() + l_hat = preds['ml_l'].squeeze() m_hat = preds['ml_m'].squeeze() + g_hat = None psi_a, psi_b = plr_score(dml_data_plr.y, dml_data_plr.d, - g_hat, m_hat, + l_hat, m_hat, g_hat, dml_plr_callable_score.smpls[0]) assert np.allclose(dml_plr.psi_a.squeeze(), psi_a, @@ -76,6 +113,67 @@ def test_plr_callable_vs_pred_export(): rtol=1e-9, atol=1e-4) +@pytest.mark.ci +def test_plr_iv_type_callable_vs_str_score(): + assert np.allclose(dml_plr_iv_type.psi, + dml_plr_iv_type_callable_score.psi, + rtol=1e-9, atol=1e-4) + assert np.allclose(dml_plr_iv_type.coef, + dml_plr_iv_type_callable_score.coef, + rtol=1e-9, atol=1e-4) + + +@pytest.mark.ci +def test_plr_iv_type_callable_vs_pred_export(): + preds = dml_plr_iv_type_callable_score.predictions + l_hat = preds['ml_l'].squeeze() + m_hat = preds['ml_m'].squeeze() + g_hat = 
preds['ml_g'].squeeze() + psi_a, psi_b = plr_iv_type_score(dml_data_plr.y, dml_data_plr.d, + l_hat, m_hat, g_hat, + dml_plr_iv_type_callable_score.smpls[0]) + assert np.allclose(dml_plr_iv_type.psi_a.squeeze(), + psi_a, + rtol=1e-9, atol=1e-4) + assert np.allclose(dml_plr_iv_type.psi_b.squeeze(), + psi_b, + rtol=1e-9, atol=1e-4) + + +@pytest.mark.ci +def test_plr_non_orth_score_w_g_callable_vs_pred_export(): + preds = dml_plr_non_orth_score_w_g.predictions + l_hat = preds['ml_l'].squeeze() + m_hat = preds['ml_m'].squeeze() + g_hat = preds['ml_g'].squeeze() + psi_a, psi_b = non_orth_score_w_g(dml_data_plr.y, dml_data_plr.d, + l_hat, m_hat, g_hat, + dml_plr_non_orth_score_w_g.smpls[0]) + assert np.allclose(dml_plr_non_orth_score_w_g.psi_a.squeeze(), + psi_a, + rtol=1e-9, atol=1e-4) + assert np.allclose(dml_plr_non_orth_score_w_g.psi_b.squeeze(), + psi_b, + rtol=1e-9, atol=1e-4) + + +@pytest.mark.ci +def test_plr_non_orth_score_w_l_callable_vs_pred_export(): + preds = dml_plr_non_orth_score_w_l.predictions + l_hat = preds['ml_l'].squeeze() + m_hat = preds['ml_m'].squeeze() + g_hat = None + psi_a, psi_b = non_orth_score_w_l(dml_data_plr.y, dml_data_plr.d, + l_hat, m_hat, g_hat, + dml_plr_non_orth_score_w_l.smpls[0]) + assert np.allclose(dml_plr_non_orth_score_w_l.psi_a.squeeze(), + psi_a, + rtol=1e-9, atol=1e-4) + assert np.allclose(dml_plr_non_orth_score_w_l.psi_b.squeeze(), + psi_b, + rtol=1e-9, atol=1e-4) + + @pytest.mark.ci def test_irm_callable_vs_str_score(): assert np.allclose(dml_irm.psi, diff --git a/doubleml/tests/test_doubleml_set_sample_splitting.py b/doubleml/tests/test_doubleml_set_sample_splitting.py index fa7d65f7..e05436e4 100644 --- a/doubleml/tests/test_doubleml_set_sample_splitting.py +++ b/doubleml/tests/test_doubleml_set_sample_splitting.py @@ -8,9 +8,9 @@ np.random.seed(3141) dml_data = make_plr_CCDDHNR2018(n_obs=10) -ml_g = Lasso() +ml_l = Lasso() ml_m = Lasso() -dml_plr = DoubleMLPLR(dml_data, ml_g, ml_m, +dml_plr = DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=7, n_rep=8, draw_sample_splitting=False) @@ -172,9 +172,9 @@ def test_doubleml_set_sample_splitting_all_list(): @pytest.mark.ci def test_doubleml_draw_vs_set(): np.random.seed(3141) - dml_plr_set = DoubleMLPLR(dml_data, ml_g, ml_m, n_folds=7, n_rep=8) + dml_plr_set = DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=7, n_rep=8) - dml_plr_drawn = DoubleMLPLR(dml_data, ml_g, ml_m, + dml_plr_drawn = DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=1, n_rep=1, apply_cross_fitting=False) dml_plr_set.set_sample_splitting(dml_plr_drawn.smpls) _assert_resampling_pars(dml_plr_drawn, dml_plr_set) @@ -183,7 +183,7 @@ def test_doubleml_draw_vs_set(): dml_plr_set.set_sample_splitting(dml_plr_drawn.smpls[0][0]) _assert_resampling_pars(dml_plr_drawn, dml_plr_set) - dml_plr_drawn = DoubleMLPLR(dml_data, ml_g, ml_m, + dml_plr_drawn = DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=2, n_rep=1, apply_cross_fitting=False) dml_plr_set.set_sample_splitting(dml_plr_drawn.smpls) _assert_resampling_pars(dml_plr_drawn, dml_plr_set) @@ -192,26 +192,26 @@ def test_doubleml_draw_vs_set(): dml_plr_set.set_sample_splitting(dml_plr_drawn.smpls[0][0]) _assert_resampling_pars(dml_plr_drawn, dml_plr_set) - dml_plr_drawn = DoubleMLPLR(dml_data, ml_g, ml_m, + dml_plr_drawn = DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=2, n_rep=1, apply_cross_fitting=True) dml_plr_set.set_sample_splitting(dml_plr_drawn.smpls) _assert_resampling_pars(dml_plr_drawn, dml_plr_set) dml_plr_set.set_sample_splitting(dml_plr_drawn.smpls[0]) _assert_resampling_pars(dml_plr_drawn, dml_plr_set) - 
dml_plr_drawn = DoubleMLPLR(dml_data, ml_g, ml_m, + dml_plr_drawn = DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=5, n_rep=1, apply_cross_fitting=True) dml_plr_set.set_sample_splitting(dml_plr_drawn.smpls) _assert_resampling_pars(dml_plr_drawn, dml_plr_set) dml_plr_set.set_sample_splitting(dml_plr_drawn.smpls[0]) _assert_resampling_pars(dml_plr_drawn, dml_plr_set) - dml_plr_drawn = DoubleMLPLR(dml_data, ml_g, ml_m, + dml_plr_drawn = DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=5, n_rep=3, apply_cross_fitting=True) dml_plr_set.set_sample_splitting(dml_plr_drawn.smpls) _assert_resampling_pars(dml_plr_drawn, dml_plr_set) - dml_plr_drawn = DoubleMLPLR(dml_data, ml_g, ml_m, + dml_plr_drawn = DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=2, n_rep=4, apply_cross_fitting=False) dml_plr_set.set_sample_splitting(dml_plr_drawn.smpls) _assert_resampling_pars(dml_plr_drawn, dml_plr_set) diff --git a/doubleml/tests/test_multiway_cluster.py b/doubleml/tests/test_multiway_cluster.py index 9e18f28b..73b66b27 100644 --- a/doubleml/tests/test_multiway_cluster.py +++ b/doubleml/tests/test_multiway_cluster.py @@ -2,14 +2,13 @@ import pytest import math -from sklearn.base import clone - from sklearn.linear_model import LinearRegression, Lasso from sklearn.ensemble import RandomForestRegressor import doubleml as dml from doubleml.datasets import make_pliv_multiway_cluster_CKMS2021 +from ._utils import _clone from ._utils_cluster import DoubleMLMultiwayResampling, var_one_way_cluster, est_one_way_cluster_dml2,\ est_two_way_cluster_dml2, var_two_way_cluster from ._utils_pliv_manual import fit_pliv, compute_pliv_residuals @@ -39,6 +38,12 @@ def learner(request): return request.param +@pytest.fixture(scope='module', + params=['partialling out', 'IV-type']) +def score(request): + return request.param + + @pytest.fixture(scope='module', params=['dml1', 'dml2']) def dml_procedure(request): @@ -55,10 +60,10 @@ def dml_pliv_multiway_cluster_old_vs_new_fixture(generate_data_iv, learner): obj_dml_multiway_resampling = DoubleMLMultiwayResampling(n_folds, smpl_sizes) _, smpls_lin_ind = obj_dml_multiway_resampling.split_samples() - # Set machine learning methods for g, m & r - ml_g = clone(learner) - ml_m = clone(learner) - ml_r = clone(learner) + # Set machine learning methods for l, m & r + ml_l = _clone(learner) + ml_m = _clone(learner) + ml_r = _clone(learner) df = obj_dml_cluster_data.data.set_index(['cluster_var_i', 'cluster_var_j']) obj_dml_data = dml.DoubleMLData(df, @@ -68,8 +73,8 @@ def dml_pliv_multiway_cluster_old_vs_new_fixture(generate_data_iv, learner): z_cols=obj_dml_cluster_data.z_cols) dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_data, - ml_g, ml_m, ml_r, - n_folds, + ml_l, ml_m, ml_r, + n_folds=n_folds, dml_procedure=dml_procedure, draw_sample_splitting=False) dml_pliv_obj.set_sample_splitting(smpls_lin_ind) @@ -78,8 +83,8 @@ def dml_pliv_multiway_cluster_old_vs_new_fixture(generate_data_iv, learner): np.random.seed(3141) dml_pliv_obj_cluster = dml.DoubleMLPLIV(obj_dml_cluster_data, - ml_g, ml_m, ml_r, - n_folds, + ml_l, ml_m, ml_r, + n_folds=n_folds, dml_procedure=dml_procedure) dml_pliv_obj_cluster.fit() @@ -97,20 +102,23 @@ def test_dml_pliv_multiway_cluster_old_vs_new_coef(dml_pliv_multiway_cluster_old @pytest.fixture(scope='module') -def dml_pliv_multiway_cluster_fixture(generate_data_iv, learner, dml_procedure): +def dml_pliv_multiway_cluster_fixture(generate_data_iv, learner, score, dml_procedure): n_folds = 2 n_rep = 2 - score = 'partialling out' - # Set machine learning methods for g, m & r - ml_g = clone(learner) 
- ml_m = clone(learner) - ml_r = clone(learner) + # Set machine learning methods for l, m, r & g + ml_l = _clone(learner) + ml_m = _clone(learner) + ml_r = _clone(learner) + if score == 'IV-type': + ml_g = _clone(learner) + else: + ml_g = None np.random.seed(3141) dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_cluster_data, - ml_g, ml_m, ml_r, - n_folds, + ml_l, ml_m, ml_r, ml_g, + n_folds=n_folds, n_rep=n_rep, score=score, dml_procedure=dml_procedure) @@ -125,28 +133,44 @@ def dml_pliv_multiway_cluster_fixture(generate_data_iv, learner, dml_procedure): z = np.ravel(obj_dml_cluster_data.z) res_manual = fit_pliv(y, x, d, z, - clone(learner), clone(learner), clone(learner), + _clone(learner), _clone(learner), _clone(learner), _clone(learner), dml_pliv_obj.smpls, dml_procedure, score, n_rep=n_rep) thetas = np.full(n_rep, np.nan) ses = np.full(n_rep, np.nan) for i_rep in range(n_rep): - g_hat = res_manual['all_g_hat'][i_rep] + l_hat = res_manual['all_l_hat'][i_rep] m_hat = res_manual['all_m_hat'][i_rep] r_hat = res_manual['all_r_hat'][i_rep] + g_hat = res_manual['all_g_hat'][i_rep] smpls_one_split = dml_pliv_obj.smpls[i_rep] - u_hat, v_hat, w_hat = compute_pliv_residuals(y, d, z, g_hat, m_hat, r_hat, smpls_one_split) - - psi_a = -np.multiply(v_hat, w_hat) - if dml_procedure == 'dml2': - psi_b = np.multiply(v_hat, u_hat) - theta = est_two_way_cluster_dml2(psi_a, psi_b, - obj_dml_cluster_data.cluster_vars[:, 0], - obj_dml_cluster_data.cluster_vars[:, 1], - smpls_one_split) + y_minus_l_hat, z_minus_m_hat, d_minus_r_hat, y_minus_g_hat = compute_pliv_residuals( + y, d, z, l_hat, m_hat, r_hat, g_hat, smpls_one_split) + + if score == 'partialling out': + psi_a = -np.multiply(z_minus_m_hat, d_minus_r_hat) + if dml_procedure == 'dml2': + psi_b = np.multiply(z_minus_m_hat, y_minus_l_hat) + theta = est_two_way_cluster_dml2(psi_a, psi_b, + obj_dml_cluster_data.cluster_vars[:, 0], + obj_dml_cluster_data.cluster_vars[:, 1], + smpls_one_split) + else: + theta = res_manual['thetas'][i_rep] + psi = np.multiply(y_minus_l_hat - d_minus_r_hat * theta, z_minus_m_hat) else: - theta = res_manual['thetas'][i_rep] - psi = np.multiply(u_hat - w_hat * theta, v_hat) + assert score == 'IV-type' + psi_a = -np.multiply(z_minus_m_hat, d) + if dml_procedure == 'dml2': + psi_b = np.multiply(z_minus_m_hat, y_minus_g_hat) + theta = est_two_way_cluster_dml2(psi_a, psi_b, + obj_dml_cluster_data.cluster_vars[:, 0], + obj_dml_cluster_data.cluster_vars[:, 1], + smpls_one_split) + else: + theta = res_manual['thetas'][i_rep] + psi = np.multiply(y_minus_g_hat - d * theta, z_minus_m_hat) + var = var_two_way_cluster(psi, psi_a, obj_dml_cluster_data.cluster_vars[:, 0], obj_dml_cluster_data.cluster_vars[:, 1], @@ -184,19 +208,22 @@ def test_dml_pliv_multiway_cluster_se(dml_pliv_multiway_cluster_fixture): @pytest.fixture(scope='module') -def dml_pliv_oneway_cluster_fixture(generate_data_iv, learner, dml_procedure): +def dml_pliv_oneway_cluster_fixture(generate_data_iv, learner, score, dml_procedure): n_folds = 3 - score = 'partialling out' - # Set machine learning methods for g, m & r - ml_g = clone(learner) - ml_m = clone(learner) - ml_r = clone(learner) + # Set machine learning methods for l, m, r & g + ml_l = _clone(learner) + ml_m = _clone(learner) + ml_r = _clone(learner) + if score == 'IV-type': + ml_g = _clone(learner) + else: + ml_g = None np.random.seed(3141) dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_oneway_cluster_data, - ml_g, ml_m, ml_r, - n_folds, + ml_l, ml_m, ml_r, ml_g, + n_folds=n_folds, score=score, dml_procedure=dml_procedure) @@ 
-210,23 +237,38 @@ def dml_pliv_oneway_cluster_fixture(generate_data_iv, learner, dml_procedure): z = np.ravel(obj_dml_oneway_cluster_data.z) res_manual = fit_pliv(y, x, d, z, - clone(learner), clone(learner), clone(learner), + _clone(learner), _clone(learner), _clone(learner), _clone(learner), dml_pliv_obj.smpls, dml_procedure, score) - g_hat = res_manual['all_g_hat'][0] + l_hat = res_manual['all_l_hat'][0] m_hat = res_manual['all_m_hat'][0] r_hat = res_manual['all_r_hat'][0] + g_hat = res_manual['all_g_hat'][0] smpls_one_split = dml_pliv_obj.smpls[0] - u_hat, v_hat, w_hat = compute_pliv_residuals(y, d, z, g_hat, m_hat, r_hat, smpls_one_split) - - psi_a = -np.multiply(v_hat, w_hat) - if dml_procedure == 'dml2': - psi_b = np.multiply(v_hat, u_hat) - theta = est_one_way_cluster_dml2(psi_a, psi_b, - obj_dml_oneway_cluster_data.cluster_vars[:, 0], - smpls_one_split) + y_minus_l_hat, z_minus_m_hat, d_minus_r_hat, y_minus_g_hat = compute_pliv_residuals( + y, d, z, l_hat, m_hat, r_hat, g_hat, smpls_one_split) + + if score == 'partialling out': + psi_a = -np.multiply(z_minus_m_hat, d_minus_r_hat) + if dml_procedure == 'dml2': + psi_b = np.multiply(z_minus_m_hat, y_minus_l_hat) + theta = est_one_way_cluster_dml2(psi_a, psi_b, + obj_dml_oneway_cluster_data.cluster_vars[:, 0], + smpls_one_split) + else: + theta = res_manual['theta'] + psi = np.multiply(y_minus_l_hat - d_minus_r_hat * theta, z_minus_m_hat) else: - theta = res_manual['theta'] - psi = np.multiply(u_hat - w_hat * theta, v_hat) + assert score == 'IV-type' + psi_a = -np.multiply(z_minus_m_hat, d) + if dml_procedure == 'dml2': + psi_b = np.multiply(z_minus_m_hat, y_minus_g_hat) + theta = est_one_way_cluster_dml2(psi_a, psi_b, + obj_dml_oneway_cluster_data.cluster_vars[:, 0], + smpls_one_split) + else: + theta = res_manual['theta'] + psi = np.multiply(y_minus_g_hat - d * theta, z_minus_m_hat) + var = var_one_way_cluster(psi, psi_a, obj_dml_oneway_cluster_data.cluster_vars[:, 0], smpls_one_split) @@ -263,15 +305,15 @@ def dml_plr_cluster_with_index(generate_data1, learner, dml_procedure): data = generate_data1 x_cols = data.columns[data.columns.str.startswith('X')].tolist() - # Set machine learning methods for m & g - ml_g = clone(learner) - ml_m = clone(learner) + # Set machine learning methods for m & l + ml_l = _clone(learner) + ml_m = _clone(learner) obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols) np.random.seed(3141) dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, - ml_g, ml_m, - n_folds, + ml_l, ml_m, + n_folds=n_folds, dml_procedure=dml_procedure) dml_plr_obj.fit() @@ -283,8 +325,8 @@ def dml_plr_cluster_with_index(generate_data1, learner, dml_procedure): cluster_cols='index') np.random.seed(3141) dml_plr_cluster_obj = dml.DoubleMLPLR(dml_cluster_data, - ml_g, ml_m, - n_folds, + ml_l, ml_m, + n_folds=n_folds, dml_procedure=dml_procedure) dml_plr_cluster_obj.fit() diff --git a/doubleml/tests/test_pliv.py b/doubleml/tests/test_pliv.py index a2f9a4ac..34fdca8b 100644 --- a/doubleml/tests/test_pliv.py +++ b/doubleml/tests/test_pliv.py @@ -22,7 +22,7 @@ def learner(request): @pytest.fixture(scope='module', - params=['partialling out']) + params=['partialling out', 'IV-type']) def score(request): return request.param @@ -43,17 +43,27 @@ def dml_pliv_fixture(generate_data_iv, learner, score, dml_procedure): data = generate_data_iv x_cols = data.columns[data.columns.str.startswith('X')].tolist() - # Set machine learning methods for g, m & r - ml_g = clone(learner) + # Set machine learning methods for l, m, r & g + ml_l = clone(learner) 
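The cluster fixtures above spell out the corresponding PLIV (partial-X, single instrument) score elements for both score choices; the same expressions, as a standalone sketch with the cross-fitted residuals taken as given arrays and all cluster aggregation omitted:

# PLIV score elements mirroring the cluster test computations above; inputs are 1d residual arrays.
import numpy as np

def pliv_score_elements(d, z_minus_m_hat, d_minus_r_hat, y_minus_l_hat, y_minus_g_hat, score):
    if score == 'partialling out':
        psi_a = -np.multiply(z_minus_m_hat, d_minus_r_hat)
        psi_b = np.multiply(z_minus_m_hat, y_minus_l_hat)
    else:
        assert score == 'IV-type'
        psi_a = -np.multiply(z_minus_m_hat, d)
        psi_b = np.multiply(z_minus_m_hat, y_minus_g_hat)
    return psi_a, psi_b

# For the estimator itself, score='IV-type' requires all four learners, e.g.
# dml.DoubleMLPLIV(obj_dml_data, ml_l, ml_m, ml_r, ml_g, score='IV-type').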
     ml_m = clone(learner)
     ml_r = clone(learner)
+    ml_g = clone(learner)

     np.random.seed(3141)
     obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols, 'Z1')
-    dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_data,
-                                    ml_g, ml_m, ml_r,
-                                    n_folds,
-                                    dml_procedure=dml_procedure)
+    if score == 'partialling out':
+        dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_data,
+                                        ml_l, ml_m, ml_r,
+                                        n_folds=n_folds,
+                                        score=score,
+                                        dml_procedure=dml_procedure)
+    else:
+        assert score == 'IV-type'
+        dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_data,
+                                        ml_l, ml_m, ml_r, ml_g,
+                                        n_folds=n_folds,
+                                        score=score,
+                                        dml_procedure=dml_procedure)

     dml_pliv_obj.fit()

@@ -66,7 +76,8 @@ def dml_pliv_fixture(generate_data_iv, learner, score, dml_procedure):
     all_smpls = draw_smpls(n_obs, n_folds)

     res_manual = fit_pliv(y, x, d, z,
-                          clone(learner), clone(learner), clone(learner), all_smpls, dml_procedure, score)
+                          clone(learner), clone(learner), clone(learner), clone(learner),
+                          all_smpls, dml_procedure, score)

     res_dict = {'coef': dml_pliv_obj.coef,
                 'coef_manual': res_manual['theta'],
@@ -77,7 +88,8 @@ def dml_pliv_fixture(generate_data_iv, learner, score, dml_procedure):
     for bootstrap in boot_methods:
         np.random.seed(3141)
         boot_theta, boot_t_stat = boot_pliv(y, d, z, res_manual['thetas'], res_manual['ses'],
-                                            res_manual['all_g_hat'], res_manual['all_m_hat'], res_manual['all_r_hat'],
+                                            res_manual['all_l_hat'], res_manual['all_m_hat'], res_manual['all_r_hat'],
+                                            res_manual['all_g_hat'],
                                             all_smpls, score, bootstrap, n_rep_boot)

         np.random.seed(3141)
diff --git a/doubleml/tests/test_pliv_no_cross_fit.py b/doubleml/tests/test_pliv_no_cross_fit.py
index 7265742f..f014009b 100644
--- a/doubleml/tests/test_pliv_no_cross_fit.py
+++ b/doubleml/tests/test_pliv_no_cross_fit.py
@@ -2,13 +2,11 @@
 import pytest
 import math

-from sklearn.base import clone
-
 from sklearn.ensemble import RandomForestRegressor

 import doubleml as dml

-from ._utils import draw_smpls
+from ._utils import draw_smpls, _clone
 from ._utils_pliv_manual import fit_pliv, boot_pliv

@@ -19,7 +17,7 @@ def learner(request):
     return request.param

 @pytest.fixture(scope='module',
-                params=['partialling out'])
+                params=['partialling out', 'IV-type'])
 def score(request):
     return request.param

@@ -40,16 +38,21 @@ def dml_pliv_no_cross_fit_fixture(generate_data_iv, learner, score, n_folds):
     data = generate_data_iv
     x_cols = data.columns[data.columns.str.startswith('X')].tolist()

-    # Set machine learning methods for g, m & r
-    ml_g = clone(learner)
-    ml_m = clone(learner)
-    ml_r = clone(learner)
+    # Set machine learning methods for l, m, r & g
+    ml_l = _clone(learner)
+    ml_m = _clone(learner)
+    ml_r = _clone(learner)
+    if score == 'IV-type':
+        ml_g = _clone(learner)
+    else:
+        ml_g = None

     np.random.seed(3141)
     obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols, 'Z1')
     dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_data,
-                                    ml_g, ml_m, ml_r,
-                                    n_folds,
+                                    ml_l, ml_m, ml_r, ml_g,
+                                    n_folds=n_folds,
+                                    score=score,
                                     dml_procedure=dml_procedure,
                                     apply_cross_fitting=False)

@@ -69,7 +72,8 @@ def dml_pliv_no_cross_fit_fixture(generate_data_iv, learner, score, n_folds):
     smpls = [smpls[0]]

     res_manual = fit_pliv(y, x, d, z,
-                          clone(learner), clone(learner), clone(learner), [smpls], dml_procedure, score)
+                          _clone(learner), _clone(learner), _clone(learner), _clone(learner),
+                          [smpls], dml_procedure, score)

     res_dict = {'coef': dml_pliv_obj.coef,
                 'coef_manual': res_manual['theta'],
@@ -80,7 +84,8 @@ def dml_pliv_no_cross_fit_fixture(generate_data_iv, learner, score, n_folds):
     for bootstrap in boot_methods:
         np.random.seed(3141)
         boot_theta, boot_t_stat = boot_pliv(y, d, z, res_manual['thetas'], res_manual['ses'],
-                                            res_manual['all_g_hat'], res_manual['all_m_hat'], res_manual['all_r_hat'],
+                                            res_manual['all_l_hat'], res_manual['all_m_hat'],
+                                            res_manual['all_r_hat'], res_manual['all_g_hat'],
                                             [smpls], score, bootstrap, n_rep_boot,
                                             apply_cross_fitting=False)
diff --git a/doubleml/tests/test_pliv_partial_x.py b/doubleml/tests/test_pliv_partial_x.py
index 49affaf3..63f5b52f 100644
--- a/doubleml/tests/test_pliv_partial_x.py
+++ b/doubleml/tests/test_pliv_partial_x.py
@@ -39,15 +39,15 @@ def dml_pliv_partial_x_fixture(generate_data_pliv_partialX, learner, score, dml_
     # collect data
     obj_dml_data = generate_data_pliv_partialX

-    # Set machine learning methods for g, m & r
-    ml_g = clone(learner)
+    # Set machine learning methods for l, m & r
+    ml_l = clone(learner)
     ml_m = clone(learner)
     ml_r = clone(learner)

     np.random.seed(3141)
     dml_pliv_obj = dml.DoubleMLPLIV._partialX(obj_dml_data,
-                                              ml_g, ml_m, ml_r,
-                                              n_folds,
+                                              ml_l, ml_m, ml_r,
+                                              n_folds=n_folds,
                                               dml_procedure=dml_procedure)

     dml_pliv_obj.fit()
@@ -73,7 +73,7 @@ def dml_pliv_partial_x_fixture(generate_data_pliv_partialX, learner, score, dml_
     for bootstrap in boot_methods:
         np.random.seed(3141)
         boot_theta, boot_t_stat = boot_pliv_partial_x(y, d, z, res_manual['thetas'], res_manual['ses'],
-                                                      res_manual['all_g_hat'], res_manual['all_m_hat'],
+                                                      res_manual['all_l_hat'], res_manual['all_m_hat'],
                                                       res_manual['all_r_hat'],
                                                       all_smpls, score, bootstrap, n_rep_boot)
diff --git a/doubleml/tests/test_pliv_partial_x_tune.py b/doubleml/tests/test_pliv_partial_x_tune.py
index 88818ced..a1cedd0e 100644
--- a/doubleml/tests/test_pliv_partial_x_tune.py
+++ b/doubleml/tests/test_pliv_partial_x_tune.py
@@ -15,7 +15,7 @@

 @pytest.fixture(scope='module',
                 params=[ElasticNet()])
-def learner_g(request):
+def learner_l(request):
     return request.param

@@ -59,9 +59,9 @@ def get_par_grid(learner):

 @pytest.fixture(scope='module')
-def dml_pliv_partial_x_fixture(generate_data_pliv_partialX, learner_g, learner_m, learner_r, score,
+def dml_pliv_partial_x_fixture(generate_data_pliv_partialX, learner_l, learner_m, learner_r, score,
                                dml_procedure, tune_on_folds):
-    par_grid = {'ml_g': get_par_grid(learner_g),
+    par_grid = {'ml_l': get_par_grid(learner_l),
                 'ml_m': get_par_grid(learner_m),
                 'ml_r': get_par_grid(learner_r)}
     n_folds_tune = 4
@@ -73,15 +73,15 @@ def dml_pliv_partial_x_fixture(generate_data_pliv_partialX, learner_g, learner_m
     # collect data
     obj_dml_data = generate_data_pliv_partialX

-    # Set machine learning methods for g, m & r
-    ml_g = clone(learner_g)
+    # Set machine learning methods for l, m & r
+    ml_l = clone(learner_l)
     ml_m = clone(learner_m)
     ml_r = clone(learner_r)

     np.random.seed(3141)
     dml_pliv_obj = dml.DoubleMLPLIV._partialX(obj_dml_data,
-                                              ml_g, ml_m, ml_r,
-                                              n_folds,
+                                              ml_l, ml_m, ml_r,
+                                              n_folds=n_folds,
                                               dml_procedure=dml_procedure)

     # tune hyperparameters
@@ -99,32 +99,32 @@ def dml_pliv_partial_x_fixture(generate_data_pliv_partialX, learner_g, learner_m
     smpls = all_smpls[0]

     if tune_on_folds:
-        g_params, m_params, r_params = tune_nuisance_pliv_partial_x(y, x, d, z,
-                                                                    clone(learner_g),
+        l_params, m_params, r_params = tune_nuisance_pliv_partial_x(y, x, d, z,
+                                                                    clone(learner_l),
                                                                     clone(learner_m),
                                                                     clone(learner_r),
                                                                     smpls, n_folds_tune,
-                                                                    par_grid['ml_g'],
+                                                                    par_grid['ml_l'],
                                                                     par_grid['ml_m'],
                                                                     par_grid['ml_r'])
     else:
         xx = [(np.arange(len(y)), np.array([]))]
-        g_params, m_params, r_params = tune_nuisance_pliv_partial_x(y, x, d, z,
-                                                                    clone(learner_g),
+        l_params, m_params, r_params = tune_nuisance_pliv_partial_x(y, x, d, z,
+                                                                    clone(learner_l),
                                                                     clone(learner_m),
                                                                     clone(learner_r),
                                                                     xx, n_folds_tune,
-                                                                    par_grid['ml_g'],
+                                                                    par_grid['ml_l'],
                                                                     par_grid['ml_m'],
                                                                     par_grid['ml_r'])
-        g_params = g_params * n_folds
+        l_params = l_params * n_folds
         m_params = [xx * n_folds for xx in m_params]
         r_params = r_params * n_folds

     res_manual = fit_pliv_partial_x(y, x, d, z,
-                                    clone(learner_g), clone(learner_m), clone(learner_r),
+                                    clone(learner_l), clone(learner_m), clone(learner_r),
                                     all_smpls, dml_procedure, score,
-                                    g_params=g_params, m_params=m_params, r_params=r_params)
+                                    l_params=l_params, m_params=m_params, r_params=r_params)

     res_dict = {'coef': dml_pliv_obj.coef,
                 'coef_manual': res_manual['theta'],
@@ -135,7 +135,7 @@ def dml_pliv_partial_x_fixture(generate_data_pliv_partialX, learner_g, learner_m
     for bootstrap in boot_methods:
         np.random.seed(3141)
         boot_theta, boot_t_stat = boot_pliv_partial_x(y, d, z, res_manual['thetas'], res_manual['ses'],
-                                                      res_manual['all_g_hat'], res_manual['all_m_hat'],
+                                                      res_manual['all_l_hat'], res_manual['all_m_hat'],
                                                       res_manual['all_r_hat'],
                                                       all_smpls, score, bootstrap, n_rep_boot)
diff --git a/doubleml/tests/test_pliv_partial_xz.py b/doubleml/tests/test_pliv_partial_xz.py
index 961929c8..1cff6904 100644
--- a/doubleml/tests/test_pliv_partial_xz.py
+++ b/doubleml/tests/test_pliv_partial_xz.py
@@ -39,15 +39,15 @@ def dml_pliv_partial_xz_fixture(generate_data_pliv_partialXZ, learner, score, dm
     # collect data
     obj_dml_data = generate_data_pliv_partialXZ

-    # Set machine learning methods for g, m & r
-    ml_g = clone(learner)
+    # Set machine learning methods for l, m & r
+    ml_l = clone(learner)
     ml_m = clone(learner)
     ml_r = clone(learner)

     np.random.seed(3141)
     dml_pliv_obj = dml.DoubleMLPLIV._partialXZ(obj_dml_data,
-                                               ml_g, ml_m, ml_r,
-                                               n_folds,
+                                               ml_l, ml_m, ml_r,
+                                               n_folds=n_folds,
                                                dml_procedure=dml_procedure)

     dml_pliv_obj.fit()
@@ -73,7 +73,7 @@ def dml_pliv_partial_xz_fixture(generate_data_pliv_partialXZ, learner, score, dm
     for bootstrap in boot_methods:
         np.random.seed(3141)
         boot_theta, boot_t_stat = boot_pliv_partial_xz(y, d, z, res_manual['thetas'], res_manual['ses'],
-                                                       res_manual['all_g_hat'], res_manual['all_m_hat'],
+                                                       res_manual['all_l_hat'], res_manual['all_m_hat'],
                                                        res_manual['all_r_hat'],
                                                        all_smpls, score, bootstrap, n_rep_boot)
diff --git a/doubleml/tests/test_pliv_partial_xz_tune.py b/doubleml/tests/test_pliv_partial_xz_tune.py
index 28f5739f..389fb418 100644
--- a/doubleml/tests/test_pliv_partial_xz_tune.py
+++ b/doubleml/tests/test_pliv_partial_xz_tune.py
@@ -15,7 +15,7 @@

 @pytest.fixture(scope='module',
                 params=[ElasticNet()])
-def learner_g(request):
+def learner_l(request):
     return request.param

@@ -59,9 +59,9 @@ def get_par_grid(learner):

 @pytest.fixture(scope='module')
-def dml_pliv_partial_xz_fixture(generate_data_pliv_partialXZ, learner_g, learner_m, learner_r, score,
+def dml_pliv_partial_xz_fixture(generate_data_pliv_partialXZ, learner_l, learner_m, learner_r, score,
                                 dml_procedure, tune_on_folds):
-    par_grid = {'ml_g': get_par_grid(learner_g),
+    par_grid = {'ml_l': get_par_grid(learner_l),
                 'ml_m': get_par_grid(learner_m),
                 'ml_r': get_par_grid(learner_r)}
     n_folds_tune = 4
@@ -73,15 +73,15 @@ def dml_pliv_partial_xz_fixture(generate_data_pliv_partialXZ, learner
     # collect data
     obj_dml_data = generate_data_pliv_partialXZ

-    # Set machine learning methods for g, m & r
-    ml_g = clone(learner_g)
+    # Set machine learning methods for l, m & r
+    ml_l = clone(learner_l)
     ml_m = clone(learner_m)
     ml_r = clone(learner_r)

     np.random.seed(3141)
     dml_pliv_obj = dml.DoubleMLPLIV._partialXZ(obj_dml_data,
-                                               ml_g, ml_m, ml_r,
-                                               n_folds,
+                                               ml_l, ml_m, ml_r,
+                                               n_folds=n_folds,
                                                dml_procedure=dml_procedure)

     # tune hyperparameters
@@ -99,32 +99,32 @@ def dml_pliv_partial_xz_fixture(generate_data_pliv_partialXZ, learner
     smpls = all_smpls[0]

     if tune_on_folds:
-        g_params, m_params, r_params = tune_nuisance_pliv_partial_xz(y, x, d, z,
-                                                                     clone(learner_g),
+        l_params, m_params, r_params = tune_nuisance_pliv_partial_xz(y, x, d, z,
+                                                                     clone(learner_l),
                                                                      clone(learner_m),
                                                                      clone(learner_r),
                                                                      smpls, n_folds_tune,
-                                                                     par_grid['ml_g'],
+                                                                     par_grid['ml_l'],
                                                                      par_grid['ml_m'],
                                                                      par_grid['ml_r'])
     else:
         xx = [(np.arange(len(y)), np.arange(len(y)))]
-        g_params, m_params, r_params = tune_nuisance_pliv_partial_xz(y, x, d, z,
-                                                                     clone(learner_g),
+        l_params, m_params, r_params = tune_nuisance_pliv_partial_xz(y, x, d, z,
+                                                                     clone(learner_l),
                                                                      clone(learner_m),
                                                                      clone(learner_r),
                                                                      xx, n_folds_tune,
-                                                                     par_grid['ml_g'],
+                                                                     par_grid['ml_l'],
                                                                      par_grid['ml_m'],
                                                                      par_grid['ml_r'])
-        g_params = g_params * n_folds
+        l_params = l_params * n_folds
         m_params = m_params * n_folds
         r_params = r_params * n_folds

     res_manual = fit_pliv_partial_xz(y, x, d, z,
-                                     clone(learner_g), clone(learner_m), clone(learner_r),
+                                     clone(learner_l), clone(learner_m), clone(learner_r),
                                      all_smpls, dml_procedure, score,
-                                     g_params=g_params, m_params=m_params, r_params=r_params)
+                                     l_params=l_params, m_params=m_params, r_params=r_params)

     res_dict = {'coef': dml_pliv_obj.coef,
                 'coef_manual': res_manual['theta'],
@@ -135,7 +135,7 @@ def dml_pliv_partial_xz_fixture(generate_data_pliv_partialXZ, learner
     for bootstrap in boot_methods:
         np.random.seed(3141)
         boot_theta, boot_t_stat = boot_pliv_partial_xz(y, d, z, res_manual['thetas'], res_manual['ses'],
-                                                       res_manual['all_g_hat'], res_manual['all_m_hat'],
+                                                       res_manual['all_l_hat'], res_manual['all_m_hat'],
                                                        res_manual['all_r_hat'],
                                                        all_smpls, score, bootstrap, n_rep_boot)
diff --git a/doubleml/tests/test_pliv_partial_z.py b/doubleml/tests/test_pliv_partial_z.py
index ec586fa1..b182a61f 100644
--- a/doubleml/tests/test_pliv_partial_z.py
+++ b/doubleml/tests/test_pliv_partial_z.py
@@ -48,7 +48,7 @@ def dml_pliv_partial_z_fixture(generate_data_pliv_partialZ, learner, score, dml_
     obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols, z_cols)
     dml_pliv_obj = dml.DoubleMLPLIV._partialZ(obj_dml_data,
                                               ml_r,
-                                              n_folds,
+                                              n_folds=n_folds,
                                               dml_procedure=dml_procedure)

     dml_pliv_obj.fit()
diff --git a/doubleml/tests/test_pliv_partial_z_tune.py b/doubleml/tests/test_pliv_partial_z_tune.py
index bba2bc96..751965f0 100644
--- a/doubleml/tests/test_pliv_partial_z_tune.py
+++ b/doubleml/tests/test_pliv_partial_z_tune.py
@@ -63,7 +63,7 @@ def dml_pliv_partial_z_fixture(generate_data_pliv_partialZ, learner_r, score, dm
     obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols, z_cols)
     dml_pliv_obj = dml.DoubleMLPLIV._partialZ(obj_dml_data,
                                               ml_r,
-                                              n_folds,
+                                              n_folds=n_folds,
                                               dml_procedure=dml_procedure)

     # tune hyperparameters
diff --git a/doubleml/tests/test_pliv_tune.py b/doubleml/tests/test_pliv_tune.py
index 43fde957..40252bb7 100644
--- a/doubleml/tests/test_pliv_tune.py
+++ b/doubleml/tests/test_pliv_tune.py
@@ -2,20 +2,18 @@
 import pytest
 import math

-from sklearn.base import clone
-
-from sklearn.linear_model import ElasticNet
-from sklearn.ensemble import RandomForestRegressor
+from sklearn.linear_model import Lasso, ElasticNet

 import doubleml as dml

-from ._utils import draw_smpls
+from ._utils import draw_smpls, _clone
 from ._utils_pliv_manual import fit_pliv, boot_pliv, tune_nuisance_pliv

 @pytest.fixture(scope='module',
-                params=[ElasticNet()])
-def learner_g(request):
+                params=[Lasso(),
+                        ElasticNet()])
+def learner_l(request):
     return request.param

@@ -32,7 +30,13 @@ def learner_r(request):

 @pytest.fixture(scope='module',
-                params=['partialling out'])
+                params=[ElasticNet()])
+def learner_g(request):
+    return request.param
+
+
+@pytest.fixture(scope='module',
+                params=['partialling out', 'IV-type'])
 def score(request):
     return request.param

@@ -50,8 +54,8 @@ def tune_on_folds(request):

 def get_par_grid(learner):
-    if learner.__class__ == RandomForestRegressor:
-        par_grid = {'n_estimators': [5, 10, 20]}
+    if learner.__class__ == Lasso:
+        par_grid = {'alpha': np.linspace(0.05, .95, 7)}
     else:
         assert learner.__class__ == ElasticNet
         par_grid = {'l1_ratio': [.1, .5, .7, .9, .95, .99, 1], 'alpha': np.linspace(0.05, 1., 7)}

@@ -59,10 +63,11 @@
-def dml_pliv_fixture(generate_data_iv, learner_g, learner_m, learner_r, score, dml_procedure, tune_on_folds):
-    par_grid = {'ml_g': get_par_grid(learner_g),
+def dml_pliv_fixture(generate_data_iv, learner_l, learner_m, learner_r, learner_g, score, dml_procedure, tune_on_folds):
+    par_grid = {'ml_l': get_par_grid(learner_l),
                 'ml_m': get_par_grid(learner_m),
-                'ml_r': get_par_grid(learner_r)}
+                'ml_r': get_par_grid(learner_r),
+                'ml_g': get_par_grid(learner_g)}
     n_folds_tune = 4
     boot_methods = ['Bayes', 'normal', 'wild']

@@ -73,16 +78,21 @@ def dml_pliv_fixture(generate_data_iv, learner_g, learner_m, learner_r, score, d
     data = generate_data_iv
     x_cols = data.columns[data.columns.str.startswith('X')].tolist()

-    # Set machine learning methods for g, m & r
-    ml_g = clone(learner_g)
-    ml_m = clone(learner_m)
-    ml_r = clone(learner_r)
+    # Set machine learning methods for l, m, r & g
+    ml_l = _clone(learner_l)
+    ml_m = _clone(learner_m)
+    ml_r = _clone(learner_r)
+    if score == 'IV-type':
+        ml_g = _clone(learner_g)
+    else:
+        ml_g = None

     np.random.seed(3141)
     obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols, 'Z1')
     dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_data,
-                                    ml_g, ml_m, ml_r,
-                                    n_folds,
+                                    ml_l, ml_m, ml_r, ml_g,
+                                    n_folds=n_folds,
+                                    score=score,
                                     dml_procedure=dml_procedure)

     # tune hyperparameters
@@ -99,24 +109,31 @@ def dml_pliv_fixture(generate_data_iv, learner_g, learner_m, learner_r, score, d
     all_smpls = draw_smpls(n_obs, n_folds)
     smpls = all_smpls[0]

+    tune_g = (score == 'IV-type') | callable(score)
     if tune_on_folds:
-        g_params, m_params, r_params = tune_nuisance_pliv(y, x, d, z,
-                                                          clone(learner_g), clone(learner_m), clone(learner_r),
-                                                          smpls, n_folds_tune,
-                                                          par_grid['ml_g'], par_grid['ml_m'], par_grid['ml_r'])
+        l_params, m_params, r_params, g_params = tune_nuisance_pliv(
+            y, x, d, z,
+            _clone(learner_l), _clone(learner_m), _clone(learner_r), _clone(learner_g),
+            smpls, n_folds_tune,
+            par_grid['ml_l'], par_grid['ml_m'], par_grid['ml_r'], par_grid['ml_g'],
+            tune_g)
     else:
         xx = [(np.arange(len(y)), np.array([]))]
-        g_params, m_params, r_params = tune_nuisance_pliv(y, x, d, z,
-                                                          clone(learner_g), clone(learner_m), clone(learner_r),
-                                                          xx, n_folds_tune,
-                                                          par_grid['ml_g'], par_grid['ml_m'], par_grid['ml_r'])
-        g_params = g_params * n_folds
+        l_params, m_params, r_params, g_params = tune_nuisance_pliv(
+            y, x, d, z,
+            _clone(learner_l), _clone(learner_m), _clone(learner_r), _clone(learner_g),
+            xx, n_folds_tune,
+            par_grid['ml_l'], par_grid['ml_m'], par_grid['ml_r'], par_grid['ml_g'],
+            tune_g)
+
+        l_params = l_params * n_folds
         m_params = m_params * n_folds
         r_params = r_params * n_folds
+        g_params = g_params * n_folds

-    res_manual = fit_pliv(y, x, d, z, clone(learner_g), clone(learner_m), clone(learner_r),
+    res_manual = fit_pliv(y, x, d, z, _clone(learner_l), _clone(learner_m), _clone(learner_r), _clone(learner_g),
                           all_smpls, dml_procedure, score,
-                          g_params=g_params, m_params=m_params, r_params=r_params)
+                          l_params=l_params, m_params=m_params, r_params=r_params, g_params=g_params)

     res_dict = {'coef': dml_pliv_obj.coef,
                 'coef_manual': res_manual['theta'],
@@ -127,7 +144,8 @@ def dml_pliv_fixture(generate_data_iv, learner_g, learner_m, learner_r, score, d
     for bootstrap in boot_methods:
         np.random.seed(3141)
         boot_theta, boot_t_stat = boot_pliv(y, d, z, res_manual['thetas'], res_manual['ses'],
-                                            res_manual['all_g_hat'], res_manual['all_m_hat'], res_manual['all_r_hat'],
+                                            res_manual['all_l_hat'], res_manual['all_m_hat'],
+                                            res_manual['all_r_hat'], res_manual['all_g_hat'],
                                             all_smpls, score, bootstrap, n_rep_boot)

         np.random.seed(3141)
diff --git a/doubleml/tests/test_plr.py b/doubleml/tests/test_plr.py
index 971abf55..7b8b56d9 100644
--- a/doubleml/tests/test_plr.py
+++ b/doubleml/tests/test_plr.py
@@ -45,16 +45,25 @@ def dml_plr_fixture(generate_data1, learner, score, dml_procedure):
     x_cols = data.columns[data.columns.str.startswith('X')].tolist()

     # Set machine learning methods for m & g
-    ml_g = clone(learner)
+    ml_l = clone(learner)
     ml_m = clone(learner)
+    ml_g = clone(learner)

     np.random.seed(3141)
     obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols)
-    dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
-                                  ml_g, ml_m,
-                                  n_folds,
-                                  score=score,
-                                  dml_procedure=dml_procedure)
+    if score == 'partialling out':
+        dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
+                                      ml_l, ml_m,
+                                      n_folds=n_folds,
+                                      score=score,
+                                      dml_procedure=dml_procedure)
+    else:
+        assert score == 'IV-type'
+        dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
+                                      ml_l, ml_m, ml_g,
+                                      n_folds,
+                                      score=score,
+                                      dml_procedure=dml_procedure)

     dml_plr_obj.fit()

@@ -65,7 +74,7 @@ def dml_plr_fixture(generate_data1, learner, score, dml_procedure):
     n_obs = len(y)
     all_smpls = draw_smpls(n_obs, n_folds)

-    res_manual = fit_plr(y, x, d, clone(learner), clone(learner),
+    res_manual = fit_plr(y, x, d, clone(learner), clone(learner), clone(learner),
                          all_smpls, dml_procedure, score)

     res_dict = {'coef': dml_plr_obj.coef,
@@ -77,7 +86,7 @@ def dml_plr_fixture(generate_data1, learner, score, dml_procedure):
     for bootstrap in boot_methods:
         np.random.seed(3141)
         boot_theta, boot_t_stat = boot_plr(y, d, res_manual['thetas'], res_manual['ses'],
-                                           res_manual['all_g_hat'], res_manual['all_m_hat'],
+                                           res_manual['all_l_hat'], res_manual['all_m_hat'], res_manual['all_g_hat'],
                                            all_smpls, score, bootstrap, n_rep_boot)

         np.random.seed(3141)
@@ -127,15 +136,24 @@ def dml_plr_ols_manual_fixture(generate_data1, score, dml_procedure):
     x_cols = data.columns[data.columns.str.startswith('X')].tolist()

     # Set machine learning methods for m & g
+    ml_l = clone(learner)
     ml_g = clone(learner)
     ml_m = clone(learner)

     obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols)
-    dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
-                                  ml_g, ml_m,
-                                  n_folds,
-                                  score=score,
-                                  dml_procedure=dml_procedure)
+    if score == 'partialling out':
+        dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
+                                      ml_l, ml_m,
+                                      n_folds=n_folds,
+                                      score=score,
+                                      dml_procedure=dml_procedure)
+    else:
+        assert score == 'IV-type'
+        dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
+                                      ml_l, ml_m, ml_g,
+                                      n_folds,
+                                      score=score,
+                                      dml_procedure=dml_procedure)

     n = data.shape[0]
     this_smpl = list()
@@ -157,24 +175,38 @@ def dml_plr_ols_manual_fixture(generate_data1, score, dml_procedure):
     smpls = dml_plr_obj.smpls[0]

-    g_hat = []
+    l_hat = []
+    l_hat_vec = np.full_like(y, np.nan)
     for (train_index, test_index) in smpls:
         ols_est = scipy.linalg.lstsq(x[train_index], y[train_index])[0]
-        g_hat.append(np.dot(x[test_index], ols_est))
+        preds = np.dot(x[test_index], ols_est)
+        l_hat.append(preds)
+        l_hat_vec[test_index] = preds

     m_hat = []
+    m_hat_vec = np.full_like(d, np.nan)
     for (train_index, test_index) in smpls:
         ols_est = scipy.linalg.lstsq(x[train_index], d[train_index])[0]
-        m_hat.append(np.dot(x[test_index], ols_est))
+        preds = np.dot(x[test_index], ols_est)
+        m_hat.append(preds)
+        m_hat_vec[test_index] = preds
+
+    g_hat = []
+    if score == 'IV-type':
+        theta_initial = scipy.linalg.lstsq((d - m_hat_vec).reshape(-1, 1), y - l_hat_vec)[0]
+        for (train_index, test_index) in smpls:
+            ols_est = scipy.linalg.lstsq(x[train_index],
+                                         y[train_index] - d[train_index] * theta_initial)[0]
+            g_hat.append(np.dot(x[test_index], ols_est))

     if dml_procedure == 'dml1':
         res_manual, se_manual = plr_dml1(y, x, d,
-                                         g_hat, m_hat,
+                                         l_hat, m_hat, g_hat,
                                          smpls, score)
     else:
         assert dml_procedure == 'dml2'
         res_manual, se_manual = plr_dml2(y, x, d,
-                                         g_hat, m_hat,
+                                         l_hat, m_hat, g_hat,
                                          smpls, score)

     res_dict = {'coef': dml_plr_obj.coef,
@@ -186,7 +218,7 @@ def dml_plr_ols_manual_fixture(generate_data1, score, dml_procedure):
     for bootstrap in boot_methods:
         np.random.seed(3141)
         boot_theta, boot_t_stat = boot_plr(y, d, [res_manual], [se_manual],
-                                           [g_hat], [m_hat],
+                                           [l_hat], [m_hat], [g_hat],
                                            [smpls], score, bootstrap, n_rep_boot)

         np.random.seed(3141)
diff --git a/doubleml/tests/test_plr_classifier.py b/doubleml/tests/test_plr_classifier.py
index af2e14b8..6be28cff 100644
--- a/doubleml/tests/test_plr_classifier.py
+++ b/doubleml/tests/test_plr_classifier.py
@@ -2,15 +2,13 @@
 import pytest
 import math

-from sklearn.base import clone
-
 from sklearn.linear_model import Lasso, LogisticRegression
 from sklearn.ensemble import RandomForestClassifier

 import doubleml as dml
 from doubleml.datasets import fetch_bonus

-from ._utils import draw_smpls
+from ._utils import draw_smpls, _clone
 from ._utils_plr_manual import fit_plr, boot_plr

 bonus_data = fetch_bonus()
@@ -42,13 +40,17 @@ def dml_plr_binary_classifier_fixture(learner, score, dml_procedure):
     n_folds = 2
     n_rep_boot = 502

-    # Set machine learning methods for m & g
-    ml_g = Lasso()
-    ml_m = clone(learner)
+    # Set machine learning methods for l, m & g
+    ml_l = Lasso(alpha=0.3)
+    ml_m = _clone(learner)
+    if score == 'IV-type':
+        ml_g = Lasso()
+    else:
+        ml_g = None

     np.random.seed(3141)
     dml_plr_obj = dml.DoubleMLPLR(bonus_data,
-                                  ml_g, ml_m,
+                                  ml_l, ml_m, ml_g,
                                   n_folds,
                                   score=score,
                                   dml_procedure=dml_procedure)
@@ -62,7 +64,7 @@ def dml_plr_binary_classifier_fixture(learner, score, dml_procedure):
     n_obs = len(y)
     all_smpls = draw_smpls(n_obs, n_folds)

-    res_manual = fit_plr(y, x, d, clone(ml_g), clone(ml_m),
+    res_manual = fit_plr(y, x, d, _clone(ml_l), _clone(ml_m), _clone(ml_g),
                          all_smpls, dml_procedure, score)

     res_dict = {'coef': dml_plr_obj.coef,
@@ -74,7 +76,7 @@ def dml_plr_binary_classifier_fixture(learner, score, dml_procedure):
     for bootstrap in boot_methods:
         np.random.seed(3141)
         boot_theta, boot_t_stat = boot_plr(y, d, res_manual['thetas'], res_manual['ses'],
-                                           res_manual['all_g_hat'], res_manual['all_m_hat'],
+                                           res_manual['all_l_hat'], res_manual['all_m_hat'], res_manual['all_g_hat'],
                                            all_smpls, score, bootstrap, n_rep_boot)

         np.random.seed(3141)
diff --git a/doubleml/tests/test_plr_multi_treat.py b/doubleml/tests/test_plr_multi_treat.py
index 669fe180..f3077828 100644
--- a/doubleml/tests/test_plr_multi_treat.py
+++ b/doubleml/tests/test_plr_multi_treat.py
@@ -1,14 +1,12 @@
 import numpy as np
 import pytest

-from sklearn.base import clone
-
 from sklearn.linear_model import Lasso
 from sklearn.ensemble import RandomForestRegressor

 import doubleml as dml

-from ._utils import draw_smpls
+from ._utils import draw_smpls, _clone
 from ._utils_plr_manual import fit_plr_multitreat, boot_plr_multitreat

@@ -59,14 +57,18 @@ def dml_plr_multitreat_fixture(generate_data_bivariate, generate_data_toeplitz,
     x_cols = data.columns[data.columns.str.startswith('X')].tolist()
     d_cols = data.columns[data.columns.str.startswith('d')].tolist()

-    # Set machine learning methods for m & g
-    ml_g = clone(learner)
-    ml_m = clone(learner)
+    # Set machine learning methods for l, m & g
+    ml_l = _clone(learner)
+    ml_m = _clone(learner)
+    if score == 'IV-type':
+        ml_g = _clone(learner)
+    else:
+        ml_g = None

     np.random.seed(3141)
     obj_dml_data = dml.DoubleMLData(data, 'y', d_cols, x_cols)
     dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
-                                  ml_g, ml_m,
+                                  ml_l, ml_m, ml_g,
                                   n_folds,
                                   n_rep,
                                   score=score,
                                   dml_procedure=dml_procedure)
@@ -81,7 +83,7 @@ def dml_plr_multitreat_fixture(generate_data_bivariate, generate_data_toeplitz,
     all_smpls = draw_smpls(n_obs, n_folds, n_rep)

     res_manual = fit_plr_multitreat(y, x, d,
-                                    clone(learner), clone(learner),
+                                    _clone(learner), _clone(learner), _clone(learner),
                                     all_smpls, dml_procedure, score,
                                     n_rep=n_rep)

@@ -96,7 +98,7 @@ def dml_plr_multitreat_fixture(generate_data_bivariate, generate_data_toeplitz,
         boot_theta, boot_t_stat = boot_plr_multitreat(
             y, d,
             res_manual['thetas'], res_manual['ses'],
-            res_manual['all_g_hat'], res_manual['all_m_hat'],
+            res_manual['all_l_hat'], res_manual['all_m_hat'], res_manual['all_g_hat'],
             all_smpls, score, bootstrap, n_rep_boot, n_rep)
diff --git a/doubleml/tests/test_plr_no_cross_fit.py b/doubleml/tests/test_plr_no_cross_fit.py
index d1e5c94f..761b19bc 100644
--- a/doubleml/tests/test_plr_no_cross_fit.py
+++ b/doubleml/tests/test_plr_no_cross_fit.py
@@ -2,13 +2,11 @@
 import pytest
 import math

-from sklearn.base import clone
-
 from sklearn.linear_model import Lasso

 import doubleml as dml

-from ._utils import draw_smpls
+from ._utils import draw_smpls, _clone
 from ._utils_plr_manual import fit_plr, plr_dml1, fit_nuisance_plr, boot_plr, tune_nuisance_plr

@@ -40,14 +38,18 @@ def dml_plr_no_cross_fit_fixture(generate_data1, learner, score, n_folds):
     data = generate_data1
     x_cols = data.columns[data.columns.str.startswith('X')].tolist()

-    # Set machine learning methods for m & g
-    ml_g = clone(learner)
-    ml_m = clone(learner)
+    # Set machine learning methods for l, m & g
+    ml_l = _clone(learner)
+    ml_m = _clone(learner)
+    if score == 'IV-type':
+        ml_g = _clone(learner)
+    else:
+        ml_g = None

     np.random.seed(3141)
     obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols)
     dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
-                                  ml_g, ml_m,
+                                  ml_l, ml_m, ml_g,
                                   n_folds,
                                   score=score,
                                   dml_procedure=dml_procedure,
@@ -67,7 +69,7 @@ def dml_plr_no_cross_fit_fixture(generate_data1, learner, score, n_folds):
     smpls = all_smpls[0]
     smpls = [smpls[0]]

-    res_manual = fit_plr(y, x, d, clone(learner), clone(learner),
+    res_manual = fit_plr(y, x, d, _clone(learner), _clone(learner), _clone(learner),
                          [smpls], dml_procedure, score)

     res_dict = {'coef': dml_plr_obj.coef,
@@ -79,7 +81,7 @@
     for bootstrap in boot_methods:
         np.random.seed(3141)
         boot_theta, boot_t_stat = boot_plr(y, d, res_manual['thetas'], res_manual['ses'],
-                                           res_manual['all_g_hat'], res_manual['all_m_hat'],
+                                           res_manual['all_l_hat'], res_manual['all_m_hat'], res_manual['all_g_hat'],
                                            [smpls], score, bootstrap, n_rep_boot,
                                            apply_cross_fitting=False)

@@ -132,14 +134,18 @@ def dml_plr_rep_no_cross_fit_fixture(generate_data1, learner, score, n_rep):
     data = generate_data1
     x_cols = data.columns[data.columns.str.startswith('X')].tolist()

-    # Set machine learning methods for m & g
-    ml_g = clone(learner)
-    ml_m = clone(learner)
+    # Set machine learning methods for l, m & g
+    ml_l = _clone(learner)
+    ml_m = _clone(learner)
+    if score == 'IV-type':
+        ml_g = _clone(learner)
+    else:
+        ml_g = None

     np.random.seed(3141)
     obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols)
     dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
-                                  ml_g, ml_m,
+                                  ml_l, ml_m, ml_g,
                                   n_folds,
                                   n_rep,
                                   score,
@@ -160,19 +166,21 @@ def dml_plr_rep_no_cross_fit_fixture(generate_data1, learner, score, n_rep):
     thetas = np.zeros(n_rep)
     ses = np.zeros(n_rep)
-    all_g_hat = list()
+    all_l_hat = list()
     all_m_hat = list()
+    all_g_hat = list()

     for i_rep in range(n_rep):
         smpls = all_smpls[i_rep]

-        g_hat, m_hat = fit_nuisance_plr(y, x, d,
-                                        clone(learner), clone(learner), smpls)
+        l_hat, m_hat, g_hat = fit_nuisance_plr(y, x, d,
+                                               _clone(learner), _clone(learner), _clone(learner), smpls)

-        all_g_hat.append(g_hat)
+        all_l_hat.append(l_hat)
         all_m_hat.append(m_hat)
+        all_g_hat.append(g_hat)

         thetas[i_rep], ses[i_rep] = plr_dml1(y, x, d,
-                                             all_g_hat[i_rep], all_m_hat[i_rep],
+                                             all_l_hat[i_rep], all_m_hat[i_rep], all_g_hat[i_rep],
                                              smpls, score)

     res_manual = np.median(thetas)
@@ -188,7 +196,7 @@ def dml_plr_rep_no_cross_fit_fixture(generate_data1, learner, score, n_rep):
     for bootstrap in boot_methods:
         np.random.seed(3141)
         boot_theta, boot_t_stat = boot_plr(y, d, thetas, ses,
-                                           all_g_hat, all_m_hat,
+                                           all_l_hat, all_m_hat, all_g_hat,
                                            all_smpls, score, bootstrap, n_rep_boot,
                                            n_rep=n_rep, apply_cross_fitting=False)
@@ -235,8 +243,10 @@ def tune_on_folds(request):

 @pytest.fixture(scope="module")
 def dml_plr_no_cross_fit_tune_fixture(generate_data1, learner, score, tune_on_folds):
-    par_grid = {'ml_g': {'alpha': np.linspace(0.05, .95, 7)},
+    par_grid = {'ml_l': {'alpha': np.linspace(0.05, .95, 7)},
                 'ml_m': {'alpha': np.linspace(0.05, .95, 7)}}
+    if score == 'IV-type':
+        par_grid['ml_g'] = {'alpha': np.linspace(0.05, .95, 7)}
     n_folds_tune = 3
     boot_methods = ['normal']

@@ -247,14 +257,18 @@ def dml_plr_no_cross_fit_tune_fixture(generate_data1, learner, score, tune_on_fo
     data = generate_data1
     x_cols = data.columns[data.columns.str.startswith('X')].tolist()

-    # Set machine learning methods for m & g
-    ml_g = Lasso()
+    # Set machine learning methods for l, m & g
+    ml_l = Lasso()
     ml_m = Lasso()
+    if score == 'IV-type':
+        ml_g = Lasso()
+    else:
+        ml_g = None

     np.random.seed(3141)
     obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols)
     dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
-                                  ml_g, ml_m,
+                                  ml_l, ml_m, ml_g,
                                   n_folds=2,
                                   score=score,
                                   dml_procedure=dml_procedure,
@@ -276,18 +290,26 @@ def dml_plr_no_cross_fit_tune_fixture(generate_data1, learner, score, tune_on_fo
     smpls = all_smpls[0]
     smpls = [smpls[0]]

+    tune_g = (score == 'IV-type')
+    if not tune_g:
+        par_grid['ml_g'] = None
     if tune_on_folds:
-        g_params, m_params = tune_nuisance_plr(y, x, d,
-                                               clone(ml_g), clone(ml_m), smpls, n_folds_tune,
-                                               par_grid['ml_g'], par_grid['ml_m'])
+        l_params, m_params, g_params = tune_nuisance_plr(y, x, d,
+                                                         _clone(ml_l), _clone(ml_m), _clone(ml_g),
+                                                         smpls, n_folds_tune,
+                                                         par_grid['ml_l'], par_grid['ml_m'], par_grid['ml_g'],
+                                                         tune_g)
     else:
         xx = [(np.arange(len(y)), np.array([]))]
-        g_params, m_params = tune_nuisance_plr(y, x, d,
-                                               clone(ml_g), clone(ml_m), xx, n_folds_tune,
-                                               par_grid['ml_g'], par_grid['ml_m'])
+        l_params, m_params, g_params = tune_nuisance_plr(y, x, d,
+                                                         _clone(ml_l), _clone(ml_m), _clone(ml_g),
+                                                         xx, n_folds_tune,
+                                                         par_grid['ml_l'], par_grid['ml_m'], par_grid['ml_g'],
+                                                         tune_g)

-    res_manual = fit_plr(y, x, d, clone(ml_m), clone(ml_g),
-                         [smpls], dml_procedure, score, g_params=g_params, m_params=m_params)
+    res_manual = fit_plr(y, x, d, _clone(ml_l), _clone(ml_m), _clone(ml_g),
+                         [smpls], dml_procedure, score,
+                         l_params=l_params, m_params=m_params, g_params=g_params)

     res_dict = {'coef': dml_plr_obj.coef,
                 'coef_manual': res_manual['theta'],
@@ -298,7 +320,7 @@ def dml_plr_no_cross_fit_tune_fixture(generate_data1, learner, score, tune_on_fo
     for bootstrap in boot_methods:
         np.random.seed(3141)
         boot_theta, boot_t_stat = boot_plr(y, d, res_manual['thetas'], res_manual['ses'],
-                                           res_manual['all_g_hat'], res_manual['all_m_hat'],
+                                           res_manual['all_l_hat'], res_manual['all_m_hat'], res_manual['all_g_hat'],
                                            [smpls], score, bootstrap, n_rep_boot,
                                            apply_cross_fitting=False)
diff --git a/doubleml/tests/test_plr_reestimate_from_scores.py b/doubleml/tests/test_plr_reestimate_from_scores.py
index 6a4b44bd..7d5d732a 100644
--- a/doubleml/tests/test_plr_reestimate_from_scores.py
+++ b/doubleml/tests/test_plr_reestimate_from_scores.py
@@ -2,11 +2,12 @@
 import pytest
 import math

-from sklearn.base import clone
 from sklearn.linear_model import LinearRegression

 import doubleml as dml

+from ._utils import _clone
+

 @pytest.fixture(scope='module',
                 params=[LinearRegression()])
@@ -40,14 +41,18 @@ def dml_plr_reestimate_fixture(generate_data1, learner, score, dml_procedure, n_
     data = generate_data1
     x_cols = data.columns[data.columns.str.startswith('X')].tolist()

-    # Set machine learning methods for m & g
-    ml_g = clone(learner)
-    ml_m = clone(learner)
+    # Set machine learning methods for l, m & g
+    ml_l = _clone(learner)
+    ml_m = _clone(learner)
+    if score == 'IV-type':
+        ml_g = _clone(learner)
+    else:
+        ml_g = None

     np.random.seed(3141)
     obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols)
     dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
-                                  ml_g, ml_m,
+                                  ml_l, ml_m, ml_g,
                                   n_folds,
                                   n_rep,
                                   score,
@@ -56,7 +61,7 @@
     np.random.seed(3141)
     dml_plr_obj2 = dml.DoubleMLPLR(obj_dml_data,
-                                   ml_g, ml_m,
+                                   ml_l, ml_m, ml_g,
                                    n_folds,
                                    n_rep,
                                    score,
diff --git a/doubleml/tests/test_plr_rep_cross.py b/doubleml/tests/test_plr_rep_cross.py
index 8f5eea2c..f2a50e21 100644
--- a/doubleml/tests/test_plr_rep_cross.py
+++ b/doubleml/tests/test_plr_rep_cross.py
@@ -2,14 +2,12 @@
 import pytest
 import math

-from sklearn.base import clone
-
 from sklearn.linear_model import LinearRegression
 from sklearn.ensemble import RandomForestRegressor

 import doubleml as dml

-from ._utils import draw_smpls
+from ._utils import draw_smpls, _clone
 from ._utils_plr_manual import fit_plr, boot_plr

@@ -48,14 +46,18 @@ def dml_plr_fixture(generate_data1, learner, score, dml_procedure, n_rep):
     data = generate_data1
     x_cols = data.columns[data.columns.str.startswith('X')].tolist()

-    # Set machine learning methods for m & g
-    ml_g = clone(learner)
-    ml_m = clone(learner)
+    # Set machine learning methods for l, m & g
+    ml_l = _clone(learner)
+    ml_m = _clone(learner)
+    if score == 'IV-type':
+        ml_g = _clone(learner)
+    else:
+        ml_g = None

     np.random.seed(3141)
     obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols)
     dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
-                                  ml_g, ml_m,
+                                  ml_l, ml_m, ml_g,
                                   n_folds,
                                   n_rep,
                                   score,
@@ -70,7 +72,7 @@ def dml_plr_fixture(generate_data1, learner, score, dml_procedure, n_rep):
     n_obs = len(y)
     all_smpls = draw_smpls(n_obs, n_folds, n_rep)

-    res_manual = fit_plr(y, x, d, clone(learner), clone(learner),
+    res_manual = fit_plr(y, x, d, _clone(learner), _clone(learner), _clone(learner),
                          all_smpls, dml_procedure, score, n_rep)

     res_dict = {'coef': dml_plr_obj.coef,
@@ -83,7 +85,7 @@ def dml_plr_fixture(generate_data1, learner, score, dml_procedure, n_rep):
     for bootstrap in boot_methods:
         np.random.seed(3141)
         boot_theta, boot_t_stat = boot_plr(y, d, res_manual['thetas'], res_manual['ses'],
-                                           res_manual['all_g_hat'], res_manual['all_m_hat'],
+                                           res_manual['all_l_hat'], res_manual['all_m_hat'], res_manual['all_g_hat'],
                                            all_smpls, score, bootstrap, n_rep_boot, n_rep)

         np.random.seed(3141)
diff --git a/doubleml/tests/test_plr_set_ml_nuisance_pars.py b/doubleml/tests/test_plr_set_ml_nuisance_pars.py
index 69d830a1..d8b99840 100644
--- a/doubleml/tests/test_plr_set_ml_nuisance_pars.py
+++ b/doubleml/tests/test_plr_set_ml_nuisance_pars.py
@@ -2,11 +2,12 @@
 import pytest
 import math

-from sklearn.base import clone
 from sklearn.linear_model import Lasso

 import doubleml as dml

+from ._utils import _clone
+

 @pytest.fixture(scope='module',
                 params=['IV-type', 'partialling out'])
@@ -31,14 +32,18 @@ def dml_plr_fixture(generate_data1, score, dml_procedure):
     alpha = 0.05
     learner = Lasso(alpha=alpha)

-    # Set machine learning methods for m & g
-    ml_g = clone(learner)
-    ml_m = clone(learner)
+    # Set machine learning methods for l, m & g
+    ml_l = _clone(learner)
+    ml_m = _clone(learner)
+    if score == 'IV-type':
+        ml_g = _clone(learner)
+    else:
+        ml_g = None

     np.random.seed(3141)
     obj_dml_data = dml.DoubleMLData(data, 'y', ['d'])
     dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
-                                  ml_g, ml_m,
+                                  ml_l, ml_m, ml_g,
                                   n_folds,
                                   score=score,
                                   dml_procedure=dml_procedure)
@@ -47,17 +52,24 @@ def dml_plr_fixture(generate_data1, score, dml_procedure):
     np.random.seed(3141)
     learner = Lasso()

-    # Set machine learning methods for m & g
-    ml_g = clone(learner)
-    ml_m = clone(learner)
+    # Set machine learning methods for l, m & g
+    ml_l = _clone(learner)
+    ml_m = _clone(learner)
+    if score == 'IV-type':
+        ml_g = _clone(learner)
+    else:
+        ml_g = None

     dml_plr_obj_ext_set_par = dml.DoubleMLPLR(obj_dml_data,
-                                              ml_g, ml_m,
+                                              ml_l, ml_m, ml_g,
                                               n_folds,
                                               score=score,
                                               dml_procedure=dml_procedure)

-    dml_plr_obj_ext_set_par.set_ml_nuisance_params('ml_g', 'd', {'alpha': alpha})
+    dml_plr_obj_ext_set_par.set_ml_nuisance_params('ml_l', 'd', {'alpha': alpha})
     dml_plr_obj_ext_set_par.set_ml_nuisance_params('ml_m', 'd', {'alpha': alpha})
+    if score == 'IV-type':
+        dml_plr_obj_ext_set_par.set_ml_nuisance_params('ml_g', 'd', {'alpha': alpha})
+
     dml_plr_obj_ext_set_par.fit()

     res_dict = {'coef': dml_plr_obj.coef,
diff --git a/doubleml/tests/test_plr_set_smpls_externally.py b/doubleml/tests/test_plr_set_smpls_externally.py
index c745362f..e6268217 100644
--- a/doubleml/tests/test_plr_set_smpls_externally.py
+++ b/doubleml/tests/test_plr_set_smpls_externally.py
@@ -2,11 +2,12 @@
 import pytest
 import math

-from sklearn.base import clone
 from sklearn.linear_model import LinearRegression

 import doubleml as dml

+from ._utils import _clone
+

 @pytest.fixture(scope='module',
                 params=[LinearRegression()])
@@ -40,14 +41,18 @@ def dml_plr_smpls_fixture(generate_data1, learner, score, dml_procedure, n_rep):
     data = generate_data1
     x_cols = data.columns[data.columns.str.startswith('X')].tolist()

-    # Set machine learning methods for m & g
-    ml_g = clone(learner)
-    ml_m = clone(learner)
+    # Set machine learning methods for l, m & g
+    ml_l = _clone(learner)
+    ml_m = _clone(learner)
+    if score == 'IV-type':
+        ml_g = _clone(learner)
+    else:
+        ml_g = None

     np.random.seed(3141)
     obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols)
     dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
-                                  ml_g, ml_m,
+                                  ml_l, ml_m, ml_g,
                                   n_folds,
                                   n_rep,
                                   score,
@@ -58,7 +63,7 @@ def dml_plr_smpls_fixture(generate_data1, learner, score, dml_procedure, n_rep):
     smpls = dml_plr_obj.smpls

     dml_plr_obj2 = dml.DoubleMLPLR(obj_dml_data,
-                                   ml_g, ml_m,
+                                   ml_l, ml_m, ml_g,
                                    score=score,
                                    dml_procedure=dml_procedure,
                                    draw_sample_splitting=False)
diff --git a/doubleml/tests/test_plr_tune.py b/doubleml/tests/test_plr_tune.py
index 4b25dfa0..c18c470e 100644
--- a/doubleml/tests/test_plr_tune.py
+++ b/doubleml/tests/test_plr_tune.py
@@ -2,20 +2,18 @@
 import pytest
 import math

-from sklearn.base import clone
-
 from sklearn.linear_model import Lasso, ElasticNet

 import doubleml as dml

-from ._utils import draw_smpls
+from ._utils import draw_smpls, _clone
 from ._utils_plr_manual import fit_plr, boot_plr, tune_nuisance_plr

 @pytest.fixture(scope='module',
                 params=[Lasso(),
                         ElasticNet()])
-def learner_g(request):
+def learner_l(request):
     return request.param

@@ -27,7 +25,14 @@ def learner_m(request):

 @pytest.fixture(scope='module',
-                params=['partialling out'])
+                params=[Lasso(),
+                        ElasticNet()])
+def learner_g(request):
+    return request.param
+
+
+@pytest.fixture(scope='module',
+                params=['partialling out', 'IV-type'])
 def score(request):
     return request.param

@@ -54,9 +59,10 @@ def get_par_grid(learner):

 @pytest.fixture(scope="module")
-def dml_plr_fixture(generate_data2, learner_g, learner_m, score, dml_procedure, tune_on_folds):
-    par_grid = {'ml_g': get_par_grid(learner_g),
-                'ml_m': get_par_grid(learner_m)}
+def dml_plr_fixture(generate_data2, learner_l, learner_m, learner_g, score, dml_procedure, tune_on_folds):
+    par_grid = {'ml_l': get_par_grid(learner_l),
+                'ml_m': get_par_grid(learner_m),
+                'ml_g': get_par_grid(learner_g)}
     n_folds_tune = 4
     boot_methods = ['normal']

@@ -67,12 +73,16 @@ def dml_plr_fixture(generate_data2, learner_g, learner_m, score, dml_procedure,
     obj_dml_data = generate_data2

     # Set machine learning methods for m & g
-    ml_g = clone(learner_g)
-    ml_m = clone(learner_m)
+    ml_l = _clone(learner_l)
+    ml_m = _clone(learner_m)
+    if score == 'IV-type':
+        ml_g = _clone(learner_g)
+    else:
+        ml_g = None

     np.random.seed(3141)
     dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
-                                  ml_g, ml_m,
+                                  ml_l, ml_m, ml_g,
                                   n_folds,
                                   score=score,
                                   dml_procedure=dml_procedure)
@@ -91,21 +101,25 @@ def dml_plr_fixture(generate_data2, learner_g, learner_m, score, dml_procedure,
     all_smpls = draw_smpls(n_obs, n_folds)
     smpls = all_smpls[0]

+    tune_g = (score == 'IV-type')
     if tune_on_folds:
-        g_params, m_params = tune_nuisance_plr(y, x, d,
-                                               clone(learner_g), clone(learner_m), smpls, n_folds_tune,
-                                               par_grid['ml_g'], par_grid['ml_m'])
+        l_params, m_params, g_params = tune_nuisance_plr(y, x, d,
+                                                         _clone(learner_l), _clone(learner_m), _clone(learner_g),
+                                                         smpls, n_folds_tune,
+                                                         par_grid['ml_l'], par_grid['ml_m'], par_grid['ml_g'], tune_g)
     else:
         xx = [(np.arange(len(y)), np.array([]))]
-        g_params, m_params = tune_nuisance_plr(y, x, d,
-                                               clone(learner_g), clone(learner_m), xx, n_folds_tune,
-                                               par_grid['ml_g'], par_grid['ml_m'])
+        l_params, m_params, g_params = tune_nuisance_plr(y, x, d,
+                                                         _clone(learner_l), _clone(learner_m), _clone(learner_g),
+                                                         xx, n_folds_tune,
+                                                         par_grid['ml_l'], par_grid['ml_m'], par_grid['ml_g'], tune_g)
+        l_params = l_params * n_folds
         g_params = g_params * n_folds
         m_params = m_params * n_folds

-    res_manual = fit_plr(y, x, d, clone(learner_g), clone(learner_m),
+    res_manual = fit_plr(y, x, d, _clone(learner_l), _clone(learner_m), _clone(learner_g),
                          all_smpls, dml_procedure, score,
-                         g_params=g_params, m_params=m_params)
+                         l_params=l_params, m_params=m_params, g_params=g_params)

     res_dict = {'coef': dml_plr_obj.coef,
                 'coef_manual': res_manual['theta'],
@@ -116,7 +130,7 @@ def dml_plr_fixture(generate_data2, learner_g, learner_m, score, dml_procedure,
     for bootstrap in boot_methods:
         np.random.seed(3141)
         boot_theta, boot_t_stat = boot_plr(y, d, res_manual['thetas'], res_manual['ses'],
-                                           res_manual['all_g_hat'], res_manual['all_m_hat'],
+                                           res_manual['all_l_hat'], res_manual['all_m_hat'], res_manual['all_g_hat'],
                                            all_smpls, score, bootstrap, n_rep_boot)

         np.random.seed(3141)
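
Below the patch, a minimal usage sketch of the renamed learner arguments: ml_l and ml_m are always required, while ml_g is only passed for the 'IV-type' score, mirroring the constructor calls exercised in the PLR tests above. The make_plr_CCDDHNR2018 data generator and the Lasso alphas are illustrative assumptions, not part of the diff.

# Illustrative sketch only -- not part of the patch. Assumes the renamed API
# introduced above; make_plr_CCDDHNR2018 and the Lasso alphas are assumptions.
import numpy as np
from sklearn.linear_model import Lasso
import doubleml as dml
from doubleml.datasets import make_plr_CCDDHNR2018

np.random.seed(3141)
obj_dml_data = make_plr_CCDDHNR2018(alpha=0.5, n_obs=500, dim_x=20)

ml_l = Lasso(alpha=0.1)  # nuisance l_0(X) = E[Y|X]
ml_m = Lasso(alpha=0.1)  # nuisance m_0(X) = E[D|X]
ml_g = Lasso(alpha=0.1)  # extra nuisance g_0(X), only needed for score='IV-type'

dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
                              ml_l, ml_m, ml_g,
                              n_folds=5,
                              score='IV-type')
print(dml_plr_obj.fit().summary)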