diff --git a/doubleml_py_vs_r/tests/_utils_pyvsr.py b/doubleml_py_vs_r/tests/_utils_pyvsr.py index 25454fc..eb82659 100644 --- a/doubleml_py_vs_r/tests/_utils_pyvsr.py +++ b/doubleml_py_vs_r/tests/_utils_pyvsr.py @@ -27,8 +27,13 @@ def export_smpl_split_to_r(smpls): f <- function(data, score, dml_procedure, n_rep, smpls_for_r) { data = data.table(data) + mlmethod_l = 'regr.lm' mlmethod_m = 'regr.lm' - mlmethod_g = 'regr.lm' + if (score == "IV-type") { + mlmethod_g = 'regr.lm' + } else { + mlmethod_g = NULL + } Xnames = names(data)[names(data) %in% c("y", "d") == FALSE] data_ml = double_ml_data_from_data_frame(data, y_col = "y", @@ -36,8 +41,9 @@ def export_smpl_split_to_r(smpls): double_mlplr_obj = DoubleMLPLR$new(data_ml, n_folds = 2, - ml_g = mlmethod_g, + ml_l = mlmethod_l, ml_m = mlmethod_m, + ml_g = mlmethod_g, dml_procedure = dml_procedure, score = score) smpls = list() @@ -63,9 +69,14 @@ def export_smpl_split_to_r(smpls): f <- function(data, score, dml_procedure, train_ids, test_ids) { data = data.table(data) - mlmethod_g = 'regr.lm' + mlmethod_l = 'regr.lm' mlmethod_m = 'regr.lm' mlmethod_r = 'regr.lm' + if (score == "IV-type") { + mlmethod_g = 'regr.lm' + } else { + mlmethod_g = NULL + } Xnames = names(data)[names(data) %in% c("y", "d", "Z1") == FALSE] data_ml = double_ml_data_from_data_frame(data, y_col = "y", @@ -74,9 +85,10 @@ def export_smpl_split_to_r(smpls): double_mlpliv_obj = DoubleMLPLIV$new(data_ml, n_folds = 2, - ml_g = mlmethod_g, + ml_l = mlmethod_l, ml_m = mlmethod_m, ml_r = mlmethod_r, + ml_g = mlmethod_g, dml_procedure = dml_procedure, score = score) @@ -98,7 +110,7 @@ def export_smpl_split_to_r(smpls): f <- function(data, score, dml_procedure, train_ids, test_ids) { data = data.table(data) - mlmethod_g = 'regr.lm' + mlmethod_l = 'regr.lm' mlmethod_m = 'regr.lm' mlmethod_r = 'regr.lm' @@ -110,7 +122,7 @@ def export_smpl_split_to_r(smpls): double_mlpliv_obj = DoubleML:::DoubleMLPLIV.partialX(data_ml, n_folds = 2, - ml_g = mlmethod_g, + ml_l = mlmethod_l, ml_m = mlmethod_m, ml_r = mlmethod_r, dml_procedure = dml_procedure, @@ -166,7 +178,7 @@ def export_smpl_split_to_r(smpls): f <- function(data, score, dml_procedure, train_ids, test_ids) { data = data.table(data) - mlmethod_g = 'regr.lm' + mlmethod_l = 'regr.lm' mlmethod_m = 'regr.lm' mlmethod_r = 'regr.lm' @@ -178,7 +190,7 @@ def export_smpl_split_to_r(smpls): double_mlpliv_obj = DoubleML:::DoubleMLPLIV.partialXZ(data_ml, n_folds = 2, - ml_g = mlmethod_g, + ml_l = mlmethod_l, ml_m = mlmethod_m, ml_r = mlmethod_r, dml_procedure = dml_procedure, @@ -270,9 +282,14 @@ def export_smpl_split_to_r(smpls): train_ids, test_ids, cluster_var1, cluster_var2=NULL) { data = data.table(data) - mlmethod_g = 'regr.lm' + mlmethod_l = 'regr.lm' mlmethod_m = 'regr.lm' mlmethod_r = 'regr.lm' + if (score == "IV-type") { + mlmethod_g = 'regr.lm' + } else { + mlmethod_g = NULL + } if (is.null(cluster_var2)) cluster_vars = cluster_var1 else cluster_vars = c(cluster_var1, cluster_var2) Xnames = names(data)[names(data) %in% c("Y", "D", "Z", cluster_vars) == FALSE] @@ -283,9 +300,10 @@ def export_smpl_split_to_r(smpls): double_mlpliv_obj = DoubleMLPLIV$new(data_ml, n_folds = 2, - ml_g = mlmethod_g, + ml_l = mlmethod_l, ml_m = mlmethod_m, ml_r = mlmethod_r, + ml_g = mlmethod_g, dml_procedure = dml_procedure, score = score) diff --git a/doubleml_py_vs_r/tests/test_iivm_pyvsr.py b/doubleml_py_vs_r/tests/test_iivm_pyvsr.py index cce9fc1..aa66f69 100644 --- a/doubleml_py_vs_r/tests/test_iivm_pyvsr.py +++ b/doubleml_py_vs_r/tests/test_iivm_pyvsr.py @@ -41,7 +41,7 @@ def dml_iivm_pyvsr_fixture(generate_data_iivm, score, dml_procedure): dml_iivm_obj = dml.DoubleMLIIVM(obj_dml_data, ml_g, ml_m, ml_r, - n_folds, + n_folds=n_folds, dml_procedure=dml_procedure) np.random.seed(3141) diff --git a/doubleml_py_vs_r/tests/test_irm_pyvsr.py b/doubleml_py_vs_r/tests/test_irm_pyvsr.py index 0bb5f6a..2862479 100644 --- a/doubleml_py_vs_r/tests/test_irm_pyvsr.py +++ b/doubleml_py_vs_r/tests/test_irm_pyvsr.py @@ -40,7 +40,7 @@ def dml_irm_pyvsr_fixture(generate_data_irm, score, dml_procedure): dml_irm_obj = dml.DoubleMLIRM(obj_dml_data, ml_g, ml_m, - n_folds, + n_folds=n_folds, score=score, dml_procedure=dml_procedure) diff --git a/doubleml_py_vs_r/tests/test_pliv_multiway_cluster_pyvsr.py b/doubleml_py_vs_r/tests/test_pliv_multiway_cluster_pyvsr.py index 26738fa..6d21f97 100644 --- a/doubleml_py_vs_r/tests/test_pliv_multiway_cluster_pyvsr.py +++ b/doubleml_py_vs_r/tests/test_pliv_multiway_cluster_pyvsr.py @@ -13,6 +13,12 @@ pandas2ri.activate() +@pytest.fixture(scope='module', + params=['partialling out', 'IV-type']) +def score(request): + return request.param + + @pytest.fixture(scope='module', params=['dml1', 'dml2']) def dml_procedure(request): @@ -20,22 +26,27 @@ def dml_procedure(request): @pytest.fixture(scope='module') -def dml_pliv_twoway_cluster_pyvsr_fixture(generate_data_pliv_two_way_cluster, dml_procedure): +def dml_pliv_twoway_cluster_pyvsr_fixture(generate_data_pliv_two_way_cluster, score, dml_procedure): n_folds = 2 # collect data obj_dml_data = generate_data_pliv_two_way_cluster - # Set machine learning methods for g, m & r + # Set machine learning methods for l, m, & r learner = LinearRegression() - ml_g = clone(learner) + ml_l = clone(learner) ml_m = clone(learner) ml_r = clone(learner) + if score == 'IV-type': + ml_g = clone(learner) + else: + ml_g = None np.random.seed(3141) dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_data, - ml_g, ml_m, ml_r, - n_folds, + ml_l, ml_m, ml_r, ml_g, + n_folds=n_folds, + score=score, dml_procedure=dml_procedure) print(obj_dml_data) dml_pliv_obj.fit() @@ -44,7 +55,7 @@ def dml_pliv_twoway_cluster_pyvsr_fixture(generate_data_pliv_two_way_cluster, dm all_train, all_test = export_smpl_split_to_r(dml_pliv_obj.smpls[0]) r_dataframe = pandas2ri.py2rpy(obj_dml_data.data) - res_r = r_MLPLIV_multiway_cluster(r_dataframe, 'partialling out', dml_procedure, + res_r = r_MLPLIV_multiway_cluster(r_dataframe, score, dml_procedure, all_train, all_test, obj_dml_data.cluster_cols[0], obj_dml_data.cluster_cols[1]) @@ -70,22 +81,27 @@ def test_dml_pliv_twoway_cluster_pyvsr_se(dml_pliv_twoway_cluster_pyvsr_fixture) @pytest.fixture(scope='module') -def dml_pliv_one_cluster_pyvsr_fixture(generate_data_pliv_one_way_cluster, dml_procedure): +def dml_pliv_one_cluster_pyvsr_fixture(generate_data_pliv_one_way_cluster, score, dml_procedure): n_folds = 2 # collect data obj_dml_data = generate_data_pliv_one_way_cluster - # Set machine learning methods for g, m & r + # Set machine learning methods for l, m & r learner = LinearRegression() - ml_g = clone(learner) + ml_l = clone(learner) ml_m = clone(learner) ml_r = clone(learner) + if score == 'IV-type': + ml_g = clone(learner) + else: + ml_g = None np.random.seed(3141) dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_data, - ml_g, ml_m, ml_r, - n_folds, + ml_l, ml_m, ml_r, ml_g, + n_folds=n_folds, + score=score, dml_procedure=dml_procedure) dml_pliv_obj.fit() @@ -93,7 +109,7 @@ def dml_pliv_one_cluster_pyvsr_fixture(generate_data_pliv_one_way_cluster, dml_p all_train, all_test = export_smpl_split_to_r(dml_pliv_obj.smpls[0]) r_dataframe = pandas2ri.py2rpy(obj_dml_data.data.drop(columns='cluster_var_j')) - res_r = r_MLPLIV_multiway_cluster(r_dataframe, 'partialling out', dml_procedure, + res_r = r_MLPLIV_multiway_cluster(r_dataframe, score, dml_procedure, all_train, all_test, obj_dml_data.cluster_cols[0]) diff --git a/doubleml_py_vs_r/tests/test_pliv_pyvsr.py b/doubleml_py_vs_r/tests/test_pliv_pyvsr.py index b270c25..ba9bd03 100644 --- a/doubleml_py_vs_r/tests/test_pliv_pyvsr.py +++ b/doubleml_py_vs_r/tests/test_pliv_pyvsr.py @@ -15,7 +15,7 @@ @pytest.fixture(scope='module', - params=['partialling out']) + params=['partialling out', 'IV-type']) def score(request): return request.param @@ -33,16 +33,21 @@ def dml_pliv_pyvsr_fixture(generate_data_pliv, score, dml_procedure): # collect data obj_dml_data = generate_data_pliv - # Set machine learning methods for g, m & r + # Set machine learning methods for l, m, r & g learner = LinearRegression() - ml_g = clone(learner) + ml_l = clone(learner) ml_m = clone(learner) ml_r = clone(learner) + if score == 'IV-type': + ml_g = clone(learner) + else: + ml_g = None np.random.seed(3141) dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_data, - ml_g, ml_m, ml_r, - n_folds, + ml_l, ml_m, ml_r, ml_g, + n_folds=n_folds, + score=score, dml_procedure=dml_procedure) dml_pliv_obj.fit() @@ -51,7 +56,7 @@ def dml_pliv_pyvsr_fixture(generate_data_pliv, score, dml_procedure): all_train, all_test = export_smpl_split_to_r(dml_pliv_obj.smpls[0]) r_dataframe = pandas2ri.py2rpy(obj_dml_data.data) - res_r = r_MLPLIV(r_dataframe, 'partialling out', dml_procedure, + res_r = r_MLPLIV(r_dataframe, score, dml_procedure, all_train, all_test) print(res_r) @@ -76,22 +81,22 @@ def test_dml_pliv_pyvsr_se(dml_pliv_pyvsr_fixture): @pytest.fixture(scope='module') -def dml_pliv_partial_x_pyvsr_fixture(generate_data_pliv_partialX, score, dml_procedure): +def dml_pliv_partial_x_pyvsr_fixture(generate_data_pliv_partialX, dml_procedure): n_folds = 2 # collect data obj_dml_data = generate_data_pliv_partialX - # Set machine learning methods for g, m & r + # Set machine learning methods for l, m & r learner = LinearRegression() - ml_g = clone(learner) + ml_l = clone(learner) ml_m = clone(learner) ml_r = clone(learner) np.random.seed(3141) dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_data, - ml_g, ml_m, ml_r, - n_folds, + ml_l, ml_m, ml_r, + n_folds=n_folds, dml_procedure=dml_procedure) dml_pliv_obj.fit() @@ -125,20 +130,20 @@ def test_dml_pliv_partial_x_pyvsr_se(dml_pliv_partial_x_pyvsr_fixture): @pytest.fixture(scope='module') -def dml_pliv_partial_z_pyvsr_fixture(generate_data_pliv_partialZ, score, dml_procedure): +def dml_pliv_partial_z_pyvsr_fixture(generate_data_pliv_partialZ, dml_procedure): n_folds = 2 # collect data obj_dml_data = generate_data_pliv_partialZ - # Set machine learning methods for g, m & r + # Set machine learning methods for r learner = LinearRegression() ml_r = clone(learner) np.random.seed(3141) dml_pliv_obj = dml.DoubleMLPLIV._partialZ(obj_dml_data, ml_r, - n_folds, + n_folds=n_folds, dml_procedure=dml_procedure) dml_pliv_obj.fit() @@ -172,22 +177,22 @@ def test_dml_pliv_partial_z_pyvsr_se(dml_pliv_partial_z_pyvsr_fixture): @pytest.fixture(scope='module') -def dml_pliv_partial_xz_pyvsr_fixture(generate_data_pliv_partialXZ, score, dml_procedure): +def dml_pliv_partial_xz_pyvsr_fixture(generate_data_pliv_partialXZ, dml_procedure): n_folds = 2 # collect data obj_dml_data = generate_data_pliv_partialXZ - # Set machine learning methods for g, m & r + # Set machine learning methods for l, m & r learner = LinearRegression() - ml_g = clone(learner) + ml_l = clone(learner) ml_m = clone(learner) ml_r = clone(learner) np.random.seed(3141) dml_pliv_obj = dml.DoubleMLPLIV._partialXZ(obj_dml_data, - ml_g, ml_m, ml_r, - n_folds, + ml_l, ml_m, ml_r, + n_folds=n_folds, dml_procedure=dml_procedure) dml_pliv_obj.fit() diff --git a/doubleml_py_vs_r/tests/test_plr_pyvsr.py b/doubleml_py_vs_r/tests/test_plr_pyvsr.py index d1b5875..f89f600 100644 --- a/doubleml_py_vs_r/tests/test_plr_pyvsr.py +++ b/doubleml_py_vs_r/tests/test_plr_pyvsr.py @@ -37,14 +37,18 @@ def dml_plr_pyvsr_fixture(generate_data_plr, score, dml_procedure, n_rep): # collect data obj_dml_data = generate_data_plr - # Set machine learning methods for m & g + # Set machine learning methods for l, m & g learner = LinearRegression() - ml_g = clone(learner) + ml_l = clone(learner) ml_m = clone(learner) + if score == 'IV-type': + ml_g = clone(learner) + else: + ml_g = None dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, - ml_g, ml_m, - n_folds, + ml_l, ml_m, ml_g, + n_folds=n_folds, n_rep=n_rep, score=score, dml_procedure=dml_procedure)