From 6025631fd9b852d44d01f7f836f09f44e14b95c7 Mon Sep 17 00:00:00 2001 From: "Malte S. Kurz" Date: Tue, 14 Jun 2022 09:56:07 +0200 Subject: [PATCH 1/2] adaptions for the iv type score and renamed nuisance functions --- doubleml_py_vs_r/tests/_utils_pyvsr.py | 20 +++++++++---------- doubleml_py_vs_r/tests/test_iivm_pyvsr.py | 2 +- doubleml_py_vs_r/tests/test_irm_pyvsr.py | 2 +- .../tests/test_pliv_multiway_cluster_pyvsr.py | 4 ++-- doubleml_py_vs_r/tests/test_pliv_pyvsr.py | 8 ++++---- doubleml_py_vs_r/tests/test_plr_pyvsr.py | 2 +- 6 files changed, 19 insertions(+), 19 deletions(-) diff --git a/doubleml_py_vs_r/tests/_utils_pyvsr.py b/doubleml_py_vs_r/tests/_utils_pyvsr.py index 25454fc..06226b5 100644 --- a/doubleml_py_vs_r/tests/_utils_pyvsr.py +++ b/doubleml_py_vs_r/tests/_utils_pyvsr.py @@ -27,8 +27,8 @@ def export_smpl_split_to_r(smpls): f <- function(data, score, dml_procedure, n_rep, smpls_for_r) { data = data.table(data) + mlmethod_l = 'regr.lm' mlmethod_m = 'regr.lm' - mlmethod_g = 'regr.lm' Xnames = names(data)[names(data) %in% c("y", "d") == FALSE] data_ml = double_ml_data_from_data_frame(data, y_col = "y", @@ -36,7 +36,7 @@ def export_smpl_split_to_r(smpls): double_mlplr_obj = DoubleMLPLR$new(data_ml, n_folds = 2, - ml_g = mlmethod_g, + ml_l = mlmethod_l, ml_m = mlmethod_m, dml_procedure = dml_procedure, score = score) @@ -63,7 +63,7 @@ def export_smpl_split_to_r(smpls): f <- function(data, score, dml_procedure, train_ids, test_ids) { data = data.table(data) - mlmethod_g = 'regr.lm' + mlmethod_l = 'regr.lm' mlmethod_m = 'regr.lm' mlmethod_r = 'regr.lm' @@ -74,7 +74,7 @@ def export_smpl_split_to_r(smpls): double_mlpliv_obj = DoubleMLPLIV$new(data_ml, n_folds = 2, - ml_g = mlmethod_g, + ml_l = mlmethod_l, ml_m = mlmethod_m, ml_r = mlmethod_r, dml_procedure = dml_procedure, @@ -98,7 +98,7 @@ def export_smpl_split_to_r(smpls): f <- function(data, score, dml_procedure, train_ids, test_ids) { data = data.table(data) - mlmethod_g = 'regr.lm' + mlmethod_l = 'regr.lm' mlmethod_m = 'regr.lm' mlmethod_r = 'regr.lm' @@ -110,7 +110,7 @@ def export_smpl_split_to_r(smpls): double_mlpliv_obj = DoubleML:::DoubleMLPLIV.partialX(data_ml, n_folds = 2, - ml_g = mlmethod_g, + ml_l = mlmethod_l, ml_m = mlmethod_m, ml_r = mlmethod_r, dml_procedure = dml_procedure, @@ -166,7 +166,7 @@ def export_smpl_split_to_r(smpls): f <- function(data, score, dml_procedure, train_ids, test_ids) { data = data.table(data) - mlmethod_g = 'regr.lm' + mlmethod_l = 'regr.lm' mlmethod_m = 'regr.lm' mlmethod_r = 'regr.lm' @@ -178,7 +178,7 @@ def export_smpl_split_to_r(smpls): double_mlpliv_obj = DoubleML:::DoubleMLPLIV.partialXZ(data_ml, n_folds = 2, - ml_g = mlmethod_g, + ml_l = mlmethod_l, ml_m = mlmethod_m, ml_r = mlmethod_r, dml_procedure = dml_procedure, @@ -270,7 +270,7 @@ def export_smpl_split_to_r(smpls): train_ids, test_ids, cluster_var1, cluster_var2=NULL) { data = data.table(data) - mlmethod_g = 'regr.lm' + mlmethod_l = 'regr.lm' mlmethod_m = 'regr.lm' mlmethod_r = 'regr.lm' @@ -283,7 +283,7 @@ def export_smpl_split_to_r(smpls): double_mlpliv_obj = DoubleMLPLIV$new(data_ml, n_folds = 2, - ml_g = mlmethod_g, + ml_l = mlmethod_l, ml_m = mlmethod_m, ml_r = mlmethod_r, dml_procedure = dml_procedure, diff --git a/doubleml_py_vs_r/tests/test_iivm_pyvsr.py b/doubleml_py_vs_r/tests/test_iivm_pyvsr.py index cce9fc1..aa66f69 100644 --- a/doubleml_py_vs_r/tests/test_iivm_pyvsr.py +++ b/doubleml_py_vs_r/tests/test_iivm_pyvsr.py @@ -41,7 +41,7 @@ def dml_iivm_pyvsr_fixture(generate_data_iivm, score, dml_procedure): dml_iivm_obj = dml.DoubleMLIIVM(obj_dml_data, ml_g, ml_m, ml_r, - n_folds, + n_folds=n_folds, dml_procedure=dml_procedure) np.random.seed(3141) diff --git a/doubleml_py_vs_r/tests/test_irm_pyvsr.py b/doubleml_py_vs_r/tests/test_irm_pyvsr.py index 0bb5f6a..2862479 100644 --- a/doubleml_py_vs_r/tests/test_irm_pyvsr.py +++ b/doubleml_py_vs_r/tests/test_irm_pyvsr.py @@ -40,7 +40,7 @@ def dml_irm_pyvsr_fixture(generate_data_irm, score, dml_procedure): dml_irm_obj = dml.DoubleMLIRM(obj_dml_data, ml_g, ml_m, - n_folds, + n_folds=n_folds, score=score, dml_procedure=dml_procedure) diff --git a/doubleml_py_vs_r/tests/test_pliv_multiway_cluster_pyvsr.py b/doubleml_py_vs_r/tests/test_pliv_multiway_cluster_pyvsr.py index 26738fa..d699913 100644 --- a/doubleml_py_vs_r/tests/test_pliv_multiway_cluster_pyvsr.py +++ b/doubleml_py_vs_r/tests/test_pliv_multiway_cluster_pyvsr.py @@ -35,7 +35,7 @@ def dml_pliv_twoway_cluster_pyvsr_fixture(generate_data_pliv_two_way_cluster, dm np.random.seed(3141) dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_data, ml_g, ml_m, ml_r, - n_folds, + n_folds=n_folds, dml_procedure=dml_procedure) print(obj_dml_data) dml_pliv_obj.fit() @@ -85,7 +85,7 @@ def dml_pliv_one_cluster_pyvsr_fixture(generate_data_pliv_one_way_cluster, dml_p np.random.seed(3141) dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_data, ml_g, ml_m, ml_r, - n_folds, + n_folds=n_folds, dml_procedure=dml_procedure) dml_pliv_obj.fit() diff --git a/doubleml_py_vs_r/tests/test_pliv_pyvsr.py b/doubleml_py_vs_r/tests/test_pliv_pyvsr.py index b270c25..11d5fb1 100644 --- a/doubleml_py_vs_r/tests/test_pliv_pyvsr.py +++ b/doubleml_py_vs_r/tests/test_pliv_pyvsr.py @@ -42,7 +42,7 @@ def dml_pliv_pyvsr_fixture(generate_data_pliv, score, dml_procedure): np.random.seed(3141) dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_data, ml_g, ml_m, ml_r, - n_folds, + n_folds=n_folds, dml_procedure=dml_procedure) dml_pliv_obj.fit() @@ -91,7 +91,7 @@ def dml_pliv_partial_x_pyvsr_fixture(generate_data_pliv_partialX, score, dml_pro np.random.seed(3141) dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_data, ml_g, ml_m, ml_r, - n_folds, + n_folds=n_folds, dml_procedure=dml_procedure) dml_pliv_obj.fit() @@ -138,7 +138,7 @@ def dml_pliv_partial_z_pyvsr_fixture(generate_data_pliv_partialZ, score, dml_pro np.random.seed(3141) dml_pliv_obj = dml.DoubleMLPLIV._partialZ(obj_dml_data, ml_r, - n_folds, + n_folds=n_folds, dml_procedure=dml_procedure) dml_pliv_obj.fit() @@ -187,7 +187,7 @@ def dml_pliv_partial_xz_pyvsr_fixture(generate_data_pliv_partialXZ, score, dml_p np.random.seed(3141) dml_pliv_obj = dml.DoubleMLPLIV._partialXZ(obj_dml_data, ml_g, ml_m, ml_r, - n_folds, + n_folds=n_folds, dml_procedure=dml_procedure) dml_pliv_obj.fit() diff --git a/doubleml_py_vs_r/tests/test_plr_pyvsr.py b/doubleml_py_vs_r/tests/test_plr_pyvsr.py index d1b5875..4687bde 100644 --- a/doubleml_py_vs_r/tests/test_plr_pyvsr.py +++ b/doubleml_py_vs_r/tests/test_plr_pyvsr.py @@ -44,7 +44,7 @@ def dml_plr_pyvsr_fixture(generate_data_plr, score, dml_procedure, n_rep): dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, ml_g, ml_m, - n_folds, + n_folds=n_folds, n_rep=n_rep, score=score, dml_procedure=dml_procedure) From 274caf9405d768874100c9dffb06e43539cdbf7d Mon Sep 17 00:00:00 2001 From: "Malte S. Kurz" Date: Tue, 14 Jun 2022 10:10:10 +0200 Subject: [PATCH 2/2] adaptions and extensions for iv-type scores --- doubleml_py_vs_r/tests/_utils_pyvsr.py | 18 ++++++++++ .../tests/test_pliv_multiway_cluster_pyvsr.py | 36 +++++++++++++------ doubleml_py_vs_r/tests/test_pliv_pyvsr.py | 35 ++++++++++-------- doubleml_py_vs_r/tests/test_plr_pyvsr.py | 10 ++++-- 4 files changed, 71 insertions(+), 28 deletions(-) diff --git a/doubleml_py_vs_r/tests/_utils_pyvsr.py b/doubleml_py_vs_r/tests/_utils_pyvsr.py index 06226b5..eb82659 100644 --- a/doubleml_py_vs_r/tests/_utils_pyvsr.py +++ b/doubleml_py_vs_r/tests/_utils_pyvsr.py @@ -29,6 +29,11 @@ def export_smpl_split_to_r(smpls): data = data.table(data) mlmethod_l = 'regr.lm' mlmethod_m = 'regr.lm' + if (score == "IV-type") { + mlmethod_g = 'regr.lm' + } else { + mlmethod_g = NULL + } Xnames = names(data)[names(data) %in% c("y", "d") == FALSE] data_ml = double_ml_data_from_data_frame(data, y_col = "y", @@ -38,6 +43,7 @@ def export_smpl_split_to_r(smpls): n_folds = 2, ml_l = mlmethod_l, ml_m = mlmethod_m, + ml_g = mlmethod_g, dml_procedure = dml_procedure, score = score) smpls = list() @@ -66,6 +72,11 @@ def export_smpl_split_to_r(smpls): mlmethod_l = 'regr.lm' mlmethod_m = 'regr.lm' mlmethod_r = 'regr.lm' + if (score == "IV-type") { + mlmethod_g = 'regr.lm' + } else { + mlmethod_g = NULL + } Xnames = names(data)[names(data) %in% c("y", "d", "Z1") == FALSE] data_ml = double_ml_data_from_data_frame(data, y_col = "y", @@ -77,6 +88,7 @@ def export_smpl_split_to_r(smpls): ml_l = mlmethod_l, ml_m = mlmethod_m, ml_r = mlmethod_r, + ml_g = mlmethod_g, dml_procedure = dml_procedure, score = score) @@ -273,6 +285,11 @@ def export_smpl_split_to_r(smpls): mlmethod_l = 'regr.lm' mlmethod_m = 'regr.lm' mlmethod_r = 'regr.lm' + if (score == "IV-type") { + mlmethod_g = 'regr.lm' + } else { + mlmethod_g = NULL + } if (is.null(cluster_var2)) cluster_vars = cluster_var1 else cluster_vars = c(cluster_var1, cluster_var2) Xnames = names(data)[names(data) %in% c("Y", "D", "Z", cluster_vars) == FALSE] @@ -286,6 +303,7 @@ def export_smpl_split_to_r(smpls): ml_l = mlmethod_l, ml_m = mlmethod_m, ml_r = mlmethod_r, + ml_g = mlmethod_g, dml_procedure = dml_procedure, score = score) diff --git a/doubleml_py_vs_r/tests/test_pliv_multiway_cluster_pyvsr.py b/doubleml_py_vs_r/tests/test_pliv_multiway_cluster_pyvsr.py index d699913..6d21f97 100644 --- a/doubleml_py_vs_r/tests/test_pliv_multiway_cluster_pyvsr.py +++ b/doubleml_py_vs_r/tests/test_pliv_multiway_cluster_pyvsr.py @@ -13,6 +13,12 @@ pandas2ri.activate() +@pytest.fixture(scope='module', + params=['partialling out', 'IV-type']) +def score(request): + return request.param + + @pytest.fixture(scope='module', params=['dml1', 'dml2']) def dml_procedure(request): @@ -20,22 +26,27 @@ def dml_procedure(request): @pytest.fixture(scope='module') -def dml_pliv_twoway_cluster_pyvsr_fixture(generate_data_pliv_two_way_cluster, dml_procedure): +def dml_pliv_twoway_cluster_pyvsr_fixture(generate_data_pliv_two_way_cluster, score, dml_procedure): n_folds = 2 # collect data obj_dml_data = generate_data_pliv_two_way_cluster - # Set machine learning methods for g, m & r + # Set machine learning methods for l, m, & r learner = LinearRegression() - ml_g = clone(learner) + ml_l = clone(learner) ml_m = clone(learner) ml_r = clone(learner) + if score == 'IV-type': + ml_g = clone(learner) + else: + ml_g = None np.random.seed(3141) dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_data, - ml_g, ml_m, ml_r, + ml_l, ml_m, ml_r, ml_g, n_folds=n_folds, + score=score, dml_procedure=dml_procedure) print(obj_dml_data) dml_pliv_obj.fit() @@ -44,7 +55,7 @@ def dml_pliv_twoway_cluster_pyvsr_fixture(generate_data_pliv_two_way_cluster, dm all_train, all_test = export_smpl_split_to_r(dml_pliv_obj.smpls[0]) r_dataframe = pandas2ri.py2rpy(obj_dml_data.data) - res_r = r_MLPLIV_multiway_cluster(r_dataframe, 'partialling out', dml_procedure, + res_r = r_MLPLIV_multiway_cluster(r_dataframe, score, dml_procedure, all_train, all_test, obj_dml_data.cluster_cols[0], obj_dml_data.cluster_cols[1]) @@ -70,22 +81,27 @@ def test_dml_pliv_twoway_cluster_pyvsr_se(dml_pliv_twoway_cluster_pyvsr_fixture) @pytest.fixture(scope='module') -def dml_pliv_one_cluster_pyvsr_fixture(generate_data_pliv_one_way_cluster, dml_procedure): +def dml_pliv_one_cluster_pyvsr_fixture(generate_data_pliv_one_way_cluster, score, dml_procedure): n_folds = 2 # collect data obj_dml_data = generate_data_pliv_one_way_cluster - # Set machine learning methods for g, m & r + # Set machine learning methods for l, m & r learner = LinearRegression() - ml_g = clone(learner) + ml_l = clone(learner) ml_m = clone(learner) ml_r = clone(learner) + if score == 'IV-type': + ml_g = clone(learner) + else: + ml_g = None np.random.seed(3141) dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_data, - ml_g, ml_m, ml_r, + ml_l, ml_m, ml_r, ml_g, n_folds=n_folds, + score=score, dml_procedure=dml_procedure) dml_pliv_obj.fit() @@ -93,7 +109,7 @@ def dml_pliv_one_cluster_pyvsr_fixture(generate_data_pliv_one_way_cluster, dml_p all_train, all_test = export_smpl_split_to_r(dml_pliv_obj.smpls[0]) r_dataframe = pandas2ri.py2rpy(obj_dml_data.data.drop(columns='cluster_var_j')) - res_r = r_MLPLIV_multiway_cluster(r_dataframe, 'partialling out', dml_procedure, + res_r = r_MLPLIV_multiway_cluster(r_dataframe, score, dml_procedure, all_train, all_test, obj_dml_data.cluster_cols[0]) diff --git a/doubleml_py_vs_r/tests/test_pliv_pyvsr.py b/doubleml_py_vs_r/tests/test_pliv_pyvsr.py index 11d5fb1..ba9bd03 100644 --- a/doubleml_py_vs_r/tests/test_pliv_pyvsr.py +++ b/doubleml_py_vs_r/tests/test_pliv_pyvsr.py @@ -15,7 +15,7 @@ @pytest.fixture(scope='module', - params=['partialling out']) + params=['partialling out', 'IV-type']) def score(request): return request.param @@ -33,16 +33,21 @@ def dml_pliv_pyvsr_fixture(generate_data_pliv, score, dml_procedure): # collect data obj_dml_data = generate_data_pliv - # Set machine learning methods for g, m & r + # Set machine learning methods for l, m, r & g learner = LinearRegression() - ml_g = clone(learner) + ml_l = clone(learner) ml_m = clone(learner) ml_r = clone(learner) + if score == 'IV-type': + ml_g = clone(learner) + else: + ml_g = None np.random.seed(3141) dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_data, - ml_g, ml_m, ml_r, + ml_l, ml_m, ml_r, ml_g, n_folds=n_folds, + score=score, dml_procedure=dml_procedure) dml_pliv_obj.fit() @@ -51,7 +56,7 @@ def dml_pliv_pyvsr_fixture(generate_data_pliv, score, dml_procedure): all_train, all_test = export_smpl_split_to_r(dml_pliv_obj.smpls[0]) r_dataframe = pandas2ri.py2rpy(obj_dml_data.data) - res_r = r_MLPLIV(r_dataframe, 'partialling out', dml_procedure, + res_r = r_MLPLIV(r_dataframe, score, dml_procedure, all_train, all_test) print(res_r) @@ -76,21 +81,21 @@ def test_dml_pliv_pyvsr_se(dml_pliv_pyvsr_fixture): @pytest.fixture(scope='module') -def dml_pliv_partial_x_pyvsr_fixture(generate_data_pliv_partialX, score, dml_procedure): +def dml_pliv_partial_x_pyvsr_fixture(generate_data_pliv_partialX, dml_procedure): n_folds = 2 # collect data obj_dml_data = generate_data_pliv_partialX - # Set machine learning methods for g, m & r + # Set machine learning methods for l, m & r learner = LinearRegression() - ml_g = clone(learner) + ml_l = clone(learner) ml_m = clone(learner) ml_r = clone(learner) np.random.seed(3141) dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_data, - ml_g, ml_m, ml_r, + ml_l, ml_m, ml_r, n_folds=n_folds, dml_procedure=dml_procedure) @@ -125,13 +130,13 @@ def test_dml_pliv_partial_x_pyvsr_se(dml_pliv_partial_x_pyvsr_fixture): @pytest.fixture(scope='module') -def dml_pliv_partial_z_pyvsr_fixture(generate_data_pliv_partialZ, score, dml_procedure): +def dml_pliv_partial_z_pyvsr_fixture(generate_data_pliv_partialZ, dml_procedure): n_folds = 2 # collect data obj_dml_data = generate_data_pliv_partialZ - # Set machine learning methods for g, m & r + # Set machine learning methods for r learner = LinearRegression() ml_r = clone(learner) @@ -172,21 +177,21 @@ def test_dml_pliv_partial_z_pyvsr_se(dml_pliv_partial_z_pyvsr_fixture): @pytest.fixture(scope='module') -def dml_pliv_partial_xz_pyvsr_fixture(generate_data_pliv_partialXZ, score, dml_procedure): +def dml_pliv_partial_xz_pyvsr_fixture(generate_data_pliv_partialXZ, dml_procedure): n_folds = 2 # collect data obj_dml_data = generate_data_pliv_partialXZ - # Set machine learning methods for g, m & r + # Set machine learning methods for l, m & r learner = LinearRegression() - ml_g = clone(learner) + ml_l = clone(learner) ml_m = clone(learner) ml_r = clone(learner) np.random.seed(3141) dml_pliv_obj = dml.DoubleMLPLIV._partialXZ(obj_dml_data, - ml_g, ml_m, ml_r, + ml_l, ml_m, ml_r, n_folds=n_folds, dml_procedure=dml_procedure) diff --git a/doubleml_py_vs_r/tests/test_plr_pyvsr.py b/doubleml_py_vs_r/tests/test_plr_pyvsr.py index 4687bde..f89f600 100644 --- a/doubleml_py_vs_r/tests/test_plr_pyvsr.py +++ b/doubleml_py_vs_r/tests/test_plr_pyvsr.py @@ -37,13 +37,17 @@ def dml_plr_pyvsr_fixture(generate_data_plr, score, dml_procedure, n_rep): # collect data obj_dml_data = generate_data_plr - # Set machine learning methods for m & g + # Set machine learning methods for l, m & g learner = LinearRegression() - ml_g = clone(learner) + ml_l = clone(learner) ml_m = clone(learner) + if score == 'IV-type': + ml_g = clone(learner) + else: + ml_g = None dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, - ml_g, ml_m, + ml_l, ml_m, ml_g, n_folds=n_folds, n_rep=n_rep, score=score,