From 5c184af09f03d1ba145f39b934ba1e5d4bda2a50 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 3 Feb 2025 09:31:34 +0100
Subject: [PATCH 01/54] add bias and score bias to sensitivity elements

---
 doubleml/double_ml.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py
index b1d723c6..205a77a3 100644
--- a/doubleml/double_ml.py
+++ b/doubleml/double_ml.py
@@ -1046,6 +1046,7 @@ def _fit_sensitivity_elements(self, nuisance_predictions):
             else:
                 # compute sensitivity analysis elements
                 element_dict = self._sensitivity_element_est(nuisance_predictions)
+                element_dict["bias"], element_dict["psi_bias"] = self._compute_sensitivity_bias(**element_dict)
                 self._set_sensitivity_elements(element_dict, self._i_rep, self._i_treat)
 
     def _initialize_arrays(self):
@@ -1410,7 +1411,7 @@ def _sensitivity_element_est(self, preds):
 
     @property
     def _sensitivity_element_names(self):
-        return ["sigma2", "nu2", "psi_sigma2", "psi_nu2", "riesz_rep"]
+        return ["sigma2", "nu2", "psi_sigma2", "psi_nu2", "riesz_rep", "bias", "psi_bias"]
 
     # the dimensions will usually be (n_obs, n_rep, n_coefs) to be equal to the score dimensions psi
     def _initialize_sensitivity_elements(self, score_dim):
@@ -1420,9 +1421,19 @@ def _initialize_sensitivity_elements(self, score_dim):
             "psi_sigma2": np.full(score_dim, np.nan),
             "psi_nu2": np.full(score_dim, np.nan),
             "riesz_rep": np.full(score_dim, np.nan),
+            "bias": np.full((1, score_dim[1], score_dim[2]), np.nan),
+            "psi_bias": np.full(score_dim, np.nan)
         }
         return sensitivity_elements
 
+    def _compute_sensitivity_bias(self, sigma2, nu2, psi_sigma2, psi_nu2, riesz_rep=None):
+        bias = np.sqrt(np.multiply(sigma2, nu2))
+        psi_bias = np.divide(
+            np.add(np.multiply(sigma2, psi_nu2), np.multiply(nu2, psi_sigma2)),
+            np.multiply(2.0, bias)
+        )
+        return bias, psi_bias
+
     def _get_sensitivity_elements(self, i_rep, i_treat):
         sensitivity_elements = {key: value[:, i_rep, i_treat] for key, value in self.sensitivity_elements.items()}
         return sensitivity_elements

From e21f17e27a589c8b6213107047e0f7274373e257 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 3 Feb 2025 09:56:45 +0100
Subject: [PATCH 02/54] add bias and bias score to framework

---
 doubleml/double_ml.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py
index 205a77a3..61781d88 100644
--- a/doubleml/double_ml.py
+++ b/doubleml/double_ml.py
@@ -565,6 +565,8 @@ def construct_framework(self):
                         "psi_sigma2": np.transpose(self.sensitivity_elements["psi_sigma2"], (0, 2, 1)),
                         "psi_nu2": np.transpose(self.sensitivity_elements["psi_nu2"], (0, 2, 1)),
                         "riesz_rep": np.transpose(self.sensitivity_elements["riesz_rep"], (0, 2, 1)),
+                        "bias": np.transpose(self.sensitivity_elements["bias"], (0, 2, 1)),
+                        "psi_bias": np.transpose(self.sensitivity_elements["psi_bias"], (0, 2, 1)),
                     }
                 }
             )

From 4d870b99494d4c79c6fe55c2d431678213a4e3fe Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 3 Feb 2025 09:59:14 +0100
Subject: [PATCH 03/54] add bias and bias score to framework

---
 doubleml/double_ml_framework.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py
index 2718629d..b501647a 100644
--- a/doubleml/double_ml_framework.py
+++ b/doubleml/double_ml_framework.py
@@ -983,7 +983,7 @@ def _check_and_set_sensitivity_elements(self, doubleml_dict):
         else:
             if not isinstance(doubleml_dict["sensitivity_elements"], dict):
                 raise TypeError("sensitivity_elements must be a dictionary.")
-            expected_keys_sensitivity = ["sigma2", "nu2", "psi_sigma2", "psi_nu2", "riesz_rep"]
+            expected_keys_sensitivity = ["sigma2", "nu2", "psi_sigma2", "psi_nu2", "riesz_rep", "bias", "psi_bias"]
             if not all(key in doubleml_dict["sensitivity_elements"].keys() for key in expected_keys_sensitivity):
                 raise ValueError(
                     "The sensitivity_elements dict must contain the following keys: " + ", ".join(expected_keys_sensitivity)
@@ -1001,6 +1001,8 @@ def _check_and_set_sensitivity_elements(self, doubleml_dict):
                 "psi_sigma2": doubleml_dict["sensitivity_elements"]["psi_sigma2"],
                 "psi_nu2": doubleml_dict["sensitivity_elements"]["psi_nu2"],
                 "riesz_rep": doubleml_dict["sensitivity_elements"]["riesz_rep"],
+                "bias": doubleml_dict["sensitivity_elements"]["bias"],
+                "psi_bias": doubleml_dict["sensitivity_elements"]["psi_bias"],
             }
 
         self._sensitivity_implemented = sensitivity_implemented

From b7e7c4c7d7272c56726311437670957799d5b4f7 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 3 Feb 2025 10:27:18 +0100
Subject: [PATCH 04/54] add error handling for negative nu2

---
 doubleml/double_ml.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py
index 61781d88..328a7ff1 100644
--- a/doubleml/double_ml.py
+++ b/doubleml/double_ml.py
@@ -1429,6 +1429,10 @@ def _initialize_sensitivity_elements(self, score_dim):
         return sensitivity_elements
 
     def _compute_sensitivity_bias(self, sigma2, nu2, psi_sigma2, psi_nu2, riesz_rep=None):
+        if nu2 <= 0:
+            warnings.UserWarning("The estimated nu2 is not positive. Re-estimation based on riesz representer.")
+            nu2 = np.mean(np.power(riesz_rep, 2))
+
         bias = np.sqrt(np.multiply(sigma2, nu2))
         psi_bias = np.divide(
             np.add(np.multiply(sigma2, psi_nu2), np.multiply(nu2, psi_sigma2)),

From 10e964833f297d47fece757064ca641942a3e5f7 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 3 Feb 2025 11:28:34 +0100
Subject: [PATCH 05/54] rename to max_bias and psi_max_bias

---
 doubleml/double_ml.py           | 29 ++++++++++++++++-------------
 doubleml/double_ml_framework.py |  8 +++++---
 2 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py
index 328a7ff1..3f8a61b0 100644
--- a/doubleml/double_ml.py
+++ b/doubleml/double_ml.py
@@ -565,8 +565,8 @@ def construct_framework(self):
                         "psi_sigma2": np.transpose(self.sensitivity_elements["psi_sigma2"], (0, 2, 1)),
                         "psi_nu2": np.transpose(self.sensitivity_elements["psi_nu2"], (0, 2, 1)),
                         "riesz_rep": np.transpose(self.sensitivity_elements["riesz_rep"], (0, 2, 1)),
-                        "bias": np.transpose(self.sensitivity_elements["bias"], (0, 2, 1)),
-                        "psi_bias": np.transpose(self.sensitivity_elements["psi_bias"], (0, 2, 1)),
+                        "max_bias": np.transpose(self.sensitivity_elements["max_bias"], (0, 2, 1)),
+                        "psi_max_bias": np.transpose(self.sensitivity_elements["psi_max_bias"], (0, 2, 1)),
                     }
                 }
             )
@@ -1048,7 +1048,7 @@ def _fit_sensitivity_elements(self, nuisance_predictions):
             else:
                 # compute sensitivity analysis elements
                 element_dict = self._sensitivity_element_est(nuisance_predictions)
-                element_dict["bias"], element_dict["psi_bias"] = self._compute_sensitivity_bias(**element_dict)
+                element_dict["max_bias"], element_dict["psi_max_bias"] = self._compute_sensitivity_bias(**element_dict)
                 self._set_sensitivity_elements(element_dict, self._i_rep, self._i_treat)
 
     def _initialize_arrays(self):
@@ -1413,7 +1413,7 @@ def _sensitivity_element_est(self, preds):
 
     @property
     def _sensitivity_element_names(self):
-        return ["sigma2", "nu2", "psi_sigma2", "psi_nu2", "riesz_rep", "bias", "psi_bias"]
+        return ["sigma2", "nu2", "psi_sigma2", "psi_nu2", "riesz_rep", "max_bias", "psi_max_bias"]
 
     # the dimensions will usually be (n_obs, n_rep, n_coefs) to be equal to the score dimensions psi
     def _initialize_sensitivity_elements(self, score_dim):
@@ -1423,22 +1423,25 @@ def _initialize_sensitivity_elements(self, score_dim):
             "psi_sigma2": np.full(score_dim, np.nan),
             "psi_nu2": np.full(score_dim, np.nan),
             "riesz_rep": np.full(score_dim, np.nan),
-            "bias": np.full((1, score_dim[1], score_dim[2]), np.nan),
-            "psi_bias": np.full(score_dim, np.nan)
+            "max_bias": np.full((1, score_dim[1], score_dim[2]), np.nan),
+            "psi_max_bias": np.full(score_dim, np.nan)
         }
         return sensitivity_elements
 
-    def _compute_sensitivity_bias(self, sigma2, nu2, psi_sigma2, psi_nu2, riesz_rep=None):
+    def _compute_sensitivity_bias(self, sigma2, nu2, psi_sigma2, psi_nu2, riesz_rep):
         if nu2 <= 0:
-            warnings.UserWarning("The estimated nu2 is not positive. Re-estimation based on riesz representer.")
-            nu2 = np.mean(np.power(riesz_rep, 2))
+            warnings.UserWarning(
+                "The estimated nu2 is not positive. Re-estimation based on riesz representer (non-orthogonal)."
+            )
+            psi_nu2 = np.power(riesz_rep, 2)
+            nu2 = np.mean(psi_nu2)
 
-        bias = np.sqrt(np.multiply(sigma2, nu2))
-        psi_bias = np.divide(
+        max_bias = np.sqrt(np.multiply(sigma2, nu2))
+        psi_max_bias = np.divide(
             np.add(np.multiply(sigma2, psi_nu2), np.multiply(nu2, psi_sigma2)),
-            np.multiply(2.0, bias)
+            np.multiply(2.0, max_bias)
         )
-        return bias, psi_bias
+        return max_bias, psi_max_bias
 
     def _get_sensitivity_elements(self, i_rep, i_treat):
         sensitivity_elements = {key: value[:, i_rep, i_treat] for key, value in self.sensitivity_elements.items()}
diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py
index b501647a..d32257a0 100644
--- a/doubleml/double_ml_framework.py
+++ b/doubleml/double_ml_framework.py
@@ -476,6 +476,8 @@ def _calc_sensitivity_analysis(self, cf_y, cf_d, rho, level):
         psi_sigma = self.sensitivity_elements["psi_sigma2"]
         psi_nu = self.sensitivity_elements["psi_nu2"]
         psi_scaled = self._scaled_psi
+        max_bias = self.sensitivity_elements["max_bias"]
+        psi_max_bias = self.sensitivity_elements["psi_max_bias"]
 
         if (np.any(sigma2 < 0)) | (np.any(nu2 < 0)):
             raise ValueError(
@@ -983,7 +985,7 @@ def _check_and_set_sensitivity_elements(self, doubleml_dict):
         else:
             if not isinstance(doubleml_dict["sensitivity_elements"], dict):
                 raise TypeError("sensitivity_elements must be a dictionary.")
-            expected_keys_sensitivity = ["sigma2", "nu2", "psi_sigma2", "psi_nu2", "riesz_rep", "bias", "psi_bias"]
+            expected_keys_sensitivity = ["sigma2", "nu2", "psi_sigma2", "psi_nu2", "riesz_rep", "max_bias", "psi_max_bias"]
             if not all(key in doubleml_dict["sensitivity_elements"].keys() for key in expected_keys_sensitivity):
                 raise ValueError(
                     "The sensitivity_elements dict must contain the following keys: " + ", ".join(expected_keys_sensitivity)
@@ -1001,8 +1003,8 @@ def _check_and_set_sensitivity_elements(self, doubleml_dict):
                 "psi_sigma2": doubleml_dict["sensitivity_elements"]["psi_sigma2"],
                 "psi_nu2": doubleml_dict["sensitivity_elements"]["psi_nu2"],
                 "riesz_rep": doubleml_dict["sensitivity_elements"]["riesz_rep"],
-                "bias": doubleml_dict["sensitivity_elements"]["bias"],
-                "psi_bias": doubleml_dict["sensitivity_elements"]["psi_bias"],
+                "max_bias": doubleml_dict["sensitivity_elements"]["max_bias"],
+                "psi_max_bias": doubleml_dict["sensitivity_elements"]["psi_max_bias"],
             }
 
         self._sensitivity_implemented = sensitivity_implemented

From badf5431fee537fef1bae8a4fc05f50cab45a819 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 3 Feb 2025 11:33:27 +0100
Subject: [PATCH 06/54] update sensitivity_calculation to bias formula

---
 doubleml/double_ml_framework.py | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py
index d32257a0..1becccf3 100644
--- a/doubleml/double_ml_framework.py
+++ b/doubleml/double_ml_framework.py
@@ -473,8 +473,6 @@ def _calc_sensitivity_analysis(self, cf_y, cf_d, rho, level):
         # set elements for readability
         sigma2 = self.sensitivity_elements["sigma2"]
         nu2 = self.sensitivity_elements["nu2"]
-        psi_sigma = self.sensitivity_elements["psi_sigma2"]
-        psi_nu = self.sensitivity_elements["psi_nu2"]
         psi_scaled = self._scaled_psi
         max_bias = self.sensitivity_elements["max_bias"]
         psi_max_bias = self.sensitivity_elements["psi_max_bias"]
@@ -488,16 +486,13 @@ def _calc_sensitivity_analysis(self, cf_y, cf_d, rho, level):
 
         # elementwise operations
         confounding_strength = np.multiply(np.abs(rho), np.sqrt(np.multiply(cf_y, np.divide(cf_d, 1.0 - cf_d))))
-        sensitivity_scaling = np.sqrt(np.multiply(sigma2, nu2))
 
-        # sigma2 and nu2 are of shape (1, n_thetas, n_rep), whereas the all_thetas is of shape (n_thetas, n_rep)
-        all_theta_lower = self.all_thetas - np.multiply(np.squeeze(sensitivity_scaling, axis=0), confounding_strength)
-        all_theta_upper = self.all_thetas + np.multiply(np.squeeze(sensitivity_scaling, axis=0), confounding_strength)
+        # max_bias is of shape (1, n_thetas, n_rep), whereas the all_thetas is of shape (n_thetas, n_rep)
+        all_theta_lower = self.all_thetas - np.multiply(np.squeeze(max_bias, axis=0), confounding_strength)
+        all_theta_upper = self.all_thetas + np.multiply(np.squeeze(max_bias, axis=0), confounding_strength)
 
-        psi_variances = np.multiply(sigma2, psi_nu) + np.multiply(nu2, psi_sigma)
-        psi_bias = np.multiply(np.divide(confounding_strength, np.multiply(2.0, sensitivity_scaling)), psi_variances)
-        psi_lower = psi_scaled - psi_bias
-        psi_upper = psi_scaled + psi_bias
+        psi_lower = psi_scaled - psi_max_bias
+        psi_upper = psi_scaled + psi_max_bias
 
         # shape (n_thetas, n_reps); includes scaling with n^{-1/2}
         all_sigma_lower = np.full_like(all_theta_lower, fill_value=np.nan)

From b33b4c0e7600cc68cd38aca54535b176ddaf7590 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 3 Feb 2025 11:48:30 +0100
Subject: [PATCH 07/54] fix warning

---
 doubleml/double_ml.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py
index 3f8a61b0..15e2faf2 100644
--- a/doubleml/double_ml.py
+++ b/doubleml/double_ml.py
@@ -1430,8 +1430,9 @@ def _initialize_sensitivity_elements(self, score_dim):
 
     def _compute_sensitivity_bias(self, sigma2, nu2, psi_sigma2, psi_nu2, riesz_rep):
         if nu2 <= 0:
-            warnings.UserWarning(
-                "The estimated nu2 is not positive. Re-estimation based on riesz representer (non-orthogonal)."
+            warnings.warn(
+                "The estimated nu2 is not positive. Re-estimation based on riesz representer (non-orthogonal).",
+                UserWarning,
             )
             psi_nu2 = np.power(riesz_rep, 2)
             nu2 = np.mean(psi_nu2)

From 635e6b42fdc61669c539eae605abed7861cb2e33 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 3 Feb 2025 11:48:44 +0100
Subject: [PATCH 08/54] fix bias score scaling

---
 doubleml/double_ml_framework.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py
index 1becccf3..24f15282 100644
--- a/doubleml/double_ml_framework.py
+++ b/doubleml/double_ml_framework.py
@@ -488,11 +488,11 @@ def _calc_sensitivity_analysis(self, cf_y, cf_d, rho, level):
         confounding_strength = np.multiply(np.abs(rho), np.sqrt(np.multiply(cf_y, np.divide(cf_d, 1.0 - cf_d))))
 
         # max_bias is of shape (1, n_thetas, n_rep), whereas the all_thetas is of shape (n_thetas, n_rep)
-        all_theta_lower = self.all_thetas - np.multiply(np.squeeze(max_bias, axis=0), confounding_strength)
-        all_theta_upper = self.all_thetas + np.multiply(np.squeeze(max_bias, axis=0), confounding_strength)
+        all_theta_lower = self.all_thetas - np.multiply(confounding_strength, np.squeeze(max_bias, axis=0))
+        all_theta_upper = self.all_thetas + np.multiply(confounding_strength, np.squeeze(max_bias, axis=0))
 
-        psi_lower = psi_scaled - psi_max_bias
-        psi_upper = psi_scaled + psi_max_bias
+        psi_lower = psi_scaled - np.multiply(confounding_strength, psi_max_bias)
+        psi_upper = psi_scaled + np.multiply(confounding_strength, psi_max_bias)
 
         # shape (n_thetas, n_reps); includes scaling with n^{-1/2}
         all_sigma_lower = np.full_like(all_theta_lower, fill_value=np.nan)

From 9e4e6e55d73928fa469f767840a0df80472cb274 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 3 Feb 2025 13:20:05 +0100
Subject: [PATCH 09/54] update framework sensitivity calculations

---
 doubleml/double_ml_framework.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py
index 24f15282..f157412f 100644
--- a/doubleml/double_ml_framework.py
+++ b/doubleml/double_ml_framework.py
@@ -322,7 +322,6 @@ def __add__(self, other):
                 "cluster_dict": self._cluster_dict,
             }
 
-            # sensitivity combination only available for same outcome and cond. expectation (e.g. IRM)
             if self._sensitivity_implemented and other._sensitivity_implemented:
                 nu2_score_element = (
                     self._sensitivity_elements["psi_nu2"]
@@ -334,12 +333,16 @@ def __add__(self, other):
                 nu2 = np.mean(nu2_score_element, axis=0, keepdims=True)
                 psi_nu2 = nu2_score_element - nu2
 
+                max_bias = self._sensitivity_elements["max_bias"] + other._sensitivity_elements["max_bias"]
+                psi_max_bias = self._sensitivity_elements["psi_max_bias"] + other._sensitivity_elements["psi_max_bias"]
                 sensitivity_elements = {
                     "sigma2": self._sensitivity_elements["sigma2"],
                     "nu2": nu2,
                     "psi_sigma2": self._sensitivity_elements["psi_sigma2"],
                     "psi_nu2": psi_nu2,
                     "riesz_rep": self._sensitivity_elements["riesz_rep"] + other._sensitivity_elements["riesz_rep"],
+                    "max_bias": max_bias,
+                    "psi_max_bias": psi_max_bias,
                 }
                 doubleml_dict["sensitivity_elements"] = sensitivity_elements
 
@@ -394,12 +397,16 @@ def __sub__(self, other):
                 nu2 = np.mean(nu2_score_element, axis=0, keepdims=True)
                 psi_nu2 = nu2_score_element - nu2
 
+                max_bias = self._sensitivity_elements["max_bias"] + other._sensitivity_elements["max_bias"]
+                psi_max_bias = self._sensitivity_elements["psi_max_bias"] - other._sensitivity_elements["psi_max_bias"]
                 sensitivity_elements = {
                     "sigma2": self._sensitivity_elements["sigma2"],
                     "nu2": nu2,
                     "psi_sigma2": self._sensitivity_elements["psi_sigma2"],
                     "psi_nu2": psi_nu2,
                     "riesz_rep": self._sensitivity_elements["riesz_rep"] - other._sensitivity_elements["riesz_rep"],
+                    "max_bias": max_bias,
+                    "psi_max_bias": psi_max_bias,
                 }
                 doubleml_dict["sensitivity_elements"] = sensitivity_elements
 
@@ -440,12 +447,16 @@ def __mul__(self, other):
                 nu2 = np.mean(nu2_score_element, axis=0, keepdims=True)
                 psi_nu2 = nu2_score_element - nu2
 
+                max_bias = abs(other) * self._sensitivity_elements["max_bias"]
+                psi_max_bias = abs(other) * self._sensitivity_elements["psi_max_bias"]
                 sensitivity_elements = {
                     "sigma2": self._sensitivity_elements["sigma2"],
                     "nu2": nu2,
                     "psi_sigma2": self._sensitivity_elements["psi_sigma2"],
                     "psi_nu2": psi_nu2,
                     "riesz_rep": np.multiply(other, self._sensitivity_elements["riesz_rep"]),
+                    "max_bias": max_bias,
+                    "psi_max_bias": psi_max_bias,
                 }
                 doubleml_dict["sensitivity_elements"] = sensitivity_elements
 
@@ -1119,7 +1130,7 @@ def concat(objs):
 
     if all(obj._sensitivity_implemented for obj in objs):
         sensitivity_elements = {}
-        for key in ["sigma2", "nu2", "psi_sigma2", "psi_nu2", "riesz_rep"]:
+        for key in ["sigma2", "nu2", "psi_sigma2", "psi_nu2", "riesz_rep", "max_bias", "psi_max_bias"]:
             assert all(key in obj._sensitivity_elements.keys() for obj in objs)
             sensitivity_elements[key] = np.concatenate([obj._sensitivity_elements[key] for obj in objs], axis=1)
 

From 287355fd75d69ecd51aff8ec066485ca337ad2a0 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 3 Feb 2025 13:50:35 +0100
Subject: [PATCH 10/54] fix tests

---
 doubleml/tests/test_exceptions.py           | 10 +++++++++-
 doubleml/tests/test_framework_exceptions.py |  4 ++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/doubleml/tests/test_exceptions.py b/doubleml/tests/test_exceptions.py
index 26c80bb3..b4be4df5 100644
--- a/doubleml/tests/test_exceptions.py
+++ b/doubleml/tests/test_exceptions.py
@@ -1229,7 +1229,15 @@ def test_doubleml_sensitivity_inputs():
         _ = dml_irm._set_sensitivity_elements(sensitivity_elements=sensitivity_elements, i_rep=0, i_treat=0)
 
     # test variances
-    sensitivity_elements = dict({"sigma2": 1.0, "nu2": -2.4, "psi_sigma2": 1.0, "psi_nu2": 1.0, "riesz_rep": 1.0})
+    sensitivity_elements = {
+        "sigma2": 1.0,
+        "nu2": -2.4,
+        "psi_sigma2": 1.0,
+        "psi_nu2": 1.0,
+        "riesz_rep": 1.0,
+        "max_bias": 1.0,
+        "psi_max_bias": 1.0,
+    }
     _ = dml_irm._set_sensitivity_elements(sensitivity_elements=sensitivity_elements, i_rep=0, i_treat=0)
     msg = (
         "sensitivity_elements sigma2 and nu2 have to be positive. "
diff --git a/doubleml/tests/test_framework_exceptions.py b/doubleml/tests/test_framework_exceptions.py
index b7ee2b8d..c25f0d24 100644
--- a/doubleml/tests/test_framework_exceptions.py
+++ b/doubleml/tests/test_framework_exceptions.py
@@ -22,6 +22,8 @@
     "psi_sigma2": np.ones(shape=(n_obs, n_thetas, n_rep)),
     "psi_nu2": np.ones(shape=(n_obs, n_thetas, n_rep)),
     "riesz_rep": np.ones(shape=(n_obs, n_thetas, n_rep)),
+    "max_bias": np.ones(shape=(1, n_thetas, n_rep)),
+    "psi_max_bias": np.ones(shape=(n_obs, n_thetas, n_rep)),
 }
 
 # combine objects and estimate parameters
@@ -394,6 +396,8 @@ def test_sensitivity_exceptions():
         "psi_sigma2": np.ones(shape=(n_obs, n_thetas, n_rep)),
         "psi_nu2": np.ones(shape=(n_obs, n_thetas, n_rep)),
         "riesz_rep": np.ones(shape=(n_obs, n_thetas, n_rep)),
+        "max_bias": np.ones(shape=(1, n_thetas, n_rep)),
+        "psi_max_bias": np.ones(shape=(n_obs, n_thetas, n_rep)),
     }
     dml_framework_sensitivity = DoubleMLFramework(sensitivity_dict)
 

From 7179a56fff3caf123194925852fce5101769f984 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 3 Feb 2025 14:05:14 +0100
Subject: [PATCH 11/54] remove max_bias and psi_max_bias from doubleml class

---
 doubleml/double_ml.py | 30 +++++++++++++++++++++---------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py
index 15e2faf2..78bf23d4 100644
--- a/doubleml/double_ml.py
+++ b/doubleml/double_ml.py
@@ -556,6 +556,8 @@ def construct_framework(self):
         }
 
         if self._sensitivity_implemented:
+
+            max_bias, psi_max_bias = self._compute_sensitivity_bias()
             # reshape sensitivity elements to (n_obs, n_coefs, n_rep)
             doubleml_dict.update(
                 {
@@ -565,8 +567,8 @@ def construct_framework(self):
                         "psi_sigma2": np.transpose(self.sensitivity_elements["psi_sigma2"], (0, 2, 1)),
                         "psi_nu2": np.transpose(self.sensitivity_elements["psi_nu2"], (0, 2, 1)),
                         "riesz_rep": np.transpose(self.sensitivity_elements["riesz_rep"], (0, 2, 1)),
-                        "max_bias": np.transpose(self.sensitivity_elements["max_bias"], (0, 2, 1)),
-                        "psi_max_bias": np.transpose(self.sensitivity_elements["psi_max_bias"], (0, 2, 1)),
+                        "max_bias": np.transpose(max_bias, (0, 2, 1)),
+                        "psi_max_bias": np.transpose(psi_max_bias, (0, 2, 1)),
                     }
                 }
             )
@@ -1048,7 +1050,6 @@ def _fit_sensitivity_elements(self, nuisance_predictions):
             else:
                 # compute sensitivity analysis elements
                 element_dict = self._sensitivity_element_est(nuisance_predictions)
-                element_dict["max_bias"], element_dict["psi_max_bias"] = self._compute_sensitivity_bias(**element_dict)
                 self._set_sensitivity_elements(element_dict, self._i_rep, self._i_treat)
 
     def _initialize_arrays(self):
@@ -1413,7 +1414,7 @@ def _sensitivity_element_est(self, preds):
 
     @property
     def _sensitivity_element_names(self):
-        return ["sigma2", "nu2", "psi_sigma2", "psi_nu2", "riesz_rep", "max_bias", "psi_max_bias"]
+        return ["sigma2", "nu2", "psi_sigma2", "psi_nu2", "riesz_rep"]
 
     # the dimensions will usually be (n_obs, n_rep, n_coefs) to be equal to the score dimensions psi
     def _initialize_sensitivity_elements(self, score_dim):
@@ -1423,19 +1424,30 @@ def _initialize_sensitivity_elements(self, score_dim):
             "psi_sigma2": np.full(score_dim, np.nan),
             "psi_nu2": np.full(score_dim, np.nan),
             "riesz_rep": np.full(score_dim, np.nan),
-            "max_bias": np.full((1, score_dim[1], score_dim[2]), np.nan),
-            "psi_max_bias": np.full(score_dim, np.nan)
         }
         return sensitivity_elements
 
-    def _compute_sensitivity_bias(self, sigma2, nu2, psi_sigma2, psi_nu2, riesz_rep):
-        if nu2 <= 0:
+    def _compute_sensitivity_bias(self):
+        sigma2 = self.sensitivity_elements["sigma2"]
+        nu2 = self.sensitivity_elements["nu2"]
+        psi_sigma2 = self.sensitivity_elements["psi_sigma2"]
+        psi_nu2 = self.sensitivity_elements["psi_nu2"]
+        riesz_rep = self.sensitivity_elements["riesz_rep"]
+
+        if np.any(nu2 <= 0):
             warnings.warn(
                 "The estimated nu2 is not positive. Re-estimation based on riesz representer (non-orthogonal).",
                 UserWarning,
             )
             psi_nu2 = np.power(riesz_rep, 2)
-            nu2 = np.mean(psi_nu2)
+            nu2 = np.mean(psi_nu2, axis=0, keepdims=True)
+
+        if (np.any(sigma2 < 0)) | (np.any(nu2 < 0)):
+            raise ValueError(
+                "sensitivity_elements sigma2 and nu2 have to be positive. "
+                f"Got sigma2 {str(sigma2)} and nu2 {str(nu2)}. "
+                "Most likely this is due to low quality learners (especially propensity scores)."
+            )
 
         max_bias = np.sqrt(np.multiply(sigma2, nu2))
         psi_max_bias = np.divide(

From 33cb70562e6dceb3071380b7cb32b1b91ceb2c37 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 3 Feb 2025 15:40:09 +0100
Subject: [PATCH 12/54] fix test

---
 doubleml/tests/test_exceptions.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/doubleml/tests/test_exceptions.py b/doubleml/tests/test_exceptions.py
index b4be4df5..2898ef64 100644
--- a/doubleml/tests/test_exceptions.py
+++ b/doubleml/tests/test_exceptions.py
@@ -1235,8 +1235,6 @@ def test_doubleml_sensitivity_inputs():
         "psi_sigma2": 1.0,
         "psi_nu2": 1.0,
         "riesz_rep": 1.0,
-        "max_bias": 1.0,
-        "psi_max_bias": 1.0,
     }
     _ = dml_irm._set_sensitivity_elements(sensitivity_elements=sensitivity_elements, i_rep=0, i_treat=0)
     msg = (

From b3ec350c98373ae6efd76e24449ab542fac54837 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Thu, 30 Jan 2025 17:02:15 +0100
Subject: [PATCH 13/54] remove nu2, sigma2 and scores from framework

---
 doubleml/double_ml.py           |  5 -----
 doubleml/double_ml_framework.py | 25 ++++++-------------------
 2 files changed, 6 insertions(+), 24 deletions(-)

diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py
index 78bf23d4..d29b4525 100644
--- a/doubleml/double_ml.py
+++ b/doubleml/double_ml.py
@@ -562,11 +562,6 @@ def construct_framework(self):
             doubleml_dict.update(
                 {
                     "sensitivity_elements": {
-                        "sigma2": np.transpose(self.sensitivity_elements["sigma2"], (0, 2, 1)),
-                        "nu2": np.transpose(self.sensitivity_elements["nu2"], (0, 2, 1)),
-                        "psi_sigma2": np.transpose(self.sensitivity_elements["psi_sigma2"], (0, 2, 1)),
-                        "psi_nu2": np.transpose(self.sensitivity_elements["psi_nu2"], (0, 2, 1)),
-                        "riesz_rep": np.transpose(self.sensitivity_elements["riesz_rep"], (0, 2, 1)),
                         "max_bias": np.transpose(max_bias, (0, 2, 1)),
                         "psi_max_bias": np.transpose(psi_max_bias, (0, 2, 1)),
                     }
diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py
index f157412f..3632b410 100644
--- a/doubleml/double_ml_framework.py
+++ b/doubleml/double_ml_framework.py
@@ -196,8 +196,8 @@ def boot_t_stat(self):
     def sensitivity_elements(self):
         """
         Values of the sensitivity components.
-        If available (e.g., PLR, IRM) a dictionary with entries ``sigma2``, ``nu2``, ``psi_sigma2``, ``psi_nu2``
-        and ``riesz_rep``.
+        If available (e.g., PLR, IRM) a dictionary with entries ``max_bias`` (shape (``1``, ``n_thetas``, ``n_rep``)) and
+         ``psi_max_bias`` (shape (``n_obs``, ``n_thetas``, ``n_rep``)).
         """
         return self._sensitivity_elements
 
@@ -482,19 +482,10 @@ def _calc_sensitivity_analysis(self, cf_y, cf_d, rho, level):
         _check_in_zero_one(level, "The confidence level", include_zero=False, include_one=False)
 
         # set elements for readability
-        sigma2 = self.sensitivity_elements["sigma2"]
-        nu2 = self.sensitivity_elements["nu2"]
         psi_scaled = self._scaled_psi
         max_bias = self.sensitivity_elements["max_bias"]
         psi_max_bias = self.sensitivity_elements["psi_max_bias"]
 
-        if (np.any(sigma2 < 0)) | (np.any(nu2 < 0)):
-            raise ValueError(
-                "sensitivity_elements sigma2 and nu2 have to be positive. "
-                f"Got sigma2 {str(sigma2)} and nu2 {str(nu2)}. "
-                "Most likely this is due to low quality learners (especially propensity scores)."
-            )
-
         # elementwise operations
         confounding_strength = np.multiply(np.abs(rho), np.sqrt(np.multiply(cf_y, np.divide(cf_d, 1.0 - cf_d))))
 
@@ -991,7 +982,8 @@ def _check_and_set_sensitivity_elements(self, doubleml_dict):
         else:
             if not isinstance(doubleml_dict["sensitivity_elements"], dict):
                 raise TypeError("sensitivity_elements must be a dictionary.")
-            expected_keys_sensitivity = ["sigma2", "nu2", "psi_sigma2", "psi_nu2", "riesz_rep", "max_bias", "psi_max_bias"]
+
+            expected_keys_sensitivity = ["max_bias", "psi_max_bias"]
             if not all(key in doubleml_dict["sensitivity_elements"].keys() for key in expected_keys_sensitivity):
                 raise ValueError(
                     "The sensitivity_elements dict must contain the following keys: " + ", ".join(expected_keys_sensitivity)
@@ -1003,14 +995,9 @@ def _check_and_set_sensitivity_elements(self, doubleml_dict):
 
             # set sensitivity elements
             sensitivity_implemented = True
+
             sensitivity_elements = {
-                "sigma2": doubleml_dict["sensitivity_elements"]["sigma2"],
-                "nu2": doubleml_dict["sensitivity_elements"]["nu2"],
-                "psi_sigma2": doubleml_dict["sensitivity_elements"]["psi_sigma2"],
-                "psi_nu2": doubleml_dict["sensitivity_elements"]["psi_nu2"],
-                "riesz_rep": doubleml_dict["sensitivity_elements"]["riesz_rep"],
-                "max_bias": doubleml_dict["sensitivity_elements"]["max_bias"],
-                "psi_max_bias": doubleml_dict["sensitivity_elements"]["psi_max_bias"],
+                key: doubleml_dict["sensitivity_elements"][key] for key in expected_keys_sensitivity
             }
 
         self._sensitivity_implemented = sensitivity_implemented

From ea1e2da0c485cd54a5858c084395240483fb2a85 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Thu, 30 Jan 2025 17:04:56 +0100
Subject: [PATCH 14/54] remove nu2, sigma2 and scores from input checks

---
 doubleml/double_ml_framework.py | 24 ++++--------------------
 1 file changed, 4 insertions(+), 20 deletions(-)

diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py
index 3632b410..59403627 100644
--- a/doubleml/double_ml_framework.py
+++ b/doubleml/double_ml_framework.py
@@ -1029,30 +1029,14 @@ def _check_framework_shapes(self):
             )
 
         if self._sensitivity_implemented:
-            if self._sensitivity_elements["sigma2"].shape != (1, self._n_thetas, self.n_rep):
+            if self._sensitivity_elements["max_bias"].shape != (1, self._n_thetas, self.n_rep):
                 raise ValueError(
-                    f"The shape of sigma2 does not match the expected shape (1, {self._n_thetas}, {self._n_rep})."
+                    f"The shape of max_bias does not match the expected shape (1, {self._n_thetas}, {self._n_rep})."
                 )
-            if self._sensitivity_elements["nu2"].shape != (1, self._n_thetas, self.n_rep):
-                raise ValueError(f"The shape of nu2 does not match the expected shape (1, {self._n_thetas}, {self._n_rep}).")
-            if self._sensitivity_elements["psi_sigma2"].shape != score_dim:
+            if self._sensitivity_elements["psi_max_bias"].shape != score_dim:
                 raise ValueError(
                     (
-                        "The shape of psi_sigma2 does not match the expected "
-                        f"shape ({self._n_obs}, {self._n_thetas}, {self._n_rep})."
-                    )
-                )
-            if self._sensitivity_elements["psi_nu2"].shape != score_dim:
-                raise ValueError(
-                    (
-                        "The shape of psi_nu2 does not match the expected "
-                        f"shape ({self._n_obs}, {self._n_thetas}, {self._n_rep})."
-                    )
-                )
-            if self._sensitivity_elements["riesz_rep"].shape != score_dim:
-                raise ValueError(
-                    (
-                        "The shape of riesz_rep does not match the expected "
+                        "The shape of psi_max_bias does not match the expected "
                         f"shape ({self._n_obs}, {self._n_thetas}, {self._n_rep})."
                     )
                 )

From c413e0f0cb0b56ba69128dac9ba3801668f70560 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Tue, 4 Feb 2025 16:38:45 +0100
Subject: [PATCH 15/54] fix sensitivity input test

---
 doubleml/tests/test_framework_exceptions.py | 28 ++++-----------------
 1 file changed, 5 insertions(+), 23 deletions(-)

diff --git a/doubleml/tests/test_framework_exceptions.py b/doubleml/tests/test_framework_exceptions.py
index c25f0d24..29745785 100644
--- a/doubleml/tests/test_framework_exceptions.py
+++ b/doubleml/tests/test_framework_exceptions.py
@@ -83,40 +83,22 @@ def test_input_exceptions():
         test_dict["sensitivity_elements"] = 1
         DoubleMLFramework(test_dict)
 
-    msg = "The sensitivity_elements dict must contain the following keys: sigma2, nu2, psi_sigma2, psi_nu2"
+    msg = "The sensitivity_elements dict must contain the following keys: max_bias, psi_max_bias"
     with pytest.raises(ValueError, match=msg):
         test_dict = doubleml_dict.copy()
         test_dict["sensitivity_elements"] = {"sensitivities": np.ones(shape=(n_obs, n_thetas, n_rep))}
         DoubleMLFramework(test_dict)
 
-    msg = r"The shape of sigma2 does not match the expected shape \(1, 2, 5\)\."
+    msg = r"The shape of max_bias does not match the expected shape \(1, 2, 5\)\."
     with pytest.raises(ValueError, match=msg):
         test_dict = copy.deepcopy(doubleml_dict)
-        test_dict["sensitivity_elements"]["sigma2"] = np.ones(shape=(n_obs, n_rep))
+        test_dict["sensitivity_elements"]["max_bias"] = np.ones(shape=(n_obs, n_rep))
         DoubleMLFramework(test_dict)
 
-    msg = r"The shape of nu2 does not match the expected shape \(1, 2, 5\)\."
+    msg = r"The shape of psi_max_bias does not match the expected shape \(10, 2, 5\)\."
     with pytest.raises(ValueError, match=msg):
         test_dict = copy.deepcopy(doubleml_dict)
-        test_dict["sensitivity_elements"]["nu2"] = np.ones(shape=(n_obs, n_rep))
-        DoubleMLFramework(test_dict)
-
-    msg = r"The shape of psi_sigma2 does not match the expected shape \(10, 2, 5\)\."
-    with pytest.raises(ValueError, match=msg):
-        test_dict = copy.deepcopy(doubleml_dict)
-        test_dict["sensitivity_elements"]["psi_sigma2"] = np.ones(shape=(n_obs, n_thetas, n_rep, 3))
-        DoubleMLFramework(test_dict)
-
-    msg = r"The shape of psi_nu2 does not match the expected shape \(10, 2, 5\)\."
-    with pytest.raises(ValueError, match=msg):
-        test_dict = copy.deepcopy(doubleml_dict)
-        test_dict["sensitivity_elements"]["psi_nu2"] = np.ones(shape=(n_obs, n_thetas, n_rep, 3))
-        DoubleMLFramework(test_dict)
-
-    msg = r"The shape of riesz_rep does not match the expected shape \(10, 2, 5\)\."
-    with pytest.raises(ValueError, match=msg):
-        test_dict = copy.deepcopy(doubleml_dict)
-        test_dict["sensitivity_elements"]["riesz_rep"] = np.ones(shape=(n_obs, n_thetas, n_rep, 3))
+        test_dict["sensitivity_elements"]["psi_max_bias"] = np.ones(shape=(n_obs, n_thetas, n_rep, 3))
         DoubleMLFramework(test_dict)
 
     msg = "is_cluster_data has to be boolean. 1.0 of type <class 'float'> was passed."

From 7ed5e8c55e51934fe86bb9d0f9a0e20c4ef15823 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Tue, 4 Feb 2025 16:40:29 +0100
Subject: [PATCH 16/54] remove sigma2, nu2 and scores from the input

---
 doubleml/tests/test_framework_exceptions.py | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/doubleml/tests/test_framework_exceptions.py b/doubleml/tests/test_framework_exceptions.py
index 29745785..f6803391 100644
--- a/doubleml/tests/test_framework_exceptions.py
+++ b/doubleml/tests/test_framework_exceptions.py
@@ -17,11 +17,6 @@
 doubleml_dict = generate_dml_dict(psi_a, psi_b)
 # add sensitivity elements
 doubleml_dict["sensitivity_elements"] = {
-    "sigma2": np.ones(shape=(1, n_thetas, n_rep)),
-    "nu2": np.ones(shape=(1, n_thetas, n_rep)),
-    "psi_sigma2": np.ones(shape=(n_obs, n_thetas, n_rep)),
-    "psi_nu2": np.ones(shape=(n_obs, n_thetas, n_rep)),
-    "riesz_rep": np.ones(shape=(n_obs, n_thetas, n_rep)),
     "max_bias": np.ones(shape=(1, n_thetas, n_rep)),
     "psi_max_bias": np.ones(shape=(n_obs, n_thetas, n_rep)),
 }
@@ -373,11 +368,6 @@ def test_sensitivity_exceptions():
 
     sensitivity_dict = generate_dml_dict(psi_a, psi_b)
     sensitivity_dict["sensitivity_elements"] = {
-        "sigma2": np.ones(shape=(1, n_thetas, n_rep)),
-        "nu2": -1.0 * np.ones(shape=(1, n_thetas, n_rep)),
-        "psi_sigma2": np.ones(shape=(n_obs, n_thetas, n_rep)),
-        "psi_nu2": np.ones(shape=(n_obs, n_thetas, n_rep)),
-        "riesz_rep": np.ones(shape=(n_obs, n_thetas, n_rep)),
         "max_bias": np.ones(shape=(1, n_thetas, n_rep)),
         "psi_max_bias": np.ones(shape=(n_obs, n_thetas, n_rep)),
     }

From a614728655908be6fa4636f5b2f64c676298e473 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Tue, 4 Feb 2025 16:43:18 +0100
Subject: [PATCH 17/54] remove nu2, sigma2 and scores from the  framework
 operations

---
 doubleml/double_ml_framework.py | 39 +--------------------------------
 1 file changed, 1 insertion(+), 38 deletions(-)

diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py
index 59403627..c6a2d3b6 100644
--- a/doubleml/double_ml_framework.py
+++ b/doubleml/double_ml_framework.py
@@ -323,24 +323,9 @@ def __add__(self, other):
             }
 
             if self._sensitivity_implemented and other._sensitivity_implemented:
-                nu2_score_element = (
-                    self._sensitivity_elements["psi_nu2"]
-                    + other._sensitivity_elements["psi_nu2"]
-                    - np.multiply(
-                        2.0, np.multiply(self._sensitivity_elements["riesz_rep"], self._sensitivity_elements["riesz_rep"])
-                    )
-                )
-                nu2 = np.mean(nu2_score_element, axis=0, keepdims=True)
-                psi_nu2 = nu2_score_element - nu2
-
                 max_bias = self._sensitivity_elements["max_bias"] + other._sensitivity_elements["max_bias"]
                 psi_max_bias = self._sensitivity_elements["psi_max_bias"] + other._sensitivity_elements["psi_max_bias"]
                 sensitivity_elements = {
-                    "sigma2": self._sensitivity_elements["sigma2"],
-                    "nu2": nu2,
-                    "psi_sigma2": self._sensitivity_elements["psi_sigma2"],
-                    "psi_nu2": psi_nu2,
-                    "riesz_rep": self._sensitivity_elements["riesz_rep"] + other._sensitivity_elements["riesz_rep"],
                     "max_bias": max_bias,
                     "psi_max_bias": psi_max_bias,
                 }
@@ -387,24 +372,10 @@ def __sub__(self, other):
 
             # sensitivity combination only available for same outcome and cond. expectation (e.g. IRM)
             if self._sensitivity_implemented and other._sensitivity_implemented:
-                nu2_score_element = (
-                    self._sensitivity_elements["psi_nu2"]
-                    - other._sensitivity_elements["psi_nu2"]
-                    + np.multiply(
-                        2.0, np.multiply(self._sensitivity_elements["riesz_rep"], self._sensitivity_elements["riesz_rep"])
-                    )
-                )
-                nu2 = np.mean(nu2_score_element, axis=0, keepdims=True)
-                psi_nu2 = nu2_score_element - nu2
 
                 max_bias = self._sensitivity_elements["max_bias"] + other._sensitivity_elements["max_bias"]
                 psi_max_bias = self._sensitivity_elements["psi_max_bias"] - other._sensitivity_elements["psi_max_bias"]
                 sensitivity_elements = {
-                    "sigma2": self._sensitivity_elements["sigma2"],
-                    "nu2": nu2,
-                    "psi_sigma2": self._sensitivity_elements["psi_sigma2"],
-                    "psi_nu2": psi_nu2,
-                    "riesz_rep": self._sensitivity_elements["riesz_rep"] - other._sensitivity_elements["riesz_rep"],
                     "max_bias": max_bias,
                     "psi_max_bias": psi_max_bias,
                 }
@@ -443,18 +414,10 @@ def __mul__(self, other):
 
             # sensitivity combination only available for linear models
             if self._sensitivity_implemented:
-                nu2_score_element = np.multiply(np.square(other), self._sensitivity_elements["psi_nu2"])
-                nu2 = np.mean(nu2_score_element, axis=0, keepdims=True)
-                psi_nu2 = nu2_score_element - nu2
 
                 max_bias = abs(other) * self._sensitivity_elements["max_bias"]
                 psi_max_bias = abs(other) * self._sensitivity_elements["psi_max_bias"]
                 sensitivity_elements = {
-                    "sigma2": self._sensitivity_elements["sigma2"],
-                    "nu2": nu2,
-                    "psi_sigma2": self._sensitivity_elements["psi_sigma2"],
-                    "psi_nu2": psi_nu2,
-                    "riesz_rep": np.multiply(other, self._sensitivity_elements["riesz_rep"]),
                     "max_bias": max_bias,
                     "psi_max_bias": psi_max_bias,
                 }
@@ -1101,7 +1064,7 @@ def concat(objs):
 
     if all(obj._sensitivity_implemented for obj in objs):
         sensitivity_elements = {}
-        for key in ["sigma2", "nu2", "psi_sigma2", "psi_nu2", "riesz_rep", "max_bias", "psi_max_bias"]:
+        for key in ["max_bias", "psi_max_bias"]:
             assert all(key in obj._sensitivity_elements.keys() for obj in objs)
             sensitivity_elements[key] = np.concatenate([obj._sensitivity_elements[key] for obj in objs], axis=1)
 

From 1bcd451484a6875b28bd43c5acba0c6c6774806c Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Tue, 4 Feb 2025 16:49:48 +0100
Subject: [PATCH 18/54] fix sensitivity shape tests

---
 doubleml/tests/test_framework_sensitivity.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doubleml/tests/test_framework_sensitivity.py b/doubleml/tests/test_framework_sensitivity.py
index 209b72c1..eec80264 100644
--- a/doubleml/tests/test_framework_sensitivity.py
+++ b/doubleml/tests/test_framework_sensitivity.py
@@ -64,8 +64,8 @@ def test_dml_framework_sensitivity_shapes(dml_framework_sensitivity_fixture):
         "dml_framework_obj_sub_obj",
         "dml_framework_obj_mul_obj",
     ]
-    var_keys = ["sigma2", "nu2"]
-    score_keys = ["psi_sigma2", "psi_nu2", "riesz_rep"]
+    var_keys = ["max_bias"]
+    score_keys = ["psi_max_bias"]
 
     for obj in object_list:
         assert dml_framework_sensitivity_fixture[obj]._sensitivity_implemented

From f72fc698409af59fdedd48119698715ab60940bd Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Tue, 4 Feb 2025 17:18:40 +0100
Subject: [PATCH 19/54] update gain statistics

---
 doubleml/double_ml.py                     | 19 +++----------------
 doubleml/double_ml_framework.py           |  4 +---
 doubleml/irm/tests/test_apo_exceptions.py |  8 +++++---
 doubleml/tests/test_exceptions.py         |  8 +++++---
 doubleml/utils/_sensitivity.py            | 15 +++++++++++++++
 doubleml/utils/gain_statistics.py         | 20 +++++++++++++++++---
 6 files changed, 46 insertions(+), 28 deletions(-)
 create mode 100644 doubleml/utils/_sensitivity.py

diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py
index d29b4525..ce739fbf 100644
--- a/doubleml/double_ml.py
+++ b/doubleml/double_ml.py
@@ -11,6 +11,7 @@
 from .double_ml_framework import DoubleMLFramework
 from .utils._checks import _check_external_predictions, _check_sample_splitting
 from .utils._estimation import _aggregate_coefs_and_ses, _rmse, _set_external_predictions, _var_est
+from .utils._sensitivity import _validate_nu2
 from .utils.gain_statistics import gain_statistics
 from .utils.resampling import DoubleMLClusterResampling, DoubleMLResampling
 
@@ -1429,25 +1430,11 @@ def _compute_sensitivity_bias(self):
         psi_nu2 = self.sensitivity_elements["psi_nu2"]
         riesz_rep = self.sensitivity_elements["riesz_rep"]
 
-        if np.any(nu2 <= 0):
-            warnings.warn(
-                "The estimated nu2 is not positive. Re-estimation based on riesz representer (non-orthogonal).",
-                UserWarning,
-            )
-            psi_nu2 = np.power(riesz_rep, 2)
-            nu2 = np.mean(psi_nu2, axis=0, keepdims=True)
-
-        if (np.any(sigma2 < 0)) | (np.any(nu2 < 0)):
-            raise ValueError(
-                "sensitivity_elements sigma2 and nu2 have to be positive. "
-                f"Got sigma2 {str(sigma2)} and nu2 {str(nu2)}. "
-                "Most likely this is due to low quality learners (especially propensity scores)."
-            )
+        nu2, psi_nu2 = _validate_nu2(nu2=nu2, psi_nu2=psi_nu2, riesz_rep=riesz_rep)
 
         max_bias = np.sqrt(np.multiply(sigma2, nu2))
         psi_max_bias = np.divide(
-            np.add(np.multiply(sigma2, psi_nu2), np.multiply(nu2, psi_sigma2)),
-            np.multiply(2.0, max_bias)
+            np.add(np.multiply(sigma2, psi_nu2), np.multiply(nu2, psi_sigma2)), np.multiply(2.0, max_bias)
         )
         return max_bias, psi_max_bias
 
diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py
index c6a2d3b6..0f51abc3 100644
--- a/doubleml/double_ml_framework.py
+++ b/doubleml/double_ml_framework.py
@@ -959,9 +959,7 @@ def _check_and_set_sensitivity_elements(self, doubleml_dict):
             # set sensitivity elements
             sensitivity_implemented = True
 
-            sensitivity_elements = {
-                key: doubleml_dict["sensitivity_elements"][key] for key in expected_keys_sensitivity
-            }
+            sensitivity_elements = {key: doubleml_dict["sensitivity_elements"][key] for key in expected_keys_sensitivity}
 
         self._sensitivity_implemented = sensitivity_implemented
         self._sensitivity_elements = sensitivity_elements
diff --git a/doubleml/irm/tests/test_apo_exceptions.py b/doubleml/irm/tests/test_apo_exceptions.py
index 7692177e..cfb6e93b 100644
--- a/doubleml/irm/tests/test_apo_exceptions.py
+++ b/doubleml/irm/tests/test_apo_exceptions.py
@@ -146,9 +146,11 @@ def test_apo_exception_weights():
             ml_m,
             treatment_level=0,
             weights={
-                "weights": -1
-                * np.ones(
-                    n,
+                "weights": (
+                    -1
+                    * np.ones(
+                        n,
+                    )
                 ),
                 "weights_bar": np.ones((n, 1)),
             },
diff --git a/doubleml/tests/test_exceptions.py b/doubleml/tests/test_exceptions.py
index 2898ef64..cddaf223 100644
--- a/doubleml/tests/test_exceptions.py
+++ b/doubleml/tests/test_exceptions.py
@@ -543,9 +543,11 @@ def test_doubleml_exception_weights():
             Lasso(),
             LogisticRegression(),
             weights={
-                "weights": -1
-                * np.ones(
-                    n,
+                "weights": (
+                    -1
+                    * np.ones(
+                        n,
+                    )
                 ),
                 "weights_bar": np.ones((n, 1)),
             },
diff --git a/doubleml/utils/_sensitivity.py b/doubleml/utils/_sensitivity.py
new file mode 100644
index 00000000..25a54415
--- /dev/null
+++ b/doubleml/utils/_sensitivity.py
@@ -0,0 +1,15 @@
+import warnings
+
+import numpy as np
+
+
+def _validate_nu2(nu2, psi_nu2, riesz_rep):
+    if np.any(nu2 <= 0):
+        warnings.warn(
+            "The estimated nu2 is not positive. Re-estimation based on riesz representer (non-orthogonal).",
+            UserWarning,
+        )
+        nu2 = np.mean(np.power(riesz_rep, 2), axis=0, keepdims=True)
+        psi_nu2 = np.power(riesz_rep, 2)
+
+    return nu2, psi_nu2
diff --git a/doubleml/utils/gain_statistics.py b/doubleml/utils/gain_statistics.py
index 482d45cc..cc1b1598 100644
--- a/doubleml/utils/gain_statistics.py
+++ b/doubleml/utils/gain_statistics.py
@@ -1,5 +1,7 @@
 import numpy as np
 
+from ._sensitivity import _validate_nu2
+
 
 def gain_statistics(dml_long, dml_short):
     """
@@ -20,9 +22,21 @@ def gain_statistics(dml_long, dml_short):
         Benchmarking dictionary (dict) with values for ``cf_d``, ``cf_y``, ``rho``, and ``delta_theta``.
     """
 
-    # set input for readability
-    sensitivity_elements_long = dml_long.framework.sensitivity_elements
-    sensitivity_elements_short = dml_short.framework.sensitivity_elements
+    # set input for readability, transpose to match expected shape of framework
+
+    sensitivity_elements_long = {key: np.transpose(value, (0, 2, 1)) for key, value in dml_long.sensitivity_elements.items()}
+    sensitivity_elements_long["nu2"], sensitivity_elements_long["psi_nu2"] = _validate_nu2(
+        nu2=sensitivity_elements_long["nu2"],
+        psi_nu2=sensitivity_elements_long["psi_nu2"],
+        riesz_rep=sensitivity_elements_long["riesz_rep"],
+    )
+
+    sensitivity_elements_short = {key: np.transpose(value, (0, 2, 1)) for key, value in dml_short.sensitivity_elements.items()}
+    sensitivity_elements_short["nu2"], sensitivity_elements_short["psi_nu2"] = _validate_nu2(
+        nu2=sensitivity_elements_short["nu2"],
+        psi_nu2=sensitivity_elements_short["psi_nu2"],
+        riesz_rep=sensitivity_elements_short["riesz_rep"],
+    )
 
     if not isinstance(sensitivity_elements_long, dict):
         raise TypeError(

From 382ad701e6e2fb0e7962ca2124d482f4d8fb6cfc Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Wed, 5 Feb 2025 08:25:32 +0100
Subject: [PATCH 20/54] add benchmark variables to framework

---
 doubleml/double_ml.py             |   2 +
 doubleml/double_ml_framework.py   | 113 +++++++++++++++++++++---------
 doubleml/utils/gain_statistics.py |  18 +----
 3 files changed, 85 insertions(+), 48 deletions(-)

diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py
index ce739fbf..27f39728 100644
--- a/doubleml/double_ml.py
+++ b/doubleml/double_ml.py
@@ -565,6 +565,8 @@ def construct_framework(self):
                     "sensitivity_elements": {
                         "max_bias": np.transpose(max_bias, (0, 2, 1)),
                         "psi_max_bias": np.transpose(psi_max_bias, (0, 2, 1)),
+                        "sigma2": np.transpose(self.sensitivity_elements["sigma2"], (0, 2, 1)),
+                        "nu2": np.transpose(self.sensitivity_elements["nu2"], (0, 2, 1)),
                     }
                 }
             )
diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py
index 0f51abc3..c85b2dd5 100644
--- a/doubleml/double_ml_framework.py
+++ b/doubleml/double_ml_framework.py
@@ -198,6 +198,7 @@ def sensitivity_elements(self):
         Values of the sensitivity components.
         If available (e.g., PLR, IRM) a dictionary with entries ``max_bias`` (shape (``1``, ``n_thetas``, ``n_rep``)) and
          ``psi_max_bias`` (shape (``n_obs``, ``n_thetas``, ``n_rep``)).
+        Optionally, additional entries ``sigma2`` and ``nu2``(shape (``1``, ``n_thetas``, ``n_rep``)) are available.
         """
         return self._sensitivity_elements
 
@@ -421,6 +422,13 @@ def __mul__(self, other):
                     "max_bias": max_bias,
                     "psi_max_bias": psi_max_bias,
                 }
+                if self._benchmark_available:
+                    sensitivity_elements.update(
+                        {
+                            "sigma2": self._sensitivity_elements["sigma2"],
+                            "nu2": np.multiply(np.square(other), self._sensitivity_elements["nu2"]),
+                        }
+                    )
                 doubleml_dict["sensitivity_elements"] = sensitivity_elements
 
             new_obj = DoubleMLFramework(doubleml_dict)
@@ -941,6 +949,7 @@ def _check_and_set_sensitivity_elements(self, doubleml_dict):
         if "sensitivity_elements" not in doubleml_dict.keys():
             sensitivity_implemented = False
             sensitivity_elements = None
+            benchmark_available = False
 
         else:
             if not isinstance(doubleml_dict["sensitivity_elements"], dict):
@@ -958,50 +967,85 @@ def _check_and_set_sensitivity_elements(self, doubleml_dict):
 
             # set sensitivity elements
             sensitivity_implemented = True
-
             sensitivity_elements = {key: doubleml_dict["sensitivity_elements"][key] for key in expected_keys_sensitivity}
 
+            # check if benchmarks are available and update sensitivity elements
+            benchmark_available, sensitivity_elements_benchmark = self._check_sensitivity_benchmark(doubleml_dict)
+            sensitivity_elements.update(sensitivity_elements_benchmark)
+
+        # set attributes
         self._sensitivity_implemented = sensitivity_implemented
         self._sensitivity_elements = sensitivity_elements
+        self._benchmark_available = benchmark_available
         self._sensitivity_params = None
 
         return
 
-    def _check_framework_shapes(self):
-        score_dim = (self._n_obs, self._n_thetas, self.n_rep)
-        # check if all sizes match
-        if self._thetas.shape != (self._n_thetas,):
-            raise ValueError(f"The shape of thetas does not match the expected shape ({self._n_thetas},).")
-        if self._ses.shape != (self._n_thetas,):
-            raise ValueError(f"The shape of ses does not match the expected shape ({self._n_thetas},).")
-        if self._all_thetas.shape != (self._n_thetas, self._n_rep):
-            raise ValueError(f"The shape of all_thetas does not match the expected shape ({self._n_thetas}, {self._n_rep}).")
-        if self._all_ses.shape != (self._n_thetas, self._n_rep):
-            raise ValueError(f"The shape of all_ses does not match the expected shape ({self._n_thetas}, {self._n_rep}).")
-        if self._var_scaling_factors.shape != (self._n_thetas,):
-            raise ValueError(f"The shape of var_scaling_factors does not match the expected shape ({self._n_thetas},).")
-        # dimension of scaled_psi is n_obs x n_thetas x n_rep (per default)
-        if self._scaled_psi.shape != score_dim:
-            raise ValueError(
-                (
-                    "The shape of scaled_psi does not match the expected "
-                    f"shape ({self._n_obs}, {self._n_thetas}, {self._n_rep})."
-                )
-            )
+    def _check_sensitivity_benchmark(self, doubleml_dict):
+        # check if benchmarks are available
+        expected_keys_benchmark = ["sigma2", "nu2"]
+        benchmark_available = all(key in doubleml_dict["sensitivity_elements"] for key in expected_keys_benchmark)
+        if benchmark_available:
+            # type checks
+            for key in expected_keys_benchmark:
+                if not isinstance(doubleml_dict["sensitivity_elements"][key], np.ndarray):
+                    raise TypeError(f"The sensitivity element {key} must be a numpy array.")
 
-        if self._sensitivity_implemented:
-            if self._sensitivity_elements["max_bias"].shape != (1, self._n_thetas, self.n_rep):
+            # additional constraints
+            if (np.any(doubleml_dict["sensitivity_elements"]["sigma2"] < 0)) | (
+                np.any(doubleml_dict["sensitivity_elements"]["nu2"] < 0)
+            ):
                 raise ValueError(
-                    f"The shape of max_bias does not match the expected shape (1, {self._n_thetas}, {self._n_rep})."
-                )
-            if self._sensitivity_elements["psi_max_bias"].shape != score_dim:
-                raise ValueError(
-                    (
-                        "The shape of psi_max_bias does not match the expected "
-                        f"shape ({self._n_obs}, {self._n_thetas}, {self._n_rep})."
-                    )
+                    "sensitivity_elements sigma2 and nu2 have to be positive. "
+                    f"Got sigma2 {str(doubleml_dict["sensitivity_elements"]['sigma2'])} "
+                    f"and nu2 {str(doubleml_dict["sensitivity_elements"]['nu2'])}. "
+                    "Most likely this is due to low quality learners (especially propensity scores)."
                 )
 
+            sensitivity_elements_benchmark = {
+                key: doubleml_dict["sensitivity_elements"][key] for key in expected_keys_benchmark
+            }
+        else:
+            sensitivity_elements_benchmark = {}
+
+        return benchmark_available, sensitivity_elements_benchmark
+
+    def _check_framework_shapes(self):
+        expected_shapes = {
+            "thetas": (self._n_thetas,),
+            "ses": (self._n_thetas,),
+            "all_thetas": (self._n_thetas, self._n_rep),
+            "all_ses": (self._n_thetas, self._n_rep),
+            "var_scaling_factors": (self._n_thetas,),
+            "scaled_psi": (self._n_obs, self._n_thetas, self.n_rep),
+        }
+
+        for attr, expected_shape in expected_shapes.items():
+            actual_shape = getattr(self, f"_{attr}").shape
+            if actual_shape != expected_shape:
+                raise ValueError(f"The shape of {attr} does not match the expected shape {expected_shape}.")
+
+        if self._sensitivity_implemented:
+            self._check_sensitivity_elements_shapes()
+
+        return None
+
+    def _check_sensitivity_elements_shapes(self):
+        expected_sensitivity_shapes = {
+            "max_bias": (1, self._n_thetas, self.n_rep),
+            "psi_max_bias": (self._n_obs, self._n_thetas, self.n_rep),
+        }
+
+        if self._benchmark_available:
+            expected_sensitivity_shapes.update(
+                {"sigma2": (1, self._n_thetas, self.n_rep), "nu2": (1, self._n_thetas, self.n_rep)}
+            )
+
+        for key, expected_shape in expected_sensitivity_shapes.items():
+            actual_shape = self._sensitivity_elements[key].shape
+            if actual_shape != expected_shape:
+                raise ValueError(f"The shape of {key} does not match the expected shape {expected_shape}.")
+
         return None
 
     def _check_treatment_names(self, treatment_names):
@@ -1066,6 +1110,11 @@ def concat(objs):
             assert all(key in obj._sensitivity_elements.keys() for obj in objs)
             sensitivity_elements[key] = np.concatenate([obj._sensitivity_elements[key] for obj in objs], axis=1)
 
+        if all(obj._benchmark_available for obj in objs):
+            for key in ["sigma2", "nu2"]:
+                assert all(key in obj._sensitivity_elements.keys() for obj in objs)
+                sensitivity_elements[key] = np.concatenate([obj._sensitivity_elements[key] for obj in objs], axis=1)
+
         doubleml_dict["sensitivity_elements"] = sensitivity_elements
 
     new_obj = DoubleMLFramework(doubleml_dict)
diff --git a/doubleml/utils/gain_statistics.py b/doubleml/utils/gain_statistics.py
index cc1b1598..ed586afc 100644
--- a/doubleml/utils/gain_statistics.py
+++ b/doubleml/utils/gain_statistics.py
@@ -1,7 +1,5 @@
 import numpy as np
 
-from ._sensitivity import _validate_nu2
-
 
 def gain_statistics(dml_long, dml_short):
     """
@@ -23,20 +21,8 @@ def gain_statistics(dml_long, dml_short):
     """
 
     # set input for readability, transpose to match expected shape of framework
-
-    sensitivity_elements_long = {key: np.transpose(value, (0, 2, 1)) for key, value in dml_long.sensitivity_elements.items()}
-    sensitivity_elements_long["nu2"], sensitivity_elements_long["psi_nu2"] = _validate_nu2(
-        nu2=sensitivity_elements_long["nu2"],
-        psi_nu2=sensitivity_elements_long["psi_nu2"],
-        riesz_rep=sensitivity_elements_long["riesz_rep"],
-    )
-
-    sensitivity_elements_short = {key: np.transpose(value, (0, 2, 1)) for key, value in dml_short.sensitivity_elements.items()}
-    sensitivity_elements_short["nu2"], sensitivity_elements_short["psi_nu2"] = _validate_nu2(
-        nu2=sensitivity_elements_short["nu2"],
-        psi_nu2=sensitivity_elements_short["psi_nu2"],
-        riesz_rep=sensitivity_elements_short["riesz_rep"],
-    )
+    sensitivity_elements_long = dml_long.framework.sensitivity_elements
+    sensitivity_elements_short = dml_short.framework.sensitivity_elements
 
     if not isinstance(sensitivity_elements_long, dict):
         raise TypeError(

From 3a828c5bc820cafc3bc88a16461c96128ca25a65 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Wed, 5 Feb 2025 08:28:40 +0100
Subject: [PATCH 21/54] add shape and input tests for sigma2 and nu2

---
 doubleml/tests/test_framework_exceptions.py | 50 ++++++++++++++++-----
 1 file changed, 39 insertions(+), 11 deletions(-)

diff --git a/doubleml/tests/test_framework_exceptions.py b/doubleml/tests/test_framework_exceptions.py
index f6803391..c33cf1f3 100644
--- a/doubleml/tests/test_framework_exceptions.py
+++ b/doubleml/tests/test_framework_exceptions.py
@@ -19,6 +19,8 @@
 doubleml_dict["sensitivity_elements"] = {
     "max_bias": np.ones(shape=(1, n_thetas, n_rep)),
     "psi_max_bias": np.ones(shape=(n_obs, n_thetas, n_rep)),
+    "sigma2": np.ones(shape=(1, n_thetas, n_rep)),
+    "nu2": np.ones(shape=(1, n_thetas, n_rep)),
 }
 
 # combine objects and estimate parameters
@@ -96,6 +98,18 @@ def test_input_exceptions():
         test_dict["sensitivity_elements"]["psi_max_bias"] = np.ones(shape=(n_obs, n_thetas, n_rep, 3))
         DoubleMLFramework(test_dict)
 
+    msg = r"The shape of sigma2 does not match the expected shape \(1, 2, 5\)\."
+    with pytest.raises(ValueError, match=msg):
+        test_dict = copy.deepcopy(doubleml_dict)
+        test_dict["sensitivity_elements"]["sigma2"] = np.ones(shape=(n_obs, n_thetas, n_rep))
+        DoubleMLFramework(test_dict)
+
+    msg = r"The shape of nu2 does not match the expected shape \(1, 2, 5\)\."
+    with pytest.raises(ValueError, match=msg):
+        test_dict = copy.deepcopy(doubleml_dict)
+        test_dict["sensitivity_elements"]["nu2"] = np.ones(shape=(n_obs, n_thetas, n_rep))
+        DoubleMLFramework(test_dict)
+
     msg = "is_cluster_data has to be boolean. 1.0 of type <class 'float'> was passed."
     with pytest.raises(TypeError, match=msg):
         test_dict = copy.deepcopy(doubleml_dict)
@@ -366,13 +380,6 @@ def test_sensitivity_exceptions():
     with pytest.raises(TypeError, match=msg):
         _ = dml_framework_obj_1._calc_robustness_value(null_hypothesis=np.array([1]), level=0.95, rho=1.0, idx_treatment=0)
 
-    sensitivity_dict = generate_dml_dict(psi_a, psi_b)
-    sensitivity_dict["sensitivity_elements"] = {
-        "max_bias": np.ones(shape=(1, n_thetas, n_rep)),
-        "psi_max_bias": np.ones(shape=(n_obs, n_thetas, n_rep)),
-    }
-    dml_framework_sensitivity = DoubleMLFramework(sensitivity_dict)
-
     # test idx_treatment
     dml_framework_obj_1.sensitivity_analysis()
     msg = "idx_treatment must be an integer. 0.0 of type <class 'float'> was passed."
@@ -387,7 +394,30 @@ def test_sensitivity_exceptions():
     with pytest.raises(ValueError, match=msg):
         _ = dml_framework_obj_1.sensitivity_plot(idx_treatment=2)
 
-    # test variances
+    # test benchmark sensitivity elements
+    sensitivity_dict_benchmark = generate_dml_dict(psi_a, psi_b)
+    sensitivity_dict_benchmark["sensitivity_elements"] = {
+        "max_bias": np.ones(shape=(1, n_thetas, n_rep)),
+        "psi_max_bias": np.ones(shape=(n_obs, n_thetas, n_rep)),
+        "sigma2": np.ones(shape=(1, n_thetas, n_rep)),
+        "nu2": 5.0,
+    }
+    msg = "The sensitivity element nu2 must be a numpy array."
+    with pytest.raises(TypeError, match=msg):
+        _ = DoubleMLFramework(sensitivity_dict_benchmark)
+
+    sensitivity_dict_benchmark["sensitivity_elements"].update({
+        "sigma2": 5.0,
+        "nu2": np.ones(shape=(1, n_thetas, n_rep)),
+    })
+    msg = "The sensitivity element sigma2 must be a numpy array."
+    with pytest.raises(TypeError, match=msg):
+        _ = DoubleMLFramework(sensitivity_dict_benchmark)
+
+    sensitivity_dict_benchmark["sensitivity_elements"].update({
+        "sigma2": np.ones(shape=(1, n_thetas, n_rep)),
+        "nu2": -1.0 * np.ones(shape=(1, n_thetas, n_rep)),
+    })
     msg = (
         r"sensitivity_elements sigma2 and nu2 have to be positive\. "
         r"Got sigma2 \[\[\[1\. 1\. 1\. 1\. 1\.\]\n\s+\[1\. 1\. 1\. 1\. 1\.\]\]\] "
@@ -395,9 +425,7 @@ def test_sensitivity_exceptions():
         r"Most likely this is due to low quality learners \(especially propensity scores\)\."
     )
     with pytest.raises(ValueError, match=msg):
-        _ = dml_framework_sensitivity._calc_sensitivity_analysis(cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95)
-    with pytest.raises(ValueError, match=msg):
-        dml_framework_sensitivity.sensitivity_analysis(cf_y=0.1, cf_d=0.15, rho=1.0, level=0.95)
+        _ = DoubleMLFramework(sensitivity_dict_benchmark)
 
 
 @pytest.mark.ci

From dbb4045bcd52dcb6c68d21f1734ecfc0a0820ef5 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Wed, 5 Feb 2025 09:05:24 +0100
Subject: [PATCH 22/54] update nu2 reestimation

---
 doubleml/double_ml.py             | 28 ++++++++++++++++++++++++----
 doubleml/tests/test_exceptions.py | 26 ++++++++++----------------
 doubleml/utils/_sensitivity.py    | 15 ---------------
 3 files changed, 34 insertions(+), 35 deletions(-)
 delete mode 100644 doubleml/utils/_sensitivity.py

diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py
index 27f39728..dfb77fac 100644
--- a/doubleml/double_ml.py
+++ b/doubleml/double_ml.py
@@ -11,7 +11,6 @@
 from .double_ml_framework import DoubleMLFramework
 from .utils._checks import _check_external_predictions, _check_sample_splitting
 from .utils._estimation import _aggregate_coefs_and_ses, _rmse, _set_external_predictions, _var_est
-from .utils._sensitivity import _validate_nu2
 from .utils.gain_statistics import gain_statistics
 from .utils.resampling import DoubleMLClusterResampling, DoubleMLResampling
 
@@ -526,6 +525,9 @@ def fit(self, n_jobs_cv=None, store_predictions=True, external_predictions=None,
         # aggregated parameter estimates and standard errors from repeated cross-fitting
         self.coef, self.se = _aggregate_coefs_and_ses(self._all_coef, self._all_se, self._var_scaling_factors)
 
+        # validate sensitivity elements (e.g., re-estimate nu2 if negative)
+        self._validate_sensitivity_elements()
+
         # construct framework for inference
         self._framework = self.construct_framework()
 
@@ -1425,14 +1427,32 @@ def _initialize_sensitivity_elements(self, score_dim):
         }
         return sensitivity_elements
 
+    def _validate_sensitivity_elements(self):
+        for i_treat in range(self._dml_data.n_treat):
+            nu2 = self.sensitivity_elements["nu2"][:, :, i_treat]
+            psi_nu2 = self.sensitivity_elements["psi_nu2"][:, :, i_treat]
+            riesz_rep = self.sensitivity_elements["riesz_rep"][:, :, i_treat]
+
+            if np.any(nu2 <= 0):
+                treatment_name = self._dml_data.d_cols[i_treat]
+                msg = (
+                    f"The estimated nu2 for {treatment_name} is not positive. "
+                    "Re-estimation based on riesz representer (non-orthogonal)."
+                )
+                warnings.warn(msg, UserWarning)
+                nu2 = np.mean(np.power(riesz_rep, 2), axis=0, keepdims=True)
+                psi_nu2 = np.power(riesz_rep, 2)
+
+                self.sensitivity_elements["nu2"][:, :, i_treat] = nu2
+                self.sensitivity_elements["psi_nu2"][:, :, i_treat] = psi_nu2
+
+        return
+
     def _compute_sensitivity_bias(self):
         sigma2 = self.sensitivity_elements["sigma2"]
         nu2 = self.sensitivity_elements["nu2"]
         psi_sigma2 = self.sensitivity_elements["psi_sigma2"]
         psi_nu2 = self.sensitivity_elements["psi_nu2"]
-        riesz_rep = self.sensitivity_elements["riesz_rep"]
-
-        nu2, psi_nu2 = _validate_nu2(nu2=nu2, psi_nu2=psi_nu2, riesz_rep=riesz_rep)
 
         max_bias = np.sqrt(np.multiply(sigma2, nu2))
         psi_max_bias = np.divide(
diff --git a/doubleml/tests/test_exceptions.py b/doubleml/tests/test_exceptions.py
index cddaf223..a23dc1d1 100644
--- a/doubleml/tests/test_exceptions.py
+++ b/doubleml/tests/test_exceptions.py
@@ -1230,22 +1230,16 @@ def test_doubleml_sensitivity_inputs():
     with pytest.raises(ValueError):
         _ = dml_irm._set_sensitivity_elements(sensitivity_elements=sensitivity_elements, i_rep=0, i_treat=0)
 
-    # test variances
-    sensitivity_elements = {
-        "sigma2": 1.0,
-        "nu2": -2.4,
-        "psi_sigma2": 1.0,
-        "psi_nu2": 1.0,
-        "riesz_rep": 1.0,
-    }
-    _ = dml_irm._set_sensitivity_elements(sensitivity_elements=sensitivity_elements, i_rep=0, i_treat=0)
-    msg = (
-        "sensitivity_elements sigma2 and nu2 have to be positive. "
-        r"Got sigma2 \[\[\[1.\]\]\] and nu2 \[\[\[-2.4\]\]\]. "
-        r"Most likely this is due to low quality learners \(especially propensity scores\)."
-    )
-    with pytest.raises(ValueError, match=msg):
-        dml_irm.sensitivity_analysis()
+
+def test_doubleml_sensitivity_reestimation_warning():
+    dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), trimming_threshold=0.1)
+    dml_irm.fit()
+
+    dml_irm.sensitivity_elements["nu2"] = -1.0 * dml_irm.sensitivity_elements["nu2"]
+
+    msg = r"The estimated nu2 for d is not positive. Re-estimation based on riesz representer \(non-orthogonal\)."
+    with pytest.warns(UserWarning, match=msg):
+        dml_irm._validate_sensitivity_elements()
 
 
 def test_doubleml_sensitivity_summary():
diff --git a/doubleml/utils/_sensitivity.py b/doubleml/utils/_sensitivity.py
deleted file mode 100644
index 25a54415..00000000
--- a/doubleml/utils/_sensitivity.py
+++ /dev/null
@@ -1,15 +0,0 @@
-import warnings
-
-import numpy as np
-
-
-def _validate_nu2(nu2, psi_nu2, riesz_rep):
-    if np.any(nu2 <= 0):
-        warnings.warn(
-            "The estimated nu2 is not positive. Re-estimation based on riesz representer (non-orthogonal).",
-            UserWarning,
-        )
-        nu2 = np.mean(np.power(riesz_rep, 2), axis=0, keepdims=True)
-        psi_nu2 = np.power(riesz_rep, 2)
-
-    return nu2, psi_nu2

From b1d1feeb3fa85e28588d4235aa5f20232767ff9d Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Wed, 5 Feb 2025 09:10:20 +0100
Subject: [PATCH 23/54] fix sensitivity validation

---
 doubleml/double_ml.py | 33 +++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py
index dfb77fac..b1ef0f57 100644
--- a/doubleml/double_ml.py
+++ b/doubleml/double_ml.py
@@ -1428,23 +1428,24 @@ def _initialize_sensitivity_elements(self, score_dim):
         return sensitivity_elements
 
     def _validate_sensitivity_elements(self):
-        for i_treat in range(self._dml_data.n_treat):
-            nu2 = self.sensitivity_elements["nu2"][:, :, i_treat]
-            psi_nu2 = self.sensitivity_elements["psi_nu2"][:, :, i_treat]
-            riesz_rep = self.sensitivity_elements["riesz_rep"][:, :, i_treat]
-
-            if np.any(nu2 <= 0):
-                treatment_name = self._dml_data.d_cols[i_treat]
-                msg = (
-                    f"The estimated nu2 for {treatment_name} is not positive. "
-                    "Re-estimation based on riesz representer (non-orthogonal)."
-                )
-                warnings.warn(msg, UserWarning)
-                nu2 = np.mean(np.power(riesz_rep, 2), axis=0, keepdims=True)
-                psi_nu2 = np.power(riesz_rep, 2)
+        if self._sensitivity_implemented:
+            for i_treat in range(self._dml_data.n_treat):
+                nu2 = self.sensitivity_elements["nu2"][:, :, i_treat]
+                psi_nu2 = self.sensitivity_elements["psi_nu2"][:, :, i_treat]
+                riesz_rep = self.sensitivity_elements["riesz_rep"][:, :, i_treat]
+
+                if np.any(nu2 <= 0):
+                    treatment_name = self._dml_data.d_cols[i_treat]
+                    msg = (
+                        f"The estimated nu2 for {treatment_name} is not positive. "
+                        "Re-estimation based on riesz representer (non-orthogonal)."
+                    )
+                    warnings.warn(msg, UserWarning)
+                    nu2 = np.mean(np.power(riesz_rep, 2), axis=0, keepdims=True)
+                    psi_nu2 = np.power(riesz_rep, 2)
 
-                self.sensitivity_elements["nu2"][:, :, i_treat] = nu2
-                self.sensitivity_elements["psi_nu2"][:, :, i_treat] = psi_nu2
+                    self.sensitivity_elements["nu2"][:, :, i_treat] = nu2
+                    self.sensitivity_elements["psi_nu2"][:, :, i_treat] = psi_nu2
 
         return
 

From f62c3451e6048e5b99f09e0a95f08ac22d9cbb64 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Wed, 5 Feb 2025 09:12:09 +0100
Subject: [PATCH 24/54] add treatment names to construct_framework

---
 doubleml/double_ml.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py
index b1ef0f57..ebf014a4 100644
--- a/doubleml/double_ml.py
+++ b/doubleml/double_ml.py
@@ -556,6 +556,7 @@ def construct_framework(self):
             "var_scaling_factors": self._var_scaling_factors,
             "scaled_psi": scaled_psi_reshape,
             "is_cluster_data": self._is_cluster_data,
+            "treatment_names": self._dml_data.d_cols,
         }
 
         if self._sensitivity_implemented:

From a35ae96b485747debce3fe8c0181221787657f1b Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Wed, 5 Feb 2025 09:31:29 +0100
Subject: [PATCH 25/54] fix warning message

---
 doubleml/double_ml_framework.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py
index c85b2dd5..b69171f2 100644
--- a/doubleml/double_ml_framework.py
+++ b/doubleml/double_ml_framework.py
@@ -997,8 +997,8 @@ def _check_sensitivity_benchmark(self, doubleml_dict):
             ):
                 raise ValueError(
                     "sensitivity_elements sigma2 and nu2 have to be positive. "
-                    f"Got sigma2 {str(doubleml_dict["sensitivity_elements"]['sigma2'])} "
-                    f"and nu2 {str(doubleml_dict["sensitivity_elements"]['nu2'])}. "
+                    f"Got sigma2 {str(doubleml_dict['sensitivity_elements']['sigma2'])} "
+                    f"and nu2 {str(doubleml_dict['sensitivity_elements']['nu2'])}. "
                     "Most likely this is due to low quality learners (especially propensity scores)."
                 )
 

From 6f05b1126c49488fa8c4299db90af8e2147179ac Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Wed, 5 Feb 2025 09:37:33 +0100
Subject: [PATCH 26/54] remove unnecessary assignment

---
 doubleml/double_ml.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py
index ebf014a4..441fb15e 100644
--- a/doubleml/double_ml.py
+++ b/doubleml/double_ml.py
@@ -1432,7 +1432,6 @@ def _validate_sensitivity_elements(self):
         if self._sensitivity_implemented:
             for i_treat in range(self._dml_data.n_treat):
                 nu2 = self.sensitivity_elements["nu2"][:, :, i_treat]
-                psi_nu2 = self.sensitivity_elements["psi_nu2"][:, :, i_treat]
                 riesz_rep = self.sensitivity_elements["riesz_rep"][:, :, i_treat]
 
                 if np.any(nu2 <= 0):
@@ -1442,8 +1441,8 @@ def _validate_sensitivity_elements(self):
                         "Re-estimation based on riesz representer (non-orthogonal)."
                     )
                     warnings.warn(msg, UserWarning)
-                    nu2 = np.mean(np.power(riesz_rep, 2), axis=0, keepdims=True)
                     psi_nu2 = np.power(riesz_rep, 2)
+                    nu2 = np.mean(psi_nu2, axis=0, keepdims=True)
 
                     self.sensitivity_elements["nu2"][:, :, i_treat] = nu2
                     self.sensitivity_elements["psi_nu2"][:, :, i_treat] = psi_nu2

From dc298f14fe65065113a63b2fb6d543977e702419 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Wed, 5 Feb 2025 11:23:40 +0100
Subject: [PATCH 27/54] formatting

---
 doubleml/tests/test_framework_exceptions.py | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/doubleml/tests/test_framework_exceptions.py b/doubleml/tests/test_framework_exceptions.py
index c33cf1f3..f562f98d 100644
--- a/doubleml/tests/test_framework_exceptions.py
+++ b/doubleml/tests/test_framework_exceptions.py
@@ -406,18 +406,22 @@ def test_sensitivity_exceptions():
     with pytest.raises(TypeError, match=msg):
         _ = DoubleMLFramework(sensitivity_dict_benchmark)
 
-    sensitivity_dict_benchmark["sensitivity_elements"].update({
-        "sigma2": 5.0,
-        "nu2": np.ones(shape=(1, n_thetas, n_rep)),
-    })
+    sensitivity_dict_benchmark["sensitivity_elements"].update(
+        {
+            "sigma2": 5.0,
+            "nu2": np.ones(shape=(1, n_thetas, n_rep)),
+        }
+    )
     msg = "The sensitivity element sigma2 must be a numpy array."
     with pytest.raises(TypeError, match=msg):
         _ = DoubleMLFramework(sensitivity_dict_benchmark)
 
-    sensitivity_dict_benchmark["sensitivity_elements"].update({
-        "sigma2": np.ones(shape=(1, n_thetas, n_rep)),
-        "nu2": -1.0 * np.ones(shape=(1, n_thetas, n_rep)),
-    })
+    sensitivity_dict_benchmark["sensitivity_elements"].update(
+        {
+            "sigma2": np.ones(shape=(1, n_thetas, n_rep)),
+            "nu2": -1.0 * np.ones(shape=(1, n_thetas, n_rep)),
+        }
+    )
     msg = (
         r"sensitivity_elements sigma2 and nu2 have to be positive\. "
         r"Got sigma2 \[\[\[1\. 1\. 1\. 1\. 1\.\]\n\s+\[1\. 1\. 1\. 1\. 1\.\]\]\] "

From a118bf2baa0d75e6ca357c0f479e10d7706d6287 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Wed, 5 Feb 2025 12:46:27 +0100
Subject: [PATCH 28/54] fix difference framework bounds

---
 doubleml/double_ml_framework.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doubleml/double_ml_framework.py b/doubleml/double_ml_framework.py
index b69171f2..60786028 100644
--- a/doubleml/double_ml_framework.py
+++ b/doubleml/double_ml_framework.py
@@ -375,7 +375,7 @@ def __sub__(self, other):
             if self._sensitivity_implemented and other._sensitivity_implemented:
 
                 max_bias = self._sensitivity_elements["max_bias"] + other._sensitivity_elements["max_bias"]
-                psi_max_bias = self._sensitivity_elements["psi_max_bias"] - other._sensitivity_elements["psi_max_bias"]
+                psi_max_bias = self._sensitivity_elements["psi_max_bias"] + other._sensitivity_elements["psi_max_bias"]
                 sensitivity_elements = {
                     "max_bias": max_bias,
                     "psi_max_bias": psi_max_bias,

From 7353db8fecf6888f2dc7921eea25ccd0a5989906 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Wed, 5 Feb 2025 13:17:58 +0100
Subject: [PATCH 29/54] add test comparing apo to irm

---
 doubleml/irm/tests/test_irm_vs_apos.py | 230 +++++++++++++++++++++++++
 1 file changed, 230 insertions(+)
 create mode 100644 doubleml/irm/tests/test_irm_vs_apos.py

diff --git a/doubleml/irm/tests/test_irm_vs_apos.py b/doubleml/irm/tests/test_irm_vs_apos.py
new file mode 100644
index 00000000..6635c5bb
--- /dev/null
+++ b/doubleml/irm/tests/test_irm_vs_apos.py
@@ -0,0 +1,230 @@
+import copy
+
+import numpy as np
+import pytest
+from sklearn.base import clone
+from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
+from sklearn.linear_model import LinearRegression, LogisticRegression
+
+import doubleml as dml
+from doubleml.utils._estimation import _normalize_ipw
+
+
+@pytest.fixture(
+    scope="module",
+    params=[
+        [LinearRegression(), LogisticRegression(solver="lbfgs", max_iter=250)],
+        [
+            RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42),
+            RandomForestClassifier(max_depth=5, n_estimators=10, random_state=42),
+        ],
+    ],
+)
+def learner(request):
+    return request.param
+
+
+@pytest.fixture(scope="module", params=[1, 3])
+def n_rep(request):
+    return request.param
+
+
+@pytest.fixture(scope="module", params=[False, True])
+def normalize_ipw(request):
+    return request.param
+
+
+@pytest.fixture(scope="module", params=[0.2, 0.15])
+def trimming_threshold(request):
+    return request.param
+
+
+@pytest.fixture(scope="module")
+def dml_irm_apos_fixture(generate_data_irm, learner, n_rep, normalize_ipw, trimming_threshold):
+
+    # collect data
+    (x, y, d) = generate_data_irm
+    obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d)
+
+    # Set machine learning methods for m & g
+    ml_g = clone(learner[0])
+    ml_m = clone(learner[1])
+
+    n_folds = 5
+    kwargs = {
+        "n_folds": n_folds,
+        "n_rep": n_rep,
+        "trimming_threshold": trimming_threshold,
+        "normalize_ipw": normalize_ipw,
+    }
+
+    dml_irm = dml.DoubleMLIRM(
+        obj_dml_data=obj_dml_data,
+        ml_g=ml_g,
+        ml_m=ml_m,
+        score="ATE",
+        **kwargs,
+    )
+    dml_irm.fit()
+
+    m_hat = dml_irm.predictions["ml_m"][:, :, 0]
+    g0_hat = dml_irm.predictions["ml_g0"][:, :, 0]
+    g1_hat = dml_irm.predictions["ml_g1"][:, :, 0]
+
+    external_predictions_apos = {
+        0: {
+            "ml_m": 1.0 - m_hat,
+            "ml_g1": g0_hat,
+            "ml_g0": g1_hat,
+        },
+        1: {
+            "ml_m": m_hat,
+            "ml_g1": g1_hat,
+            "ml_g0": g0_hat,
+        },
+    }
+
+    dml_apos = dml.DoubleMLAPOS(obj_dml_data=obj_dml_data, ml_g=ml_g, ml_m=ml_m, treatment_levels=[0, 1], **kwargs)
+    dml_apos = dml_apos.fit(external_predictions=external_predictions_apos)
+    causal_contrast = dml_apos.causal_contrast(reference_levels=[0])
+
+    result_dict = {"dml_irm": dml_irm, "dml_apos": dml_apos, "causal_contrast": causal_contrast}
+    return result_dict
+
+
+@pytest.mark.ci
+def test_apos_vs_irm_thetas(dml_irm_apos_fixture):
+    assert np.allclose(
+        dml_irm_apos_fixture["dml_irm"].framework.all_thetas,
+        dml_irm_apos_fixture["causal_contrast"].all_thetas,
+        rtol=1e-9,
+        atol=1e-4,
+    )
+
+
+@pytest.mark.ci
+def test_apos_vs_irm_ses(dml_irm_apos_fixture):
+    assert np.allclose(
+        dml_irm_apos_fixture["dml_irm"].framework.all_ses,
+        dml_irm_apos_fixture["causal_contrast"].all_ses,
+        rtol=1e-9,
+        atol=1e-4,
+    )
+
+
+@pytest.fixture(scope="module")
+def dml_irm_apos_weighted_fixture(generate_data_irm, learner, n_rep, normalize_ipw, trimming_threshold):
+
+    # collect data
+    (x, y, d) = generate_data_irm
+    obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d)
+
+    # Set machine learning methods for m & g
+    ml_g = clone(learner[0])
+    ml_m = clone(learner[1])
+
+    n_folds = 5
+    kwargs = {
+        "n_folds": n_folds,
+        "n_rep": n_rep,
+        "trimming_threshold": trimming_threshold,
+        "normalize_ipw": normalize_ipw,
+    }
+
+    dml_irm = dml.DoubleMLIRM(
+        obj_dml_data=obj_dml_data,
+        ml_g=ml_g,
+        ml_m=ml_m,
+        score="ATTE",
+        **kwargs,
+    )
+    dml_irm.fit()
+
+    m_hat = dml_irm.predictions["ml_m"][:, :, 0]
+    g0_hat = dml_irm.predictions["ml_g0"][:, :, 0]
+    g1_hat = dml_irm.predictions["ml_g1"][:, :, 0]
+
+    # define weights
+    p_hat = np.mean(d)
+    m_hat_adjusted = copy.deepcopy(m_hat)
+    if normalize_ipw:
+        for i_rep in range(n_rep):
+            m_hat_adjusted[:, i_rep] = _normalize_ipw(m_hat[:, i_rep], d)
+    weights_dict = {
+        "weights": d / p_hat,
+        "weights_bar": m_hat_adjusted / p_hat,
+    }
+
+    external_predictions_irm = {
+        "d": {
+            "ml_m": m_hat,
+            "ml_g1": g1_hat,
+            "ml_g0": g0_hat,
+        }
+    }
+    dml_irm_weighted = dml.DoubleMLIRM(
+        obj_dml_data=obj_dml_data,
+        ml_g=ml_g,
+        ml_m=ml_m,
+        score="ATE",
+        weights=weights_dict,
+        **kwargs,
+    )
+    dml_irm_weighted.fit(external_predictions=external_predictions_irm)
+
+    external_predictions_apos = {
+        0: {
+            "ml_m": 1.0 - m_hat,
+            "ml_g1": g0_hat,
+            "ml_g0": g1_hat,
+        },
+        1: {
+            "ml_m": m_hat,
+            "ml_g1": g1_hat,
+            "ml_g0": g0_hat,
+        },
+    }
+
+    dml_apos = dml.DoubleMLAPOS(
+        obj_dml_data=obj_dml_data, ml_g=ml_g, ml_m=ml_m, treatment_levels=[0, 1], weights=weights_dict, **kwargs
+    )
+    dml_apos = dml_apos.fit(external_predictions=external_predictions_apos)
+    causal_contrast = dml_apos.causal_contrast(reference_levels=[0])
+
+    result_dict = {
+        "dml_irm": dml_irm,
+        "dml_irm_weighted": dml_irm_weighted,
+        "dml_apos": dml_apos,
+        "causal_contrast": causal_contrast,
+    }
+    return result_dict
+
+
+@pytest.mark.ci
+def test_apos_vs_irm_weighted_thetas(dml_irm_apos_weighted_fixture):
+    assert np.allclose(
+        dml_irm_apos_weighted_fixture["dml_irm"].framework.all_thetas,
+        dml_irm_apos_weighted_fixture["dml_irm_weighted"].framework.all_thetas,
+        rtol=1e-9,
+        atol=1e-4,
+    )
+
+    assert np.allclose(
+        dml_irm_apos_weighted_fixture["dml_irm_weighted"].framework.all_thetas,
+        dml_irm_apos_weighted_fixture["causal_contrast"].all_thetas,
+        rtol=1e-9,
+        atol=1e-4,
+    )
+
+
+@pytest.mark.ci
+def test_apos_vs_irm_weighted_ses(dml_irm_apos_weighted_fixture):
+    # Remark that the scores are slightly different (Y instead of g(1,X) and coefficient of theta)
+    # (for weighted and unweighted case in irm)
+
+    assert np.allclose(
+        dml_irm_apos_weighted_fixture["dml_irm_weighted"].framework.all_ses,
+        dml_irm_apos_weighted_fixture["causal_contrast"].all_ses,
+        rtol=1e-9,
+        atol=1e-4,
+    )

From 2a0aee0368d9be4879f7491153c592e5d2970104 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Wed, 5 Feb 2025 13:35:31 +0100
Subject: [PATCH 30/54] add ci to irm apo comparison

---
 doubleml/irm/tests/test_irm_vs_apos.py | 38 +++++++++++++++++++++++++-
 1 file changed, 37 insertions(+), 1 deletion(-)

diff --git a/doubleml/irm/tests/test_irm_vs_apos.py b/doubleml/irm/tests/test_irm_vs_apos.py
index 6635c5bb..248462f0 100644
--- a/doubleml/irm/tests/test_irm_vs_apos.py
+++ b/doubleml/irm/tests/test_irm_vs_apos.py
@@ -88,7 +88,16 @@ def dml_irm_apos_fixture(generate_data_irm, learner, n_rep, normalize_ipw, trimm
     dml_apos = dml_apos.fit(external_predictions=external_predictions_apos)
     causal_contrast = dml_apos.causal_contrast(reference_levels=[0])
 
-    result_dict = {"dml_irm": dml_irm, "dml_apos": dml_apos, "causal_contrast": causal_contrast}
+    irm_confint = dml_irm.confint().values
+    causal_contrast_confint = causal_contrast.confint().values
+
+    result_dict = {
+        "dml_irm": dml_irm,
+        "dml_apos": dml_apos,
+        "causal_contrast": causal_contrast,
+        "irm_confint": irm_confint,
+        "causal_contrast_confint": causal_contrast_confint,
+    }
     return result_dict
 
 
@@ -112,6 +121,16 @@ def test_apos_vs_irm_ses(dml_irm_apos_fixture):
     )
 
 
+@pytest.mark.ci
+def test_apos_vs_irm_confint(dml_irm_apos_fixture):
+    assert np.allclose(
+        dml_irm_apos_fixture["irm_confint"],
+        dml_irm_apos_fixture["causal_contrast_confint"],
+        rtol=1e-9,
+        atol=1e-4,
+    )
+
+
 @pytest.fixture(scope="module")
 def dml_irm_apos_weighted_fixture(generate_data_irm, learner, n_rep, normalize_ipw, trimming_threshold):
 
@@ -191,11 +210,18 @@ def dml_irm_apos_weighted_fixture(generate_data_irm, learner, n_rep, normalize_i
     dml_apos = dml_apos.fit(external_predictions=external_predictions_apos)
     causal_contrast = dml_apos.causal_contrast(reference_levels=[0])
 
+    irm_confint = dml_irm.confint().values
+    irm_weighted_confint = dml_irm_weighted.confint().values
+    causal_contrast_confint = causal_contrast.confint().values
+
     result_dict = {
         "dml_irm": dml_irm,
         "dml_irm_weighted": dml_irm_weighted,
         "dml_apos": dml_apos,
         "causal_contrast": causal_contrast,
+        "irm_confint": irm_confint,
+        "irm_weighted_confint": irm_weighted_confint,
+        "causal_contrast_confint": causal_contrast_confint,
     }
     return result_dict
 
@@ -228,3 +254,13 @@ def test_apos_vs_irm_weighted_ses(dml_irm_apos_weighted_fixture):
         rtol=1e-9,
         atol=1e-4,
     )
+
+
+@pytest.mark.ci
+def test_apos_vs_irm_weighted_confint(dml_irm_apos_weighted_fixture):
+    assert np.allclose(
+        dml_irm_apos_weighted_fixture["irm_weighted_confint"],
+        dml_irm_apos_weighted_fixture["causal_contrast_confint"],
+        rtol=1e-9,
+        atol=1e-4,
+    )

From 38eb96ddeded8b3342ad484d29008413aeaf9484 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Wed, 5 Feb 2025 17:05:27 +0100
Subject: [PATCH 31/54] update psi_a in irm and apo scores

---
 doubleml/irm/apo.py | 2 +-
 doubleml/irm/irm.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/doubleml/irm/apo.py b/doubleml/irm/apo.py
index 0a42da9a..514a6472 100644
--- a/doubleml/irm/apo.py
+++ b/doubleml/irm/apo.py
@@ -306,7 +306,7 @@ def _score_elements(self, y, treated, g_hat0, g_hat1, m_hat, smpls):
         u_hat = y - g_hat1
         weights, weights_bar = self._get_weights()
         psi_b = weights * g_hat1 + weights_bar * np.divide(np.multiply(treated, u_hat), m_hat_adj)
-        psi_a = np.full_like(m_hat_adj, -1.0)
+        psi_a = -1.0 * np.divide(weights, np.mean(weights))  # TODO: check if this is correct
 
         return psi_a, psi_b
 
diff --git a/doubleml/irm/irm.py b/doubleml/irm/irm.py
index 53960872..c5765293 100644
--- a/doubleml/irm/irm.py
+++ b/doubleml/irm/irm.py
@@ -355,7 +355,7 @@ def _score_elements(self, y, d, g_hat0, g_hat1, m_hat, smpls):
                 np.divide(np.multiply(d, u_hat1), m_hat_adj) - np.divide(np.multiply(1.0 - d, u_hat0), 1.0 - m_hat_adj)
             )
             if self.score == "ATE":
-                psi_a = np.full_like(m_hat_adj, -1.0)
+                psi_a = -1.0 * np.divide(weights, np.mean(weights))  # TODO: check if this is correct
             else:
                 assert self.score == "ATTE"
                 psi_a = -1.0 * weights

From bb05d742c88e5545505f54a21b04e2b9655b4216 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Wed, 5 Feb 2025 17:05:38 +0100
Subject: [PATCH 32/54] update comparison test weighted irm and apo

---
 doubleml/irm/tests/test_irm.py         |  3 +--
 doubleml/irm/tests/test_irm_vs_apos.py | 15 +++++++++++++--
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/doubleml/irm/tests/test_irm.py b/doubleml/irm/tests/test_irm.py
index 0a924626..04d9596d 100644
--- a/doubleml/irm/tests/test_irm.py
+++ b/doubleml/irm/tests/test_irm.py
@@ -340,7 +340,6 @@ def test_dml_irm_atte_weights(dml_irm_weights_fixture):
     assert math.isclose(
         dml_irm_weights_fixture["coef_atte"], dml_irm_weights_fixture["coef_atte_weights"], rel_tol=1e-9, abs_tol=1e-4
     )
-    # Remark that the scores are slightly different (Y instead of g(1,X) and coefficient of theta)
     assert math.isclose(
-        dml_irm_weights_fixture["se_atte"], dml_irm_weights_fixture["se_atte_weights"], rel_tol=1e-5, abs_tol=1e-3
+        dml_irm_weights_fixture["se_atte"], dml_irm_weights_fixture["se_atte_weights"], rel_tol=1e-9, abs_tol=1e-4
     )
diff --git a/doubleml/irm/tests/test_irm_vs_apos.py b/doubleml/irm/tests/test_irm_vs_apos.py
index 248462f0..37016913 100644
--- a/doubleml/irm/tests/test_irm_vs_apos.py
+++ b/doubleml/irm/tests/test_irm_vs_apos.py
@@ -245,8 +245,12 @@ def test_apos_vs_irm_weighted_thetas(dml_irm_apos_weighted_fixture):
 
 @pytest.mark.ci
 def test_apos_vs_irm_weighted_ses(dml_irm_apos_weighted_fixture):
-    # Remark that the scores are slightly different (Y instead of g(1,X) and coefficient of theta)
-    # (for weighted and unweighted case in irm)
+    assert np.allclose(
+        dml_irm_apos_weighted_fixture["dml_irm"].framework.all_ses,
+        dml_irm_apos_weighted_fixture["dml_irm_weighted"].framework.all_ses,
+        rtol=1e-9,
+        atol=1e-4,
+    )
 
     assert np.allclose(
         dml_irm_apos_weighted_fixture["dml_irm_weighted"].framework.all_ses,
@@ -258,6 +262,13 @@ def test_apos_vs_irm_weighted_ses(dml_irm_apos_weighted_fixture):
 
 @pytest.mark.ci
 def test_apos_vs_irm_weighted_confint(dml_irm_apos_weighted_fixture):
+    assert np.allclose(
+        dml_irm_apos_weighted_fixture["irm_confint"],
+        dml_irm_apos_weighted_fixture["irm_weighted_confint"],
+        rtol=1e-9,
+        atol=1e-4,
+    )
+
     assert np.allclose(
         dml_irm_apos_weighted_fixture["irm_weighted_confint"],
         dml_irm_apos_weighted_fixture["causal_contrast_confint"],

From 8ea4a38bcb7f0695f9add97ae20ec2655216a562 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Thu, 6 Feb 2025 15:47:47 +0100
Subject: [PATCH 33/54] move _compute_sensitivity_bias to utils

---
 doubleml/double_ml.py          | 32 ++++++++++++++------------------
 doubleml/utils/_sensitivity.py |  9 +++++++++
 2 files changed, 23 insertions(+), 18 deletions(-)
 create mode 100644 doubleml/utils/_sensitivity.py

diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py
index 441fb15e..1b88c8ee 100644
--- a/doubleml/double_ml.py
+++ b/doubleml/double_ml.py
@@ -11,6 +11,7 @@
 from .double_ml_framework import DoubleMLFramework
 from .utils._checks import _check_external_predictions, _check_sample_splitting
 from .utils._estimation import _aggregate_coefs_and_ses, _rmse, _set_external_predictions, _var_est
+from .utils._sensitivity import _compute_sensitivity_bias
 from .utils.gain_statistics import gain_statistics
 from .utils.resampling import DoubleMLClusterResampling, DoubleMLResampling
 
@@ -560,16 +561,23 @@ def construct_framework(self):
         }
 
         if self._sensitivity_implemented:
+            # reshape sensitivity elements to (1 or n_obs, n_coefs, n_rep)
+            sensitivity_dict = {
+                "sigma2": np.transpose(self.sensitivity_elements["sigma2"], (0, 2, 1)),
+                "nu2": np.transpose(self.sensitivity_elements["nu2"], (0, 2, 1)),
+                "psi_sigma2": np.transpose(self.sensitivity_elements["psi_sigma2"], (0, 2, 1)),
+                "psi_nu2": np.transpose(self.sensitivity_elements["psi_nu2"], (0, 2, 1)),
+            }
+
+            max_bias, psi_max_bias = _compute_sensitivity_bias(**sensitivity_dict)
 
-            max_bias, psi_max_bias = self._compute_sensitivity_bias()
-            # reshape sensitivity elements to (n_obs, n_coefs, n_rep)
             doubleml_dict.update(
                 {
                     "sensitivity_elements": {
-                        "max_bias": np.transpose(max_bias, (0, 2, 1)),
-                        "psi_max_bias": np.transpose(psi_max_bias, (0, 2, 1)),
-                        "sigma2": np.transpose(self.sensitivity_elements["sigma2"], (0, 2, 1)),
-                        "nu2": np.transpose(self.sensitivity_elements["nu2"], (0, 2, 1)),
+                        "max_bias": max_bias,
+                        "psi_max_bias": psi_max_bias,
+                        "sigma2": sensitivity_dict["sigma2"],
+                        "nu2": sensitivity_dict["nu2"],
                     }
                 }
             )
@@ -1449,18 +1457,6 @@ def _validate_sensitivity_elements(self):
 
         return
 
-    def _compute_sensitivity_bias(self):
-        sigma2 = self.sensitivity_elements["sigma2"]
-        nu2 = self.sensitivity_elements["nu2"]
-        psi_sigma2 = self.sensitivity_elements["psi_sigma2"]
-        psi_nu2 = self.sensitivity_elements["psi_nu2"]
-
-        max_bias = np.sqrt(np.multiply(sigma2, nu2))
-        psi_max_bias = np.divide(
-            np.add(np.multiply(sigma2, psi_nu2), np.multiply(nu2, psi_sigma2)), np.multiply(2.0, max_bias)
-        )
-        return max_bias, psi_max_bias
-
     def _get_sensitivity_elements(self, i_rep, i_treat):
         sensitivity_elements = {key: value[:, i_rep, i_treat] for key, value in self.sensitivity_elements.items()}
         return sensitivity_elements
diff --git a/doubleml/utils/_sensitivity.py b/doubleml/utils/_sensitivity.py
new file mode 100644
index 00000000..3765f018
--- /dev/null
+++ b/doubleml/utils/_sensitivity.py
@@ -0,0 +1,9 @@
+import numpy as np
+
+
+def _compute_sensitivity_bias(sigma2, nu2, psi_sigma2, psi_nu2):
+    max_bias = np.sqrt(np.multiply(sigma2, nu2))
+    psi_max_bias = np.divide(
+        np.add(np.multiply(sigma2, psi_nu2), np.multiply(nu2, psi_sigma2)), np.multiply(2.0, max_bias)
+    )
+    return max_bias, psi_max_bias

From 54f59b1683f9aa95cb3978c4b7e38dcfb40bf1f3 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Fri, 7 Feb 2025 08:29:31 +0100
Subject: [PATCH 34/54] update apo model

---
 doubleml/irm/apo.py | 112 +++++++++++++++++++++++++-------------------
 1 file changed, 64 insertions(+), 48 deletions(-)

diff --git a/doubleml/irm/apo.py b/doubleml/irm/apo.py
index 514a6472..987a58fc 100644
--- a/doubleml/irm/apo.py
+++ b/doubleml/irm/apo.py
@@ -183,7 +183,7 @@ def weights(self):
         return self._weights
 
     def _initialize_ml_nuisance_params(self):
-        valid_learner = ["ml_g0", "ml_g1", "ml_m"]
+        valid_learner = ["ml_g_d_lvl0", "ml_g_d_lvl1", "ml_m"]
         self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in valid_learner}
 
     def _initialize_weights(self, weights):
@@ -207,64 +207,68 @@ def _get_weights(self):
 
     def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False):
         x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
+        x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
+        dx = np.column_stack((d, x))
         # use the treated indicator to get the correct sample splits
-        x, treated = check_X_y(x, self.treated, force_all_finite=False)
+        treated = self.treated
 
         # get train indices for d == treatment_level
         smpls_d0, smpls_d1 = _get_cond_smpls(smpls, treated)
-        g0_external = external_predictions["ml_g0"] is not None
-        g1_external = external_predictions["ml_g1"] is not None
+        g_d_lvl0_external = external_predictions["ml_g_d_lvl0"] is not None
+        g_d_lvl1_external = external_predictions["ml_g_d_lvl1"] is not None
         m_external = external_predictions["ml_m"] is not None
 
-        # nuisance g (g0 only relevant for sensitivity analysis)
-        if g0_external:
+        # nuisance g_d_lvl1 (relevant for score as (average) counterfactuals)
+        if g_d_lvl1_external:
             # use external predictions
-            g_hat0 = {
-                "preds": external_predictions["ml_g0"],
-                "targets": _cond_targets(y, cond_sample=(treated == 0)),
+            g_hat_d_lvl1 = {
+                "preds": external_predictions["ml_g_d_lvl1"],
+                "targets": _cond_targets(y, cond_sample=(treated == 1)),
                 "models": None,
             }
         else:
-            g_hat0 = _dml_cv_predict(
+            g_hat_d_lvl1 = _dml_cv_predict(
                 self._learner["ml_g"],
                 x,
                 y,
-                smpls=smpls_d0,
+                smpls=smpls_d1,
                 n_jobs=n_jobs_cv,
-                est_params=self._get_params("ml_g0"),
+                est_params=self._get_params("ml_g_d_lvl1"),
                 method=self._predict_method["ml_g"],
                 return_models=return_models,
             )
-            _check_finite_predictions(g_hat0["preds"], self._learner["ml_g"], "ml_g", smpls)
-            g_hat0["targets"] = _cond_targets(g_hat0["targets"], cond_sample=(treated == 0))
+            _check_finite_predictions(g_hat_d_lvl1["preds"], self._learner["ml_g"], "ml_g", smpls)
+            # adjust target values to consider only compatible subsamples
+            g_hat_d_lvl1["targets"] = _cond_targets(g_hat_d_lvl1["targets"], cond_sample=(treated == 1))
 
         if self._dml_data.binary_outcome:
-            _check_binary_predictions(g_hat0["preds"], self._learner["ml_g"], "ml_g", self._dml_data.y_col)
+            _check_binary_predictions(g_hat_d_lvl1["preds"], self._learner["ml_g"], "ml_g", self._dml_data.y_col)
 
-        if g1_external:
+        # nuisance g (g for other treatment levels only relevant for sensitivity analysis)
+        if g_d_lvl0_external:
             # use external predictions
-            g_hat1 = {
-                "preds": external_predictions["ml_g1"],
-                "targets": _cond_targets(y, cond_sample=(treated == 1)),
+            g_hat_d_lvl0 = {
+                "preds": external_predictions["ml_g_d_lvl0"],
+                "targets": _cond_targets(y, cond_sample=(treated == 0)),
                 "models": None,
             }
         else:
-            g_hat1 = _dml_cv_predict(
+            g_hat_d_lvl0 = _dml_cv_predict(
                 self._learner["ml_g"],
-                x,
+                dx,  # used to obtain an estimation over several treatment levels (reduced variance in sensitivity)
                 y,
-                smpls=smpls_d1,
+                smpls=smpls_d0,
                 n_jobs=n_jobs_cv,
-                est_params=self._get_params("ml_g1"),
+                est_params=self._get_params("ml_g_d_lvl0"),
                 method=self._predict_method["ml_g"],
                 return_models=return_models,
             )
-            _check_finite_predictions(g_hat1["preds"], self._learner["ml_g"], "ml_g", smpls)
+            _check_finite_predictions(g_hat_d_lvl0["preds"], self._learner["ml_g"], "ml_g", smpls)
             # adjust target values to consider only compatible subsamples
-            g_hat1["targets"] = _cond_targets(g_hat1["targets"], cond_sample=(treated == 1))
+            g_hat_d_lvl0["targets"] = _cond_targets(g_hat_d_lvl0["targets"], cond_sample=(treated == 0))
 
         if self._dml_data.binary_outcome:
-            _check_binary_predictions(g_hat1["preds"], self._learner["ml_g"], "ml_g", self._dml_data.y_col)
+            _check_binary_predictions(g_hat_d_lvl0["preds"], self._learner["ml_g"], "ml_g", self._dml_data.y_col)
 
         # nuisance m
         if m_external:
@@ -287,25 +291,33 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
         # also trimm external predictions
         m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold)
 
-        psi_a, psi_b = self._score_elements(y, treated, g_hat0["preds"], g_hat1["preds"], m_hat["preds"], smpls)
+        psi_a, psi_b = self._score_elements(y, treated, g_hat_d_lvl0["preds"], g_hat_d_lvl1["preds"], m_hat["preds"], smpls)
         psi_elements = {"psi_a": psi_a, "psi_b": psi_b}
 
         preds = {
-            "predictions": {"ml_g0": g_hat0["preds"], "ml_g1": g_hat1["preds"], "ml_m": m_hat["preds"]},
-            "targets": {"ml_g0": g_hat0["targets"], "ml_g1": g_hat1["targets"], "ml_m": m_hat["targets"]},
-            "models": {"ml_g0": g_hat0["models"], "ml_g1": g_hat1["models"], "ml_m": m_hat["models"]},
+            "predictions": {
+                "ml_g_d_lvl0": g_hat_d_lvl0["preds"],
+                "ml_g_d_lvl1": g_hat_d_lvl1["preds"],
+                "ml_m": m_hat["preds"],
+            },
+            "targets": {
+                "ml_g_d_lvl0": g_hat_d_lvl0["targets"],
+                "ml_g_d_lvl1": g_hat_d_lvl1["targets"],
+                "ml_m": m_hat["targets"],
+            },
+            "models": {"ml_g_d_lvl0": g_hat_d_lvl0["models"], "ml_g_d_lvl1": g_hat_d_lvl1["models"], "ml_m": m_hat["models"]},
         }
         return psi_elements, preds
 
-    def _score_elements(self, y, treated, g_hat0, g_hat1, m_hat, smpls):
+    def _score_elements(self, y, treated, g_hat_d_lvl0, g_hat_d_lvl1, m_hat, smpls):
         if self.normalize_ipw:
             m_hat_adj = _normalize_ipw(m_hat, treated)
         else:
             m_hat_adj = m_hat
 
-        u_hat = y - g_hat1
+        u_hat = y - g_hat_d_lvl1
         weights, weights_bar = self._get_weights()
-        psi_b = weights * g_hat1 + weights_bar * np.divide(np.multiply(treated, u_hat), m_hat_adj)
+        psi_b = weights * g_hat_d_lvl1 + weights_bar * np.divide(np.multiply(treated, u_hat), m_hat_adj)
         psi_a = -1.0 * np.divide(weights, np.mean(weights))  # TODO: check if this is correct
 
         return psi_a, psi_b
@@ -316,12 +328,12 @@ def _sensitivity_element_est(self, preds):
         treated = self.treated
 
         m_hat = preds["predictions"]["ml_m"]
-        g_hat0 = preds["predictions"]["ml_g0"]
-        g_hat1 = preds["predictions"]["ml_g1"]
+        g_hat_d_lvl0 = preds["predictions"]["ml_g_d_lvl0"]
+        g_hat_d_lvl1 = preds["predictions"]["ml_g_d_lvl1"]
 
         weights, weights_bar = self._get_weights()
 
-        sigma2_score_element = np.square(y - np.multiply(treated, g_hat1) - np.multiply(1.0 - treated, g_hat0))
+        sigma2_score_element = np.square(y - np.multiply(treated, g_hat_d_lvl1) - np.multiply(1.0 - treated, g_hat_d_lvl0))
         sigma2 = np.mean(sigma2_score_element)
         psi_sigma2 = sigma2_score_element - sigma2
 
@@ -346,7 +358,11 @@ def _nuisance_tuning(
         self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search
     ):
         x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
-        x, treated = check_X_y(x, self.treated, force_all_finite=False)
+        x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
+        dx = np.column_stack((d, x))
+        # use the treated indicator to get the correct sample splits
+        treated = self.treated
+
         # get train indices for d == 0 and d == 1
         smpls_d0, smpls_d1 = _get_cond_smpls(smpls, treated)
 
@@ -354,12 +370,12 @@ def _nuisance_tuning(
             scoring_methods = {"ml_g": None, "ml_m": None}
 
         train_inds = [train_index for (train_index, _) in smpls]
-        train_inds_d0 = [train_index for (train_index, _) in smpls_d0]
-        train_inds_d1 = [train_index for (train_index, _) in smpls_d1]
-        g0_tune_res = _dml_tune(
+        train_inds_d_lvl0 = [train_index for (train_index, _) in smpls_d0]
+        train_inds_d_lvl1 = [train_index for (train_index, _) in smpls_d1]
+        g_d_lvl0_tune_res = _dml_tune(
             y,
-            x,
-            train_inds_d0,
+            dx,  # used to obtain an estimation over several treatment levels (reduced variance in sensitivity)
+            train_inds_d_lvl0,
             self._learner["ml_g"],
             param_grids["ml_g"],
             scoring_methods["ml_g"],
@@ -368,10 +384,10 @@ def _nuisance_tuning(
             search_mode,
             n_iter_randomized_search,
         )
-        g1_tune_res = _dml_tune(
+        g_d_lvl1_tune_res = _dml_tune(
             y,
             x,
-            train_inds_d1,
+            train_inds_d_lvl1,
             self._learner["ml_g"],
             param_grids["ml_g"],
             scoring_methods["ml_g"],
@@ -394,12 +410,12 @@ def _nuisance_tuning(
             n_iter_randomized_search,
         )
 
-        g0_best_params = [xx.best_params_ for xx in g0_tune_res]
-        g1_best_params = [xx.best_params_ for xx in g1_tune_res]
+        g_d_lvl0_best_params = [xx.best_params_ for xx in g_d_lvl0_tune_res]
+        g_d_lvl1_best_params = [xx.best_params_ for xx in g_d_lvl1_tune_res]
         m_best_params = [xx.best_params_ for xx in m_tune_res]
 
-        params = {"ml_g0": g0_best_params, "ml_g1": g1_best_params, "ml_m": m_best_params}
-        tune_res = {"g0_tune": g0_tune_res, "g1_tune": g1_tune_res, "m_tune": m_tune_res}
+        params = {"ml_g_d_lvl0": g_d_lvl0_best_params, "ml_g_d_lvl1": g_d_lvl1_best_params, "ml_m": m_best_params}
+        tune_res = {"g_d_lvl0_tune": g_d_lvl0_tune_res, "g_d_lvl1_tune": g_d_lvl1_tune_res, "m_tune": m_tune_res}
 
         res = {"params": params, "tune_res": tune_res}
 

From de76905941214ab1f44425117fe43351e5d5f6de Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Fri, 7 Feb 2025 08:29:47 +0100
Subject: [PATCH 35/54] update apo tests

---
 doubleml/irm/tests/_utils_apo_manual.py         | 12 +++++++-----
 doubleml/irm/tests/test_apo.py                  |  6 +++---
 .../irm/tests/test_apo_external_predictions.py  | 17 ++++++++++++++---
 3 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/doubleml/irm/tests/_utils_apo_manual.py b/doubleml/irm/tests/_utils_apo_manual.py
index 3b74051f..69767df2 100644
--- a/doubleml/irm/tests/_utils_apo_manual.py
+++ b/doubleml/irm/tests/_utils_apo_manual.py
@@ -87,12 +87,13 @@ def fit_nuisance_apo(
 ):
     ml_g0 = clone(learner_g)
     ml_g1 = clone(learner_g)
+    dx = np.column_stack((d, x))
 
     train_cond0 = np.where(treated == 0)[0]
     if is_classifier(learner_g):
-        g_hat0_list = fit_predict_proba(y, x, ml_g0, g0_params, smpls, train_cond=train_cond0)
+        g_hat0_list = fit_predict_proba(y, dx, ml_g0, g0_params, smpls, train_cond=train_cond0)
     else:
-        g_hat0_list = fit_predict(y, x, ml_g0, g0_params, smpls, train_cond=train_cond0)
+        g_hat0_list = fit_predict(y, dx, ml_g0, g0_params, smpls, train_cond=train_cond0)
 
     train_cond1 = np.where(treated == 1)[0]
     if is_classifier(learner_g):
@@ -223,8 +224,8 @@ def fit_sensitivity_elements_apo(y, d, treatment_level, all_coef, predictions, s
 
     for i_rep in range(n_rep):
         m_hat = predictions["ml_m"][:, i_rep, 0]
-        g_hat0 = predictions["ml_g0"][:, i_rep, 0]
-        g_hat1 = predictions["ml_g1"][:, i_rep, 0]
+        g_hat0 = predictions["ml_g_d_lvl0"][:, i_rep, 0]
+        g_hat1 = predictions["ml_g_d_lvl1"][:, i_rep, 0]
 
         weights = np.ones_like(d)
         weights_bar = np.ones_like(d)
@@ -246,8 +247,9 @@ def fit_sensitivity_elements_apo(y, d, treatment_level, all_coef, predictions, s
 
 
 def tune_nuisance_apo(y, x, d, treatment_level, ml_g, ml_m, smpls, score, n_folds_tune, param_grid_g, param_grid_m):
+    dx = np.column_stack((d, x))
     train_cond0 = np.where(d != treatment_level)[0]
-    g0_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune, train_cond=train_cond0)
+    g0_tune_res = tune_grid_search(y, dx, ml_g, smpls, param_grid_g, n_folds_tune, train_cond=train_cond0)
 
     train_cond1 = np.where(d == treatment_level)[0]
     g1_tune_res = tune_grid_search(y, x, ml_g, smpls, param_grid_g, n_folds_tune, train_cond=train_cond1)
diff --git a/doubleml/irm/tests/test_apo.py b/doubleml/irm/tests/test_apo.py
index 920f6047..1c8f0813 100644
--- a/doubleml/irm/tests/test_apo.py
+++ b/doubleml/irm/tests/test_apo.py
@@ -44,7 +44,7 @@ def treatment_level(request):
 
 
 @pytest.fixture(scope="module")
-def dml_apo_fixture(generate_data_irm, learner, normalize_ipw, trimming_threshold, treatment_level):
+def dml_apo_fixture(learner, normalize_ipw, trimming_threshold, treatment_level):
     boot_methods = ["normal"]
     n_folds = 2
     n_rep_boot = 499
@@ -116,8 +116,8 @@ def dml_apo_fixture(generate_data_irm, learner, normalize_ipw, trimming_threshol
 
     prediction_dict = {
         "d": {
-            "ml_g0": dml_obj.predictions["ml_g0"].reshape(-1, 1),
-            "ml_g1": dml_obj.predictions["ml_g1"].reshape(-1, 1),
+            "ml_g_d_lvl0": dml_obj.predictions["ml_g_d_lvl0"].reshape(-1, 1),
+            "ml_g_d_lvl1": dml_obj.predictions["ml_g_d_lvl1"].reshape(-1, 1),
             "ml_m": dml_obj.predictions["ml_m"].reshape(-1, 1),
         }
     }
diff --git a/doubleml/irm/tests/test_apo_external_predictions.py b/doubleml/irm/tests/test_apo_external_predictions.py
index aa9b43f3..e3874fef 100644
--- a/doubleml/irm/tests/test_apo_external_predictions.py
+++ b/doubleml/irm/tests/test_apo_external_predictions.py
@@ -66,8 +66,8 @@ def doubleml_apo_ext_fixture(n_rep, set_ml_m_ext, set_ml_g_ext):
         ml_m = LogisticRegression(random_state=42)
 
     if set_ml_g_ext:
-        ext_predictions["d"]["ml_g0"] = dml_obj.predictions["ml_g0"][:, :, 0]
-        ext_predictions["d"]["ml_g1"] = dml_obj.predictions["ml_g1"][:, :, 0]
+        ext_predictions["d"]["ml_g_d_lvl0"] = dml_obj.predictions["ml_g_d_lvl0"][:, :, 0]
+        ext_predictions["d"]["ml_g_d_lvl1"] = dml_obj.predictions["ml_g_d_lvl1"][:, :, 0]
         ml_g = DMLDummyRegressor()
     else:
         ml_g = LinearRegression()
@@ -78,7 +78,12 @@ def doubleml_apo_ext_fixture(n_rep, set_ml_m_ext, set_ml_g_ext):
     np.random.seed(3141)
     dml_obj_ext.fit(external_predictions=ext_predictions)
 
-    res_dict = {"coef_normal": dml_obj.coef[0], "coef_ext": dml_obj_ext.coef[0]}
+    res_dict = {
+        "coef_normal": dml_obj.coef[0],
+        "coef_ext": dml_obj_ext.coef[0],
+        "se_normal": dml_obj.se[0],
+        "se_ext": dml_obj_ext.se[0],
+    }
 
     return res_dict
 
@@ -88,3 +93,9 @@ def test_doubleml_apo_ext_coef(doubleml_apo_ext_fixture):
     assert math.isclose(
         doubleml_apo_ext_fixture["coef_normal"], doubleml_apo_ext_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4
     )
+
+
+
+@pytest.mark.ci
+def test_doubleml_apo_ext_se(doubleml_apo_ext_fixture):
+    assert math.isclose(doubleml_apo_ext_fixture["se_normal"], doubleml_apo_ext_fixture["se_ext"], rel_tol=1e-9, abs_tol=1e-4)

From 3d7d3150897093e05e1872107e688f462956e497 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Fri, 7 Feb 2025 08:39:15 +0100
Subject: [PATCH 36/54] update apos model

---
 doubleml/irm/apos.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py
index 9fe5617d..a4e92b81 100644
--- a/doubleml/irm/apos.py
+++ b/doubleml/irm/apos.py
@@ -803,7 +803,7 @@ def _check_external_predictions(self, external_predictions):
                 + f"Passed keys: {set(external_predictions.keys())}."
             )
 
-        expected_learner_keys = ["ml_g0", "ml_g1", "ml_m"]
+        expected_learner_keys = ["ml_g_d_lvl0", "ml_g_d_lvl1", "ml_m"]
         for key, value in external_predictions.items():
             if not isinstance(value, dict):
                 raise TypeError(
@@ -821,12 +821,12 @@ def _rename_external_predictions(self, external_predictions):
         d_col = self._dml_data.d_cols[0]
         ext_pred_dict = {treatment_level: {d_col: {}} for treatment_level in self.treatment_levels}
         for treatment_level in self.treatment_levels:
-            if "ml_g1" in external_predictions[treatment_level]:
-                ext_pred_dict[treatment_level][d_col]["ml_g1"] = external_predictions[treatment_level]["ml_g1"]
+            if "ml_g_d_lvl1" in external_predictions[treatment_level]:
+                ext_pred_dict[treatment_level][d_col]["ml_g_d_lvl1"] = external_predictions[treatment_level]["ml_g_d_lvl1"]
             if "ml_m" in external_predictions[treatment_level]:
                 ext_pred_dict[treatment_level][d_col]["ml_m"] = external_predictions[treatment_level]["ml_m"]
-            if "ml_g0" in external_predictions[treatment_level]:
-                ext_pred_dict[treatment_level][d_col]["ml_g0"] = external_predictions[treatment_level]["ml_g0"]
+            if "ml_g_d_lvl0" in external_predictions[treatment_level]:
+                ext_pred_dict[treatment_level][d_col]["ml_g_d_lvl0"] = external_predictions[treatment_level]["ml_g_d_lvl0"]
 
         return ext_pred_dict
 

From 117d8167a708bf6381c1e675110d2b881ff40d40 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Fri, 7 Feb 2025 08:39:23 +0100
Subject: [PATCH 37/54] update apos tests

---
 .../irm/tests/test_apos_external_predictions.py  |  6 +++---
 doubleml/irm/tests/test_irm_vs_apos.py           | 16 ++++++++--------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/doubleml/irm/tests/test_apos_external_predictions.py b/doubleml/irm/tests/test_apos_external_predictions.py
index a6e2c912..4cbebdfb 100644
--- a/doubleml/irm/tests/test_apos_external_predictions.py
+++ b/doubleml/irm/tests/test_apos_external_predictions.py
@@ -72,8 +72,8 @@ def doubleml_apos_ext_fixture(n_rep, treatment_levels, set_ml_m_ext, set_ml_g_ex
 
     if set_ml_g_ext:
         for i_treatment_level, treatment_level in enumerate(treatment_levels):
-            ext_predictions[treatment_level]["ml_g0"] = dml_obj.modellist[i_treatment_level].predictions["ml_g0"][:, :, 0]
-            ext_predictions[treatment_level]["ml_g1"] = dml_obj.modellist[i_treatment_level].predictions["ml_g1"][:, :, 0]
+            ext_predictions[treatment_level]["ml_g_d_lvl0"] = dml_obj.modellist[i_treatment_level].predictions["ml_g_d_lvl0"][:, :, 0]
+            ext_predictions[treatment_level]["ml_g_d_lvl1"] = dml_obj.modellist[i_treatment_level].predictions["ml_g_d_lvl1"][:, :, 0]
         ml_g = DMLDummyRegressor()
     else:
         ml_g = LinearRegression()
@@ -105,7 +105,7 @@ def test_doubleml_apos_ext_coef(doubleml_apos_ext_fixture):
 @pytest.mark.ci
 def test_doubleml_apos_ext_pred_nuisance(doubleml_apos_ext_fixture):
     for i_level, _ in enumerate(doubleml_apos_ext_fixture["treatment_levels"]):
-        for nuisance_key in ["ml_g0", "ml_g1", "ml_m"]:
+        for nuisance_key in ["ml_g_d_lvl0", "ml_g_d_lvl1", "ml_m"]:
             assert np.allclose(
                 doubleml_apos_ext_fixture["dml_obj"].modellist[i_level].nuisance_loss[nuisance_key],
                 doubleml_apos_ext_fixture["dml_obj_ext"].modellist[i_level].nuisance_loss[nuisance_key],
diff --git a/doubleml/irm/tests/test_irm_vs_apos.py b/doubleml/irm/tests/test_irm_vs_apos.py
index 37016913..9f0ee4a0 100644
--- a/doubleml/irm/tests/test_irm_vs_apos.py
+++ b/doubleml/irm/tests/test_irm_vs_apos.py
@@ -74,13 +74,13 @@ def dml_irm_apos_fixture(generate_data_irm, learner, n_rep, normalize_ipw, trimm
     external_predictions_apos = {
         0: {
             "ml_m": 1.0 - m_hat,
-            "ml_g1": g0_hat,
-            "ml_g0": g1_hat,
+            "ml_g_d_lvl1": g0_hat,
+            "ml_g_d_lvl0": g1_hat,
         },
         1: {
             "ml_m": m_hat,
-            "ml_g1": g1_hat,
-            "ml_g0": g0_hat,
+            "ml_g_d_lvl1": g1_hat,
+            "ml_g_d_lvl0": g0_hat,
         },
     }
 
@@ -194,13 +194,13 @@ def dml_irm_apos_weighted_fixture(generate_data_irm, learner, n_rep, normalize_i
     external_predictions_apos = {
         0: {
             "ml_m": 1.0 - m_hat,
-            "ml_g1": g0_hat,
-            "ml_g0": g1_hat,
+            "ml_g_d_lvl1": g0_hat,
+            "ml_g_d_lvl0": g1_hat,
         },
         1: {
             "ml_m": m_hat,
-            "ml_g1": g1_hat,
-            "ml_g0": g0_hat,
+            "ml_g_d_lvl1": g1_hat,
+            "ml_g_d_lvl0": g0_hat,
         },
     }
 

From 82991de3bdde0177b7cfa1e1d7e1d398c4b55c89 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Fri, 7 Feb 2025 08:39:56 +0100
Subject: [PATCH 38/54] formatting

---
 doubleml/irm/tests/test_apo_external_predictions.py  | 1 -
 doubleml/irm/tests/test_apos_external_predictions.py | 8 ++++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/doubleml/irm/tests/test_apo_external_predictions.py b/doubleml/irm/tests/test_apo_external_predictions.py
index e3874fef..2bbe50e8 100644
--- a/doubleml/irm/tests/test_apo_external_predictions.py
+++ b/doubleml/irm/tests/test_apo_external_predictions.py
@@ -95,7 +95,6 @@ def test_doubleml_apo_ext_coef(doubleml_apo_ext_fixture):
     )
 
 
-
 @pytest.mark.ci
 def test_doubleml_apo_ext_se(doubleml_apo_ext_fixture):
     assert math.isclose(doubleml_apo_ext_fixture["se_normal"], doubleml_apo_ext_fixture["se_ext"], rel_tol=1e-9, abs_tol=1e-4)
diff --git a/doubleml/irm/tests/test_apos_external_predictions.py b/doubleml/irm/tests/test_apos_external_predictions.py
index 4cbebdfb..318e4566 100644
--- a/doubleml/irm/tests/test_apos_external_predictions.py
+++ b/doubleml/irm/tests/test_apos_external_predictions.py
@@ -72,8 +72,12 @@ def doubleml_apos_ext_fixture(n_rep, treatment_levels, set_ml_m_ext, set_ml_g_ex
 
     if set_ml_g_ext:
         for i_treatment_level, treatment_level in enumerate(treatment_levels):
-            ext_predictions[treatment_level]["ml_g_d_lvl0"] = dml_obj.modellist[i_treatment_level].predictions["ml_g_d_lvl0"][:, :, 0]
-            ext_predictions[treatment_level]["ml_g_d_lvl1"] = dml_obj.modellist[i_treatment_level].predictions["ml_g_d_lvl1"][:, :, 0]
+            ext_predictions[treatment_level]["ml_g_d_lvl0"] = dml_obj.modellist[i_treatment_level].predictions["ml_g_d_lvl0"][
+                :, :, 0
+            ]
+            ext_predictions[treatment_level]["ml_g_d_lvl1"] = dml_obj.modellist[i_treatment_level].predictions["ml_g_d_lvl1"][
+                :, :, 0
+            ]
         ml_g = DMLDummyRegressor()
     else:
         ml_g = LinearRegression()

From ef85a9ff1f29a925ec7b49401b3bf901ab62d8ba Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Fri, 7 Feb 2025 09:06:49 +0100
Subject: [PATCH 39/54] update skip_index in causal constrast

---
 doubleml/irm/apos.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py
index a4e92b81..ba8cb5e9 100644
--- a/doubleml/irm/apos.py
+++ b/doubleml/irm/apos.py
@@ -733,17 +733,21 @@ def causal_contrast(self, reference_levels):
                 "a single treatment level."
             )
 
-        skip_index = []
+        skip_index = set()
         all_treatment_names = []
         all_acc_frameworks = []
+
         for ref_lvl in reference_levels:
             i_ref_lvl = self.treatment_levels.index(ref_lvl)
             ref_framework = self.modellist[i_ref_lvl].framework
 
-            skip_index += [i_ref_lvl]
+            skip_index.add(i_ref_lvl)
+            # update frameworks
             all_acc_frameworks += [
                 model.framework - ref_framework for i, model in enumerate(self.modellist) if i not in skip_index
             ]
+
+            # update treatment names
             all_treatment_names += [
                 f"{self.treatment_levels[i]} vs {self.treatment_levels[i_ref_lvl]}"
                 for i in range(self.n_treatment_levels)

From ef45e5592329f60371ed89ff1bcfffb7c038d4b8 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Fri, 7 Feb 2025 09:14:21 +0100
Subject: [PATCH 40/54] update causal contrast inner loop

---
 doubleml/irm/apos.py | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py
index ba8cb5e9..8c2180bd 100644
--- a/doubleml/irm/apos.py
+++ b/doubleml/irm/apos.py
@@ -742,17 +742,16 @@ def causal_contrast(self, reference_levels):
             ref_framework = self.modellist[i_ref_lvl].framework
 
             skip_index.add(i_ref_lvl)
-            # update frameworks
-            all_acc_frameworks += [
-                model.framework - ref_framework for i, model in enumerate(self.modellist) if i not in skip_index
-            ]
-
-            # update treatment names
-            all_treatment_names += [
-                f"{self.treatment_levels[i]} vs {self.treatment_levels[i_ref_lvl]}"
-                for i in range(self.n_treatment_levels)
-                if i not in skip_index
-            ]
+            for i, model in enumerate(self.modellist):
+                # only comparisons which are not yet computed
+                if i in skip_index:
+                    continue
+
+                current_framework = model.framework - ref_framework
+                current_treatment_name = f"{self.treatment_levels[i]} vs {self.treatment_levels[i_ref_lvl]}"
+
+                all_acc_frameworks += [current_framework]
+                all_treatment_names += [current_treatment_name]
 
         acc = concat(all_acc_frameworks)
         acc.treatment_names = all_treatment_names

From bcbad76f4dc5142749fc6ad39ec80228eff5d38e Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Fri, 7 Feb 2025 10:09:12 +0100
Subject: [PATCH 41/54] add sensitivity update for apos

---
 doubleml/irm/apos.py | 41 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 39 insertions(+), 2 deletions(-)

diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py
index 8c2180bd..7cf2ab26 100644
--- a/doubleml/irm/apos.py
+++ b/doubleml/irm/apos.py
@@ -11,6 +11,7 @@
 from ..double_ml_framework import concat
 from ..utils._checks import _check_sample_splitting, _check_score, _check_trimming, _check_weights
 from ..utils._descriptive import generate_summary
+from ..utils._sensitivity import _compute_sensitivity_bias
 from ..utils.gain_statistics import gain_statistics
 from ..utils.resampling import DoubleMLResampling
 from .apo import DoubleMLAPO
@@ -739,7 +740,7 @@ def causal_contrast(self, reference_levels):
 
         for ref_lvl in reference_levels:
             i_ref_lvl = self.treatment_levels.index(ref_lvl)
-            ref_framework = self.modellist[i_ref_lvl].framework
+            ref_model = self.modellist[i_ref_lvl]
 
             skip_index.add(i_ref_lvl)
             for i, model in enumerate(self.modellist):
@@ -747,9 +748,13 @@ def causal_contrast(self, reference_levels):
                 if i in skip_index:
                     continue
 
-                current_framework = model.framework - ref_framework
+                current_framework = model.framework - ref_model.framework
                 current_treatment_name = f"{self.treatment_levels[i]} vs {self.treatment_levels[i_ref_lvl]}"
 
+                # update sensitivity elements with sharper bounds
+                current_sensitivity_dict = self._compute_causal_contrast_sensitivity_dict(model=model, ref_model=ref_model)
+                current_framework._check_and_set_sensitivity_elements(current_sensitivity_dict)
+
                 all_acc_frameworks += [current_framework]
                 all_treatment_names += [current_treatment_name]
 
@@ -771,6 +776,38 @@ def _fit_model(self, i_level, n_jobs_cv=None, store_predictions=True, store_mode
         )
         return model
 
+    def _compute_causal_contrast_sensitivity_dict(self, model, ref_model):
+        # reshape sensitivity elements to (1 or n_obs, n_coefs, n_rep)
+        model_sigma2 = np.transpose(model.sensitivity_elements["sigma2"], (0, 2, 1))
+        model_nu2 = np.transpose(model.sensitivity_elements["nu2"], (0, 2, 1))
+        model_psi_sigma2 = np.transpose(model.sensitivity_elements["psi_sigma2"], (0, 2, 1))
+        model_psi_nu2 = np.transpose(model.sensitivity_elements["psi_nu2"], (0, 2, 1))
+
+        ref_model_sigma2 = np.transpose(ref_model.sensitivity_elements["sigma2"], (0, 2, 1))
+        ref_model_nu2 = np.transpose(ref_model.sensitivity_elements["nu2"], (0, 2, 1))
+        ref_model_psi_sigma2 = np.transpose(ref_model.sensitivity_elements["psi_sigma2"], (0, 2, 1))
+        ref_model_psi_nu2 = np.transpose(ref_model.sensitivity_elements["psi_nu2"], (0, 2, 1))
+
+        combined_sensitivity_dict = {
+            "sigma2": (model_sigma2 + ref_model_sigma2) / 2,
+            "nu2": model_nu2 + ref_model_nu2,
+            "psi_sigma2": (model_psi_sigma2 + ref_model_psi_sigma2) / 2,
+            "psi_nu2": model_psi_nu2 + ref_model_psi_nu2,
+        }
+
+        max_bias, psi_max_bias = _compute_sensitivity_bias(**combined_sensitivity_dict)
+
+        new_sensitvitiy_dict = {
+            "sensitivity_elements": {
+                "max_bias": max_bias,
+                "psi_max_bias": psi_max_bias,
+                "sigma2": combined_sensitivity_dict["sigma2"],
+                "nu2": combined_sensitivity_dict["nu2"],
+            }
+        }
+
+        return new_sensitvitiy_dict
+
     def _check_treatment_levels(self, treatment_levels):
         is_iterable = isinstance(treatment_levels, Iterable)
         if not is_iterable:

From 5da072eb969fb8bbaa563e1dc7103cdbaf1440f2 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Fri, 7 Feb 2025 11:05:52 +0100
Subject: [PATCH 42/54] add sensitivity comparison to unit tests

---
 doubleml/irm/tests/test_irm_vs_apos.py | 43 ++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/doubleml/irm/tests/test_irm_vs_apos.py b/doubleml/irm/tests/test_irm_vs_apos.py
index 9f0ee4a0..3efa1a0b 100644
--- a/doubleml/irm/tests/test_irm_vs_apos.py
+++ b/doubleml/irm/tests/test_irm_vs_apos.py
@@ -214,6 +214,11 @@ def dml_irm_apos_weighted_fixture(generate_data_irm, learner, n_rep, normalize_i
     irm_weighted_confint = dml_irm_weighted.confint().values
     causal_contrast_confint = causal_contrast.confint().values
 
+    # sensitivity analysis
+    dml_irm.sensitivity_analysis()
+    dml_irm_weighted.sensitivity_analysis()
+    causal_contrast.sensitivity_analysis()
+
     result_dict = {
         "dml_irm": dml_irm,
         "dml_irm_weighted": dml_irm_weighted,
@@ -275,3 +280,41 @@ def test_apos_vs_irm_weighted_confint(dml_irm_apos_weighted_fixture):
         rtol=1e-9,
         atol=1e-4,
     )
+
+
+@pytest.mark.ci
+def test_apos_vs_irm_weighted_sensitivity(dml_irm_apos_weighted_fixture):
+    params_irm = dml_irm_apos_weighted_fixture["dml_irm"].sensitivity_params
+    params_irm_weighted = dml_irm_apos_weighted_fixture["dml_irm_weighted"].sensitivity_params
+    params_causal_contrast = dml_irm_apos_weighted_fixture["causal_contrast"].sensitivity_params
+
+    for key in ["theta", "se", "ci"]:
+        for boundary in ["upper", "lower"]:
+            assert np.allclose(
+                params_irm[key][boundary],
+                params_irm_weighted[key][boundary],
+                rtol=1e-9,
+                atol=1e-4,
+            )
+
+            assert np.allclose(
+                params_irm_weighted[key][boundary],
+                params_causal_contrast[key][boundary],
+                rtol=1e-9,
+                atol=1e-4,
+            )
+
+    for key in ["rv", "rva"]:
+        assert np.allclose(
+            params_irm[key],
+            params_irm_weighted[key],
+            rtol=1e-9,
+            atol=1e-4,
+        )
+
+        assert np.allclose(
+            params_irm_weighted[key],
+            params_causal_contrast[key],
+            rtol=1e-9,
+            atol=1e-4,
+        )

From ee74514307a596b7ff04d383bf4cf5ed4727c50d Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Fri, 7 Feb 2025 15:38:15 +0100
Subject: [PATCH 43/54] update irm_vs_apo sensitivity test

---
 doubleml/irm/tests/test_irm_vs_apos.py | 36 ++++++++++++++++++++------
 1 file changed, 28 insertions(+), 8 deletions(-)

diff --git a/doubleml/irm/tests/test_irm_vs_apos.py b/doubleml/irm/tests/test_irm_vs_apos.py
index 3efa1a0b..08b89e0f 100644
--- a/doubleml/irm/tests/test_irm_vs_apos.py
+++ b/doubleml/irm/tests/test_irm_vs_apos.py
@@ -1,5 +1,3 @@
-import copy
-
 import numpy as np
 import pytest
 from sklearn.base import clone
@@ -7,7 +5,6 @@
 from sklearn.linear_model import LinearRegression, LogisticRegression
 
 import doubleml as dml
-from doubleml.utils._estimation import _normalize_ipw
 
 
 @pytest.fixture(
@@ -91,6 +88,10 @@ def dml_irm_apos_fixture(generate_data_irm, learner, n_rep, normalize_ipw, trimm
     irm_confint = dml_irm.confint().values
     causal_contrast_confint = causal_contrast.confint().values
 
+    # sensitivity analysis
+    dml_irm.sensitivity_analysis()
+    causal_contrast.sensitivity_analysis()
+
     result_dict = {
         "dml_irm": dml_irm,
         "dml_apos": dml_apos,
@@ -131,6 +132,29 @@ def test_apos_vs_irm_confint(dml_irm_apos_fixture):
     )
 
 
+@pytest.mark.ci
+def test_apos_vs_irm_sensitivity(dml_irm_apos_fixture):
+    params_irm = dml_irm_apos_fixture["dml_irm"].sensitivity_params
+    params_causal_contrast = dml_irm_apos_fixture["causal_contrast"].sensitivity_params
+
+    for key in ["theta", "se", "ci"]:
+        for boundary in ["upper", "lower"]:
+            assert np.allclose(
+                params_irm[key][boundary],
+                params_causal_contrast[key][boundary],
+                rtol=1e-9,
+                atol=1e-4,
+            )
+
+    for key in ["rv", "rva"]:
+        assert np.allclose(
+            params_irm[key],
+            params_causal_contrast[key],
+            rtol=1e-9,
+            atol=1e-4,
+        )
+
+
 @pytest.fixture(scope="module")
 def dml_irm_apos_weighted_fixture(generate_data_irm, learner, n_rep, normalize_ipw, trimming_threshold):
 
@@ -165,13 +189,9 @@ def dml_irm_apos_weighted_fixture(generate_data_irm, learner, n_rep, normalize_i
 
     # define weights
     p_hat = np.mean(d)
-    m_hat_adjusted = copy.deepcopy(m_hat)
-    if normalize_ipw:
-        for i_rep in range(n_rep):
-            m_hat_adjusted[:, i_rep] = _normalize_ipw(m_hat[:, i_rep], d)
     weights_dict = {
         "weights": d / p_hat,
-        "weights_bar": m_hat_adjusted / p_hat,
+        "weights_bar": m_hat / p_hat,
     }
 
     external_predictions_irm = {

From 55b7e45b01971657ac7e7f2708d5155c5f840eb3 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Fri, 7 Feb 2025 18:07:24 +0100
Subject: [PATCH 44/54] update atte test irm vs apo

---
 doubleml/irm/tests/test_irm_vs_apos.py | 38 ++++++++++++++++----------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/doubleml/irm/tests/test_irm_vs_apos.py b/doubleml/irm/tests/test_irm_vs_apos.py
index 08b89e0f..ee8c0d1e 100644
--- a/doubleml/irm/tests/test_irm_vs_apos.py
+++ b/doubleml/irm/tests/test_irm_vs_apos.py
@@ -1,3 +1,5 @@
+import copy
+
 import numpy as np
 import pytest
 from sklearn.base import clone
@@ -5,6 +7,7 @@
 from sklearn.linear_model import LinearRegression, LogisticRegression
 
 import doubleml as dml
+from doubleml.utils._estimation import _normalize_ipw
 
 
 @pytest.fixture(
@@ -189,9 +192,13 @@ def dml_irm_apos_weighted_fixture(generate_data_irm, learner, n_rep, normalize_i
 
     # define weights
     p_hat = np.mean(d)
+    m_hat_adjusted = copy.deepcopy(m_hat)
+    if normalize_ipw:
+        for i_rep in range(n_rep):
+            m_hat_adjusted[:, i_rep] = _normalize_ipw(m_hat[:, i_rep], d)
     weights_dict = {
         "weights": d / p_hat,
-        "weights_bar": m_hat / p_hat,
+        "weights_bar": m_hat_adjusted / p_hat,
     }
 
     external_predictions_irm = {
@@ -304,18 +311,20 @@ def test_apos_vs_irm_weighted_confint(dml_irm_apos_weighted_fixture):
 
 @pytest.mark.ci
 def test_apos_vs_irm_weighted_sensitivity(dml_irm_apos_weighted_fixture):
-    params_irm = dml_irm_apos_weighted_fixture["dml_irm"].sensitivity_params
+    # TODO: Include after normalize_ipw rework, see Issue https://github.com/DoubleML/doubleml-for-py/issues/296
+    # params_irm = dml_irm_apos_weighted_fixture["dml_irm"].sensitivity_params
     params_irm_weighted = dml_irm_apos_weighted_fixture["dml_irm_weighted"].sensitivity_params
     params_causal_contrast = dml_irm_apos_weighted_fixture["causal_contrast"].sensitivity_params
 
     for key in ["theta", "se", "ci"]:
         for boundary in ["upper", "lower"]:
-            assert np.allclose(
-                params_irm[key][boundary],
-                params_irm_weighted[key][boundary],
-                rtol=1e-9,
-                atol=1e-4,
-            )
+            # TODO: Include after normalize_ipw rework, see Issue https://github.com/DoubleML/doubleml-for-py/issues/296
+            # assert np.allclose(
+            #     params_irm[key][boundary],
+            #     params_irm_weighted[key][boundary],
+            #     rtol=1e-9,
+            #     atol=1e-4,
+            # )
 
             assert np.allclose(
                 params_irm_weighted[key][boundary],
@@ -325,12 +334,13 @@ def test_apos_vs_irm_weighted_sensitivity(dml_irm_apos_weighted_fixture):
             )
 
     for key in ["rv", "rva"]:
-        assert np.allclose(
-            params_irm[key],
-            params_irm_weighted[key],
-            rtol=1e-9,
-            atol=1e-4,
-        )
+        # TODO: Include after normalize_ipw rework, see Issue https://github.com/DoubleML/doubleml-for-py/issues/296
+        # assert np.allclose(
+        #     params_irm[key],
+        #     params_irm_weighted[key],
+        #     rtol=1e-9,
+        #     atol=1e-4,
+        # )
 
         assert np.allclose(
             params_irm_weighted[key],

From 74672673e1eac3b36efcd36c1b3ee83e75e063eb Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Sat, 8 Feb 2025 17:00:29 +0100
Subject: [PATCH 45/54] fix return type test

---
 doubleml/tests/test_return_types.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/doubleml/tests/test_return_types.py b/doubleml/tests/test_return_types.py
index c4a725fc..51c39c24 100644
--- a/doubleml/tests/test_return_types.py
+++ b/doubleml/tests/test_return_types.py
@@ -322,8 +322,8 @@ def test_stored_predictions():
     assert ssm_obj.predictions["ml_m"].shape == (n_obs, n_rep, n_treat)
     assert ssm_obj.predictions["ml_pi"].shape == (n_obs, n_rep, n_treat)
 
-    assert apo_obj.predictions["ml_g0"].shape == (n_obs, n_rep, n_treat)
-    assert apo_obj.predictions["ml_g1"].shape == (n_obs, n_rep, n_treat)
+    assert apo_obj.predictions["ml_g_d_lvl0"].shape == (n_obs, n_rep, n_treat)
+    assert apo_obj.predictions["ml_g_d_lvl1"].shape == (n_obs, n_rep, n_treat)
     assert apo_obj.predictions["ml_m"].shape == (n_obs, n_rep, n_treat)
 
 
@@ -373,8 +373,8 @@ def test_stored_nuisance_targets():
     assert ssm_obj.nuisance_targets["ml_m"].shape == (n_obs, n_rep, n_treat)
     assert ssm_obj.nuisance_targets["ml_pi"].shape == (n_obs, n_rep, n_treat)
 
-    assert apo_obj.nuisance_targets["ml_g0"].shape == (n_obs, n_rep, n_treat)
-    assert apo_obj.nuisance_targets["ml_g1"].shape == (n_obs, n_rep, n_treat)
+    assert apo_obj.nuisance_targets["ml_g_d_lvl0"].shape == (n_obs, n_rep, n_treat)
+    assert apo_obj.nuisance_targets["ml_g_d_lvl1"].shape == (n_obs, n_rep, n_treat)
     assert apo_obj.nuisance_targets["ml_m"].shape == (n_obs, n_rep, n_treat)
 
 
@@ -424,8 +424,8 @@ def test_nuisance_loss():
     assert ssm_obj.nuisance_loss["ml_m"].shape == (n_rep, n_treat)
     assert ssm_obj.nuisance_loss["ml_pi"].shape == (n_rep, n_treat)
 
-    assert apo_obj.nuisance_loss["ml_g0"].shape == (n_rep, n_treat)
-    assert apo_obj.nuisance_loss["ml_g1"].shape == (n_rep, n_treat)
+    assert apo_obj.nuisance_loss["ml_g_d_lvl0"].shape == (n_rep, n_treat)
+    assert apo_obj.nuisance_loss["ml_g_d_lvl1"].shape == (n_rep, n_treat)
     assert apo_obj.nuisance_loss["ml_m"].shape == (n_rep, n_treat)
 
 

From 0152e75375e30ec504dd3eb6d44667188737f352 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 10 Feb 2025 08:16:17 +0100
Subject: [PATCH 46/54] move location from _trimm and _normalize_ipw

---
 doubleml/did/did.py                           | 11 +++++-----
 doubleml/did/did_cs.py                        | 11 +++++-----
 doubleml/irm/apo.py                           | 11 +++++-----
 doubleml/irm/apos.py                          | 18 ++++++++---------
 doubleml/irm/cvar.py                          | 13 ++++++------
 doubleml/irm/iivm.py                          | 11 +++++-----
 doubleml/irm/irm.py                           | 15 +++++++-------
 doubleml/irm/lpq.py                           | 13 ++++++------
 doubleml/irm/pq.py                            | 13 ++++++------
 doubleml/irm/qte.py                           | 18 ++++++++---------
 doubleml/irm/ssm.py                           | 11 +++++-----
 doubleml/irm/tests/_utils_apo_manual.py       |  2 +-
 doubleml/irm/tests/_utils_cvar_manual.py      |  3 ++-
 doubleml/irm/tests/_utils_iivm_manual.py      |  2 +-
 doubleml/irm/tests/_utils_irm_manual.py       |  2 +-
 doubleml/irm/tests/_utils_lpq_manual.py       |  3 ++-
 doubleml/irm/tests/_utils_pq_manual.py        |  3 ++-
 doubleml/irm/tests/_utils_ssm_manual.py       |  3 ++-
 doubleml/irm/tests/test_irm_vs_apos.py        |  2 +-
 .../irm/tests/test_irm_weighted_scores.py     |  2 +-
 doubleml/utils/_estimation.py                 | 17 ----------------
 doubleml/utils/_propensity_score.py           | 20 +++++++++++++++++++
 doubleml/utils/_sensitivity.py                |  4 +---
 23 files changed, 108 insertions(+), 100 deletions(-)
 create mode 100644 doubleml/utils/_propensity_score.py

diff --git a/doubleml/did/did.py b/doubleml/did/did.py
index 20e22375..e71068f2 100644
--- a/doubleml/did/did.py
+++ b/doubleml/did/did.py
@@ -4,11 +4,12 @@
 from sklearn.utils import check_X_y
 from sklearn.utils.multiclass import type_of_target
 
-from ..double_ml import DoubleML
-from ..double_ml_data import DoubleMLData
-from ..double_ml_score_mixins import LinearScoreMixin
-from ..utils._checks import _check_finite_predictions, _check_is_propensity, _check_score, _check_trimming
-from ..utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls, _trimm
+from doubleml.double_ml import DoubleML
+from doubleml.double_ml_data import DoubleMLData
+from doubleml.double_ml_score_mixins import LinearScoreMixin
+from doubleml.utils._checks import _check_finite_predictions, _check_is_propensity, _check_score, _check_trimming
+from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls
+from doubleml.utils._propensity_score import _trimm
 
 
 class DoubleMLDID(LinearScoreMixin, DoubleML):
diff --git a/doubleml/did/did_cs.py b/doubleml/did/did_cs.py
index 2b7b5b12..a198bcea 100644
--- a/doubleml/did/did_cs.py
+++ b/doubleml/did/did_cs.py
@@ -4,11 +4,12 @@
 from sklearn.utils import check_X_y
 from sklearn.utils.multiclass import type_of_target
 
-from ..double_ml import DoubleML
-from ..double_ml_data import DoubleMLData
-from ..double_ml_score_mixins import LinearScoreMixin
-from ..utils._checks import _check_finite_predictions, _check_is_propensity, _check_score, _check_trimming
-from ..utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls_2d, _trimm
+from doubleml.double_ml import DoubleML
+from doubleml.double_ml_data import DoubleMLData
+from doubleml.double_ml_score_mixins import LinearScoreMixin
+from doubleml.utils._checks import _check_finite_predictions, _check_is_propensity, _check_score, _check_trimming
+from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls_2d
+from doubleml.utils._propensity_score import _trimm
 
 
 class DoubleMLDIDCS(LinearScoreMixin, DoubleML):
diff --git a/doubleml/irm/apo.py b/doubleml/irm/apo.py
index 987a58fc..e0d1e4fc 100644
--- a/doubleml/irm/apo.py
+++ b/doubleml/irm/apo.py
@@ -4,9 +4,9 @@
 import pandas as pd
 from sklearn.utils import check_X_y
 
-from ..double_ml import DoubleML
-from ..double_ml_score_mixins import LinearScoreMixin
-from ..utils._checks import (
+from doubleml.double_ml import DoubleML
+from doubleml.double_ml_score_mixins import LinearScoreMixin
+from doubleml.utils._checks import (
     _check_binary_predictions,
     _check_finite_predictions,
     _check_is_propensity,
@@ -14,8 +14,9 @@
     _check_trimming,
     _check_weights,
 )
-from ..utils._estimation import _cond_targets, _dml_cv_predict, _dml_tune, _get_cond_smpls, _normalize_ipw, _trimm
-from ..utils.blp import DoubleMLBLP
+from doubleml.utils._estimation import _cond_targets, _dml_cv_predict, _dml_tune, _get_cond_smpls
+from doubleml.utils._propensity_score import _normalize_ipw, _trimm
+from doubleml.utils.blp import DoubleMLBLP
 
 
 class DoubleMLAPO(LinearScoreMixin, DoubleML):
diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py
index 7cf2ab26..e9b160cb 100644
--- a/doubleml/irm/apos.py
+++ b/doubleml/irm/apos.py
@@ -6,15 +6,15 @@
 from joblib import Parallel, delayed
 from sklearn.base import clone
 
-from ..double_ml import DoubleML
-from ..double_ml_data import DoubleMLClusterData, DoubleMLData
-from ..double_ml_framework import concat
-from ..utils._checks import _check_sample_splitting, _check_score, _check_trimming, _check_weights
-from ..utils._descriptive import generate_summary
-from ..utils._sensitivity import _compute_sensitivity_bias
-from ..utils.gain_statistics import gain_statistics
-from ..utils.resampling import DoubleMLResampling
-from .apo import DoubleMLAPO
+from doubleml.double_ml import DoubleML
+from doubleml.double_ml_data import DoubleMLClusterData, DoubleMLData
+from doubleml.double_ml_framework import concat
+from doubleml.irm.apo import DoubleMLAPO
+from doubleml.utils._checks import _check_sample_splitting, _check_score, _check_trimming, _check_weights
+from doubleml.utils._descriptive import generate_summary
+from doubleml.utils._sensitivity import _compute_sensitivity_bias
+from doubleml.utils.gain_statistics import gain_statistics
+from doubleml.utils.resampling import DoubleMLResampling
 
 
 class DoubleMLAPOS:
diff --git a/doubleml/irm/cvar.py b/doubleml/irm/cvar.py
index 6190b078..e77031e6 100644
--- a/doubleml/irm/cvar.py
+++ b/doubleml/irm/cvar.py
@@ -3,10 +3,10 @@
 from sklearn.model_selection import StratifiedKFold, train_test_split
 from sklearn.utils import check_X_y
 
-from ..double_ml import DoubleML
-from ..double_ml_data import DoubleMLData
-from ..double_ml_score_mixins import LinearScoreMixin
-from ..utils._checks import (
+from doubleml.double_ml import DoubleML
+from doubleml.double_ml_data import DoubleMLData
+from doubleml.double_ml_score_mixins import LinearScoreMixin
+from doubleml.utils._checks import (
     _check_contains_iv,
     _check_quantile,
     _check_score,
@@ -14,16 +14,15 @@
     _check_trimming,
     _check_zero_one_treatment,
 )
-from ..utils._estimation import (
+from doubleml.utils._estimation import (
     _cond_targets,
     _dml_cv_predict,
     _dml_tune,
     _get_bracket_guess,
-    _normalize_ipw,
     _predict_zero_one_propensity,
     _solve_ipw_score,
-    _trimm,
 )
+from doubleml.utils._propensity_score import _normalize_ipw, _trimm
 
 
 class DoubleMLCVAR(LinearScoreMixin, DoubleML):
diff --git a/doubleml/irm/iivm.py b/doubleml/irm/iivm.py
index e9dbf8ff..d78d6f3e 100644
--- a/doubleml/irm/iivm.py
+++ b/doubleml/irm/iivm.py
@@ -2,17 +2,18 @@
 from sklearn.utils import check_X_y
 from sklearn.utils.multiclass import type_of_target
 
-from ..double_ml import DoubleML
-from ..double_ml_data import DoubleMLData
-from ..double_ml_score_mixins import LinearScoreMixin
-from ..utils._checks import (
+from doubleml.double_ml import DoubleML
+from doubleml.double_ml_data import DoubleMLData
+from doubleml.double_ml_score_mixins import LinearScoreMixin
+from doubleml.utils._checks import (
     _check_binary_predictions,
     _check_finite_predictions,
     _check_is_propensity,
     _check_score,
     _check_trimming,
 )
-from ..utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls, _normalize_ipw, _trimm
+from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls
+from doubleml.utils._propensity_score import _normalize_ipw, _trimm
 
 
 class DoubleMLIIVM(LinearScoreMixin, DoubleML):
diff --git a/doubleml/irm/irm.py b/doubleml/irm/irm.py
index c5765293..e8c153d8 100644
--- a/doubleml/irm/irm.py
+++ b/doubleml/irm/irm.py
@@ -5,10 +5,10 @@
 from sklearn.utils import check_X_y
 from sklearn.utils.multiclass import type_of_target
 
-from ..double_ml import DoubleML
-from ..double_ml_data import DoubleMLData
-from ..double_ml_score_mixins import LinearScoreMixin
-from ..utils._checks import (
+from doubleml.double_ml import DoubleML
+from doubleml.double_ml_data import DoubleMLData
+from doubleml.double_ml_score_mixins import LinearScoreMixin
+from doubleml.utils._checks import (
     _check_binary_predictions,
     _check_finite_predictions,
     _check_integer,
@@ -17,9 +17,10 @@
     _check_trimming,
     _check_weights,
 )
-from ..utils._estimation import _cond_targets, _dml_cv_predict, _dml_tune, _get_cond_smpls, _normalize_ipw, _trimm
-from ..utils.blp import DoubleMLBLP
-from ..utils.policytree import DoubleMLPolicyTree
+from doubleml.utils._estimation import _cond_targets, _dml_cv_predict, _dml_tune, _get_cond_smpls
+from doubleml.utils._propensity_score import _normalize_ipw, _trimm
+from doubleml.utils.blp import DoubleMLBLP
+from doubleml.utils.policytree import DoubleMLPolicyTree
 
 
 class DoubleMLIRM(LinearScoreMixin, DoubleML):
diff --git a/doubleml/irm/lpq.py b/doubleml/irm/lpq.py
index dd912aec..56a97969 100644
--- a/doubleml/irm/lpq.py
+++ b/doubleml/irm/lpq.py
@@ -4,21 +4,20 @@
 from sklearn.utils import check_X_y
 from sklearn.utils.multiclass import type_of_target
 
-from ..double_ml import DoubleML
-from ..double_ml_data import DoubleMLData
-from ..double_ml_score_mixins import NonLinearScoreMixin
-from ..utils._checks import _check_quantile, _check_score, _check_treatment, _check_trimming, _check_zero_one_treatment
-from ..utils._estimation import (
+from doubleml.double_ml import DoubleML
+from doubleml.double_ml_data import DoubleMLData
+from doubleml.double_ml_score_mixins import NonLinearScoreMixin
+from doubleml.utils._checks import _check_quantile, _check_score, _check_treatment, _check_trimming, _check_zero_one_treatment
+from doubleml.utils._estimation import (
     _cond_targets,
     _default_kde,
     _dml_cv_predict,
     _dml_tune,
     _get_bracket_guess,
-    _normalize_ipw,
     _predict_zero_one_propensity,
     _solve_ipw_score,
-    _trimm,
 )
+from doubleml.utils._propensity_score import _normalize_ipw, _trimm
 
 
 class DoubleMLLPQ(NonLinearScoreMixin, DoubleML):
diff --git a/doubleml/irm/pq.py b/doubleml/irm/pq.py
index cef152e4..4cdcd74c 100644
--- a/doubleml/irm/pq.py
+++ b/doubleml/irm/pq.py
@@ -3,10 +3,10 @@
 from sklearn.model_selection import StratifiedKFold, train_test_split
 from sklearn.utils import check_X_y
 
-from ..double_ml import DoubleML
-from ..double_ml_data import DoubleMLData
-from ..double_ml_score_mixins import NonLinearScoreMixin
-from ..utils._checks import (
+from doubleml.double_ml import DoubleML
+from doubleml.double_ml_data import DoubleMLData
+from doubleml.double_ml_score_mixins import NonLinearScoreMixin
+from doubleml.utils._checks import (
     _check_contains_iv,
     _check_quantile,
     _check_score,
@@ -14,17 +14,16 @@
     _check_trimming,
     _check_zero_one_treatment,
 )
-from ..utils._estimation import (
+from doubleml.utils._estimation import (
     _cond_targets,
     _default_kde,
     _dml_cv_predict,
     _dml_tune,
     _get_bracket_guess,
-    _normalize_ipw,
     _predict_zero_one_propensity,
     _solve_ipw_score,
-    _trimm,
 )
+from doubleml.utils._propensity_score import _normalize_ipw, _trimm
 
 
 class DoubleMLPQ(NonLinearScoreMixin, DoubleML):
diff --git a/doubleml/irm/qte.py b/doubleml/irm/qte.py
index 786635ba..f05269ad 100644
--- a/doubleml/irm/qte.py
+++ b/doubleml/irm/qte.py
@@ -3,15 +3,15 @@
 from joblib import Parallel, delayed
 from sklearn.base import clone
 
-from ..double_ml_data import DoubleMLClusterData, DoubleMLData
-from ..double_ml_framework import concat
-from ..utils._checks import _check_sample_splitting, _check_score, _check_trimming, _check_zero_one_treatment
-from ..utils._descriptive import generate_summary
-from ..utils._estimation import _default_kde
-from ..utils.resampling import DoubleMLResampling
-from .cvar import DoubleMLCVAR
-from .lpq import DoubleMLLPQ
-from .pq import DoubleMLPQ
+from doubleml.double_ml_data import DoubleMLClusterData, DoubleMLData
+from doubleml.double_ml_framework import concat
+from doubleml.irm.cvar import DoubleMLCVAR
+from doubleml.irm.lpq import DoubleMLLPQ
+from doubleml.irm.pq import DoubleMLPQ
+from doubleml.utils._checks import _check_sample_splitting, _check_score, _check_trimming, _check_zero_one_treatment
+from doubleml.utils._descriptive import generate_summary
+from doubleml.utils._estimation import _default_kde
+from doubleml.utils.resampling import DoubleMLResampling
 
 
 class DoubleMLQTE:
diff --git a/doubleml/irm/ssm.py b/doubleml/irm/ssm.py
index c63d2076..5a6458ca 100644
--- a/doubleml/irm/ssm.py
+++ b/doubleml/irm/ssm.py
@@ -6,11 +6,12 @@
 from sklearn.model_selection import train_test_split
 from sklearn.utils import check_X_y
 
-from ..double_ml import DoubleML
-from ..double_ml_data import DoubleMLData
-from ..double_ml_score_mixins import LinearScoreMixin
-from ..utils._checks import _check_finite_predictions, _check_score, _check_trimming
-from ..utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls_2d, _predict_zero_one_propensity, _trimm
+from doubleml.double_ml import DoubleML
+from doubleml.double_ml_data import DoubleMLData
+from doubleml.double_ml_score_mixins import LinearScoreMixin
+from doubleml.utils._checks import _check_finite_predictions, _check_score, _check_trimming
+from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls_2d, _predict_zero_one_propensity
+from doubleml.utils._propensity_score import _trimm
 
 
 class DoubleMLSSM(LinearScoreMixin, DoubleML):
diff --git a/doubleml/irm/tests/_utils_apo_manual.py b/doubleml/irm/tests/_utils_apo_manual.py
index 69767df2..f451ae26 100644
--- a/doubleml/irm/tests/_utils_apo_manual.py
+++ b/doubleml/irm/tests/_utils_apo_manual.py
@@ -4,7 +4,7 @@
 from ...tests._utils import fit_predict, fit_predict_proba, tune_grid_search
 from ...tests._utils_boot import boot_manual, draw_weights
 from ...utils._checks import _check_is_propensity
-from ...utils._estimation import _normalize_ipw
+from ...utils._propensity_score import _normalize_ipw
 
 
 def fit_apo(
diff --git a/doubleml/irm/tests/_utils_cvar_manual.py b/doubleml/irm/tests/_utils_cvar_manual.py
index 7ff2c230..dd6935b6 100644
--- a/doubleml/irm/tests/_utils_cvar_manual.py
+++ b/doubleml/irm/tests/_utils_cvar_manual.py
@@ -3,7 +3,8 @@
 from sklearn.model_selection import StratifiedKFold, train_test_split
 
 from ...tests._utils import fit_predict_proba, tune_grid_search
-from ...utils._estimation import _dml_cv_predict, _get_bracket_guess, _normalize_ipw, _solve_ipw_score
+from ...utils._estimation import _dml_cv_predict, _get_bracket_guess, _solve_ipw_score
+from ...utils._propensity_score import _normalize_ipw
 
 
 def fit_cvar(
diff --git a/doubleml/irm/tests/_utils_iivm_manual.py b/doubleml/irm/tests/_utils_iivm_manual.py
index a358d6f8..b61526b2 100644
--- a/doubleml/irm/tests/_utils_iivm_manual.py
+++ b/doubleml/irm/tests/_utils_iivm_manual.py
@@ -3,7 +3,7 @@
 
 from ...tests._utils import fit_predict, fit_predict_proba, tune_grid_search
 from ...tests._utils_boot import boot_manual, draw_weights
-from ...utils._estimation import _normalize_ipw
+from ...utils._propensity_score import _normalize_ipw
 
 
 def fit_iivm(
diff --git a/doubleml/irm/tests/_utils_irm_manual.py b/doubleml/irm/tests/_utils_irm_manual.py
index f9096294..f0ce825b 100644
--- a/doubleml/irm/tests/_utils_irm_manual.py
+++ b/doubleml/irm/tests/_utils_irm_manual.py
@@ -4,7 +4,7 @@
 from ...tests._utils import fit_predict, fit_predict_proba, tune_grid_search
 from ...tests._utils_boot import boot_manual, draw_weights
 from ...utils._checks import _check_is_propensity
-from ...utils._estimation import _normalize_ipw
+from ...utils._propensity_score import _normalize_ipw
 
 
 def fit_irm(
diff --git a/doubleml/irm/tests/_utils_lpq_manual.py b/doubleml/irm/tests/_utils_lpq_manual.py
index e24b5e92..376c7c46 100644
--- a/doubleml/irm/tests/_utils_lpq_manual.py
+++ b/doubleml/irm/tests/_utils_lpq_manual.py
@@ -4,7 +4,8 @@
 from sklearn.model_selection import StratifiedKFold, train_test_split
 
 from ...tests._utils import tune_grid_search
-from ...utils._estimation import _default_kde, _dml_cv_predict, _get_bracket_guess, _normalize_ipw, _solve_ipw_score, _trimm
+from ...utils._estimation import _default_kde, _dml_cv_predict, _get_bracket_guess, _solve_ipw_score
+from ...utils._propensity_score import _normalize_ipw, _trimm
 
 
 def fit_lpq(
diff --git a/doubleml/irm/tests/_utils_pq_manual.py b/doubleml/irm/tests/_utils_pq_manual.py
index 3bcd1395..b5b27c7c 100644
--- a/doubleml/irm/tests/_utils_pq_manual.py
+++ b/doubleml/irm/tests/_utils_pq_manual.py
@@ -4,7 +4,8 @@
 from sklearn.model_selection import StratifiedKFold, train_test_split
 
 from ...tests._utils import tune_grid_search
-from ...utils._estimation import _default_kde, _dml_cv_predict, _get_bracket_guess, _normalize_ipw, _solve_ipw_score
+from ...utils._estimation import _default_kde, _dml_cv_predict, _get_bracket_guess, _solve_ipw_score
+from ...utils._propensity_score import _normalize_ipw
 
 
 def fit_pq(
diff --git a/doubleml/irm/tests/_utils_ssm_manual.py b/doubleml/irm/tests/_utils_ssm_manual.py
index 1ce4a97b..e27f7fe1 100644
--- a/doubleml/irm/tests/_utils_ssm_manual.py
+++ b/doubleml/irm/tests/_utils_ssm_manual.py
@@ -3,7 +3,8 @@
 from sklearn.model_selection import train_test_split
 
 from ...tests._utils import fit_predict, fit_predict_proba, tune_grid_search
-from ...utils._estimation import _predict_zero_one_propensity, _trimm
+from ...utils._estimation import _predict_zero_one_propensity
+from ...utils._propensity_score import _trimm
 
 
 def fit_selection(
diff --git a/doubleml/irm/tests/test_irm_vs_apos.py b/doubleml/irm/tests/test_irm_vs_apos.py
index ee8c0d1e..9c11f5df 100644
--- a/doubleml/irm/tests/test_irm_vs_apos.py
+++ b/doubleml/irm/tests/test_irm_vs_apos.py
@@ -7,7 +7,7 @@
 from sklearn.linear_model import LinearRegression, LogisticRegression
 
 import doubleml as dml
-from doubleml.utils._estimation import _normalize_ipw
+from doubleml.utils._propensity_score import _normalize_ipw
 
 
 @pytest.fixture(
diff --git a/doubleml/irm/tests/test_irm_weighted_scores.py b/doubleml/irm/tests/test_irm_weighted_scores.py
index a7984478..0592c3d3 100644
--- a/doubleml/irm/tests/test_irm_weighted_scores.py
+++ b/doubleml/irm/tests/test_irm_weighted_scores.py
@@ -5,7 +5,7 @@
 from sklearn.linear_model import LinearRegression, LogisticRegression
 
 import doubleml as dml
-from doubleml.utils._estimation import _normalize_ipw
+from doubleml.utils._propensity_score import _normalize_ipw
 
 
 def old_score_elements(y, d, g_hat0, g_hat1, m_hat, score, normalize_ipw):
diff --git a/doubleml/utils/_estimation.py b/doubleml/utils/_estimation.py
index 408c2f51..25e6262d 100644
--- a/doubleml/utils/_estimation.py
+++ b/doubleml/utils/_estimation.py
@@ -185,23 +185,6 @@ def _draw_weights(method, n_rep_boot, n_obs):
     return weights
 
 
-def _trimm(preds, trimming_rule, trimming_threshold):
-    if trimming_rule == "truncate":
-        preds[preds < trimming_threshold] = trimming_threshold
-        preds[preds > 1 - trimming_threshold] = 1 - trimming_threshold
-    return preds
-
-
-def _normalize_ipw(propensity, treatment):
-    mean_treat1 = np.mean(np.divide(treatment, propensity))
-    mean_treat0 = np.mean(np.divide(1.0 - treatment, 1.0 - propensity))
-    normalized_weights = np.multiply(treatment, np.multiply(propensity, mean_treat1)) + np.multiply(
-        1.0 - treatment, 1.0 - np.multiply(1.0 - propensity, mean_treat0)
-    )
-
-    return normalized_weights
-
-
 def _rmse(y_true, y_pred):
     subset = np.logical_not(np.isnan(y_true))
     rmse = root_mean_squared_error(y_true[subset], y_pred[subset])
diff --git a/doubleml/utils/_propensity_score.py b/doubleml/utils/_propensity_score.py
new file mode 100644
index 00000000..4d06c52c
--- /dev/null
+++ b/doubleml/utils/_propensity_score.py
@@ -0,0 +1,20 @@
+import numpy as np
+
+
+def _trimm(preds, trimming_rule, trimming_threshold):
+    """Trim predictions based on the specified method and threshold."""
+    if trimming_rule == "truncate":
+        adjusted_predictions = np.clip(preds, trimming_threshold, 1 - trimming_threshold)
+    return adjusted_predictions
+
+
+def _normalize_ipw(propensity, treatment):
+    """Normalize inverse probability weights."""
+    mean_treat1 = np.mean(np.divide(treatment, propensity))
+    mean_treat0 = np.mean(np.divide(1.0 - treatment, 1.0 - propensity))
+
+    normalized_weights = np.multiply(treatment, np.multiply(propensity, mean_treat1)) + np.multiply(
+        1.0 - treatment, 1.0 - np.multiply(1.0 - propensity, mean_treat0)
+    )
+
+    return normalized_weights
diff --git a/doubleml/utils/_sensitivity.py b/doubleml/utils/_sensitivity.py
index 3765f018..e54d4e9c 100644
--- a/doubleml/utils/_sensitivity.py
+++ b/doubleml/utils/_sensitivity.py
@@ -3,7 +3,5 @@
 
 def _compute_sensitivity_bias(sigma2, nu2, psi_sigma2, psi_nu2):
     max_bias = np.sqrt(np.multiply(sigma2, nu2))
-    psi_max_bias = np.divide(
-        np.add(np.multiply(sigma2, psi_nu2), np.multiply(nu2, psi_sigma2)), np.multiply(2.0, max_bias)
-    )
+    psi_max_bias = np.divide(np.add(np.multiply(sigma2, psi_nu2), np.multiply(nu2, psi_sigma2)), np.multiply(2.0, max_bias))
     return max_bias, psi_max_bias

From 0ae0aedcd943f37e82348d34e3ab59cdb33728ae Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 10 Feb 2025 08:26:03 +0100
Subject: [PATCH 47/54] use np.clip

---
 doubleml/utils/_propensity_score.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doubleml/utils/_propensity_score.py b/doubleml/utils/_propensity_score.py
index 4d06c52c..7102aeb6 100644
--- a/doubleml/utils/_propensity_score.py
+++ b/doubleml/utils/_propensity_score.py
@@ -4,7 +4,7 @@
 def _trimm(preds, trimming_rule, trimming_threshold):
     """Trim predictions based on the specified method and threshold."""
     if trimming_rule == "truncate":
-        adjusted_predictions = np.clip(preds, trimming_threshold, 1 - trimming_threshold)
+        adjusted_predictions = np.clip(a=preds, a_min=trimming_threshold, a_max=1 - trimming_threshold)
     return adjusted_predictions
 
 

From d922d7bd68af6776e3dbd6a8524652404a75ad27 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 10 Feb 2025 09:51:58 +0100
Subject: [PATCH 48/54] update propensity score adjustment in irm

---
 doubleml/irm/irm.py                     | 18 ++++++++++--------
 doubleml/irm/tests/_utils_irm_manual.py | 12 ++++++++----
 doubleml/irm/tests/test_irm.py          |  8 +++++++-
 doubleml/utils/_propensity_score.py     | 20 +++++++++++++++-----
 4 files changed, 40 insertions(+), 18 deletions(-)

diff --git a/doubleml/irm/irm.py b/doubleml/irm/irm.py
index e8c153d8..62b04d81 100644
--- a/doubleml/irm/irm.py
+++ b/doubleml/irm/irm.py
@@ -18,7 +18,7 @@
     _check_weights,
 )
 from doubleml.utils._estimation import _cond_targets, _dml_cv_predict, _dml_tune, _get_cond_smpls
-from doubleml.utils._propensity_score import _normalize_ipw, _trimm
+from doubleml.utils._propensity_score import _propensity_score_adjustments, _trimm
 from doubleml.utils.blp import DoubleMLBLP
 from doubleml.utils.policytree import DoubleMLPolicyTree
 
@@ -341,10 +341,9 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
         return psi_elements, preds
 
     def _score_elements(self, y, d, g_hat0, g_hat1, m_hat, smpls):
-        if self.normalize_ipw:
-            m_hat_adj = _normalize_ipw(m_hat, d)
-        else:
-            m_hat_adj = m_hat
+        m_hat_adj = _propensity_score_adjustments(
+            propensity_score=m_hat, treatment_indicator=d, normalize_ipw=self.normalize_ipw
+        )
 
         # compute residuals
         u_hat0 = y - g_hat0
@@ -372,19 +371,22 @@ def _sensitivity_element_est(self, preds):
         d = self._dml_data.d
 
         m_hat = preds["predictions"]["ml_m"]
+        m_hat_adj = _propensity_score_adjustments(
+            propensity_score=m_hat, treatment_indicator=d, normalize_ipw=self.normalize_ipw
+        )
         g_hat0 = preds["predictions"]["ml_g0"]
         g_hat1 = preds["predictions"]["ml_g1"]
 
         # use weights make this extendable
-        weights, weights_bar = self._get_weights(m_hat=m_hat)
+        weights, weights_bar = self._get_weights(m_hat=m_hat_adj)
 
         sigma2_score_element = np.square(y - np.multiply(d, g_hat1) - np.multiply(1.0 - d, g_hat0))
         sigma2 = np.mean(sigma2_score_element)
         psi_sigma2 = sigma2_score_element - sigma2
 
         # calc m(W,alpha) and Riesz representer
-        m_alpha = np.multiply(weights, np.multiply(weights_bar, (np.divide(1.0, m_hat) + np.divide(1.0, 1.0 - m_hat))))
-        rr = np.multiply(weights_bar, (np.divide(d, m_hat) - np.divide(1.0 - d, 1.0 - m_hat)))
+        m_alpha = np.multiply(weights, np.multiply(weights_bar, (np.divide(1.0, m_hat_adj) + np.divide(1.0, 1.0 - m_hat_adj))))
+        rr = np.multiply(weights_bar, (np.divide(d, m_hat_adj) - np.divide(1.0 - d, 1.0 - m_hat_adj)))
 
         nu2_score_element = np.multiply(2.0, m_alpha) - np.square(rr)
         nu2 = np.mean(nu2_score_element)
diff --git a/doubleml/irm/tests/_utils_irm_manual.py b/doubleml/irm/tests/_utils_irm_manual.py
index f0ce825b..f5a5bad7 100644
--- a/doubleml/irm/tests/_utils_irm_manual.py
+++ b/doubleml/irm/tests/_utils_irm_manual.py
@@ -309,7 +309,7 @@ def boot_irm_single_split(
     return boot_t_stat
 
 
-def fit_sensitivity_elements_irm(y, d, all_coef, predictions, score, n_rep):
+def fit_sensitivity_elements_irm(y, d, all_coef, predictions, score, n_rep, normalize_ipw):
     n_treat = 1
     n_obs = len(y)
 
@@ -320,6 +320,10 @@ def fit_sensitivity_elements_irm(y, d, all_coef, predictions, score, n_rep):
 
     for i_rep in range(n_rep):
         m_hat = predictions["ml_m"][:, i_rep, 0]
+        if normalize_ipw:
+            m_hat_adj = _normalize_ipw(m_hat, d)
+        else:
+            m_hat_adj = m_hat
         g_hat0 = predictions["ml_g0"][:, i_rep, 0]
         g_hat1 = predictions["ml_g1"][:, i_rep, 0]
 
@@ -329,15 +333,15 @@ def fit_sensitivity_elements_irm(y, d, all_coef, predictions, score, n_rep):
         else:
             assert score == "ATTE"
             weights = np.divide(d, np.mean(d))
-            weights_bar = np.divide(m_hat, np.mean(d))
+            weights_bar = np.divide(m_hat_adj, np.mean(d))
 
         sigma2_score_element = np.square(y - np.multiply(d, g_hat1) - np.multiply(1.0 - d, g_hat0))
         sigma2[0, i_rep, 0] = np.mean(sigma2_score_element)
         psi_sigma2[:, i_rep, 0] = sigma2_score_element - sigma2[0, i_rep, 0]
 
         # calc m(W,alpha) and Riesz representer
-        m_alpha = np.multiply(weights, np.multiply(weights_bar, (np.divide(1.0, m_hat) + np.divide(1.0, 1.0 - m_hat))))
-        rr = np.multiply(weights_bar, (np.divide(d, m_hat) - np.divide(1.0 - d, 1.0 - m_hat)))
+        m_alpha = np.multiply(weights, np.multiply(weights_bar, (np.divide(1.0, m_hat_adj) + np.divide(1.0, 1.0 - m_hat_adj))))
+        rr = np.multiply(weights_bar, (np.divide(d, m_hat_adj) - np.divide(1.0 - d, 1.0 - m_hat_adj)))
 
         nu2_score_element = np.multiply(2.0, m_alpha) - np.square(rr)
         nu2[0, i_rep, 0] = np.mean(nu2_score_element)
diff --git a/doubleml/irm/tests/test_irm.py b/doubleml/irm/tests/test_irm.py
index 04d9596d..f99f2253 100644
--- a/doubleml/irm/tests/test_irm.py
+++ b/doubleml/irm/tests/test_irm.py
@@ -155,7 +155,13 @@ def dml_irm_fixture(generate_data_irm, learner, score, normalize_ipw, trimming_t
     # sensitivity tests
     res_dict["sensitivity_elements"] = dml_irm_obj.sensitivity_elements
     res_dict["sensitivity_elements_manual"] = fit_sensitivity_elements_irm(
-        y, d, all_coef=dml_irm_obj.all_coef, predictions=dml_irm_obj.predictions, score=score, n_rep=1
+        y,
+        d,
+        all_coef=dml_irm_obj.all_coef,
+        predictions=dml_irm_obj.predictions,
+        score=score,
+        n_rep=1,
+        normalize_ipw=normalize_ipw,
     )
 
     # check if sensitivity score with rho=0 gives equal asymptotic standard deviation
diff --git a/doubleml/utils/_propensity_score.py b/doubleml/utils/_propensity_score.py
index 7102aeb6..26a6af9c 100644
--- a/doubleml/utils/_propensity_score.py
+++ b/doubleml/utils/_propensity_score.py
@@ -1,6 +1,16 @@
 import numpy as np
 
 
+def _propensity_score_adjustments(propensity_score, treatment_indicator, normalize_ipw):
+    """Adjust propensity score."""
+    if normalize_ipw:
+        adjusted_propensity_score = _normalize_ipw(propensity_score=propensity_score, treatment_indicator=treatment_indicator)
+    else:
+        adjusted_propensity_score = propensity_score
+
+    return adjusted_propensity_score
+
+
 def _trimm(preds, trimming_rule, trimming_threshold):
     """Trim predictions based on the specified method and threshold."""
     if trimming_rule == "truncate":
@@ -8,13 +18,13 @@ def _trimm(preds, trimming_rule, trimming_threshold):
     return adjusted_predictions
 
 
-def _normalize_ipw(propensity, treatment):
+def _normalize_ipw(propensity_score, treatment_indicator):
     """Normalize inverse probability weights."""
-    mean_treat1 = np.mean(np.divide(treatment, propensity))
-    mean_treat0 = np.mean(np.divide(1.0 - treatment, 1.0 - propensity))
+    mean_treat1 = np.mean(np.divide(treatment_indicator, propensity_score))
+    mean_treat0 = np.mean(np.divide(1.0 - treatment_indicator, 1.0 - propensity_score))
 
-    normalized_weights = np.multiply(treatment, np.multiply(propensity, mean_treat1)) + np.multiply(
-        1.0 - treatment, 1.0 - np.multiply(1.0 - propensity, mean_treat0)
+    normalized_weights = np.multiply(treatment_indicator, np.multiply(propensity_score, mean_treat1)) + np.multiply(
+        1.0 - treatment_indicator, 1.0 - np.multiply(1.0 - propensity_score, mean_treat0)
     )
 
     return normalized_weights

From 48ec2896fab691236cd2a8190369568923ed4fa8 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 10 Feb 2025 10:06:20 +0100
Subject: [PATCH 49/54] update prop score adjustments apo

---
 doubleml/irm/apo.py                     | 16 ++++++-----
 doubleml/irm/irm.py                     |  6 ++---
 doubleml/irm/tests/_utils_apo_manual.py | 10 ++++---
 doubleml/irm/tests/test_apo.py          |  9 ++++++-
 doubleml/irm/tests/test_irm_vs_apos.py  | 35 +++++++++++++------------
 doubleml/utils/_propensity_score.py     |  2 +-
 6 files changed, 46 insertions(+), 32 deletions(-)

diff --git a/doubleml/irm/apo.py b/doubleml/irm/apo.py
index e0d1e4fc..177d5958 100644
--- a/doubleml/irm/apo.py
+++ b/doubleml/irm/apo.py
@@ -15,7 +15,7 @@
     _check_weights,
 )
 from doubleml.utils._estimation import _cond_targets, _dml_cv_predict, _dml_tune, _get_cond_smpls
-from doubleml.utils._propensity_score import _normalize_ipw, _trimm
+from doubleml.utils._propensity_score import _propensity_score_adjustment, _trimm
 from doubleml.utils.blp import DoubleMLBLP
 
 
@@ -311,10 +311,9 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
         return psi_elements, preds
 
     def _score_elements(self, y, treated, g_hat_d_lvl0, g_hat_d_lvl1, m_hat, smpls):
-        if self.normalize_ipw:
-            m_hat_adj = _normalize_ipw(m_hat, treated)
-        else:
-            m_hat_adj = m_hat
+        m_hat_adj = _propensity_score_adjustment(
+            propensity_score=m_hat, treatment_indicator=treated, normalize_ipw=self.normalize_ipw
+        )
 
         u_hat = y - g_hat_d_lvl1
         weights, weights_bar = self._get_weights()
@@ -329,6 +328,9 @@ def _sensitivity_element_est(self, preds):
         treated = self.treated
 
         m_hat = preds["predictions"]["ml_m"]
+        m_hat_adj = _propensity_score_adjustment(
+            propensity_score=m_hat, treatment_indicator=treated, normalize_ipw=self.normalize_ipw
+        )
         g_hat_d_lvl0 = preds["predictions"]["ml_g_d_lvl0"]
         g_hat_d_lvl1 = preds["predictions"]["ml_g_d_lvl1"]
 
@@ -339,8 +341,8 @@ def _sensitivity_element_est(self, preds):
         psi_sigma2 = sigma2_score_element - sigma2
 
         # calc m(W,alpha) and Riesz representer
-        m_alpha = np.multiply(weights, np.multiply(weights_bar, np.divide(1.0, m_hat)))
-        rr = np.multiply(weights_bar, np.divide(treated, m_hat))
+        m_alpha = np.multiply(weights, np.multiply(weights_bar, np.divide(1.0, m_hat_adj)))
+        rr = np.multiply(weights_bar, np.divide(treated, m_hat_adj))
 
         nu2_score_element = np.multiply(2.0, m_alpha) - np.square(rr)
         nu2 = np.mean(nu2_score_element)
diff --git a/doubleml/irm/irm.py b/doubleml/irm/irm.py
index 62b04d81..b8879a93 100644
--- a/doubleml/irm/irm.py
+++ b/doubleml/irm/irm.py
@@ -18,7 +18,7 @@
     _check_weights,
 )
 from doubleml.utils._estimation import _cond_targets, _dml_cv_predict, _dml_tune, _get_cond_smpls
-from doubleml.utils._propensity_score import _propensity_score_adjustments, _trimm
+from doubleml.utils._propensity_score import _propensity_score_adjustment, _trimm
 from doubleml.utils.blp import DoubleMLBLP
 from doubleml.utils.policytree import DoubleMLPolicyTree
 
@@ -341,7 +341,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
         return psi_elements, preds
 
     def _score_elements(self, y, d, g_hat0, g_hat1, m_hat, smpls):
-        m_hat_adj = _propensity_score_adjustments(
+        m_hat_adj = _propensity_score_adjustment(
             propensity_score=m_hat, treatment_indicator=d, normalize_ipw=self.normalize_ipw
         )
 
@@ -371,7 +371,7 @@ def _sensitivity_element_est(self, preds):
         d = self._dml_data.d
 
         m_hat = preds["predictions"]["ml_m"]
-        m_hat_adj = _propensity_score_adjustments(
+        m_hat_adj = _propensity_score_adjustment(
             propensity_score=m_hat, treatment_indicator=d, normalize_ipw=self.normalize_ipw
         )
         g_hat0 = preds["predictions"]["ml_g0"]
diff --git a/doubleml/irm/tests/_utils_apo_manual.py b/doubleml/irm/tests/_utils_apo_manual.py
index f451ae26..0ec84417 100644
--- a/doubleml/irm/tests/_utils_apo_manual.py
+++ b/doubleml/irm/tests/_utils_apo_manual.py
@@ -212,7 +212,7 @@ def boot_apo_single_split(
     return boot_t_stat
 
 
-def fit_sensitivity_elements_apo(y, d, treatment_level, all_coef, predictions, score, n_rep):
+def fit_sensitivity_elements_apo(y, d, treatment_level, all_coef, predictions, score, n_rep, normalize_ipw):
     n_treat = 1
     n_obs = len(y)
     treated = d == treatment_level
@@ -224,6 +224,10 @@ def fit_sensitivity_elements_apo(y, d, treatment_level, all_coef, predictions, s
 
     for i_rep in range(n_rep):
         m_hat = predictions["ml_m"][:, i_rep, 0]
+        if normalize_ipw:
+            m_hat_adj = _normalize_ipw(m_hat, treated)
+        else:
+            m_hat_adj = m_hat
         g_hat0 = predictions["ml_g_d_lvl0"][:, i_rep, 0]
         g_hat1 = predictions["ml_g_d_lvl1"][:, i_rep, 0]
 
@@ -235,8 +239,8 @@ def fit_sensitivity_elements_apo(y, d, treatment_level, all_coef, predictions, s
         psi_sigma2[:, i_rep, 0] = sigma2_score_element - sigma2[0, i_rep, 0]
 
         # calc m(W,alpha) and Riesz representer
-        m_alpha = np.multiply(weights, np.multiply(weights_bar, np.divide(1.0, m_hat)))
-        rr = np.multiply(weights_bar, np.divide(treated, m_hat))
+        m_alpha = np.multiply(weights, np.multiply(weights_bar, np.divide(1.0, m_hat_adj)))
+        rr = np.multiply(weights_bar, np.divide(treated, m_hat_adj))
 
         nu2_score_element = np.multiply(2.0, m_alpha) - np.square(rr)
         nu2[0, i_rep, 0] = np.mean(nu2_score_element)
diff --git a/doubleml/irm/tests/test_apo.py b/doubleml/irm/tests/test_apo.py
index 1c8f0813..df4ec284 100644
--- a/doubleml/irm/tests/test_apo.py
+++ b/doubleml/irm/tests/test_apo.py
@@ -166,7 +166,14 @@ def dml_apo_fixture(learner, normalize_ipw, trimming_threshold, treatment_level)
     # sensitivity tests
     res_dict["sensitivity_elements"] = dml_obj.sensitivity_elements
     res_dict["sensitivity_elements_manual"] = fit_sensitivity_elements_apo(
-        y, d, treatment_level, all_coef=dml_obj.all_coef, predictions=dml_obj.predictions, score="APO", n_rep=1
+        y,
+        d,
+        treatment_level,
+        all_coef=dml_obj.all_coef,
+        predictions=dml_obj.predictions,
+        score="APO",
+        n_rep=1,
+        normalize_ipw=normalize_ipw,
     )
     return res_dict
 
diff --git a/doubleml/irm/tests/test_irm_vs_apos.py b/doubleml/irm/tests/test_irm_vs_apos.py
index 9c11f5df..a91c8c05 100644
--- a/doubleml/irm/tests/test_irm_vs_apos.py
+++ b/doubleml/irm/tests/test_irm_vs_apos.py
@@ -7,7 +7,7 @@
 from sklearn.linear_model import LinearRegression, LogisticRegression
 
 import doubleml as dml
-from doubleml.utils._propensity_score import _normalize_ipw
+from doubleml.utils._propensity_score import _propensity_score_adjustment
 
 
 @pytest.fixture(
@@ -193,9 +193,10 @@ def dml_irm_apos_weighted_fixture(generate_data_irm, learner, n_rep, normalize_i
     # define weights
     p_hat = np.mean(d)
     m_hat_adjusted = copy.deepcopy(m_hat)
-    if normalize_ipw:
-        for i_rep in range(n_rep):
-            m_hat_adjusted[:, i_rep] = _normalize_ipw(m_hat[:, i_rep], d)
+    for i_rep in range(n_rep):
+        m_hat_adjusted[:, i_rep] = _propensity_score_adjustment(
+            propensity_score=m_hat[:, i_rep], treatment_indicator=d, normalize_ipw=normalize_ipw
+        )
     weights_dict = {
         "weights": d / p_hat,
         "weights_bar": m_hat_adjusted / p_hat,
@@ -312,19 +313,19 @@ def test_apos_vs_irm_weighted_confint(dml_irm_apos_weighted_fixture):
 @pytest.mark.ci
 def test_apos_vs_irm_weighted_sensitivity(dml_irm_apos_weighted_fixture):
     # TODO: Include after normalize_ipw rework, see Issue https://github.com/DoubleML/doubleml-for-py/issues/296
-    # params_irm = dml_irm_apos_weighted_fixture["dml_irm"].sensitivity_params
+    params_irm = dml_irm_apos_weighted_fixture["dml_irm"].sensitivity_params
     params_irm_weighted = dml_irm_apos_weighted_fixture["dml_irm_weighted"].sensitivity_params
     params_causal_contrast = dml_irm_apos_weighted_fixture["causal_contrast"].sensitivity_params
 
     for key in ["theta", "se", "ci"]:
         for boundary in ["upper", "lower"]:
             # TODO: Include after normalize_ipw rework, see Issue https://github.com/DoubleML/doubleml-for-py/issues/296
-            # assert np.allclose(
-            #     params_irm[key][boundary],
-            #     params_irm_weighted[key][boundary],
-            #     rtol=1e-9,
-            #     atol=1e-4,
-            # )
+            assert np.allclose(
+                params_irm[key][boundary],
+                params_irm_weighted[key][boundary],
+                rtol=1e-9,
+                atol=1e-4,
+            )
 
             assert np.allclose(
                 params_irm_weighted[key][boundary],
@@ -335,12 +336,12 @@ def test_apos_vs_irm_weighted_sensitivity(dml_irm_apos_weighted_fixture):
 
     for key in ["rv", "rva"]:
         # TODO: Include after normalize_ipw rework, see Issue https://github.com/DoubleML/doubleml-for-py/issues/296
-        # assert np.allclose(
-        #     params_irm[key],
-        #     params_irm_weighted[key],
-        #     rtol=1e-9,
-        #     atol=1e-4,
-        # )
+        assert np.allclose(
+            params_irm[key],
+            params_irm_weighted[key],
+            rtol=1e-9,
+            atol=1e-4,
+        )
 
         assert np.allclose(
             params_irm_weighted[key],
diff --git a/doubleml/utils/_propensity_score.py b/doubleml/utils/_propensity_score.py
index 26a6af9c..84714718 100644
--- a/doubleml/utils/_propensity_score.py
+++ b/doubleml/utils/_propensity_score.py
@@ -1,7 +1,7 @@
 import numpy as np
 
 
-def _propensity_score_adjustments(propensity_score, treatment_indicator, normalize_ipw):
+def _propensity_score_adjustment(propensity_score, treatment_indicator, normalize_ipw):
     """Adjust propensity score."""
     if normalize_ipw:
         adjusted_propensity_score = _normalize_ipw(propensity_score=propensity_score, treatment_indicator=treatment_indicator)

From 3e95ed4f78f0814137a69d634043b0d52265735d Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 10 Feb 2025 10:11:24 +0100
Subject: [PATCH 50/54] fix _trimm

---
 doubleml/utils/_propensity_score.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/doubleml/utils/_propensity_score.py b/doubleml/utils/_propensity_score.py
index 84714718..9ce70ea0 100644
--- a/doubleml/utils/_propensity_score.py
+++ b/doubleml/utils/_propensity_score.py
@@ -15,6 +15,8 @@ def _trimm(preds, trimming_rule, trimming_threshold):
     """Trim predictions based on the specified method and threshold."""
     if trimming_rule == "truncate":
         adjusted_predictions = np.clip(a=preds, a_min=trimming_threshold, a_max=1 - trimming_threshold)
+    else:
+        adjusted_predictions = preds
     return adjusted_predictions
 
 

From 915b6387b10f8f772744b8d9ca9babe8bfcbb755 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Thu, 13 Feb 2025 21:02:24 +0100
Subject: [PATCH 51/54] add tests for sensitivity operations

---
 doubleml/tests/test_framework_sensitivity.py | 33 ++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/doubleml/tests/test_framework_sensitivity.py b/doubleml/tests/test_framework_sensitivity.py
index eec80264..2f397fdf 100644
--- a/doubleml/tests/test_framework_sensitivity.py
+++ b/doubleml/tests/test_framework_sensitivity.py
@@ -24,15 +24,21 @@ def dml_framework_sensitivity_fixture(n_rep, generate_data_simple):
 
     # add objects
     dml_framework_obj_add_obj = dml_framework_obj + dml_framework_obj
+    dml_framework_obj_add_obj.sensitivity_analysis()
 
     # substract objects
     dml_irm_obj_2 = DoubleMLIRM(dml_data_2, ml_g, ml_m, n_rep=n_rep)
     dml_irm_obj_2.fit()
     dml_framework_obj_2 = dml_irm_obj_2.construct_framework()
     dml_framework_obj_sub_obj = dml_framework_obj - dml_framework_obj_2
+    dml_framework_obj_sub_obj2 = dml_framework_obj - dml_framework_obj
+    dml_framework_obj_sub_obj2.sensitivity_analysis()
 
     # multiply objects
     dml_framework_obj_mul_obj = dml_framework_obj * 2
+    dml_framework_obj_mul_obj.sensitivity_analysis()
+    dml_framework_obj_mul_zero_obj = 0 * dml_framework_obj
+    dml_framework_obj_mul_zero_obj.sensitivity_analysis()
 
     # concat objects
     dml_framework_obj_concat = concat([dml_framework_obj, dml_framework_obj])
@@ -44,7 +50,9 @@ def dml_framework_sensitivity_fixture(n_rep, generate_data_simple):
         "dml_framework_obj_2": dml_framework_obj_2,
         "dml_framework_obj_add_obj": dml_framework_obj_add_obj,
         "dml_framework_obj_sub_obj": dml_framework_obj_sub_obj,
+        "dml_framework_obj_sub_obj2": dml_framework_obj_sub_obj2,
         "dml_framework_obj_mul_obj": dml_framework_obj_mul_obj,
+        "dml_framework_obj_mul_zero_obj": dml_framework_obj_mul_zero_obj,
         "dml_framework_obj_concat": dml_framework_obj_concat,
         "n_rep": n_rep,
     }
@@ -104,3 +112,28 @@ def test_dml_framework_sensitivity_summary(dml_framework_sensitivity_fixture):
     ]
     for substring in substrings:
         assert substring in sensitivity_summary
+
+
+@pytest.mark.ci
+def test_dml_framework_sensitivity_operations(dml_framework_sensitivity_fixture):
+    add_obj = dml_framework_sensitivity_fixture["dml_framework_obj_add_obj"]
+    mul_obj = dml_framework_sensitivity_fixture["dml_framework_obj_mul_obj"]
+    mul_zero_obj = dml_framework_sensitivity_fixture["dml_framework_obj_mul_zero_obj"]
+
+    for key in ["theta", "se", "ci"]:
+        assert add_obj.sensitivity_params[key]["upper"] == mul_obj.sensitivity_params[key]["upper"]
+        assert add_obj.sensitivity_params[key]["lower"] == mul_obj.sensitivity_params[key]["lower"]
+
+        assert mul_zero_obj.sensitivity_params[key]["upper"] == 0
+        assert mul_zero_obj.sensitivity_params[key]["lower"] == 0
+
+    assert add_obj.sensitivity_params["rv"] == mul_obj.sensitivity_params["rv"]
+    assert mul_zero_obj.sensitivity_params["rv"] > 0.99  # due to degenerated variance
+    assert mul_zero_obj.sensitivity_params["rva"] > 0.99  # due to degenerated variance
+
+    sub_obj = dml_framework_sensitivity_fixture["dml_framework_obj_sub_obj2"]
+    assert sub_obj.sensitivity_params["theta"]["upper"] == -1 * sub_obj.sensitivity_params["theta"]["lower"]
+    assert sub_obj.sensitivity_params["se"]["upper"] == sub_obj.sensitivity_params["se"]["lower"]
+    assert sub_obj.sensitivity_params["ci"]["upper"] == -1 * sub_obj.sensitivity_params["ci"]["lower"]
+    assert sub_obj.sensitivity_params["rv"] < 0.01
+    assert sub_obj.sensitivity_params["rva"] < 0.01

From be5b39cd21f1635f8c50bb936adb44c2119d29be Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Fri, 21 Feb 2025 07:36:58 +0100
Subject: [PATCH 52/54] add pytest-cov to dev requirements

---
 pyproject.toml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index bd14d3d0..339bd0a3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -39,7 +39,8 @@ rdd = [
     "rdrobust>=1.3.0"
 ]
 dev = [
-    "pytest",
+    "pytest>=8.3.0",
+    "pytest-cov>=6.0.0",
     "xgboost",
     "lightgbm",
     "black>=24.3.0",

From 4b49ecd8bce8e3b30c29d9270838ca4b86a47292 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Fri, 21 Feb 2025 07:39:07 +0100
Subject: [PATCH 53/54] add .coverage to gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index d9ffb93c..ba091906 100644
--- a/.gitignore
+++ b/.gitignore
@@ -30,3 +30,4 @@ MANIFEST
 *.idea
 *.vscode
 .flake8
+.coverage
\ No newline at end of file

From 1382b89e420bc907291509b6cae8d8d69db402fa Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 3 Mar 2025 07:04:59 +0100
Subject: [PATCH 54/54] update IRM ATT estimation

---
 doubleml/irm/apo.py | 2 +-
 doubleml/irm/irm.py | 6 +-----
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/doubleml/irm/apo.py b/doubleml/irm/apo.py
index 177d5958..e8c75172 100644
--- a/doubleml/irm/apo.py
+++ b/doubleml/irm/apo.py
@@ -318,7 +318,7 @@ def _score_elements(self, y, treated, g_hat_d_lvl0, g_hat_d_lvl1, m_hat, smpls):
         u_hat = y - g_hat_d_lvl1
         weights, weights_bar = self._get_weights()
         psi_b = weights * g_hat_d_lvl1 + weights_bar * np.divide(np.multiply(treated, u_hat), m_hat_adj)
-        psi_a = -1.0 * np.divide(weights, np.mean(weights))  # TODO: check if this is correct
+        psi_a = -1.0 * np.divide(weights, np.mean(weights))
 
         return psi_a, psi_b
 
diff --git a/doubleml/irm/irm.py b/doubleml/irm/irm.py
index b8879a93..72db088e 100644
--- a/doubleml/irm/irm.py
+++ b/doubleml/irm/irm.py
@@ -354,11 +354,7 @@ def _score_elements(self, y, d, g_hat0, g_hat1, m_hat, smpls):
             psi_b = weights * (g_hat1 - g_hat0) + weights_bar * (
                 np.divide(np.multiply(d, u_hat1), m_hat_adj) - np.divide(np.multiply(1.0 - d, u_hat0), 1.0 - m_hat_adj)
             )
-            if self.score == "ATE":
-                psi_a = -1.0 * np.divide(weights, np.mean(weights))  # TODO: check if this is correct
-            else:
-                assert self.score == "ATTE"
-                psi_a = -1.0 * weights
+            psi_a = -1.0 * np.divide(weights, np.mean(weights))
         else:
             assert callable(self.score)
             psi_a, psi_b = self.score(y=y, d=d, g_hat0=g_hat0, g_hat1=g_hat1, m_hat=m_hat_adj, smpls=smpls)