From e52122f348e222b06b581df323c306825e3fb108 Mon Sep 17 00:00:00 2001
From: SvenKlaassen
Date: Mon, 16 Jun 2025 11:04:59 +0200
Subject: [PATCH 1/2] add flexible summary with multiple formats

---
 doubleml/did/did_binary.py    |  63 ++++----------------
 doubleml/did/did_cs_binary.py |  65 ++++----------------
 doubleml/double_ml.py         | 109 +++++++++++++++++++++++-----------
 doubleml/irm/iivm.py          |  15 +----
 4 files changed, 100 insertions(+), 152 deletions(-)

diff --git a/doubleml/did/did_binary.py b/doubleml/did/did_binary.py
index 99e18e28..99ce7ef9 100644
--- a/doubleml/did/did_binary.py
+++ b/doubleml/did/did_binary.py
@@ -239,58 +239,17 @@ def __init__(
         self._sensitivity_implemented = True
         self._external_predictions_implemented = True
 
-    def __str__(self):
-        class_name = self.__class__.__name__
-        header = f"================== {class_name} Object ==================\n"
-        data_summary = self._dml_data._data_summary_str()
-        score_info = (
-            f"Score function: {str(self.score)}\n"
-            f"Treatment group: {str(self.g_value)}\n"
-            f"Pre-treatment period: {str(self.t_value_pre)}\n"
-            f"Evaluation period: {str(self.t_value_eval)}\n"
-            f"Control group: {str(self.control_group)}\n"
-            f"Anticipation periods: {str(self.anticipation_periods)}\n"
-            f"Effective sample size: {str(self.n_obs_subset)}\n"
-        )
-        learner_info = ""
-        for key, value in self.learner.items():
-            learner_info += f"Learner {key}: {str(value)}\n"
-        if self.nuisance_loss is not None:
-            learner_info += "Out-of-sample Performance:\n"
-            is_classifier = [value for value in self._is_classifier.values()]
-            is_regressor = [not value for value in is_classifier]
-            if any(is_regressor):
-                learner_info += "Regression:\n"
-                for learner in [key for key, value in self._is_classifier.items() if value is False]:
-                    learner_info += f"Learner {learner} RMSE: {self.nuisance_loss[learner]}\n"
-            if any(is_classifier):
-                learner_info += "Classification:\n"
-                for learner in [key for key, value in self._is_classifier.items() if value is True]:
-                    learner_info += f"Learner {learner} Log Loss: {self.nuisance_loss[learner]}\n"
-
-        if self._is_cluster_data:
-            resampling_info = (
-                f"No. folds per cluster: {self._n_folds_per_cluster}\n"
-                f"No. folds: {self.n_folds}\n"
-                f"No. repeated sample splits: {self.n_rep}\n"
-            )
-        else:
-            resampling_info = f"No. folds: {self.n_folds}\nNo. repeated sample splits: {self.n_rep}\n"
-        fit_summary = str(self.summary)
-        res = (
-            header
-            + "\n------------------ Data summary ------------------\n"
-            + data_summary
-            + "\n------------------ Score & algorithm ------------------\n"
-            + score_info
-            + "\n------------------ Machine learner ------------------\n"
-            + learner_info
-            + "\n------------------ Resampling ------------------\n"
-            + resampling_info
-            + "\n------------------ Fit summary ------------------\n"
-            + fit_summary
-        )
-        return res
+    def _format_score_info_str(self):
+        lines = [
+            f"Score function: {str(self.score)}",
+            f"Treatment group: {str(self.g_value)}",
+            f"Pre-treatment period: {str(self.t_value_pre)}",
+            f"Evaluation period: {str(self.t_value_eval)}",
+            f"Control group: {str(self.control_group)}",
+            f"Anticipation periods: {str(self.anticipation_periods)}",
+            f"Effective sample size: {str(self.n_obs_subset)}",
+        ]
+        return "\n".join(lines)
 
     @property
     def g_value(self):
diff --git a/doubleml/did/did_cs_binary.py b/doubleml/did/did_cs_binary.py
index a6005d53..73b9152f 100644
--- a/doubleml/did/did_cs_binary.py
+++ b/doubleml/did/did_cs_binary.py
@@ -156,58 +156,19 @@ def __init__(
         self._sensitivity_implemented = True
         self._external_predictions_implemented = True
 
-    def __str__(self):
-        class_name = self.__class__.__name__
-        header = f"================== {class_name} Object ==================\n"
-        data_summary = self._dml_data._data_summary_str()
-        score_info = (
-            f"Score function: {str(self.score)}\n"
-            f"Treatment group: {str(self.g_value)}\n"
-            f"Pre-treatment period: {str(self.t_value_pre)}\n"
-            f"Evaluation period: {str(self.t_value_eval)}\n"
-            f"Control group: {str(self.control_group)}\n"
-            f"Anticipation periods: {str(self.anticipation_periods)}\n"
-            f"Effective sample size: {str(self.n_obs_subset)}\n"
-        )
-        learner_info = ""
-        for key, value in self.learner.items():
-            learner_info += f"Learner {key}: {str(value)}\n"
-        if self.nuisance_loss is not None:
-            learner_info += "Out-of-sample Performance:\n"
-            is_classifier = [value for value in self._is_classifier.values()]
-            is_regressor = [not value for value in is_classifier]
-            if any(is_regressor):
-                learner_info += "Regression:\n"
-                for learner in [key for key, value in self._is_classifier.items() if value is False]:
-                    learner_info += f"Learner {learner} RMSE: {self.nuisance_loss[learner]}\n"
-            if any(is_classifier):
-                learner_info += "Classification:\n"
-                for learner in [key for key, value in self._is_classifier.items() if value is True]:
-                    learner_info += f"Learner {learner} Log Loss: {self.nuisance_loss[learner]}\n"
-
-        if self._is_cluster_data:
-            resampling_info = (
-                f"No. folds per cluster: {self._n_folds_per_cluster}\n"
-                f"No. folds: {self.n_folds}\n"
-                f"No. repeated sample splits: {self.n_rep}\n"
-            )
-        else:
-            resampling_info = f"No. folds: {self.n_folds}\nNo. repeated sample splits: {self.n_rep}\n"
-        fit_summary = str(self.summary)
-        res = (
-            header
-            + "\n------------------ Data summary ------------------\n"
-            + data_summary
-            + "\n------------------ Score & algorithm ------------------\n"
-            + score_info
-            + "\n------------------ Machine learner ------------------\n"
-            + learner_info
-            + "\n------------------ Resampling ------------------\n"
-            + resampling_info
-            + "\n------------------ Fit summary ------------------\n"
-            + fit_summary
-        )
-        return res
+    def _format_score_info_str(self):
+        lines = [
+            f"Score function: {str(self.score)}",
+            f"Treatment group: {str(self.g_value)}",
+            f"Pre-treatment period: {str(self.t_value_pre)}",
+            f"Evaluation period: {str(self.t_value_eval)}",
+            f"Control group: {str(self.control_group)}",
+            f"Anticipation periods: {str(self.anticipation_periods)}",
+            f"Effective sample size: {str(self.n_obs_subset)}",
+        ]
+        return "\n".join(lines)
+
+    # _format_learner_info_str method is inherited from DoubleML base class.
 
     @property
     def g_value(self):
diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py
index 88f677ef..72f3b44a 100644
--- a/doubleml/double_ml.py
+++ b/doubleml/double_ml.py
@@ -110,50 +110,87 @@ def __init__(self, obj_dml_data, n_folds, n_rep, score, draw_sample_splitting):
         self._i_rep = None
         self._i_treat = None
 
-    def __str__(self):
+    def _format_header_str(self):
         class_name = self.__class__.__name__
-        header = f"================== {class_name} Object ==================\n"
-        data_summary = self._dml_data._data_summary_str()
-        score_info = f"Score function: {str(self.score)}\n"
+        return f"================== {class_name} Object =================="
+
+    def _format_score_info_str(self):
+        return f"Score function: {str(self.score)}"
+
+    def _format_learner_info_str(self):
         learner_info = ""
-        for key, value in self.learner.items():
-            learner_info += f"Learner {key}: {str(value)}\n"
+        if self.learner is not None:
+            for key, value in self.learner.items():
+                learner_info += f"Learner {key}: {str(value)}\\n"
         if self.nuisance_loss is not None:
-            learner_info += "Out-of-sample Performance:\n"
-            is_classifier = [value for value in self._is_classifier.values()]
-            is_regressor = [not value for value in is_classifier]
-            if any(is_regressor):
-                learner_info += "Regression:\n"
-                for learner in [key for key, value in self._is_classifier.items() if value is False]:
-                    learner_info += f"Learner {learner} RMSE: {self.nuisance_loss[learner]}\n"
-            if any(is_classifier):
-                learner_info += "Classification:\n"
-                for learner in [key for key, value in self._is_classifier.items() if value is True]:
-                    learner_info += f"Learner {learner} Log Loss: {self.nuisance_loss[learner]}\n"
+            learner_info += "Out-of-sample Performance:\\n"
+            # Check if _is_classifier is populated, otherwise, it might be called before fit
+            if self._is_classifier:
+                is_classifier_any = any(self._is_classifier.values())
+                is_regressor_any = any(not v for v in self._is_classifier.values())
+
+                if is_regressor_any:
+                    learner_info += "Regression:\\n"
+                    for learner_name in self.params_names:  # Iterate through known learners
+                        if not self._is_classifier.get(learner_name, True):  # Default to not regressor if not found
+                            loss_val = self.nuisance_loss.get(learner_name, "N/A")
+                            learner_info += f"Learner {learner_name} RMSE: {loss_val}\\n"
+                if is_classifier_any:
+                    learner_info += "Classification:\\n"
+                    for learner_name in self.params_names:  # Iterate through known learners
+                        if self._is_classifier.get(learner_name, False):  # Default to not classifier if not found
+                            loss_val = self.nuisance_loss.get(learner_name, "N/A")
+                            learner_info += f"Learner {learner_name} Log Loss: {loss_val}\\n"
+            else:
+                learner_info += " (Run .fit() to see out-of-sample performance)\\n"
+        return learner_info.strip()
 
+    def _format_resampling_info_str(self):
         if self._is_cluster_data:
-            resampling_info = (
-                f"No. folds per cluster: {self._n_folds_per_cluster}\n"
-                f"No. folds: {self.n_folds}\n"
-                f"No. repeated sample splits: {self.n_rep}\n"
+            return (
+                f"No. folds per cluster: {self._n_folds_per_cluster}\\n"
+                f"No. folds: {self.n_folds}\\n"
+                f"No. repeated sample splits: {self.n_rep}"
             )
         else:
-            resampling_info = f"No. folds: {self.n_folds}\nNo. repeated sample splits: {self.n_rep}\n"
-        fit_summary = str(self.summary)
-        res = (
-            header
-            + "\n------------------ Data summary ------------------\n"
-            + data_summary
-            + "\n------------------ Score & algorithm ------------------\n"
-            + score_info
-            + "\n------------------ Machine learner ------------------\n"
-            + learner_info
-            + "\n------------------ Resampling ------------------\n"
-            + resampling_info
-            + "\n------------------ Fit summary ------------------\n"
-            + fit_summary
+            return f"No. folds: {self.n_folds}\\nNo. repeated sample splits: {self.n_rep}"
+
+    def _format_additional_info_str(self):
+        """
+        Hook for subclasses to add additional information to the string representation.
+        Returns an empty string by default.
+        Subclasses should override this method to provide content.
+        The content should not include the 'Additional Information' header itself.
+        """
+        return ""
+
+    def __str__(self):
+        header = self._format_header_str()
+        # Assumes self._dml_data._data_summary_str() exists and is well-formed
+        data_summary = self._dml_data._data_summary_str()
+        score_info = self._format_score_info_str()
+        learner_info = self._format_learner_info_str()
+        resampling_info = self._format_resampling_info_str()
+        fit_summary = str(self.summary)  # Assumes self.summary is well-formed
+
+        representation = (
+            f"{header}\\n"
+            f"\\n------------------ Data Summary ------------------\\n"
+            f"{data_summary}\\n"
+            f"\\n------------------ Score & Algorithm ------------------\\n"
+            f"{score_info}\\n"
+            f"\\n------------------ Machine Learner ------------------\\n"
+            f"{learner_info}\\n"
+            f"\\n------------------ Resampling ------------------\\n"
+            f"{resampling_info}\\n"
+            f"\\n------------------ Fit Summary ------------------\\n"
+            f"{fit_summary}"
         )
-        return res
+
+        additional_info = self._format_additional_info_str()
+        if additional_info:
+            representation += f"\\n\\n------------------ Additional Information ------------------\\n" f"{additional_info}"
+        return representation
 
     @property
     def n_folds(self):
diff --git a/doubleml/irm/iivm.py b/doubleml/irm/iivm.py
index a43c0a03..b3cc11e7 100644
--- a/doubleml/irm/iivm.py
+++ b/doubleml/irm/iivm.py
@@ -197,22 +197,13 @@ def __init__(
         self.subgroups = subgroups
         self._external_predictions_implemented = True
 
-    def __str__(self):
-        parent_str = super().__str__()
-
-        # add robust confset
+    def _format_additional_info_str(self):
         if self.framework is None:
-            confset_str = ""
+            return ""
         else:
             confset = self.robust_confset()
             formatted_confset = ", ".join([f"[{lower:.4f}, {upper:.4f}]" for lower, upper in confset])
-            confset_str = (
-                "\n\n--------------- Additional Information ----------------\n"
-                + f"Robust Confidence Set: {formatted_confset}\n"
-            )
-
-        res = parent_str + confset_str
-        return res
+            return f"Robust Confidence Set: {formatted_confset}"
 
     @property
     def normalize_ipw(self):

From bf7e16af8a6b3dde11f7fd80c76549659b1e11a7 Mon Sep 17 00:00:00 2001
From: SvenKlaassen
Date: Mon, 16 Jun 2025 12:09:09 +0200
Subject: [PATCH 2/2] fix format

---
 doubleml/double_ml.py | 42 +++++++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py
index 72f3b44a..694968bc 100644
--- a/doubleml/double_ml.py
+++ b/doubleml/double_ml.py
@@ -121,39 +121,39 @@ def _format_learner_info_str(self):
         learner_info = ""
         if self.learner is not None:
             for key, value in self.learner.items():
-                learner_info += f"Learner {key}: {str(value)}\\n"
+                learner_info += f"Learner {key}: {str(value)}\n"
         if self.nuisance_loss is not None:
-            learner_info += "Out-of-sample Performance:\\n"
+            learner_info += "Out-of-sample Performance:\n"
             # Check if _is_classifier is populated, otherwise, it might be called before fit
             if self._is_classifier:
                 is_classifier_any = any(self._is_classifier.values())
                 is_regressor_any = any(not v for v in self._is_classifier.values())
 
                 if is_regressor_any:
-                    learner_info += "Regression:\\n"
+                    learner_info += "Regression:\n"
                     for learner_name in self.params_names:  # Iterate through known learners
                         if not self._is_classifier.get(learner_name, True):  # Default to not regressor if not found
                             loss_val = self.nuisance_loss.get(learner_name, "N/A")
-                            learner_info += f"Learner {learner_name} RMSE: {loss_val}\\n"
+                            learner_info += f"Learner {learner_name} RMSE: {loss_val}\n"
                 if is_classifier_any:
-                    learner_info += "Classification:\\n"
+                    learner_info += "Classification:\n"
                     for learner_name in self.params_names:  # Iterate through known learners
                         if self._is_classifier.get(learner_name, False):  # Default to not classifier if not found
                             loss_val = self.nuisance_loss.get(learner_name, "N/A")
-                            learner_info += f"Learner {learner_name} Log Loss: {loss_val}\\n"
+                            learner_info += f"Learner {learner_name} Log Loss: {loss_val}\n"
             else:
-                learner_info += " (Run .fit() to see out-of-sample performance)\\n"
+                learner_info += " (Run .fit() to see out-of-sample performance)\n"
         return learner_info.strip()
 
     def _format_resampling_info_str(self):
         if self._is_cluster_data:
             return (
-                f"No. folds per cluster: {self._n_folds_per_cluster}\\n"
-                f"No. folds: {self.n_folds}\\n"
+                f"No. folds per cluster: {self._n_folds_per_cluster}\n"
+                f"No. folds: {self.n_folds}\n"
                 f"No. repeated sample splits: {self.n_rep}"
             )
         else:
-            return f"No. folds: {self.n_folds}\\nNo. repeated sample splits: {self.n_rep}"
+            return f"No. folds: {self.n_folds}\nNo. repeated sample splits: {self.n_rep}"
 
     def _format_additional_info_str(self):
         """
@@ -174,22 +174,22 @@ def __str__(self):
         fit_summary = str(self.summary)  # Assumes self.summary is well-formed
 
         representation = (
-            f"{header}\\n"
-            f"\\n------------------ Data Summary ------------------\\n"
-            f"{data_summary}\\n"
-            f"\\n------------------ Score & Algorithm ------------------\\n"
-            f"{score_info}\\n"
-            f"\\n------------------ Machine Learner ------------------\\n"
-            f"{learner_info}\\n"
-            f"\\n------------------ Resampling ------------------\\n"
-            f"{resampling_info}\\n"
-            f"\\n------------------ Fit Summary ------------------\\n"
+            f"{header}\n"
+            f"\n------------------ Data Summary ------------------\n"
+            f"{data_summary}\n"
+            f"\n------------------ Score & Algorithm ------------------\n"
+            f"{score_info}\n"
+            f"\n------------------ Machine Learner ------------------\n"
+            f"{learner_info}\n"
+            f"\n------------------ Resampling ------------------\n"
+            f"{resampling_info}\n"
+            f"\n------------------ Fit Summary ------------------\n"
            f"{fit_summary}"
         )
 
         additional_info = self._format_additional_info_str()
         if additional_info:
-            representation += f"\\n\\n------------------ Additional Information ------------------\\n" f"{additional_info}"
+            representation += f"\n\n------------------ Additional Information ------------------\n" f"{additional_info}"
         return representation
 
     @property
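
Note for reviewers: the sketch below is a minimal, self-contained illustration of the hook pattern these two patches introduce, i.e. a base __str__ assembled from small _format_*_str helpers, with _format_additional_info_str as the only override point a subclass needs (as DoubleMLIIVM does above). DemoBase and DemoChild are hypothetical stand-ins, not part of the DoubleML API.

# Hypothetical, simplified sketch of the refactored __str__ composition.
# DemoBase / DemoChild are illustrative only; they are not DoubleML classes.


class DemoBase:
    def _format_header_str(self):
        return f"================== {self.__class__.__name__} Object =================="

    def _format_score_info_str(self):
        return "Score function: <score>"

    def _format_additional_info_str(self):
        # Hook: subclasses return extra content, or "" for no extra section.
        return ""

    def __str__(self):
        representation = (
            f"{self._format_header_str()}\n"
            f"\n------------------ Score & Algorithm ------------------\n"
            f"{self._format_score_info_str()}"
        )
        additional_info = self._format_additional_info_str()
        if additional_info:
            representation += (
                "\n\n------------------ Additional Information ------------------\n"
                f"{additional_info}"
            )
        return representation


class DemoChild(DemoBase):
    def _format_additional_info_str(self):
        # Mirrors DoubleMLIIVM: return only the extra content, without the section header.
        return "Robust Confidence Set: [0.1000, 0.9000]"


if __name__ == "__main__":
    print(DemoBase())   # no Additional Information section
    print(DemoChild())  # Additional Information section appended by the base __str__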