63 changes: 11 additions & 52 deletions doubleml/did/did_binary.py
@@ -239,58 +239,17 @@ def __init__(
self._sensitivity_implemented = True
self._external_predictions_implemented = True

def __str__(self):
class_name = self.__class__.__name__
header = f"================== {class_name} Object ==================\n"
data_summary = self._dml_data._data_summary_str()
score_info = (
f"Score function: {str(self.score)}\n"
f"Treatment group: {str(self.g_value)}\n"
f"Pre-treatment period: {str(self.t_value_pre)}\n"
f"Evaluation period: {str(self.t_value_eval)}\n"
f"Control group: {str(self.control_group)}\n"
f"Anticipation periods: {str(self.anticipation_periods)}\n"
f"Effective sample size: {str(self.n_obs_subset)}\n"
)
learner_info = ""
for key, value in self.learner.items():
learner_info += f"Learner {key}: {str(value)}\n"
if self.nuisance_loss is not None:
learner_info += "Out-of-sample Performance:\n"
is_classifier = [value for value in self._is_classifier.values()]
is_regressor = [not value for value in is_classifier]
if any(is_regressor):
learner_info += "Regression:\n"
for learner in [key for key, value in self._is_classifier.items() if value is False]:
learner_info += f"Learner {learner} RMSE: {self.nuisance_loss[learner]}\n"
if any(is_classifier):
learner_info += "Classification:\n"
for learner in [key for key, value in self._is_classifier.items() if value is True]:
learner_info += f"Learner {learner} Log Loss: {self.nuisance_loss[learner]}\n"

if self._is_cluster_data:
resampling_info = (
f"No. folds per cluster: {self._n_folds_per_cluster}\n"
f"No. folds: {self.n_folds}\n"
f"No. repeated sample splits: {self.n_rep}\n"
)
else:
resampling_info = f"No. folds: {self.n_folds}\nNo. repeated sample splits: {self.n_rep}\n"
fit_summary = str(self.summary)
res = (
header
+ "\n------------------ Data summary ------------------\n"
+ data_summary
+ "\n------------------ Score & algorithm ------------------\n"
+ score_info
+ "\n------------------ Machine learner ------------------\n"
+ learner_info
+ "\n------------------ Resampling ------------------\n"
+ resampling_info
+ "\n------------------ Fit summary ------------------\n"
+ fit_summary
)
return res
def _format_score_info_str(self):
lines = [
f"Score function: {str(self.score)}",
f"Treatment group: {str(self.g_value)}",
f"Pre-treatment period: {str(self.t_value_pre)}",
f"Evaluation period: {str(self.t_value_eval)}",
f"Control group: {str(self.control_group)}",
f"Anticipation periods: {str(self.anticipation_periods)}",
f"Effective sample size: {str(self.n_obs_subset)}",
]
return "\\n".join(lines)

@property
def g_value(self):
65 changes: 13 additions & 52 deletions doubleml/did/did_cs_binary.py
@@ -156,58 +156,19 @@ def __init__(
self._sensitivity_implemented = True
self._external_predictions_implemented = True

def __str__(self):
class_name = self.__class__.__name__
header = f"================== {class_name} Object ==================\n"
data_summary = self._dml_data._data_summary_str()
score_info = (
f"Score function: {str(self.score)}\n"
f"Treatment group: {str(self.g_value)}\n"
f"Pre-treatment period: {str(self.t_value_pre)}\n"
f"Evaluation period: {str(self.t_value_eval)}\n"
f"Control group: {str(self.control_group)}\n"
f"Anticipation periods: {str(self.anticipation_periods)}\n"
f"Effective sample size: {str(self.n_obs_subset)}\n"
)
learner_info = ""
for key, value in self.learner.items():
learner_info += f"Learner {key}: {str(value)}\n"
if self.nuisance_loss is not None:
learner_info += "Out-of-sample Performance:\n"
is_classifier = [value for value in self._is_classifier.values()]
is_regressor = [not value for value in is_classifier]
if any(is_regressor):
learner_info += "Regression:\n"
for learner in [key for key, value in self._is_classifier.items() if value is False]:
learner_info += f"Learner {learner} RMSE: {self.nuisance_loss[learner]}\n"
if any(is_classifier):
learner_info += "Classification:\n"
for learner in [key for key, value in self._is_classifier.items() if value is True]:
learner_info += f"Learner {learner} Log Loss: {self.nuisance_loss[learner]}\n"

if self._is_cluster_data:
resampling_info = (
f"No. folds per cluster: {self._n_folds_per_cluster}\n"
f"No. folds: {self.n_folds}\n"
f"No. repeated sample splits: {self.n_rep}\n"
)
else:
resampling_info = f"No. folds: {self.n_folds}\nNo. repeated sample splits: {self.n_rep}\n"
fit_summary = str(self.summary)
res = (
header
+ "\n------------------ Data summary ------------------\n"
+ data_summary
+ "\n------------------ Score & algorithm ------------------\n"
+ score_info
+ "\n------------------ Machine learner ------------------\n"
+ learner_info
+ "\n------------------ Resampling ------------------\n"
+ resampling_info
+ "\n------------------ Fit summary ------------------\n"
+ fit_summary
)
return res
def _format_score_info_str(self):
lines = [
f"Score function: {str(self.score)}",
f"Treatment group: {str(self.g_value)}",
f"Pre-treatment period: {str(self.t_value_pre)}",
f"Evaluation period: {str(self.t_value_eval)}",
f"Control group: {str(self.control_group)}",
f"Anticipation periods: {str(self.anticipation_periods)}",
f"Effective sample size: {str(self.n_obs_subset)}",
]
return "\n".join(lines)

# _format_learner_info_str method is inherited from DoubleML base class.

@property
def g_value(self):
103 changes: 70 additions & 33 deletions doubleml/double_ml.py
@@ -110,50 +110,87 @@ def __init__(self, obj_dml_data, n_folds, n_rep, score, draw_sample_splitting):
self._i_rep = None
self._i_treat = None

def __str__(self):
def _format_header_str(self):
class_name = self.__class__.__name__
header = f"================== {class_name} Object ==================\n"
data_summary = self._dml_data._data_summary_str()
score_info = f"Score function: {str(self.score)}\n"
return f"================== {class_name} Object =================="

def _format_score_info_str(self):
return f"Score function: {str(self.score)}"

def _format_learner_info_str(self):
learner_info = ""
for key, value in self.learner.items():
learner_info += f"Learner {key}: {str(value)}\n"
if self.learner is not None:
for key, value in self.learner.items():
learner_info += f"Learner {key}: {str(value)}\n"
if self.nuisance_loss is not None:
learner_info += "Out-of-sample Performance:\n"
is_classifier = [value for value in self._is_classifier.values()]
is_regressor = [not value for value in is_classifier]
if any(is_regressor):
learner_info += "Regression:\n"
for learner in [key for key, value in self._is_classifier.items() if value is False]:
learner_info += f"Learner {learner} RMSE: {self.nuisance_loss[learner]}\n"
if any(is_classifier):
learner_info += "Classification:\n"
for learner in [key for key, value in self._is_classifier.items() if value is True]:
learner_info += f"Learner {learner} Log Loss: {self.nuisance_loss[learner]}\n"
# _is_classifier is only populated during fit(); guard against __str__ being called before fitting
if self._is_classifier:
is_classifier_any = any(self._is_classifier.values())
is_regressor_any = any(not v for v in self._is_classifier.values())

if is_regressor_any:
learner_info += "Regression:\n"
for learner_name in self.params_names: # Iterate through known learners
if not self._is_classifier.get(learner_name, True): # Default to not regressor if not found
loss_val = self.nuisance_loss.get(learner_name, "N/A")
learner_info += f"Learner {learner_name} RMSE: {loss_val}\n"
if is_classifier_any:
learner_info += "Classification:\n"
for learner_name in self.params_names: # Iterate through known learners
if self._is_classifier.get(learner_name, False): # Default to not classifier if not found
loss_val = self.nuisance_loss.get(learner_name, "N/A")
learner_info += f"Learner {learner_name} Log Loss: {loss_val}\n"
else:
learner_info += " (Run .fit() to see out-of-sample performance)\n"
return learner_info.strip()

def _format_resampling_info_str(self):
if self._is_cluster_data:
resampling_info = (
return (
f"No. folds per cluster: {self._n_folds_per_cluster}\n"
f"No. folds: {self.n_folds}\n"
f"No. repeated sample splits: {self.n_rep}\n"
f"No. repeated sample splits: {self.n_rep}"
)
else:
resampling_info = f"No. folds: {self.n_folds}\nNo. repeated sample splits: {self.n_rep}\n"
fit_summary = str(self.summary)
res = (
header
+ "\n------------------ Data summary ------------------\n"
+ data_summary
+ "\n------------------ Score & algorithm ------------------\n"
+ score_info
+ "\n------------------ Machine learner ------------------\n"
+ learner_info
+ "\n------------------ Resampling ------------------\n"
+ resampling_info
+ "\n------------------ Fit summary ------------------\n"
+ fit_summary
return f"No. folds: {self.n_folds}\nNo. repeated sample splits: {self.n_rep}"

def _format_additional_info_str(self):
"""
Hook for subclasses to add additional information to the string representation.
Returns an empty string by default.
Subclasses should override this method to provide content.
The content should not include the 'Additional Information' header itself.
"""
return ""

def __str__(self):
header = self._format_header_str()
# Assumes self._dml_data._data_summary_str() exists and is well-formed
data_summary = self._dml_data._data_summary_str()
score_info = self._format_score_info_str()
learner_info = self._format_learner_info_str()
resampling_info = self._format_resampling_info_str()
fit_summary = str(self.summary) # Assumes self.summary is well-formed

representation = (
f"{header}\n"
f"\n------------------ Data Summary ------------------\n"
f"{data_summary}\n"
f"\n------------------ Score & Algorithm ------------------\n"
f"{score_info}\n"
f"\n------------------ Machine Learner ------------------\n"
f"{learner_info}\n"
f"\n------------------ Resampling ------------------\n"
f"{resampling_info}\n"
f"\n------------------ Fit Summary ------------------\n"
f"{fit_summary}"
)
return res

additional_info = self._format_additional_info_str()
if additional_info:
representation += f"\n\n------------------ Additional Information ------------------\n" f"{additional_info}"
return representation

@property
def n_folds(self):
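Taken together, the base-class __str__ now works as a template method: it stitches the _format_* helpers together and appends an optional "Additional Information" section supplied by the new _format_additional_info_str hook. A self-contained sketch of that pattern (the classes, method bodies, and the hard-coded interval below are illustrative stand-ins, not actual doubleml classes):

# Minimal sketch of the template-method pattern used above; names are illustrative only.
class _BaseModel:
    def _format_header_str(self):
        return f"================== {self.__class__.__name__} Object =================="

    def _format_additional_info_str(self):
        # Hook: subclasses return extra text; an empty string suppresses the section.
        return ""

    def __str__(self):
        representation = self._format_header_str()
        additional_info = self._format_additional_info_str()
        if additional_info:
            representation += (
                "\n\n------------------ Additional Information ------------------\n"
                + additional_info
            )
        return representation


class _ModelWithConfset(_BaseModel):
    def _format_additional_info_str(self):
        # Mirrors the DoubleMLIIVM override below, with a hard-coded placeholder interval.
        return "Robust Confidence Set: [0.1042, 0.8931]"


print(_ModelWithConfset())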
15 changes: 3 additions & 12 deletions doubleml/irm/iivm.py
@@ -197,22 +197,13 @@ def __init__(
self.subgroups = subgroups
self._external_predictions_implemented = True

def __str__(self):
parent_str = super().__str__()

# add robust confset
def _format_additional_info_str(self):
if self.framework is None:
confset_str = ""
return ""
else:
confset = self.robust_confset()
formatted_confset = ", ".join([f"[{lower:.4f}, {upper:.4f}]" for lower, upper in confset])
confset_str = (
"\n\n--------------- Additional Information ----------------\n"
+ f"Robust Confidence Set: {formatted_confset}\n"
)

res = parent_str + confset_str
return res
return f"Robust Confidence Set: {formatted_confset}"

@property
def normalize_ipw(self):
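For completeness, a rough usage sketch of how the refactor surfaces to users: after fitting, the printed summary of a DoubleMLIIVM should end with the "Additional Information" section carrying the robust confidence set. The data generator and constructor arguments below follow the usual doubleml examples rather than this PR and may need adjusting to the installed version.

# Hedged usage sketch; argument names follow common doubleml examples, not this PR.
import numpy as np
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from doubleml import DoubleMLIIVM
from doubleml.datasets import make_iivm_data

np.random.seed(42)
obj_dml_data = make_iivm_data(theta=0.5, n_obs=500, dim_x=20, return_type="DoubleMLData")

dml_iivm = DoubleMLIIVM(
    obj_dml_data,
    ml_g=RandomForestRegressor(n_estimators=50),
    ml_m=RandomForestClassifier(n_estimators=50),
    ml_r=RandomForestClassifier(n_estimators=50),
    n_folds=3,
)
dml_iivm.fit()
print(dml_iivm)  # summary should now end with the robust confidence set under "Additional Information"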