Commit c31b082

add sim for did_pa_multi_tune

1 parent: 375333c

10 files changed: +350 -1 lines

monte-cover/src/montecover/did/__init__.py

Lines changed: 6 additions & 1 deletion
@@ -2,5 +2,10 @@
 
 from montecover.did.did_cs_multi import DIDCSMultiCoverageSimulation
 from montecover.did.did_pa_multi import DIDMultiCoverageSimulation
+from montecover.did.did_pa_multi_tune import DIDMultiTuningCoverageSimulation
 
-__all__ = ["DIDMultiCoverageSimulation", "DIDCSMultiCoverageSimulation"]
+__all__ = [
+    "DIDMultiCoverageSimulation",
+    "DIDCSMultiCoverageSimulation",
+    "DIDMultiTuningCoverageSimulation"
+]
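With this export in place, the new simulation class is importable directly from the subpackage, as the runner script further below does:

from montecover.did import DIDMultiTuningCoverageSimulation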
monte-cover/src/montecover/did/did_pa_multi_tune.py

Lines changed: 248 additions & 0 deletions
@@ -0,0 +1,248 @@
from typing import Any, Dict, Optional

import doubleml as dml
import numpy as np
import optuna
import pandas as pd
from doubleml.did.datasets import make_did_CS2021

from montecover.base import BaseSimulation
from montecover.utils import create_learner_from_config


class DIDMultiTuningCoverageSimulation(BaseSimulation):
    """Simulation study for coverage properties of DoubleMLDIDMulti with hyperparameter tuning."""

    def __init__(
        self,
        config_file: str,
        suppress_warnings: bool = True,
        log_level: str = "INFO",
        log_file: Optional[str] = None,
    ):
        super().__init__(
            config_file=config_file,
            suppress_warnings=suppress_warnings,
            log_level=log_level,
            log_file=log_file,
        )

        # Additional results storage for aggregated results
        self.results_aggregated = []

        # Calculate oracle values
        self._calculate_oracle_values()

        # Tuning-specific settings
        # Parameter space for the outcome regression tuning
        def ml_g_params(trial):
            return {
                "n_estimators": trial.suggest_int("n_estimators", 100, 200, step=50),
                "learning_rate": trial.suggest_float(
                    "learning_rate", 1e-3, 0.1, log=True
                ),
                "min_child_samples": trial.suggest_int(
                    "min_child_samples", 20, 50, step=5
                ),
                "max_depth": 5,
                "lambda_l1": trial.suggest_float("lambda_l1", 1e-3, 10.0, log=True),
                "lambda_l2": trial.suggest_float("lambda_l2", 1e-3, 10.0, log=True),
            }

        # Parameter space for the propensity score tuning
        def ml_m_params(trial):
            return {
                "n_estimators": trial.suggest_int("n_estimators", 100, 200, step=50),
                "learning_rate": trial.suggest_float(
                    "learning_rate", 1e-3, 0.1, log=True
                ),
                "min_child_samples": trial.suggest_int(
                    "min_child_samples", 20, 50, step=5
                ),
                "max_depth": 5,
                "lambda_l1": trial.suggest_float("lambda_l1", 1e-3, 10.0, log=True),
                "lambda_l2": trial.suggest_float("lambda_l2", 1e-3, 10.0, log=True),
            }

        self._param_space = {"ml_g": ml_g_params, "ml_m": ml_m_params}

        self._optuna_settings = {
            "n_trials": 200,
            "show_progress_bar": False,
            "verbosity": optuna.logging.WARNING,  # Suppress Optuna logs
        }

    def _process_config_parameters(self):
        """Process simulation-specific parameters from config."""
        # Process ML models in parameter grid
        assert (
            "learners" in self.dml_parameters
        ), "No learners specified in the config file"

        required_learners = ["ml_g", "ml_m"]
        for learner in self.dml_parameters["learners"]:
            for ml in required_learners:
                assert ml in learner, f"No {ml} specified in the config file"

    def _calculate_oracle_values(self):
        """Calculate oracle values for the simulation."""
        self.logger.info("Calculating oracle values")

        self.oracle_values = dict()
        # Detailed oracle values per (group, time) cell
        df_oracle = make_did_CS2021(
            n_obs=int(1e6), dgp_type=1
        )  # does not depend on the DGP type
        df_oracle["ite"] = df_oracle["y1"] - df_oracle["y0"]
        self.oracle_values["detailed"] = (
            df_oracle.groupby(["d", "t"])["ite"].mean().reset_index()
        )

        # Oracle group aggregation
        df_oracle_post_treatment = df_oracle[df_oracle["t"] >= df_oracle["d"]]
        self.oracle_values["group"] = df_oracle_post_treatment.groupby("d")[
            "ite"
        ].mean()

        # Oracle time aggregation
        self.oracle_values["time"] = df_oracle_post_treatment.groupby("t")["ite"].mean()

        # Oracle event-study aggregation
        df_oracle["e"] = pd.to_datetime(df_oracle["t"]).values.astype(
            "datetime64[M]"
        ) - pd.to_datetime(df_oracle["d"]).values.astype("datetime64[M]")
        self.oracle_values["eventstudy"] = df_oracle.groupby("e")["ite"].mean()[1:]

    def run_single_rep(self, dml_data, dml_params) -> Dict[str, Any]:
        """Run a single repetition with the given parameters."""
        # Extract parameters
        learner_config = dml_params["learners"]
        learner_g_name, ml_g = create_learner_from_config(learner_config["ml_g"])
        learner_m_name, ml_m = create_learner_from_config(learner_config["ml_m"])
        score = dml_params["score"]
        in_sample_normalization = dml_params["in_sample_normalization"]

        # Untuned model
        dml_model = dml.did.DoubleMLDIDMulti(
            obj_dml_data=dml_data,
            ml_g=ml_g,
            ml_m=None if score == "experimental" else ml_m,
            gt_combinations="standard",
            score=score,
            in_sample_normalization=in_sample_normalization,
        )
        # Tuned model
        dml_model_tuned = dml.did.DoubleMLDIDMulti(
            obj_dml_data=dml_data,
            ml_g=ml_g,
            ml_m=None if score == "experimental" else ml_m,
            gt_combinations="standard",
            score=score,
            in_sample_normalization=in_sample_normalization,
        )
        dml_model_tuned.tune_ml_models(
            ml_param_space=self._param_space,
            optuna_settings=self._optuna_settings,
        )

        # Look up the oracle thetas for the estimated (group, time) combinations
        oracle_thetas = np.full(len(dml_model.gt_combinations), np.nan)
        for i, (g, _, t) in enumerate(dml_model.gt_combinations):
            group_index = self.oracle_values["detailed"]["d"] == g
            time_index = self.oracle_values["detailed"]["t"] == t
            oracle_thetas[i] = self.oracle_values["detailed"][group_index & time_index][
                "ite"
            ].iloc[0]

        result = {
            "detailed": [],
            "group": [],
            "time": [],
            "eventstudy": [],
        }
        for model in [dml_model, dml_model_tuned]:
            model.fit()
            model.bootstrap(n_rep_boot=2000)
            for level in self.confidence_parameters["level"]:
                level_result = dict()
                level_result["detailed"] = self._compute_coverage(
                    thetas=model.coef,
                    oracle_thetas=oracle_thetas,
                    confint=model.confint(level=level),
                    joint_confint=model.confint(level=level, joint=True),
                )

                for aggregation_method in ["group", "time", "eventstudy"]:
                    agg_obj = model.aggregate(aggregation=aggregation_method)
                    agg_obj.aggregated_frameworks.bootstrap(n_rep_boot=2000)

                    level_result[aggregation_method] = self._compute_coverage(
                        thetas=agg_obj.aggregated_frameworks.thetas,
                        oracle_thetas=self.oracle_values[aggregation_method].values,
                        confint=agg_obj.aggregated_frameworks.confint(level=level),
                        joint_confint=agg_obj.aggregated_frameworks.confint(
                            level=level, joint=True
                        ),
                    )

                # Add parameters to the result
                for res in level_result.values():
                    res.update(
                        {
                            "Learner g": learner_g_name,
                            "Learner m": learner_m_name,
                            "Score": score,
                            "In-sample-norm.": in_sample_normalization,
                            "level": level,
                            "Tuned": model is dml_model_tuned,
                        }
                    )
                for key, res in level_result.items():
                    result[key].append(res)

        return result

    def summarize_results(self):
        """Summarize the simulation results."""
        self.logger.info("Summarizing simulation results")

        groupby_cols = [
            "Learner g",
            "Learner m",
            "Score",
            "In-sample-norm.",
            "DGP",
            "level",
            "Tuned",
        ]
        aggregation_dict = {
            "Coverage": "mean",
            "CI Length": "mean",
            "Bias": "mean",
            "Uniform Coverage": "mean",
            "Uniform CI Length": "mean",
            "repetition": "count",
        }

        result_summary = dict()
        for result_name, result_df in self.results.items():
            result_summary[result_name] = (
                result_df.groupby(groupby_cols).agg(aggregation_dict).reset_index()
            )
            self.logger.debug(f"Summarized {result_name} results")

        return result_summary

    def _generate_dml_data(self, dgp_params) -> dml.data.DoubleMLPanelData:
        """Generate data for the simulation."""
        data = make_did_CS2021(n_obs=dgp_params["n_obs"], dgp_type=dgp_params["DGP"])
        dml_data = dml.data.DoubleMLPanelData(
            data,
            y_col="y",
            d_cols="d",
            id_col="id",
            t_col="t",
            x_cols=["Z1", "Z2", "Z3", "Z4"],
        )
        return dml_data
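The tuned model hands one trial-based callable per learner to tune_ml_models. As a standalone illustration of that parameter-space convention, the following minimal sketch feeds the same ml_g_params search space to Optuna directly, using a toy dataset and a cross-validated LightGBM objective. The dataset and objective are illustrative assumptions, not what tune_ml_models does internally; it assumes lightgbm and scikit-learn are installed.

import optuna
from lightgbm import LGBMRegressor
from sklearn.datasets import make_regression
from sklearn.model_selection import cross_val_score

X, y = make_regression(n_samples=500, n_features=4, random_state=42)

def ml_g_params(trial):
    # Same search space as in the simulation class above
    return {
        "n_estimators": trial.suggest_int("n_estimators", 100, 200, step=50),
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.1, log=True),
        "min_child_samples": trial.suggest_int("min_child_samples", 20, 50, step=5),
        "max_depth": 5,
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-3, 10.0, log=True),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-3, 10.0, log=True),
    }

def objective(trial):
    # Illustrative objective: 3-fold CV score of a LightGBM regressor
    learner = LGBMRegressor(**ml_g_params(trial), verbose=-1)
    return cross_val_score(learner, X, y, cv=3, scoring="neg_mean_squared_error").mean()

optuna.logging.set_verbosity(optuna.logging.WARNING)  # mirrors the simulation's verbosity setting
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=20, show_progress_bar=False)
print(study.best_params)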
results/did/did_pa_multi_tune_config.yml

Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
simulation_parameters:
  repetitions: 2
  max_runtime: 19800
  random_seed: 42
  n_jobs: -2
dgp_parameters:
  DGP:
  - 1
  n_obs:
  - 2000
learner_definitions:
  lgbmr: &id001
    name: LGBM Regr.
  lgbmc: &id002
    name: LGBM Clas.
dml_parameters:
  learners:
  - ml_g: *id001
    ml_m: *id002
  score:
  - observational
  in_sample_normalization:
  - true
confidence_parameters:
  level:
  - 0.95
  - 0.9
results/did/did_pa_multi_tune_detailed.csv

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
Learner g,Learner m,Score,In-sample-norm.,DGP,level,Tuned,Coverage,CI Length,Bias,Uniform Coverage,Uniform CI Length,repetition
LGBM Regr.,LGBM Clas.,observational,True,1,0.9,False,0.9166666666666667,0.9768034697095042,0.23400349839993873,1.0,1.5475751781406841,2
LGBM Regr.,LGBM Clas.,observational,True,1,0.9,True,0.875,0.606470751441224,0.1614408775625408,0.5,0.9452913645347121,2
LGBM Regr.,LGBM Clas.,observational,True,1,0.95,False,1.0,1.163933124038929,0.23400349839993873,1.0,1.6901781219508825,2
LGBM Regr.,LGBM Clas.,observational,True,1,0.95,True,0.875,0.7226544727294515,0.1614408775625408,0.5,1.033662115539573,2
results/did/did_pa_multi_tune_eventstudy.csv

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
Learner g,Learner m,Score,In-sample-norm.,DGP,level,Tuned,Coverage,CI Length,Bias,Uniform Coverage,Uniform CI Length,repetition
LGBM Regr.,LGBM Clas.,observational,True,1,0.9,False,1.0,0.8932471261067743,0.22134836624169563,1.0,1.2842220318304163,2
LGBM Regr.,LGBM Clas.,observational,True,1,0.9,True,0.8333333333333333,0.5517000133722438,0.171384287662864,0.5,0.7722944107543078,2
LGBM Regr.,LGBM Clas.,observational,True,1,0.95,False,1.0,1.0643695996876914,0.22134836624169563,1.0,1.4044272167279779,2
LGBM Regr.,LGBM Clas.,observational,True,1,0.95,True,0.9166666666666667,0.6573911129611814,0.171384287662864,0.5,0.8460573095770816,2
results/did/did_pa_multi_tune_group.csv

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
Learner g,Learner m,Score,In-sample-norm.,DGP,level,Tuned,Coverage,CI Length,Bias,Uniform Coverage,Uniform CI Length,repetition
LGBM Regr.,LGBM Clas.,observational,True,1,0.9,False,0.8333333333333333,0.9252764729189984,0.27505916642335154,1.0,1.1910838199026417,2
LGBM Regr.,LGBM Clas.,observational,True,1,0.9,True,0.8333333333333333,0.6185581636672488,0.18895139098415115,0.5,0.7900541517154416,2
LGBM Regr.,LGBM Clas.,observational,True,1,0.95,False,1.0,1.10253492040176,0.27505916642335154,1.0,1.328849771012091,2
LGBM Regr.,LGBM Clas.,observational,True,1,0.95,True,0.8333333333333333,0.7370575127575214,0.18895139098415115,0.5,0.8906147435536307,2
results/did/did_pa_multi_tune_metadata.csv

Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
DoubleML Version,Script,Date,Total Runtime (minutes),Python Version,Config File
0.12.dev0,DIDMultiTuningCoverageSimulation,2025-11-27 21:14,8.742515516281127,3.12.9,scripts/did/did_pa_multi_tune_config.yml
results/did/did_pa_multi_tune_time.csv

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
Learner g,Learner m,Score,In-sample-norm.,DGP,level,Tuned,Coverage,CI Length,Bias,Uniform Coverage,Uniform CI Length,repetition
LGBM Regr.,LGBM Clas.,observational,True,1,0.9,False,0.8333333333333333,0.9347149637122762,0.21239079355381435,1.0,1.1850129288693911,2
LGBM Regr.,LGBM Clas.,observational,True,1,0.9,True,0.8333333333333333,0.5539493870014797,0.16522070900786323,1.0,0.6750979119813514,2
LGBM Regr.,LGBM Clas.,observational,True,1,0.95,False,1.0,1.1137815758610201,0.21239079355381435,1.0,1.299771642249359,2
LGBM Regr.,LGBM Clas.,observational,True,1,0.95,True,1.0,0.6600714069574607,0.16522070900786323,1.0,0.7914255322117764,2
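For a quick look at tuned vs. untuned rows in any of these summaries, a short pandas sketch; the file name below is an assumption combining save_results' output path and prefix with the result key:

import pandas as pd

# Assumed name: output_path "results/did/" + file_prefix "did_pa_multi_tune" + result key
df = pd.read_csv("results/did/did_pa_multi_tune_detailed.csv")
# Average coverage, interval length, and bias, split by confidence level and tuning status
print(df.groupby(["level", "Tuned"])[["Coverage", "CI Length", "Bias"]].mean())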

scripts/did/did_pa_multi_tune.py

Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
from montecover.did import DIDMultiTuningCoverageSimulation

# Create and run simulation with config file
sim = DIDMultiTuningCoverageSimulation(
    config_file="scripts/did/did_pa_multi_tune_config.yml",
    log_level="DEBUG",
    log_file="logs/did/did_pa_multi_tune_sim.log",
)
sim.run_simulation()
sim.save_results(output_path="results/did/", file_prefix="did_pa_multi_tune")

# Save config file for reproducibility
sim.save_config("results/did/did_pa_multi_tune_config.yml")
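The runner is presumably launched from the repository root (e.g. python scripts/did/did_pa_multi_tune.py), since the config, log, and results paths above are all relative.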
scripts/did/did_pa_multi_tune_config.yml

Lines changed: 34 additions & 0 deletions
@@ -0,0 +1,34 @@
# Simulation parameters for DID Multi Coverage

simulation_parameters:
  repetitions: 2
  max_runtime: 19800 # 5.5 hours in seconds
  random_seed: 42
  n_jobs: -2

dgp_parameters:
  DGP: [1] # Different DGP specifications
  n_obs: [2000] # Sample size for each simulation (has to be a list)

# Define reusable learner configurations
learner_definitions:
  lgbmr: &lgbmr
    name: "LGBM Regr."

  lgbmc: &lgbmc
    name: "LGBM Clas."


dml_parameters:
  # ML methods for ml_g and ml_m
  learners:
    - ml_g: *lgbmr
      ml_m: *lgbmc

  score:
    - observational # Standard DML score

  in_sample_normalization: [true]

confidence_parameters:
  level: [0.95, 0.90] # Confidence levels
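The &lgbmr / *lgbmr pairs above are standard YAML anchors and aliases: they expand on load, so each learner entry carries the full anchored definition (the saved copy in results/ shows the same structure with auto-generated &id001 anchors). A minimal sketch, assuming PyYAML is available:

import yaml  # assumption: PyYAML is installed

with open("scripts/did/did_pa_multi_tune_config.yml") as f:
    cfg = yaml.safe_load(f)

# The alias *lgbmr resolves to the anchored mapping {'name': 'LGBM Regr.'}
print(cfg["dml_parameters"]["learners"][0]["ml_g"])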
