Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 49 additions & 36 deletions Murray/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,9 @@ def filter_controls_by_weights(self, control_group, min_weight_threshold=0.001):
if np.sum(filtered_weights) > 0:
filtered_weights = filtered_weights / np.sum(filtered_weights)

# Round weights to 2 decimal places
filtered_weights = np.round(filtered_weights, 2)

return filtered_control_group, filtered_weights


Expand Down Expand Up @@ -357,7 +360,7 @@ def evaluate_group(
logger.debug(f"Starting evaluation for treatment group: {treatment_group}")

treatment_Y = data[data["location"].isin(treatment_group)]["Y"].sum()
holdout_percentage = (1 - (treatment_Y / total_Y)) * 100
holdout_percentage = round((1 - (treatment_Y / total_Y)) * 100, 2)

logger.debug(
f"Treatment Y: {treatment_Y}, Holdout percentage: {holdout_percentage:.2f}%"
Expand Down Expand Up @@ -428,27 +431,31 @@ def evaluate_group(
)

logger.debug("Calculating metrics")
MAPE = (
MAPE = round(
np.mean(
np.abs(
(y_original[split_index:] - counterfactual_full_original[split_index:])
/ (y_original[split_index:] + 1e-10)
)
)
* 100
* 100,
2
)
SMAPE_value = smape(
y_original[split_index:], counterfactual_full_original[split_index:]
SMAPE_value = round(
smape(
y_original[split_index:], counterfactual_full_original[split_index:]
),
2
)
observed_conformity = np.mean(y_original - counterfactual_full_original)
observed_conformity = round(float(np.mean(y_original - counterfactual_full_original)), 2)

return (
treatment_group,
filtered_control_group,
MAPE,
SMAPE_value,
y_original,
counterfactual_full_original,
np.round(y_original, 2),
np.round(counterfactual_full_original, 2),
filtered_weights,
observed_conformity,
)
Expand Down Expand Up @@ -678,7 +685,7 @@ def evaluate_group_exclusive(
)

treatment_Y = data[data["location"].isin(treatment_group)]["Y"].sum()
holdout_percentage = (1 - (treatment_Y / total_Y)) * 100
holdout_percentage = round((1 - (treatment_Y / total_Y)) * 100, 2)

logger.debug(
f"Treatment Y: {treatment_Y}, Holdout percentage: {holdout_percentage:.2f}%"
Expand Down Expand Up @@ -750,28 +757,32 @@ def evaluate_group_exclusive(
)

logger.debug("Calculating metrics")
MAPE = (
MAPE = round(
np.mean(
np.abs(
(y_original[split_index:] - counterfactual_full_original[split_index:])
/ (y_original[split_index:] + 1e-10)
)
)
* 100
* 100,
2
)
SMAPE_value = smape(
y_original[split_index:], counterfactual_full_original[split_index:]
SMAPE_value = round(
smape(
y_original[split_index:], counterfactual_full_original[split_index:]
),
2
)

observed_conformity = np.mean(y_original - counterfactual_full_original)
observed_conformity = round(float(np.mean(y_original - counterfactual_full_original)), 2)

return (
treatment_group,
filtered_control_group,
MAPE,
SMAPE_value,
y_original,
counterfactual_full_original,
np.round(y_original, 2),
np.round(counterfactual_full_original, 2),
filtered_weights,
observed_conformity,
)
Expand Down Expand Up @@ -818,10 +829,10 @@ def BetterGroups(
"""
unique_locations = data["location"].unique()
no_locations = len(unique_locations)
# max_group_size = round(no_locations * 0.35)
# min_elements_in_treatment = round(no_locations * 0.20)
max_group_size = round(no_locations * 0.45)
min_elements_in_treatment = round(no_locations * 0.15)
max_group_size = round(no_locations * 0.35)
min_elements_in_treatment = round(no_locations * 0.20)
# max_group_size = round(no_locations * 0.45)
# min_elements_in_treatment = round(no_locations * 0.15)
min_holdout = 100 - (maximum_treatment_percentage * 100)
total_Y = data["Y"].sum()

Expand Down Expand Up @@ -1062,7 +1073,7 @@ def BetterGroups(
treatment_Y = data[data["location"].isin(best_treatment_group)]["Y"].sum()

if total_Y > 0:
holdout_percentage = ((total_Y - treatment_Y) / total_Y) * 100
holdout_percentage = round(((total_Y - treatment_Y) / total_Y) * 100, 2)
else:
holdout_percentage = 0.0

Expand Down Expand Up @@ -1395,8 +1406,8 @@ def optimize_global_multicell(
logger.error(f"STILL HAVE OVERLAP in cell {i+1}: {overlap}")

treatment_Y = data[data["location"].isin(treatment_group)]["Y"].sum()
holdout_percentage = (
((total_Y - treatment_Y) / total_Y) * 100 if total_Y > 0 else 0.0
holdout_percentage = round(
((total_Y - treatment_Y) / total_Y) * 100 if total_Y > 0 else 0.0, 2
)

result_dict = {
Expand Down Expand Up @@ -1735,25 +1746,25 @@ def simulate_power(
null_stats = np.array(null_stats)

# Two-sided test
p_value = np.mean(np.abs(null_stats) >= np.abs(observed_stat))
p_value = round(float(np.mean(np.abs(null_stats) >= np.abs(observed_stat))), 2)
p_values.append(p_value)

if p_value < significance_level:
rejected_tests += 1

power = rejected_tests / n_power_simulations
power = round(rejected_tests / n_power_simulations, 2)

# Calculate confidence interval for power estimate
power_se = np.sqrt(power * (1 - power) / n_power_simulations)
power_ci = (max(0, power - 1.95 * power_se), min(1, power + 1.95 * power_se))
power_ci = (round(max(0, power - 1.95 * power_se), 2), round(min(1, power + 1.95 * power_se), 2))

y_with_lift_sample = apply_lift(y_real, delta, start_treatment, end_treatment)

logger.debug(
f"Power simulation completed: power={power:.4f}, CI=({power_ci[0]:.4f}, {power_ci[1]:.4f}), mean p-value={np.mean(p_values):.4f}"
)

return delta, power, power_ci, y_with_lift_sample, np.mean(p_values)
return delta, power, power_ci, y_with_lift_sample, round(float(np.mean(p_values)), 2)


def run_simulation(
Expand Down Expand Up @@ -1914,7 +1925,7 @@ def evaluate_sensitivity(
logger.debug(f"Status update failed: {e}")

statistical_power = [
(res[0], res[1], res[2], res[4]) for res in results
(round(res[0], 2), res[1], res[2], res[4]) for res in results
] # (delta, power, power_ci, p_value)
mde = next(
(
Expand All @@ -1925,20 +1936,20 @@ def evaluate_sensitivity(
None,
)

p_value = None
mde_p_value = None
power_ci = None
power = None
mde_power = None
if mde is not None:
for delta, power, ci, p_value in statistical_power:
if delta == mde:
p_value = p_value
mde_p_value = p_value
power_ci = ci
power = power
mde_power = power
break

# Format values safely for logging
p_value_str = f"{p_value:.4f}" if p_value is not None else "None"
power_str = f"{power:.4f}" if power is not None else "None"
p_value_str = f"{mde_p_value:.4f}" if mde_p_value is not None else "None"
power_str = f"{mde_power:.4f}" if mde_power is not None else "None"
power_ci_str = (
f"({power_ci[0]:.4f} - {power_ci[1]:.4f})"
if power_ci is not None
Expand All @@ -1955,9 +1966,9 @@ def evaluate_sensitivity(
results_by_period[period] = {
"Statistical Power": statistical_power,
"MDE": mde,
"P-Value": p_value,
"P-Value": mde_p_value,
"MDE_CI": power_ci,
"Power": power,
"Power": mde_power,
}

sensitivity_results[size] = results_by_period
Expand Down Expand Up @@ -2093,6 +2104,8 @@ def run_geo_analysis_streamlit_app(

periods = list(np.arange(*periods_range))
deltas = np.arange(*deltas_range)
# logger.info(f'Deltas: {deltas}')
# logger.info(f'Periods: {periods}')

# Step 1: Generate market correlations
logger.info("Step 1: Generating market correlations.....")
Expand Down
4 changes: 2 additions & 2 deletions Murray/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -2146,7 +2146,7 @@ def calculate_confidence_bands(
lower = predicted - band_width / 2
upper = predicted + band_width / 2

return lower, upper
return np.round(lower, 2), np.round(upper, 2)


def calculate_optimal_noise_scale(predictions, actual_values, min_relative_scale=0.005):
Expand All @@ -2167,4 +2167,4 @@ def calculate_optimal_noise_scale(predictions, actual_values, min_relative_scale
relative_scale = max(np.median(relative_errors), min_relative_scale)

final_scale = max(scale_mad, relative_scale * np.median(np.abs(actual_values)))
return final_scale
return round(float(final_scale), 2)
70 changes: 35 additions & 35 deletions Murray/post_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,8 @@ def smape(A, F):
logger.info("Calculating metrics...")
logger.info(f"Data shapes - treatment: {treatment.shape}, counterfactual: {counterfactual.shape}")

MAPE = np.mean(np.abs((y_original - counterfactual) / (y_original + 1e-10))) * 100
SMAPE = smape(y_original, counterfactual)
MAPE = round(np.mean(np.abs((y_original - counterfactual) / (y_original + 1e-10))) * 100, 2)
SMAPE = round(smape(y_original, counterfactual), 2)

# Calculate percentage lift (only during treatment period)
treatment_period_sum = np.sum(treatment[start_position_treatment:end_position_treatment])
Expand All @@ -151,7 +151,7 @@ def smape(A, F):
logger.info(f"Counterfactual period sum: {counterfactual_period_sum}")
logger.info(f"Lift difference (treatment - counterfactual): {lift_difference}")

percenge_lift = (lift_difference / np.abs(counterfactual_period_sum)) * 100
percenge_lift = round((lift_difference / np.abs(counterfactual_period_sum)) * 100, 2)

def compute_residuals(y_treatment, y_control):
return y_treatment - y_control
Expand All @@ -178,8 +178,8 @@ def stat_func(x):
null_stats = np.array(null_stats)

logger.info("Permutation test completed, calculating p-value and power...")
p_value = np.mean(abs(null_stats) >= abs(observed_stat))
power = np.mean(p_value < significance_level)
p_value = round(float(np.mean(abs(null_stats) >= abs(observed_stat))), 2)
power = round(float(np.mean(p_value < significance_level)), 2)

length_treatment = len(treatment_group)

Expand All @@ -193,21 +193,21 @@ def stat_func(x):
results_evaluation = {
"MAPE": MAPE,
"SMAPE": SMAPE,
"counterfactual": counterfactual,
"treatment": treatment,
"counterfactual": np.round(counterfactual, 2),
"treatment": np.round(treatment, 2),
"p_value": p_value,
"power": power,
"percenge_lift": percenge_lift,
"control_group": filtered_control_group,
"observed_stat": observed_stat,
"null_stats": null_stats,
"weights": filtered_weights,
"observed_stat": round(float(observed_stat), 2),
"null_stats": np.round(null_stats, 2),
"weights": np.round(filtered_weights, 2),
"period": period,
"spend": spend,
"spend": round(float(spend), 2),
"length_treatment": length_treatment,
# Complete data for plotting (including post-treatment)
"counterfactual_complete": counterfactual_complete,
"treatment_complete": treatment_complete,
"counterfactual_complete": np.round(counterfactual_complete, 2),
"treatment_complete": np.round(treatment_complete, 2),
"time_index_full": time_index_full,
# Period information for plotting zones
"start_position_treatment": start_position_treatment,
Expand Down Expand Up @@ -307,37 +307,37 @@ def get_evaluation_chart_data(
chart_data = {
# Base series
"dates": dates,
"treatment": treatment.tolist(),
"counterfactual": counterfactual.tolist(),
"point_difference": point_difference.tolist(),
"cumulative_effect": cumulative_effect,
"treatment": np.round(treatment, 2).tolist(),
"counterfactual": np.round(counterfactual, 2).tolist(),
"point_difference": np.round(point_difference, 2).tolist(),
"cumulative_effect": np.round(cumulative_effect, 2).tolist(),

# Treatment period data
"treatment_dates": dates[start_position_treatment:],
"y_treatment": y_treatment.tolist(),
"point_difference_treatment": point_difference_treatment.tolist(),
"cumulative_effect_treatment": cumulative_effect_treatment,
"y_treatment": np.round(y_treatment, 2).tolist(),
"point_difference_treatment": np.round(point_difference_treatment, 2).tolist(),
"cumulative_effect_treatment": np.round(cumulative_effect_treatment, 2).tolist(),

# Confidence bands
"lower_bound": lower_bound.tolist(),
"upper_bound": upper_bound.tolist(),
"lower_bound_pd": lower_bound_pd.tolist(),
"upper_bound_pd": upper_bound_pd.tolist(),
"lower_bound_ce": lower_bound_ce.tolist(),
"upper_bound_ce": upper_bound_ce.tolist(),
"lower_bound": np.round(lower_bound, 2).tolist(),
"upper_bound": np.round(upper_bound, 2).tolist(),
"lower_bound_pd": np.round(lower_bound_pd, 2).tolist(),
"upper_bound_pd": np.round(upper_bound_pd, 2).tolist(),
"lower_bound_ce": np.round(lower_bound_ce, 2).tolist(),
"upper_bound_ce": np.round(upper_bound_ce, 2).tolist(),

# Aggregate values
"lower_bound_value": float(lower_bound_value),
"upper_bound_value": float(upper_bound_value),
"prediction_value": float(prediction_value),
"att": float(att),
"incremental": float(incremental),
"lower_bound_value": round(float(lower_bound_value), 2),
"upper_bound_value": round(float(upper_bound_value), 2),
"prediction_value": round(float(prediction_value), 2),
"att": round(float(att), 2),
"incremental": round(float(incremental), 2),

# Pre/post treatment periods
"pre_treatment": pre_treatment.tolist(),
"pre_counterfactual": pre_counterfactual.tolist(),
"post_treatment": post_treatment.tolist(),
"post_counterfactual": post_counterfactual.tolist(),
"pre_treatment": np.round(pre_treatment, 2).tolist(),
"pre_counterfactual": np.round(pre_counterfactual, 2).tolist(),
"post_treatment": np.round(post_treatment, 2).tolist(),
"post_counterfactual": np.round(post_counterfactual, 2).tolist(),

# Metadata
"start_position_treatment": start_position_treatment,
Expand Down
Loading