-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathGet_marginal_load_final.py
More file actions
132 lines (99 loc) · 4.9 KB
/
Get_marginal_load_final.py
File metadata and controls
132 lines (99 loc) · 4.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# -*- coding: utf-8 -*-
"""
Monte Carlo bootstrap of aggregate load profiles (HPWH data, per the input
file names): for each fleet size N in 1..unit_runs, repeatedly sample N site
load profiles with replacement, aggregate them per time step, scale to
per-unit values, and record summary statistics of the aggregate load.

Created on Tue Sep 23 11:54:29 2025
@author: Joe_admin
"""
import pandas as pd
import time
import numpy as np
# Wall-clock start; used at the bottom of the script to report total runtime.
start_time = time.time()
def sample_data(input_df, units, id_col='Home'):
    """Bootstrap-sample `units` rows from `input_df` and drop the site-ID column.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Per-site load profiles: one row per site, one column per time step,
        plus a site-ID column named `id_col`.
    units : int
        Number of rows (sites) to draw, with replacement.
    id_col : str, optional
        Name of the site-ID column to remove before returning (default
        'Home', matching this script's input data).

    Returns
    -------
    pandas.DataFrame
        The `units` sampled rows with only the time-step load columns,
        so a downstream ``.sum()`` aggregates load values only.
    """
    sampled = input_df.sample(n=units, replace=True)
    return sampled.drop(columns=[id_col])
def get_MCS_run(N, input_df):
    """Run the Monte Carlo draws for a fleet of N units.

    Performs ``MCS_runs`` (module-level constant) bootstrap draws; each draw
    samples N sites with replacement and sums their load profiles per time
    step.

    Parameters
    ----------
    N : int
        Number of units (rows) to bootstrap-sample per draw.
    input_df : pandas.DataFrame
        Per-site load profiles, including the 'Home' site-ID column
        (removed inside sample_data).

    Returns
    -------
    pandas.DataFrame
        ``MCS_runs`` rows, one aggregate (summed) load profile per draw,
        columns = time steps.
    """
    # Build the table locally and return it, instead of writing row-by-row
    # into the module-level MCS_table (hidden mutation of shared state).
    # Each draw's per-time-step aggregate load becomes one row.
    draws = [sample_data(input_df, N).sum() for _ in range(MCS_runs)]
    return pd.DataFrame(draws, index=range(MCS_runs))
def get_stats(input_df):
    """Summarize each column of `input_df` with six statistics.

    Returns a DataFrame whose rows are the statistics ('0.975 Quant',
    'Mean', '0.025 Quant', 'Variance', 'Std Dev', 'Skew') and whose
    columns match `input_df`'s columns.
    """
    # NOTE(review): Variance uses ddof=0 (population) while Std Dev uses
    # pandas' default ddof=1 (sample), so Std Dev**2 != Variance here —
    # confirm this mismatch is intentional.
    per_column = [
        input_df.quantile(0.975),
        input_df.mean(),
        input_df.quantile(0.025),
        input_df.var(ddof=0),
        input_df.std(),
        input_df.skew(),
    ]
    summary = pd.concat(per_column, axis=1)
    summary.columns = ['0.975 Quant', 'Mean', '0.025 Quant',
                       'Variance', 'Std Dev', 'Skew']
    # Transpose so each statistic is a row, matching the original layout.
    return summary.T
############################################################################
# Enter inputs here #
############################################################################
# Input CSV of per-site load profiles, and one output CSV path per statistic.
input_file_name = "C:/Users/Joe_admin/Documents/IEEE_2026_paper/Ready_data/hpwh_baseline_ready_data.csv"
upper_quant_output_file = "C:/Users/Joe_admin/Documents/IEEE_2026_paper/ML_files/hpwh_975th_ML_500_for_table.csv"
mean_output_file = "C:/Users/Joe_admin/Documents/IEEE_2026_paper/ML_files/hpwh_Mean_ML_500_for_table.csv"
lower_quant_output_file = "C:/Users/Joe_admin/Documents/IEEE_2026_paper/ML_files/hpwh_025th_ML_500_for_table.csv"
variance_output_file = "C:/Users/Joe_admin/Documents/IEEE_2026_paper/ML_files/hpwh_var_ML_500_for_table.csv"
standard_dev_output_file = "C:/Users/Joe_admin/Documents/IEEE_2026_paper/ML_files/hpwh_sdev_ML_500_for_table.csv"
skew_output_file = "C:/Users/Joe_admin/Documents/IEEE_2026_paper/ML_files/hpwh_skew_ML_500_for_table.csv"
# Sweep fleet sizes 1..unit_runs; each fleet size gets MCS_runs bootstrap draws.
unit_runs = 500
MCS_runs = 1000
############################################################################
# Program Start #
############################################################################
# read data
df = pd.read_csv(input_file_name)
# create an array for the number of units
units_arr = np.arange(1, unit_runs+1)  # NOTE(review): built but never used below
# get the time-step column labels (everything except the site-ID column)
times = df.drop(['Home'], axis=1).columns # this was changed from ee_site_id
# initialize the MCS table (one row per Monte Carlo draw); get_MCS_run reads
# this module-level table and MCS_runs as globals.
MCS_table = pd.DataFrame(np.nan, index=range(MCS_runs), columns=times)
# initialize stats tables (one row per fleet size N)
upper_quant_df = pd.DataFrame(np.nan, index=range(unit_runs), columns=times)
mean_df = pd.DataFrame(np.nan, index=range(unit_runs), columns=times)
lower_quant_df = pd.DataFrame(np.nan, index=range(unit_runs), columns=times)
variance_df = pd.DataFrame(np.nan, index=range(unit_runs), columns=times)
std_dev_df = pd.DataFrame(np.nan, index=range(unit_runs), columns=times)
skew_df = pd.DataFrame(np.nan, index=range(unit_runs), columns=times)
for i, N in enumerate(np.arange(1, unit_runs+1)):
    # get the table that contains each MCS run for a fleet of N units
    MCS_table = get_MCS_run(N, df)
    # scale the aggregate load profiles to per-unit (PU) values
    MCS_table = MCS_table.div(0.5 * N) # 0.5 for HPWH , 4.5 for ER
    # compute the 97.5th percentile, mean, 2.5th percentile, variance,
    # standard deviation, and skew at each time step
    stats_df = get_stats(MCS_table)
    # save those stats to separate tables: row i corresponds to N = i+1 units
    upper_quant_df.loc[i] = stats_df.loc['0.975 Quant']
    mean_df.loc[i] = stats_df.loc['Mean']
    lower_quant_df.loc[i] = stats_df.loc['0.025 Quant']
    variance_df.loc[i] = stats_df.loc['Variance']
    std_dev_df.loc[i] = stats_df.loc['Std Dev']
    skew_df.loc[i] = stats_df.loc['Skew']
# write each statistic's table to its own CSV (row index = fleet size - 1)
upper_quant_df.to_csv(upper_quant_output_file, index=True)
mean_df.to_csv(mean_output_file, index=True)
lower_quant_df.to_csv(lower_quant_output_file, index=True)
variance_df.to_csv(variance_output_file, index=True)
std_dev_df.to_csv(standard_dev_output_file, index=True)
skew_df.to_csv(skew_output_file, index=True)
# maximum width of the 95% CI (0.975 - 0.025 quantile) per fleet size
width_df = upper_quant_df - lower_quant_df
# NOTE(review): `result` is computed but never saved or printed — confirm
# whether it should be written to a file or dropped.
result = pd.DataFrame({
    'max_value': width_df.max(axis=1),
    'max_column': width_df.idxmax(axis=1)
})
# print out the time it took to run the program
end_time = time.time()
execution_time = end_time - start_time
print(f"Execution time: {execution_time} seconds")
execution_min = execution_time/60
print(f"Execution time: {execution_min} minutes")