diff --git a/.gitignore b/.gitignore index 0d20b64..5e71c83 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,7 @@ *.pyc +*.TMP +*.h11~ +*.qeb~ +*.wa0~ +__pycache__/ +.vs/ diff --git a/.vs/ProjectSettings.json b/.vs/ProjectSettings.json deleted file mode 100644 index f8b4888..0000000 --- a/.vs/ProjectSettings.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "CurrentProjectSetting": null -} \ No newline at end of file diff --git a/.vs/VSWorkspaceState.json b/.vs/VSWorkspaceState.json deleted file mode 100644 index 553b4d3..0000000 --- a/.vs/VSWorkspaceState.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "ExpandedNodes": [ - "" - ], - "SelectedNode": "\\test.py", - "PreviewInSolutionExplorer": false -} \ No newline at end of file diff --git a/.vs/modpods/FileContentIndex/021492eb-36bb-40ae-82c0-d2b946f1f986.vsidx b/.vs/modpods/FileContentIndex/021492eb-36bb-40ae-82c0-d2b946f1f986.vsidx deleted file mode 100644 index ebe044f..0000000 Binary files a/.vs/modpods/FileContentIndex/021492eb-36bb-40ae-82c0-d2b946f1f986.vsidx and /dev/null differ diff --git a/.vs/modpods/FileContentIndex/1115c48f-8b44-44b5-9c76-cd02e1ad0d3c.vsidx b/.vs/modpods/FileContentIndex/1115c48f-8b44-44b5-9c76-cd02e1ad0d3c.vsidx deleted file mode 100644 index bb82930..0000000 Binary files a/.vs/modpods/FileContentIndex/1115c48f-8b44-44b5-9c76-cd02e1ad0d3c.vsidx and /dev/null differ diff --git a/.vs/modpods/FileContentIndex/3538d893-0576-4bfa-8963-dbb4d747b7b3.vsidx b/.vs/modpods/FileContentIndex/3538d893-0576-4bfa-8963-dbb4d747b7b3.vsidx deleted file mode 100644 index 46f1f46..0000000 Binary files a/.vs/modpods/FileContentIndex/3538d893-0576-4bfa-8963-dbb4d747b7b3.vsidx and /dev/null differ diff --git a/.vs/modpods/FileContentIndex/4726c3f9-a22a-4893-80c6-d12979a64b18.vsidx b/.vs/modpods/FileContentIndex/4726c3f9-a22a-4893-80c6-d12979a64b18.vsidx deleted file mode 100644 index 668b4fc..0000000 Binary files a/.vs/modpods/FileContentIndex/4726c3f9-a22a-4893-80c6-d12979a64b18.vsidx and /dev/null differ diff --git a/.vs/modpods/FileContentIndex/aec5ee9b-f924-4b68-8ac3-bc9f9583b29c.vsidx b/.vs/modpods/FileContentIndex/aec5ee9b-f924-4b68-8ac3-bc9f9583b29c.vsidx deleted file mode 100644 index d4d19b6..0000000 Binary files a/.vs/modpods/FileContentIndex/aec5ee9b-f924-4b68-8ac3-bc9f9583b29c.vsidx and /dev/null differ diff --git a/.vs/modpods/v17/.wsuo b/.vs/modpods/v17/.wsuo deleted file mode 100644 index bcde4e8..0000000 Binary files a/.vs/modpods/v17/.wsuo and /dev/null differ diff --git a/.vs/slnx.sqlite b/.vs/slnx.sqlite deleted file mode 100644 index aa31295..0000000 Binary files a/.vs/slnx.sqlite and /dev/null differ diff --git a/1gktfkct.h11~ b/1gktfkct.h11~ deleted file mode 100644 index 9db1a73..0000000 --- a/1gktfkct.h11~ +++ /dev/null @@ -1,412 +0,0 @@ -# reference: https://colab.research.google.com/github/kLabUM/pystorms/blob/master/tutorials/Scenario_Gamma.ipynb - -import sys -#from modpods import topo_from_pystorms -#sys.path.append("G:/My Drive/modpods") -import modpods -import pystorms -import numpy as np -import matplotlib.pyplot as plt -import pandas as pd -import control as ct -import dill as pickle -use_blind = False - -''' -# uncontrolled -env = pystorms.scenarios.gamma() -done = False - -while not done: - # Query the current state of the simulation - state = env.state() - - # Initialize actions to have each asset open - actions = np.ones(11) - - # Set the actions and progress the simulation - done = env.step(actions) - -# Calculate the performance measure for the uncontrolled simulation -uncontrolled_perf = sum(env.data_log["performance_measure"]) - -print("The calculated performance for the uncontrolled case of Scenario gamma is:") -print("{}.".format(uncontrolled_perf)) - -basin_max_depths = [5., 10., 10., 10.] - -plt.figure(figsize=(15,6)) -plt.subplot(1,2,1) -plt.plot(np.asarray(env.data_log['depthN']['1'])/basin_max_depths[0], label='Basin 1') -plt.plot(np.asarray(env.data_log['depthN']['2'])/basin_max_depths[1], label='Basin 2') -plt.plot(np.asarray(env.data_log['depthN']['3'])/basin_max_depths[2], label='Basin 3') -plt.plot(np.asarray(env.data_log['depthN']['4'])/basin_max_depths[3], label='Basin 4') -plt.xlabel('Simulation Timestep') -plt.ylabel('Filling Degree') -plt.legend() - -plt.subplot(1,2,2) -plt.plot(env.data_log['flow']['O1'], label='Basin 1') -plt.plot(env.data_log['flow']['O2'], label='Basin 2') -plt.plot(env.data_log['flow']['O3'], label='Basin 3') -plt.plot(env.data_log['flow']['O4'], label='Basin 4') -plt.xlabel('Simulation Timestep') -plt.ylabel('Basin Outflow (cfs)') -plt.legend() -plt.show() - -uncontrolled_data = env.data_log -''' -def controller_efd(state, max_depths): - # Initialize the action space so that we can compute the new settings - new_settings = np.ones(len(state)) - # Set equal filling degree parameters - c = 1.5 - theta = 0.25 - - # Assign the current depth in each basin - depths = state - - # Compute the filling degrees - fd = depths/max_depths - # Compute the average filling degree across each controlled basin - fd_average = sum(fd)/len(fd) - - # Update each valve setting based on the relative fullness of each basin - for i in range(0,len(fd)): - - # If a basin is very full compared to the average, we should open its - # valve to release some water - if fd[i] > fd_average: - new_settings[i] = c*(fd[i]-fd_average) - - # If a basin's filling degree is close to the average (within some value - # theta), its setting can be close to that average - elif fd_average-fd[i] <= theta: - new_settings[i] = fd_average - - # If a basin is very empty compared to the average, we can close its - # valve to store more water at that location, prioritizing releasing at - # the other locations - else: - new_settings[i] = 0. - - # Make sure the settings are in bounds [0,1] - new_settings[i] = min(new_settings[i], 1.) - new_settings[i] = max(new_settings[i], 0.) - - return new_settings - - - -env = pystorms.scenarios.gamma() -done = False - -# Specify the maximum depths for each basin we are controlling -basin_max_depths = [5., 10., 10., 10.] - -while not done: - # Query the current state of the simulation - state = env.state() - # Isolate only the states that we need (the 4 downstream basin depths) - states_relevant = state[0:4] - - # Pass the current, relevant states and the maximum basin - # depths into our equal filling degree logic - actions_efd = controller_efd(states_relevant, basin_max_depths) - # Specify that the other 7 valves in the network should be - # open since we are not controlling them here - actions_uncontrolled = np.ones(7) - # Join the two above action arrays - actions = np.concatenate((actions_efd, actions_uncontrolled), axis=0) - - # Set the actions and progress the simulation - done = env.step(actions) - -# Calculate the performance measure for the uncontrolled simulation -equalfilling_perf = sum(env.data_log["performance_measure"]) - -print("The calculated performance for the equal filling degree case of Scenario gamma is:") -print("{}.".format(equalfilling_perf)) -''' -plt.figure(figsize=(15,6)) -plt.subplot(1,2,1) -plt.plot(np.asarray(env.data_log['depthN']['1'])/basin_max_depths[0], label='Basin 1') -plt.plot(np.asarray(env.data_log['depthN']['2'])/basin_max_depths[1], label='Basin 2') -plt.plot(np.asarray(env.data_log['depthN']['3'])/basin_max_depths[2], label='Basin 3') -plt.plot(np.asarray(env.data_log['depthN']['4'])/basin_max_depths[3], label='Basin 4') -plt.xlabel('Simulation Timestep') -plt.ylabel('Filling Degree') -plt.legend() - -plt.subplot(1,2,2) -plt.plot(env.data_log['flow']['O1'], label='Basin 1') -plt.plot(env.data_log['flow']['O2'], label='Basin 2') -plt.plot(env.data_log['flow']['O3'], label='Basin 3') -plt.plot(env.data_log['flow']['O4'], label='Basin 4') -plt.xlabel('Simulation Timestep') -plt.ylabel('Basin Outflow (cfs)') -plt.legend() -''' -ef_data = env.data_log - -# get the responses into one dataframe with zero pads in between -#uncontrolled_flows = pd.DataFrame.from_dict(uncontrolled_data['flow']) -#uncontrolled_depthN = pd.DataFrame.from_dict(uncontrolled_data['depthN']) -#uncontrolled_response = pd.concat([uncontrolled_flows, uncontrolled_depthN], axis=1) -#print(uncontrolled_response) -ef_flows = pd.DataFrame.from_dict(ef_data['flow']) -ef_flows.columns = env.config['action_space'] -ef_depthN = pd.DataFrame.from_dict(ef_data['depthN']) -ef_depthN.columns = env.config['states'] -ef_response = pd.concat([ef_flows, ef_depthN], axis=1) -ef_response.index = env.data_log['simulation_time'] -print(ef_response) -# for the columns of ef_response which do not contain the string "O" (i.e. the depths) -# if the current column name is "X", make it "(X, depthN)" -# this is because the modpods library expects the depths to be named "X, depthN" -# where X is the name of the corresponding flow -for col in ef_response.columns: - if "O" in col: # the orifices - # if there's a number in that column name that's greater than 4, drop this column - # this is because we're only controlling the first 4 orifices and these measurements are redundant to the storage node depths if the orifices are always open - if int(col[1:]) > 4: - ef_response.drop(columns=col, inplace=True) - - ef_response.rename(columns={col: (col, "flow")}, inplace=True) - - - -print(ef_response) - - - -# for debugging resample to a coarser time step (native resolution is about one minute but not consistent) -# need a consistent time step for modpods -ef_response = ef_response.resample('5T',axis='index').mean().copy(deep=True) -print(ef_response) -# we'll only use the ef response to infer the topology and dynamics -# for this experiment assume all of flow O1-O11 and depth 1-11 are observable -# but only O1-O4 are controllable -independent_columns = ef_response.columns[0:4] # orifices O1 through O4 -dependent_columns = ef_response.drop(columns=independent_columns).columns - - -# learn the topology from the data -# this will be the "blind" plant model -if use_blind: # don't have this on all the time because it's very expensive - blind_topo = modpods.infer_causative_topology(ef_response, dependent_columns = dependent_columns, - independent_columns = independent_columns, verbose=True) - - print(blind_topo.causative_topo) - print(blind_topo.total_graph) - - -# read the topology from the swmm file (this is much cheaper) -env.config['states'] = dependent_columns -env.config['action_space'] = independent_columns -# the default is controlling all 11 orifices so we need to edit the environment -print("defining topology") -swmm_topo = modpods.topo_from_pystorms(env) -# the index of the causative topology should be the dependent columns -#swmm_topo.index = dependent_columns -# the columns of the causative topology should be the dependent columns plus the independent columns -#swmm_topo.columns = dependent_columns.append(independent_columns) - -# show all columns when printing dataframes -pd.set_option('display.max_columns', None) -print(swmm_topo) - -if use_blind: - print("differences in topology") - print(blind_topo.causative_topo.compare(swmm_topo)) - -# learn the dynamics now, or load a previously learned model -''' -# learn the dynamics from the efd response -print("learning dynamics") -lti_plant_approx_seeing = modpods.lti_system_gen(swmm_topo, ef_response, - independent_columns= independent_columns, - dependent_columns = dependent_columns, max_iter = 0, - swmm=True,bibo_stable=True) -# pickle the plant approximation to load later -with open('G:/My Drive/modpods/swmm_lti_plant_approx_seeing.pickle', 'wb') as handle: - pickle.dump(lti_plant_approx_seeing, handle) -''' - -# load the plant approximation from a pickle -with open('G:/My Drive/modpods/swmm_lti_plant_approx_seeing.pickle', 'rb') as handle: - print("loading previously trained model") - lti_plant_approx_seeing = pickle.load(handle) - -if use_blind: - lti_plant_approx_blind = modpods.lti_system_gen(blind_topo, ef_response, - independent_columns= independent_columns, - dependent_columns = dependent_columns) - - - -# cast the columns of dataframes to strings for easier indexing -ef_response.columns = ef_response.columns.astype(str) -dependent_columns = [str(col) for col in dependent_columns] -independent_columns = [str(col) for col in independent_columns] -# reindex the ef_response to an integer step -ef_response.index = np.arange(0,len(ef_response),1) - -# evaluate the plant approximation accuracy -# only plot the depths at 1, 2, 3, and 4 -# the forcing is the flows at O1, O2, O3, and O4 -approx_response = ct.forced_response(lti_plant_approx_seeing['system'], U=np.transpose(ef_response[independent_columns].values), T=ef_response.index.values) -approx_data = pd.DataFrame(index=ef_response.index.values) -approx_data[dependent_columns[0]] = approx_response.outputs[0][:] -approx_data[dependent_columns[1]] = approx_response.outputs[1][:] -approx_data[dependent_columns[2]] = approx_response.outputs[2][:] -approx_data[dependent_columns[3]] = approx_response.outputs[3][:] - -output_columns = dependent_columns[0:4] # depth at 1,2,3,4 - -# create a vertical subplot of 3 axes -fig, axes = plt.subplots(4, 1, figsize=(10, 10)) - -for idx in range(len(output_columns)): - axes[idx].plot(ef_response[output_columns[idx]],label='actual') - axes[idx].plot(approx_data[output_columns[idx]],label='approx') - if idx == 0: - axes[idx].legend(fontsize='x-large',loc='best') - axes[idx].set_ylabel(output_columns[idx],fontsize='large') - if idx == len(output_columns)-1: - axes[idx].set_xlabel("time",fontsize='x-large') -# label the left column of plots "training" -axes[0].set_title("outputs",fontsize='xx-large') - -plt.savefig("G:/My Drive/modpods/test_lti_control_of_swmm_plant_approx.png") -plt.savefig("G:/My Drive/modpods/test_lti_control_of_swmm_plant_approx.svg") -#plt.show() - -# same plot, but just the first few timesteps (this is the accuracy that matters for feedback control) -# create a vertical subplot of 3 axes -fig, axes = plt.subplots(4, 1, figsize=(10, 10)) - -for idx in range(len(output_columns)): - axes[idx].plot(ef_response[output_columns[idx]][:10],label='actual') - axes[idx].plot(approx_data[output_columns[idx]][:10],label='approx') - if idx == 0: - axes[idx].legend(fontsize='x-large',loc='best') - axes[idx].set_ylabel(output_columns[idx],fontsize='large') - if idx == len(output_columns)-1: - axes[idx].set_xlabel("time",fontsize='x-large') -# label the left column of plots "training" -axes[0].set_title("outputs",fontsize='xx-large') - -plt.savefig("G:/My Drive/modpods/test_lti_control_of_swmm_plant_approx_first10.png") -plt.savefig("G:/My Drive/modpods/test_lti_control_of_swmm_plant_approx_first10.svg") -#plt.show() - - -# define the cost function -Q = np.eye(len(lti_plant_approx_seeing['A'].columns)) / 10e12 # we don't want to penalize the transition states as their magnitude doesn't have directly tractable physical meaning -# bryson's rule based on the maxiumum depth of each basin -# note: the swmm file gamma.inp actually specifies the maximum depth of storage node 1 as 10 feet, -# but i'll be consistent with the configuration of the efd controller for consistent comparison -basin_max_depths_all = [5.0, 10.0, 10.0, 10.0, 10.0,20.0, 10.0, 10.0,10.0, 13.72, 14.96] -for asset_index in range(len(dependent_columns)): - Q[lti_plant_approx_seeing['A'].columns.get_loc(dependent_columns[asset_index]),lti_plant_approx_seeing['A'].columns.get_loc(dependent_columns[asset_index])] = 1 / ((basin_max_depths_all[asset_index])**2 ) - -# threshold on flows at 0.11 m^3 / s which is 3.9 cfs -R = np.eye(len(lti_plant_approx_seeing['B'].columns)) / (3.9**2) # bryson's rule on maximum allowable flow -# define the system -# sys_response_to_control = ct.ss(lti_plant_approx_seeing['A'],lti_plant_approx_seeing['B'],lti_plant_approx_seeing['C'],0) # just for defining the controller gain -# (not necessary in this case, but would be if you've got disturbances) -# find the state feedback gain for the linear quadratic regulator -K,S,E = ct.lqr(lti_plant_approx_seeing['system'],Q,R) # one row of K should be zeros to reflect that u1 is not used as a control but is the disturbance - -# define the observer gain -L,P,E = ct.lqe(lti_plant_approx_seeing['system'],np.eye(len(lti_plant_approx_seeing['B'].columns)),np.eye(len(lti_plant_approx_seeing['C'].index)) ) # unit covariance on process noise and measurement error - -''' -# define the observer based compensator (per freudenberg 560 course notes 2.4) -obc_A = lti_plant_approx_seeing['A'].values-lti_plant_approx_seeing['B'].values@K - L@lti_plant_approx_seeing['C'].values -# ingests measurements, returns control actions -obc = ct.ss(obc_A, L, -K, 0, inputs=list(lti_plant_approx_seeing['C'].index),outputs=list(lti_plant_approx_seeing['B'].columns)) # negate K to give back commanded flows which are positive - - -# need to separately define the observer and controller because we can't close the loop in the typical way -# the observer takes the control input and measured output as inputs and outputs the estimated full state -#observer_input = np.concatenate((lti_plant_approx_seeing['B'].values,L),axis = 1) -#observer = ct.ss(lti_plant_approx_seeing['A'] - L@lti_plant_approx_seeing['C'].values, observer_input, np.eye(len(lti_plant_approx_seeing['A']) ) , 0 ) - -# the controller takes in an estiamte of the state and returns a control command -# this is just, u = -K @ xhat, not necessary to define a state space model for that as there's no evolution - -# can't form the closed loop system because the plant is not a state space system, but rather a software model -# the state estimate and control actions will be computed iteratively as the simulation is stepped through -''' -env = pystorms.scenarios.gamma() -done = False -u = np.zeros((len(lti_plant_approx_seeing['B'].columns),1) ) # start with all orifices completely closed -xhat = np.zeros((len(lti_plant_approx_seeing['A'].columns),1)) # initial state estimate -while not done: - # Query the current state of the simulation - observables = env.state() - - # update the observer based on these measurements (xhat_dot = (A-LC) xhat + B u + L y_m) - state_evolution = (lti_plant_approx_seeing['A'] - L@lti_plant_approx_seeing['C'].values) @ xhat - impact_of_control = lti_plant_approx_seeing['B'].values @ u - output_updating = (L @ np.transpose(observables)).reshape((-1,1)) # provided as row vector, need a column vector. also need to reshape to 2d array with 1 column - xhat_dot = state_evolution + impact_of_control + output_updating - xhat += xhat_dot # update the state estimate - yhat = lti_plant_approx_seeing['C'] @ xhat # just for reference, could be useful for plotting later - u = -K @ xhat # calculate control command - - # convert control command (flow) into orifice open percentage - # per the EPA-SWMM user manual volume ii hydraulics, orifices (section 6.2, page 107) - https://nepis.epa.gov/Exe/ZyPDF.cgi/P100S9AS.PDF?Dockey=P100S9AS.PDF - # all orifices in gamma are "bottom" - Cd = 0.65 # happens to be the same for all of them - Ao = 1 # area is one square foot. again, happens to be the same for all of them. - g = 32.2 # ft / s^2 - # the expression for discharge is found using Torricelli's equation: Q = Cd * (Ao*open_pct) sqrt(2*g*H_e) - # H_e is the effective head in feet, which is just the depth in the basin as the orifices are "bottom" - # to get the action command as a percent open, we solve as: open_pct = Q_desired / (Cd * Ao * sqrt(2*g*H_e)) - u_open_pct = u*-1 - for idx in range(len(u)): - u_open_pct.iloc[idx,0] = u.iloc[idx,0] / (Cd*Ao * np.sqrt(2*g*observables[idx])) - - - # Specify that the other 7 valves in the network should be - # open since we are not controlling them here - actions_uncontrolled = np.ones(7) - # Join the two above action arrays - actions = np.concatenate((u_open_pct.flatten(), actions_uncontrolled), axis=0) - - # Set the actions and progress the simulation - done = env.step(actions) - - -# Calculate the performance measure for the uncontrolled simulation -obc_perf = sum(env.data_log["performance_measure"]) - -print("The calculated performance for the observer based compensator case of Scenario gamma is:") -print("{}.".format(obc_perf)) - -plt.figure(figsize=(15,6)) -plt.subplot(1,2,1) -plt.plot(np.asarray(env.data_log['depthN']['1'])/basin_max_depths[0], label='Basin 1') -plt.plot(np.asarray(env.data_log['depthN']['2'])/basin_max_depths[1], label='Basin 2') -plt.plot(np.asarray(env.data_log['depthN']['3'])/basin_max_depths[2], label='Basin 3') -plt.plot(np.asarray(env.data_log['depthN']['4'])/basin_max_depths[3], label='Basin 4') -plt.xlabel('Simulation Timestep') -plt.ylabel('Filling Degree') -plt.legend() - -plt.subplot(1,2,2) -plt.plot(env.data_log['flow']['O1'], label='Basin 1') -plt.plot(env.data_log['flow']['O2'], label='Basin 2') -plt.plot(env.data_log['flow']['O3'], label='Basin 3') -plt.plot(env.data_log['flow']['O4'], label='Basin 4') -plt.xlabel('Simulation Timestep') -plt.ylabel('Basin Outflow (cfs)') -plt.legend() - - -print("done") - diff --git a/5wkkup1r.qeb~ b/5wkkup1r.qeb~ deleted file mode 100644 index e69de29..0000000 diff --git a/__pycache__/__init__.cpython-38.pyc b/__pycache__/__init__.cpython-38.pyc deleted file mode 100644 index 6ce662f..0000000 Binary files a/__pycache__/__init__.cpython-38.pyc and /dev/null differ diff --git a/__pycache__/__init__.cpython-39.pyc b/__pycache__/__init__.cpython-39.pyc deleted file mode 100644 index 65f0bf3..0000000 Binary files a/__pycache__/__init__.cpython-39.pyc and /dev/null differ diff --git a/__pycache__/modpods.cpython-312.pyc b/__pycache__/modpods.cpython-312.pyc deleted file mode 100644 index d5ab883..0000000 Binary files a/__pycache__/modpods.cpython-312.pyc and /dev/null differ diff --git a/__pycache__/modpods.cpython-38.pyc b/__pycache__/modpods.cpython-38.pyc deleted file mode 100644 index a1e75c4..0000000 Binary files a/__pycache__/modpods.cpython-38.pyc and /dev/null differ diff --git a/__pycache__/modpods.cpython-39.pyc b/__pycache__/modpods.cpython-39.pyc deleted file mode 100644 index 6fad2fb..0000000 Binary files a/__pycache__/modpods.cpython-39.pyc and /dev/null differ diff --git a/__pycache__/modpods.cpython-39.pyc.1669562004624 b/__pycache__/modpods.cpython-39.pyc.1669562004624 deleted file mode 100644 index e69de29..0000000 diff --git a/__pycache__/modpods.cpython-39.pyc.1915384797088 b/__pycache__/modpods.cpython-39.pyc.1915384797088 deleted file mode 100644 index e69de29..0000000 diff --git a/__pycache__/modpods.cpython-39.pyc.2433066808016 b/__pycache__/modpods.cpython-39.pyc.2433066808016 deleted file mode 100644 index e69de29..0000000 diff --git a/__pycache__/modpods_bayesian.cpython-312.pyc b/__pycache__/modpods_bayesian.cpython-312.pyc deleted file mode 100644 index 2f9006d..0000000 Binary files a/__pycache__/modpods_bayesian.cpython-312.pyc and /dev/null differ diff --git a/afuitcsp.wa0~ b/afuitcsp.wa0~ deleted file mode 100644 index e69de29..0000000 diff --git a/modpods.py b/modpods.py index 50ab0ad..c660134 100644 --- a/modpods.py +++ b/modpods.py @@ -1,19 +1,29 @@ -import pandas as pd +import re +import warnings +from typing import Any + +import control +import matplotlib.pyplot as plt +import networkx as nx import numpy as np +import pandas as pd import pysindy as ps import scipy.stats as stats -from scipy import signal -import matplotlib.pyplot as plt -import control as control -import networkx as nx -import sys +from pysindy.optimizers._constrained_sr3 import ConstrainedSR3 as _ConstrainedSR3 +from scipy.optimize import minimize +from sklearn.gaussian_process import GaussianProcessRegressor +from sklearn.gaussian_process.kernels import Matern + try: - import pyswmm # not a requirement for any other function + import pyswmm # not a requirement for any other function except ImportError: pyswmm = None -from sklearn.gaussian_process import GaussianProcessRegressor -from sklearn.gaussian_process.kernels import Matern -from scipy.optimize import minimize + +# Suppress the specific AxesWarning from pysindy after import +warnings.filterwarnings( + "ignore", message=".*axes labeled for array with.*", module="pysindy" +) + # Bayesian optimization helper functions def _expected_improvement(X, X_sample, Y_sample, gpr, xi=0.01): @@ -21,51 +31,34 @@ def _expected_improvement(X, X_sample, Y_sample, gpr, xi=0.01): mu, sigma = gpr.predict(X, return_std=True) mu = mu.reshape(-1, 1) sigma = sigma.reshape(-1, 1) - + mu_sample_opt = np.max(Y_sample) - - with np.errstate(divide='warn'): + + with np.errstate(divide="warn"): imp = mu - mu_sample_opt - xi Z = imp / sigma ei = imp * stats.norm.cdf(Z) + sigma * stats.norm.pdf(Z) ei[sigma == 0.0] = 0.0 - + return ei + def _propose_location(acquisition, X_sample, Y_sample, gpr, bounds, n_restarts=10): """Propose next sampling point by optimizing acquisition function.""" dim = X_sample.shape[1] min_val = 1 min_x = None - + def min_obj(X): return -acquisition(X.reshape(-1, dim), X_sample, Y_sample, gpr).flatten() - + for x0 in np.random.uniform(bounds[:, 0], bounds[:, 1], size=(n_restarts, dim)): - res = minimize(min_obj, x0=x0, bounds=bounds, method='L-BFGS-B') + res = minimize(min_obj, x0=x0, bounds=bounds, method="L-BFGS-B") if res.fun < min_val: min_val = res.fun min_x = res.x - - return min_x.reshape(-1, 1) -import re -import warnings -from typing import Any - -import control -import matplotlib.pyplot as plt -import networkx as nx -import numpy as np -import pandas as pd -import pysindy as ps -import pyswmm # not a requirement for any other function -import scipy.stats as stats -from pysindy.optimizers._constrained_sr3 import ConstrainedSR3 as _ConstrainedSR3 -# Suppress the specific AxesWarning from pysindy after import -warnings.filterwarnings( - "ignore", message=".*axes labeled for array with.*", module="pysindy" -) + return min_x.reshape(-1, 1) # delay model builds differential equations relating the dependent variables to transformations of all the variables @@ -91,13 +84,26 @@ def min_obj(X): # that is, forcing and the response to that forcing cannot occur at the same timestep # it may be necessary for the user to shift the forcing data back to make the system causal (especially for time aggregated data like daily rainfall-runoff) # forcing_coef_constraints is a dictionary of column name and then a 1, 0, or -1 depending on whether the coefficients of that variable should be positive, unconstrained, or negative -def delay_io_train(system_data, dependent_columns, independent_columns, - windup_timesteps=0,init_transforms=1, max_transforms=4, - max_iter=250, poly_order=3, transform_dependent=False, - verbose=False, extra_verbose=False, include_bias=False, - include_interaction=False, bibo_stable = False, - transform_only = None, forcing_coef_constraints=None, - early_stopping_threshold = 0.005, optimization_method="compass_search"): +def delay_io_train( + system_data, + dependent_columns, + independent_columns, + windup_timesteps=0, + init_transforms=1, + max_transforms=4, + max_iter=250, + poly_order=3, + transform_dependent=False, + verbose=False, + extra_verbose=False, + include_bias=False, + include_interaction=False, + bibo_stable=False, + transform_only=None, + forcing_coef_constraints=None, + early_stopping_threshold=0.005, + optimization_method="compass_search", +): forcing = system_data[independent_columns].copy(deep=True) orig_forcing_columns = forcing.columns @@ -193,7 +199,7 @@ def delay_io_train(system_data, dependent_columns, independent_columns, if optimization_method == "bayesian": if verbose: print(f"Using Bayesian optimization for {num_transforms} transforms...") - + # Determine which columns to transform if transform_dependent: transform_columns = system_data.columns.tolist() @@ -201,39 +207,58 @@ def delay_io_train(system_data, dependent_columns, independent_columns, transform_columns = transform_only else: transform_columns = independent_columns - + # Bayesian optimization for this number of transforms - n_params = len(transform_columns) * num_transforms * 3 - bounds = [] + bounds_list: list[list[float]] = [] for transform in range(1, num_transforms + 1): for col in transform_columns: - bounds.append([1.0, 50.0]) # shape_factors bounds - bounds.append([0.1, 5.0]) # scale_factors bounds - bounds.append([0.0, 20.0]) # loc_factors bounds - bounds = np.array(bounds) - + bounds_list.append([1.0, 50.0]) # shape_factors bounds + bounds_list.append([0.1, 5.0]) # scale_factors bounds + bounds_list.append([0.0, 20.0]) # loc_factors bounds + bounds = np.array(bounds_list) + def objective_function(params_vector): try: # Convert vector to DataFrames - shape_factors_opt = pd.DataFrame(columns=transform_columns, index=range(1, num_transforms + 1)) - scale_factors_opt = pd.DataFrame(columns=transform_columns, index=range(1, num_transforms + 1)) - loc_factors_opt = pd.DataFrame(columns=transform_columns, index=range(1, num_transforms + 1)) - + shape_factors_opt = pd.DataFrame( + columns=transform_columns, index=range(1, num_transforms + 1) + ) + scale_factors_opt = pd.DataFrame( + columns=transform_columns, index=range(1, num_transforms + 1) + ) + loc_factors_opt = pd.DataFrame( + columns=transform_columns, index=range(1, num_transforms + 1) + ) + idx = 0 for transform in range(1, num_transforms + 1): for col in transform_columns: shape_factors_opt.loc[transform, col] = params_vector[idx] - scale_factors_opt.loc[transform, col] = params_vector[idx + 1] + scale_factors_opt.loc[transform, col] = params_vector[ + idx + 1 + ] loc_factors_opt.loc[transform, col] = params_vector[idx + 2] idx += 3 - - result = SINDY_delays_MI(shape_factors_opt, scale_factors_opt, loc_factors_opt, - system_data.index, forcing, response, False, - poly_order, include_bias, include_interaction, - windup_timesteps, bibo_stable, transform_dependent, - transform_only, forcing_coef_constraints) - - r2 = result['error_metrics']['r2'] + + result = SINDY_delays_MI( + shape_factors_opt, + scale_factors_opt, + loc_factors_opt, + system_data.index, + forcing, + response, + False, + poly_order, + include_bias, + include_interaction, + windup_timesteps, + bibo_stable, + transform_dependent, + transform_only, + forcing_coef_constraints, + ) + + r2 = result["error_metrics"]["r2"] if verbose: print(f" R² = {r2:.6f}") return r2 @@ -241,56 +266,65 @@ def objective_function(params_vector): if verbose: print(f" Evaluation failed: {e}") return -1.0 - + # Bayesian optimization n_initial = min(10, max(5, max_iter // 4)) - X_sample = [] - Y_sample = [] - + X_sample_list: list[Any] = [] + Y_sample_list: list[Any] = [] + # Generate initial random samples for i in range(n_initial): x = np.random.uniform(bounds[:, 0], bounds[:, 1]) y = objective_function(x) - X_sample.append(x) - Y_sample.append(y) + X_sample_list.append(x) + Y_sample_list.append(y) if verbose: print(f" Initial sample {i+1}/{n_initial}: R² = {y:.6f}") - - X_sample = np.array(X_sample) - Y_sample = np.array(Y_sample).reshape(-1, 1) - + + X_sample: np.ndarray = np.array(X_sample_list) + Y_sample: np.ndarray = np.array(Y_sample_list).reshape(-1, 1) + # Main Bayesian optimization loop best_r2 = np.max(Y_sample) - best_params = X_sample[np.argmax(Y_sample)] - + best_params: np.ndarray = X_sample[np.argmax(Y_sample)] + # Gaussian Process setup kernel = Matern(length_scale=1.0, nu=2.5) - gpr = GaussianProcessRegressor(kernel=kernel, alpha=1e-6, normalize_y=True, - n_restarts_optimizer=5, random_state=42) - + gpr = GaussianProcessRegressor( + kernel=kernel, + alpha=1e-6, + normalize_y=True, + n_restarts_optimizer=5, + random_state=42, + ) + for iteration in range(max_iter - n_initial): # Fit GP and find next point gpr.fit(X_sample, Y_sample.ravel()) - next_x = _propose_location(_expected_improvement, X_sample, Y_sample, gpr, bounds) + next_x = _propose_location( + _expected_improvement, X_sample, Y_sample, gpr, bounds + ) next_x = next_x.flatten() - + # Evaluate objective next_y = objective_function(next_x) - + if verbose: - print(f" BO iteration {iteration+1}/{max_iter-n_initial}: R² = {next_y:.6f}") - + print( + f" BO iteration {iteration+1}/{max_iter-n_initial}: R² = {next_y:.6f}" + ) + # Update samples X_sample = np.append(X_sample, [next_x], axis=0) Y_sample = np.append(Y_sample, next_y) - + # Update best if next_y > best_r2: best_r2 = next_y best_params = next_x if verbose: print(f" New best R² = {best_r2:.6f}") - + # Convert best parameters back to DataFrames idx = 0 for transform in range(1, num_transforms + 1): @@ -299,21 +333,49 @@ def objective_function(params_vector): scale_factors.loc[transform, col] = best_params[idx + 1] loc_factors.loc[transform, col] = best_params[idx + 2] idx += 3 - + # Use the optimized parameters for final evaluation - prev_model = SINDY_delays_MI(shape_factors, scale_factors, loc_factors, system_data.index, - forcing, response, extra_verbose, poly_order, include_bias, - include_interaction, windup_timesteps, bibo_stable, transform_dependent=transform_dependent, - transform_only=transform_only, forcing_coef_constraints=forcing_coef_constraints) - + prev_model = SINDY_delays_MI( + shape_factors, + scale_factors, + loc_factors, + system_data.index, + forcing, + response, + extra_verbose, + poly_order, + include_bias, + include_interaction, + windup_timesteps, + bibo_stable, + transform_dependent=transform_dependent, + transform_only=transform_only, + forcing_coef_constraints=forcing_coef_constraints, + ) + else: # Default compass search optimization if verbose: - print(f"Using compass search optimization for {num_transforms} transforms...") - - prev_model = SINDY_delays_MI(shape_factors, scale_factors, loc_factors, system_data.index, - forcing, response,extra_verbose, poly_order , include_bias, - include_interaction,windup_timesteps,bibo_stable,transform_dependent=transform_dependent, - transform_only=transform_only,forcing_coef_constraints=forcing_coef_constraints) + print( + f"Using compass search optimization for {num_transforms} transforms..." + ) + + prev_model = SINDY_delays_MI( + shape_factors, + scale_factors, + loc_factors, + system_data.index, + forcing, + response, + extra_verbose, + poly_order, + include_bias, + include_interaction, + windup_timesteps, + bibo_stable, + transform_dependent=transform_dependent, + transform_only=transform_only, + forcing_coef_constraints=forcing_coef_constraints, + ) print("\nInitial model:\n") try: diff --git a/modpods.pyw~RFa860ffd2.TMP b/modpods.pyw~RFa860ffd2.TMP deleted file mode 100644 index e69de29..0000000 diff --git a/modpods_bayesian.py b/modpods_bayesian.py index 314e337..d694f8a 100644 --- a/modpods_bayesian.py +++ b/modpods_bayesian.py @@ -1,24 +1,21 @@ -import pandas as pd +from typing import Any + +import control as control import numpy as np -import pysindy as ps +import pandas as pd import scipy.stats as stats -from scipy import signal from scipy.optimize import minimize -import matplotlib.pyplot as plt -import control as control -import networkx as nx -import sys + try: - import pyswmm # not a requirement for any other function + import pyswmm # not a requirement for any other function except ImportError: pyswmm = None -import re from sklearn.gaussian_process import GaussianProcessRegressor from sklearn.gaussian_process.kernels import Matern -import warnings # Import original modpods functions -from modpods import * +from modpods import * # noqa: F401, F403 + def expected_improvement(X, X_sample, Y_sample, gpr, xi=0.01): """ @@ -28,17 +25,18 @@ def expected_improvement(X, X_sample, Y_sample, gpr, xi=0.01): mu, sigma = gpr.predict(X, return_std=True) mu = mu.reshape(-1, 1) sigma = sigma.reshape(-1, 1) - + mu_sample_opt = np.max(Y_sample) - - with np.errstate(divide='warn'): + + with np.errstate(divide="warn"): imp = mu - mu_sample_opt - xi Z = imp / sigma ei = imp * stats.norm.cdf(Z) + sigma * stats.norm.pdf(Z) ei[sigma == 0.0] = 0.0 - + return ei + def propose_location(acquisition, X_sample, Y_sample, gpr, bounds, n_restarts=25): """ Proposes the next sampling point by optimizing the acquisition function. @@ -46,30 +44,42 @@ def propose_location(acquisition, X_sample, Y_sample, gpr, bounds, n_restarts=25 dim = X_sample.shape[1] min_val = 1 min_x = None - + def min_obj(X): return -acquisition(X.reshape(-1, dim), X_sample, Y_sample, gpr).flatten() - + for x0 in np.random.uniform(bounds[:, 0], bounds[:, 1], size=(n_restarts, dim)): - res = minimize(min_obj, x0=x0, bounds=bounds, method='L-BFGS-B') + res = minimize(min_obj, x0=x0, bounds=bounds, method="L-BFGS-B") if res.fun < min_val: min_val = res.fun min_x = res.x - + return min_x.reshape(-1, 1) -def delay_io_train_bayesian(system_data, dependent_columns, independent_columns, - windup_timesteps=0, init_transforms=1, max_transforms=4, - max_iter=50, poly_order=3, transform_dependent=False, - verbose=False, extra_verbose=False, include_bias=False, - include_interaction=False, bibo_stable=False, - transform_only=None, forcing_coef_constraints=None, - early_stopping_threshold=0.005): + +def delay_io_train_bayesian( + system_data, + dependent_columns, + independent_columns, + windup_timesteps=0, + init_transforms=1, + max_transforms=4, + max_iter=50, + poly_order=3, + transform_dependent=False, + verbose=False, + extra_verbose=False, + include_bias=False, + include_interaction=False, + bibo_stable=False, + transform_only=None, + forcing_coef_constraints=None, + early_stopping_threshold=0.005, +): """ Bayesian optimization version of delay_io_train function. """ forcing = system_data[independent_columns].copy(deep=True) - orig_forcing_columns = forcing.columns response = system_data[dependent_columns].copy(deep=True) results = dict() @@ -80,29 +90,34 @@ def delay_io_train_bayesian(system_data, dependent_columns, independent_columns, transform_columns = transform_only else: transform_columns = independent_columns - + for num_transforms in range(init_transforms, max_transforms + 1): print(f"num_transforms: {num_transforms}") - + # Define parameter bounds for this number of transforms # Parameters: [shape1, scale1, loc1, shape2, scale2, loc2, ...] - n_params = len(transform_columns) * num_transforms * 3 - bounds = [] + bounds_list: list[list[float]] = [] for transform in range(1, num_transforms + 1): for col in transform_columns: - bounds.append([1.0, 50.0]) # shape_factors bounds - bounds.append([0.1, 5.0]) # scale_factors bounds - bounds.append([0.0, 20.0]) # loc_factors bounds - bounds = np.array(bounds) - + bounds_list.append([1.0, 50.0]) # shape_factors bounds + bounds_list.append([0.1, 5.0]) # scale_factors bounds + bounds_list.append([0.0, 20.0]) # loc_factors bounds + bounds = np.array(bounds_list) + def objective_function(params_vector): """Objective function that takes parameter vector and returns R²""" try: # Convert vector to DataFrames - shape_factors = pd.DataFrame(columns=transform_columns, index=range(1, num_transforms + 1)) - scale_factors = pd.DataFrame(columns=transform_columns, index=range(1, num_transforms + 1)) - loc_factors = pd.DataFrame(columns=transform_columns, index=range(1, num_transforms + 1)) - + shape_factors = pd.DataFrame( + columns=transform_columns, index=range(1, num_transforms + 1) + ) + scale_factors = pd.DataFrame( + columns=transform_columns, index=range(1, num_transforms + 1) + ) + loc_factors = pd.DataFrame( + columns=transform_columns, index=range(1, num_transforms + 1) + ) + idx = 0 for transform in range(1, num_transforms + 1): for col in transform_columns: @@ -110,15 +125,27 @@ def objective_function(params_vector): scale_factors.loc[transform, col] = params_vector[idx + 1] loc_factors.loc[transform, col] = params_vector[idx + 2] idx += 3 - + # Evaluate using SINDY_delays_MI - result = SINDY_delays_MI(shape_factors, scale_factors, loc_factors, - system_data.index, forcing, response, False, - poly_order, include_bias, include_interaction, - windup_timesteps, bibo_stable, transform_dependent, - transform_only, forcing_coef_constraints) - - r2 = result['error_metrics']['r2'] + result = SINDY_delays_MI( # noqa: F405 + shape_factors, + scale_factors, + loc_factors, + system_data.index, + forcing, + response, + False, + poly_order, + include_bias, + include_interaction, + windup_timesteps, + bibo_stable, + transform_dependent, + transform_only, + forcing_coef_constraints, + ) + + r2 = result["error_metrics"]["r2"] if verbose: print(f" R² = {r2:.6f}") return r2 @@ -126,64 +153,79 @@ def objective_function(params_vector): if verbose: print(f" Evaluation failed: {e}") return -1.0 # Poor score for failed evaluations - + # Bayesian optimization n_initial = min(10, max(5, max_iter // 4)) - X_sample = [] - Y_sample = [] - + X_sample_list: list[Any] = [] + Y_sample_list: list[Any] = [] + if verbose: print(f"Starting Bayesian optimization with {n_initial} initial samples...") - + # Generate initial random samples for i in range(n_initial): x = np.random.uniform(bounds[:, 0], bounds[:, 1]) y = objective_function(x) - X_sample.append(x) - Y_sample.append(y) + X_sample_list.append(x) + Y_sample_list.append(y) if verbose: print(f" Initial sample {i+1}/{n_initial}: R² = {y:.6f}") - - X_sample = np.array(X_sample) - Y_sample = np.array(Y_sample).reshape(-1, 1) - + + X_sample: np.ndarray = np.array(X_sample_list) + Y_sample: np.ndarray = np.array(Y_sample_list).reshape(-1, 1) + # Main Bayesian optimization loop best_r2 = np.max(Y_sample) - best_params = X_sample[np.argmax(Y_sample)] - + best_params: np.ndarray = X_sample[np.argmax(Y_sample)] + # Gaussian Process setup kernel = Matern(length_scale=1.0, nu=2.5) - gpr = GaussianProcessRegressor(kernel=kernel, alpha=1e-6, normalize_y=True, - n_restarts_optimizer=5, random_state=42) - + gpr = GaussianProcessRegressor( + kernel=kernel, + alpha=1e-6, + normalize_y=True, + n_restarts_optimizer=5, + random_state=42, + ) + for iteration in range(max_iter - n_initial): # Fit GP and find next point gpr.fit(X_sample, Y_sample.ravel()) - next_x = propose_location(expected_improvement, X_sample, Y_sample, gpr, bounds) + next_x = propose_location( + expected_improvement, X_sample, Y_sample, gpr, bounds + ) next_x = next_x.flatten() - + # Evaluate objective next_y = objective_function(next_x) - + if verbose: - print(f" BO iteration {iteration+1}/{max_iter-n_initial}: R² = {next_y:.6f}") - + print( + f" BO iteration {iteration+1}/{max_iter-n_initial}: R² = {next_y:.6f}" + ) + # Update samples X_sample = np.append(X_sample, [next_x], axis=0) Y_sample = np.append(Y_sample, next_y) - + # Update best if next_y > best_r2: best_r2 = next_y best_params = next_x if verbose: print(f" New best R² = {best_r2:.6f}") - + # Convert best parameters back to DataFrames - shape_factors = pd.DataFrame(columns=transform_columns, index=range(1, num_transforms + 1)) - scale_factors = pd.DataFrame(columns=transform_columns, index=range(1, num_transforms + 1)) - loc_factors = pd.DataFrame(columns=transform_columns, index=range(1, num_transforms + 1)) - + shape_factors = pd.DataFrame( + columns=transform_columns, index=range(1, num_transforms + 1) + ) + scale_factors = pd.DataFrame( + columns=transform_columns, index=range(1, num_transforms + 1) + ) + loc_factors = pd.DataFrame( + columns=transform_columns, index=range(1, num_transforms + 1) + ) + idx = 0 for transform in range(1, num_transforms + 1): for col in transform_columns: @@ -191,17 +233,29 @@ def objective_function(params_vector): scale_factors.loc[transform, col] = best_params[idx + 1] loc_factors.loc[transform, col] = best_params[idx + 2] idx += 3 - + # Final evaluation - final_model = SINDY_delays_MI(shape_factors, scale_factors, loc_factors, - system_data.index, forcing, response, True, - poly_order, include_bias, include_interaction, - windup_timesteps, bibo_stable, transform_dependent, - transform_only, forcing_coef_constraints) - + final_model = SINDY_delays_MI( # noqa: F405 + shape_factors, + scale_factors, + loc_factors, + system_data.index, + forcing, + response, + True, + poly_order, + include_bias, + include_interaction, + windup_timesteps, + bibo_stable, + transform_dependent, + transform_only, + forcing_coef_constraints, + ) + print(f"\nFinal model for {num_transforms} transforms:") try: - print(final_model['model'].print(precision=5)) + print(final_model["model"].print(precision=5)) except Exception as e: print(e) print(f"R² = {final_model['error_metrics']['r2']:.6f}") @@ -212,23 +266,28 @@ def objective_function(params_vector): print("Location factors:") print(loc_factors) print() - + # Store results results[num_transforms] = { - 'final_model': final_model.copy(), - 'shape_factors': shape_factors.copy(deep=True), - 'scale_factors': scale_factors.copy(deep=True), - 'loc_factors': loc_factors.copy(deep=True), - 'windup_timesteps': windup_timesteps, - 'dependent_columns': dependent_columns, - 'independent_columns': independent_columns + "final_model": final_model.copy(), + "shape_factors": shape_factors.copy(deep=True), + "scale_factors": scale_factors.copy(deep=True), + "loc_factors": loc_factors.copy(deep=True), + "windup_timesteps": windup_timesteps, + "dependent_columns": dependent_columns, + "independent_columns": independent_columns, } - + # Early stopping check - if (num_transforms > init_transforms and - results[num_transforms]['final_model']['error_metrics']['r2'] - - results[num_transforms-1]['final_model']['error_metrics']['r2'] < early_stopping_threshold): - print(f"Last transformation added less than {early_stopping_threshold*100}% to R² score. Terminating early.") + if ( + num_transforms > init_transforms + and results[num_transforms]["final_model"]["error_metrics"]["r2"] + - results[num_transforms - 1]["final_model"]["error_metrics"]["r2"] + < early_stopping_threshold + ): + print( + f"Last transformation added less than {early_stopping_threshold*100}% to R² score. Terminating early." + ) break - - return results \ No newline at end of file + + return results diff --git a/pyproject.toml b/pyproject.toml index 4f557b9..7471176 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,10 +1,19 @@ [tool.black] line-length = 88 target-version = ["py310", "py311", "py312"] +exclude = "modpods_backup\\.py" [tool.ruff] line-length = 88 target-version = "py310" +exclude = [ + "modpods_backup.py", + "*.TMP", + "*.h11~", + "*.qeb~", + "*.wa0~", + ".vs", +] [tool.ruff.lint] select = ["E", "F", "W", "I"] @@ -15,6 +24,10 @@ ignore_missing_imports = true no_strict_optional = true warn_return_any = true check_untyped_defs = true +explicit_package_bases = true +exclude = [ + "modpods_backup\\.py", +] [tool.pytest.ini_options] testpaths = ["tests"] diff --git a/test_bayesian.py b/test_bayesian.py index bd6ea33..9cd4b95 100644 --- a/test_bayesian.py +++ b/test_bayesian.py @@ -1,6 +1,6 @@ import numpy as np import pandas as pd -import matplotlib.pyplot as plt + import modpods import modpods_bayesian @@ -14,13 +14,14 @@ # Simple system: output depends on delayed and transformed input input_signal = np.random.randn(n_samples) * 0.5 + np.sin(t * 0.1) delayed_input = np.concatenate([np.zeros(5), input_signal[:-5]]) # 5-step delay -output_signal = 0.7 * delayed_input + 0.3 * np.roll(delayed_input, 3) + 0.1 * np.random.randn(n_samples) +output_signal = ( + 0.7 * delayed_input + + 0.3 * np.roll(delayed_input, 3) + + 0.1 * np.random.randn(n_samples) +) # Create DataFrame -test_data = pd.DataFrame({ - 'input': input_signal, - 'output': output_signal -}) +test_data = pd.DataFrame({"input": input_signal, "output": output_signal}) # Test with minimal parameters print("Testing Bayesian optimization with minimal example...") @@ -28,31 +29,51 @@ # Test compass search first (original function) print("\n=== Testing Original Compass Search ===") model_compass = modpods.delay_io_train( - test_data, ['output'], ['input'], - windup_timesteps=10, init_transforms=1, max_transforms=1, - max_iter=5, verbose=True, poly_order=1 + test_data, + ["output"], + ["input"], + windup_timesteps=10, + init_transforms=1, + max_transforms=1, + max_iter=5, + verbose=True, + poly_order=1, ) print("Compass search completed successfully!") print(f"R² = {model_compass[1]['final_model']['error_metrics']['r2']:.6f}") - - # Test Bayesian optimization + + # Test Bayesian optimization print("\n=== Testing Bayesian Optimization ===") model_bayesian = modpods_bayesian.delay_io_train_bayesian( - test_data, ['output'], ['input'], - windup_timesteps=10, init_transforms=1, max_transforms=1, - max_iter=15, verbose=True, poly_order=1 + test_data, + ["output"], + ["input"], + windup_timesteps=10, + init_transforms=1, + max_transforms=1, + max_iter=15, + verbose=True, + poly_order=1, ) print("Bayesian optimization completed successfully!") print(f"R² = {model_bayesian[1]['final_model']['error_metrics']['r2']:.6f}") - + print("\n=== Comparison ===") - print(f"Compass search R²: {model_compass[1]['final_model']['error_metrics']['r2']:.6f}") - print(f"Bayesian opt R²: {model_bayesian[1]['final_model']['error_metrics']['r2']:.6f}") - - improvement = model_bayesian[1]['final_model']['error_metrics']['r2'] - model_compass[1]['final_model']['error_metrics']['r2'] + print( + f"Compass search R²: {model_compass[1]['final_model']['error_metrics']['r2']:.6f}" + ) + print( + f"Bayesian opt R²: {model_bayesian[1]['final_model']['error_metrics']['r2']:.6f}" + ) + + improvement = ( + model_bayesian[1]["final_model"]["error_metrics"]["r2"] + - model_compass[1]["final_model"]["error_metrics"]["r2"] + ) print(f"Improvement: {improvement:.6f}") - + except Exception as e: print(f"Error: {e}") import traceback - traceback.print_exc() \ No newline at end of file + + traceback.print_exc() diff --git a/test_camels.py b/test_camels.py index 3d4f2b2..42c04f2 100644 --- a/test_camels.py +++ b/test_camels.py @@ -1,6 +1,5 @@ -import numpy as np import pandas as pd -import matplotlib.pyplot as plt + import modpods # Test with the original CAMELS dataset @@ -8,14 +7,16 @@ # Load the original dataset filepath = "./03439000_05_model_output.txt" -df = pd.read_csv(filepath, sep=r'\s+') +df = pd.read_csv(filepath, sep=r"\s+") print("Data loaded successfully!") print(f"Dataset shape: {df.shape}") # Prepare data as in original test -df.rename({'YR':'year','MNTH':'month','DY':'day','HR':'hour'},axis=1,inplace=True) -df['datetime'] = pd.to_datetime(df[['year','month','day','hour']]) -df.set_index('datetime',inplace=True) +df.rename( + {"YR": "year", "MNTH": "month", "DY": "day", "HR": "hour"}, axis=1, inplace=True +) +df["datetime"] = pd.to_datetime(df[["year", "month", "day", "hour"]]) +df.set_index("datetime", inplace=True) # Shift forcing to make system causal df.RAIM = df.RAIM.shift(-1) @@ -24,11 +25,11 @@ # Use subset for testing windup_timesteps = 30 years = 1 -df_train = df.iloc[:365*years + windup_timesteps,:] +df_train = df.iloc[: 365 * years + windup_timesteps, :] # Test both methods on real data -forcing_coef_constraints = {'RAIM':-1, 'PET':1,'PRCP':-1} -df_train = df_train[['OBS_RUN','RAIM','PET','PRCP']] +forcing_coef_constraints = {"RAIM": -1, "PET": 1, "PRCP": -1} +df_train = df_train[["OBS_RUN", "RAIM", "PET", "PRCP"]] print(f"\nTraining data shape: {df_train.shape}") print("Training both optimization methods...") @@ -37,58 +38,71 @@ # Compass search print("\n=== Compass Search on CAMELS Data ===") model_compass = modpods.delay_io_train( - df_train, ['OBS_RUN'], ['RAIM','PET','PRCP'], + df_train, + ["OBS_RUN"], + ["RAIM", "PET", "PRCP"], windup_timesteps=windup_timesteps, - init_transforms=1, max_transforms=1, max_iter=20, - verbose=False, forcing_coef_constraints=forcing_coef_constraints, - poly_order=1, bibo_stable=False, - optimization_method="compass_search" + init_transforms=1, + max_transforms=1, + max_iter=20, + verbose=False, + forcing_coef_constraints=forcing_coef_constraints, + poly_order=1, + bibo_stable=False, + optimization_method="compass_search", ) - compass_r2 = model_compass[1]['final_model']['error_metrics']['r2'] + compass_r2 = model_compass[1]["final_model"]["error_metrics"]["r2"] print(f"Compass search R² = {compass_r2:.6f}") - + # Bayesian optimization print("\n=== Bayesian Optimization on CAMELS Data ===") model_bayesian = modpods.delay_io_train( - df_train, ['OBS_RUN'], ['RAIM','PET','PRCP'], + df_train, + ["OBS_RUN"], + ["RAIM", "PET", "PRCP"], windup_timesteps=windup_timesteps, - init_transforms=1, max_transforms=1, max_iter=25, - verbose=False, forcing_coef_constraints=forcing_coef_constraints, - poly_order=1, bibo_stable=False, - optimization_method="bayesian" + init_transforms=1, + max_transforms=1, + max_iter=25, + verbose=False, + forcing_coef_constraints=forcing_coef_constraints, + poly_order=1, + bibo_stable=False, + optimization_method="bayesian", ) - bayesian_r2 = model_bayesian[1]['final_model']['error_metrics']['r2'] + bayesian_r2 = model_bayesian[1]["final_model"]["error_metrics"]["r2"] print(f"Bayesian optimization R² = {bayesian_r2:.6f}") - + # Results improvement = bayesian_r2 - compass_r2 pct_improvement = (improvement / compass_r2) * 100 if compass_r2 > 0 else 0 - - print(f"\n=== CAMELS Dataset Results ===") + + print("\n=== CAMELS Dataset Results ===") print(f"Compass search R²: {compass_r2:.6f}") print(f"Bayesian opt R²: {bayesian_r2:.6f}") print(f"Absolute improvement: {improvement:.6f}") print(f"Percent improvement: {pct_improvement:.1f}%") - + if improvement > 0: print("✓ Bayesian optimization found a better solution!") else: print("→ Compass search performed better on this dataset") - + print("\n=== Parameter Comparison ===") print("Compass search factors:") print(f" Shape: {model_compass[1]['shape_factors'].iloc[0,0]:.3f}") print(f" Scale: {model_compass[1]['scale_factors'].iloc[0,0]:.3f}") print(f" Location: {model_compass[1]['loc_factors'].iloc[0,0]:.3f}") - + print("Bayesian optimization factors:") print(f" Shape: {model_bayesian[1]['shape_factors'].iloc[0,0]:.3f}") print(f" Scale: {model_bayesian[1]['scale_factors'].iloc[0,0]:.3f}") print(f" Location: {model_bayesian[1]['loc_factors'].iloc[0,0]:.3f}") - + print("\n=== SUCCESS: Both methods completed successfully! ===") - + except Exception as e: print(f"Error: {e}") import traceback - traceback.print_exc() \ No newline at end of file + + traceback.print_exc() diff --git a/test_fixed.py b/test_fixed.py index dee5867..2415cc5 100644 --- a/test_fixed.py +++ b/test_fixed.py @@ -1,8 +1,6 @@ -import numpy as np -import pandas as pd -import scipy.stats as stats -import os import matplotlib.pyplot as plt +import pandas as pd + import modpods # basic funcionality tests and a bit of a tutorial @@ -13,67 +11,91 @@ filepath = "./03439000_05_model_output.txt" -df = pd.read_csv(filepath, sep='\s+') +df = pd.read_csv(filepath, sep=r"\s+") print(df) print(df.columns) # combine the columns YR, MNTH, DY, and YR into a single datetime column -df.rename({'YR':'year','MNTH':'month','DY':'day','HR':'hour'},axis=1,inplace=True) -df['datetime'] = pd.to_datetime(df[['year','month','day','hour']]) +df.rename( + {"YR": "year", "MNTH": "month", "DY": "day", "HR": "hour"}, axis=1, inplace=True +) +df["datetime"] = pd.to_datetime(df[["year", "month", "day", "hour"]]) # set the index to the datetime column -df.set_index('datetime',inplace=True) +df.set_index("datetime", inplace=True) # shift the forcing back one timestep (one day) to make the system causal -print(df[['OBS_RUN','RAIM']]) +print(df[["OBS_RUN", "RAIM"]]) df.RAIM = df.RAIM.shift(-1) df.dropna(inplace=True) -print(df[['OBS_RUN','RAIM']]) +print(df[["OBS_RUN", "RAIM"]]) # for better results (and slower run) up the max iterations, model complexity (poly_order and max_transforms), and the number of years used to train - # drop all columns except for RAIM (surface water input) and OBS_RUN (observed runoff) for actual CAMELS training # but for testing the MIMO delay_io_model I want multiple inputs and multiple outputs -windup_timesteps = 30 # days of windup +windup_timesteps = 30 # days of windup years = 1 -df_train = df.iloc[:365*years + windup_timesteps,:] # total data used, actually trained on this less the windup period -df_eval = df.iloc[-(365*years + windup_timesteps):,:] # data for evaluation, not used in training - -#df.plot(y=['OBS_RUN','RAIM']) -#plt.show() +df_train = df.iloc[ + : 365 * years + windup_timesteps, : +] # total data used, actually trained on this less the windup period +df_eval = df.iloc[ + -(365 * years + windup_timesteps) :, : +] # data for evaluation, not used in training +# df.plot(y=['OBS_RUN','RAIM']) +# plt.show() -#df['ones'] = np.ones(len(df.OBS_RUN)) # to make sure MIMO error metrics are working correctly +# df['ones'] = np.ones(len(df.OBS_RUN)) # to make sure MIMO error metrics are working correctly print(df_train) -forcing_coef_constraints = {'RAIM':-1, 'PET':1,'PRCP':-1} -df_train = df_train[['OBS_RUN','RAIM','PET','PRCP']] -rainfall_runoff_model = modpods.delay_io_train(df_train, ['OBS_RUN'],['RAIM','PET','PRCP'],windup_timesteps=windup_timesteps, - init_transforms=1, max_transforms=1,max_iter=10, verbose=True, forcing_coef_constraints= forcing_coef_constraints, - poly_order=1, bibo_stable=False) +forcing_coef_constraints = {"RAIM": -1, "PET": 1, "PRCP": -1} +df_train = df_train[["OBS_RUN", "RAIM", "PET", "PRCP"]] +rainfall_runoff_model = modpods.delay_io_train( + df_train, + ["OBS_RUN"], + ["RAIM", "PET", "PRCP"], + windup_timesteps=windup_timesteps, + init_transforms=1, + max_transforms=1, + max_iter=10, + verbose=True, + forcing_coef_constraints=forcing_coef_constraints, + poly_order=1, + bibo_stable=False, +) print(rainfall_runoff_model) print(rainfall_runoff_model[1]) print("error metrics") -print(rainfall_runoff_model[1]['final_model']['error_metrics']) -#print(rainfall_runoff_model[2]['final_model']['error_metrics']) -#print(rainfall_runoff_model[3]['final_model']['error_metrics']) +print(rainfall_runoff_model[1]["final_model"]["error_metrics"]) +# print(rainfall_runoff_model[2]['final_model']['error_metrics']) +# print(rainfall_runoff_model[3]['final_model']['error_metrics']) print("shapes") -print(rainfall_runoff_model[1]['shape_factors']) -#print(rainfall_runoff_model[2]['shape_factors']) -#print(rainfall_runoff_model[3]['shape_factors']) +print(rainfall_runoff_model[1]["shape_factors"]) +# print(rainfall_runoff_model[2]['shape_factors']) +# print(rainfall_runoff_model[3]['shape_factors']) # plot the results -fig, ax = plt.subplots(1,1,figsize=(8,4)) -ax.plot(df_train.index[windup_timesteps+1:],rainfall_runoff_model[1]['final_model']['response']['OBS_RUN'][windup_timesteps+1:],label='observed') -ax.plot(df_train.index[windup_timesteps+1:],rainfall_runoff_model[1]['final_model']['simulated'][:,0],label='simulated') -#ax.set_title('1 transformation') +fig, ax = plt.subplots(1, 1, figsize=(8, 4)) +ax.plot( + df_train.index[windup_timesteps + 1 :], + rainfall_runoff_model[1]["final_model"]["response"]["OBS_RUN"][ + windup_timesteps + 1 : + ], + label="observed", +) +ax.plot( + df_train.index[windup_timesteps + 1 :], + rainfall_runoff_model[1]["final_model"]["simulated"][:, 0], + label="simulated", +) +# ax.set_title('1 transformation') ax.legend() plt.title("training") -''' +""" ax[1].plot(df.index[windup_timesteps+1:],rainfall_runoff_model[2]['final_model']['response']['OBS_RUN'][windup_timesteps+1:],label='observed') ax[1].plot(df.index[windup_timesteps+1:],rainfall_runoff_model[2]['final_model']['simulated'][:,0],label='simulated') ax[1].set_title('2 transformations') @@ -82,23 +104,25 @@ ax[2].plot(df.index[windup_timesteps+1:],rainfall_runoff_model[3]['final_model']['simulated'][:,0],label='simulated') ax[2].set_title('3 transformations') ax[2].legend() -''' +""" plt.show() -plt.close('all') - +plt.close("all") # now test prediction / evaluation -eval_sim = modpods.delay_io_predict(rainfall_runoff_model, df_eval, 1,evaluation=True) +eval_sim = modpods.delay_io_predict(rainfall_runoff_model, df_eval, 1, evaluation=True) print("error metrics") -print(eval_sim['error_metrics']) -fig, ax = plt.subplots(1,1,figsize=(8,4)) -ax.plot(df_eval.index[windup_timesteps+1:],df_eval['OBS_RUN'][windup_timesteps+1:],label='observed') -ax.plot(df_eval.index[windup_timesteps+1:],eval_sim['prediction'],label='simulated') -#ax.set_title('1 transformation') +print(eval_sim["error_metrics"]) +fig, ax = plt.subplots(1, 1, figsize=(8, 4)) +ax.plot( + df_eval.index[windup_timesteps + 1 :], + df_eval["OBS_RUN"][windup_timesteps + 1 :], + label="observed", +) +ax.plot( + df_eval.index[windup_timesteps + 1 :], eval_sim["prediction"], label="simulated" +) +# ax.set_title('1 transformation') ax.legend() plt.title("evaluation") plt.show() - - - diff --git a/test_integrated.py b/test_integrated.py index a216329..a08fd97 100644 --- a/test_integrated.py +++ b/test_integrated.py @@ -1,6 +1,6 @@ import numpy as np import pandas as pd -import matplotlib.pyplot as plt + import modpods # Create a simple test case @@ -13,13 +13,14 @@ # Simple system: output depends on delayed and transformed input input_signal = np.random.randn(n_samples) * 0.5 + np.sin(t * 0.1) delayed_input = np.concatenate([np.zeros(5), input_signal[:-5]]) # 5-step delay -output_signal = 0.7 * delayed_input + 0.3 * np.roll(delayed_input, 3) + 0.1 * np.random.randn(n_samples) +output_signal = ( + 0.7 * delayed_input + + 0.3 * np.roll(delayed_input, 3) + + 0.1 * np.random.randn(n_samples) +) # Create DataFrame -test_data = pd.DataFrame({ - 'input': input_signal, - 'output': output_signal -}) +test_data = pd.DataFrame({"input": input_signal, "output": output_signal}) # Test integrated Bayesian optimization print("Testing integrated Bayesian optimization in delay_io_train...") @@ -27,35 +28,55 @@ # Test compass search first (default) print("\n=== Testing Compass Search ===") model_compass = modpods.delay_io_train( - test_data, ['output'], ['input'], - windup_timesteps=10, init_transforms=1, max_transforms=1, - max_iter=5, verbose=True, poly_order=1, - optimization_method="compass_search" + test_data, + ["output"], + ["input"], + windup_timesteps=10, + init_transforms=1, + max_transforms=1, + max_iter=5, + verbose=True, + poly_order=1, + optimization_method="compass_search", ) print("Compass search completed successfully!") print(f"R² = {model_compass[1]['final_model']['error_metrics']['r2']:.6f}") - + # Test integrated Bayesian optimization print("\n=== Testing Integrated Bayesian Optimization ===") model_bayesian = modpods.delay_io_train( - test_data, ['output'], ['input'], - windup_timesteps=10, init_transforms=1, max_transforms=1, - max_iter=15, verbose=True, poly_order=1, - optimization_method="bayesian" + test_data, + ["output"], + ["input"], + windup_timesteps=10, + init_transforms=1, + max_transforms=1, + max_iter=15, + verbose=True, + poly_order=1, + optimization_method="bayesian", ) print("Bayesian optimization completed successfully!") print(f"R² = {model_bayesian[1]['final_model']['error_metrics']['r2']:.6f}") - + print("\n=== Comparison ===") - print(f"Compass search R²: {model_compass[1]['final_model']['error_metrics']['r2']:.6f}") - print(f"Bayesian opt R²: {model_bayesian[1]['final_model']['error_metrics']['r2']:.6f}") - - improvement = model_bayesian[1]['final_model']['error_metrics']['r2'] - model_compass[1]['final_model']['error_metrics']['r2'] + print( + f"Compass search R²: {model_compass[1]['final_model']['error_metrics']['r2']:.6f}" + ) + print( + f"Bayesian opt R²: {model_bayesian[1]['final_model']['error_metrics']['r2']:.6f}" + ) + + improvement = ( + model_bayesian[1]["final_model"]["error_metrics"]["r2"] + - model_compass[1]["final_model"]["error_metrics"]["r2"] + ) print(f"Improvement: {improvement:.6f}") - + print("\n=== SUCCESS: Bayesian optimization integrated successfully! ===") - + except Exception as e: print(f"Error: {e}") import traceback - traceback.print_exc() \ No newline at end of file + + traceback.print_exc() diff --git a/test_lti_control_of_swmm.py~RFb6efb78e.TMP b/test_lti_control_of_swmm.py~RFb6efb78e.TMP deleted file mode 100644 index e69de29..0000000 diff --git a/test_lti_control_of_swmm.py~RFdf17d1.TMP b/test_lti_control_of_swmm.py~RFdf17d1.TMP deleted file mode 100644 index 89076f0..0000000 --- a/test_lti_control_of_swmm.py~RFdf17d1.TMP +++ /dev/null @@ -1,445 +0,0 @@ -# reference: https://colab.research.google.com/github/kLabUM/pystorms/blob/master/tutorials/Scenario_Gamma.ipynb - -import sys -#from modpods import topo_from_pystorms -#sys.path.append("G:/My Drive/modpods") -import modpods -import pystorms -import numpy as np -import matplotlib.pyplot as plt -import pandas as pd -import control as ct -import dill as pickle -use_blind = False - -''' -# uncontrolled -env = pystorms.scenarios.gamma() -done = False - -while not done: - # Query the current state of the simulation - state = env.state() - - # Initialize actions to have each asset open - actions = np.ones(11) - - # Set the actions and progress the simulation - done = env.step(actions) - -# Calculate the performance measure for the uncontrolled simulation -uncontrolled_perf = sum(env.data_log["performance_measure"]) - -print("The calculated performance for the uncontrolled case of Scenario gamma is:") -print("{}.".format(uncontrolled_perf)) - -basin_max_depths = [5., 10., 10., 10.] - -plt.figure(figsize=(15,6)) -plt.subplot(1,2,1) -plt.plot(np.asarray(env.data_log['depthN']['1'])/basin_max_depths[0], label='Basin 1') -plt.plot(np.asarray(env.data_log['depthN']['2'])/basin_max_depths[1], label='Basin 2') -plt.plot(np.asarray(env.data_log['depthN']['3'])/basin_max_depths[2], label='Basin 3') -plt.plot(np.asarray(env.data_log['depthN']['4'])/basin_max_depths[3], label='Basin 4') -plt.xlabel('Simulation Timestep') -plt.ylabel('Filling Degree') -plt.legend() - -plt.subplot(1,2,2) -plt.plot(env.data_log['flow']['O1'], label='Basin 1') -plt.plot(env.data_log['flow']['O2'], label='Basin 2') -plt.plot(env.data_log['flow']['O3'], label='Basin 3') -plt.plot(env.data_log['flow']['O4'], label='Basin 4') -plt.xlabel('Simulation Timestep') -plt.ylabel('Basin Outflow (cfs)') -plt.legend() -plt.show() - -uncontrolled_data = env.data_log -''' -def controller_efd(state, max_depths): - # Initialize the action space so that we can compute the new settings - new_settings = np.ones(len(state)) - # Set equal filling degree parameters - c = 1.5 - theta = 0.25 - - # Assign the current depth in each basin - depths = state - - # Compute the filling degrees - fd = depths/max_depths - # Compute the average filling degree across each controlled basin - fd_average = sum(fd)/len(fd) - - # Update each valve setting based on the relative fullness of each basin - for i in range(0,len(fd)): - - # If a basin is very full compared to the average, we should open its - # valve to release some water - if fd[i] > fd_average: - new_settings[i] = c*(fd[i]-fd_average) - - # If a basin's filling degree is close to the average (within some value - # theta), its setting can be close to that average - elif fd_average-fd[i] <= theta: - new_settings[i] = fd_average - - # If a basin is very empty compared to the average, we can close its - # valve to store more water at that location, prioritizing releasing at - # the other locations - else: - new_settings[i] = 0. - - # Make sure the settings are in bounds [0,1] - new_settings[i] = min(new_settings[i], 1.) - new_settings[i] = max(new_settings[i], 0.) - - return new_settings - - - -env = pystorms.scenarios.gamma() -done = False - -# Specify the maximum depths for each basin we are controlling -basin_max_depths = [5., 10., 10., 10.] - -while not done: - # Query the current state of the simulation - state = env.state() - # Isolate only the states that we need (the 4 downstream basin depths) - states_relevant = state[0:4] - - # Pass the current, relevant states and the maximum basin - # depths into our equal filling degree logic - actions_efd = controller_efd(states_relevant, basin_max_depths) - # Specify that the other 7 valves in the network should be - # open since we are not controlling them here - actions_uncontrolled = np.ones(7) - # Join the two above action arrays - actions = np.concatenate((actions_efd, actions_uncontrolled), axis=0) - - # Set the actions and progress the simulation - done = env.step(actions) - -# Calculate the performance measure for the uncontrolled simulation -equalfilling_perf = sum(env.data_log["performance_measure"]) - -print("The calculated performance for the equal filling degree case of Scenario gamma is:") -print("{}.".format(equalfilling_perf)) -''' -plt.figure(figsize=(15,6)) -plt.subplot(1,2,1) -plt.plot(np.asarray(env.data_log['depthN']['1'])/basin_max_depths[0], label='Basin 1') -plt.plot(np.asarray(env.data_log['depthN']['2'])/basin_max_depths[1], label='Basin 2') -plt.plot(np.asarray(env.data_log['depthN']['3'])/basin_max_depths[2], label='Basin 3') -plt.plot(np.asarray(env.data_log['depthN']['4'])/basin_max_depths[3], label='Basin 4') -plt.xlabel('Simulation Timestep') -plt.ylabel('Filling Degree') -plt.legend() - -plt.subplot(1,2,2) -plt.plot(env.data_log['flow']['O1'], label='Basin 1') -plt.plot(env.data_log['flow']['O2'], label='Basin 2') -plt.plot(env.data_log['flow']['O3'], label='Basin 3') -plt.plot(env.data_log['flow']['O4'], label='Basin 4') -plt.xlabel('Simulation Timestep') -plt.ylabel('Basin Outflow (cfs)') -plt.legend() -''' -ef_data = env.data_log - -# get the responses into one dataframe with zero pads in between -#uncontrolled_flows = pd.DataFrame.from_dict(uncontrolled_data['flow']) -#uncontrolled_depthN = pd.DataFrame.from_dict(uncontrolled_data['depthN']) -#uncontrolled_response = pd.concat([uncontrolled_flows, uncontrolled_depthN], axis=1) -#print(uncontrolled_response) -ef_flows = pd.DataFrame.from_dict(ef_data['flow']) -ef_flows.columns = env.config['action_space'] -ef_depthN = pd.DataFrame.from_dict(ef_data['depthN']) -ef_depthN.columns = env.config['states'] -ef_response = pd.concat([ef_flows, ef_depthN], axis=1) -ef_response.index = env.data_log['simulation_time'] -print(ef_response) -# for the columns of ef_response which do not contain the string "O" (i.e. the depths) -# if the current column name is "X", make it "(X, depthN)" -# this is because the modpods library expects the depths to be named "X, depthN" -# where X is the name of the corresponding flow -for col in ef_response.columns: - if "O" in col: # the orifices - # if there's a number in that column name that's greater than 4, drop this column - # this is because we're only controlling the first 4 orifices and these measurements are redundant to the storage node depths if the orifices are always open - if int(col[1:]) > 4: - ef_response.drop(columns=col, inplace=True) - - ef_response.rename(columns={col: (col, "flow")}, inplace=True) - - - -print(ef_response) - - - -# for debugging resample to a coarser time step (native resolution is about one minute but not consistent) -# need a consistent time step for modpods -orig_index_length = len(ef_response.index) -ef_response = ef_response.resample('1H',axis='index').mean().copy(deep=True) -print(ef_response) -training_dt = orig_index_length / len(ef_response.index) -# we'll only use the ef response to infer the topology and dynamics -# for this experiment assume all of flow O1-O11 and depth 1-11 are observable -# but only O1-O4 are controllable -independent_columns = ef_response.columns[0:4] # orifices O1 through O4 -dependent_columns = ef_response.drop(columns=independent_columns).columns - - -# learn the topology from the data -# this will be the "blind" plant model -if use_blind: # don't have this on all the time because it's very expensive - blind_topo = modpods.infer_causative_topology(ef_response, dependent_columns = dependent_columns, - independent_columns = independent_columns, verbose=True) - - print(blind_topo.causative_topo) - print(blind_topo.total_graph) - - -# read the topology from the swmm file (this is much cheaper) -env.config['states'] = dependent_columns -env.config['action_space'] = independent_columns -# the default is controlling all 11 orifices so we need to edit the environment -print("defining topology") -swmm_topo = modpods.topo_from_pystorms(env) -# the index of the causative topology should be the dependent columns -#swmm_topo.index = dependent_columns -# the columns of the causative topology should be the dependent columns plus the independent columns -#swmm_topo.columns = dependent_columns.append(independent_columns) - -# show all columns when printing dataframes -pd.set_option('display.max_columns', None) -#print(swmm_topo) - -if use_blind: - print("differences in topology") - print(blind_topo.causative_topo.compare(swmm_topo)) - -# learn the dynamics now, or load a previously learned model - -# learn the dynamics from the efd response -print("learning dynamics") -lti_plant_approx_seeing = modpods.lti_system_gen(swmm_topo, ef_response, - independent_columns= independent_columns, - dependent_columns = dependent_columns, max_iter = 0, - swmm=True,bibo_stable=True,max_transition_state_dim=5) -# pickle the plant approximation to load later -with open('G:/My Drive/modpods/swmm_lti_plant_approx_seeing.pickle', 'wb') as handle: - pickle.dump(lti_plant_approx_seeing, handle) -''' - -# load the plant approximation from a pickle -with open('G:/My Drive/modpods/swmm_lti_plant_approx_seeing.pickle', 'rb') as handle: - print("loading previously trained model") - lti_plant_approx_seeing = pickle.load(handle) - ''' -if use_blind: - lti_plant_approx_blind = modpods.lti_system_gen(blind_topo, ef_response, - independent_columns= independent_columns, - dependent_columns = dependent_columns) - - - -# is the plant approximation internally stable? -plant_eigenvalues,_ = np.linalg.eig(lti_plant_approx_seeing['A'].values) - -# cast the columns of dataframes to strings for easier indexing -ef_response.columns = ef_response.columns.astype(str) -dependent_columns = [str(col) for col in dependent_columns] -independent_columns = [str(col) for col in independent_columns] -# reindex the ef_response to an integer step -ef_response.index = np.arange(0,len(ef_response),1) - -# evaluate the plant approximation accuracy -# only plot the depths at 1, 2, 3, and 4 -# the forcing is the flows at O1, O2, O3, and O4 -approx_response = ct.forced_response(lti_plant_approx_seeing['system'], U=np.transpose(ef_response[independent_columns].values), T=ef_response.index.values) -approx_data = pd.DataFrame(index=ef_response.index.values) -approx_data[dependent_columns[0]] = approx_response.outputs[0][:] -approx_data[dependent_columns[1]] = approx_response.outputs[1][:] -approx_data[dependent_columns[2]] = approx_response.outputs[2][:] -approx_data[dependent_columns[3]] = approx_response.outputs[3][:] - -output_columns = dependent_columns[0:4] # depth at 1,2,3,4 - -# create a vertical subplot of 3 axes -fig, axes = plt.subplots(4, 1, figsize=(10, 10)) - -for idx in range(len(output_columns)): - axes[idx].plot(ef_response[output_columns[idx]],label='actual') - axes[idx].plot(approx_data[output_columns[idx]],label='approx') - if idx == 0: - axes[idx].legend(fontsize='x-large',loc='best') - axes[idx].set_ylabel(output_columns[idx],fontsize='large') - if idx == len(output_columns)-1: - axes[idx].set_xlabel("time",fontsize='x-large') -# label the left column of plots "training" -axes[0].set_title("outputs",fontsize='xx-large') - -plt.savefig("G:/My Drive/modpods/test_lti_control_of_swmm_plant_approx.png") -plt.savefig("G:/My Drive/modpods/test_lti_control_of_swmm_plant_approx.svg") -#plt.show() -plt.close() -# same plot, but just the first few timesteps (this is the accuracy that matters for feedback control) -# create a vertical subplot of 3 axes -fig, axes = plt.subplots(4, 1, figsize=(10, 10)) - -for idx in range(len(output_columns)): - axes[idx].plot(ef_response[output_columns[idx]][:10],label='actual') - axes[idx].plot(approx_data[output_columns[idx]][:10],label='approx') - if idx == 0: - axes[idx].legend(fontsize='x-large',loc='best') - axes[idx].set_ylabel(output_columns[idx],fontsize='large') - if idx == len(output_columns)-1: - axes[idx].set_xlabel("time",fontsize='x-large') -# label the left column of plots "training" -axes[0].set_title("outputs",fontsize='xx-large') - -plt.savefig("G:/My Drive/modpods/test_lti_control_of_swmm_plant_approx_first10.png") -plt.savefig("G:/My Drive/modpods/test_lti_control_of_swmm_plant_approx_first10.svg") -#plt.show() -plt.close() - -# define the cost function -Q = np.eye(len(lti_plant_approx_seeing['A'].columns)) / 10e12 # we don't want to penalize the transition states as their magnitude doesn't have directly tractable physical meaning -# bryson's rule based on the maxiumum depth of each basin -# note: the swmm file gamma.inp actually specifies the maximum depth of storage node 1 as 10 feet, -# but i'll be consistent with the configuration of the efd controller for consistent comparison -basin_max_depths_all = [5.0, 10.0, 10.0, 10.0, 10.0,20.0, 10.0, 10.0,10.0, 13.72, 14.96] -for asset_index in range(len(dependent_columns)): - Q[lti_plant_approx_seeing['A'].columns.get_loc(dependent_columns[asset_index]),lti_plant_approx_seeing['A'].columns.get_loc(dependent_columns[asset_index])] = 1 / ((basin_max_depths_all[asset_index])**2 ) - -flood_weighting = 10 # how much more important is flooding than the other objectives? -Q = Q * flood_weighting # set flood_weighting = 1 for basic bryson's rule (care about flows as much as flooding) -# threshold on flows at 0.11 m^3 / s which is 3.9 cfs -R = np.eye(len(lti_plant_approx_seeing['B'].columns)) / (3.9**2) # bryson's rule on maximum allowable flow -# define the system -# sys_response_to_control = ct.ss(lti_plant_approx_seeing['A'],lti_plant_approx_seeing['B'],lti_plant_approx_seeing['C'],0) # just for defining the controller gain -# (not necessary in this case, but would be if you've got disturbances) -# find the state feedback gain for the linear quadratic regulator -K,S,E = ct.lqr(lti_plant_approx_seeing['system'],Q,R) # one row of K should be zeros to reflect that u1 is not used as a control but is the disturbance - -# define the observer gain -noise_level = 1 # noise on depth measurements -L,P,E = ct.lqe(lti_plant_approx_seeing['system'],np.eye(len(lti_plant_approx_seeing['B'].columns)),noise_level*np.eye(len(lti_plant_approx_seeing['C'].index)) ) # unit covariance on process noise and measurement error - -''' -# define the observer based compensator (per freudenberg 560 course notes 2.4) -obc_A = lti_plant_approx_seeing['A'].values-lti_plant_approx_seeing['B'].values@K - L@lti_plant_approx_seeing['C'].values -# ingests measurements, returns control actions -obc = ct.ss(obc_A, L, -K, 0, inputs=list(lti_plant_approx_seeing['C'].index),outputs=list(lti_plant_approx_seeing['B'].columns)) # negate K to give back commanded flows which are positive - - -# need to separately define the observer and controller because we can't close the loop in the typical way -# the observer takes the control input and measured output as inputs and outputs the estimated full state -#observer_input = np.concatenate((lti_plant_approx_seeing['B'].values,L),axis = 1) -#observer = ct.ss(lti_plant_approx_seeing['A'] - L@lti_plant_approx_seeing['C'].values, observer_input, np.eye(len(lti_plant_approx_seeing['A']) ) , 0 ) - -# the controller takes in an estiamte of the state and returns a control command -# this is just, u = -K @ xhat, not necessary to define a state space model for that as there's no evolution - -# can't form the closed loop system because the plant is not a state space system, but rather a software model -# the state estimate and control actions will be computed iteratively as the simulation is stepped through -''' -env = pystorms.scenarios.gamma() -done = False - -u = np.zeros((len(lti_plant_approx_seeing['B'].columns),1) ) # start with all orifices completely closed -u_open_pct = np.zeros((len(lti_plant_approx_seeing['B'].columns),1) ) # start with all orifices completely closed -xhat = np.zeros((len(lti_plant_approx_seeing['A'].columns),1)) # initial state estimate - -steps = 0 # make sure the estimator and controller operate at the frequency the approxiation was trained at - -# convert control command (flow) into orifice open percentage -# per the EPA-SWMM user manual volume ii hydraulics, orifices (section 6.2, page 107) - https://nepis.epa.gov/Exe/ZyPDF.cgi/P100S9AS.PDF?Dockey=P100S9AS.PDF -# all orifices in gamma are "bottom" -Cd = 0.65 # happens to be the same for all of them -Ao = 1 # area is one square foot. again, happens to be the same for all of them. -g = 32.2 # ft / s^2 -# the expression for discharge is found using Torricelli's equation: Q = Cd * (Ao*open_pct) sqrt(2*g*H_e) -# H_e is the effective head in feet, which is just the depth in the basin as the orifices are "bottom" -# to get the action command as a percent open, we solve as: open_pct = Q_desired / (Cd * Ao * sqrt(2*g*H_e)) - -while not done: - #if steps % 5 == 0: # match to the frequency of the approximation - # Query the current state of the simulation - observables = env.state() - # for updating the plant, calculate the "u" that is actually applied to the plant, not the desired control input - for idx in range(len(u)): - u[idx,0] = Cd*Ao*u_open_pct[idx,0]*np.sqrt(2*g*observables[idx]) # calculate the actual flow through the orifice - - # update the observer based on these measurements (xhat_dot = (A-LC) xhat + B u + L y_m) - state_evolution = (lti_plant_approx_seeing['A'].values - L@lti_plant_approx_seeing['C'].values) @ xhat # TODO: this is being absurdbly fast / aggressive right now, tone it down - # causing the state estimation to diverge to crazy values - impact_of_control = lti_plant_approx_seeing['B'].values @ u - output_updating = (L @ np.transpose(observables)).reshape((-1,1)) # provided as row vector, need a column vector. also need to reshape to 2d array with 1 column - xhat_dot = (state_evolution + impact_of_control + output_updating) / training_dt # divide by the training frequency to get the change in state over the time step - xhat += xhat_dot # update the state estimate - yhat = lti_plant_approx_seeing['C'] @ xhat # just for reference, could be useful for plotting later - observables_error = yhat - observables.reshape(-1,1) # cast observables to be 2 dimensional - # note that this is only truly the error if there is zero error in the measurements. Generally, data is not truth. - u = -K @ xhat # calculate control command - - - u_open_pct = u*-1 - for idx in range(len(u)): - head = 2*g*observables[idx] - - if head < 0.01: # if the head is less than 0.01 ft, the basin is empty, so close the orifice - u_open_pct[idx,0] = 0 - else: - u_open_pct[idx,0] = u[idx,0] / (Cd*Ao * np.sqrt(2*g*observables[idx])) # open percentage for desired flow rate - - if u_open_pct[idx,0] > 1: # if the calculated open percentage is greater than 1, the orifice is fully open - u_open_pct[idx,0] = 1 - elif u_open_pct[idx,0]< 0: # if the calculated open percentage is less than 0, the orifice is fully closed - u_open_pct[idx,0] = 0 - - - # Specify that the other 7 valves in the network should be - # open since we are not controlling them here - actions_uncontrolled = np.ones(7) - # Join the two above action arrays - actions = np.concatenate((u_open_pct.flatten(), actions_uncontrolled), axis=0) - - # Set the actions and progress the simulation - done = env.step(actions) - steps += 1 - - -# Calculate the performance measure for the uncontrolled simulation -obc_perf = sum(env.data_log["performance_measure"]) - -print("The calculated performance for the observer based compensator case of Scenario gamma is:") -print("{}.".format(obc_perf)) - -plt.figure(figsize=(15,6)) -plt.subplot(1,2,1) -plt.plot(np.asarray(env.data_log['depthN']['1'])/basin_max_depths[0], label='Basin 1') -plt.plot(np.asarray(env.data_log['depthN']['2'])/basin_max_depths[1], label='Basin 2') -plt.plot(np.asarray(env.data_log['depthN']['3'])/basin_max_depths[2], label='Basin 3') -plt.plot(np.asarray(env.data_log['depthN']['4'])/basin_max_depths[3], label='Basin 4') -plt.xlabel('Simulation Timestep') -plt.ylabel('Filling Degree') -plt.legend() - -plt.subplot(1,2,2) -plt.plot(env.data_log['flow']['O1'], label='Basin 1') -plt.plot(env.data_log['flow']['O2'], label='Basin 2') -plt.plot(env.data_log['flow']['O3'], label='Basin 3') -plt.plot(env.data_log['flow']['O4'], label='Basin 4') -plt.xlabel('Simulation Timestep') -plt.ylabel('Basin Outflow (cfs)') -plt.legend() - - -print("done") - diff --git a/tests/test_modpods.py b/tests/test_modpods.py index 9f7f713..fb536a6 100644 --- a/tests/test_modpods.py +++ b/tests/test_modpods.py @@ -238,6 +238,127 @@ def test_delay_io_predict_returns_expected_shape( assert pred["prediction"].shape[0] > 0 +# --------------------------------------------------------------------------- +# Optimization method comparison tests +# --------------------------------------------------------------------------- + + +@pytest.fixture(scope="module") +def compass_model(simple_lti_data: pd.DataFrame) -> dict[Any, Any]: + """Train a model using the default compass-search optimizer.""" + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + model = modpods.delay_io_train( + simple_lti_data, + dependent_columns=["x1"], + independent_columns=["u"], + windup_timesteps=0, + init_transforms=1, + max_transforms=1, + max_iter=20, + poly_order=1, + verbose=False, + optimization_method="compass_search", + ) + return cast(dict[Any, Any], model) + + +@pytest.fixture(scope="module") +def bayesian_model(simple_lti_data: pd.DataFrame) -> dict[Any, Any]: + """Train a model using Bayesian optimization.""" + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + model = modpods.delay_io_train( + simple_lti_data, + dependent_columns=["x1"], + independent_columns=["u"], + windup_timesteps=0, + init_transforms=1, + max_transforms=1, + max_iter=20, + poly_order=1, + verbose=False, + optimization_method="bayesian", + ) + return cast(dict[Any, Any], model) + + +def test_compass_search_returns_valid_model( + compass_model: dict[Any, Any], +) -> None: + """Compass-search optimizer must return a well-formed model dict.""" + assert isinstance(compass_model, dict) + assert 1 in compass_model + assert "final_model" in compass_model[1] + assert "error_metrics" in compass_model[1]["final_model"] + r2 = float(compass_model[1]["final_model"]["error_metrics"]["r2"]) + assert r2 > -1.0, f"Compass R² {r2:.4f} is unreasonably low" + + +def test_bayesian_returns_valid_model( + bayesian_model: dict[Any, Any], +) -> None: + """Bayesian optimizer must return a well-formed model dict.""" + assert isinstance(bayesian_model, dict) + assert 1 in bayesian_model + assert "final_model" in bayesian_model[1] + assert "error_metrics" in bayesian_model[1]["final_model"] + r2 = float(bayesian_model[1]["final_model"]["error_metrics"]["r2"]) + assert r2 > -1.0, f"Bayesian R² {r2:.4f} is unreasonably low" + + +def test_both_methods_produce_comparable_r2( + compass_model: dict[Any, Any], + bayesian_model: dict[Any, Any], +) -> None: + """Both optimization methods should achieve similar R² on the same data. + + The difference in R² should be within a reasonable margin, confirming + that both methods solve the same underlying optimization problem. + """ + r2_compass = float(compass_model[1]["final_model"]["error_metrics"]["r2"]) + r2_bayesian = float(bayesian_model[1]["final_model"]["error_metrics"]["r2"]) + # Both should be positive (reasonable fit) + assert r2_compass > 0.0, f"Compass R² {r2_compass:.4f} is non-positive" + assert r2_bayesian > 0.0, f"Bayesian R² {r2_bayesian:.4f} is non-positive" + # Neither method should be dramatically worse than the other + assert abs(r2_compass - r2_bayesian) < 0.5, ( + f"Methods diverge too much: compass={r2_compass:.4f}, " + f"bayesian={r2_bayesian:.4f}" + ) + + +def test_compass_and_bayesian_predictions_agree( + compass_model: dict[Any, Any], + bayesian_model: dict[Any, Any], + simple_lti_data: pd.DataFrame, +) -> None: + """Predictions from compass and Bayesian models should broadly agree.""" + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + pred_compass = modpods.delay_io_predict( + compass_model, simple_lti_data, num_transforms=1 + ) + pred_bayesian = modpods.delay_io_predict( + bayesian_model, simple_lti_data, num_transforms=1 + ) + assert "prediction" in pred_compass + assert "prediction" in pred_bayesian + p_c = pred_compass["prediction"].ravel() + p_b = pred_bayesian["prediction"].ravel() + assert p_c.shape == p_b.shape, "Prediction shapes differ between methods" + # Both predictions must be finite (no NaN or Inf) + assert np.all(np.isfinite(p_c)), "Compass predictions contain NaN/Inf" + assert np.all(np.isfinite(p_b)), "Bayesian predictions contain NaN/Inf" + # Correlation of predictions should be high (both are fitting the same signal) + # Guard against constant predictions (std == 0) which yield undefined correlation + if p_c.std() > 0 and p_b.std() > 0: + corr = float(np.corrcoef(p_c, p_b)[0, 1]) + assert ( + corr > 0.5 + ), f"Compass and Bayesian predictions are poorly correlated: {corr:.4f}" + + # --------------------------------------------------------------------------- # infer_causative_topology tests (from test_topo_inference.py) # ---------------------------------------------------------------------------