From 00e17b0f58bf53f0a280b45cbb143ddbe4fc08f9 Mon Sep 17 00:00:00 2001 From: Kendra Johnson Date: Fri, 6 Dec 2024 09:18:06 -0900 Subject: [PATCH] disagg stats from csv --- .../man/tools/get_disaggregation_from_csv.py | 197 ++++++++++++++++++ 1 file changed, 197 insertions(+) create mode 100644 openquake/man/tools/get_disaggregation_from_csv.py diff --git a/openquake/man/tools/get_disaggregation_from_csv.py b/openquake/man/tools/get_disaggregation_from_csv.py new file mode 100644 index 000000000..325610628 --- /dev/null +++ b/openquake/man/tools/get_disaggregation_from_csv.py @@ -0,0 +1,197 @@ +#!/usr/bin/env python +# coding: utf-8 + +# In[9]: + + +import re +import numpy as np +import os +import glob +import math +import pandas as pd + + +def _find_idcalc(path_hazard): + # Find the first CSV file matching the pattern + files = glob.glob(os.path.join(path_hazard, 'realizations_*.csv')) + + if not files: # Check if no files are found + raise FileNotFoundError("No files matching the pattern 'realizations_*.csv' found in the specified path.") + + # Extract the number from the first matched file + match = re.search(r'realizations_(\d+)\.csv$', files[0]) + + if match: + return int(match.group(1)) + else: + raise ValueError(f"Filename '{files[0]}' does not match the expected pattern.") + + + +def disaggrMDEmean(Disaggr_name, imt, poe_target, type_disaggr): + # Read CSV data + disaggr_csv = pd.read_csv(Disaggr_name, header=1) + + # Extract columns as NumPy arrays for efficiency + SA_types = disaggr_csv['imt'].values + poe_col = disaggr_csv['poe'].values + iml = disaggr_csv['iml'].values + mag_col = disaggr_csv['mag'].values + dist_col = disaggr_csv['dist'].values + eps_col = disaggr_csv['eps'].values + wmean_poe_all = disaggr_csv['mean'].values + + # Initialize arrays + mag, dist, eps, wmean_poe = [], [], [], [] + mag_mean_i, dist_mean_i, eps_mean_i = [], [], [] + + # Apply conditions using masks + if type_disaggr == 'iml': + mask = SA_types == imt + else: + mask = (SA_types == imt) & (np.abs(poe_col - poe_target) <= 1e-3) + + # Apply the mask to extract relevant rows + mag_selected = mag_col[mask] + dist_selected = dist_col[mask] + eps_selected = eps_col[mask] + wmean_poe_selected = wmean_poe_all[mask] + iml_selected = iml[mask] + + # Calculate weighted means directly + mag_mean_i = mag_selected * wmean_poe_selected + dist_mean_i = dist_selected * wmean_poe_selected + eps_mean_i = eps_selected * wmean_poe_selected + + # Populate lists with selected values + mag.extend(mag_selected) + dist.extend(dist_selected) + eps.extend(eps_selected) + wmean_poe.extend(wmean_poe_selected) + + # Calculate aggregate values + + if np.any(iml_selected > 0): # Check if there are any valid entries + poe_mode = np.max(wmean_poe_selected) + it = np.abs(wmean_poe_selected - poe_mode).argmin() + + mag_mode = mag_selected[it] + dist_mode = dist_selected[it] + eps_mode = eps_selected[it] + + mag_mean = np.sum(mag_mean_i) / poe_target + dist_mean = np.sum(dist_mean_i) / poe_target + eps_mean = np.sum(eps_mean_i) / poe_target + + disaggr_mean = [mag_mean, dist_mean, eps_mean] + disaggr_mode = [mag_mode, dist_mode, eps_mode] + else: + disaggr_mean = [0, 0, 0] + disaggr_mode = [0, 0, 0] + + return iml_selected, mag, dist, eps, wmean_poe, disaggr_mean, disaggr_mode + + + +def main(file_sites_in: str, path_disaggregation: str): + + """ + Find the mean a mode of a disaggregation analysis given the output in csv + + """ + + type_disaggr = 'poe' + main.file_sites_in = "Path to the input site model .csv" + main.path_disaggregation = "Path to the disaggregation output from OQ .csv file" + + # Read the input sites CSV + sites_csv = pd.read_csv(file_sites_in) + + # Extract data from sites_csv + custom_site_ids = sites_csv['custom_site_id'].values + lons = sites_csv['lon'].values + lats = sites_csv['lat'].values + + # Initialize lists to store outputs + output_data = { "calc_id": [], "id": [], "site": [], "lon": [], "lat": [], + "imt": [], "poe": [], "iml": [], "M mean": [], "R mean": [], "eps mean": [], + "M mode": [], "R mode": [], "eps mode": [], "check": [], "pass": [] + } + + # Messages for readme.txt + out_text = [] + + # Iterate through each custom site + for c, custom_site_id in enumerate(custom_site_ids): + + calc_id = _find_idcalc(path_disaggregation) + fname_disaggr = os.path.join(path_disaggregation, f'Mag_Dist_Eps-mean-{c}_{calc_id}.csv') + + if os.path.isfile(fname_disaggr): + with open(fname_disaggr, "r") as f: + metadata = f.readline() + + # Extract coordinates from metadata + lon_disagg = round(float(metadata.split('lon=')[1].split(',')[0].strip()), 4) + lat_disagg =round(float(metadata.split('lat=')[1].split('"')[0].strip()), 4) + + # Check if coordinates match + if np.isclose(lon_disagg, lons[c], atol=0.01) and np.isclose(lat_disagg, lats[c], atol=0.01): + disaggr_csv = pd.read_csv(fname_disaggr, header=1) + + poe_target = [0.002107] if type_disaggr == 'iml' else disaggr_csv['poe'].unique() + + for imt in disaggr_csv['imt'].unique(): + for poe in poe_target: + # Call external function and unpack results + iml_disaggr, mag, dist, eps, wmean_poe, disaggr_means_s, disaggr_mode = disaggrMDEmean( + fname_disaggr, imt, poe, type_disaggr) + + # Append data to the dictionary + + output_data["calc_id"].append(calc_id) + output_data["id"].append(c) + output_data["site"].append(custom_site_id) + output_data["lon"].append(lon_disagg) + output_data["lat"].append(lat_disagg) + output_data["poe"].append(poe) + output_data["imt"].append(imt) + output_data["iml"].append(iml_disaggr[0]) + + output_data["M mean"].append(disaggr_means_s[0]) + output_data["R mean"].append(disaggr_means_s[1]) + output_data["eps mean"].append(disaggr_means_s[2]) + + + if iml_disaggr[0] > 0: + output_data["M mode"].append(f"{disaggr_mode[0]:.2f}") + output_data["R mode"].append(f"{disaggr_mode[1]:.2f}") + output_data["eps mode"].append(f"{disaggr_mode[2]:.2f}") + else: + output_data["M mode"].append(str(-999)) + output_data["R mode"].append(str(-999)) + output_data["eps mode"].append(str(-999)) + + check_val = sum(wmean_poe) + output_data["check"].append(check_val) + check_ratio = abs((check_val - poe) / poe) + output_data["pass"].append('yes' if check_ratio <= 0.1 else 'no') + else: + out_text.append(f'Coordinate mismatch for site id {c}: {custom_site_id}') + else: + out_text.append(f'No disaggregation data for site id {c}: {custom_site_id}') + + # Create DataFrames from the collected data + df_disaggr = pd.DataFrame(output_data) + df_disaggr.to_csv(path_disaggregation + 'res_disaggregation_mean_output.csv' ,index=False) + # Save messages to readme.txt + with open(os.path.join(path_disaggregation, 'res_disaggregation_readme.txt'), 'w') as f: + f.write('\n'.join(out_text)) + + +if __name__ == '__main__': + # Call main function with the required arguments + from openquake.baselib import sap + + sap.run(main)