From cf55836b939b269294e8f5b0feecbb7d767c48c2 Mon Sep 17 00:00:00 2001 From: Fahrettin Kuran Date: Sat, 21 Feb 2026 22:45:04 +0100 Subject: [PATCH 1/2] converter --- openquake/cat/converter.py | 110 ++++++++++++++++++++++++++ openquake/cat/tests/converter_test.py | 97 +++++++++++++++++++++++ 2 files changed, 207 insertions(+) create mode 100644 openquake/cat/converter.py create mode 100644 openquake/cat/tests/converter_test.py diff --git a/openquake/cat/converter.py b/openquake/cat/converter.py new file mode 100644 index 000000000..2c74f41a9 --- /dev/null +++ b/openquake/cat/converter.py @@ -0,0 +1,110 @@ +# Script Title: GMICEs and IGMCEs of Gallahue and Abrahamson (2023)'s work + +# Description: +# This script implements the methodology described in the paper titled +# "New Methodology for Unbiased Ground-Motion Intensity Conversion Equations" +# by Gallahue, M., and Abrahamson, N., published in 2023. Ground-motion +# intensity conversion equations (GMICEs) and intensity ground-motion +# conversion equations (IGMCEs) developed in the work are coded to +# obtain PGA and Intensity. + +# Reference: +# Molly Gallahue, Norman Abrahamson; New Methodology for Unbiased Ground‐Motion +# Intensity Conversion Equations. Bulletin of the Seismological Society of +# America 2023; 113 (3): 1133–1151. doi: https://doi.org/10.1785/0120220224 + +import numpy as np +import pandas as pd +import os + +# Coefficients +COEFFS = { + "eq19": {"d1": 2.919, "d2": 0.356, "d3": 0.010, "d4": 1.041, "d5": -0.889, "sigma": 0.566}, + "eq20": {"h1": 8.622, "h2": 1.230, "h3": 0.056, "h4": -0.568, "sigma": 0.704}, + "eq22": {"f1": -2.808, "f2": 0.444, "f3": -0.061, "f4": -0.047, "f5": -0.458, "sigma": 0.618}, + "eq23": {"i1": -6.558, "i2": 0.754, "i3": -0.072, "i4": -0.187, "sigma": 0.667} +} + +# Functions +def gmice(data, equation_id, epsilon=0): + """ + Ground-motion to intensity conversion (PGA -> Intensity) + Equations: 19 or 20 + Units: PGA [g], Rhyp [km] + Epsilon (ϵ): It can be estimated using the mean ϵ from the disaggregation; + however, if disaggregation results are not available, then ϵ can be approximated + from the slope of the hazard curve at any particular site (Gallahue and Abrahamson, 2023). + """ + df = pd.read_csv(data) + c = COEFFS.get(equation_id) + + if equation_id == "eq19": + required = ["PGA", "Mw", "Rhyp"] + if not all(col in df.columns for col in required): + raise ValueError(f"ERROR: {required} are not found for Equation 19!") + + ln_pga = np.log(df["PGA"]) + ln_rhyp = np.log(df["Rhyp"]) + df["Intensity_pred"] = (c["d1"] + c["d2"] * ln_pga + + c["d3"] * (ln_pga - np.log(0.1))**2 + + c["d4"] * df["Mw"] + c["d5"] * ln_rhyp) + + elif equation_id == "eq20": + if "PGA" not in df.columns: + raise ValueError("ERROR: PGA is not found for Equation 20!") + + ln_pga = np.log(df["PGA"]) + df["Intensity_pred"] = (c["h1"] + c["h2"] * ln_pga + + c["h3"] * (ln_pga - np.log(0.1))**2 + + c["h4"] * epsilon) + else: + raise ValueError("ERROR: Invalid equation ID! Please select 'eq19' or 'eq20' for GMICE.") + + save_results(df, "gmice_res.csv") + +def igmce(data, equation_id, epsilon=0): + """ + Intensity to ground-motion conversion (Intensity -> PGA) + Equations: 22 or 23 + Units: PGA [g], Rjb [km] + Epsilon (ϵ): It can be estimated using the mean ϵ from the disaggregation; + however, if disaggregation results are not available, then ϵ can be approximated + from the slope of the hazard curve at any particular site (Gallahue and Abrahamson, 2023). + """ + df = pd.read_csv(data) + c = COEFFS.get(equation_id) + + if equation_id == "eq22": + required = ["Intensity", "Mw", "Rjb"] + if not all(col in df.columns for col in required): + raise ValueError(f"ERROR: {required} are not found for Equation 22!") + + ln_rjb = np.log(df["Rjb"]) + ln_pga = (c["f1"] + c["f2"] * df["Intensity"] + + c["f3"] * (df["Intensity"] - 6)**2 + + c["f4"] * df["Mw"] + c["f5"] * ln_rjb) + df["PGA_pred"] = np.exp(ln_pga) + + elif equation_id == "eq23": + if "Intensity" not in df.columns: + raise ValueError("ERROR: Intensity is not found for Equation 23!") + + ln_pga = (c["i1"] + c["i2"] * df["Intensity"] + + c["i3"] * (df["Intensity"] - 6)**2 + + c["i4"] * epsilon) + df["PGA_pred"] = np.exp(ln_pga) + else: + raise ValueError("ERROR: Invalid equation ID! Please select 'eq22' or 'eq23' for IGMCE.") + + save_results(df, "igmce_res.csv") + +def save_results(df, filename): + output_dir = "output" + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + filepath = os.path.join(output_dir, filename) + + df = df.round(5) + df.to_csv(filepath, index=False) + print(f"Done! Saved to '{filepath}'") diff --git a/openquake/cat/tests/converter_test.py b/openquake/cat/tests/converter_test.py new file mode 100644 index 000000000..f75c2f317 --- /dev/null +++ b/openquake/cat/tests/converter_test.py @@ -0,0 +1,97 @@ +import os +import unittest +import pandas as pd +import numpy as np +import tempfile +import shutil + +from openquake.cat.converter import gmice, igmce + +class TestGallahueAbrahamson2023(unittest.TestCase): + + def setUp(self): + """Initialize a temporary directory and create dummy data for testing.""" + self.test_dir = tempfile.mkdtemp() + self.data_path = os.path.join(self.test_dir, 'data.csv') + + # Sample data for testing + self.sample_data = pd.DataFrame({ + "PGA": [0.07, 0.20, 0.46], # g + "Mw": [6.0, 6.5, 7.3], + "Rhyp": [10.0, 50.0, 100.0], # km + "Rjb": [9.0, 45.0, 95.0], # km + "Intensity": [3.9, 5.8, 8.0] + }) + self.sample_data.to_csv(self.data_path, index=False) + + def tearDown(self): + shutil.rmtree(self.test_dir) + if os.path.exists("output"): + shutil.rmtree("output") + + def test_gmice_eq19_success(self): + """Testing if Equation 19 calculates Intensity_pred and saves correctly.""" + gmice(self.data_path, "eq19") + output_path = os.path.join("output", "gmice_res.csv") + + self.assertTrue(os.path.exists(output_path)) + df_res = pd.read_csv(output_path) + self.assertIn("Intensity_pred", df_res.columns) + self.assertEqual(len(df_res), 3) + + def test_gmice_eq20_with_epsilon(self): + """Testing if Equation 20 handles epsilon correctly.""" + gmice(self.data_path, "eq20", epsilon=0) + df_mean = pd.read_csv(os.path.join("output", "gmice_res.csv")) + + # Calculate with epsilon=1 + gmice(self.data_path, "eq20", epsilon=1) + df_eps = pd.read_csv(os.path.join("output", "gmice_res.csv")) + + # Eq 20 has h4 = -0.568. If epsilon=1, intensity should be lower than the mean. + self.assertTrue((df_eps["Intensity_pred"] < df_mean["Intensity_pred"]).all()) + + def test_gmice_eq19_missing_column_error(self): + """Verifies that Equation 19 raises ValueError if required columns are not found.""" + bad_data = self.sample_data.drop(columns=["Rhyp"]) + bad_input = os.path.join(self.test_dir, 'bad_input.csv') + bad_data.to_csv(bad_input, index=False) + + with self.assertRaises(ValueError) as cm: + gmice(bad_input, "eq19") + self.assertIn("Rhyp", str(cm.exception)) + + def test_igmce_eq22_success(self): + """Testing if Equation 22 calculates PGA successfully.""" + igmce(self.data_path, "eq22") + output_path = os.path.join("output", "igmce_res.csv") + + self.assertTrue(os.path.exists(output_path)) + df_res = pd.read_csv(output_path) + self.assertIn("PGA_pred", df_res.columns) + self.assertTrue((df_res["PGA_pred"] > 0).all()) + + def test_igmce_eq23_with_epsilon(self): + """Testing if Equation 23 handles epsilon and changes the PGA_pred result.""" + igmce(self.data_path, "eq23", epsilon=0) + pga_mean = pd.read_csv(os.path.join("output", "igmce_res.csv"))["PGA_pred"] + + igmce(self.data_path, "eq23", epsilon=1) + pga_eps = pd.read_csv(os.path.join("output", "igmce_res.csv"))["PGA_pred"] + + # Eq 23 has i4 = -0.187. Check if the results are different. + self.assertFalse(np.array_equal(pga_mean.values, pga_eps.values)) + + def test_invalid_equation_id(self): + """Testing if an incorrect equation_id triggers the appropriate error message.""" + with self.assertRaises(ValueError) as cm: + gmice(self.data_path, "eq_wrong") + self.assertIn("Invalid equation ID", str(cm.exception)) + + def test_output_directory_creation(self): + """Testing if the script automatically creates the 'output' directory.""" + if os.path.exists("output"): + shutil.rmtree("output") + + gmice(self.data_path, "eq20") + self.assertTrue(os.path.isdir("output")) From 75a35c7bcda7d51d8c15cf732017784ff758487e Mon Sep 17 00:00:00 2001 From: Fahrettin Kuran Date: Thu, 26 Feb 2026 13:41:52 +0100 Subject: [PATCH 2/2] Refactor: implemented OOP structure --- openquake/cat/converter.py | 164 +++++++++++++++----------- openquake/cat/tests/converter_test.py | 115 +++++++++--------- 2 files changed, 158 insertions(+), 121 deletions(-) diff --git a/openquake/cat/converter.py b/openquake/cat/converter.py index 2c74f41a9..e0da71878 100644 --- a/openquake/cat/converter.py +++ b/openquake/cat/converter.py @@ -15,9 +15,9 @@ import numpy as np import pandas as pd -import os +import pathlib + -# Coefficients COEFFS = { "eq19": {"d1": 2.919, "d2": 0.356, "d3": 0.010, "d4": 1.041, "d5": -0.889, "sigma": 0.566}, "eq20": {"h1": 8.622, "h2": 1.230, "h3": 0.056, "h4": -0.568, "sigma": 0.704}, @@ -25,86 +25,118 @@ "eq23": {"i1": -6.558, "i2": 0.754, "i3": -0.072, "i4": -0.187, "sigma": 0.667} } -# Functions -def gmice(data, equation_id, epsilon=0): + +class GallahueAbrahamson2023Model1: """ Ground-motion to intensity conversion (PGA -> Intensity) Equations: 19 or 20 - Units: PGA [g], Rhyp [km] + Units: pga [g], rhypo [km] Epsilon (ϵ): It can be estimated using the mean ϵ from the disaggregation; however, if disaggregation results are not available, then ϵ can be approximated from the slope of the hazard curve at any particular site (Gallahue and Abrahamson, 2023). """ - df = pd.read_csv(data) - c = COEFFS.get(equation_id) - - if equation_id == "eq19": - required = ["PGA", "Mw", "Rhyp"] - if not all(col in df.columns for col in required): - raise ValueError(f"ERROR: {required} are not found for Equation 19!") - - ln_pga = np.log(df["PGA"]) - ln_rhyp = np.log(df["Rhyp"]) - df["Intensity_pred"] = (c["d1"] + c["d2"] * ln_pga + - c["d3"] * (ln_pga - np.log(0.1))**2 + - c["d4"] * df["Mw"] + c["d5"] * ln_rhyp) - - elif equation_id == "eq20": - if "PGA" not in df.columns: - raise ValueError("ERROR: PGA is not found for Equation 20!") + def __init__(self, data: np.ndarray): + self.data = data + self.mint = None + + def get_intensity(self, mode: str = 'eq19', epsilon: float = 0): + if mode == 'eq19': + + required = ['pga', 'mag', 'rhypo'] + self._check_columns(required) + + c = COEFFS["eq19"] + ln_pga = np.log(self.data['pga']) + ln_rhypo = np.log(self.data['rhypo']) + + self.mint = (c["d1"] + c["d2"] * ln_pga + + c["d3"] * (ln_pga - np.log(0.1))**2 + + c["d4"] * self.data['mag'] + c["d5"] * ln_rhypo) + + elif mode == 'eq20': + self._check_columns(['pga']) + + c = COEFFS["eq20"] + ln_pga = np.log(self.data['pga']) + + self.mint = (c["h1"] + c["h2"] * ln_pga + + c["h3"] * (ln_pga - np.log(0.1))**2 + + c["h4"] * epsilon) + else: + raise ValueError("Invalid mode! Choose 'eq19' or 'eq20'.") - ln_pga = np.log(df["PGA"]) - df["Intensity_pred"] = (c["h1"] + c["h2"] * ln_pga + - c["h3"] * (ln_pga - np.log(0.1))**2 + - c["h4"] * epsilon) - else: - raise ValueError("ERROR: Invalid equation ID! Please select 'eq19' or 'eq20' for GMICE.") + return self.mint + + def save(self, filename: str): + if self.mint is None: + raise ValueError("Please run 'get_intensity' before saving.") + _save_results(self.data, self.mint, 'intensity', filename) + + def _check_columns(self, required): + missing = [col for col in required if col not in self.data.dtype.names] + if missing: + raise ValueError(f"Missing required columns in structured array: {missing}") - save_results(df, "gmice_res.csv") -def igmce(data, equation_id, epsilon=0): +class GallahueAbrahamson2023Model2: """ - Intensity to ground-motion conversion (Intensity -> PGA) + Intensity to ground motion conversion (Intensity -> PGA) Equations: 22 or 23 - Units: PGA [g], Rjb [km] + Units: pga [g], rjb [km] Epsilon (ϵ): It can be estimated using the mean ϵ from the disaggregation; however, if disaggregation results are not available, then ϵ can be approximated from the slope of the hazard curve at any particular site (Gallahue and Abrahamson, 2023). """ - df = pd.read_csv(data) - c = COEFFS.get(equation_id) - - if equation_id == "eq22": - required = ["Intensity", "Mw", "Rjb"] - if not all(col in df.columns for col in required): - raise ValueError(f"ERROR: {required} are not found for Equation 22!") - - ln_rjb = np.log(df["Rjb"]) - ln_pga = (c["f1"] + c["f2"] * df["Intensity"] + - c["f3"] * (df["Intensity"] - 6)**2 + - c["f4"] * df["Mw"] + c["f5"] * ln_rjb) - df["PGA_pred"] = np.exp(ln_pga) - - elif equation_id == "eq23": - if "Intensity" not in df.columns: - raise ValueError("ERROR: Intensity is not found for Equation 23!") + def __init__(self, data: np.ndarray): + self.data = data + self.pga = None + + def get_pga(self, mode: str = 'eq22', epsilon: float = 0): + if mode == 'eq22': + required = ['intensity', 'mag', 'rjb'] + self._check_columns(required) - ln_pga = (c["i1"] + c["i2"] * df["Intensity"] + - c["i3"] * (df["Intensity"] - 6)**2 + - c["i4"] * epsilon) - df["PGA_pred"] = np.exp(ln_pga) - else: - raise ValueError("ERROR: Invalid equation ID! Please select 'eq22' or 'eq23' for IGMCE.") - - save_results(df, "igmce_res.csv") - -def save_results(df, filename): - output_dir = "output" - if not os.path.exists(output_dir): - os.makedirs(output_dir) - - filepath = os.path.join(output_dir, filename) + c = COEFFS["eq22"] + ln_rjb = np.log(self.data['rjb']) + + ln_pga = (c["f1"] + c["f2"] * self.data['intensity'] + + c["f3"] * (self.data['intensity'] - 6)**2 + + c["f4"] * self.data['mag'] + c["f5"] * ln_rjb) + self.pga = np.exp(ln_pga) + + elif mode == 'eq23': + self._check_columns(['intensity']) + + c = COEFFS["eq23"] + ln_pga = (c["i1"] + c["i2"] * self.data['intensity'] + + c["i3"] * (self.data['intensity'] - 6)**2 + + c["i4"] * epsilon) + self.pga = np.exp(ln_pga) + else: + raise ValueError("Invalid mode! Choose 'eq22' or 'eq23'.") + + return self.pga + + def save(self, filename: str): + if self.pga is None: + raise ValueError("Please run 'get_pga' before saving.") + _save_results(self.data, self.pga, 'pga', filename) + + def _check_columns(self, required): + missing = [col for col in required if col not in self.data.dtype.names] + if missing: + raise ValueError(f"Missing required columns in structured array: {missing}") + + +def _save_results(data: np.ndarray, result_array: np.ndarray, result_name: str, filename: str): + path = pathlib.Path(filename) + path.parent.mkdir(parents=True, exist_ok=True) + + keys = data.dtype.names + tmp = {k: data[k] for k in keys} + tmp[result_name] = result_array + df = pd.DataFrame(tmp) df = df.round(5) - df.to_csv(filepath, index=False) - print(f"Done! Saved to '{filepath}'") + df.to_csv(path, index=False) + print(f"Done! Saved to '{filename}'") diff --git a/openquake/cat/tests/converter_test.py b/openquake/cat/tests/converter_test.py index f75c2f317..e828d2126 100644 --- a/openquake/cat/tests/converter_test.py +++ b/openquake/cat/tests/converter_test.py @@ -5,93 +5,98 @@ import tempfile import shutil -from openquake.cat.converter import gmice, igmce +from openquake.cat.converter import GallahueAbrahamson2023Model1, GallahueAbrahamson2023Model2 class TestGallahueAbrahamson2023(unittest.TestCase): def setUp(self): - """Initialize a temporary directory and create dummy data for testing.""" + """Initialize temporary directory and create structured array for testing.""" self.test_dir = tempfile.mkdtemp() - self.data_path = os.path.join(self.test_dir, 'data.csv') # Sample data for testing self.sample_data = pd.DataFrame({ - "PGA": [0.07, 0.20, 0.46], # g - "Mw": [6.0, 6.5, 7.3], - "Rhyp": [10.0, 50.0, 100.0], # km - "Rjb": [9.0, 45.0, 95.0], # km - "Intensity": [3.9, 5.8, 8.0] + "pga": [0.07, 0.20, 0.46], # g + "mag": [6.0, 6.5, 7.3], + "rhypo": [10.0, 50.0, 100.0], # km + "rjb": [9.0, 45.0, 95.0], # km + "intensity": [3.9, 5.8, 8.0] }) - self.sample_data.to_csv(self.data_path, index=False) + + self.structured_data = self.sample_data.to_records(index=False) def tearDown(self): shutil.rmtree(self.test_dir) if os.path.exists("output"): shutil.rmtree("output") - def test_gmice_eq19_success(self): - """Testing if Equation 19 calculates Intensity_pred and saves correctly.""" - gmice(self.data_path, "eq19") - output_path = os.path.join("output", "gmice_res.csv") + def test_model1_eq19_success(self): + """Testing if equation 19 calculates intensity and saves correctly.""" + model = GallahueAbrahamson2023Model1(self.structured_data) + model.get_intensity(mode='eq19') + + output_path = os.path.join("output", "test_gmice.csv") + model.save(output_path) self.assertTrue(os.path.exists(output_path)) df_res = pd.read_csv(output_path) - self.assertIn("Intensity_pred", df_res.columns) + + # Checking if the result column 'intensity' exists + self.assertIn("intensity", df_res.columns) self.assertEqual(len(df_res), 3) - def test_gmice_eq20_with_epsilon(self): - """Testing if Equation 20 handles epsilon correctly.""" - gmice(self.data_path, "eq20", epsilon=0) - df_mean = pd.read_csv(os.path.join("output", "gmice_res.csv")) - - # Calculate with epsilon=1 - gmice(self.data_path, "eq20", epsilon=1) - df_eps = pd.read_csv(os.path.join("output", "gmice_res.csv")) + def test_model1_eq20_with_epsilon(self): + """Testing if equation 20 handles epsilon shifts correctly.""" + model = GallahueAbrahamson2023Model1(self.structured_data) + + # Calculation with epsilon=0 and epsilon=1 + mean_results = model.get_intensity(mode='eq20', epsilon=0).copy() + eps_results = model.get_intensity(mode='eq20', epsilon=1) # Eq 20 has h4 = -0.568. If epsilon=1, intensity should be lower than the mean. - self.assertTrue((df_eps["Intensity_pred"] < df_mean["Intensity_pred"]).all()) + self.assertTrue(np.all(eps_results < mean_results)) - def test_gmice_eq19_missing_column_error(self): - """Verifies that Equation 19 raises ValueError if required columns are not found.""" - bad_data = self.sample_data.drop(columns=["Rhyp"]) - bad_input = os.path.join(self.test_dir, 'bad_input.csv') - bad_data.to_csv(bad_input, index=False) + def test_model1_missing_column_error(self): + bad_dt = np.dtype([('pga', 'f8'), ('mag', 'f8')]) + bad_data = np.array([(0.1, 6.0)], dtype=bad_dt) + model = GallahueAbrahamson2023Model1(bad_data) with self.assertRaises(ValueError) as cm: - gmice(bad_input, "eq19") - self.assertIn("Rhyp", str(cm.exception)) + model.get_intensity(mode='eq19') + self.assertIn("Missing required columns", str(cm.exception)) - def test_igmce_eq22_success(self): - """Testing if Equation 22 calculates PGA successfully.""" - igmce(self.data_path, "eq22") - output_path = os.path.join("output", "igmce_res.csv") + def test_model2_eq22_success(self): + """Testing if equation 22 calculates PGA and saves correctly.""" + model = GallahueAbrahamson2023Model2(self.structured_data) + model.get_pga(mode='eq22') + + output_path = os.path.join("output", "test_igmce.csv") + model.save(output_path) self.assertTrue(os.path.exists(output_path)) df_res = pd.read_csv(output_path) - self.assertIn("PGA_pred", df_res.columns) - self.assertTrue((df_res["PGA_pred"] > 0).all()) + self.assertIn("pga", df_res.columns) + self.assertTrue((df_res["pga"] > 0).all()) - def test_igmce_eq23_with_epsilon(self): - """Testing if Equation 23 handles epsilon and changes the PGA_pred result.""" - igmce(self.data_path, "eq23", epsilon=0) - pga_mean = pd.read_csv(os.path.join("output", "igmce_res.csv"))["PGA_pred"] + def test_model2_eq23_with_epsilon(self): + """Verifies equation 23 shifts PGA when epsilon is changed.""" + model = GallahueAbrahamson2023Model2(self.structured_data) - igmce(self.data_path, "eq23", epsilon=1) - pga_eps = pd.read_csv(os.path.join("output", "igmce_res.csv"))["PGA_pred"] + pga_mean = model.get_pga(mode='eq23', epsilon=0).copy() + pga_eps = model.get_pga(mode='eq23', epsilon=1) - # Eq 23 has i4 = -0.187. Check if the results are different. - self.assertFalse(np.array_equal(pga_mean.values, pga_eps.values)) + # Confirming results are different + self.assertFalse(np.array_equal(pga_mean, pga_eps)) - def test_invalid_equation_id(self): - """Testing if an incorrect equation_id triggers the appropriate error message.""" + def test_invalid_mode_error(self): + """Verifies that an invalid mode string raises a ValueError.""" + model = GallahueAbrahamson2023Model1(self.structured_data) with self.assertRaises(ValueError) as cm: - gmice(self.data_path, "eq_wrong") - self.assertIn("Invalid equation ID", str(cm.exception)) + model.get_intensity(mode='wrong_mode') + self.assertIn("Invalid mode", str(cm.exception)) - def test_output_directory_creation(self): - """Testing if the script automatically creates the 'output' directory.""" - if os.path.exists("output"): - shutil.rmtree("output") - - gmice(self.data_path, "eq20") - self.assertTrue(os.path.isdir("output")) + def test_save_before_calculation_error(self): + """Verifies that saving before calculation triggers an error.""" + model = GallahueAbrahamson2023Model2(self.structured_data) + with self.assertRaises(ValueError) as cm: + model.save("failure.csv") + self.assertIn("run 'get_pga' before saving", str(cm.exception))