From 61c0e575abad0621f27a28294425d14da65a19c5 Mon Sep 17 00:00:00 2001
From: RoxGamba <rgamba@berkeley.edu>
Date: Tue, 27 Jan 2026 13:52:37 -0800
Subject: [PATCH 1/9] Add function to download GRA data from scholarsphere

---
 PyART/catalogs/gra.py | 238 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 224 insertions(+), 14 deletions(-)

diff --git a/PyART/catalogs/gra.py b/PyART/catalogs/gra.py
index ab037e1..cab984a 100644
--- a/PyART/catalogs/gra.py
+++ b/PyART/catalogs/gra.py
@@ -2,8 +2,23 @@
 import os
 import h5py
 from ..waveform import Waveform
-import glob as glob
 import json
+import logging
+import re
+import time
+
+# librares for downloading
+try:
+    import requests
+    from bs4 import BeautifulSoup
+    from urllib.parse import urljoin
+    from requests.adapters import HTTPAdapter
+    from urllib3.util.retry import Retry
+except ImportError as e:
+    raise ImportError(
+        "To use the GRA catalog, please install the required "
+        "dependencies: requests, beautifulsoup4, urllib3"
+    ) from e
 
 
 class Waveform_GRA(Waveform):
@@ -17,18 +32,22 @@ class Waveform_GRA(Waveform):
 
     def __init__(
         self,
-        path,
+        ID="0001",
+        path="../dat/GRA",
         ellmax=8,
         ext="ext",
+        res="128",
         r_ext=None,
         cut_N=None,
         cut_U=None,
-        mtdt_path=None,
-        rescale=False,
+        nu_rescale=False,
         modes=[(2, 2)],
+        download=False,
+        downloads=["hlm", "metadata"],
     ):
 
         super().__init__()
+        self.ID = ID
         self.path = path
         self.cut_N = cut_N
         self.cut_U = cut_U
@@ -37,19 +56,79 @@ def __init__(
         self.extrap = ext
         self.domain = "Time"
         self.r_ext = r_ext
-        self.rescale = rescale
+        self.nu_rescale = nu_rescale
+        self.res = res
         # comment out the following for the moment
-        self.load_metadata(mtdt_path)
+
+        if download:
+            self.download_simulation(ID=ID, path=path, downloads=downloads, res=res)
+
+        self.load_metadata()
         self.load_hlm(extrap=ext, ellmax=ellmax, r_ext=r_ext)
         pass
 
-    def load_metadata(self, path):
+    def download_simulation(
+        self,
+        ID="0001",
+        path=None,
+        downloads=["hlm", "metadata"],
+        res=None,
+    ):
+        """
+        Automatically download and unpack a GRAthena++
+        simulation from scholarsphere.
+        """
+
+        session = make_session()
+
+        logging.info("Fetching catalog...")
+        id_map = get_id_to_item_url(session)
+
+        if ID not in id_map:
+            raise RuntimeError(f"ID {ID} not found in catalog")
+
+        item_url = id_map[ID]
+
+        soup = get_item_soup(session, item_url)
+
+        if "hlm" in downloads:
+            logging.info("Downloading hlm data...")
+            if res is None:
+                res = "128"
+                self.res = res
+                logging.warning("No resolution specified, defaulting to res=128")
+
+            filename, tar_url = find_tar_for_resolution(soup, res)
+            logging.info(f"Found .tar: {filename}")
+            logging.info(f"Downloading from: {tar_url}")
+            download_safe(session, tar_url, filename)
+            # untar, execute via os.system for the moment
+            extract_path = os.path.join(path, f"GRA_BHBH_{ID}")
+            os.makedirs(extract_path, exist_ok=True)
+            logging.info(f"Extracting to: {extract_path}")
+            os.system(f"tar -xf {filename} -C {extract_path}")
+            os.remove(filename)
+
+        if "metadata" in downloads:
+            logging.info("Downloading metadata...")
+            filename, meta_url = find_metadata_file(soup)
+            logging.info(f"Found metadata file: {filename}")
+            logging.info(f"Downloading from: {meta_url}")
+            download_safe(session, meta_url, filename)
+            # move to correct location
+            extract_path = os.path.join(path, f"GRA_BHBH_{ID}", "metadata.json")
+            os.makedirs(os.path.dirname(extract_path), exist_ok=True)
+            os.rename(filename, extract_path)
+
+        # Be polite to the server
+        time.sleep(3)
+
+    def load_metadata(self):
         """
         Load the metadata, if path is None assume
         that they are in the same dir as the .h5 files
         """
-        if path is None:
-            path = self.path
+        path = os.path.join(self.path, f"GRA_BHBH_{self.ID}", self.res, "metadata.json")
         ometa = json.load(open(path, "r"))
 
         m1 = float(ometa["initial-mass1"])
@@ -124,11 +203,23 @@ def load_hlm(self, extrap="ext", ellmax=None, load_m0=False, r_ext=None):
             r_ext = "100.00"
 
         if extrap == "ext":
-            h5_file = os.path.join(self.path, "rh_Asymptotic_GeometricUnits.h5")
+            h5_file = os.path.join(
+                self.path,
+                f"GRA_BHBH_{self.ID}",
+                self.res,
+                "rh_Asymptotic_GeometricUnits.h5",
+            )
         elif extrap == "CCE":
-            h5_file = os.path.join(self.path, "rh_CCE_GeometricUnits.h5")
+            h5_file = os.path.join(
+                self.path, f"GRA_BHBH_{self.ID}", self.res, "rh_CCE_GeometricUnits.h5"
+            )
         elif extrap == "finite":
-            h5_file = os.path.join(self.path, "rh_FiniteRadii_GeometricUnits.h5")
+            h5_file = os.path.join(
+                self.path,
+                f"GRA_BHBH_{self.ID}",
+                self.res,
+                "rh_FiniteRadii_GeometricUnits.h5",
+            )
         else:
             raise ValueError('extrap should be either "ext", "CCE" or "finite"')
 
@@ -171,7 +262,7 @@ def load_hlm(self, extrap="ext", ellmax=None, load_m0=False, r_ext=None):
             mode = "Y_l" + str(l) + "_m" + str(m) + ".dat"
             hlm = nr[r_ext][mode]
             h = hlm[:, 1] + 1j * hlm[:, 2]
-            if self.rescale:
+            if self.nu_rescale:
                 h /= self.metadata["nu"]
             # amp and phase
             Alm = abs(h)[self.cut_N :]
@@ -282,7 +373,7 @@ def load_psi4lm(
             mode = "Y_l" + str(l) + "_m" + str(m) + ".dat"
             psi4lm = nr[r_ext][mode]
             psi4 = psi4lm[:, 1] + 1j * psi4lm[:, 2]
-            if self.rescale:
+            if self.nu_rescale:
                 psi4 /= self.metadata["nu"]
             Alm = abs(psi4)[self.cut_N :]
             plm = -np.unwrap(np.angle(psi4))[self.cut_N :]
@@ -297,3 +388,122 @@ def load_psi4lm(
 
         self._psi4lm = dict_psi4lm
         pass
+
+
+# ----------------------------------------------------------------------
+# Functions needed to download data from GRAthena++
+# ----------------------------------------------------------------------
+
+CATALOG_URL = (
+    "https://scholarsphere.psu.edu/resources/610744ac-80b9-4689-8119-320dfd2e2b9a"
+)
+BASE_URL = "https://scholarsphere.psu.edu"
+
+
+def make_session():
+    session = requests.Session()
+
+    retries = Retry(
+        total=5,
+        backoff_factor=1.5,
+        status_forcelist=[429, 500, 502, 503, 504],
+        allowed_methods=["GET"],
+    )
+
+    adapter = HTTPAdapter(max_retries=retries)
+    session.mount("https://", adapter)
+    session.mount("http://", adapter)
+
+    session.headers.update(
+        {
+            "User-Agent": (
+                "Mozilla/5.0 (X11; Linux x86_64) "
+                "AppleWebKit/537.36 (KHTML, like Gecko) "
+                "Chrome/121.0 Safari/537.36"
+            ),
+            "Accept": "*/*",
+            "Accept-Encoding": "identity",  # avoids chunked/gzip resets
+            "Connection": "keep-alive",
+            "Referer": "https://scholarsphere.psu.edu/",
+        }
+    )
+
+    return session
+
+
+def get_id_to_item_url(session):
+    r = session.get(CATALOG_URL, timeout=30)
+    r.raise_for_status()
+    soup = BeautifulSoup(r.text, "html.parser")
+
+    id_map = {}
+
+    for a in soup.find_all("a", href=True):
+        text = a.get_text(strip=True)
+        m = re.search(r"GRAthena:BHBH:(\d{4})", text)
+        if m:
+            id_map[m.group(1)] = urljoin(BASE_URL, a["href"])
+
+    if not id_map:
+        raise RuntimeError("No GRAthena IDs found on catalog page")
+
+    return id_map
+
+
+def get_item_soup(session, item_url):
+    r = session.get(item_url, timeout=30)
+    r.raise_for_status()
+    return BeautifulSoup(r.text, "html.parser")
+
+
+def find_tar_for_resolution(item_soup, resolution):
+    resolution = resolution.lower()
+
+    for a in item_soup.find_all("a", href=True):
+        href = a["href"].lower()
+        text = a.get_text(strip=True).lower()
+        if (
+            "/downloads/" in href
+            and text.endswith(".tar")
+            and resolution in (href + text)
+        ):
+            filename = os.path.basename(href)
+            return filename, urljoin(BASE_URL, a["href"])
+
+    raise RuntimeError(f"No .tar found for resolution '{resolution}'")
+
+
+def find_metadata_file(item_soup):
+    for a in item_soup.find_all("a", href=True):
+        href = a["href"].lower()
+        text = a.get_text(strip=True).lower()
+        if "/downloads/" in href and text.endswith(".json"):
+            filename = os.path.basename(href)
+            return filename, urljoin(BASE_URL, a["href"])
+
+    raise RuntimeError(f"No metadata.json file found")
+
+
+def download_safe(session, url, filename, chunk_size=1024 * 1024):
+    tmp_file = filename + ".part"
+    downloaded = 0
+
+    if os.path.exists(tmp_file):
+        downloaded = os.path.getsize(tmp_file)
+        logging.info(f"Resuming download from byte {downloaded}")
+
+    headers = {}
+    if downloaded > 0:
+        headers["Range"] = f"bytes={downloaded}-"
+
+    with session.get(url, stream=True, headers=headers, timeout=60) as r:
+        r.raise_for_status()
+
+        mode = "ab" if downloaded > 0 else "wb"
+        with open(tmp_file, mode) as f:
+            for chunk in r.iter_content(chunk_size=chunk_size):
+                if chunk:
+                    f.write(chunk)
+
+    os.rename(tmp_file, filename)
+    logging.info(f"Download completed")

From bd3c49e9b93c5ba6a1a4d4d283216ddb03142b88 Mon Sep 17 00:00:00 2001
From: RoxGamba <rgamba@berkeley.edu>
Date: Tue, 27 Jan 2026 13:53:55 -0800
Subject: [PATCH 2/9] update libraries for testing gra

---
 .github/workflows/tests.yml |  2 +-
 tests/test_gra.py           | 42 +++++++++++++++++++++++++++++++++++++
 2 files changed, 43 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_gra.py

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index a7f79d1..4a5e803 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -27,7 +27,7 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
-        pip install flake8 pytest 'sxs==2025.0.9' romspline pycbc
+        pip install flake8 pytest 'sxs==2025.0.9' romspline pycbc requests bs4 urllib
     #- name: Lint with flake8
     #  run: |
         # stop the build if there are Python syntax errors or undefined names
diff --git a/tests/test_gra.py b/tests/test_gra.py
new file mode 100644
index 0000000..727462c
--- /dev/null
+++ b/tests/test_gra.py
@@ -0,0 +1,42 @@
+"""
+Tests for the SXS catalog.
+"""
+
+from PyART.catalogs import gra
+import os
+
+mode_keys = ["A", "p", "real", "imag", "z"]
+
+
+def test_gra():
+    """
+    Test the SXS download function.
+    """
+    wf = gra.Waveform_GRA(
+        ID="0001",
+        path="./",
+        download=True,
+        res="128",
+        downloads=["hlm", "metadata"],
+    )
+    # check attributes
+    assert wf.ID == "0001"
+
+    # check that the files were downloaded
+    assert os.path.exists("GRA_BHBH_0001")
+    assert os.path.exists(f"GRA_BHBH_0001/metadata.json")
+    assert os.path.exists(f"GRA_BHBH_0001/128/rh_CCE_GeometricUnits.h5")
+    # check that the modes loaded make sense
+    for mode in wf.hlm.keys():
+
+        # check ell, emm
+        assert mode[0] >= abs(mode[1])
+        # check keys
+        for key in mode_keys:
+            assert key in wf.hlm[mode].keys()
+        # check length
+        assert len(wf.hlm[mode]["A"]) == len(wf.u)
+
+
+if __name__ == "__main__":
+    test_gra()

From 6b578a086f923e1fd6ee3c7d06029dcddcf06e73 Mon Sep 17 00:00:00 2001
From: RoxGamba <rgamba@berkeley.edu>
Date: Tue, 27 Jan 2026 14:00:00 -0800
Subject: [PATCH 3/9] update libraries

---
 .github/workflows/tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 4a5e803..8efdd57 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -27,7 +27,7 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
-        pip install flake8 pytest 'sxs==2025.0.9' romspline pycbc requests bs4 urllib
+        pip install flake8 pytest 'sxs==2025.0.9' romspline pycbc requests bs4
     #- name: Lint with flake8
     #  run: |
         # stop the build if there are Python syntax errors or undefined names

From b48f31d333ebc53c4fd8a1003a1e5b9b2fe933d2 Mon Sep 17 00:00:00 2001
From: RoxGamba <rgamba@berkeley.edu>
Date: Tue, 27 Jan 2026 14:05:36 -0800
Subject: [PATCH 4/9] Fix: metadata path when loading

---
 PyART/catalogs/gra.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/PyART/catalogs/gra.py b/PyART/catalogs/gra.py
index cab984a..844ec3e 100644
--- a/PyART/catalogs/gra.py
+++ b/PyART/catalogs/gra.py
@@ -128,7 +128,7 @@ def load_metadata(self):
         Load the metadata, if path is None assume
         that they are in the same dir as the .h5 files
         """
-        path = os.path.join(self.path, f"GRA_BHBH_{self.ID}", self.res, "metadata.json")
+        path = os.path.join(self.path, f"GRA_BHBH_{self.ID}", "metadata.json")
         ometa = json.load(open(path, "r"))
 
         m1 = float(ometa["initial-mass1"])

From 46cdca29d4b7770f7ef0bb5b8194c537e7f45128 Mon Sep 17 00:00:00 2001
From: Rossella Gamba <72128273+RoxGamba@users.noreply.github.com>
Date: Fri, 6 Feb 2026 11:09:51 -0800
Subject: [PATCH 5/9] Update tests/test_gra.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 tests/test_gra.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/test_gra.py b/tests/test_gra.py
index 727462c..33c0d73 100644
--- a/tests/test_gra.py
+++ b/tests/test_gra.py
@@ -18,6 +18,7 @@ def test_gra():
         download=True,
         res="128",
         downloads=["hlm", "metadata"],
+        ext="CCE",
     )
     # check attributes
     assert wf.ID == "0001"

From ed37dc4252a026a10273c3bf465a0d6e9c44debe Mon Sep 17 00:00:00 2001
From: Rossella Gamba <72128273+RoxGamba@users.noreply.github.com>
Date: Fri, 6 Feb 2026 11:10:10 -0800
Subject: [PATCH 6/9] Update PyART/catalogs/gra.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 PyART/catalogs/gra.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/PyART/catalogs/gra.py b/PyART/catalogs/gra.py
index 844ec3e..b01ae93 100644
--- a/PyART/catalogs/gra.py
+++ b/PyART/catalogs/gra.py
@@ -7,7 +7,7 @@
 import re
 import time
 
-# librares for downloading
+# libraries for downloading
 try:
     import requests
     from bs4 import BeautifulSoup

From e431d195449f666809e691a089107c063022c95c Mon Sep 17 00:00:00 2001
From: Rossella Gamba <72128273+RoxGamba@users.noreply.github.com>
Date: Fri, 6 Feb 2026 11:12:03 -0800
Subject: [PATCH 7/9] Update PyART/catalogs/gra.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 PyART/catalogs/gra.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/PyART/catalogs/gra.py b/PyART/catalogs/gra.py
index b01ae93..7fb0cd9 100644
--- a/PyART/catalogs/gra.py
+++ b/PyART/catalogs/gra.py
@@ -47,6 +47,11 @@ def __init__(
     ):
 
         super().__init__()
+        # Normalize ID to a 4-digit zero-padded string for consistency
+        if isinstance(ID, int):
+            ID = f"{ID:04d}"
+        elif isinstance(ID, str) and ID.isdigit() and len(ID) < 4:
+            ID = ID.zfill(4)
         self.ID = ID
         self.path = path
         self.cut_N = cut_N

From 4e57132c1900321cbc3cb2dd739bbbe0ee11a1d2 Mon Sep 17 00:00:00 2001
From: RoxGamba <rgamba@berkeley.edu>
Date: Fri, 6 Feb 2026 11:27:00 -0800
Subject: [PATCH 8/9] Some updates after review

---
 PyART/catalogs/gra.py | 32 +++++++++++++++++++++++++-------
 tests/test_gra.py     |  4 ++--
 2 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/PyART/catalogs/gra.py b/PyART/catalogs/gra.py
index 7fb0cd9..b341ab0 100644
--- a/PyART/catalogs/gra.py
+++ b/PyART/catalogs/gra.py
@@ -84,6 +84,9 @@ def download_simulation(
         simulation from scholarsphere.
         """
 
+        if path is None:
+            path = self.path
+
         session = make_session()
 
         logging.info("Fetching catalog...")
@@ -130,8 +133,7 @@ def download_simulation(
 
     def load_metadata(self):
         """
-        Load the metadata, if path is None assume
-        that they are in the same dir as the .h5 files
+        Load the metadata from the json file and store it in self.metadata
         """
         path = os.path.join(self.path, f"GRA_BHBH_{self.ID}", "metadata.json")
         ometa = json.load(open(path, "r"))
@@ -318,13 +320,14 @@ def get_indices_dict(self):
 
     def load_psi4lm(
         self,
-        path=None,
-        fname=None,
         ellmax=None,
         r_ext=None,
         extrap="ext",
         load_m0=False,
     ):
+        """
+        Load the data from the h5 file, but for psi4 instead of h.
+        """
         if ellmax == None:
             ellmax = self.ellmax
 
@@ -332,11 +335,26 @@ def load_psi4lm(
             r_ext = "100.00"
 
         if extrap == "ext":
-            h5_file = os.path.join(self.path, "rPsi4_Asymptotic_GeometricUnits.h5")
+            h5_file = os.path.join(
+                self.path,
+                f"GRA_BHBH_{self.ID}",
+                self.res,
+                "rPsi4_Asymptotic_GeometricUnits.h5",
+            )
         elif extrap == "CCE":
-            h5_file = os.path.join(self.path, "rPsi4_CCE_GeometricUnits.h5")
+            h5_file = os.path.join(
+                self.path,
+                f"GRA_BHBH_{self.ID}",
+                self.res,
+                "rPsi4_CCE_GeometricUnits.h5",
+            )
         elif extrap == "finite":
-            h5_file = os.path.join(self.path, "rPsi4_FiniteRadii_GeometricUnits.h5")
+            h5_file = os.path.join(
+                self.path,
+                f"GRA_BHBH_{self.ID}",
+                self.res,
+                "rPsi4_FiniteRadii_GeometricUnits.h5",
+            )
         else:
             raise ValueError('extrap should be either "ext", "CCE" or "finite"')
 
diff --git a/tests/test_gra.py b/tests/test_gra.py
index 33c0d73..60363aa 100644
--- a/tests/test_gra.py
+++ b/tests/test_gra.py
@@ -1,5 +1,5 @@
 """
-Tests for the SXS catalog.
+Tests for the GRA catalog.
 """
 
 from PyART.catalogs import gra
@@ -10,7 +10,7 @@
 
 def test_gra():
     """
-    Test the SXS download function.
+    Test the GRA download function.
     """
     wf = gra.Waveform_GRA(
         ID="0001",

From 6c66270aca9953d5c1d37217685ebd5f1a17083e Mon Sep 17 00:00:00 2001
From: Rossella Gamba <72128273+RoxGamba@users.noreply.github.com>
Date: Fri, 6 Feb 2026 11:32:14 -0800
Subject: [PATCH 9/9] Update PyART/catalogs/gra.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 PyART/catalogs/gra.py | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/PyART/catalogs/gra.py b/PyART/catalogs/gra.py
index b341ab0..a7f1f13 100644
--- a/PyART/catalogs/gra.py
+++ b/PyART/catalogs/gra.py
@@ -522,7 +522,31 @@ def download_safe(session, url, filename, chunk_size=1024 * 1024):
     with session.get(url, stream=True, headers=headers, timeout=60) as r:
         r.raise_for_status()
 
-        mode = "ab" if downloaded > 0 else "wb"
+        # Decide whether we can safely resume or must restart from scratch.
+        resume_supported = False
+        if downloaded > 0:
+            if r.status_code == 206:
+                content_range = r.headers.get("Content-Range", "")
+                # Expect the content range to start at our downloaded offset.
+                expected = f"bytes {downloaded}-"
+                if content_range.startswith(expected) or expected in content_range:
+                    resume_supported = True
+            else:
+                logging.info(
+                    "Server did not honor Range header (status %s); "
+                    "restarting full download",
+                    r.status_code,
+                )
+
+        if not resume_supported:
+            # If we had a partial file, overwrite it rather than append, to avoid
+            # corrupting the file when the server sends the full content.
+            if downloaded > 0:
+                logging.info("Discarding existing partial download and restarting")
+                downloaded = 0
+            mode = "wb"
+        else:
+            mode = "ab"
         with open(tmp_file, mode) as f:
             for chunk in r.iter_content(chunk_size=chunk_size):
                 if chunk: