Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
run: |
python -m pip install --upgrade pip
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
pip install flake8 pytest 'sxs==2025.0.9' romspline pycbc
pip install flake8 pytest 'sxs==2025.0.9' romspline pycbc requests bs4
#- name: Lint with flake8
# run: |
# stop the build if there are Python syntax errors or undefined names
Expand Down
295 changes: 276 additions & 19 deletions PyART/catalogs/gra.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,23 @@
import os
import h5py
from ..waveform import Waveform
import glob as glob
import json
import logging
import re
import time

# libraries for downloading
try:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
except ImportError as e:
raise ImportError(
"To use the GRA catalog, please install the required "
"dependencies: requests, beautifulsoup4, urllib3"
) from e


class Waveform_GRA(Waveform):
Expand All @@ -17,18 +32,27 @@ class Waveform_GRA(Waveform):

def __init__(
self,
path,
ID="0001",
path="../dat/GRA",
ellmax=8,
ext="ext",
res="128",
r_ext=None,
cut_N=None,
cut_U=None,
mtdt_path=None,
rescale=False,
nu_rescale=False,
modes=[(2, 2)],
download=False,
downloads=["hlm", "metadata"],
):

super().__init__()
# Normalize ID to a 4-digit zero-padded string for consistency
if isinstance(ID, int):
ID = f"{ID:04d}"
elif isinstance(ID, str) and ID.isdigit() and len(ID) < 4:
ID = ID.zfill(4)
self.ID = ID
self.path = path
self.cut_N = cut_N
self.cut_U = cut_U
Expand All @@ -37,19 +61,81 @@ def __init__(
self.extrap = ext
self.domain = "Time"
self.r_ext = r_ext
self.rescale = rescale
self.nu_rescale = nu_rescale
self.res = res
# comment out the following for the moment
self.load_metadata(mtdt_path)

if download:
self.download_simulation(ID=ID, path=path, downloads=downloads, res=res)

self.load_metadata()
self.load_hlm(extrap=ext, ellmax=ellmax, r_ext=r_ext)
pass

def load_metadata(self, path):
def download_simulation(
self,
ID="0001",
path=None,
downloads=["hlm", "metadata"],
res=None,
):
"""
Load the metadata, if path is None assume
that they are in the same dir as the .h5 files
Automatically download and unpack a GRAthena++
simulation from scholarsphere.
"""

if path is None:
path = self.path

session = make_session()

logging.info("Fetching catalog...")
id_map = get_id_to_item_url(session)

if ID not in id_map:
raise RuntimeError(f"ID {ID} not found in catalog")

item_url = id_map[ID]

soup = get_item_soup(session, item_url)

if "hlm" in downloads:
logging.info("Downloading hlm data...")
if res is None:
res = "128"
self.res = res
logging.warning("No resolution specified, defaulting to res=128")

filename, tar_url = find_tar_for_resolution(soup, res)
logging.info(f"Found .tar: {filename}")
logging.info(f"Downloading from: {tar_url}")
download_safe(session, tar_url, filename)
# untar, execute via os.system for the moment
extract_path = os.path.join(path, f"GRA_BHBH_{ID}")
os.makedirs(extract_path, exist_ok=True)
logging.info(f"Extracting to: {extract_path}")
os.system(f"tar -xf {filename} -C {extract_path}")
os.remove(filename)

if "metadata" in downloads:
logging.info("Downloading metadata...")
filename, meta_url = find_metadata_file(soup)
logging.info(f"Found metadata file: {filename}")
logging.info(f"Downloading from: {meta_url}")
download_safe(session, meta_url, filename)
# move to correct location
extract_path = os.path.join(path, f"GRA_BHBH_{ID}", "metadata.json")
os.makedirs(os.path.dirname(extract_path), exist_ok=True)
os.rename(filename, extract_path)

# Be polite to the server
time.sleep(3)

def load_metadata(self):
"""
Load the metadata from the json file and store it in self.metadata
"""
path = os.path.join(self.path, f"GRA_BHBH_{self.ID}", "metadata.json")
ometa = json.load(open(path, "r"))

m1 = float(ometa["initial-mass1"])
Expand Down Expand Up @@ -124,11 +210,23 @@ def load_hlm(self, extrap="ext", ellmax=None, load_m0=False, r_ext=None):
r_ext = "100.00"

if extrap == "ext":
h5_file = os.path.join(self.path, "rh_Asymptotic_GeometricUnits.h5")
h5_file = os.path.join(
self.path,
f"GRA_BHBH_{self.ID}",
self.res,
"rh_Asymptotic_GeometricUnits.h5",
)
elif extrap == "CCE":
h5_file = os.path.join(self.path, "rh_CCE_GeometricUnits.h5")
h5_file = os.path.join(
self.path, f"GRA_BHBH_{self.ID}", self.res, "rh_CCE_GeometricUnits.h5"
)
elif extrap == "finite":
h5_file = os.path.join(self.path, "rh_FiniteRadii_GeometricUnits.h5")
h5_file = os.path.join(
self.path,
f"GRA_BHBH_{self.ID}",
self.res,
"rh_FiniteRadii_GeometricUnits.h5",
)
else:
raise ValueError('extrap should be either "ext", "CCE" or "finite"')

Expand Down Expand Up @@ -171,7 +269,7 @@ def load_hlm(self, extrap="ext", ellmax=None, load_m0=False, r_ext=None):
mode = "Y_l" + str(l) + "_m" + str(m) + ".dat"
hlm = nr[r_ext][mode]
h = hlm[:, 1] + 1j * hlm[:, 2]
if self.rescale:
if self.nu_rescale:
h /= self.metadata["nu"]
# amp and phase
Alm = abs(h)[self.cut_N :]
Expand Down Expand Up @@ -222,25 +320,41 @@ def get_indices_dict(self):

def load_psi4lm(
self,
path=None,
fname=None,
ellmax=None,
r_ext=None,
extrap="ext",
load_m0=False,
):
"""
Load the data from the h5 file, but for psi4 instead of h.
"""
if ellmax == None:
ellmax = self.ellmax

if r_ext == None:
r_ext = "100.00"

if extrap == "ext":
h5_file = os.path.join(self.path, "rPsi4_Asymptotic_GeometricUnits.h5")
h5_file = os.path.join(
self.path,
f"GRA_BHBH_{self.ID}",
self.res,
"rPsi4_Asymptotic_GeometricUnits.h5",
)
elif extrap == "CCE":
h5_file = os.path.join(self.path, "rPsi4_CCE_GeometricUnits.h5")
h5_file = os.path.join(
self.path,
f"GRA_BHBH_{self.ID}",
self.res,
"rPsi4_CCE_GeometricUnits.h5",
)
elif extrap == "finite":
h5_file = os.path.join(self.path, "rPsi4_FiniteRadii_GeometricUnits.h5")
h5_file = os.path.join(
self.path,
f"GRA_BHBH_{self.ID}",
self.res,
"rPsi4_FiniteRadii_GeometricUnits.h5",
)
else:
raise ValueError('extrap should be either "ext", "CCE" or "finite"')

Expand Down Expand Up @@ -282,7 +396,7 @@ def load_psi4lm(
mode = "Y_l" + str(l) + "_m" + str(m) + ".dat"
psi4lm = nr[r_ext][mode]
psi4 = psi4lm[:, 1] + 1j * psi4lm[:, 2]
if self.rescale:
if self.nu_rescale:
psi4 /= self.metadata["nu"]
Alm = abs(psi4)[self.cut_N :]
plm = -np.unwrap(np.angle(psi4))[self.cut_N :]
Expand All @@ -297,3 +411,146 @@ def load_psi4lm(

self._psi4lm = dict_psi4lm
pass


# ----------------------------------------------------------------------
# Functions needed to download data from GRAthena++
# ----------------------------------------------------------------------

CATALOG_URL = (
"https://scholarsphere.psu.edu/resources/610744ac-80b9-4689-8119-320dfd2e2b9a"
)
BASE_URL = "https://scholarsphere.psu.edu"


def make_session():
session = requests.Session()

retries = Retry(
total=5,
backoff_factor=1.5,
status_forcelist=[429, 500, 502, 503, 504],
allowed_methods=["GET"],
)

adapter = HTTPAdapter(max_retries=retries)
session.mount("https://", adapter)
session.mount("http://", adapter)

session.headers.update(
{
"User-Agent": (
"Mozilla/5.0 (X11; Linux x86_64) "
"AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/121.0 Safari/537.36"
),
"Accept": "*/*",
"Accept-Encoding": "identity", # avoids chunked/gzip resets
"Connection": "keep-alive",
"Referer": "https://scholarsphere.psu.edu/",
}
)

return session


def get_id_to_item_url(session):
r = session.get(CATALOG_URL, timeout=30)
r.raise_for_status()
soup = BeautifulSoup(r.text, "html.parser")

id_map = {}

for a in soup.find_all("a", href=True):
text = a.get_text(strip=True)
m = re.search(r"GRAthena:BHBH:(\d{4})", text)
if m:
id_map[m.group(1)] = urljoin(BASE_URL, a["href"])

if not id_map:
raise RuntimeError("No GRAthena IDs found on catalog page")

return id_map


def get_item_soup(session, item_url):
r = session.get(item_url, timeout=30)
r.raise_for_status()
return BeautifulSoup(r.text, "html.parser")


def find_tar_for_resolution(item_soup, resolution):
resolution = resolution.lower()

for a in item_soup.find_all("a", href=True):
href = a["href"].lower()
text = a.get_text(strip=True).lower()
if (
"/downloads/" in href
and text.endswith(".tar")
and resolution in (href + text)
):
filename = os.path.basename(href)
return filename, urljoin(BASE_URL, a["href"])

raise RuntimeError(f"No .tar found for resolution '{resolution}'")


def find_metadata_file(item_soup):
for a in item_soup.find_all("a", href=True):
href = a["href"].lower()
text = a.get_text(strip=True).lower()
if "/downloads/" in href and text.endswith(".json"):
filename = os.path.basename(href)
return filename, urljoin(BASE_URL, a["href"])

raise RuntimeError(f"No metadata.json file found")


def download_safe(session, url, filename, chunk_size=1024 * 1024):
tmp_file = filename + ".part"
downloaded = 0

if os.path.exists(tmp_file):
downloaded = os.path.getsize(tmp_file)
logging.info(f"Resuming download from byte {downloaded}")

headers = {}
if downloaded > 0:
headers["Range"] = f"bytes={downloaded}-"

with session.get(url, stream=True, headers=headers, timeout=60) as r:
r.raise_for_status()

# Decide whether we can safely resume or must restart from scratch.
resume_supported = False
if downloaded > 0:
if r.status_code == 206:
content_range = r.headers.get("Content-Range", "")
# Expect the content range to start at our downloaded offset.
expected = f"bytes {downloaded}-"
if content_range.startswith(expected) or expected in content_range:
resume_supported = True
else:
logging.info(
"Server did not honor Range header (status %s); "
"restarting full download",
r.status_code,
)

if not resume_supported:
# If we had a partial file, overwrite it rather than append, to avoid
# corrupting the file when the server sends the full content.
if downloaded > 0:
logging.info("Discarding existing partial download and restarting")
downloaded = 0
mode = "wb"
else:
mode = "ab"
with open(tmp_file, mode) as f:
for chunk in r.iter_content(chunk_size=chunk_size):
if chunk:
f.write(chunk)

os.rename(tmp_file, filename)
logging.info(f"Download completed")
Loading