Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
57 commits
Select commit Hold shift + click to select a range
d2e143c
multisurvey plan
psferguson Jun 15, 2026
8bab5a4
phase 1 of refactor
psferguson Jun 15, 2026
9e6a90f
phase 2
psferguson Jun 15, 2026
e298dcf
phase 3
psferguson Jun 15, 2026
d28d6da
phase 4 and notebook
psferguson Jun 15, 2026
7c965ae
Unify injector into one StreamInjector; always-namespaced columns
psferguson Jun 16, 2026
efd06a6
S/N cut: apply reference-band cut once (option b)
psferguson Jun 16, 2026
b7d52c7
docs update and fix some sphinx warnings
psferguson Jun 16, 2026
0935b2d
remove some unnecessary files
psferguson Jun 16, 2026
79d14c7
testing multi survey injector
MatthieuPE Jun 17, 2026
67999d8
black + isort format
MatthieuPE Jun 17, 2026
1335628
Merge pull request #50 from LSSTDESC/roman_multisurvey_v2
MatthieuPE Jun 17, 2026
5e89f70
Address PR #47 review: unified bands API, release-namespacing, SNR cu…
psferguson Jun 17, 2026
6674845
Docs: sync with release-namespacing; tolerant completeness-column loader
psferguson Jun 17, 2026
1ac8d37
Fix SplineStreamModel instantiation and plot_inject namespaced columns
psferguson Jun 17, 2026
a623b9c
Docs: use des/yr6 (not y6) for the DES release identifier
psferguson Jun 17, 2026
44ab9c8
Docs: correct complete_catalog preserve-vs-overwrite docstring
psferguson Jun 17, 2026
11dcbfe
Removing release from mag true
MatthieuPE Jun 18, 2026
78053b8
move tutorial to the doc + order the surveys doc
MatthieuPE Jun 18, 2026
27f8490
black + isort
MatthieuPE Jun 18, 2026
fd166db
Merge pull request #52 from LSSTDESC/roman_multisurvey_matthieu
MatthieuPE Jun 18, 2026
3566fd0
complete documentation about lsst
MatthieuPE Jun 18, 2026
0153a1e
complete documentation about lsst
MatthieuPE Jun 18, 2026
a21a695
complete documentation about lsst
MatthieuPE Jun 18, 2026
dd1b3ce
fix paths
MatthieuPE Jun 18, 2026
641c1fd
fix typos
MatthieuPE Jun 18, 2026
7a56a3c
Docs: true-mag columns key on survey name (release-independent), not …
psferguson Jun 18, 2026
1158154
small docs updates
psferguson Jun 18, 2026
fcc0905
Consolidate IsochroneModel.sample/sample_multisurvey into one sample(…
psferguson Jun 18, 2026
6a27246
remove plan.md file from the branch
psferguson Jun 18, 2026
8467c9a
isort and black
psferguson Jun 18, 2026
3ce6a12
Add Roman HLWAS survey config, selection-function derivation, and docs
psferguson Jun 11, 2026
ce1c10e
Docs: paper-style derivation writeup with diagnostic figures
psferguson Jun 11, 2026
a04ce3b
Docs: drop tile-margin geometry paragraph
psferguson Jun 11, 2026
9691e64
Docs: drop truth-duplicate handling paragraph
psferguson Jun 11, 2026
b9bf515
Truth-validate errors: drop maglim normalization, truth-based error m…
psferguson Jun 11, 2026
6f176ae
Paper re-read fixes: saturation 17, F106/F129/F158 only, zeropoints +…
psferguson Jun 11, 2026
467ffd3
Adopt official STScI extinction coefficients (Roman-STScI-000825 Tabl…
psferguson Jun 11, 2026
d6591bb
Add true-stars photo-error plot alongside the star-classified one
psferguson Jun 11, 2026
5b58b65
True-star products, truth-anchored depths; notebook -> script; docs r…
psferguson Jun 11, 2026
01afdf6
Add regenerated roman_dc2 figures, config + docs content updates
psferguson Jun 11, 2026
8395f54
Detection cut at true S/N>5 (corrected errors) for all products
psferguson Jun 11, 2026
07f2b0d
Split survey configs: roman/dc2 (populated) + roman/hlwas (placeholder)
psferguson Jun 11, 2026
469d127
Roman DC2: F158 size-envelope star classifier + single-band F158 dete…
psferguson Jun 17, 2026
a8e34a8
Build hygiene: gitignore local notebook generators
psferguson Jun 18, 2026
14639f4
draft 3/4 of hlwas info
psferguson Jun 18, 2026
78ddd6e
mostly ready
psferguson Jun 18, 2026
c640753
roman_hlwas: align to true/obs column convention + index/docs (rebase…
psferguson Jun 18, 2026
9e3e3fe
added other roman bands/maglim maps
psferguson Jun 18, 2026
4b54724
create model at initialisation
MatthieuPE Jun 19, 2026
7b3e8c3
testing isochrone model
MatthieuPE Jun 19, 2026
ede5592
support rng argument for reproducibility
MatthieuPE Jun 19, 2026
3a4a103
black + isort
MatthieuPE Jun 19, 2026
e3e098f
fix rng for injection
MatthieuPE Jun 19, 2026
328491b
testing reproducibility of injection
MatthieuPE Jun 19, 2026
830f2a2
black isort tests
MatthieuPE Jun 19, 2026
a65af9a
Merge pull request #54 from LSSTDESC/roman_hlwas_v2
MatthieuPE Jun 19, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -132,3 +132,22 @@ dmypy.json
data/surveys/*
data/others/*
.DS_Store
# External reference repos (design references only, not part of streamobs)
/survey_systematics_in_LSST_streams/
/rubin_roman_object_classification/
/lsst_dc2_scratch/
/artifacts/
docs/source/roman_multisurvey_plan.md

# Local-only Roman HLWAS derivation notebook (kept un-tracked; script is the source)
notebooks/create_streamobs_files_hlwas.ipynb
scripts/roman/script_to_notebook.py

# Local-only notebook generators (kept un-tracked; the production scripts/products
# are the source of truth, not the rendered notebooks they build)
build_*_nb.py
scripts/**/build_*_nb.py

artifacts/
# Staged data archive for upload (built by bin/build_data_archive.py)
/archive/
161 changes: 161 additions & 0 deletions bin/build_data_archive.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
#!/usr/bin/env python
"""
Stage streamobs *runtime* data products and zip them for upload.

This is the build-side counterpart to ``bin/download_data.py``: it assembles the
small per-survey product files that ``Survey.load`` needs (maglim maps,
completeness / photo-error tables, the shared ebv map, ...) into a clean tree at
``archive/data/`` and writes ``archive/data.zip`` ready to upload (e.g. Zenodo).
Update ``BASE_DATA_URL`` in ``download_data.py`` to the new record afterward.

Why this exists (the symlink trap): on a dev machine ``data/surveys/roman_dc2``
is a *symlink* to the ~13 GB Roman mock, and the HLWAS tier CSVs are symlinks to
the roman_dc2 copies. Zipping those directly would either capture broken links or
pull in gigabytes of derivation source. This script therefore:
* **dereferences** symlinks (copies the real file contents), and
* **excludes** large derivation source/intermediate files (parquets, raw mock
detections/truth, external skims, provenance), keeping only runtime products.

Usage:
python bin/build_data_archive.py # stage + zip into archive/
python bin/build_data_archive.py --no-zip # stage only
python bin/build_data_archive.py --list # dry-run: show what would be included
"""

import argparse
import fnmatch
import os
import shutil
import zipfile
from pathlib import Path

# Repository root: two directory levels above this script file.
REPO = Path(__file__).resolve().parent.parent
# Source tree that is walked for runtime products.
DATA = REPO.joinpath("data")
# Build output directory; holds both the staged tree and the zip.
OUT = REPO.joinpath("archive")
# Clean, symlink-free copy of the kept files.
STAGE = OUT.joinpath("data")
# Final archive written from the staged tree.
ZIP_PATH = OUT.joinpath("data.zip")

# Directories (paths relative to data/) that are never descended into: they
# hold derivation sources / intermediates that are not needed to load or inject
# a survey at runtime.
EXCLUDE_DIRS = {
    # external LSST DC2 skims (contamination derivation)
    "surveys/lsst_dc2",
    # raw Roman mock SExtractor detections
    "surveys/roman_dc2/det",
    # raw Roman mock truth tiles
    "surveys/roman_dc2/truth",
    # bare placeholder dir holding the raw HLWAS exptime maps (build *inputs*);
    # the runtime products are the derived maglim maps in
    # roman_hlwas_{wide,medium,all}/
    "surveys/roman_hlwas",
}

# Directory basename globs, skipped wherever they appear in the tree.
EXCLUDE_DIR_GLOBS = [
    "*_tiles",
    "*_tiles_*",
    "__pycache__",
]

# File basename globs that are skipped: large, derivation, provenance, or
# otherwise non-runtime files.
EXCLUDE_FILE_GLOBS = [
    # det_truth, lsst_matched, cosmodc2_size, truth_stars
    "*.parquet",
    # reference matched catalogs (4.8 GB)
    "andy_*.fits",
    # cosmoDC2 size skims (derivation input)
    "cosmoDC2_*",
    # LSST DC2 object skims
    "dc2_object_*",
    # LSST DC2 truth skims
    "dc2_run2.2i_truth_*",
    # photo-error provenance (raw, pre-afterburner)
    "*_raw.csv",
    # analysis output (injector does not consume it)
    "roman_galaxy_misclass_*.csv",
    # raw HLWAS exposure-time maps (build inputs)
    "map_HLWAS-*",
    # intermediate rough maglim maps
    "*_rough_maglim*",
    "*.README.md",
    ".DS_Store",
]

# Safety net: any single file larger than this (in MB) that slipped past the
# globs above is skipped with a note; all genuine runtime products are smaller
# than a few MB.
MAX_FILE_MB = 50.0


def _excluded_dir(rel: str) -> bool:
    """Tell whether a directory (path relative to data/) must be pruned.

    The path is normalised to forward slashes first. A directory is excluded
    when its full relative path is listed in ``EXCLUDE_DIRS`` or when its
    basename matches one of the ``EXCLUDE_DIR_GLOBS`` patterns.
    """
    posix_rel = rel.replace(os.sep, "/")
    # Last path component; the whole string when there is no "/".
    _, _, leaf = posix_rel.rpartition("/")
    if posix_rel in EXCLUDE_DIRS:
        return True
    for pattern in EXCLUDE_DIR_GLOBS:
        if fnmatch.fnmatch(leaf, pattern):
            return True
    return False


def _excluded_file(name: str) -> bool:
    """Tell whether a file basename matches any ``EXCLUDE_FILE_GLOBS`` pattern."""
    for pattern in EXCLUDE_FILE_GLOBS:
        if fnmatch.fnmatch(name, pattern):
            return True
    return False


def collect():
    """Walk ``data/`` and classify every file as kept or skipped.

    Symlinks are followed both when descending into directories
    (``followlinks=True``) and when measuring files (``stat()``), so the sizes
    describe the real file contents. Excluded directories are pruned before the
    walk descends into them.

    Returns:
        A ``(kept, skipped)`` pair of lists, both in sorted walk order.
        ``kept`` holds ``(relpath, size)`` tuples; ``skipped`` holds
        ``(relpath, size, reason)`` tuples where ``reason`` is ``"pattern"``
        (basename matched an exclusion glob), ``">NMB"`` (larger than
        ``MAX_FILE_MB``) or ``"unreadable"`` (``stat()`` failed, e.g. a broken
        symlink; the size is reported as 0). ``relpath`` is relative to
        ``data/`` and ``size`` is in bytes.
    """
    size_limit = MAX_FILE_MB * 1024 * 1024
    kept, skipped = [], []
    for root, dirs, files in os.walk(DATA, followlinks=True):
        rel_root = os.path.relpath(root, DATA)
        rel_root = "" if rel_root == "." else rel_root
        # Prune excluded directories in place so os.walk never descends into
        # them (this is what avoids the huge derivation directories).
        dirs[:] = [
            d for d in sorted(dirs) if not _excluded_dir(f"{rel_root}/{d}".lstrip("/"))
        ]
        for fname in sorted(files):
            rel = f"{rel_root}/{fname}".lstrip("/")
            by_pattern = _excluded_file(fname)
            try:
                size = (Path(root) / fname).stat().st_size  # stat() follows symlinks
            except OSError:
                # Broken symlink or unreadable file: it cannot be archived, but
                # it is reported instead of vanishing silently so that a missing
                # runtime product is noticed in the listing.
                skipped.append((rel, 0, "pattern" if by_pattern else "unreadable"))
                continue
            if by_pattern:
                skipped.append((rel, size, "pattern"))
            elif size > size_limit:
                skipped.append((rel, size, f">{MAX_FILE_MB:.0f}MB"))
            else:
                kept.append((rel, size))
    return kept, skipped


def main():
    """Command-line entry point: report, stage, and zip the runtime data.

    Parses ``--list`` and ``--no-zip``, prints the kept/skipped report built by
    ``collect()``, then (unless ``--list``) copies the kept files into ``STAGE``
    and (unless ``--no-zip``) writes them to ``ZIP_PATH`` under a top-level
    ``data/`` prefix.

    Returns:
        ``0`` on success. A missing data directory is reported through the
        parser's ``error()``, which prints the message and exits with status 2.
    """
    ap = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    ap.add_argument(
        "--list", action="store_true", help="dry run: list included/skipped, no copy"
    )
    ap.add_argument(
        "--no-zip", action="store_true", help="stage into archive/data/ but don't zip"
    )
    args = ap.parse_args()

    # os.walk() silently yields nothing for a missing directory, which would
    # otherwise produce an empty (but "successful") archive.
    if not DATA.is_dir():
        ap.error(f"data directory not found: {DATA}")

    kept, skipped = collect()
    kept_mb = sum(s for _, s in kept) / 1024 / 1024

    print("=" * 78)
    print(f"Runtime data products under {DATA} (symlinks dereferenced)")
    print("=" * 78)
    for rel, size in kept:
        print(f" + {rel} ({size/1024:.0f} KB)")
    print(f"\n {len(kept)} files, {kept_mb:.1f} MB total")
    if skipped:
        print(f"\n excluded {len(skipped)} file(s) (derivation/large/provenance):")
        for rel, size, why in skipped:
            print(f" - {rel} ({size/1024/1024:.0f} MB, {why})")

    if args.list:
        return 0

    # Stage real copies into archive/data/, starting from a clean directory.
    if STAGE.exists():
        shutil.rmtree(STAGE)
    # Create the stage (and archive/) even when nothing is kept, so the zip
    # step below always has a directory to write into.
    STAGE.mkdir(parents=True, exist_ok=True)
    for rel, _ in kept:
        dst = STAGE / rel
        dst.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(DATA / rel, dst)  # copy2 follows symlinks -> real file
    print(f"\nStaged {len(kept)} files -> {STAGE}")

    if args.no_zip:
        return 0

    # Zip with a top-level data/ entry so download_data.py's extractall(repo_root) works.
    with zipfile.ZipFile(ZIP_PATH, "w", zipfile.ZIP_DEFLATED) as zf:
        for rel, _ in kept:
            zf.write(STAGE / rel, arcname=f"data/{rel}")
    print(f"Wrote {ZIP_PATH} ({ZIP_PATH.stat().st_size/1024/1024:.1f} MB)")
    print(
        "\nNext: upload archive/data.zip and update BASE_DATA_URL in bin/download_data.py"
    )
    return 0


# Script entry point: run main() and use its return value as the process exit
# status (raising SystemExit avoids needing an extra import for sys.exit).
if __name__ == "__main__":
    raise SystemExit(main())
Loading