Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
d2e143c
multisurvey plan
psferguson Jun 15, 2026
8bab5a4
phase 1 of refactor
psferguson Jun 15, 2026
9e6a90f
phase 2
psferguson Jun 15, 2026
e298dcf
phase 3
psferguson Jun 15, 2026
d28d6da
phase 4 and notebook
psferguson Jun 15, 2026
7c965ae
Unify injector into one StreamInjector; always-namespaced columns
psferguson Jun 16, 2026
efd06a6
S/N cut: apply reference-band cut once (option b)
psferguson Jun 16, 2026
b7d52c7
docs update and fix some sphinx warnings
psferguson Jun 16, 2026
0935b2d
remove some unnesseary files
psferguson Jun 16, 2026
79d14c7
testing multi survey injector
MatthieuPE Jun 17, 2026
67999d8
black + isort format
MatthieuPE Jun 17, 2026
1335628
Merge pull request #50 from LSSTDESC/roman_multisurvey_v2
MatthieuPE Jun 17, 2026
5e89f70
Address PR #47 review: unified bands API, release-namespacing, SNR cu…
psferguson Jun 17, 2026
6674845
Docs: sync with release-namespacing; tolerant completeness-column loader
psferguson Jun 17, 2026
1ac8d37
Fix SplineStreamModel instantiation and plot_inject namespaced columns
psferguson Jun 17, 2026
a623b9c
Docs: use des/yr6 (not y6) for the DES release identifier
psferguson Jun 17, 2026
44ab9c8
Docs: correct complete_catalog preserve-vs-overwrite docstring
psferguson Jun 17, 2026
11dcbfe
Removing release from mag true
MatthieuPE Jun 18, 2026
78053b8
move tutorial to the doc + order the surveys doc
MatthieuPE Jun 18, 2026
27f8490
black + isort
MatthieuPE Jun 18, 2026
fd166db
Merge pull request #52 from LSSTDESC/roman_multisurvey_matthieu
MatthieuPE Jun 18, 2026
3566fd0
complete documentation about lsst
MatthieuPE Jun 18, 2026
0153a1e
complete documentation about lsst
MatthieuPE Jun 18, 2026
a21a695
complete documentation about lsst
MatthieuPE Jun 18, 2026
dd1b3ce
fix paths
MatthieuPE Jun 18, 2026
641c1fd
fix typos
MatthieuPE Jun 18, 2026
7a56a3c
Docs: true-mag columns key on survey name (release-independent), not …
psferguson Jun 18, 2026
1158154
small docs updates
psferguson Jun 18, 2026
fcc0905
Consolidate IsochroneModel.sample/sample_multisurvey into one sample(…
psferguson Jun 18, 2026
6a27246
remove plan.md file from the branch
psferguson Jun 18, 2026
8467c9a
isort and black
psferguson Jun 18, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -132,3 +132,10 @@ dmypy.json
data/surveys/*
data/others/*
.DS_Store

# External reference repos (design references only, not part of streamobs)
/survey_systematics_in_LSST_streams/
/rubin_roman_object_classification/
/lsst_dc2_scratch/
/artifacts/
docs/source/roman_multisurvey_plan.md
151 changes: 87 additions & 64 deletions bin/download_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,14 @@
(e.g., Zenodo, institutional server) and extracts it to the data/ directory.
"""

import argparse
import os
import sys
import argparse
import urllib.request
import tempfile
import urllib.error
from pathlib import Path
import urllib.request
import zipfile
import tempfile

from pathlib import Path

# =============================================================================
# CONFIGURATION - Update this URL when data location changes
Expand All @@ -30,7 +29,7 @@
DATA_ARCHIVE_URL = BASE_DATA_URL + DATA_ARCHIVE_NAME

# Expected size (approximate, for user information)
ARCHIVE_SIZE_MB = 30 #Mb
ARCHIVE_SIZE_MB = 30 # Mb

# =============================================================================
# =============================================================================
Expand All @@ -41,16 +40,18 @@ def download_file(url, output_path, description="file"):
print(f"Downloading {description}...")
print(f" From: {url}")
print(f" To: {output_path}")

def progress_hook(count, block_size, total_size):
"""Show download progress."""
if total_size > 0:
percent = int(count * block_size * 100 / total_size)
mb_downloaded = count * block_size / (1024 * 1024)
mb_total = total_size / (1024 * 1024)
sys.stdout.write(f"\r Progress: {percent}% ({mb_downloaded:.1f}/{mb_total:.1f} MB)")
sys.stdout.write(
f"\r Progress: {percent}% ({mb_downloaded:.1f}/{mb_total:.1f} MB)"
)
sys.stdout.flush()

try:
urllib.request.urlretrieve(url, output_path, reporthook=progress_hook)
print("\n ✓ Download complete!")
Expand All @@ -65,20 +66,20 @@ def extract_zip(zip_path, extract_to, description="archive"):
print(f"\nExtracting {description}...")
print(f" From: {zip_path}")
print(f" To: {extract_to}")

try:
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
with zipfile.ZipFile(zip_path, "r") as zip_ref:
# Get list of files
file_list = zip_ref.namelist()
print(f" Found {len(file_list)} files in archive")

# Extract all files
zip_ref.extractall(extract_to)
print(" ✓ Extraction complete!")

# Clean up unwanted files after extraction
cleanup_unwanted_files(extract_to)

return True
except Exception as e:
print(f" ✗ Extraction failed: {e}")
Expand All @@ -89,16 +90,16 @@ def cleanup_unwanted_files(base_path):
"""Remove unwanted files like .DS_Store, .backup, etc. after extraction."""
base_path = Path(base_path)
removed_count = 0

# Patterns to remove
unwanted_patterns = [
'**/.DS_Store', # macOS system files
'**/__MACOSX', # macOS resource forks
'**/*.backup', # Backup files
'**/*.bak', # Backup files
'**/*~', # Temporary files
"**/.DS_Store", # macOS system files
"**/__MACOSX", # macOS resource forks
"**/*.backup", # Backup files
"**/*.bak", # Backup files
"**/*~", # Temporary files
]

for pattern in unwanted_patterns:
for item in base_path.glob(pattern):
try:
Expand All @@ -107,47 +108,48 @@ def cleanup_unwanted_files(base_path):
removed_count += 1
elif item.is_dir():
import shutil

shutil.rmtree(item)
removed_count += 1
except Exception:
pass # Ignore errors during cleanup

if removed_count > 0:
print(f" Cleaned up {removed_count} unwanted file(s)")



def list_data_contents(data_dir):
"""List what's in the data directory after download."""
print("\n📂 Data directory contents:")
print("=" * 80)

if not data_dir.exists():
print(" (empty - data directory doesn't exist yet)")
return

# List subdirectories
subdirs = [d for d in data_dir.iterdir() if d.is_dir() and not d.name.startswith('.')]
subdirs = [
d for d in data_dir.iterdir() if d.is_dir() and not d.name.startswith(".")
]
if not subdirs:
print(" (empty - no subdirectories found)")
return

for subdir in sorted(subdirs):
print(f"\n {subdir.name}/")
# Count files in subdirectory
try:
files = list(subdir.rglob('*'))
files = [f for f in files if f.is_file() and not f.name.startswith('.')]
files = list(subdir.rglob("*"))
files = [f for f in files if f.is_file() and not f.name.startswith(".")]
total_size = sum(f.stat().st_size for f in files)
print(f" Files: {len(files)} ({total_size / (1024*1024):.1f} MB)")
except Exception:
print(f" (unable to read directory)")



def main():
parser = argparse.ArgumentParser(
description='Download and extract large data files for streamobs',
description="Download and extract large data files for streamobs",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
Expand All @@ -162,60 +164,81 @@ def main():

# Use custom data URL
python download_data.py --url https://my-server.edu/data.zip
"""
""",
)

parser.add_argument(
"--list",
action="store_true",
help="List current data directory contents without downloading",
)
parser.add_argument(
"--url",
type=str,
default=DATA_ARCHIVE_URL,
help=f"URL for data archive (default: configured Zenodo URL)",
)
parser.add_argument(
"--data-dir",
type=str,
default=None,
help="Data directory (default: streamobs/data/)",
)

parser.add_argument('--list', action='store_true',
help='List current data directory contents without downloading')
parser.add_argument('--url', type=str, default=DATA_ARCHIVE_URL,
help=f'URL for data archive (default: configured Zenodo URL)')
parser.add_argument('--data-dir', type=str, default=None,
help='Data directory (default: streamobs/data/)')
parser.add_argument('--force', action='store_true',
help='Force re-download even if data exists')
parser.add_argument('--keep-archive', action='store_true',
help='Keep the downloaded zip file after extraction')

parser.add_argument(
"--force", action="store_true", help="Force re-download even if data exists"
)
parser.add_argument(
"--keep-archive",
action="store_true",
help="Keep the downloaded zip file after extraction",
)

args = parser.parse_args()

# Determine data directory
if args.data_dir:
data_dir = Path(args.data_dir)
else:
# Assume script is in streamobs/bin/
script_dir = Path(__file__).parent
data_dir = script_dir.parent / 'data'
data_dir = script_dir.parent / "data"

# List contents and exit if requested
if args.list:
list_data_contents(data_dir)
return 0

# Check if URL is configured
if 'XXXXX' in args.url:
if "XXXXX" in args.url:
print("=" * 80)
print("ERROR: Data URL is not yet configured!")
print("=" * 80)
print("\nThe DATA_ARCHIVE_URL in this script is still set to a placeholder.")
print("Please update it with the actual data hosting location.\n")
print("Steps:")
print(" 1. Upload your data/ directory as a zip file to Zenodo or another host")
print(
" 1. Upload your data/ directory as a zip file to Zenodo or another host"
)
print(" 2. Edit this script and update BASE_DATA_URL and DATA_ARCHIVE_NAME")
print(" 3. Or use --url to specify a custom URL\n")
return 1

# Check if data already exists
if data_dir.exists() and not args.force:
subdirs = [d for d in data_dir.iterdir() if d.is_dir() and not d.name.startswith('.')]
subdirs = [
d for d in data_dir.iterdir() if d.is_dir() and not d.name.startswith(".")
]
if subdirs:
print("=" * 80)
print("Data directory already exists with content!")
print("=" * 80)
list_data_contents(data_dir)
print("\n" + "=" * 80)
print("Use --force to re-download and overwrite, or --list to view contents")
print(
"Use --force to re-download and overwrite, or --list to view contents"
)
return 0

# Download and extract
print("=" * 80)
print("Stream Simulation Data Download")
Expand All @@ -224,36 +247,36 @@ def main():
print(f"Destination: {data_dir}")
print(f"Archive size: ~{ARCHIVE_SIZE_MB} MB")
print("\n" + "=" * 80)

# Create data directory
data_dir.mkdir(parents=True, exist_ok=True)

# Create temporary file for download
with tempfile.NamedTemporaryFile(suffix='.zip', delete=False) as tmp_file:
with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as tmp_file:
tmp_path = Path(tmp_file.name)

try:
# Download
if not download_file(args.url, tmp_path, "data archive"):
print("\n✗ Download failed!")
return 1

# Extract
if not extract_zip(tmp_path, data_dir.parent, "data archive"):
print("\n✗ Extraction failed!")
return 1

# Show what was extracted
list_data_contents(data_dir)

print("\n" + "=" * 80)
print("✓ Data download and extraction complete!")
print("=" * 80)
print("\nYou can now run stream simulations with this data.")
print("The data has been extracted to:", data_dir)

return 0

finally:
# Clean up temporary file unless requested to keep it
if tmp_path.exists():
Expand All @@ -265,5 +288,5 @@ def main():
tmp_path.unlink()


if __name__ == '__main__':
if __name__ == "__main__":
sys.exit(main())
8 changes: 4 additions & 4 deletions bin/example_generate_pal5.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ def inverse_transfom_sample(vals, pdf, size):
cdf /= cdf[-1]
fn = scipy.interpolate.interp1d(cdf, list(range(0, len(cdf))))
x_new = np.random.uniform(size=np.rint(size).astype(int))
x_new[
x_new < 1e-3
] = 1e-3 # running into error that values close to 0 are flagged as being
# below interp range
x_new[x_new < 1e-3] = (
1e-3 # running into error that values close to 0 are flagged as being
)
# below interp range
index = np.rint(fn(x_new)).astype(int)
return vals[index]

Expand Down
Loading