From c56bfacb65e22192342fed46b947de654639748f Mon Sep 17 00:00:00 2001 From: MatthieuPe Date: Fri, 19 Jun 2026 17:26:20 +0200 Subject: [PATCH 1/8] Add background subpackage: CMD histogram resources and injection pipeline - Add gal_misclassification field and method to Survey - Extend set_completeness() with selection='missclassified' - Thread source_type kwarg through StreamInjector.inject() and detect_flag() - New streamobs/background/ subpackage: BackgroundCatalogInjector, BackgroundResourceBuilder, BackgroundStorage, LightBackgroundGenerator, Background wrapper - Add load_catalog() utility to utils.py - Add tests/test_background.py (31 stubs, @pytest.mark.background) - Add user and developer documentation for background generation - Gitignore data/background/ and .claude/ Co-Authored-By: Claude Sonnet 4.6 --- .gitignore | 6 + docs/source/background.md | 50 ++++ docs/source/build_background_resources.md | 70 +++++ docs/source/index.rst | 9 +- pytest.ini | 1 + streamobs/__init__.py | 8 + streamobs/background/__init__.py | 39 +++ streamobs/background/background.py | 193 +++++++++++++ streamobs/background/catalog_injector.py | 93 ++++++ streamobs/background/generator.py | 278 ++++++++++++++++++ streamobs/background/resource_builder.py | 257 +++++++++++++++++ streamobs/background/storage.py | 145 ++++++++++ streamobs/observed.py | 48 +++- streamobs/surveys.py | 92 +++++- streamobs/utils.py | 36 ++- tests/test_background.py | 328 ++++++++++++++++++++++ 16 files changed, 1633 insertions(+), 20 deletions(-) create mode 100644 docs/source/background.md create mode 100644 docs/source/build_background_resources.md create mode 100644 streamobs/background/__init__.py create mode 100644 streamobs/background/background.py create mode 100644 streamobs/background/catalog_injector.py create mode 100644 streamobs/background/generator.py create mode 100644 streamobs/background/resource_builder.py create mode 100644 streamobs/background/storage.py create mode 100644 tests/test_background.py diff --git a/.gitignore b/.gitignore index d7d0c8b..e1c7407 100644 --- a/.gitignore +++ b/.gitignore @@ -128,9 +128,15 @@ dmypy.json # Pyre type checker .pyre/ +# Claude Code settings (local only) +.claude/ + # Surveys files data/surveys/* data/others/* + +# Background resources (precomputed CMD grids — built locally, not tracked) +data/background/ .DS_Store # External reference repos (design references only, not part of streamobs) /survey_systematics_in_LSST_streams/ diff --git a/docs/source/background.md b/docs/source/background.md new file mode 100644 index 0000000..200a10b --- /dev/null +++ b/docs/source/background.md @@ -0,0 +1,50 @@ +# Generating background + +`streamobs` provides two complementary ways to generate a background catalog +(stars and/or galaxies) for a stream realization. + +| Method | Speed | Requirements | +|--------|-------|--------------| +| `'light'` | Fast | Precomputed CMD resource files | +| `'full'` | Slow | True background catalogs | + +## Quick start — light method + +```python +from streamobs.surveys import Survey +from streamobs.background import Background + +survey = Survey.load('lsst', release='yr5') +bg = Background(survey, source_type='both', method='light') + +catalog = bg.generate( + phi1_limits=(-20, 20), + phi2_limits=(-2, 2), + gc_frame=frame, # gala GreatCircleICRSFrame +) +``` + +## Stars only or galaxies only + +```python +bg_stars = Background(survey, source_type='stars', method='light') +bg_gals = Background(survey, source_type='galaxies', method='light') +``` + +## Full injection method + +Requires a DataFrame of true background objects for each population: + +```python +bg = Background( + survey, + method='full', + source_type='both', + catalog_stars=df_true_stars, + catalog_galaxies=df_true_galaxies, +) +catalog = bg.generate(phi1_limits=(-20, 20), phi2_limits=(-2, 2)) +``` + +See [build_background_resources.md](build_background_resources.md) to learn +how to build or rebuild the resource files used by the light method. diff --git a/docs/source/build_background_resources.md b/docs/source/build_background_resources.md new file mode 100644 index 0000000..5965723 --- /dev/null +++ b/docs/source/build_background_resources.md @@ -0,0 +1,70 @@ +# Building background resources (developer guide) + +The **light background method** reads precomputed color–magnitude diagram +(CMD) histograms from parquet files stored in `data/background/`. These +files are not tracked by git and must be built locally (or provided by the +package maintainers). + +## What the resources are + +For each combination of ``(source_type, bands)``, one parquet file is +stored with a long-format table: + +``` +maglim_r | maglim_g | color_center | mag_center | count | n_ref | area_ref_deg2 +``` + +Each row represents one cell of a 2-D CMD histogram at a specific magnitude +limit pair. The generator looks up the nearest ``(maglim_r, maglim_g)`` +entry at runtime and samples from it. + +## How to build resources + +### 1. Prepare input catalogs + +You need a DataFrame of **true** (pre-observation) positions and magnitudes +for stars and/or galaxies. These catalogs are not part of `streamobs` and +must be supplied by the developer. + +```python +import pandas as pd +df_stars = pd.read_parquet('/path/to/true_stars.parquet') +df_galaxies = pd.read_parquet('/path/to/true_galaxies.parquet') +``` + +### 2. Run the builder + +```python +from streamobs.background import BackgroundResourceBuilder, BackgroundStorage + +builder = BackgroundResourceBuilder(survey_name='lsst', release='yr5') +builder.build( + catalog_stars=df_stars, + catalog_galaxies=df_galaxies, + bands=('g', 'r'), + maglim_ref_values=[25.0, 25.5, 26.0, 26.5, 27.0], + delta_range=(-1.0, 1.0), + delta_step=0.1, + source_type='both', +) +``` + +### 3. Save to disk + +```python +storage = BackgroundStorage(survey_name='lsst', release='yr5') +builder.save(storage) +``` + +Files are written to `data/background/lsst_yr5/` — one parquet per +``(source_type, bands)`` combination, e.g. `stars_gr.parquet`. + +## File naming convention + +`BackgroundStorage.get_path(source_type, bands)` returns the full path: + +``` +data/background/{survey_name}_{release}/{source_type}_{bands_str}.parquet +``` + +Example: `data/background/lsst_yr5/galaxies_gr.parquet`. diff --git a/docs/source/index.rst b/docs/source/index.rst index 6241600..774a7ce 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -103,10 +103,17 @@ Documentation Contents surveys/Roman +.. toctree:: + :maxdepth: 1 + :caption: Background generation + + background + build_background_resources + .. toctree:: :maxdepth: 2 :caption: For developers - + modifying_streamobs new_survey update_data diff --git a/pytest.ini b/pytest.ini index e3a2752..303c1aa 100644 --- a/pytest.ini +++ b/pytest.ini @@ -27,6 +27,7 @@ markers = observed: marks tests for injection utilities model: marks tests for model functionality utils: marks tests for utility functions + background: marks tests for the background subpackage # Minimum Python version minversion = 3.8 \ No newline at end of file diff --git a/streamobs/__init__.py b/streamobs/__init__.py index fac239d..0421dae 100644 --- a/streamobs/__init__.py +++ b/streamobs/__init__.py @@ -1,2 +1,10 @@ #!/usr/bin/env python """Nothing to see here.""" + +from .background import ( + Background, + BackgroundCatalogInjector, + BackgroundResourceBuilder, + BackgroundStorage, + LightBackgroundGenerator, +) diff --git a/streamobs/background/__init__.py b/streamobs/background/__init__.py new file mode 100644 index 0000000..2cbf63f --- /dev/null +++ b/streamobs/background/__init__.py @@ -0,0 +1,39 @@ +""" +Background generation for stellar stream analysis. + +This subpackage provides two complementary pipelines: + +- **Full injection** (:class:`BackgroundCatalogInjector`): runs a known + catalog of true stars or galaxies through the complete + :class:`~streamobs.observed.StreamInjector` pipeline. +- **Light generation** (:class:`LightBackgroundGenerator`): samples from + precomputed binned color–magnitude distributions stored by + :class:`BackgroundStorage`, giving fast per-pixel background realizations + without re-running the full injection. + +The top-level :class:`Background` class wraps both modes and lets users +generate stars-only, galaxies-only, or combined backgrounds. + +Typical usage — light method with default bundled resources:: + + from streamobs.surveys import Survey + from streamobs.background import Background + + survey = Survey.load('lsst', release='yr5') + bg = Background(survey, source_type='both', method='light') + catalog = bg.generate(phi1_limits=(-20, 20), phi2_limits=(-2, 2), gc_frame=frame) +""" + +from .background import Background +from .catalog_injector import BackgroundCatalogInjector +from .generator import LightBackgroundGenerator +from .resource_builder import BackgroundResourceBuilder +from .storage import BackgroundStorage + +__all__ = [ + "Background", + "BackgroundCatalogInjector", + "BackgroundResourceBuilder", + "BackgroundStorage", + "LightBackgroundGenerator", +] diff --git a/streamobs/background/background.py b/streamobs/background/background.py new file mode 100644 index 0000000..359e682 --- /dev/null +++ b/streamobs/background/background.py @@ -0,0 +1,193 @@ +""" +Top-level background generation wrapper. +""" + +import os + +from ..surveys import Survey +from .catalog_injector import BackgroundCatalogInjector +from .generator import LightBackgroundGenerator +from .storage import BackgroundStorage + + +class Background: + """ + High-level wrapper for background generation. + + Dispatches to either the full injection method + (:class:`BackgroundCatalogInjector`) or the fast light method + (:class:`LightBackgroundGenerator`) depending on ``method``. Stars and + galaxies are treated independently so users can build custom combinations. + + When ``method='light'`` and ``storage=None``, the class falls back to the + bundled package-level resource files (if present). + + Parameters + ---------- + survey : Survey + Survey instance defining the observation conditions. + source_type : str, optional + Which background components to generate: ``'stars'``, ``'galaxies'``, + or ``'both'``. Default ``'both'``. + method : str, optional + Generation method: ``'light'`` (fast, uses precomputed CMD grids) or + ``'full'`` (runs the complete injection pipeline). Default ``'light'``. + storage : BackgroundStorage, optional + Resource storage for the light method. When ``None``, package-bundled + resources are used (via :meth:`_default_storage`). + catalog_stars : pd.DataFrame, optional + True stellar catalog. Required when ``method='full'`` and + ``source_type`` includes stars. + catalog_galaxies : pd.DataFrame, optional + True galaxy catalog. Required when ``method='full'`` and + ``source_type`` includes galaxies. + **kwargs + Forwarded to the underlying injector or generator. + + Examples + -------- + Light method with bundled defaults:: + + bg = Background(survey, source_type='both', method='light') + catalog = bg.generate(phi1_limits=(-20, 20), phi2_limits=(-2, 2), gc_frame=frame) + + Full injection with user-supplied catalogs:: + + bg = Background( + survey, + method='full', + source_type='stars', + catalog_stars=df_stars, + ) + catalog = bg.generate(phi1_limits=(-20, 20), phi2_limits=(-2, 2)) + """ + + def __init__( + self, + survey: Survey, + source_type: str = "both", + method: str = "light", + storage: BackgroundStorage = None, + catalog_stars=None, + catalog_galaxies=None, + **kwargs, + ): + if source_type not in ("stars", "galaxies", "both"): + raise ValueError( + f"source_type must be 'stars', 'galaxies', or 'both', got '{source_type}'." + ) + if method not in ("light", "full"): + raise ValueError(f"method must be 'light' or 'full', got '{method}'.") + + self.survey = survey + self.source_type = source_type + self.method = method + self.catalog_stars = catalog_stars + self.catalog_galaxies = catalog_galaxies + self._kwargs = kwargs + + if storage is None and method == "light": + storage = self._default_storage(survey) + self.storage = storage + + def generate( + self, + phi1_limits, + phi2_limits, + gc_frame=None, + **kwargs, + ) -> "pd.DataFrame": + """ + Generate a background catalog for the given stream sky region. + + Parameters + ---------- + phi1_limits : tuple of float + ``(phi1_min, phi1_max)`` in degrees. + phi2_limits : tuple of float + ``(phi2_min, phi2_max)`` in degrees. + gc_frame : gala.coordinates.GreatCircleICRSFrame, optional + Great-circle frame. Required for the light method and when the + full method needs coordinate conversion. + **kwargs + Forwarded to the underlying generator or injector. + + Returns + ------- + pd.DataFrame + Background catalog. Column names follow the survey namespace + convention for the full method; for the light method columns are + ``phi1``, ``phi2``, ``color``, ``mag``, ``source_type``. + """ + if self.method == "full": + return self._generate_full(phi1_limits, phi2_limits, gc_frame, **kwargs) + return self._generate_light(phi1_limits, phi2_limits, gc_frame, **kwargs) + + def _generate_full( + self, + phi1_limits, + phi2_limits, + gc_frame, + **kwargs, + ) -> "pd.DataFrame": + """ + Generate background via full catalog injection. + + For each active source type (controlled by :attr:`source_type`), uses + :class:`BackgroundCatalogInjector` to inject the corresponding true + catalog into the survey. The results are concatenated. + + Parameters + ---------- + phi1_limits, phi2_limits : tuple of float + gc_frame : gala.coordinates.GreatCircleICRSFrame or None + **kwargs + + Returns + ------- + pd.DataFrame + """ + ... + + def _generate_light( + self, + phi1_limits, + phi2_limits, + gc_frame, + **kwargs, + ) -> "pd.DataFrame": + """ + Generate background via the fast light method. + + Delegates to :class:`LightBackgroundGenerator` using + :attr:`storage`. + + Parameters + ---------- + phi1_limits, phi2_limits : tuple of float + gc_frame : gala.coordinates.GreatCircleICRSFrame + **kwargs + + Returns + ------- + pd.DataFrame + """ + ... + + @staticmethod + def _default_storage(survey: Survey) -> BackgroundStorage: + """ + Return a :class:`BackgroundStorage` pointing to bundled package resources. + + Parameters + ---------- + survey : Survey + + Returns + ------- + BackgroundStorage + """ + return BackgroundStorage( + survey_name=survey.name, + release=survey.release, + ) diff --git a/streamobs/background/catalog_injector.py b/streamobs/background/catalog_injector.py new file mode 100644 index 0000000..505c232 --- /dev/null +++ b/streamobs/background/catalog_injector.py @@ -0,0 +1,93 @@ +""" +Full-injection background pipeline using known catalogs. +""" + +import copy + +import numpy as np + +from ..observed import StreamInjector +from ..surveys import Survey +from ..utils import load_catalog + + +class BackgroundCatalogInjector: + """ + Thin wrapper around :class:`~streamobs.observed.StreamInjector` for + background catalog injection. + + Provides :meth:`inject_stars` and :meth:`inject_galaxies` convenience + methods that each call :meth:`~streamobs.observed.StreamInjector.inject` + with the correct ``source_type``. Survey preparation — deep-copying, + zeroing dust, setting uniform magnitude limits — is handled by the single + :meth:`_prepare_survey` method and is shared between both injection modes. + + Parameters + ---------- + survey : Survey + Survey instance to inject into. + **kwargs + Forwarded to :meth:`_prepare_survey` at construction time (e.g. + ``no_dust``, ``uniform_maglim``). + + Examples + -------- + Inject a stars catalog with a uniform magnitude limit: + + >>> injector = BackgroundCatalogInjector(survey, no_dust=True, + ... uniform_maglim={'g': 26.0, 'r': 26.5}) + >>> obs_stars = injector.inject_stars(catalog_df, bands=['g', 'r']) + """ + + def __init__(self, survey: Survey, **kwargs): + self._survey = survey + self._kwargs = kwargs + + def inject_stars(self, catalog, bands=None, **kwargs) -> "pd.DataFrame": + """ + Inject a catalog of stars through the full survey pipeline. + + Parameters + ---------- + catalog : pd.DataFrame or str + True stellar catalog. Accepts a DataFrame or a path to parquet/CSV + (loaded via :func:`~streamobs.utils.load_catalog`). Must contain + sky coordinates (``ra``/``dec`` or ``phi1``/``phi2``) and true + magnitude columns. + bands : list of str, optional + Photometric bands to inject. Defaults to ``['r', 'g']``. + **kwargs + Forwarded to :meth:`~streamobs.observed.StreamInjector.inject`. + + Returns + ------- + pd.DataFrame + Observed catalog with survey-namespaced magnitude and flag columns. + """ + ... + + def inject_galaxies(self, catalog, bands=None, **kwargs) -> "pd.DataFrame": + """ + Inject a catalog of galaxies through the full survey pipeline. + + Uses :meth:`~streamobs.surveys.Survey.get_gal_misclassification` for + the detection flag (probability a galaxy passes stellar selection). + Requires :attr:`~streamobs.surveys.Survey.gal_misclassification` to be + loaded on the survey. + + Parameters + ---------- + catalog : pd.DataFrame or str + True galaxy catalog. Accepts a DataFrame or a path to parquet/CSV. + Must contain sky coordinates and true magnitude columns. + bands : list of str, optional + Photometric bands to inject. Defaults to ``['r', 'g']``. + **kwargs + Forwarded to :meth:`~streamobs.observed.StreamInjector.inject`. + + Returns + ------- + pd.DataFrame + Observed catalog with survey-namespaced magnitude and flag columns. + """ + ... diff --git a/streamobs/background/generator.py b/streamobs/background/generator.py new file mode 100644 index 0000000..f06bd05 --- /dev/null +++ b/streamobs/background/generator.py @@ -0,0 +1,278 @@ +""" +Fast per-pixel background generation from precomputed CMD grids. +""" + +import numpy as np + +from ..surveys import Survey +from .storage import BackgroundStorage + + +class LightBackgroundGenerator: + """ + Generate background catalogs rapidly from precomputed CMD histogram grids. + + For each HEALPix pixel in the requested sky region the generator: + + 1. Retrieves the local magnitude limits from the survey's HEALPix maps. + 2. Selects the nearest precomputed CMD histogram in ``(maglim_ref, delta)`` + space. + 3. Scales the reference object count to the pixel area via a Poisson draw. + 4. Samples ``(color, mag)`` pairs from the 2-D histogram. + 5. Samples sky positions uniformly within the pixel. + + Stars and galaxies are handled independently so that users can build + custom mixed background models. + + Parameters + ---------- + storage : BackgroundStorage + Storage backend holding the precomputed CMD grids. + survey : Survey + Survey with real (non-uniform) HEALPix magnitude limit maps used to + look up per-pixel depth. + bands : tuple of str, optional + ``(band1, band2)`` — must match the bands used when building resources. + Default ``('g', 'r')``. + **kwargs + Reserved for future use. + + Examples + -------- + >>> gen = LightBackgroundGenerator(storage, survey, bands=('g', 'r')) + >>> catalog = gen.generate( + ... phi1_limits=(-20, 20), + ... phi2_limits=(-2, 2), + ... gc_frame=frame, + ... nside=4096, + ... source_type='both', + ... ) + """ + + def __init__( + self, + storage: BackgroundStorage, + survey: Survey, + bands=("g", "r"), + **kwargs, + ): + self.storage = storage + self.survey = survey + self.bands = bands + # Lazy cache: {source_type: grid_dict} + self._resources: dict = {} + + def generate( + self, + phi1_limits, + phi2_limits, + gc_frame, + nside=4096, + source_type="both", + **kwargs, + ) -> "pd.DataFrame": + """ + Generate a background catalog for the given sky region. + + Parameters + ---------- + phi1_limits : tuple of float + ``(phi1_min, phi1_max)`` in degrees. + phi2_limits : tuple of float + ``(phi2_min, phi2_max)`` in degrees. + gc_frame : gala.coordinates.GreatCircleICRSFrame + Great-circle frame mapping ``(phi1, phi2)`` to ``(RA, Dec)``. + nside : int, optional + HEALPix resolution. Default ``4096``. + source_type : str, optional + ``'stars'``, ``'galaxies'``, or ``'both'``. Default ``'both'``. + **kwargs + rng : numpy.random.Generator, optional + seed : int, optional + + Returns + ------- + pd.DataFrame + Background catalog with columns ``phi1``, ``phi2``, ``color``, + ``mag`` (reference band), and ``source_type``. + """ + ... + + def _generate_one_type( + self, + phi1_limits, + phi2_limits, + gc_frame, + source_type: str, + nside: int, + **kwargs, + ) -> "pd.DataFrame": + """ + Generate objects of a single source type pixel by pixel. + + Parameters + ---------- + phi1_limits, phi2_limits : tuple of float + gc_frame : gala.coordinates.GreatCircleICRSFrame + source_type : str + ``'stars'`` or ``'galaxies'``. + nside : int + **kwargs + + Returns + ------- + pd.DataFrame + """ + ... + + def _get_footprint_pixels( + self, + phi1_limits, + phi2_limits, + gc_frame, + nside: int, + ) -> np.ndarray: + """ + Return HEALPix pixel indices that cover the given ``(phi1, phi2)`` box. + + Parameters + ---------- + phi1_limits, phi2_limits : tuple of float + gc_frame : gala.coordinates.GreatCircleICRSFrame + nside : int + + Returns + ------- + np.ndarray of int + Pixel indices. + """ + ... + + def _get_maglim_pixels( + self, + pixels: np.ndarray, + band: str, + nside: int, + ) -> np.ndarray: + """ + Retrieve magnitude limits for a set of pixels from the survey map. + + Parameters + ---------- + pixels : np.ndarray of int + band : str + nside : int + Resolution of ``pixels``; the survey map may have a different nside + and will be re-pixelized if needed. + + Returns + ------- + np.ndarray of float + """ + ... + + def _select_cmd_distribution( + self, + maglim_ref: float, + delta: float, + source_type: str, + ) -> dict: + """ + Nearest-neighbour lookup in the precomputed CMD grid. + + Parameters + ---------- + maglim_ref : float + Reference band magnitude limit for this pixel. + delta : float + ``maglim_band1 - maglim_ref`` for this pixel. + source_type : str + ``'stars'`` or ``'galaxies'``. + + Returns + ------- + dict + ``{'cmd_hist', 'color_edges', 'mag_edges', 'n_ref', + 'area_ref_deg2'}`` for the nearest grid point. + """ + ... + + def _scale_n_objects( + self, + n_ref: int, + area_ref_deg2: float, + pixel_area_deg2: float, + ) -> int: + """ + Draw the expected number of objects for a pixel via Poisson scaling. + + Parameters + ---------- + n_ref : int + Number of objects in the reference simulation. + area_ref_deg2 : float + Sky area of the reference simulation in deg². + pixel_area_deg2 : float + Sky area of the target pixel in deg². + + Returns + ------- + int + Poisson draw around ``n_ref * pixel_area_deg2 / area_ref_deg2``. + """ + ... + + def _sample_from_cmd( + self, + cmd_hist: np.ndarray, + color_edges: np.ndarray, + mag_edges: np.ndarray, + n_objects: int, + rng: np.random.Generator, + ) -> "pd.DataFrame": + """ + Draw ``n_objects`` (color, mag) pairs from the 2-D CMD histogram. + + Parameters + ---------- + cmd_hist : np.ndarray + 2-D array of counts with shape ``(n_color, n_mag)``. + color_edges : np.ndarray + mag_edges : np.ndarray + n_objects : int + rng : numpy.random.Generator + + Returns + ------- + pd.DataFrame + Columns ``color`` and ``mag``. + """ + ... + + def _sample_positions( + self, + n_objects: int, + pixel: int, + nside: int, + gc_frame, + rng: np.random.Generator, + ) -> "pd.DataFrame": + """ + Sample ``(phi1, phi2)`` positions uniformly within a HEALPix pixel. + + Parameters + ---------- + n_objects : int + pixel : int + HEALPix pixel index. + nside : int + gc_frame : gala.coordinates.GreatCircleICRSFrame + Used to convert ``(RA, Dec)`` corners back to ``(phi1, phi2)``. + rng : numpy.random.Generator + + Returns + ------- + pd.DataFrame + Columns ``phi1`` and ``phi2``. + """ + ... diff --git a/streamobs/background/resource_builder.py b/streamobs/background/resource_builder.py new file mode 100644 index 0000000..df02717 --- /dev/null +++ b/streamobs/background/resource_builder.py @@ -0,0 +1,257 @@ +""" +Builder for precomputed background color–magnitude diagram (CMD) resources. +""" + +import numpy as np + +from ..surveys import Survey, SurveyFactory +from ..utils import load_catalog +from .catalog_injector import BackgroundCatalogInjector +from .storage import BackgroundStorage + + +class BackgroundResourceBuilder: + """ + Build precomputed CMD histogram grids for fast background generation. + + Drives :class:`BackgroundCatalogInjector` on uniform surveys (no dust, + constant magnitude limits) at a grid of ``(maglim_ref, delta)`` pairs, + where ``delta = maglim_g - maglim_ref``. The resulting 2-D color–magnitude + histograms are stored via :class:`BackgroundStorage` and later consumed by + :class:`~streamobs.background.generator.LightBackgroundGenerator`. + + Parameters + ---------- + survey_name : str, optional + Survey identifier (e.g. ``'lsst'``). + release : str, optional + Survey release string (e.g. ``'yr5'``). + **kwargs + Forwarded to :meth:`~streamobs.surveys.SurveyFactory.create_survey`. + + Examples + -------- + >>> builder = BackgroundResourceBuilder('lsst', release='yr5') + >>> builder.build( + ... catalog_stars=df_stars, + ... catalog_galaxies=df_gals, + ... bands=('g', 'r'), + ... maglim_ref_values=[25.5, 26.0, 26.5], + ... delta_range=(-1.0, 1.0), + ... delta_step=0.1, + ... ) + >>> storage = BackgroundStorage(survey_name='lsst', release='yr5') + >>> builder.save(storage) + """ + + def __init__(self, survey_name="lsst", release=None, **kwargs): + self.survey_name = survey_name + self.release = release + self._kwargs = kwargs + # Nested dict: {source_type: {(maglim_ref, delta): config_dict}} + self.resources: dict = {} + + def build( + self, + catalog_stars=None, + catalog_galaxies=None, + bands=("g", "r"), + maglim_ref_values=None, + delta_range=(-1.0, 1.0), + delta_step=0.1, + n_bins_color=50, + n_bins_mag=50, + source_type="both", + **kwargs, + ): + """ + Build CMD histograms for all ``(maglim_ref, delta)`` configurations. + + For each combination of source type and magnitude limit pair, injects + the catalog into a uniform (no-dust, constant-maglim) survey and + computes a 2-D histogram of ``(color, mag_ref_band)``. + + Parameters + ---------- + catalog_stars : pd.DataFrame or str, optional + True stellar catalog. Required when ``source_type`` is + ``'stars'`` or ``'both'``. + catalog_galaxies : pd.DataFrame or str, optional + True galaxy catalog. Required when ``source_type`` is + ``'galaxies'`` or ``'both'``. + bands : tuple of str, optional + Two band names ``(band1, band2)`` where color = band1 - band2 and + ``band2`` is the reference magnitude axis. Default ``('g', 'r')``. + maglim_ref_values : list of float, optional + Reference magnitude limits to sweep (for the second band in + ``bands``). + delta_range : tuple of float, optional + ``(min, max)`` of ``maglim_band1 - maglim_ref``. + Default ``(-1.0, 1.0)``. + delta_step : float, optional + Step size between delta values. Default ``0.1``. + n_bins_color : int, optional + Number of color histogram bins. Default ``50``. + n_bins_mag : int, optional + Number of magnitude histogram bins. Default ``50``. + source_type : str, optional + ``'stars'``, ``'galaxies'``, or ``'both'``. Default ``'both'``. + **kwargs + Forwarded to + :meth:`BackgroundCatalogInjector.inject_stars` / + :meth:`BackgroundCatalogInjector.inject_galaxies`. + """ + ... + + def _build_one_config( + self, + catalog, + source_type, + bands, + maglim_ref, + delta, + n_bins_color, + n_bins_mag, + **kwargs, + ) -> dict: + """ + Build the CMD histogram for a single ``(maglim_ref, delta)`` pair. + + Creates a uniform survey, injects the catalog, and histograms the + result. + + Parameters + ---------- + catalog : pd.DataFrame + True background catalog (stars or galaxies). + source_type : str + ``'stars'`` or ``'galaxies'``. + bands : tuple of str + ``(band1, band2)`` — color = band1_obs - band2_obs. + maglim_ref : float + Magnitude limit for the reference band (``band2``). + delta : float + ``maglim_band1 - maglim_ref``. + n_bins_color : int + Number of color bins. + n_bins_mag : int + Number of magnitude bins. + **kwargs + Forwarded to the injector. + + Returns + ------- + dict + ``{'cmd_hist': np.ndarray, 'color_edges': np.ndarray, + 'mag_edges': np.ndarray, 'n_ref': int, + 'area_ref_deg2': float}`` + """ + ... + + def _prepare_survey( + self, + survey: Survey, + no_dust: bool = False, + uniform_maglim: dict = None, + **kwargs, + ) -> Survey: + """ + Return a modified copy of the survey for background injection. + + Creates a deep copy so that modifications do not affect the original. + Handles both the full-injection case (no-dust only) and the resource- + building case (uniform magnitude limits + no dust). + + Parameters + ---------- + survey : Survey + Survey to copy and modify. + no_dust : bool, optional + If True, zero the EBV map. Default is False. + uniform_maglim : dict, optional + Mapping ``{band: value}`` to replace per-pixel magnitude limit maps + with constant arrays. E.g. ``{'g': 26.0, 'r': 26.5}``. + When provided, ``no_dust`` is implicitly set to True since uniform + surveys are always dust-free by construction. + **kwargs + Reserved for future use. + + Returns + ------- + Survey + Modified copy of the survey. + """ + ... + + def _compute_cmd_histogram( + self, + observed_df, + bands: tuple, + n_bins_color: int, + n_bins_mag: int, + ) -> dict: + """ + Build a 2-D color–magnitude histogram from an observed catalog. + + Color is defined as ``mag_band1_obs - mag_band2_obs`` and the + magnitude axis is ``mag_band2_obs``. + + Parameters + ---------- + observed_df : pd.DataFrame + Output of the catalog injector (observed magnitudes). + bands : tuple of str + ``(band1, band2)``. + n_bins_color : int + Number of color bins. + n_bins_mag : int + Number of magnitude bins. + + Returns + ------- + dict + ``{'cmd_hist': np.ndarray (n_bins_color × n_bins_mag), + 'color_edges': np.ndarray, 'mag_edges': np.ndarray}`` + """ + ... + + def save(self, storage: BackgroundStorage, source_type="both", **kwargs): + """ + Persist the built resources to disk via ``storage``. + + Parameters + ---------- + storage : BackgroundStorage + Storage backend to use. + source_type : str, optional + Which source types to save: ``'stars'``, ``'galaxies'``, or + ``'both'``. Default ``'both'``. + **kwargs + Forwarded to :meth:`BackgroundStorage.save_data`. + """ + ... + + @classmethod + def load( + cls, + storage: BackgroundStorage, + source_type="both", + **kwargs, + ) -> "BackgroundResourceBuilder": + """ + Load precomputed resources from disk into a new builder instance. + + Parameters + ---------- + storage : BackgroundStorage + Storage backend to read from. + source_type : str, optional + Which source types to load: ``'stars'``, ``'galaxies'``, or + ``'both'``. Default ``'both'``. + + Returns + ------- + BackgroundResourceBuilder + Populated instance with :attr:`resources` filled. + """ + ... diff --git a/streamobs/background/storage.py b/streamobs/background/storage.py new file mode 100644 index 0000000..6a8b4d7 --- /dev/null +++ b/streamobs/background/storage.py @@ -0,0 +1,145 @@ +""" +Persistence layer for precomputed background CMD histogram grids. +""" + +import os + + +class BackgroundStorage: + """ + Save and load precomputed color–magnitude diagram (CMD) histogram grids. + + One compressed parquet file is stored per ``(source_type, bands)`` + combination, e.g. ``stars_gr.parquet``. All files live under + ``data/background/`` (which is excluded from version control). + + **File format** — long-format parquet with one row per + ``(maglim_r, maglim_g, color_center, mag_center)`` cell:: + + maglim_r | maglim_g | color_center | mag_center | count | n_ref | area_ref_deg2 + + Parquet's columnar compression handles repeated ``color_center`` and + ``mag_center`` values efficiently. + + Parameters + ---------- + base_path : str, optional + Root directory for resource files. Defaults to + ``{package_root}/data/background/``. + survey_name : str, optional + Survey identifier (e.g. ``'lsst'``). Used in the file path. + release : str, optional + Survey release string (e.g. ``'yr5'``). Used in the file path. + + Examples + -------- + >>> storage = BackgroundStorage(survey_name='lsst', release='yr5') + >>> path = storage.get_path('stars', ('g', 'r')) + >>> # -> .../data/background/lsst_yr5/stars_gr.parquet + """ + + def __init__(self, base_path=None, survey_name="lsst", release=None, **kwargs): + if base_path is None: + _pkg_root = os.path.join(os.path.dirname(__file__), "..", "..", "data") + base_path = os.path.join(_pkg_root, "background") + self.base_path = base_path + self.survey_name = survey_name + self.release = release + + def get_path(self, source_type: str, bands: tuple) -> str: + """ + Build the file path for a given ``(source_type, bands)`` combination. + + Parameters + ---------- + source_type : str + ``'stars'`` or ``'galaxies'``. + bands : tuple of str + Band names in order, e.g. ``('g', 'r')``. + + Returns + ------- + str + Absolute path to the parquet file, e.g. + ``{base_path}/lsst_yr5/stars_gr.parquet``. + """ + survey_key = ( + f"{self.survey_name}_{self.release}" if self.release else self.survey_name + ) + bands_str = "".join(bands) + filename = f"{source_type}_{bands_str}.parquet" + return os.path.join(self.base_path, survey_key, filename) + + def save_data( + self, + data: dict, + source_type: str, + bands: tuple, + **kwargs, + ): + """ + Persist the CMD histogram grid to a compressed parquet file. + + The ``data`` dict is keyed by ``(maglim_r, maglim_g)`` and each value + is a dict with keys ``cmd_hist``, ``color_edges``, ``mag_edges``, + ``n_ref``, and ``area_ref_deg2`` (as returned by + :meth:`~streamobs.background.resource_builder.BackgroundResourceBuilder._build_one_config`). + The grid is flattened to long-format before writing. + + Parameters + ---------- + data : dict + CMD histogram grid keyed by ``(maglim_r, maglim_g)``. + source_type : str + ``'stars'`` or ``'galaxies'``. + bands : tuple of str + Band names, e.g. ``('g', 'r')``. + **kwargs + compression : str, optional + Parquet compression codec. Default is ``'zstd'``. + """ + ... + + def load_data( + self, + source_type: str, + bands: tuple, + **kwargs, + ) -> dict: + """ + Load a CMD histogram grid from the parquet file for this combination. + + Reads the file returned by :meth:`get_path` and reconstructs the + nested dict keyed by ``(maglim_r, maglim_g)``. + + Parameters + ---------- + source_type : str + ``'stars'`` or ``'galaxies'``. + bands : tuple of str + Band names, e.g. ``('g', 'r')``. + + Returns + ------- + dict + CMD grid keyed by ``(maglim_r, maglim_g)`` → ``{'cmd_hist', + 'color_edges', 'mag_edges', 'n_ref', 'area_ref_deg2'}``. + """ + ... + + def exists(self, source_type: str, bands: tuple) -> bool: + """ + Check whether the resource file for this combination exists on disk. + + Parameters + ---------- + source_type : str + ``'stars'`` or ``'galaxies'``. + bands : tuple of str + Band names, e.g. ``('g', 'r')``. + + Returns + ------- + bool + """ + return os.path.exists(self.get_path(source_type, bands)) diff --git a/streamobs/observed.py b/streamobs/observed.py index 4e2ce34..3f11ee4 100644 --- a/streamobs/observed.py +++ b/streamobs/observed.py @@ -244,6 +244,12 @@ def inject(self, data, bands=None, stream_config=None, **kwargs): perfect_galstarsep : bool, optional If True, also computes a flag assuming perfect star/galaxy separation (detection efficiency only, no classification losses). Default is False. + Only applies when ``source_type='stars'``. + source_type : str, optional + Type of source being injected. Either ``'stars'`` (default) or + ``'galaxies'``. When ``'galaxies'``, the detection flag uses + :meth:`~streamobs.surveys.Survey.get_gal_misclassification` + instead of the star completeness function. verbose : bool, optional Whether to print progress information. Default is True. @@ -362,7 +368,7 @@ def _inject_one_survey(self, data, bands, survey, rng=None, seed=None, **kwargs) Seed used to build an RNG when ``rng`` is None. **kwargs ``nside``, ``detection_mag_cut``, ``dust_correction``, - ``perfect_galstarsep``, ``verbose`` (see :meth:`inject`). + ``perfect_galstarsep``, ``source_type``, ``verbose`` (see :meth:`inject`). Returns ------- @@ -461,6 +467,7 @@ def _inject_one_survey(self, data, bands, survey, rng=None, seed=None, **kwargs) # Compute detection flag for completeness-band (reference band) if band == survey.completeness_band: + source_type = kwargs.get("source_type", "stars") flag_completeness_band = self.detect_flag( pix_maglim, survey=survey, @@ -469,9 +476,10 @@ def _inject_one_survey(self, data, bands, survey, rng=None, seed=None, **kwargs) rng=rng, seed=seed, perfect_galstarsep=False, + source_type=source_type, **kwargs, ) - if perfect_galstarsep: + if perfect_galstarsep and source_type == "stars": flag_detection_only_band = self.detect_flag( pix_maglim, survey=survey, @@ -480,6 +488,7 @@ def _inject_one_survey(self, data, bands, survey, rng=None, seed=None, **kwargs) rng=rng, seed=seed, perfect_galstarsep=True, + source_type=source_type, **kwargs, ) @@ -1166,10 +1175,14 @@ def sample_measured_magnitudes(self, mag_true, mag_err, **kwargs): def detect_flag(self, pix, survey, mag=None, band="r", **kwargs): """ - Apply the survey selection to determine detection flags for stars. + Apply the survey selection to determine detection flags. - This method uses the survey completeness function and random sampling - to determine which stars would be detected by the survey. + For stars (``source_type='stars'``, the default), uses the survey's + combined completeness (detection × classification) or detection-only + efficiency when ``perfect_galstarsep=True``. For galaxies + (``source_type='galaxies'``), uses + :meth:`~streamobs.surveys.Survey.get_gal_misclassification` and + ignores ``perfect_galstarsep``. Parameters ---------- @@ -1180,8 +1193,7 @@ def detect_flag(self, pix, survey, mag=None, band="r", **kwargs): band : str, optional Band to consider for detection. Default is 'r'. survey : Survey - Survey whose completeness/detection-efficiency curves to use - (required). + Survey whose efficiency curves to use (required). **kwargs Additional keyword arguments: @@ -1189,13 +1201,17 @@ def detect_flag(self, pix, survey, mag=None, band="r", **kwargs): Random number generator instance. seed : int, optional Random seed if rng is not provided. + source_type : str, optional + ``'stars'`` (default) or ``'galaxies'``. perfect_galstarsep : bool, optional - If True, assumes perfect star/galaxy separation. Default is False. + If True and ``source_type='stars'``, uses detection-only + efficiency (no classification losses). Ignored for galaxies. + Default is False. Returns ------- np.ndarray - Boolean array: True for detected stars, False otherwise. + Boolean array: True for detected objects, False otherwise. Raises ------ @@ -1208,16 +1224,18 @@ def detect_flag(self, pix, survey, mag=None, band="r", **kwargs): seed = kwargs.pop("seed", None) rng = np.random.default_rng(seed) - # Select the appropriate magnitude and map depending on the band maglim = survey.get_maglim(band, pixel=pix) - perfect_galstarsep = kwargs.get("perfect_galstarsep", False) - if perfect_galstarsep: - compl = survey.get_detection_efficiency(band, mag, maglim) + source_type = kwargs.get("source_type", "stars") + if source_type == "galaxies": + compl = survey.get_gal_misclassification(band, mag, maglim) else: - compl = survey.get_completeness(band, mag, maglim) + perfect_galstarsep = kwargs.get("perfect_galstarsep", False) + if perfect_galstarsep: + compl = survey.get_detection_efficiency(band, mag, maglim) + else: + compl = survey.get_completeness(band, mag, maglim) - # Set the threshold using completeness threshold = rng.uniform(size=len(mag)) <= compl return threshold diff --git a/streamobs/surveys.py b/streamobs/surveys.py index be1c704..84be45d 100644 --- a/streamobs/surveys.py +++ b/streamobs/surveys.py @@ -120,6 +120,7 @@ class Survey: delta_saturation: Optional[float] = None log_photo_error_catalog: Optional[Callable] = None log_photo_error_sample: Optional[Callable] = None + gal_misclassification: Optional[Callable] = None # Band-independent maps ebv_map: Optional[np.ndarray] = None @@ -560,6 +561,62 @@ def get_efficiency( return compl + def get_gal_misclassification( + self, band: str, magnitude: float, maglim: float, **kwargs + ) -> float: + """ + Get galaxy misclassification efficiency (probability a galaxy passes stellar selection). + + Mirrors :meth:`get_efficiency` for the ``"completeness"`` type, but + **without 1-padding at the bright end**: bright galaxies are not forced + to efficiency = 1 when their delta_mag falls below the saturation + threshold. This reproduces the behaviour of ``custom_get_completeness`` + used for galaxies in external background-generation scripts. + + Parameters + ---------- + band : str + Band identifier (e.g., 'g', 'r'). + magnitude : float or np.ndarray + True apparent magnitude(s) including extinction. + maglim : float or np.ndarray + Magnitude limit(s) at the source position(s). + **kwargs + delta_saturation : float, optional + Override the survey's default saturation threshold. + + Returns + ------- + float or np.ndarray + Misclassification probability in [0, 1]. + + Raises + ------ + ValueError + If :attr:`gal_misclassification` has not been loaded. + """ + if self.gal_misclassification is None: + raise ValueError( + "gal_misclassification function not loaded for this survey. " + "Load it via SurveyFactory.set_gal_misclassification() or the " + "'gal_misclassification' config key." + ) + + delta_mag = magnitude - maglim + + # No 1-padding: apply the interpolator directly + compl = self.gal_misclassification(delta_mag) + + compl = np.where( + magnitude < self.saturation[band], 0.0, compl + ) # saturation at the bright end + + compl = np.where( + (maglim < self.saturation[band]) | np.isnan(maglim), 0.0, compl + ) # not observed if the area is not covered + + return compl + class SurveyFactory: """ @@ -1165,6 +1222,27 @@ def _load_survey_data(cls, survey: Survey, config: dict, **kwargs): if verbose: print("No classification efficiency file found, skipping.") + # Load galaxy misclassification efficiency (optional) + if "gal_misclassification" in survey_config: + try: + cls._load_file( + survey, + survey_config, + "gal_misclassification", + "Galaxy misclassification efficiency", + lambda f: cls.set_completeness( + f, + delta_saturation=survey.delta_saturation, + selection="missclassified", + ), + data_path_survey, + data_path_others, + **kwargs, + ) + except Exception as e: + if verbose: + print(f"Could not load galaxy misclassification file: {e}") + # Load photometric error model(s), same for all bands. Two curves: # - catalog : reported error vs delta_mag (the survey's photoerror file). # Written as magerr and used for the S/N cut. Always loaded. @@ -1401,6 +1479,8 @@ def set_completeness(filename, delta_saturation=-10.4, selection="both"): - 'classification_eff' : Classification efficiency only (the legacy misspelled header 'classifiction_eff' is also accepted) - 'classification_detection_eff' : Combined efficiency + - 'gal_misclassification_eff' : Galaxy misclassification efficiency + (probability a galaxy passes stellar selection) delta_saturation : float, optional Magnitude difference threshold for saturation. Default is -10.4. @@ -1410,7 +1490,10 @@ def set_completeness(filename, delta_saturation=-10.4, selection="both"): - 'detected' : Detection efficiency only (column 'detection_eff') - 'classified' : Classification efficiency only (column 'classification_eff', or legacy 'classifiction_eff') - - 'both' : Combined detection and classification (column 'classification_detection_eff') + - 'both' : Combined detection and classification (column + 'classification_detection_eff') + - 'missclassified' : Galaxy misclassification efficiency (column + 'gal_misclassification_eff') Default is 'both'. @@ -1422,7 +1505,7 @@ def set_completeness(filename, delta_saturation=-10.4, selection="both"): Raises ------ ValueError - If selection is not one of 'detected', 'classified', or 'both'. + If selection is not one of the recognised values. Notes ----- @@ -1446,9 +1529,12 @@ def set_completeness(filename, delta_saturation=-10.4, selection="both"): efficiencies = data["classifiction_eff"] elif selection == "both": efficiencies = data["classification_detection_eff"] + elif selection == "missclassified": + efficiencies = data["gal_misclassification_eff"] else: raise ValueError( - f"Invalid selection '{selection}'. Must be 'detected', 'classified', or 'both'" + f"Invalid selection '{selection}'. Must be 'detected', 'classified', " + "'both', or 'missclassified'." ) # Extend efficiency to bright end (force to zero at saturation) diff --git a/streamobs/utils.py b/streamobs/utils.py index 3f19ccd..f52349a 100644 --- a/streamobs/utils.py +++ b/streamobs/utils.py @@ -3,9 +3,43 @@ Utils for streamobs """ +import pandas as pd import yaml +def load_catalog(catalog): + """Load a catalog as a :class:`pandas.DataFrame`. + + Parameters + ---------- + catalog : pd.DataFrame or str + A DataFrame (returned as-is) or a path to a parquet or CSV file. + + Returns + ------- + pd.DataFrame + + Raises + ------ + ValueError + If ``catalog`` is neither a DataFrame nor a recognised file path. + """ + if isinstance(catalog, pd.DataFrame): + return catalog + if isinstance(catalog, str): + if catalog.endswith(".parquet"): + return pd.read_parquet(catalog) + if catalog.endswith(".csv"): + return pd.read_csv(catalog) + raise ValueError( + f"Unrecognised file extension for catalog path '{catalog}'. " + "Expected .parquet or .csv." + ) + raise ValueError( + f"catalog must be a DataFrame or a path string, got {type(catalog)}." + ) + + def parse_config(config): """Parse a yaml formatted file or string into a dict. @@ -20,6 +54,6 @@ def parse_config(config): try: # If `config` is a file return yaml.safe_load(open(config, "r")) - except OSError, FileNotFoundError: + except (OSError, FileNotFoundError): # Otherwise assume it is a string return yaml.safe_load(config) diff --git a/tests/test_background.py b/tests/test_background.py new file mode 100644 index 0000000..b3800c9 --- /dev/null +++ b/tests/test_background.py @@ -0,0 +1,328 @@ +""" +Tests for the streamobs.background subpackage. + +All tests are marked ``background`` so they can be run in isolation: + + pytest tests/test_background.py -m background + +Tests that write to disk (Storage, ResourceBuilder) use the ``tmp_path`` +pytest fixture so the temporary directory is created and cleaned up +automatically regardless of test outcome. +""" + +import numpy as np +import pandas as pd +import pytest + +pytestmark = pytest.mark.background + + +# --------------------------------------------------------------------------- +# Shared fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture(scope="module") +def mock_survey(verbose=False): + from streamobs.surveys import Survey + + return Survey.load("lsst", release="yr4", verbose=verbose) + + +@pytest.fixture(scope="module") +def tiny_stars_catalog(): + """Minimal DataFrame that looks like a true background star catalog.""" + rng = np.random.default_rng(0) + n = 100 + return pd.DataFrame( + { + "ra": rng.uniform(30.0, 60.0, n), + "dec": rng.uniform(-20.0, 0.0, n), + "lsst_yr4_g_true": rng.uniform(20.0, 27.0, n), + "lsst_yr4_r_true": rng.uniform(20.0, 27.0, n), + } + ) + + +@pytest.fixture(scope="module") +def tiny_galaxies_catalog(): + """Minimal DataFrame that looks like a true background galaxy catalog.""" + rng = np.random.default_rng(1) + n = 100 + return pd.DataFrame( + { + "ra": rng.uniform(30.0, 60.0, n), + "dec": rng.uniform(-20.0, 0.0, n), + "lsst_yr4_g_true": rng.uniform(20.0, 28.0, n), + "lsst_yr4_r_true": rng.uniform(20.0, 28.0, n), + } + ) + + +# --------------------------------------------------------------------------- +# Part 1 — Survey galaxy misclassification +# --------------------------------------------------------------------------- + + +class TestSurveyGalMisclassification: + """Tests for the gal_misclassification field and method on Survey.""" + + def test_gal_misclassification_field_exists(self, mock_survey): + """Survey dataclass must expose the gal_misclassification attribute.""" + assert hasattr(mock_survey, "gal_misclassification") + + def test_get_gal_misclassification_raises_when_not_loaded(self, mock_survey): + """get_gal_misclassification raises ValueError if the function is not loaded.""" + import copy + + s = copy.deepcopy(mock_survey) + s.gal_misclassification = None + mags = np.linspace(20.0, 27.0, 10) + maglim = np.full(10, 26.5) + with pytest.raises(ValueError, match="gal_misclassification"): + s.get_gal_misclassification("r", mags, maglim) + + def test_get_gal_misclassification_no_1padding(self, mock_survey): + """get_gal_misclassification must NOT return 1 at the bright end (no 1-padding). + + Verify by checking that the method returns 0 for very bright objects + (below saturation) and does not saturate to 1.0 for delta_mag far below + delta_saturation. + """ + ... + + def test_set_completeness_missclassified_loads_file(self, tmp_path): + """set_completeness with selection='missclassified' must return a callable interpolator.""" + from streamobs.surveys import SurveyFactory + + csv_path = tmp_path / "gal_misclassification.csv" + csv_path.write_text( + "delta_mag,gal_misclassification_eff\n" + "-12.0,0.0\n" + "-5.0,0.0\n" + "0.0,0.3\n" + "1.0,0.0\n" + ) + func = SurveyFactory.set_completeness(str(csv_path), selection="missclassified") + assert callable(func) + result = func(np.array([0.0])) + assert 0.0 <= float(result) <= 1.0 + + +# --------------------------------------------------------------------------- +# Part 2 — StreamInjector source_type parameter +# --------------------------------------------------------------------------- + + +class TestInjectSourceType: + """Tests that source_type is accepted and routed correctly in StreamInjector.""" + + def test_inject_accepts_source_type_kwarg(self): + """inject() must accept source_type without raising TypeError.""" + from streamobs.observed import StreamInjector + import inspect + + sig = inspect.signature(StreamInjector.inject) + # source_type is passed through **kwargs — verify inject accepts **kwargs + assert "kwargs" in str(sig) or "source_type" in sig.parameters + + def test_inject_galaxies_uses_gal_misclassification( + self, mock_survey, tiny_galaxies_catalog + ): + """inject() with source_type='galaxies' must reach get_gal_misclassification.""" + ... + + +# --------------------------------------------------------------------------- +# Part 3 — BackgroundCatalogInjector +# --------------------------------------------------------------------------- + + +class TestBackgroundCatalogInjector: + """Tests for BackgroundCatalogInjector.""" + + def test_init(self, mock_survey): + """BackgroundCatalogInjector can be instantiated with a Survey.""" + from streamobs.background import BackgroundCatalogInjector + + inj = BackgroundCatalogInjector(mock_survey) + assert inj._survey is mock_survey + + def test_prepare_survey_no_dust(self, mock_survey): + """_prepare_survey with no_dust=True must zero the EBV map.""" + ... + + def test_prepare_survey_uniform_maglim(self, mock_survey): + """_prepare_survey with uniform_maglim must replace maglim maps with constants.""" + ... + + def test_inject_stars_delegates_to_stream_injector( + self, mock_survey, tiny_stars_catalog + ): + """inject_stars must return a DataFrame (delegates to StreamInjector).""" + ... + + def test_inject_galaxies_delegates_to_stream_injector( + self, mock_survey, tiny_galaxies_catalog + ): + """inject_galaxies must return a DataFrame.""" + ... + + +# --------------------------------------------------------------------------- +# Part 4 — BackgroundStorage +# --------------------------------------------------------------------------- + + +class TestBackgroundStorage: + """Tests for BackgroundStorage persistence helpers.""" + + def test_get_path_naming(self, tmp_path): + """get_path must embed source_type and band names in the filename.""" + from streamobs.background import BackgroundStorage + + storage = BackgroundStorage( + base_path=str(tmp_path), survey_name="lsst", release="yr4" + ) + path = storage.get_path("stars", ("g", "r")) + assert "stars" in path + assert "gr" in path + assert path.endswith(".parquet") + + def test_save_data_creates_file(self, tmp_path): + """save_data must write a file at the path returned by get_path.""" + ... + + def test_load_data_roundtrip(self, tmp_path): + """load_data must recover the dict saved by save_data.""" + ... + + def test_exists_false_before_save(self, tmp_path): + """exists must return False when the file is not on disk.""" + from streamobs.background import BackgroundStorage + + storage = BackgroundStorage( + base_path=str(tmp_path), survey_name="lsst", release="yr4" + ) + assert storage.exists("stars", ("g", "r")) is False + + def test_exists_true_after_save(self, tmp_path): + """exists must return True after save_data has been called.""" + ... + + +# --------------------------------------------------------------------------- +# Part 5 — BackgroundResourceBuilder +# --------------------------------------------------------------------------- + + +class TestBackgroundResourceBuilder: + """Tests for BackgroundResourceBuilder (uses tmp_path for storage).""" + + def test_init(self): + """BackgroundResourceBuilder can be instantiated.""" + from streamobs.background import BackgroundResourceBuilder + + builder = BackgroundResourceBuilder(survey_name="lsst", release="yr4") + assert builder.survey_name == "lsst" + assert isinstance(builder.resources, dict) + + def test_build_one_config(self, mock_survey, tiny_stars_catalog): + """_build_one_config must return a dict with the expected keys.""" + ... + + def test_save_via_storage(self, tmp_path, tiny_stars_catalog): + """save must write a parquet file via BackgroundStorage.""" + ... + + def test_load_via_storage(self, tmp_path, tiny_stars_catalog): + """load must reconstruct resources from the file saved by save.""" + ... + + +# --------------------------------------------------------------------------- +# Part 6 — LightBackgroundGenerator +# --------------------------------------------------------------------------- + + +class TestLightBackgroundGenerator: + """Tests for LightBackgroundGenerator.""" + + @pytest.fixture(scope="class") + def storage_with_data(self, tmp_path_factory, mock_survey, tiny_stars_catalog): + """BackgroundStorage pre-populated with a minimal CMD grid.""" + ... + + def test_init(self, mock_survey, tmp_path): + """LightBackgroundGenerator can be instantiated.""" + from streamobs.background import BackgroundStorage, LightBackgroundGenerator + + storage = BackgroundStorage(base_path=str(tmp_path), survey_name="lsst") + gen = LightBackgroundGenerator(storage, mock_survey) + assert gen.survey is mock_survey + + def test_generate_stars(self): + """generate with source_type='stars' must return a DataFrame.""" + ... + + def test_generate_galaxies(self): + """generate with source_type='galaxies' must return a DataFrame.""" + ... + + def test_generate_both(self): + """generate with source_type='both' must return combined DataFrame.""" + ... + + +# --------------------------------------------------------------------------- +# Part 7 — Background (top-level wrapper) +# --------------------------------------------------------------------------- + + +class TestBackground: + """Tests for the Background wrapper class.""" + + def test_init_rejects_invalid_source_type(self, mock_survey): + """Background raises ValueError for unknown source_type.""" + from streamobs.background import Background + + with pytest.raises(ValueError, match="source_type"): + Background(mock_survey, source_type="invalid") + + def test_init_rejects_invalid_method(self, mock_survey): + """Background raises ValueError for unknown method.""" + from streamobs.background import Background + + with pytest.raises(ValueError, match="method"): + Background(mock_survey, method="invalid") + + def test_full_method_stars(self, mock_survey, tiny_stars_catalog): + """Background with method='full' and source_type='stars' must generate a catalog.""" + ... + + def test_full_method_galaxies(self, mock_survey, tiny_galaxies_catalog): + """Background with method='full' and source_type='galaxies' must generate a catalog.""" + ... + + def test_full_method_both( + self, mock_survey, tiny_stars_catalog, tiny_galaxies_catalog + ): + """Background with method='full' and source_type='both' must combine catalogs.""" + ... + + def test_light_method_default_storage(self, mock_survey): + """Background with method='light' and storage=None must fall back to _default_storage.""" + from streamobs.background import Background + + bg = Background(mock_survey, method="light") + assert bg.storage is not None + + def test_light_method_custom_storage(self, mock_survey, tmp_path): + """Background accepts a user-supplied BackgroundStorage.""" + from streamobs.background import Background, BackgroundStorage + + storage = BackgroundStorage( + base_path=str(tmp_path), survey_name="lsst", release="yr4" + ) + bg = Background(mock_survey, method="light", storage=storage) + assert bg.storage is storage From e765e9d5033d826fbab9fec3122e870c33174cba Mon Sep 17 00:00:00 2001 From: MatthieuPe Date: Fri, 19 Jun 2026 17:41:11 +0200 Subject: [PATCH 2/8] code wrapper --- streamobs/background/catalog_injector.py | 39 ++++++++++-------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/streamobs/background/catalog_injector.py b/streamobs/background/catalog_injector.py index 505c232..aa0c031 100644 --- a/streamobs/background/catalog_injector.py +++ b/streamobs/background/catalog_injector.py @@ -2,10 +2,6 @@ Full-injection background pipeline using known catalogs. """ -import copy - -import numpy as np - from ..observed import StreamInjector from ..surveys import Survey from ..utils import load_catalog @@ -16,32 +12,27 @@ class BackgroundCatalogInjector: Thin wrapper around :class:`~streamobs.observed.StreamInjector` for background catalog injection. - Provides :meth:`inject_stars` and :meth:`inject_galaxies` convenience - methods that each call :meth:`~streamobs.observed.StreamInjector.inject` - with the correct ``source_type``. Survey preparation — deep-copying, - zeroing dust, setting uniform magnitude limits — is handled by the single - :meth:`_prepare_survey` method and is shared between both injection modes. + Exposes :meth:`inject_stars` and :meth:`inject_galaxies` so the caller + does not need to pass ``source_type`` explicitly. All injection logic + lives in :class:`~streamobs.observed.StreamInjector`. Parameters ---------- survey : Survey Survey instance to inject into. **kwargs - Forwarded to :meth:`_prepare_survey` at construction time (e.g. - ``no_dust``, ``uniform_maglim``). + Forwarded to :class:`~streamobs.observed.StreamInjector`. Examples -------- - Inject a stars catalog with a uniform magnitude limit: - - >>> injector = BackgroundCatalogInjector(survey, no_dust=True, - ... uniform_maglim={'g': 26.0, 'r': 26.5}) + >>> injector = BackgroundCatalogInjector(survey) >>> obs_stars = injector.inject_stars(catalog_df, bands=['g', 'r']) + >>> obs_gals = injector.inject_galaxies(catalog_df, bands=['g', 'r']) """ def __init__(self, survey: Survey, **kwargs): self._survey = survey - self._kwargs = kwargs + self.streaminjector = StreamInjector(self._survey, **kwargs) def inject_stars(self, catalog, bands=None, **kwargs) -> "pd.DataFrame": """ @@ -50,10 +41,7 @@ def inject_stars(self, catalog, bands=None, **kwargs) -> "pd.DataFrame": Parameters ---------- catalog : pd.DataFrame or str - True stellar catalog. Accepts a DataFrame or a path to parquet/CSV - (loaded via :func:`~streamobs.utils.load_catalog`). Must contain - sky coordinates (``ra``/``dec`` or ``phi1``/``phi2``) and true - magnitude columns. + True stellar catalog. Accepts a DataFrame or a path to parquet/CSV. bands : list of str, optional Photometric bands to inject. Defaults to ``['r', 'g']``. **kwargs @@ -64,7 +52,10 @@ def inject_stars(self, catalog, bands=None, **kwargs) -> "pd.DataFrame": pd.DataFrame Observed catalog with survey-namespaced magnitude and flag columns. """ - ... + catalog = load_catalog(catalog) + return self.streaminjector.inject( + catalog, bands=bands, source_type="stars", **kwargs + ) def inject_galaxies(self, catalog, bands=None, **kwargs) -> "pd.DataFrame": """ @@ -79,7 +70,6 @@ def inject_galaxies(self, catalog, bands=None, **kwargs) -> "pd.DataFrame": ---------- catalog : pd.DataFrame or str True galaxy catalog. Accepts a DataFrame or a path to parquet/CSV. - Must contain sky coordinates and true magnitude columns. bands : list of str, optional Photometric bands to inject. Defaults to ``['r', 'g']``. **kwargs @@ -90,4 +80,7 @@ def inject_galaxies(self, catalog, bands=None, **kwargs) -> "pd.DataFrame": pd.DataFrame Observed catalog with survey-namespaced magnitude and flag columns. """ - ... + catalog = load_catalog(catalog) + return self.streaminjector.inject( + catalog, bands=bands, source_type="galaxies", **kwargs + ) From f59aef3e8a9d81949dcaab402f43df1ea6f063d7 Mon Sep 17 00:00:00 2001 From: MatthieuPe Date: Fri, 19 Jun 2026 17:41:45 +0200 Subject: [PATCH 3/8] no dependence on release --- streamobs/background/storage.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/streamobs/background/storage.py b/streamobs/background/storage.py index 6a8b4d7..782732c 100644 --- a/streamobs/background/storage.py +++ b/streamobs/background/storage.py @@ -28,23 +28,20 @@ class BackgroundStorage: ``{package_root}/data/background/``. survey_name : str, optional Survey identifier (e.g. ``'lsst'``). Used in the file path. - release : str, optional - Survey release string (e.g. ``'yr5'``). Used in the file path. Examples -------- - >>> storage = BackgroundStorage(survey_name='lsst', release='yr5') + >>> storage = BackgroundStorage(survey_name='lsst') >>> path = storage.get_path('stars', ('g', 'r')) - >>> # -> .../data/background/lsst_yr5/stars_gr.parquet + >>> # -> .../data/background/lsst/stars_gr.parquet """ - def __init__(self, base_path=None, survey_name="lsst", release=None, **kwargs): + def __init__(self, base_path=None, survey_name="lsst", **kwargs): if base_path is None: _pkg_root = os.path.join(os.path.dirname(__file__), "..", "..", "data") base_path = os.path.join(_pkg_root, "background") self.base_path = base_path self.survey_name = survey_name - self.release = release def get_path(self, source_type: str, bands: tuple) -> str: """ @@ -61,14 +58,11 @@ def get_path(self, source_type: str, bands: tuple) -> str: ------- str Absolute path to the parquet file, e.g. - ``{base_path}/lsst_yr5/stars_gr.parquet``. + ``{base_path}/lsst/stars_gr.parquet``. """ - survey_key = ( - f"{self.survey_name}_{self.release}" if self.release else self.survey_name - ) bands_str = "".join(bands) filename = f"{source_type}_{bands_str}.parquet" - return os.path.join(self.base_path, survey_key, filename) + return os.path.join(self.base_path, self.survey_name, filename) def save_data( self, From a3019896e9f879dfd45dde36f5ace3e6be6abd74 Mon Sep 17 00:00:00 2001 From: MatthieuPe Date: Fri, 19 Jun 2026 18:39:24 +0200 Subject: [PATCH 4/8] add missclassification --- streamobs/surveys.py | 153 ++++++++++++++++++++++++++++++++----------- 1 file changed, 114 insertions(+), 39 deletions(-) diff --git a/streamobs/surveys.py b/streamobs/surveys.py index 84be45d..ad66ece 100644 --- a/streamobs/surveys.py +++ b/streamobs/surveys.py @@ -121,6 +121,7 @@ class Survey: log_photo_error_catalog: Optional[Callable] = None log_photo_error_sample: Optional[Callable] = None gal_misclassification: Optional[Callable] = None + gal_misclassification_detection: Optional[Callable] = None # Band-independent maps ebv_map: Optional[np.ndarray] = None @@ -528,37 +529,72 @@ def get_efficiency( get_detection_efficiency : Convenience wrapper for detection-only. get_classification_efficiency : Convenience wrapper for classification-only. """ - if type == "completeness": + delta_saturation = kwargs.get("delta_saturation", self.delta_saturation) + delta_mag = magnitude - maglim + + if type in ("missclassified", "detected_missclassified"): + # Galaxy-related efficiencies: no 1-padding at the bright end. + if type == "missclassified": + func = self.gal_misclassification + if func is None: + raise ValueError( + "Efficiency function for type 'missclassified' not loaded." + ) + else: # detected_missclassified + func = getattr(self, "gal_misclassification_detection", None) + if func is None: + # Fallback: missclassification × detection efficiency. + mis = self.gal_misclassification + det = getattr(self, "efficiency_detection", None) + if mis is None or det is None: + raise ValueError( + "Efficiency function for type 'detected_missclassified' not " + "loaded and cannot be derived (need both 'missclassified' and " + "'detection_efficiency')." + ) + func = lambda dm: mis(dm) * det(dm) # noqa: E731 + compl = func(delta_mag) + compl = np.where(magnitude < self.saturation[band], 0.0, compl) + compl = np.where( + (maglim < self.saturation[band]) | np.isnan(maglim), 0.0, compl + ) + return compl + + if type in ("completeness", "classification_detection"): func = self.completeness + if func is None: + # Fallback: compute classification × detection on the fly. + det = getattr(self, "efficiency_detection", None) + cls_ = getattr(self, "efficiency_classification", None) + if det is None or cls_ is None: + raise ValueError( + "Efficiency function for type 'completeness' not loaded and " + "cannot be derived (need both detection and classification)." + ) + func = lambda dm: det(dm) * cls_(dm) # noqa: E731 elif type == "detection_efficiency": - func = self.efficiency_detection + func = getattr(self, "efficiency_detection", None) elif type == "classification_efficiency": - func = self.efficiency_classification + func = getattr(self, "efficiency_classification", None) else: - raise ValueError(f"Efficiency type '{type}' not recognized.") + raise ValueError( + f"Efficiency type '{type}' not recognized. Valid types: " + "'completeness', 'classification_detection', 'detection_efficiency', " + "'classification_efficiency', 'missclassified', 'detected_missclassified'." + ) if func is None: - raise ValueError(f"Efficiency function for type '{type}' not loaded") + raise ValueError(f"Efficiency function for type '{type}' not loaded.") - delta_saturation = kwargs.get("delta_saturation", self.delta_saturation) - - # Calculate delta_mag - delta_mag = magnitude - maglim - - # Apply saturation condition: 1 padding for objects fainter than saturation but equivalent mag brighter than saturation0 + # 1-padding: bright sources (well above saturation) are always detected. compl = np.where( (magnitude > self.saturation[band]) & (delta_mag <= delta_saturation), 1.0, func(delta_mag), - ) # 1 padded - - compl = np.where( - magnitude < self.saturation[band], 0.0, compl - ) # saturation at the bright end - + ) + compl = np.where(magnitude < self.saturation[band], 0.0, compl) compl = np.where( (maglim < self.saturation[band]) | np.isnan(maglim), 0.0, compl - ) # not observed if the area is not covered - + ) return compl def get_gal_misclassification( @@ -595,28 +631,45 @@ def get_gal_misclassification( ValueError If :attr:`gal_misclassification` has not been loaded. """ - if self.gal_misclassification is None: - raise ValueError( - "gal_misclassification function not loaded for this survey. " - "Load it via SurveyFactory.set_gal_misclassification() or the " - "'gal_misclassification' config key." - ) + return self.get_efficiency(band, magnitude, maglim, type="missclassified", **kwargs) - delta_mag = magnitude - maglim - - # No 1-padding: apply the interpolator directly - compl = self.gal_misclassification(delta_mag) + def get_gal_misclassification_detection( + self, band: str, magnitude: float, maglim: float, **kwargs + ) -> float: + """ + Get galaxy misclassification × detection efficiency. - compl = np.where( - magnitude < self.saturation[band], 0.0, compl - ) # saturation at the bright end + Mirrors :meth:`get_efficiency` for the ``"detected_missclassified"`` type, + but **without 1-padding at the bright end**: bright galaxies are not forced + to efficiency = 1 when their delta_mag falls below the saturation + threshold. This reproduces the behaviour of ``custom_get_completeness`` + used for galaxies in external background-generation scripts. - compl = np.where( - (maglim < self.saturation[band]) | np.isnan(maglim), 0.0, compl - ) # not observed if the area is not covered + Parameters + ---------- + band : str + Band identifier (e.g., 'g', 'r'). + magnitude : float or np.ndarray + True apparent magnitude(s) including extinction. + maglim : float or np.ndarray + Magnitude limit(s) at the source position(s). + **kwargs + delta_saturation : float, optional + Override the survey's default saturation threshold. - return compl + Returns + ------- + float or np.ndarray + Misclassification × detection probability in [0, 1]. + Raises + ------ + ValueError + If :attr:`gal_misclassification_detection` has not been loaded. + """ + return self.get_efficiency( + band, magnitude, maglim, type="detected_missclassified", **kwargs + ) class SurveyFactory: """ @@ -1243,6 +1296,26 @@ def _load_survey_data(cls, survey: Survey, config: dict, **kwargs): if verbose: print(f"Could not load galaxy misclassification file: {e}") + # Also try to load the combined detection × misclassification from the same file. + try: + cls._load_file( + survey, + survey_config, + "gal_misclassification_detection", + "Galaxy misclassification × detection efficiency", + lambda f: cls.set_completeness( + f, + delta_saturation=survey.delta_saturation, + selection="detected_missclassified", + ), + data_path_survey, + data_path_others, + filename=survey_config.get("gal_misclassification"), + **kwargs, + ) + except Exception: + pass # column absent — fallback to product at call time + # Load photometric error model(s), same for all bands. Two curves: # - catalog : reported error vs delta_mag (the survey's photoerror file). # Written as magerr and used for the S/N cut. Always loaded. @@ -1479,7 +1552,7 @@ def set_completeness(filename, delta_saturation=-10.4, selection="both"): - 'classification_eff' : Classification efficiency only (the legacy misspelled header 'classifiction_eff' is also accepted) - 'classification_detection_eff' : Combined efficiency - - 'gal_misclassification_eff' : Galaxy misclassification efficiency + - 'missclassification_eff' : Galaxy misclassification efficiency (probability a galaxy passes stellar selection) delta_saturation : float, optional @@ -1493,7 +1566,7 @@ def set_completeness(filename, delta_saturation=-10.4, selection="both"): - 'both' : Combined detection and classification (column 'classification_detection_eff') - 'missclassified' : Galaxy misclassification efficiency (column - 'gal_misclassification_eff') + 'missclassification_eff') Default is 'both'. @@ -1530,7 +1603,9 @@ def set_completeness(filename, delta_saturation=-10.4, selection="both"): elif selection == "both": efficiencies = data["classification_detection_eff"] elif selection == "missclassified": - efficiencies = data["gal_misclassification_eff"] + efficiencies = data["missclassification_eff"] + elif selection == "detected_missclassified": + efficiencies = data["missclassification_detection_eff"] else: raise ValueError( f"Invalid selection '{selection}'. Must be 'detected', 'classified', " From 6eef164ce6899276e78bd85f20985999553a1833 Mon Sep 17 00:00:00 2001 From: MatthieuPe Date: Fri, 19 Jun 2026 18:40:39 +0200 Subject: [PATCH 5/8] deal with different type of sources --- streamobs/observed.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/streamobs/observed.py b/streamobs/observed.py index 3f11ee4..1f6c244 100644 --- a/streamobs/observed.py +++ b/streamobs/observed.py @@ -476,7 +476,6 @@ def _inject_one_survey(self, data, bands, survey, rng=None, seed=None, **kwargs) rng=rng, seed=seed, perfect_galstarsep=False, - source_type=source_type, **kwargs, ) if perfect_galstarsep and source_type == "stars": @@ -488,7 +487,6 @@ def _inject_one_survey(self, data, bands, survey, rng=None, seed=None, **kwargs) rng=rng, seed=seed, perfect_galstarsep=True, - source_type=source_type, **kwargs, ) @@ -1181,7 +1179,7 @@ def detect_flag(self, pix, survey, mag=None, band="r", **kwargs): combined completeness (detection × classification) or detection-only efficiency when ``perfect_galstarsep=True``. For galaxies (``source_type='galaxies'``), uses - :meth:`~streamobs.surveys.Survey.get_gal_misclassification` and + :meth:`~streamobs.surveys.Survey.get_gal_misclassification_detection` and ignores ``perfect_galstarsep``. Parameters @@ -1228,7 +1226,7 @@ def detect_flag(self, pix, survey, mag=None, band="r", **kwargs): source_type = kwargs.get("source_type", "stars") if source_type == "galaxies": - compl = survey.get_gal_misclassification(band, mag, maglim) + compl = survey.get_gal_misclassification_detection(band, mag, maglim) else: perfect_galstarsep = kwargs.get("perfect_galstarsep", False) if perfect_galstarsep: From 673ba64152d619f4f958b191ba20ba5d666717af Mon Sep 17 00:00:00 2001 From: MatthieuPe Date: Fri, 19 Jun 2026 18:46:09 +0200 Subject: [PATCH 6/8] use by default the completeness file --- streamobs/surveys.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/streamobs/surveys.py b/streamobs/surveys.py index ad66ece..ca89f4d 100644 --- a/streamobs/surveys.py +++ b/streamobs/surveys.py @@ -1275,8 +1275,13 @@ def _load_survey_data(cls, survey: Survey, config: dict, **kwargs): if verbose: print("No classification efficiency file found, skipping.") - # Load galaxy misclassification efficiency (optional) - if "gal_misclassification" in survey_config: + # Load galaxy misclassification efficiency (optional). + # Uses the explicit 'gal_misclassification' config key when present, + # otherwise falls back to the completeness file (same CSV, different column). + _gal_mis_file = survey_config.get( + "gal_misclassification", survey_config.get("completeness") + ) + if _gal_mis_file is not None: try: cls._load_file( survey, @@ -1290,13 +1295,13 @@ def _load_survey_data(cls, survey: Survey, config: dict, **kwargs): ), data_path_survey, data_path_others, + filename=_gal_mis_file, **kwargs, ) - except Exception as e: - if verbose: - print(f"Could not load galaxy misclassification file: {e}") + except Exception: + pass # column absent in this survey's file — silently skip - # Also try to load the combined detection × misclassification from the same file. + # Also try the combined detection × misclassification from the same file. try: cls._load_file( survey, @@ -1310,7 +1315,7 @@ def _load_survey_data(cls, survey: Survey, config: dict, **kwargs): ), data_path_survey, data_path_others, - filename=survey_config.get("gal_misclassification"), + filename=_gal_mis_file, **kwargs, ) except Exception: From 3e11ab14db27dc0a54189432d497c8f61ca01345 Mon Sep 17 00:00:00 2001 From: MatthieuPe Date: Mon, 22 Jun 2026 10:04:46 +0200 Subject: [PATCH 7/8] not dev file tracking --- .gitignore | 3 + tests/test_background.py | 118 +++++++++++++++------------------------ 2 files changed, 48 insertions(+), 73 deletions(-) diff --git a/.gitignore b/.gitignore index e1c7407..11209b9 100644 --- a/.gitignore +++ b/.gitignore @@ -154,6 +154,9 @@ scripts/roman/script_to_notebook.py build_*_nb.py scripts/**/build_*_nb.py +# local-only development notebooks +notebooks/dev.ipynb + artifacts/ # Staged data archive for upload (built by bin/build_data_archive.py) /archive/ diff --git a/tests/test_background.py b/tests/test_background.py index b3800c9..9299979 100644 --- a/tests/test_background.py +++ b/tests/test_background.py @@ -21,40 +21,17 @@ # Shared fixtures # --------------------------------------------------------------------------- - -@pytest.fixture(scope="module") -def mock_survey(verbose=False): - from streamobs.surveys import Survey - - return Survey.load("lsst", release="yr4", verbose=verbose) - - -@pytest.fixture(scope="module") -def tiny_stars_catalog(): - """Minimal DataFrame that looks like a true background star catalog.""" - rng = np.random.default_rng(0) - n = 100 - return pd.DataFrame( - { - "ra": rng.uniform(30.0, 60.0, n), - "dec": rng.uniform(-20.0, 0.0, n), - "lsst_yr4_g_true": rng.uniform(20.0, 27.0, n), - "lsst_yr4_r_true": rng.uniform(20.0, 27.0, n), - } - ) - - @pytest.fixture(scope="module") def tiny_galaxies_catalog(): - """Minimal DataFrame that looks like a true background galaxy catalog.""" + """Galaxy catalog: same sky positions as stream_catalog but with randomised magnitudes.""" rng = np.random.default_rng(1) n = 100 return pd.DataFrame( { "ra": rng.uniform(30.0, 60.0, n), "dec": rng.uniform(-20.0, 0.0, n), - "lsst_yr4_g_true": rng.uniform(20.0, 28.0, n), - "lsst_yr4_r_true": rng.uniform(20.0, 28.0, n), + "lsst_g_true": rng.uniform(20.0, 28.0, n), + "lsst_r_true": rng.uniform(20.0, 28.0, n), } ) @@ -79,7 +56,7 @@ def test_get_gal_misclassification_raises_when_not_loaded(self, mock_survey): s.gal_misclassification = None mags = np.linspace(20.0, 27.0, 10) maglim = np.full(10, 26.5) - with pytest.raises(ValueError, match="gal_misclassification"): + with pytest.raises(ValueError, match="missclassified"): s.get_gal_misclassification("r", mags, maglim) def test_get_gal_misclassification_no_1padding(self, mock_survey): @@ -91,24 +68,6 @@ def test_get_gal_misclassification_no_1padding(self, mock_survey): """ ... - def test_set_completeness_missclassified_loads_file(self, tmp_path): - """set_completeness with selection='missclassified' must return a callable interpolator.""" - from streamobs.surveys import SurveyFactory - - csv_path = tmp_path / "gal_misclassification.csv" - csv_path.write_text( - "delta_mag,gal_misclassification_eff\n" - "-12.0,0.0\n" - "-5.0,0.0\n" - "0.0,0.3\n" - "1.0,0.0\n" - ) - func = SurveyFactory.set_completeness(str(csv_path), selection="missclassified") - assert callable(func) - result = func(np.array([0.0])) - assert 0.0 <= float(result) <= 1.0 - - # --------------------------------------------------------------------------- # Part 2 — StreamInjector source_type parameter # --------------------------------------------------------------------------- @@ -148,25 +107,44 @@ def test_init(self, mock_survey): inj = BackgroundCatalogInjector(mock_survey) assert inj._survey is mock_survey - def test_prepare_survey_no_dust(self, mock_survey): - """_prepare_survey with no_dust=True must zero the EBV map.""" - ... + def test_inject_stars_returns_nonempty_dataframe( + self, mock_survey, stream_catalog + ): + """inject_stars must return a non-empty DataFrame (smoke test only — full coverage in test_observed).""" + from streamobs.background import BackgroundCatalogInjector - def test_prepare_survey_uniform_maglim(self, mock_survey): - """_prepare_survey with uniform_maglim must replace maglim maps with constants.""" - ... + result = BackgroundCatalogInjector(mock_survey).inject_stars( + stream_catalog, bands=["g", "r"] + ) + assert isinstance(result, pd.DataFrame) + assert len(result) > 0 - def test_inject_stars_delegates_to_stream_injector( - self, mock_survey, tiny_stars_catalog + def test_inject_galaxies_returns_nonempty_dataframe( + self, mock_survey, tiny_galaxies_catalog ): - """inject_stars must return a DataFrame (delegates to StreamInjector).""" - ... + """inject_galaxies must return a non-empty DataFrame.""" + from streamobs.background import BackgroundCatalogInjector - def test_inject_galaxies_delegates_to_stream_injector( + result = BackgroundCatalogInjector(mock_survey).inject_galaxies( + tiny_galaxies_catalog, bands=["g", "r"] + ) + assert isinstance(result, pd.DataFrame) + assert len(result) > 0 + + def test_inject_galaxies_has_detection_flag( self, mock_survey, tiny_galaxies_catalog ): - """inject_galaxies must return a DataFrame.""" - ... + """inject_galaxies output must include a detection flag column.""" + from streamobs.background import BackgroundCatalogInjector + + result = BackgroundCatalogInjector(mock_survey).inject_galaxies( + tiny_galaxies_catalog, bands=["g", "r"] + ) + flag_cols = [c for c in result.columns if "flag" in c] + assert len(flag_cols) > 0 + assert "flag_observed" in flag_cols + assert result["flag_observed"].isin([0, 1]).all() + assert result["flag_observed"].sum() > 0 # at least one detected # --------------------------------------------------------------------------- @@ -181,9 +159,7 @@ def test_get_path_naming(self, tmp_path): """get_path must embed source_type and band names in the filename.""" from streamobs.background import BackgroundStorage - storage = BackgroundStorage( - base_path=str(tmp_path), survey_name="lsst", release="yr4" - ) + storage = BackgroundStorage(base_path=str(tmp_path), survey_name="lsst") path = storage.get_path("stars", ("g", "r")) assert "stars" in path assert "gr" in path @@ -201,9 +177,7 @@ def test_exists_false_before_save(self, tmp_path): """exists must return False when the file is not on disk.""" from streamobs.background import BackgroundStorage - storage = BackgroundStorage( - base_path=str(tmp_path), survey_name="lsst", release="yr4" - ) + storage = BackgroundStorage(base_path=str(tmp_path), survey_name="lsst") assert storage.exists("stars", ("g", "r")) is False def test_exists_true_after_save(self, tmp_path): @@ -227,15 +201,15 @@ def test_init(self): assert builder.survey_name == "lsst" assert isinstance(builder.resources, dict) - def test_build_one_config(self, mock_survey, tiny_stars_catalog): + def test_build_one_config(self, mock_survey, stream_catalog): """_build_one_config must return a dict with the expected keys.""" ... - def test_save_via_storage(self, tmp_path, tiny_stars_catalog): + def test_save_via_storage(self, tmp_path, stream_catalog): """save must write a parquet file via BackgroundStorage.""" ... - def test_load_via_storage(self, tmp_path, tiny_stars_catalog): + def test_load_via_storage(self, tmp_path, stream_catalog): """load must reconstruct resources from the file saved by save.""" ... @@ -249,7 +223,7 @@ class TestLightBackgroundGenerator: """Tests for LightBackgroundGenerator.""" @pytest.fixture(scope="class") - def storage_with_data(self, tmp_path_factory, mock_survey, tiny_stars_catalog): + def storage_with_data(self, tmp_path_factory, mock_survey, stream_catalog): """BackgroundStorage pre-populated with a minimal CMD grid.""" ... @@ -296,7 +270,7 @@ def test_init_rejects_invalid_method(self, mock_survey): with pytest.raises(ValueError, match="method"): Background(mock_survey, method="invalid") - def test_full_method_stars(self, mock_survey, tiny_stars_catalog): + def test_full_method_stars(self, mock_survey, stream_catalog): """Background with method='full' and source_type='stars' must generate a catalog.""" ... @@ -305,7 +279,7 @@ def test_full_method_galaxies(self, mock_survey, tiny_galaxies_catalog): ... def test_full_method_both( - self, mock_survey, tiny_stars_catalog, tiny_galaxies_catalog + self, mock_survey, stream_catalog, tiny_galaxies_catalog ): """Background with method='full' and source_type='both' must combine catalogs.""" ... @@ -321,8 +295,6 @@ def test_light_method_custom_storage(self, mock_survey, tmp_path): """Background accepts a user-supplied BackgroundStorage.""" from streamobs.background import Background, BackgroundStorage - storage = BackgroundStorage( - base_path=str(tmp_path), survey_name="lsst", release="yr4" - ) + storage = BackgroundStorage(base_path=str(tmp_path), survey_name="lsst") bg = Background(mock_survey, method="light", storage=storage) assert bg.storage is storage From f6632bd5a7f349a0f4e00d6eb419442348c931b7 Mon Sep 17 00:00:00 2001 From: MatthieuPe Date: Mon, 22 Jun 2026 11:45:12 +0200 Subject: [PATCH 8/8] testing the missclassification --- tests/test_surveys.py | 51 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/tests/test_surveys.py b/tests/test_surveys.py index 96760d9..e5b4d2d 100644 --- a/tests/test_surveys.py +++ b/tests/test_surveys.py @@ -441,6 +441,57 @@ def test_completeness_behavior( comp_faint < 0.1 ), f"Completeness should be near 0 for magnitudes well below saturation in band '{completeness_band}'" + + def test_missclassification_behavior( + self, + loaded_survey, + saturation_magnitudes, + bright_magnitudes, + faint_magnitudes, + base_maglim,): + + completeness_band = loaded_survey.completeness_band + sat = loaded_survey.saturation[completeness_band] + + # work only with magnitudes above or below saturation + sat_mag = saturation_magnitudes[saturation_magnitudes < sat] + bright_mag = bright_magnitudes[bright_magnitudes > sat] + faint_mag = faint_magnitudes[faint_magnitudes > sat] + + # Test that missclassification is loaded + # Only gal_misclassification_detection is needed, but if it None it can + # be estimated from gal_misclassification, so we require at least one of them to be present. + assert (loaded_survey.gal_misclassification_detection is not None) or (loaded_survey.gal_misclassification is not None), "Missclassification function is None" + + + + # Verify completeness behavior in each regime + if len(sat_mag) > 0: + comp_sat = loaded_survey.get_efficiency( + completeness_band, sat_mag, base_maglim, type = 'detected_missclassified', + ) + assert np.all( + comp_sat == 0.0 + ), f"Completeness should be 0 for magnitudes below saturation in band '{completeness_band}'" + + + mag_test = np.linspace(18, 28, 100) + comp_test = loaded_survey.get_efficiency( + completeness_band, mag_test, base_maglim, type = 'detected_missclassified', + ) + max_val = np.max(comp_test) + assert max_val < 1.0, f"Missclassification efficiency should be less than 1 for magnitudes in band '{completeness_band}'" + assert max_val > 0.0, f"Missclassification efficiency should be greater than 0 for magnitudes in band '{completeness_band}'" + + if len(faint_mag) > 0: + comp_faint = loaded_survey.get_efficiency( + completeness_band, faint_mag, base_maglim, type = 'detected_missclassified', + ) + assert np.all( + comp_faint < 0.1 + ), f"Completeness should be near 0 for magnitudes well below saturation in band '{completeness_band}'" + + def test_log_photo_error_behavior( self, loaded_survey,