diff --git a/.gitignore b/.gitignore index d7d0c8b..11209b9 100644 --- a/.gitignore +++ b/.gitignore @@ -128,9 +128,15 @@ dmypy.json # Pyre type checker .pyre/ +# Claude Code settings (local only) +.claude/ + # Surveys files data/surveys/* data/others/* + +# Background resources (precomputed CMD grids — built locally, not tracked) +data/background/ .DS_Store # External reference repos (design references only, not part of streamobs) /survey_systematics_in_LSST_streams/ @@ -148,6 +154,9 @@ scripts/roman/script_to_notebook.py build_*_nb.py scripts/**/build_*_nb.py +# local-only development notebooks +notebooks/dev.ipynb + artifacts/ # Staged data archive for upload (built by bin/build_data_archive.py) /archive/ diff --git a/docs/source/background.md b/docs/source/background.md new file mode 100644 index 0000000..200a10b --- /dev/null +++ b/docs/source/background.md @@ -0,0 +1,50 @@ +# Generating background + +`streamobs` provides two complementary ways to generate a background catalog +(stars and/or galaxies) for a stream realization. + +| Method | Speed | Requirements | +|--------|-------|--------------| +| `'light'` | Fast | Precomputed CMD resource files | +| `'full'` | Slow | True background catalogs | + +## Quick start — light method + +```python +from streamobs.surveys import Survey +from streamobs.background import Background + +survey = Survey.load('lsst', release='yr5') +bg = Background(survey, source_type='both', method='light') + +catalog = bg.generate( + phi1_limits=(-20, 20), + phi2_limits=(-2, 2), + gc_frame=frame, # gala GreatCircleICRSFrame +) +``` + +## Stars only or galaxies only + +```python +bg_stars = Background(survey, source_type='stars', method='light') +bg_gals = Background(survey, source_type='galaxies', method='light') +``` + +## Full injection method + +Requires a DataFrame of true background objects for each population: + +```python +bg = Background( + survey, + method='full', + source_type='both', + catalog_stars=df_true_stars, + catalog_galaxies=df_true_galaxies, +) +catalog = bg.generate(phi1_limits=(-20, 20), phi2_limits=(-2, 2)) +``` + +See [build_background_resources.md](build_background_resources.md) to learn +how to build or rebuild the resource files used by the light method. diff --git a/docs/source/build_background_resources.md b/docs/source/build_background_resources.md new file mode 100644 index 0000000..5965723 --- /dev/null +++ b/docs/source/build_background_resources.md @@ -0,0 +1,70 @@ +# Building background resources (developer guide) + +The **light background method** reads precomputed color–magnitude diagram +(CMD) histograms from parquet files stored in `data/background/`. These +files are not tracked by git and must be built locally (or provided by the +package maintainers). + +## What the resources are + +For each combination of ``(source_type, bands)``, one parquet file is +stored with a long-format table: + +``` +maglim_r | maglim_g | color_center | mag_center | count | n_ref | area_ref_deg2 +``` + +Each row represents one cell of a 2-D CMD histogram at a specific magnitude +limit pair. The generator looks up the nearest ``(maglim_r, maglim_g)`` +entry at runtime and samples from it. + +## How to build resources + +### 1. Prepare input catalogs + +You need a DataFrame of **true** (pre-observation) positions and magnitudes +for stars and/or galaxies. These catalogs are not part of `streamobs` and +must be supplied by the developer. + +```python +import pandas as pd +df_stars = pd.read_parquet('/path/to/true_stars.parquet') +df_galaxies = pd.read_parquet('/path/to/true_galaxies.parquet') +``` + +### 2. Run the builder + +```python +from streamobs.background import BackgroundResourceBuilder, BackgroundStorage + +builder = BackgroundResourceBuilder(survey_name='lsst', release='yr5') +builder.build( + catalog_stars=df_stars, + catalog_galaxies=df_galaxies, + bands=('g', 'r'), + maglim_ref_values=[25.0, 25.5, 26.0, 26.5, 27.0], + delta_range=(-1.0, 1.0), + delta_step=0.1, + source_type='both', +) +``` + +### 3. Save to disk + +```python +storage = BackgroundStorage(survey_name='lsst', release='yr5') +builder.save(storage) +``` + +Files are written to `data/background/lsst_yr5/` — one parquet per +``(source_type, bands)`` combination, e.g. `stars_gr.parquet`. + +## File naming convention + +`BackgroundStorage.get_path(source_type, bands)` returns the full path: + +``` +data/background/{survey_name}_{release}/{source_type}_{bands_str}.parquet +``` + +Example: `data/background/lsst_yr5/galaxies_gr.parquet`. diff --git a/docs/source/index.rst b/docs/source/index.rst index 6241600..774a7ce 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -103,10 +103,17 @@ Documentation Contents surveys/Roman +.. toctree:: + :maxdepth: 1 + :caption: Background generation + + background + build_background_resources + .. toctree:: :maxdepth: 2 :caption: For developers - + modifying_streamobs new_survey update_data diff --git a/pytest.ini b/pytest.ini index e3a2752..303c1aa 100644 --- a/pytest.ini +++ b/pytest.ini @@ -27,6 +27,7 @@ markers = observed: marks tests for injection utilities model: marks tests for model functionality utils: marks tests for utility functions + background: marks tests for the background subpackage # Minimum Python version minversion = 3.8 \ No newline at end of file diff --git a/streamobs/__init__.py b/streamobs/__init__.py index fac239d..0421dae 100644 --- a/streamobs/__init__.py +++ b/streamobs/__init__.py @@ -1,2 +1,10 @@ #!/usr/bin/env python """Nothing to see here.""" + +from .background import ( + Background, + BackgroundCatalogInjector, + BackgroundResourceBuilder, + BackgroundStorage, + LightBackgroundGenerator, +) diff --git a/streamobs/background/__init__.py b/streamobs/background/__init__.py new file mode 100644 index 0000000..2cbf63f --- /dev/null +++ b/streamobs/background/__init__.py @@ -0,0 +1,39 @@ +""" +Background generation for stellar stream analysis. + +This subpackage provides two complementary pipelines: + +- **Full injection** (:class:`BackgroundCatalogInjector`): runs a known + catalog of true stars or galaxies through the complete + :class:`~streamobs.observed.StreamInjector` pipeline. +- **Light generation** (:class:`LightBackgroundGenerator`): samples from + precomputed binned color–magnitude distributions stored by + :class:`BackgroundStorage`, giving fast per-pixel background realizations + without re-running the full injection. + +The top-level :class:`Background` class wraps both modes and lets users +generate stars-only, galaxies-only, or combined backgrounds. + +Typical usage — light method with default bundled resources:: + + from streamobs.surveys import Survey + from streamobs.background import Background + + survey = Survey.load('lsst', release='yr5') + bg = Background(survey, source_type='both', method='light') + catalog = bg.generate(phi1_limits=(-20, 20), phi2_limits=(-2, 2), gc_frame=frame) +""" + +from .background import Background +from .catalog_injector import BackgroundCatalogInjector +from .generator import LightBackgroundGenerator +from .resource_builder import BackgroundResourceBuilder +from .storage import BackgroundStorage + +__all__ = [ + "Background", + "BackgroundCatalogInjector", + "BackgroundResourceBuilder", + "BackgroundStorage", + "LightBackgroundGenerator", +] diff --git a/streamobs/background/background.py b/streamobs/background/background.py new file mode 100644 index 0000000..359e682 --- /dev/null +++ b/streamobs/background/background.py @@ -0,0 +1,193 @@ +""" +Top-level background generation wrapper. +""" + +import os + +from ..surveys import Survey +from .catalog_injector import BackgroundCatalogInjector +from .generator import LightBackgroundGenerator +from .storage import BackgroundStorage + + +class Background: + """ + High-level wrapper for background generation. + + Dispatches to either the full injection method + (:class:`BackgroundCatalogInjector`) or the fast light method + (:class:`LightBackgroundGenerator`) depending on ``method``. Stars and + galaxies are treated independently so users can build custom combinations. + + When ``method='light'`` and ``storage=None``, the class falls back to the + bundled package-level resource files (if present). + + Parameters + ---------- + survey : Survey + Survey instance defining the observation conditions. + source_type : str, optional + Which background components to generate: ``'stars'``, ``'galaxies'``, + or ``'both'``. Default ``'both'``. + method : str, optional + Generation method: ``'light'`` (fast, uses precomputed CMD grids) or + ``'full'`` (runs the complete injection pipeline). Default ``'light'``. + storage : BackgroundStorage, optional + Resource storage for the light method. When ``None``, package-bundled + resources are used (via :meth:`_default_storage`). + catalog_stars : pd.DataFrame, optional + True stellar catalog. Required when ``method='full'`` and + ``source_type`` includes stars. + catalog_galaxies : pd.DataFrame, optional + True galaxy catalog. Required when ``method='full'`` and + ``source_type`` includes galaxies. + **kwargs + Forwarded to the underlying injector or generator. + + Examples + -------- + Light method with bundled defaults:: + + bg = Background(survey, source_type='both', method='light') + catalog = bg.generate(phi1_limits=(-20, 20), phi2_limits=(-2, 2), gc_frame=frame) + + Full injection with user-supplied catalogs:: + + bg = Background( + survey, + method='full', + source_type='stars', + catalog_stars=df_stars, + ) + catalog = bg.generate(phi1_limits=(-20, 20), phi2_limits=(-2, 2)) + """ + + def __init__( + self, + survey: Survey, + source_type: str = "both", + method: str = "light", + storage: BackgroundStorage = None, + catalog_stars=None, + catalog_galaxies=None, + **kwargs, + ): + if source_type not in ("stars", "galaxies", "both"): + raise ValueError( + f"source_type must be 'stars', 'galaxies', or 'both', got '{source_type}'." + ) + if method not in ("light", "full"): + raise ValueError(f"method must be 'light' or 'full', got '{method}'.") + + self.survey = survey + self.source_type = source_type + self.method = method + self.catalog_stars = catalog_stars + self.catalog_galaxies = catalog_galaxies + self._kwargs = kwargs + + if storage is None and method == "light": + storage = self._default_storage(survey) + self.storage = storage + + def generate( + self, + phi1_limits, + phi2_limits, + gc_frame=None, + **kwargs, + ) -> "pd.DataFrame": + """ + Generate a background catalog for the given stream sky region. + + Parameters + ---------- + phi1_limits : tuple of float + ``(phi1_min, phi1_max)`` in degrees. + phi2_limits : tuple of float + ``(phi2_min, phi2_max)`` in degrees. + gc_frame : gala.coordinates.GreatCircleICRSFrame, optional + Great-circle frame. Required for the light method and when the + full method needs coordinate conversion. + **kwargs + Forwarded to the underlying generator or injector. + + Returns + ------- + pd.DataFrame + Background catalog. Column names follow the survey namespace + convention for the full method; for the light method columns are + ``phi1``, ``phi2``, ``color``, ``mag``, ``source_type``. + """ + if self.method == "full": + return self._generate_full(phi1_limits, phi2_limits, gc_frame, **kwargs) + return self._generate_light(phi1_limits, phi2_limits, gc_frame, **kwargs) + + def _generate_full( + self, + phi1_limits, + phi2_limits, + gc_frame, + **kwargs, + ) -> "pd.DataFrame": + """ + Generate background via full catalog injection. + + For each active source type (controlled by :attr:`source_type`), uses + :class:`BackgroundCatalogInjector` to inject the corresponding true + catalog into the survey. The results are concatenated. + + Parameters + ---------- + phi1_limits, phi2_limits : tuple of float + gc_frame : gala.coordinates.GreatCircleICRSFrame or None + **kwargs + + Returns + ------- + pd.DataFrame + """ + ... + + def _generate_light( + self, + phi1_limits, + phi2_limits, + gc_frame, + **kwargs, + ) -> "pd.DataFrame": + """ + Generate background via the fast light method. + + Delegates to :class:`LightBackgroundGenerator` using + :attr:`storage`. + + Parameters + ---------- + phi1_limits, phi2_limits : tuple of float + gc_frame : gala.coordinates.GreatCircleICRSFrame + **kwargs + + Returns + ------- + pd.DataFrame + """ + ... + + @staticmethod + def _default_storage(survey: Survey) -> BackgroundStorage: + """ + Return a :class:`BackgroundStorage` pointing to bundled package resources. + + Parameters + ---------- + survey : Survey + + Returns + ------- + BackgroundStorage + """ + return BackgroundStorage( + survey_name=survey.name, + release=survey.release, + ) diff --git a/streamobs/background/catalog_injector.py b/streamobs/background/catalog_injector.py new file mode 100644 index 0000000..aa0c031 --- /dev/null +++ b/streamobs/background/catalog_injector.py @@ -0,0 +1,86 @@ +""" +Full-injection background pipeline using known catalogs. +""" + +from ..observed import StreamInjector +from ..surveys import Survey +from ..utils import load_catalog + + +class BackgroundCatalogInjector: + """ + Thin wrapper around :class:`~streamobs.observed.StreamInjector` for + background catalog injection. + + Exposes :meth:`inject_stars` and :meth:`inject_galaxies` so the caller + does not need to pass ``source_type`` explicitly. All injection logic + lives in :class:`~streamobs.observed.StreamInjector`. + + Parameters + ---------- + survey : Survey + Survey instance to inject into. + **kwargs + Forwarded to :class:`~streamobs.observed.StreamInjector`. + + Examples + -------- + >>> injector = BackgroundCatalogInjector(survey) + >>> obs_stars = injector.inject_stars(catalog_df, bands=['g', 'r']) + >>> obs_gals = injector.inject_galaxies(catalog_df, bands=['g', 'r']) + """ + + def __init__(self, survey: Survey, **kwargs): + self._survey = survey + self.streaminjector = StreamInjector(self._survey, **kwargs) + + def inject_stars(self, catalog, bands=None, **kwargs) -> "pd.DataFrame": + """ + Inject a catalog of stars through the full survey pipeline. + + Parameters + ---------- + catalog : pd.DataFrame or str + True stellar catalog. Accepts a DataFrame or a path to parquet/CSV. + bands : list of str, optional + Photometric bands to inject. Defaults to ``['r', 'g']``. + **kwargs + Forwarded to :meth:`~streamobs.observed.StreamInjector.inject`. + + Returns + ------- + pd.DataFrame + Observed catalog with survey-namespaced magnitude and flag columns. + """ + catalog = load_catalog(catalog) + return self.streaminjector.inject( + catalog, bands=bands, source_type="stars", **kwargs + ) + + def inject_galaxies(self, catalog, bands=None, **kwargs) -> "pd.DataFrame": + """ + Inject a catalog of galaxies through the full survey pipeline. + + Uses :meth:`~streamobs.surveys.Survey.get_gal_misclassification` for + the detection flag (probability a galaxy passes stellar selection). + Requires :attr:`~streamobs.surveys.Survey.gal_misclassification` to be + loaded on the survey. + + Parameters + ---------- + catalog : pd.DataFrame or str + True galaxy catalog. Accepts a DataFrame or a path to parquet/CSV. + bands : list of str, optional + Photometric bands to inject. Defaults to ``['r', 'g']``. + **kwargs + Forwarded to :meth:`~streamobs.observed.StreamInjector.inject`. + + Returns + ------- + pd.DataFrame + Observed catalog with survey-namespaced magnitude and flag columns. + """ + catalog = load_catalog(catalog) + return self.streaminjector.inject( + catalog, bands=bands, source_type="galaxies", **kwargs + ) diff --git a/streamobs/background/generator.py b/streamobs/background/generator.py new file mode 100644 index 0000000..f06bd05 --- /dev/null +++ b/streamobs/background/generator.py @@ -0,0 +1,278 @@ +""" +Fast per-pixel background generation from precomputed CMD grids. +""" + +import numpy as np + +from ..surveys import Survey +from .storage import BackgroundStorage + + +class LightBackgroundGenerator: + """ + Generate background catalogs rapidly from precomputed CMD histogram grids. + + For each HEALPix pixel in the requested sky region the generator: + + 1. Retrieves the local magnitude limits from the survey's HEALPix maps. + 2. Selects the nearest precomputed CMD histogram in ``(maglim_ref, delta)`` + space. + 3. Scales the reference object count to the pixel area via a Poisson draw. + 4. Samples ``(color, mag)`` pairs from the 2-D histogram. + 5. Samples sky positions uniformly within the pixel. + + Stars and galaxies are handled independently so that users can build + custom mixed background models. + + Parameters + ---------- + storage : BackgroundStorage + Storage backend holding the precomputed CMD grids. + survey : Survey + Survey with real (non-uniform) HEALPix magnitude limit maps used to + look up per-pixel depth. + bands : tuple of str, optional + ``(band1, band2)`` — must match the bands used when building resources. + Default ``('g', 'r')``. + **kwargs + Reserved for future use. + + Examples + -------- + >>> gen = LightBackgroundGenerator(storage, survey, bands=('g', 'r')) + >>> catalog = gen.generate( + ... phi1_limits=(-20, 20), + ... phi2_limits=(-2, 2), + ... gc_frame=frame, + ... nside=4096, + ... source_type='both', + ... ) + """ + + def __init__( + self, + storage: BackgroundStorage, + survey: Survey, + bands=("g", "r"), + **kwargs, + ): + self.storage = storage + self.survey = survey + self.bands = bands + # Lazy cache: {source_type: grid_dict} + self._resources: dict = {} + + def generate( + self, + phi1_limits, + phi2_limits, + gc_frame, + nside=4096, + source_type="both", + **kwargs, + ) -> "pd.DataFrame": + """ + Generate a background catalog for the given sky region. + + Parameters + ---------- + phi1_limits : tuple of float + ``(phi1_min, phi1_max)`` in degrees. + phi2_limits : tuple of float + ``(phi2_min, phi2_max)`` in degrees. + gc_frame : gala.coordinates.GreatCircleICRSFrame + Great-circle frame mapping ``(phi1, phi2)`` to ``(RA, Dec)``. + nside : int, optional + HEALPix resolution. Default ``4096``. + source_type : str, optional + ``'stars'``, ``'galaxies'``, or ``'both'``. Default ``'both'``. + **kwargs + rng : numpy.random.Generator, optional + seed : int, optional + + Returns + ------- + pd.DataFrame + Background catalog with columns ``phi1``, ``phi2``, ``color``, + ``mag`` (reference band), and ``source_type``. + """ + ... + + def _generate_one_type( + self, + phi1_limits, + phi2_limits, + gc_frame, + source_type: str, + nside: int, + **kwargs, + ) -> "pd.DataFrame": + """ + Generate objects of a single source type pixel by pixel. + + Parameters + ---------- + phi1_limits, phi2_limits : tuple of float + gc_frame : gala.coordinates.GreatCircleICRSFrame + source_type : str + ``'stars'`` or ``'galaxies'``. + nside : int + **kwargs + + Returns + ------- + pd.DataFrame + """ + ... + + def _get_footprint_pixels( + self, + phi1_limits, + phi2_limits, + gc_frame, + nside: int, + ) -> np.ndarray: + """ + Return HEALPix pixel indices that cover the given ``(phi1, phi2)`` box. + + Parameters + ---------- + phi1_limits, phi2_limits : tuple of float + gc_frame : gala.coordinates.GreatCircleICRSFrame + nside : int + + Returns + ------- + np.ndarray of int + Pixel indices. + """ + ... + + def _get_maglim_pixels( + self, + pixels: np.ndarray, + band: str, + nside: int, + ) -> np.ndarray: + """ + Retrieve magnitude limits for a set of pixels from the survey map. + + Parameters + ---------- + pixels : np.ndarray of int + band : str + nside : int + Resolution of ``pixels``; the survey map may have a different nside + and will be re-pixelized if needed. + + Returns + ------- + np.ndarray of float + """ + ... + + def _select_cmd_distribution( + self, + maglim_ref: float, + delta: float, + source_type: str, + ) -> dict: + """ + Nearest-neighbour lookup in the precomputed CMD grid. + + Parameters + ---------- + maglim_ref : float + Reference band magnitude limit for this pixel. + delta : float + ``maglim_band1 - maglim_ref`` for this pixel. + source_type : str + ``'stars'`` or ``'galaxies'``. + + Returns + ------- + dict + ``{'cmd_hist', 'color_edges', 'mag_edges', 'n_ref', + 'area_ref_deg2'}`` for the nearest grid point. + """ + ... + + def _scale_n_objects( + self, + n_ref: int, + area_ref_deg2: float, + pixel_area_deg2: float, + ) -> int: + """ + Draw the expected number of objects for a pixel via Poisson scaling. + + Parameters + ---------- + n_ref : int + Number of objects in the reference simulation. + area_ref_deg2 : float + Sky area of the reference simulation in deg². + pixel_area_deg2 : float + Sky area of the target pixel in deg². + + Returns + ------- + int + Poisson draw around ``n_ref * pixel_area_deg2 / area_ref_deg2``. + """ + ... + + def _sample_from_cmd( + self, + cmd_hist: np.ndarray, + color_edges: np.ndarray, + mag_edges: np.ndarray, + n_objects: int, + rng: np.random.Generator, + ) -> "pd.DataFrame": + """ + Draw ``n_objects`` (color, mag) pairs from the 2-D CMD histogram. + + Parameters + ---------- + cmd_hist : np.ndarray + 2-D array of counts with shape ``(n_color, n_mag)``. + color_edges : np.ndarray + mag_edges : np.ndarray + n_objects : int + rng : numpy.random.Generator + + Returns + ------- + pd.DataFrame + Columns ``color`` and ``mag``. + """ + ... + + def _sample_positions( + self, + n_objects: int, + pixel: int, + nside: int, + gc_frame, + rng: np.random.Generator, + ) -> "pd.DataFrame": + """ + Sample ``(phi1, phi2)`` positions uniformly within a HEALPix pixel. + + Parameters + ---------- + n_objects : int + pixel : int + HEALPix pixel index. + nside : int + gc_frame : gala.coordinates.GreatCircleICRSFrame + Used to convert ``(RA, Dec)`` corners back to ``(phi1, phi2)``. + rng : numpy.random.Generator + + Returns + ------- + pd.DataFrame + Columns ``phi1`` and ``phi2``. + """ + ... diff --git a/streamobs/background/resource_builder.py b/streamobs/background/resource_builder.py new file mode 100644 index 0000000..df02717 --- /dev/null +++ b/streamobs/background/resource_builder.py @@ -0,0 +1,257 @@ +""" +Builder for precomputed background color–magnitude diagram (CMD) resources. +""" + +import numpy as np + +from ..surveys import Survey, SurveyFactory +from ..utils import load_catalog +from .catalog_injector import BackgroundCatalogInjector +from .storage import BackgroundStorage + + +class BackgroundResourceBuilder: + """ + Build precomputed CMD histogram grids for fast background generation. + + Drives :class:`BackgroundCatalogInjector` on uniform surveys (no dust, + constant magnitude limits) at a grid of ``(maglim_ref, delta)`` pairs, + where ``delta = maglim_g - maglim_ref``. The resulting 2-D color–magnitude + histograms are stored via :class:`BackgroundStorage` and later consumed by + :class:`~streamobs.background.generator.LightBackgroundGenerator`. + + Parameters + ---------- + survey_name : str, optional + Survey identifier (e.g. ``'lsst'``). + release : str, optional + Survey release string (e.g. ``'yr5'``). + **kwargs + Forwarded to :meth:`~streamobs.surveys.SurveyFactory.create_survey`. + + Examples + -------- + >>> builder = BackgroundResourceBuilder('lsst', release='yr5') + >>> builder.build( + ... catalog_stars=df_stars, + ... catalog_galaxies=df_gals, + ... bands=('g', 'r'), + ... maglim_ref_values=[25.5, 26.0, 26.5], + ... delta_range=(-1.0, 1.0), + ... delta_step=0.1, + ... ) + >>> storage = BackgroundStorage(survey_name='lsst', release='yr5') + >>> builder.save(storage) + """ + + def __init__(self, survey_name="lsst", release=None, **kwargs): + self.survey_name = survey_name + self.release = release + self._kwargs = kwargs + # Nested dict: {source_type: {(maglim_ref, delta): config_dict}} + self.resources: dict = {} + + def build( + self, + catalog_stars=None, + catalog_galaxies=None, + bands=("g", "r"), + maglim_ref_values=None, + delta_range=(-1.0, 1.0), + delta_step=0.1, + n_bins_color=50, + n_bins_mag=50, + source_type="both", + **kwargs, + ): + """ + Build CMD histograms for all ``(maglim_ref, delta)`` configurations. + + For each combination of source type and magnitude limit pair, injects + the catalog into a uniform (no-dust, constant-maglim) survey and + computes a 2-D histogram of ``(color, mag_ref_band)``. + + Parameters + ---------- + catalog_stars : pd.DataFrame or str, optional + True stellar catalog. Required when ``source_type`` is + ``'stars'`` or ``'both'``. + catalog_galaxies : pd.DataFrame or str, optional + True galaxy catalog. Required when ``source_type`` is + ``'galaxies'`` or ``'both'``. + bands : tuple of str, optional + Two band names ``(band1, band2)`` where color = band1 - band2 and + ``band2`` is the reference magnitude axis. Default ``('g', 'r')``. + maglim_ref_values : list of float, optional + Reference magnitude limits to sweep (for the second band in + ``bands``). + delta_range : tuple of float, optional + ``(min, max)`` of ``maglim_band1 - maglim_ref``. + Default ``(-1.0, 1.0)``. + delta_step : float, optional + Step size between delta values. Default ``0.1``. + n_bins_color : int, optional + Number of color histogram bins. Default ``50``. + n_bins_mag : int, optional + Number of magnitude histogram bins. Default ``50``. + source_type : str, optional + ``'stars'``, ``'galaxies'``, or ``'both'``. Default ``'both'``. + **kwargs + Forwarded to + :meth:`BackgroundCatalogInjector.inject_stars` / + :meth:`BackgroundCatalogInjector.inject_galaxies`. + """ + ... + + def _build_one_config( + self, + catalog, + source_type, + bands, + maglim_ref, + delta, + n_bins_color, + n_bins_mag, + **kwargs, + ) -> dict: + """ + Build the CMD histogram for a single ``(maglim_ref, delta)`` pair. + + Creates a uniform survey, injects the catalog, and histograms the + result. + + Parameters + ---------- + catalog : pd.DataFrame + True background catalog (stars or galaxies). + source_type : str + ``'stars'`` or ``'galaxies'``. + bands : tuple of str + ``(band1, band2)`` — color = band1_obs - band2_obs. + maglim_ref : float + Magnitude limit for the reference band (``band2``). + delta : float + ``maglim_band1 - maglim_ref``. + n_bins_color : int + Number of color bins. + n_bins_mag : int + Number of magnitude bins. + **kwargs + Forwarded to the injector. + + Returns + ------- + dict + ``{'cmd_hist': np.ndarray, 'color_edges': np.ndarray, + 'mag_edges': np.ndarray, 'n_ref': int, + 'area_ref_deg2': float}`` + """ + ... + + def _prepare_survey( + self, + survey: Survey, + no_dust: bool = False, + uniform_maglim: dict = None, + **kwargs, + ) -> Survey: + """ + Return a modified copy of the survey for background injection. + + Creates a deep copy so that modifications do not affect the original. + Handles both the full-injection case (no-dust only) and the resource- + building case (uniform magnitude limits + no dust). + + Parameters + ---------- + survey : Survey + Survey to copy and modify. + no_dust : bool, optional + If True, zero the EBV map. Default is False. + uniform_maglim : dict, optional + Mapping ``{band: value}`` to replace per-pixel magnitude limit maps + with constant arrays. E.g. ``{'g': 26.0, 'r': 26.5}``. + When provided, ``no_dust`` is implicitly set to True since uniform + surveys are always dust-free by construction. + **kwargs + Reserved for future use. + + Returns + ------- + Survey + Modified copy of the survey. + """ + ... + + def _compute_cmd_histogram( + self, + observed_df, + bands: tuple, + n_bins_color: int, + n_bins_mag: int, + ) -> dict: + """ + Build a 2-D color–magnitude histogram from an observed catalog. + + Color is defined as ``mag_band1_obs - mag_band2_obs`` and the + magnitude axis is ``mag_band2_obs``. + + Parameters + ---------- + observed_df : pd.DataFrame + Output of the catalog injector (observed magnitudes). + bands : tuple of str + ``(band1, band2)``. + n_bins_color : int + Number of color bins. + n_bins_mag : int + Number of magnitude bins. + + Returns + ------- + dict + ``{'cmd_hist': np.ndarray (n_bins_color × n_bins_mag), + 'color_edges': np.ndarray, 'mag_edges': np.ndarray}`` + """ + ... + + def save(self, storage: BackgroundStorage, source_type="both", **kwargs): + """ + Persist the built resources to disk via ``storage``. + + Parameters + ---------- + storage : BackgroundStorage + Storage backend to use. + source_type : str, optional + Which source types to save: ``'stars'``, ``'galaxies'``, or + ``'both'``. Default ``'both'``. + **kwargs + Forwarded to :meth:`BackgroundStorage.save_data`. + """ + ... + + @classmethod + def load( + cls, + storage: BackgroundStorage, + source_type="both", + **kwargs, + ) -> "BackgroundResourceBuilder": + """ + Load precomputed resources from disk into a new builder instance. + + Parameters + ---------- + storage : BackgroundStorage + Storage backend to read from. + source_type : str, optional + Which source types to load: ``'stars'``, ``'galaxies'``, or + ``'both'``. Default ``'both'``. + + Returns + ------- + BackgroundResourceBuilder + Populated instance with :attr:`resources` filled. + """ + ... diff --git a/streamobs/background/storage.py b/streamobs/background/storage.py new file mode 100644 index 0000000..782732c --- /dev/null +++ b/streamobs/background/storage.py @@ -0,0 +1,139 @@ +""" +Persistence layer for precomputed background CMD histogram grids. +""" + +import os + + +class BackgroundStorage: + """ + Save and load precomputed color–magnitude diagram (CMD) histogram grids. + + One compressed parquet file is stored per ``(source_type, bands)`` + combination, e.g. ``stars_gr.parquet``. All files live under + ``data/background/`` (which is excluded from version control). + + **File format** — long-format parquet with one row per + ``(maglim_r, maglim_g, color_center, mag_center)`` cell:: + + maglim_r | maglim_g | color_center | mag_center | count | n_ref | area_ref_deg2 + + Parquet's columnar compression handles repeated ``color_center`` and + ``mag_center`` values efficiently. + + Parameters + ---------- + base_path : str, optional + Root directory for resource files. Defaults to + ``{package_root}/data/background/``. + survey_name : str, optional + Survey identifier (e.g. ``'lsst'``). Used in the file path. + + Examples + -------- + >>> storage = BackgroundStorage(survey_name='lsst') + >>> path = storage.get_path('stars', ('g', 'r')) + >>> # -> .../data/background/lsst/stars_gr.parquet + """ + + def __init__(self, base_path=None, survey_name="lsst", **kwargs): + if base_path is None: + _pkg_root = os.path.join(os.path.dirname(__file__), "..", "..", "data") + base_path = os.path.join(_pkg_root, "background") + self.base_path = base_path + self.survey_name = survey_name + + def get_path(self, source_type: str, bands: tuple) -> str: + """ + Build the file path for a given ``(source_type, bands)`` combination. + + Parameters + ---------- + source_type : str + ``'stars'`` or ``'galaxies'``. + bands : tuple of str + Band names in order, e.g. ``('g', 'r')``. + + Returns + ------- + str + Absolute path to the parquet file, e.g. + ``{base_path}/lsst/stars_gr.parquet``. + """ + bands_str = "".join(bands) + filename = f"{source_type}_{bands_str}.parquet" + return os.path.join(self.base_path, self.survey_name, filename) + + def save_data( + self, + data: dict, + source_type: str, + bands: tuple, + **kwargs, + ): + """ + Persist the CMD histogram grid to a compressed parquet file. + + The ``data`` dict is keyed by ``(maglim_r, maglim_g)`` and each value + is a dict with keys ``cmd_hist``, ``color_edges``, ``mag_edges``, + ``n_ref``, and ``area_ref_deg2`` (as returned by + :meth:`~streamobs.background.resource_builder.BackgroundResourceBuilder._build_one_config`). + The grid is flattened to long-format before writing. + + Parameters + ---------- + data : dict + CMD histogram grid keyed by ``(maglim_r, maglim_g)``. + source_type : str + ``'stars'`` or ``'galaxies'``. + bands : tuple of str + Band names, e.g. ``('g', 'r')``. + **kwargs + compression : str, optional + Parquet compression codec. Default is ``'zstd'``. + """ + ... + + def load_data( + self, + source_type: str, + bands: tuple, + **kwargs, + ) -> dict: + """ + Load a CMD histogram grid from the parquet file for this combination. + + Reads the file returned by :meth:`get_path` and reconstructs the + nested dict keyed by ``(maglim_r, maglim_g)``. + + Parameters + ---------- + source_type : str + ``'stars'`` or ``'galaxies'``. + bands : tuple of str + Band names, e.g. ``('g', 'r')``. + + Returns + ------- + dict + CMD grid keyed by ``(maglim_r, maglim_g)`` → ``{'cmd_hist', + 'color_edges', 'mag_edges', 'n_ref', 'area_ref_deg2'}``. + """ + ... + + def exists(self, source_type: str, bands: tuple) -> bool: + """ + Check whether the resource file for this combination exists on disk. + + Parameters + ---------- + source_type : str + ``'stars'`` or ``'galaxies'``. + bands : tuple of str + Band names, e.g. ``('g', 'r')``. + + Returns + ------- + bool + """ + return os.path.exists(self.get_path(source_type, bands)) diff --git a/streamobs/observed.py b/streamobs/observed.py index 4e2ce34..1f6c244 100644 --- a/streamobs/observed.py +++ b/streamobs/observed.py @@ -244,6 +244,12 @@ def inject(self, data, bands=None, stream_config=None, **kwargs): perfect_galstarsep : bool, optional If True, also computes a flag assuming perfect star/galaxy separation (detection efficiency only, no classification losses). Default is False. + Only applies when ``source_type='stars'``. + source_type : str, optional + Type of source being injected. Either ``'stars'`` (default) or + ``'galaxies'``. When ``'galaxies'``, the detection flag uses + :meth:`~streamobs.surveys.Survey.get_gal_misclassification` + instead of the star completeness function. verbose : bool, optional Whether to print progress information. Default is True. @@ -362,7 +368,7 @@ def _inject_one_survey(self, data, bands, survey, rng=None, seed=None, **kwargs) Seed used to build an RNG when ``rng`` is None. **kwargs ``nside``, ``detection_mag_cut``, ``dust_correction``, - ``perfect_galstarsep``, ``verbose`` (see :meth:`inject`). + ``perfect_galstarsep``, ``source_type``, ``verbose`` (see :meth:`inject`). Returns ------- @@ -461,6 +467,7 @@ def _inject_one_survey(self, data, bands, survey, rng=None, seed=None, **kwargs) # Compute detection flag for completeness-band (reference band) if band == survey.completeness_band: + source_type = kwargs.get("source_type", "stars") flag_completeness_band = self.detect_flag( pix_maglim, survey=survey, @@ -471,7 +478,7 @@ def _inject_one_survey(self, data, bands, survey, rng=None, seed=None, **kwargs) perfect_galstarsep=False, **kwargs, ) - if perfect_galstarsep: + if perfect_galstarsep and source_type == "stars": flag_detection_only_band = self.detect_flag( pix_maglim, survey=survey, @@ -1166,10 +1173,14 @@ def sample_measured_magnitudes(self, mag_true, mag_err, **kwargs): def detect_flag(self, pix, survey, mag=None, band="r", **kwargs): """ - Apply the survey selection to determine detection flags for stars. + Apply the survey selection to determine detection flags. - This method uses the survey completeness function and random sampling - to determine which stars would be detected by the survey. + For stars (``source_type='stars'``, the default), uses the survey's + combined completeness (detection × classification) or detection-only + efficiency when ``perfect_galstarsep=True``. For galaxies + (``source_type='galaxies'``), uses + :meth:`~streamobs.surveys.Survey.get_gal_misclassification_detection` and + ignores ``perfect_galstarsep``. Parameters ---------- @@ -1180,8 +1191,7 @@ def detect_flag(self, pix, survey, mag=None, band="r", **kwargs): band : str, optional Band to consider for detection. Default is 'r'. survey : Survey - Survey whose completeness/detection-efficiency curves to use - (required). + Survey whose efficiency curves to use (required). **kwargs Additional keyword arguments: @@ -1189,13 +1199,17 @@ def detect_flag(self, pix, survey, mag=None, band="r", **kwargs): Random number generator instance. seed : int, optional Random seed if rng is not provided. + source_type : str, optional + ``'stars'`` (default) or ``'galaxies'``. perfect_galstarsep : bool, optional - If True, assumes perfect star/galaxy separation. Default is False. + If True and ``source_type='stars'``, uses detection-only + efficiency (no classification losses). Ignored for galaxies. + Default is False. Returns ------- np.ndarray - Boolean array: True for detected stars, False otherwise. + Boolean array: True for detected objects, False otherwise. Raises ------ @@ -1208,16 +1222,18 @@ def detect_flag(self, pix, survey, mag=None, band="r", **kwargs): seed = kwargs.pop("seed", None) rng = np.random.default_rng(seed) - # Select the appropriate magnitude and map depending on the band maglim = survey.get_maglim(band, pixel=pix) - perfect_galstarsep = kwargs.get("perfect_galstarsep", False) - if perfect_galstarsep: - compl = survey.get_detection_efficiency(band, mag, maglim) + source_type = kwargs.get("source_type", "stars") + if source_type == "galaxies": + compl = survey.get_gal_misclassification_detection(band, mag, maglim) else: - compl = survey.get_completeness(band, mag, maglim) + perfect_galstarsep = kwargs.get("perfect_galstarsep", False) + if perfect_galstarsep: + compl = survey.get_detection_efficiency(band, mag, maglim) + else: + compl = survey.get_completeness(band, mag, maglim) - # Set the threshold using completeness threshold = rng.uniform(size=len(mag)) <= compl return threshold diff --git a/streamobs/surveys.py b/streamobs/surveys.py index be1c704..ca89f4d 100644 --- a/streamobs/surveys.py +++ b/streamobs/surveys.py @@ -120,6 +120,8 @@ class Survey: delta_saturation: Optional[float] = None log_photo_error_catalog: Optional[Callable] = None log_photo_error_sample: Optional[Callable] = None + gal_misclassification: Optional[Callable] = None + gal_misclassification_detection: Optional[Callable] = None # Band-independent maps ebv_map: Optional[np.ndarray] = None @@ -527,39 +529,147 @@ def get_efficiency( get_detection_efficiency : Convenience wrapper for detection-only. get_classification_efficiency : Convenience wrapper for classification-only. """ - if type == "completeness": + delta_saturation = kwargs.get("delta_saturation", self.delta_saturation) + delta_mag = magnitude - maglim + + if type in ("missclassified", "detected_missclassified"): + # Galaxy-related efficiencies: no 1-padding at the bright end. + if type == "missclassified": + func = self.gal_misclassification + if func is None: + raise ValueError( + "Efficiency function for type 'missclassified' not loaded." + ) + else: # detected_missclassified + func = getattr(self, "gal_misclassification_detection", None) + if func is None: + # Fallback: missclassification × detection efficiency. + mis = self.gal_misclassification + det = getattr(self, "efficiency_detection", None) + if mis is None or det is None: + raise ValueError( + "Efficiency function for type 'detected_missclassified' not " + "loaded and cannot be derived (need both 'missclassified' and " + "'detection_efficiency')." + ) + func = lambda dm: mis(dm) * det(dm) # noqa: E731 + compl = func(delta_mag) + compl = np.where(magnitude < self.saturation[band], 0.0, compl) + compl = np.where( + (maglim < self.saturation[band]) | np.isnan(maglim), 0.0, compl + ) + return compl + + if type in ("completeness", "classification_detection"): func = self.completeness + if func is None: + # Fallback: compute classification × detection on the fly. + det = getattr(self, "efficiency_detection", None) + cls_ = getattr(self, "efficiency_classification", None) + if det is None or cls_ is None: + raise ValueError( + "Efficiency function for type 'completeness' not loaded and " + "cannot be derived (need both detection and classification)." + ) + func = lambda dm: det(dm) * cls_(dm) # noqa: E731 elif type == "detection_efficiency": - func = self.efficiency_detection + func = getattr(self, "efficiency_detection", None) elif type == "classification_efficiency": - func = self.efficiency_classification + func = getattr(self, "efficiency_classification", None) else: - raise ValueError(f"Efficiency type '{type}' not recognized.") + raise ValueError( + f"Efficiency type '{type}' not recognized. Valid types: " + "'completeness', 'classification_detection', 'detection_efficiency', " + "'classification_efficiency', 'missclassified', 'detected_missclassified'." + ) if func is None: - raise ValueError(f"Efficiency function for type '{type}' not loaded") - - delta_saturation = kwargs.get("delta_saturation", self.delta_saturation) - - # Calculate delta_mag - delta_mag = magnitude - maglim + raise ValueError(f"Efficiency function for type '{type}' not loaded.") - # Apply saturation condition: 1 padding for objects fainter than saturation but equivalent mag brighter than saturation0 + # 1-padding: bright sources (well above saturation) are always detected. compl = np.where( (magnitude > self.saturation[band]) & (delta_mag <= delta_saturation), 1.0, func(delta_mag), - ) # 1 padded - - compl = np.where( - magnitude < self.saturation[band], 0.0, compl - ) # saturation at the bright end - + ) + compl = np.where(magnitude < self.saturation[band], 0.0, compl) compl = np.where( (maglim < self.saturation[band]) | np.isnan(maglim), 0.0, compl - ) # not observed if the area is not covered - + ) return compl + def get_gal_misclassification( + self, band: str, magnitude: float, maglim: float, **kwargs + ) -> float: + """ + Get galaxy misclassification efficiency (probability a galaxy passes stellar selection). + + Mirrors :meth:`get_efficiency` for the ``"completeness"`` type, but + **without 1-padding at the bright end**: bright galaxies are not forced + to efficiency = 1 when their delta_mag falls below the saturation + threshold. This reproduces the behaviour of ``custom_get_completeness`` + used for galaxies in external background-generation scripts. + + Parameters + ---------- + band : str + Band identifier (e.g., 'g', 'r'). + magnitude : float or np.ndarray + True apparent magnitude(s) including extinction. + maglim : float or np.ndarray + Magnitude limit(s) at the source position(s). + **kwargs + delta_saturation : float, optional + Override the survey's default saturation threshold. + + Returns + ------- + float or np.ndarray + Misclassification probability in [0, 1]. + + Raises + ------ + ValueError + If :attr:`gal_misclassification` has not been loaded. + """ + return self.get_efficiency(band, magnitude, maglim, type="missclassified", **kwargs) + + def get_gal_misclassification_detection( + self, band: str, magnitude: float, maglim: float, **kwargs + ) -> float: + """ + Get galaxy misclassification × detection efficiency. + + Mirrors :meth:`get_efficiency` for the ``"detected_missclassified"`` type, + but **without 1-padding at the bright end**: bright galaxies are not forced + to efficiency = 1 when their delta_mag falls below the saturation + threshold. This reproduces the behaviour of ``custom_get_completeness`` + used for galaxies in external background-generation scripts. + + Parameters + ---------- + band : str + Band identifier (e.g., 'g', 'r'). + magnitude : float or np.ndarray + True apparent magnitude(s) including extinction. + maglim : float or np.ndarray + Magnitude limit(s) at the source position(s). + **kwargs + delta_saturation : float, optional + Override the survey's default saturation threshold. + + Returns + ------- + float or np.ndarray + Misclassification × detection probability in [0, 1]. + + Raises + ------ + ValueError + If :attr:`gal_misclassification_detection` has not been loaded. + """ + return self.get_efficiency( + band, magnitude, maglim, type="detected_missclassified", **kwargs + ) class SurveyFactory: """ @@ -1165,6 +1275,52 @@ def _load_survey_data(cls, survey: Survey, config: dict, **kwargs): if verbose: print("No classification efficiency file found, skipping.") + # Load galaxy misclassification efficiency (optional). + # Uses the explicit 'gal_misclassification' config key when present, + # otherwise falls back to the completeness file (same CSV, different column). + _gal_mis_file = survey_config.get( + "gal_misclassification", survey_config.get("completeness") + ) + if _gal_mis_file is not None: + try: + cls._load_file( + survey, + survey_config, + "gal_misclassification", + "Galaxy misclassification efficiency", + lambda f: cls.set_completeness( + f, + delta_saturation=survey.delta_saturation, + selection="missclassified", + ), + data_path_survey, + data_path_others, + filename=_gal_mis_file, + **kwargs, + ) + except Exception: + pass # column absent in this survey's file — silently skip + + # Also try the combined detection × misclassification from the same file. + try: + cls._load_file( + survey, + survey_config, + "gal_misclassification_detection", + "Galaxy misclassification × detection efficiency", + lambda f: cls.set_completeness( + f, + delta_saturation=survey.delta_saturation, + selection="detected_missclassified", + ), + data_path_survey, + data_path_others, + filename=_gal_mis_file, + **kwargs, + ) + except Exception: + pass # column absent — fallback to product at call time + # Load photometric error model(s), same for all bands. Two curves: # - catalog : reported error vs delta_mag (the survey's photoerror file). # Written as magerr and used for the S/N cut. Always loaded. @@ -1401,6 +1557,8 @@ def set_completeness(filename, delta_saturation=-10.4, selection="both"): - 'classification_eff' : Classification efficiency only (the legacy misspelled header 'classifiction_eff' is also accepted) - 'classification_detection_eff' : Combined efficiency + - 'missclassification_eff' : Galaxy misclassification efficiency + (probability a galaxy passes stellar selection) delta_saturation : float, optional Magnitude difference threshold for saturation. Default is -10.4. @@ -1410,7 +1568,10 @@ def set_completeness(filename, delta_saturation=-10.4, selection="both"): - 'detected' : Detection efficiency only (column 'detection_eff') - 'classified' : Classification efficiency only (column 'classification_eff', or legacy 'classifiction_eff') - - 'both' : Combined detection and classification (column 'classification_detection_eff') + - 'both' : Combined detection and classification (column + 'classification_detection_eff') + - 'missclassified' : Galaxy misclassification efficiency (column + 'missclassification_eff') Default is 'both'. @@ -1422,7 +1583,7 @@ def set_completeness(filename, delta_saturation=-10.4, selection="both"): Raises ------ ValueError - If selection is not one of 'detected', 'classified', or 'both'. + If selection is not one of the recognised values. Notes ----- @@ -1446,9 +1607,14 @@ def set_completeness(filename, delta_saturation=-10.4, selection="both"): efficiencies = data["classifiction_eff"] elif selection == "both": efficiencies = data["classification_detection_eff"] + elif selection == "missclassified": + efficiencies = data["missclassification_eff"] + elif selection == "detected_missclassified": + efficiencies = data["missclassification_detection_eff"] else: raise ValueError( - f"Invalid selection '{selection}'. Must be 'detected', 'classified', or 'both'" + f"Invalid selection '{selection}'. Must be 'detected', 'classified', " + "'both', or 'missclassified'." ) # Extend efficiency to bright end (force to zero at saturation) diff --git a/streamobs/utils.py b/streamobs/utils.py index 3f19ccd..f52349a 100644 --- a/streamobs/utils.py +++ b/streamobs/utils.py @@ -3,9 +3,43 @@ Utils for streamobs """ +import pandas as pd import yaml +def load_catalog(catalog): + """Load a catalog as a :class:`pandas.DataFrame`. + + Parameters + ---------- + catalog : pd.DataFrame or str + A DataFrame (returned as-is) or a path to a parquet or CSV file. + + Returns + ------- + pd.DataFrame + + Raises + ------ + ValueError + If ``catalog`` is neither a DataFrame nor a recognised file path. + """ + if isinstance(catalog, pd.DataFrame): + return catalog + if isinstance(catalog, str): + if catalog.endswith(".parquet"): + return pd.read_parquet(catalog) + if catalog.endswith(".csv"): + return pd.read_csv(catalog) + raise ValueError( + f"Unrecognised file extension for catalog path '{catalog}'. " + "Expected .parquet or .csv." + ) + raise ValueError( + f"catalog must be a DataFrame or a path string, got {type(catalog)}." + ) + + def parse_config(config): """Parse a yaml formatted file or string into a dict. @@ -20,6 +54,6 @@ def parse_config(config): try: # If `config` is a file return yaml.safe_load(open(config, "r")) - except OSError, FileNotFoundError: + except (OSError, FileNotFoundError): # Otherwise assume it is a string return yaml.safe_load(config) diff --git a/tests/test_background.py b/tests/test_background.py new file mode 100644 index 0000000..9299979 --- /dev/null +++ b/tests/test_background.py @@ -0,0 +1,300 @@ +""" +Tests for the streamobs.background subpackage. + +All tests are marked ``background`` so they can be run in isolation: + + pytest tests/test_background.py -m background + +Tests that write to disk (Storage, ResourceBuilder) use the ``tmp_path`` +pytest fixture so the temporary directory is created and cleaned up +automatically regardless of test outcome. +""" + +import numpy as np +import pandas as pd +import pytest + +pytestmark = pytest.mark.background + + +# --------------------------------------------------------------------------- +# Shared fixtures +# --------------------------------------------------------------------------- + +@pytest.fixture(scope="module") +def tiny_galaxies_catalog(): + """Galaxy catalog: same sky positions as stream_catalog but with randomised magnitudes.""" + rng = np.random.default_rng(1) + n = 100 + return pd.DataFrame( + { + "ra": rng.uniform(30.0, 60.0, n), + "dec": rng.uniform(-20.0, 0.0, n), + "lsst_g_true": rng.uniform(20.0, 28.0, n), + "lsst_r_true": rng.uniform(20.0, 28.0, n), + } + ) + + +# --------------------------------------------------------------------------- +# Part 1 — Survey galaxy misclassification +# --------------------------------------------------------------------------- + + +class TestSurveyGalMisclassification: + """Tests for the gal_misclassification field and method on Survey.""" + + def test_gal_misclassification_field_exists(self, mock_survey): + """Survey dataclass must expose the gal_misclassification attribute.""" + assert hasattr(mock_survey, "gal_misclassification") + + def test_get_gal_misclassification_raises_when_not_loaded(self, mock_survey): + """get_gal_misclassification raises ValueError if the function is not loaded.""" + import copy + + s = copy.deepcopy(mock_survey) + s.gal_misclassification = None + mags = np.linspace(20.0, 27.0, 10) + maglim = np.full(10, 26.5) + with pytest.raises(ValueError, match="missclassified"): + s.get_gal_misclassification("r", mags, maglim) + + def test_get_gal_misclassification_no_1padding(self, mock_survey): + """get_gal_misclassification must NOT return 1 at the bright end (no 1-padding). + + Verify by checking that the method returns 0 for very bright objects + (below saturation) and does not saturate to 1.0 for delta_mag far below + delta_saturation. + """ + ... + +# --------------------------------------------------------------------------- +# Part 2 — StreamInjector source_type parameter +# --------------------------------------------------------------------------- + + +class TestInjectSourceType: + """Tests that source_type is accepted and routed correctly in StreamInjector.""" + + def test_inject_accepts_source_type_kwarg(self): + """inject() must accept source_type without raising TypeError.""" + from streamobs.observed import StreamInjector + import inspect + + sig = inspect.signature(StreamInjector.inject) + # source_type is passed through **kwargs — verify inject accepts **kwargs + assert "kwargs" in str(sig) or "source_type" in sig.parameters + + def test_inject_galaxies_uses_gal_misclassification( + self, mock_survey, tiny_galaxies_catalog + ): + """inject() with source_type='galaxies' must reach get_gal_misclassification.""" + ... + + +# --------------------------------------------------------------------------- +# Part 3 — BackgroundCatalogInjector +# --------------------------------------------------------------------------- + + +class TestBackgroundCatalogInjector: + """Tests for BackgroundCatalogInjector.""" + + def test_init(self, mock_survey): + """BackgroundCatalogInjector can be instantiated with a Survey.""" + from streamobs.background import BackgroundCatalogInjector + + inj = BackgroundCatalogInjector(mock_survey) + assert inj._survey is mock_survey + + def test_inject_stars_returns_nonempty_dataframe( + self, mock_survey, stream_catalog + ): + """inject_stars must return a non-empty DataFrame (smoke test only — full coverage in test_observed).""" + from streamobs.background import BackgroundCatalogInjector + + result = BackgroundCatalogInjector(mock_survey).inject_stars( + stream_catalog, bands=["g", "r"] + ) + assert isinstance(result, pd.DataFrame) + assert len(result) > 0 + + def test_inject_galaxies_returns_nonempty_dataframe( + self, mock_survey, tiny_galaxies_catalog + ): + """inject_galaxies must return a non-empty DataFrame.""" + from streamobs.background import BackgroundCatalogInjector + + result = BackgroundCatalogInjector(mock_survey).inject_galaxies( + tiny_galaxies_catalog, bands=["g", "r"] + ) + assert isinstance(result, pd.DataFrame) + assert len(result) > 0 + + def test_inject_galaxies_has_detection_flag( + self, mock_survey, tiny_galaxies_catalog + ): + """inject_galaxies output must include a detection flag column.""" + from streamobs.background import BackgroundCatalogInjector + + result = BackgroundCatalogInjector(mock_survey).inject_galaxies( + tiny_galaxies_catalog, bands=["g", "r"] + ) + flag_cols = [c for c in result.columns if "flag" in c] + assert len(flag_cols) > 0 + assert "flag_observed" in flag_cols + assert result["flag_observed"].isin([0, 1]).all() + assert result["flag_observed"].sum() > 0 # at least one detected + + +# --------------------------------------------------------------------------- +# Part 4 — BackgroundStorage +# --------------------------------------------------------------------------- + + +class TestBackgroundStorage: + """Tests for BackgroundStorage persistence helpers.""" + + def test_get_path_naming(self, tmp_path): + """get_path must embed source_type and band names in the filename.""" + from streamobs.background import BackgroundStorage + + storage = BackgroundStorage(base_path=str(tmp_path), survey_name="lsst") + path = storage.get_path("stars", ("g", "r")) + assert "stars" in path + assert "gr" in path + assert path.endswith(".parquet") + + def test_save_data_creates_file(self, tmp_path): + """save_data must write a file at the path returned by get_path.""" + ... + + def test_load_data_roundtrip(self, tmp_path): + """load_data must recover the dict saved by save_data.""" + ... + + def test_exists_false_before_save(self, tmp_path): + """exists must return False when the file is not on disk.""" + from streamobs.background import BackgroundStorage + + storage = BackgroundStorage(base_path=str(tmp_path), survey_name="lsst") + assert storage.exists("stars", ("g", "r")) is False + + def test_exists_true_after_save(self, tmp_path): + """exists must return True after save_data has been called.""" + ... + + +# --------------------------------------------------------------------------- +# Part 5 — BackgroundResourceBuilder +# --------------------------------------------------------------------------- + + +class TestBackgroundResourceBuilder: + """Tests for BackgroundResourceBuilder (uses tmp_path for storage).""" + + def test_init(self): + """BackgroundResourceBuilder can be instantiated.""" + from streamobs.background import BackgroundResourceBuilder + + builder = BackgroundResourceBuilder(survey_name="lsst", release="yr4") + assert builder.survey_name == "lsst" + assert isinstance(builder.resources, dict) + + def test_build_one_config(self, mock_survey, stream_catalog): + """_build_one_config must return a dict with the expected keys.""" + ... + + def test_save_via_storage(self, tmp_path, stream_catalog): + """save must write a parquet file via BackgroundStorage.""" + ... + + def test_load_via_storage(self, tmp_path, stream_catalog): + """load must reconstruct resources from the file saved by save.""" + ... + + +# --------------------------------------------------------------------------- +# Part 6 — LightBackgroundGenerator +# --------------------------------------------------------------------------- + + +class TestLightBackgroundGenerator: + """Tests for LightBackgroundGenerator.""" + + @pytest.fixture(scope="class") + def storage_with_data(self, tmp_path_factory, mock_survey, stream_catalog): + """BackgroundStorage pre-populated with a minimal CMD grid.""" + ... + + def test_init(self, mock_survey, tmp_path): + """LightBackgroundGenerator can be instantiated.""" + from streamobs.background import BackgroundStorage, LightBackgroundGenerator + + storage = BackgroundStorage(base_path=str(tmp_path), survey_name="lsst") + gen = LightBackgroundGenerator(storage, mock_survey) + assert gen.survey is mock_survey + + def test_generate_stars(self): + """generate with source_type='stars' must return a DataFrame.""" + ... + + def test_generate_galaxies(self): + """generate with source_type='galaxies' must return a DataFrame.""" + ... + + def test_generate_both(self): + """generate with source_type='both' must return combined DataFrame.""" + ... + + +# --------------------------------------------------------------------------- +# Part 7 — Background (top-level wrapper) +# --------------------------------------------------------------------------- + + +class TestBackground: + """Tests for the Background wrapper class.""" + + def test_init_rejects_invalid_source_type(self, mock_survey): + """Background raises ValueError for unknown source_type.""" + from streamobs.background import Background + + with pytest.raises(ValueError, match="source_type"): + Background(mock_survey, source_type="invalid") + + def test_init_rejects_invalid_method(self, mock_survey): + """Background raises ValueError for unknown method.""" + from streamobs.background import Background + + with pytest.raises(ValueError, match="method"): + Background(mock_survey, method="invalid") + + def test_full_method_stars(self, mock_survey, stream_catalog): + """Background with method='full' and source_type='stars' must generate a catalog.""" + ... + + def test_full_method_galaxies(self, mock_survey, tiny_galaxies_catalog): + """Background with method='full' and source_type='galaxies' must generate a catalog.""" + ... + + def test_full_method_both( + self, mock_survey, stream_catalog, tiny_galaxies_catalog + ): + """Background with method='full' and source_type='both' must combine catalogs.""" + ... + + def test_light_method_default_storage(self, mock_survey): + """Background with method='light' and storage=None must fall back to _default_storage.""" + from streamobs.background import Background + + bg = Background(mock_survey, method="light") + assert bg.storage is not None + + def test_light_method_custom_storage(self, mock_survey, tmp_path): + """Background accepts a user-supplied BackgroundStorage.""" + from streamobs.background import Background, BackgroundStorage + + storage = BackgroundStorage(base_path=str(tmp_path), survey_name="lsst") + bg = Background(mock_survey, method="light", storage=storage) + assert bg.storage is storage diff --git a/tests/test_surveys.py b/tests/test_surveys.py index 96760d9..e5b4d2d 100644 --- a/tests/test_surveys.py +++ b/tests/test_surveys.py @@ -441,6 +441,57 @@ def test_completeness_behavior( comp_faint < 0.1 ), f"Completeness should be near 0 for magnitudes well below saturation in band '{completeness_band}'" + + def test_missclassification_behavior( + self, + loaded_survey, + saturation_magnitudes, + bright_magnitudes, + faint_magnitudes, + base_maglim,): + + completeness_band = loaded_survey.completeness_band + sat = loaded_survey.saturation[completeness_band] + + # work only with magnitudes above or below saturation + sat_mag = saturation_magnitudes[saturation_magnitudes < sat] + bright_mag = bright_magnitudes[bright_magnitudes > sat] + faint_mag = faint_magnitudes[faint_magnitudes > sat] + + # Test that missclassification is loaded + # Only gal_misclassification_detection is needed, but if it None it can + # be estimated from gal_misclassification, so we require at least one of them to be present. + assert (loaded_survey.gal_misclassification_detection is not None) or (loaded_survey.gal_misclassification is not None), "Missclassification function is None" + + + + # Verify completeness behavior in each regime + if len(sat_mag) > 0: + comp_sat = loaded_survey.get_efficiency( + completeness_band, sat_mag, base_maglim, type = 'detected_missclassified', + ) + assert np.all( + comp_sat == 0.0 + ), f"Completeness should be 0 for magnitudes below saturation in band '{completeness_band}'" + + + mag_test = np.linspace(18, 28, 100) + comp_test = loaded_survey.get_efficiency( + completeness_band, mag_test, base_maglim, type = 'detected_missclassified', + ) + max_val = np.max(comp_test) + assert max_val < 1.0, f"Missclassification efficiency should be less than 1 for magnitudes in band '{completeness_band}'" + assert max_val > 0.0, f"Missclassification efficiency should be greater than 0 for magnitudes in band '{completeness_band}'" + + if len(faint_mag) > 0: + comp_faint = loaded_survey.get_efficiency( + completeness_band, faint_mag, base_maglim, type = 'detected_missclassified', + ) + assert np.all( + comp_faint < 0.1 + ), f"Completeness should be near 0 for magnitudes well below saturation in band '{completeness_band}'" + + def test_log_photo_error_behavior( self, loaded_survey,