From c56bfacb65e22192342fed46b947de654639748f Mon Sep 17 00:00:00 2001
From: MatthieuPe <matthieupelissier01@gmail.com>
Date: Fri, 19 Jun 2026 17:26:20 +0200
Subject: [PATCH 1/8] Add background subpackage: CMD histogram resources and
 injection pipeline

- Add gal_misclassification field and method to Survey
- Extend set_completeness() with selection='missclassified'
- Thread source_type kwarg through StreamInjector.inject() and detect_flag()
- New streamobs/background/ subpackage: BackgroundCatalogInjector,
  BackgroundResourceBuilder, BackgroundStorage, LightBackgroundGenerator,
  Background wrapper
- Add load_catalog() utility to utils.py
- Add tests/test_background.py (31 stubs, @pytest.mark.background)
- Add user and developer documentation for background generation
- Gitignore data/background/ and .claude/

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .gitignore                                |   6 +
 docs/source/background.md                 |  50 ++++
 docs/source/build_background_resources.md |  70 +++++
 docs/source/index.rst                     |   9 +-
 pytest.ini                                |   1 +
 streamobs/__init__.py                     |   8 +
 streamobs/background/__init__.py          |  39 +++
 streamobs/background/background.py        | 193 +++++++++++++
 streamobs/background/catalog_injector.py  |  93 ++++++
 streamobs/background/generator.py         | 278 ++++++++++++++++++
 streamobs/background/resource_builder.py  | 257 +++++++++++++++++
 streamobs/background/storage.py           | 145 ++++++++++
 streamobs/observed.py                     |  48 +++-
 streamobs/surveys.py                      |  92 +++++-
 streamobs/utils.py                        |  36 ++-
 tests/test_background.py                  | 328 ++++++++++++++++++++++
 16 files changed, 1633 insertions(+), 20 deletions(-)
 create mode 100644 docs/source/background.md
 create mode 100644 docs/source/build_background_resources.md
 create mode 100644 streamobs/background/__init__.py
 create mode 100644 streamobs/background/background.py
 create mode 100644 streamobs/background/catalog_injector.py
 create mode 100644 streamobs/background/generator.py
 create mode 100644 streamobs/background/resource_builder.py
 create mode 100644 streamobs/background/storage.py
 create mode 100644 tests/test_background.py

diff --git a/.gitignore b/.gitignore
index d7d0c8b..e1c7407 100644
--- a/.gitignore
+++ b/.gitignore
@@ -128,9 +128,15 @@ dmypy.json
 # Pyre type checker
 .pyre/
 
+# Claude Code settings (local only)
+.claude/
+
 # Surveys files
 data/surveys/*
 data/others/*
+
+# Background resources (precomputed CMD grids — built locally, not tracked)
+data/background/
 .DS_Store
 # External reference repos (design references only, not part of streamobs)
 /survey_systematics_in_LSST_streams/
diff --git a/docs/source/background.md b/docs/source/background.md
new file mode 100644
index 0000000..200a10b
--- /dev/null
+++ b/docs/source/background.md
@@ -0,0 +1,50 @@
+# Generating background
+
+`streamobs` provides two complementary ways to generate a background catalog
+(stars and/or galaxies) for a stream realization.
+
+| Method | Speed | Requirements |
+|--------|-------|--------------|
+| `'light'` | Fast | Precomputed CMD resource files |
+| `'full'` | Slow | True background catalogs |
+
+## Quick start — light method
+
+```python
+from streamobs.surveys import Survey
+from streamobs.background import Background
+
+survey = Survey.load('lsst', release='yr5')
+bg = Background(survey, source_type='both', method='light')
+
+catalog = bg.generate(
+    phi1_limits=(-20, 20),
+    phi2_limits=(-2, 2),
+    gc_frame=frame,          # gala GreatCircleICRSFrame
+)
+```
+
+## Stars only or galaxies only
+
+```python
+bg_stars = Background(survey, source_type='stars', method='light')
+bg_gals  = Background(survey, source_type='galaxies', method='light')
+```
+
+## Full injection method
+
+Requires a DataFrame of true background objects for each population:
+
+```python
+bg = Background(
+    survey,
+    method='full',
+    source_type='both',
+    catalog_stars=df_true_stars,
+    catalog_galaxies=df_true_galaxies,
+)
+catalog = bg.generate(phi1_limits=(-20, 20), phi2_limits=(-2, 2))
+```
+
+See [build_background_resources.md](build_background_resources.md) to learn
+how to build or rebuild the resource files used by the light method.
diff --git a/docs/source/build_background_resources.md b/docs/source/build_background_resources.md
new file mode 100644
index 0000000..5965723
--- /dev/null
+++ b/docs/source/build_background_resources.md
@@ -0,0 +1,70 @@
+# Building background resources (developer guide)
+
+The **light background method** reads precomputed color–magnitude diagram
+(CMD) histograms from parquet files stored in `data/background/`.  These
+files are not tracked by git and must be built locally (or provided by the
+package maintainers).
+
+## What the resources are
+
+For each combination of ``(source_type, bands)``, one parquet file is
+stored with a long-format table:
+
+```
+maglim_r | maglim_g | color_center | mag_center | count | n_ref | area_ref_deg2
+```
+
+Each row represents one cell of a 2-D CMD histogram at a specific magnitude
+limit pair.  The generator looks up the nearest ``(maglim_r, maglim_g)``
+entry at runtime and samples from it.
+
+## How to build resources
+
+### 1. Prepare input catalogs
+
+You need a DataFrame of **true** (pre-observation) positions and magnitudes
+for stars and/or galaxies.  These catalogs are not part of `streamobs` and
+must be supplied by the developer.
+
+```python
+import pandas as pd
+df_stars = pd.read_parquet('/path/to/true_stars.parquet')
+df_galaxies = pd.read_parquet('/path/to/true_galaxies.parquet')
+```
+
+### 2. Run the builder
+
+```python
+from streamobs.background import BackgroundResourceBuilder, BackgroundStorage
+
+builder = BackgroundResourceBuilder(survey_name='lsst', release='yr5')
+builder.build(
+    catalog_stars=df_stars,
+    catalog_galaxies=df_galaxies,
+    bands=('g', 'r'),
+    maglim_ref_values=[25.0, 25.5, 26.0, 26.5, 27.0],
+    delta_range=(-1.0, 1.0),
+    delta_step=0.1,
+    source_type='both',
+)
+```
+
+### 3. Save to disk
+
+```python
+storage = BackgroundStorage(survey_name='lsst', release='yr5')
+builder.save(storage)
+```
+
+Files are written to `data/background/lsst_yr5/` — one parquet per
+``(source_type, bands)`` combination, e.g. `stars_gr.parquet`.
+
+## File naming convention
+
+`BackgroundStorage.get_path(source_type, bands)` returns the full path:
+
+```
+data/background/{survey_name}_{release}/{source_type}_{bands_str}.parquet
+```
+
+Example: `data/background/lsst_yr5/galaxies_gr.parquet`.
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 6241600..774a7ce 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -103,10 +103,17 @@ Documentation Contents
    surveys/Roman
 
 
+.. toctree::
+   :maxdepth: 1
+   :caption: Background generation
+
+   background
+   build_background_resources
+
 .. toctree::
    :maxdepth: 2
    :caption: For developers
-   
+
    modifying_streamobs
    new_survey
    update_data
diff --git a/pytest.ini b/pytest.ini
index e3a2752..303c1aa 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -27,6 +27,7 @@ markers =
     observed: marks tests for injection utilities
     model: marks tests for model functionality
     utils: marks tests for utility functions
+    background: marks tests for the background subpackage
 
 # Minimum Python version
 minversion = 3.8
\ No newline at end of file
diff --git a/streamobs/__init__.py b/streamobs/__init__.py
index fac239d..0421dae 100644
--- a/streamobs/__init__.py
+++ b/streamobs/__init__.py
@@ -1,2 +1,10 @@
 #!/usr/bin/env python
 """Nothing to see here."""
+
+from .background import (
+    Background,
+    BackgroundCatalogInjector,
+    BackgroundResourceBuilder,
+    BackgroundStorage,
+    LightBackgroundGenerator,
+)
diff --git a/streamobs/background/__init__.py b/streamobs/background/__init__.py
new file mode 100644
index 0000000..2cbf63f
--- /dev/null
+++ b/streamobs/background/__init__.py
@@ -0,0 +1,39 @@
+"""
+Background generation for stellar stream analysis.
+
+This subpackage provides two complementary pipelines:
+
+- **Full injection** (:class:`BackgroundCatalogInjector`): runs a known
+  catalog of true stars or galaxies through the complete
+  :class:`~streamobs.observed.StreamInjector` pipeline.
+- **Light generation** (:class:`LightBackgroundGenerator`): samples from
+  precomputed binned color–magnitude distributions stored by
+  :class:`BackgroundStorage`, giving fast per-pixel background realizations
+  without re-running the full injection.
+
+The top-level :class:`Background` class wraps both modes and lets users
+generate stars-only, galaxies-only, or combined backgrounds.
+
+Typical usage — light method with default bundled resources::
+
+    from streamobs.surveys import Survey
+    from streamobs.background import Background
+
+    survey = Survey.load('lsst', release='yr5')
+    bg = Background(survey, source_type='both', method='light')
+    catalog = bg.generate(phi1_limits=(-20, 20), phi2_limits=(-2, 2), gc_frame=frame)
+"""
+
+from .background import Background
+from .catalog_injector import BackgroundCatalogInjector
+from .generator import LightBackgroundGenerator
+from .resource_builder import BackgroundResourceBuilder
+from .storage import BackgroundStorage
+
+__all__ = [
+    "Background",
+    "BackgroundCatalogInjector",
+    "BackgroundResourceBuilder",
+    "BackgroundStorage",
+    "LightBackgroundGenerator",
+]
diff --git a/streamobs/background/background.py b/streamobs/background/background.py
new file mode 100644
index 0000000..359e682
--- /dev/null
+++ b/streamobs/background/background.py
@@ -0,0 +1,193 @@
+"""
+Top-level background generation wrapper.
+"""
+
+import os
+
+from ..surveys import Survey
+from .catalog_injector import BackgroundCatalogInjector
+from .generator import LightBackgroundGenerator
+from .storage import BackgroundStorage
+
+
+class Background:
+    """
+    High-level wrapper for background generation.
+
+    Dispatches to either the full injection method
+    (:class:`BackgroundCatalogInjector`) or the fast light method
+    (:class:`LightBackgroundGenerator`) depending on ``method``.  Stars and
+    galaxies are treated independently so users can build custom combinations.
+
+    When ``method='light'`` and ``storage=None``, the class falls back to the
+    bundled package-level resource files (if present).
+
+    Parameters
+    ----------
+    survey : Survey
+        Survey instance defining the observation conditions.
+    source_type : str, optional
+        Which background components to generate: ``'stars'``, ``'galaxies'``,
+        or ``'both'``. Default ``'both'``.
+    method : str, optional
+        Generation method: ``'light'`` (fast, uses precomputed CMD grids) or
+        ``'full'`` (runs the complete injection pipeline). Default ``'light'``.
+    storage : BackgroundStorage, optional
+        Resource storage for the light method.  When ``None``, package-bundled
+        resources are used (via :meth:`_default_storage`).
+    catalog_stars : pd.DataFrame, optional
+        True stellar catalog. Required when ``method='full'`` and
+        ``source_type`` includes stars.
+    catalog_galaxies : pd.DataFrame, optional
+        True galaxy catalog. Required when ``method='full'`` and
+        ``source_type`` includes galaxies.
+    **kwargs
+        Forwarded to the underlying injector or generator.
+
+    Examples
+    --------
+    Light method with bundled defaults::
+
+        bg = Background(survey, source_type='both', method='light')
+        catalog = bg.generate(phi1_limits=(-20, 20), phi2_limits=(-2, 2), gc_frame=frame)
+
+    Full injection with user-supplied catalogs::
+
+        bg = Background(
+            survey,
+            method='full',
+            source_type='stars',
+            catalog_stars=df_stars,
+        )
+        catalog = bg.generate(phi1_limits=(-20, 20), phi2_limits=(-2, 2))
+    """
+
+    def __init__(
+        self,
+        survey: Survey,
+        source_type: str = "both",
+        method: str = "light",
+        storage: BackgroundStorage = None,
+        catalog_stars=None,
+        catalog_galaxies=None,
+        **kwargs,
+    ):
+        if source_type not in ("stars", "galaxies", "both"):
+            raise ValueError(
+                f"source_type must be 'stars', 'galaxies', or 'both', got '{source_type}'."
+            )
+        if method not in ("light", "full"):
+            raise ValueError(f"method must be 'light' or 'full', got '{method}'.")
+
+        self.survey = survey
+        self.source_type = source_type
+        self.method = method
+        self.catalog_stars = catalog_stars
+        self.catalog_galaxies = catalog_galaxies
+        self._kwargs = kwargs
+
+        if storage is None and method == "light":
+            storage = self._default_storage(survey)
+        self.storage = storage
+
+    def generate(
+        self,
+        phi1_limits,
+        phi2_limits,
+        gc_frame=None,
+        **kwargs,
+    ) -> "pd.DataFrame":
+        """
+        Generate a background catalog for the given stream sky region.
+
+        Parameters
+        ----------
+        phi1_limits : tuple of float
+            ``(phi1_min, phi1_max)`` in degrees.
+        phi2_limits : tuple of float
+            ``(phi2_min, phi2_max)`` in degrees.
+        gc_frame : gala.coordinates.GreatCircleICRSFrame, optional
+            Great-circle frame. Required for the light method and when the
+            full method needs coordinate conversion.
+        **kwargs
+            Forwarded to the underlying generator or injector.
+
+        Returns
+        -------
+        pd.DataFrame
+            Background catalog. Column names follow the survey namespace
+            convention for the full method; for the light method columns are
+            ``phi1``, ``phi2``, ``color``, ``mag``, ``source_type``.
+        """
+        if self.method == "full":
+            return self._generate_full(phi1_limits, phi2_limits, gc_frame, **kwargs)
+        return self._generate_light(phi1_limits, phi2_limits, gc_frame, **kwargs)
+
+    def _generate_full(
+        self,
+        phi1_limits,
+        phi2_limits,
+        gc_frame,
+        **kwargs,
+    ) -> "pd.DataFrame":
+        """
+        Generate background via full catalog injection.
+
+        For each active source type (controlled by :attr:`source_type`), uses
+        :class:`BackgroundCatalogInjector` to inject the corresponding true
+        catalog into the survey. The results are concatenated.
+
+        Parameters
+        ----------
+        phi1_limits, phi2_limits : tuple of float
+        gc_frame : gala.coordinates.GreatCircleICRSFrame or None
+        **kwargs
+
+        Returns
+        -------
+        pd.DataFrame
+        """
+        ...
+
+    def _generate_light(
+        self,
+        phi1_limits,
+        phi2_limits,
+        gc_frame,
+        **kwargs,
+    ) -> "pd.DataFrame":
+        """
+        Generate background via the fast light method.
+
+        Delegates to :class:`LightBackgroundGenerator` using
+        :attr:`storage`.
+
+        Parameters
+        ----------
+        phi1_limits, phi2_limits : tuple of float
+        gc_frame : gala.coordinates.GreatCircleICRSFrame
+        **kwargs
+
+        Returns
+        -------
+        pd.DataFrame
+        """
+        ...
+
+    @staticmethod
+    def _default_storage(survey: Survey) -> BackgroundStorage:
+        """
+        Return a :class:`BackgroundStorage` pointing to bundled package resources.
+
+        Parameters
+        ----------
+        survey : Survey
+
+        Returns
+        -------
+        BackgroundStorage
+        """
+        return BackgroundStorage(
+            survey_name=survey.name,
+            release=survey.release,
+        )
diff --git a/streamobs/background/catalog_injector.py b/streamobs/background/catalog_injector.py
new file mode 100644
index 0000000..505c232
--- /dev/null
+++ b/streamobs/background/catalog_injector.py
@@ -0,0 +1,93 @@
+"""
+Full-injection background pipeline using known catalogs.
+"""
+
+import copy
+
+import numpy as np
+
+from ..observed import StreamInjector
+from ..surveys import Survey
+from ..utils import load_catalog
+
+
+class BackgroundCatalogInjector:
+    """
+    Thin wrapper around :class:`~streamobs.observed.StreamInjector` for
+    background catalog injection.
+
+    Provides :meth:`inject_stars` and :meth:`inject_galaxies` convenience
+    methods that each call :meth:`~streamobs.observed.StreamInjector.inject`
+    with the correct ``source_type``. Survey preparation — deep-copying,
+    zeroing dust, setting uniform magnitude limits — is handled by the single
+    :meth:`_prepare_survey` method and is shared between both injection modes.
+
+    Parameters
+    ----------
+    survey : Survey
+        Survey instance to inject into.
+    **kwargs
+        Forwarded to :meth:`_prepare_survey` at construction time (e.g.
+        ``no_dust``, ``uniform_maglim``).
+
+    Examples
+    --------
+    Inject a stars catalog with a uniform magnitude limit:
+
+    >>> injector = BackgroundCatalogInjector(survey, no_dust=True,
+    ...                                      uniform_maglim={'g': 26.0, 'r': 26.5})
+    >>> obs_stars = injector.inject_stars(catalog_df, bands=['g', 'r'])
+    """
+
+    def __init__(self, survey: Survey, **kwargs):
+        self._survey = survey
+        self._kwargs = kwargs
+
+    def inject_stars(self, catalog, bands=None, **kwargs) -> "pd.DataFrame":
+        """
+        Inject a catalog of stars through the full survey pipeline.
+
+        Parameters
+        ----------
+        catalog : pd.DataFrame or str
+            True stellar catalog. Accepts a DataFrame or a path to parquet/CSV
+            (loaded via :func:`~streamobs.utils.load_catalog`). Must contain
+            sky coordinates (``ra``/``dec`` or ``phi1``/``phi2``) and true
+            magnitude columns.
+        bands : list of str, optional
+            Photometric bands to inject. Defaults to ``['r', 'g']``.
+        **kwargs
+            Forwarded to :meth:`~streamobs.observed.StreamInjector.inject`.
+
+        Returns
+        -------
+        pd.DataFrame
+            Observed catalog with survey-namespaced magnitude and flag columns.
+        """
+        ...
+
+    def inject_galaxies(self, catalog, bands=None, **kwargs) -> "pd.DataFrame":
+        """
+        Inject a catalog of galaxies through the full survey pipeline.
+
+        Uses :meth:`~streamobs.surveys.Survey.get_gal_misclassification` for
+        the detection flag (probability a galaxy passes stellar selection).
+        Requires :attr:`~streamobs.surveys.Survey.gal_misclassification` to be
+        loaded on the survey.
+
+        Parameters
+        ----------
+        catalog : pd.DataFrame or str
+            True galaxy catalog. Accepts a DataFrame or a path to parquet/CSV.
+            Must contain sky coordinates and true magnitude columns.
+        bands : list of str, optional
+            Photometric bands to inject. Defaults to ``['r', 'g']``.
+        **kwargs
+            Forwarded to :meth:`~streamobs.observed.StreamInjector.inject`.
+
+        Returns
+        -------
+        pd.DataFrame
+            Observed catalog with survey-namespaced magnitude and flag columns.
+        """
+        ...
diff --git a/streamobs/background/generator.py b/streamobs/background/generator.py
new file mode 100644
index 0000000..f06bd05
--- /dev/null
+++ b/streamobs/background/generator.py
@@ -0,0 +1,278 @@
+"""
+Fast per-pixel background generation from precomputed CMD grids.
+"""
+
+import numpy as np
+
+from ..surveys import Survey
+from .storage import BackgroundStorage
+
+
+class LightBackgroundGenerator:
+    """
+    Generate background catalogs rapidly from precomputed CMD histogram grids.
+
+    For each HEALPix pixel in the requested sky region the generator:
+
+    1. Retrieves the local magnitude limits from the survey's HEALPix maps.
+    2. Selects the nearest precomputed CMD histogram in ``(maglim_ref, delta)``
+       space.
+    3. Scales the reference object count to the pixel area via a Poisson draw.
+    4. Samples ``(color, mag)`` pairs from the 2-D histogram.
+    5. Samples sky positions uniformly within the pixel.
+
+    Stars and galaxies are handled independently so that users can build
+    custom mixed background models.
+
+    Parameters
+    ----------
+    storage : BackgroundStorage
+        Storage backend holding the precomputed CMD grids.
+    survey : Survey
+        Survey with real (non-uniform) HEALPix magnitude limit maps used to
+        look up per-pixel depth.
+    bands : tuple of str, optional
+        ``(band1, band2)`` — must match the bands used when building resources.
+        Default ``('g', 'r')``.
+    **kwargs
+        Reserved for future use.
+
+    Examples
+    --------
+    >>> gen = LightBackgroundGenerator(storage, survey, bands=('g', 'r'))
+    >>> catalog = gen.generate(
+    ...     phi1_limits=(-20, 20),
+    ...     phi2_limits=(-2, 2),
+    ...     gc_frame=frame,
+    ...     nside=4096,
+    ...     source_type='both',
+    ... )
+    """
+
+    def __init__(
+        self,
+        storage: BackgroundStorage,
+        survey: Survey,
+        bands=("g", "r"),
+        **kwargs,
+    ):
+        self.storage = storage
+        self.survey = survey
+        self.bands = bands
+        # Lazy cache: {source_type: grid_dict}
+        self._resources: dict = {}
+
+    def generate(
+        self,
+        phi1_limits,
+        phi2_limits,
+        gc_frame,
+        nside=4096,
+        source_type="both",
+        **kwargs,
+    ) -> "pd.DataFrame":
+        """
+        Generate a background catalog for the given sky region.
+
+        Parameters
+        ----------
+        phi1_limits : tuple of float
+            ``(phi1_min, phi1_max)`` in degrees.
+        phi2_limits : tuple of float
+            ``(phi2_min, phi2_max)`` in degrees.
+        gc_frame : gala.coordinates.GreatCircleICRSFrame
+            Great-circle frame mapping ``(phi1, phi2)`` to ``(RA, Dec)``.
+        nside : int, optional
+            HEALPix resolution. Default ``4096``.
+        source_type : str, optional
+            ``'stars'``, ``'galaxies'``, or ``'both'``. Default ``'both'``.
+        **kwargs
+            rng : numpy.random.Generator, optional
+            seed : int, optional
+
+        Returns
+        -------
+        pd.DataFrame
+            Background catalog with columns ``phi1``, ``phi2``, ``color``,
+            ``mag`` (reference band), and ``source_type``.
+        """
+        ...
+
+    def _generate_one_type(
+        self,
+        phi1_limits,
+        phi2_limits,
+        gc_frame,
+        source_type: str,
+        nside: int,
+        **kwargs,
+    ) -> "pd.DataFrame":
+        """
+        Generate objects of a single source type pixel by pixel.
+
+        Parameters
+        ----------
+        phi1_limits, phi2_limits : tuple of float
+        gc_frame : gala.coordinates.GreatCircleICRSFrame
+        source_type : str
+            ``'stars'`` or ``'galaxies'``.
+        nside : int
+        **kwargs
+
+        Returns
+        -------
+        pd.DataFrame
+        """
+        ...
+
+    def _get_footprint_pixels(
+        self,
+        phi1_limits,
+        phi2_limits,
+        gc_frame,
+        nside: int,
+    ) -> np.ndarray:
+        """
+        Return HEALPix pixel indices that cover the given ``(phi1, phi2)`` box.
+
+        Parameters
+        ----------
+        phi1_limits, phi2_limits : tuple of float
+        gc_frame : gala.coordinates.GreatCircleICRSFrame
+        nside : int
+
+        Returns
+        -------
+        np.ndarray of int
+            Pixel indices.
+        """
+        ...
+
+    def _get_maglim_pixels(
+        self,
+        pixels: np.ndarray,
+        band: str,
+        nside: int,
+    ) -> np.ndarray:
+        """
+        Retrieve magnitude limits for a set of pixels from the survey map.
+
+        Parameters
+        ----------
+        pixels : np.ndarray of int
+        band : str
+        nside : int
+            Resolution of ``pixels``; the survey map may have a different nside
+            and will be re-pixelized if needed.
+
+        Returns
+        -------
+        np.ndarray of float
+        """
+        ...
+
+    def _select_cmd_distribution(
+        self,
+        maglim_ref: float,
+        delta: float,
+        source_type: str,
+    ) -> dict:
+        """
+        Nearest-neighbour lookup in the precomputed CMD grid.
+
+        Parameters
+        ----------
+        maglim_ref : float
+            Reference band magnitude limit for this pixel.
+        delta : float
+            ``maglim_band1 - maglim_ref`` for this pixel.
+        source_type : str
+            ``'stars'`` or ``'galaxies'``.
+
+        Returns
+        -------
+        dict
+            ``{'cmd_hist', 'color_edges', 'mag_edges', 'n_ref',
+            'area_ref_deg2'}`` for the nearest grid point.
+        """
+        ...
+
+    def _scale_n_objects(
+        self,
+        n_ref: int,
+        area_ref_deg2: float,
+        pixel_area_deg2: float,
+    ) -> int:
+        """
+        Draw the expected number of objects for a pixel via Poisson scaling.
+
+        Parameters
+        ----------
+        n_ref : int
+            Number of objects in the reference simulation.
+        area_ref_deg2 : float
+            Sky area of the reference simulation in deg².
+        pixel_area_deg2 : float
+            Sky area of the target pixel in deg².
+
+        Returns
+        -------
+        int
+            Poisson draw around ``n_ref * pixel_area_deg2 / area_ref_deg2``.
+        """
+        ...
+
+    def _sample_from_cmd(
+        self,
+        cmd_hist: np.ndarray,
+        color_edges: np.ndarray,
+        mag_edges: np.ndarray,
+        n_objects: int,
+        rng: np.random.Generator,
+    ) -> "pd.DataFrame":
+        """
+        Draw ``n_objects`` (color, mag) pairs from the 2-D CMD histogram.
+
+        Parameters
+        ----------
+        cmd_hist : np.ndarray
+            2-D array of counts with shape ``(n_color, n_mag)``.
+        color_edges : np.ndarray
+        mag_edges : np.ndarray
+        n_objects : int
+        rng : numpy.random.Generator
+
+        Returns
+        -------
+        pd.DataFrame
+            Columns ``color`` and ``mag``.
+        """
+        ...
+
+    def _sample_positions(
+        self,
+        n_objects: int,
+        pixel: int,
+        nside: int,
+        gc_frame,
+        rng: np.random.Generator,
+    ) -> "pd.DataFrame":
+        """
+        Sample ``(phi1, phi2)`` positions uniformly within a HEALPix pixel.
+
+        Parameters
+        ----------
+        n_objects : int
+        pixel : int
+            HEALPix pixel index.
+        nside : int
+        gc_frame : gala.coordinates.GreatCircleICRSFrame
+            Used to convert ``(RA, Dec)`` corners back to ``(phi1, phi2)``.
+        rng : numpy.random.Generator
+
+        Returns
+        -------
+        pd.DataFrame
+            Columns ``phi1`` and ``phi2``.
+        """
+        ...
diff --git a/streamobs/background/resource_builder.py b/streamobs/background/resource_builder.py
new file mode 100644
index 0000000..df02717
--- /dev/null
+++ b/streamobs/background/resource_builder.py
@@ -0,0 +1,257 @@
+"""
+Builder for precomputed background color–magnitude diagram (CMD) resources.
+"""
+
+import numpy as np
+
+from ..surveys import Survey, SurveyFactory
+from ..utils import load_catalog
+from .catalog_injector import BackgroundCatalogInjector
+from .storage import BackgroundStorage
+
+
+class BackgroundResourceBuilder:
+    """
+    Build precomputed CMD histogram grids for fast background generation.
+
+    Drives :class:`BackgroundCatalogInjector` on uniform surveys (no dust,
+    constant magnitude limits) at a grid of ``(maglim_ref, delta)`` pairs,
+    where ``delta = maglim_g - maglim_ref``.  The resulting 2-D color–magnitude
+    histograms are stored via :class:`BackgroundStorage` and later consumed by
+    :class:`~streamobs.background.generator.LightBackgroundGenerator`.
+
+    Parameters
+    ----------
+    survey_name : str, optional
+        Survey identifier (e.g. ``'lsst'``).
+    release : str, optional
+        Survey release string (e.g. ``'yr5'``).
+    **kwargs
+        Forwarded to :meth:`~streamobs.surveys.SurveyFactory.create_survey`.
+
+    Examples
+    --------
+    >>> builder = BackgroundResourceBuilder('lsst', release='yr5')
+    >>> builder.build(
+    ...     catalog_stars=df_stars,
+    ...     catalog_galaxies=df_gals,
+    ...     bands=('g', 'r'),
+    ...     maglim_ref_values=[25.5, 26.0, 26.5],
+    ...     delta_range=(-1.0, 1.0),
+    ...     delta_step=0.1,
+    ... )
+    >>> storage = BackgroundStorage(survey_name='lsst', release='yr5')
+    >>> builder.save(storage)
+    """
+
+    def __init__(self, survey_name="lsst", release=None, **kwargs):
+        self.survey_name = survey_name
+        self.release = release
+        self._kwargs = kwargs
+        # Nested dict: {source_type: {(maglim_ref, delta): config_dict}}
+        self.resources: dict = {}
+
+    def build(
+        self,
+        catalog_stars=None,
+        catalog_galaxies=None,
+        bands=("g", "r"),
+        maglim_ref_values=None,
+        delta_range=(-1.0, 1.0),
+        delta_step=0.1,
+        n_bins_color=50,
+        n_bins_mag=50,
+        source_type="both",
+        **kwargs,
+    ):
+        """
+        Build CMD histograms for all ``(maglim_ref, delta)`` configurations.
+
+        For each combination of source type and magnitude limit pair, injects
+        the catalog into a uniform (no-dust, constant-maglim) survey and
+        computes a 2-D histogram of ``(color, mag_ref_band)``.
+
+        Parameters
+        ----------
+        catalog_stars : pd.DataFrame or str, optional
+            True stellar catalog.  Required when ``source_type`` is
+            ``'stars'`` or ``'both'``.
+        catalog_galaxies : pd.DataFrame or str, optional
+            True galaxy catalog.  Required when ``source_type`` is
+            ``'galaxies'`` or ``'both'``.
+        bands : tuple of str, optional
+            Two band names ``(band1, band2)`` where color = band1 - band2 and
+            ``band2`` is the reference magnitude axis. Default ``('g', 'r')``.
+        maglim_ref_values : list of float, optional
+            Reference magnitude limits to sweep (for the second band in
+            ``bands``).
+        delta_range : tuple of float, optional
+            ``(min, max)`` of ``maglim_band1 - maglim_ref``.
+            Default ``(-1.0, 1.0)``.
+        delta_step : float, optional
+            Step size between delta values. Default ``0.1``.
+        n_bins_color : int, optional
+            Number of color histogram bins. Default ``50``.
+        n_bins_mag : int, optional
+            Number of magnitude histogram bins. Default ``50``.
+        source_type : str, optional
+            ``'stars'``, ``'galaxies'``, or ``'both'``. Default ``'both'``.
+        **kwargs
+            Forwarded to
+            :meth:`BackgroundCatalogInjector.inject_stars` /
+            :meth:`BackgroundCatalogInjector.inject_galaxies`.
+        """
+        ...
+
+    def _build_one_config(
+        self,
+        catalog,
+        source_type,
+        bands,
+        maglim_ref,
+        delta,
+        n_bins_color,
+        n_bins_mag,
+        **kwargs,
+    ) -> dict:
+        """
+        Build the CMD histogram for a single ``(maglim_ref, delta)`` pair.
+
+        Creates a uniform survey, injects the catalog, and histograms the
+        result.
+
+        Parameters
+        ----------
+        catalog : pd.DataFrame
+            True background catalog (stars or galaxies).
+        source_type : str
+            ``'stars'`` or ``'galaxies'``.
+        bands : tuple of str
+            ``(band1, band2)`` — color = band1_obs - band2_obs.
+        maglim_ref : float
+            Magnitude limit for the reference band (``band2``).
+        delta : float
+            ``maglim_band1 - maglim_ref``.
+        n_bins_color : int
+            Number of color bins.
+        n_bins_mag : int
+            Number of magnitude bins.
+        **kwargs
+            Forwarded to the injector.
+
+        Returns
+        -------
+        dict
+            ``{'cmd_hist': np.ndarray, 'color_edges': np.ndarray,
+            'mag_edges': np.ndarray, 'n_ref': int,
+            'area_ref_deg2': float}``
+        """
+        ...
+
+    def _prepare_survey(
+        self,
+        survey: Survey,
+        no_dust: bool = False,
+        uniform_maglim: dict = None,
+        **kwargs,
+    ) -> Survey:
+        """
+        Return a modified copy of the survey for background injection.
+
+        Creates a deep copy so that modifications do not affect the original.
+        Handles both the full-injection case (no-dust only) and the resource-
+        building case (uniform magnitude limits + no dust).
+
+        Parameters
+        ----------
+        survey : Survey
+            Survey to copy and modify.
+        no_dust : bool, optional
+            If True, zero the EBV map. Default is False.
+        uniform_maglim : dict, optional
+            Mapping ``{band: value}`` to replace per-pixel magnitude limit maps
+            with constant arrays. E.g. ``{'g': 26.0, 'r': 26.5}``.
+            When provided, ``no_dust`` is implicitly set to True since uniform
+            surveys are always dust-free by construction.
+        **kwargs
+            Reserved for future use.
+
+        Returns
+        -------
+        Survey
+            Modified copy of the survey.
+        """
+        ...
+
+    def _compute_cmd_histogram(
+        self,
+        observed_df,
+        bands: tuple,
+        n_bins_color: int,
+        n_bins_mag: int,
+    ) -> dict:
+        """
+        Build a 2-D color–magnitude histogram from an observed catalog.
+
+        Color is defined as ``mag_band1_obs - mag_band2_obs`` and the
+        magnitude axis is ``mag_band2_obs``.
+
+        Parameters
+        ----------
+        observed_df : pd.DataFrame
+            Output of the catalog injector (observed magnitudes).
+        bands : tuple of str
+            ``(band1, band2)``.
+        n_bins_color : int
+            Number of color bins.
+        n_bins_mag : int
+            Number of magnitude bins.
+
+        Returns
+        -------
+        dict
+            ``{'cmd_hist': np.ndarray (n_bins_color × n_bins_mag),
+            'color_edges': np.ndarray, 'mag_edges': np.ndarray}``
+        """
+        ...
+
+    def save(self, storage: BackgroundStorage, source_type="both", **kwargs):
+        """
+        Persist the built resources to disk via ``storage``.
+
+        Parameters
+        ----------
+        storage : BackgroundStorage
+            Storage backend to use.
+        source_type : str, optional
+            Which source types to save: ``'stars'``, ``'galaxies'``, or
+            ``'both'``. Default ``'both'``.
+        **kwargs
+            Forwarded to :meth:`BackgroundStorage.save_data`.
+        """
+        ...
+
+    @classmethod
+    def load(
+        cls,
+        storage: BackgroundStorage,
+        source_type="both",
+        **kwargs,
+    ) -> "BackgroundResourceBuilder":
+        """
+        Load precomputed resources from disk into a new builder instance.
+
+        Parameters
+        ----------
+        storage : BackgroundStorage
+            Storage backend to read from.
+        source_type : str, optional
+            Which source types to load: ``'stars'``, ``'galaxies'``, or
+            ``'both'``. Default ``'both'``.
+
+        Returns
+        -------
+        BackgroundResourceBuilder
+            Populated instance with :attr:`resources` filled.
+        """
+        ...
diff --git a/streamobs/background/storage.py b/streamobs/background/storage.py
new file mode 100644
index 0000000..6a8b4d7
--- /dev/null
+++ b/streamobs/background/storage.py
@@ -0,0 +1,145 @@
+"""
+Persistence layer for precomputed background CMD histogram grids.
+"""
+
+import os
+
+
+class BackgroundStorage:
+    """
+    Save and load precomputed color–magnitude diagram (CMD) histogram grids.
+
+    One compressed parquet file is stored per ``(source_type, bands)``
+    combination, e.g. ``stars_gr.parquet``.  All files live under
+    ``data/background/`` (which is excluded from version control).
+
+    **File format** — long-format parquet with one row per
+    ``(maglim_r, maglim_g, color_center, mag_center)`` cell::
+
+        maglim_r | maglim_g | color_center | mag_center | count | n_ref | area_ref_deg2
+
+    Parquet's columnar compression handles repeated ``color_center`` and
+    ``mag_center`` values efficiently.
+
+    Parameters
+    ----------
+    base_path : str, optional
+        Root directory for resource files.  Defaults to
+        ``{package_root}/data/background/``.
+    survey_name : str, optional
+        Survey identifier (e.g. ``'lsst'``).  Used in the file path.
+    release : str, optional
+        Survey release string (e.g. ``'yr5'``).  Used in the file path.
+
+    Examples
+    --------
+    >>> storage = BackgroundStorage(survey_name='lsst', release='yr5')
+    >>> path = storage.get_path('stars', ('g', 'r'))
+    >>> # -> .../data/background/lsst_yr5/stars_gr.parquet
+    """
+
+    def __init__(self, base_path=None, survey_name="lsst", release=None, **kwargs):
+        if base_path is None:
+            _pkg_root = os.path.join(os.path.dirname(__file__), "..", "..", "data")
+            base_path = os.path.join(_pkg_root, "background")
+        self.base_path = base_path
+        self.survey_name = survey_name
+        self.release = release
+
+    def get_path(self, source_type: str, bands: tuple) -> str:
+        """
+        Build the file path for a given ``(source_type, bands)`` combination.
+
+        Parameters
+        ----------
+        source_type : str
+            ``'stars'`` or ``'galaxies'``.
+        bands : tuple of str
+            Band names in order, e.g. ``('g', 'r')``.
+
+        Returns
+        -------
+        str
+            Absolute path to the parquet file, e.g.
+            ``{base_path}/lsst_yr5/stars_gr.parquet``.
+        """
+        survey_key = (
+            f"{self.survey_name}_{self.release}" if self.release else self.survey_name
+        )
+        bands_str = "".join(bands)
+        filename = f"{source_type}_{bands_str}.parquet"
+        return os.path.join(self.base_path, survey_key, filename)
+
+    def save_data(
+        self,
+        data: dict,
+        source_type: str,
+        bands: tuple,
+        **kwargs,
+    ):
+        """
+        Persist the CMD histogram grid to a compressed parquet file.
+
+        The ``data`` dict is keyed by ``(maglim_r, maglim_g)`` and each value
+        is a dict with keys ``cmd_hist``, ``color_edges``, ``mag_edges``,
+        ``n_ref``, and ``area_ref_deg2`` (as returned by
+        :meth:`~streamobs.background.resource_builder.BackgroundResourceBuilder._build_one_config`).
+        The grid is flattened to long-format before writing.
+
+        Parameters
+        ----------
+        data : dict
+            CMD histogram grid keyed by ``(maglim_r, maglim_g)``.
+        source_type : str
+            ``'stars'`` or ``'galaxies'``.
+        bands : tuple of str
+            Band names, e.g. ``('g', 'r')``.
+        **kwargs
+            compression : str, optional
+                Parquet compression codec. Default is ``'zstd'``.
+        """
+        ...
+
+    def load_data(
+        self,
+        source_type: str,
+        bands: tuple,
+        **kwargs,
+    ) -> dict:
+        """
+        Load a CMD histogram grid from the parquet file for this combination.
+
+        Reads the file returned by :meth:`get_path` and reconstructs the
+        nested dict keyed by ``(maglim_r, maglim_g)``.
+
+        Parameters
+        ----------
+        source_type : str
+            ``'stars'`` or ``'galaxies'``.
+        bands : tuple of str
+            Band names, e.g. ``('g', 'r')``.
+
+        Returns
+        -------
+        dict
+            CMD grid keyed by ``(maglim_r, maglim_g)`` → ``{'cmd_hist',
+            'color_edges', 'mag_edges', 'n_ref', 'area_ref_deg2'}``.
+        """
+        ...
+
+    def exists(self, source_type: str, bands: tuple) -> bool:
+        """
+        Check whether the resource file for this combination exists on disk.
+
+        Parameters
+        ----------
+        source_type : str
+            ``'stars'`` or ``'galaxies'``.
+        bands : tuple of str
+            Band names, e.g. ``('g', 'r')``.
+
+        Returns
+        -------
+        bool
+        """
+        return os.path.exists(self.get_path(source_type, bands))
diff --git a/streamobs/observed.py b/streamobs/observed.py
index 4e2ce34..3f11ee4 100644
--- a/streamobs/observed.py
+++ b/streamobs/observed.py
@@ -244,6 +244,12 @@ def inject(self, data, bands=None, stream_config=None, **kwargs):
             perfect_galstarsep : bool, optional
                 If True, also computes a flag assuming perfect star/galaxy separation
                 (detection efficiency only, no classification losses). Default is False.
+                Only applies when ``source_type='stars'``.
+            source_type : str, optional
+                Type of source being injected. Either ``'stars'`` (default) or
+                ``'galaxies'``. When ``'galaxies'``, the detection flag uses
+                :meth:`~streamobs.surveys.Survey.get_gal_misclassification`
+                instead of the star completeness function.
             verbose : bool, optional
                 Whether to print progress information. Default is True.
 
@@ -362,7 +368,7 @@ def _inject_one_survey(self, data, bands, survey, rng=None, seed=None, **kwargs)
             Seed used to build an RNG when ``rng`` is None.
         **kwargs
             ``nside``, ``detection_mag_cut``, ``dust_correction``,
-            ``perfect_galstarsep``, ``verbose`` (see :meth:`inject`).
+            ``perfect_galstarsep``, ``source_type``, ``verbose`` (see :meth:`inject`).
 
         Returns
         -------
@@ -461,6 +467,7 @@ def _inject_one_survey(self, data, bands, survey, rng=None, seed=None, **kwargs)
 
             # Compute detection flag for completeness-band (reference band)
             if band == survey.completeness_band:
+                source_type = kwargs.get("source_type", "stars")
                 flag_completeness_band = self.detect_flag(
                     pix_maglim,
                     survey=survey,
@@ -469,9 +476,10 @@ def _inject_one_survey(self, data, bands, survey, rng=None, seed=None, **kwargs)
                     rng=rng,
                     seed=seed,
                     perfect_galstarsep=False,
+                    source_type=source_type,
                     **kwargs,
                 )
-                if perfect_galstarsep:
+                if perfect_galstarsep and source_type == "stars":
                     flag_detection_only_band = self.detect_flag(
                         pix_maglim,
                         survey=survey,
@@ -480,6 +488,7 @@ def _inject_one_survey(self, data, bands, survey, rng=None, seed=None, **kwargs)
                         rng=rng,
                         seed=seed,
                         perfect_galstarsep=True,
+                        source_type=source_type,
                         **kwargs,
                     )
 
@@ -1166,10 +1175,14 @@ def sample_measured_magnitudes(self, mag_true, mag_err, **kwargs):
 
     def detect_flag(self, pix, survey, mag=None, band="r", **kwargs):
         """
-        Apply the survey selection to determine detection flags for stars.
+        Apply the survey selection to determine detection flags.
 
-        This method uses the survey completeness function and random sampling
-        to determine which stars would be detected by the survey.
+        For stars (``source_type='stars'``, the default), uses the survey's
+        combined completeness (detection × classification) or detection-only
+        efficiency when ``perfect_galstarsep=True``. For galaxies
+        (``source_type='galaxies'``), uses
+        :meth:`~streamobs.surveys.Survey.get_gal_misclassification` and
+        ignores ``perfect_galstarsep``.
 
         Parameters
         ----------
@@ -1180,8 +1193,7 @@ def detect_flag(self, pix, survey, mag=None, band="r", **kwargs):
         band : str, optional
             Band to consider for detection. Default is 'r'.
         survey : Survey
-            Survey whose completeness/detection-efficiency curves to use
-            (required).
+            Survey whose efficiency curves to use (required).
         **kwargs
             Additional keyword arguments:
 
@@ -1189,13 +1201,17 @@ def detect_flag(self, pix, survey, mag=None, band="r", **kwargs):
                 Random number generator instance.
             seed : int, optional
                 Random seed if rng is not provided.
+            source_type : str, optional
+                ``'stars'`` (default) or ``'galaxies'``.
             perfect_galstarsep : bool, optional
-                If True, assumes perfect star/galaxy separation. Default is False.
+                If True and ``source_type='stars'``, uses detection-only
+                efficiency (no classification losses). Ignored for galaxies.
+                Default is False.
 
         Returns
         -------
         np.ndarray
-            Boolean array: True for detected stars, False otherwise.
+            Boolean array: True for detected objects, False otherwise.
 
         Raises
         ------
@@ -1208,16 +1224,18 @@ def detect_flag(self, pix, survey, mag=None, band="r", **kwargs):
             seed = kwargs.pop("seed", None)
             rng = np.random.default_rng(seed)
 
-        # Select the appropriate magnitude and map depending on the band
         maglim = survey.get_maglim(band, pixel=pix)
 
-        perfect_galstarsep = kwargs.get("perfect_galstarsep", False)
-        if perfect_galstarsep:
-            compl = survey.get_detection_efficiency(band, mag, maglim)
+        source_type = kwargs.get("source_type", "stars")
+        if source_type == "galaxies":
+            compl = survey.get_gal_misclassification(band, mag, maglim)
         else:
-            compl = survey.get_completeness(band, mag, maglim)
+            perfect_galstarsep = kwargs.get("perfect_galstarsep", False)
+            if perfect_galstarsep:
+                compl = survey.get_detection_efficiency(band, mag, maglim)
+            else:
+                compl = survey.get_completeness(band, mag, maglim)
 
-        # Set the threshold using completeness
         threshold = rng.uniform(size=len(mag)) <= compl
 
         return threshold
diff --git a/streamobs/surveys.py b/streamobs/surveys.py
index be1c704..84be45d 100644
--- a/streamobs/surveys.py
+++ b/streamobs/surveys.py
@@ -120,6 +120,7 @@ class Survey:
     delta_saturation: Optional[float] = None
     log_photo_error_catalog: Optional[Callable] = None
     log_photo_error_sample: Optional[Callable] = None
+    gal_misclassification: Optional[Callable] = None
 
     # Band-independent maps
     ebv_map: Optional[np.ndarray] = None
@@ -560,6 +561,62 @@ def get_efficiency(
 
         return compl
 
+    def get_gal_misclassification(
+        self, band: str, magnitude: float, maglim: float, **kwargs
+    ) -> float:
+        """
+        Get galaxy misclassification efficiency (probability a galaxy passes stellar selection).
+
+        Mirrors :meth:`get_efficiency` for the ``"completeness"`` type, but
+        **without 1-padding at the bright end**: bright galaxies are not forced
+        to efficiency = 1 when their delta_mag falls below the saturation
+        threshold. This reproduces the behaviour of ``custom_get_completeness``
+        used for galaxies in external background-generation scripts.
+
+        Parameters
+        ----------
+        band : str
+            Band identifier (e.g., 'g', 'r').
+        magnitude : float or np.ndarray
+            True apparent magnitude(s) including extinction.
+        maglim : float or np.ndarray
+            Magnitude limit(s) at the source position(s).
+        **kwargs
+            delta_saturation : float, optional
+                Override the survey's default saturation threshold.
+
+        Returns
+        -------
+        float or np.ndarray
+            Misclassification probability in [0, 1].
+
+        Raises
+        ------
+        ValueError
+            If :attr:`gal_misclassification` has not been loaded.
+        """
+        if self.gal_misclassification is None:
+            raise ValueError(
+                "gal_misclassification function not loaded for this survey. "
+                "Load it via SurveyFactory.set_gal_misclassification() or the "
+                "'gal_misclassification' config key."
+            )
+
+        delta_mag = magnitude - maglim
+
+        # No 1-padding: apply the interpolator directly
+        compl = self.gal_misclassification(delta_mag)
+
+        compl = np.where(
+            magnitude < self.saturation[band], 0.0, compl
+        )  # saturation at the bright end
+
+        compl = np.where(
+            (maglim < self.saturation[band]) | np.isnan(maglim), 0.0, compl
+        )  # not observed if the area is not covered
+
+        return compl
+
 
 class SurveyFactory:
     """
@@ -1165,6 +1222,27 @@ def _load_survey_data(cls, survey: Survey, config: dict, **kwargs):
                 if verbose:
                     print("No classification efficiency file found, skipping.")
 
+        # Load galaxy misclassification efficiency (optional)
+        if "gal_misclassification" in survey_config:
+            try:
+                cls._load_file(
+                    survey,
+                    survey_config,
+                    "gal_misclassification",
+                    "Galaxy misclassification efficiency",
+                    lambda f: cls.set_completeness(
+                        f,
+                        delta_saturation=survey.delta_saturation,
+                        selection="missclassified",
+                    ),
+                    data_path_survey,
+                    data_path_others,
+                    **kwargs,
+                )
+            except Exception as e:
+                if verbose:
+                    print(f"Could not load galaxy misclassification file: {e}")
+
         # Load photometric error model(s), same for all bands. Two curves:
         #   - catalog : reported error vs delta_mag (the survey's photoerror file).
         #               Written as magerr and used for the S/N cut. Always loaded.
@@ -1401,6 +1479,8 @@ def set_completeness(filename, delta_saturation=-10.4, selection="both"):
             - 'classification_eff' : Classification efficiency only (the legacy
               misspelled header 'classifiction_eff' is also accepted)
             - 'classification_detection_eff' : Combined efficiency
+            - 'gal_misclassification_eff' : Galaxy misclassification efficiency
+              (probability a galaxy passes stellar selection)
 
         delta_saturation : float, optional
             Magnitude difference threshold for saturation. Default is -10.4.
@@ -1410,7 +1490,10 @@ def set_completeness(filename, delta_saturation=-10.4, selection="both"):
             - 'detected' : Detection efficiency only (column 'detection_eff')
             - 'classified' : Classification efficiency only (column
               'classification_eff', or legacy 'classifiction_eff')
-            - 'both' : Combined detection and classification (column 'classification_detection_eff')
+            - 'both' : Combined detection and classification (column
+              'classification_detection_eff')
+            - 'missclassified' : Galaxy misclassification efficiency (column
+              'gal_misclassification_eff')
 
             Default is 'both'.
 
@@ -1422,7 +1505,7 @@ def set_completeness(filename, delta_saturation=-10.4, selection="both"):
         Raises
         ------
         ValueError
-            If selection is not one of 'detected', 'classified', or 'both'.
+            If selection is not one of the recognised values.
 
         Notes
         -----
@@ -1446,9 +1529,12 @@ def set_completeness(filename, delta_saturation=-10.4, selection="both"):
                 efficiencies = data["classifiction_eff"]
         elif selection == "both":
             efficiencies = data["classification_detection_eff"]
+        elif selection == "missclassified":
+            efficiencies = data["gal_misclassification_eff"]
         else:
             raise ValueError(
-                f"Invalid selection '{selection}'. Must be 'detected', 'classified', or 'both'"
+                f"Invalid selection '{selection}'. Must be 'detected', 'classified', "
+                "'both', or 'missclassified'."
             )
 
         # Extend efficiency to bright end (force to zero at saturation)
diff --git a/streamobs/utils.py b/streamobs/utils.py
index 3f19ccd..f52349a 100644
--- a/streamobs/utils.py
+++ b/streamobs/utils.py
@@ -3,9 +3,43 @@
 Utils for streamobs
 """
 
+import pandas as pd
 import yaml
 
 
+def load_catalog(catalog):
+    """Load a catalog as a :class:`pandas.DataFrame`.
+
+    Parameters
+    ----------
+    catalog : pd.DataFrame or str
+        A DataFrame (returned as-is) or a path to a parquet or CSV file.
+
+    Returns
+    -------
+    pd.DataFrame
+
+    Raises
+    ------
+    ValueError
+        If ``catalog`` is neither a DataFrame nor a recognised file path.
+    """
+    if isinstance(catalog, pd.DataFrame):
+        return catalog
+    if isinstance(catalog, str):
+        if catalog.endswith(".parquet"):
+            return pd.read_parquet(catalog)
+        if catalog.endswith(".csv"):
+            return pd.read_csv(catalog)
+        raise ValueError(
+            f"Unrecognised file extension for catalog path '{catalog}'. "
+            "Expected .parquet or .csv."
+        )
+    raise ValueError(
+        f"catalog must be a DataFrame or a path string, got {type(catalog)}."
+    )
+
+
 def parse_config(config):
     """Parse a yaml formatted file or string into a dict.
 
@@ -20,6 +54,6 @@ def parse_config(config):
     try:
         # If `config` is a file
         return yaml.safe_load(open(config, "r"))
-    except OSError, FileNotFoundError:
+    except (OSError, FileNotFoundError):
         # Otherwise assume it is a string
         return yaml.safe_load(config)
diff --git a/tests/test_background.py b/tests/test_background.py
new file mode 100644
index 0000000..b3800c9
--- /dev/null
+++ b/tests/test_background.py
@@ -0,0 +1,328 @@
+"""
+Tests for the streamobs.background subpackage.
+
+All tests are marked ``background`` so they can be run in isolation:
+
+    pytest tests/test_background.py -m background
+
+Tests that write to disk (Storage, ResourceBuilder) use the ``tmp_path``
+pytest fixture so the temporary directory is created and cleaned up
+automatically regardless of test outcome.
+"""
+
+import numpy as np
+import pandas as pd
+import pytest
+
+pytestmark = pytest.mark.background
+
+
+# ---------------------------------------------------------------------------
+# Shared fixtures
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture(scope="module")
+def mock_survey(verbose=False):
+    from streamobs.surveys import Survey
+
+    return Survey.load("lsst", release="yr4", verbose=verbose)
+
+
+@pytest.fixture(scope="module")
+def tiny_stars_catalog():
+    """Minimal DataFrame that looks like a true background star catalog."""
+    rng = np.random.default_rng(0)
+    n = 100
+    return pd.DataFrame(
+        {
+            "ra": rng.uniform(30.0, 60.0, n),
+            "dec": rng.uniform(-20.0, 0.0, n),
+            "lsst_yr4_g_true": rng.uniform(20.0, 27.0, n),
+            "lsst_yr4_r_true": rng.uniform(20.0, 27.0, n),
+        }
+    )
+
+
+@pytest.fixture(scope="module")
+def tiny_galaxies_catalog():
+    """Minimal DataFrame that looks like a true background galaxy catalog."""
+    rng = np.random.default_rng(1)
+    n = 100
+    return pd.DataFrame(
+        {
+            "ra": rng.uniform(30.0, 60.0, n),
+            "dec": rng.uniform(-20.0, 0.0, n),
+            "lsst_yr4_g_true": rng.uniform(20.0, 28.0, n),
+            "lsst_yr4_r_true": rng.uniform(20.0, 28.0, n),
+        }
+    )
+
+
+# ---------------------------------------------------------------------------
+# Part 1 — Survey galaxy misclassification
+# ---------------------------------------------------------------------------
+
+
+class TestSurveyGalMisclassification:
+    """Tests for the gal_misclassification field and method on Survey."""
+
+    def test_gal_misclassification_field_exists(self, mock_survey):
+        """Survey dataclass must expose the gal_misclassification attribute."""
+        assert hasattr(mock_survey, "gal_misclassification")
+
+    def test_get_gal_misclassification_raises_when_not_loaded(self, mock_survey):
+        """get_gal_misclassification raises ValueError if the function is not loaded."""
+        import copy
+
+        s = copy.deepcopy(mock_survey)
+        s.gal_misclassification = None
+        mags = np.linspace(20.0, 27.0, 10)
+        maglim = np.full(10, 26.5)
+        with pytest.raises(ValueError, match="gal_misclassification"):
+            s.get_gal_misclassification("r", mags, maglim)
+
+    def test_get_gal_misclassification_no_1padding(self, mock_survey):
+        """get_gal_misclassification must NOT return 1 at the bright end (no 1-padding).
+
+        Verify by checking that the method returns 0 for very bright objects
+        (below saturation) and does not saturate to 1.0 for delta_mag far below
+        delta_saturation.
+        """
+        ...
+
+    def test_set_completeness_missclassified_loads_file(self, tmp_path):
+        """set_completeness with selection='missclassified' must return a callable interpolator."""
+        from streamobs.surveys import SurveyFactory
+
+        csv_path = tmp_path / "gal_misclassification.csv"
+        csv_path.write_text(
+            "delta_mag,gal_misclassification_eff\n"
+            "-12.0,0.0\n"
+            "-5.0,0.0\n"
+            "0.0,0.3\n"
+            "1.0,0.0\n"
+        )
+        func = SurveyFactory.set_completeness(str(csv_path), selection="missclassified")
+        assert callable(func)
+        result = func(np.array([0.0]))
+        assert 0.0 <= float(result) <= 1.0
+
+
+# ---------------------------------------------------------------------------
+# Part 2 — StreamInjector source_type parameter
+# ---------------------------------------------------------------------------
+
+
+class TestInjectSourceType:
+    """Tests that source_type is accepted and routed correctly in StreamInjector."""
+
+    def test_inject_accepts_source_type_kwarg(self):
+        """inject() must accept source_type without raising TypeError."""
+        from streamobs.observed import StreamInjector
+        import inspect
+
+        sig = inspect.signature(StreamInjector.inject)
+        # source_type is passed through **kwargs — verify inject accepts **kwargs
+        assert "kwargs" in str(sig) or "source_type" in sig.parameters
+
+    def test_inject_galaxies_uses_gal_misclassification(
+        self, mock_survey, tiny_galaxies_catalog
+    ):
+        """inject() with source_type='galaxies' must reach get_gal_misclassification."""
+        ...
+
+
+# ---------------------------------------------------------------------------
+# Part 3 — BackgroundCatalogInjector
+# ---------------------------------------------------------------------------
+
+
+class TestBackgroundCatalogInjector:
+    """Tests for BackgroundCatalogInjector."""
+
+    def test_init(self, mock_survey):
+        """BackgroundCatalogInjector can be instantiated with a Survey."""
+        from streamobs.background import BackgroundCatalogInjector
+
+        inj = BackgroundCatalogInjector(mock_survey)
+        assert inj._survey is mock_survey
+
+    def test_prepare_survey_no_dust(self, mock_survey):
+        """_prepare_survey with no_dust=True must zero the EBV map."""
+        ...
+
+    def test_prepare_survey_uniform_maglim(self, mock_survey):
+        """_prepare_survey with uniform_maglim must replace maglim maps with constants."""
+        ...
+
+    def test_inject_stars_delegates_to_stream_injector(
+        self, mock_survey, tiny_stars_catalog
+    ):
+        """inject_stars must return a DataFrame (delegates to StreamInjector)."""
+        ...
+
+    def test_inject_galaxies_delegates_to_stream_injector(
+        self, mock_survey, tiny_galaxies_catalog
+    ):
+        """inject_galaxies must return a DataFrame."""
+        ...
+
+
+# ---------------------------------------------------------------------------
+# Part 4 — BackgroundStorage
+# ---------------------------------------------------------------------------
+
+
+class TestBackgroundStorage:
+    """Tests for BackgroundStorage persistence helpers."""
+
+    def test_get_path_naming(self, tmp_path):
+        """get_path must embed source_type and band names in the filename."""
+        from streamobs.background import BackgroundStorage
+
+        storage = BackgroundStorage(
+            base_path=str(tmp_path), survey_name="lsst", release="yr4"
+        )
+        path = storage.get_path("stars", ("g", "r"))
+        assert "stars" in path
+        assert "gr" in path
+        assert path.endswith(".parquet")
+
+    def test_save_data_creates_file(self, tmp_path):
+        """save_data must write a file at the path returned by get_path."""
+        ...
+
+    def test_load_data_roundtrip(self, tmp_path):
+        """load_data must recover the dict saved by save_data."""
+        ...
+
+    def test_exists_false_before_save(self, tmp_path):
+        """exists must return False when the file is not on disk."""
+        from streamobs.background import BackgroundStorage
+
+        storage = BackgroundStorage(
+            base_path=str(tmp_path), survey_name="lsst", release="yr4"
+        )
+        assert storage.exists("stars", ("g", "r")) is False
+
+    def test_exists_true_after_save(self, tmp_path):
+        """exists must return True after save_data has been called."""
+        ...
+
+
+# ---------------------------------------------------------------------------
+# Part 5 — BackgroundResourceBuilder
+# ---------------------------------------------------------------------------
+
+
+class TestBackgroundResourceBuilder:
+    """Tests for BackgroundResourceBuilder (uses tmp_path for storage)."""
+
+    def test_init(self):
+        """BackgroundResourceBuilder can be instantiated."""
+        from streamobs.background import BackgroundResourceBuilder
+
+        builder = BackgroundResourceBuilder(survey_name="lsst", release="yr4")
+        assert builder.survey_name == "lsst"
+        assert isinstance(builder.resources, dict)
+
+    def test_build_one_config(self, mock_survey, tiny_stars_catalog):
+        """_build_one_config must return a dict with the expected keys."""
+        ...
+
+    def test_save_via_storage(self, tmp_path, tiny_stars_catalog):
+        """save must write a parquet file via BackgroundStorage."""
+        ...
+
+    def test_load_via_storage(self, tmp_path, tiny_stars_catalog):
+        """load must reconstruct resources from the file saved by save."""
+        ...
+
+
+# ---------------------------------------------------------------------------
+# Part 6 — LightBackgroundGenerator
+# ---------------------------------------------------------------------------
+
+
+class TestLightBackgroundGenerator:
+    """Tests for LightBackgroundGenerator."""
+
+    @pytest.fixture(scope="class")
+    def storage_with_data(self, tmp_path_factory, mock_survey, tiny_stars_catalog):
+        """BackgroundStorage pre-populated with a minimal CMD grid."""
+        ...
+
+    def test_init(self, mock_survey, tmp_path):
+        """LightBackgroundGenerator can be instantiated."""
+        from streamobs.background import BackgroundStorage, LightBackgroundGenerator
+
+        storage = BackgroundStorage(base_path=str(tmp_path), survey_name="lsst")
+        gen = LightBackgroundGenerator(storage, mock_survey)
+        assert gen.survey is mock_survey
+
+    def test_generate_stars(self):
+        """generate with source_type='stars' must return a DataFrame."""
+        ...
+
+    def test_generate_galaxies(self):
+        """generate with source_type='galaxies' must return a DataFrame."""
+        ...
+
+    def test_generate_both(self):
+        """generate with source_type='both' must return combined DataFrame."""
+        ...
+
+
+# ---------------------------------------------------------------------------
+# Part 7 — Background (top-level wrapper)
+# ---------------------------------------------------------------------------
+
+
+class TestBackground:
+    """Tests for the Background wrapper class."""
+
+    def test_init_rejects_invalid_source_type(self, mock_survey):
+        """Background raises ValueError for unknown source_type."""
+        from streamobs.background import Background
+
+        with pytest.raises(ValueError, match="source_type"):
+            Background(mock_survey, source_type="invalid")
+
+    def test_init_rejects_invalid_method(self, mock_survey):
+        """Background raises ValueError for unknown method."""
+        from streamobs.background import Background
+
+        with pytest.raises(ValueError, match="method"):
+            Background(mock_survey, method="invalid")
+
+    def test_full_method_stars(self, mock_survey, tiny_stars_catalog):
+        """Background with method='full' and source_type='stars' must generate a catalog."""
+        ...
+
+    def test_full_method_galaxies(self, mock_survey, tiny_galaxies_catalog):
+        """Background with method='full' and source_type='galaxies' must generate a catalog."""
+        ...
+
+    def test_full_method_both(
+        self, mock_survey, tiny_stars_catalog, tiny_galaxies_catalog
+    ):
+        """Background with method='full' and source_type='both' must combine catalogs."""
+        ...
+
+    def test_light_method_default_storage(self, mock_survey):
+        """Background with method='light' and storage=None must fall back to _default_storage."""
+        from streamobs.background import Background
+
+        bg = Background(mock_survey, method="light")
+        assert bg.storage is not None
+
+    def test_light_method_custom_storage(self, mock_survey, tmp_path):
+        """Background accepts a user-supplied BackgroundStorage."""
+        from streamobs.background import Background, BackgroundStorage
+
+        storage = BackgroundStorage(
+            base_path=str(tmp_path), survey_name="lsst", release="yr4"
+        )
+        bg = Background(mock_survey, method="light", storage=storage)
+        assert bg.storage is storage

From e765e9d5033d826fbab9fec3122e870c33174cba Mon Sep 17 00:00:00 2001
From: MatthieuPe <matthieupelissier01@gmail.com>
Date: Fri, 19 Jun 2026 17:41:11 +0200
Subject: [PATCH 2/8] code wrapper

---
 streamobs/background/catalog_injector.py | 39 ++++++++++--------------
 1 file changed, 16 insertions(+), 23 deletions(-)

diff --git a/streamobs/background/catalog_injector.py b/streamobs/background/catalog_injector.py
index 505c232..aa0c031 100644
--- a/streamobs/background/catalog_injector.py
+++ b/streamobs/background/catalog_injector.py
@@ -2,10 +2,6 @@
 Full-injection background pipeline using known catalogs.
 """
 
-import copy
-
-import numpy as np
-
 from ..observed import StreamInjector
 from ..surveys import Survey
 from ..utils import load_catalog
@@ -16,32 +12,27 @@ class BackgroundCatalogInjector:
     Thin wrapper around :class:`~streamobs.observed.StreamInjector` for
     background catalog injection.
 
-    Provides :meth:`inject_stars` and :meth:`inject_galaxies` convenience
-    methods that each call :meth:`~streamobs.observed.StreamInjector.inject`
-    with the correct ``source_type``. Survey preparation — deep-copying,
-    zeroing dust, setting uniform magnitude limits — is handled by the single
-    :meth:`_prepare_survey` method and is shared between both injection modes.
+    Exposes :meth:`inject_stars` and :meth:`inject_galaxies` so the caller
+    does not need to pass ``source_type`` explicitly.  All injection logic
+    lives in :class:`~streamobs.observed.StreamInjector`.
 
     Parameters
     ----------
     survey : Survey
         Survey instance to inject into.
     **kwargs
-        Forwarded to :meth:`_prepare_survey` at construction time (e.g.
-        ``no_dust``, ``uniform_maglim``).
+        Forwarded to :class:`~streamobs.observed.StreamInjector`.
 
     Examples
     --------
-    Inject a stars catalog with a uniform magnitude limit:
-
-    >>> injector = BackgroundCatalogInjector(survey, no_dust=True,
-    ...                                      uniform_maglim={'g': 26.0, 'r': 26.5})
+    >>> injector = BackgroundCatalogInjector(survey)
     >>> obs_stars = injector.inject_stars(catalog_df, bands=['g', 'r'])
+    >>> obs_gals  = injector.inject_galaxies(catalog_df, bands=['g', 'r'])
     """
 
     def __init__(self, survey: Survey, **kwargs):
         self._survey = survey
-        self._kwargs = kwargs
+        self.streaminjector = StreamInjector(self._survey, **kwargs)
 
     def inject_stars(self, catalog, bands=None, **kwargs) -> "pd.DataFrame":
         """
@@ -50,10 +41,7 @@ def inject_stars(self, catalog, bands=None, **kwargs) -> "pd.DataFrame":
         Parameters
         ----------
         catalog : pd.DataFrame or str
-            True stellar catalog. Accepts a DataFrame or a path to parquet/CSV
-            (loaded via :func:`~streamobs.utils.load_catalog`). Must contain
-            sky coordinates (``ra``/``dec`` or ``phi1``/``phi2``) and true
-            magnitude columns.
+            True stellar catalog. Accepts a DataFrame or a path to parquet/CSV.
         bands : list of str, optional
             Photometric bands to inject. Defaults to ``['r', 'g']``.
         **kwargs
@@ -64,7 +52,10 @@ def inject_stars(self, catalog, bands=None, **kwargs) -> "pd.DataFrame":
         pd.DataFrame
             Observed catalog with survey-namespaced magnitude and flag columns.
         """
-        ...
+        catalog = load_catalog(catalog)
+        return self.streaminjector.inject(
+            catalog, bands=bands, source_type="stars", **kwargs
+        )
 
     def inject_galaxies(self, catalog, bands=None, **kwargs) -> "pd.DataFrame":
         """
@@ -79,7 +70,6 @@ def inject_galaxies(self, catalog, bands=None, **kwargs) -> "pd.DataFrame":
         ----------
         catalog : pd.DataFrame or str
             True galaxy catalog. Accepts a DataFrame or a path to parquet/CSV.
-            Must contain sky coordinates and true magnitude columns.
         bands : list of str, optional
             Photometric bands to inject. Defaults to ``['r', 'g']``.
         **kwargs
@@ -90,4 +80,7 @@ def inject_galaxies(self, catalog, bands=None, **kwargs) -> "pd.DataFrame":
         pd.DataFrame
             Observed catalog with survey-namespaced magnitude and flag columns.
         """
-        ...
+        catalog = load_catalog(catalog)
+        return self.streaminjector.inject(
+            catalog, bands=bands, source_type="galaxies", **kwargs
+        )

From f59aef3e8a9d81949dcaab402f43df1ea6f063d7 Mon Sep 17 00:00:00 2001
From: MatthieuPe <matthieupelissier01@gmail.com>
Date: Fri, 19 Jun 2026 17:41:45 +0200
Subject: [PATCH 3/8] no dependence on release

---
 streamobs/background/storage.py | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/streamobs/background/storage.py b/streamobs/background/storage.py
index 6a8b4d7..782732c 100644
--- a/streamobs/background/storage.py
+++ b/streamobs/background/storage.py
@@ -28,23 +28,20 @@ class BackgroundStorage:
         ``{package_root}/data/background/``.
     survey_name : str, optional
         Survey identifier (e.g. ``'lsst'``).  Used in the file path.
-    release : str, optional
-        Survey release string (e.g. ``'yr5'``).  Used in the file path.
 
     Examples
     --------
-    >>> storage = BackgroundStorage(survey_name='lsst', release='yr5')
+    >>> storage = BackgroundStorage(survey_name='lsst')
     >>> path = storage.get_path('stars', ('g', 'r'))
-    >>> # -> .../data/background/lsst_yr5/stars_gr.parquet
+    >>> # -> .../data/background/lsst/stars_gr.parquet
     """
 
-    def __init__(self, base_path=None, survey_name="lsst", release=None, **kwargs):
+    def __init__(self, base_path=None, survey_name="lsst", **kwargs):
         if base_path is None:
             _pkg_root = os.path.join(os.path.dirname(__file__), "..", "..", "data")
             base_path = os.path.join(_pkg_root, "background")
         self.base_path = base_path
         self.survey_name = survey_name
-        self.release = release
 
     def get_path(self, source_type: str, bands: tuple) -> str:
         """
@@ -61,14 +58,11 @@ def get_path(self, source_type: str, bands: tuple) -> str:
         -------
         str
             Absolute path to the parquet file, e.g.
-            ``{base_path}/lsst_yr5/stars_gr.parquet``.
+            ``{base_path}/lsst/stars_gr.parquet``.
         """
-        survey_key = (
-            f"{self.survey_name}_{self.release}" if self.release else self.survey_name
-        )
         bands_str = "".join(bands)
         filename = f"{source_type}_{bands_str}.parquet"
-        return os.path.join(self.base_path, survey_key, filename)
+        return os.path.join(self.base_path, self.survey_name, filename)
 
     def save_data(
         self,

From a3019896e9f879dfd45dde36f5ace3e6be6abd74 Mon Sep 17 00:00:00 2001
From: MatthieuPe <matthieupelissier01@gmail.com>
Date: Fri, 19 Jun 2026 18:39:24 +0200
Subject: [PATCH 4/8] add missclassification

---
 streamobs/surveys.py | 153 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 114 insertions(+), 39 deletions(-)

diff --git a/streamobs/surveys.py b/streamobs/surveys.py
index 84be45d..ad66ece 100644
--- a/streamobs/surveys.py
+++ b/streamobs/surveys.py
@@ -121,6 +121,7 @@ class Survey:
     log_photo_error_catalog: Optional[Callable] = None
     log_photo_error_sample: Optional[Callable] = None
     gal_misclassification: Optional[Callable] = None
+    gal_misclassification_detection: Optional[Callable] = None
 
     # Band-independent maps
     ebv_map: Optional[np.ndarray] = None
@@ -528,37 +529,72 @@ def get_efficiency(
         get_detection_efficiency : Convenience wrapper for detection-only.
         get_classification_efficiency : Convenience wrapper for classification-only.
         """
-        if type == "completeness":
+        delta_saturation = kwargs.get("delta_saturation", self.delta_saturation)
+        delta_mag = magnitude - maglim
+
+        if type in ("missclassified", "detected_missclassified"):
+            # Galaxy-related efficiencies: no 1-padding at the bright end.
+            if type == "missclassified":
+                func = self.gal_misclassification
+                if func is None:
+                    raise ValueError(
+                        "Efficiency function for type 'missclassified' not loaded."
+                    )
+            else:  # detected_missclassified
+                func = getattr(self, "gal_misclassification_detection", None)
+                if func is None:
+                    # Fallback: missclassification × detection efficiency.
+                    mis = self.gal_misclassification
+                    det = getattr(self, "efficiency_detection", None)
+                    if mis is None or det is None:
+                        raise ValueError(
+                            "Efficiency function for type 'detected_missclassified' not "
+                            "loaded and cannot be derived (need both 'missclassified' and "
+                            "'detection_efficiency')."
+                        )
+                    func = lambda dm: mis(dm) * det(dm)  # noqa: E731
+            compl = func(delta_mag)
+            compl = np.where(magnitude < self.saturation[band], 0.0, compl)
+            compl = np.where(
+                (maglim < self.saturation[band]) | np.isnan(maglim), 0.0, compl
+            )
+            return compl
+
+        if type in ("completeness", "classification_detection"):
             func = self.completeness
+            if func is None:
+                # Fallback: compute classification × detection on the fly.
+                det = getattr(self, "efficiency_detection", None)
+                cls_ = getattr(self, "efficiency_classification", None)
+                if det is None or cls_ is None:
+                    raise ValueError(
+                        "Efficiency function for type 'completeness' not loaded and "
+                        "cannot be derived (need both detection and classification)."
+                    )
+                func = lambda dm: det(dm) * cls_(dm)  # noqa: E731
         elif type == "detection_efficiency":
-            func = self.efficiency_detection
+            func = getattr(self, "efficiency_detection", None)
         elif type == "classification_efficiency":
-            func = self.efficiency_classification
+            func = getattr(self, "efficiency_classification", None)
         else:
-            raise ValueError(f"Efficiency type '{type}' not recognized.")
+            raise ValueError(
+                f"Efficiency type '{type}' not recognized. Valid types: "
+                "'completeness', 'classification_detection', 'detection_efficiency', "
+                "'classification_efficiency', 'missclassified', 'detected_missclassified'."
+            )
         if func is None:
-            raise ValueError(f"Efficiency function for type '{type}' not loaded")
+            raise ValueError(f"Efficiency function for type '{type}' not loaded.")
 
-        delta_saturation = kwargs.get("delta_saturation", self.delta_saturation)
-
-        # Calculate delta_mag
-        delta_mag = magnitude - maglim
-
-        # Apply saturation condition: 1 padding for objects fainter than saturation but equivalent mag brighter than saturation0
+        # 1-padding: bright sources (well above saturation) are always detected.
         compl = np.where(
             (magnitude > self.saturation[band]) & (delta_mag <= delta_saturation),
             1.0,
             func(delta_mag),
-        )  # 1 padded
-
-        compl = np.where(
-            magnitude < self.saturation[band], 0.0, compl
-        )  # saturation at the bright end
-
+        )
+        compl = np.where(magnitude < self.saturation[band], 0.0, compl)
         compl = np.where(
             (maglim < self.saturation[band]) | np.isnan(maglim), 0.0, compl
-        )  # not observed if the area is not covered
-
+        )
         return compl
 
     def get_gal_misclassification(
@@ -595,28 +631,45 @@ def get_gal_misclassification(
         ValueError
             If :attr:`gal_misclassification` has not been loaded.
         """
-        if self.gal_misclassification is None:
-            raise ValueError(
-                "gal_misclassification function not loaded for this survey. "
-                "Load it via SurveyFactory.set_gal_misclassification() or the "
-                "'gal_misclassification' config key."
-            )
+        return self.get_efficiency(band, magnitude, maglim, type="missclassified", **kwargs)
 
-        delta_mag = magnitude - maglim
-
-        # No 1-padding: apply the interpolator directly
-        compl = self.gal_misclassification(delta_mag)
+    def get_gal_misclassification_detection(
+        self, band: str, magnitude: float, maglim: float, **kwargs
+    ) -> float:
+        """
+        Get galaxy misclassification × detection efficiency.
 
-        compl = np.where(
-            magnitude < self.saturation[band], 0.0, compl
-        )  # saturation at the bright end
+        Mirrors :meth:`get_efficiency` for the ``"detected_missclassified"`` type,
+        but **without 1-padding at the bright end**: bright galaxies are not forced
+        to efficiency = 1 when their delta_mag falls below the saturation
+        threshold. This reproduces the behaviour of ``custom_get_completeness``
+        used for galaxies in external background-generation scripts.
 
-        compl = np.where(
-            (maglim < self.saturation[band]) | np.isnan(maglim), 0.0, compl
-        )  # not observed if the area is not covered
+        Parameters
+        ----------
+        band : str
+            Band identifier (e.g., 'g', 'r').
+        magnitude : float or np.ndarray
+            True apparent magnitude(s) including extinction.
+        maglim : float or np.ndarray
+            Magnitude limit(s) at the source position(s).
+        **kwargs
+            delta_saturation : float, optional
+                Override the survey's default saturation threshold.
 
-        return compl
+        Returns
+        -------
+        float or np.ndarray
+            Misclassification × detection probability in [0, 1].
 
+        Raises
+        ------
+        ValueError
+            If :attr:`gal_misclassification_detection` has not been loaded.
+        """
+        return self.get_efficiency(
+            band, magnitude, maglim, type="detected_missclassified", **kwargs
+        )
 
 class SurveyFactory:
     """
@@ -1243,6 +1296,26 @@ def _load_survey_data(cls, survey: Survey, config: dict, **kwargs):
                 if verbose:
                     print(f"Could not load galaxy misclassification file: {e}")
 
+            # Also try to load the combined detection × misclassification from the same file.
+            try:
+                cls._load_file(
+                    survey,
+                    survey_config,
+                    "gal_misclassification_detection",
+                    "Galaxy misclassification × detection efficiency",
+                    lambda f: cls.set_completeness(
+                        f,
+                        delta_saturation=survey.delta_saturation,
+                        selection="detected_missclassified",
+                    ),
+                    data_path_survey,
+                    data_path_others,
+                    filename=survey_config.get("gal_misclassification"),
+                    **kwargs,
+                )
+            except Exception:
+                pass  # column absent — fallback to product at call time
+
         # Load photometric error model(s), same for all bands. Two curves:
         #   - catalog : reported error vs delta_mag (the survey's photoerror file).
         #               Written as magerr and used for the S/N cut. Always loaded.
@@ -1479,7 +1552,7 @@ def set_completeness(filename, delta_saturation=-10.4, selection="both"):
             - 'classification_eff' : Classification efficiency only (the legacy
               misspelled header 'classifiction_eff' is also accepted)
             - 'classification_detection_eff' : Combined efficiency
-            - 'gal_misclassification_eff' : Galaxy misclassification efficiency
+            - 'missclassification_eff' : Galaxy misclassification efficiency
               (probability a galaxy passes stellar selection)
 
         delta_saturation : float, optional
@@ -1493,7 +1566,7 @@ def set_completeness(filename, delta_saturation=-10.4, selection="both"):
             - 'both' : Combined detection and classification (column
               'classification_detection_eff')
             - 'missclassified' : Galaxy misclassification efficiency (column
-              'gal_misclassification_eff')
+              'missclassification_eff')
 
             Default is 'both'.
 
@@ -1530,7 +1603,9 @@ def set_completeness(filename, delta_saturation=-10.4, selection="both"):
         elif selection == "both":
             efficiencies = data["classification_detection_eff"]
         elif selection == "missclassified":
-            efficiencies = data["gal_misclassification_eff"]
+            efficiencies = data["missclassification_eff"]
+        elif selection == "detected_missclassified":
+            efficiencies = data["missclassification_detection_eff"]
         else:
             raise ValueError(
                 f"Invalid selection '{selection}'. Must be 'detected', 'classified', "

From 6eef164ce6899276e78bd85f20985999553a1833 Mon Sep 17 00:00:00 2001
From: MatthieuPe <matthieupelissier01@gmail.com>
Date: Fri, 19 Jun 2026 18:40:39 +0200
Subject: [PATCH 5/8] deal with different type of sources

---
 streamobs/observed.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/streamobs/observed.py b/streamobs/observed.py
index 3f11ee4..1f6c244 100644
--- a/streamobs/observed.py
+++ b/streamobs/observed.py
@@ -476,7 +476,6 @@ def _inject_one_survey(self, data, bands, survey, rng=None, seed=None, **kwargs)
                     rng=rng,
                     seed=seed,
                     perfect_galstarsep=False,
-                    source_type=source_type,
                     **kwargs,
                 )
                 if perfect_galstarsep and source_type == "stars":
@@ -488,7 +487,6 @@ def _inject_one_survey(self, data, bands, survey, rng=None, seed=None, **kwargs)
                         rng=rng,
                         seed=seed,
                         perfect_galstarsep=True,
-                        source_type=source_type,
                         **kwargs,
                     )
 
@@ -1181,7 +1179,7 @@ def detect_flag(self, pix, survey, mag=None, band="r", **kwargs):
         combined completeness (detection × classification) or detection-only
         efficiency when ``perfect_galstarsep=True``. For galaxies
         (``source_type='galaxies'``), uses
-        :meth:`~streamobs.surveys.Survey.get_gal_misclassification` and
+        :meth:`~streamobs.surveys.Survey.get_gal_misclassification_detection` and
         ignores ``perfect_galstarsep``.
 
         Parameters
@@ -1228,7 +1226,7 @@ def detect_flag(self, pix, survey, mag=None, band="r", **kwargs):
 
         source_type = kwargs.get("source_type", "stars")
         if source_type == "galaxies":
-            compl = survey.get_gal_misclassification(band, mag, maglim)
+            compl = survey.get_gal_misclassification_detection(band, mag, maglim)
         else:
             perfect_galstarsep = kwargs.get("perfect_galstarsep", False)
             if perfect_galstarsep:

From 673ba64152d619f4f958b191ba20ba5d666717af Mon Sep 17 00:00:00 2001
From: MatthieuPe <matthieupelissier01@gmail.com>
Date: Fri, 19 Jun 2026 18:46:09 +0200
Subject: [PATCH 6/8] use by default the completeness file

---
 streamobs/surveys.py | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/streamobs/surveys.py b/streamobs/surveys.py
index ad66ece..ca89f4d 100644
--- a/streamobs/surveys.py
+++ b/streamobs/surveys.py
@@ -1275,8 +1275,13 @@ def _load_survey_data(cls, survey: Survey, config: dict, **kwargs):
                 if verbose:
                     print("No classification efficiency file found, skipping.")
 
-        # Load galaxy misclassification efficiency (optional)
-        if "gal_misclassification" in survey_config:
+        # Load galaxy misclassification efficiency (optional).
+        # Uses the explicit 'gal_misclassification' config key when present,
+        # otherwise falls back to the completeness file (same CSV, different column).
+        _gal_mis_file = survey_config.get(
+            "gal_misclassification", survey_config.get("completeness")
+        )
+        if _gal_mis_file is not None:
             try:
                 cls._load_file(
                     survey,
@@ -1290,13 +1295,13 @@ def _load_survey_data(cls, survey: Survey, config: dict, **kwargs):
                     ),
                     data_path_survey,
                     data_path_others,
+                    filename=_gal_mis_file,
                     **kwargs,
                 )
-            except Exception as e:
-                if verbose:
-                    print(f"Could not load galaxy misclassification file: {e}")
+            except Exception:
+                pass  # column absent in this survey's file — silently skip
 
-            # Also try to load the combined detection × misclassification from the same file.
+            # Also try the combined detection × misclassification from the same file.
             try:
                 cls._load_file(
                     survey,
@@ -1310,7 +1315,7 @@ def _load_survey_data(cls, survey: Survey, config: dict, **kwargs):
                     ),
                     data_path_survey,
                     data_path_others,
-                    filename=survey_config.get("gal_misclassification"),
+                    filename=_gal_mis_file,
                     **kwargs,
                 )
             except Exception:

From 3e11ab14db27dc0a54189432d497c8f61ca01345 Mon Sep 17 00:00:00 2001
From: MatthieuPe <matthieupelissier01@gmail.com>
Date: Mon, 22 Jun 2026 10:04:46 +0200
Subject: [PATCH 7/8] not dev file tracking

---
 .gitignore               |   3 +
 tests/test_background.py | 118 +++++++++++++++------------------------
 2 files changed, 48 insertions(+), 73 deletions(-)

diff --git a/.gitignore b/.gitignore
index e1c7407..11209b9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -154,6 +154,9 @@ scripts/roman/script_to_notebook.py
 build_*_nb.py
 scripts/**/build_*_nb.py
 
+# local-only development notebooks
+notebooks/dev.ipynb
+
 artifacts/
 # Staged data archive for upload (built by bin/build_data_archive.py)
 /archive/
diff --git a/tests/test_background.py b/tests/test_background.py
index b3800c9..9299979 100644
--- a/tests/test_background.py
+++ b/tests/test_background.py
@@ -21,40 +21,17 @@
 # Shared fixtures
 # ---------------------------------------------------------------------------
 
-
-@pytest.fixture(scope="module")
-def mock_survey(verbose=False):
-    from streamobs.surveys import Survey
-
-    return Survey.load("lsst", release="yr4", verbose=verbose)
-
-
-@pytest.fixture(scope="module")
-def tiny_stars_catalog():
-    """Minimal DataFrame that looks like a true background star catalog."""
-    rng = np.random.default_rng(0)
-    n = 100
-    return pd.DataFrame(
-        {
-            "ra": rng.uniform(30.0, 60.0, n),
-            "dec": rng.uniform(-20.0, 0.0, n),
-            "lsst_yr4_g_true": rng.uniform(20.0, 27.0, n),
-            "lsst_yr4_r_true": rng.uniform(20.0, 27.0, n),
-        }
-    )
-
-
 @pytest.fixture(scope="module")
 def tiny_galaxies_catalog():
-    """Minimal DataFrame that looks like a true background galaxy catalog."""
+    """Galaxy catalog: same sky positions as stream_catalog but with randomised magnitudes."""
     rng = np.random.default_rng(1)
     n = 100
     return pd.DataFrame(
         {
             "ra": rng.uniform(30.0, 60.0, n),
             "dec": rng.uniform(-20.0, 0.0, n),
-            "lsst_yr4_g_true": rng.uniform(20.0, 28.0, n),
-            "lsst_yr4_r_true": rng.uniform(20.0, 28.0, n),
+            "lsst_g_true": rng.uniform(20.0, 28.0, n),
+            "lsst_r_true": rng.uniform(20.0, 28.0, n),
         }
     )
 
@@ -79,7 +56,7 @@ def test_get_gal_misclassification_raises_when_not_loaded(self, mock_survey):
         s.gal_misclassification = None
         mags = np.linspace(20.0, 27.0, 10)
         maglim = np.full(10, 26.5)
-        with pytest.raises(ValueError, match="gal_misclassification"):
+        with pytest.raises(ValueError, match="missclassified"):
             s.get_gal_misclassification("r", mags, maglim)
 
     def test_get_gal_misclassification_no_1padding(self, mock_survey):
@@ -91,24 +68,6 @@ def test_get_gal_misclassification_no_1padding(self, mock_survey):
         """
         ...
 
-    def test_set_completeness_missclassified_loads_file(self, tmp_path):
-        """set_completeness with selection='missclassified' must return a callable interpolator."""
-        from streamobs.surveys import SurveyFactory
-
-        csv_path = tmp_path / "gal_misclassification.csv"
-        csv_path.write_text(
-            "delta_mag,gal_misclassification_eff\n"
-            "-12.0,0.0\n"
-            "-5.0,0.0\n"
-            "0.0,0.3\n"
-            "1.0,0.0\n"
-        )
-        func = SurveyFactory.set_completeness(str(csv_path), selection="missclassified")
-        assert callable(func)
-        result = func(np.array([0.0]))
-        assert 0.0 <= float(result) <= 1.0
-
-
 # ---------------------------------------------------------------------------
 # Part 2 — StreamInjector source_type parameter
 # ---------------------------------------------------------------------------
@@ -148,25 +107,44 @@ def test_init(self, mock_survey):
         inj = BackgroundCatalogInjector(mock_survey)
         assert inj._survey is mock_survey
 
-    def test_prepare_survey_no_dust(self, mock_survey):
-        """_prepare_survey with no_dust=True must zero the EBV map."""
-        ...
+    def test_inject_stars_returns_nonempty_dataframe(
+        self, mock_survey, stream_catalog
+    ):
+        """inject_stars must return a non-empty DataFrame (smoke test only — full coverage in test_observed)."""
+        from streamobs.background import BackgroundCatalogInjector
 
-    def test_prepare_survey_uniform_maglim(self, mock_survey):
-        """_prepare_survey with uniform_maglim must replace maglim maps with constants."""
-        ...
+        result = BackgroundCatalogInjector(mock_survey).inject_stars(
+            stream_catalog, bands=["g", "r"]
+        )
+        assert isinstance(result, pd.DataFrame)
+        assert len(result) > 0
 
-    def test_inject_stars_delegates_to_stream_injector(
-        self, mock_survey, tiny_stars_catalog
+    def test_inject_galaxies_returns_nonempty_dataframe(
+        self, mock_survey, tiny_galaxies_catalog
     ):
-        """inject_stars must return a DataFrame (delegates to StreamInjector)."""
-        ...
+        """inject_galaxies must return a non-empty DataFrame."""
+        from streamobs.background import BackgroundCatalogInjector
 
-    def test_inject_galaxies_delegates_to_stream_injector(
+        result = BackgroundCatalogInjector(mock_survey).inject_galaxies(
+            tiny_galaxies_catalog, bands=["g", "r"]
+        )
+        assert isinstance(result, pd.DataFrame)
+        assert len(result) > 0
+
+    def test_inject_galaxies_has_detection_flag(
         self, mock_survey, tiny_galaxies_catalog
     ):
-        """inject_galaxies must return a DataFrame."""
-        ...
+        """inject_galaxies output must include a detection flag column."""
+        from streamobs.background import BackgroundCatalogInjector
+
+        result = BackgroundCatalogInjector(mock_survey).inject_galaxies(
+            tiny_galaxies_catalog, bands=["g", "r"]
+        )
+        flag_cols = [c for c in result.columns if "flag" in c]
+        assert len(flag_cols) > 0
+        assert "flag_observed" in flag_cols
+        assert result["flag_observed"].isin([0, 1]).all()
+        assert result["flag_observed"].sum() > 0  # at least one detected
 
 
 # ---------------------------------------------------------------------------
@@ -181,9 +159,7 @@ def test_get_path_naming(self, tmp_path):
         """get_path must embed source_type and band names in the filename."""
         from streamobs.background import BackgroundStorage
 
-        storage = BackgroundStorage(
-            base_path=str(tmp_path), survey_name="lsst", release="yr4"
-        )
+        storage = BackgroundStorage(base_path=str(tmp_path), survey_name="lsst")
         path = storage.get_path("stars", ("g", "r"))
         assert "stars" in path
         assert "gr" in path
@@ -201,9 +177,7 @@ def test_exists_false_before_save(self, tmp_path):
         """exists must return False when the file is not on disk."""
         from streamobs.background import BackgroundStorage
 
-        storage = BackgroundStorage(
-            base_path=str(tmp_path), survey_name="lsst", release="yr4"
-        )
+        storage = BackgroundStorage(base_path=str(tmp_path), survey_name="lsst")
         assert storage.exists("stars", ("g", "r")) is False
 
     def test_exists_true_after_save(self, tmp_path):
@@ -227,15 +201,15 @@ def test_init(self):
         assert builder.survey_name == "lsst"
         assert isinstance(builder.resources, dict)
 
-    def test_build_one_config(self, mock_survey, tiny_stars_catalog):
+    def test_build_one_config(self, mock_survey, stream_catalog):
         """_build_one_config must return a dict with the expected keys."""
         ...
 
-    def test_save_via_storage(self, tmp_path, tiny_stars_catalog):
+    def test_save_via_storage(self, tmp_path, stream_catalog):
         """save must write a parquet file via BackgroundStorage."""
         ...
 
-    def test_load_via_storage(self, tmp_path, tiny_stars_catalog):
+    def test_load_via_storage(self, tmp_path, stream_catalog):
         """load must reconstruct resources from the file saved by save."""
         ...
 
@@ -249,7 +223,7 @@ class TestLightBackgroundGenerator:
     """Tests for LightBackgroundGenerator."""
 
     @pytest.fixture(scope="class")
-    def storage_with_data(self, tmp_path_factory, mock_survey, tiny_stars_catalog):
+    def storage_with_data(self, tmp_path_factory, mock_survey, stream_catalog):
         """BackgroundStorage pre-populated with a minimal CMD grid."""
         ...
 
@@ -296,7 +270,7 @@ def test_init_rejects_invalid_method(self, mock_survey):
         with pytest.raises(ValueError, match="method"):
             Background(mock_survey, method="invalid")
 
-    def test_full_method_stars(self, mock_survey, tiny_stars_catalog):
+    def test_full_method_stars(self, mock_survey, stream_catalog):
         """Background with method='full' and source_type='stars' must generate a catalog."""
         ...
 
@@ -305,7 +279,7 @@ def test_full_method_galaxies(self, mock_survey, tiny_galaxies_catalog):
         ...
 
     def test_full_method_both(
-        self, mock_survey, tiny_stars_catalog, tiny_galaxies_catalog
+        self, mock_survey, stream_catalog, tiny_galaxies_catalog
     ):
         """Background with method='full' and source_type='both' must combine catalogs."""
         ...
@@ -321,8 +295,6 @@ def test_light_method_custom_storage(self, mock_survey, tmp_path):
         """Background accepts a user-supplied BackgroundStorage."""
         from streamobs.background import Background, BackgroundStorage
 
-        storage = BackgroundStorage(
-            base_path=str(tmp_path), survey_name="lsst", release="yr4"
-        )
+        storage = BackgroundStorage(base_path=str(tmp_path), survey_name="lsst")
         bg = Background(mock_survey, method="light", storage=storage)
         assert bg.storage is storage

From f6632bd5a7f349a0f4e00d6eb419442348c931b7 Mon Sep 17 00:00:00 2001
From: MatthieuPe <matthieupelissier01@gmail.com>
Date: Mon, 22 Jun 2026 11:45:12 +0200
Subject: [PATCH 8/8] testing the missclassification

---
 tests/test_surveys.py | 51 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/tests/test_surveys.py b/tests/test_surveys.py
index 96760d9..e5b4d2d 100644
--- a/tests/test_surveys.py
+++ b/tests/test_surveys.py
@@ -441,6 +441,57 @@ def test_completeness_behavior(
                 comp_faint < 0.1
             ), f"Completeness should be near 0 for magnitudes well below saturation in band '{completeness_band}'"
 
+
+    def test_missclassification_behavior(
+        self,
+        loaded_survey,
+        saturation_magnitudes,
+        bright_magnitudes,
+        faint_magnitudes,
+        base_maglim,):
+
+        completeness_band = loaded_survey.completeness_band
+        sat = loaded_survey.saturation[completeness_band]
+
+        # work only with magnitudes above or below saturation
+        sat_mag = saturation_magnitudes[saturation_magnitudes < sat]
+        bright_mag = bright_magnitudes[bright_magnitudes > sat]
+        faint_mag = faint_magnitudes[faint_magnitudes > sat]
+
+        # Test that missclassification is loaded
+        # Only gal_misclassification_detection is needed, but if it None it can
+        # be estimated from gal_misclassification, so we require at least one of them to be present.
+        assert (loaded_survey.gal_misclassification_detection is not None) or (loaded_survey.gal_misclassification is not None), "Missclassification function is None"
+
+
+
+        # Verify completeness behavior in each regime
+        if len(sat_mag) > 0:
+            comp_sat = loaded_survey.get_efficiency(
+                completeness_band, sat_mag, base_maglim, type = 'detected_missclassified',
+            )
+            assert np.all(
+                comp_sat == 0.0
+            ), f"Completeness should be 0 for magnitudes below saturation in band '{completeness_band}'"
+  
+        
+        mag_test = np.linspace(18, 28, 100)
+        comp_test = loaded_survey.get_efficiency(
+            completeness_band, mag_test, base_maglim, type = 'detected_missclassified',
+        )
+        max_val = np.max(comp_test)
+        assert max_val < 1.0, f"Missclassification efficiency should be less than 1 for magnitudes in band '{completeness_band}'"
+        assert max_val > 0.0, f"Missclassification efficiency should be greater than 0 for magnitudes in band '{completeness_band}'"
+
+        if len(faint_mag) > 0:
+            comp_faint = loaded_survey.get_efficiency(
+                completeness_band, faint_mag, base_maglim, type = 'detected_missclassified',
+            )
+            assert np.all(
+                comp_faint < 0.1
+            ), f"Completeness should be near 0 for magnitudes well below saturation in band '{completeness_band}'"
+
+
     def test_log_photo_error_behavior(
         self,
         loaded_survey,