Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions packages/scratch-core/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ authors = [
readme = "README.md"

dependencies = [
"lir>=1.3.3",
"loguru>=0.7.3",
"matplotlib>=3.10.7",
"numpy>=2.3.4",
"pillow>=12.0.0",
"pydantic>=2.12.4",
Expand Down
127 changes: 127 additions & 0 deletions packages/scratch-core/src/conversion/likelihood_ratio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
import pickle
from pathlib import Path
from typing import Self

import numpy as np
from lir.data.models import FeatureData, LLRData
from lir.lrsystems import LRSystem
from pydantic import model_validator

from container_models.base import ConfigBaseModel


class ModelSpecs(ConfigBaseModel):
    """Training data and model types for KM and KNM populations used to calibrate an LR system.

    Holds scores and LLR data for two populations: known matches (KM) and
    known non-matches (KNM), along with the model name used to produce each.

    :param km_model: Identifier of the model used for KM scores.
    :param km_scores: Similarity scores for the KM population.
    :param km_llrs: Log-likelihood ratios for the KM population.
    :param km_llr_intervals: LLR confidence intervals for the KM population, shape (n, 2), or None.
    :param knm_model: Identifier of the model used for KNM scores.
    :param knm_scores: Similarity scores for the KNM population.
    :param knm_llrs: Log-likelihood ratios for the KNM population.
    :param knm_llr_intervals: LLR confidence intervals for the KNM population, shape (n, 2), or None.
    """

    km_model: str
    km_scores: np.ndarray
    km_llrs: np.ndarray
    km_llr_intervals: np.ndarray | None
    knm_model: str
    knm_scores: np.ndarray
    knm_llrs: np.ndarray
    knm_llr_intervals: np.ndarray | None

    @model_validator(mode="after")
    def _validate_matching_lengths(self) -> Self:
        """Ensure the per-population arrays are mutually consistent in length.

        :raises ValueError: If scores, LLRs, or intervals (when present) of a
            population do not all have the same length.
        """
        # Fixed: error messages previously named non-existent fields
        # "km_lrs"/"knm_lrs" instead of the actual km_llrs/knm_llrs fields.
        if len(self.km_scores) != len(self.km_llrs):
            raise ValueError("km_scores and km_llrs must have the same length")
        if len(self.knm_scores) != len(self.knm_llrs):
            raise ValueError("knm_scores and knm_llrs must have the same length")
        # Intervals are optional, but when present they must align 1:1 with
        # the scores so the `llr_intervals` concatenation stays consistent.
        if self.km_llr_intervals is not None and len(self.km_llr_intervals) != len(
            self.km_scores
        ):
            raise ValueError("km_llr_intervals must have the same length as km_scores")
        if self.knm_llr_intervals is not None and len(self.knm_llr_intervals) != len(
            self.knm_scores
        ):
            raise ValueError("knm_llr_intervals must have the same length as knm_scores")
        return self

    @property
    def scores(self) -> np.ndarray:
        """Concatenated KM and KNM similarity scores."""
        return np.concatenate([self.km_scores, self.knm_scores])

    @property
    def llrs(self) -> np.ndarray:
        """Concatenated KM and KNM log-likelihood ratios."""
        return np.concatenate([self.km_llrs, self.knm_llrs])

    @property
    def llr_intervals(self) -> np.ndarray:
        """Concatenated KM and KNM LLR intervals, shape (n, 2).

        :raises ValueError: If either population has no intervals.
        """
        if self.km_llr_intervals is None or self.knm_llr_intervals is None:
            raise ValueError("Only models with llr_intervals can be used")
        return np.concatenate([self.km_llr_intervals, self.knm_llr_intervals], axis=0)

    @property
    def labels(self) -> np.ndarray:
        """Boolean labels: True for KM samples, False for KNM samples."""
        return np.concatenate(
            [
                np.ones(len(self.km_scores), dtype=bool),
                np.zeros(len(self.knm_scores), dtype=bool),
            ]
        )


def get_lr_system(
    lr_system_path: Path,
) -> LRSystem:  # TODO replace with lr_module_scratch
    """Deserialize and return the LR system stored at the given path.

    :param lr_system_path: Path to the pickled LR system.
    :returns: The loaded LR system.
    """
    with open(lr_system_path, "rb") as file_handle:
        system = pickle.load(file_handle)  # noqa: S301
    return system


def get_reference_data(
    lr_system_path: Path,
) -> ModelSpecs:  # TODO replace with lr_module_scratch
    """Return hardcoded dummy reference data (KM/KNM scores and LLRs).

    .. note::
        This is a placeholder. The ``lr_system_path`` argument is accepted for
        API compatibility but is not used; real reference data will be derived
        from the LR system once ``lr_module_scratch`` is integrated.
    """
    # Loaded only to keep the call signature honest; result is discarded.
    _ = get_lr_system(lr_system_path)
    km_scores = np.array([0.9, 0.85, 0.78])
    km_llrs = np.array([2.1, 1.8, 1.5])
    km_intervals = np.array([[1.9, 2.3], [1.6, 2.0], [1.3, 1.7]])
    knm_scores = np.array([0.3, 0.25, 0.15, 0.1])
    knm_llrs = np.array([-1.2, -0.9, -1.5, -2.0])
    knm_intervals = np.array([[-1.4, -1.0], [-1.1, -0.7], [-1.7, -1.3], [-2.2, -1.8]])
    return ModelSpecs(
        km_model="random",
        km_scores=km_scores,
        km_llrs=km_llrs,
        km_llr_intervals=km_intervals,
        knm_model="random",
        knm_scores=knm_scores,
        knm_llrs=knm_llrs,
        knm_llr_intervals=knm_intervals,
    )


def calculate_lr_striation(lr_system: LRSystem, score: float) -> LLRData:
    """
    Calculate likelihood ratio for striation marks.

    :param lr_system: Trained LR system to apply.
    :param score: Correlation coefficient between two striation profiles.
    :returns: LLR data produced by the LR system for the given score.
    """
    # The LR system expects a 2-D feature matrix: one row, one feature.
    features = FeatureData(features=np.array([[score]]))
    return lr_system.apply(features)


def calculate_lr_impression(lr_system: LRSystem, score: int, n_cells: int) -> LLRData:
    """
    Calculate likelihood ratio for impression marks.

    :param lr_system: Trained LR system to apply.
    :param score: CMC count (number of matching cells).
    :param n_cells: Total number of cells analyzed.
    :returns: LLR data produced by the LR system for the given CMC result.
    """
    # One row with two features: the CMC count and the total cell count.
    features = FeatureData(features=np.array([[score, n_cells]]))
    return lr_system.apply(features)
6 changes: 3 additions & 3 deletions packages/scratch-core/src/conversion/plots/data_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ class HistogramData:

scores: FloatArray1D
labels: FloatArray1D
bins: int | None
densities: DensityData | None
new_score: float | None
bins: int | None = None
densities: DensityData | None = None
new_score: float | None = None


@dataclass
Expand Down
73 changes: 73 additions & 0 deletions packages/scratch-core/src/conversion/plots/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import datetime
import textwrap
from typing import Literal, cast

import numpy as np
import matplotlib.pyplot as plt
from lir import LLRData
from matplotlib.axes import Axes
from matplotlib.backends.backend_agg import FigureCanvasAgg
from matplotlib.figure import Figure
Expand All @@ -11,6 +13,8 @@

from container_models.base import FloatArray2D, ImageRGB, StriationProfile
from conversion.data_formats import MarkMetadata
from conversion.data_formats import MarkType
from conversion.likelihood_ratio import ModelSpecs

DEFAULT_COLORMAP = "viridis"

Expand Down Expand Up @@ -386,3 +390,72 @@ def draw_metadata_box(
table[i, 0].PAD = 0.02
table[i, 1].set_text_props(ha="left")
table[i, 1].PAD = 0.02


def _format_lr(llr_data: LLRData) -> str:
"""Format a single log-LR value with optional confidence interval."""
if len(llr_data.llrs) > 1:
raise ValueError(f"expected single LR value, got {len(llr_data.llrs)}")

Comment thread
laurensWe marked this conversation as resolved.
log_lr = llr_data.llrs[0]

if llr_data.llr_intervals is not None:
lower, upper = llr_data.llr_intervals[0, 0], llr_data.llr_intervals[0, 1]
return f"{log_lr:.2f} ({lower:.2f}, {upper:.2f})"
return f"{log_lr:.2f}"


def _common_results_metadata(
    reference_data: ModelSpecs,
    llr_data: LLRData,
    date_report: datetime.date,
    user_id: str,
    mark_type: MarkType,
) -> dict[str, str]:
    """Results metadata fields shared across all mark types.

    :param reference_data: KM/KNM reference populations backing the LR system.
    :param llr_data: LLR result to report (single value expected).
    :param date_report: Date the report is generated.
    :param user_id: Identifier of the reporting user.
    :param mark_type: Type of mark being compared.
    :returns: Mapping of display label to formatted value.
    """
    km_count = len(reference_data.km_scores)
    knm_count = len(reference_data.knm_scores)
    return {
        "Date report": date_report.isoformat(),
        "User ID": user_id,
        "Mark type": mark_type.value,
        "LogLR (5%, 95%)": _format_lr(llr_data),
        "# of KM scores": str(km_count),
        "# of KNM scores": str(knm_count),
    }


def build_results_metadata_striation(
    reference_data: ModelSpecs,
    llr_data: LLRData,
    date_report: datetime.date,
    user_id: str,
    mark_type: MarkType,
    score: float,
    score_transform: float,
) -> dict[str, str]:
    """Build the results-metadata table for a striation mark comparison.

    :param score: Raw CCF score between the two striation profiles.
    :param score_transform: Transformed score fed to the LR system.
    :returns: Mapping of display label to formatted value.
    """
    metadata = _common_results_metadata(
        reference_data, llr_data, date_report, user_id, mark_type
    )
    metadata["Score type"] = "CCF"
    metadata["Score (transform)"] = f"{score:.2f} ({score_transform:.2f})"
    return metadata


def build_results_metadata_impression(
    reference_data: ModelSpecs,
    llr_data: LLRData,
    date_report: datetime.date,
    user_id: str,
    mark_type: MarkType,
    score: int,
    n_cells: int,
) -> dict[str, str]:
    """Build the results-metadata table for an impression mark comparison.

    :param score: CMC count (number of matching cells).
    :param n_cells: Total number of cells analyzed.
    :returns: Mapping of display label to formatted value.
    """
    metadata = _common_results_metadata(
        reference_data, llr_data, date_report, user_id, mark_type
    )
    # TODO these model names should be replaced by the lr system path (new ticket)
    metadata["KM model"] = reference_data.km_model
    metadata["KNM model"] = reference_data.knm_model
    metadata["Score type"] = "CMC"
    metadata["Score (transform)"] = f"{score} of {n_cells}"
    return metadata
20 changes: 20 additions & 0 deletions packages/scratch-core/src/conversion/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import numpy as np
from returns.io import IOResultE, IOSuccess
from returns.result import ResultE, Success

Expand All @@ -13,6 +14,25 @@ def unwrap_result[T](result: IOResultE[T] | ResultE[T]) -> T:
assert False, "failed to unwrap"


def ccf_score_to_logodds(scores: np.ndarray) -> np.ndarray:
    """
    Transform CCF scores from [-1, +1] to [-inf, +inf] using a log10 logit.

    Each score is rescaled to the unit interval and the base-10 log-odds is
    applied:
        y = (score + 1) / 2
        transformed = log10(y / (1 - y))

    Boundary values are pulled inward by machine epsilon so the logit never
    takes log10 of zero and the output stays finite.

    :param scores: 1-D array of raw CCF scores in [-1, +1].
    :returns: 1-D array of transformed scores.
    """
    machine_eps = np.finfo(float).eps
    bounded = np.clip(scores, machine_eps - 1, 1 - machine_eps)
    unit_scaled = (bounded + 1) / 2
    return np.log10(unit_scaled) - np.log10(1 - unit_scaled)


def update_scan_image_data(scan_image: ScanImage, data: DepthData) -> ScanImage:
"""
Return a new ScanImage with updated scan data.
Expand Down
17 changes: 17 additions & 0 deletions packages/scratch-core/tests/resources/lr_systems.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import numpy as np
from lir.data.models import FeatureData, LLRData, InstanceData
from lir.lrsystems.lrsystems import LRSystem


class RandomLRSystem(LRSystem):
    """LRSystem that returns seeded random LLR values, for use in tests."""

    def __init__(self) -> None:
        # NOTE(review): deliberately does not call super().__init__ —
        # presumably no trained state is needed for this stub; confirm
        # against the LRSystem base class.
        pass

    def apply(self, instances: InstanceData) -> LLRData:
        """Return seeded random LLR values, one per input instance."""
        assert isinstance(instances, FeatureData)
        instance_count = len(instances.features)
        # Fixed seed: every call yields the same pseudo-random sequence,
        # keeping test output reproducible.
        generator = np.random.default_rng(seed=42)
        return LLRData(features=generator.random(instance_count))
24 changes: 24 additions & 0 deletions src/extractors/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,30 @@ class ComparisonResponseImpression(ComparisonResponse):


class ComparisonResponseStriationURL(ComparisonResponse):
mark_reference_aligned_data: HttpUrl = Field(
...,
description="Aligned reference mark.",
examples=["http://localhost:8000/preprocessor/files/surface_comparator_859lquto/mark_reference_aligned.npz"],
json_schema_extra={"file_name": "mark_reference_aligned.npz"},
)
mark_reference_aligned_meta: HttpUrl = Field(
...,
description="meta data from the aligned reference mark data.",
examples=["http://localhost:8000/preprocessor/files/surface_comparator_859lquto/mark_reference_aligned.json"],
json_schema_extra={"file_name": "mark_reference_aligned.json"},
)
mark_compared_aligned_data: HttpUrl = Field(
...,
description="Aligned compared mark.",
examples=["http://localhost:8000/preprocessor/files/surface_comparator_859lquto/mark_compared_aligned.npz"],
json_schema_extra={"file_name": "mark_compared_aligned.npz"},
)
mark_compared_aligned_meta: HttpUrl = Field(
...,
description="meta data from the aligned compared mark data.",
examples=["http://localhost:8000/preprocessor/files/surface_comparator_859lquto/mark_compared_aligned.json"],
json_schema_extra={"file_name": "mark_compared_aligned.json"},
)
mark_ref_preview: HttpUrl = Field(
description="",
examples=["http://localhost:8000/preprocessor/files/surface_comparator_859lquto/mark_ref_preview.png"],
Expand Down
Loading