Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions packages/scratch-core/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ authors = [
readme = "README.md"

dependencies = [
"lir>=1.3.3",
"loguru>=0.7.3",
"matplotlib>=3.10.7",
"numpy>=2.3.4",
"pillow>=12.0.0",
"pydantic>=2.12.4",
Expand Down
127 changes: 127 additions & 0 deletions packages/scratch-core/src/conversion/likelihood_ratio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
import pickle
from pathlib import Path
from typing import Self

import numpy as np
from lir.data.models import FeatureData, LLRData
from lir.lrsystems import LRSystem
from pydantic import model_validator

from container_models.base import ConfigBaseModel


class ModelSpecs(ConfigBaseModel):
    """Training data and model types for KM and KNM populations used to calibrate an LR system.

    Holds scores and LLR data for two populations: known matches (KM) and
    known non-matches (KNM), along with the model name used to produce each.

    :param km_model: Identifier of the model used for KM scores.
    :param km_scores: Similarity scores for the KM population.
    :param km_llrs: Log-likelihood ratios for the KM population.
    :param km_llr_intervals: LLR confidence intervals for the KM population, shape (n, 2), or None.
    :param knm_model: Identifier of the model used for KNM scores.
    :param knm_scores: Similarity scores for the KNM population.
    :param knm_llrs: Log-likelihood ratios for the KNM population.
    :param knm_llr_intervals: LLR confidence intervals for the KNM population, shape (n, 2), or None.
    """

    km_model: str
    km_scores: np.ndarray
    km_llrs: np.ndarray
    km_llr_intervals: np.ndarray | None
    knm_model: str
    knm_scores: np.ndarray
    knm_llrs: np.ndarray
    knm_llr_intervals: np.ndarray | None

    @model_validator(mode="after")
    def _validate_matching_lengths(self) -> Self:
        """Ensure the per-population arrays are mutually consistent in length.

        :raises ValueError: If scores, LLRs, or intervals (when present) of a
            population do not all have the same length.
        """
        # Fixed: error messages previously named non-existent fields
        # "km_lrs"/"knm_lrs" instead of the actual km_llrs/knm_llrs fields.
        if len(self.km_scores) != len(self.km_llrs):
            raise ValueError("km_scores and km_llrs must have the same length")
        if len(self.knm_scores) != len(self.knm_llrs):
            raise ValueError("knm_scores and knm_llrs must have the same length")
        # Intervals are optional, but when present they must align 1:1 with
        # the scores so the `llr_intervals` concatenation stays consistent.
        if self.km_llr_intervals is not None and len(self.km_llr_intervals) != len(
            self.km_scores
        ):
            raise ValueError("km_llr_intervals must have the same length as km_scores")
        if self.knm_llr_intervals is not None and len(self.knm_llr_intervals) != len(
            self.knm_scores
        ):
            raise ValueError("knm_llr_intervals must have the same length as knm_scores")
        return self

    @property
    def scores(self) -> np.ndarray:
        """Concatenated KM and KNM similarity scores."""
        return np.concatenate([self.km_scores, self.knm_scores])

    @property
    def llrs(self) -> np.ndarray:
        """Concatenated KM and KNM log-likelihood ratios."""
        return np.concatenate([self.km_llrs, self.knm_llrs])

    @property
    def llr_intervals(self) -> np.ndarray:
        """Concatenated KM and KNM LLR intervals, shape (n, 2).

        :raises ValueError: If either population has no intervals.
        """
        if self.km_llr_intervals is None or self.knm_llr_intervals is None:
            raise ValueError("Only models with llr_intervals can be used")
        return np.concatenate([self.km_llr_intervals, self.knm_llr_intervals], axis=0)

    @property
    def labels(self) -> np.ndarray:
        """Boolean labels: True for KM samples, False for KNM samples."""
        return np.concatenate(
            [
                np.ones(len(self.km_scores), dtype=bool),
                np.zeros(len(self.knm_scores), dtype=bool),
            ]
        )


def get_lr_system(
    lr_system_path: Path,
) -> LRSystem:  # TODO replace with lr_module_scratch
    """Deserialize and return the LR system stored at the given path.

    :param lr_system_path: Path to the pickled LR system.
    :returns: The loaded LR system.
    """
    with open(lr_system_path, "rb") as file_handle:
        system = pickle.load(file_handle)  # noqa: S301
    return system


def get_reference_data(
    lr_system_path: Path,
) -> ModelSpecs:  # TODO replace with lr_module_scratch
    """Return hardcoded dummy reference data (KM/KNM scores and LLRs).

    .. note::
        This is a placeholder. The ``lr_system_path`` argument is accepted for
        API compatibility but is not used; real reference data will be derived
        from the LR system once ``lr_module_scratch`` is integrated.
    """
    # Loaded only to keep the call signature honest; result is discarded.
    _ = get_lr_system(lr_system_path)
    km_scores = np.array([0.9, 0.85, 0.78])
    km_llrs = np.array([2.1, 1.8, 1.5])
    km_intervals = np.array([[1.9, 2.3], [1.6, 2.0], [1.3, 1.7]])
    knm_scores = np.array([0.3, 0.25, 0.15, 0.1])
    knm_llrs = np.array([-1.2, -0.9, -1.5, -2.0])
    knm_intervals = np.array([[-1.4, -1.0], [-1.1, -0.7], [-1.7, -1.3], [-2.2, -1.8]])
    return ModelSpecs(
        km_model="random",
        km_scores=km_scores,
        km_llrs=km_llrs,
        km_llr_intervals=km_intervals,
        knm_model="random",
        knm_scores=knm_scores,
        knm_llrs=knm_llrs,
        knm_llr_intervals=knm_intervals,
    )


def calculate_lr_striation(lr_system: LRSystem, score: float) -> LLRData:
    """
    Calculate likelihood ratio for striation marks.

    :param lr_system: Trained LR system to apply.
    :param score: Correlation coefficient between two striation profiles.
    :returns: LLR data produced by the LR system for the given score.
    """
    # The LR system expects a 2-D feature matrix: one row, one feature.
    features = FeatureData(features=np.array([[score]]))
    return lr_system.apply(features)


def calculate_lr_impression(lr_system: LRSystem, score: int, n_cells: int) -> LLRData:
    """
    Calculate likelihood ratio for impression marks.

    :param lr_system: Trained LR system to apply.
    :param score: CMC count (number of matching cells).
    :param n_cells: Total number of cells analyzed.
    :returns: LLR data produced by the LR system for the given CMC result.
    """
    # One row with two features: the CMC count and the total cell count.
    features = FeatureData(features=np.array([[score, n_cells]]))
    return lr_system.apply(features)
6 changes: 3 additions & 3 deletions packages/scratch-core/src/conversion/plots/data_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ class HistogramData:

scores: FloatArray1D
labels: FloatArray1D
bins: int | None
densities: DensityData | None
new_score: float | None
bins: int | None = None
densities: DensityData | None = None
new_score: float | None = None


@dataclass
Expand Down
73 changes: 73 additions & 0 deletions packages/scratch-core/src/conversion/plots/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import datetime
import textwrap
from typing import Literal, cast

import numpy as np
import matplotlib.pyplot as plt
from lir import LLRData
from matplotlib.axes import Axes
from matplotlib.backends.backend_agg import FigureCanvasAgg
from matplotlib.figure import Figure
Expand All @@ -11,6 +13,8 @@

from container_models.base import FloatArray2D, ImageRGB, StriationProfile
from conversion.data_formats import MarkMetadata
from conversion.data_formats import MarkType
from conversion.likelihood_ratio import ModelSpecs

DEFAULT_COLORMAP = "viridis"

Expand Down Expand Up @@ -386,3 +390,72 @@ def draw_metadata_box(
table[i, 0].PAD = 0.02
table[i, 1].set_text_props(ha="left")
table[i, 1].PAD = 0.02


def _format_lr(llr_data: LLRData) -> str:
"""Format a single log-LR value with optional confidence interval."""
if len(llr_data.llrs) > 1:
raise ValueError(f"expected single LR value, got {len(llr_data.llrs)}")

Comment thread
laurensWe marked this conversation as resolved.
log_lr = llr_data.llrs[0]

if llr_data.llr_intervals is not None:
lower, upper = llr_data.llr_intervals[0, 0], llr_data.llr_intervals[0, 1]
return f"{log_lr:.2f} ({lower:.2f}, {upper:.2f})"
return f"{log_lr:.2f}"


def _common_results_metadata(
    reference_data: ModelSpecs,
    llr_data: LLRData,
    date_report: datetime.date,
    user_id: str,
    mark_type: MarkType,
) -> dict[str, str]:
    """Results metadata fields shared across all mark types.

    :param reference_data: KM/KNM reference populations backing the LR system.
    :param llr_data: LLR result to report (single value expected).
    :param date_report: Date the report is generated.
    :param user_id: Identifier of the reporting user.
    :param mark_type: Type of mark being compared.
    :returns: Mapping of display label to formatted value.
    """
    km_count = len(reference_data.km_scores)
    knm_count = len(reference_data.knm_scores)
    return {
        "Date report": date_report.isoformat(),
        "User ID": user_id,
        "Mark type": mark_type.value,
        "LogLR (5%, 95%)": _format_lr(llr_data),
        "# of KM scores": str(km_count),
        "# of KNM scores": str(knm_count),
    }


def build_results_metadata_striation(
    reference_data: ModelSpecs,
    llr_data: LLRData,
    date_report: datetime.date,
    user_id: str,
    mark_type: MarkType,
    score: float,
    score_transform: float,
) -> dict[str, str]:
    """Build the results-metadata table for a striation mark comparison.

    :param score: Raw CCF score between the two striation profiles.
    :param score_transform: Transformed score fed to the LR system.
    :returns: Mapping of display label to formatted value.
    """
    metadata = _common_results_metadata(
        reference_data, llr_data, date_report, user_id, mark_type
    )
    metadata["Score type"] = "CCF"
    metadata["Score (transform)"] = f"{score:.2f} ({score_transform:.2f})"
    return metadata


def build_results_metadata_impression(
    reference_data: ModelSpecs,
    llr_data: LLRData,
    date_report: datetime.date,
    user_id: str,
    mark_type: MarkType,
    score: int,
    n_cells: int,
) -> dict[str, str]:
    """Build the results-metadata table for an impression mark comparison.

    :param score: CMC count (number of matching cells).
    :param n_cells: Total number of cells analyzed.
    :returns: Mapping of display label to formatted value.
    """
    metadata = _common_results_metadata(
        reference_data, llr_data, date_report, user_id, mark_type
    )
    # TODO these model names should be replaced by the lr system path (new ticket)
    metadata["KM model"] = reference_data.km_model
    metadata["KNM model"] = reference_data.knm_model
    metadata["Score type"] = "CMC"
    metadata["Score (transform)"] = f"{score} of {n_cells}"
    return metadata
20 changes: 20 additions & 0 deletions packages/scratch-core/src/conversion/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import numpy as np
from returns.io import IOResultE, IOSuccess
from returns.result import ResultE, Success

Expand All @@ -13,6 +14,25 @@ def unwrap_result[T](result: IOResultE[T] | ResultE[T]) -> T:
assert False, "failed to unwrap"


def ccf_score_to_logodds(scores: np.ndarray) -> np.ndarray:
    """
    Transform CCF scores from [-1, +1] to [-inf, +inf] using a log10 logit.

    Each score is rescaled to the unit interval and the base-10 log-odds is
    applied:
        y = (score + 1) / 2
        transformed = log10(y / (1 - y))

    Boundary values are pulled inward by machine epsilon so the logit never
    takes log10 of zero and the output stays finite.

    :param scores: 1-D array of raw CCF scores in [-1, +1].
    :returns: 1-D array of transformed scores.
    """
    machine_eps = np.finfo(float).eps
    bounded = np.clip(scores, machine_eps - 1, 1 - machine_eps)
    unit_scaled = (bounded + 1) / 2
    return np.log10(unit_scaled) - np.log10(1 - unit_scaled)


def update_scan_image_data(scan_image: ScanImage, data: DepthData) -> ScanImage:
"""
Return a new ScanImage with updated scan data.
Expand Down
17 changes: 17 additions & 0 deletions packages/scratch-core/tests/resources/lr_systems.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import numpy as np
from lir.data.models import FeatureData, LLRData, InstanceData
from lir.lrsystems.lrsystems import LRSystem


class RandomLRSystem(LRSystem):
    """LRSystem that returns seeded random LLR values, for use in tests."""

    def __init__(self) -> None:
        # NOTE(review): deliberately does not call super().__init__ —
        # presumably no trained state is needed for this stub; confirm
        # against the LRSystem base class.
        pass

    def apply(self, instances: InstanceData) -> LLRData:
        """Return seeded random LLR values, one per input instance."""
        assert isinstance(instances, FeatureData)
        instance_count = len(instances.features)
        # Fixed seed: every call yields the same pseudo-random sequence,
        # keeping test output reproducible.
        generator = np.random.default_rng(seed=42)
        return LLRData(features=generator.random(instance_count))
24 changes: 24 additions & 0 deletions src/extractors/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,30 @@ class ComparisonResponseImpression(ComparisonResponse):


class ComparisonResponseStriationURL(ComparisonResponse):
mark_reference_aligned_data: HttpUrl = Field(
...,
description="Aligned reference mark.",
examples=["http://localhost:8000/preprocessor/files/surface_comparator_859lquto/mark_reference_aligned.npz"],
json_schema_extra={"file_name": "mark_reference_aligned.npz"},
)
mark_reference_aligned_meta: HttpUrl = Field(
...,
description="meta data from the aligned reference mark data.",
examples=["http://localhost:8000/preprocessor/files/surface_comparator_859lquto/mark_reference_aligned.json"],
json_schema_extra={"file_name": "mark_reference_aligned.json"},
)
mark_compared_aligned_data: HttpUrl = Field(
...,
description="Aligned compared mark.",
examples=["http://localhost:8000/preprocessor/files/surface_comparator_859lquto/mark_compared_aligned.npz"],
json_schema_extra={"file_name": "mark_compared_aligned.npz"},
)
mark_compared_aligned_meta: HttpUrl = Field(
...,
description="meta data from the aligned compared mark data.",
examples=["http://localhost:8000/preprocessor/files/surface_comparator_859lquto/mark_compared_aligned.json"],
json_schema_extra={"file_name": "mark_compared_aligned.json"},
)
mark_ref_preview: HttpUrl = Field(
description="",
examples=["http://localhost:8000/preprocessor/files/surface_comparator_859lquto/mark_ref_preview.png"],
Expand Down
Loading