From 48111173ba7a1746495eb805ff33bde70d1692b7 Mon Sep 17 00:00:00 2001 From: alchemystack <70098256+alchemystack@users.noreply.github.com> Date: Fri, 27 Mar 2026 15:30:39 +0100 Subject: [PATCH 1/2] feat: add OpenEntropy source and ECDF amplifier from PR #4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Integrate OpenEntropy hardware entropy source and ECDF signal amplifier from ereid7's PR #4, with fixes for CI failures: - Move openentropy to optional dependency (was breaking Python 3.10/3.11/3.13) - Fix ruff formatting on openentropy.py and test_openentropy.py - Remove bandit-flagged assert in ecdf.py (invariant enforced by _calibrated) - Add ECDF calibration wiring in processor.py - Optimize _to_numpy with is_cpu check to avoid unnecessary .cpu() calls - Add oe_* config fields, entry point, mypy override, and tests 🤖 Generated with [Claude Code](https://claude.com/claude-code) --- README.md | 33 +++ deployments/openentropy/.env.example | 49 +++++ deployments/openentropy/README.md | 128 +++++++++++ pyproject.toml | 8 + src/qr_sampler/amplification/__init__.py | 2 + src/qr_sampler/amplification/ecdf.py | 147 +++++++++++++ src/qr_sampler/config.py | 25 +++ src/qr_sampler/entropy/openentropy.py | 163 ++++++++++++++ src/qr_sampler/processor.py | 13 +- tests/test_amplification/test_ecdf.py | 215 +++++++++++++++++++ tests/test_config.py | 48 +++++ tests/test_entropy/test_openentropy.py | 259 +++++++++++++++++++++++ 12 files changed, 1088 insertions(+), 2 deletions(-) create mode 100644 deployments/openentropy/.env.example create mode 100644 deployments/openentropy/README.md create mode 100644 src/qr_sampler/amplification/ecdf.py create mode 100644 src/qr_sampler/entropy/openentropy.py create mode 100644 tests/test_amplification/test_ecdf.py create mode 100644 tests/test_entropy/test_openentropy.py diff --git a/README.md b/README.md index 857e12b..01cc57e 100644 --- a/README.md +++ b/README.md @@ -353,6 +353,7 @@ All 
fallback-sourced entropy is flagged in diagnostic logs so downstream analysi | **System** | `system` | `os.urandom()` — OS cryptographic RNG (fallback/testing) | | **Timing noise** | `timing_noise` | CPU timing jitter (experimental) | | **Mock uniform** | `mock_uniform` | Configurable test source with seed/bias | +| **OpenEntropy** | `openentropy` | 63 hardware noise sources (thermal, timing, microarch, GPU) — local, no network | ### Fallback behavior @@ -367,6 +368,38 @@ Configure with `QR_FALLBACK_MODE`: - `mock_uniform` — fall back to the mock source - `error` — raise immediately, no fallback +### OpenEntropy + +[OpenEntropy](https://github.com/amenti-labs/openentropy) harvests entropy from 63 hardware noise sources on the local machine — thermal sensors, CPU timing jitter, memory timing, GPU scheduling, and more. No network, no API keys, no gRPC server needed. + +Install: + +```bash +pip install openentropy +``` + +Configure: + +```bash +export QR_ENTROPY_SOURCE_TYPE=openentropy +export QR_OE_CONDITIONING=raw # raw (research default) | vonneumann | sha256 +``` + +List available sources on your machine: + +```python +from openentropy import detect_available_sources +print([s["name"] for s in detect_available_sources()]) +``` + +To sample from specific sources only, set `QR_OE_SOURCES` to a comma-separated list: + +```bash +export QR_OE_SOURCES=clock_jitter,dram_row_buffer +``` + +See [`deployments/openentropy/`](deployments/openentropy/) for the full deployment profile. + ### Third-party entropy sources Any Python package can register entropy sources via entry points: diff --git a/deployments/openentropy/.env.example b/deployments/openentropy/.env.example new file mode 100644 index 0000000..bebc7a5 --- /dev/null +++ b/deployments/openentropy/.env.example @@ -0,0 +1,49 @@ +# deployments/openentropy/.env +# +# Environment variables for the OpenEntropy native entropy profile. 
+# Copy this file to .env and edit as needed: +# cp .env.example .env + +# --- Hugging Face --- +# Model to serve. Default: Qwen/Qwen2.5-1.5B-Instruct +HF_MODEL=Qwen/Qwen2.5-1.5B-Instruct +# Set this if the model is gated (requires accepting a license). +HF_TOKEN= + +# --- Entropy source --- +# OpenEntropy: local hardware entropy (no Docker, native only) +# 63 hardware noise sources: thermal, timing, microarch, GPU, etc. +# Install: pip install openentropy +# Docs: https://github.com/amenti-labs/openentropy +QR_ENTROPY_SOURCE_TYPE=openentropy + +# Conditioning mode: raw (research default), vonneumann (debiased), sha256 (crypto) +QR_OE_CONDITIONING=raw + +# Comma-separated list of source names to use. Empty = all available sources. +QR_OE_SOURCES= + +# Enable parallel collection from multiple sources (faster, more entropy). +QR_OE_PARALLEL=true + +# Timeout in seconds for entropy collection. +QR_OE_TIMEOUT=5.0 + +# Fall back to system entropy if OpenEntropy is unavailable. +QR_FALLBACK_MODE=system + +# --- Sampling parameters --- +QR_SAMPLE_COUNT=20480 +QR_TEMPERATURE_STRATEGY=fixed +QR_FIXED_TEMPERATURE=0.7 +QR_TOP_K=0 +QR_TOP_P=1.0 +QR_LOG_LEVEL=summary + +# --- Ports (host-side) --- +VLLM_PORT=8000 + +# --- Open WebUI (optional, --profile ui) --- +OPEN_WEBUI_PORT=3000 +# Set to true to require login (recommended for shared/public servers). +OPEN_WEBUI_AUTH=false diff --git a/deployments/openentropy/README.md b/deployments/openentropy/README.md new file mode 100644 index 0000000..c8facb5 --- /dev/null +++ b/deployments/openentropy/README.md @@ -0,0 +1,128 @@ +# OpenEntropy Profile + +Runs vLLM with qr-sampler using **OpenEntropy** — a local hardware entropy +source that collects noise from 63 hardware sources on Apple Silicon (thermal, +timing, microarchitecture, GPU, etc.). This is a **native-only profile** — no +Docker, no network dependency. + +## Why not Docker? + +Docker containers cannot access Metal GPU or native hardware entropy sources on +macOS. 
Apple's Virtualization.framework has no GPU passthrough, and hardware +noise sources (thermal sensors, CPU timing, GPU state) are not exposed to +containerized processes. OpenEntropy requires native execution. + +## Quick start + +1. Install OpenEntropy and qr-sampler: + + ```bash + pip install openentropy + pip install -e /path/to/qr-sampler + ``` + +2. Configure your environment: + + ```bash + cd deployments/openentropy + cp .env.example .env + ``` + + Edit `.env` if needed — set `HF_TOKEN` if using a gated model. + +3. Start vLLM: + + ```bash + source .env + vllm serve $HF_MODEL \ + --port $VLLM_PORT \ + --logits-processors qr_sampler + ``` + +## Available entropy sources + +OpenEntropy provides 63 entropy sources across 13 categories. For the full +catalog with physics explanations, see the +[OpenEntropy Source Catalog](https://github.com/amenti-labs/openentropy/blob/master/docs/SOURCES.md). + +List all available sources on your hardware: + +```bash +python -c "from openentropy import detect_available_sources; print([s['name'] for s in detect_available_sources()])" +``` + +Sources span thermal, timing, microarchitecture, GPU, IPC, scheduling, and more. 
+Some notable ones for research: + +| Source | Category | Physical mechanism | +|--------|----------|-------------------| +| `counter_beat` | Thermal | CPU counter vs audio PLL crystal beat frequency | +| `dual_clock_domain` | Microarch | 24 MHz x 41 MHz independent oscillator beat | +| `gpu_divergence` | GPU | Shader warp execution order divergence | +| `dvfs_race` | Microarch | Cross-core DVFS frequency race | +| `clock_jitter` | Timing | Timing jitter between readout paths | +| `dram_row_buffer` | Timing | DRAM row buffer hit/miss timing | + +To sample from a specific source, set `QR_OE_SOURCES`: + +```bash +export QR_OE_SOURCES=counter_beat +``` + +## Conditioning modes + +OpenEntropy supports three conditioning strategies: + +| Mode | Use case | Properties | +|------|----------|-----------| +| `raw` | Research (default) | Preserves hardware noise signal; minimal processing | +| `vonneumann` | Debiased entropy | Von Neumann debiasing; slower, more uniform | +| `sha256` | Cryptographic | SHA-256 hashing; suitable for security-critical applications | + +Set `QR_OE_CONDITIONING` in `.env` or override per-request: + +```python +# Per-request override +extra_args = {"qr_oe_conditioning": "sha256"} +``` + +## Parallel collection + +By default, `QR_OE_PARALLEL=true` collects from multiple sources simultaneously, +increasing entropy throughput. Set to `false` for sequential collection (slower, +lower memory overhead). + +## When to use this profile + +- **Consciousness research**: Study whether intent influences quantum-random + processes using native hardware entropy. +- **Local experiments**: No network latency, no external dependencies. +- **Apple Silicon development**: Leverage Metal GPU and native hardware sensors. +- **Research baseline**: Compare hardware entropy against system entropy + (`/dev/urandom`). + +## Web UI (optional) + +This profile can be paired with [Open WebUI](https://github.com/open-webui/open-webui), a +ChatGPT-style web interface.
To use it, you'll need to run it separately (not +included in this native profile): + +```bash +docker run -d -p 3000:8080 --name open-webui ghcr.io/open-webui/open-webui:latest +``` + +Then point it at your vLLM instance running on `localhost:8000` (from inside the container, use `http://host.docker.internal:8000/v1`). + +A pre-built filter function for controlling qr-sampler parameters from the UI is +available at [`examples/open-webui/`](../../examples/open-webui/). See that +directory's README for import instructions. + +## Next steps + +Once this profile works, you can: +1. Adjust `QR_OE_SOURCES` to use specific entropy sources. +2. Experiment with different conditioning modes (`raw`, `vonneumann`, `sha256`). +3. Compare results against the `urandom` profile (gRPC-based) or `system` profile + (fallback). +4. Browse the full [OpenEntropy Source Catalog](https://github.com/amenti-labs/openentropy/blob/master/docs/SOURCES.md) + for detailed physics explanations of each entropy source. diff --git a/pyproject.toml b/pyproject.toml index fd3dcb1..48f048d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,6 +46,9 @@ dev = [ "pre-commit>=4.0", "bandit>=1.8.0", ] +openentropy = [ + "openentropy>=0.10.0", +] [project.entry-points."vllm.logits_processors"] qr_sampler = "qr_sampler.processor:QRSamplerLogitsProcessor" @@ -55,6 +58,7 @@ system = "qr_sampler.entropy.system:SystemEntropySource" quantum_grpc = "qr_sampler.entropy.quantum:QuantumGrpcSource" timing_noise = "qr_sampler.entropy.timing:TimingNoiseSource" mock_uniform = "qr_sampler.entropy.mock:MockUniformSource" +openentropy = "qr_sampler.entropy.openentropy:OpenEntropySource" [tool.setuptools.packages.find] where = ["src"] @@ -98,6 +102,10 @@ ignore_missing_imports = true module = ["torch", "torch.*"] ignore_missing_imports = true +[[tool.mypy.overrides]] +module = "openentropy.*" +ignore_missing_imports = true + [tool.coverage.run] source = ["src/qr_sampler"] omit = [ diff --git a/src/qr_sampler/amplification/__init__.py b/src/qr_sampler/amplification/__init__.py index
d49a773..5d623b6 100644 --- a/src/qr_sampler/amplification/__init__.py +++ b/src/qr_sampler/amplification/__init__.py @@ -6,12 +6,14 @@ """ from qr_sampler.amplification.base import AmplificationResult, SignalAmplifier +from qr_sampler.amplification.ecdf import ECDFAmplifier from qr_sampler.amplification.registry import AmplifierRegistry from qr_sampler.amplification.zscore import ZScoreMeanAmplifier __all__ = [ "AmplificationResult", "AmplifierRegistry", + "ECDFAmplifier", "SignalAmplifier", "ZScoreMeanAmplifier", ] diff --git a/src/qr_sampler/amplification/ecdf.py b/src/qr_sampler/amplification/ecdf.py new file mode 100644 index 0000000..fd2b705 --- /dev/null +++ b/src/qr_sampler/amplification/ecdf.py @@ -0,0 +1,147 @@ +"""ECDF-based signal amplifier. + +Converts raw entropy bytes into a uniform float via an empirical cumulative +distribution function (ECDF) built from calibration samples. Unlike the +z-score amplifier, this approach makes no distributional assumptions about +the entropy source — it learns the distribution empirically. + +The calibration phase collects N samples from the entropy source, computes +the byte-mean of each, and sorts them. At runtime, the sample mean is +mapped to a uniform float via binary search (Hazen plotting position). +""" + +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING, Any + +import numpy as np + +from qr_sampler.amplification.base import AmplificationResult, SignalAmplifier +from qr_sampler.amplification.registry import AmplifierRegistry +from qr_sampler.exceptions import SignalAmplificationError + +if TYPE_CHECKING: + from qr_sampler.config import QRSamplerConfig + from qr_sampler.entropy.base import EntropySource + +logger = logging.getLogger("qr_sampler") + + +@AmplifierRegistry.register("ecdf") +class ECDFAmplifier(SignalAmplifier): + """ECDF-based signal amplification. + + Algorithm: + Calibration (one-time): + 1. 
Collect N samples of ``sample_count`` bytes each from the + entropy source. + 2. Compute the byte-mean of each sample. + 3. Sort the means to form the empirical CDF. + + Runtime (per token): + 1. Interpret raw_bytes as uint8 array, compute sample mean M. + 2. Binary search M in the sorted calibration means. + 3. Map to uniform via Hazen plotting position: + u = (rank + 1) / (N + 1). + 4. Clamp to (eps, 1-eps). + + The Hazen formula guarantees u in (0, 1) for any input, avoiding + degenerate CDF extremes. No distributional assumptions are required. + """ + + def __init__(self, config: QRSamplerConfig) -> None: + """Initialize with calibration parameters from config. + + Args: + config: Configuration providing ecdf_calibration_samples, + sample_count, and uniform_clamp_epsilon. + """ + self._ecdf_calibration_samples = config.ecdf_calibration_samples + self._sample_count = config.sample_count + self._clamp_epsilon = config.uniform_clamp_epsilon + self._sorted_means: np.ndarray[Any, np.dtype[np.floating[Any]]] | None = None + self._calibrated: bool = False + + def calibrate( + self, + entropy_source: EntropySource, + config: QRSamplerConfig, + ) -> None: + """Build the empirical CDF from calibration samples. + + Collects ``ecdf_calibration_samples`` samples from the entropy source, + computes the byte-mean of each, and sorts them. Calibration is + idempotent — calling again replaces the sorted array. + + Args: + entropy_source: Source to draw calibration bytes from. + config: Configuration providing sample_count. + + Raises: + SignalAmplificationError: If all calibration samples are identical + (zero variance). 
+ """ + n = self._ecdf_calibration_samples + means: list[float] = [] + for _ in range(n): + raw = entropy_source.get_random_bytes(config.sample_count) + sample_mean = float(np.frombuffer(raw, dtype=np.uint8).mean()) + means.append(sample_mean) + + self._sorted_means = np.sort(np.array(means)) + + if np.std(self._sorted_means) == 0.0: + raise SignalAmplificationError( + "ECDF calibration produced zero variance — all samples identical" + ) + + self._calibrated = True + logger.info( + "ECDF calibration complete: %d samples, mean range [%.2f, %.2f]", + n, + float(self._sorted_means[0]), + float(self._sorted_means[-1]), + ) + + def amplify(self, raw_bytes: bytes) -> AmplificationResult: + """Convert raw entropy bytes into a uniform float via ECDF lookup. + + Args: + raw_bytes: Raw entropy bytes from an entropy source. + + Returns: + AmplificationResult with u in (eps, 1-eps) and diagnostics. + + Raises: + SignalAmplificationError: If not calibrated or raw_bytes is empty. + """ + if not self._calibrated: + raise SignalAmplificationError( + "ECDF amplifier has not been calibrated. Call calibrate() first." + ) + if not raw_bytes: + raise SignalAmplificationError("Cannot amplify empty byte sequence") + + sample_mean = float(np.frombuffer(raw_bytes, dtype=np.uint8).mean()) + + # Binary search in the sorted calibration means. + # _sorted_means is guaranteed non-None here because _calibrated is True. + rank = int(np.searchsorted(self._sorted_means, sample_mean, side="right")) # type: ignore[arg-type] + n = len(self._sorted_means) # type: ignore[arg-type] + + # Hazen plotting position: u = (rank + 1) / (N + 1). + u = (rank + 1) / (n + 1) + + # Clamp to avoid degenerate CDF extremes. 
+ eps = self._clamp_epsilon + u = max(eps, min(1.0 - eps, u)) + + return AmplificationResult( + u=u, + diagnostics={ + "sample_mean": sample_mean, + "ecdf_rank": rank, + "calibration_size": n, + }, + ) diff --git a/src/qr_sampler/config.py b/src/qr_sampler/config.py index 29e277b..3a8063d 100644 --- a/src/qr_sampler/config.py +++ b/src/qr_sampler/config.py @@ -37,6 +37,7 @@ "top_p", "log_level", "diagnostic_mode", + "oe_conditioning", } ) @@ -151,6 +152,11 @@ class QRSamplerConfig(BaseSettings): default=1e-10, description="Clamp u to (epsilon, 1-epsilon) to avoid degenerate CDF", ) + ecdf_calibration_samples: int = Field( + default=2000, + ge=100, + description="Samples for ECDF calibration", + ) # --- Temperature Strategy (per-request overridable) --- @@ -201,6 +207,25 @@ class QRSamplerConfig(BaseSettings): description="Store all token records in memory for analysis", ) + # --- OpenEntropy (oe_conditioning per-request, others infrastructure) --- + + oe_conditioning: str = Field( + default="raw", + description="OpenEntropy conditioning mode: raw, sha256, vonneumann", + ) + oe_sources: str = Field( + default="", + description="Comma-separated OpenEntropy source names. Empty = all available.", + ) + oe_parallel: bool = Field( + default=True, + description="Collect OpenEntropy sources in parallel", + ) + oe_timeout: float = Field( + default=5.0, + description="OpenEntropy collection timeout in seconds", + ) + # Populate _ALL_FIELDS now that the class is defined. _ALL_FIELDS = frozenset(QRSamplerConfig.model_fields.keys()) diff --git a/src/qr_sampler/entropy/openentropy.py b/src/qr_sampler/entropy/openentropy.py new file mode 100644 index 0000000..9f1c97c --- /dev/null +++ b/src/qr_sampler/entropy/openentropy.py @@ -0,0 +1,163 @@ +"""OpenEntropy entropy source using the ``openentropy`` Python library. + +Wraps the ``openentropy.EntropyPool`` API to provide hardware-sourced +entropy from any platform-available source (e.g., camera noise, audio +noise, sensor jitter). 
The ``openentropy`` package is optional — this +module degrades gracefully when it is not installed. +""" + +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING + +from qr_sampler.entropy.base import EntropySource +from qr_sampler.entropy.registry import register_entropy_source +from qr_sampler.exceptions import EntropyUnavailableError + +if TYPE_CHECKING: + from qr_sampler.config import QRSamplerConfig + +logger = logging.getLogger("qr_sampler") + +# --------------------------------------------------------------------------- +# Import guard — no crash when openentropy is not installed +# --------------------------------------------------------------------------- + +try: + from openentropy import EntropyPool + + _OPENENTROPY_AVAILABLE = True +except ImportError: + _OPENENTROPY_AVAILABLE = False + + +@register_entropy_source("openentropy") +class OpenEntropySource(EntropySource): + """Hardware entropy via the ``openentropy`` library. + + Uses ``EntropyPool.auto()`` to auto-discover platform-available entropy + sources (camera noise, audio noise, sensor jitter, etc.) and exposes + them through the standard ``EntropySource`` interface. + + The ``openentropy`` package must be installed separately:: + + pip install openentropy + + Configuration fields used from ``QRSamplerConfig``: + + * ``oe_conditioning`` — conditioning mode (``"raw"``, ``"vonneumann"``, + or ``"sha256"``). Per-request overridable. + * ``oe_sources`` — comma-separated list of specific source names to + sample from. Empty string means use all sources via ``collect_all()``. + * ``oe_parallel`` — whether to collect from sources in parallel. + * ``oe_timeout`` — timeout in seconds for ``collect_all()``. + """ + + def __init__(self, config: QRSamplerConfig) -> None: + """Initialize the OpenEntropy source. + + Args: + config: Sampler configuration providing ``oe_*`` fields. + + Raises: + EntropyUnavailableError: If the ``openentropy`` package is not + installed. 
+ """ + if not _OPENENTROPY_AVAILABLE: + raise EntropyUnavailableError( + "openentropy package not installed. Install with: pip install openentropy" + ) + self._config = config + self._pool = EntropyPool.auto() + self._closed = False + + @property + def name(self) -> str: + """Return ``'openentropy'``.""" + return "openentropy" + + @property + def is_available(self) -> bool: + """Whether OpenEntropy has at least one working source.""" + return _OPENENTROPY_AVAILABLE and self._pool.source_count > 0 + + def get_random_bytes(self, n: int) -> bytes: + """Return exactly *n* random bytes from OpenEntropy sources. + + If ``oe_sources`` is configured, samples from each named source + individually via ``get_source_bytes()`` and combines the results. + Otherwise, calls ``collect_all()`` followed by ``get_bytes()``. + + Args: + n: Number of random bytes to generate. + + Returns: + Exactly *n* bytes of entropy. + + Raises: + EntropyUnavailableError: If the source is closed or collection + fails. + """ + if self._closed: + raise EntropyUnavailableError("OpenEntropySource is closed") + + try: + oe_sources = self._config.oe_sources.strip() + if oe_sources: + # Sample from specific named sources. + source_names = [s.strip() for s in oe_sources.split(",") if s.strip()] + combined = b"" + for source_name in source_names: + # Some sources have per-call byte limits, so collect + # in chunks of up to 8192 bytes until we have enough. + remaining = n + while remaining > 0: + request_size = min(remaining, 8192) + chunk = self._pool.get_source_bytes( + source_name, + request_size, + conditioning=self._config.oe_conditioning, + ) + if chunk is None: + raise EntropyUnavailableError( + f"OpenEntropy source '{source_name}' returned no data" + ) + combined += chunk + remaining -= len(chunk) + return combined[:n] + + # Collect from all sources, then draw bytes. 
+ self._pool.collect_all( + parallel=self._config.oe_parallel, + timeout=self._config.oe_timeout, + ) + raw_bytes = self._pool.get_bytes(n, conditioning=self._config.oe_conditioning) + result: bytes = bytes(raw_bytes) + return result + except RuntimeError as e: + raise EntropyUnavailableError(f"OpenEntropy failed: {e}") from e + + def close(self) -> None: + """Mark the source as closed (idempotent).""" + self._closed = True + + def health_check(self) -> dict[str, object]: + """Return a status dictionary for this source. + + Returns: + Dictionary with source name, health status, source count, + and conditioning mode. + """ + if not _OPENENTROPY_AVAILABLE: + return { + "source": "openentropy", + "healthy": False, + "reason": "openentropy not installed", + } + return { + "source": "openentropy", + "healthy": self.is_available, + "source_count": self._pool.source_count, + "conditioning": self._config.oe_conditioning, + } diff --git a/src/qr_sampler/processor.py b/src/qr_sampler/processor.py index 42ea7d3..f28f4b8 100644 --- a/src/qr_sampler/processor.py +++ b/src/qr_sampler/processor.py @@ -194,6 +194,9 @@ def __init__( # --- Build shared components --- self._entropy_source = _build_entropy_source(self._default_config) self._default_amplifier = AmplifierRegistry.build(self._default_config) + # Calibrate amplifier if it supports calibration (e.g., ECDF). + if hasattr(self._default_amplifier, "calibrate"): + self._default_amplifier.calibrate(self._entropy_source, self._default_config) self._default_strategy = TemperatureStrategyRegistry.build( self._default_config, self._vocab_size ) @@ -355,6 +358,9 @@ def update_state(self, batch_update: Any | None) -> None: hash_str = self._default_config_hash else: amplifier = AmplifierRegistry.build(req_config) + # Calibrate per-request amplifier if it supports calibration. 
+ if hasattr(amplifier, "calibrate"): + amplifier.calibrate(self._entropy_source, req_config) strategy = TemperatureStrategyRegistry.build(req_config, self._vocab_size) hash_str = _config_hash(req_config) @@ -497,9 +503,12 @@ def _to_numpy(tensor: Any) -> np.ndarray: """ if isinstance(tensor, np.ndarray): return tensor - # .cpu() moves GPU tensors (CUDA/MPS) to host memory; no-op on CPU. + # torch.Tensor — use .numpy() for zero-copy on CPU. try: - result: np.ndarray = tensor.detach().cpu().numpy() + if not tensor.is_cpu: + result: np.ndarray = tensor.detach().cpu().numpy() + else: + result = tensor.detach().numpy() return result except AttributeError: return np.asarray(tensor) diff --git a/tests/test_amplification/test_ecdf.py b/tests/test_amplification/test_ecdf.py new file mode 100644 index 0000000..f3f1061 --- /dev/null +++ b/tests/test_amplification/test_ecdf.py @@ -0,0 +1,215 @@ +"""Tests for the ECDFAmplifier and ECDF-specific registry behavior.""" + +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +import numpy as np +import pytest + +from qr_sampler.amplification.base import AmplificationResult, SignalAmplifier +from qr_sampler.amplification.ecdf import ECDFAmplifier +from qr_sampler.amplification.registry import AmplifierRegistry +from qr_sampler.config import QRSamplerConfig +from qr_sampler.entropy.base import EntropySource +from qr_sampler.entropy.mock import MockUniformSource +from qr_sampler.exceptions import ( + EntropyUnavailableError, + SignalAmplificationError, +) + + +@pytest.fixture() +def config() -> QRSamplerConfig: + """Default config for ECDF amplification tests.""" + return QRSamplerConfig(_env_file=None, signal_amplifier_type="ecdf") # type: ignore[call-arg] + + +@pytest.fixture() +def amplifier(config: QRSamplerConfig) -> ECDFAmplifier: + """Uncalibrated ECDFAmplifier.""" + return ECDFAmplifier(config) + + +@pytest.fixture() +def calibrated_amplifier(config: QRSamplerConfig) -> ECDFAmplifier: + 
"""ECDFAmplifier calibrated with a balanced MockUniformSource.""" + amp = ECDFAmplifier(config) + source = MockUniformSource() + amp.calibrate(source, config) + return amp + + +class TestECDFAmplifier: + """Tests for ECDFAmplifier core functionality.""" + + def test_calibrate_and_amplify_balanced(self, config: QRSamplerConfig) -> None: + """Calibrated amplifier with balanced source should produce u ~ 0.5.""" + amp = ECDFAmplifier(config) + cal_source = MockUniformSource(seed=42) + amp.calibrate(cal_source, config) + + test_source = MockUniformSource(seed=99) + u_values = [] + for _ in range(50): + raw = test_source.get_random_bytes(config.sample_count) + u_values.append(amp.amplify(raw).u) + u_mean = sum(u_values) / len(u_values) + assert abs(u_mean - 0.5) < 0.15 + + def test_amplify_uncalibrated_raises(self, amplifier: ECDFAmplifier) -> None: + """Amplifying without calibration should raise SignalAmplificationError.""" + with pytest.raises(SignalAmplificationError, match="not been calibrated"): + amplifier.amplify(bytes([128] * 100)) + + def test_amplify_empty_bytes_raises(self, calibrated_amplifier: ECDFAmplifier) -> None: + """Empty input should raise SignalAmplificationError.""" + with pytest.raises(SignalAmplificationError, match="empty"): + calibrated_amplifier.amplify(b"") + + def test_amplify_single_byte(self, calibrated_amplifier: ECDFAmplifier) -> None: + """Single byte should produce a valid AmplificationResult.""" + result = calibrated_amplifier.amplify(b"\x80") + assert isinstance(result, AmplificationResult) + assert 0.0 < result.u < 1.0 + + def test_u_clamped_within_bounds(self, calibrated_amplifier: ECDFAmplifier) -> None: + """u should never be exactly 0.0 or 1.0 due to epsilon clamping.""" + for val in [0, 128, 255]: + raw = bytes([val] * 1000) + result = calibrated_amplifier.amplify(raw) + assert result.u > 0.0 + assert result.u < 1.0 + + def test_diagnostics_keys(self, calibrated_amplifier: ECDFAmplifier) -> None: + """Diagnostics should contain 
expected keys.""" + result = calibrated_amplifier.amplify(bytes([128] * 100)) + assert "sample_mean" in result.diagnostics + assert "ecdf_rank" in result.diagnostics + assert "calibration_size" in result.diagnostics + + def test_diagnostics_values( + self, + calibrated_amplifier: ECDFAmplifier, + config: QRSamplerConfig, + ) -> None: + """sample_mean should match numpy mean; calibration_size should match config.""" + raw = bytes([10, 20, 30]) + result = calibrated_amplifier.amplify(raw) + expected_mean = float(np.frombuffer(raw, dtype=np.uint8).mean()) + assert abs(result.diagnostics["sample_mean"] - expected_mean) < 1e-10 + assert result.diagnostics["calibration_size"] == config.ecdf_calibration_samples + + def test_result_is_frozen(self, calibrated_amplifier: ECDFAmplifier) -> None: + """AmplificationResult should be immutable.""" + result = calibrated_amplifier.amplify(bytes([128] * 100)) + with pytest.raises(AttributeError): + result.u = 0.5 # type: ignore[misc] + + def test_is_subclass_of_abc(self) -> None: + """ECDFAmplifier should be a SignalAmplifier subclass.""" + assert issubclass(ECDFAmplifier, SignalAmplifier) + + +class TestECDFCalibration: + """Tests for ECDF calibration behavior.""" + + def test_calibration_collects_correct_count(self, config: QRSamplerConfig) -> None: + """get_random_bytes should be called ecdf_calibration_samples times.""" + amp = ECDFAmplifier(config) + source = MockUniformSource(seed=42) + with patch.object(source, "get_random_bytes", wraps=source.get_random_bytes) as mock_get: + amp.calibrate(source, config) + assert mock_get.call_count == config.ecdf_calibration_samples + + def test_calibration_is_idempotent(self, config: QRSamplerConfig) -> None: + """Calibrating twice should replace the sorted means array.""" + amp = ECDFAmplifier(config) + + source1 = MockUniformSource(seed=42) + amp.calibrate(source1, config) + first_means = amp._sorted_means.copy() # type: ignore[union-attr] + + source2 = MockUniformSource(seed=99) + 
amp.calibrate(source2, config) + second_means = amp._sorted_means # type: ignore[union-attr] + + assert not np.array_equal(first_means, second_means) + + def test_calibration_zero_variance_raises(self, config: QRSamplerConfig) -> None: + """Constant entropy source should raise SignalAmplificationError.""" + amp = ECDFAmplifier(config) + source = MagicMock(spec=EntropySource) + source.get_random_bytes.return_value = bytes([128] * config.sample_count) + + with pytest.raises(SignalAmplificationError, match="zero variance"): + amp.calibrate(source, config) + + def test_calibration_entropy_unavailable_propagates(self, config: QRSamplerConfig) -> None: + """EntropyUnavailableError from source should propagate uncaught.""" + amp = ECDFAmplifier(config) + source = MagicMock(spec=EntropySource) + source.get_random_bytes.side_effect = EntropyUnavailableError("source failed") + + with pytest.raises(EntropyUnavailableError, match="source failed"): + amp.calibrate(source, config) + + +class TestAmplifierRegistryECDF: + """Tests for ECDF registration in AmplifierRegistry.""" + + def test_ecdf_is_registered(self) -> None: + """The ecdf amplifier should be registered at import time.""" + klass = AmplifierRegistry.get("ecdf") + assert klass is ECDFAmplifier + + def test_build_ecdf_from_config(self, config: QRSamplerConfig) -> None: + """build() should return an ECDFAmplifier instance.""" + amplifier = AmplifierRegistry.build(config) + assert isinstance(amplifier, ECDFAmplifier) + + def test_list_registered_includes_ecdf(self) -> None: + """list_registered() should include ecdf.""" + names = AmplifierRegistry.list_registered() + assert "ecdf" in names + + +class TestECDFStatistical: + """Statistical property tests for ECDF amplification (requires scipy).""" + + def test_uniform_distribution_ks(self, config: QRSamplerConfig) -> None: + """u values from calibrated ECDF should pass KS test for uniformity.""" + scipy = pytest.importorskip("scipy") + + amp = ECDFAmplifier(config) + 
cal_source = MockUniformSource(seed=42) + amp.calibrate(cal_source, config) + + sample_source = MockUniformSource(seed=99) + u_values = [] + for _ in range(500): + raw = sample_source.get_random_bytes(config.sample_count) + result = amp.amplify(raw) + u_values.append(result.u) + + stat, p_value = scipy.stats.kstest(u_values, "uniform") + assert p_value > 0.01, f"KS test failed: stat={stat:.4f}, p={p_value:.4f}" + + def test_bias_correction(self, config: QRSamplerConfig) -> None: + """ECDF calibration should correct for source bias.""" + pytest.importorskip("scipy") + + biased_mean = 130.0 + amp = ECDFAmplifier(config) + cal_source = MockUniformSource(mean=biased_mean, seed=42) + amp.calibrate(cal_source, config) + + sample_source = MockUniformSource(mean=biased_mean, seed=99) + u_values = [] + for _ in range(500): + raw = sample_source.get_random_bytes(config.sample_count) + result = amp.amplify(raw) + u_values.append(result.u) + + u_mean = sum(u_values) / len(u_values) + assert 0.4 <= u_mean <= 0.6, f"u_mean={u_mean:.4f} outside [0.4, 0.6]" diff --git a/tests/test_config.py b/tests/test_config.py index b386a62..4551240 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -248,6 +248,9 @@ class TestNonOverridableFields: "grpc_api_key_header", "fallback_mode", "entropy_source_type", + "oe_sources", + "oe_parallel", + "oe_timeout", ], ) def test_infrastructure_field_rejected( @@ -270,6 +273,9 @@ def test_infrastructure_field_rejected( "grpc_api_key_header", "fallback_mode", "entropy_source_type", + "oe_sources", + "oe_parallel", + "oe_timeout", ], ) def test_infrastructure_field_rejected_in_validate(self, field_name: str) -> None: @@ -385,3 +391,45 @@ def test_model_copy_preserves_unmodified(self, default_config: QRSamplerConfig) assert copy.top_p == default_config.top_p assert copy.fixed_temperature == default_config.fixed_temperature assert copy.grpc_server_address == default_config.grpc_server_address + + +# 
--------------------------------------------------------------------------- +# OpenEntropy config fields +# --------------------------------------------------------------------------- + + +class TestOpenEntropyConfigFields: + """Verify OpenEntropy config fields have correct defaults and overrides.""" + + def test_oe_defaults(self, default_config: QRSamplerConfig) -> None: + """Verify OpenEntropy field defaults.""" + assert default_config.oe_conditioning == "raw" + assert default_config.oe_sources == "" + assert default_config.oe_parallel is True + assert default_config.oe_timeout == 5.0 + + def test_oe_conditioning_env_var(self) -> None: + """Verify QR_OE_CONDITIONING env var is loaded.""" + with patch.dict(os.environ, {"QR_OE_CONDITIONING": "sha256"}): + config = QRSamplerConfig(_env_file=None) # type: ignore[call-arg] + assert config.oe_conditioning == "sha256" + + def test_oe_conditioning_per_request(self, default_config: QRSamplerConfig) -> None: + """Verify oe_conditioning can be overridden per-request.""" + result = resolve_config(default_config, {"qr_oe_conditioning": "vonneumann"}) + assert result.oe_conditioning == "vonneumann" + + def test_oe_sources_infra_locked(self) -> None: + """Verify oe_sources cannot be overridden per-request.""" + with pytest.raises(ConfigValidationError, match="infrastructure field"): + validate_extra_args({"qr_oe_sources": "clock_jitter"}) + + def test_oe_parallel_infra_locked(self) -> None: + """Verify oe_parallel cannot be overridden per-request.""" + with pytest.raises(ConfigValidationError, match="infrastructure field"): + validate_extra_args({"qr_oe_parallel": "false"}) + + def test_oe_timeout_infra_locked(self) -> None: + """Verify oe_timeout cannot be overridden per-request.""" + with pytest.raises(ConfigValidationError, match="infrastructure field"): + validate_extra_args({"qr_oe_timeout": "10.0"}) diff --git a/tests/test_entropy/test_openentropy.py b/tests/test_entropy/test_openentropy.py new file mode 100644 index 
0000000..62c29d0 --- /dev/null +++ b/tests/test_entropy/test_openentropy.py @@ -0,0 +1,259 @@ +"""Tests for OpenEntropySource (mocked — no openentropy install required).""" + +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +import pytest + +from qr_sampler.config import QRSamplerConfig +from qr_sampler.exceptions import EntropyUnavailableError + +_POOL_TARGET = "qr_sampler.entropy.openentropy.EntropyPool" + + +def _make_config(**overrides: object) -> QRSamplerConfig: + """Create a QRSamplerConfig with openentropy-relevant defaults.""" + return QRSamplerConfig(_env_file=None, **overrides) # type: ignore[call-arg] + + +def _make_mock_pool(source_count: int = 3, bytes_return: bytes | None = None) -> MagicMock: + """Create a mock EntropyPool with configurable source_count and get_bytes return.""" + pool = MagicMock() + pool.source_count = source_count + if bytes_return is not None: + pool.get_bytes.return_value = bytes_return + pool.get_source_bytes.return_value = bytes_return + return pool + + +class TestOpenEntropySource: + """Tests for OpenEntropySource with fully mocked openentropy library.""" + + def test_name(self, default_config: QRSamplerConfig) -> None: + """Source name should be 'openentropy'.""" + mock_pool = _make_mock_pool(source_count=3, bytes_return=b"\x00" * 32) + mock_pool_class = MagicMock(spec=[]) + mock_pool_class.auto = MagicMock(return_value=mock_pool) + + with ( + patch("qr_sampler.entropy.openentropy._OPENENTROPY_AVAILABLE", True), + patch(_POOL_TARGET, mock_pool_class, create=True), + ): + from qr_sampler.entropy.openentropy import OpenEntropySource + + source = OpenEntropySource(default_config) + assert source.name == "openentropy" + + def test_is_available_when_installed(self, default_config: QRSamplerConfig) -> None: + """is_available should be True when pool has sources.""" + mock_pool = _make_mock_pool(source_count=3) + mock_pool_class = MagicMock(spec=[]) + mock_pool_class.auto = 
MagicMock(return_value=mock_pool) + + with ( + patch("qr_sampler.entropy.openentropy._OPENENTROPY_AVAILABLE", True), + patch(_POOL_TARGET, mock_pool_class, create=True), + ): + from qr_sampler.entropy.openentropy import OpenEntropySource + + source = OpenEntropySource(default_config) + assert source.is_available is True + + def test_is_available_when_no_sources(self, default_config: QRSamplerConfig) -> None: + """is_available should be False when pool has zero sources.""" + mock_pool = _make_mock_pool(source_count=0) + mock_pool_class = MagicMock(spec=[]) + mock_pool_class.auto = MagicMock(return_value=mock_pool) + + with ( + patch("qr_sampler.entropy.openentropy._OPENENTROPY_AVAILABLE", True), + patch(_POOL_TARGET, mock_pool_class, create=True), + ): + from qr_sampler.entropy.openentropy import OpenEntropySource + + source = OpenEntropySource(default_config) + assert source.is_available is False + + def test_get_random_bytes_returns_correct_count(self, default_config: QRSamplerConfig) -> None: + """get_random_bytes should return exactly n bytes.""" + n = 64 + mock_pool = _make_mock_pool(source_count=3, bytes_return=b"\xab" * n) + mock_pool_class = MagicMock(spec=[]) + mock_pool_class.auto = MagicMock(return_value=mock_pool) + + with ( + patch("qr_sampler.entropy.openentropy._OPENENTROPY_AVAILABLE", True), + patch(_POOL_TARGET, mock_pool_class, create=True), + ): + from qr_sampler.entropy.openentropy import OpenEntropySource + + source = OpenEntropySource(default_config) + result = source.get_random_bytes(n) + assert len(result) == n + assert isinstance(result, bytes) + + def test_get_random_bytes_raw_conditioning(self, default_config: QRSamplerConfig) -> None: + """Default conditioning should be 'raw'.""" + n = 32 + mock_pool = _make_mock_pool(source_count=3, bytes_return=b"\x00" * n) + mock_pool_class = MagicMock(spec=[]) + mock_pool_class.auto = MagicMock(return_value=mock_pool) + + with ( + patch("qr_sampler.entropy.openentropy._OPENENTROPY_AVAILABLE", True), + 
patch(_POOL_TARGET, mock_pool_class, create=True), + ): + from qr_sampler.entropy.openentropy import OpenEntropySource + + source = OpenEntropySource(default_config) + source.get_random_bytes(n) + + # Default oe_conditioning is "raw"; collect_all + get_bytes path + mock_pool.collect_all.assert_called_once() + mock_pool.get_bytes.assert_called_once_with(n, conditioning="raw") + + def test_get_random_bytes_sha256_conditioning(self) -> None: + """SHA256 conditioning should be passed through to pool.get_bytes.""" + n = 32 + config = _make_config(oe_conditioning="sha256") + mock_pool = _make_mock_pool(source_count=3, bytes_return=b"\x00" * n) + mock_pool_class = MagicMock(spec=[]) + mock_pool_class.auto = MagicMock(return_value=mock_pool) + + with ( + patch("qr_sampler.entropy.openentropy._OPENENTROPY_AVAILABLE", True), + patch(_POOL_TARGET, mock_pool_class, create=True), + ): + from qr_sampler.entropy.openentropy import OpenEntropySource + + source = OpenEntropySource(config) + source.get_random_bytes(n) + + mock_pool.get_bytes.assert_called_once_with(n, conditioning="sha256") + + def test_close_idempotent(self, default_config: QRSamplerConfig) -> None: + """close() should be idempotent; subsequent get_random_bytes raises.""" + mock_pool = _make_mock_pool(source_count=3, bytes_return=b"\x00" * 16) + mock_pool_class = MagicMock(spec=[]) + mock_pool_class.auto = MagicMock(return_value=mock_pool) + + with ( + patch("qr_sampler.entropy.openentropy._OPENENTROPY_AVAILABLE", True), + patch(_POOL_TARGET, mock_pool_class, create=True), + ): + from qr_sampler.entropy.openentropy import OpenEntropySource + + source = OpenEntropySource(default_config) + source.close() # First close — no error. + source.close() # Second close — still no error. 
+ + with pytest.raises(EntropyUnavailableError, match="closed"): + source.get_random_bytes(16) + + def test_health_check_when_available(self, default_config: QRSamplerConfig) -> None: + """health_check should return source, healthy, source_count, conditioning.""" + mock_pool = _make_mock_pool(source_count=5) + mock_pool_class = MagicMock(spec=[]) + mock_pool_class.auto = MagicMock(return_value=mock_pool) + + with ( + patch("qr_sampler.entropy.openentropy._OPENENTROPY_AVAILABLE", True), + patch(_POOL_TARGET, mock_pool_class, create=True), + ): + from qr_sampler.entropy.openentropy import OpenEntropySource + + source = OpenEntropySource(default_config) + health = source.health_check() + + assert health["source"] == "openentropy" + assert health["healthy"] is True + assert health["source_count"] == 5 + assert health["conditioning"] == "raw" + + def test_health_check_when_not_installed(self) -> None: + """health_check with openentropy unavailable returns unhealthy dict.""" + # We need an existing instance to call health_check on. + # Create one while "installed", then patch the flag to False. + mock_pool = _make_mock_pool(source_count=3) + mock_pool_class = MagicMock(spec=[]) + mock_pool_class.auto = MagicMock(return_value=mock_pool) + + with ( + patch("qr_sampler.entropy.openentropy._OPENENTROPY_AVAILABLE", True), + patch(_POOL_TARGET, mock_pool_class, create=True), + ): + from qr_sampler.entropy.openentropy import OpenEntropySource + + config = _make_config() + source = OpenEntropySource(config) + + # Now patch the flag to False (simulating post-construction unavailability). 
+ with patch("qr_sampler.entropy.openentropy._OPENENTROPY_AVAILABLE", False): + health = source.health_check() + assert health == { + "source": "openentropy", + "healthy": False, + "reason": "openentropy not installed", + } + + def test_source_filtering(self) -> None: + """When oe_sources is set, get_source_bytes should be called per source.""" + config = _make_config(oe_sources="clock_jitter,dram_row_buffer") + n = 64 + mock_pool = _make_mock_pool(source_count=2, bytes_return=b"\xff" * n) + mock_pool_class = MagicMock(spec=[]) + mock_pool_class.auto = MagicMock(return_value=mock_pool) + + with ( + patch("qr_sampler.entropy.openentropy._OPENENTROPY_AVAILABLE", True), + patch(_POOL_TARGET, mock_pool_class, create=True), + ): + from qr_sampler.entropy.openentropy import OpenEntropySource + + source = OpenEntropySource(config) + result = source.get_random_bytes(n) + + # Should NOT use collect_all path. + mock_pool.collect_all.assert_not_called() + mock_pool.get_bytes.assert_not_called() + + # Should call get_source_bytes for each named source. + assert mock_pool.get_source_bytes.call_count == 2 + calls = mock_pool.get_source_bytes.call_args_list + assert calls[0][0] == ("clock_jitter", n) + assert calls[0][1] == {"conditioning": "raw"} + assert calls[1][0] == ("dram_row_buffer", n) + assert calls[1][1] == {"conditioning": "raw"} + + # Result is truncated to n bytes. 
+ assert len(result) == n + + def test_raises_entropy_unavailable_on_runtime_error( + self, default_config: QRSamplerConfig + ) -> None: + """RuntimeError from pool should be wrapped in EntropyUnavailableError.""" + mock_pool = _make_mock_pool(source_count=3) + mock_pool.collect_all.side_effect = RuntimeError("hardware fault") + mock_pool_class = MagicMock(spec=[]) + mock_pool_class.auto = MagicMock(return_value=mock_pool) + + with ( + patch("qr_sampler.entropy.openentropy._OPENENTROPY_AVAILABLE", True), + patch(_POOL_TARGET, mock_pool_class, create=True), + ): + from qr_sampler.entropy.openentropy import OpenEntropySource + + source = OpenEntropySource(default_config) + with pytest.raises(EntropyUnavailableError, match="OpenEntropy failed"): + source.get_random_bytes(32) + + def test_raises_when_not_installed(self) -> None: + """Constructor should raise EntropyUnavailableError when openentropy is absent.""" + config = _make_config() + + with patch("qr_sampler.entropy.openentropy._OPENENTROPY_AVAILABLE", False): + from qr_sampler.entropy.openentropy import OpenEntropySource + + with pytest.raises(EntropyUnavailableError, match="not installed"): + OpenEntropySource(config) From 704230b96b038b8ac855650f3a13cb00f7af3688 Mon Sep 17 00:00:00 2001 From: alchemystack <70098256+alchemystack@users.noreply.github.com> Date: Fri, 27 Mar 2026 16:08:43 +0100 Subject: [PATCH 2/2] fix: resolve mypy strict type errors in ECDFAmplifier Narrow _sorted_means with a local variable + None check instead of type: ignore comments, satisfying mypy --strict. 
--- src/qr_sampler/amplification/ecdf.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/qr_sampler/amplification/ecdf.py b/src/qr_sampler/amplification/ecdf.py index fd2b705..aeda090 100644 --- a/src/qr_sampler/amplification/ecdf.py +++ b/src/qr_sampler/amplification/ecdf.py @@ -126,9 +126,11 @@ def amplify(self, raw_bytes: bytes) -> AmplificationResult: sample_mean = float(np.frombuffer(raw_bytes, dtype=np.uint8).mean()) # Binary search in the sorted calibration means. - # _sorted_means is guaranteed non-None here because _calibrated is True. - rank = int(np.searchsorted(self._sorted_means, sample_mean, side="right")) # type: ignore[arg-type] - n = len(self._sorted_means) # type: ignore[arg-type] + sorted_means = self._sorted_means + if sorted_means is None: + raise SignalAmplificationError("Calibration data missing") + rank = int(np.searchsorted(sorted_means, sample_mean, side="right")) + n = len(sorted_means) # Hazen plotting position: u = (rank + 1) / (N + 1). u = (rank + 1) / (n + 1)