From 00696e99d4041cff964c6d7cd35c40d62ea4746c Mon Sep 17 00:00:00 2001 From: Luke Kosewski Date: Wed, 10 Jun 2026 22:20:24 +0000 Subject: [PATCH] fix: track Kompress model repo across headroom-ai 0.24.0 rename MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit headroom-ai 0.24.0 renamed the default Kompress weights repo (chopratejas/kompress-base -> chopratejas/kompress-v2-base). worker.py hardcoded the old name in its HF-cache pre-check, which decides whether to force HF_HUB_OFFLINE. Against 0.24.0 that broke two ways: a host with only the old model cached would be forced offline against a model that version no longer loads (cold load fails), and once the new model was cached the offline optimization silently stopped engaging. Resolve the repo from the installed headroom-ai version instead of hardcoding it, so a single worker.py supports both 0.23.x and 0.24.x — including hosts mid-upgrade. The version is read via importlib.metadata, never by importing headroom, so the set-offline-env-before-importing-transformers ordering this module relies on is preserved. When the model the installed version loads isn't cached we stay online (safe) rather than forcing offline. Co-Authored-By: Claude Opus 4.8 (1M context) Signed-off-by: Luke Kosewski --- tests/test_worker.py | 49 ++++++++++++++++++++++++++++++++++++++++++++ worker.py | 36 ++++++++++++++++++++++++++++---- 2 files changed, 81 insertions(+), 4 deletions(-) diff --git a/tests/test_worker.py b/tests/test_worker.py index e3f7882..221611c 100644 --- a/tests/test_worker.py +++ b/tests/test_worker.py @@ -13,6 +13,8 @@ import sys import types import unittest +from importlib.metadata import PackageNotFoundError +from unittest import mock # Make worker.py importable from the repo root. 
ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) @@ -201,5 +203,52 @@ def test_warmup_light_when_not_requested(self): self.assertNotIn("compress_user_messages", self.calls[-1]["kwargs"]) +class KompressRepoTest(unittest.TestCase): + """The default Kompress weights repo changed in headroom-ai 0.24.0 + (kompress-base -> kompress-v2-base); worker resolves it from the installed + version so a single worker.py supports both, including mid-upgrade hosts.""" + + def _repo_for(self, version_value): + with mock.patch("importlib.metadata.version", return_value=version_value): + return worker._kompress_weights_repo() + + def test_legacy_versions_use_kompress_base(self): + for v in ("0.23.0", "0.23.5", "0.1.0"): + self.assertEqual(self._repo_for(v), "chopratejas/kompress-base", v) + + def test_v024_plus_uses_v2_base(self): + for v in ("0.24.0", "0.24.3", "0.25.0", "1.0.0"): + self.assertEqual(self._repo_for(v), "chopratejas/kompress-v2-base", v) + + def test_unreadable_version_defaults_to_v2(self): + with mock.patch("importlib.metadata.version", side_effect=PackageNotFoundError): + self.assertEqual(worker._kompress_weights_repo(), "chopratejas/kompress-v2-base") + + def test_odd_version_defaults_to_v2(self): + # A non-numeric component must not raise; fall back to the current default. + self.assertEqual(self._repo_for("unknown"), "chopratejas/kompress-v2-base") + + def test_models_cached_tracks_resolved_repo(self): + # Inject a fake huggingface_hub so this runs without the real dependency. + def fake_hub(cached_ids): + repos = [types.SimpleNamespace(repo_id=i) for i in cached_ids] + mod = types.ModuleType("huggingface_hub") + mod.scan_cache_dir = lambda: types.SimpleNamespace(repos=repos) + return mod + + # 0.24 host with both required repos cached -> safe to go offline. 
+ both = {"answerdotai/ModernBERT-base", "chopratejas/kompress-v2-base"} + with mock.patch.dict(sys.modules, {"huggingface_hub": fake_hub(both)}), \ + mock.patch("importlib.metadata.version", return_value="0.24.0"): + self.assertTrue(worker._models_cached()) + + # The upgrade trap: 0.24 installed but only the OLD model cached. We must + # NOT force offline (we'd block the v2 download), so _models_cached=False. + only_old = {"answerdotai/ModernBERT-base", "chopratejas/kompress-base"} + with mock.patch.dict(sys.modules, {"huggingface_hub": fake_hub(only_old)}), \ + mock.patch("importlib.metadata.version", return_value="0.24.0"): + self.assertFalse(worker._models_cached()) + + if __name__ == "__main__": unittest.main() diff --git a/worker.py b/worker.py index 3fba16d..1352f83 100644 --- a/worker.py +++ b/worker.py @@ -48,19 +48,47 @@ # up as "unauthenticated requests to the HF Hub", adds latency to the first # request a worker serves, and risks anonymous rate-limiting across a pool. We # tame it *before* importing headroom (so transformers sees the env at import). -_KOMPRESS_REPOS = ("answerdotai/ModernBERT-base", "chopratejas/kompress-base") +# +# ModernBERT is the Kompress tokenizer/encoder base, unchanged across versions. +_MODERNBERT_REPO = "answerdotai/ModernBERT-base" + + +def _kompress_weights_repo() -> str: + """HF repo holding the Kompress weights for the *installed* headroom-ai. + + The default Kompress model changed in headroom-ai 0.24.0 + (chopratejas/kompress-base -> chopratejas/kompress-v2-base). To support + hosts on either version (including ones mid-upgrade that still have only the + old model cached), we resolve the repo from the installed package version + rather than hardcoding one. We read the version via importlib.metadata and + never by importing headroom — importing it here would pull in transformers + before we've set the offline env, which is exactly what this module avoids. 
+ + On an unreadable/odd version we assume the current default (v2): the worst + case is then a stale guess that loses the offline optimization, never one + that forces offline against a model the installed version won't load.""" + try: + from importlib.metadata import version + + major, minor = (int(p) for p in version("headroom-ai").split(".")[:2]) + if (major, minor) < (0, 24): + return "chopratejas/kompress-base" + except Exception: # noqa: BLE001 - missing/odd version -> assume current default + pass + return "chopratejas/kompress-v2-base" def _models_cached() -> bool: - """True if the Kompress models are already in the local HF cache, so it's - safe to run transformers offline (no network needed to load them).""" + """True if the Kompress models the installed headroom will load are already + in the local HF cache, so it's safe to run transformers offline (no network + needed to load them).""" try: from huggingface_hub import scan_cache_dir repos = {r.repo_id for r in scan_cache_dir().repos} except Exception: # noqa: BLE001 - hub missing/unscannable -> assume not cached return False - return set(_KOMPRESS_REPOS).issubset(repos) + return {_MODERNBERT_REPO, _kompress_weights_repo()}.issubset(repos) def _configure_hf_env() -> None: