diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index b799d14..89df755 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -23,6 +23,7 @@ jobs:
             "uvicorn[standard]==0.32.*" \
             "mcp" \
             "neo4j==5.*" \
+            "real-ladybug==0.15.2" \
             "httpx==0.28.*" \
             "pydantic==2.*" \
             "python-dotenv==1.*" \
@@ -49,7 +50,9 @@ jobs:
       - name: Run smoke tests
         env:
           MOLLYGRAPH_TEST_MODE: "1"
-        run: pytest -q -m smoke
+          MOLLYGRAPH_ALLOW_MODEL_DOWNLOADS: "0"
+          PYTHONPATH: service
+        run: python -m pytest -q tests -m smoke
 
   integration:
     runs-on: ubuntu-latest
@@ -84,6 +87,7 @@ jobs:
             "uvicorn[standard]==0.32.*" \
             "mcp" \
             "neo4j==5.*" \
+            "real-ladybug==0.15.2" \
             "httpx==0.28.*" \
             "pydantic==2.*" \
             "python-dotenv==1.*" \
@@ -94,7 +98,9 @@ jobs:
       - name: Run integration tests
         env:
           MOLLYGRAPH_TEST_MODE: "1"
+          MOLLYGRAPH_ALLOW_MODEL_DOWNLOADS: "0"
           NEO4J_URI: bolt://localhost:7687
           NEO4J_USER: neo4j
           NEO4J_PASSWORD: testpassword
-        run: pytest -q -m integration
+          PYTHONPATH: service
+        run: python -m pytest -q tests -m integration
diff --git a/.gitignore b/.gitignore
index 6685607..a9f26de 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,6 +12,7 @@ venv/
 ENV/
 
 ## Database
+.graph-memory/
 *.db
 *.db-journal
 *.db-wal
diff --git a/README.md b/README.md
index 0fa25a9..5cb5465 100644
--- a/README.md
+++ b/README.md
@@ -32,7 +32,7 @@ The canonical local runtime is Python `3.12`.
 The install script:
 - creates `service/.env` from `service/.env.example` if needed
 - creates the runtime venv at `service/.venv`
-- preloads the default `GLiNER2` and embedding models when possible
+- skips model downloads unless `MOLLYGRAPH_PRELOAD_MODELS=1` is set
 
 Default local API:
 - base URL: `http://127.0.0.1:7422`
@@ -44,6 +44,10 @@ Production-style smoke test:
 service/.venv/bin/python scripts/production_smoke.py --json
 ```
 
+That smoke test uses lightweight fixture extraction by default so it can verify
+the runtime without loading large local models. Use `--real-models` only when the
+model cache is warm and you intentionally want to exercise GLiNER2.
+
 ## Default Stack
 
 - graph storage: `Ladybug`
diff --git a/scripts/install.sh b/scripts/install.sh
index c20a44f..a82ab91 100755
--- a/scripts/install.sh
+++ b/scripts/install.sh
@@ -69,16 +69,24 @@ source "$VENV_DIR/bin/activate"
 python -m pip install --upgrade pip
 python -m pip install -r "$ROOT_DIR/requirements.txt"
 
-python - <<'PY'
-try:
-    from gliner2 import GLiNER2
-    GLiNER2.from_pretrained("fastino/gliner2-large-v1")
-    print("Downloaded base GLiNER2 model")
-except Exception as exc:
-    print(f"Skipped GLiNER2 pre-download: {exc}")
-PY
+case "${MOLLYGRAPH_PRELOAD_MODELS:-0}" in
+  1|true|TRUE|yes|YES|on|ON)
+    python - <<'PY'
+from huggingface_hub import hf_hub_download
+
+downloads = [
+    ("fastino/gliner2-large-v1", "model.safetensors", "base GLiNER2 model"),
+]
+
+for repo_id, filename, label in downloads:
+    try:
+        info = hf_hub_download(repo_id, filename, dry_run=True)
+        size_mb = int((info.size or 0) / (1024 * 1024))
+        print(f"Preparing {label}: {repo_id}/{filename} ({size_mb}MB, cached={info.is_cached})")
+        hf_hub_download(repo_id, filename)
+    except Exception as exc:
+        print(f"Skipped {label} pre-download: {exc}")
 
-python - <<'PY'
 try:
     from sentence_transformers import SentenceTransformer
     SentenceTransformer(
@@ -89,6 +97,11 @@ try:
 except Exception as exc:
     print(f"Skipped embedding model pre-download: {exc}")
 PY
+    ;;
+  *)
+    echo "Skipping model pre-download. Set MOLLYGRAPH_PRELOAD_MODELS=1 when you intentionally want to fetch local model weights."
+    ;;
+esac
 
 echo "Install complete."
 echo "Canonical local runtime:"
@@ -96,6 +109,7 @@ echo "  venv: $VENV_DIR"
 echo "  env:  $ACTIVE_ENV_FILE"
 echo "  data: ${MOLLYGRAPH_HOME_DIR:-$HOME/.graph-memory}"
 echo "  python: $TARGET_PYTHON"
+echo "  model preload: ${MOLLYGRAPH_PRELOAD_MODELS:-0} (set MOLLYGRAPH_PRELOAD_MODELS=1 to download weights intentionally)"
 echo "Default local stack:"
 echo "  MOLLYGRAPH_GRAPH_BACKEND=ladybug"
 echo "  MOLLYGRAPH_VECTOR_BACKEND=ladybug"
diff --git a/scripts/production_smoke.py b/scripts/production_smoke.py
index 8c7f7e6..b83a13d 100644
--- a/scripts/production_smoke.py
+++ b/scripts/production_smoke.py
@@ -110,6 +110,8 @@ def run_smoke(
     home_dir: str | None,
     timeout_seconds: float,
     keep_home: bool,
+    real_models: bool,
+    allow_model_downloads: bool,
 ) -> dict[str, Any]:
     runtime_home = Path(home_dir).expanduser() if home_dir else Path(tempfile.mkdtemp(prefix="mollygraph-production-smoke."))
     runtime_home.mkdir(parents=True, exist_ok=True)
@@ -125,6 +127,7 @@ def run_smoke(
         "home_dir": str(runtime_home),
         "logs_path": str(logs_path),
         "base_url": base_url,
+        "model_mode": "real" if real_models else "fixture",
     }
 
     try:
@@ -132,6 +135,16 @@ def run_smoke(
         env["GRAPH_MEMORY_PORT"] = str(port)
         env["MOLLYGRAPH_HOME_DIR"] = str(runtime_home)
         env["MOLLYGRAPH_ALLOW_CONCURRENT_LOCAL_RUNS"] = "1"
+        env["MOLLYGRAPH_SPACY_ENRICHMENT"] = "0"
+        env["MOLLYGRAPH_GLIREL_ENABLED"] = "false"
+
+        if real_models:
+            if not allow_model_downloads:
+                env.setdefault("MOLLYGRAPH_ALLOW_MODEL_DOWNLOADS", "0")
+        else:
+            env["MOLLYGRAPH_EXTRACTOR_FIXTURE"] = "1"
+            env["MOLLYGRAPH_EMBEDDING_TIER_ORDER"] = "hash"
+            env["MOLLYGRAPH_ALLOW_MODEL_DOWNLOADS"] = "0"
 
         proc = subprocess.Popen(
             [str(REPO_ROOT / "scripts" / "start.sh")],
@@ -271,6 +284,8 @@ def main() -> int:
     parser.add_argument("--timeout", type=float, default=90.0, help="Timeout in seconds for startup and ingest checks.")
     parser.add_argument("--keep-home", action="store_true", help="Keep the temporary runtime directory after the run.")
     parser.add_argument("--json", action="store_true", help="Print the final summary as JSON.")
+    parser.add_argument("--real-models", action="store_true", help="Use real configured models instead of the lightweight smoke fixture.")
+    parser.add_argument("--allow-model-downloads", action="store_true", help="Permit model downloads during --real-models runs.")
     args = parser.parse_args()
 
     try:
@@ -279,6 +294,8 @@ def main() -> int:
             home_dir=args.home_dir,
             timeout_seconds=args.timeout,
             keep_home=args.keep_home,
+            real_models=args.real_models,
+            allow_model_downloads=args.allow_model_downloads,
         )
     except Exception as exc:
         payload = {"status": "failed", "error": str(exc)}
diff --git a/service/.env.example b/service/.env.example
index e663912..3570fdf 100644
--- a/service/.env.example
+++ b/service/.env.example
@@ -43,6 +43,12 @@ MOLLYGRAPH_EMBEDDING_ST_MODEL=Snowflake/snowflake-arctic-embed-s
 # Keep this aligned with the default embedder unless you intentionally migrate.
 MOLLYGRAPH_EMBEDDING_VECTOR_DIMENSION=384
 
+# Install-time: model downloads are opt-in (PRELOAD_MODELS=1), avoiding surprise multi-GB fetches.
+# Load-time: ALLOW_MODEL_DOWNLOADS=0 demands cached weights; MIN_AVAILABLE_MEMORY_MB is the free-memory floor.
+# MOLLYGRAPH_PRELOAD_MODELS=0
+# MOLLYGRAPH_ALLOW_MODEL_DOWNLOADS=1
+# MOLLYGRAPH_MODEL_MIN_AVAILABLE_MEMORY_MB=2048
+
 # Optional local alternate:
 # MOLLYGRAPH_EMBEDDING_OLLAMA_MODEL=nomic-embed-text
 # OLLAMA_BASE_URL=http://127.0.0.1:11434
diff --git a/service/README.md b/service/README.md
index d0006f3..9cb3936 100644
--- a/service/README.md
+++ b/service/README.md
@@ -65,4 +65,5 @@ When the active backend does not support them, they should be hidden from the de
 - `service/.env.example` is the runtime configuration reference.
 - `service/BACKLOG.md` is the service backlog.
 - `service/DECISION_TRACES_PLAN.md` is a later-phase product plan.
-- `service/.venv/bin/python scripts/production_smoke.py --json` runs the isolated production-style smoke pass.
+- `service/.venv/bin/python scripts/production_smoke.py --json` runs the isolated runtime smoke pass without loading large models.
+- Add `--real-models` only when the local model cache is warm and you intentionally want that heavier check.
diff --git a/service/config.py b/service/config.py
index 2b53ade..7bd8ae0 100644
--- a/service/config.py
+++ b/service/config.py
@@ -195,11 +195,14 @@
 GRAPH_BACKEND = os.environ.get("MOLLYGRAPH_GRAPH_BACKEND", "ladybug").strip().lower()
 VECTOR_BACKEND = os.environ.get("MOLLYGRAPH_VECTOR_BACKEND", "ladybug")
 RUNTIME_PROFILE = os.environ.get("MOLLYGRAPH_RUNTIME_PROFILE", "hybrid").strip().lower()
+EXTRACTOR_FIXTURE = os.environ.get("MOLLYGRAPH_EXTRACTOR_FIXTURE", "0").strip().lower() in {"1", "true", "yes", "on"}
+ALLOW_MODEL_DOWNLOADS = os.environ.get("MOLLYGRAPH_ALLOW_MODEL_DOWNLOADS", "1").strip().lower() in {"1", "true", "yes", "on"}
+MODEL_MIN_AVAILABLE_MEMORY_MB = int(os.environ.get("MOLLYGRAPH_MODEL_MIN_AVAILABLE_MEMORY_MB", "2048"))
 STRICT_AI = (
     RUNTIME_PROFILE == "strict_ai"
     or os.environ.get("MOLLYGRAPH_STRICT_AI", "0").strip().lower() in {"1", "true", "yes", "on"}
 )
-SPACY_ENRICHMENT = os.environ.get("MOLLYGRAPH_SPACY_ENRICHMENT", "1").strip().lower() in {"1", "true", "yes", "on"}
+SPACY_ENRICHMENT = os.environ.get("MOLLYGRAPH_SPACY_ENRICHMENT", "0").strip().lower() in {"1", "true", "yes", "on"}
 SPACY_MODEL = os.environ.get("MOLLYGRAPH_SPACY_MODEL", "en_core_web_sm")
 SPACY_MIN_GLINER_ENTITIES = int(os.environ.get("MOLLYGRAPH_SPACY_MIN_GLINER_ENTITIES", "2"))
 EMBEDDING_BACKEND = os.environ.get("MOLLYGRAPH_EMBEDDING_BACKEND", "").strip().lower()  # legacy override; empty = use tier chain
diff --git a/service/extraction/pipeline.py b/service/extraction/pipeline.py
index 3ed1803..7c8c400 100644
--- a/service/extraction/pipeline.py
+++ b/service/extraction/pipeline.py
@@ -1496,6 +1496,7 @@ def _build_relationships(
 
         _entity_type_map: dict[str, str] = entity_type_map or {}
         gate = get_gate()
+        employment_pairs = self._employment_pairs(raw_relations, canonical_names)
 
         for item in raw_relations:
             if not isinstance(item, dict):
@@ -1536,6 +1537,22 @@ def _build_relationships(
             head_type = _entity_type_map.get(self._normalize(source_entity), "Concept")
             tail_type = _entity_type_map.get(self._normalize(target_entity), "Concept")
 
+            if self._is_unsupported_employment_fanout(
+                source_entity=source_entity,
+                target_entity=target_entity,
+                rel_type=rel_type,
+                tail_type=tail_type,
+                context=context,
+                employment_pairs=employment_pairs,
+            ):
+                log.debug(
+                    "employment_fanout suppressed: %s -[%s]-> %s",
+                    source_entity,
+                    rel_type,
+                    target_entity,
+                )
+                continue
+
             gate_result: GateResult = gate.evaluate(
                 head_type=head_type,
                 rel_type=rel_type,
@@ -1634,6 +1651,55 @@ def _build_relationships(
 
         return relationships, fallback_count, gate_quarantine_count, gate_skip_count
 
+    def _employment_pairs(
+        self,
+        raw_relations: list[dict[str, Any]],
+        canonical_names: dict[str, str],
+    ) -> set[tuple[str, str]]:
+        """Collect normalized (source, target) pairs that carry a WORKS_AT relation."""
+        found: set[tuple[str, str]] = set()
+        for relation in raw_relations:
+            if not isinstance(relation, dict):
+                continue
+            raw_head = str(relation.get("head") or "").strip()
+            raw_tail = str(relation.get("tail") or "").strip()
+            if not (raw_head and raw_tail):
+                continue
+            if self._normalize_rel_type(str(relation.get("label") or "")) != "WORKS_AT":
+                continue
+            source = canonical_names.get(self._normalize(raw_head), raw_head)
+            target = canonical_names.get(self._normalize(raw_tail), raw_tail)
+            found.add((self._normalize(source), self._normalize(target)))
+        return found
+
+    def _is_unsupported_employment_fanout(
+        self,
+        *,
+        source_entity: str,
+        target_entity: str,
+        rel_type: str,
+        tail_type: str,
+        context: str,
+        employment_pairs: set[tuple[str, str]],
+    ) -> bool:
+        """Report whether a REPORTS_TO edge merely echoes a stated employment.
+
+        True only for REPORTS_TO toward an Organization whose normalized
+        (source, target) pair is in ``employment_pairs`` and whose context
+        has employment wording but no hierarchy wording.
+        """
+        if rel_type != "REPORTS_TO" or tail_type != "Organization":
+            return False
+        key = (self._normalize(source_entity), self._normalize(target_entity))
+        if key not in employment_pairs:
+            return False
+
+        text = str(context or "").lower()
+        if re.search(r"\b(reports?\s+to|manager|manages|supervisor|boss|lead)\b", text):
+            return False  # explicit hierarchy wording keeps the edge
+        employment = re.search(r"\b(works?\s+(?:at|for)|employed\s+by|job\s+at)\b", text)
+        return employment is not None
+
     def _spacy_enrich_entities(self, content: str, gliner_entity_count: int) -> list[dict[str, Any]]:
         """Optional NER enrichment when GLiNER yields sparse extraction."""
         if gliner_entity_count >= service_config.SPACY_MIN_GLINER_ENTITIES:
diff --git a/service/extraction/relation_gate.py b/service/extraction/relation_gate.py
index 2ddcb03..7e9be86 100644
--- a/service/extraction/relation_gate.py
+++ b/service/extraction/relation_gate.py
@@ -112,6 +112,8 @@ class GateResult:
 _TRIPLE_IMPLAUSIBLE: dict[tuple[str, str, str], float] = {
     ("Organization", "CHILD_OF",    "Person"):       0.20,
     ("Organization", "PARENT_OF",   "Person"):       0.20,
+    ("Person",       "REPORTS_TO",  "Organization"): 0.15,
+    ("Organization", "REPORTS_TO",  "Organization"): 0.15,
     ("Technology",   "REPORTS_TO",  "Person"):       0.15,
     ("Place",        "REPORTS_TO",  "Person"):       0.15,
     ("Technology",   "WORKS_AT",    "Organization"): 0.20,
diff --git a/service/memory/extractor.py b/service/memory/extractor.py
index bfa179e..d764210 100644
--- a/service/memory/extractor.py
+++ b/service/memory/extractor.py
@@ -10,8 +10,12 @@
 from __future__ import annotations
 
 import logging
+import os
+import re
+import subprocess
 import threading
 import time
+from pathlib import Path
 from typing import Any
 
 import config
@@ -26,6 +30,119 @@
 _model_mtime: float | None = None   # mtime of active model dir for hot-reload detection
 
 
+def _fixture_extract(text: str) -> dict[str, Any]:
+    """Deterministic regex stand-in for the model extractor in smoke tests.
+
+    Opt-in via ``MOLLYGRAPH_EXTRACTOR_FIXTURE=1``. Finds the first
+    "<Person> works at|works for|is employed by <Org>" statement and reports
+    it as two entities plus one WORKS_AT relation, using only ``re``.
+    """
+    # One to four capitalized words, an employment verb, then one to five org tokens.
+    pattern = (
+        r"\b(?P<person>[A-Z][A-Za-z]+(?:\s+[A-Z][A-Za-z]+){0,3})\s+"
+        r"(?:works\s+at|works\s+for|is\s+employed\s+by)\s+"
+        r"(?P<org>[A-Z][A-Za-z0-9&.-]+(?:\s+[A-Z][A-Za-z0-9&.-]+){0,4})\b"
+    )
+    hit = re.search(pattern, text)
+    if hit is None:
+        return {"entities": [], "relations": [], "latency_ms": 0}
+
+    employee = hit.group("person").strip()
+    employer = hit.group("org").strip().rstrip(".")
+
+    entities = [
+        {"text": employee, "label": "Person", "score": 0.99},
+        {"text": employer, "label": "Organization", "score": 0.99},
+    ]
+    relations = [
+        {"head": employee, "tail": employer, "label": "WORKS_AT", "score": 0.99},
+    ]
+    return {"entities": entities, "relations": relations, "latency_ms": 0}
+
+
+def _available_memory_mb() -> int | None:
+    """Best-effort available memory in MB for Linux/macOS; ``None`` if unknown.
+
+    Linux ``MemAvailable`` is read first because ``SC_AVPHYS_PAGES`` counts only
+    free pages and under-reports on hosts holding reclaimable page cache.
+    """
+    meminfo = Path("/proc/meminfo")
+    if meminfo.exists():
+        try:
+            for line in meminfo.read_text(encoding="utf-8").splitlines():
+                if line.startswith("MemAvailable:"):
+                    return int(line.split()[1]) // 1024  # value is in kB
+        except (OSError, ValueError, IndexError):
+            pass  # unreadable or malformed: fall through to the other probes
+    if "SC_AVPHYS_PAGES" in getattr(os, "sysconf_names", {}):
+        try:
+            pages, page_size = os.sysconf("SC_AVPHYS_PAGES"), os.sysconf("SC_PAGE_SIZE")
+            if pages >= 0 and page_size > 0:  # guard against a -1 (indeterminate) reading
+                return (pages * page_size) // (1024 * 1024)
+        except (OSError, ValueError):
+            pass
+    if Path("/usr/bin/vm_stat").exists():
+        try:
+            output = subprocess.check_output(["/usr/bin/vm_stat"], text=True, timeout=2)
+            size_match = re.search(r"page size of (\d+) bytes", output)
+            page_size = int(size_match.group(1)) if size_match else 4096
+            labels = ("Pages free", "Pages inactive", "Pages speculative")
+            counts = (re.search(rf"{label}:\s+([\d.]+)", output) for label in labels)
+            pages = sum(int(m.group(1).replace(".", "")) for m in counts if m)
+            return (pages * page_size) // (1024 * 1024)
+        except Exception:
+            pass  # a failing or slow vm_stat is not fatal for a best-effort probe
+    return None
+
+
+def _enforce_model_memory_floor() -> None:
+    """Raise RuntimeError when available memory is below the configured floor.
+
+    A floor of zero or less disables the check; an unknown reading passes.
+    """
+
+    min_mb = int(getattr(config, "MODEL_MIN_AVAILABLE_MEMORY_MB", 0) or 0)
+    available_mb = _available_memory_mb() if min_mb > 0 else None
+    if available_mb is not None and available_mb < min_mb:
+        raise RuntimeError(
+            f"Refusing to load GLiNER2 with only {available_mb}MB available memory "
+            f"(requires >= {min_mb}MB). Lower MOLLYGRAPH_MODEL_MIN_AVAILABLE_MEMORY_MB "
+            "only if you intentionally want to run under memory pressure."
+        )
+
+
+def _is_hub_model_ref(model_ref: str) -> bool:  # slash-bearing ref that is not an existing local path
+    return bool(model_ref) and "/" in model_ref and not Path(model_ref).expanduser().exists()
+
+
+def _require_cached_model_if_downloads_disabled(model_ref: str) -> None:
+    """Raise RuntimeError for a Hub model whose weights are not cached when downloads are off."""
+    downloads_allowed = bool(getattr(config, "ALLOW_MODEL_DOWNLOADS", True))
+    if downloads_allowed or not _is_hub_model_ref(model_ref):
+        return
+    try:
+        from huggingface_hub import try_to_load_from_cache, _CACHED_NO_EXIST
+    except Exception as exc:
+        raise RuntimeError(
+            "MOLLYGRAPH_ALLOW_MODEL_DOWNLOADS=0 but huggingface_hub cache helpers "
+            "are unavailable, so MollyGraph cannot safely prove the model is local."
+        ) from exc
+    cached = try_to_load_from_cache(model_ref, "model.safetensors")
+    if isinstance(cached, str) and Path(cached).exists():
+        return  # weights already on disk
+    reason = (
+        "model.safetensors is known absent from the Hub repo"
+        if cached is _CACHED_NO_EXIST
+        else "model.safetensors is not cached locally"
+    )
+    raise RuntimeError(
+        f"GLiNER2 model {model_ref!r} is not ready for offline use: {reason}. "
+        "Set MOLLYGRAPH_PRELOAD_MODELS=1 and run ./scripts/install.sh when you "
+        "want to download weights, or set MOLLYGRAPH_ALLOW_MODEL_DOWNLOADS=1 "
+        "for an intentional online load."
+    )
+
+
 def _active_backend() -> str:
     backend = str(getattr(config, "EXTRACTOR_BACKEND", "gliner2") or "gliner2").strip().lower()
     if backend not in {"gliner2", "gliner"}:
@@ -77,6 +194,9 @@ def _load_gliner_model(model_ref: str) -> Any:
     """
     from gliner2 import GLiNER2
 
+    _require_cached_model_if_downloads_disabled(model_ref)
+    _enforce_model_memory_floor()
+
     log.info("Loading GLiNER2 model (%s)...", model_ref)
     try:
         m = GLiNER2.from_pretrained(model_ref)
@@ -191,6 +311,9 @@ def extract(text: str, threshold: float = 0.4) -> dict[str, Any]:
     Uses GLiNER2 for both entity and relation extraction.
     Raises RuntimeError in strict_ai mode on failure; logs + returns empty otherwise.
     """
+    if getattr(config, "EXTRACTOR_FIXTURE", False):
+        return _fixture_extract(text)
+
     if getattr(config, "TEST_MODE", False):
         return {"entities": [], "relations": [], "latency_ms": 0}
 
diff --git a/service/tests/test_extractor_fixture.py b/service/tests/test_extractor_fixture.py
new file mode 100644
index 0000000..655a7f7
--- /dev/null
+++ b/service/tests/test_extractor_fixture.py
@@ -0,0 +1,29 @@
+from __future__ import annotations
+
+from pathlib import Path
+import sys
+
+_HERE = Path(__file__).parent
+_SERVICE_ROOT = _HERE.parent
+if str(_SERVICE_ROOT) not in sys.path:
+    sys.path.insert(0, str(_SERVICE_ROOT))
+
+import config
+from memory import extractor
+
+
+def test_extractor_fixture_parses_basic_employment(monkeypatch):
+    """With the fixture flag on, extract() yields one Person/Organization WORKS_AT triple."""
+    monkeypatch.setattr(config, "EXTRACTOR_FIXTURE", True)
+    sentence = "Statement by Elena Ward: Elena Ward works at Signal Foundry."
+    expected_entities = [
+        {"text": "Elena Ward", "label": "Person", "score": 0.99},
+        {"text": "Signal Foundry", "label": "Organization", "score": 0.99},
+    ]
+    expected_relations = [
+        {"head": "Elena Ward", "tail": "Signal Foundry", "label": "WORKS_AT", "score": 0.99},
+    ]
+
+    result = extractor.extract(sentence, threshold=0.55)
+    assert result["entities"] == expected_entities
+    assert result["relations"] == expected_relations
diff --git a/service/tests/test_ladybug_core_flow.py b/service/tests/test_ladybug_core_flow.py
index 73e48ba..29f12f8 100644
--- a/service/tests/test_ladybug_core_flow.py
+++ b/service/tests/test_ladybug_core_flow.py
@@ -40,6 +40,19 @@ def _fake_extract(_text: str, _threshold: float):
     }
 
 
+def _fake_employment_fanout_extract(_text: str, _threshold: float):
+    """Stub extractor emitting WORKS_AT plus a spurious REPORTS_TO for one pair."""
+    person, org = "Mira Hale", "Northwind Arcadia"
+    entities = [
+        {"text": person, "label": "Person", "score": 0.99},
+        {"text": org, "label": "Organization", "score": 0.97},
+    ]
+    # Both relations share the same head and tail; only the label differs.
+    labels = ("WORKS_AT", "REPORTS_TO")
+    relations = [{"head": person, "tail": org, "label": label, "score": 0.94} for label in labels]
+    return {"entities": entities, "relations": relations}
+
+
 def _fake_embedding(_cls, text: str, dim: int = 384, _depth: int = 0) -> list[float]:
     seed = float((sum(ord(ch) for ch in text) % 17) + 1)
     return [seed] + [0.0] * (dim - 1)
@@ -151,3 +164,41 @@ def _thread_checked_embedding(_cls, text: str, dim: int = 384, _depth: int = 0)
 
     assert processed.status == "completed"
     assert len(embedding_thread_ids) >= 3
+
+
+def test_simple_employment_statement_suppresses_report_to_fanout(monkeypatch, tmp_path):
+    """A bare "works at" sentence must keep WORKS_AT and drop the echoed REPORTS_TO."""
+    monkeypatch.setattr(memory_extractor, "extract", _fake_employment_fanout_extract)
+    monkeypatch.setattr(ExtractionPipeline, "_text_embedding", classmethod(_fake_embedding))
+    for flag in ("SPACY_ENRICHMENT", "GLIREL_ENABLED", "DECISION_TRACES_INGEST_ENABLED"):
+        monkeypatch.setattr(config, flag, False)
+
+    graph = LadybugGraph(tmp_path / "graph.lbug")
+    pipeline = ExtractionPipeline(
+        graph=graph,
+        vector_store=VectorStore(backend="ladybug", db_path=tmp_path / "vectors.lbug"),
+    )
+
+    job = ExtractionJob(
+        content="Mira Hale works at Northwind Arcadia.",
+        source="session",
+        speaker="Mira Hale",
+        priority=1,
+        reference_time=datetime.now(UTC),
+    )
+
+    processed = asyncio.run(pipeline.process_job(job))
+
+    assert processed.status == "completed"
+    seen_types = {rel.relation_type for rel in processed.extracted_relationships}
+    assert "WORKS_AT" in seen_types
+    assert "REPORTS_TO" not in seen_types
+
+    # Relation types actually stored in the graph against the employer.
+    stored_types = {
+        fact.get("rel_type")
+        for fact in graph.get_current_facts("Mira Hale")
+        if fact.get("target_name") == "Northwind Arcadia"
+    }
+    assert "WORKS_AT" in stored_types
+    assert "REPORTS_TO" not in stored_types
diff --git a/tests/test_local_runtime_scripts.py b/tests/test_local_runtime_scripts.py
index a53f311..188248f 100644
--- a/tests/test_local_runtime_scripts.py
+++ b/tests/test_local_runtime_scripts.py
@@ -24,6 +24,9 @@ def test_install_script_uses_service_venv_and_service_env() -> None:
     assert 'uv python install "$TARGET_PYTHON"' in content
     assert 'uv venv --python "$TARGET_PYTHON" --seed --clear "$VENV_DIR"' in content
     assert 'python3.12 -m venv "$VENV_DIR"' in content
+    assert "MOLLYGRAPH_PRELOAD_MODELS" in content
+    assert "Skipping model pre-download" in content
+    assert "hf_hub_download(repo_id, filename, dry_run=True)" in content
 
 
 def test_repo_pins_python_312() -> None:
@@ -66,6 +69,16 @@ def test_install_script_calls_out_single_run_safety() -> None:
     assert "scripts/start.sh will warn and stop before launching a second copy" in content
 
 
+def test_runtime_smoke_defaults_to_fixture_models() -> None:
+    """The smoke runner source must pin fixture-mode env vars and expose both opt-in flags."""
+    content = _read("scripts/production_smoke.py")
+    assert 'env["MOLLYGRAPH_EXTRACTOR_FIXTURE"] = "1"' in content
+    assert 'env["MOLLYGRAPH_EMBEDDING_TIER_ORDER"] = "hash"' in content
+    assert 'env["MOLLYGRAPH_ALLOW_MODEL_DOWNLOADS"] = "0"' in content
+    assert "--real-models" in content
+    assert "--allow-model-downloads" in content
+
+
 def test_env_loader_handles_unquoted_values_with_spaces(tmp_path: Path) -> None:
     env_file = tmp_path / "sample.env"
     env_file.write_text(
diff --git a/tests/test_production_smoke.py b/tests/test_production_smoke.py
index 003ba64..29c4df9 100644
--- a/tests/test_production_smoke.py
+++ b/tests/test_production_smoke.py
@@ -44,6 +44,7 @@ def test_runtime_smoke_runner() -> None:
 
     payload = json.loads(_last_nonempty_line(result.stdout))
     assert payload["status"] == "ok"
+    assert payload["model_mode"] == "fixture"
     assert payload["startup_health"]["status"] == "healthy"
     assert payload["startup_health"]["operator_advisories"] == []
     assert payload["ingest"]["status"] == 200