brianmeyer · brianmeyer · May 17, 2026 · May 17, 2026 · May 17, 2026 · chatgpt-codex-connector
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -23,6 +23,7 @@ jobs:
             "uvicorn[standard]==0.32.*" \
             "mcp" \
             "neo4j==5.*" \
+            "real-ladybug==0.15.2" \
             "httpx==0.28.*" \
             "pydantic==2.*" \
             "python-dotenv==1.*" \
@@ -49,7 +50,9 @@ jobs:
       - name: Run smoke tests
         env:
           MOLLYGRAPH_TEST_MODE: "1"
-        run: pytest -q -m smoke
+          MOLLYGRAPH_ALLOW_MODEL_DOWNLOADS: "0"
+          PYTHONPATH: service
+        run: python -m pytest -q tests -m smoke
 
   integration:
     runs-on: ubuntu-latest
@@ -84,6 +87,7 @@ jobs:
             "uvicorn[standard]==0.32.*" \
             "mcp" \
             "neo4j==5.*" \
+            "real-ladybug==0.15.2" \
             "httpx==0.28.*" \
             "pydantic==2.*" \
             "python-dotenv==1.*" \
@@ -94,7 +98,9 @@ jobs:
       - name: Run integration tests
         env:
           MOLLYGRAPH_TEST_MODE: "1"
+          MOLLYGRAPH_ALLOW_MODEL_DOWNLOADS: "0"
           NEO4J_URI: bolt://localhost:7687
           NEO4J_USER: neo4j
           NEO4J_PASSWORD: testpassword
-        run: pytest -q -m integration
+          PYTHONPATH: service
+        run: python -m pytest -q tests -m integration
diff --git a/.gitignore b/.gitignore
@@ -12,6 +12,7 @@ venv/
 ENV/
 
 ## Database
+.graph-memory/
 *.db
 *.db-journal
 *.db-wal

diff --git a/README.md b/README.md
@@ -32,7 +32,7 @@ The canonical local runtime is Python `3.12`.
 The install script:
 - creates `service/.env` from `service/.env.example` if needed
 - creates the runtime venv at `service/.venv`
-- preloads the default `GLiNER2` and embedding models when possible
+- skips model downloads unless `MOLLYGRAPH_PRELOAD_MODELS=1` is set
 
 Default local API:
 - base URL: `http://127.0.0.1:7422`
@@ -44,6 +44,10 @@ Production-style smoke test:
 service/.venv/bin/python scripts/production_smoke.py --json
 ```
 
+That smoke test uses lightweight fixture extraction by default so it can verify
+the runtime without loading large local models. Use `--real-models` only when the
+model cache is warm and you intentionally want to exercise GLiNER2.
+
 ## Default Stack
 
 - graph storage: `Ladybug`

diff --git a/scripts/install.sh b/scripts/install.sh
@@ -69,16 +69,24 @@ source "$VENV_DIR/bin/activate"
 python -m pip install --upgrade pip
 python -m pip install -r "$ROOT_DIR/requirements.txt"
 
-python - <<'PY'
-try:
-    from gliner2 import GLiNER2
-    GLiNER2.from_pretrained("fastino/gliner2-large-v1")
-    print("Downloaded base GLiNER2 model")
-except Exception as exc:
-    print(f"Skipped GLiNER2 pre-download: {exc}")
-PY
+case "${MOLLYGRAPH_PRELOAD_MODELS:-0}" in
+  1|true|TRUE|yes|YES|on|ON)
+    python - <<'PY'
+from huggingface_hub import hf_hub_download
+
+downloads = [
+    ("fastino/gliner2-large-v1", "model.safetensors", "base GLiNER2 model"),
+]
+
+# Pre-fetch each listed artifact. The dry run exists only to print size/cache
+# information, so it gets its own try block: a failure while inspecting must
+# not stop the real download from being attempted.
+for repo_id, filename, label in downloads:
+    try:
+        info = hf_hub_download(repo_id, filename, dry_run=True)
+        # The dry-run info object reports the size as `file_size`; fall back to
+        # `size` and finally 0 so a missing attribute never raises here.
+        size_bytes = getattr(info, "file_size", None) or getattr(info, "size", None) or 0
+        size_mb = int(size_bytes / (1024 * 1024))
+        cached = getattr(info, "is_cached", "unknown")
+        print(f"Preparing {label}: {repo_id}/{filename} ({size_mb}MB, cached={cached})")
+    except Exception as exc:
+        print(f"Could not inspect {label} before download: {exc}")
+
+    # Best-effort download: report and continue rather than failing the install.
+    try:
+        hf_hub_download(repo_id, filename)
+    except Exception as exc:
+        print(f"Skipped {label} pre-download: {exc}")
 
-python - <<'PY'
 try:
     from sentence_transformers import SentenceTransformer
     SentenceTransformer(
@@ -89,13 +97,19 @@ try:
 except Exception as exc:
     print(f"Skipped embedding model pre-download: {exc}")
 PY
+    ;;
+  *)
+    echo "Skipping model pre-download. Set MOLLYGRAPH_PRELOAD_MODELS=1 when you intentionally want to fetch local model weights."
+    ;;
+esac
 
 echo "Install complete."
 echo "Canonical local runtime:"
 echo "  venv: $VENV_DIR"
 echo "  env:  $ACTIVE_ENV_FILE"
 echo "  data: ${MOLLYGRAPH_HOME_DIR:-$HOME/.graph-memory}"
 echo "  python: $TARGET_PYTHON"
+echo "  model preload: ${MOLLYGRAPH_PRELOAD_MODELS:-0} (set MOLLYGRAPH_PRELOAD_MODELS=1 to download weights intentionally)"
 echo "Default local stack:"
 echo "  MOLLYGRAPH_GRAPH_BACKEND=ladybug"
 echo "  MOLLYGRAPH_VECTOR_BACKEND=ladybug"

diff --git a/scripts/production_smoke.py b/scripts/production_smoke.py
@@ -110,6 +110,8 @@ def run_smoke(
     home_dir: str | None,
     timeout_seconds: float,
     keep_home: bool,
+    real_models: bool,
+    allow_model_downloads: bool,
 ) -> dict[str, Any]:
     runtime_home = Path(home_dir).expanduser() if home_dir else Path(tempfile.mkdtemp(prefix="mollygraph-production-smoke."))
     runtime_home.mkdir(parents=True, exist_ok=True)
@@ -125,13 +127,24 @@ def run_smoke(
         "home_dir": str(runtime_home),
         "logs_path": str(logs_path),
         "base_url": base_url,
+        "model_mode": "real" if real_models else "fixture",
     }
 
     try:
         env = os.environ.copy()
         env["GRAPH_MEMORY_PORT"] = str(port)
         env["MOLLYGRAPH_HOME_DIR"] = str(runtime_home)
         env["MOLLYGRAPH_ALLOW_CONCURRENT_LOCAL_RUNS"] = "1"
+        env["MOLLYGRAPH_SPACY_ENRICHMENT"] = "0"
+        env["MOLLYGRAPH_GLIREL_ENABLED"] = "false"
+
+        if real_models:
+            if not allow_model_downloads:
+                env.setdefault("MOLLYGRAPH_ALLOW_MODEL_DOWNLOADS", "0")
+        else:
+            env["MOLLYGRAPH_EXTRACTOR_FIXTURE"] = "1"
+            env["MOLLYGRAPH_EMBEDDING_TIER_ORDER"] = "hash"
+            env["MOLLYGRAPH_ALLOW_MODEL_DOWNLOADS"] = "0"
 
         proc = subprocess.Popen(
             [str(REPO_ROOT / "scripts" / "start.sh")],
@@ -271,6 +284,8 @@ def main() -> int:
     parser.add_argument("--timeout", type=float, default=90.0, help="Timeout in seconds for startup and ingest checks.")
     parser.add_argument("--keep-home", action="store_true", help="Keep the temporary runtime directory after the run.")
     parser.add_argument("--json", action="store_true", help="Print the final summary as JSON.")
+    parser.add_argument("--real-models", action="store_true", help="Use real configured models instead of the lightweight smoke fixture.")
+    parser.add_argument("--allow-model-downloads", action="store_true", help="Permit model downloads during --real-models runs.")
     args = parser.parse_args()
 
     try:
@@ -279,6 +294,8 @@ def main() -> int:
             home_dir=args.home_dir,
             timeout_seconds=args.timeout,
             keep_home=args.keep_home,
+            real_models=args.real_models,
+            allow_model_downloads=args.allow_model_downloads,
         )
     except Exception as exc:
         payload = {"status": "failed", "error": str(exc)}

diff --git a/service/.env.example b/service/.env.example
@@ -43,6 +43,12 @@ MOLLYGRAPH_EMBEDDING_ST_MODEL=Snowflake/snowflake-arctic-embed-s
 # Keep this aligned with the default embedder unless you intentionally migrate.
 MOLLYGRAPH_EMBEDDING_VECTOR_DIMENSION=384
 
+# Model downloads are opt-in during install: scripts/install.sh fetches weights
+# only when MOLLYGRAPH_PRELOAD_MODELS=1. This keeps setup memory-aware and
+# avoids surprise multi-GB downloads on first pass.
+# MOLLYGRAPH_PRELOAD_MODELS=0
+# MOLLYGRAPH_ALLOW_MODEL_DOWNLOADS=1
+# MOLLYGRAPH_MODEL_MIN_AVAILABLE_MEMORY_MB=2048
+
 # Optional local alternate:
 # MOLLYGRAPH_EMBEDDING_OLLAMA_MODEL=nomic-embed-text
 # OLLAMA_BASE_URL=http://127.0.0.1:11434

diff --git a/service/README.md b/service/README.md
@@ -65,4 +65,5 @@ When the active backend does not support them, they should be hidden from the de
 - `service/.env.example` is the runtime configuration reference.
 - `service/BACKLOG.md` is the service backlog.
 - `service/DECISION_TRACES_PLAN.md` is a later-phase product plan.
-- `service/.venv/bin/python scripts/production_smoke.py --json` runs the isolated production-style smoke pass.
+- `service/.venv/bin/python scripts/production_smoke.py --json` runs the isolated runtime smoke pass without loading large models.
+- Add `--real-models` only when the local model cache is warm and you intentionally want that heavier check.
diff --git a/service/config.py b/service/config.py
@@ -195,11 +195,14 @@
 GRAPH_BACKEND = os.environ.get("MOLLYGRAPH_GRAPH_BACKEND", "ladybug").strip().lower()
 VECTOR_BACKEND = os.environ.get("MOLLYGRAPH_VECTOR_BACKEND", "ladybug")
 RUNTIME_PROFILE = os.environ.get("MOLLYGRAPH_RUNTIME_PROFILE", "hybrid").strip().lower()
+EXTRACTOR_FIXTURE = os.environ.get("MOLLYGRAPH_EXTRACTOR_FIXTURE", "0").strip().lower() in {"1", "true", "yes", "on"}
+ALLOW_MODEL_DOWNLOADS = os.environ.get("MOLLYGRAPH_ALLOW_MODEL_DOWNLOADS", "1").strip().lower() in {"1", "true", "yes", "on"}
+MODEL_MIN_AVAILABLE_MEMORY_MB = int(os.environ.get("MOLLYGRAPH_MODEL_MIN_AVAILABLE_MEMORY_MB", "2048"))
 STRICT_AI = (
     RUNTIME_PROFILE == "strict_ai"
     or os.environ.get("MOLLYGRAPH_STRICT_AI", "0").strip().lower() in {"1", "true", "yes", "on"}
 )
-SPACY_ENRICHMENT = os.environ.get("MOLLYGRAPH_SPACY_ENRICHMENT", "1").strip().lower() in {"1", "true", "yes", "on"}
+SPACY_ENRICHMENT = os.environ.get("MOLLYGRAPH_SPACY_ENRICHMENT", "0").strip().lower() in {"1", "true", "yes", "on"}
 SPACY_MODEL = os.environ.get("MOLLYGRAPH_SPACY_MODEL", "en_core_web_sm")
 SPACY_MIN_GLINER_ENTITIES = int(os.environ.get("MOLLYGRAPH_SPACY_MIN_GLINER_ENTITIES", "2"))
 EMBEDDING_BACKEND = os.environ.get("MOLLYGRAPH_EMBEDDING_BACKEND", "").strip().lower()  # legacy override; empty = use tier chain

diff --git a/service/extraction/pipeline.py b/service/extraction/pipeline.py
@@ -1496,6 +1496,7 @@ def _build_relationships(
 
         _entity_type_map: dict[str, str] = entity_type_map or {}
         gate = get_gate()
+        employment_pairs = self._employment_pairs(raw_relations, canonical_names)
 
         for item in raw_relations:
             if not isinstance(item, dict):
@@ -1536,6 +1537,22 @@ def _build_relationships(
             head_type = _entity_type_map.get(self._normalize(source_entity), "Concept")
             tail_type = _entity_type_map.get(self._normalize(target_entity), "Concept")
 
+            if self._is_unsupported_employment_fanout(
+                source_entity=source_entity,
+                target_entity=target_entity,
+                rel_type=rel_type,
+                tail_type=tail_type,
+                context=context,
+                employment_pairs=employment_pairs,
+            ):
+                log.debug(
+                    "employment_fanout suppressed: %s -[%s]-> %s",
+                    source_entity,
+                    rel_type,
+                    target_entity,
+                )
+                continue
+
             gate_result: GateResult = gate.evaluate(
                 head_type=head_type,
                 rel_type=rel_type,
@@ -1634,6 +1651,55 @@ def _build_relationships(
 
         return relationships, fallback_count, gate_quarantine_count, gate_skip_count
 
+    def _employment_pairs(
+        self,
+        raw_relations: list[dict[str, Any]],
+        canonical_names: dict[str, str],
+    ) -> set[tuple[str, str]]:
+        """Collect the (source, target) pairs that have a WORKS_AT relation.
+
+        Args:
+            raw_relations: Raw relation items; entries that are not dicts or
+                that lack a non-empty ``head`` or ``tail`` are ignored.
+            canonical_names: Lookup from ``self._normalize(name)`` to the
+                canonical entity name; the raw name is used when absent.
+
+        Returns:
+            A set of ``(self._normalize(source), self._normalize(target))``
+            tuples, one per relation whose normalized label is ``WORKS_AT``.
+        """
+        pairs: set[tuple[str, str]] = set()
+        for item in raw_relations:
+            # Skip malformed entries instead of raising.
+            if not isinstance(item, dict):
+                continue
+            head = str(item.get("head") or "").strip()
+            tail = str(item.get("tail") or "").strip()
+            if not head or not tail:
+                continue
+            rel_type = self._normalize_rel_type(str(item.get("label") or ""))
+            # Only employment relations contribute to the pair set.
+            if rel_type != "WORKS_AT":
+                continue
+            # Resolve both ends to canonical names, falling back to the raw text.
+            source_entity = canonical_names.get(self._normalize(head), head)
+            target_entity = canonical_names.get(self._normalize(tail), tail)
+            # Store normalized keys so lookups can use the same normalization.
+            pairs.add((self._normalize(source_entity), self._normalize(target_entity)))
+        return pairs
+
+    def _is_unsupported_employment_fanout(
+        self,
+        *,
+        source_entity: str,
+        target_entity: str,
+        rel_type: str,
+        tail_type: str,
+        context: str,
+        employment_pairs: set[tuple[str, str]],
+    ) -> bool:
+        """Drop hierarchy fan-out when a sentence only states employment.
+
+        Returns True only when all of the following hold:
+
+        * ``rel_type`` is ``REPORTS_TO``;
+        * ``tail_type`` is ``Organization``;
+        * the normalized ``(source_entity, target_entity)`` pair is present in
+          ``employment_pairs``;
+        * ``context`` contains an employment phrase ("works at/for",
+          "employed by", "job at") and no hierarchy phrase ("reports to",
+          "manager", "manages", "supervisor", "boss", "lead").
+
+        Args:
+            source_entity: Name of the relation's source entity.
+            target_entity: Name of the relation's target entity.
+            rel_type: Relation type to test.
+            tail_type: Entity type of the target.
+            context: Text the relation came from; ``None`` or empty is treated
+                as an empty string.
+            employment_pairs: Normalized (source, target) pairs to test
+                membership against.
+
+        Returns:
+            True if the relation should be suppressed, otherwise False.
+        """
+        if rel_type not in {"REPORTS_TO"}:
+            return False
+        if tail_type != "Organization":
+            return False
+        # Normalize both names before the membership test.
+        pair = (self._normalize(source_entity), self._normalize(target_entity))
+        if pair not in employment_pairs:
+            return False
+
+        # Matching is case-insensitive because the context is lowercased first.
+        context_lower = str(context or "").lower()
+        has_employment_signal = bool(
+            re.search(r"\b(works?\s+(?:at|for)|employed\s+by|job\s+at)\b", context_lower)
+        )
+        has_hierarchy_signal = bool(
+            re.search(r"\b(reports?\s+to|manager|manages|supervisor|boss|lead)\b", context_lower)
+        )
+        # Any hierarchy wording keeps the relation; suppress only pure employment text.
+        return has_employment_signal and not has_hierarchy_signal
+
     def _spacy_enrich_entities(self, content: str, gliner_entity_count: int) -> list[dict[str, Any]]:
         """Optional NER enrichment when GLiNER yields sparse extraction."""
         if gliner_entity_count >= service_config.SPACY_MIN_GLINER_ENTITIES:

diff --git a/service/extraction/relation_gate.py b/service/extraction/relation_gate.py
@@ -112,6 +112,8 @@ class GateResult:
 _TRIPLE_IMPLAUSIBLE: dict[tuple[str, str, str], float] = {
     ("Organization", "CHILD_OF",    "Person"):       0.20,
     ("Organization", "PARENT_OF",   "Person"):       0.20,
+    ("Person",       "REPORTS_TO",  "Organization"): 0.15,
+    ("Organization", "REPORTS_TO",  "Organization"): 0.15,
     ("Technology",   "REPORTS_TO",  "Person"):       0.15,
     ("Place",        "REPORTS_TO",  "Person"):       0.15,
     ("Technology",   "WORKS_AT",    "Organization"): 0.20,