From 579218d79891325c66eb12012d2930ba3466f3ce Mon Sep 17 00:00:00 2001 From: Max Dubrinsky Date: Wed, 24 Jun 2026 14:46:58 -0400 Subject: [PATCH 1/5] feat(authz): unify plugin HTTP authz under the @path_rule surface Routes-derived plugin authorization: declare permissions as typed PermissionSets/AuthzScope and attach @path_rule to handlers; the platform derives the permission catalog, endpoint bindings, and caller-kind from the routes. Removes the nemo.authz / get_authz_contribution surface. Caller-kind (PRINCIPAL / SERVICE_PRINCIPAL) is enforced in Rego; invalid plugins fail closed per route (deny_route / quarantine / hard_fail). Migrates agents, auditor, evaluator, example, safe-synthesizer and customizer to the new surface, adds the E2E OIDC authz verification harness, and memoizes per-request endpoint scans to cut embedded-PDP fuel. Reconstructed onto current main; carries the resolutions from the dropped main-merge commits (notably evaluator metrics.py on the new surface). Signed-off-by: Max Dubrinsky --- docs/set-up/config-reference.mdx | 4 + e2e/authz_oidc/README.md | 81 +++ e2e/authz_oidc/conftest.py | 324 ++++++++++++ .../fixtures/harness-broken/pyproject.toml | 16 + .../src/harness_broken/service.py | 19 + .../fixtures/harness-fixture/pyproject.toml | 16 + .../src/harness_fixture/service.py | 49 ++ .../fixtures/harness-unruled/pyproject.toml | 16 + .../src/harness_unruled/service.py | 44 ++ e2e/authz_oidc/harness.py | 60 +++ e2e/authz_oidc/idp.py | 172 +++++++ e2e/authz_oidc/matrix.py | 474 ++++++++++++++++++ e2e/authz_oidc/report.py | 76 +++ e2e/authz_oidc/test_authz_matrix.py | 84 ++++ .../src/nemo_platform_plugin/authz.py | 454 +++++++++++------ .../nemo_platform_plugin/authz_discovery.py | 472 ++++++++++++----- .../src/nemo_platform_plugin/authz_format.py | 19 +- .../customization_contributor.py | 14 +- .../src/nemo_platform_plugin/discovery.py | 8 +- .../nemo_platform_plugin/functions/routes.py | 34 ++ .../nemo_platform_plugin/jobs/api_factory.py | 
53 +- .../src/nemo_platform_plugin/jobs/routes.py | 30 +- .../src/nemo_platform_plugin/service.py | 20 +- .../nemo_platform_plugin/tests/test_authz.py | 341 ++++++------- .../tests/test_authz_failmode.py | 419 ++++++++++++++++ .../tests/test_discovery.py | 9 - .../tests/test_factory_authz.py | 216 ++++++++ .../tests/test_functions_routes.py | 7 + .../tests/test_path_rule.py | 190 +++++++ .../tests/auth/test_authz_format.py | 29 ++ .../customization_common/contributor/base.py | 25 +- .../src/nemo_example_plugin/_perms.py | 33 ++ .../src/nemo_example_plugin/authz.py | 14 + .../nemo_example_plugin/middleware_service.py | 28 ++ .../src/nemo_example_plugin/service.py | 47 +- plugins/example-plugin/tests/test_authz.py | 39 ++ .../src/nemo_agents_plugin/api/v2/_perms.py | 33 ++ .../src/nemo_agents_plugin/api/v2/agents.py | 23 + .../api/v2/deployment_logs.py | 13 + .../nemo_agents_plugin/api/v2/deployments.py | 23 + .../src/nemo_agents_plugin/api/v2/gateway.py | 117 ++++- .../src/nemo_agents_plugin/authz.py | 14 + .../src/nemo_agents_plugin/service.py | 231 +++------ plugins/nemo-agents/tests/test_authz.py | 111 ++++ .../nemo-agents/tests/unit/test_service.py | 50 +- .../src/nemo_anonymizer_plugin/service.py | 31 +- .../nemo-anonymizer/tests/unit/test_authz.py | 38 ++ .../src/nemo_auditor/api/v2/_perms.py | 29 ++ .../src/nemo_auditor/api/v2/configs.py | 28 ++ .../src/nemo_auditor/api/v2/targets.py | 28 ++ .../nemo-auditor/src/nemo_auditor/authz.py | 13 + .../nemo-auditor/src/nemo_auditor/service.py | 52 +- plugins/nemo-auditor/tests/test_authz.py | 38 ++ .../src/nemo_customizer/router.py | 34 +- plugins/nemo-customizer/tests/test_router.py | 85 ++-- .../src/nemo_data_designer_plugin/service.py | 31 +- .../tests/unit/test_authz.py | 38 ++ .../src/nemo_evaluator/api/v2/metrics.py | 15 + .../src/nemo_evaluator/authz.py | 14 + .../src/nemo_evaluator/service.py | 74 +-- plugins/nemo-evaluator/tests/test_authz.py | 41 ++ .../api/v2/jobs/endpoints.py | 2 + 
.../nemo_safe_synthesizer_plugin/service.py | 42 +- .../tests/unit/test_authz.py | 40 ++ .../tests/unit/test_service.py | 47 +- .../src/nemo_unsloth_plugin/contributor.py | 4 +- .../nemo-unsloth/tests/test_contributor.py | 37 +- services/core/auth/scripts/auth-tools.py | 26 +- .../core/auth/src/nmp/core/auth/app/bundle.py | 118 ++++- .../src/nmp/core/auth/app/policies/authz.rego | 180 ++++++- .../nmp/core/auth/app/policies/common.rego | 38 ++ .../nmp/core/auth/app/policies/extract.rego | 8 +- .../nmp/core/auth/app/policies/scopes.rego | 25 +- .../app/policy_tests/caller_kind_test.rego | 308 ++++++++++++ .../core/auth/app/policy_tests/deny_test.rego | 137 +++++ .../generic_entities_deny_test.rego | 6 +- .../policy_tests/namespace_access_test.rego | 37 +- .../policy_tests/namespace_creation_test.rego | 17 +- .../app/policy_tests/platform_admin_test.rego | 40 ++ .../auth/app/policy_tests/scopes_test.rego | 3 + .../policy_tests/unknown_endpoint_test.rego | 53 +- .../core/auth/src/nmp/core/auth/config.py | 20 +- services/core/auth/tests/test_bundle.py | 150 +++++- services/core/auth/tests/test_embedded_pdp.py | 38 +- 84 files changed, 5353 insertions(+), 1163 deletions(-) create mode 100644 e2e/authz_oidc/README.md create mode 100644 e2e/authz_oidc/conftest.py create mode 100644 e2e/authz_oidc/fixtures/harness-broken/pyproject.toml create mode 100644 e2e/authz_oidc/fixtures/harness-broken/src/harness_broken/service.py create mode 100644 e2e/authz_oidc/fixtures/harness-fixture/pyproject.toml create mode 100644 e2e/authz_oidc/fixtures/harness-fixture/src/harness_fixture/service.py create mode 100644 e2e/authz_oidc/fixtures/harness-unruled/pyproject.toml create mode 100644 e2e/authz_oidc/fixtures/harness-unruled/src/harness_unruled/service.py create mode 100644 e2e/authz_oidc/harness.py create mode 100644 e2e/authz_oidc/idp.py create mode 100644 e2e/authz_oidc/matrix.py create mode 100644 e2e/authz_oidc/report.py create mode 100644 e2e/authz_oidc/test_authz_matrix.py 
create mode 100644 packages/nemo_platform_plugin/tests/test_authz_failmode.py create mode 100644 packages/nemo_platform_plugin/tests/test_factory_authz.py create mode 100644 packages/nemo_platform_plugin/tests/test_path_rule.py create mode 100644 plugins/example-plugin/src/nemo_example_plugin/_perms.py create mode 100644 plugins/example-plugin/src/nemo_example_plugin/authz.py create mode 100644 plugins/example-plugin/tests/test_authz.py create mode 100644 plugins/nemo-agents/src/nemo_agents_plugin/api/v2/_perms.py create mode 100644 plugins/nemo-agents/src/nemo_agents_plugin/authz.py create mode 100644 plugins/nemo-agents/tests/test_authz.py create mode 100644 plugins/nemo-anonymizer/tests/unit/test_authz.py create mode 100644 plugins/nemo-auditor/src/nemo_auditor/api/v2/_perms.py create mode 100644 plugins/nemo-auditor/src/nemo_auditor/authz.py create mode 100644 plugins/nemo-auditor/tests/test_authz.py create mode 100644 plugins/nemo-data-designer/tests/unit/test_authz.py create mode 100644 plugins/nemo-evaluator/src/nemo_evaluator/authz.py create mode 100644 plugins/nemo-evaluator/tests/test_authz.py create mode 100644 plugins/nemo-safe-synthesizer/tests/unit/test_authz.py create mode 100644 services/core/auth/src/nmp/core/auth/app/policy_tests/caller_kind_test.rego create mode 100644 services/core/auth/src/nmp/core/auth/app/policy_tests/deny_test.rego diff --git a/docs/set-up/config-reference.mdx b/docs/set-up/config-reference.mdx index 74f9e0baf0..fbc130d5d0 100644 --- a/docs/set-up/config-reference.mdx +++ b/docs/set-up/config-reference.mdx @@ -121,6 +121,10 @@ auth: embedded_pdp_cpu_limit: 200 # Maximum linear memory (MB) the embedded PDP WASM runtime can consume. | default: 32 embedded_pdp_memory_limit_mb: 32 + # Fail-mode for a plugin that contributes invalid HTTP authz. | default: 'deny_route' | values: 'deny_route' | 'quarantine' | 'hard_fail' + on_invalid_plugin: deny_route + # Allow a human PlatformAdmin on SERVICE_PRINCIPAL-only plugin routes. 
| default: False + platform_admin_exempt_from_service_only: false ``` ### `entities` diff --git a/e2e/authz_oidc/README.md b/e2e/authz_oidc/README.md new file mode 100644 index 0000000000..81a118c9fb --- /dev/null +++ b/e2e/authz_oidc/README.md @@ -0,0 +1,81 @@ +# Authz E2E verification harness (real OIDC, signed JWTs) + +Black-box verification that plugin HTTP authorization restricts access as +intended — exercised against a **real running platform** with identity supplied +exclusively as **RS256-signed JWTs from a real-HTTP test OIDC issuer**. This +covers the signed-JWT path end to end: `JWTValidator`'s discovery / JWKS / +signature / expiry / audience checks run over the network, which `opa test` and +the in-process integration tests (header principals, mocked `validate_token`) +do not exercise. + +## One command + +```sh +make build-policy # once per rego change — policy.wasm is gitignored +uv run pytest e2e/authz_oidc -v --run-e2e +``` + +Produces `AUTHZ_E2E_REPORT.md` (+ `.json`, both gitignored) — one row per case: +request → token claims → expected status → observed status. + +Not part of CI: everything is marked `e2e` and skipped without `--run-e2e`. + +## What it does + +1. **Starts a mini OIDC issuer** (`idp.py`) on a free localhost port: real + `/.well-known/openid-configuration` + JWKS over HTTP, real RS256 signing. + A second, unpublished key signs the "unknown key" case. Defective tokens + (expired / wrong issuer / wrong audience / `alg=none`) are minted directly — + the reason a production IdP container isn't used is that it *refuses* to + mint these. +2. **Installs three fixture plugins** (editable, into the active venv): + - `harness-fixture` — clean; declares the only `SERVICE_PRINCIPAL`-only + route (no shipped plugin has one), plus an open control route. + - `harness-unruled` — one ruled + one unruled route (deny-route + containment / quarantine subject). + - `harness-broken` — fails at import (unenumerable ⇒ namespace fence). +3. 
**Spawns `nemo services run`** on a free port with a fresh tmp data dir: + `auth.enabled=true`, `oidc.enabled=true` → issuer, **`allow_unsigned_jwt=false`** + (both local configs default it to *true*; with it on, the signed-JWT proof + would be hollow), audience pinned, `NMP_SEED_ON_STARTUP=true`, + `bundle_cache_seconds=5` (deliberately nonzero; a 2s policy-data refresh plus settle probes cover propagation). +4. **Provisions via signed service JWT** (`sub=service:e2e-harness` — the IAM + role-binding API is service-principal-only at the handler, and a Bearer + token whose `sub` starts with `service:` is a service principal end-to-end): + creates workspaces `authz-e2e-wsa`/`-wsb`, binds alice→Editor@wsA, + victor→Viewer@wsA, sam→Viewer@system, and **revokes the seeded wildcard + `*`→Viewer@system binding** (otherwise every authenticated user holds all + `.read`/`.list` permissions in `system` and the no-workspace permission-deny + rows are untestable). The seeded `*`→Editor@default binding is left alone — + no matrix row touches the `default` workspace. +5. **Runs the matrix** (`matrix.py`, ~40 cases), then repeats a small group on + a second platform instance with `on_invalid_plugin=quarantine` + + `platform_admin_exempt_from_service_only=true`. 
+ +## Matrix coverage + +| Group | Verifies | +|-------|----------| +| authn | valid sig 200; no/expired/wrong-iss/wrong-aud/unknown-key/unsigned/garbage token → 401 | +| bindings | no binding → 403; Viewer read-not-write; cross-workspace isolation | +| no-workspace-get | permission-stamped no-`{workspace}` GET requires the permission in `system`; permissionless sibling stays open | +| scopes | `auditor:read` token: GET 200 / POST 403; `:write` POST 201; OIDC-only scopes = full power (documented); agents-gateway read/write method split | +| caller-kind | service principal denied on `callers=[principal]` route (symmetric half); human & PlatformAdmin denied on service-only route; service no-match bypass pinned as documented behavior | +| fence | unenumerable plugin namespace denied for human/service/PlatformAdmin incl. bare prefix; unruled route denied for everyone while ruled sibling works | +| knobs | quarantine fences the whole offending plugin; exemption knob admits PlatformAdmin (and only PlatformAdmin) to service-only routes | + +Status-code conventions asserted throughout: **401** only when no identity was +established (missing/invalid token); **403** for every policy denial of an +authenticated principal. Two rows use a `not 403` oracle (agent-gateway proxy +404s on a nonexistent agent *after* authz passes; getting past the PDP is the +point). + +## Known limits + +- WebSocket routes are not enforced by the PDP middleware at all — deliberately + absent from the matrix. +- `X-NMP-Principal-*` headers remain a trusted identity channel in this + deployment shape; the harness never sends them, but does not prove they are + stripped (that's an ingress concern, out of authz scope). +- `hard_fail` mode aborts bundle build (auth service degraded) — its + observable is process health, not a per-request status; not asserted here. 
diff --git a/e2e/authz_oidc/conftest.py b/e2e/authz_oidc/conftest.py new file mode 100644 index 0000000000..c1162f684c --- /dev/null +++ b/e2e/authz_oidc/conftest.py @@ -0,0 +1,324 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""Fixtures for the authz OIDC E2E harness. + +Spawns dedicated ``nemo services run`` instances (fresh tmp data dir, free +port, this checkout's code) configured for **native OIDC only**: +``auth.enabled=true``, ``oidc.enabled=true`` pointing at the in-harness +issuer, and — critically — ``allow_unsigned_jwt=false``, so every identity in +the matrix is established by a real RS256-signed JWT. ``X-NMP-Principal-*`` +headers are never sent; provisioning itself authenticates with a signed JWT +whose ``sub`` is ``service:e2e-harness``. + +Two platform phases (both lazy, session-scoped): + +- ``platform`` — default authz knobs (``on_invalid_plugin=deny_route``, + PlatformAdmin not exempt from service-only routes). +- ``platform_knobs`` — ``on_invalid_plugin=quarantine`` + + ``platform_admin_exempt_from_service_only=true``. + +Run: ``pytest e2e/authz_oidc -v --run-e2e`` (see README.md). 
+""" + +from __future__ import annotations + +import json +import logging +import os +import socket +import subprocess +import sys +import time +from collections.abc import Generator, Iterator +from contextlib import closing +from pathlib import Path + +import httpx +import pytest + +sys.path.insert(0, str(Path(__file__).resolve().parent)) + +from harness import ADMIN_EMAIL, WS_A, WS_B, Platform # noqa: E402 +from idp import DEFAULT_AUDIENCE, MiniOIDCIssuer # noqa: E402 +from report import ReportCollector # noqa: E402 + +logger = logging.getLogger(__name__) + +_HERE = Path(__file__).resolve().parent +_REPO_ROOT = _HERE.parents[1] +_PLATFORM_CONFIG = _REPO_ROOT / "packages/nmp_platform/config/local.yaml" +_FIXTURE_PLUGINS = ["harness-fixture", "harness-unruled", "harness-broken"] + +_HEALTH_TIMEOUT = 180 +_PROVISION_TIMEOUT = 120 +_POLL = 1.0 + + +# --------------------------------------------------------------------------- # +# Issuer # +# --------------------------------------------------------------------------- # + + +@pytest.fixture(scope="session") +def issuer() -> Iterator[MiniOIDCIssuer]: + idp = MiniOIDCIssuer() + url = idp.start() + logger.info("Mini OIDC issuer serving at %s", url) + yield idp + idp.stop() + + +# --------------------------------------------------------------------------- # +# Platform process # +# --------------------------------------------------------------------------- # + + +def _free_port() -> int: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind(("127.0.0.1", 0)) + return s.getsockname()[1] + + +def _ensure_fixture_plugins_installed() -> None: + """Editable-install the three fixture plugins into the active venv (idempotent).""" + from importlib.metadata import entry_points + + installed = {ep.name for ep in entry_points(group="nemo.services")} + missing = [p for p in _FIXTURE_PLUGINS if p not in installed] + if not missing: + return + pip_specs = [str(_HERE / "fixtures" / p) for p in missing] + subprocess.run( 
+ ["uv", "pip", "install", "--python", sys.executable, *[f"-e{spec}" for spec in pip_specs]], + check=True, + capture_output=True, + timeout=300, + cwd=_REPO_ROOT, + ) + logger.info("Installed fixture plugins: %s", ", ".join(missing)) + + +def _platform_env(issuer_url: str, data_dir: Path, extra: dict[str, str]) -> dict[str, str]: + env = {k: v for k, v in os.environ.items() if not k.startswith(("NMP_", "DATABASE_"))} + env.update( + { + "NMP_CONFIG_FILE_PATH": str(_PLATFORM_CONFIG), + "NMP_CONFIG_WARNINGS_DISABLED": "1", + "NMP_DATA_DIR": str(data_dir), + "NMP_SEED_ON_STARTUP": "true", + "NMP_AUTH_ENABLED": "true", + "NMP_AUTH_ALLOW_UNSIGNED_JWT": "false", # defaults are true; signed JWTs only + "NMP_AUTH_OIDC_ENABLED": "true", + "NMP_AUTH_OIDC_ISSUER": issuer_url, + "NMP_AUTH_OIDC_AUDIENCE": DEFAULT_AUDIENCE, + "NMP_AUTH_ADMIN_EMAIL": ADMIN_EMAIL, + # bundle_cache_seconds must stay NONZERO: at 0 every PDP eval + # rebuilds policy data, and degraded fixture plugins are never + # cached — each eval then re-runs full plugin derivation + # and entity paging, blowing the 5s PDP timeout platform-wide. + # Fast background refresh + settle-probes handle propagation. + "NMP_AUTH_BUNDLE_CACHE_SECONDS": "5", + "NMP_AUTH_POLICY_DATA_REFRESH_INTERVAL": "2", + # FINDING (harness-discovered): branch rego exceeds the default + # embedded-PDP fuel budget (100M; config docstring says typical + # evals are 20-25M) once seeded principal data is loaded — every + # request 502s. Raised here to unblock; flagged for the branch. 
+ "NMP_AUTH_EMBEDDED_PDP_CPU_LIMIT": "2000", + } + ) + env.update(extra) + return env + + +def _spawn_platform( + issuer: MiniOIDCIssuer, + tmp_path_factory: pytest.TempPathFactory, + label: str, + extra_env: dict[str, str], +) -> Generator[Platform, None, None]: + _ensure_fixture_plugins_installed() + port = _free_port() + base_url = f"http://127.0.0.1:{port}" + work = tmp_path_factory.mktemp(f"authz-e2e-{label}") + data_dir = work / "data" + data_dir.mkdir() + log_path = work / "services.log" + + nemo_bin = Path(sys.executable).parent / "nemo" + args = [ + str(nemo_bin), + "services", + "run", + "--service-group", + "all", + "--controller-group", + "all", + "--port", + str(port), + ] + env = _platform_env(issuer.issuer_url, data_dir, extra_env) + + logger.info("Spawning platform [%s] on %s (log: %s)", label, base_url, log_path) + with open(log_path, "w") as log_file: + proc = subprocess.Popen(args, stdout=log_file, stderr=subprocess.STDOUT, env=env) + try: + platform = Platform(base_url=base_url, issuer=issuer, log_path=log_path) + _wait_healthy(platform) + yield platform + finally: + proc.terminate() + try: + proc.wait(timeout=15) + except subprocess.TimeoutExpired: + proc.kill() + proc.wait(timeout=5) + + +def _wait_healthy(platform: Platform) -> None: + deadline = time.monotonic() + _HEALTH_TIMEOUT + while time.monotonic() < deadline: + try: + if httpx.get(f"{platform.base_url}/health/ready", timeout=2.0).status_code == 200: + break + except httpx.RequestError: + pass + time.sleep(_POLL) + else: + pytest.fail(f"Platform on {platform.base_url} not healthy within {_HEALTH_TIMEOUT}s; log: {platform.log_path}") + + # Seeding runs as a startup task and may lag /health/ready: wait until the + # admin's PlatformAdmin binding is live via the OIDC path itself. 
+ admin_token = platform.token("admin") + deadline = time.monotonic() + _PROVISION_TIMEOUT + while time.monotonic() < deadline: + resp = platform.request("GET", "/apis/entities/v2/workspaces", token=admin_token) + if resp.status_code == 200: + return + time.sleep(_POLL) + pytest.fail(f"Admin OIDC token not authorized within {_PROVISION_TIMEOUT}s; log: {platform.log_path}") + + +# --------------------------------------------------------------------------- # +# Provisioning (signed service JWT only — no principal headers anywhere) # +# --------------------------------------------------------------------------- # + + +def _provision(platform: Platform) -> None: + """Create workspaces + explicit role bindings; revoke the seeded wildcard. + + Everything authenticates as ``service:e2e-harness`` via a signed JWT: the + IAM role-binding endpoints are service-principal-only at the handler, and + a Bearer token whose ``sub`` starts with ``service:`` satisfies that + end-to-end (middleware builds Principal.id straight from the sub claim). + """ + token = platform.token("provisioner") + + def call(method: str, path: str, body: dict | None = None) -> httpx.Response: + resp = platform.request(method, path, token=token, body=body) + if resp.status_code >= 500: + raise AssertionError(f"provisioning {method} {path} -> {resp.status_code}: {resp.text[:300]}") + return resp + + # 1. Revoke the seeded wildcard '*' -> Viewer@system binding. With it in + # place every authenticated user holds all .read/.list permissions in + # the system workspace, which would make the no-workspace permission-deny rows untestable. + # Revocation must go through the generic entities API: both dedicated + # revocation endpoints are broken for this binding (IAM DELETE looks it + # up in the 'default' workspace; members DELETE filters on a + # data.workspace key that binding entities don't carry) — see report. 
+ entity_path = "/apis/entities/v2/workspaces/system/entities/role_binding/wildcard-system-viewer" + entity = call("GET", entity_path) + assert entity.status_code == 200, f"seeded wildcard binding not found -> {entity.status_code}: {entity.text[:300]}" + payload = entity.json() + payload["data"]["revoked_at"] = "2026-01-01T00:00:00Z" + resp = call("PUT", entity_path, {"name": payload["name"], "data": payload["data"]}) + assert resp.status_code == 200, f"wildcard revoke -> {resp.status_code}: {resp.text[:300]}" + logger.info("Revoked seeded wildcard-system-viewer binding via entities API") + + # 2. Workspaces (creator auto-Admin binding is keyed to the service sub; harmless). + for ws in (WS_A, WS_B): + resp = call("POST", "/apis/entities/v2/workspaces", {"name": ws}) + assert resp.status_code == 201, f"workspace {ws} -> {resp.status_code}: {resp.text[:300]}" + + # 3. Explicit bindings (wait_role_propagation defaults to true -> synchronous). + for principal, workspace, role in ( + ("alice@harness.test", WS_A, "Editor"), + ("victor@harness.test", WS_A, "Viewer"), + ("sam@harness.test", "system", "Viewer"), + ): + resp = call( + "POST", "/apis/auth/v2/iam/role-bindings", {"principal": principal, "workspace": workspace, "role": role} + ) + assert resp.status_code in (200, 201), ( + f"binding {principal}/{role}@{workspace} -> {resp.status_code}: {resp.text[:300]}" + ) + + # 4. Settle probe: alice's Editor binding effective AND the wildcard + # revocation propagated (alice must now be denied on the no-workspace permission route). 
+ alice = platform.token("alice") + deadline = time.monotonic() + _PROVISION_TIMEOUT + while time.monotonic() < deadline: + ok = platform.request("GET", f"/apis/auditor/v2/workspaces/{WS_A}/targets", token=alice).status_code == 200 + revoked = platform.request("GET", "/apis/entities/v2/workspaces", token=alice).status_code == 403 + if ok and revoked: + return + time.sleep(_POLL) + raise AssertionError("Provisioned bindings did not settle (alice 200 on wsA targets + 403 on workspaces list)") + + +# --------------------------------------------------------------------------- # +# Session fixtures # +# --------------------------------------------------------------------------- # + + +@pytest.fixture(scope="session") +def platform(issuer: MiniOIDCIssuer, tmp_path_factory: pytest.TempPathFactory) -> Iterator[Platform]: + """Default-knob platform, fully provisioned.""" + gen = _spawn_platform(issuer, tmp_path_factory, "default", {}) + with closing(gen): + p = next(gen) + _provision(p) + yield p + + +@pytest.fixture(scope="session") +def platform_knobs(issuer: MiniOIDCIssuer, tmp_path_factory: pytest.TempPathFactory) -> Iterator[Platform]: + """Quarantine + PlatformAdmin-exemption knob platform (no extra provisioning).""" + gen = _spawn_platform( + issuer, + tmp_path_factory, + "knobs", + { + "NMP_AUTH_ON_INVALID_PLUGIN": "quarantine", + "NMP_AUTH_PLATFORM_ADMIN_EXEMPT_FROM_SERVICE_ONLY": "true", + }, + ) + with closing(gen): + p = next(gen) + yield p + + +# --------------------------------------------------------------------------- # +# Audit report # +# --------------------------------------------------------------------------- # + + +_REPORT_KEY = pytest.StashKey[ReportCollector]() + + +@pytest.fixture(scope="session") +def report(request: pytest.FixtureRequest) -> ReportCollector: + collector = ReportCollector() + request.session.stash[_REPORT_KEY] = collector + return collector + + +def pytest_sessionfinish(session: pytest.Session) -> None: + collector = 
session.stash.get(_REPORT_KEY, None) + if collector and collector.rows: + out = _HERE / "AUTHZ_E2E_REPORT.md" + out.write_text(collector.render()) + json_out = _HERE / "AUTHZ_E2E_REPORT.json" + json_out.write_text(json.dumps(collector.as_json(), indent=2)) + print(f"\nAuthz E2E audit report: {out} ({len(collector.rows)} cases)") diff --git a/e2e/authz_oidc/fixtures/harness-broken/pyproject.toml b/e2e/authz_oidc/fixtures/harness-broken/pyproject.toml new file mode 100644 index 0000000000..85585e680b --- /dev/null +++ b/e2e/authz_oidc/fixtures/harness-broken/pyproject.toml @@ -0,0 +1,16 @@ +[project] +name = "harness-broken" +version = "0.0.1" +description = "E2E authz fixture plugin: import-time failure (unenumerable plugin -> namespace fence)." +requires-python = ">=3.11" +dependencies = [] + +[project.entry-points."nemo.services"] +harness-broken = "harness_broken.service:BrokenService" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["src/harness_broken"] diff --git a/e2e/authz_oidc/fixtures/harness-broken/src/harness_broken/service.py b/e2e/authz_oidc/fixtures/harness-broken/src/harness_broken/service.py new file mode 100644 index 0000000000..3517a79782 --- /dev/null +++ b/e2e/authz_oidc/fixtures/harness-broken/src/harness_broken/service.py @@ -0,0 +1,19 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""Unenumerable fixture plugin for the authz OIDC E2E harness. + +The import fails on purpose, so authz derivation enumerates zero endpoints +for this plugin. The bundle must fence the whole +``/apis/harness-broken`` namespace (including the bare prefix) with an +explicit deny for every caller kind — service principals included, which is +the no-match-bypass hole the fence exists to close. 
The platform runner's +fault-isolated ``discover()`` skips the plugin, so the platform itself keeps +running. +""" + +raise RuntimeError("harness-broken: deliberate import failure for authz fence verification") + + +class BrokenService: # pragma: no cover - unreachable past the raise above + name = "harness-broken" diff --git a/e2e/authz_oidc/fixtures/harness-fixture/pyproject.toml b/e2e/authz_oidc/fixtures/harness-fixture/pyproject.toml new file mode 100644 index 0000000000..4fcb678cc8 --- /dev/null +++ b/e2e/authz_oidc/fixtures/harness-fixture/pyproject.toml @@ -0,0 +1,16 @@ +[project] +name = "harness-fixture" +version = "0.0.1" +description = "E2E authz fixture plugin: clean derivation with a SERVICE_PRINCIPAL-only route." +requires-python = ">=3.11" +dependencies = [] + +[project.entry-points."nemo.services"] +harness-fixture = "harness_fixture.service:HarnessFixtureService" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["src/harness_fixture"] diff --git a/e2e/authz_oidc/fixtures/harness-fixture/src/harness_fixture/service.py b/e2e/authz_oidc/fixtures/harness-fixture/src/harness_fixture/service.py new file mode 100644 index 0000000000..949da14037 --- /dev/null +++ b/e2e/authz_oidc/fixtures/harness-fixture/src/harness_fixture/service.py @@ -0,0 +1,49 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""Clean fixture plugin for the authz OIDC E2E harness. + +No shipped plugin declares a ``SERVICE_PRINCIPAL``-only route, so the +caller-kind service-only deny is not observable on the +stock surface. This plugin provides: + +- ``GET /apis/harness-fixture/probe/service-only`` — ``callers=[SERVICE_PRINCIPAL]``: + humans (including PlatformAdmin, unless the exemption knob is set) must be + denied; service principals allowed. 
+- ``GET /apis/harness-fixture/probe/open`` — ``callers=[PRINCIPAL]``, no + permissions: control proving the plugin is mounted and a plain + authenticated user reaches it (so the service-only 403 is meaningful). +""" + +from __future__ import annotations + +from typing import ClassVar + +from fastapi import APIRouter +from nemo_platform_plugin.authz import CallerKind, path_rule +from nemo_platform_plugin.service import NemoService, RouterSpec + +router = APIRouter() + + +@router.get("/probe/service-only") +@path_rule(callers=[CallerKind.SERVICE_PRINCIPAL]) +async def probe_service_only() -> dict[str, str]: + """Reachable only by service principals (callers=[SERVICE_PRINCIPAL]).""" + return {"probe": "service-only", "status": "ok"} + + +@router.get("/probe/open") +@path_rule(callers=[CallerKind.PRINCIPAL]) +async def probe_open() -> dict[str, str]: + """Reachable by any authenticated principal (mounted-and-working control).""" + return {"probe": "open", "status": "ok"} + + +class HarnessFixtureService(NemoService): + """Minimal clean service: one service-only route, one open control route.""" + + name: ClassVar[str] = "harness-fixture" + + def get_routers(self) -> list[RouterSpec]: + return [RouterSpec(router=router, tag="Authz E2E Fixture")] diff --git a/e2e/authz_oidc/fixtures/harness-unruled/pyproject.toml b/e2e/authz_oidc/fixtures/harness-unruled/pyproject.toml new file mode 100644 index 0000000000..87924f0e43 --- /dev/null +++ b/e2e/authz_oidc/fixtures/harness-unruled/pyproject.toml @@ -0,0 +1,16 @@ +[project] +name = "harness-unruled" +version = "0.0.1" +description = "E2E authz fixture plugin: one ruled route + one unruled route (deny_route containment / quarantine subject)." 
+requires-python = ">=3.11" +dependencies = [] + +[project.entry-points."nemo.services"] +harness-unruled = "harness_unruled.service:HarnessUnruledService" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["src/harness_unruled"] diff --git a/e2e/authz_oidc/fixtures/harness-unruled/src/harness_unruled/service.py b/e2e/authz_oidc/fixtures/harness-unruled/src/harness_unruled/service.py new file mode 100644 index 0000000000..744051399a --- /dev/null +++ b/e2e/authz_oidc/fixtures/harness-unruled/src/harness_unruled/service.py @@ -0,0 +1,44 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""Partially-invalid fixture plugin for the authz OIDC E2E harness. + +One route carries a valid ``@path_rule``; one deliberately does not. Under the +default ``on_invalid_plugin=deny_route`` fail mode (decision D4) the unruled +route must be explicitly denied for *every* caller — human with permissions, +service principal (``ServiceSystem`` wildcard), and PlatformAdmin — while the +ruled sibling keeps working. Under ``on_invalid_plugin=quarantine`` the whole +``/apis/harness-unruled`` namespace is fenced, ruled route included. 
+""" + +from __future__ import annotations + +from typing import ClassVar + +from fastapi import APIRouter +from nemo_platform_plugin.authz import CallerKind, path_rule +from nemo_platform_plugin.service import NemoService, RouterSpec + +router = APIRouter() + + +@router.get("/ruled") +@path_rule(callers=[CallerKind.PRINCIPAL]) +async def ruled() -> dict[str, str]: + """Properly annotated control route.""" + return {"route": "ruled", "status": "ok"} + + +@router.get("/unruled") +async def unruled() -> dict[str, str]: + """Deliberately missing @path_rule — must be denied for everyone (fail-closed).""" + return {"route": "unruled", "status": "you should never see this"} + + +class HarnessUnruledService(NemoService): + """Service with a deliberate authoring error on exactly one route.""" + + name: ClassVar[str] = "harness-unruled" + + def get_routers(self) -> list[RouterSpec]: + return [RouterSpec(router=router, tag="Authz E2E Unruled Fixture")] diff --git a/e2e/authz_oidc/harness.py b/e2e/authz_oidc/harness.py new file mode 100644 index 0000000000..ac074810ba --- /dev/null +++ b/e2e/authz_oidc/harness.py @@ -0,0 +1,60 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""Shared harness state: identities, workspace names, and the platform handle.""" + +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path + +import httpx +from idp import MiniOIDCIssuer, MintSpec + +ADMIN_EMAIL = "admin@harness.test" +WS_A = "authz-e2e-wsa" +WS_B = "authz-e2e-wsb" + +# sub / email for every identity in the matrix (bindings provisioned in conftest). 
# Token claims for each identity key used by the matrix. Service principals
# have no email; the role bindings themselves are provisioned in conftest.
IDENTITIES: dict[str, dict] = {
    "admin": {"sub": "usr-admin", "email": ADMIN_EMAIL},
    "alice": {"sub": "usr-alice", "email": "alice@harness.test"},
    "victor": {"sub": "usr-victor", "email": "victor@harness.test"},
    "sam": {"sub": "usr-sam", "email": "sam@harness.test"},
    "nobody": {"sub": "usr-nobody", "email": "nobody@harness.test"},
    "service": {"sub": "service:probe", "email": None},
    "provisioner": {"sub": "service:e2e-harness", "email": None},
}


@dataclass
class Platform:
    """Handle to a spawned ``nemo services run`` instance under test.

    Bundles the base URL of the running platform, the OIDC issuer that mints
    tokens for it, and the path of its log file.
    """

    base_url: str
    issuer: MiniOIDCIssuer
    log_path: Path

    def token(self, identity: str, *, scopes: list[str] | None = None, **overrides) -> str:
        """Mint a token for one of the :data:`IDENTITIES` keys.

        Args:
            identity: Key into :data:`IDENTITIES`; an unknown key raises ``KeyError``.
            scopes: Scope strings for the token; ``None`` or empty means no scopes.
            **overrides: Extra ``MintSpec`` fields forwarded unchanged.

        Returns:
            The encoded token produced by the issuer.
        """
        claims = IDENTITIES[identity]
        requested_scopes = scopes if scopes else []
        spec = MintSpec(
            sub=claims["sub"],
            email=claims["email"],
            scopes=requested_scopes,
            **overrides,
        )
        return self.issuer.mint(spec)

    def request(
        self,
        method: str,
        path: str,
        *,
        token: str | None,
        body: dict | None = None,
    ) -> httpx.Response:
        """Send one HTTP request to the platform and return the raw response.

        Args:
            method: HTTP method name.
            path: Path appended verbatim to ``base_url``.
            token: Bearer token; ``None`` sends no ``Authorization`` header at all.
            body: Optional JSON payload.
        """
        auth_headers = {"Authorization": f"Bearer {token}"} if token is not None else {}
        url = f"{self.base_url}{path}"
        return httpx.request(method, url, headers=auth_headers, json=body, timeout=30.0)
KID_ACTIVE = "e2e-active-key"
KID_ROGUE = "e2e-rogue-key"

DEFAULT_AUDIENCE = "nmp-e2e-authz"


@dataclass
class MintSpec:
    """Declarative description of a token to mint (defaults = a valid token)."""

    sub: str
    email: str | None = None
    groups: list[str] = field(default_factory=list)
    scopes: list[str] = field(default_factory=list)
    issuer: str | None = None  # None -> the real issuer URL
    audience: str | None = DEFAULT_AUDIENCE  # None -> no "aud" claim at all
    expires_in: int = 3600  # negative -> already expired
    kid: str = KID_ACTIVE  # KID_ROGUE -> signature by an unpublished key
    unsigned: bool = False  # True -> alg=none token


class MiniOIDCIssuer:
    """A real-HTTP OIDC issuer: discovery + JWKS + RS256 minting.

    Two RSA keys are generated per instance: ``KID_ACTIVE`` is published in the
    JWKS document, ``KID_ROGUE`` is never published and exists only to sign the
    "unknown signing key" case.
    """

    def __init__(self) -> None:
        self._keys = {
            KID_ACTIVE: rsa.generate_private_key(public_exponent=65537, key_size=2048),
            KID_ROGUE: rsa.generate_private_key(public_exponent=65537, key_size=2048),
        }
        self._server: ThreadingHTTPServer | None = None
        self._thread: threading.Thread | None = None
        self.issuer_url: str = ""

    # -- HTTP surface ------------------------------------------------------

    def _jwks_document(self) -> dict:
        """Return the JWKS body: the active public key only."""
        # Only the active key is published; the rogue key stays private.
        jwk = json.loads(RSAAlgorithm.to_jwk(self._keys[KID_ACTIVE].public_key()))
        jwk.update({"kid": KID_ACTIVE, "use": "sig", "alg": "RS256"})
        return {"keys": [jwk]}

    def _discovery_document(self) -> dict:
        """Return the ``/.well-known/openid-configuration`` body for the current URL."""
        return {
            "issuer": self.issuer_url,
            "jwks_uri": f"{self.issuer_url}/jwks.json",
            "authorization_endpoint": f"{self.issuer_url}/authorize",
            "token_endpoint": f"{self.issuer_url}/token",
            "device_authorization_endpoint": f"{self.issuer_url}/device",
            "response_types_supported": ["code"],
            "subject_types_supported": ["public"],
            "id_token_signing_alg_values_supported": ["RS256"],
            "scopes_supported": ["openid", "profile", "email"],
        }

    def start(self) -> str:
        """Start serving on an ephemeral 127.0.0.1 port and return the issuer URL.

        Idempotent: if the server is already running, the existing URL is
        returned. Without this guard a second call would bind a new socket and
        thread and overwrite the handles, leaving the first pair impossible to
        stop.
        """
        if self._server is not None:
            return self.issuer_url

        issuer = self

        class Handler(BaseHTTPRequestHandler):
            def do_GET(self) -> None:  # noqa: N802 - http.server API
                routes = {
                    "/.well-known/openid-configuration": issuer._discovery_document,
                    "/jwks.json": issuer._jwks_document,
                }
                # Ignore any query string when matching the route.
                builder = routes.get(self.path.split("?")[0])
                if builder is None:
                    self.send_response(404)
                    self.end_headers()
                    return
                body = json.dumps(builder()).encode()
                self.send_response(200)
                self.send_header("Content-Type", "application/json")
                self.send_header("Content-Length", str(len(body)))
                self.end_headers()
                self.wfile.write(body)

            def log_message(self, format: str, *args: object) -> None:  # noqa: A002 - http.server API
                pass  # keep pytest output clean

        # Port 0 lets the OS pick a free port; the URL is derived from what was bound.
        self._server = ThreadingHTTPServer(("127.0.0.1", 0), Handler)
        self.issuer_url = f"http://127.0.0.1:{self._server.server_port}"
        self._thread = threading.Thread(target=self._server.serve_forever, daemon=True)
        self._thread.start()
        return self.issuer_url

    def stop(self) -> None:
        """Shut the HTTP server down and reset the issuer URL; safe if never started."""
        if self._server:
            self._server.shutdown()
            self._server.server_close()
        if self._thread:
            self._thread.join(timeout=5)
        self._server = None
        self._thread = None
        self.issuer_url = ""

    # -- Token minting -------------------------------------------------------

    def mint(self, spec: MintSpec) -> str:
        """Encode a token for *spec*.

        Returns an RS256 JWT signed with the key named by ``spec.kid``, or an
        ``alg=none`` token with an empty signature segment when
        ``spec.unsigned`` is set.
        """
        now = int(time.time())
        claims: dict[str, object] = {
            "sub": spec.sub,
            "iat": now - 5,
            "exp": now + spec.expires_in,
            "iss": spec.issuer if spec.issuer is not None else self.issuer_url,
        }
        # Optional claims are omitted entirely rather than sent as null/empty.
        if spec.audience is not None:
            claims["aud"] = spec.audience
        if spec.email is not None:
            claims["email"] = spec.email
        if spec.groups:
            claims["groups"] = spec.groups
        if spec.scopes:
            claims["scope"] = " ".join(spec.scopes)

        if spec.unsigned:
            # Same wire format the SDK's generate_unsigned_jwt produces:
            # base64url(header).base64url(claims). with an empty signature.
            def b64(obj: dict) -> str:
                raw = json.dumps(obj, separators=(",", ":")).encode()
                return base64.urlsafe_b64encode(raw).rstrip(b"=").decode()

            return f"{b64({'alg': 'none', 'typ': 'JWT'})}.{b64(claims)}."
        return pyjwt.encode(claims, self._keys[spec.kid], algorithm="RS256", headers={"kid": spec.kid})

    def claims_summary(self, spec: MintSpec) -> str:
        """One-line human description of the minted claims for the audit report."""
        parts = [f"sub={spec.sub}"]
        if spec.email:
            parts.append(f"email={spec.email}")
        if spec.scopes:
            parts.append(f"scope={' '.join(spec.scopes)}")
        if spec.expires_in <= 0:
            parts.append("EXPIRED")
        if spec.issuer is not None:
            parts.append(f"iss={spec.issuer}")
        if spec.audience != DEFAULT_AUDIENCE:
            parts.append(f"aud={spec.audience}")
        if spec.kid != KID_ACTIVE:
            parts.append("key=unpublished")
        if spec.unsigned:
            parts.append("alg=none")
        return ", ".join(parts)
+ +Each case is one HTTP request against the running platform, authenticated by a +real signed JWT (or a deliberately defective one). ``expected`` is either a set +of acceptable status codes, or ``NOT_403`` for rows where the authz decision is +the oracle but the downstream handler's status is environment-dependent (e.g. +the agent-gateway proxy 404s on a nonexistent agent *after* authorization +passes — getting past the PDP is exactly what the row proves). + +Identity keys map to tokens minted by the session issuer (see conftest): + +- ``admin`` sub=usr-admin, email matching auth.admin_email -> PlatformAdmin@system (seeded) +- ``alice`` sub=usr-alice, email alice@harness.test -> Editor@ (provisioned) +- ``victor`` sub=usr-victor, email victor@harness.test -> Viewer@ (provisioned) +- ``sam`` sub=usr-sam, email sam@harness.test -> Viewer@system (provisioned) +- ``nobody`` sub=usr-nobody, email nobody@harness.test -> no bindings anywhere +- ``service`` sub=service:probe -> service principal (ServiceSystem '*' default) +- ``provisioner`` sub=service:e2e-harness -> service principal used for setup +- ``anonymous`` no Authorization header at all + +Workspace placeholders ``{wsA}``/``{wsB}`` are substituted by the test with the +session's provisioned workspace names. +""" + +from __future__ import annotations + +from collections.abc import Mapping +from dataclasses import dataclass, field +from typing import Literal + +NOT_403 = "not-403" + +# Paths under test (placeholders substituted at runtime). 
+TARGETS = "/apis/auditor/v2/workspaces/{wsA}/targets" +TARGETS_B = "/apis/auditor/v2/workspaces/{wsB}/targets" +WORKSPACES = "/apis/entities/v2/workspaces" +EVAL_HELLO = "/apis/evaluator/v1/hello/world" +EVAL_HEALTHZ = "/apis/evaluator/v1/healthz" +GATEWAY = "/apis/agents/v2/workspaces/{wsA}/agents/ghost-agent/-/health" +SERVICE_ONLY = "/apis/harness-fixture/probe/service-only" +FIXTURE_OPEN = "/apis/harness-fixture/probe/open" +UNRULED_OK = "/apis/harness-unruled/ruled" +UNRULED_BAD = "/apis/harness-unruled/unruled" +BROKEN_SUB = "/apis/harness-broken/anything" +BROKEN_BARE = "/apis/harness-broken" +UNKNOWN_PATH = "/apis/auditor/v2/path-that-matches-no-rule" +IAM_BINDINGS = "/apis/auth/v2/iam/role-bindings" +PDP_ALLOW = "/apis/auth/v2/authz/allow" + +TARGET_BODY = {"name": "e2e-authz-{case}", "type": "openai", "model": "gpt-test"} + + +@dataclass(frozen=True) +class Case: + id: str + group: str + description: str + method: str + path: str + identity: str + expected: set[int] | Literal["not-403"] # exact acceptable codes, or the NOT_403 sentinel + scopes: list[str] = field(default_factory=list) + token_defect: str | None = None # expired|wrong-issuer|wrong-audience|unknown-key|unsigned|garbage + body: Mapping[str, object] | None = None + phase: str = "default" + notes: str = "" + + +MATRIX: list[Case] = [ + # ------------------------------------------------------------------ # + # A. Token validity (authentication). Target endpoint is one alice # + # is fully authorized for, so ONLY the token defect varies. 
# + # ------------------------------------------------------------------ # + Case("A1", "authn", "Valid signed token, authorized principal", "GET", TARGETS, "alice", {200}), + Case("A2", "authn", "No token at all", "GET", TARGETS, "anonymous", {401}), + Case("A3", "authn", "Expired token", "GET", TARGETS, "alice", {401}, token_defect="expired"), + Case( + "A4", + "authn", + "Wrong issuer (signed by our key)", + "GET", + TARGETS, + "alice", + {401}, + token_defect="wrong-issuer", + ), + Case("A5", "authn", "Wrong audience", "GET", TARGETS, "alice", {401}, token_defect="wrong-audience"), + Case( + "A6", + "authn", + "Signed by a key absent from JWKS", + "GET", + TARGETS, + "alice", + {401}, + token_defect="unknown-key", + ), + Case( + "A7", + "authn", + "Unsigned alg=none token (allow_unsigned_jwt=false)", + "GET", + TARGETS, + "alice", + {401}, + token_defect="unsigned", + ), + Case("A8", "authn", "Garbage bearer string", "GET", TARGETS, "alice", {401}, token_defect="garbage"), + # ------------------------------------------------------------------ # + # B. Role bindings & workspace isolation (authorization basics). # + # ------------------------------------------------------------------ # + Case("B1", "bindings", "Valid signature, zero role bindings", "GET", TARGETS, "nobody", {403}), + Case( + "B2", + "bindings", + "Editor can write (control for scope rows)", + "POST", + TARGETS, + "alice", + {201}, + body=TARGET_BODY, + ), + Case("B3", "bindings", "Viewer reads OK", "GET", TARGETS, "victor", {200}), + Case("B4", "bindings", "Viewer denied on write", "POST", TARGETS, "victor", {403}, body=TARGET_BODY), + Case("B5", "bindings", "Cross-workspace: Editor in wsA denied in wsB", "GET", TARGETS_B, "alice", {403}), + # ------------------------------------------------------------------ # + # C. permission-stamped no-{workspace} GETs require the # + # permission (in the system workspace), not mere authentication. 
# + # Requires the seeded wildcard Viewer@system binding to be revoked. # + # ------------------------------------------------------------------ # + Case( + "C1", + "no-workspace-get", + "workspaces.list required: no system role -> deny", + "GET", + WORKSPACES, + "alice", + {403}, + ), + Case("C2", "no-workspace-get", "workspaces.list via Viewer@system -> allow", "GET", WORKSPACES, "sam", {200}), + Case( + "C3", + "no-workspace-get", + "evaluator.hello.read: no system role -> deny", + "GET", + EVAL_HELLO, + "alice", + {403}, + ), + Case( + "C4", + "no-workspace-get", + "evaluator.hello.read via Viewer@system -> allow", + "GET", + EVAL_HELLO, + "sam", + {200}, + ), + Case( + "C5", + "no-workspace-get", + "Permissionless no-workspace GET stays open to any authenticated user (control)", + "GET", + EVAL_HEALTHZ, + "alice", + {200}, + ), + Case( + "C6", + "no-workspace-get", + "Permissionless no-workspace GET still requires authentication", + "GET", + EVAL_HEALTHZ, + "anonymous", + {401}, + ), + # ------------------------------------------------------------------ # + # D. read-scoped vs write-scoped tokens. alice is Editor@wsA # + # (holds the permissions) — only the token's scope claim varies. 
# + # ------------------------------------------------------------------ # + Case( + "D1", + "scopes", + "auditor:read scope allows GET", + "GET", + TARGETS, + "alice", + {200}, + scopes=["auditor:read"], + ), + Case( + "D2", + "scopes", + "auditor:read scope denies POST", + "POST", + TARGETS, + "alice", + {403}, + scopes=["auditor:read"], + body=TARGET_BODY, + ), + Case( + "D3", + "scopes", + "auditor:write scope allows POST", + "POST", + TARGETS, + "alice", + {201}, + scopes=["auditor:write"], + body=TARGET_BODY, + ), + Case( + "D4", + "scopes", + "OIDC-only scopes (no area:verb) = full power, documented", + "POST", + TARGETS, + "alice", + {201}, + scopes=["openid", "profile", "email"], + body=TARGET_BODY, + notes="scopes.rego: tokens with no colon-scopes skip the scope gate by design", + ), + Case( + "D5", + "scopes", + "Gateway read method passes with agents:read (authz oracle: not 403)", + "GET", + GATEWAY, + "alice", + NOT_403, + scopes=["agents:read"], + notes="proxy 404s on the nonexistent agent AFTER authorization passes", + ), + Case( + "D6", + "scopes", + "Gateway write method denied with agents:read", + "POST", + GATEWAY, + "alice", + {403}, + scopes=["agents:read"], + body={}, + ), + Case( + "D7", + "scopes", + "Gateway write method passes with agents:write (authz oracle: not 403)", + "POST", + GATEWAY, + "alice", + NOT_403, + scopes=["agents:write"], + body={}, + ), + # ------------------------------------------------------------------ # + # E. symmetric caller-kind enforcement. 
# + # ------------------------------------------------------------------ # + Case( + "E1", + "caller-kind", + "Service principal denied on callers=[principal] route", + "GET", + TARGETS, + "service", + {403}, + notes="a service principal would otherwise pass via the ServiceSystem '*' wildcard", + ), + Case( + "E2", + "caller-kind", + "Service principal allowed on service-only route", + "GET", + SERVICE_ONLY, + "service", + {200}, + ), + Case( + "E3", + "caller-kind", + "Human denied on service-only route (holds no relevant permission)", + "GET", + SERVICE_ONLY, + "alice", + {403}, + ), + Case( + "E4", + "caller-kind", + "PlatformAdmin denied on service-only route (exemption knob default false)", + "GET", + SERVICE_ONLY, + "admin", + {403}, + ), + Case( + "E5", + "caller-kind", + "Fixture plugin mounted + open route works (control)", + "GET", + FIXTURE_OPEN, + "alice", + {200}, + ), + Case( + "E6", + "caller-kind", + "Service no-match bypass pinned: unknown path under healthy plugin -> authz passes (404)", + "GET", + UNKNOWN_PATH, + "service", + NOT_403, + notes="documents the deliberate service:* bypass for unmatched paths", + ), + Case("E7", "caller-kind", "Human denied on same unknown path", "GET", UNKNOWN_PATH, "alice", {403}), + Case( + "E8", + "caller-kind", + "Human denied on IAM role-bindings API (service-principal-only handler)", + "POST", + IAM_BINDINGS, + "alice", + {403}, + body={"principal": "x@harness.test", "workspace": "default", "role": "Viewer"}, + ), + Case( + "E9", + "caller-kind", + "Service JWT (sub=service:*) accepted by IAM API — provisioning ran on signed JWTs", + "GET", + IAM_BINDINGS, + "provisioner", + {200}, + ), + Case( + "E10", + "caller-kind", + "PDP entrypoint rejects Bearer identity (header-principal only)", + "POST", + PDP_ALLOW, + "provisioner", + {401}, + body={"input": {}}, + notes="middleware consults only X-NMP-Principal-Id on /apis/auth/v2/authz/*", + ), + # ------------------------------------------------------------------ # 
+ # F. plugin fence & deny_route containment. # + # ------------------------------------------------------------------ # + Case( + "F1", + "fence", + "Unenumerable plugin: human denied under fenced namespace", + "GET", + BROKEN_SUB, + "alice", + {403}, + ), + Case( + "F2", + "fence", + "Unenumerable plugin: SERVICE principal denied (no-match bypass closed)", + "GET", + BROKEN_SUB, + "service", + {403}, + ), + Case("F3", "fence", "Unenumerable plugin: PlatformAdmin denied", "GET", BROKEN_SUB, "admin", {403}), + Case( + "F4", + "fence", + "Bare fenced prefix also denied for service principal", + "GET", + BROKEN_BARE, + "service", + {403}, + ), + Case( + "F5", + "fence", + "deny_route containment: ruled sibling route still works", + "GET", + UNRULED_OK, + "alice", + {200}, + ), + Case("F6", "fence", "Unruled route denied for human", "GET", UNRULED_BAD, "alice", {403}), + Case( + "F7", + "fence", + "Unruled route denied for service principal (overrides '*')", + "GET", + UNRULED_BAD, + "service", + {403}, + ), + Case( + "F8", + "fence", + "Unruled route denied for PlatformAdmin (overrides bypass)", + "GET", + UNRULED_BAD, + "admin", + {403}, + ), + # ------------------------------------------------------------------ # + # G. Knob phase: on_invalid_plugin=quarantine + # + # platform_admin_exempt_from_service_only=true (restarted platform). 
# + # ------------------------------------------------------------------ # + Case( + "G1", + "knobs", + "Quarantine: ruled route of the offending plugin now fenced too", + "GET", + UNRULED_OK, + "service", + {403}, + phase="knobs", + ), + Case( + "G2", + "knobs", + "Quarantine: PlatformAdmin denied on quarantined namespace", + "GET", + UNRULED_OK, + "admin", + {403}, + phase="knobs", + ), + Case( + "G3", + "knobs", + "Exemption knob: PlatformAdmin now allowed on service-only route", + "GET", + SERVICE_ONLY, + "admin", + {200}, + phase="knobs", + ), + Case( + "G4", + "knobs", + "Exemption knob does NOT extend to plain humans", + "GET", + SERVICE_ONLY, + "nobody", + {403}, + phase="knobs", + ), + Case( + "G5", + "knobs", + "Service principal still allowed on service-only route (control)", + "GET", + SERVICE_ONLY, + "service", + {200}, + phase="knobs", + ), + Case( + "G6", + "knobs", + "Platform sanity under knob phase (admin lists workspaces)", + "GET", + WORKSPACES, + "admin", + {200}, + phase="knobs", + ), +] diff --git a/e2e/authz_oidc/report.py b/e2e/authz_oidc/report.py new file mode 100644 index 0000000000..ab9e054264 --- /dev/null +++ b/e2e/authz_oidc/report.py @@ -0,0 +1,76 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +"""Audit-report collector: request -> token claims -> expected -> observed.""" + +from __future__ import annotations + +import datetime +from dataclasses import asdict, dataclass, field + + +@dataclass +class Row: + case_id: str + group: str + description: str + method: str + path: str + identity: str + claims: str + expected: str + observed: int + passed: bool + phase: str + notes: str = "" + + +@dataclass +class ReportCollector: + rows: list[Row] = field(default_factory=list) + + def record(self, row: Row) -> None: + self.rows.append(row) + + def as_json(self) -> list[dict]: + return [asdict(r) for r in sorted(self.rows, key=lambda r: r.case_id)] + + def render(self) -> str: + ts = datetime.datetime.now(datetime.UTC).strftime("%Y-%m-%d %H:%M UTC") + total = len(self.rows) + passed = sum(r.passed for r in self.rows) + lines = [ + "# Authz E2E verification report (real OIDC, signed JWTs)", + "", + f"Generated {ts} by `e2e/authz_oidc` — {passed}/{total} cases passed.", + "", + "Identity for every request is an RS256-signed JWT minted by the in-harness", + "OIDC issuer and validated by the platform via JWKS discovery", + "(`auth.allow_unsigned_jwt=false`; no `X-NMP-Principal-*` headers anywhere).", + "", + ] + groups: dict[str, list[Row]] = {} + for row in sorted(self.rows, key=lambda r: r.case_id): + groups.setdefault(row.group, []).append(row) + for group, rows in groups.items(): + lines += [ + f"## {group} ({rows[0].phase} phase)" if all(r.phase == rows[0].phase for r in rows) else f"## {group}", + "", + ] + lines += [ + "| case | request | identity (claims) | expected | observed | result |", + "|------|---------|-------------------|----------|----------|--------|", + ] + for r in rows: + result = "PASS" if r.passed else "**FAIL**" + req = f"`{r.method} {r.path}`" + lines.append( + f"| {r.case_id} | {req} | {r.identity}: {r.claims} | {r.expected} | {r.observed} | {result} |" + ) + lines.append("") + for r in rows: + if 
r.notes or not r.passed: + note = r.notes or r.description + lines.append(f"- **{r.case_id}** — {r.description}. {note if r.notes else ''}".rstrip()) + lines.append("") + return "\n".join(lines) diff --git a/e2e/authz_oidc/test_authz_matrix.py b/e2e/authz_oidc/test_authz_matrix.py new file mode 100644 index 0000000000..1c1c734572 --- /dev/null +++ b/e2e/authz_oidc/test_authz_matrix.py @@ -0,0 +1,84 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""Parametrized authz verification matrix (see matrix.py for the case list).""" + +from __future__ import annotations + +import pytest +from harness import IDENTITIES, WS_A, WS_B, Platform +from idp import KID_ROGUE, MintSpec +from matrix import MATRIX, Case +from report import ReportCollector, Row + +pytestmark = pytest.mark.e2e + +_DEFECT_OVERRIDES: dict[str, dict] = { + "expired": {"expires_in": -3600}, + "wrong-issuer": {"issuer": "http://127.0.0.1:1/evil-issuer"}, + "wrong-audience": {"audience": "some-other-audience"}, + "unknown-key": {"kid": KID_ROGUE}, + "unsigned": {"unsigned": True}, +} + + +def _mint_token(platform: Platform, case: Case) -> tuple[str | None, str]: + """Return (token, claims-description) for the case's identity + defect.""" + if case.identity == "anonymous": + return None, "(no Authorization header)" + if case.token_defect == "garbage": + return "not-a-jwt-at-all", "(garbage bearer string)" + + info = IDENTITIES[case.identity] + overrides = dict(_DEFECT_OVERRIDES.get(case.token_defect or "", {})) + spec = MintSpec(sub=info["sub"], email=info["email"], scopes=case.scopes, **overrides) + return platform.issuer.mint(spec), platform.issuer.claims_summary(spec) + + +def _run_case(platform: Platform, case: Case, report: ReportCollector) -> None: + token, claims = _mint_token(platform, case) + path = case.path.format(wsA=WS_A, wsB=WS_B) + body = case.body + if body is not None: + body = {k: 
(v.format(case=case.id.lower()) if isinstance(v, str) else v) for k, v in body.items()} + + response = platform.request(case.method, path, token=token, body=body) + observed = response.status_code + + if isinstance(case.expected, str): + passed = observed != 403 + expected_str = "not 403" + else: + passed = observed in case.expected + expected_str = "/".join(str(c) for c in sorted(case.expected)) + + report.record( + Row( + case_id=case.id, + group=case.group, + description=case.description, + method=case.method, + path=path, + identity=case.identity, + claims=claims, + expected=expected_str, + observed=observed, + passed=passed, + phase=case.phase, + notes=case.notes, + ) + ) + assert passed, ( + f"[{case.id}] {case.description}: expected {expected_str}, observed {observed} " + f"for {case.method} {path} as {case.identity} ({claims}); body: {response.text[:300]}" + ) + + +@pytest.mark.parametrize("case", [c for c in MATRIX if c.phase == "default"], ids=lambda c: c.id) +def test_authz_default_knobs(platform: Platform, report: ReportCollector, case: Case) -> None: + _run_case(platform, case, report) + + +@pytest.mark.parametrize("case", [c for c in MATRIX if c.phase == "knobs"], ids=lambda c: c.id) +def test_authz_quarantine_and_admin_exempt_knobs(platform_knobs: Platform, report: ReportCollector, case: Case) -> None: + _run_case(platform_knobs, case, report) diff --git a/packages/nemo_platform_plugin/src/nemo_platform_plugin/authz.py b/packages/nemo_platform_plugin/src/nemo_platform_plugin/authz.py index 0a146c75b2..4e95c8f4ef 100644 --- a/packages/nemo_platform_plugin/src/nemo_platform_plugin/authz.py +++ b/packages/nemo_platform_plugin/src/nemo_platform_plugin/authz.py @@ -1,37 +1,294 @@ # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 -"""Authorization policy contributions for NeMo Platform plugins. +"""Authorization policy for NeMo Platform plugins. 
-Plugins declare API routes and permissions so the auth service can authorize -requests without hand-editing ``static-authz.yaml`` for every new surface. - -Contributions are merged at runtime when the OPA bundle is built, and can be +A plugin attaches :func:`path_rule` rules to its route handlers, referencing +:class:`Permission` objects from a typed :class:`PermissionSet`. The platform derives +the normalized policy — the permission catalog (id + description), the per-endpoint +bindings, and the namespace — *entirely from the routes* (see +:mod:`nemo_platform_plugin.authz_discovery`) when the OPA bundle is built; it can also be materialized into ``static-authz.yaml`` via ``auth-tools sync-plugins``. -Example (customization job collection):: +There is no separate permission declaration to keep in sync: the permission *is* the +object referenced on the route, and it carries its own description. The only thing a +service declares apart from its routes is the optional escape hatch +:meth:`NemoService.extra_permissions` — for permissions that are not 1:1 with a route +(e.g. checked in middleware). - from nemo_platform_plugin.authz import AuthzContribution, authz_for_workspace_job_collection +Example:: - # Backend contributors implement get_authz_contribution on the contributor class. - # CustomizationRouterService (nemo.services) aggregates them at policy discovery time. + from fastapi import APIRouter + from nemo_platform_plugin.authz import CallerKind, PermissionSet, path_rule, perm + from nemo_platform_plugin.service import NemoService, RouterSpec - class AutomodelContributor: - ... 
- def get_authz_contribution(self) -> AuthzContribution: - return authz_for_workspace_job_collection( - api_area="customization", - collection_suffix="/automodel/jobs", - permission_prefix="customization.automodel.jobs", - include_healthz=True, - healthz_suffix="/automodel/healthz", - ) + class ExamplePerms(PermissionSet, namespace="example"): + READ = perm("Read example items") # -> Permission("example.read", ...) + + router = APIRouter() + + @router.get("/v2/workspaces/{workspace}/items/{name}") + @path_rule(callers=[CallerKind.PRINCIPAL], permissions=[ExamplePerms.READ]) + async def get_item(workspace: str, name: str) -> dict: ... + + class ExampleService(NemoService): + name = "example" + + def get_routers(self) -> list[RouterSpec]: + return [RouterSpec(router)] """ from __future__ import annotations +from collections.abc import Callable from dataclasses import dataclass, field -from typing import Any +from enum import StrEnum +from typing import Any, TypeVar + +# --------------------------------------------------------------------------- +# Plugin authoring API: a typed permission vocabulary + path rules. +# +# Plugins declare permissions as ``Permission`` constants (typically grouped in a +# ``PermissionSet``) and attach ``PathRule``s to route handlers with ``@path_rule``, +# referencing those constants. The platform derives the wire-format +# ``AuthzContribution`` (below) from the routes at startup — there is no separate +# permission list. +# --------------------------------------------------------------------------- + + +class CallerKind(StrEnum): + """Who a route is intended for — a PDP *subject attribute*, not a permission. + + Plugin routes are ``PRINCIPAL`` (a normal authenticated user) or + ``SERVICE_PRINCIPAL`` (a caller whose id is prefixed ``service:``). There is + intentionally no ``ANON``: the only genuinely public routes are core + infrastructure, hardcoded as a bypass in the PEP. 
+ """ + + PRINCIPAL = "principal" + SERVICE_PRINCIPAL = "service_principal" + + +@dataclass(frozen=True) +class Permission: + """A service-owned permission: a stable id and a required human description. + + The id is the wire value (what path rules and roles reference); the description is + the one piece of authz data that cannot be derived from anything else, so it rides + on the permission itself rather than in a parallel list. ``str(permission)`` is the + id, so a ``Permission`` can be used wherever the wire string is expected. + """ + + id: str + description: str + + def __str__(self) -> str: + return self.id + + +@dataclass(frozen=True) +class AuthzScope: + """The OAuth scope a plugin owns, plus the permission namespace minted beneath it. + + A plugin declares one ``AuthzScope`` (``AuthzScope("data-designer")``) and the route + adapters derive every :class:`Permission` and scope list from it — the single front + door for plugin route authz, so the ``.`` id format lives in one + place rather than being hand-built per adapter. + + ``scope`` is the coarse OAuth grouping fed to :func:`scopes_for` (→ ``":read"`` / + ``":write"``). ``namespace`` is the dotted prefix for permission ids and defaults + to ``scope``; use :meth:`child` when the permission namespace nests deeper than the scope + (an ``agents`` scope with per-collection ``agents.`` permissions, say):: + + AuthzScope("agents").child("deployments").permission("create", description="...") + # -> Permission("agents.deployments.create", ...); scope stays "agents" + """ + + scope: str + # Empty string is the "default to ``scope``" sentinel, resolved in ``__post_init__``; + # a real permission namespace is never empty. Keeping the field ``str`` (not ``str | None``) + # means ``namespace`` is always the effective dotted prefix for readers and the type checker. 
+ namespace: str = "" + + def __post_init__(self) -> None: + if not self.namespace: + object.__setattr__(self, "namespace", self.scope) + + def child(self, *segments: str) -> AuthzScope: + """Return a scope whose permission namespace is deepened by *segments*; scope unchanged.""" + return AuthzScope(self.scope, ".".join((self.namespace, *segments))) + + def permission(self, *segments: str, description: str) -> Permission: + """Build the :class:`Permission` for an action under this namespace. + + ``AuthzScope("agents").permission("create", description="Create agents")`` → + ``Permission("agents.create", "Create agents")``. + """ + return Permission(".".join((self.namespace, *segments)), description) + + def read(self) -> list[str]: + """Read scopes for this area, e.g. ``["agents:read", "platform:read"]``. + + Built from :attr:`scope`, not :attr:`namespace`, so a :meth:`child` scope keeps the + parent area. + """ + return scopes_for(self.scope, write=False) + + def write(self) -> list[str]: + """Write scopes for this area, e.g. ``["agents:write", "platform:write"]``.""" + return scopes_for(self.scope, write=True) + + +@dataclass(frozen=True) +class _PendingPermission: + """A permission declared inside a :class:`PermissionSet` body before its namespace + is known. :meth:`PermissionSet.__init_subclass__` resolves it into a + :class:`Permission` once the namespace is bound.""" + + description: str + suffix: str | None = None + + +def perm(description: str, *, suffix: str | None = None) -> Any: + """Declare a permission inside a :class:`PermissionSet` body. + + The id is built as ``.`` unless *suffix* is given + (use *suffix* for compound ids, e.g. ``perm("...", suffix="configs.create")``). The + return type is ``Any`` so the class attribute type-checks as a :class:`Permission` + after ``__init_subclass__`` rewrites it. + """ + return _PendingPermission(description, suffix) + + +class PermissionSet: + """A closed, typed group of permissions under one namespace. 
+ + Subclass with ``namespace=`` and assign ``perm(...)`` members; each becomes a + :class:`Permission` whose id is ``.`` (or the + explicit ``suffix``). Referencing a member that doesn't exist is an ``AttributeError`` + at import — a permission typo can't reach the policy layer. + + class WidgetPerms(PermissionSet, namespace="widget"): + CREATE = perm("Create a widget") # -> Permission("widget.create", ...) + """ + + namespace: str + _members: dict[str, Permission] + + def __init_subclass__(cls, *, namespace: str, **kwargs: Any) -> None: + super().__init_subclass__(**kwargs) + cls.namespace = namespace + cls._members = {} + for name, value in list(vars(cls).items()): + if isinstance(value, _PendingPermission): + suffix = value.suffix or name.lower() + resolved = Permission(f"{namespace}.{suffix}", value.description) + setattr(cls, name, resolved) + cls._members[name] = resolved + + @classmethod + def all(cls) -> list[Permission]: + """Every permission declared on this set (handy for ``extra_permissions``).""" + return list(cls._members.values()) + + +@dataclass(frozen=True, kw_only=True) +class PathRule: + """One alternative authorization rule for a route handler. + + Within a rule, ``callers`` are OR'd and ``permissions`` are AND'd. Multiple rules on + one endpoint are OR'd (any satisfied rule allows access). + + ``method`` and ``path`` are unknown at decoration time and are filled in during + derivation once the route is mounted (see ``authz_discovery``). + """ + + callers: list[CallerKind] + permissions: list[Permission] = field(default_factory=list) + scopes: list[str] | None = None + method: str | None = None + path: str | None = None + + +# Attribute used to stash the (OR-combined) ``PathRule``s on a route handler. +# Mutated in place — the function is never wrapped — so ``route.endpoint`` +# identity survives FastAPI ``include_router(prefix=...)`` rebasing, which +# rebuilds ``APIRoute`` objects but passes the endpoint through by identity. 
PATH_RULES_ATTR = "__nemo_path_rules__"

_F = TypeVar("_F", bound=Callable[..., Any])


def path_rule(
    *,
    callers: list[CallerKind],
    permissions: list[Permission] | None = None,
    scopes: list[str] | None = None,
) -> Callable[[_F], _F]:
    """Attach an authorization rule to a route handler.

    Stacking ``@path_rule`` on the same handler adds alternative (OR) rules. The
    handler is returned **unchanged** (same object, same signature): the rule is
    stored on the function itself.

    Args:
        callers: Non-empty list of caller kinds this rule applies to (OR'd).
        permissions: :class:`Permission` objects the caller must hold (AND'd). May be
            empty for authenticated-but-permissionless routes.
        scopes: Optional list of scope strings. ``None`` means "no scopes declared".

    Raises:
        ValueError: if *callers* is empty, is a bare string, or contains an unknown
            caller kind.
        TypeError: if any *permissions* entry is not a :class:`Permission` (e.g. a bare
            string), or if *scopes* is a bare string or contains a non-string entry —
            caught at decoration so a typo can't silently reach the policy layer.
    """
    # Plain-data shape check first. ``list("agents:read")`` would otherwise explode a
    # bare string into single-character "scopes" without any error.
    resolved_scopes: list[str] | None = None
    if scopes is not None:
        if isinstance(scopes, str):
            raise TypeError(
                f"@path_rule scopes must be a list of scope strings, got a bare string ({scopes!r}). "
                f"Wrap it in a list."
            )
        resolved_scopes = list(scopes)
        for scope in resolved_scopes:
            if not isinstance(scope, str):
                raise TypeError(
                    f"@path_rule scopes must be strings, got {type(scope).__name__} ({scope!r})."
                )

    # A bare string would be iterated character by character and fail with a confusing
    # "'p' is not a valid CallerKind"; keep the ValueError type but say what is wrong.
    if isinstance(callers, str):
        raise ValueError(
            f"@path_rule callers must be a list of caller kinds, got a bare value ({callers!r}). "
            f"Wrap it in a list."
        )
    resolved_callers = [CallerKind(c) for c in callers]
    if not resolved_callers:
        raise ValueError("@path_rule requires at least one caller kind")

    resolved_permissions = list(permissions or [])
    for p in resolved_permissions:
        if not isinstance(p, Permission):
            raise TypeError(
                f"@path_rule permissions must be Permission objects, got {type(p).__name__} ({p!r}). "
                f"Reference a PermissionSet member (e.g. MyPerms.READ) rather than a bare string."
            )

    rule = PathRule(
        callers=resolved_callers,
        permissions=resolved_permissions,
        scopes=resolved_scopes,
    )

    def decorate(func: _F) -> _F:
        # Look in the function's own ``__dict__`` and create the list on first use;
        # later ``@path_rule`` decorators on the same handler append to that list.
        rules = func.__dict__.get(PATH_RULES_ATTR)
        if rules is None:
            rules = []
            setattr(func, PATH_RULES_ATTR, rules)
        rules.append(rule)
        return func

    return decorate


def get_path_rules(func: Callable[..., Any]) -> list[PathRule]:
    """Return a copy of the ``PathRule``s attached to *func* by :func:`path_rule` (empty if none)."""
    return list(getattr(func, PATH_RULES_ATTR, []))


def validate_caller_strings(callers: list[str] | None, *, context: str) -> None:
    """Validate wire-format caller kinds. Absence (``None``) is allowed.

    The valid set is derived from :class:`CallerKind` rather than hardcoded, so it
    cannot drift from the enum.

    Args:
        callers: Wire strings to check, or ``None`` to skip validation.
        context: Where the value came from; included in the error message.

    Raises:
        ValueError: if any value is not a known :class:`CallerKind` value.
    """
    if callers is None:
        return
    valid = {c.value for c in CallerKind}
    for c in callers:
        if c not in valid:
            raise ValueError(f"Invalid caller kind {c!r} in {context}: expected one of {sorted(valid)}.")
def scopes_for(api_area: str, write: bool) -> list[str]:
    """Normalized NeMo scopes for a route: the api-area scope plus the platform scope.

    Args:
        api_area: Scope area, e.g. ``"agents"``.
        write: ``True`` for the ``write`` verb, ``False`` for ``read``.

    Returns:
        ``["<api_area>:<verb>", "platform:<verb>"]``; a single entry when *api_area* is
        itself ``"platform"`` so the list never carries a duplicate scope.
    """
    verb = "write" if write else "read"
    # dict.fromkeys keeps first-seen order while dropping the repeated entry.
    return list(dict.fromkeys((f"{api_area}:{verb}", f"platform:{verb}")))
``/apis//v2/workspaces/{workspace}...``. - - Args: - api_area: URL segment after ``/apis/`` (e.g. ``customization``, ``safe-synthesizer``). - collection_suffix: Path after workspace (e.g. ``/automodel/jobs`` or ``/jobs``). - permission_prefix: Dot-separated permission namespace (e.g. ``customization.automodel.jobs``). - include_healthz: When true, register GET healthz with empty permissions (authenticated only). - healthz_suffix: Defaults to ``{first segment of collection_suffix}/healthz`` when omitted. - """ - if not collection_suffix.startswith("/"): - raise ValueError("collection_suffix must start with '/'") - base = f"/apis/{api_area}/v2/workspaces/{{workspace}}{collection_suffix}" - perms = _job_collection_permissions(permission_prefix) - prefix = permission_prefix - endpoints: dict[str, dict[str, AuthzEndpointMethod]] = { - base: { - "post": AuthzEndpointMethod( - permissions=[f"{prefix}.create"], - scopes=_scopes_for(api_area, write=True), - ), - "get": AuthzEndpointMethod( - permissions=[f"{prefix}.list"], - scopes=_scopes_for(api_area, write=False), - ), - }, - f"{base}/{{name}}": { - "get": AuthzEndpointMethod( - permissions=[f"{prefix}.read"], - scopes=_scopes_for(api_area, write=False), - ), - "delete": AuthzEndpointMethod( - permissions=[f"{prefix}.delete"], - scopes=_scopes_for(api_area, write=True), - ), - }, - f"{base}/{{name}}/cancel": { - "post": AuthzEndpointMethod( - permissions=[f"{prefix}.cancel"], - scopes=_scopes_for(api_area, write=True), - ), - }, - f"{base}/{{name}}/logs": { - "get": AuthzEndpointMethod( - permissions=[f"{prefix}.read"], - scopes=_scopes_for(api_area, write=False), - ), - }, - f"{base}/{{name}}/results": { - "get": AuthzEndpointMethod( - permissions=[f"{prefix}.read"], - scopes=_scopes_for(api_area, write=False), - ), - }, - f"{base}/{{name}}/status": { - "get": AuthzEndpointMethod( - permissions=[f"{prefix}.read"], - scopes=_scopes_for(api_area, write=False), - ), - }, - f"{base}/{{job}}/results/{{name}}": { - "get": 
AuthzEndpointMethod( - permissions=[f"{prefix}.read"], - scopes=_scopes_for(api_area, write=False), - ), - }, - f"{base}/{{job}}/results/{{name}}/download": { - "get": AuthzEndpointMethod( - permissions=[f"{prefix}.read"], - scopes=_scopes_for(api_area, write=False), - ), - }, - } - if include_healthz: - if healthz_suffix is None: - first = collection_suffix.strip("/").split("/")[0] - healthz_suffix = f"/{first}/healthz" - if not healthz_suffix.startswith("/"): - healthz_suffix = f"/{healthz_suffix}" - health_path = f"/apis/{api_area}/v2/workspaces/{{workspace}}{healthz_suffix}" - endpoints[health_path] = { - "get": AuthzEndpointMethod(permissions=[], scopes=[]), - } - - return AuthzContribution(permissions=perms, endpoints=endpoints) - - -def authz_for_workspace_function( - api_area: str, - function_suffix: str, - permission_prefix: str, - *, - read_only: bool = False, -) -> AuthzContribution: - """Build authz for one standard function route under ``/apis//v2/workspaces/{workspace}``.""" - if not function_suffix.startswith("/"): - raise ValueError("function_suffix must start with '/'") - permission = f"{permission_prefix}.exec" - return AuthzContribution( - permissions={permission: f"Execute {permission_prefix} function"}, - endpoints={ - f"/apis/{api_area}/v2/workspaces/{{workspace}}{function_suffix}": { - "post": AuthzEndpointMethod( - permissions=[permission], - scopes=_scopes_for(api_area, write=not read_only), - ), - } - }, - ) - - -def combine_authz_contributions(*contribs: AuthzContribution) -> AuthzContribution: - """Merge multiple :class:`AuthzContribution` objects into one (e.g. 
hub + backends).""" - merged = AuthzContribution() - for contrib in contribs: - merged.permissions.update(contrib.permissions) - for path, methods in contrib.endpoints.items(): - merged.endpoints.setdefault(path, {}).update(methods) - for role, perms in contrib.role_permissions.items(): - existing = merged.role_permissions.setdefault(role, []) - for perm in perms: - if perm not in existing: - existing.append(perm) - return merged diff --git a/packages/nemo_platform_plugin/src/nemo_platform_plugin/authz_discovery.py b/packages/nemo_platform_plugin/src/nemo_platform_plugin/authz_discovery.py index 461ab6069b..e68b4f9818 100644 --- a/packages/nemo_platform_plugin/src/nemo_platform_plugin/authz_discovery.py +++ b/packages/nemo_platform_plugin/src/nemo_platform_plugin/authz_discovery.py @@ -1,154 +1,396 @@ # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 -"""Discover plugin authorization contributions for policy merge.""" +"""Derive plugin authorization contributions from the ``NemoService`` route surface. + +Plugins attach :func:`~nemo_platform_plugin.authz.path_rule` rules to route handlers, +referencing :class:`~nemo_platform_plugin.authz.Permission` constants. This module +instantiates each discovered ``NemoService``, walks its mounted routes — computing the +same ``/apis//`` paths the platform mounts at runtime — reads the +function-attached :class:`~nemo_platform_plugin.authz.PathRule`\\ s, and builds the +wire-format :class:`~nemo_platform_plugin.authz.AuthzContribution` consumed by the OPA +bundle builder and ``auth-tools sync-plugins``. + +The permission catalog (ids + descriptions) and the service namespace are derived +*entirely from the routes* (plus the optional :meth:`NemoService.extra_permissions` +hatch). There is no separately-declared permission list: the permission is the object +referenced on the route, and it carries its own description. 
logger = logging.getLogger(__name__)


def _method_from_dict(spec: dict[str, Any]) -> AuthzEndpointMethod:
    """Parse a serialized endpoint-method dict back into :class:`AuthzEndpointMethod`.

    Only ``permissions``, ``scopes``, ``callers`` and ``deny`` are read; unknown keys
    are dropped. A missing or ``None`` ``scopes`` / ``callers`` stays ``None`` (as
    opposed to an empty list), and a missing ``permissions`` becomes ``[]``.
    """
    scopes = spec.get("scopes")
    callers = spec.get("callers")
    return AuthzEndpointMethod(
        permissions=list(spec.get("permissions") or []),
        scopes=list(scopes) if scopes is not None else None,
        callers=list(callers) if callers is not None else None,
        deny=bool(spec.get("deny", False)),
    )


def _wire_callers(rules: list[PathRule]) -> list[str] | None:
    """Union the caller kinds across an endpoint's (OR'd) rules into a sorted wire list.

    Returns ``None`` when no rule declares any caller.
    """
    kinds = {c.value if isinstance(c, CallerKind) else str(c) for rule in rules for c in rule.callers}
    return sorted(kinds) if kinds else None


def _collapse_rules(
    rules: list[PathRule], *, path: str, method: str, service: str
) -> tuple[list[Permission], list[str] | None, list[str] | None]:
    """Collapse the (OR'd) ``PathRule``\\ s on one ``(path, method)`` into one binding.

    OR across rules is supported only in the **caller** dimension: caller kinds are
    unioned, but ``permissions`` and ``scopes`` must agree across rules, since the wire
    format holds one permissions/scopes list per ``(path, method)``. A mismatch is
    rejected loudly rather than silently picking one rule's values.

    Returns:
        ``(permissions, scopes, callers)`` taken from the first rule, with ``callers``
        unioned over all rules.

    Raises:
        ValueError: if *rules* is empty, or the rules disagree on permissions or scopes.
    """
    if not rules:
        # Explicit error instead of a bare IndexError from ``rules[0]`` below.
        raise ValueError(f"{service}: {method.upper()} {path} has no @path_rule rules to collapse")

    distinct_permission_sets = {frozenset(p.id for p in rule.permissions) for rule in rules}
    if len(distinct_permission_sets) > 1:
        # Sort both levels so the diagnostic is stable across runs (set order is not).
        rendered = sorted(sorted(ids) for ids in distinct_permission_sets)
        raise ValueError(
            f"{service}: {method.upper()} {path} has @path_rule rules with differing "
            f"permissions ({rendered}). v1 cannot represent an OR of "
            f"distinct permission sets — use one rule with shared permissions, or a single "
            f"rule listing multiple callers."
        )
    distinct_scope_sets = {None if rule.scopes is None else frozenset(rule.scopes) for rule in rules}
    if len(distinct_scope_sets) > 1:
        raise ValueError(
            f"{service}: {method.upper()} {path} has @path_rule rules with differing scopes — "
            f"all rules on one endpoint must declare the same scopes."
        )

    representative = rules[0]
    return (
        list(representative.permissions),
        list(representative.scopes) if representative.scopes is not None else None,
        _wire_callers(rules),
    )


@dataclass
class PluginAuthzResult:
    """One plugin's derived authz, before the bundle applies its fail-mode policy.

    ``problems`` are deny-worthy **errors**: unruled routes, an OR of distinct permission
    sets, a duplicate ``(path, method)``, a malformed permission id, a permission outside
    the service's own namespace, or a load/derivation failure. Routes affected by a
    per-route problem are already emitted as explicit DENY bindings in ``contribution``.

    ``warnings`` are non-deny-worthy: a missing or conflicting permission *description*,
    or a method-less route HTTP authz cannot cover. They are surfaced but never deny a
    route, which is what stops a cosmetic description typo from degrading a whole plugin.
    """

    key: str
    contribution: AuthzContribution
    problems: list[str]
    warnings: list[str] = field(default_factory=list)
    mount_name: str = ""
    """The ``/apis/<mount_name>`` segment the service is mounted at (its
    ``NemoService.name``). Captured separately because it may diverge from the
    entry-point ``key``."""


def _deny_binding() -> AuthzEndpointMethod:
    """A wire binding marked ``deny=True`` with no permissions (fail-closed marker)."""
    return AuthzEndpointMethod(permissions=[], deny=True)


def _register_permission(catalog: dict[str, Permission], perm: Permission, warnings: list[str]) -> None:
    """Record *perm* in *catalog*, flagging a missing or conflicting description as a warning.

    The first registration of an id wins; a later one with a different description only
    adds a warning. Id-format validity and namespace ownership are checked over the
    whole catalog in :func:`_derive_service_contribution`.
    """
    if not perm.description:
        warnings.append(f"permission {perm.id!r} is missing a description")
    previous = catalog.get(perm.id)
    if previous is not None and previous.description != perm.description:
        warnings.append(
            f"permission {perm.id!r} defined with conflicting descriptions: "
            f"{previous.description!r} != {perm.description!r}"
        )
    catalog.setdefault(perm.id, perm)


def _derive_service_contribution(service: NemoService) -> tuple[AuthzContribution, list[str], list[str]]:
    """Derive one plugin's wire contribution, split into deny-worthy errors and warnings.

    Every mounted route must carry a valid ``@path_rule``. A route that doesn't — unruled,
    an unrepresentable OR of distinct permission sets, or a duplicate ``(path, method)`` —
    is emitted as an explicit DENY binding (never omitted). The permission catalog is
    derived from the permissions the routes reference plus ``extra_permissions()``; if any
    permission id is malformed, or sits outside the service's own namespace, every route
    is denied and no permissions are contributed.

    Returns:
        ``(contribution, errors, warnings)``. ``errors`` are deny-worthy; ``warnings`` are
        metadata-only and never deny a route.
    """
    errors: list[str] = []
    warnings: list[str] = []
    catalog: dict[str, Permission] = {}

    # Re-create the runtime mount: /apis/<service.name> + RouterSpec.prefix + route path.
    composed = APIRouter()
    for spec in service.get_routers():
        composed.include_router(spec.router, prefix=f"/apis/{service.name}{spec.prefix}")

    # Pass 1: walk routes, collapse OR'd rules, and collect referenced permissions.
    # ``bindings`` holds the tentative allow binding per (path, method); unruled / invalid
    # routes are recorded as None and become DENY regardless of namespace validity.
    bindings: dict[str, dict[str, AuthzEndpointMethod | None]] = {}
    for route in composed.routes:
        if not isinstance(route, APIRoute):
            # Mount / plain Starlette Route / WebSocket route — never silently skipped.
            other_path = getattr(route, "path", None) or repr(route)
            other_methods = sorted(getattr(route, "methods", None) or set())
            if other_methods:
                # Has HTTP methods but no derivable rule: deny those methods and flag it.
                errors.append(f"{other_path} is a {type(route).__name__}, not an APIRoute — denied (fail-closed)")
                for http_method in other_methods:
                    bindings.setdefault(other_path, {})[http_method.lower()] = None
            else:
                # Method-less (WebSocket / ASGI): a derived deny would be inert, so this is
                # surfaced as a (non-deny) warning instead.
                warnings.append(
                    f"{other_path} is a {type(route).__name__}, not an APIRoute — HTTP authz cannot "
                    f"cover it (WebSocket/ASGI routes bypass the BaseHTTPMiddleware PDP)"
                )
            continue

        methods = sorted(route.methods or set())
        rules = get_path_rules(route.endpoint)

        binding: AuthzEndpointMethod | None
        if not rules:
            binding = None
            errors.append(f"{route.path} ({', '.join(methods) or 'no methods'}) has no @path_rule")
        else:
            try:
                rule_permissions, scopes, callers = _collapse_rules(
                    rules, path=route.path, method=methods[0] if methods else "", service=service.name
                )
            except (ValueError, AttributeError, TypeError) as exc:
                # A single malformed rule denies only its own route. The broad catch also
                # covers a bare-string permission that slipped past @path_rule (``p.id``
                # raises AttributeError), not just the unrepresentable-OR ValueError.
                binding = None
                errors.append(str(exc))
            else:
                for perm in rule_permissions:
                    _register_permission(catalog, perm, warnings)
                binding = AuthzEndpointMethod(
                    permissions=[perm.id for perm in rule_permissions], scopes=scopes, callers=callers
                )

        for http_method in methods:
            method_key = http_method.lower()
            route_methods = bindings.setdefault(route.path, {})
            if method_key in route_methods:
                # Two handlers claim the same (path, method). Rather than let the last
                # writer silently win, fail the pair closed and flag it.
                errors.append(
                    f"duplicate route binding for {http_method.upper()} {route.path} — a second "
                    f"handler would shadow the first; refusing to guess which policy applies"
                )
                route_methods[method_key] = None
            else:
                route_methods[method_key] = binding

    # Permissions with no 1:1 route. A broken extra_permissions() must NOT abort
    # derivation, so the result is materialized *inside* the try (a generator can raise
    # lazily, and a ``None`` return must not blow up the loop) and each entry is
    # type-checked before it is registered.
    try:
        extra = list(service.extra_permissions() or [])
    except Exception as exc:
        extra = []
        errors.append(f"extra_permissions() raised {exc!r}")
    for perm in extra:
        if not isinstance(perm, Permission):
            errors.append(f"extra_permissions() returned a non-Permission entry {perm!r} (ignored)")
            continue
        _register_permission(catalog, perm, warnings)

    # Pass 2: validate the catalog. A malformed id, or an id whose first dotted segment
    # is not the service's own name, fails the whole plugin closed: deny every route and
    # contribute no permissions.
    owner = service.name
    malformed = sorted(pid for pid in catalog if not is_valid_permission_id(pid))
    out_of_namespace = sorted(pid for pid in catalog if pid.split(".", 1)[0] != owner)
    if malformed:
        errors.append(f"malformed permission id(s) (fail-closed): {malformed}")
    if out_of_namespace:
        errors.append(f"permission id(s) outside the service namespace {owner!r} (fail-closed): {out_of_namespace}")
    if malformed or out_of_namespace:
        denied = {path: {method: _deny_binding() for method in methods} for path, methods in bindings.items()}
        return AuthzContribution(permissions={}, endpoints=denied), errors, warnings

    endpoints: dict[str, dict[str, AuthzEndpointMethod]] = {
        path: {method: (binding if binding is not None else _deny_binding()) for method, binding in methods.items()}
        for path, methods in bindings.items()
    }
    permission_catalog = {perm.id: perm.description for perm in catalog.values()}
    return AuthzContribution(permissions=permission_catalog, endpoints=endpoints), errors, warnings


_plugin_authz_cache: list[PluginAuthzResult] | None = None


def _cached_plugin_authz() -> list[PluginAuthzResult] | None:
    """The cached derivation, or ``None`` if nothing is cached."""
    return _plugin_authz_cache


def _cache_plugin_authz(results: list[PluginAuthzResult]) -> None:
    """Cache *results*, but only when every result has no ``problems``.

    A degraded result (say, a transient import error on first build) is never pinned, so
    the next call re-derives instead of serving the failure until the process restarts.
    """
    global _plugin_authz_cache
    if all(not result.problems for result in results):
        _plugin_authz_cache = results


def discover_plugin_authz() -> list[PluginAuthzResult]:
    """Derive per-plugin authz results from every installed ``nemo.services`` entry point.

    Each entry point is loaded and its service class instantiated in its own
    ``try/except``, so a single broken plugin cannot take down derivation for the others.
    Both a *load* failure and a *derivation* failure are recorded as a degraded result
    (a problem plus an empty contribution) rather than silently dropped.

    Only an all-clean derivation is cached (see :func:`_cache_plugin_authz`); call
    :func:`clear_plugin_authz_cache` in tests after changing the installed plugin set.
    """
    cached = _cached_plugin_authz()
    if cached is not None:
        return cached

    from nemo_platform_plugin.discovery import discover_entry_points

    results: list[PluginAuthzResult] = []
    for ep_name, ep in discover_entry_points("nemo.services").items():
        try:
            service_cls = ep.load()
        except Exception as exc:
            logger.warning("Failed to load nemo.services %r — recording as degraded", ep_name, exc_info=True)
            results.append(
                PluginAuthzResult(
                    key=ep_name,
                    contribution=AuthzContribution(),
                    problems=[f"failed to load plugin: {exc!r}"],
                    mount_name=ep_name,
                )
            )
            continue
        # Read the mount name off the class, so it is available even if instantiation
        # below fails; fall back to the entry-point name when it is missing or empty.
        mount_name = getattr(service_cls, "name", ep_name) or ep_name
        try:
            contribution, errors, warnings = _derive_service_contribution(service_cls())
        except Exception as exc:
            logger.warning(
                "Failed to derive authz from nemo.services %r — recording as degraded", ep_name, exc_info=True
            )
            results.append(
                PluginAuthzResult(
                    key=ep_name,
                    contribution=AuthzContribution(),
                    problems=[f"failed to derive plugin authz: {exc!r}"],
                    mount_name=mount_name,
                )
            )
            continue
        results.append(
            PluginAuthzResult(
                key=ep_name, contribution=contribution, problems=errors, warnings=warnings, mount_name=mount_name
            )
        )
    _cache_plugin_authz(results)
    return results


def clear_plugin_authz_cache() -> None:
    """Reset the cached all-clean plugin-authz derivation.

    Call in tests after changing the installed plugin set (alongside
    ``discover_entry_points.cache_clear()``).
    """
    global _plugin_authz_cache
    _plugin_authz_cache = None


def discover_authz_contributions() -> list[AuthzContribution]:
    """Plugin contributions with content (compat shim over :func:`discover_plugin_authz`).

    A contribution is kept when it declares at least one permission or endpoint.
    """
    return [r.contribution for r in discover_plugin_authz() if r.contribution.permissions or r.contribution.endpoints]
b/packages/nemo_platform_plugin/src/nemo_platform_plugin/authz_format.py index 247315f6ff..6a1738c8b4 100644 --- a/packages/nemo_platform_plugin/src/nemo_platform_plugin/authz_format.py +++ b/packages/nemo_platform_plugin/src/nemo_platform_plugin/authz_format.py @@ -124,10 +124,12 @@ def validate_runtime_authorize_scopes(scopes: list[str] | None) -> None: def validate_static_authz_data(data: dict[str, Any]) -> None: - """Validate permission and scope string formats in loaded static authorization data. + """Validate permission, scope, and caller-kind string formats in loaded static authz data. - Call after parsing ``static-authz.yaml``. Raises the same exceptions as the granular - validators above. + Call after parsing ``static-authz.yaml``. Validates ``roles[*].permissions``, + ``endpoints[*][*].{permissions,scopes,callers}``. Raises the same exceptions as the granular + validators above (and ``ValueError`` for an unknown caller kind) — catching a hand-edited + bad value at load/build time rather than letting it fail silently in policy checks. """ authz = data.get("authz") if not isinstance(authz, dict): @@ -165,3 +167,14 @@ def validate_static_authz_data(data: dict[str, Any]) -> None: [s for s in sc_list if isinstance(s, str)], context=f"endpoints[{path!r}].{method_name}.scopes", ) + callers_list = op.get("callers") + if isinstance(callers_list, list): + # Validate caller-kind strings (e.g. hand-edited static-authz.yaml). Local + # import keeps this format module light and avoids importing the authoring + # surface unless callers are actually present. 
+ from nemo_platform_plugin.authz import validate_caller_strings + + validate_caller_strings( + [c for c in callers_list if isinstance(c, str)], + context=f"endpoints[{path!r}].{method_name}.callers", + ) diff --git a/packages/nemo_platform_plugin/src/nemo_platform_plugin/customization_contributor.py b/packages/nemo_platform_plugin/src/nemo_platform_plugin/customization_contributor.py index e235039d7e..9484818fca 100644 --- a/packages/nemo_platform_plugin/src/nemo_platform_plugin/customization_contributor.py +++ b/packages/nemo_platform_plugin/src/nemo_platform_plugin/customization_contributor.py @@ -9,7 +9,6 @@ from typing import Any, ClassVar, Protocol, runtime_checkable import typer -from nemo_platform_plugin.authz import AuthzContribution from nemo_platform_plugin.service import RouterSpec @@ -40,17 +39,12 @@ def get_routers(self) -> list[RouterSpec]: """HTTP routes for this backend (workspace-scoped prefix per backend).""" def get_cli(self) -> typer.Typer | None: - """CLI subgroup mounted at ``nemo customization ``.""" + """CLI subgroup mounted at ``nemo customization ``. - def get_authz_contribution(self) -> AuthzContribution | None: - """Optional authorization policy (endpoints + permissions) for this contributor. - - Return :class:`~nemo_platform_plugin.authz.AuthzContribution`. Policy is - aggregated by :class:`~nemo_customizer.router.CustomizationRouterService` - (``nemo.services``) at discovery time — do not register a separate - ``nemo.authz`` entry point for customization backends. + HTTP authorization is **not** declared here: it is derived from the + ``@path_rule``-decorated routes returned by :meth:`get_routers`, which the + customization hub aggregates into its own ``nemo.services`` route surface. """ - ... def get_sdk_resources(self) -> CustomizationContributorSDKResources | None: """Return SDK resource classes for ``client.customization.``. 
diff --git a/packages/nemo_platform_plugin/src/nemo_platform_plugin/discovery.py b/packages/nemo_platform_plugin/src/nemo_platform_plugin/discovery.py index e5bf6a4b25..9d3c8c8727 100644 --- a/packages/nemo_platform_plugin/src/nemo_platform_plugin/discovery.py +++ b/packages/nemo_platform_plugin/src/nemo_platform_plugin/discovery.py @@ -24,7 +24,11 @@ ``nemo.inference_middleware`` → :func:`discover_inference_middleware` — :class:`~nemo_platform_plugin.inference_middleware.NemoInferenceMiddleware` subclass (typed, IGW instantiates) ``nemo.customization.contributors`` → :func:`discover_customization_contributors` — :class:`~nemo_platform_plugin.customization_contributor.CustomizationContributor` instance (typed, customization router instantiates) ``nemo.seed`` → :func:`discover_seed_jobs` — :class:`~nemo_platform_plugin.seed.NemoSeedJob` subclass (typed, platform instantiates) -``nemo.authz`` → :func:`~nemo_platform_plugin.authz_discovery.discover_authz_contributions` — policy endpoints/permissions (merged at runtime and via ``auth-tools sync-plugins``) + +Plugin HTTP authorization is **not** a discovery surface: it is derived from +``nemo.services`` (each :class:`~nemo_platform_plugin.service.NemoService`'s +``@path_rule``-decorated routes, plus the optional ``extra_permissions`` hatch) by +:func:`~nemo_platform_plugin.authz_discovery.discover_authz_contributions`. Wrappers for surfaces whose types are not yet defined in this package return ``dict[str, Any]`` — callers cast as needed. 
@@ -79,7 +83,6 @@ "nemo.inference_middleware", "nemo.customization.contributors", "nemo.seed", - "nemo.authz", ) # Surface groups whose entry-point keys are dot-separated as @@ -103,7 +106,6 @@ "nemo.inference_middleware": "NEMO_PLUGIN_INFERENCE_MIDDLEWARE_ALLOWLIST", "nemo.customization.contributors": "NEMO_PLUGIN_CUSTOMIZATION_CONTRIBUTORS_ALLOWLIST", "nemo.seed": "NEMO_PLUGIN_SEED_ALLOWLIST", - "nemo.authz": "NEMO_PLUGIN_AUTHZ_ALLOWLIST", } CUSTOMIZATION_CONTRIBUTORS_GROUP = "nemo.customization.contributors" diff --git a/packages/nemo_platform_plugin/src/nemo_platform_plugin/functions/routes.py b/packages/nemo_platform_plugin/src/nemo_platform_plugin/functions/routes.py index 331f872b68..4484b650a6 100644 --- a/packages/nemo_platform_plugin/src/nemo_platform_plugin/functions/routes.py +++ b/packages/nemo_platform_plugin/src/nemo_platform_plugin/functions/routes.py @@ -64,6 +64,7 @@ from fastapi import APIRouter, Depends, Header from fastapi.responses import StreamingResponse +from nemo_platform_plugin.authz import AuthzScope, CallerKind, path_rule from nemo_platform_plugin.dependencies import get_sdk_client from nemo_platform_plugin.function import NemoFunction, returns_async_iterator from nemo_platform_plugin.function_context import FunctionContext @@ -103,6 +104,8 @@ def add_function_routes( function_cls: type[NemoFunction], *, heartbeat_interval_seconds: float = HEARTBEAT_INTERVAL_SECONDS, + authz: AuthzScope | None = None, + permission_description: str | None = None, ) -> APIRouter: """Mount a single ``POST`` route for *function_cls* on a fresh router. @@ -115,6 +118,14 @@ def add_function_routes( responses. Defaults to :data:`HEARTBEAT_INTERVAL_SECONDS`. Lower values are useful in tests; production callers usually leave the default. + authz: The plugin's :class:`~nemo_platform_plugin.authz.AuthzScope`. + When set, a PRINCIPAL ``@path_rule`` is stamped on the route with + an invoke permission minted from it (``.``, + a write action). 
When omitted the route is left unruled — denied + fail-closed at bundle time. + permission_description: Optional human description for the invoke + permission. Defaults to ``function_cls.description`` or + ``"Invoke the <name> function"``. Requires ``authz``. Returns: An :class:`APIRouter` with one route. The caller mounts it @@ -124,6 +135,9 @@ def add_function_routes( Raises: TypeError: If ``function_cls`` doesn't declare a ``spec_schema``. + ValueError: If ``permission_description`` is given without ``authz`` + (the description rides on the permission stamped from ``authz``, + so alone it would be silently discarded). """ spec_schema = getattr(function_cls, "spec_schema", None) if spec_schema is None: @@ -133,6 +147,12 @@ def add_function_routes( f"subclass on the NemoFunction class." ) + if permission_description is not None and authz is None: + raise ValueError( + "permission_description requires authz to be set (the description rides on the " + "permission stamped from authz); supplying it alone would be silently discarded." + ) + router = APIRouter() instance = function_cls() run_params = function_cls.run_signature().parameters @@ -161,6 +181,20 @@ def add_function_routes( handler.__name__ = f"{function_cls.__name__}__route" handler.__doc__ = function_cls.description or f"Invoke the {function_cls.name} function." + if authz is not None: + # Invoking a function is a write action; the permission id defaults to <namespace>.<name>.
+ permission = authz.permission( + function_cls.name, + description=permission_description + or function_cls.description + or f"Invoke the {function_cls.name} function", + ) + path_rule( + callers=[CallerKind.PRINCIPAL], + permissions=[permission], + scopes=authz.write(), + )(handler) + router.post( path, summary=function_cls.description or f"Invoke {function_cls.name}", diff --git a/packages/nemo_platform_plugin/src/nemo_platform_plugin/jobs/api_factory.py b/packages/nemo_platform_plugin/src/nemo_platform_plugin/jobs/api_factory.py index 63981b18e2..350b932490 100644 --- a/packages/nemo_platform_plugin/src/nemo_platform_plugin/jobs/api_factory.py +++ b/packages/nemo_platform_plugin/src/nemo_platform_plugin/jobs/api_factory.py @@ -51,6 +51,7 @@ from nemo_platform.types.jobs.platform_job_step_spec_param import Executor from nemo_platform_plugin.api.filter import ComparisonOperation, FilterOperation, FilterOperator, LogicalOperation from nemo_platform_plugin.api.parsed_filter import ParsedFilter, make_filter_dep +from nemo_platform_plugin.authz import AuthzScope, CallerKind, path_rule from nemo_platform_plugin.dependencies import get_entity_client, get_sdk_client from nemo_platform_plugin.entities import EntityClient from nemo_platform_plugin.jobs.docker import validate_gpu_available_for_docker @@ -86,6 +87,19 @@ EnvironmentVariable = PlatformJobEnvironmentVariableParam EnvironmentVariableFromSecret = PlatformJobSecretEnvironmentVariableRefParam +# Descriptions stamped onto the standard job permissions, keyed by verb. The catalog is +# derived from the routes, so these descriptions are the source of truth for the generated +# permission registry (no separate declaration to keep in sync). 
+_JOB_PERMISSION_DESCRIPTIONS: dict[str, str] = { + "create": "Create {ns} jobs", + "list": "List {ns} jobs", + "read": "Read {ns} jobs, including status, logs, and results", + "delete": "Delete {ns} jobs", + "cancel": "Cancel {ns} jobs", + "pause": "Pause {ns} jobs", + "resume": "Resume {ns} jobs", +} + JobConfigT = TypeVar("JobConfigT", bound=BaseModel) JobInputT = TypeVar("JobInputT", bound=BaseModel) JobOutputT = TypeVar("JobOutputT", bound=BaseModel) @@ -676,6 +690,7 @@ def job_route_factory( job_output: JobSchemaLike | None = None, input_to_output: InputToOutputTransformer | InputToOutputTransformerAsync | None = None, generate_job_name: JobNameGenerator | None = None, + authz: AuthzScope | None = None, ) -> APIRouter: """Create a job router with standard CRUD operations. @@ -747,6 +762,25 @@ def platform_job_config_compiler( router = APIRouter() service_name = service_name.lower() + def _stamp(endpoint: Callable[..., Any], *, perm: str, write: bool) -> Callable[..., Any]: + """Attach a PRINCIPAL ``@path_rule`` to a generated job route. + + Inert unless the caller passed an ``authz`` scope — so unmigrated callers keep + emitting unauthz'd routes (handled by the bundle fail-mode). Returns *endpoint* + so it can wrap download closures inline. + """ + if authz is not None: + permission = authz.permission( + perm, + description=_JOB_PERMISSION_DESCRIPTIONS[perm].format(ns=authz.namespace), + ) + path_rule( + callers=[CallerKind.PRINCIPAL], + permissions=[permission], + scopes=authz.write() if write else authz.read(), + )(endpoint) + return endpoint + # These lines dynamically create new classes, named for the client microservice # using the job route factory, for use as input and output types in the FastAPI routes. # This style (using `type` with three args) ensures the class is named properly, i.e. 
@@ -1022,6 +1056,18 @@ async def get_job_result( result_dict["download_url"] = f"{request.url}/download" return PlatformJobResultResponse(**result_dict) + # Stamp authorization rules on the generated routes (PRINCIPAL caller). Reads use + # one shared .read permission; mutating routes get their own permission. + _stamp(create_job, perm="create", write=True) + _stamp(list_jobs, perm="list", write=False) + _stamp(get_job, perm="read", write=False) + _stamp(get_job_status, perm="read", write=False) + _stamp(delete_job, perm="delete", write=True) + _stamp(cancel_job, perm="cancel", write=True) + _stamp(get_job_logs, perm="read", write=False) + _stamp(list_job_results, perm="read", write=False) + _stamp(get_job_result, perm="read", write=False) + # Result downloads: # Services that use the api factory can utilize `job_result_routes` to map specific # `result_names`s to differently shaped objects. This can make the generated SDK smarter @@ -1132,7 +1178,7 @@ async def route( router.add_api_route( name=f"download_job_result_{job_result_route.name}", path=f"/jobs/{{job}}/results/{job_result_route.name}/download", - endpoint=_make_explicit_download_endpoint(job_result_route), + endpoint=_stamp(_make_explicit_download_endpoint(job_result_route), perm="read", write=False), **job_result_route.serializer.route_kwargs(), ) @@ -1142,7 +1188,7 @@ async def route( router.add_api_route( name="download_job_result", path="/jobs/{job}/results/{name}/download", - endpoint=_make_generic_download_endpoint(file_result_serializer), + endpoint=_stamp(_make_generic_download_endpoint(file_result_serializer), perm="read", write=False), **file_result_serializer.route_kwargs(), ) @@ -1175,4 +1221,7 @@ async def resume_job( job_resp = await sdk.jobs.resume(name=name, workspace=workspace) return from_response(job_resp) + _stamp(pause_job, perm="pause", write=True) + _stamp(resume_job, perm="resume", write=True) + return router diff --git 
a/packages/nemo_platform_plugin/src/nemo_platform_plugin/jobs/routes.py b/packages/nemo_platform_plugin/src/nemo_platform_plugin/jobs/routes.py index 4a921a613d..cf606122c4 100644 --- a/packages/nemo_platform_plugin/src/nemo_platform_plugin/jobs/routes.py +++ b/packages/nemo_platform_plugin/src/nemo_platform_plugin/jobs/routes.py @@ -33,16 +33,14 @@ generate_job_name=my_name_generator, ) -Scope note (phase-1, MR 1.1b): ``add_job_routes`` applies -:func:`stamp_profile` to the compiled ``PlatformJobSpec`` using -``default_profile``. It does *not* yet thread ``profile`` / ``options`` -body fields through :class:`BaseJobRequest` — those land together with -the ``BaseJobRequest`` extension in a follow-up MR. The -submitter-facing CLI flags (``--profile``, ``-o``) from MR 1.3b reach -``submit_remote``, which POSTs them in the body; the server currently -silently drops them (Pydantic ``extra="ignore"`` default). The wrapper -passes ``profile=None`` / ``options=None`` to ``NemoJob.compile`` -until the body shape is extended. +``add_job_routes`` applies :func:`stamp_profile` to the compiled +``PlatformJobSpec`` using ``default_profile``. It does *not* yet thread +submitter-provided ``profile`` / ``options`` body fields through +:class:`BaseJobRequest`; the wrapper passes ``profile=None`` / +``options=None`` to ``NemoJob.compile`` until the request body shape is +extended. (The submitter CLI flags ``--profile`` / ``-o`` reach +``submit_remote`` and are POSTed in the body, where the server currently +ignores them via Pydantic's ``extra="ignore"`` default.) 
""" from __future__ import annotations @@ -51,6 +49,7 @@ from fastapi import APIRouter from fastapi.routing import APIRoute +from nemo_platform_plugin.authz import AuthzScope from nemo_platform_plugin.job import job_collection_path_for from nemo_platform_plugin.jobs.api_factory import ( JobRouteOption, @@ -74,6 +73,7 @@ def add_job_routes( job_result_routes: list[PlatformJobResultRoute] | None = None, generate_job_name: "Callable[..., str] | None" = None, default_profile: str = "default", + authz: AuthzScope | None = None, ) -> APIRouter: """Mount submit/list/get/delete routes for *job_cls* on a fresh router. @@ -101,9 +101,8 @@ def add_job_routes( provide a ``name``. Passthrough to the factory. default_profile: Profile label stamped onto each step of the compiled ``PlatformJobSpec`` when the plugin's ``compile`` - didn't set one explicitly. Matches the plan's default-profile - behavior; submitter-chosen ``--profile`` plumbing lands in a - follow-up MR that extends ``BaseJobRequest``. + didn't set one explicitly. Submitter-chosen ``--profile`` + plumbing is not yet wired through ``BaseJobRequest``. Returns: An :class:`APIRouter` with the standard job endpoints mounted. @@ -139,6 +138,7 @@ def add_job_routes( route_options=route_options, job_result_routes=job_result_routes, generate_job_name=generate_job_name, + authz=authz, ) return _rebase_job_collection_routes(router, job_collection_path_for(job_cls)) @@ -260,8 +260,8 @@ def _adapt_compile( The factory calls ``compiler(workspace, original_spec, transformed_spec, entity_client, job_name, sdk)``. :meth:`NemoJob.compile` is an ``async classmethod`` that uses kwargs and also accepts - ``profile`` / ``options`` — phase 1 MR 1.1b passes ``None`` for - both (body-field wiring is a follow-up). After ``compile`` returns, + ``profile`` / ``options``; both are passed as ``None`` until the + request body shape threads them through. After ``compile`` returns, the adapter applies :func:`stamp_profile` with ``default_profile``. 
Missing-override errors from the ``NemoJob.compile`` base marker diff --git a/packages/nemo_platform_plugin/src/nemo_platform_plugin/service.py b/packages/nemo_platform_plugin/src/nemo_platform_plugin/service.py index f4db9854ef..2eb2436950 100644 --- a/packages/nemo_platform_plugin/src/nemo_platform_plugin/service.py +++ b/packages/nemo_platform_plugin/src/nemo_platform_plugin/service.py @@ -41,7 +41,7 @@ async def health() -> dict[str, str]: from fastapi import APIRouter from nemo_platform_plugin._base import _NamedPlugin -from nemo_platform_plugin.authz import AuthzContribution +from nemo_platform_plugin.authz import Permission from starlette.requests import Request from starlette.responses import Response @@ -116,16 +116,18 @@ async def on_shutdown(self) -> None: The default implementation does nothing. """ - @classmethod - def get_authz_contribution(cls) -> AuthzContribution | None: - """Optional authorization policy for routes under ``/apis//``. + def extra_permissions(self) -> list[Permission]: + """Permissions this service owns that are *not* attached to a route. - Override as a **classmethod** on the :class:`NemoService` subclass (``discover_services`` - loads classes, not instances). Return - :class:`~nemo_platform_plugin.authz.AuthzContribution` or register a ``nemo.authz`` - entry point. Default: no plugin-specific authz. + The permission catalog is normally derived entirely from the + :func:`~nemo_platform_plugin.authz.path_rule` rules on ``get_routers()``. Override + this only for permissions with no 1:1 route — e.g. ones checked in middleware, or + declared ahead of the route that will reference them. These are merged into the + derived catalog (and its default role grants) alongside the route-derived ones. + + Default: none. """ - return None + return [] def get_exception_handlers(self) -> dict[type[Exception], ExceptionHandler]: """Return a mapping of exception types to handler functions. 
diff --git a/packages/nemo_platform_plugin/tests/test_authz.py b/packages/nemo_platform_plugin/tests/test_authz.py index 6f43fcd490..44351f884c 100644 --- a/packages/nemo_platform_plugin/tests/test_authz.py +++ b/packages/nemo_platform_plugin/tests/test_authz.py @@ -3,41 +3,23 @@ from __future__ import annotations -from pathlib import Path -from unittest.mock import MagicMock - import httpx -import pytest -import yaml -from fastapi.routing import APIRoute +from fastapi import APIRouter from nemo_platform_plugin.authz import ( - AuthzContribution, - AuthzEndpointMethod, - authz_for_workspace_job_collection, - combine_authz_contributions, + AuthzScope, + CallerKind, + Permission, + path_rule, + scopes_for, ) from nemo_platform_plugin.authz_discovery import ( - AUTHZ_GROUP, - _collect_from_plugin_surface, + _derive_service_contribution, + clear_plugin_authz_cache, discover_authz_contributions, ) -from nemo_platform_plugin.authz_format import validate_static_authz_data -from nemo_platform_plugin.authz_merge import merge_authz_contributions -from nemo_platform_plugin.discovery import discover_services from nemo_platform_plugin.job import NemoJob from nemo_platform_plugin.scheduler import NemoJobScheduler -from nemo_platform_plugin.service import NemoService - - -def _example_automodel_authz() -> AuthzContribution: - """Example policy for a customization job collection (see authz module docstring).""" - return authz_for_workspace_job_collection( - api_area="customization", - collection_suffix="/automodel/jobs", - permission_prefix="customization.automodel.jobs", - include_healthz=True, - healthz_suffix="/automodel/healthz", - ) +from nemo_platform_plugin.service import NemoService, RouterSpec class _ExampleSubmitJob(NemoJob): @@ -51,155 +33,166 @@ def run(self, config: dict) -> dict: _ExampleSubmitJob.__module__ = "example_plugin.jobs.example_submit" -def test_authz_for_workspace_job_collection_paths() -> None: - contrib = _example_automodel_authz() - assert 
"/apis/customization/v2/workspaces/{workspace}/automodel/jobs" in contrib.endpoints - post = contrib.endpoints["/apis/customization/v2/workspaces/{workspace}/automodel/jobs"]["post"] - assert post.permissions == ["customization.automodel.jobs.create"] - assert "customization:write" in (post.scopes or []) - assert "customization.automodel.jobs.create" in contrib.permissions +class _FakeEntryPoint: + """Minimal EntryPoint stand-in: discover_plugin_authz only calls ``.load()`` / reads ``.name``.""" - endpoints = contrib.endpoints - cancel = endpoints["/apis/customization/v2/workspaces/{workspace}/automodel/jobs/{name}/cancel"]["post"] - status = endpoints["/apis/customization/v2/workspaces/{workspace}/automodel/jobs/{name}/status"]["get"] - download = endpoints["/apis/customization/v2/workspaces/{workspace}/automodel/jobs/{job}/results/{name}/download"][ - "get" - ] - assert cancel.permissions == ["customization.automodel.jobs.cancel"] - assert status.permissions == ["customization.automodel.jobs.read"] - assert download.permissions == ["customization.automodel.jobs.read"] - assert "customization.automodel.jobs.cancel" in contrib.permissions + def __init__(self, name: str, loader) -> None: + self.name = name + self.value = f"test:{name}" + self._loader = loader + def load(self): + return self._loader() -def test_service_class_get_authz_contribution_without_instance() -> None: - """discover_services yields classes; get_authz_contribution must be a classmethod.""" - class _Svc(NemoService): - name = "example-svc" - dependencies = [] - - @classmethod - def get_authz_contribution(cls) -> AuthzContribution: - return authz_for_workspace_job_collection( - api_area="example-svc", - collection_suffix="/jobs", - permission_prefix="example-svc.jobs", - ) +def test_authz_scope_mints_scopes_from_oauth_area() -> None: + """Scope helpers mirror scopes_for(self.scope, ...); .child() keeps the parent area.""" + agents = AuthzScope("agents") + assert agents.read() == 
scopes_for("agents", write=False) == ["agents:read", "platform:read"] + assert agents.write() == scopes_for("agents", write=True) == ["agents:write", "platform:write"] + # child() deepens the permission namespace but the scope area (hence the scopes) is unchanged. + nested = agents.child("deployments") + assert nested.namespace == "agents.deployments" + assert nested.scope == "agents" + assert nested.write() == scopes_for("agents", write=True) - def get_routers(self): - return [] - contribs = _collect_from_plugin_surface({"example-svc": _Svc}, surface="nemo.services") - assert len(contribs) == 1 - assert "/apis/example-svc/v2/workspaces/{workspace}/jobs" in contribs[0].endpoints +def test_derive_contribution_composes_mounted_path(monkeypatch) -> None: + """A service's @path_rule routes derive to the final /apis// paths. + The permission catalog (id -> description) is derived from the Permission objects on the + routes — there is no separate declaration. + """ + router = APIRouter() -def test_combine_authz_contributions_merges_endpoints_and_permissions() -> None: - a = authz_for_workspace_job_collection( - api_area="customization", - collection_suffix="/automodel/jobs", - permission_prefix="customization.automodel.jobs", - ) - b = authz_for_workspace_job_collection( - api_area="customization", - collection_suffix="/unsloth/jobs", - permission_prefix="customization.unsloth.jobs", + @router.get("/v2/workspaces/{workspace}/items/{name}") + @path_rule( + callers=[CallerKind.PRINCIPAL], + permissions=[Permission("example.items.read", "Read example items")], + scopes=["example:read"], ) - merged = combine_authz_contributions(a, b) - assert "customization.automodel.jobs.create" in merged.permissions - assert "customization.unsloth.jobs.create" in merged.permissions - assert "/apis/customization/v2/workspaces/{workspace}/automodel/jobs" in merged.endpoints - assert "/apis/customization/v2/workspaces/{workspace}/unsloth/jobs" in merged.endpoints - - -def 
test_customization_router_authz_discovered_via_nemo_services(monkeypatch: pytest.MonkeyPatch) -> None: - """Customization hub aggregates backend authz through nemo.services discovery.""" - - class _FakeContributor: - def get_authz_contribution(self) -> AuthzContribution: - return _example_automodel_authz() - - class _CustomizationHub(NemoService): - name = "customization" - dependencies = [] - - @classmethod - def get_authz_contribution(cls) -> AuthzContribution: - from nemo_platform_plugin.discovery import discover_customization_contributors - - hub = AuthzContribution( - endpoints={ - "/apis/customization/healthz": { - "get": AuthzEndpointMethod(permissions=[], scopes=[]), - }, - }, - ) - backend_parts = [ - contribution - for contributor in discover_customization_contributors().values() - if isinstance((contribution := contributor.get_authz_contribution()), AuthzContribution) - ] - return combine_authz_contributions(hub, *backend_parts) - - def get_routers(self): - return [] + async def get_item(workspace: str, name: str) -> dict[str, str]: + return {"name": name} + + class _Svc(NemoService): + name = "example" + + def get_routers(self) -> list[RouterSpec]: + return [RouterSpec(router)] + + def _fake_discover_entry_points(group: str) -> dict[str, _FakeEntryPoint]: + assert group == "nemo.services" + return {"example": _FakeEntryPoint("example", lambda: _Svc)} monkeypatch.setattr( "nemo_platform_plugin.discovery.discover_entry_points", - lambda group: {}, - ) - monkeypatch.setattr( - "nemo_platform_plugin.discovery.discover_services", - lambda: {"customization": _CustomizationHub}, - ) - monkeypatch.setattr( - "nemo_platform_plugin.discovery.discover_customization_contributors", - lambda: {"automodel": _FakeContributor()}, + _fake_discover_entry_points, ) - discover_authz_contributions.cache_clear() + clear_plugin_authz_cache() try: - contributions = discover_authz_contributions() + contribs = discover_authz_contributions() finally: - 
discover_authz_contributions.cache_clear() + clear_plugin_authz_cache() - assert len(contributions) == 1 - paths = set(contributions[0].endpoints.keys()) - assert "/apis/customization/healthz" in paths - assert "/apis/customization/v2/workspaces/{workspace}/automodel/jobs" in paths - assert "/apis/customization/v2/workspaces/{workspace}/automodel/healthz" in paths + assert len(contribs) == 1 + contrib = contribs[0] + assert contrib.permissions == {"example.items.read": "Read example items"} + path = "/apis/example/v2/workspaces/{workspace}/items/{name}" + assert set(contrib.endpoints[path]) == {"get"} + binding = contrib.endpoints[path]["get"] + assert binding.permissions == ["example.items.read"] + assert binding.scopes == ["example:read"] + assert binding.callers == ["principal"] -def test_nemo_authz_entry_point_discovered(monkeypatch: pytest.MonkeyPatch) -> None: - """Plugins can register authz via a nemo.authz entry point callable.""" - ep = MagicMock() - ep.load.return_value = _example_automodel_authz - monkeypatch.setattr( - "nemo_platform_plugin.discovery.discover_entry_points", - lambda group: {"automodel": ep} if group == AUTHZ_GROUP else {}, - ) - monkeypatch.setattr( - "nemo_platform_plugin.discovery.discover_services", - lambda: {}, - ) - discover_authz_contributions.cache_clear() - try: - contributions = discover_authz_contributions() - finally: - discover_authz_contributions.cache_clear() +def test_derive_service_only_route_emits_service_principal_callers() -> None: + router = APIRouter() + + @router.post("/v2/internal/sync") + @path_rule(callers=[CallerKind.SERVICE_PRINCIPAL]) + async def sync() -> None: ... 
+ + class _Svc(NemoService): + name = "svc" + + def get_routers(self) -> list[RouterSpec]: + return [RouterSpec(router)] + + contrib, problems, _warnings = _derive_service_contribution(_Svc()) + assert problems == [] + binding = contrib.endpoints["/apis/svc/v2/internal/sync"]["post"] + assert binding.callers == ["service_principal"] + assert binding.permissions == [] + + +def test_derive_unions_callers_across_rules_with_shared_permissions() -> None: + router = APIRouter() + svc_read = Permission("svc.read", "Read") - assert len(contributions) == 1 - paths = set(contributions[0].endpoints.keys()) - assert "/apis/customization/v2/workspaces/{workspace}/automodel/jobs" in paths - assert "/apis/customization/v2/workspaces/{workspace}/automodel/healthz" in paths + @router.get("/v2/y") + @path_rule(callers=[CallerKind.PRINCIPAL], permissions=[svc_read]) + @path_rule(callers=[CallerKind.SERVICE_PRINCIPAL], permissions=[svc_read]) + async def y() -> None: ... + + class _Svc(NemoService): + name = "svc" + + def get_routers(self) -> list[RouterSpec]: + return [RouterSpec(router)] + + contrib, problems, _warnings = _derive_service_contribution(_Svc()) + assert problems == [] + binding = contrib.endpoints["/apis/svc/v2/y"]["get"] + assert binding.callers == ["principal", "service_principal"] + assert binding.permissions == ["svc.read"] + + +def test_derive_denies_route_with_or_of_distinct_permission_sets() -> None: + """v1 cannot represent (principal & permA) OR (service & permB): the route is denied + (fail-closed) with a recorded problem, without crashing the rest of the plugin.""" + router = APIRouter() + + @router.get("/v2/z") + @path_rule(callers=[CallerKind.PRINCIPAL], permissions=[Permission("svc.read", "Read svc")]) + @path_rule(callers=[CallerKind.SERVICE_PRINCIPAL], permissions=[Permission("svc.internal", "Internal svc")]) + async def z() -> None: ... 
+ + class _Svc(NemoService): + name = "svc" + + def get_routers(self) -> list[RouterSpec]: + return [RouterSpec(router)] + + contrib, problems, _warnings = _derive_service_contribution(_Svc()) + assert contrib.endpoints["/apis/svc/v2/z"]["get"].deny is True + assert any("distinct permission sets" in p for p in problems) + + +def test_derive_emits_deny_for_unruled_route() -> None: + router = APIRouter() + + @router.get("/v2/unruled") + async def unruled() -> None: ... + + class _Svc(NemoService): + name = "svc" + + def get_routers(self) -> list[RouterSpec]: + return [RouterSpec(router)] + + contrib, problems, _warnings = _derive_service_contribution(_Svc()) + # Unruled routes are explicit-deny (fail-closed), never omitted. + assert contrib.endpoints["/apis/svc/v2/unruled"]["get"].deny is True + assert any("no @path_rule" in p for p in problems) def test_submit_remote_forwards_authorization_header() -> None: """Authenticated CLI submit passes Authorization to the protected job route.""" - capture: dict[str, dict[str, str]] = {} + captured: dict[str, str] = {} def handler(request: httpx.Request) -> httpx.Response: - capture["headers"] = dict(request.headers) + captured.update(request.headers) return httpx.Response(200, json={"id": "job-123", "status": "queued"}) client = httpx.Client(transport=httpx.MockTransport(handler)) @@ -215,50 +208,4 @@ def handler(request: httpx.Request) -> httpx.Response: ) assert result == {"id": "job-123", "status": "queued"} - headers = capture["headers"] - assert headers.get("authorization") == "Bearer test-token" - - -def test_discovered_service_routes_have_authz_entries() -> None: - static_path = Path(__file__).resolve().parents[3] / "services/core/auth/src/nmp/core/auth/assets/static-authz.yaml" - with static_path.open() as f: - static_authz = yaml.safe_load(f) - - discover_authz_contributions.cache_clear() - try: - contributions = discover_authz_contributions() - finally: - discover_authz_contributions.cache_clear() - - merged = 
merge_authz_contributions(static_authz, [contribution.to_dict() for contribution in contributions]) - validate_static_authz_data(merged) - endpoints = merged["authz"]["endpoints"] - - missing = sorted( - f"{method.upper()} {path}" - for path, method in _discovered_service_route_methods() - if method not in endpoints.get(path, {}) - ) - - assert missing == [] - - -def _discovered_service_route_methods() -> set[tuple[str, str]]: - route_methods: set[tuple[str, str]] = set() - for service_name, service_cls in discover_services().items(): - service = service_cls() - for spec in service.get_routers(): - prefix = f"/apis/{service_name}{spec.prefix}".rstrip("/") - for route in spec.router.routes: - if not isinstance(route, APIRoute): - continue - path = f"{prefix}{route.path}" - normalized_path = _normalize_route_path(path) - for method in route.methods or set(): - route_methods.add((normalized_path, method.lower())) - return route_methods - - -def _normalize_route_path(path: str) -> str: - # Authz policy uses placeholder names only; FastAPI route converters are an implementation detail. - return path.replace("{trailing_uri:path}", "{trailing_uri}") + assert captured.get("authorization") == "Bearer test-token" diff --git a/packages/nemo_platform_plugin/tests/test_authz_failmode.py b/packages/nemo_platform_plugin/tests/test_authz_failmode.py new file mode 100644 index 0000000000..ca8b943622 --- /dev/null +++ b/packages/nemo_platform_plugin/tests/test_authz_failmode.py @@ -0,0 +1,419 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""Fail-closed derivation: invalid plugin routes derive to explicit DENY + reported errors. + +``_derive_service_contribution`` returns ``(contribution, errors, warnings)``. 
*Errors* are +deny-worthy (unruled routes, OR of distinct permission sets, duplicate ``(path, method)``, +malformed / cross-namespace permission ids, load/derivation failures). *Warnings* are +metadata-only (missing / conflicting permission descriptions) and never deny a route. +""" + +from __future__ import annotations + +from collections.abc import Callable + +import pytest +from fastapi import APIRouter +from nemo_platform_plugin.authz import ( + AuthzContribution, + AuthzEndpointMethod, + CallerKind, + Permission, + path_rule, +) +from nemo_platform_plugin.authz_discovery import ( + _derive_service_contribution, + _method_from_dict, + clear_plugin_authz_cache, + discover_plugin_authz, +) +from nemo_platform_plugin.service import NemoService, RouterSpec + + +class _FakeEntryPoint: + """Minimal ``importlib.metadata.EntryPoint`` stand-in for discovery tests. + + ``discover_plugin_authz`` enumerates ``discover_entry_points("nemo.services")`` and calls + ``ep.load()`` per entry in its own try/except, so a fake only needs ``name`` and ``load``. + """ + + def __init__(self, name: str, loader: Callable[[], object]) -> None: + self.name = name + self.value = f"test:{name}" + self._loader = loader + + def load(self) -> object: + return self._loader() + + +def _patch_services(monkeypatch: pytest.MonkeyPatch, entry_points: dict[str, _FakeEntryPoint]) -> None: + monkeypatch.setattr("nemo_platform_plugin.discovery.discover_entry_points", lambda group: entry_points) + + +def test_deny_field_round_trips_through_wire_format() -> None: + contrib = AuthzContribution(endpoints={"/x": {"get": AuthzEndpointMethod(permissions=[], deny=True)}}) + serialized = contrib.to_dict()["endpoints"]["/x"]["get"] + assert serialized["deny"] is True + assert _method_from_dict(serialized).deny is True + # Absent deny defaults to False (and is omitted from the wire form). 
+ assert ( + "deny" + not in AuthzContribution(endpoints={"/x": {"get": AuthzEndpointMethod(permissions=["a"])}}).to_dict()[ + "endpoints" + ]["/x"]["get"] + ) + assert _method_from_dict({"permissions": []}).deny is False + + +def test_permissions_outside_service_namespace_fail_closed() -> None: + """A permission whose first segment isn't the service's own name is squatting: every route + is denied (fail-closed) and no permissions are contributed.""" + router = APIRouter() + + @router.get("/v2/x") + @path_rule(callers=[CallerKind.PRINCIPAL], permissions=[Permission("svc.x.read", "Read x")]) + async def x() -> None: ... + + @router.get("/v2/y") + @path_rule(callers=[CallerKind.PRINCIPAL], permissions=[Permission("other.y.read", "Read y")]) + async def y() -> None: ... + + class _Svc(NemoService): + name = "svc" + + def get_routers(self) -> list[RouterSpec]: + return [RouterSpec(router)] + + contrib, errors, _warnings = _derive_service_contribution(_Svc()) + assert contrib.endpoints["/apis/svc/v2/x"]["get"].deny is True + assert contrib.endpoints["/apis/svc/v2/y"]["get"].deny is True + assert contrib.permissions == {} + assert any("outside the service namespace" in e and "other.y.read" in e for e in errors) + + +def test_malformed_permission_id_fails_closed() -> None: + """A permission id that isn't dot-separated lowercase segments would 500 the bundle's + validate_static_authz_data if it reached the wire — so it fails the plugin closed here.""" + router = APIRouter() + + @router.get("/v2/x") + @path_rule(callers=[CallerKind.PRINCIPAL], permissions=[Permission("svc.bad_segment", "Read x")]) + async def x() -> None: ... 
+ + class _Svc(NemoService): + name = "svc" + + def get_routers(self) -> list[RouterSpec]: + return [RouterSpec(router)] + + contrib, errors, _warnings = _derive_service_contribution(_Svc()) + assert contrib.endpoints["/apis/svc/v2/x"]["get"].deny is True + assert contrib.permissions == {} + assert any("malformed permission id" in e for e in errors) + + +def test_duplicate_path_method_binding_fails_closed() -> None: + """Two handlers on the same (path, method): Starlette serves the first, but the derived + policy could describe the second. Refuse to guess — deny the pair and flag it.""" + router = APIRouter() + + @router.get("/v2/dup") + @path_rule(callers=[CallerKind.PRINCIPAL], permissions=[Permission("svc.read", "Read")]) + async def first() -> None: ... + + @router.get("/v2/dup") + @path_rule(callers=[CallerKind.PRINCIPAL], permissions=[Permission("svc.read", "Read")]) + async def second() -> None: ... + + class _Svc(NemoService): + name = "svc" + + def get_routers(self) -> list[RouterSpec]: + return [RouterSpec(router)] + + contrib, errors, _warnings = _derive_service_contribution(_Svc()) + assert contrib.endpoints["/apis/svc/v2/dup"]["get"].deny is True + assert any("duplicate route binding" in e for e in errors) + + +def test_websocket_route_is_warned_not_denied() -> None: + """A WebSocket/ASGI route never reaches the BaseHTTPMiddleware PDP, so a derived deny would + be inert: it surfaces as a (non-deny) warning rather than an error, and the plugin's HTTP + routes are unaffected.""" + from fastapi import WebSocket + + router = APIRouter() + + @router.get("/v2/x") + @path_rule(callers=[CallerKind.PRINCIPAL], permissions=[Permission("svc.read", "Read")]) + async def x() -> None: ... + + @router.websocket("/v2/stream") + async def stream(ws: WebSocket) -> None: ... 
+ + class _Svc(NemoService): + name = "svc" + + def get_routers(self) -> list[RouterSpec]: + return [RouterSpec(router)] + + contrib, errors, warnings = _derive_service_contribution(_Svc()) + assert errors == [] + assert any("not an APIRoute" in w for w in warnings) + assert contrib.endpoints["/apis/svc/v2/x"]["get"].deny is False + + +def test_missing_permission_description_is_warning_not_deny() -> None: + router = APIRouter() + + @router.get("/v2/x") + @path_rule(callers=[CallerKind.PRINCIPAL], permissions=[Permission("svc.x.read", "")]) + async def x() -> None: ... + + class _Svc(NemoService): + name = "svc" + + def get_routers(self) -> list[RouterSpec]: + return [RouterSpec(router)] + + contrib, errors, warnings = _derive_service_contribution(_Svc()) + # A missing description is metadata-only: it's a warning, never an error, and the route + # still requires the right permission (it is not denied). + assert errors == [] + assert any("missing a description" in w for w in warnings) + assert contrib.endpoints["/apis/svc/v2/x"]["get"].deny is False + + +def test_conflicting_descriptions_for_same_id_is_warning() -> None: + router = APIRouter() + + @router.get("/v2/a") + @path_rule(callers=[CallerKind.PRINCIPAL], permissions=[Permission("svc.read", "Read")]) + async def a() -> None: ... + + @router.get("/v2/b") + @path_rule(callers=[CallerKind.PRINCIPAL], permissions=[Permission("svc.read", "Totally different")]) + async def b() -> None: ... + + class _Svc(NemoService): + name = "svc" + + def get_routers(self) -> list[RouterSpec]: + return [RouterSpec(router)] + + contrib, errors, warnings = _derive_service_contribution(_Svc()) + # A description conflict is cosmetic — it must not deny a route or escalate the fail-mode. 
+ assert errors == [] + assert any("conflicting descriptions" in w for w in warnings) + assert contrib.endpoints["/apis/svc/v2/a"]["get"].deny is False + assert contrib.endpoints["/apis/svc/v2/b"]["get"].deny is False + + +def test_extra_permissions_adds_non_route_permission_to_catalog() -> None: + """The escape hatch contributes a permission with no 1:1 route (e.g. middleware-checked).""" + router = APIRouter() + + @router.get("/v2/x") + @path_rule(callers=[CallerKind.PRINCIPAL], permissions=[Permission("svc.read", "Read")]) + async def x() -> None: ... + + class _Svc(NemoService): + name = "svc" + + def get_routers(self) -> list[RouterSpec]: + return [RouterSpec(router)] + + def extra_permissions(self) -> list[Permission]: + return [Permission("svc.admin", "Administer svc")] + + contrib, errors, _warnings = _derive_service_contribution(_Svc()) + assert errors == [] + assert contrib.permissions == {"svc.read": "Read", "svc.admin": "Administer svc"} + # The extra permission has no endpoint binding. + assert all("svc.admin" not in m.permissions for methods in contrib.endpoints.values() for m in methods.values()) + + +def test_extra_permissions_failure_is_reported_routes_survive() -> None: + router = APIRouter() + + @router.get("/v2/x") + @path_rule(callers=[CallerKind.PRINCIPAL], permissions=[Permission("svc.read", "Read")]) + async def x() -> None: ... + + class _Svc(NemoService): + name = "svc" + + def get_routers(self) -> list[RouterSpec]: + return [RouterSpec(router)] + + def extra_permissions(self) -> list[Permission]: + raise RuntimeError("boom") + + contrib, errors, _warnings = _derive_service_contribution(_Svc()) + # A broken hatch loses its extras but never invalidates the route-derived authz. 
+ assert any("extra_permissions() raised" in e for e in errors) + assert contrib.endpoints["/apis/svc/v2/x"]["get"].deny is False + assert contrib.endpoints["/apis/svc/v2/x"]["get"].permissions == ["svc.read"] + + +def test_discover_plugin_authz_reports_unruled_route(monkeypatch: pytest.MonkeyPatch) -> None: + router = APIRouter() + + @router.get("/v2/unruled") + async def unruled() -> None: ... + + class _Svc(NemoService): + name = "svc" + + def get_routers(self) -> list[RouterSpec]: + return [RouterSpec(router)] + + _patch_services(monkeypatch, {"svc": _FakeEntryPoint("svc", lambda: _Svc)}) + clear_plugin_authz_cache() + try: + results = discover_plugin_authz() + finally: + clear_plugin_authz_cache() + + assert len(results) == 1 + assert results[0].key == "svc" + assert results[0].problems + assert results[0].contribution.endpoints["/apis/svc/v2/unruled"]["get"].deny is True + + +def test_discover_plugin_authz_records_import_load_failure_as_degraded(monkeypatch: pytest.MonkeyPatch) -> None: + """An entry point whose ``load()`` raises (broken import) becomes a degraded result keyed + by the entry-point name, never silently dropped — per-entry-point isolation.""" + + def _boom() -> object: + raise ImportError("module not found") + + _patch_services(monkeypatch, {"broken": _FakeEntryPoint("broken", _boom)}) + clear_plugin_authz_cache() + try: + results = discover_plugin_authz() + finally: + clear_plugin_authz_cache() + + assert len(results) == 1 + assert results[0].key == "broken" + assert any("failed to load" in p for p in results[0].problems) + assert results[0].contribution.endpoints == {} + assert results[0].mount_name == "broken" + + +def test_discover_plugin_authz_records_derivation_failure_as_degraded(monkeypatch: pytest.MonkeyPatch) -> None: + """A class that loads but whose route walk raises is a degraded (derivation) failure, + distinct from a load failure — and the degraded fence still covers /apis/.""" + + class _BadSvc(NemoService): + name = "bad" + + 
def get_routers(self) -> list[RouterSpec]: + raise RuntimeError("boom") + + _patch_services(monkeypatch, {"bad": _FakeEntryPoint("bad", lambda: _BadSvc)}) + clear_plugin_authz_cache() + try: + results = discover_plugin_authz() + finally: + clear_plugin_authz_cache() + + assert len(results) == 1 + assert results[0].key == "bad" + assert any("failed to derive" in p for p in results[0].problems) + assert results[0].contribution.endpoints == {} + assert results[0].mount_name == "bad" + + +def test_degraded_result_is_not_cached_but_clean_is(monkeypatch: pytest.MonkeyPatch) -> None: + """A degraded derivation is never pinned for the process lifetime (that would 403 the + namespace until restart): the next call re-derives. An all-clean derivation is cached.""" + calls = {"n": 0} + + class _FlakySvc(NemoService): + name = "flaky" + fail = True + + def get_routers(self) -> list[RouterSpec]: + calls["n"] += 1 + if type(self).fail: + raise RuntimeError("transient boom") + router = APIRouter() + + @router.get("/v2/x") + @path_rule(callers=[CallerKind.PRINCIPAL], permissions=[Permission("flaky.read", "Read")]) + async def x() -> None: ... + + return [RouterSpec(router)] + + _patch_services(monkeypatch, {"flaky": _FakeEntryPoint("flaky", lambda: _FlakySvc)}) + clear_plugin_authz_cache() + try: + first = discover_plugin_authz() + second = discover_plugin_authz() + # Degraded: both calls re-derive (the failure is not cached). + assert first[0].problems and second[0].problems + assert calls["n"] == 2 + + # Failure clears; the next derivation is clean and gets cached. 
+ _FlakySvc.fail = False + third = discover_plugin_authz() + fourth = discover_plugin_authz() + assert third[0].problems == [] and fourth[0].problems == [] + assert calls["n"] == 3 # third derived; fourth served from cache + finally: + clear_plugin_authz_cache() + + +def test_clean_plugin_has_no_problems(monkeypatch: pytest.MonkeyPatch) -> None: + router = APIRouter() + + @router.get("/v2/items/{name}") + @path_rule(callers=[CallerKind.PRINCIPAL], permissions=[Permission("svc.items.read", "Read items")]) + async def get_item(name: str) -> None: ... + + class _Svc(NemoService): + name = "svc" + + def get_routers(self) -> list[RouterSpec]: + return [RouterSpec(router)] + + _patch_services(monkeypatch, {"svc": _FakeEntryPoint("svc", lambda: _Svc)}) + clear_plugin_authz_cache() + try: + results = discover_plugin_authz() + finally: + clear_plugin_authz_cache() + + assert results[0].problems == [] + assert results[0].warnings == [] + assert results[0].contribution.endpoints["/apis/svc/v2/items/{name}"]["get"].deny is False + + +def test_malformed_route_denies_only_itself_not_the_plugin() -> None: + """A route whose rules can't collapse denies only itself — the plugin's other routes survive.""" + router = APIRouter() + svc_read = Permission("svc.read", "Read") + + @router.get("/v2/bad") + @path_rule(callers=[CallerKind.PRINCIPAL], permissions=[svc_read]) + @path_rule(callers=[CallerKind.SERVICE_PRINCIPAL], permissions=[Permission("svc.internal", "Internal")]) + async def bad() -> None: ... + + @router.get("/v2/good") + @path_rule(callers=[CallerKind.PRINCIPAL], permissions=[svc_read]) + async def good() -> None: ... 
+ + class _Svc(NemoService): + name = "svc" + + def get_routers(self) -> list[RouterSpec]: + return [RouterSpec(router)] + + contrib, errors, _warnings = _derive_service_contribution(_Svc()) + assert contrib.endpoints["/apis/svc/v2/bad"]["get"].deny is True + assert contrib.endpoints["/apis/svc/v2/good"]["get"].deny is False + assert contrib.endpoints["/apis/svc/v2/good"]["get"].permissions == ["svc.read"] + assert any("distinct permission sets" in e for e in errors) diff --git a/packages/nemo_platform_plugin/tests/test_discovery.py b/packages/nemo_platform_plugin/tests/test_discovery.py index 37426557bf..d52151aa89 100644 --- a/packages/nemo_platform_plugin/tests/test_discovery.py +++ b/packages/nemo_platform_plugin/tests/test_discovery.py @@ -589,9 +589,6 @@ def get_routers(self) -> list[RouterSpec]: def get_cli(self) -> None: return None - def get_authz_contribution(self): - return None - def get_sdk_resources(self): return None @@ -616,9 +613,6 @@ def get_routers(self) -> list[RouterSpec]: def get_cli(self) -> None: return None - def get_authz_contribution(self): - return None - def get_sdk_resources(self): return None @@ -640,9 +634,6 @@ def get_routers(self) -> list[RouterSpec]: def get_cli(self) -> None: return None - def get_authz_contribution(self): - return None - def get_sdk_resources(self): return None diff --git a/packages/nemo_platform_plugin/tests/test_factory_authz.py b/packages/nemo_platform_plugin/tests/test_factory_authz.py new file mode 100644 index 0000000000..b2f6f31e4c --- /dev/null +++ b/packages/nemo_platform_plugin/tests/test_factory_authz.py @@ -0,0 +1,216 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +"""Authorization metadata stamped onto factory-generated job and function routes.""" + +from __future__ import annotations + +import pytest +from fastapi import APIRouter +from fastapi.routing import APIRoute +from nemo_platform_plugin.authz import ( + AuthzScope, + CallerKind, + get_path_rules, +) +from nemo_platform_plugin.authz_discovery import _derive_service_contribution +from nemo_platform_plugin.function import NemoFunction +from nemo_platform_plugin.functions.routes import add_function_routes +from nemo_platform_plugin.jobs.api_factory import ( + FileResultSerializer, + JobRouteOption, + PlatformJobResultRoute, + job_route_factory, +) +from nemo_platform_plugin.jobs.routes import _rebase_job_collection_routes +from nemo_platform_plugin.service import NemoService, RouterSpec +from pydantic import BaseModel + +_READ = ["customization:read", "platform:read"] +_WRITE = ["customization:write", "platform:write"] + + +class _Spec(BaseModel): + value: str = "x" + + +async def _compiler(*args: object, **kwargs: object) -> object: # never called at route-build time + raise NotImplementedError + + +def _rules_by_path_method(router: APIRouter) -> dict[tuple[str, str], list]: + """Map (path, lower-method) -> attached PathRules for every APIRoute in *router*.""" + out: dict[tuple[str, str], list] = {} + for route in router.routes: + if not isinstance(route, APIRoute): + continue + for method in route.methods or set(): + out[(route.path, method.lower())] = get_path_rules(route.endpoint) + return out + + +def _mounted_customization_jobs(**factory_kwargs) -> APIRouter: + """Build the customization job router the way production does: factory -> rebase -> mount.""" + factory_router = job_route_factory( + service_name="customization", + job_type="Widget", + job_input=_Spec, + platform_job_config_compiler=_compiler, + authz=AuthzScope("customization").child("jobs"), + **factory_kwargs, + ) + rebased = 
_rebase_job_collection_routes(factory_router, "/widget-jobs") + mounted = APIRouter() + mounted.include_router(rebased, prefix="/apis/customization/v2/workspaces/{workspace}") + return mounted + + +def _assert_single_rule(rules: list, perm: str, scopes: list[str]) -> None: + assert len(rules) == 1 + rule = rules[0] + assert rule.callers == [CallerKind.PRINCIPAL] + assert [p.id for p in rule.permissions] == [perm] + assert rule.scopes == scopes + + +def test_job_factory_stamps_every_core_route_exactly() -> None: + """Exact-set assertion: every CORE route carries the right rule, and nothing else is stamped.""" + rules = _rules_by_path_method(_mounted_customization_jobs()) + base = "/apis/customization/v2/workspaces/{workspace}/widget-jobs" + + # (path, method) -> (permission, scopes). The full CORE surface, including both + # /results read routes and the generic download closure. + expected = { + (base, "post"): ("customization.jobs.create", _WRITE), + (base, "get"): ("customization.jobs.list", _READ), + (f"{base}/{{name}}", "get"): ("customization.jobs.read", _READ), + (f"{base}/{{name}}", "delete"): ("customization.jobs.delete", _WRITE), + (f"{base}/{{name}}/status", "get"): ("customization.jobs.read", _READ), + (f"{base}/{{name}}/cancel", "post"): ("customization.jobs.cancel", _WRITE), + (f"{base}/{{name}}/logs", "get"): ("customization.jobs.read", _READ), + (f"{base}/{{name}}/results", "get"): ("customization.jobs.read", _READ), + (f"{base}/{{job}}/results/{{name}}", "get"): ("customization.jobs.read", _READ), + (f"{base}/{{job}}/results/{{name}}/download", "get"): ("customization.jobs.read", _READ), + } + + # Exact set — catches both a dropped stamp and an unexpected/extra stamped route. 
+ assert set(rules) == set(expected) + for key, (perm, scopes) in expected.items(): + _assert_single_rule(rules[key], perm, scopes) + + +def test_job_factory_stamps_pause_resume_when_enabled() -> None: + rules = _rules_by_path_method( + _mounted_customization_jobs(route_options=[JobRouteOption.CORE, JobRouteOption.PAUSE_RESUME]) + ) + base = "/apis/customization/v2/workspaces/{workspace}/widget-jobs" + _assert_single_rule(rules[(f"{base}/{{name}}/pause", "post")], "customization.jobs.pause", _WRITE) + _assert_single_rule(rules[(f"{base}/{{name}}/resume", "post")], "customization.jobs.resume", _WRITE) + + +def test_job_factory_core_only_omits_pause_resume() -> None: + rules = _rules_by_path_method(_mounted_customization_jobs()) + assert not any(path.endswith(("/pause", "/resume")) for path, _ in rules) + + +def test_job_factory_stamps_explicit_result_download_closure() -> None: + rules = _rules_by_path_method( + _mounted_customization_jobs( + job_result_routes=[PlatformJobResultRoute(name="metrics", serializer=FileResultSerializer())] + ) + ) + base = "/apis/customization/v2/workspaces/{workspace}/widget-jobs" + _assert_single_rule( + rules[(f"{base}/{{job}}/results/metrics/download", "get")], + "customization.jobs.read", + _READ, + ) + + +def test_job_factory_no_namespace_is_inert() -> None: + factory_router = job_route_factory( + service_name="customization", + job_type="Bare", + job_input=_Spec, + platform_job_config_compiler=_compiler, + ) + for route in factory_router.routes: + if isinstance(route, APIRoute): + assert get_path_rules(route.endpoint) == [] + + +def test_job_factory_derivation_end_to_end() -> None: + class _Svc(NemoService): + name = "customization" + + def get_routers(self) -> list[RouterSpec]: + router = job_route_factory( + service_name="customization", + job_type="E2E", + job_input=_Spec, + platform_job_config_compiler=_compiler, + authz=AuthzScope("customization").child("jobs"), + ) + return [RouterSpec(router, 
prefix="/v2/workspaces/{workspace}")] + + contrib, problems, _warnings = _derive_service_contribution(_Svc()) + assert problems == [] + + # The catalog, derived from the routes, carries every id the stamped rules reference. + assert { + "customization.jobs.create", + "customization.jobs.list", + "customization.jobs.read", + "customization.jobs.delete", + "customization.jobs.cancel", + } <= set(contrib.permissions) + + collection = "/apis/customization/v2/workspaces/{workspace}/jobs" + assert contrib.endpoints[collection]["post"].permissions == ["customization.jobs.create"] + assert contrib.endpoints[collection]["post"].callers == ["principal"] + assert contrib.endpoints[f"{collection}/{{name}}"]["delete"].permissions == ["customization.jobs.delete"] + + +def test_add_function_routes_stamps_invoke_permission() -> None: + class _GreetFn(NemoFunction): + name = "greet" + spec_schema = _Spec + + async def run(self, spec: _Spec) -> dict[str, bool]: + return {"ok": True} + + router = add_function_routes(_GreetFn, authz=AuthzScope("example")) + + routes = [r for r in router.routes if isinstance(r, APIRoute)] + assert len(routes) == 1 + _assert_single_rule( + get_path_rules(routes[0].endpoint), + "example.greet", + ["example:write", "platform:write"], + ) + + +def test_add_function_routes_no_namespace_is_inert() -> None: + class _BareFn(NemoFunction): + name = "bare" + spec_schema = _Spec + + async def run(self, spec: _Spec) -> dict[str, bool]: + return {"ok": True} + + router = add_function_routes(_BareFn) + routes = [r for r in router.routes if isinstance(r, APIRoute)] + assert len(routes) == 1 + assert get_path_rules(routes[0].endpoint) == [] + + +def test_add_function_routes_description_without_authz_raises() -> None: + class _DescFn(NemoFunction): + name = "desc" + spec_schema = _Spec + + async def run(self, spec: _Spec) -> dict[str, bool]: + return {"ok": True} + + with pytest.raises(ValueError, match="permission_description requires authz"): + 
add_function_routes(_DescFn, permission_description="Invoke desc") # authz omitted diff --git a/packages/nemo_platform_plugin/tests/test_functions_routes.py b/packages/nemo_platform_plugin/tests/test_functions_routes.py index fc445e3c32..dda57a3dd6 100644 --- a/packages/nemo_platform_plugin/tests/test_functions_routes.py +++ b/packages/nemo_platform_plugin/tests/test_functions_routes.py @@ -427,3 +427,10 @@ def test_missing_spec_schema_raises_typeerror(self) -> None: ) with pytest.raises(TypeError, match="spec_schema is None"): add_function_routes(cls) + + def test_permission_description_without_authz_is_rejected(self) -> None: + # permission_description only takes effect when authz is set (it rides on the stamped + # permission); supplying it alone would be silently discarded and leave the route + # unruled (→ DENY at bundle time), so it must raise rather than fail open. + with pytest.raises(ValueError, match="permission_description requires authz"): + add_function_routes(_NonStreamingGreet, permission_description="Greet someone") diff --git a/packages/nemo_platform_plugin/tests/test_path_rule.py b/packages/nemo_platform_plugin/tests/test_path_rule.py new file mode 100644 index 0000000000..1ca155a8ab --- /dev/null +++ b/packages/nemo_platform_plugin/tests/test_path_rule.py @@ -0,0 +1,190 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +"""Authoring API: CallerKind, Permission, PermissionSet, PathRule, @path_rule, callers plumbing.""" + +from __future__ import annotations + +import inspect + +import pytest +from fastapi import APIRouter +from fastapi.routing import APIRoute +from nemo_platform_plugin.authz import ( + AuthzContribution, + AuthzEndpointMethod, + CallerKind, + PathRule, + Permission, + PermissionSet, + get_path_rules, + path_rule, + perm, + validate_caller_strings, +) +from nemo_platform_plugin.authz_discovery import _method_from_dict +from nemo_platform_plugin.service import NemoService, RouterSpec + +_READ = Permission("x.read", "Read x") + + +def test_caller_kind_values_and_no_anon() -> None: + assert CallerKind.PRINCIPAL == "principal" + assert CallerKind.SERVICE_PRINCIPAL == "service_principal" + assert {c.value for c in CallerKind} == {"principal", "service_principal"} + assert not hasattr(CallerKind, "ANON") + + +def test_permission_is_frozen_and_stringifies_to_id() -> None: + permission = Permission(id="agents.deployments.read", description="Read deployments") + assert permission.id == "agents.deployments.read" + assert str(permission) == "agents.deployments.read" + with pytest.raises(Exception): # frozen dataclass: FrozenInstanceError + permission.id = "other" # type: ignore[misc] + + +def test_permission_set_derives_ids_from_namespace_and_member_name() -> None: + class WidgetPerms(PermissionSet, namespace="widget"): + CREATE = perm("Create a widget") + BULK = perm("Bulk export", suffix="bulk.export") + + assert WidgetPerms.CREATE == Permission("widget.create", "Create a widget") + assert WidgetPerms.BULK == Permission("widget.bulk.export", "Bulk export") + assert set(WidgetPerms.all()) == {WidgetPerms.CREATE, WidgetPerms.BULK} + # A typo'd member doesn't exist — caught at access time, not at the policy layer. 
+ assert not hasattr(WidgetPerms, "CRAETE") + + +def test_path_rule_returns_identical_function_and_signature() -> None: + async def handler(name: str, count: int = 0) -> dict[str, str]: + return {"name": name} + + before = inspect.signature(handler) + decorated = path_rule(callers=[CallerKind.PRINCIPAL], permissions=[_READ])(handler) + + # D5: same object, unchanged signature — never wrapped. + assert decorated is handler + assert inspect.signature(handler) == before + + +def test_path_rule_attaches_rule() -> None: + @path_rule(callers=[CallerKind.PRINCIPAL], permissions=[_READ], scopes=["x:read"]) + async def handler() -> None: ... + + rules = get_path_rules(handler) + assert rules == [ + PathRule(callers=[CallerKind.PRINCIPAL], permissions=[_READ], scopes=["x:read"]), + ] + + +def test_path_rule_stacks_as_or() -> None: + # Decorators apply bottom-up; both rules end up attached as OR alternatives. + @path_rule(callers=[CallerKind.PRINCIPAL], permissions=[_READ]) + @path_rule(callers=[CallerKind.SERVICE_PRINCIPAL]) + async def handler() -> None: ... + + rules = get_path_rules(handler) + assert len(rules) == 2 + assert {tuple(r.callers) for r in rules} == { + (CallerKind.SERVICE_PRINCIPAL,), + (CallerKind.PRINCIPAL,), + } + + +def test_path_rule_empty_callers_rejected() -> None: + with pytest.raises(ValueError, match="at least one caller"): + path_rule(callers=[]) + + +def test_path_rule_coerces_and_validates_caller_strings() -> None: + # Strings are coerced to CallerKind; unknown values raise. + @path_rule(callers=["principal"]) # type: ignore[list-item] + async def ok() -> None: ... + + assert get_path_rules(ok)[0].callers == [CallerKind.PRINCIPAL] + + with pytest.raises(ValueError): + path_rule(callers=["anon"]) # type: ignore[list-item] + + +def test_path_rule_rejects_bare_string_permission() -> None: + """A permission must be a Permission object. 
A bare string is rejected at decoration so a + typo (or a forgotten PermissionSet member) can't silently reach the policy layer.""" + with pytest.raises(TypeError, match="must be Permission objects"): + path_rule(callers=[CallerKind.PRINCIPAL], permissions=["x.read"]) # type: ignore[list-item] + + +def test_get_path_rules_empty_for_undecorated() -> None: + async def handler() -> None: ... + + assert get_path_rules(handler) == [] + + +def test_path_rule_survives_router_prefix_rebasing() -> None: + """D5: function-attached metadata must survive include_router(prefix=...) rebasing.""" + router = APIRouter() + items_read = Permission("items.read", "Read items") + + @router.get("/items/{name}") + @path_rule(callers=[CallerKind.PRINCIPAL], permissions=[items_read]) + async def get_item(name: str) -> dict[str, str]: + return {"name": name} + + # Two prefix hops, as a real plugin mount does (/apis/ then workspace prefix). + inner = APIRouter() + inner.include_router(router, prefix="/v2/workspaces/{workspace}") + app_router = APIRouter() + app_router.include_router(inner, prefix="/apis/example") + + matching = [r for r in app_router.routes if isinstance(r, APIRoute) and r.path.endswith("/items/{name}")] + assert len(matching) == 1 + final_route = matching[0] + assert final_route.path == "/apis/example/v2/workspaces/{workspace}/items/{name}" + + rules = get_path_rules(final_route.endpoint) + assert len(rules) == 1 + assert rules[0].callers == [CallerKind.PRINCIPAL] + assert rules[0].permissions == [items_read] + + +def test_extra_permissions_default_empty() -> None: + class _Svc(NemoService): + name = "example-svc" + + def get_routers(self) -> list[RouterSpec]: + return [] + + assert _Svc().extra_permissions() == [] + + +def test_authz_endpoint_method_callers_roundtrip() -> None: + contrib = AuthzContribution( + endpoints={ + "/apis/x/v2/thing": { + "post": AuthzEndpointMethod( + permissions=["x.create"], + scopes=["x:write"], + callers=["service_principal"], + ), + "get": 
AuthzEndpointMethod(permissions=["x.read"]), + } + } + ) + serialized = contrib.to_dict() + post = serialized["endpoints"]["/apis/x/v2/thing"]["post"] + get = serialized["endpoints"]["/apis/x/v2/thing"]["get"] + + # Present callers serialize; absent callers are omitted (default ⇒ PRINCIPAL). + assert post["callers"] == ["service_principal"] + assert "callers" not in get + + # Round-trip through the parse chokepoint. + assert _method_from_dict(post).callers == ["service_principal"] + assert _method_from_dict(get).callers is None + + +def test_validate_caller_strings() -> None: + validate_caller_strings(None, context="t") # absence allowed + validate_caller_strings(["principal", "service_principal"], context="t") + with pytest.raises(ValueError, match="Invalid caller kind"): + validate_caller_strings(["anon"], context="t") diff --git a/packages/nmp_common/tests/auth/test_authz_format.py b/packages/nmp_common/tests/auth/test_authz_format.py index fe7b4f35e9..1987b01780 100644 --- a/packages/nmp_common/tests/auth/test_authz_format.py +++ b/packages/nmp_common/tests/auth/test_authz_format.py @@ -123,6 +123,35 @@ def test_invalid_role_permission(self) -> None: with pytest.raises(InvalidPermissionFormatError): validate_static_authz_data(data) + def test_valid_caller_kinds_pass(self) -> None: + data = { + "authz": { + "roles": {}, + "endpoints": { + "/apis/x/v2/thing": { + "get": {"permissions": ["x.read"], "callers": ["principal", "service_principal"]}, + } + }, + } + } + validate_static_authz_data(data) + + def test_invalid_caller_kind_raises(self) -> None: + # A hand-edited static-authz.yaml with an unknown caller kind is caught at load/build, + # rather than failing silently in policy checks (the caller validator is now wired in). 
+ data = { + "authz": { + "roles": {}, + "endpoints": { + "/apis/x/v2/thing": { + "get": {"permissions": ["x.read"], "callers": ["anon"]}, + } + }, + } + } + with pytest.raises(ValueError, match="Invalid caller kind"): + validate_static_authz_data(data) + def test_shipped_static_authz_yaml_passes_validation() -> None: """Regression: real static-authz.yaml must satisfy format checks.""" diff --git a/packages/nmp_customization_common/src/nmp/customization_common/contributor/base.py b/packages/nmp_customization_common/src/nmp/customization_common/contributor/base.py index 8ea7ed2aab..65775e4ebe 100644 --- a/packages/nmp_customization_common/src/nmp/customization_common/contributor/base.py +++ b/packages/nmp_customization_common/src/nmp/customization_common/contributor/base.py @@ -4,7 +4,7 @@ """Base customization contributor. Both the unsloth and automodel contributors implement the same -``get_routers`` / ``get_cli`` / ``get_authz_contribution`` shape, differing only +``get_routers`` / ``get_cli`` shape, differing only in a handful of class-level values. This base collapses that; each backend's ``contributor.py`` keeps a small subclass at the entry-point path (``nemo__plugin.contributor:Contributor``). @@ -16,7 +16,7 @@ import typer from fastapi import APIRouter -from nemo_platform_plugin.authz import AuthzContribution, authz_for_workspace_job_collection +from nemo_platform_plugin.authz import AuthzScope, CallerKind, path_rule from nemo_platform_plugin.customization_contributor import CustomizationContributorSDKResources from nemo_platform_plugin.jobs.api_factory import JobRouteOption from nemo_platform_plugin.jobs.routes import add_job_routes @@ -55,11 +55,19 @@ def _get_config(self) -> Any: raise NotImplementedError def get_routers(self) -> list[RouterSpec]: - """Health endpoint + ``add_job_routes`` for the backend job collection.""" + """Health endpoint + ``add_job_routes`` for the backend job collection. 
+ + HTTP authz is derived from the ``@path_rule``-decorated routes: the health + endpoint is authenticated-but-permissionless, and the job collection's + permissions (``customization.{backend}.jobs.*``) are stamped onto the factory + routes via the ``customization`` :class:`AuthzScope` (scope ``customization``, + permission namespace deepened to ``customization.{backend}.jobs``). + """ config = self._get_config() router = APIRouter() @router.get("/healthz") + @path_rule(callers=[CallerKind.PRINCIPAL], permissions=[]) async def healthz() -> dict[str, str]: return {"backend": self.name, "status": "ok"} @@ -69,6 +77,7 @@ async def healthz() -> dict[str, str]: generate_job_name=self.generate_job_name, route_options=[JobRouteOption.CORE], default_profile=config.default_training_execution_profile, + authz=AuthzScope("customization").child(self.name, "jobs"), ) return [ @@ -103,16 +112,6 @@ def get_cli(self) -> typer.Typer: self.apply_cli_overrides(app) return app - def get_authz_contribution(self) -> AuthzContribution: - """Register the backend job routes with the platform authorization policy.""" - return authz_for_workspace_job_collection( - api_area="customization", - collection_suffix=f"/{self.name}/jobs", - permission_prefix=f"customization.{self.name}.jobs", - include_healthz=True, - healthz_suffix=f"/{self.name}/healthz", - ) - def get_sdk_resources(self) -> CustomizationContributorSDKResources | None: """Return SDK resource classes for ``client.customization.``. diff --git a/plugins/example-plugin/src/nemo_example_plugin/_perms.py b/plugins/example-plugin/src/nemo_example_plugin/_perms.py new file mode 100644 index 0000000000..4e72cf86df --- /dev/null +++ b/plugins/example-plugin/src/nemo_example_plugin/_perms.py @@ -0,0 +1,33 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""Typed permission vocabulary for the example plugin's hand-written routes.
+ +Reference these constants in ``@path_rule``; the platform derives the permission catalog +from the routes. The ``greet`` / ``count`` function permissions are stamped onto the +factory routes (see ``get_routers``), so they are not declared here. +""" + +from __future__ import annotations + +from nemo_platform_plugin.authz import PermissionSet, perm + + +class ExampleHelloPerms(PermissionSet, namespace="example.hello"): + READ = perm("Read the example greeting") + + +class ExampleItemPerms(PermissionSet, namespace="example.items"): + CREATE = perm("Create example items") + LIST = perm("List example items") + READ = perm("Read an example items entry") + UPDATE = perm("Update an example items entry") + DELETE = perm("Delete an example items entry") + + +class ExampleMiddlewareConfigPerms(PermissionSet, namespace="example.middleware-configs"): + CREATE = perm("Create example middleware-configs") + LIST = perm("List example middleware-configs") + READ = perm("Read an example middleware-configs entry") + UPDATE = perm("Update an example middleware-configs entry") + DELETE = perm("Delete an example middleware-configs entry") diff --git a/plugins/example-plugin/src/nemo_example_plugin/authz.py b/plugins/example-plugin/src/nemo_example_plugin/authz.py new file mode 100644 index 0000000000..ae33863f7a --- /dev/null +++ b/plugins/example-plugin/src/nemo_example_plugin/authz.py @@ -0,0 +1,14 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""The example plugin's authz scope. + +The service and middleware route modules import :data:`SCOPE` so the plugin shares one +``AuthzScope("example")``. A dedicated module also avoids a service ↔ middleware import cycle. 
+""" + +from __future__ import annotations + +from nemo_platform_plugin.authz import AuthzScope + +SCOPE = AuthzScope("example") diff --git a/plugins/example-plugin/src/nemo_example_plugin/middleware_service.py b/plugins/example-plugin/src/nemo_example_plugin/middleware_service.py index 1a1c97d0ee..84adfec816 100644 --- a/plugins/example-plugin/src/nemo_example_plugin/middleware_service.py +++ b/plugins/example-plugin/src/nemo_example_plugin/middleware_service.py @@ -25,7 +25,10 @@ import logging from fastapi import APIRouter, Depends, HTTPException, Query, status +from nemo_example_plugin._perms import ExampleMiddlewareConfigPerms +from nemo_example_plugin.authz import SCOPE from nemo_example_plugin.middleware_config import ExampleMiddlewareConfig +from nemo_platform_plugin.authz import CallerKind, path_rule from nemo_platform_plugin.entity_client import ( NemoEntitiesClient, NemoEntityConflictError, @@ -86,6 +89,11 @@ def build_middleware_config_router() -> APIRouter: status_code=status.HTTP_201_CREATED, summary="Create ExampleMiddlewareConfig", ) + @path_rule( + callers=[CallerKind.PRINCIPAL], + permissions=[ExampleMiddlewareConfigPerms.CREATE], + scopes=SCOPE.write(), + ) async def create_config( workspace: str, body: CreateExampleMiddlewareConfigRequest, @@ -125,6 +133,11 @@ async def create_config( response_model=list[ExampleMiddlewareConfig], summary="List ExampleMiddlewareConfigs", ) + @path_rule( + callers=[CallerKind.PRINCIPAL], + permissions=[ExampleMiddlewareConfigPerms.LIST], + scopes=SCOPE.read(), + ) async def list_configs( workspace: str, page: int = Query(default=1, ge=1), @@ -151,6 +164,11 @@ async def list_configs( response_model=ExampleMiddlewareConfig, summary="Get ExampleMiddlewareConfig", ) + @path_rule( + callers=[CallerKind.PRINCIPAL], + permissions=[ExampleMiddlewareConfigPerms.READ], + scopes=SCOPE.read(), + ) async def get_config( workspace: str, name: str, @@ -173,6 +191,11 @@ async def get_config( 
response_model=ExampleMiddlewareConfig, summary="Update ExampleMiddlewareConfig", ) + @path_rule( + callers=[CallerKind.PRINCIPAL], + permissions=[ExampleMiddlewareConfigPerms.UPDATE], + scopes=SCOPE.write(), + ) async def update_config( workspace: str, name: str, @@ -210,6 +233,11 @@ async def update_config( status_code=status.HTTP_204_NO_CONTENT, summary="Delete ExampleMiddlewareConfig", ) + @path_rule( + callers=[CallerKind.PRINCIPAL], + permissions=[ExampleMiddlewareConfigPerms.DELETE], + scopes=SCOPE.write(), + ) async def delete_config( workspace: str, name: str, diff --git a/plugins/example-plugin/src/nemo_example_plugin/service.py b/plugins/example-plugin/src/nemo_example_plugin/service.py index f18225d8e0..5453290ca2 100644 --- a/plugins/example-plugin/src/nemo_example_plugin/service.py +++ b/plugins/example-plugin/src/nemo_example_plugin/service.py @@ -31,6 +31,8 @@ from fastapi import APIRouter, Depends, HTTPException, Query, Request from fastapi.responses import Response +from nemo_example_plugin._perms import ExampleHelloPerms, ExampleItemPerms +from nemo_example_plugin.authz import SCOPE from nemo_example_plugin.config import ExampleConfig from nemo_example_plugin.core import say_hello from nemo_example_plugin.entities import ExampleItem @@ -45,6 +47,7 @@ UpdateExampleItemRequest, ) from nemo_platform_plugin.api.filters import make_filter_obj_dep +from nemo_platform_plugin.authz import CallerKind, path_rule from nemo_platform_plugin.entity_client import ( NemoEntitiesClient, NemoEntityConflictError, @@ -100,13 +103,21 @@ def get_routers(self) -> list[RouterSpec]: prefix="/v2/workspaces/{workspace}", ), RouterSpec( - add_function_routes(GreetFunction), + add_function_routes( + GreetFunction, + authz=SCOPE, + permission_description="Invoke the greet function", + ), tag="Example Functions", description="Non-streaming NemoFunction example.", prefix="/v2/workspaces/{workspace}", ), RouterSpec( - add_function_routes(CountFunction), + add_function_routes( + 
CountFunction, + authz=SCOPE, + permission_description="Invoke the count function", + ), tag="Example Functions", description="Streaming NDJSON NemoFunction example.", prefix="/v2/workspaces/{workspace}", @@ -128,6 +139,11 @@ def _build_hello_router() -> APIRouter: router = APIRouter() @router.get("/hello/{name}", response_model=HelloResponse) + @path_rule( + callers=[CallerKind.PRINCIPAL], + permissions=[ExampleHelloPerms.READ], + scopes=SCOPE.read(), + ) async def hello(name: str) -> HelloResponse: """Greet a name. @@ -162,6 +178,7 @@ def _build_binary_router() -> APIRouter: _store: dict[str, bytes] = {} @router.put("/blob/{name}", status_code=200, response_model=BlobUploadResponse) + @path_rule(callers=[CallerKind.PRINCIPAL], permissions=[], scopes=SCOPE.write()) async def upload_blob(name: str, request: Request) -> BlobUploadResponse: """Accept raw binary and store it. Returns byte count.""" data = await request.body() @@ -169,6 +186,7 @@ async def upload_blob(name: str, request: Request) -> BlobUploadResponse: return BlobUploadResponse(name=name, size=len(data)) @router.get("/blob/{name}", response_class=Response) + @path_rule(callers=[CallerKind.PRINCIPAL], permissions=[], scopes=SCOPE.read()) async def download_blob(name: str) -> Response: """Return stored binary content.""" if name not in _store: @@ -226,6 +244,11 @@ def _build_items_router() -> APIRouter: status_code=201, tags=["Example Items"], ) + @path_rule( + callers=[CallerKind.PRINCIPAL], + permissions=[ExampleItemPerms.CREATE], + scopes=SCOPE.write(), + ) async def create_item( workspace: str, body: CreateExampleItemRequest, @@ -266,6 +289,11 @@ async def create_item( response_model=ExampleItemPage, tags=["Example Items"], ) + @path_rule( + callers=[CallerKind.PRINCIPAL], + permissions=[ExampleItemPerms.LIST], + scopes=SCOPE.read(), + ) async def list_items( workspace: str, page: int = Query(default=1, ge=1, description="Page number (1-indexed)."), @@ -351,6 +379,11 @@ async def list_items( 
response_model=ExampleItem, tags=["Example Items"], ) + @path_rule( + callers=[CallerKind.PRINCIPAL], + permissions=[ExampleItemPerms.READ], + scopes=SCOPE.read(), + ) async def get_item( workspace: str, name: str, @@ -379,6 +412,11 @@ async def get_item( response_model=ExampleItem, tags=["Example Items"], ) + @path_rule( + callers=[CallerKind.PRINCIPAL], + permissions=[ExampleItemPerms.UPDATE], + scopes=SCOPE.write(), + ) async def update_item( workspace: str, name: str, @@ -427,6 +465,11 @@ async def update_item( status_code=204, tags=["Example Items"], ) + @path_rule( + callers=[CallerKind.PRINCIPAL], + permissions=[ExampleItemPerms.DELETE], + scopes=SCOPE.write(), + ) async def delete_item( workspace: str, name: str, diff --git a/plugins/example-plugin/tests/test_authz.py b/plugins/example-plugin/tests/test_authz.py new file mode 100644 index 0000000000..fef92b3746 --- /dev/null +++ b/plugins/example-plugin/tests/test_authz.py @@ -0,0 +1,39 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""Authorization derivation for the example plugin (every route ruled, no problems).""" + +from __future__ import annotations + +from nemo_example_plugin.service import ExampleService +from nemo_platform_plugin.authz_discovery import _derive_service_contribution + + +def test_example_authz_derivation_has_no_problems() -> None: + contrib, problems, _warnings = _derive_service_contribution(ExampleService()) + assert problems == [] + + # Minimal hello endpoint (non-workspace-scoped). + assert contrib.endpoints["/apis/example/hello/{name}"]["get"].permissions == ["example.hello.read"] + + # Items CRUD. 
+ items = "/apis/example/v2/workspaces/{workspace}/items" + assert contrib.endpoints[items]["post"].permissions == ["example.items.create"] + assert contrib.endpoints[items]["get"].permissions == ["example.items.list"] + assert contrib.endpoints[f"{items}/{{name}}"]["get"].permissions == ["example.items.read"] + assert contrib.endpoints[f"{items}/{{name}}"]["patch"].permissions == ["example.items.update"] + assert contrib.endpoints[f"{items}/{{name}}"]["delete"].permissions == ["example.items.delete"] + + # Middleware-config CRUD (hyphenated namespace segment). + mw = "/apis/example/v2/workspaces/{workspace}/middleware-configs" + assert contrib.endpoints[mw]["post"].permissions == ["example.middleware-configs.create"] + assert contrib.endpoints[f"{mw}/{{name}}"]["delete"].permissions == ["example.middleware-configs.delete"] + + # Factory-stamped function routes: the permissions they reference must be declared. + assert {"example.greet", "example.count"} <= set(contrib.permissions) + + # Every route is PRINCIPAL and none is denied. + for methods in contrib.endpoints.values(): + for binding in methods.values(): + assert binding.callers == ["principal"] + assert binding.deny is False diff --git a/plugins/nemo-agents/src/nemo_agents_plugin/api/v2/_perms.py b/plugins/nemo-agents/src/nemo_agents_plugin/api/v2/_perms.py new file mode 100644 index 0000000000..413ce91274 --- /dev/null +++ b/plugins/nemo-agents/src/nemo_agents_plugin/api/v2/_perms.py @@ -0,0 +1,33 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""Typed permission vocabulary for the agents plugin's hand-written routes. + +Sub-namespaces under ``agents`` for the agent CRUD, deployment lifecycle, and gateway +proxy routes. The five job collections' permissions (``agents.evaluate.*`` etc.) are +stamped onto the factory routes and derived from there, so they are not declared here. 
+Route handlers reference these constants in their ``@path_rule``; the platform derives +the catalog from the routes. +""" + +from __future__ import annotations + +from nemo_platform_plugin.authz import PermissionSet, perm + + +class AgentPerms(PermissionSet, namespace="agents.agents"): + CREATE = perm("Create agents") + LIST = perm("List agents") + READ = perm("Read an agent") + DELETE = perm("Delete an agent") + + +class DeploymentPerms(PermissionSet, namespace="agents.deployments"): + CREATE = perm("Create agent deployments") + LIST = perm("List agent deployments") + READ = perm("Read an agent deployment, including its logs and log stream") + DELETE = perm("Delete an agent deployment") + + +class GatewayPerms(PermissionSet, namespace="agents.gateway"): + INVOKE = perm("Invoke a deployed agent through the gateway proxy") diff --git a/plugins/nemo-agents/src/nemo_agents_plugin/api/v2/agents.py b/plugins/nemo-agents/src/nemo_agents_plugin/api/v2/agents.py index 3cbf142ccc..a4818d7be1 100644 --- a/plugins/nemo-agents/src/nemo_agents_plugin/api/v2/agents.py +++ b/plugins/nemo-agents/src/nemo_agents_plugin/api/v2/agents.py @@ -13,7 +13,9 @@ import logging from fastapi import APIRouter, Depends, HTTPException, Query +from nemo_agents_plugin.api.v2._perms import AgentPerms from nemo_agents_plugin.api.v2.dependencies import get_entity_client +from nemo_agents_plugin.authz import SCOPE from nemo_agents_plugin.entities import Agent, AgentDeployment from nemo_agents_plugin.schema import ( AgentFilter, @@ -21,6 +23,7 @@ CreateAgentRequest, ) from nemo_platform_plugin.api.filters import make_filter_obj_dep +from nemo_platform_plugin.authz import CallerKind, path_rule from nemo_platform_plugin.entity_client import NemoEntitiesClient, NemoEntityConflictError, NemoEntityNotFoundError from nemo_platform_plugin.schema import PaginationData @@ -37,6 +40,11 @@ @router.post("/agents", response_model=Agent, status_code=201, tags=["Agents"]) +@path_rule( + 
callers=[CallerKind.PRINCIPAL], + permissions=[AgentPerms.CREATE], + scopes=SCOPE.write(), +) async def create_agent( workspace: str, body: CreateAgentRequest, @@ -64,6 +72,11 @@ async def create_agent( @router.get("/agents", response_model=AgentPage, tags=["Agents"]) +@path_rule( + callers=[CallerKind.PRINCIPAL], + permissions=[AgentPerms.LIST], + scopes=SCOPE.read(), +) async def list_agents( workspace: str, page: int = Query(default=1, ge=1), @@ -97,6 +110,11 @@ async def list_agents( @router.get("/agents/{name}", response_model=Agent, tags=["Agents"]) +@path_rule( + callers=[CallerKind.PRINCIPAL], + permissions=[AgentPerms.READ], + scopes=SCOPE.read(), +) async def get_agent( workspace: str, name: str, @@ -117,6 +135,11 @@ async def get_agent( @router.delete("/agents/{name}", status_code=204, tags=["Agents"]) +@path_rule( + callers=[CallerKind.PRINCIPAL], + permissions=[AgentPerms.DELETE], + scopes=SCOPE.write(), +) async def delete_agent( workspace: str, name: str, diff --git a/plugins/nemo-agents/src/nemo_agents_plugin/api/v2/deployment_logs.py b/plugins/nemo-agents/src/nemo_agents_plugin/api/v2/deployment_logs.py index c10a30c4c6..2fb5cb02d3 100644 --- a/plugins/nemo-agents/src/nemo_agents_plugin/api/v2/deployment_logs.py +++ b/plugins/nemo-agents/src/nemo_agents_plugin/api/v2/deployment_logs.py @@ -37,9 +37,12 @@ from fastapi import APIRouter, Depends, HTTPException, Query, Request from fastapi.responses import StreamingResponse +from nemo_agents_plugin.api.v2._perms import DeploymentPerms from nemo_agents_plugin.api.v2.dependencies import get_entity_client +from nemo_agents_plugin.authz import SCOPE from nemo_agents_plugin.entities import AgentDeployment from nemo_agents_plugin.runner.registry import get_runner_backend +from nemo_platform_plugin.authz import CallerKind, path_rule from nemo_platform_plugin.entity_client import NemoEntitiesClient, NemoEntityNotFoundError from pydantic import BaseModel, Field @@ -148,6 +151,11 @@ def _read_tail(path: Path, n: 
int) -> tuple[list[str], int]: response_model=DeploymentLogsResponse, tags=["Agent Deployments"], ) +@path_rule( + callers=[CallerKind.PRINCIPAL], + permissions=[DeploymentPerms.READ], + scopes=SCOPE.read(), +) async def get_deployment_logs( workspace: str, name: str, @@ -241,6 +249,11 @@ async def _stream_log_lines( @router.get("/deployments/{name}/logs/stream", tags=["Agent Deployments"]) +@path_rule( + callers=[CallerKind.PRINCIPAL], + permissions=[DeploymentPerms.READ], + scopes=SCOPE.read(), +) async def stream_deployment_logs( workspace: str, name: str, diff --git a/plugins/nemo-agents/src/nemo_agents_plugin/api/v2/deployments.py b/plugins/nemo-agents/src/nemo_agents_plugin/api/v2/deployments.py index a1828bc44e..de9a7eea6e 100644 --- a/plugins/nemo-agents/src/nemo_agents_plugin/api/v2/deployments.py +++ b/plugins/nemo-agents/src/nemo_agents_plugin/api/v2/deployments.py @@ -20,7 +20,9 @@ import secrets from fastapi import APIRouter, Depends, HTTPException, Query +from nemo_agents_plugin.api.v2._perms import DeploymentPerms from nemo_agents_plugin.api.v2.dependencies import get_entity_client +from nemo_agents_plugin.authz import SCOPE from nemo_agents_plugin.entities import Agent, AgentDeployment from nemo_agents_plugin.schema import ( CreateDeploymentRequest, @@ -29,6 +31,7 @@ ) from nemo_agents_plugin.utils import inject_default_model, inject_gateway_url, inject_nemo_trace_fields from nemo_platform_plugin.api.filters import make_filter_obj_dep +from nemo_platform_plugin.authz import CallerKind, path_rule from nemo_platform_plugin.entity_client import NemoEntitiesClient, NemoEntityConflictError, NemoEntityNotFoundError from nemo_platform_plugin.schema import PaginationData @@ -40,6 +43,11 @@ @router.post("/deployments", response_model=AgentDeployment, status_code=201, tags=["Agent Deployments"]) +@path_rule( + callers=[CallerKind.PRINCIPAL], + permissions=[DeploymentPerms.CREATE], + scopes=SCOPE.write(), +) async def create_deployment( workspace: str, body: 
CreateDeploymentRequest, @@ -93,6 +101,11 @@ async def create_deployment( @router.get("/deployments", response_model=DeploymentPage, tags=["Agent Deployments"]) +@path_rule( + callers=[CallerKind.PRINCIPAL], + permissions=[DeploymentPerms.LIST], + scopes=SCOPE.read(), +) async def list_deployments( workspace: str, page: int = Query(default=1, ge=1), @@ -126,6 +139,11 @@ async def list_deployments( @router.get("/deployments/{name}", response_model=AgentDeployment, tags=["Agent Deployments"]) +@path_rule( + callers=[CallerKind.PRINCIPAL], + permissions=[DeploymentPerms.READ], + scopes=SCOPE.read(), +) async def get_deployment( workspace: str, name: str, @@ -146,6 +164,11 @@ async def get_deployment( @router.delete("/deployments/{name}", status_code=204, tags=["Agent Deployments"]) +@path_rule( + callers=[CallerKind.PRINCIPAL], + permissions=[DeploymentPerms.DELETE], + scopes=SCOPE.write(), +) async def delete_deployment( workspace: str, name: str, diff --git a/plugins/nemo-agents/src/nemo_agents_plugin/api/v2/gateway.py b/plugins/nemo-agents/src/nemo_agents_plugin/api/v2/gateway.py index eb0c658ac7..c7ee1ede1d 100644 --- a/plugins/nemo-agents/src/nemo_agents_plugin/api/v2/gateway.py +++ b/plugins/nemo-agents/src/nemo_agents_plugin/api/v2/gateway.py @@ -33,16 +33,24 @@ import httpx from fastapi import APIRouter, Depends, HTTPException, Request from fastapi.responses import StreamingResponse +from nemo_agents_plugin.api.v2._perms import GatewayPerms from nemo_agents_plugin.api.v2.dependencies import get_entity_client +from nemo_agents_plugin.authz import SCOPE from nemo_agents_plugin.entities import Agent, AgentDeployment +from nemo_platform_plugin.authz import CallerKind, path_rule from nemo_platform_plugin.entity_client import NemoEntitiesClient, NemoEntityNotFoundError logger = logging.getLogger(__name__) router = APIRouter() -# HTTP methods to forward through the proxy -_PROXY_METHODS = ["GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS"] +# HTTP methods 
forwarded through the proxy, split by authorization scope. Read-like methods +# require only agents:read; mutating methods require agents:write. This mirrors the Inference +# Gateway's proxy precedent (its GET proxy is scoped inference:read), so a read-scoped token is +# not denied on read-only proxy calls. Both groups still require the same agents.gateway.invoke +# permission. +_PROXY_READ_METHODS = ["GET", "HEAD", "OPTIONS"] +_PROXY_WRITE_METHODS = ["POST", "PUT", "PATCH", "DELETE"] # Headers we strip before forwarding to the agent process (hop-by-hop + platform-internal) _HOP_BY_HOP = { @@ -61,46 +69,77 @@ } +async def _serve_agent_proxy( + workspace: str, + name: str, + trailing_uri: str, + request: Request, + entity_client: NemoEntitiesClient, +) -> StreamingResponse: + """Find the first ``running`` deployment for the named agent and forward the request to it. + + Returns ``503`` if no running deployment is found. Shared by the read/write route handlers, + which differ only in their authorization scope (``agents:read`` vs ``agents:write``). + """ + endpoint = await _resolve_agent_endpoint(name, workspace, entity_client) + return await _proxy(request, endpoint, trailing_uri, model_name=name) + + @router.api_route( "/agents/{name}/-/{trailing_uri:path}", - methods=_PROXY_METHODS, + methods=_PROXY_READ_METHODS, tags=["Agent Gateway"], include_in_schema=False, ) -async def proxy_by_agent_name( +@path_rule( + callers=[CallerKind.PRINCIPAL], + permissions=[GatewayPerms.INVOKE], + scopes=SCOPE.read(), +) +async def proxy_by_agent_name_read( workspace: str, name: str, trailing_uri: str, request: Request, entity_client: NemoEntitiesClient = Depends(get_entity_client), ) -> StreamingResponse: - """Proxy a request to the active deployment for *agent name*. - - The gateway finds the first ``running`` deployment for the named agent - and forwards the request to it. Returns ``503`` if no running deployment - is found. 
- """ - endpoint = await _resolve_agent_endpoint(name, workspace, entity_client) - return await _proxy(request, endpoint, trailing_uri, model_name=name) + """Read-scoped (GET/HEAD/OPTIONS) proxy to the active deployment for *agent name*.""" + return await _serve_agent_proxy(workspace, name, trailing_uri, request, entity_client) @router.api_route( - "/deployments/{name}/-/{trailing_uri:path}", - methods=_PROXY_METHODS, + "/agents/{name}/-/{trailing_uri:path}", + methods=_PROXY_WRITE_METHODS, tags=["Agent Gateway"], include_in_schema=False, ) -async def proxy_by_deployment_name( +@path_rule( + callers=[CallerKind.PRINCIPAL], + permissions=[GatewayPerms.INVOKE], + scopes=SCOPE.write(), +) +async def proxy_by_agent_name_write( workspace: str, name: str, trailing_uri: str, request: Request, entity_client: NemoEntitiesClient = Depends(get_entity_client), +) -> StreamingResponse: + """Write-scoped (POST/PUT/PATCH/DELETE) proxy to the active deployment for *agent name*.""" + return await _serve_agent_proxy(workspace, name, trailing_uri, request, entity_client) + + +async def _serve_deployment_proxy( + workspace: str, + name: str, + trailing_uri: str, + request: Request, + entity_client: NemoEntitiesClient, ) -> StreamingResponse: """Proxy a request directly to the named deployment. - Returns ``404`` if the deployment doesn't exist, ``503`` if it isn't - currently running. + Returns ``404`` if the deployment doesn't exist, ``503`` if it isn't currently running. + Shared by the read/write route handlers, which differ only in authorization scope. 
""" try: dep = await entity_client.get(AgentDeployment, name=name, workspace=workspace) @@ -120,6 +159,50 @@ async def proxy_by_deployment_name( return await _proxy(request, dep.endpoint, trailing_uri, model_name=name) +@router.api_route( + "/deployments/{name}/-/{trailing_uri:path}", + methods=_PROXY_READ_METHODS, + tags=["Agent Gateway"], + include_in_schema=False, +) +@path_rule( + callers=[CallerKind.PRINCIPAL], + permissions=[GatewayPerms.INVOKE], + scopes=SCOPE.read(), +) +async def proxy_by_deployment_name_read( + workspace: str, + name: str, + trailing_uri: str, + request: Request, + entity_client: NemoEntitiesClient = Depends(get_entity_client), +) -> StreamingResponse: + """Read-scoped (GET/HEAD/OPTIONS) proxy directly to the named deployment.""" + return await _serve_deployment_proxy(workspace, name, trailing_uri, request, entity_client) + + +@router.api_route( + "/deployments/{name}/-/{trailing_uri:path}", + methods=_PROXY_WRITE_METHODS, + tags=["Agent Gateway"], + include_in_schema=False, +) +@path_rule( + callers=[CallerKind.PRINCIPAL], + permissions=[GatewayPerms.INVOKE], + scopes=SCOPE.write(), +) +async def proxy_by_deployment_name_write( + workspace: str, + name: str, + trailing_uri: str, + request: Request, + entity_client: NemoEntitiesClient = Depends(get_entity_client), +) -> StreamingResponse: + """Write-scoped (POST/PUT/PATCH/DELETE) proxy directly to the named deployment.""" + return await _serve_deployment_proxy(workspace, name, trailing_uri, request, entity_client) + + async def _resolve_agent_endpoint(name: str, workspace: str, entity_client: NemoEntitiesClient) -> str: """Find the endpoint of the first running deployment for the given agent.""" try: diff --git a/plugins/nemo-agents/src/nemo_agents_plugin/authz.py b/plugins/nemo-agents/src/nemo_agents_plugin/authz.py new file mode 100644 index 0000000000..c6dd83ffea --- /dev/null +++ b/plugins/nemo-agents/src/nemo_agents_plugin/authz.py @@ -0,0 +1,14 @@ +# SPDX-FileCopyrightText: 
Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""The agents plugin's authz scope. + +Route modules and the service import :data:`SCOPE` so the plugin shares one +``AuthzScope("agents")``. +""" + +from __future__ import annotations + +from nemo_platform_plugin.authz import AuthzScope + +SCOPE = AuthzScope("agents") diff --git a/plugins/nemo-agents/src/nemo_agents_plugin/service.py b/plugins/nemo-agents/src/nemo_agents_plugin/service.py index a01d0ab183..a85ac4ed39 100644 --- a/plugins/nemo-agents/src/nemo_agents_plugin/service.py +++ b/plugins/nemo-agents/src/nemo_agents_plugin/service.py @@ -6,37 +6,69 @@ from __future__ import annotations import logging -from typing import ClassVar +from typing import ClassVar, NamedTuple -from nemo_platform_plugin.authz import AuthzContribution, AuthzEndpointMethod +from nemo_agents_plugin.authz import SCOPE +from nemo_platform_plugin.job import NemoJob from nemo_platform_plugin.jobs.routes import add_job_routes from nemo_platform_plugin.service import NemoService, RouterSpec logger = logging.getLogger(__name__) -_SERVICE_NAME = "agents" -_READ_SCOPES = [f"{_SERVICE_NAME}:read", "platform:read"] -_WRITE_SCOPES = [f"{_SERVICE_NAME}:write", "platform:write"] - -def _read_method(permission: str) -> AuthzEndpointMethod: - return AuthzEndpointMethod(permissions=[permission], scopes=list(_READ_SCOPES)) - - -def _write_method(permission: str) -> AuthzEndpointMethod: - return AuthzEndpointMethod(permissions=[permission], scopes=list(_WRITE_SCOPES)) - - -def _read_methods(permission: str) -> dict[str, AuthzEndpointMethod]: - return {method: _read_method(permission) for method in ("get", "head")} - - -def _gateway_methods(permission: str) -> dict[str, AuthzEndpointMethod]: - read_methods = {"get", "head", "options"} - return { - method: _read_method(permission) if method in read_methods else _write_method(permission) - for method in ("delete", "get", "head", 
"options", "patch", "post", "put") - } +class _JobCollection(NamedTuple): + """One agents job collection — the single source of truth for both its permission + sub-namespace and its mounted router, so the two can't drift.""" + + job_cls: type[NemoJob] + subname: str # permission sub-namespace suffix -> agents.{subname}.{create,...} + service_name: str | None # distinct jobs source (None ⇒ add_job_routes default) + description: str + + +# Sub-names are concise and stable and need not match the job's URL path segment: +# EvaluateAgentJob /jobs/evaluate -> agents.evaluate +# EvaluateSuiteJob /jobs/evaluate-suite -> agents.suite +# OptimizeSkillsJob /jobs/optimize-skills -> agents.optimize-skills +# AnalyzeBatchJob /jobs/analyze -> agents.analyze +# OptimizeAgentJob /jobs/optimize -> agents.optimize +# Distinct service_name per job type so each list endpoint filters to rows of its own type only +# (add_job_routes filters source=service_name); sharing the default would let /jobs/{type} pull in +# sibling-type rows and 500 on the wrong schema. 
+def _job_collections() -> list[_JobCollection]: + from nemo_agents_plugin.jobs.analyze_batch import AnalyzeBatchJob + from nemo_agents_plugin.jobs.evaluate_agent import EvaluateAgentJob + from nemo_agents_plugin.jobs.evaluate_suite import EvaluateSuiteJob + from nemo_agents_plugin.jobs.optimize_agent import OptimizeAgentJob + from nemo_agents_plugin.jobs.optimize_skills import OptimizeSkillsJob + + return [ + _JobCollection(EvaluateAgentJob, "evaluate", None, "Submit and track agent evaluation jobs"), + _JobCollection( + EvaluateSuiteJob, + "suite", + "nemo-agents-plugin-evaluate-suite", + "Submit and track evaluate-suite jobs (Harbor / NAT eval runner).", + ), + _JobCollection( + OptimizeSkillsJob, + "optimize-skills", + "nemo-agents-plugin-optimize-skills", + "Submit and track optimize-skills jobs (skills-improvement loop).", + ), + _JobCollection( + AnalyzeBatchJob, + "analyze", + "nemo-agents-plugin-analyze", + "Submit and track analyze jobs (eval-suite batch analysis).", + ), + _JobCollection( + OptimizeAgentJob, + "optimize", + "nemo-agents-plugin-optimize", + "Submit and track optimize jobs (prompt tuning, HPO).", + ), + ] class AgentsService(NemoService): @@ -52,99 +84,9 @@ class AgentsService(NemoService): not own the controller lifecycle. 
""" - name: ClassVar[str] = _SERVICE_NAME + name: ClassVar[str] = "agents" dependencies: ClassVar[list[str]] = ["entities", "auth", "secrets", "jobs", "files", "inference-gateway"] - @classmethod - def get_authz_contribution(cls) -> AuthzContribution: - """Authorization policy for agents plugin routes.""" - base = f"/apis/{cls.name}/v2/workspaces/{{workspace}}" - - agent_create = f"{cls.name}.agents.create" - agent_delete = f"{cls.name}.agents.delete" - agent_list = f"{cls.name}.agents.list" - agent_read = f"{cls.name}.agents.read" - deployment_create = f"{cls.name}.deployments.create" - deployment_delete = f"{cls.name}.deployments.delete" - deployment_list = f"{cls.name}.deployments.list" - deployment_read = f"{cls.name}.deployments.read" - gateway_exec = f"{cls.name}.gateway.exec" - job_cancel = f"{cls.name}.jobs.cancel" - job_create = f"{cls.name}.jobs.create" - job_delete = f"{cls.name}.jobs.delete" - job_list = f"{cls.name}.jobs.list" - job_read = f"{cls.name}.jobs.read" - - endpoints: dict[str, dict[str, AuthzEndpointMethod]] = { - f"{base}/agents": { - **_read_methods(agent_list), - "post": _write_method(agent_create), - }, - f"{base}/agents/{{name}}": { - "delete": _write_method(agent_delete), - **_read_methods(agent_read), - }, - f"{base}/agents/{{name}}/-/{{trailing_uri}}": _gateway_methods(gateway_exec), - f"{base}/deployments": { - **_read_methods(deployment_list), - "post": _write_method(deployment_create), - }, - f"{base}/deployments/{{name}}": { - "delete": _write_method(deployment_delete), - **_read_methods(deployment_read), - }, - f"{base}/deployments/{{name}}/-/{{trailing_uri}}": _gateway_methods(gateway_exec), - f"{base}/deployments/{{name}}/logs": _read_methods(deployment_read), - f"{base}/deployments/{{name}}/logs/stream": _read_methods(deployment_read), - } - - for job_name in ("evaluate", "evaluate-suite", "optimize-skills", "analyze", "optimize"): - jobs_base = f"{base}/jobs/{job_name}" - endpoints.update( - { - jobs_base: { - 
**_read_methods(job_list), - "post": _write_method(job_create), - }, - f"{jobs_base}/{{name}}": { - "delete": _write_method(job_delete), - **_read_methods(job_read), - }, - f"{jobs_base}/{{name}}/cancel": { - "post": _write_method(job_cancel), - }, - f"{jobs_base}/{{name}}/logs": _read_methods(job_read), - f"{jobs_base}/{{name}}/results": _read_methods(job_read), - f"{jobs_base}/{{name}}/status": _read_methods(job_read), - f"{jobs_base}/{{job}}/results/{{name}}": _read_methods(job_read), - f"{jobs_base}/{{job}}/results/{{name}}/download": _read_methods(job_read), - } - ) - - return AuthzContribution( - permissions={ - agent_create: "Create agents", - agent_delete: "Delete agents", - agent_list: "List agents", - agent_read: "Read agents", - deployment_create: "Create agent deployments", - deployment_delete: "Delete agent deployments", - deployment_list: "List agent deployments", - deployment_read: "Read agent deployments", - gateway_exec: "Execute agent gateway requests", - job_cancel: "Cancel agent jobs", - job_create: "Create agent jobs", - job_delete: "Delete agent jobs", - job_list: "List agent jobs", - job_read: "Read agent jobs", - }, - endpoints=endpoints, - role_permissions={ - "Viewer": [gateway_exec], - "Editor": [gateway_exec], - }, - ) - def get_routers(self) -> list[RouterSpec]: from nemo_agents_plugin.api.v2 import ( agents, @@ -152,14 +94,9 @@ def get_routers(self) -> list[RouterSpec]: deployments, gateway, ) - from nemo_agents_plugin.jobs.analyze_batch import AnalyzeBatchJob - from nemo_agents_plugin.jobs.evaluate_agent import EvaluateAgentJob - from nemo_agents_plugin.jobs.evaluate_suite import EvaluateSuiteJob - from nemo_agents_plugin.jobs.optimize_agent import OptimizeAgentJob - from nemo_agents_plugin.jobs.optimize_skills import OptimizeSkillsJob _prefix = "/v2/workspaces/{workspace}" - return [ + specs: list[RouterSpec] = [ RouterSpec(agents.router, tag="Agents", description="Agent CRUD", prefix=_prefix), RouterSpec(deployments.router, 
tag="Agent Deployments", description="Deployment lifecycle", prefix=_prefix), RouterSpec( @@ -171,40 +108,20 @@ def get_routers(self) -> list[RouterSpec]: RouterSpec( gateway.router, tag="Agent Gateway", description="Proxy to running agent deployments", prefix=_prefix ), - RouterSpec( - add_job_routes(EvaluateAgentJob), - tag="Agents", - description="Submit and track agent evaluation jobs", - prefix=_prefix, - ), - # Distinct service_name per job type so each list endpoint filters - # to rows of its own type only. add_job_routes filters by - # source=service_name; if all jobs shared the default service_name - # ("nemo-agents-plugin"), listing /jobs/evaluate would pull in rows - # from sibling types and 500 on Pydantic validation against the - # wrong schema. - RouterSpec( - add_job_routes(EvaluateSuiteJob, service_name="nemo-agents-plugin-evaluate-suite"), - tag="Agents", - description="Submit and track evaluate-suite jobs (Harbor / NAT eval runner).", - prefix=_prefix, - ), - RouterSpec( - add_job_routes(OptimizeSkillsJob, service_name="nemo-agents-plugin-optimize-skills"), - tag="Agents", - description="Submit and track optimize-skills jobs (skills-improvement loop).", - prefix=_prefix, - ), - RouterSpec( - add_job_routes(AnalyzeBatchJob, service_name="nemo-agents-plugin-analyze"), - tag="Agents", - description="Submit and track analyze jobs (eval-suite batch analysis).", - prefix=_prefix, - ), - RouterSpec( - add_job_routes(OptimizeAgentJob, service_name="nemo-agents-plugin-optimize"), - tag="Agents", - description="Submit and track optimize jobs (prompt tuning, HPO).", - prefix=_prefix, - ), ] + # Job-collection routers, derived from the single _job_collections() source so a new job + # can't be wired here but missed in the permission map (or vice versa). 
+ for collection in _job_collections(): + specs.append( + RouterSpec( + add_job_routes( + collection.job_cls, + service_name=collection.service_name, + authz=SCOPE.child(collection.subname), + ), + tag="Agents", + description=collection.description, + prefix=_prefix, + ) + ) + return specs diff --git a/plugins/nemo-agents/tests/test_authz.py b/plugins/nemo-agents/tests/test_authz.py new file mode 100644 index 0000000000..cc4773034f --- /dev/null +++ b/plugins/nemo-agents/tests/test_authz.py @@ -0,0 +1,111 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""Authorization derivation for the agents plugin. + +Asserts that every mounted route carries a valid ``@path_rule`` whose permissions all +share the ``agents`` namespace (so ``_derive_service_contribution`` reports no problems +and derives the catalog from the routes), and spot-checks the shapes that matter: a CRUD +binding, the gateway proxy binding (PRINCIPAL + ``agents.gateway.invoke`` across the +wildcard path and every proxied method), and a job-factory binding. +""" + +from __future__ import annotations + +from nemo_agents_plugin.service import AgentsService +from nemo_platform_plugin.authz import AuthzContribution +from nemo_platform_plugin.authz_discovery import _derive_service_contribution + +_BASE = "/apis/agents/v2/workspaces/{workspace}" +_GATEWAY_AGENT = f"{_BASE}/agents/{{name}}/-/{{trailing_uri:path}}" +# Methods the gateway forwards, split by scope (see gateway._PROXY_READ_METHODS / +# _PROXY_WRITE_METHODS), lower-cased for the wire format. 
+_PROXY_READ_METHODS = {"get", "head", "options"} +_PROXY_WRITE_METHODS = {"post", "put", "patch", "delete"} +_PROXY_METHODS = _PROXY_READ_METHODS | _PROXY_WRITE_METHODS + + +def _contribution() -> AuthzContribution: + contrib, problems, _warnings = _derive_service_contribution(AgentsService()) + # No problems is the load-bearing assertion: every route is ruled and every + # referenced permission lives under the service's own ``agents`` namespace. + assert problems == [], problems + return contrib + + +def test_agents_service_derivation_has_no_problems() -> None: + contrib = _contribution() + # All derived permissions live under the agents namespace. + assert contrib.permissions + assert all(perm_id.startswith("agents.") for perm_id in contrib.permissions) + # Every derived permission carries a non-empty description. + assert all(desc for desc in contrib.permissions.values()) + + +def test_crud_binding_agent_create() -> None: + contrib = _contribution() + binding = contrib.endpoints[f"{_BASE}/agents"]["post"] + assert binding.permissions == ["agents.agents.create"] + assert binding.scopes == ["agents:write", "platform:write"] + assert binding.callers == ["principal"] + assert not binding.deny + # The corresponding permission id is declared with a description. + assert "agents.agents.create" in contrib.permissions + + +def test_crud_binding_deployment_read_covers_logs() -> None: + contrib = _contribution() + # The two log routes are read-only and share the deployments.read permission. 
+ for path in (f"{_BASE}/deployments/{{name}}/logs", f"{_BASE}/deployments/{{name}}/logs/stream"): + binding = contrib.endpoints[path]["get"] + assert binding.permissions == ["agents.deployments.read"] + assert binding.scopes == ["agents:read", "platform:read"] + assert binding.callers == ["principal"] + + +def test_gateway_proxy_binding() -> None: + contrib = _contribution() + methods = contrib.endpoints[_GATEWAY_AGENT] + # The proxy spans the wildcard ``{trailing_uri:path}`` route across every forwarded method. + # All methods require agents.gateway.invoke, but read-like methods are agents:read-scoped + # and mutating methods agents:write-scoped, so a read-scoped token isn't denied on read-only + # proxy calls (mirrors the Inference Gateway's per-method proxy scopes). + assert set(methods) == _PROXY_METHODS + for method, binding in methods.items(): + assert binding.permissions == ["agents.gateway.invoke"], method + assert binding.callers == ["principal"], method + assert not binding.deny, method + expected_scopes = ( + ["agents:write", "platform:write"] if method in _PROXY_WRITE_METHODS else ["agents:read", "platform:read"] + ) + assert binding.scopes == expected_scopes, method + # The deployment-name proxy route is split identically. + deployment_gw = f"{_BASE}/deployments/{{name}}/-/{{trailing_uri:path}}" + assert set(contrib.endpoints[deployment_gw]) == _PROXY_METHODS + assert contrib.endpoints[deployment_gw]["post"].scopes == ["agents:write", "platform:write"] + assert contrib.endpoints[deployment_gw]["get"].scopes == ["agents:read", "platform:read"] + assert contrib.endpoints[deployment_gw]["post"].permissions == ["agents.gateway.invoke"] + # The coarse permission is declared. + assert "agents.gateway.invoke" in contrib.permissions + + +def test_job_factory_binding() -> None: + contrib = _contribution() + # evaluate-suite maps to the ``agents.suite`` sub-namespace; its collection + # POST is a create, item DELETE is a delete, both PRINCIPAL. 
+ collection = f"{_BASE}/jobs/evaluate-suite" + create = contrib.endpoints[collection]["post"] + assert create.permissions == ["agents.suite.create"] + assert create.scopes == ["agents:write", "platform:write"] + assert create.callers == ["principal"] + + delete = contrib.endpoints[f"{collection}/{{name}}"]["delete"] + assert delete.permissions == ["agents.suite.delete"] + + # Every job-factory permission for all five collections is declared. + expected_job_perms = { + f"agents.{sub}.{verb}" + for sub in ("evaluate", "suite", "optimize-skills", "analyze", "optimize") + for verb in ("create", "list", "read", "delete", "cancel") + } + assert expected_job_perms <= set(contrib.permissions) diff --git a/plugins/nemo-agents/tests/unit/test_service.py b/plugins/nemo-agents/tests/unit/test_service.py index b1ad121fc2..6650b9c058 100644 --- a/plugins/nemo-agents/tests/unit/test_service.py +++ b/plugins/nemo-agents/tests/unit/test_service.py @@ -12,8 +12,6 @@ from nemo_agents_plugin.jobs.optimize_agent import OptimizeAgentJob from nemo_agents_plugin.jobs.optimize_skills import OptimizeSkillsJob from nemo_agents_plugin.service import AgentsService -from nemo_platform_plugin.authz_format import validate_static_authz_data -from nemo_platform_plugin.authz_merge import merge_authz_contributions from nemo_platform_plugin.scheduler import submit_path_for @@ -32,56 +30,12 @@ def _mounted_routes() -> dict[str, set[str]]: def _mounted_post_paths() -> set[str]: """All POST paths mounted by AgentsService, regardless of which router owns them. - Avoids the description-string filter that earlier revisions used — copy-only - docstring edits in service.py should not break route-shape tests. + Filters by HTTP method rather than description string, so copy-only docstring + edits in service.py don't break route-shape tests. 
""" return {path for path, methods in _mounted_routes().items() if "POST" in methods} -def test_authz_contribution_matches_mounted_routes() -> None: - """Every agents API route should be registered with the PDP.""" - endpoints = AgentsService.get_authz_contribution().endpoints - - for path, methods in _mounted_routes().items(): - assert path in endpoints - for method in methods: - assert method.lower() in endpoints[path] - - -def test_authz_contribution_grants_studio_deployments_list_to_viewer() -> None: - """Regression for 403 on GET /apis/agents/v2/workspaces/{workspace}/deployments.""" - contribution = AgentsService.get_authz_contribution() - base_authz = { - "authz": { - "permissions": {}, - "roles": { - "Viewer": {"permissions": []}, - "Editor": {"permissions": []}, - }, - "endpoints": {}, - } - } - - merged = merge_authz_contributions(base_authz, [contribution.to_dict()]) - - validate_static_authz_data(merged) - viewer_permissions = merged["authz"]["roles"]["Viewer"]["permissions"] - editor_permissions = merged["authz"]["roles"]["Editor"]["permissions"] - endpoints = merged["authz"]["endpoints"] - - deployments_path = "/apis/agents/v2/workspaces/{workspace}/deployments" - assert endpoints[deployments_path]["get"]["permissions"] == ["agents.deployments.list"] - assert endpoints[deployments_path]["get"]["scopes"] == ["agents:read", "platform:read"] - assert endpoints[deployments_path]["head"]["permissions"] == ["agents.deployments.list"] - assert endpoints[deployments_path]["head"]["scopes"] == ["agents:read", "platform:read"] - job_result_path = "/apis/agents/v2/workspaces/{workspace}/jobs/evaluate/{job}/results/{name}" - assert endpoints[job_result_path]["head"]["permissions"] == ["agents.jobs.read"] - assert "agents.deployments.list" in viewer_permissions - assert "agents.deployments.read" in viewer_permissions - assert "agents.deployments.create" in editor_permissions - assert "agents.gateway.exec" in viewer_permissions - - def 
test_evaluate_job_route_matches_generated_submit_path() -> None: assert submit_path_for(EvaluateAgentJob, workspace="{workspace}") in _mounted_post_paths() diff --git a/plugins/nemo-anonymizer/src/nemo_anonymizer_plugin/service.py b/plugins/nemo-anonymizer/src/nemo_anonymizer_plugin/service.py index ecade31a85..c6ca07fccf 100644 --- a/plugins/nemo-anonymizer/src/nemo_anonymizer_plugin/service.py +++ b/plugins/nemo-anonymizer/src/nemo_anonymizer_plugin/service.py @@ -11,12 +11,6 @@ from data_designer_nemo.errors import NDDInternalError, NDDInvalidConfigError from fastapi import Request from nemo_anonymizer_plugin.app.errors import AnonymizerInternalError, AnonymizerInvalidConfigError -from nemo_platform_plugin.authz import ( - AuthzContribution, - authz_for_workspace_function, - authz_for_workspace_job_collection, - combine_authz_contributions, -) from nemo_platform_plugin.service import NemoService, RouterSpec from pydantic import ValidationError from starlette import status @@ -36,36 +30,27 @@ class AnonymizerService(NemoService): "inference-gateway", ] - @classmethod - def get_authz_contribution(cls) -> AuthzContribution: - return combine_authz_contributions( - authz_for_workspace_function( - api_area=cls.name, - function_suffix="/preview", - permission_prefix=f"{cls.name}.preview", - ), - authz_for_workspace_job_collection( - api_area=cls.name, - collection_suffix="/jobs/run", - permission_prefix=f"{cls.name}.jobs", - ), - ) - def get_routers(self) -> list[RouterSpec]: from nemo_anonymizer_plugin.functions.preview import PreviewFunction from nemo_anonymizer_plugin.jobs.run import RunJob + from nemo_platform_plugin.authz import AuthzScope from nemo_platform_plugin.functions.routes import add_function_routes from nemo_platform_plugin.jobs.routes import add_job_routes + anonymizer = AuthzScope("anonymizer") return [ RouterSpec( - add_function_routes(PreviewFunction), + add_function_routes( + PreviewFunction, + authz=anonymizer, + permission_description="Preview an 
Anonymizer config", + ), prefix="/v2/workspaces/{workspace}", tag="Anonymizer", description="Streaming preview of an Anonymizer config.", ), RouterSpec( - add_job_routes(RunJob), + add_job_routes(RunJob, authz=anonymizer), prefix="/v2/workspaces/{workspace}", tag="Anonymizer", description="Job endpoints", diff --git a/plugins/nemo-anonymizer/tests/unit/test_authz.py b/plugins/nemo-anonymizer/tests/unit/test_authz.py new file mode 100644 index 0000000000..b064fdab9a --- /dev/null +++ b/plugins/nemo-anonymizer/tests/unit/test_authz.py @@ -0,0 +1,38 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""Authorization derivation for the anonymizer plugin (every route ruled, no problems).""" + +from __future__ import annotations + +from nemo_anonymizer_plugin.service import AnonymizerService +from nemo_platform_plugin.authz_discovery import _derive_service_contribution + + +def test_anonymizer_authz_derivation_has_no_problems() -> None: + contrib, problems, _warnings = _derive_service_contribution(AnonymizerService()) + assert problems == [] + + # Job-factory perms plus the preview function perm, all declared. + assert { + "anonymizer.create", + "anonymizer.list", + "anonymizer.read", + "anonymizer.delete", + "anonymizer.cancel", + "anonymizer.preview", + } <= set(contrib.permissions) + + # Pin verb->permission so a mis-stamp (e.g. create<->read) is localized to this plugin. 
+ jobs = "/apis/anonymizer/v2/workspaces/{workspace}/jobs/run" + assert contrib.endpoints[jobs]["post"].permissions == ["anonymizer.create"] + assert contrib.endpoints[jobs]["get"].permissions == ["anonymizer.list"] + assert contrib.endpoints[f"{jobs}/{{name}}"]["delete"].permissions == ["anonymizer.delete"] + preview = "/apis/anonymizer/v2/workspaces/{workspace}/preview" + assert contrib.endpoints[preview]["post"].permissions == ["anonymizer.preview"] + + # Every mounted route carries a valid rule (none falls through to deny). + assert contrib.endpoints + for methods in contrib.endpoints.values(): + for binding in methods.values(): + assert binding.deny is False diff --git a/plugins/nemo-auditor/src/nemo_auditor/api/v2/_perms.py b/plugins/nemo-auditor/src/nemo_auditor/api/v2/_perms.py new file mode 100644 index 0000000000..c691070be6 --- /dev/null +++ b/plugins/nemo-auditor/src/nemo_auditor/api/v2/_perms.py @@ -0,0 +1,29 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""Typed permission vocabulary for the auditor plugin's CRUD routes. + +Two sub-namespaces under ``auditor`` (one per entity collection). Route handlers +reference these constants in their ``@path_rule``; the platform derives the permission +catalog from the routes, so there is no parallel list to keep in sync. 
+""" + +from __future__ import annotations + +from nemo_platform_plugin.authz import PermissionSet, perm + + +class AuditConfigPerms(PermissionSet, namespace="auditor.configs"): + CREATE = perm("Create audit configs") + LIST = perm("List audit configs") + READ = perm("Read an audit configs entry") + UPDATE = perm("Update an audit configs entry") + DELETE = perm("Delete an audit configs entry") + + +class AuditTargetPerms(PermissionSet, namespace="auditor.targets"): + CREATE = perm("Create audit targets") + LIST = perm("List audit targets") + READ = perm("Read an audit targets entry") + UPDATE = perm("Update an audit targets entry") + DELETE = perm("Delete an audit targets entry") diff --git a/plugins/nemo-auditor/src/nemo_auditor/api/v2/configs.py b/plugins/nemo-auditor/src/nemo_auditor/api/v2/configs.py index 59596b259f..e9f7a600fa 100644 --- a/plugins/nemo-auditor/src/nemo_auditor/api/v2/configs.py +++ b/plugins/nemo-auditor/src/nemo_auditor/api/v2/configs.py @@ -13,8 +13,11 @@ from fastapi import APIRouter, Depends, HTTPException, Query from nemo_auditor.api.v2._filters import make_filter_dep +from nemo_auditor.api.v2._perms import AuditConfigPerms from nemo_auditor.api.v2.schemas import ConfigFilter, CreateAuditConfigRequest, UpdateAuditConfigRequest +from nemo_auditor.authz import SCOPE from nemo_auditor.entities import AuditConfig +from nemo_platform_plugin.authz import CallerKind, path_rule from nemo_platform_plugin.entity_client import ( NemoEntitiesClient, NemoEntityConflictError, @@ -31,6 +34,11 @@ @router.post("/configs", response_model=AuditConfig, status_code=201, tags=["Auditor Configs"]) +@path_rule( + callers=[CallerKind.PRINCIPAL], + permissions=[AuditConfigPerms.CREATE], + scopes=SCOPE.write(), +) async def create_config( workspace: str, body: CreateAuditConfigRequest, @@ -63,6 +71,11 @@ async def create_config( tags=["Auditor Configs"], openapi_extra=generate_openapi_extra_params(filter_schema=ConfigFilter), ) +@path_rule( + 
callers=[CallerKind.PRINCIPAL], + permissions=[AuditConfigPerms.LIST], + scopes=SCOPE.read(), +) async def list_configs( workspace: str, page: int = Query(default=1, ge=1), @@ -94,6 +107,11 @@ async def list_configs( @router.get("/configs/{name}", response_model=AuditConfig, tags=["Auditor Configs"]) +@path_rule( + callers=[CallerKind.PRINCIPAL], + permissions=[AuditConfigPerms.READ], + scopes=SCOPE.read(), +) async def get_config( workspace: str, name: str, @@ -113,6 +131,11 @@ async def get_config( @router.put("/configs/{name}", response_model=AuditConfig, tags=["Auditor Configs"]) +@path_rule( + callers=[CallerKind.PRINCIPAL], + permissions=[AuditConfigPerms.UPDATE], + scopes=SCOPE.write(), +) async def update_config( workspace: str, name: str, @@ -149,6 +172,11 @@ async def update_config( @router.delete("/configs/{name}", status_code=204, tags=["Auditor Configs"]) +@path_rule( + callers=[CallerKind.PRINCIPAL], + permissions=[AuditConfigPerms.DELETE], + scopes=SCOPE.write(), +) async def delete_config( workspace: str, name: str, diff --git a/plugins/nemo-auditor/src/nemo_auditor/api/v2/targets.py b/plugins/nemo-auditor/src/nemo_auditor/api/v2/targets.py index 3c272d0c08..a59fc59cd6 100644 --- a/plugins/nemo-auditor/src/nemo_auditor/api/v2/targets.py +++ b/plugins/nemo-auditor/src/nemo_auditor/api/v2/targets.py @@ -12,8 +12,11 @@ from fastapi import APIRouter, Depends, HTTPException, Query from nemo_auditor.api.v2._filters import make_filter_dep +from nemo_auditor.api.v2._perms import AuditTargetPerms from nemo_auditor.api.v2.schemas import CreateAuditTargetRequest, TargetFilter, UpdateAuditTargetRequest +from nemo_auditor.authz import SCOPE from nemo_auditor.entities import AuditTarget +from nemo_platform_plugin.authz import CallerKind, path_rule from nemo_platform_plugin.entity_client import ( NemoEntitiesClient, NemoEntityConflictError, @@ -30,6 +33,11 @@ @router.post("/targets", response_model=AuditTarget, status_code=201, tags=["Auditor Targets"]) 
+@path_rule( + callers=[CallerKind.PRINCIPAL], + permissions=[AuditTargetPerms.CREATE], + scopes=SCOPE.write(), +) async def create_target( workspace: str, body: CreateAuditTargetRequest, @@ -61,6 +69,11 @@ async def create_target( tags=["Auditor Targets"], openapi_extra=generate_openapi_extra_params(filter_schema=TargetFilter), ) +@path_rule( + callers=[CallerKind.PRINCIPAL], + permissions=[AuditTargetPerms.LIST], + scopes=SCOPE.read(), +) async def list_targets( workspace: str, page: int = Query(default=1, ge=1), @@ -92,6 +105,11 @@ async def list_targets( @router.get("/targets/{name}", response_model=AuditTarget, tags=["Auditor Targets"]) +@path_rule( + callers=[CallerKind.PRINCIPAL], + permissions=[AuditTargetPerms.READ], + scopes=SCOPE.read(), +) async def get_target( workspace: str, name: str, @@ -111,6 +129,11 @@ async def get_target( @router.put("/targets/{name}", response_model=AuditTarget, tags=["Auditor Targets"]) +@path_rule( + callers=[CallerKind.PRINCIPAL], + permissions=[AuditTargetPerms.UPDATE], + scopes=SCOPE.write(), +) async def update_target( workspace: str, name: str, @@ -146,6 +169,11 @@ async def update_target( @router.delete("/targets/{name}", status_code=204, tags=["Auditor Targets"]) +@path_rule( + callers=[CallerKind.PRINCIPAL], + permissions=[AuditTargetPerms.DELETE], + scopes=SCOPE.write(), +) async def delete_target( workspace: str, name: str, diff --git a/plugins/nemo-auditor/src/nemo_auditor/authz.py b/plugins/nemo-auditor/src/nemo_auditor/authz.py new file mode 100644 index 0000000000..d924d7c54c --- /dev/null +++ b/plugins/nemo-auditor/src/nemo_auditor/authz.py @@ -0,0 +1,13 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""The auditor plugin's authz scope. + +The route modules import :data:`SCOPE` so the plugin shares one ``AuthzScope("auditor")``. 
+""" + +from __future__ import annotations + +from nemo_platform_plugin.authz import AuthzScope + +SCOPE = AuthzScope("auditor") diff --git a/plugins/nemo-auditor/src/nemo_auditor/service.py b/plugins/nemo-auditor/src/nemo_auditor/service.py index a08d70e898..e97f2e07d2 100644 --- a/plugins/nemo-auditor/src/nemo_auditor/service.py +++ b/plugins/nemo-auditor/src/nemo_auditor/service.py @@ -8,12 +8,9 @@ from typing import ClassVar from fastapi import APIRouter -from nemo_platform_plugin.authz import AuthzContribution, AuthzEndpointMethod +from nemo_platform_plugin.authz import CallerKind, path_rule from nemo_platform_plugin.service import NemoService, RouterSpec -_READ_SCOPES = ["auditor:read", "platform:read"] -_WRITE_SCOPES = ["auditor:write", "platform:write"] - class AuditorPluginService(NemoService): """Auditor plugin service. Exposes healthz and CRUD over audit configs/targets.""" @@ -21,58 +18,13 @@ class AuditorPluginService(NemoService): name: ClassVar[str] = "auditor" dependencies: ClassVar[list[str]] = ["entities"] - @classmethod - def get_authz_contribution(cls) -> AuthzContribution: - base = f"/apis/{cls.name}/v2/workspaces/{{workspace}}" - endpoints: dict[str, dict[str, AuthzEndpointMethod]] = { - f"/apis/{cls.name}/v1/healthz": { - "get": AuthzEndpointMethod(permissions=[], scopes=[]), - }, - } - permissions: dict[str, str] = {} - for resource in ("configs", "targets"): - permissions.update( - { - f"{cls.name}.{resource}.create": f"Create auditor {resource}", - f"{cls.name}.{resource}.list": f"List auditor {resource}", - f"{cls.name}.{resource}.read": f"Read auditor {resource}", - f"{cls.name}.{resource}.update": f"Update auditor {resource}", - f"{cls.name}.{resource}.delete": f"Delete auditor {resource}", - } - ) - endpoints[f"{base}/{resource}"] = { - "post": AuthzEndpointMethod( - permissions=[f"{cls.name}.{resource}.create"], - scopes=_WRITE_SCOPES, - ), - "get": AuthzEndpointMethod( - permissions=[f"{cls.name}.{resource}.list"], - 
scopes=_READ_SCOPES, - ), - } - endpoints[f"{base}/{resource}/{{name}}"] = { - "get": AuthzEndpointMethod( - permissions=[f"{cls.name}.{resource}.read"], - scopes=_READ_SCOPES, - ), - "put": AuthzEndpointMethod( - permissions=[f"{cls.name}.{resource}.update"], - scopes=_WRITE_SCOPES, - ), - "delete": AuthzEndpointMethod( - permissions=[f"{cls.name}.{resource}.delete"], - scopes=_WRITE_SCOPES, - ), - } - - return AuthzContribution(permissions=permissions, endpoints=endpoints) - def get_routers(self) -> list[RouterSpec]: from nemo_auditor.api.v2 import configs, targets healthz_router = APIRouter() @healthz_router.get("/healthz") + @path_rule(callers=[CallerKind.PRINCIPAL], permissions=[], scopes=[]) async def healthz() -> dict[str, object]: return { "plugin": self.name, diff --git a/plugins/nemo-auditor/tests/test_authz.py b/plugins/nemo-auditor/tests/test_authz.py new file mode 100644 index 0000000000..c7b673d286 --- /dev/null +++ b/plugins/nemo-auditor/tests/test_authz.py @@ -0,0 +1,38 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""Authorization derivation for the auditor plugin (every route ruled, no problems).""" + +from __future__ import annotations + +from nemo_auditor.service import AuditorPluginService +from nemo_platform_plugin.authz_discovery import _derive_service_contribution + + +def test_auditor_authz_derivation_has_no_problems() -> None: + contrib, problems, _warnings = _derive_service_contribution(AuditorPluginService()) + assert problems == [] + + # healthz: authenticated, no permission required. + healthz = contrib.endpoints["/apis/auditor/v1/healthz"]["get"] + assert healthz.callers == ["principal"] + assert healthz.permissions == [] + + # configs CRUD. 
+ configs = "/apis/auditor/v2/workspaces/{workspace}/configs" + assert contrib.endpoints[configs]["post"].permissions == ["auditor.configs.create"] + assert contrib.endpoints[configs]["get"].permissions == ["auditor.configs.list"] + assert contrib.endpoints[f"{configs}/{{name}}"]["get"].permissions == ["auditor.configs.read"] + assert contrib.endpoints[f"{configs}/{{name}}"]["put"].permissions == ["auditor.configs.update"] + assert contrib.endpoints[f"{configs}/{{name}}"]["delete"].permissions == ["auditor.configs.delete"] + + # targets CRUD: spot-check + every referenced permission is declared. + targets = "/apis/auditor/v2/workspaces/{workspace}/targets" + assert contrib.endpoints[targets]["post"].permissions == ["auditor.targets.create"] + assert {"auditor.targets.read", "auditor.targets.delete"} <= set(contrib.permissions) + + # All routes are PRINCIPAL (no service-only routes in this plugin). + for methods in contrib.endpoints.values(): + for binding in methods.values(): + assert binding.callers == ["principal"] + assert binding.deny is False diff --git a/plugins/nemo-customizer/src/nemo_customizer/router.py b/plugins/nemo-customizer/src/nemo_customizer/router.py index 2c471e381e..d804c14c4a 100644 --- a/plugins/nemo-customizer/src/nemo_customizer/router.py +++ b/plugins/nemo-customizer/src/nemo_customizer/router.py @@ -8,7 +8,7 @@ from typing import ClassVar from fastapi import APIRouter -from nemo_platform_plugin.authz import AuthzContribution, AuthzEndpointMethod, combine_authz_contributions +from nemo_platform_plugin.authz import CallerKind, path_rule from nemo_platform_plugin.customization_contributor import CustomizationContributorDiscoveryError from nemo_platform_plugin.discovery import ( CUSTOMIZATION_CONTRIBUTORS_GROUP, @@ -60,32 +60,6 @@ def _assert_no_route_collisions(contributors: dict[str, object]) -> None: seen[op] = key -def _hub_authz_contribution() -> AuthzContribution: - """Authz for the customization router hub (authenticated health check 
only).""" - return AuthzContribution( - endpoints={ - "/apis/customization/healthz": { - "get": AuthzEndpointMethod(permissions=[], scopes=[]), - }, - }, - ) - - -def _authz_from_contributors(contributors: dict[str, object]) -> AuthzContribution | None: - """Collect and merge authz from installed customization backends.""" - backend_parts: list[AuthzContribution] = [] - for contributor in contributors.values(): - getter = getattr(contributor, "get_authz_contribution", None) - if not callable(getter): - continue - contrib = getter() - if contrib is not None: - backend_parts.append(contrib) - if not backend_parts: - return None - return combine_authz_contributions(_hub_authz_contribution(), *backend_parts) - - class CustomizationRouterService(NemoService): """Sole ``nemo.services`` owner for ``/apis/customization``.""" @@ -103,15 +77,11 @@ def __init__(self) -> None: _assert_no_route_collisions(self._contributors) type(self).dependencies = merge_router_dependencies(self._contributors) - @classmethod - def get_authz_contribution(cls) -> AuthzContribution | None: - """Merge backend contributor authz (automodel, unsloth, …) for policy discovery.""" - return _authz_from_contributors(discover_customization_contributors()) - def get_routers(self) -> list[RouterSpec]: router = APIRouter() @router.get("/healthz") + @path_rule(callers=[CallerKind.PRINCIPAL], permissions=[]) async def healthz() -> dict[str, object]: return { "plugin": self.name, diff --git a/plugins/nemo-customizer/tests/test_router.py b/plugins/nemo-customizer/tests/test_router.py index d6af3e876f..1fab01c4a1 100644 --- a/plugins/nemo-customizer/tests/test_router.py +++ b/plugins/nemo-customizer/tests/test_router.py @@ -14,7 +14,6 @@ CustomizationRouterService, merge_router_dependencies, ) -from nemo_platform_plugin.authz import authz_for_workspace_job_collection from nemo_platform_plugin.service import RouterSpec @@ -145,60 +144,54 @@ def get_cli(self) -> typer.Typer: assert 
sorted(service._contributors.keys()) == ["automodel", "unsloth"] -def test_get_authz_contribution_merges_backend_contributors(monkeypatch: pytest.MonkeyPatch) -> None: - class _AutomodelContributor: - name: ClassVar[str] = "automodel" - dependencies: ClassVar[list[str]] = [] +def test_authz_derives_from_contributor_routes(monkeypatch: pytest.MonkeyPatch) -> None: + """The hub's authz is derived from the ``@path_rule``-decorated routes its + contributors mount — there is no separate ``get_authz_contribution`` declaration. - def get_authz_contribution(self) -> object: - return authz_for_workspace_job_collection( - api_area="customization", - collection_suffix="/automodel/jobs", - permission_prefix="customization.automodel.jobs", - include_healthz=True, - healthz_suffix="/automodel/healthz", - ) - - def get_routers(self) -> list[RouterSpec]: - return [] - - def get_cli(self) -> typer.Typer: - return typer.Typer() + Doubles as the Phase-0 derivation gate: the customization hub plus backends + must derive with no problems and no fail-closed DENY bindings. 
+ """ + from nemo_platform_plugin.authz import CallerKind, Permission, path_rule + from nemo_platform_plugin.authz_discovery import _derive_service_contribution - class _UnslothContributor: - name: ClassVar[str] = "unsloth" - dependencies: ClassVar[list[str]] = [] + def _make_contributor(backend: str) -> object: + class _Contributor: + name: ClassVar[str] = backend + dependencies: ClassVar[list[str]] = [] - def get_authz_contribution(self) -> object: - return authz_for_workspace_job_collection( - api_area="customization", - collection_suffix="/unsloth/jobs", - permission_prefix="customization.unsloth.jobs", - ) + def get_routers(self) -> list[RouterSpec]: + router = APIRouter() + create_perm = Permission(f"customization.{backend}.jobs.create", f"Create {backend} jobs") - def get_routers(self) -> list[RouterSpec]: - return [] + @router.post(f"/{backend}/jobs") + @path_rule(callers=[CallerKind.PRINCIPAL], permissions=[create_perm]) + async def submit() -> dict[str, str]: + return {"backend": backend} - def get_cli(self) -> typer.Typer: - return typer.Typer() + @router.get(f"/{backend}/healthz") + @path_rule(callers=[CallerKind.PRINCIPAL], permissions=[]) + async def healthz() -> dict[str, str]: + return {"backend": backend} - monkeypatch.setattr( - "nemo_customizer.router.discover_customization_contributors", - lambda: {"automodel": _AutomodelContributor(), "unsloth": _UnslothContributor()}, - ) + return [RouterSpec(router=router, prefix="/v2/workspaces/{workspace}", tag=backend.title())] - contrib = CustomizationRouterService.get_authz_contribution() - assert contrib is not None - assert "/apis/customization/healthz" in contrib.endpoints - assert "/apis/customization/v2/workspaces/{workspace}/automodel/jobs" in contrib.endpoints - assert "/apis/customization/v2/workspaces/{workspace}/unsloth/jobs" in contrib.endpoints - assert "customization.automodel.jobs.create" in contrib.permissions - assert "customization.unsloth.jobs.create" in contrib.permissions + def 
get_cli(self) -> typer.Typer: + return typer.Typer() + return _Contributor() -def test_get_authz_contribution_returns_none_without_backends(monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.setattr( "nemo_customizer.router.discover_customization_contributors", - lambda: {}, + lambda: {"automodel": _make_contributor("automodel"), "unsloth": _make_contributor("unsloth")}, ) - assert CustomizationRouterService.get_authz_contribution() is None + + service = CustomizationRouterService() + contribution, problems, _warnings = _derive_service_contribution(service) + + assert problems == [] + assert not any(spec.deny for methods in contribution.endpoints.values() for spec in methods.values()) + assert "customization.automodel.jobs.create" in contribution.permissions + assert "customization.unsloth.jobs.create" in contribution.permissions + # The hub's own /healthz is authenticated-but-permissionless (ruled, not denied). + hub_healthz = contribution.endpoints["/apis/customization/healthz"]["get"] + assert hub_healthz.permissions == [] and not hub_healthz.deny diff --git a/plugins/nemo-data-designer/src/nemo_data_designer_plugin/service.py b/plugins/nemo-data-designer/src/nemo_data_designer_plugin/service.py index d68a9f6267..bc6b1ccb08 100644 --- a/plugins/nemo-data-designer/src/nemo_data_designer_plugin/service.py +++ b/plugins/nemo-data-designer/src/nemo_data_designer_plugin/service.py @@ -11,12 +11,6 @@ from data_designer.errors import DataDesignerError from data_designer_nemo.errors import NDDInternalError, NDDInvalidConfigError from fastapi import Request -from nemo_platform_plugin.authz import ( - AuthzContribution, - authz_for_workspace_function, - authz_for_workspace_job_collection, - combine_authz_contributions, -) from nemo_platform_plugin.service import NemoService, RouterSpec from pydantic import ValidationError from starlette import status @@ -29,36 +23,27 @@ class DataDesignerService(NemoService): name: ClassVar[str] = "data-designer" dependencies: 
ClassVar[list[str]] = ["entities", "auth", "jobs", "secrets", "files", "inference-gateway"] - @classmethod - def get_authz_contribution(cls) -> AuthzContribution: - return combine_authz_contributions( - authz_for_workspace_function( - api_area=cls.name, - function_suffix="/preview", - permission_prefix=f"{cls.name}.preview", - ), - authz_for_workspace_job_collection( - api_area=cls.name, - collection_suffix="/jobs/create", - permission_prefix=f"{cls.name}.jobs", - ), - ) - def get_routers(self) -> list[RouterSpec]: from nemo_data_designer_plugin.functions.preview import PreviewFunction from nemo_data_designer_plugin.jobs.create import CreateJob + from nemo_platform_plugin.authz import AuthzScope from nemo_platform_plugin.functions.routes import add_function_routes from nemo_platform_plugin.jobs.routes import add_job_routes + data_designer = AuthzScope("data-designer") return [ RouterSpec( - add_function_routes(PreviewFunction), + add_function_routes( + PreviewFunction, + authz=data_designer, + permission_description="Preview a Data Designer config", + ), prefix="/v2/workspaces/{workspace}", tag="Data Designer", description="Streaming preview of a Data Designer config.", ), RouterSpec( - add_job_routes(CreateJob), + add_job_routes(CreateJob, authz=data_designer), prefix="/v2/workspaces/{workspace}", tag="Data Designer", description="Job endpoints", diff --git a/plugins/nemo-data-designer/tests/unit/test_authz.py b/plugins/nemo-data-designer/tests/unit/test_authz.py new file mode 100644 index 0000000000..5437b7dc3d --- /dev/null +++ b/plugins/nemo-data-designer/tests/unit/test_authz.py @@ -0,0 +1,38 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +"""Authorization derivation for the data-designer plugin (every route ruled, no problems).""" + +from __future__ import annotations + +from nemo_data_designer_plugin.service import DataDesignerService +from nemo_platform_plugin.authz_discovery import _derive_service_contribution + + +def test_data_designer_authz_derivation_has_no_problems() -> None: + contrib, problems, _warnings = _derive_service_contribution(DataDesignerService()) + assert problems == [] + + # Job-factory perms plus the preview function perm, all declared. + assert { + "data-designer.create", + "data-designer.list", + "data-designer.read", + "data-designer.delete", + "data-designer.cancel", + "data-designer.preview", + } <= set(contrib.permissions) + + # Pin verb->permission so a mis-stamp (e.g. create<->read) is localized to this plugin. + jobs = "/apis/data-designer/v2/workspaces/{workspace}/jobs/create" + assert contrib.endpoints[jobs]["post"].permissions == ["data-designer.create"] + assert contrib.endpoints[jobs]["get"].permissions == ["data-designer.list"] + assert contrib.endpoints[f"{jobs}/{{name}}"]["delete"].permissions == ["data-designer.delete"] + preview = "/apis/data-designer/v2/workspaces/{workspace}/preview" + assert contrib.endpoints[preview]["post"].permissions == ["data-designer.preview"] + + # Every mounted route carries a valid rule (none falls through to deny). 
+ assert contrib.endpoints + for methods in contrib.endpoints.values(): + for binding in methods.values(): + assert binding.deny is False diff --git a/plugins/nemo-evaluator/src/nemo_evaluator/api/v2/metrics.py b/plugins/nemo-evaluator/src/nemo_evaluator/api/v2/metrics.py index feaac764d4..d468a362f0 100644 --- a/plugins/nemo-evaluator/src/nemo_evaluator/api/v2/metrics.py +++ b/plugins/nemo-evaluator/src/nemo_evaluator/api/v2/metrics.py @@ -17,8 +17,10 @@ MetricSort, ) from nemo_evaluator.api.service.metric_service import MetricService +from nemo_evaluator.authz import SCOPE from nemo_evaluator.entities import MAX_NAME_LENGTH, NAME_PATTERN from nemo_platform_plugin.api.parsed_filter import ParsedFilter, make_filter_dep +from nemo_platform_plugin.authz import CallerKind, PermissionSet, path_rule, perm from nemo_platform_plugin.entities import EntityValidationError from nemo_platform_plugin.jobs.openapi_utils import generate_openapi_extra_params from nemo_platform_plugin.schema import Page @@ -31,6 +33,15 @@ def _sanitize_for_log(value: object) -> str: return str(value).replace("\r", "").replace("\n", "") +class MetricPerms(PermissionSet, namespace="evaluator.metrics"): + """Permissions for the stored-metrics CRUD collection.""" + + CREATE = perm("Create a stored metric") + LIST = perm("List stored metrics") + READ = perm("Read a stored metric") + DELETE = perm("Delete a stored metric") + + router = APIRouter() @@ -46,6 +57,7 @@ def _sanitize_for_log(value: object) -> str: filter_description="Filter metrics by workspace, name, metric_type, description, created_at, and updated_at.", ), ) +@path_rule(callers=[CallerKind.PRINCIPAL], permissions=[MetricPerms.LIST], scopes=SCOPE.read()) async def list_metrics( workspace: str, page: int = Query(default=1, ge=1, description="Page number."), @@ -80,6 +92,7 @@ async def list_metrics( status_code=status.HTTP_201_CREATED, responses={status.HTTP_409_CONFLICT: {"description": "Metric already exists"}}, ) 
+@path_rule(callers=[CallerKind.PRINCIPAL], permissions=[MetricPerms.CREATE], scopes=SCOPE.write()) async def create_metric( workspace: str, name: Annotated[str, Path(max_length=MAX_NAME_LENGTH, pattern=NAME_PATTERN)], @@ -119,6 +132,7 @@ async def create_metric( status_code=status.HTTP_200_OK, responses={status.HTTP_404_NOT_FOUND: {"description": "Metric not found"}}, ) +@path_rule(callers=[CallerKind.PRINCIPAL], permissions=[MetricPerms.READ], scopes=SCOPE.read()) async def get_metric( workspace: str, name: str, @@ -148,6 +162,7 @@ async def get_metric( status_code=status.HTTP_204_NO_CONTENT, responses={status.HTTP_404_NOT_FOUND: {"description": "Metric not found"}}, ) +@path_rule(callers=[CallerKind.PRINCIPAL], permissions=[MetricPerms.DELETE], scopes=SCOPE.write()) async def delete_metric( workspace: str, name: str, diff --git a/plugins/nemo-evaluator/src/nemo_evaluator/authz.py b/plugins/nemo-evaluator/src/nemo_evaluator/authz.py new file mode 100644 index 0000000000..03c79480a7 --- /dev/null +++ b/plugins/nemo-evaluator/src/nemo_evaluator/authz.py @@ -0,0 +1,14 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""The evaluator plugin's authz scope. + +The service and the metrics route module import :data:`SCOPE` so the plugin shares one +``AuthzScope("evaluator")``. 
+""" + +from __future__ import annotations + +from nemo_platform_plugin.authz import AuthzScope + +SCOPE = AuthzScope("evaluator") diff --git a/plugins/nemo-evaluator/src/nemo_evaluator/service.py b/plugins/nemo-evaluator/src/nemo_evaluator/service.py index 048b4aa978..62cc337569 100644 --- a/plugins/nemo-evaluator/src/nemo_evaluator/service.py +++ b/plugins/nemo-evaluator/src/nemo_evaluator/service.py @@ -9,79 +9,38 @@ from fastapi import APIRouter from nemo_evaluator.api.v2 import metrics as metrics_routes +from nemo_evaluator.authz import SCOPE from nemo_evaluator.core import say_hello from nemo_evaluator.jobs.evaluate import EvaluateJob from nemo_evaluator.schema import HelloResponse -from nemo_platform_plugin.authz import ( - AuthzContribution, - AuthzEndpointMethod, - authz_for_workspace_job_collection, - combine_authz_contributions, -) +from nemo_platform_plugin.authz import CallerKind, PermissionSet, path_rule, perm from nemo_platform_plugin.jobs.routes import add_job_routes from nemo_platform_plugin.service import NemoService, RouterSpec -def _authz_for_metrics_collection(api_area: str, permission_prefix: str) -> AuthzContribution: - """Authz for the stored-metrics CRUD collection (full path includes PUT).""" - base = f"/apis/{api_area}/v2/workspaces/{{workspace}}/metrics" - read_scopes = [f"{api_area}:read", "platform:read"] - write_scopes = [f"{api_area}:write", "platform:write"] - return AuthzContribution( - permissions={ - f"{permission_prefix}.create": f"Create {permission_prefix}", - f"{permission_prefix}.list": f"List {permission_prefix}", - f"{permission_prefix}.read": f"Read {permission_prefix}", - f"{permission_prefix}.delete": f"Delete {permission_prefix}", - }, - endpoints={ - base: { - "get": AuthzEndpointMethod(permissions=[f"{permission_prefix}.list"], scopes=read_scopes), - }, - f"{base}/{{name}}": { - "post": AuthzEndpointMethod(permissions=[f"{permission_prefix}.create"], scopes=write_scopes), - "get": 
AuthzEndpointMethod(permissions=[f"{permission_prefix}.read"], scopes=read_scopes), - "delete": AuthzEndpointMethod(permissions=[f"{permission_prefix}.delete"], scopes=write_scopes), - }, - }, - ) +class EvaluatorPerms(PermissionSet, namespace="evaluator"): + """Permissions owned by the evaluator plugin's hand-written routes. + + The ``EvaluateJob`` collection's permissions (``evaluator.create`` etc.) are stamped + onto the factory routes and derived from there; only the bespoke ``hello`` route's + permission is declared here. + """ + + HELLO_READ = perm("Read the evaluator hello greeting", suffix="hello.read") class EvaluatorPluginService(NemoService): - """Minimal service surface for evaluator pluginification work.""" + """Service surface for the evaluator plugin.""" name: ClassVar[str] = "evaluator" dependencies: ClassVar[list[str]] = ["nemo-evaluator-sdk", "entities", "files"] - @classmethod - def get_authz_contribution(cls) -> AuthzContribution: - return combine_authz_contributions( - AuthzContribution( - endpoints={ - f"/apis/{cls.name}/v1/healthz": { - "get": AuthzEndpointMethod(permissions=[], scopes=[]), - }, - f"/apis/{cls.name}/v1/hello/{{name}}": { - "get": AuthzEndpointMethod(permissions=[], scopes=[]), - }, - }, - ), - authz_for_workspace_job_collection( - api_area=cls.name, - collection_suffix="/evaluate/jobs", - permission_prefix=f"{cls.name}.jobs", - ), - _authz_for_metrics_collection( - api_area=cls.name, - permission_prefix=f"{cls.name}.metrics", - ), - ) - def get_routers(self) -> list[RouterSpec]: router = APIRouter() - jobs_router = add_job_routes(EvaluateJob) + jobs_router = add_job_routes(EvaluateJob, authz=SCOPE) @router.get("/healthz") + @path_rule(callers=[CallerKind.PRINCIPAL], permissions=[], scopes=[]) async def healthz() -> dict[str, object]: return { "plugin": self.name, @@ -125,6 +84,11 @@ def _build_hello_router() -> APIRouter: router = APIRouter() @router.get("/hello/{name}", response_model=HelloResponse) + @path_rule( + 
callers=[CallerKind.PRINCIPAL], + permissions=[EvaluatorPerms.HELLO_READ], + scopes=SCOPE.read(), + ) async def hello(name: str) -> HelloResponse: """Greet a name.""" return HelloResponse(message=say_hello(name)) diff --git a/plugins/nemo-evaluator/tests/test_authz.py b/plugins/nemo-evaluator/tests/test_authz.py new file mode 100644 index 0000000000..6381f0b7e0 --- /dev/null +++ b/plugins/nemo-evaluator/tests/test_authz.py @@ -0,0 +1,41 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""Authorization derivation for the evaluator plugin (every route ruled, no problems).""" + +from __future__ import annotations + +from nemo_evaluator.service import EvaluatorPluginService +from nemo_platform_plugin.authz_discovery import _derive_service_contribution + + +def test_evaluator_authz_derivation_has_no_problems() -> None: + contrib, problems, _warnings = _derive_service_contribution(EvaluatorPluginService()) + assert problems == [] + + # Job-factory perms plus the hand-written hello read perm, all declared. + assert { + "evaluator.create", + "evaluator.list", + "evaluator.read", + "evaluator.delete", + "evaluator.cancel", + "evaluator.hello.read", + } <= set(contrib.permissions) + + # Pin the two hand-written routes (mirrors the auditor test's healthz spot-check). + assert contrib.endpoints["/apis/evaluator/v1/healthz"]["get"].permissions == [] + assert contrib.endpoints["/apis/evaluator/v1/hello/{name}"]["get"].permissions == ["evaluator.hello.read"] + + # Pin the job-collection verb->permission mapping so a mis-stamp (e.g. create<->read) is + # localized to this plugin, not only caught by the central factory test. 
+ jobs = "/apis/evaluator/v2/workspaces/{workspace}/evaluate/jobs" + assert contrib.endpoints[jobs]["post"].permissions == ["evaluator.create"] + assert contrib.endpoints[jobs]["get"].permissions == ["evaluator.list"] + assert contrib.endpoints[f"{jobs}/{{name}}"]["delete"].permissions == ["evaluator.delete"] + + # Every mounted route carries a valid rule (none falls through to deny). + assert contrib.endpoints + for methods in contrib.endpoints.values(): + for binding in methods.values(): + assert binding.deny is False diff --git a/plugins/nemo-safe-synthesizer/src/nemo_safe_synthesizer_plugin/api/v2/jobs/endpoints.py b/plugins/nemo-safe-synthesizer/src/nemo_safe_synthesizer_plugin/api/v2/jobs/endpoints.py index bbb663cdc6..013707c358 100644 --- a/plugins/nemo-safe-synthesizer/src/nemo_safe_synthesizer_plugin/api/v2/jobs/endpoints.py +++ b/plugins/nemo-safe-synthesizer/src/nemo_safe_synthesizer_plugin/api/v2/jobs/endpoints.py @@ -13,6 +13,7 @@ from nemo_platform import AsyncNeMoPlatform, NotFoundError, PermissionDeniedError from nemo_platform.filesets import FilesetPathError, parse_fileset_ref +from nemo_platform_plugin.authz import AuthzScope from nemo_platform_plugin.entities import EntityClient from nemo_platform_plugin.jobs.api_factory import ( ContainerSpec, @@ -190,6 +191,7 @@ async def job_config_compiler( job_type="SafeSynthesizer", job_input=SafeSynthesizerJobConfig, platform_job_config_compiler=job_config_compiler, + authz=AuthzScope("safe-synthesizer"), job_result_routes=[ PlatformJobResultRoute( name="summary", diff --git a/plugins/nemo-safe-synthesizer/src/nemo_safe_synthesizer_plugin/service.py b/plugins/nemo-safe-synthesizer/src/nemo_safe_synthesizer_plugin/service.py index b07daf9f10..5faaac15cf 100644 --- a/plugins/nemo-safe-synthesizer/src/nemo_safe_synthesizer_plugin/service.py +++ b/plugins/nemo-safe-synthesizer/src/nemo_safe_synthesizer_plugin/service.py @@ -8,55 +8,25 @@ from typing import ClassVar from fastapi import Request -from 
nemo_platform_plugin.authz import ( - AuthzContribution, - AuthzEndpointMethod, - authz_for_workspace_job_collection, - combine_authz_contributions, -) from nemo_platform_plugin.service import ExceptionHandler, NemoService, RouterSpec from pydantic import ValidationError from starlette import status from starlette.responses import JSONResponse _SERVICE_NAME = "safe-synthesizer" -_JOBS_PERMISSION_PREFIX = f"{_SERVICE_NAME}.jobs" -_READ_SCOPES = [f"{_SERVICE_NAME}:read", "platform:read"] -_RESULT_DOWNLOAD_ALIASES = ("adapter", "evaluation-report", "summary", "synthetic-data") - - -def _read_method(permission: str) -> AuthzEndpointMethod: - return AuthzEndpointMethod(permissions=[permission], scopes=list(_READ_SCOPES)) class SafeSynthesizerService(NemoService): - """Safe Synthesizer service exposed as an NMP plugin.""" + """Safe Synthesizer service exposed as an NMP plugin. + + HTTP authorization is derived from the ``@path_rule``-stamped routes the job + factory generates in ``api.v2.jobs.endpoints`` (``AuthzScope("safe-synthesizer")``); + there is no separate authz declaration here. 
+ """ name: ClassVar[str] = _SERVICE_NAME dependencies: ClassVar[list[str]] = ["entities", "auth", "jobs", "secrets", "files"] - @classmethod - def get_authz_contribution(cls) -> AuthzContribution: - """Authorization policy matching the pre-plugin Safe Synthesizer service.""" - base = f"/apis/{cls.name}/v2/workspaces/{{workspace}}/jobs" - read = f"{_JOBS_PERMISSION_PREFIX}.read" - - return combine_authz_contributions( - authz_for_workspace_job_collection( - api_area=cls.name, - collection_suffix="/jobs", - permission_prefix=_JOBS_PERMISSION_PREFIX, - ), - AuthzContribution( - endpoints={ - f"{base}/{{job}}/results/{name}/download": { - "get": _read_method(read), - } - for name in _RESULT_DOWNLOAD_ALIASES - }, - ), - ) - def get_routers(self) -> list[RouterSpec]: from nemo_safe_synthesizer_plugin.api.v2.jobs import endpoints as jobs diff --git a/plugins/nemo-safe-synthesizer/tests/unit/test_authz.py b/plugins/nemo-safe-synthesizer/tests/unit/test_authz.py new file mode 100644 index 0000000000..0fd8f71382 --- /dev/null +++ b/plugins/nemo-safe-synthesizer/tests/unit/test_authz.py @@ -0,0 +1,40 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""Authorization derivation for the safe-synthesizer plugin (every route ruled, no problems).""" + +from __future__ import annotations + +import pytest +from nemo_platform_plugin.authz_discovery import _derive_service_contribution +from nemo_safe_synthesizer_plugin.service import SafeSynthesizerService + + +def test_safe_synthesizer_authz_derivation_has_no_problems() -> None: + pytest.importorskip("nemo_safe_synthesizer.config.job") + contrib, problems, _warnings = _derive_service_contribution(SafeSynthesizerService()) + assert problems == [] + + # Single job collection under the flat safe-synthesizer namespace. 
+ assert { + "safe-synthesizer.create", + "safe-synthesizer.list", + "safe-synthesizer.read", + "safe-synthesizer.delete", + "safe-synthesizer.cancel", + } <= set(contrib.permissions) + + # Pin verb->permission, including a custom result-download route (a read action), so a + # mis-stamp is localized to this plugin. + jobs = "/apis/safe-synthesizer/v2/workspaces/{workspace}/jobs" + assert contrib.endpoints[jobs]["post"].permissions == ["safe-synthesizer.create"] + assert contrib.endpoints[jobs]["get"].permissions == ["safe-synthesizer.list"] + assert contrib.endpoints[f"{jobs}/{{name}}"]["delete"].permissions == ["safe-synthesizer.delete"] + assert contrib.endpoints[f"{jobs}/{{job}}/results/summary/download"]["get"].permissions == ["safe-synthesizer.read"] + + # Every mounted route — including the custom result-download routes — + # carries a valid rule (none falls through to deny). + assert contrib.endpoints + for methods in contrib.endpoints.values(): + for binding in methods.values(): + assert binding.deny is False diff --git a/plugins/nemo-safe-synthesizer/tests/unit/test_service.py b/plugins/nemo-safe-synthesizer/tests/unit/test_service.py index 0350bb2e76..63e8f50daa 100644 --- a/plugins/nemo-safe-synthesizer/tests/unit/test_service.py +++ b/plugins/nemo-safe-synthesizer/tests/unit/test_service.py @@ -4,8 +4,6 @@ import pytest from fastapi import FastAPI from fastapi.testclient import TestClient -from nemo_platform_plugin.authz_format import validate_static_authz_data -from nemo_platform_plugin.authz_merge import merge_authz_contributions from nemo_safe_synthesizer_plugin.service import SafeSynthesizerService @@ -32,36 +30,27 @@ def test_service_routes_include_safe_synthesizer_jobs_path(): assert "SafeSynthesizerJobRequest" in spec["components"]["schemas"] -def test_service_authz_contribution_matches_legacy_job_policy(): - contribution = SafeSynthesizerService.get_authz_contribution() - base_authz = { - "authz": { - "permissions": {}, - "roles": { - "Viewer": 
{"permissions": []}, - "Editor": {"permissions": []}, - }, - "endpoints": {}, - } - } +def test_service_authz_derives_from_job_routes(): + """Authz is derived from the ``@path_rule``-stamped job-factory routes + (``AuthzScope("safe-synthesizer")``); there is no ``get_authz_contribution``. - merged = merge_authz_contributions(base_authz, [contribution.to_dict()]) + Doubles as the Phase-0 derivation gate: the service must derive with no + problems and no fail-closed DENY bindings. + """ + pytest.importorskip("nemo_safe_synthesizer.config.job") + from nemo_platform_plugin.authz_discovery import _derive_service_contribution - validate_static_authz_data(merged) - viewer_permissions = merged["authz"]["roles"]["Viewer"]["permissions"] - editor_permissions = merged["authz"]["roles"]["Editor"]["permissions"] - endpoints = merged["authz"]["endpoints"] + contribution, problems, _warnings = _derive_service_contribution(SafeSynthesizerService()) - assert "safe-synthesizer.jobs.list" in viewer_permissions - assert "safe-synthesizer.jobs.read" in viewer_permissions - assert "safe-synthesizer.jobs.cancel" in editor_permissions - assert "safe-synthesizer.jobs.create" in editor_permissions - assert "safe-synthesizer.jobs.delete" in editor_permissions + assert problems == [] + assert not any(spec.deny for methods in contribution.endpoints.values() for spec in methods.values()) + for verb in ("create", "list", "read", "delete", "cancel"): + assert f"safe-synthesizer.{verb}" in contribution.permissions jobs_path = "/apis/safe-synthesizer/v2/workspaces/{workspace}/jobs" - assert endpoints[jobs_path]["get"]["permissions"] == ["safe-synthesizer.jobs.list"] - assert endpoints[jobs_path]["post"]["scopes"] == ["safe-synthesizer:write", "platform:write"] - assert endpoints[f"{jobs_path}/{{name}}/cancel"]["post"]["permissions"] == ["safe-synthesizer.jobs.cancel"] - assert endpoints[f"{jobs_path}/{{job}}/results/synthetic-data/download"]["get"]["permissions"] == [ - "safe-synthesizer.jobs.read" 
+ assert contribution.endpoints[jobs_path]["get"].permissions == ["safe-synthesizer.list"] + assert contribution.endpoints[jobs_path]["post"].scopes == ["safe-synthesizer:write", "platform:write"] + assert contribution.endpoints[f"{jobs_path}/{{name}}/cancel"]["post"].permissions == ["safe-synthesizer.cancel"] + assert contribution.endpoints[f"{jobs_path}/{{job}}/results/synthetic-data/download"]["get"].permissions == [ + "safe-synthesizer.read" ] diff --git a/plugins/nemo-unsloth/src/nemo_unsloth_plugin/contributor.py b/plugins/nemo-unsloth/src/nemo_unsloth_plugin/contributor.py index 36776c4dbd..749457cc55 100644 --- a/plugins/nemo-unsloth/src/nemo_unsloth_plugin/contributor.py +++ b/plugins/nemo-unsloth/src/nemo_unsloth_plugin/contributor.py @@ -7,9 +7,9 @@ The customization router hub (``nemo-customizer-plugin``) discovers this class at startup and: -- merges :meth:`get_routers` into ``/apis/customization/...`` +- merges :meth:`get_routers` into ``/apis/customization/...`` (HTTP authz is + derived from the ``@path_rule``-decorated routes those routers carry) - adds :meth:`get_cli` under ``nemo customization unsloth`` -- merges :meth:`get_authz_contribution` into the platform authz policy - composes :meth:`get_sdk_resources` under ``client.customization.unsloth`` The shared shape lives in :class:`nmp.customization_common.contributor.base.BaseContributor`. diff --git a/plugins/nemo-unsloth/tests/test_contributor.py b/plugins/nemo-unsloth/tests/test_contributor.py index 006f8ba560..e7916f4f82 100644 --- a/plugins/nemo-unsloth/tests/test_contributor.py +++ b/plugins/nemo-unsloth/tests/test_contributor.py @@ -5,10 +5,10 @@ Pin the contract the customization-router hub depends on: -- ``name`` and ``dependencies`` (used by the hub's authz / dep merger). -- ``get_authz_contribution`` produces an authz block for the unsloth jobs - collection + healthz. -- ``get_routers`` returns the healthz + jobs routers under the right prefix. 
+- ``name`` and ``dependencies`` (used by the hub's dep merger). +- ``get_routers`` returns the healthz + jobs routers under the right prefix, + with ``@path_rule`` authz stamped on the generated job routes (the platform + derives the policy from those rules — there is no ``get_authz_contribution``). - ``get_cli`` exposes ``run`` / ``submit`` / ``explain`` and the submit group accepts the ``JOB_JSON`` positional. ``run`` hard-fails. """ @@ -46,10 +46,31 @@ def test_dependencies_match_submit_path(self, contributor: object) -> None: class TestAuthz: - def test_authz_contribution_targets_unsloth_collection(self, contributor: object) -> None: - ac = contributor.get_authz_contribution() - repr_ = repr(ac) - assert "unsloth" in repr_ + def test_job_routes_carry_unsloth_path_rules(self, contributor: object) -> None: + """Authz is derived from ``@path_rule`` on the generated job routes + (permission namespace ``customization.unsloth.jobs`` from + ``AuthzScope("customization").child(name, "jobs")``), not a separate + ``get_authz_contribution`` declaration.""" + from fastapi.routing import APIRoute + from nemo_platform_plugin.authz import get_path_rules + + try: + specs = contributor.get_routers() + except ImportError as exc: + pytest.skip(f"router deps unavailable in this env: {exc}") + + route_rules = [ + (route.path, get_path_rules(route.endpoint)) + for spec in specs + for route in spec.router.routes + if isinstance(route, APIRoute) + ] + assert route_rules + unruled = [path for path, rules in route_rules if not rules] + assert not unruled, f"routes without a @path_rule (would be denied fail-closed): {unruled}" + + perm_ids = {perm.id for _path, rules in route_rules for rule in rules for perm in rule.permissions} + assert "customization.unsloth.jobs.create" in perm_ids class TestRouters: diff --git a/services/core/auth/scripts/auth-tools.py b/services/core/auth/scripts/auth-tools.py index e857efff76..367fccc708 100755 --- a/services/core/auth/scripts/auth-tools.py +++ 
b/services/core/auth/scripts/auth-tools.py @@ -1334,7 +1334,7 @@ def sync_plugins( ), dry_run: bool = typer.Option(False, "--dry-run", "-n", help="Show what would change without writing"), ): - """Merge plugin ``nemo.authz`` / ``get_authz_contribution`` data into static-authz.yaml. + """Merge derived plugin authz (``@path_rule``-decorated routes) into static-authz.yaml. Run from the repo root with workspace plugins installed (``uv sync``). This materializes runtime plugin policy into the committed bundle for CI and @@ -1358,25 +1358,29 @@ def sync_plugins( auth_path = project_root / auth_path try: - from nemo_platform_plugin.authz_discovery import discover_authz_contribution_dicts - from nmp.common.auth.authz_merge import merge_authz_contributions + from nmp.core.auth.app.bundle import get_degraded_plugins, merge_plugin_authz_contributions except ImportError as exc: - console.print(f"[red]Cannot import plugin authz discovery: {exc}[/red]") + console.print(f"[red]Cannot import plugin authz merge: {exc}[/red]") console.print("[yellow]Run from repo root with workspace packages installed (uv sync).[/yellow]") raise typer.Exit(code=1) from exc - contributions = discover_authz_contribution_dicts() - if not contributions: - console.print("[yellow]No plugin authz contributions discovered.[/yellow]") - raise typer.Exit(code=0) - auth_config = load_yaml(auth_path) before_endpoints = set(auth_config.get("authz", {}).get("endpoints", {}).keys()) - merged = merge_authz_contributions(auth_config, contributions) + # Route through the SAME fail-mode merge the running auth service uses (discover → + # on_invalid_plugin fencing/quarantine → merge), so the committed static-authz.yaml cannot + # diverge from runtime and fail-open for a plugin that can't be enumerated (a raw + # discover_authz_contribution_dicts() pass would skip denied_plugin_prefixes). 
+ merged = merge_plugin_authz_contributions(auth_config) after_endpoints = set(merged.get("authz", {}).get("endpoints", {}).keys()) added_paths = sorted(after_endpoints - before_endpoints) - console.print(f"[bold]Merging {len(contributions)} plugin authz contribution(s)...[/bold]") + degraded = get_degraded_plugins() + if degraded: + console.print(f"[yellow]⚠ {len(degraded)} plugin(s) contributed invalid authz (denied / fenced):[/yellow]") + for key, problems in sorted(degraded.items()): + console.print(f" [yellow]![/yellow] {key}: {'; '.join(problems)}") + + console.print("[bold]Merging plugin authz contributions...[/bold]") for path in added_paths: methods = sorted(merged["authz"]["endpoints"][path].keys()) console.print(f" [green]+[/green] {path} ({', '.join(methods)})") diff --git a/services/core/auth/src/nmp/core/auth/app/bundle.py b/services/core/auth/src/nmp/core/auth/app/bundle.py index 3a36474f9a..7b36f8ce6c 100644 --- a/services/core/auth/src/nmp/core/auth/app/bundle.py +++ b/services/core/auth/src/nmp/core/auth/app/bundle.py @@ -83,21 +83,113 @@ async def get_opa_bundle_with_etag(entities_client: Optional[EntityClient] = Non return bundle_bytes, etag -def _merge_plugin_authz_contributions(static_data: dict) -> dict: - """Overlay authorization rules from installed NeMo Platform plugins.""" +# Plugins whose authz failed validation at the most recent bundle build (key -> problems). +# Surfaced for a status/health endpoint; refreshed on every build. 
+_degraded_plugins: dict[str, list[str]] = {} + + +def get_degraded_plugins() -> dict[str, list[str]]: + """Return plugins with invalid authz at the last bundle build (key -> list of problems).""" + return dict(_degraded_plugins) + + +def _quarantine_contribution(contribution_dict: dict) -> dict: + """Deny every route of a plugin and drop its permissions (quarantine fail-mode).""" + return { + "permissions": {}, + "endpoints": { + path: {method: {"permissions": [], "deny": True} for method in methods} + for path, methods in contribution_dict.get("endpoints", {}).items() + }, + "role_permissions": {}, + } + + +def merge_plugin_authz_contributions(static_data: dict) -> dict: + """Overlay authorization rules from installed NeMo Platform plugins. + + Applies the configured fail-mode (``authz.on_invalid_plugin``) to any plugin with + derived authz **errors** (``PluginAuthzResult.problems``: unruled routes, malformed or + cross-namespace permission ids, duplicate bindings, load failures). The offending routes + are already explicit denies in the derived contribution; this only controls blast radius + — ``deny_route`` keeps just those denies, ``quarantine`` denies the whole plugin, + ``hard_fail`` refuses to build the bundle. Plugins with errors are recorded for the + status endpoint. + + Plugin *warnings* (``PluginAuthzResult.warnings``: missing or conflicting permission + descriptions) are metadata-only — the route still requires the right permission — so they + are logged but never escalate the fail-mode and never mark the plugin degraded. This is + what keeps a cosmetic description typo from quarantining a whole plugin. 
+ """ + global _degraded_plugins try: - from nemo_platform_plugin.authz_discovery import discover_authz_contribution_dicts + from nemo_platform_plugin.authz_discovery import discover_plugin_authz from nmp.common.auth.authz_merge import merge_authz_contributions except ImportError: logger.debug("Plugin authz discovery unavailable; using static authz only") + _degraded_plugins = {} return static_data - contributions = discover_authz_contribution_dicts() - if not contributions: - return static_data - - logger.debug("Merging %d plugin authz contribution(s)", len(contributions)) - return merge_authz_contributions(static_data, contributions) + results = discover_plugin_authz() + on_invalid = get_service_config(AuthServiceConfig).on_invalid_plugin + degraded: dict[str, list[str]] = {} + contributions: list[dict] = [] + + denied_prefixes: list[str] = [] + for result in results: + contribution_dict = result.contribution.to_dict() + if result.problems: + degraded[result.key] = result.problems + logger.error( + "Plugin %r contributed invalid authz (%d problem(s)); on_invalid_plugin=%s: %s", + result.key, + len(result.problems), + on_invalid, + "; ".join(result.problems), + ) + if on_invalid == "hard_fail": + raise RuntimeError( + f"Plugin {result.key!r} contributed invalid authz and " + f"authz.on_invalid_plugin=hard_fail: {'; '.join(result.problems)}" + ) + # Fence the plugin's whole /apis/{key} namespace (deny-all) whenever per-route + # coverage can't be trusted: + # * no route enumerated at all (load/derivation failure) — the runner may still + # mount this plugin via a separate instantiation; OR + # * quarantine — _quarantine_contribution only rewrites the routes derivation SAW, + # so any runner-mounted-but-unseen route would otherwise stay open. + # deny_route keeps just the per-route denies already in the contribution. The + # runner mounts at /apis/{name}; the name==key invariant is only warned, not + # enforced, so fence both the entry-point key and the declared mount name.
+ no_endpoints = not contribution_dict.get("endpoints") + if on_invalid == "quarantine": + contribution_dict = _quarantine_contribution(contribution_dict) + if no_endpoints or on_invalid == "quarantine": + denied_prefixes.append(f"/apis/{result.key}") + if result.mount_name and result.mount_name != result.key: + denied_prefixes.append(f"/apis/{result.mount_name}") + if result.warnings: + logger.warning( + "Plugin %r has %d authz warning(s) (non-deny — e.g. missing or conflicting " + "permission descriptions): %s", + result.key, + len(result.warnings), + "; ".join(result.warnings), + ) + contributions.append(contribution_dict) + + _degraded_plugins = degraded + contributions = [c for c in contributions if c.get("permissions") or c.get("endpoints")] + if contributions: + logger.debug("Merging %d plugin authz contribution(s)", len(contributions)) + merged = merge_authz_contributions(static_data, contributions) if contributions else static_data + if denied_prefixes: + logger.error("Fencing degraded plugin namespace(s) (deny-all): %s", ", ".join(sorted(set(denied_prefixes)))) + config = merged.setdefault("authz", {}).setdefault("config", {}) + existing = config.get("denied_plugin_prefixes") or [] + existing_prefixes = existing if isinstance(existing, list) else [] + config["denied_plugin_prefixes"] = sorted(set(existing_prefixes) | set(denied_prefixes)) + return merged async def _build_authorization_data_internal(entities_client: Optional[EntityClient] = None) -> dict: @@ -122,7 +214,7 @@ async def _build_authorization_data_internal(entities_client: Optional[EntityCli with open(static_data_path, "r") as f: static_data = yaml.safe_load(f) - static_data = _merge_plugin_authz_contributions(static_data) + static_data = merge_plugin_authz_contributions(static_data) validate_static_authz_data(static_data) # Initialize workspaces and principals if not present @@ -131,6 +223,12 @@ async def _build_authorization_data_internal(entities_client: Optional[EntityCli if "principals" 
not in static_data["authz"]: static_data["authz"]["principals"] = {} + # Surface auth-service config the Rego reads (PlatformAdmin exemption from + # SERVICE_PRINCIPAL-only plugin routes — see authz.rego deny_request rules). + static_data["authz"].setdefault("config", {})["platform_admin_exempt_from_service_only"] = get_service_config( + AuthServiceConfig + ).platform_admin_exempt_from_service_only + # Fetch dynamic data from EntityClient if available if entities_client: # Fetch all role bindings across ALL workspaces with pagination diff --git a/services/core/auth/src/nmp/core/auth/app/policies/authz.rego b/services/core/auth/src/nmp/core/auth/app/policies/authz.rego index 17de7d7e69..fecf1cc095 100644 --- a/services/core/auth/src/nmp/core/auth/app/policies/authz.rego +++ b/services/core/auth/src/nmp/core/auth/app/policies/authz.rego @@ -6,14 +6,13 @@ import future.keywords.in import data.authz.extract_method import data.authz.extract_path -import data.authz.extract_scopes -import data.authz.extract_workspace_from_path import data.authz.scope_check_passed +import data.common.endpoint_scan import data.common.get_applicable_principals -import data.common.get_required_permissions import data.common.has_permissions -import data.common.normalize_endpoint -import data.common.path_matches_pattern +import data.common.req_callers +import data.common.req_deny +import data.common.req_permissions # Main entry point - returns result with X-NMP-Authorized header # @@ -59,13 +58,7 @@ allow_request if { allow_request if { principal_id := extract_principal_id startswith(principal_id, "service:") - path := extract_path - base_path := split(path, "?")[0] - matching_patterns := {p | - some p in object.keys(data.authz.endpoints) - path_matches_pattern(base_path, p) - } - count(matching_patterns) == 0 + endpoint_scan == "" } # Allow if any applicable principal has required permissions and scopes (if provided) @@ -78,10 +71,11 @@ allow_request if { path := extract_path method := 
extract_method - required_permissions := get_required_permissions(path, method) + required_permissions := req_permissions count(required_permissions) > 0 - workspace := extract_workspace_from_path(path) + workspace_scan != "" + workspace := workspace_scan # Skip this rule for wildcard workspace - use cross-workspace rule instead workspace != "-" @@ -103,10 +97,11 @@ allow_request if { path := extract_path method := extract_method - required_permissions := get_required_permissions(path, method) + required_permissions := req_permissions count(required_permissions) > 0 - workspace := extract_workspace_from_path(path) + workspace_scan != "" + workspace := workspace_scan workspace == "-" method in ["POST", "PUT", "PATCH", "DELETE"] @@ -126,16 +121,19 @@ allow_request if { base_path := split(path, "?")[0] startswith(base_path, "/apis/auth/v2/iam/") method := extract_method - required_permissions := get_required_permissions(path, method) + required_permissions := req_permissions count(required_permissions) > 0 - not extract_workspace_from_path(path) + workspace_scan == "" some principal in applicable_principals has_permissions(principal, "system", required_permissions) } -# Allow cross-workspace LIST operations (GET/HEAD without workspace in path) -# for authenticated users. -# If the user has no accessible workspaces, they will get empty list. +# Allow cross-workspace LIST operations (GET/HEAD without workspace in path) for authenticated +# users — the workspace-filtered list case (results scoped to the caller's accessible +# workspaces; an empty list when they have none). +# Only applies when the endpoint declares NO required permissions. A permission-stamped +# no-workspace GET must instead satisfy its permission (rule below); otherwise the stamped +# permission is decorative, which is how the bundle-download endpoint had to be special-cased. 
allow_request if { applicable_principals := get_applicable_principals count(applicable_principals) > 0 @@ -149,10 +147,43 @@ allow_request if { # Ensure the path matches a known endpoint pattern. # normalize_endpoint is undefined for unknown paths, failing the rule (deny by default). - normalize_endpoint(path) + endpoint_scan != "" # Match if no workspace can be extracted from path (undefined = no workspace placeholder) - not extract_workspace_from_path(path) + workspace_scan == "" + + # Permissionless only: an endpoint with no `permissions` (empty or absent) keeps the + # "any authenticated user" behavior; a permissioned one falls through to the rule below. + not req_has_permissions +} + +# A permission-stamped no-workspace GET/HEAD must satisfy its declared permission in the system +# workspace (the home for non-workspace-scoped resources, matching the IAM rule above), so the +# permission is enforced rather than decorative. +allow_request if { + applicable_principals := get_applicable_principals + count(applicable_principals) > 0 + + scope_check_passed + + method := extract_method + method in ["GET", "HEAD"] + path := extract_path + endpoint_scan != "" + workspace_scan == "" + + required_permissions := req_permissions + count(required_permissions) > 0 + + some principal in applicable_principals + has_permissions(principal, "system", required_permissions) +} + +# True when the matched endpoint declares one or more required permissions for this method. +# Undefined required-permissions (an endpoint with no `permissions` key) makes count() undefined, +# so this is false there — absent/empty permissions are treated alike (no permission required). 
+req_has_permissions if { + count(req_permissions) > 0 } # Allow cross-workspace LIST operations with "-" wildcard workspace @@ -168,7 +199,8 @@ allow_request if { path := extract_path # Match if workspace is "-" wildcard - workspace := extract_workspace_from_path(path) + workspace_scan != "" + workspace := workspace_scan workspace == "-" } @@ -189,9 +221,7 @@ allow_request if { scope_check_passed path := extract_path method := extract_method - endpoint := normalize_endpoint(path) - method_lower := lower(method) - data.authz.endpoints[endpoint][method_lower].permissions == [] + req_permissions == [] } # DENY REQUEST RULES @@ -199,6 +229,37 @@ allow_request if { # Default allow (deny_request overrides allow_request when true) default deny_request := false +# Explicit deny marker (data.authz.endpoints[...].deny == true) — the fail-closed signal for +# unruled or invalid plugin routes. As a deny_request it overrides every allow rule, including +# the ServiceSystem "*" wildcard and the PlatformAdmin bypass, so an un-annotated plugin route +# can never fall through to the service: no-match bypass and become accessible. +deny_request if { + req_deny +} + +# Fence a degraded plugin's entire namespace. The bundle records /apis/{key} prefixes for +# plugins whose authz could not be derived at all (load / enumeration failure) — their routes +# may still be mounted by the runner, so deny every path under the prefix rather than let it +# fall through the service: no-match bypass. Undefined config key ⇒ no prefixes ⇒ inert. +deny_request if { + some prefix in object.get(data.authz.config, "denied_plugin_prefixes", []) + path_under_denied_prefix(split(extract_path, "?")[0], prefix) +} + +# A path is fenced if it sits under the prefix (/apis/{key}/...) OR equals it exactly +# (the bare /apis/{key} route). The trailing-slash form alone misses the bare prefix. +# +# WASM constraint: only natively-compiled builtins may be used here.
The embedded PDP +# stubs SDK-provided builtins (env::opa_builtin*) to return 0, so a deny arm written with +# e.g. sprintf silently never fires in production while `opa test` (full Go evaluator) +# still passes. Boundary check via startswith + substring/count, all wasm-native. +path_under_denied_prefix(path, prefix) if path == prefix + +path_under_denied_prefix(path, prefix) if { + startswith(path, prefix) + substring(path, count(prefix), 1) == "/" +} + # Deny direct secret value access for non-service principals (including PlatformAdmin). # Secret values must only be accessed through the service delegation pattern, where a # service principal reads the value on behalf of a user with secrets.access permission. @@ -253,6 +314,71 @@ deny_request if { not platform_admin_in_system } +# Caller-kind enforcement for service-only routes. +# +# A route declares allowed caller kinds via the optional `callers` list on its +# endpoint config (see endpoint_callers). A route is "service-only" iff it allows +# service principals but NOT principals: +# callers: ["service_principal"] +# When `callers` is absent, endpoint_callers is undefined and service_only_route is +# false — the route keeps today's PRINCIPAL-default semantics (no new restriction). +# Routes that list "principal" (alone or with "service_principal") are NOT service-only, +# so human callers remain allowed there. +default service_only_route := false + +service_only_route if { + callers := req_callers + "service_principal" in callers + not "principal" in callers +} + +# Deny a human (non-service) caller on a service-only route. This is a deny_request so +# it overrides the allow rules — including the ServiceSystem "*" wildcard and the +# PlatformAdmin allow-bypass — otherwise humans would leak onto service-only routes. +# Service principals (id starts with "service:") are unaffected and stay allowed. 
+# A human PlatformAdmin is denied by default, unless explicitly exempted via the +# platform_admin_exempt_from_service_only config knob. +deny_request if { + service_only_route + principal_id := extract_principal_id + not startswith(principal_id, "service:") + not platform_admin_exempt +} + +# Caller-kind enforcement for principal-only routes — the symmetric counterpart of the +# service-only deny above. A route is "principal-only" iff it allows principals but NOT service +# principals: +# callers: ["principal"] +# When `callers` is absent, endpoint_callers is undefined and this is false: a route with no +# `callers` keeps the PRINCIPAL-default semantics and imposes no new restriction on service +# principals. Routes listing "service_principal" (alone or with "principal") are NOT principal-only. +default principal_only_route := false + +principal_only_route if { + callers := req_callers + "principal" in callers + not "service_principal" in callers +} + +# Deny a service principal on a principal-only route. Without this, callers=["principal"] was +# one-directional — it kept humans in but never kept service principals out (they passed via the +# ServiceSystem "*" wildcard), so `callers` could not actually scope a route to human users. +deny_request if { + principal_only_route + principal_id := extract_principal_id + startswith(principal_id, "service:") +} + +# True only when a PlatformAdmin caller is present AND the config knob exempts platform +# admins from service-only enforcement. Read defensively: an absent config key is treated +# as false (default deny for human platform admins on service-only routes). +default platform_admin_exempt := false + +platform_admin_exempt if { + platform_admin_in_system + data.authz.config.platform_admin_exempt_from_service_only == true +} + # True when any applicable principal has PlatformAdmin in the system workspace (see allow_request). 
default platform_admin_in_system := false diff --git a/services/core/auth/src/nmp/core/auth/app/policies/common.rego b/services/core/auth/src/nmp/core/auth/app/policies/common.rego index 3cd2d75872..ebecefb03a 100644 --- a/services/core/auth/src/nmp/core/auth/app/policies/common.rego +++ b/services/core/auth/src/nmp/core/auth/app/policies/common.rego @@ -53,6 +53,24 @@ get_required_permissions(path, method) := perms if { perms := data.authz.endpoints[endpoint][method_lower].permissions } +# Get the allowed caller kinds for an endpoint/method combination. +# Uses the same most-specific endpoint match as get_required_permissions. +# Returns undefined (not []) when the matched endpoint has no `callers` key, so +# callers can treat absence as the default (PRINCIPAL) semantics — no new restriction. +endpoint_callers(path, method) := callers if { + endpoint := normalize_endpoint(path) + method_lower := lower(method) + callers := data.authz.endpoints[endpoint][method_lower].callers +} + +# True when the matched endpoint carries an explicit `deny: true` marker — the fail-closed +# signal emitted for unruled or invalid plugin routes. Undefined (not false) otherwise so it +# only fires where the marker is present. +endpoint_denied(path, method) if { + endpoint := normalize_endpoint(path) + data.authz.endpoints[endpoint][lower(method)].deny == true +} + # Check specific permission (for middleware to check special permissions) # Supports workspace-scoped permissions (format: "workspace/permission") # For permissions with format "workspace/permission": @@ -326,6 +344,26 @@ normalize_endpoint(path) := pattern if { pattern_scores[pattern] == min_score } +# --- Request-scoped memoization ------------------------------------------------- +# extract_path and extract_method are 0-arg rules, and OPA caches complete-rule +# results for the lifetime of a single query. 
normalize_endpoint scans every +# configured endpoint pattern (O(endpoints)); binding it to a 0-arg rule here makes +# that scan run ONCE per evaluation instead of once per call site. The allow/deny +# rules reference these instead of re-calling the path/method helper functions. +# The functions above are kept intact — the policy tests call them with explicit +# paths/methods, which must not be tied to the live request path. +endpoint_scan := e if { + e := normalize_endpoint(extract_path) +} else := "" + +req_method_lower := lower(extract_method) + +req_permissions := data.authz.endpoints[endpoint_scan][req_method_lower].permissions + +req_callers := data.authz.endpoints[endpoint_scan][req_method_lower].callers + +req_deny if data.authz.endpoints[endpoint_scan][req_method_lower].deny == true + # UTILITY HELPERS # Helper to format boolean as string for headers diff --git a/services/core/auth/src/nmp/core/auth/app/policies/extract.rego b/services/core/auth/src/nmp/core/auth/app/policies/extract.rego index 62c8a21496..e0227e353e 100644 --- a/services/core/auth/src/nmp/core/auth/app/policies/extract.rego +++ b/services/core/auth/src/nmp/core/auth/app/policies/extract.rego @@ -1,8 +1,8 @@ package authz import data.authz.allow -import data.authz.has_role import data.authz.has_permissions +import data.authz.has_role import future.keywords.if import future.keywords.in @@ -177,3 +177,9 @@ extract_workspace_from_path(path) := workspace if { # Extract the workspace ID value workspace := path_parts[i] } + +# Request-scoped workspace, memoized once per evaluation (see common.endpoint_scan). +# The function above stays intact for the policy tests; allow rules use this 0-arg rule. 
+workspace_scan := w if { + w := extract_workspace_from_path(extract_path) +} else := "" diff --git a/services/core/auth/src/nmp/core/auth/app/policies/scopes.rego b/services/core/auth/src/nmp/core/auth/app/policies/scopes.rego index 92b9b7a3b0..2576ce8b83 100644 --- a/services/core/auth/src/nmp/core/auth/app/policies/scopes.rego +++ b/services/core/auth/src/nmp/core/auth/app/policies/scopes.rego @@ -1,17 +1,17 @@ package authz import data.authz.allow -import data.authz.has_role import data.authz.has_permissions +import data.authz.has_role import future.keywords.contains import future.keywords.if import future.keywords.in -import data.authz.extract_method -import data.authz.extract_path import data.authz.extract_scopes +import data.common.endpoint_scan import data.common.normalize_endpoint +import data.common.req_method_lower # Scope Checking Helpers # @@ -61,9 +61,7 @@ scope_check_passed if { scopes := extract_scopes platform_scopes := [s | s := scopes[_]; contains(s, ":")] count(platform_scopes) > 0 - path := extract_path - method := extract_method - has_required_scopes(path, method, platform_scopes) + req_has_required_scopes(platform_scopes) } # Get required scopes for an endpoint/method combination @@ -94,3 +92,18 @@ has_required_scopes(path, method, provided_scopes) if { some required_scope in required_scopes required_scope in provided_scopes } + +# Cached required-scopes for the request endpoint (mirror of +# get_required_scopes(extract_path, extract_method), but using the memoized endpoint). 
+req_required_scopes := scopes if { + scopes := data.authz.endpoints[endpoint_scan][req_method_lower].scopes +} else := [] + +req_has_required_scopes(provided_scopes) if { + count(req_required_scopes) == 0 +} + +req_has_required_scopes(provided_scopes) if { + some required_scope in req_required_scopes + required_scope in provided_scopes +} diff --git a/services/core/auth/src/nmp/core/auth/app/policy_tests/caller_kind_test.rego b/services/core/auth/src/nmp/core/auth/app/policy_tests/caller_kind_test.rego new file mode 100644 index 0000000000..f4dda6c070 --- /dev/null +++ b/services/core/auth/src/nmp/core/auth/app/policy_tests/caller_kind_test.rego @@ -0,0 +1,308 @@ +package authz_test + +import data.authz + +# Tests for caller-kind enforcement of service-only routes. +# +# Data contract: an endpoint method may declare an optional `callers` list of +# caller-kind strings ("principal", "service_principal"). A route is "service-only" +# when it lists "service_principal" but NOT "principal". On such routes, human +# (non-service) callers are denied — overriding the permission/PlatformAdmin allows — +# unless the platform_admin_exempt_from_service_only config knob is set for a +# PlatformAdmin caller. Absence of `callers` preserves today's PRINCIPAL-default +# behavior (no new restriction). + +caller_kind_test_data := { + "roles": { + "Viewer": { + "permissions": ["models.read", "models.list", "jobs.read", "jobs.list"], + }, + "Editor": { + "includes": ["Viewer"], + "permissions": ["models.create", "jobs.create"], + }, + "PlatformAdmin": { + "includes": ["Editor"], + "permissions": ["iam.read", "iam.create"], + }, + # Default role for service:* principals with no explicit bindings. + "ServiceSystem": { + "permissions": ["*"], + }, + }, + "endpoints": { + # Service-only route: only service principals are allowed. 
+ "/apis/jobs/v2/workspaces/{workspace}/internal-jobs/{name}": { + "get": {"permissions": ["jobs.read"], "callers": ["service_principal"]}, + }, + # Mixed route: both humans and services are allowed. + "/apis/models/v2/workspaces/{workspace}/models/{name}": { + "get": {"permissions": ["models.read"], "callers": ["principal", "service_principal"]}, + }, + # No `callers` key: legacy PRINCIPAL-default behavior. + "/apis/models/v2/workspaces/{workspace}/models": { + "get": {"permissions": ["models.list"]}, + }, + # Principal-only route: only human principals are allowed (callers: ["principal"]). + "/apis/models/v2/workspaces/{workspace}/human-only/{name}": { + "get": {"permissions": ["models.read"], "callers": ["principal"]}, + }, + }, + "workspaces": { + "system": {}, + "ws1": {}, + }, + "principals": { + "platform-admin@example.com": { + "workspaces": {"system": ["PlatformAdmin"]}, + }, + # Human user with permissions sufficient for every endpoint under test. + "user@example.com": { + "workspaces": {"ws1": ["Viewer"]}, + }, + }, +} + +# --- Service-only route --- + +# A normal human principal (with the required permission) is DENIED on a service-only route. +test_service_only_route_denies_human_principal if { + result := authz.allow + with input as { + "principal_id": "user@example.com", + "method": "GET", + "path": "/apis/jobs/v2/workspaces/ws1/internal-jobs/job-1", + } + with data.authz.roles as caller_kind_test_data.roles + with data.authz.endpoints as caller_kind_test_data.endpoints + with data.authz.workspaces as caller_kind_test_data.workspaces + with data.authz.principals as caller_kind_test_data.principals + + result.allowed == false +} + +# A service principal is ALLOWED on the same service-only route. 
+test_service_only_route_allows_service_principal if { + result := authz.allow + with input as { + "principal_id": "service:jobs-controller", + "method": "GET", + "path": "/apis/jobs/v2/workspaces/ws1/internal-jobs/job-1", + } + with data.authz.roles as caller_kind_test_data.roles + with data.authz.endpoints as caller_kind_test_data.endpoints + with data.authz.workspaces as caller_kind_test_data.workspaces + with data.authz.principals as caller_kind_test_data.principals + + result.allowed == true +} + +# A human PlatformAdmin is DENIED on a service-only route by default (no exemption knob). +test_service_only_route_denies_platform_admin_by_default if { + result := authz.allow + with input as { + "principal_id": "platform-admin@example.com", + "method": "GET", + "path": "/apis/jobs/v2/workspaces/ws1/internal-jobs/job-1", + } + with data.authz.roles as caller_kind_test_data.roles + with data.authz.endpoints as caller_kind_test_data.endpoints + with data.authz.workspaces as caller_kind_test_data.workspaces + with data.authz.principals as caller_kind_test_data.principals + + result.allowed == false +} + +# With the config knob enabled, the human PlatformAdmin is ALLOWED on a service-only route. +test_service_only_route_allows_platform_admin_when_exempt if { + result := authz.allow + with input as { + "principal_id": "platform-admin@example.com", + "method": "GET", + "path": "/apis/jobs/v2/workspaces/ws1/internal-jobs/job-1", + } + with data.authz.roles as caller_kind_test_data.roles + with data.authz.endpoints as caller_kind_test_data.endpoints + with data.authz.workspaces as caller_kind_test_data.workspaces + with data.authz.principals as caller_kind_test_data.principals + with data.authz.config as {"platform_admin_exempt_from_service_only": true} + + result.allowed == true +} + +# The knob does NOT exempt a normal human (non-PlatformAdmin) — still denied. 
+test_service_only_route_knob_does_not_exempt_normal_human if { + result := authz.allow + with input as { + "principal_id": "user@example.com", + "method": "GET", + "path": "/apis/jobs/v2/workspaces/ws1/internal-jobs/job-1", + } + with data.authz.roles as caller_kind_test_data.roles + with data.authz.endpoints as caller_kind_test_data.endpoints + with data.authz.workspaces as caller_kind_test_data.workspaces + with data.authz.principals as caller_kind_test_data.principals + with data.authz.config as {"platform_admin_exempt_from_service_only": true} + + result.allowed == false +} + +# --- Mixed route (callers: ["principal", "service_principal"]) --- + +# A human principal is ALLOWED on a route that lists both caller kinds. +test_mixed_route_allows_human_principal if { + result := authz.allow + with input as { + "principal_id": "user@example.com", + "method": "GET", + "path": "/apis/models/v2/workspaces/ws1/models/m-1", + } + with data.authz.roles as caller_kind_test_data.roles + with data.authz.endpoints as caller_kind_test_data.endpoints + with data.authz.workspaces as caller_kind_test_data.workspaces + with data.authz.principals as caller_kind_test_data.principals + + result.allowed == true +} + +# A service principal is ALLOWED on the same mixed route. +test_mixed_route_allows_service_principal if { + result := authz.allow + with input as { + "principal_id": "service:customizer", + "method": "GET", + "path": "/apis/models/v2/workspaces/ws1/models/m-1", + } + with data.authz.roles as caller_kind_test_data.roles + with data.authz.endpoints as caller_kind_test_data.endpoints + with data.authz.workspaces as caller_kind_test_data.workspaces + with data.authz.principals as caller_kind_test_data.principals + + result.allowed == true +} + +# --- Principal-only route (callers: ["principal"]) --- + +# A human principal with the required permission is ALLOWED on a principal-only route. 
+test_principal_only_route_allows_human_principal if { + result := authz.allow + with input as { + "principal_id": "user@example.com", + "method": "GET", + "path": "/apis/models/v2/workspaces/ws1/human-only/m-1", + } + with data.authz.roles as caller_kind_test_data.roles + with data.authz.endpoints as caller_kind_test_data.endpoints + with data.authz.workspaces as caller_kind_test_data.workspaces + with data.authz.principals as caller_kind_test_data.principals + + result.allowed == true +} + +# A service principal is DENIED on the same principal-only route — even though ServiceSystem +# grants "*" — closing the previously one-directional caller enforcement. +test_principal_only_route_denies_service_principal if { + result := authz.allow + with input as { + "principal_id": "service:customizer", + "method": "GET", + "path": "/apis/models/v2/workspaces/ws1/human-only/m-1", + } + with data.authz.roles as caller_kind_test_data.roles + with data.authz.endpoints as caller_kind_test_data.endpoints + with data.authz.workspaces as caller_kind_test_data.workspaces + with data.authz.principals as caller_kind_test_data.principals + + result.allowed == false +} + +# principal_only_route is TRUE for a route that lists only "principal". +test_principal_only_route_helper_true_for_principal_only if { + authz.principal_only_route + with input as { + "principal_id": "service:customizer", + "method": "GET", + "path": "/apis/models/v2/workspaces/ws1/human-only/m-1", + } + with data.authz.endpoints as caller_kind_test_data.endpoints +} + +# principal_only_route is FALSE for a mixed route (which also lists "service_principal"). 
+test_principal_only_route_helper_false_for_mixed if { + not authz.principal_only_route + with input as { + "principal_id": "service:customizer", + "method": "GET", + "path": "/apis/models/v2/workspaces/ws1/models/m-1", + } + with data.authz.endpoints as caller_kind_test_data.endpoints +} + +# --- No `callers` key (legacy default) --- + +# A human principal with the required permission is ALLOWED (the new deny does not fire). +test_no_callers_key_allows_human_principal if { + result := authz.allow + with input as { + "principal_id": "user@example.com", + "method": "GET", + "path": "/apis/models/v2/workspaces/ws1/models", + } + with data.authz.roles as caller_kind_test_data.roles + with data.authz.endpoints as caller_kind_test_data.endpoints + with data.authz.workspaces as caller_kind_test_data.workspaces + with data.authz.principals as caller_kind_test_data.principals + + result.allowed == true +} + +# A service principal is ALLOWED on the no-callers route (unchanged from today). +test_no_callers_key_allows_service_principal if { + result := authz.allow + with input as { + "principal_id": "service:customizer", + "method": "GET", + "path": "/apis/models/v2/workspaces/ws1/models", + } + with data.authz.roles as caller_kind_test_data.roles + with data.authz.endpoints as caller_kind_test_data.endpoints + with data.authz.workspaces as caller_kind_test_data.workspaces + with data.authz.principals as caller_kind_test_data.principals + + result.allowed == true +} + +# --- Helper-level checks: service_only_route detection --- + +# service_only_route is TRUE for a route that lists only "service_principal". +test_service_only_route_helper_true_for_service_only if { + authz.service_only_route + with input as { + "principal_id": "user@example.com", + "method": "GET", + "path": "/apis/jobs/v2/workspaces/ws1/internal-jobs/job-1", + } + with data.authz.endpoints as caller_kind_test_data.endpoints +} + +# service_only_route is FALSE for a mixed route that also lists "principal". 
+test_service_only_route_helper_false_for_mixed if { + not authz.service_only_route + with input as { + "principal_id": "user@example.com", + "method": "GET", + "path": "/apis/models/v2/workspaces/ws1/models/m-1", + } + with data.authz.endpoints as caller_kind_test_data.endpoints +} + +# service_only_route is FALSE when the route has no `callers` key. +test_service_only_route_helper_false_for_absent_callers if { + not authz.service_only_route + with input as { + "principal_id": "user@example.com", + "method": "GET", + "path": "/apis/models/v2/workspaces/ws1/models", + } + with data.authz.endpoints as caller_kind_test_data.endpoints +} diff --git a/services/core/auth/src/nmp/core/auth/app/policy_tests/deny_test.rego b/services/core/auth/src/nmp/core/auth/app/policy_tests/deny_test.rego new file mode 100644 index 0000000000..dbc418023e --- /dev/null +++ b/services/core/auth/src/nmp/core/auth/app/policy_tests/deny_test.rego @@ -0,0 +1,137 @@ +package authz_test + +import data.authz + +# Tests for the explicit endpoint deny marker (data.authz.endpoints[...].deny == true), +# the fail-closed signal emitted for unruled / invalid plugin routes. A denied route must +# reject EVERY caller, overriding the empty-permissions allow, the ServiceSystem "*" +# wildcard, and the PlatformAdmin bypass. + +deny_test_data := { + "roles": { + "Viewer": {"permissions": ["models.read"]}, + "PlatformAdmin": {"includes": ["Viewer"], "permissions": ["iam.read"]}, + # Default role for service:* principals: wildcard permission. + "ServiceSystem": {"permissions": ["*"]}, + }, + "endpoints": { + # Explicit deny marker (note permissions == [] would otherwise ALLOW any + # authenticated caller via the empty-permissions rule — deny must override that). + "/apis/agents/v2/workspaces/{workspace}/internal": { + "get": {"permissions": [], "deny": true}, + }, + # Negative control: an ordinary route with no deny marker. 
+ "/apis/models/v2/workspaces/{workspace}/models/{name}": { + "get": {"permissions": ["models.read"]}, + }, + }, + "workspaces": {"system": {}, "ws1": {}}, + "principals": { + "platform-admin@example.com": {"workspaces": {"system": ["PlatformAdmin"]}}, + "user@example.com": {"workspaces": {"ws1": ["Viewer"]}}, + }, +} + +test_deny_marker_denies_human_principal if { + result := authz.allow + with input as {"principal_id": "user@example.com", "method": "GET", "path": "/apis/agents/v2/workspaces/ws1/internal"} + with data.authz.roles as deny_test_data.roles + with data.authz.endpoints as deny_test_data.endpoints + with data.authz.workspaces as deny_test_data.workspaces + with data.authz.principals as deny_test_data.principals + + result.allowed == false +} + +# Overrides the ServiceSystem "*" wildcard that a service:* principal defaults to. +test_deny_marker_denies_service_principal if { + result := authz.allow + with input as {"principal_id": "service:agents", "method": "GET", "path": "/apis/agents/v2/workspaces/ws1/internal"} + with data.authz.roles as deny_test_data.roles + with data.authz.endpoints as deny_test_data.endpoints + with data.authz.workspaces as deny_test_data.workspaces + with data.authz.principals as deny_test_data.principals + + result.allowed == false +} + +# Overrides the PlatformAdmin allow-bypass. +test_deny_marker_denies_platform_admin if { + result := authz.allow + with input as {"principal_id": "platform-admin@example.com", "method": "GET", "path": "/apis/agents/v2/workspaces/ws1/internal"} + with data.authz.roles as deny_test_data.roles + with data.authz.endpoints as deny_test_data.endpoints + with data.authz.workspaces as deny_test_data.workspaces + with data.authz.principals as deny_test_data.principals + + result.allowed == false +} + +# A route without the deny marker is unaffected — normal permission check applies. 
+test_no_deny_marker_allows_with_permission if { + result := authz.allow + with input as {"principal_id": "user@example.com", "method": "GET", "path": "/apis/models/v2/workspaces/ws1/models/m1"} + with data.authz.roles as deny_test_data.roles + with data.authz.endpoints as deny_test_data.endpoints + with data.authz.workspaces as deny_test_data.workspaces + with data.authz.principals as deny_test_data.principals + + result.allowed == true +} + +# Namespace fence: a path NOT in the endpoints map but under a degraded plugin's denied prefix +# must be denied — directly closing the service: no-match bypass for an unenumerable plugin. +test_denied_plugin_prefix_denies_service_principal if { + result := authz.allow + with input as {"principal_id": "service:x", "method": "POST", "path": "/apis/badplugin/v2/workspaces/ws1/anything/deep/path"} + with data.authz.roles as deny_test_data.roles + with data.authz.endpoints as deny_test_data.endpoints + with data.authz.workspaces as deny_test_data.workspaces + with data.authz.principals as deny_test_data.principals + with data.authz.config as {"denied_plugin_prefixes": ["/apis/badplugin"]} + + result.allowed == false +} + +# Control: WITHOUT the fence, that same unknown path is allowed for a service principal via the +# no-match bypass — proving the fence is what closes the hole. +test_unknown_path_without_fence_hits_service_bypass if { + result := authz.allow + with input as {"principal_id": "service:x", "method": "POST", "path": "/apis/badplugin/v2/workspaces/ws1/anything/deep/path"} + with data.authz.roles as deny_test_data.roles + with data.authz.endpoints as deny_test_data.endpoints + with data.authz.workspaces as deny_test_data.workspaces + with data.authz.principals as deny_test_data.principals + + result.allowed == true +} + +# Sibling safety: a prefix-sharing namespace (/apis/badplugin-extra) is NOT collaterally fenced by +# /apis/badplugin. 
The trailing slash in the fence rule (sprintf("%s/", [prefix])) is the only thing +# keeping the sibling allowed; pin it so a refactor can't silently widen the fence onto a neighbour. +test_sibling_prefix_not_collaterally_fenced if { + result := authz.allow + with input as {"principal_id": "service:x", "method": "POST", "path": "/apis/badplugin-extra/v2/workspaces/ws1/anything"} + with data.authz.roles as deny_test_data.roles + with data.authz.endpoints as deny_test_data.endpoints + with data.authz.workspaces as deny_test_data.workspaces + with data.authz.principals as deny_test_data.principals + with data.authz.config as {"denied_plugin_prefixes": ["/apis/badplugin"]} + + result.allowed == true +} + +# The bare /apis/{plugin} route (the prefix with no trailing segment) must be fenced too. +# The old trailing-slash-only rule (startswith(path, "{prefix}/")) missed this exact-match case, +# leaving a degraded plugin's root path open. +test_bare_prefix_path_is_fenced if { + result := authz.allow + with input as {"principal_id": "service:x", "method": "GET", "path": "/apis/badplugin"} + with data.authz.roles as deny_test_data.roles + with data.authz.endpoints as deny_test_data.endpoints + with data.authz.workspaces as deny_test_data.workspaces + with data.authz.principals as deny_test_data.principals + with data.authz.config as {"denied_plugin_prefixes": ["/apis/badplugin"]} + + result.allowed == false +} diff --git a/services/core/auth/src/nmp/core/auth/app/policy_tests/generic_entities_deny_test.rego b/services/core/auth/src/nmp/core/auth/app/policy_tests/generic_entities_deny_test.rego index 033604daa1..40e952f9bc 100644 --- a/services/core/auth/src/nmp/core/auth/app/policy_tests/generic_entities_deny_test.rego +++ b/services/core/auth/src/nmp/core/auth/app/policy_tests/generic_entities_deny_test.rego @@ -173,7 +173,9 @@ test_service_principal_allowed_entities if { } # Viewer CAN still list workspaces (non-entity endpoint unaffected) -test_viewer_can_list_workspaces if {
+test_workspace_viewer_cannot_list_workspaces if { + # viewer@test.com is a Viewer of test-ws only, so it lacks workspaces.list in the system + # workspace — listing workspaces now requires that system-level grant. result := authz.allow with input as { "principal_id": "viewer@test.com", @@ -185,5 +187,5 @@ test_viewer_can_list_workspaces if { with data.authz.workspaces as entities_deny_test_data.workspaces with data.authz.principals as entities_deny_test_data.principals - result.allowed == true + result.allowed == false } diff --git a/services/core/auth/src/nmp/core/auth/app/policy_tests/namespace_access_test.rego b/services/core/auth/src/nmp/core/auth/app/policy_tests/namespace_access_test.rego index 0d5ad86fa9..36eb20fc5e 100644 --- a/services/core/auth/src/nmp/core/auth/app/policy_tests/namespace_access_test.rego +++ b/services/core/auth/src/nmp/core/auth/app/policy_tests/namespace_access_test.rego @@ -61,7 +61,11 @@ workspace_access_test_data := { "principals": { "user123": { "workspaces": { - "test-workspace": ["Admin"] + # Admin of test-workspace, plus a system Viewer grant. Listing workspaces now + # requires workspaces.list in the SYSTEM workspace, so a system-level + # role is what lets this user list. + "test-workspace": ["Admin"], + "system": ["Viewer"] } } } @@ -71,9 +75,10 @@ workspace_access_test_data := { # Test 1: List workspaces with no access # ============================================================================ -test_list_workspaces_no_access_allowed if { - # User with no workspace access should still be allowed to list (but gets empty results) - result := authz.allow +test_list_workspaces_without_system_permission_denied if { + # Listing workspaces now requires workspaces.list in the SYSTEM workspace. A user + # with no such grant is denied — previously this rule allowed any authenticated user. 
+ result := authz.allow with input as { "principal_id": "test-user", "principal_email": "test@example.com", @@ -85,10 +90,10 @@ test_list_workspaces_no_access_allowed if { with data.authz.endpoints as workspace_access_test_data.endpoints with data.authz.workspaces as workspace_access_test_data.workspaces with data.authz.principals as { - "test-user": {"workspaces": {}} # No workspace access + "test-user": {"workspaces": {}} # No system workspaces.list grant } - - result.allowed == true + + result.allowed == false } # ============================================================================ @@ -249,9 +254,10 @@ test_creator_list_workspaces_allowed if { # Test 8: Other user list workspaces # ============================================================================ -test_other_user_list_workspaces_allowed if { - # Other user should be allowed to list (but gets empty results) - result := authz.allow +test_other_user_list_workspaces_denied if { + # A user with only workspace-scoped (or no) roles lacks workspaces.list in the system + # workspace, so they can no longer list workspaces. + result := authz.allow with input as { "principal_id": "user456", "principal_email": "other@example.com", @@ -265,8 +271,8 @@ test_other_user_list_workspaces_allowed if { with data.authz.principals as { "user456": {"workspaces": {}} } - - result.allowed == true + + result.allowed == false } # ============================================================================ @@ -317,11 +323,14 @@ test_list_multiple_workspaces if { "workspaces": { "ns1": ["Viewer"], "ns2": ["Editor"], - "ns3": ["Admin"] + "ns3": ["Admin"], + # system Viewer carries workspaces.list, the system-workspace grant that + # listing now requires; the multi-workspace setup is otherwise unchanged. 
+ "system": ["Viewer"] } } } - + result.allowed == true } diff --git a/services/core/auth/src/nmp/core/auth/app/policy_tests/namespace_creation_test.rego b/services/core/auth/src/nmp/core/auth/app/policy_tests/namespace_creation_test.rego index 38c6682fc5..96af137a44 100644 --- a/services/core/auth/src/nmp/core/auth/app/policy_tests/namespace_creation_test.rego +++ b/services/core/auth/src/nmp/core/auth/app/policy_tests/namespace_creation_test.rego @@ -37,7 +37,9 @@ workspace_test_data := { "workspaces": {} # User has no workspace permissions yet }, "existing-user@test.com": { - "workspaces": {"existing-ns": ["Viewer"]} + # Viewer of existing-ns plus a system Viewer grant — listing workspaces now requires + # workspaces.list in the system workspace. + "workspaces": {"existing-ns": ["Viewer"], "system": ["Viewer"]} } } } @@ -87,9 +89,9 @@ test_unauthenticated_user_cannot_create_workspace if { result.allowed == false } -# Test that listing workspaces requires permissions +# Test that listing workspaces requires the system-level workspaces.list permission. test_listing_workspaces_requires_permission if { - # User with permission can list + # existing-user@test.com holds workspaces.list in the system workspace, so it can list. result := authz.allow with input as { "principal_id": "existing-user@test.com", "method": "GET", @@ -103,8 +105,9 @@ test_listing_workspaces_requires_permission if { result.allowed == true } -test_listing_workspaces_allowed_with_empty_filters if { - # Authenticated user can list workspaces even without any accessible workspaces +test_listing_workspaces_without_permission_denied if { + # new-user@test.com has no roles at all, so it lacks workspaces.list in the system + # workspace and can no longer list workspaces. 
result := authz.allow with input as { "principal_id": "new-user@test.com", "method": "GET", @@ -114,8 +117,8 @@ test_listing_workspaces_allowed_with_empty_filters if { with data.authz.endpoints as workspace_test_data.endpoints with data.authz.workspaces as workspace_test_data.workspaces with data.authz.principals as workspace_test_data.principals - - result.allowed == true + + result.allowed == false } # Test allow for workspace creation diff --git a/services/core/auth/src/nmp/core/auth/app/policy_tests/platform_admin_test.rego b/services/core/auth/src/nmp/core/auth/app/policy_tests/platform_admin_test.rego index daa590e59f..788bc99af4 100644 --- a/services/core/auth/src/nmp/core/auth/app/policy_tests/platform_admin_test.rego +++ b/services/core/auth/src/nmp/core/auth/app/policy_tests/platform_admin_test.rego @@ -252,3 +252,43 @@ test_platform_admin_can_read_secret_metadata if { result.allowed == true } + +# Endpoints that include a service-only route (callers: ["service_principal"]) for +# exercising the caller-kind deny against a PlatformAdmin caller. +service_only_endpoints := { + "/apis/models/v2/workspaces/{workspace}/models/{name}": { + "delete": {"permissions": ["models.delete"], "callers": ["service_principal"]} + } +} + +# Test platform admin is DENIED on a service-only route by default (deny overrides the +# PlatformAdmin allow-bypass). +test_platform_admin_denied_on_service_only_route if { + result := authz.allow with input as { + "principal_id": "platform-admin@example.com", + "method": "DELETE", + "path": "/apis/models/v2/workspaces/workspace1/models/model1" + } + with data.authz.roles as platform_admin_test_data.roles + with data.authz.endpoints as service_only_endpoints + with data.authz.workspaces as platform_admin_test_data.workspaces + with data.authz.principals as platform_admin_test_data.principals + + result.allowed == false +} + +# Test platform admin is ALLOWED on a service-only route when the exemption knob is set. 
+test_platform_admin_allowed_on_service_only_route_when_exempt if { + result := authz.allow with input as { + "principal_id": "platform-admin@example.com", + "method": "DELETE", + "path": "/apis/models/v2/workspaces/workspace1/models/model1" + } + with data.authz.roles as platform_admin_test_data.roles + with data.authz.endpoints as service_only_endpoints + with data.authz.workspaces as platform_admin_test_data.workspaces + with data.authz.principals as platform_admin_test_data.principals + with data.authz.config as {"platform_admin_exempt_from_service_only": true} + + result.allowed == true +} diff --git a/services/core/auth/src/nmp/core/auth/app/policy_tests/scopes_test.rego b/services/core/auth/src/nmp/core/auth/app/policy_tests/scopes_test.rego index d30c09f22d..5703ff5444 100644 --- a/services/core/auth/src/nmp/core/auth/app/policy_tests/scopes_test.rego +++ b/services/core/auth/src/nmp/core/auth/app/policy_tests/scopes_test.rego @@ -220,12 +220,15 @@ test_allow_with_empty_scopes if { # Test LIST operations with scopes test_list_allow_with_valid_scopes if { + # Valid scopes AND workspaces.list in the system workspace together allow listing. 
result := allow with input as { "principal_id": "user1", "method": "GET", "path": "/apis/entities/v2/workspaces", "scopes": ["entities:read"] } + with data.authz.principals as {"user1": {"workspaces": {"system": ["Viewer"]}}} + with data.authz.roles as {"Viewer": {"permissions": ["workspaces.list"]}} with data.authz.endpoints as { "/apis/entities/v2/workspaces": { "get": { diff --git a/services/core/auth/src/nmp/core/auth/app/policy_tests/unknown_endpoint_test.rego b/services/core/auth/src/nmp/core/auth/app/policy_tests/unknown_endpoint_test.rego index bf16a14250..30aad25658 100644 --- a/services/core/auth/src/nmp/core/auth/app/policy_tests/unknown_endpoint_test.rego +++ b/services/core/auth/src/nmp/core/auth/app/policy_tests/unknown_endpoint_test.rego @@ -217,15 +217,64 @@ test_known_endpoint_denied_without_permission if { # A known cross-workspace LIST endpoint still works for authenticated users. test_known_cross_workspace_list_still_allowed if { + # Control for the fail-closed unknown-endpoint tests above: a KNOWN no-workspace list + # endpoint is still allowed for a user holding the permission. lister@test.com has Viewer + # (→ workspaces.list) in the system workspace, which listing now requires. 
result := authz.allow with input as { - "principal_id": "user@test.com", + "principal_id": "lister@test.com", "method": "GET", "path": "/apis/entities/v2/workspaces", } with data.authz.roles as unknown_endpoint_test_data.roles with data.authz.endpoints as unknown_endpoint_test_data.endpoints with data.authz.workspaces as unknown_endpoint_test_data.workspaces - with data.authz.principals as unknown_endpoint_test_data.principals + with data.authz.principals as {"lister@test.com": {"workspaces": {"system": ["Viewer"]}}} + + result.allowed == true +} + +# --- permission enforcement on permission-stamped no-workspace GETs --- + +# A permissionless no-workspace GET stays open to any authenticated user (the cross-workspace +# list path is unchanged for endpoints declaring no permissions). +test_permissionless_no_workspace_get_allows_any_authenticated_user if { + result := authz.allow with input as { + "principal_id": "anyone@test.com", + "method": "GET", + "path": "/apis/example/v1/ping", + } + with data.authz.endpoints as {"/apis/example/v1/ping": {"get": {"permissions": []}}} + with data.authz.principals as {"anyone@test.com": {"workspaces": {}}} + with data.authz.roles as {} + + result.allowed == true +} + +# A permission-stamped no-workspace GET is DENIED without the declared permission in the system +# workspace — the stamped permission is enforced rather than decorative. +test_permissioned_no_workspace_get_denied_without_permission if { + result := authz.allow with input as { + "principal_id": "nobody@test.com", + "method": "GET", + "path": "/apis/example/v1/hello/world", + } + with data.authz.endpoints as {"/apis/example/v1/hello/{name}": {"get": {"permissions": ["example.hello.read"]}}} + with data.authz.principals as {"nobody@test.com": {"workspaces": {}}} + with data.authz.roles as {} + + result.allowed == false +} + +# ...and ALLOWED when the caller holds that permission in the system workspace. 
+test_permissioned_no_workspace_get_allowed_with_system_permission if { + result := authz.allow with input as { + "principal_id": "reader@test.com", + "method": "GET", + "path": "/apis/example/v1/hello/world", + } + with data.authz.endpoints as {"/apis/example/v1/hello/{name}": {"get": {"permissions": ["example.hello.read"]}}} + with data.authz.principals as {"reader@test.com": {"workspaces": {"system": ["HelloReader"]}}} + with data.authz.roles as {"HelloReader": {"permissions": ["example.hello.read"]}} result.allowed == true } diff --git a/services/core/auth/src/nmp/core/auth/config.py b/services/core/auth/src/nmp/core/auth/config.py index f10769f9ab..2a3185b459 100644 --- a/services/core/auth/src/nmp/core/auth/config.py +++ b/services/core/auth/src/nmp/core/auth/config.py @@ -3,7 +3,7 @@ """Configuration for the Auth service (v2).""" -from typing import Optional +from typing import Literal, Optional from nmp.common.config import AuthConfig as SharedAuthConfig from pydantic import Field @@ -58,6 +58,24 @@ class AuthServiceConfig(SharedAuthConfig): description="Maximum linear memory (MB) the embedded PDP WASM runtime can consume.", ) + # Plugin HTTP authz fail-mode: what to do when a plugin contributes invalid authz + # (an unruled route, or a rule referencing an undeclared / out-of-namespace permission). + # The offending routes are always emitted as explicit denies; this controls the blast + # radius. deny_route: deny only the bad routes. quarantine: deny the whole plugin. + # hard_fail: refuse to build the OPA bundle. + on_invalid_plugin: Literal["deny_route", "quarantine", "hard_fail"] = Field( + default="deny_route", + description="Fail-mode for a plugin that contributes invalid HTTP authz.", + ) + + # When true, a human PlatformAdmin is allowed on plugin routes restricted to + # SERVICE_PRINCIPAL callers. Default false (deny). Read by the Rego policy via + # data.authz.config.platform_admin_exempt_from_service_only. 
+ platform_admin_exempt_from_service_only: bool = Field( + default=False, + description="Allow a human PlatformAdmin on SERVICE_PRINCIPAL-only plugin routes.", + ) + # Backward compatibility alias AuthConfig = AuthServiceConfig diff --git a/services/core/auth/tests/test_bundle.py b/services/core/auth/tests/test_bundle.py index d8a95cfd14..ea5b147f25 100644 --- a/services/core/auth/tests/test_bundle.py +++ b/services/core/auth/tests/test_bundle.py @@ -13,25 +13,37 @@ @pytest.mark.asyncio async def test_authorization_data_merges_plugin_authz_contributions(monkeypatch): - """Plugin authz contributions are included before validation and bundle build.""" + """Plugin authz contributions are included before validation and bundle build. + + The bundle derives contributions via ``discover_plugin_authz`` (routes-derived model), + so the stub returns a clean ``PluginAuthzResult`` rather than a raw contribution dict. + """ + from nemo_platform_plugin.authz import AuthzContribution, AuthzEndpointMethod + from nemo_platform_plugin.authz_discovery import PluginAuthzResult from nmp.core.auth.app.bundle import _build_authorization_data_internal plugin_path = "/apis/example-plugin/v2/workspaces/{workspace}/jobs" - contribution = { - "permissions": {"example-plugin.jobs.read": "Read example plugin jobs"}, - "endpoints": { - plugin_path: { - "get": { - "permissions": ["example-plugin.jobs.read"], - "scopes": ["example-plugin:read", "platform:read"], + result = PluginAuthzResult( + key="example-plugin", + contribution=AuthzContribution( + permissions={"example-plugin.jobs.read": "Read example plugin jobs"}, + endpoints={ + plugin_path: { + "get": AuthzEndpointMethod( + permissions=["example-plugin.jobs.read"], + scopes=["example-plugin:read", "platform:read"], + ) } - } - }, - } + }, + ), + problems=[], + warnings=[], + mount_name="example-plugin", + ) monkeypatch.setattr( - "nemo_platform_plugin.authz_discovery.discover_authz_contribution_dicts", - lambda: [contribution], + 
"nemo_platform_plugin.authz_discovery.discover_plugin_authz", + lambda: [result], ) data = await _build_authorization_data_internal(entities_client=None) @@ -122,3 +134,115 @@ async def test_bundle_etag_stability(): # E-Tag should be the same for same data assert etag1 == etag2 + + +# --- Plugin authz fail-mode (authz.on_invalid_plugin) --- + + +def _problem_result(): + """A plugin result with one valid route and one unruled (deny) route + a problem.""" + from nemo_platform_plugin.authz import AuthzContribution, AuthzEndpointMethod + from nemo_platform_plugin.authz_discovery import PluginAuthzResult + + contribution = AuthzContribution( + permissions={"p.read": "Read"}, + endpoints={ + "/apis/p/v2/ok": {"get": AuthzEndpointMethod(permissions=["p.read"])}, + "/apis/p/v2/bad": {"get": AuthzEndpointMethod(permissions=[], deny=True)}, + }, + ) + return PluginAuthzResult(key="p", contribution=contribution, problems=["/apis/p/v2/bad (GET) has no @path_rule"]) + + +def _patch_failmode(monkeypatch, results, on_invalid): + from types import SimpleNamespace + + import nmp.core.auth.app.bundle as bundle + + monkeypatch.setattr("nemo_platform_plugin.authz_discovery.discover_plugin_authz", lambda: results) + monkeypatch.setattr( + bundle, + "get_service_config", + lambda _cls: SimpleNamespace(on_invalid_plugin=on_invalid, platform_admin_exempt_from_service_only=False), + ) + return bundle + + +def test_on_invalid_plugin_deny_route_keeps_valid_routes(monkeypatch): + bundle = _patch_failmode(monkeypatch, [_problem_result()], "deny_route") + merged = bundle.merge_plugin_authz_contributions({"authz": {}}) + endpoints = merged["authz"]["endpoints"] + assert "deny" not in endpoints["/apis/p/v2/ok"]["get"] # valid route preserved + assert endpoints["/apis/p/v2/bad"]["get"]["deny"] is True # only the bad route denied + assert "p" in bundle.get_degraded_plugins() + + +def test_on_invalid_plugin_quarantine_denies_whole_plugin(monkeypatch): + bundle = _patch_failmode(monkeypatch, 
[_problem_result()], "quarantine") + merged = bundle.merge_plugin_authz_contributions({"authz": {}}) + endpoints = merged["authz"]["endpoints"] + # The previously-valid route is now denied too — the whole plugin is quarantined. + assert endpoints["/apis/p/v2/ok"]["get"]["deny"] is True + assert endpoints["/apis/p/v2/bad"]["get"]["deny"] is True + # quarantine also fences the whole namespace, so a route the runner mounts that + # derivation never saw (quarantine only rewrites the routes it did see) can't fall through. + assert merged["authz"]["config"]["denied_plugin_prefixes"] == ["/apis/p"] + + +def test_on_invalid_plugin_hard_fail_raises(monkeypatch): + bundle = _patch_failmode(monkeypatch, [_problem_result()], "hard_fail") + with pytest.raises(RuntimeError, match="hard_fail"): + bundle.merge_plugin_authz_contributions({"authz": {}}) + + +def test_clean_plugin_merges_without_degraded(monkeypatch): + from nemo_platform_plugin.authz import AuthzContribution, AuthzEndpointMethod + from nemo_platform_plugin.authz_discovery import PluginAuthzResult + + clean = PluginAuthzResult( + key="c", + contribution=AuthzContribution( + permissions={"c.read": "Read"}, + endpoints={"/apis/c/v2/x": {"get": AuthzEndpointMethod(permissions=["c.read"])}}, + ), + problems=[], + ) + bundle = _patch_failmode(monkeypatch, [clean], "deny_route") + merged = bundle.merge_plugin_authz_contributions({"authz": {}}) + assert "/apis/c/v2/x" in merged["authz"]["endpoints"] + assert bundle.get_degraded_plugins() == {} + + +def test_degraded_plugin_with_no_routes_is_namespace_fenced(monkeypatch): + """A plugin that couldn't be enumerated (empty contribution) fences its whole namespace, + so any route it still mounts can't fall through the service: no-match bypass.""" + from nemo_platform_plugin.authz import AuthzContribution + from nemo_platform_plugin.authz_discovery import PluginAuthzResult + + degraded = PluginAuthzResult( + key="bad", + contribution=AuthzContribution(), # no endpoints — could 
not enumerate + problems=["failed to load plugin: RuntimeError('boom')"], + ) + bundle = _patch_failmode(monkeypatch, [degraded], "deny_route") + merged = bundle.merge_plugin_authz_contributions({"authz": {}}) + assert merged["authz"]["config"]["denied_plugin_prefixes"] == ["/apis/bad"] + assert "bad" in bundle.get_degraded_plugins() + + +def test_degraded_plugin_fences_both_key_and_mount_name(monkeypatch): + """When a degraded plugin's declared mount name diverges from its entry-point key (the + name==key invariant is only warned, not enforced), the fence must cover both /apis/{key} + and /apis/{mount_name} — the runner mounts the plugin's real routes at /apis/{mount_name}.""" + from nemo_platform_plugin.authz import AuthzContribution + from nemo_platform_plugin.authz_discovery import PluginAuthzResult + + degraded = PluginAuthzResult( + key="bad", + contribution=AuthzContribution(), # no endpoints — could not enumerate + problems=["failed to load plugin: RuntimeError('boom')"], + mount_name="bad-actual", + ) + bundle = _patch_failmode(monkeypatch, [degraded], "deny_route") + merged = bundle.merge_plugin_authz_contributions({"authz": {}}) + assert merged["authz"]["config"]["denied_plugin_prefixes"] == ["/apis/bad", "/apis/bad-actual"] diff --git a/services/core/auth/tests/test_embedded_pdp.py b/services/core/auth/tests/test_embedded_pdp.py index e7def497c0..fdb5595665 100644 --- a/services/core/auth/tests/test_embedded_pdp.py +++ b/services/core/auth/tests/test_embedded_pdp.py @@ -618,10 +618,13 @@ def test_service_principal_allowed(self, static_authz_data, method, path): def test_viewer_can_still_list_workspaces(self, static_authz_data): """Non-entity endpoints under /apis/entities/ are unaffected.""" + # Listing workspaces is a no-{workspace} GET, so its permission is checked in the + # system workspace. A viewer holds workspaces.list there via a system binding (on a + # real platform that's the seeded wildcard Viewer@system). 
self._setup_principals( static_authz_data, { - "viewer@test.com": {"workspaces": {"my-ws": ["Viewer"]}}, + "viewer@test.com": {"workspaces": {"my-ws": ["Viewer"], "system": ["Viewer"]}}, }, ) result = evaluate( @@ -792,3 +795,36 @@ def test_audit_workspace_endpoints_have_audit_scopes(self, static_authz_data): missing.append(f"{method.upper()} {path}") assert not missing, "Workspace audit endpoints missing audit:* scope:\n" + "\n".join(missing) + + +class TestWasmNativeBuiltins: + """The embedded engine stubs host-provided builtins (env::opa_builtin*) to return 0. + + Any rego that calls an SDK-dependent builtin (sprintf, glob.match, ...) therefore + silently evaluates to undefined in production: allow rules fail closed, but DENY + rules fail OPEN — and ``opa test`` cannot catch it because the Go evaluator + implements every builtin. These tests pin the policy to wasm-native builtins only. + """ + + def test_policy_wasm_requires_no_host_builtins(self): + """The compiled policy must not depend on any host-provided builtin.""" + policy = get_policy() + required = policy._read_json(policy.exports["builtins"](policy.store)) + assert required == {}, ( + f"policy.wasm requires host builtins {list(required)} which the embedded " + "engine stubs out — rewrite the policy using wasm-native builtins only " + "(a deny rule depending on a stubbed builtin silently never fires)." 
+ ) + + def test_namespace_fence_denies_subpaths_in_wasm(self, static_authz_data): + """Regression: the fence's subpath arm was written with sprintf and never fired in WASM.""" + static_authz_data["authz"].setdefault("config", {})["denied_plugin_prefixes"] = ["/apis/brokenplugin"] + set_policy_data(static_authz_data) + cases = [ + ("/apis/brokenplugin/sub/route", False), # subpath fenced (the bug: this was allowed) + ("/apis/brokenplugin", False), # bare prefix fenced + ("/apis/brokenplugin-extra/x", True), # sibling prefix not collaterally fenced + ] + for path, expect in cases: + result = evaluate("allow", {"principal_id": "service:probe", "method": "GET", "path": path}) + assert result["allowed"] is expect, f"GET {path} as service:probe: expected allowed={expect}" From d34608c82f0e3daf8accfebf7f7ef0ca00d4cd62 Mon Sep 17 00:00:00 2001 From: Max Dubrinsky Date: Wed, 24 Jun 2026 13:32:13 -0400 Subject: [PATCH 2/5] remove config for admin access to service-only resources --- docs/set-up/config-reference.mdx | 2 - e2e/authz_oidc/README.md | 7 ++- e2e/authz_oidc/conftest.py | 13 ++--- e2e/authz_oidc/matrix.py | 37 ++------------- .../core/auth/src/nmp/core/auth/app/bundle.py | 6 --- .../src/nmp/core/auth/app/policies/authz.rego | 23 +++------ .../app/policy_tests/caller_kind_test.rego | 47 +++---------------- .../app/policy_tests/platform_admin_test.rego | 22 ++------- .../core/auth/src/nmp/core/auth/config.py | 8 ---- services/core/auth/tests/test_bundle.py | 2 +- 10 files changed, 27 insertions(+), 140 deletions(-) diff --git a/docs/set-up/config-reference.mdx b/docs/set-up/config-reference.mdx index fbc130d5d0..e29bc6561f 100644 --- a/docs/set-up/config-reference.mdx +++ b/docs/set-up/config-reference.mdx @@ -123,8 +123,6 @@ auth: embedded_pdp_memory_limit_mb: 32 # Fail-mode for a plugin that contributes invalid HTTP authz. 
| default: 'deny_route' | values: 'deny_route' | 'quarantine' | 'hard_fail' on_invalid_plugin: deny_route - # Allow a human PlatformAdmin on SERVICE_PRINCIPAL-only plugin routes. | default: False - platform_admin_exempt_from_service_only: false ``` ### `entities` diff --git a/e2e/authz_oidc/README.md b/e2e/authz_oidc/README.md index 81a118c9fb..91c73f4d4f 100644 --- a/e2e/authz_oidc/README.md +++ b/e2e/authz_oidc/README.md @@ -49,8 +49,7 @@ Not part of CI: everything is marked `e2e` and skipped without `--run-e2e`. rows are untestable). The seeded `*`→Editor@default binding is left alone — no matrix row touches the `default` workspace. 5. **Runs the matrix** (`matrix.py`, ~40 cases), then repeats a small group on - a second platform instance with `on_invalid_plugin=quarantine` + - `platform_admin_exempt_from_service_only=true`. + a second platform instance with `on_invalid_plugin=quarantine`. ## Matrix coverage @@ -60,9 +59,9 @@ Not part of CI: everything is marked `e2e` and skipped without `--run-e2e`. | bindings | no binding → 403; Viewer read-not-write; cross-workspace isolation | | no-workspace-get | permission-stamped no-`{workspace}` GET requires the permission in `system`; permissionless sibling stays open | | scopes | `auditor:read` token: GET 200 / POST 403; `:write` POST 201; OIDC-only scopes = full power (documented); agents-gateway read/write method split | -| caller-kind | service principal denied on `callers=[principal]` route (symmetric half); human & PlatformAdmin denied on service-only route; service no-match bypass pinned as documented behavior | +| caller-kind | service principal denied on `callers=[principal]` route (symmetric half); human denied on service-only route (PlatformAdmin keeps its global bypass); service no-match bypass pinned as documented behavior | | fence | unenumerable plugin namespace denied for human/service/PlatformAdmin incl. 
bare prefix; unruled route denied for everyone while ruled sibling works | -| knobs | quarantine fences the whole offending plugin; exemption knob admits PlatformAdmin (and only PlatformAdmin) to service-only routes | +| knobs | quarantine fences the whole offending plugin | Status-code conventions asserted throughout: **401** only when no identity was established (missing/invalid token); **403** for every policy denial of an diff --git a/e2e/authz_oidc/conftest.py b/e2e/authz_oidc/conftest.py index c1162f684c..f5d45163d9 100644 --- a/e2e/authz_oidc/conftest.py +++ b/e2e/authz_oidc/conftest.py @@ -13,10 +13,8 @@ Two platform phases (both lazy, session-scoped): -- ``platform`` — default authz knobs (``on_invalid_plugin=deny_route``, - PlatformAdmin not exempt from service-only routes). -- ``platform_knobs`` — ``on_invalid_plugin=quarantine`` + - ``platform_admin_exempt_from_service_only=true``. +- ``platform`` — default authz knobs (``on_invalid_plugin=deny_route``). +- ``platform_knobs`` — ``on_invalid_plugin=quarantine``. Run: ``pytest e2e/authz_oidc -v --run-e2e`` (see README.md). 
""" @@ -284,15 +282,12 @@ def platform(issuer: MiniOIDCIssuer, tmp_path_factory: pytest.TempPathFactory) - @pytest.fixture(scope="session") def platform_knobs(issuer: MiniOIDCIssuer, tmp_path_factory: pytest.TempPathFactory) -> Iterator[Platform]: - """Quarantine + PlatformAdmin-exemption knob platform (no extra provisioning).""" + """Quarantine-knob platform (no extra provisioning).""" gen = _spawn_platform( issuer, tmp_path_factory, "knobs", - { - "NMP_AUTH_ON_INVALID_PLUGIN": "quarantine", - "NMP_AUTH_PLATFORM_ADMIN_EXEMPT_FROM_SERVICE_ONLY": "true", - }, + {"NMP_AUTH_ON_INVALID_PLUGIN": "quarantine"}, ) with closing(gen): p = next(gen) diff --git a/e2e/authz_oidc/matrix.py b/e2e/authz_oidc/matrix.py index 89c71c9023..41f746da37 100644 --- a/e2e/authz_oidc/matrix.py +++ b/e2e/authz_oidc/matrix.py @@ -292,11 +292,11 @@ class Case: Case( "E4", "caller-kind", - "PlatformAdmin denied on service-only route (exemption knob default false)", + "PlatformAdmin allowed on service-only route (admin global bypass holds)", "GET", SERVICE_ONLY, "admin", - {403}, + {200}, ), Case( "E5", @@ -408,8 +408,7 @@ class Case: {403}, ), # ------------------------------------------------------------------ # - # G. Knob phase: on_invalid_plugin=quarantine + # - # platform_admin_exempt_from_service_only=true (restarted platform). # + # G. Knob phase: on_invalid_plugin=quarantine (restarted platform). 
# # ------------------------------------------------------------------ # Case( "G1", @@ -434,36 +433,6 @@ class Case: Case( "G3", "knobs", - "Exemption knob: PlatformAdmin now allowed on service-only route", - "GET", - SERVICE_ONLY, - "admin", - {200}, - phase="knobs", - ), - Case( - "G4", - "knobs", - "Exemption knob does NOT extend to plain humans", - "GET", - SERVICE_ONLY, - "nobody", - {403}, - phase="knobs", - ), - Case( - "G5", - "knobs", - "Service principal still allowed on service-only route (control)", - "GET", - SERVICE_ONLY, - "service", - {200}, - phase="knobs", - ), - Case( - "G6", - "knobs", "Platform sanity under knob phase (admin lists workspaces)", "GET", WORKSPACES, diff --git a/services/core/auth/src/nmp/core/auth/app/bundle.py b/services/core/auth/src/nmp/core/auth/app/bundle.py index 7b36f8ce6c..fbfa3445d8 100644 --- a/services/core/auth/src/nmp/core/auth/app/bundle.py +++ b/services/core/auth/src/nmp/core/auth/app/bundle.py @@ -223,12 +223,6 @@ async def _build_authorization_data_internal(entities_client: Optional[EntityCli if "principals" not in static_data["authz"]: static_data["authz"]["principals"] = {} - # Surface auth-service config the Rego reads (PlatformAdmin exemption from - # SERVICE_PRINCIPAL-only plugin routes — see authz.rego deny_request rules). 
- static_data["authz"].setdefault("config", {})["platform_admin_exempt_from_service_only"] = get_service_config( - AuthServiceConfig - ).platform_admin_exempt_from_service_only - # Fetch dynamic data from EntityClient if available if entities_client: # Fetch all role bindings across ALL workspaces with pagination diff --git a/services/core/auth/src/nmp/core/auth/app/policies/authz.rego b/services/core/auth/src/nmp/core/auth/app/policies/authz.rego index fecf1cc095..a869bf04df 100644 --- a/services/core/auth/src/nmp/core/auth/app/policies/authz.rego +++ b/services/core/auth/src/nmp/core/auth/app/policies/authz.rego @@ -332,17 +332,16 @@ service_only_route if { not "principal" in callers } -# Deny a human (non-service) caller on a service-only route. This is a deny_request so -# it overrides the allow rules — including the ServiceSystem "*" wildcard and the -# PlatformAdmin allow-bypass — otherwise humans would leak onto service-only routes. -# Service principals (id starts with "service:") are unaffected and stay allowed. -# A human PlatformAdmin is denied by default, unless explicitly exempted via the -# platform_admin_exempt_from_service_only config knob. +# Deny a human (non-service) caller on a service-only route. This is a deny_request so it +# overrides the allow rules — including the ServiceSystem "*" wildcard — otherwise humans +# would leak onto service-only routes. Service principals (id starts with "service:") are +# unaffected and stay allowed. A human PlatformAdmin keeps its global bypass here: an admin +# retains access to every route, service-only routes included. 
deny_request if { service_only_route principal_id := extract_principal_id not startswith(principal_id, "service:") - not platform_admin_exempt + not platform_admin_in_system } # Caller-kind enforcement for principal-only routes — the symmetric counterpart of the @@ -369,16 +368,6 @@ deny_request if { startswith(principal_id, "service:") } -# True only when a PlatformAdmin caller is present AND the config knob exempts platform -# admins from service-only enforcement. Read defensively: an absent config key is treated -# as false (default deny for human platform admins on service-only routes). -default platform_admin_exempt := false - -platform_admin_exempt if { - platform_admin_in_system - data.authz.config.platform_admin_exempt_from_service_only == true -} - # True when any applicable principal has PlatformAdmin in the system workspace (see allow_request). default platform_admin_in_system := false diff --git a/services/core/auth/src/nmp/core/auth/app/policy_tests/caller_kind_test.rego b/services/core/auth/src/nmp/core/auth/app/policy_tests/caller_kind_test.rego index f4dda6c070..c8bcac57a2 100644 --- a/services/core/auth/src/nmp/core/auth/app/policy_tests/caller_kind_test.rego +++ b/services/core/auth/src/nmp/core/auth/app/policy_tests/caller_kind_test.rego @@ -7,10 +7,9 @@ import data.authz # Data contract: an endpoint method may declare an optional `callers` list of # caller-kind strings ("principal", "service_principal"). A route is "service-only" # when it lists "service_principal" but NOT "principal". On such routes, human -# (non-service) callers are denied — overriding the permission/PlatformAdmin allows — -# unless the platform_admin_exempt_from_service_only config knob is set for a -# PlatformAdmin caller. Absence of `callers` preserves today's PRINCIPAL-default -# behavior (no new restriction). +# (non-service) callers are denied — overriding the permission allows — except a +# PlatformAdmin, who keeps its global bypass and stays allowed. 
Absence of `callers` +# preserves today's PRINCIPAL-default behavior (no new restriction). caller_kind_test_data := { "roles": { @@ -97,8 +96,10 @@ test_service_only_route_allows_service_principal if { result.allowed == true } -# A human PlatformAdmin is DENIED on a service-only route by default (no exemption knob). -test_service_only_route_denies_platform_admin_by_default if { +# A human PlatformAdmin is ALLOWED on a service-only route — its global admin bypass is not +# clawed back here (only non-admin humans are denied, per +# test_service_only_route_denies_human_principal above). +test_service_only_route_allows_platform_admin if { result := authz.allow with input as { "principal_id": "platform-admin@example.com", @@ -110,43 +111,9 @@ test_service_only_route_denies_platform_admin_by_default if { with data.authz.workspaces as caller_kind_test_data.workspaces with data.authz.principals as caller_kind_test_data.principals - result.allowed == false -} - -# With the config knob enabled, the human PlatformAdmin is ALLOWED on a service-only route. -test_service_only_route_allows_platform_admin_when_exempt if { - result := authz.allow - with input as { - "principal_id": "platform-admin@example.com", - "method": "GET", - "path": "/apis/jobs/v2/workspaces/ws1/internal-jobs/job-1", - } - with data.authz.roles as caller_kind_test_data.roles - with data.authz.endpoints as caller_kind_test_data.endpoints - with data.authz.workspaces as caller_kind_test_data.workspaces - with data.authz.principals as caller_kind_test_data.principals - with data.authz.config as {"platform_admin_exempt_from_service_only": true} - result.allowed == true } -# The knob does NOT exempt a normal human (non-PlatformAdmin) — still denied. 
-test_service_only_route_knob_does_not_exempt_normal_human if { - result := authz.allow - with input as { - "principal_id": "user@example.com", - "method": "GET", - "path": "/apis/jobs/v2/workspaces/ws1/internal-jobs/job-1", - } - with data.authz.roles as caller_kind_test_data.roles - with data.authz.endpoints as caller_kind_test_data.endpoints - with data.authz.workspaces as caller_kind_test_data.workspaces - with data.authz.principals as caller_kind_test_data.principals - with data.authz.config as {"platform_admin_exempt_from_service_only": true} - - result.allowed == false -} - # --- Mixed route (callers: ["principal", "service_principal"]) --- # A human principal is ALLOWED on a route that lists both caller kinds. diff --git a/services/core/auth/src/nmp/core/auth/app/policy_tests/platform_admin_test.rego b/services/core/auth/src/nmp/core/auth/app/policy_tests/platform_admin_test.rego index 788bc99af4..9e27765910 100644 --- a/services/core/auth/src/nmp/core/auth/app/policy_tests/platform_admin_test.rego +++ b/services/core/auth/src/nmp/core/auth/app/policy_tests/platform_admin_test.rego @@ -261,9 +261,9 @@ service_only_endpoints := { } } -# Test platform admin is DENIED on a service-only route by default (deny overrides the -# PlatformAdmin allow-bypass). -test_platform_admin_denied_on_service_only_route if { +# Test platform admin is ALLOWED on a service-only route — the admin global bypass holds +# here (only non-admin humans are denied on service-only routes). +test_platform_admin_allowed_on_service_only_route if { result := authz.allow with input as { "principal_id": "platform-admin@example.com", "method": "DELETE", @@ -274,21 +274,5 @@ test_platform_admin_denied_on_service_only_route if { with data.authz.workspaces as platform_admin_test_data.workspaces with data.authz.principals as platform_admin_test_data.principals - result.allowed == false -} - -# Test platform admin is ALLOWED on a service-only route when the exemption knob is set. 
-test_platform_admin_allowed_on_service_only_route_when_exempt if { - result := authz.allow with input as { - "principal_id": "platform-admin@example.com", - "method": "DELETE", - "path": "/apis/models/v2/workspaces/workspace1/models/model1" - } - with data.authz.roles as platform_admin_test_data.roles - with data.authz.endpoints as service_only_endpoints - with data.authz.workspaces as platform_admin_test_data.workspaces - with data.authz.principals as platform_admin_test_data.principals - with data.authz.config as {"platform_admin_exempt_from_service_only": true} - result.allowed == true } diff --git a/services/core/auth/src/nmp/core/auth/config.py b/services/core/auth/src/nmp/core/auth/config.py index 2a3185b459..62f8b82d44 100644 --- a/services/core/auth/src/nmp/core/auth/config.py +++ b/services/core/auth/src/nmp/core/auth/config.py @@ -68,14 +68,6 @@ class AuthServiceConfig(SharedAuthConfig): description="Fail-mode for a plugin that contributes invalid HTTP authz.", ) - # When true, a human PlatformAdmin is allowed on plugin routes restricted to - # SERVICE_PRINCIPAL callers. Default false (deny). Read by the Rego policy via - # data.authz.config.platform_admin_exempt_from_service_only. 
- platform_admin_exempt_from_service_only: bool = Field( - default=False, - description="Allow a human PlatformAdmin on SERVICE_PRINCIPAL-only plugin routes.", - ) - # Backward compatibility alias AuthConfig = AuthServiceConfig diff --git a/services/core/auth/tests/test_bundle.py b/services/core/auth/tests/test_bundle.py index ea5b147f25..8001e6cf62 100644 --- a/services/core/auth/tests/test_bundle.py +++ b/services/core/auth/tests/test_bundle.py @@ -163,7 +163,7 @@ def _patch_failmode(monkeypatch, results, on_invalid): monkeypatch.setattr( bundle, "get_service_config", - lambda _cls: SimpleNamespace(on_invalid_plugin=on_invalid, platform_admin_exempt_from_service_only=False), + lambda _cls: SimpleNamespace(on_invalid_plugin=on_invalid), ) return bundle From 6c65a259176f8bbde92aa0484236f728a31067fb Mon Sep 17 00:00:00 2001 From: Max Dubrinsky Date: Wed, 24 Jun 2026 14:10:59 -0400 Subject: [PATCH 3/5] feat(authz): default on_invalid_plugin to hard_fail Flip the plugin-authz fail-mode default from deny_route to hard_fail: a plugin contributing invalid authz (an unruled route or an undeclared permission) now refuses the OPA bundle build rather than silently fencing the offending routes. Matches the spec's "a missing path rule is a validation error". quarantine/deny_route remain available for deployments that load dynamically-discovered or third-party plugins one bad plugin shouldn't be able to wedge. The authz OIDC e2e harness pins deny_route on its default phase because it deliberately installs broken/unruled fixture plugins to exercise per-route fencing on a running platform. 
Signed-off-by: Max Dubrinsky --- docs/set-up/config-reference.mdx | 4 ++-- e2e/authz_oidc/README.md | 6 ++++-- e2e/authz_oidc/conftest.py | 14 +++++++++++--- services/core/auth/src/nmp/core/auth/config.py | 9 ++++++--- 4 files changed, 23 insertions(+), 10 deletions(-) diff --git a/docs/set-up/config-reference.mdx b/docs/set-up/config-reference.mdx index e29bc6561f..45ee0d0040 100644 --- a/docs/set-up/config-reference.mdx +++ b/docs/set-up/config-reference.mdx @@ -121,8 +121,8 @@ auth: embedded_pdp_cpu_limit: 200 # Maximum linear memory (MB) the embedded PDP WASM runtime can consume. | default: 32 embedded_pdp_memory_limit_mb: 32 - # Fail-mode for a plugin that contributes invalid HTTP authz. | default: 'deny_route' | values: 'deny_route' | 'quarantine' | 'hard_fail' - on_invalid_plugin: deny_route + # Fail-mode for a plugin that contributes invalid HTTP authz. | default: 'hard_fail' | values: 'deny_route' | 'quarantine' | 'hard_fail' + on_invalid_plugin: hard_fail ``` ### `entities` diff --git a/e2e/authz_oidc/README.md b/e2e/authz_oidc/README.md index 91c73f4d4f..bbe80798fd 100644 --- a/e2e/authz_oidc/README.md +++ b/e2e/authz_oidc/README.md @@ -76,5 +76,7 @@ point). - `X-NMP-Principal-*` headers remain a trusted identity channel in this deployment shape; the harness never sends them, but does not prove they are stripped (that's an ingress concern, out of authz scope). -- `hard_fail` mode aborts bundle build (auth service degraded) — its - observable is process health, not a per-request status; not asserted here. +- `hard_fail` (the default `on_invalid_plugin` mode) aborts bundle build (auth + service degraded) — its observable is process health, not a per-request status; + not asserted here. Both harness phases pin a softer mode (`deny_route` / + `quarantine`) so the platform stays up with the deliberately-broken fixtures. 
diff --git a/e2e/authz_oidc/conftest.py b/e2e/authz_oidc/conftest.py index f5d45163d9..f3e5ff3da8 100644 --- a/e2e/authz_oidc/conftest.py +++ b/e2e/authz_oidc/conftest.py @@ -13,7 +13,9 @@ Two platform phases (both lazy, session-scoped): -- ``platform`` — default authz knobs (``on_invalid_plugin=deny_route``). +- ``platform`` — ``on_invalid_plugin=deny_route``. The harness deliberately loads + broken/unruled fixture plugins, so it pins per-route fencing rather than inheriting + the strict ``hard_fail`` default, which would abort the bundle and wedge the platform. - ``platform_knobs`` — ``on_invalid_plugin=quarantine``. Run: ``pytest e2e/authz_oidc -v --run-e2e`` (see README.md). @@ -272,8 +274,14 @@ def call(method: str, path: str, body: dict | None = None) -> httpx.Response: @pytest.fixture(scope="session") def platform(issuer: MiniOIDCIssuer, tmp_path_factory: pytest.TempPathFactory) -> Iterator[Platform]: - """Default-knob platform, fully provisioned.""" - gen = _spawn_platform(issuer, tmp_path_factory, "default", {}) + """deny_route-knob platform, fully provisioned. + + Pins ``deny_route`` explicitly: the harness installs broken/unruled fixture + plugins on purpose, so it opts out of the strict ``hard_fail`` default (which + would abort bundle generation and leave the platform degraded) to exercise + per-route fencing on a running platform. 
+ """ + gen = _spawn_platform(issuer, tmp_path_factory, "default", {"NMP_AUTH_ON_INVALID_PLUGIN": "deny_route"}) with closing(gen): p = next(gen) _provision(p) diff --git a/services/core/auth/src/nmp/core/auth/config.py b/services/core/auth/src/nmp/core/auth/config.py index 62f8b82d44..88aa4dd3d4 100644 --- a/services/core/auth/src/nmp/core/auth/config.py +++ b/services/core/auth/src/nmp/core/auth/config.py @@ -61,10 +61,13 @@ class AuthServiceConfig(SharedAuthConfig): # Plugin HTTP authz fail-mode: what to do when a plugin contributes invalid authz # (an unruled route, or a rule referencing an undeclared / out-of-namespace permission). # The offending routes are always emitted as explicit denies; this controls the blast - # radius. deny_route: deny only the bad routes. quarantine: deny the whole plugin. - # hard_fail: refuse to build the OPA bundle. + # radius. hard_fail: refuse to build the OPA bundle (default — fail closed at the platform + # level, matching the 743 spec's "a missing path rule is a validation error"). quarantine: + # deny the whole offending plugin but keep the platform up. deny_route: deny only the bad + # routes. A deployment that loads dynamically-discovered or third-party plugins CI never + # vetted can downgrade to quarantine/deny_route so one bad plugin can't wedge the platform. 
on_invalid_plugin: Literal["deny_route", "quarantine", "hard_fail"] = Field( - default="deny_route", + default="hard_fail", description="Fail-mode for a plugin that contributes invalid HTTP authz.", ) From e0a2506b01a2049bfeb6dfbb062c57a99dc07db2 Mon Sep 17 00:00:00 2001 From: Max Dubrinsky Date: Wed, 24 Jun 2026 14:11:10 -0400 Subject: [PATCH 4/5] feat(deployments): migrate plugin authz to @path_rule surface The deployments plugin still declared authz through the removed get_authz_contribution() classmethod, which the routes-derived discovery pipeline ignores, leaving all 14 routes unruled (silently fenced under deny_route, a bundle-build abort under the new hard_fail default). Declare permissions as typed PermissionSets and attach @path_rule to every route; the controller-only status PUTs require SERVICE_PRINCIPAL (the existing require_service_principal dependency stays as defense-in-depth). Permission ids are preserved exactly, so role grants are unchanged. Drop the old classmethod and assert per-route rule coverage in the startup test. 
Signed-off-by: Max Dubrinsky --- .../nemo_deployments_plugin/api/v2/_perms.py | 37 +++++++++++ .../api/v2/deployment_configs.py | 7 +++ .../api/v2/deployments.py | 7 +++ .../nemo_deployments_plugin/api/v2/status.py | 5 ++ .../nemo_deployments_plugin/api/v2/volumes.py | 7 +++ .../src/nemo_deployments_plugin/authz.py | 13 ++++ .../src/nemo_deployments_plugin/service.py | 63 ------------------- .../tests/unit/test_service_startup.py | 25 ++++---- 8 files changed, 88 insertions(+), 76 deletions(-) create mode 100644 plugins/nemo-deployments/src/nemo_deployments_plugin/api/v2/_perms.py create mode 100644 plugins/nemo-deployments/src/nemo_deployments_plugin/authz.py diff --git a/plugins/nemo-deployments/src/nemo_deployments_plugin/api/v2/_perms.py b/plugins/nemo-deployments/src/nemo_deployments_plugin/api/v2/_perms.py new file mode 100644 index 0000000000..c6e9646533 --- /dev/null +++ b/plugins/nemo-deployments/src/nemo_deployments_plugin/api/v2/_perms.py @@ -0,0 +1,37 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""Typed permission vocabulary for the deployments plugin's routes. + +Three sub-namespaces under ``deployments`` (one per entity collection). Route handlers +reference these constants in their ``@path_rule``; the platform derives the permission +catalog from the routes, so there is no parallel list to keep in sync. The controller-only +status routes mint a ``status.update`` permission under the collection they project onto. 
+""" + +from __future__ import annotations + +from nemo_platform_plugin.authz import PermissionSet, perm + + +class DeploymentConfigPerms(PermissionSet, namespace="deployments.deployment-configs"): + CREATE = perm("Create deployments deployment-configs") + LIST = perm("List deployments deployment-configs") + READ = perm("Read deployments deployment-configs") + DELETE = perm("Delete deployments deployment-configs") + + +class DeploymentPerms(PermissionSet, namespace="deployments.deployments"): + CREATE = perm("Create deployments deployments") + LIST = perm("List deployments deployments") + READ = perm("Read deployments deployments") + DELETE = perm("Delete deployments deployments") + STATUS_UPDATE = perm("Update deployment observed status (controller)", suffix="status.update") + + +class VolumePerms(PermissionSet, namespace="deployments.volumes"): + CREATE = perm("Create deployments volumes") + LIST = perm("List deployments volumes") + READ = perm("Read deployments volumes") + DELETE = perm("Delete deployments volumes") + STATUS_UPDATE = perm("Update volume observed status (controller)", suffix="status.update") diff --git a/plugins/nemo-deployments/src/nemo_deployments_plugin/api/v2/deployment_configs.py b/plugins/nemo-deployments/src/nemo_deployments_plugin/api/v2/deployment_configs.py index 7cd5f8300e..3a14e3a943 100644 --- a/plugins/nemo-deployments/src/nemo_deployments_plugin/api/v2/deployment_configs.py +++ b/plugins/nemo-deployments/src/nemo_deployments_plugin/api/v2/deployment_configs.py @@ -8,7 +8,9 @@ import logging from fastapi import APIRouter, Depends, HTTPException, Query +from nemo_deployments_plugin.api.v2._perms import DeploymentConfigPerms from nemo_deployments_plugin.api.v2.dependencies import get_entity_client +from nemo_deployments_plugin.authz import SCOPE from nemo_deployments_plugin.entities import DeploymentConfig from nemo_deployments_plugin.references import deployment_names_using_config from nemo_deployments_plugin.schema import ( @@ -23,6 
+25,7 @@ prerequisite_names, ) from nemo_platform_plugin.api.filters import make_filter_obj_dep +from nemo_platform_plugin.authz import CallerKind, path_rule from nemo_platform_plugin.entity_client import NemoEntitiesClient, NemoEntityConflictError, NemoEntityNotFoundError from nemo_platform_plugin.schema import PaginationData @@ -55,6 +58,7 @@ async def _list_all_deployment_configs( @router.post("/deployment-configs", response_model=DeploymentConfig, status_code=201, tags=["Deployment Configs"]) +@path_rule(callers=[CallerKind.PRINCIPAL], permissions=[DeploymentConfigPerms.CREATE], scopes=SCOPE.write()) async def create_deployment_config( workspace: str, body: CreateDeploymentConfigRequest, @@ -87,6 +91,7 @@ async def create_deployment_config( @router.get("/deployment-configs", response_model=DeploymentConfigPage, tags=["Deployment Configs"]) +@path_rule(callers=[CallerKind.PRINCIPAL], permissions=[DeploymentConfigPerms.LIST], scopes=SCOPE.read()) async def list_deployment_configs( workspace: str, page: int = Query(default=1, ge=1), @@ -109,6 +114,7 @@ async def list_deployment_configs( @router.get("/deployment-configs/{name}", response_model=DeploymentConfig, tags=["Deployment Configs"]) +@path_rule(callers=[CallerKind.PRINCIPAL], permissions=[DeploymentConfigPerms.READ], scopes=SCOPE.read()) async def get_deployment_config( workspace: str, name: str, @@ -124,6 +130,7 @@ async def get_deployment_config( @router.delete("/deployment-configs/{name}", status_code=204, tags=["Deployment Configs"]) +@path_rule(callers=[CallerKind.PRINCIPAL], permissions=[DeploymentConfigPerms.DELETE], scopes=SCOPE.write()) async def delete_deployment_config( workspace: str, name: str, diff --git a/plugins/nemo-deployments/src/nemo_deployments_plugin/api/v2/deployments.py b/plugins/nemo-deployments/src/nemo_deployments_plugin/api/v2/deployments.py index 2515c51088..8bdb1a84a5 100644 --- a/plugins/nemo-deployments/src/nemo_deployments_plugin/api/v2/deployments.py +++ 
b/plugins/nemo-deployments/src/nemo_deployments_plugin/api/v2/deployments.py @@ -9,10 +9,13 @@ from typing import cast from fastapi import APIRouter, Depends, HTTPException, Query +from nemo_deployments_plugin.api.v2._perms import DeploymentPerms from nemo_deployments_plugin.api.v2.dependencies import get_entity_client +from nemo_deployments_plugin.authz import SCOPE from nemo_deployments_plugin.entities import Deployment, DeploymentConfig, DeploymentStatus from nemo_deployments_plugin.schema import CreateDeploymentRequest, DeploymentFilter, DeploymentPage from nemo_platform_plugin.api.filters import make_filter_obj_dep +from nemo_platform_plugin.authz import CallerKind, path_rule from nemo_platform_plugin.entity_client import NemoEntitiesClient, NemoEntityConflictError, NemoEntityNotFoundError from nemo_platform_plugin.filter_ops import ComparisonOperation, FilterOperator from nemo_platform_plugin.schema import PaginationData @@ -55,6 +58,7 @@ def _parse_deployment_config_ref(ref: str, default_workspace: str) -> tuple[str, @router.post("/deployments", response_model=Deployment, status_code=201, tags=["Deployments"]) +@path_rule(callers=[CallerKind.PRINCIPAL], permissions=[DeploymentPerms.CREATE], scopes=SCOPE.write()) async def create_deployment( workspace: str, body: CreateDeploymentRequest, @@ -87,6 +91,7 @@ async def create_deployment( @router.get("/deployments", response_model=DeploymentPage, tags=["Deployments"]) +@path_rule(callers=[CallerKind.PRINCIPAL], permissions=[DeploymentPerms.LIST], scopes=SCOPE.read()) async def list_deployments( workspace: str, page: int = Query(default=1, ge=1), @@ -122,6 +127,7 @@ async def list_deployments( @router.get("/deployments/{name}", response_model=Deployment, tags=["Deployments"]) +@path_rule(callers=[CallerKind.PRINCIPAL], permissions=[DeploymentPerms.READ], scopes=SCOPE.read()) async def get_deployment( workspace: str, name: str, @@ -137,6 +143,7 @@ async def get_deployment( @router.delete("/deployments/{name}", 
status_code=204, tags=["Deployments"]) +@path_rule(callers=[CallerKind.PRINCIPAL], permissions=[DeploymentPerms.DELETE], scopes=SCOPE.write()) async def delete_deployment( workspace: str, name: str, diff --git a/plugins/nemo-deployments/src/nemo_deployments_plugin/api/v2/status.py b/plugins/nemo-deployments/src/nemo_deployments_plugin/api/v2/status.py index 32f0864f0f..0855100912 100644 --- a/plugins/nemo-deployments/src/nemo_deployments_plugin/api/v2/status.py +++ b/plugins/nemo-deployments/src/nemo_deployments_plugin/api/v2/status.py @@ -6,15 +6,19 @@ from __future__ import annotations from fastapi import APIRouter, Depends, HTTPException +from nemo_deployments_plugin.api.v2._perms import DeploymentPerms, VolumePerms from nemo_deployments_plugin.api.v2.dependencies import get_entity_client, require_service_principal +from nemo_deployments_plugin.authz import SCOPE from nemo_deployments_plugin.entities import Deployment, Volume from nemo_deployments_plugin.schema import UpdateDeploymentStatusRequest, UpdateVolumeStatusRequest +from nemo_platform_plugin.authz import CallerKind, path_rule from nemo_platform_plugin.entity_client import NemoEntitiesClient, NemoEntityConflictError, NemoEntityNotFoundError router = APIRouter() @router.put("/deployments/{name}/status", response_model=Deployment, tags=["Deployment Status"]) +@path_rule(callers=[CallerKind.SERVICE_PRINCIPAL], permissions=[DeploymentPerms.STATUS_UPDATE], scopes=SCOPE.write()) async def update_deployment_status( workspace: str, name: str, @@ -47,6 +51,7 @@ async def update_deployment_status( @router.put("/volumes/{name}/status", response_model=Volume, tags=["Volume Status"]) +@path_rule(callers=[CallerKind.SERVICE_PRINCIPAL], permissions=[VolumePerms.STATUS_UPDATE], scopes=SCOPE.write()) async def update_volume_status( workspace: str, name: str, diff --git a/plugins/nemo-deployments/src/nemo_deployments_plugin/api/v2/volumes.py b/plugins/nemo-deployments/src/nemo_deployments_plugin/api/v2/volumes.py index 
29191afd3c..ad02852bb3 100644 --- a/plugins/nemo-deployments/src/nemo_deployments_plugin/api/v2/volumes.py +++ b/plugins/nemo-deployments/src/nemo_deployments_plugin/api/v2/volumes.py @@ -6,11 +6,14 @@ from __future__ import annotations from fastapi import APIRouter, Depends, HTTPException, Query +from nemo_deployments_plugin.api.v2._perms import VolumePerms from nemo_deployments_plugin.api.v2.dependencies import get_entity_client +from nemo_deployments_plugin.authz import SCOPE from nemo_deployments_plugin.entities import Volume from nemo_deployments_plugin.references import deployment_config_names_referencing_volume from nemo_deployments_plugin.schema import CreateVolumeRequest, VolumeFilter, VolumePage from nemo_platform_plugin.api.filters import make_filter_obj_dep +from nemo_platform_plugin.authz import CallerKind, path_rule from nemo_platform_plugin.entity_client import NemoEntitiesClient, NemoEntityConflictError, NemoEntityNotFoundError from nemo_platform_plugin.schema import PaginationData @@ -20,6 +23,7 @@ @router.post("/volumes", response_model=Volume, status_code=201, tags=["Volumes"]) +@path_rule(callers=[CallerKind.PRINCIPAL], permissions=[VolumePerms.CREATE], scopes=SCOPE.write()) async def create_volume( workspace: str, body: CreateVolumeRequest, @@ -41,6 +45,7 @@ async def create_volume( @router.get("/volumes", response_model=VolumePage, tags=["Volumes"]) +@path_rule(callers=[CallerKind.PRINCIPAL], permissions=[VolumePerms.LIST], scopes=SCOPE.read()) async def list_volumes( workspace: str, page: int = Query(default=1, ge=1), @@ -63,6 +68,7 @@ async def list_volumes( @router.get("/volumes/{name}", response_model=Volume, tags=["Volumes"]) +@path_rule(callers=[CallerKind.PRINCIPAL], permissions=[VolumePerms.READ], scopes=SCOPE.read()) async def get_volume( workspace: str, name: str, @@ -78,6 +84,7 @@ async def get_volume( @router.delete("/volumes/{name}", status_code=204, tags=["Volumes"]) +@path_rule(callers=[CallerKind.PRINCIPAL], 
permissions=[VolumePerms.DELETE], scopes=SCOPE.write()) async def delete_volume( workspace: str, name: str, diff --git a/plugins/nemo-deployments/src/nemo_deployments_plugin/authz.py b/plugins/nemo-deployments/src/nemo_deployments_plugin/authz.py new file mode 100644 index 0000000000..e60b22d69d --- /dev/null +++ b/plugins/nemo-deployments/src/nemo_deployments_plugin/authz.py @@ -0,0 +1,13 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""The deployments plugin's authz scope. + +The route modules import :data:`SCOPE` so the plugin shares one ``AuthzScope("deployments")``. +""" + +from __future__ import annotations + +from nemo_platform_plugin.authz import AuthzScope + +SCOPE = AuthzScope("deployments") diff --git a/plugins/nemo-deployments/src/nemo_deployments_plugin/service.py b/plugins/nemo-deployments/src/nemo_deployments_plugin/service.py index d5ab102a04..300103a6ab 100644 --- a/plugins/nemo-deployments/src/nemo_deployments_plugin/service.py +++ b/plugins/nemo-deployments/src/nemo_deployments_plugin/service.py @@ -11,28 +11,11 @@ from nemo_deployments_plugin.backends.registry import ExecutorRegistry, ExecutorSpec from nemo_deployments_plugin.config import DeploymentsConfig from nemo_platform import AsyncNeMoPlatform -from nemo_platform_plugin.authz import AuthzContribution, AuthzEndpointMethod from nemo_platform_plugin.sdk_provider import get_async_platform_sdk from nemo_platform_plugin.service import NemoService, RouterSpec logger = logging.getLogger(__name__) -_SERVICE_NAME = "deployments" -_READ_SCOPES = [f"{_SERVICE_NAME}:read", "platform:read"] -_WRITE_SCOPES = [f"{_SERVICE_NAME}:write", "platform:write"] - - -def _read_method(permission: str) -> AuthzEndpointMethod: - return AuthzEndpointMethod(permissions=[permission], scopes=list(_READ_SCOPES)) - - -def _write_method(permission: str) -> AuthzEndpointMethod: - return 
AuthzEndpointMethod(permissions=[permission], scopes=list(_WRITE_SCOPES)) - - -def _read_methods(permission: str) -> dict[str, AuthzEndpointMethod]: - return {method: _read_method(permission) for method in ("get", "head")} - class DeploymentsService(NemoService): """HTTP service for deployment configs, deployments, volumes, and controller status.""" @@ -49,52 +32,6 @@ def executor_registry(self) -> ExecutorRegistry: self._executor_registry = ExecutorRegistry.empty() return self._executor_registry - @classmethod - def get_authz_contribution(cls) -> AuthzContribution: - """Authorization policy for deployments plugin routes.""" - base = f"/apis/{cls.name}/v2/workspaces/{{workspace}}" - permissions: dict[str, str] = {} - endpoints: dict[str, dict[str, AuthzEndpointMethod]] = {} - - for resource, path_segment in ( - ("deployment-configs", "deployment-configs"), - ("deployments", "deployments"), - ("volumes", "volumes"), - ): - create_perm = f"{cls.name}.{resource}.create" - list_perm = f"{cls.name}.{resource}.list" - read_perm = f"{cls.name}.{resource}.read" - delete_perm = f"{cls.name}.{resource}.delete" - permissions.update( - { - create_perm: f"Create {cls.name} {resource}", - list_perm: f"List {cls.name} {resource}", - read_perm: f"Read {cls.name} {resource}", - delete_perm: f"Delete {cls.name} {resource}", - } - ) - endpoints[f"{base}/{path_segment}"] = { - **_read_methods(list_perm), - "post": _write_method(create_perm), - } - endpoints[f"{base}/{path_segment}/{{name}}"] = { - "delete": _write_method(delete_perm), - **_read_methods(read_perm), - } - - deployment_status_perm = f"{cls.name}.deployments.status.update" - volume_status_perm = f"{cls.name}.volumes.status.update" - permissions[deployment_status_perm] = "Update deployment observed status (controller)" - permissions[volume_status_perm] = "Update volume observed status (controller)" - endpoints[f"{base}/deployments/{{name}}/status"] = { - "put": _write_method(deployment_status_perm), - } - 
endpoints[f"{base}/volumes/{{name}}/status"] = { - "put": _write_method(volume_status_perm), - } - - return AuthzContribution(permissions=permissions, endpoints=endpoints) - def get_routers(self) -> list[RouterSpec]: from nemo_deployments_plugin.api.v2 import ( deployment_configs, diff --git a/plugins/nemo-deployments/tests/unit/test_service_startup.py b/plugins/nemo-deployments/tests/unit/test_service_startup.py index 6826a4f9fa..4cb2628613 100644 --- a/plugins/nemo-deployments/tests/unit/test_service_startup.py +++ b/plugins/nemo-deployments/tests/unit/test_service_startup.py @@ -5,6 +5,7 @@ from fastapi.routing import APIRoute from nemo_deployments_plugin.service import DeploymentsService +from nemo_platform_plugin.authz import get_path_rules def _mounted_paths() -> set[str]: @@ -31,16 +32,14 @@ def test_service_name_matches_entry_point() -> None: def test_service_authz_covers_mounted_routes() -> None: - contribution = DeploymentsService.get_authz_contribution() - endpoint_paths = set(contribution.endpoints.keys()) - for path in _mounted_paths(): - assert path in endpoint_paths, f"missing authz entry for {path}" - route_methods = { - method.lower() - for spec in DeploymentsService().get_routers() - for route in spec.router.routes - if isinstance(route, APIRoute) and f"/apis/deployments{spec.prefix}{route.path}" == path - for method in route.methods or set() - } - for method in route_methods: - assert method in contribution.endpoints[path], f"missing authz method {method} for {path}" + """Every mounted route carries at least one ``@path_rule``. + + Authz is derived from the routes (no separate contribution); an unruled route would be + treated as invalid and fenced/denied by the PDP, so coverage is the property to assert. 
+ """ + service = DeploymentsService() + for spec in service.get_routers(): + for route in spec.router.routes: + if isinstance(route, APIRoute): + full = f"/apis/deployments{spec.prefix}{route.path}" + assert get_path_rules(route.endpoint), f"missing @path_rule for {full}" From 334256537375026fbda29465a3e6f9a643d5d807 Mon Sep 17 00:00:00 2001 From: Max Dubrinsky Date: Wed, 24 Jun 2026 15:04:27 -0400 Subject: [PATCH 5/5] fix(authz): adapt plugin route derivation to fastapi 0.138 lazy include_router fastapi 0.138 / starlette 1.3.1 made include_router(prefix=...) lazy: rebased routes are stashed behind a _IncludedRouter proxy instead of being materialized as APIRoute objects, so the derivation's walk of composed.routes found nothing and every plugin route was treated as unruled. Add _iter_composed_routes() to descend each proxy's effective_route_contexts(), yielding composed-path APIRoute copies (original .endpoint preserved so get_path_rules still resolves the @path_rule metadata) and the composed starlette_route for WebSocket/Mount leaves (so the fail-closed branches still fire and no route is silently dropped). Downstream derivation logic is unchanged; the helper passes concrete routes through, so it also works if eager-include behavior returns. 
Signed-off-by: Max Dubrinsky --- .../nemo_platform_plugin/authz_discovery.py | 65 ++++++++++++++++--- .../tests/test_factory_authz.py | 20 ++++-- .../tests/test_path_rule.py | 24 +++++-- 3 files changed, 91 insertions(+), 18 deletions(-) diff --git a/packages/nemo_platform_plugin/src/nemo_platform_plugin/authz_discovery.py b/packages/nemo_platform_plugin/src/nemo_platform_plugin/authz_discovery.py index e68b4f9818..4e839cac25 100644 --- a/packages/nemo_platform_plugin/src/nemo_platform_plugin/authz_discovery.py +++ b/packages/nemo_platform_plugin/src/nemo_platform_plugin/authz_discovery.py @@ -25,7 +25,9 @@ from __future__ import annotations +import copy import logging +from collections.abc import Iterator from dataclasses import dataclass, field from typing import Any @@ -41,6 +43,7 @@ ) from nemo_platform_plugin.authz_format import is_valid_permission_id from nemo_platform_plugin.service import NemoService +from starlette.routing import BaseRoute logger = logging.getLogger(__name__) @@ -162,6 +165,54 @@ def _register_permission(catalog: dict[str, Permission], perm: Permission, warni catalog.setdefault(perm.id, perm) +def _iter_composed_routes(service: NemoService) -> Iterator[BaseRoute]: + """Yield the fully-composed leaf routes of *service*, one ``BaseRoute`` per mounted route. + + This re-creates the runtime mount (``/apis/{service.name}`` + ``RouterSpec.prefix`` + route path) + and flattens it to leaves so the derivation can read each route's final ``.path``, + ``.methods``, and (for ``APIRoute``) original ``.endpoint``. + + Lazy-include workaround (fastapi 0.138.0 / starlette 1.3.1): ``include_router(prefix=...)`` + no longer materializes rebased ``APIRoute`` objects into ``.routes`` — it stores a + ``fastapi.routing._IncludedRouter`` proxy, so walking ``.routes`` for ``APIRoute`` finds + nothing.
We descend each proxy via ``effective_route_contexts()`` (which also recurses + through nested includes) and reconstruct the composed leaves: + + - For an ``APIRoute`` we shallow-copy the original and overwrite ``.path``/``.methods`` with + the context's composed values. The copy keeps ``isinstance(route, APIRoute)`` true and + preserves the original ``.endpoint`` object so ``get_path_rules(route.endpoint)`` still + finds the function-attached rules; copying (rather than mutating) avoids corrupting the + shared original route. + - For non-``APIRoute`` leaves (WebSocketRoute / Mount / plain Route) we yield the context's + ``starlette_route``, which already carries the composed path — so the existing + fail-closed / warning branches still fire and no route is silently dropped. + + A concrete route appearing directly in ``.routes`` (e.g. a future eager-include path) is + yielded as-is, so this keeps working if the proxy behavior changes again. + """ + composed = APIRouter() + for spec in service.get_routers(): + composed.include_router(spec.router, prefix=f"/apis/{service.name}{spec.prefix}") + + for route in composed.routes: + contexts = getattr(route, "effective_route_contexts", None) + if contexts is None: + # Already a concrete leaf (no lazy-include proxy) — pass it through unchanged. + yield route + continue + for ctx in contexts(): + original = ctx.original_route + if isinstance(original, APIRoute): + # Rebased APIRoute: copy + composed path/methods, original endpoint preserved. + rebased = copy.copy(original) + rebased.path = ctx.path + rebased.methods = ctx.methods + yield rebased + else: + # WS / Mount / plain Route: the composed-path route is on the context. + yield ctx.starlette_route or original + + def _derive_service_contribution(service: NemoService) -> tuple[AuthzContribution, list[str], list[str]]: """Derive one plugin's wire contribution, split into deny-worthy errors and warnings. 
@@ -180,16 +231,12 @@ def _derive_service_contribution(service: NemoService) -> tuple[AuthzContributio warnings: list[str] = [] catalog: dict[str, Permission] = {} - # Re-create the runtime mount: /apis/{service.name} + RouterSpec.prefix + route path. - composed = APIRouter() - for spec in service.get_routers(): - composed.include_router(spec.router, prefix=f"/apis/{service.name}{spec.prefix}") - - # Pass 1: walk routes, collapse OR'd rules, and collect referenced permissions. - # ``bindings`` holds the tentative allow binding per (path, method); unruled / invalid - # routes are recorded as None and become DENY regardless of namespace validity. + # Pass 1: walk the fully-composed leaf routes (/apis/{service.name} + RouterSpec.prefix + route + # path), collapse OR'd rules, and collect referenced permissions. ``bindings`` holds the + # tentative allow binding per (path, method); unruled / invalid routes are recorded as None + # and become DENY regardless of namespace validity. bindings: dict[str, dict[str, AuthzEndpointMethod | None]] = {} - for route in composed.routes: + for route in _iter_composed_routes(service): if not isinstance(route, APIRoute): # Mount / plain Starlette Route / WebSocket route — not an HTTP API route the PDP # binds by (path, method). Never silently skip it (that lets it fall through the diff --git a/packages/nemo_platform_plugin/tests/test_factory_authz.py b/packages/nemo_platform_plugin/tests/test_factory_authz.py index b2f6f31e4c..de514223bf 100644 --- a/packages/nemo_platform_plugin/tests/test_factory_authz.py +++ b/packages/nemo_platform_plugin/tests/test_factory_authz.py @@ -39,13 +39,25 @@ async def _compiler(*args: object, **kwargs: object) -> object: # never called def _rules_by_path_method(router: APIRouter) -> dict[tuple[str, str], list]: - """Map (path, lower-method) -> attached PathRules for every APIRoute in *router*.""" + """Map (composed-path, lower-method) -> attached PathRules for every APIRoute in *router*.
+ + fastapi 0.138.0 makes ``include_router(prefix=...)`` lazy: rebased ``APIRoute``\\ s live + behind a ``_IncludedRouter`` proxy rather than in ``.routes``. Descend the proxy via + ``effective_route_contexts()`` to read each route's composed path/methods and original + endpoint (which still carries the ``@path_rule`` metadata). + """ out: dict[tuple[str, str], list] = {} for route in router.routes: - if not isinstance(route, APIRoute): + contexts = getattr(route, "effective_route_contexts", None) + if contexts is None: + if isinstance(route, APIRoute): + for method in route.methods or set(): + out[(route.path, method.lower())] = get_path_rules(route.endpoint) continue - for method in route.methods or set(): - out[(route.path, method.lower())] = get_path_rules(route.endpoint) + for ctx in contexts(): + if isinstance(ctx.original_route, APIRoute): + for method in ctx.methods or set(): + out[(ctx.path, method.lower())] = get_path_rules(ctx.original_route.endpoint) return out diff --git a/packages/nemo_platform_plugin/tests/test_path_rule.py b/packages/nemo_platform_plugin/tests/test_path_rule.py index 1ca155a8ab..ead46859e7 100644 --- a/packages/nemo_platform_plugin/tests/test_path_rule.py +++ b/packages/nemo_platform_plugin/tests/test_path_rule.py @@ -22,7 +22,7 @@ perm, validate_caller_strings, ) -from nemo_platform_plugin.authz_discovery import _method_from_dict +from nemo_platform_plugin.authz_discovery import _iter_composed_routes, _method_from_dict from nemo_platform_plugin.service import NemoService, RouterSpec _READ = Permission("x.read", "Read x") @@ -121,7 +121,12 @@ async def handler() -> None: ... def test_path_rule_survives_router_prefix_rebasing() -> None: - """D5: function-attached metadata must survive include_router(prefix=...) rebasing.""" + """D5: function-attached metadata must survive include_router(prefix=...) rebasing. 
+ + fastapi 0.138.0 makes ``include_router(prefix=...)`` lazy (rebased routes live behind a + ``_IncludedRouter`` proxy, not in ``.routes``), so discoverability is asserted via the + derivation's composed-route enumeration rather than by scanning raw ``.routes``. + """ router = APIRouter() items_read = Permission("items.read", "Read items") @@ -131,16 +136,25 @@ async def get_item(name: str) -> dict[str, str]: return {"name": name} # Two prefix hops, as a real plugin mount does (/apis/{service.name} then workspace prefix). + # _iter_composed_routes re-creates the /apis/{service.name} mount, so the spec supplies only the + # inner workspace prefix; the helper prepends /apis/example. inner = APIRouter() inner.include_router(router, prefix="/v2/workspaces/{workspace}") - app_router = APIRouter() - app_router.include_router(inner, prefix="/apis/example") - matching = [r for r in app_router.routes if isinstance(r, APIRoute) and r.path.endswith("/items/{name}")] + class _Svc(NemoService): + name = "example" + + def get_routers(self) -> list[RouterSpec]: + return [RouterSpec(inner)] + + matching = [ + r for r in _iter_composed_routes(_Svc()) if isinstance(r, APIRoute) and r.path.endswith("/items/{name}") + ] assert len(matching) == 1 final_route = matching[0] assert final_route.path == "/apis/example/v2/workspaces/{workspace}/items/{name}" + # Metadata survived the rebase: the rule is still readable off the (identity-preserved) endpoint. rules = get_path_rules(final_route.endpoint) assert len(rules) == 1 assert rules[0].callers == [CallerKind.PRINCIPAL]