From cc0d5e1f0fb4b044e188ef11b98b55d219598790 Mon Sep 17 00:00:00 2001 From: Jon Langevin Date: Sat, 6 Jun 2026 00:26:09 -0400 Subject: [PATCH 01/13] docs: design task proof sdk --- .../plans/2026-06-06-task-proof-sdk-design.md | 151 ++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100644 docs/plans/2026-06-06-task-proof-sdk-design.md diff --git a/docs/plans/2026-06-06-task-proof-sdk-design.md b/docs/plans/2026-06-06-task-proof-sdk-design.md new file mode 100644 index 0000000..832db7a --- /dev/null +++ b/docs/plans/2026-06-06-task-proof-sdk-design.md @@ -0,0 +1,151 @@ +# Task and Proof SDK Design + +## Goal + +Expose the portable task, lease, proof-listing, and minimal HTTP client +contracts in `workflow-plugin-compute-core/protocol` so public Workflow plugins +can submit compute tasks and observe proof receipts without importing the +private `workflow-compute/pkg/protocol` package. + +This is Phase 1 of the public distributed-compute platform roadmap. It removes +the immediate private-protocol dependency blocker for `workflow-plugin-product-capture` +while keeping `workflow-compute` as the managed product assembly that owns +scheduling, task mutation, agent supervision, settlement, dashboards, and +deployment policy. + +## Global Design Guidance + +Source: `README.md` + +| Guidance | Design response | +|---|---| +| Compute-core is the public Go module for compute protocol and provider catalog contracts. | Add wire contracts and client helpers to `protocol`, not app behavior. | +| Workflow applications should treat declarations as portable provider-facing base contracts. | Define task/proof request and response shapes that external plugins can compile against. | +| Application-specific scheduling, task state, settlement, dashboards, and worker supervision remain outside compute-core. | Do not add scheduler queues, admin APIs, dashboard models, worker registration, service leasing methods, or settlement helpers. | + +## Approaches Considered + +1. **Recommended: additive public SDK in compute-core.** Add task/lease/status + structs, public response wrappers, and a minimal HTTP client that only + covers task submission, task listing/snapshot, proof listing, and proof + lookup. This satisfies product-capture and preserves the private app + boundary. +2. **Types only, no client.** This is smaller but leaves each plugin copying + HTTP auth, strict decoding, response wrappers, and timeout behavior. That is + the current product-capture problem in a different file. +3. **Full control-plane SDK.** This would centralize more code, but it would + move scheduler/agent/admin concerns into compute-core prematurely and blur + the public product boundary. + +## Design + +Add a new public protocol surface: + +- `TaskStatus` constants matching the existing wire values. +- `Task` with only the current portable JSON fields: protocol version, + product/org/pool/policy IDs, status, workload, placement/proof/network/access + policies, residue/resource limits, input hash, requested time, timeout, + labels, and signature. +- `Lease` for the task-agent wire contract, including capability snapshot, + executor, network/P2P/residue policies, and lease timestamps. +- `TaskStall` and `TaskList` response wrappers for `/v1/tasks`. +- `TaskResponse` and `ProofList` response wrappers for `/v1/tasks` and + `/v1/proofs`. +- `Client` with `SubmitTask`, `ListTasks`, `TaskSnapshot`, `ListProofs`, and + `FindProof`. + +The client will be transport-thin. It will set bearer auth when configured, +require HTTPS for token-bearing non-loopback URLs, use `DecodeStrict`, and +return typed errors for unexpected status codes. It will not implement task +creation policy, retries, async watches, lease acquisition, worker +registration, admin endpoints, provider registration, or dashboard/settlement +views. + +`workflow-compute` will consume these types in a follow-up PR by aliasing its +public protocol package to compute-core where the wire shape is identical. +`workflow-plugin-product-capture` will then switch imports to compute-core in a +later downstream phase and use the public client. + +## Security Review + +- Auth token flow stays caller-owned; compute-core only places a configured + token into `Authorization: Bearer`. +- Token-bearing clients reject non-HTTPS URLs unless the host is loopback. +- Strict JSON decode rejects unrecognized response fields so plugins detect + contract drift early. +- The client does not log token values, request bodies, task payloads, or proof + payloads. +- Server-side authorization remains in `workflow-compute`; client-side config + is not treated as authority. + +## Infrastructure Impact + +This phase changes only the public Go module. It creates no cloud resources, +secrets, databases, queues, migrations, deployment environments, or runtime +processes. Release impact is limited to a compute-core tag after the PR merges. + +No staging deployment is required for the compute-core PR by itself. The +follow-up `workflow-compute` consumer PR must refresh staging and run a real +product-capture-compatible submission/proof smoke because that PR changes the +managed app assembly. + +## Multi-Component Validation + +The compute-core PR must prove: + +- `protocol.Task` and `protocol.Lease` validate representative real task/agent + wire shapes. +- The HTTP client crosses a real `httptest.Server` boundary for submit, list, + snapshot, proof list, auth header, strict decode, and status errors. +- A downstream compatibility check can compile the product-capture client + shape against compute-core types without importing `workflow-compute`. + +The follow-up app PR must prove: + +- `workflow-compute` aliases or delegates matching types to compute-core + without changing API JSON. +- product-capture can compile against the public contract. +- staging accepts a product-capture-style workload from registered local agents + and returns a proof or explicit typed failure. + +## Assumptions + +- The existing `/v1/tasks` and `/v1/proofs` JSON response shapes are intended + public surfaces for plugins. +- Product-capture needs task submission and proof lookup, not agent lease + acquisition. +- `Lease` belongs in compute-core as a public wire type, but lease acquisition + methods are a later agent SDK concern. +- Existing host validation remains stricter than compute-core portable + validation where policy decisions require server state. + +## Self-Challenge + +1. The laziest solution is to keep product-capture copying its private client + and only switch type imports. That reduces code movement now but repeats + auth/strict-decode drift across every future workload plugin. +2. The fragile assumption is that `/v1/tasks` and `/v1/proofs` are stable enough + for public clients. The plan must add drift tests in `workflow-compute` before + downstream live usage. +3. The main YAGNI risk is adding leasing/admin methods. This design excludes + them and records them as a deferred agent SDK concern. + +## Rollback + +Rollback the compute-core PR by reverting the additive public SDK commit and +tagging no release. Because no downstream app code is changed in this PR, no +server deployment rollback is needed. + +If a compute-core tag has already been published, publish a patch tag that +removes or marks the SDK unstable, then keep `workflow-compute` pinned to the +previous known-good compute-core version until the replacement tag is verified. + +## Deferred Issues + +- Switch `workflow-compute` to compute-core aliases/delegates in the next PR. +- Switch `workflow-plugin-product-capture` imports and docs after the + `workflow-compute` consumer PR verifies API compatibility. +- Add lease acquisition, worker registration, and resilient agent upgrade APIs + to the future public agent plugin phase, not this SDK. +- Keep live staging refresh/local-agent registration evidence in the + `workflow-compute` consumer phase where the server actually changes. From 17964257cb52160eccb82a06b03902f86baaff46 Mon Sep 17 00:00:00 2001 From: Jon Langevin Date: Sat, 6 Jun 2026 00:26:45 -0400 Subject: [PATCH 02/13] docs: review task proof sdk design --- ...2026-06-06-task-proof-sdk-design-review.md | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 docs/plans/2026-06-06-task-proof-sdk-design-review.md diff --git a/docs/plans/2026-06-06-task-proof-sdk-design-review.md b/docs/plans/2026-06-06-task-proof-sdk-design-review.md new file mode 100644 index 0000000..e5f0b9e --- /dev/null +++ b/docs/plans/2026-06-06-task-proof-sdk-design-review.md @@ -0,0 +1,40 @@ +### Adversarial Review Report + +**Phase:** design +**Artifact:** `docs/plans/2026-06-06-task-proof-sdk-design.md` +**Status:** PASS + +**Findings (Critical):** +- None. + +**Findings (Important):** +- None. + +**Findings (Minor):** +- `D1` [YAGNI violations] [Design]: `Lease` is not required by product-capture's immediate import switch. Recommendation: keep `Lease` as a wire type only, and keep lease acquisition/client methods out of this phase. _Resolution: already constrained by the design and Deferred Issues sections._ +- `D2` [Missing failure modes] [Design]: client error taxonomy is named only as "typed errors"; the design does not say whether response bodies are bounded or surfaced. Recommendation: plan an explicit status error test that avoids leaking response bodies and captures status/method/path. _Resolution: plan task must include the test._ +- `D3` [Existence / runtime-validity] [Multi-Component Validation]: downstream compatibility is stated, but the design does not name the exact product-capture import surface to compile. Recommendation: plan a temporary downstream compile proof covering `Task`, `TaskStatus`, `ProofReceipt`, `WorkloadSpec`, `ProviderWorkload`, `ProviderConfig`, `ProductCaptureMode`, `SignatureEnvelope`, and `DecodeStrict`/client replacement. _Resolution: plan task must enumerate these symbols._ + +**Bug-class scan transcript:** + +| Class | Result | Note | +|---|---|---| +| Project-guidance conflicts | Clean | `README.md` keeps scheduling/task state/settlement/dashboard/supervision outside compute-core; design excludes those surfaces. | +| Assumptions under attack | Clean | The design names the stable `/v1/tasks` and `/v1/proofs` assumption and requires workflow-compute drift tests before live usage. | +| Repo-precedent conflicts | Clean | Prior compute-core plans add additive protocol types plus validation and defer workflow-compute consumption. | +| Artifact-class precedent | Clean | Sibling protocol contracts live under `protocol/` with tests in `protocol/*_test.go`; design follows that shape. | +| YAGNI violations | Minor | `Lease` is future-facing for agent interop, but constrained to a type without lease client methods. | +| Missing failure modes | Minor | Status error/body behavior needs explicit plan coverage. | +| Security / privacy at architecture level | Clean | HTTPS requirement for token-bearing non-loopback URLs and no token/body logging are explicit. | +| Infrastructure impact | Clean | Compute-core-only PR has no infra or runtime process impact; workflow-compute consumer phase owns staging. | +| Multi-component validation | Clean | Requires `httptest.Server` boundary and downstream product-capture compile proof. | +| Rollback story | Clean | Revert additive commit before release; patch tag/pin rollback if already released. | +| Simpler alternative not considered | Clean | Types-only and full control-plane SDK alternatives are considered and rejected. | +| User-intent drift | Clean | Design serves the requested public reusable platform boundary and product-capture compatibility. | +| Existence / runtime-validity | Minor | Needs exact downstream compile symbols in the plan. | + +**Options the author may not have considered:** +1. Keep the client in product-capture and publish only task/proof types. This is smaller but preserves duplicated auth/strict-decode behavior across future plugins. +2. Publish client methods in a separate `client` package. That reduces `protocol` package breadth, but this repo already keeps protocol helpers such as canonical hashing and strict decode alongside contracts, so a small client is acceptable if it stays transport-thin. + +**Verdict reasoning:** PASS. The design has no open Critical or Important issue. Minor findings are plan-level constraints: avoid lease methods, test status errors, and compile the exact product-capture symbol surface. From 58426cebf9ca26e6a5d00aa94e9c76d3edd2f164 Mon Sep 17 00:00:00 2001 From: Jon Langevin Date: Sat, 6 Jun 2026 00:29:10 -0400 Subject: [PATCH 03/13] docs: plan task proof sdk --- docs/plans/2026-06-06-task-proof-sdk.md | 335 ++++++++++++++++++++++++ 1 file changed, 335 insertions(+) create mode 100644 docs/plans/2026-06-06-task-proof-sdk.md diff --git a/docs/plans/2026-06-06-task-proof-sdk.md b/docs/plans/2026-06-06-task-proof-sdk.md new file mode 100644 index 0000000..a6779c1 --- /dev/null +++ b/docs/plans/2026-06-06-task-proof-sdk.md @@ -0,0 +1,335 @@ +# Task Proof SDK Implementation Plan + +> **For the implementing agent:** REQUIRED SUB-SKILL: Use autodev:executing-plans to implement this plan task-by-task. + +**Goal:** Add the public task/proof SDK surface needed for external compute workload plugins to submit tasks and observe proof receipts without importing private workflow-compute packages. + +**Architecture:** Additive protocol contracts and a transport-thin HTTP client live in `protocol/`. The SDK covers task/proof wire shapes only; workflow-compute keeps scheduling, task mutation policy, agent lifecycle, settlement, dashboards, and deployment. + +**Tech Stack:** Go, `net/http`, `httptest`, existing `protocol.DecodeStrict`, existing workload/proof/policy structs. + +**Base branch:** main + +--- + +## Scope Manifest + +**PR Count:** 1 +**Tasks:** 5 +**Estimated Lines of Change:** ~450 + +**Out of scope:** +- workflow-compute alias/delegate changes. +- product-capture import switch and BMW live usage docs. +- lease acquisition, worker registration, agent updater, scheduler/admin/dashboard/settlement APIs. +- staging deployment or local-agent registration changes. + +**PR Grouping:** + +| PR # | Title | Tasks | Branch | +|------|-------|-------|--------| +| 1 | feat: add task proof sdk | Task 1, Task 2, Task 3, Task 4, Task 5 | feat/task-proof-sdk | + +**Status:** Draft + +## Requirements Trace + +| Design requirement | Plan task | +|---|---| +| Public `TaskStatus`, `Task`, `Lease`, task/proof response wrappers | Task 1, Task 2 | +| Portable validation without scheduler/admin policy | Task 1, Task 2 | +| Transport-thin client for submit/list/snapshot/proof lookup | Task 3, Task 4 | +| HTTPS requirement for token-bearing non-loopback URLs | Task 3, Task 4 | +| Strict decode and typed status errors | Task 3, Task 4 | +| Product-capture compatibility symbol proof | Task 5 | +| No workflow-compute/app behavior in compute-core | Task 5 | + +### Task 1: Task and Lease Contract Tests + +**Files:** +- Create: `protocol/task_test.go` +- Modify: none + +**Step 1: Write failing tests** + +Add tests: + +- `TestTaskValidatesPortableWorkloadContract` + - Builds a queued provider task with `ProtocolVersion: protocol.Version`, + workload kind `WorkloadProvider`, a product-capture-style + `ProviderWorkload`, input hash, requested time, positive timeout, and + signature. + - Expects `task.Validate()` to return nil. +- `TestTaskRejectsMalformedPortableContract` + - Uses wrong protocol version, unknown status, missing IDs, invalid workload, + negative timeout, and invalid resource limits. + - Expects the error string to include `protocol_version`, `status`, + `org_id`, `workload`, `timeout_seconds`, and `resource_limits`. +- `TestLeaseValidatesAgentWireContract` + - Builds a lease with task/worker/pool IDs, executor provider/version, + capability OS/arch, network policy, residue policy with session key, + explicit worker binding, policy hash, and valid timestamps. + - Expects `lease.Validate()` to return nil. +- `TestLeaseRejectsMalformedAgentWireContract` + - Uses missing IDs, missing executor provider/version, missing capability + OS/arch, bad residue policy, and `ExpiresAt` before `LeasedAt`. + - Expects the error string to include `executor.provider`, + `capability_snapshot.os`, `residue_policy`, and `expires_at`. + +**Step 2: Verify RED** + +Run: + +```bash +GOWORK=off go test ./protocol -run 'Test(TaskValidatesPortableWorkloadContract|TaskRejectsMalformedPortableContract|LeaseValidatesAgentWireContract|LeaseRejectsMalformedAgentWireContract)' -count=1 +``` + +Expected: FAIL because `Task`, `TaskStatus`, `Lease`, and validation methods are not defined. + +**Step 3: Commit tests** + +```bash +git add protocol/task_test.go +git commit -m "test: cover public task lease contracts" +``` + +### Task 2: Public Task and Lease Contracts + +**Files:** +- Modify: `protocol/types.go` +- Test: `protocol/task_test.go` + +**Step 1: Implement minimal contracts** + +Add: + +- `TaskStatus` constants: queued, leased, running, succeeded, failed, stalled, + canceled. +- `Task` struct with the JSON shape from the design. +- `Task.Validate() error`. +- `Lease` struct with the JSON shape from the design. +- `Lease.Validate() error`. + +Validation rules: + +- `Task.ProtocolVersion` must equal `Version`. +- `Task.ID`, `OrgID`, `PoolID`, and `PolicyID` are required. +- `Task.Status` may be empty or one of the public constants. +- `Task.Workload.Validate()` must pass. +- `Task.Requirements`, proof/network/access/residue/resource policies validate + through existing helpers where available. +- `Task.RequestedAt` is required. +- `Task.TimeoutSeconds` must be positive. +- `Task.Signature` is shape-carried only; server-side signature trust remains + out of scope. +- `Lease` requires IDs, executor provider/version, capability OS/arch, valid + network/P2P/residue policies, and `ExpiresAt` after `LeasedAt`. + +**Step 2: Verify GREEN** + +Run the Task 1 command again. + +Expected: PASS. + +**Step 3: Verify invariant** + +Temporarily remove the `Task.Status` validation or the `Lease.ExpiresAt` +validation, rerun the focused command, and confirm the matching malformed test +fails. Restore the validation and rerun to PASS. + +**Step 4: Commit implementation** + +```bash +git add protocol/types.go protocol/task_test.go +git commit -m "feat: add public task lease contracts" +``` + +### Task 3: HTTP Client Contract Tests + +**Files:** +- Create: `protocol/client_test.go` +- Modify: none + +**Step 1: Write failing tests** + +Add tests using `httptest.Server`: + +- `TestClientSubmitTaskUsesStrictJSONAndBearerAuth` + - Server expects `POST /v1/tasks`, `Authorization: Bearer test-token`, and + JSON task body. + - Server responds `201 {"task": }`. + - Expects returned task ID to match. +- `TestClientListSnapshotAndProofLookup` + - Server handles `GET /v1/tasks` returning `{"tasks":[...],"stalls":[...]}`. + - Server handles `GET /v1/proofs` returning `{"proofs":[...]}`. + - Expects `ListTasks`, `TaskSnapshot`, `ListProofs`, and `FindProof` to use + those responses. +- `TestClientRejectsTokenOverNonLoopbackHTTP` + - Expects `NewClient(ClientConfig{ServerURL:"http://example.test", Token:"x"})` + to fail. +- `TestClientStatusErrorDoesNotExposeBody` + - Server returns non-201 with a response body containing a sentinel secret. + - Expects `StatusError` with method/path/status and no sentinel secret in + `Error()`. +- `TestClientStrictDecodeRejectsUnknownFields` + - Server responds with an unknown field in the task wrapper. + - Expects decode failure. + +**Step 2: Verify RED** + +Run: + +```bash +GOWORK=off go test ./protocol -run 'TestClient(SubmitTaskUsesStrictJSONAndBearerAuth|ListSnapshotAndProofLookup|RejectsTokenOverNonLoopbackHTTP|StatusErrorDoesNotExposeBody|StrictDecodeRejectsUnknownFields)' -count=1 +``` + +Expected: FAIL because client config, client methods, wrappers, and status error are not defined. + +**Step 3: Commit tests** + +```bash +git add protocol/client_test.go +git commit -m "test: cover public task proof client" +``` + +### Task 4: Public Task Proof Client + +**Files:** +- Create: `protocol/client.go` +- Modify: `protocol/client_test.go` +- Test: `protocol/client_test.go` + +**Step 1: Implement minimal client** + +Add: + +- `ClientConfig{ServerURL string, Token string, HTTPClient *http.Client, Timeout time.Duration}`. +- `Client`. +- `NewClient(ClientConfig) (*Client, error)`. +- `StatusError{Method string, Path string, StatusCode int}`. +- `TaskResponse`, `TaskList`, `TaskStall`, and `ProofList`. +- `SubmitTask(ctx context.Context, task Task) (Task, error)`. +- `ListTasks(ctx context.Context) (TaskList, error)`. +- `TaskSnapshot(ctx context.Context, id string) (Task, bool, []TaskStall, error)`. +- `ListProofs(ctx context.Context) ([]ProofReceipt, error)`. +- `FindProof(ctx context.Context, taskID string) (ProofReceipt, bool, error)`. + +Client rules: + +- `ServerURL` must be absolute `http` or `https`. +- Token-bearing non-loopback URLs must use `https`. +- Default timeout is 30 seconds when no HTTP client or timeout is supplied. +- Request bodies use JSON and set `Content-Type: application/json`. +- Token config sets `Authorization: Bearer `. +- Responses decode with `DecodeStrict`. +- Non-expected status returns `StatusError` without reading or exposing the body. + +**Step 2: Verify GREEN** + +Run the Task 3 command again. + +Expected: PASS. + +**Step 3: Verify invariant** + +Temporarily remove the HTTPS guard and rerun +`TestClientRejectsTokenOverNonLoopbackHTTP`; confirm FAIL. Restore the guard and +rerun to PASS. + +**Step 4: Commit implementation** + +```bash +git add protocol/client.go protocol/client_test.go +git commit -m "feat: add public task proof client" +``` + +### Task 5: Product-Capture Compatibility and Boundary Proof + +**Files:** +- Create: `protocol/product_capture_compat_test.go` +- Modify: `README.md` +- Modify: `docs/plans/2026-06-06-task-proof-sdk-design.md` only if execution + reveals a false assumption; do not change the Scope Manifest without an + explicit amendment. + +**Step 1: Write compatibility test** + +Add `TestProductCapturePublicSDKSurface` in package `protocol_test` that builds +the exact public symbol surface product-capture needs: + +- `protocol.WorkloadSpec` +- `protocol.WorkloadProvider` +- `protocol.ProviderWorkload` +- `protocol.ProviderConfig` +- `protocol.ProductCaptureModeBrowser` +- `protocol.Task` +- `protocol.TaskQueued` +- `protocol.SignatureEnvelope` +- `protocol.ProofReceipt` +- `protocol.VerificationAccepted` +- `protocol.TaskList` +- `protocol.TaskStall` +- `protocol.NewClient` + +The test should validate a product-capture-style provider workload and task, +round-trip a `TaskList` with `json.Marshal`/`DecodeStrict`, and assert the +client constructor accepts an HTTPS URL. + +**Step 2: Verify compatibility test** + +Run: + +```bash +GOWORK=off go test ./protocol -run TestProductCapturePublicSDKSurface -count=1 +``` + +Expected: PASS after Tasks 2 and 4. + +**Step 3: Update docs** + +Update `README.md` to state that compute-core now includes portable task/proof +wire contracts and a minimal task/proof HTTP client, while scheduling, agent +lifecycle, settlement, dashboards, and worker supervision remain outside. + +**Step 4: Full verification** + +Run: + +```bash +GOWORK=off go test ./protocol -count=1 +GOWORK=off go test ./... -count=1 +git diff --check +``` + +Expected: all tests pass and diff check is clean. + +**Step 5: Commit compatibility proof and docs** + +```bash +git add protocol/product_capture_compat_test.go README.md docs/plans/2026-06-06-task-proof-sdk-design.md +git commit -m "docs: document task proof sdk boundary" +``` + +## Successor Hand-Off + +After this compute-core PR is merged and green: + +1. Open a workflow-compute PR that aliases/delegates matching task/proof/lease + types to the released compute-core version and verifies JSON/API drift. +2. Run product-capture compile proof against the workflow-compute consumer PR. +3. Refresh staging and run a real product-capture-compatible workload through + registered local agents. +4. Tag the next compute-core release only after the compute-core PR is merged + and CI is green; update downstream pins in the workflow-compute consumer PR. + +## Final PR Verification + +Before PR creation: + +```bash +GOWORK=off go test ./protocol -count=1 +GOWORK=off go test ./... -count=1 +git diff --check +``` + +Expected: all commands exit 0. From ac453de87e3b2897715654c6c9467733529bd85c Mon Sep 17 00:00:00 2001 From: Jon Langevin Date: Sat, 6 Jun 2026 00:30:49 -0400 Subject: [PATCH 04/13] docs: review task proof sdk plan --- .../plans/2026-06-06-task-proof-sdk-design.md | 13 ++++- .../2026-06-06-task-proof-sdk-plan-review.md | 49 +++++++++++++++++++ docs/plans/2026-06-06-task-proof-sdk.md | 4 ++ 3 files changed, 64 insertions(+), 2 deletions(-) create mode 100644 docs/plans/2026-06-06-task-proof-sdk-plan-review.md diff --git a/docs/plans/2026-06-06-task-proof-sdk-design.md b/docs/plans/2026-06-06-task-proof-sdk-design.md index 832db7a..33dac71 100644 --- a/docs/plans/2026-06-06-task-proof-sdk-design.md +++ b/docs/plans/2026-06-06-task-proof-sdk-design.md @@ -11,7 +11,8 @@ This is Phase 1 of the public distributed-compute platform roadmap. It removes the immediate private-protocol dependency blocker for `workflow-plugin-product-capture` while keeping `workflow-compute` as the managed product assembly that owns scheduling, task mutation, agent supervision, settlement, dashboards, and -deployment policy. +deployment policy until a later reusable control-plane component is designed and +extracted. ## Global Design Guidance @@ -21,7 +22,7 @@ Source: `README.md` |---|---| | Compute-core is the public Go module for compute protocol and provider catalog contracts. | Add wire contracts and client helpers to `protocol`, not app behavior. | | Workflow applications should treat declarations as portable provider-facing base contracts. | Define task/proof request and response shapes that external plugins can compile against. | -| Application-specific scheduling, task state, settlement, dashboards, and worker supervision remain outside compute-core. | Do not add scheduler queues, admin APIs, dashboard models, worker registration, service leasing methods, or settlement helpers. | +| Application-specific scheduling, task state, settlement, dashboards, and worker supervision remain outside compute-core. | Do not add scheduler queues, admin APIs, dashboard models, worker registration, service leasing methods, or settlement helpers here; future reusable control-plane extraction must be its own phase/component. | ## Approaches Considered @@ -66,6 +67,12 @@ public protocol package to compute-core where the wire shape is identical. `workflow-plugin-product-capture` will then switch imports to compute-core in a later downstream phase and use the public client. +Long-term, GoCodeAlone may extract a reusable control plane that other managed +products can assemble. This SDK is intentionally narrower: it supplies the +shared wire contract that such a control plane would also consume, without +deciding that control plane's storage model, scheduling policy, deployment +shape, authz chain, or operational UI. + ## Security Review - Auth token flow stays caller-owned; compute-core only places a configured @@ -147,5 +154,7 @@ previous known-good compute-core version until the replacement tag is verified. `workflow-compute` consumer PR verifies API compatibility. - Add lease acquisition, worker registration, and resilient agent upgrade APIs to the future public agent plugin phase, not this SDK. +- Design reusable control-plane extraction as its own future platform phase, + not as an expansion of compute-core's protocol/client package. - Keep live staging refresh/local-agent registration evidence in the `workflow-compute` consumer phase where the server actually changes. diff --git a/docs/plans/2026-06-06-task-proof-sdk-plan-review.md b/docs/plans/2026-06-06-task-proof-sdk-plan-review.md new file mode 100644 index 0000000..bdae61b --- /dev/null +++ b/docs/plans/2026-06-06-task-proof-sdk-plan-review.md @@ -0,0 +1,49 @@ +### Adversarial Review Report + +**Phase:** plan +**Artifact:** `docs/plans/2026-06-06-task-proof-sdk.md` +**Status:** PASS + +**Findings (Critical):** +- None. + +**Findings (Important):** +- None. + +**Findings (Minor):** +- `P1` [Missing integration proof] [Task 5]: The compatibility proof is an in-repo symbol/shape test, not a full downstream product-capture repo compile. Recommendation: keep this as the compute-core PR proof, but require the actual product-capture import switch and live plugin proof in the successor phase. _Resolution: Successor Hand-Off explicitly requires product-capture compile proof and staging workload evidence._ +- `P2` [Over-decomposition / under-decomposition] [Tasks 1-4]: TDD steps are larger than the ideal 2-5 minute slices because each task groups related tests. Recommendation: acceptable for this repo because the grouped tests cover one artifact class each; implementer should still commit after each task. _Resolution: accepted as review-size trade-off._ +- `P3` [Rollback wiring] [Successor Hand-Off]: Release rollback is mentioned in the design, but this compute-core PR does not execute a release. Recommendation: do not tag from this PR until merged and green; release/tag remains successor work. _Resolution: Scope Manifest excludes release/tag and Successor Hand-Off gates it after merge._ + +**Bug-class scan transcript:** + +| Class | Result | Note | +|---|---|---| +| Project-guidance conflicts | Clean | README boundary is preserved; app scheduling/admin/dashboard/settlement remain out of scope. | +| Assumptions under attack | Clean | Stable `/v1/tasks` and `/v1/proofs` assumption is deferred to workflow-compute drift tests before live usage. | +| Repo-precedent conflicts | Clean | Plan follows prior compute-core contract extraction pattern with protocol tests first. | +| Artifact-class precedent | Clean | Public protocol contracts and tests stay under `protocol/`. | +| YAGNI violations | Clean | Lease is a type only; no lease client methods or agent admin APIs are planned. | +| Missing failure modes | Clean | Plan tests HTTPS token guard, strict decode, status errors, missing task snapshot, malformed task, and malformed lease. | +| Security / privacy at architecture level | Clean | Token-bearing HTTP guard and status error no-body-leak test cover the client security edge. | +| Infrastructure impact | Clean | No infra, deployment, migration, queue, or secret changes in this PR. | +| Multi-component validation | Minor | `httptest.Server` crosses HTTP boundary; full downstream product-capture compile/live proof is successor work. | +| Rollback story | Clean | Additive PR can be reverted before release; release rollback is outside this PR. | +| Simpler alternative not considered | Clean | Types-only and full SDK alternatives were considered in the design. | +| User-intent drift | Clean | PR advances public reusable compute platform without moving managed app behavior out of workflow-compute. | +| Existence / runtime-validity | Clean | Existing symbols verified by repo grep; new client targets real existing `/v1/tasks` and `/v1/proofs` paths. | +| Over-decomposition / under-decomposition | Minor | Tests are grouped by artifact class rather than per assertion. | +| Verification-class mismatch | Clean | Protocol types use unit tests; HTTP client uses real `httptest.Server`; docs use full suite and diff check. | +| Auth/authz chain composition | Clean | No server-side auth chain is implemented; client only carries bearer token. | +| Hidden serial dependencies | Clean | Tasks are intentionally serial; no parallel execution is claimed. | +| Missing rollback wiring | Minor | Release rollback is successor work, not PR work. | +| Infrastructure verification mismatch | Clean | No infra change. | +| Plugin-loader runtime layout | Clean | No plugin process is spawned or loaded. | +| Config-validation schema rules | Clean | No config/schema artifact is created. | +| Identifier / naming-convention match | Clean | Planned names match existing Go/exported-type conventions and product-capture imports. | + +**Options the author may not have considered:** +1. Use a generated OpenAPI client from workflow-compute. This might produce stronger API drift checks later, but it would add generation/release complexity before the public contract is stable. +2. Put the client in `protocol/client` instead of `protocol`. This narrows package surface, but forces users to import two packages for a single task/proof workflow and diverges from existing protocol helper placement. + +**Verdict reasoning:** PASS. The plan is scoped to one compute-core PR, contains a valid manifest, uses TDD, and keeps full workflow-compute/product-capture/staging proof as explicit successor work rather than silently claiming it here. User feedback that GoCodeAlone may eventually want a reusable control plane is incorporated as a future platform phase; this PR remains the lower-level task/proof wire SDK that such a control plane would consume. diff --git a/docs/plans/2026-06-06-task-proof-sdk.md b/docs/plans/2026-06-06-task-proof-sdk.md index a6779c1..dafcb94 100644 --- a/docs/plans/2026-06-06-task-proof-sdk.md +++ b/docs/plans/2026-06-06-task-proof-sdk.md @@ -22,6 +22,7 @@ - workflow-compute alias/delegate changes. - product-capture import switch and BMW live usage docs. - lease acquisition, worker registration, agent updater, scheduler/admin/dashboard/settlement APIs. +- reusable control-plane extraction or control-plane storage/scheduling/authz design. - staging deployment or local-agent registration changes. **PR Grouping:** @@ -321,6 +322,9 @@ After this compute-core PR is merged and green: registered local agents. 4. Tag the next compute-core release only after the compute-core PR is merged and CI is green; update downstream pins in the workflow-compute consumer PR. +5. Plan reusable control-plane extraction as a later platform phase once the + public task/proof, product-capture, scenarios, staging rollout, and + reconnecting-agent phases have produced enough real-boundary evidence. ## Final PR Verification From 0170bf7534604e991b88b047eb79c56c50adfc1d Mon Sep 17 00:00:00 2001 From: Jon Langevin Date: Sat, 6 Jun 2026 00:31:39 -0400 Subject: [PATCH 05/13] chore: lock scope for task proof sdk --- .../2026-06-06-task-proof-sdk-alignment.md | 38 +++++++++++++++++++ docs/plans/2026-06-06-task-proof-sdk.md | 2 +- .../2026-06-06-task-proof-sdk.md.scope-lock | 1 + 3 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 docs/plans/2026-06-06-task-proof-sdk-alignment.md create mode 100644 docs/plans/2026-06-06-task-proof-sdk.md.scope-lock diff --git a/docs/plans/2026-06-06-task-proof-sdk-alignment.md b/docs/plans/2026-06-06-task-proof-sdk-alignment.md new file mode 100644 index 0000000..bf182d8 --- /dev/null +++ b/docs/plans/2026-06-06-task-proof-sdk-alignment.md @@ -0,0 +1,38 @@ +### Alignment Report + +**Status:** PASS + +**Coverage:** + +| Design Requirement | Plan Task(s) | Status | +|---|---|---| +| Expose portable task, lease, proof-listing, and minimal HTTP client contracts | Task 1, Task 2, Task 3, Task 4 | Covered | +| Remove immediate private-protocol dependency blocker for product-capture | Task 5 | Covered | +| Keep scheduling, task mutation, agent supervision, settlement, dashboards, and deployment policy outside compute-core | Task 5, Scope Manifest out-of-scope | Covered | +| Future reusable control plane must be its own phase/component, not compute-core expansion | Scope Manifest out-of-scope, Successor Hand-Off item 5 | Covered | +| Add `TaskStatus`, `Task`, `Lease`, wrappers, and client methods named in design | Task 1, Task 2, Task 3, Task 4 | Covered | +| Token-bearing clients reject non-HTTPS non-loopback URLs | Task 3, Task 4 | Covered | +| Strict decode and typed status errors without response-body leakage | Task 3, Task 4 | Covered | +| Prove HTTP boundary with `httptest.Server` | Task 3, Task 4 | Covered | +| Prove exact product-capture public symbol surface | Task 5 | Covered | +| No infra/staging action in compute-core PR; staging belongs workflow-compute consumer phase | Scope Manifest, Successor Hand-Off | Covered | +| Rollback by reverting additive SDK before release; tag rollback only if published | Successor Hand-Off, Final PR Verification | Covered | + +**Scope Check:** + +| Plan Task | Design Requirement | Status | +|---|---|---| +| Task 1 | Public task/lease contract tests and representative wire validation | Justified | +| Task 2 | Public `TaskStatus`, `Task`, `Lease`, and portable validation | Justified | +| Task 3 | HTTP client tests for auth, strict decode, status errors, and proof/task endpoints | Justified | +| Task 4 | Transport-thin public client implementation | Justified | +| Task 5 | Product-capture compatibility proof and boundary documentation | Justified | + +**Manifest Trace:** + +- `PR Count: 1` matches the single PR Grouping row. +- `Tasks: 5` matches `### Task 1` through `### Task 5`. +- Every task appears exactly once in the PR Grouping table. +- `plan-scope-check.sh --plan ` returned `PASS: scope-manifest checks succeeded.` + +**Drift Items:** None. diff --git a/docs/plans/2026-06-06-task-proof-sdk.md b/docs/plans/2026-06-06-task-proof-sdk.md index dafcb94..88babcf 100644 --- a/docs/plans/2026-06-06-task-proof-sdk.md +++ b/docs/plans/2026-06-06-task-proof-sdk.md @@ -31,7 +31,7 @@ |------|-------|-------|--------| | 1 | feat: add task proof sdk | Task 1, Task 2, Task 3, Task 4, Task 5 | feat/task-proof-sdk | -**Status:** Draft +**Status:** Locked 2026-06-06T04:30:58Z ## Requirements Trace diff --git a/docs/plans/2026-06-06-task-proof-sdk.md.scope-lock b/docs/plans/2026-06-06-task-proof-sdk.md.scope-lock new file mode 100644 index 0000000..3e0c70a --- /dev/null +++ b/docs/plans/2026-06-06-task-proof-sdk.md.scope-lock @@ -0,0 +1 @@ +57f85ca33fb55a9c7279c3149b808ac475f671e2e925a62313587926d7846f4e From 4c762137953c45e07da680630ac02a1979109b0c Mon Sep 17 00:00:00 2001 From: Jon Langevin Date: Sat, 6 Jun 2026 00:33:29 -0400 Subject: [PATCH 06/13] test: cover public task lease contracts --- protocol/task_test.go | 160 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 160 insertions(+) create mode 100644 protocol/task_test.go diff --git a/protocol/task_test.go b/protocol/task_test.go new file mode 100644 index 0000000..ab78eae --- /dev/null +++ b/protocol/task_test.go @@ -0,0 +1,160 @@ +package protocol_test + +import ( + "encoding/json" + "strings" + "testing" + "time" + + "github.com/GoCodeAlone/workflow-plugin-compute-core/protocol" +) + +func TestTaskValidatesPortableWorkloadContract(t *testing.T) { + task := validTask(t) + + if err := task.Validate(); err != nil { + t.Fatalf("task invalid: %v", err) + } +} + +func TestTaskRejectsMalformedPortableContract(t *testing.T) { + task := validTask(t) + task.ProtocolVersion = "wrong" + task.Status = protocol.TaskStatus("mystery") + task.OrgID = "" + task.Workload = protocol.WorkloadSpec{Kind: protocol.WorkloadProvider} + task.TimeoutSeconds = -1 + task.ResourceLimits = protocol.ResourceLimits{RuntimeSeconds: -1} + + err := task.Validate() + if err == nil { + t.Fatal("expected malformed task to fail") + } + for _, want := range []string{ + "protocol_version", + "status", + "org_id", + "workload", + "timeout_seconds", + "resource_limits", + } { + if !strings.Contains(err.Error(), want) { + t.Fatalf("Validate() = %v, want %q", err, want) + } + } +} + +func TestLeaseValidatesAgentWireContract(t *testing.T) { + lease := validLease(t) + + if err := lease.Validate(); err != nil { + t.Fatalf("lease invalid: %v", err) + } +} + +func TestLeaseRejectsMalformedAgentWireContract(t *testing.T) { + lease := validLease(t) + lease.Executor.Provider = "" + lease.Executor.Version = "" + lease.CapabilitySnapshot.OS = "" + lease.ResiduePolicy.PolicyHash = "bad" + lease.ExpiresAt = lease.LeasedAt.Add(-time.Second) + + err := lease.Validate() + if err == nil { + t.Fatal("expected malformed lease to fail") + } + for _, want := range []string{ + "executor.provider", + "capability_snapshot.os", + "residue_policy", + "expires_at", + } { + if !strings.Contains(err.Error(), want) { + t.Fatalf("Validate() = %v, want %q", err, want) + } + } +} + +func validTask(t *testing.T) protocol.Task { + t.Helper() + input := mustTaskRawMessage(t, map[string]any{ + "url": "https://example.test/products/sku-1", + "allowed_hosts": []string{"example.test"}, + "capture_mode": string(protocol.ProductCaptureModeBrowser), + }) + workload := protocol.WorkloadSpec{ + Kind: protocol.WorkloadProvider, + Provider: &protocol.ProviderWorkload{ + ProviderConfig: protocol.ProviderConfig{ + PluginID: "workflow-plugin-product-capture", + ProviderID: "browser", + ContractID: "product-capture.browser.v1", + Version: "v1.0.0", + ConfigRef: "config://providers/product-capture/browser", + }, + Operation: "capture_product", + ImageRef: "ghcr.io/gocodealone/product-capture@sha256:" + strings.Repeat("a", 64), + Input: input, + }, + } + return protocol.Task{ + ProtocolVersion: protocol.Version, + ID: "task-product-capture-1", + ProductID: "product-capture", + OrgID: "org-1", + PoolID: "pool-1", + PolicyID: "policy-1", + Status: protocol.TaskQueued, + Workload: workload, + ResourceLimits: protocol.ResourceLimits{RuntimeSeconds: 30, OutputBytes: 1024}, + InputHash: protocol.CanonicalHash(workload), + RequestedAt: time.Date(2026, 6, 6, 4, 0, 0, 0, time.UTC), + TimeoutSeconds: 60, + Labels: map[string]string{"plugin": "product-capture"}, + Signature: protocol.SignatureEnvelope{ + Algorithm: "test-sha256", + KeyID: "test-key", + Value: strings.Repeat("b", 64), + }, + } +} + +func validLease(t *testing.T) protocol.Lease { + t.Helper() + leasedAt := time.Date(2026, 6, 6, 4, 0, 0, 0, time.UTC) + return protocol.Lease{ + ID: "lease-1", + TaskID: "task-product-capture-1", + WorkerID: "worker-1", + PoolID: "pool-1", + Executor: protocol.ExecutorRef{ + Provider: "sandboxed-command", + Version: "v1.0.0", + }, + CapabilitySnapshot: protocol.Capabilities{ + OS: "darwin", + Arch: "arm64", + }, + NetworkPolicy: protocol.NetworkPolicy{ + Mode: protocol.NetworkModeOffline, + }, + ResiduePolicy: protocol.ResiduePolicy{ + Mode: protocol.ResidueModeSessionBound, + SessionKey: "task-product-capture-1", + PolicyHash: "sha256:" + strings.Repeat("c", 64), + ExplicitWorkerBound: true, + }, + LeasedAt: leasedAt, + ExpiresAt: leasedAt.Add(5 * time.Minute), + } +} + +func mustTaskRawMessage(t *testing.T, value any) json.RawMessage { + t.Helper() + data, err := json.Marshal(value) + if err != nil { + t.Fatalf("marshal raw message: %v", err) + } + return data +} From 0d83ec470c568050831ca43b9c0a8a875c62007e Mon Sep 17 00:00:00 2001 From: Jon Langevin Date: Sat, 6 Jun 2026 00:36:53 -0400 Subject: [PATCH 07/13] feat: add public task lease contracts --- protocol/types.go | 471 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 471 insertions(+) diff --git a/protocol/types.go b/protocol/types.go index f95861e..2f92e62 100644 --- a/protocol/types.go +++ b/protocol/types.go @@ -604,6 +604,20 @@ func validateVerifiedSignature(prefix string, sig SignatureEnvelope) error { return errors.Join(errs...) } +func validateSignatureEnvelope(sig SignatureEnvelope, name string) error { + var errs []error + if strings.TrimSpace(sig.Algorithm) == "" { + errs = append(errs, fmt.Errorf("%s.algorithm is required", name)) + } + if strings.TrimSpace(sig.KeyID) == "" { + errs = append(errs, fmt.Errorf("%s.key_id is required", name)) + } + if strings.TrimSpace(sig.Value) == "" { + errs = append(errs, fmt.Errorf("%s.value is required", name)) + } + return errors.Join(errs...) +} + func ExecutorMatchesPlacementRequirements(executor ExecutorRef, req PlacementRequirements) bool { if req.ExecutorProvider != "" && executor.Provider != req.ExecutorProvider { return false @@ -2815,6 +2829,276 @@ const ( NetworkModeOffline NetworkMode = "offline" ) +type NetworkDestination struct { + Protocol string `json:"protocol,omitempty"` + Host string `json:"host,omitempty"` + Port int `json:"port,omitempty"` + ContentRef string `json:"content_ref,omitempty"` +} + +func (d NetworkDestination) Validate() error { + var errs []error + if d.ContentRef != "" { + if d.Protocol != "" || d.Host != "" || d.Port != 0 { + errs = append(errs, errors.New("content_ref is mutually exclusive with protocol, host, and port")) + } + if !strings.HasPrefix(d.ContentRef, "artifact://") && !strings.HasPrefix(d.ContentRef, "content://") { + errs = append(errs, errors.New("content_ref must use artifact:// or content:// scoped ref")) + } + return errors.Join(errs...) + } + if strings.TrimSpace(d.Host) == "" { + errs = append(errs, errors.New("host is required")) + } else if err := validateNetworkHostname(d.Host); err != nil { + errs = append(errs, fmt.Errorf("host: %w", err)) + } + if d.Port < 0 || d.Port > 65535 { + errs = append(errs, errors.New("port must be between 0 and 65535")) + } + if d.Protocol != "" { + switch strings.ToLower(strings.TrimSpace(d.Protocol)) { + case "tcp", "udp", "http", "https": + default: + errs = append(errs, fmt.Errorf("protocol %q is unsupported", d.Protocol)) + } + } + return errors.Join(errs...) +} + +type NetworkPolicy struct { + Mode NetworkMode `json:"mode,omitempty"` + AllowedDestinations []NetworkDestination `json:"allowed_destinations,omitempty"` + IngressHostnames []string `json:"ingress_hostnames,omitempty"` + AllowIngress bool `json:"allow_ingress,omitempty"` + AuditDestinations bool `json:"audit_destinations,omitempty"` +} + +func (p NetworkPolicy) Validate() error { + p.Mode = normalizeNetworkMode(p.Mode) + var errs []error + if !validNetworkMode(p.Mode) { + errs = append(errs, fmt.Errorf("network mode %q is unsupported", p.Mode)) + } + if len(p.IngressHostnames) > 0 && !p.AllowIngress { + errs = append(errs, errors.New("allow_ingress is required when ingress_hostnames are set")) + } + for i, destination := range p.AllowedDestinations { + if err := destination.Validate(); err != nil { + errs = append(errs, fmt.Errorf("allowed_destinations[%d]: %w", i, err)) + } + } + for i, hostname := range p.IngressHostnames { + if err := validateNetworkHostname(hostname); err != nil { + errs = append(errs, fmt.Errorf("ingress_hostnames[%d]: %w", i, err)) + } + } + return errors.Join(errs...) +} + +type P2PSessionMode string + +const ( + P2PSessionModeRelay P2PSessionMode = "relay" + P2PSessionModeStream P2PSessionMode = "p2p_stream" + P2PSessionModeContent P2PSessionMode = "p2p_content" +) + +type P2PSessionPolicy struct { + ProtocolVersion string `json:"protocol_version,omitempty"` + SessionID string `json:"session_id"` + ProductID string `json:"product_id"` + OrgID string `json:"org_id"` + NetworkID string `json:"network_id"` + OperatorID string `json:"operator_id"` + PolicyVersion string `json:"policy_version"` + IssuedAt time.Time `json:"issued_at"` + NotBefore time.Time `json:"not_before"` + ExpiresAt time.Time `json:"expires_at"` + SessionGeneration int `json:"session_generation"` + EventID string `json:"event_id"` + Nonce string `json:"nonce"` + Mode P2PSessionMode `json:"mode"` + Peers []P2PSessionPeer `json:"peers"` + AllowedProtocols []string `json:"allowed_protocols"` + ContentRefs []string `json:"content_refs,omitempty"` + RouteRefs []string `json:"route_refs,omitempty"` + AllowedDestinations []NetworkDestination `json:"allowed_destinations,omitempty"` + IngressHostnames []string `json:"ingress_hostnames,omitempty"` + Limits P2PSessionLimits `json:"limits"` + ProofPolicy string `json:"proof_policy,omitempty"` + RewardPolicyRef string `json:"reward_policy_ref,omitempty"` + RevocationFeedID string `json:"revocation_feed_id"` + RevocationFreshUntil time.Time `json:"revocation_fresh_until"` + KillDirectiveRefs []string `json:"kill_directive_refs,omitempty"` + PreviousGenerationHash string `json:"previous_generation_hash,omitempty"` + PolicyHash string `json:"policy_hash"` + Signature SignatureEnvelope `json:"signature"` +} + +type P2PSessionPeer struct { + ID string `json:"id"` + Role string `json:"role"` + Alias string `json:"alias,omitempty"` + ContentURL string `json:"content_url,omitempty"` + IdentitySHA256 string `json:"identity_sha256,omitempty"` +} + +func (p P2PSessionPeer) Validate() error { + var errs []error + if err := validateIdentifier("id", p.ID); err != nil { + errs = append(errs, err) + } + if err := validateIdentifier("role", p.Role); err != nil { + errs = append(errs, err) + } + if p.IdentitySHA256 != "" && !validSHA256Digest(p.IdentitySHA256) { + errs = append(errs, errors.New("identity_sha256 must be sha256 digest")) + } + return errors.Join(errs...) +} + +type P2PSessionLimits struct { + MaxPeers int `json:"max_peers"` + MaxBytesPerPeer int64 `json:"max_bytes_per_peer,omitempty"` + MaxSessionBytes int64 `json:"max_session_bytes,omitempty"` + MaxDurationSeconds int `json:"max_duration_seconds,omitempty"` +} + +func (l P2PSessionLimits) Validate() error { + var errs []error + if l.MaxPeers <= 0 { + errs = append(errs, errors.New("max_peers must be positive")) + } + if l.MaxBytesPerPeer < 0 { + errs = append(errs, errors.New("max_bytes_per_peer must be non-negative")) + } + if l.MaxSessionBytes < 0 { + errs = append(errs, errors.New("max_session_bytes must be non-negative")) + } + if l.MaxDurationSeconds < 0 { + errs = append(errs, errors.New("max_duration_seconds must be non-negative")) + } + return errors.Join(errs...) +} + +func (p P2PSessionPolicy) SigningPayload() P2PSessionPolicy { + p.PolicyHash = "" + p.Signature = SignatureEnvelope{} + return p +} + +func (p P2PSessionPolicy) Validate(now time.Time) error { + var errs []error + if p.ProtocolVersion != "" && p.ProtocolVersion != Version { + errs = append(errs, fmt.Errorf("protocol_version must be %q", Version)) + } + for _, field := range []struct { + name string + value string + }{ + {name: "session_id", value: p.SessionID}, + {name: "org_id", value: p.OrgID}, + {name: "network_id", value: p.NetworkID}, + {name: "operator_id", value: p.OperatorID}, + {name: "policy_version", value: p.PolicyVersion}, + {name: "event_id", value: p.EventID}, + {name: "nonce", value: p.Nonce}, + {name: "revocation_feed_id", value: p.RevocationFeedID}, + } { + if err := validateIdentifier(field.name, field.value); err != nil { + errs = append(errs, err) + } + } + if err := validateNetworkProductID(p.ProductID); err != nil { + errs = append(errs, err) + } + if p.IssuedAt.IsZero() { + errs = append(errs, errors.New("issued_at is required")) + } + if p.NotBefore.IsZero() { + errs = append(errs, errors.New("not_before is required")) + } + if p.ExpiresAt.IsZero() { + errs = append(errs, errors.New("expires_at is required")) + } + if !p.IssuedAt.IsZero() && !p.NotBefore.IsZero() && p.NotBefore.Before(p.IssuedAt) { + errs = append(errs, errors.New("not_before must not be before issued_at")) + } + if !p.NotBefore.IsZero() && !p.ExpiresAt.IsZero() && !p.NotBefore.Before(p.ExpiresAt) { + errs = append(errs, errors.New("not_before must be before expires_at")) + } + if !now.IsZero() { + if !p.NotBefore.IsZero() && now.Before(p.NotBefore) { + errs = append(errs, errors.New("session policy not yet valid")) + } + if !p.ExpiresAt.IsZero() && !now.Before(p.ExpiresAt) { + errs = append(errs, errors.New("session policy expired")) + } + } + if p.SessionGeneration <= 0 { + errs = append(errs, errors.New("session_generation must be positive")) + } + switch p.Mode { + case P2PSessionModeRelay, P2PSessionModeStream, P2PSessionModeContent: + case "": + errs = append(errs, errors.New("mode is required")) + default: + errs = append(errs, fmt.Errorf("mode %q is unsupported", p.Mode)) + } + if len(p.Peers) == 0 { + errs = append(errs, errors.New("peers is required")) + } + seenPeers := map[string]struct{}{} + for i, peer := range p.Peers { + if err := peer.Validate(); err != nil { + errs = append(errs, fmt.Errorf("peers[%d]: %w", i, err)) + } + if _, exists := seenPeers[peer.ID]; exists { + errs = append(errs, fmt.Errorf("peers[%d].id is duplicated", i)) + } + seenPeers[peer.ID] = struct{}{} + } + if err := p.Limits.Validate(); err != nil { + errs = append(errs, fmt.Errorf("limits: %w", err)) + } + if p.Limits.MaxPeers > 0 && len(p.Peers) > p.Limits.MaxPeers { + errs = append(errs, errors.New("peers exceed max_peers")) + } + if len(p.AllowedProtocols) == 0 { + errs = append(errs, errors.New("allowed_protocols is required")) + } + if p.Mode == P2PSessionModeContent && len(p.ContentRefs) == 0 { + errs = append(errs, errors.New("content_refs is required")) + } + for i, destination := range p.AllowedDestinations { + if err := destination.Validate(); err != nil { + errs = append(errs, fmt.Errorf("allowed_destinations[%d]: %w", i, err)) + } + } + for i, hostname := range p.IngressHostnames { + if err := validateNetworkHostname(hostname); err != nil { + errs = append(errs, fmt.Errorf("ingress_hostnames[%d]: %w", i, err)) + } + } + if p.RevocationFreshUntil.IsZero() { + errs = append(errs, errors.New("revocation_fresh_until is required")) + } else if !now.IsZero() && !p.RevocationFreshUntil.After(now) { + errs = append(errs, errors.New("revocation_fresh_until must be in the future")) + } + if p.PreviousGenerationHash != "" && !validSHA256Digest(p.PreviousGenerationHash) { + errs = append(errs, errors.New("previous_generation_hash must be sha256 digest")) + } + if p.PolicyHash == "" { + errs = append(errs, errors.New("policy_hash is required")) + } else if !validSHA256Digest(p.PolicyHash) { + errs = append(errs, errors.New("policy_hash must be sha256 digest")) + } + if err := validateSignatureEnvelope(p.Signature, "signature"); err != nil { + errs = append(errs, err) + } + return errors.Join(errs...) +} + type PlacementRequirements struct { ExecutorProvider string `json:"executor_provider,omitempty"` ExecutionSecurityTier ExecutionSecurityTier `json:"execution_security_tier,omitempty"` @@ -2942,6 +3226,193 @@ func (p AccessPolicy) Validate() error { return errors.Join(errs...) } +type TaskStatus string + +const ( + TaskQueued TaskStatus = "queued" + TaskLeased TaskStatus = "leased" + TaskRunning TaskStatus = "running" + TaskSucceeded TaskStatus = "succeeded" + TaskFailed TaskStatus = "failed" + TaskStalled TaskStatus = "stalled" + TaskCanceled TaskStatus = "canceled" +) + +type Task struct { + ProtocolVersion string `json:"protocol_version"` + ID string `json:"id"` + ProductID string `json:"product_id,omitempty"` + OrgID string `json:"org_id"` + PoolID string `json:"pool_id"` + PolicyID string `json:"policy_id"` + Status TaskStatus `json:"status,omitempty"` + Workload WorkloadSpec `json:"workload"` + Requirements PlacementRequirements `json:"requirements,omitzero"` + ProofPolicy ProofPolicy `json:"proof_policy,omitzero"` + NetworkPolicy NetworkPolicy `json:"network_policy,omitzero"` + P2PSessionPolicy *P2PSessionPolicy `json:"p2p_session_policy,omitempty"` + AccessPolicy AccessPolicy `json:"access_policy,omitzero"` + ResiduePolicy ResiduePolicy `json:"residue_policy,omitzero"` + ResourceLimits ResourceLimits `json:"resource_limits,omitzero"` + InputHash string `json:"input_hash"` + RequestedAt time.Time `json:"requested_at"` + TimeoutSeconds int `json:"timeout_seconds"` + Labels map[string]string `json:"labels,omitempty"` + Signature SignatureEnvelope `json:"signature"` +} + +func (t Task) Validate() error { + var errs []error + if t.ProtocolVersion != Version { + errs = append(errs, fmt.Errorf("protocol_version must be %q", Version)) + } + for _, field := range []struct { + name string + value string + }{ + {name: "id", value: t.ID}, + {name: "org_id", value: t.OrgID}, + {name: "pool_id", value: t.PoolID}, + {name: "policy_id", value: t.PolicyID}, + } { + if strings.TrimSpace(field.value) == "" { + errs = append(errs, fmt.Errorf("%s is required", field.name)) + } + } + if !validTaskStatus(t.Status) { + errs = append(errs, fmt.Errorf("status %q is unsupported", t.Status)) + } + if err := t.Workload.Validate(); err != nil { + errs = append(errs, fmt.Errorf("workload: %w", err)) + } + if err := ValidateProofPolicy(t.Requirements.ProofTier, t.ProofPolicy); err != nil { + errs = append(errs, err) + } + if err := t.NetworkPolicy.Validate(); err != nil { + errs = append(errs, fmt.Errorf("network_policy: %w", err)) + } + if t.P2PSessionPolicy != nil { + if err := t.P2PSessionPolicy.Validate(time.Time{}); err != nil { + errs = append(errs, fmt.Errorf("p2p_session_policy: %w", err)) + } + } + if err := t.AccessPolicy.Validate(); err != nil { + errs = append(errs, fmt.Errorf("access_policy: %w", err)) + } + if err := t.ResiduePolicy.Validate(ResiduePolicyValidation{}); err != nil { + errs = append(errs, fmt.Errorf("residue_policy: %w", err)) + } + if err := t.ResourceLimits.Validate(); err != nil { + errs = append(errs, fmt.Errorf("resource_limits: %w", err)) + } + if t.RequestedAt.IsZero() { + errs = append(errs, errors.New("requested_at is required")) + } + if t.TimeoutSeconds <= 0 { + errs = append(errs, errors.New("timeout_seconds must be positive")) + } + return errors.Join(errs...) +} + +func validTaskStatus(status TaskStatus) bool { + switch status { + case "", TaskQueued, TaskLeased, TaskRunning, TaskSucceeded, TaskFailed, TaskStalled, TaskCanceled: + return true + default: + return false + } +} + +type GPU struct { + Vendor string `json:"vendor,omitempty"` + Name string `json:"name,omitempty"` + MemoryBytes int64 `json:"memory_bytes,omitempty"` +} + +type Capabilities struct { + MachineID string `json:"machine_id,omitempty"` + OS string `json:"os,omitempty"` + Arch string `json:"arch,omitempty"` + CPUModel string `json:"cpu_model,omitempty"` + CPUCount int `json:"cpu_count,omitempty"` + MemoryBytes int64 `json:"memory_bytes,omitempty"` + DiskBytes int64 `json:"disk_bytes,omitempty"` + BandwidthMbps int64 `json:"bandwidth_mbps,omitempty"` + IngressCapable bool `json:"ingress_capable,omitempty"` + GPU []GPU `json:"gpu,omitempty"` + ExecutorProviders []string `json:"executor_providers,omitempty"` + Executors []ExecutorRef `json:"executors,omitempty"` + WorkloadKinds []string `json:"workload_kinds,omitempty"` + ExecutionTiers []ExecutionSecurityTier `json:"execution_tiers,omitempty"` + ProofTiers []ProofTier `json:"proof_tiers,omitempty"` + NetworkModes []NetworkMode `json:"network_modes,omitempty"` + CapabilityTags []string `json:"capability_tags,omitempty"` + CapabilityReports []ProviderCapabilityReport `json:"capability_reports,omitempty"` + HardwareSecurity HardwareSecurityCapabilities `json:"hardware_security,omitzero"` +} + +type Lease struct { + ID string `json:"id"` + TaskID string `json:"task_id"` + WorkerID string `json:"worker_id"` + PoolID string `json:"pool_id"` + Executor ExecutorRef `json:"executor"` + CapabilitySnapshot Capabilities `json:"capability_snapshot"` + AllowedPushTargets []string `json:"allowed_push_targets,omitempty"` + AllowedPullTargets []string `json:"allowed_pull_targets,omitempty"` + NetworkPolicy NetworkPolicy `json:"network_policy,omitzero"` + P2PSessionPolicy *P2PSessionPolicy `json:"p2p_session_policy,omitempty"` + ResiduePolicy ResiduePolicy `json:"residue_policy,omitzero"` + LeasedAt time.Time `json:"leased_at"` + ExpiresAt time.Time `json:"expires_at"` +} + +func (l Lease) Validate() error { + var errs []error + require := func(name, value string) { + if strings.TrimSpace(value) == "" { + errs = append(errs, fmt.Errorf("%s is required", name)) + } + } + require("id", l.ID) + require("task_id", l.TaskID) + require("worker_id", l.WorkerID) + require("pool_id", l.PoolID) + require("executor.provider", l.Executor.Provider) + require("executor.version", l.Executor.Version) + if strings.TrimSpace(l.CapabilitySnapshot.OS) == "" { + errs = append(errs, errors.New("capability_snapshot.os is required")) + } + if strings.TrimSpace(l.CapabilitySnapshot.Arch) == "" { + errs = append(errs, errors.New("capability_snapshot.arch is required")) + } + if err := l.NetworkPolicy.Validate(); err != nil { + errs = append(errs, fmt.Errorf("network_policy: %w", err)) + } + if l.P2PSessionPolicy != nil { + if err := l.P2PSessionPolicy.Validate(time.Time{}); err != nil { + errs = append(errs, fmt.Errorf("p2p_session_policy: %w", err)) + } + } + if err := l.ResiduePolicy.Validate(ResiduePolicyValidation{ + RequireSessionKey: true, + RequireExplicitWorkerBound: true, + RequirePolicyHash: true, + }); err != nil { + errs = append(errs, fmt.Errorf("residue_policy: %w", err)) + } + if l.LeasedAt.IsZero() { + errs = append(errs, errors.New("leased_at is required")) + } + if l.ExpiresAt.IsZero() { + errs = append(errs, errors.New("expires_at is required")) + } + if !l.LeasedAt.IsZero() && !l.ExpiresAt.IsZero() && !l.ExpiresAt.After(l.LeasedAt) { + errs = append(errs, errors.New("expires_at must be after leased_at")) + } + return errors.Join(errs...) +} + type ProviderContract struct { ProtocolVersion string `json:"protocol_version"` ID string `json:"id"` From 703bc5519549943e53019d378fc3d3b12f7fc99b Mon Sep 17 00:00:00 2001 From: Jon Langevin Date: Sat, 6 Jun 2026 00:38:20 -0400 Subject: [PATCH 08/13] test: cover public task proof client --- protocol/client_test.go | 169 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 169 insertions(+) create mode 100644 protocol/client_test.go diff --git a/protocol/client_test.go b/protocol/client_test.go new file mode 100644 index 0000000..68130de --- /dev/null +++ b/protocol/client_test.go @@ -0,0 +1,169 @@ +package protocol_test + +import ( + "context" + "encoding/json" + "errors" + "net/http" + "net/http/httptest" + "strings" + "testing" + "time" + + "github.com/GoCodeAlone/workflow-plugin-compute-core/protocol" +) + +func TestClientSubmitTaskUsesStrictJSONAndBearerAuth(t *testing.T) { + want := validTask(t) + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost || r.URL.Path != "/v1/tasks" { + t.Fatalf("request = %s %s, want POST /v1/tasks", r.Method, r.URL.Path) + } + if got := r.Header.Get("Authorization"); got != "Bearer test-token" { + t.Fatalf("authorization = %q", got) + } + if got := r.Header.Get("Content-Type"); got != "application/json" { + t.Fatalf("content-type = %q", got) + } + var submitted protocol.Task + if err := protocol.DecodeStrict(r.Body, &submitted); err != nil { + t.Fatalf("decode submitted task: %v", err) + } + if submitted.ID != want.ID { + t.Fatalf("submitted task id = %q, want %q", submitted.ID, want.ID) + } + w.WriteHeader(http.StatusCreated) + _ = json.NewEncoder(w).Encode(protocol.TaskResponse{Task: want}) + })) + defer server.Close() + + client, err := protocol.NewClient(protocol.ClientConfig{ServerURL: server.URL, Token: "test-token"}) + if err != nil { + t.Fatalf("new client: %v", err) + } + + got, err := client.SubmitTask(context.Background(), want) + if err != nil { + t.Fatalf("submit task: %v", err) + } + if got.ID != want.ID { + t.Fatalf("task id = %q, want %q", got.ID, want.ID) + } +} + +func TestClientListSnapshotAndProofLookup(t *testing.T) { + task := validTask(t) + proof := protocol.ProofReceipt{ + ID: "proof-1", + OrgID: task.OrgID, + TaskID: task.ID, + TaskHash: protocol.CanonicalHash(task), + InputHash: task.InputHash, + Verifier: protocol.VerifierResult{ + Provider: "shape", + Status: protocol.VerificationAccepted, + }, + AgentSignature: protocol.SoftwareAgentProofSignature(protocol.ProofReceipt{ID: "proof-1"}), + } + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch { + case r.Method == http.MethodGet && r.URL.Path == "/v1/tasks": + _ = json.NewEncoder(w).Encode(protocol.TaskList{ + Tasks: []protocol.Task{task}, + Stalls: []protocol.TaskStall{{ + TaskID: task.ID, + Reason: "waiting_for_worker", + AgeMS: 250, + }}, + }) + case r.Method == http.MethodGet && r.URL.Path == "/v1/proofs": + _ = json.NewEncoder(w).Encode(protocol.ProofList{Proofs: []protocol.ProofReceipt{proof}}) + default: + t.Fatalf("unexpected request = %s %s", r.Method, r.URL.Path) + } + })) + defer server.Close() + + client, err := protocol.NewClient(protocol.ClientConfig{ServerURL: server.URL, Timeout: time.Second}) + if err != nil { + t.Fatalf("new client: %v", err) + } + list, err := client.ListTasks(context.Background()) + if err != nil { + t.Fatalf("list tasks: %v", err) + } + if len(list.Tasks) != 1 || list.Tasks[0].ID != task.ID { + t.Fatalf("tasks = %+v", list.Tasks) + } + snapshot, ok, stalls, err := client.TaskSnapshot(context.Background(), task.ID) + if err != nil { + t.Fatalf("task snapshot: %v", err) + } + if !ok || snapshot.ID != task.ID || len(stalls) != 1 { + t.Fatalf("snapshot = %+v ok=%v stalls=%+v", snapshot, ok, stalls) + } + proofs, err := client.ListProofs(context.Background()) + if err != nil { + t.Fatalf("list proofs: %v", err) + } + if len(proofs) != 1 || proofs[0].TaskID != task.ID { + t.Fatalf("proofs = %+v", proofs) + } + found, ok, err := client.FindProof(context.Background(), task.ID) + if err != nil { + t.Fatalf("find proof: %v", err) + } + if !ok || found.ID != proof.ID { + t.Fatalf("found = %+v ok=%v", found, ok) + } +} + +func TestClientRejectsTokenOverNonLoopbackHTTP(t *testing.T) { + if _, err := protocol.NewClient(protocol.ClientConfig{ServerURL: "http://example.test", Token: "secret"}); err == nil { + t.Fatal("expected token over non-loopback http to fail") + } +} + +func TestClientStatusErrorDoesNotExposeBody(t *testing.T) { + const sentinel = "secret-response-body" + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + http.Error(w, sentinel, http.StatusUnauthorized) + })) + defer server.Close() + client, err := protocol.NewClient(protocol.ClientConfig{ServerURL: server.URL}) + if err != nil { + t.Fatalf("new client: %v", err) + } + + _, err = client.SubmitTask(context.Background(), validTask(t)) + if err == nil { + t.Fatal("expected status error") + } + var statusErr protocol.StatusError + if !errors.As(err, &statusErr) { + t.Fatalf("error %T = %v, want StatusError", err, err) + } + if statusErr.Method != http.MethodPost || statusErr.Path != "/v1/tasks" || statusErr.StatusCode != http.StatusUnauthorized { + t.Fatalf("status error = %+v", statusErr) + } + if strings.Contains(err.Error(), sentinel) { + t.Fatalf("status error leaked response body: %v", err) + } +} + +func TestClientStrictDecodeRejectsUnknownFields(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusCreated) + _, _ = w.Write([]byte(`{"task":{},"unexpected":true}`)) + })) + defer server.Close() + client, err := protocol.NewClient(protocol.ClientConfig{ServerURL: server.URL}) + if err != nil { + t.Fatalf("new client: %v", err) + } + + _, err = client.SubmitTask(context.Background(), validTask(t)) + if err == nil || !strings.Contains(err.Error(), "unknown field") { + t.Fatalf("SubmitTask error = %v, want strict decode unknown field", err) + } +} From 50990bee8f71ea3e2ce6e52df7ca842f206b9855 Mon Sep 17 00:00:00 2001 From: Jon Langevin Date: Sat, 6 Jun 2026 00:39:35 -0400 Subject: [PATCH 09/13] feat: add public task proof client --- protocol/client.go | 195 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 195 insertions(+) create mode 100644 protocol/client.go diff --git a/protocol/client.go b/protocol/client.go new file mode 100644 index 0000000..b531b5f --- /dev/null +++ b/protocol/client.go @@ -0,0 +1,195 @@ +package protocol + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net" + "net/http" + "net/url" + "strings" + "time" +) + +const defaultClientTimeout = 30 * time.Second + +type ClientConfig struct { + ServerURL string + Token string + HTTPClient *http.Client + Timeout time.Duration +} + +type Client struct { + baseURL *url.URL + token string + http *http.Client +} + +type TaskResponse struct { + Task Task `json:"task"` +} + +type TaskList struct { + Tasks []Task `json:"tasks"` + Stalls []TaskStall `json:"stalls,omitempty"` +} + +type TaskStall struct { + TaskID string `json:"task_id,omitempty"` + LeaseID string `json:"lease_id,omitempty"` + AgentID string `json:"agent_id,omitempty"` + Reason string `json:"reason"` + AgeMS int64 `json:"age_ms"` +} + +type ProofList struct { + Proofs []ProofReceipt `json:"proofs"` +} + +type StatusError struct { + Method string + Path string + StatusCode int +} + +func (e StatusError) Error() string { + return fmt.Sprintf("%s %s: got status %d", e.Method, e.Path, e.StatusCode) +} + +func NewClient(config ClientConfig) (*Client, error) { + parsed, err := url.ParseRequestURI(config.ServerURL) + if err != nil || (parsed.Scheme != "http" && parsed.Scheme != "https") { + return nil, errors.New("server_url must be absolute http(s) URL") + } + if config.Token != "" && parsed.Scheme != "https" && !isLoopbackHost(parsed.Hostname()) { + return nil, errors.New("server_url must use https when auth token is set") + } + httpClient := config.HTTPClient + if httpClient == nil { + timeout := config.Timeout + if timeout <= 0 { + timeout = defaultClientTimeout + } + httpClient = &http.Client{Timeout: timeout} + } + return &Client{ + baseURL: parsed, + token: config.Token, + http: httpClient, + }, nil +} + +func (c *Client) SubmitTask(ctx context.Context, task Task) (Task, error) { + var out TaskResponse + if err := c.doJSON(ctx, http.MethodPost, "/v1/tasks", task, http.StatusCreated, &out); err != nil { + return Task{}, err + } + return out.Task, nil +} + +func (c *Client) ListTasks(ctx context.Context) (TaskList, error) { + var out TaskList + if err := c.doJSON(ctx, http.MethodGet, "/v1/tasks", nil, http.StatusOK, &out); err != nil { + return TaskList{}, err + } + return out, nil +} + +func (c *Client) TaskSnapshot(ctx context.Context, id string) (Task, bool, []TaskStall, error) { + list, err := c.ListTasks(ctx) + if err != nil { + return Task{}, false, nil, err + } + matchingStalls := make([]TaskStall, 0) + for _, stall := range list.Stalls { + if stall.TaskID == id { + matchingStalls = append(matchingStalls, stall) + } + } + for _, task := range list.Tasks { + if task.ID == id { + return task, true, matchingStalls, nil + } + } + return Task{}, false, matchingStalls, nil +} + +func (c *Client) ListProofs(ctx context.Context) ([]ProofReceipt, error) { + var out ProofList + if err := c.doJSON(ctx, http.MethodGet, "/v1/proofs", nil, http.StatusOK, &out); err != nil { + return nil, err + } + return out.Proofs, nil +} + +func (c *Client) FindProof(ctx context.Context, taskID string) (ProofReceipt, bool, error) { + proofs, err := c.ListProofs(ctx) + if err != nil { + return ProofReceipt{}, false, err + } + for _, proof := range proofs { + if proof.TaskID == taskID { + return proof, true, nil + } + } + return ProofReceipt{}, false, nil +} + +func (c *Client) doJSON(ctx context.Context, method, path string, body any, want int, out any) error { + var requestBody io.Reader = http.NoBody + if body != nil { + data, err := json.Marshal(body) + if err != nil { + return fmt.Errorf("marshal request: %w", err) + } + requestBody = bytes.NewReader(data) + } + endpoint := c.baseURL.JoinPath(path) + req, err := http.NewRequestWithContext(ctx, method, endpoint.String(), requestBody) + if err != nil { + return fmt.Errorf("create request: %w", err) + } + if body != nil { + req.Header.Set("Content-Type", "application/json") + } + if c.token != "" { + req.Header.Set("Authorization", "Bearer "+c.token) + } + resp, err := c.http.Do(req) + if err != nil { + return fmt.Errorf("%s %s: %w", method, path, err) + } + defer resp.Body.Close() + if resp.StatusCode != want { + return StatusError{Method: method, Path: path, StatusCode: resp.StatusCode} + } + if out == nil { + return nil + } + return DecodeStrict(resp.Body, out) +} + +func DecodeStrict(r io.Reader, v any) error { + dec := json.NewDecoder(r) + dec.DisallowUnknownFields() + if err := dec.Decode(v); err != nil { + return err + } + if dec.Decode(&struct{}{}) == io.EOF { + return nil + } + return errors.New("multiple JSON values are not allowed") +} + +func isLoopbackHost(host string) bool { + host = strings.TrimSpace(strings.Trim(host, "[]")) + if host == "localhost" { + return true + } + ip := net.ParseIP(host) + return ip != nil && ip.IsLoopback() +} From 928fa9da5c6faafefb88973e239aa6acaceee6e8 Mon Sep 17 00:00:00 2001 From: Jon Langevin Date: Sat, 6 Jun 2026 00:40:38 -0400 Subject: [PATCH 10/13] docs: document task proof sdk boundary --- README.md | 34 ++++++--- protocol/product_capture_compat_test.go | 97 +++++++++++++++++++++++++ 2 files changed, 120 insertions(+), 11 deletions(-) create mode 100644 protocol/product_capture_compat_test.go diff --git a/README.md b/README.md index 03546c1..acf9177 100644 --- a/README.md +++ b/README.md @@ -3,10 +3,11 @@ Public Workflow plugin and Go module for compute protocol and provider catalog contracts. -Provider plugins use this module for shared compute provider-catalog data -types, validation helpers, and canonical hashing. They also declare a plugin -dependency on `workflow-plugin-compute-core` in `plugin.json`, giving Workflow a -registry dependency anchor separate from runtime execution plugins. +Provider and workload plugins use this module for shared compute protocol data +types, provider-catalog data types, validation helpers, canonical hashing, and a +minimal task/proof HTTP client. They also declare a plugin dependency on +`workflow-plugin-compute-core` in `plugin.json`, giving Workflow a registry +dependency anchor separate from runtime execution plugins. This is distinct from the Workflow plugin runtime contract: external plugins still expose their runtime capabilities through Workflow's gRPC/protobuf plugin @@ -14,13 +15,24 @@ service contracts. The provider-catalog structs in `protocol/` are the typed declaration data that provider plugins publish and `workflow-plugin-compute` validates. -The public catalog contract includes provider identity, org/pool scoping, -access visibility, supported workload and network modes, runtime profiles, -operation schemas, artifact declarations, residue policy, and upstream client -conformance evidence. Workflow applications should treat these declarations as -the portable provider-facing base contract; application-specific scheduling, -task state, settlement, dashboards, and worker supervision remain outside this -core plugin. +The public contract includes task/proof/lease wire shapes, provider identity, +org/pool scoping, access visibility, supported workload and network modes, +runtime profiles, operation schemas, artifact declarations, residue policy, and +upstream client conformance evidence. Workflow applications should treat these +declarations as the portable provider-facing base contract. + +The task/proof client covers submission and read-only observation: + +- `SubmitTask` +- `ListTasks` +- `TaskSnapshot` +- `ListProofs` +- `FindProof` + +Application-specific scheduling, task mutation policy, settlement, dashboards, +worker supervision, local-agent rollout, and control-plane storage/authz remain +outside this core plugin. Those concerns may be extracted into a reusable +control-plane component later, but they are not implemented by compute-core. This plugin intentionally advertises no module, step, trigger, or IaC runtime capabilities. diff --git a/protocol/product_capture_compat_test.go b/protocol/product_capture_compat_test.go new file mode 100644 index 0000000..d09dc4d --- /dev/null +++ b/protocol/product_capture_compat_test.go @@ -0,0 +1,97 @@ +package protocol_test + +import ( + "bytes" + "encoding/json" + "testing" + + "github.com/GoCodeAlone/workflow-plugin-compute-core/protocol" +) + +func TestProductCapturePublicSDKSurface(t *testing.T) { + workload := protocol.WorkloadSpec{ + Kind: protocol.WorkloadProvider, + Provider: &protocol.ProviderWorkload{ + ProviderConfig: protocol.ProviderConfig{ + PluginID: "workflow-plugin-product-capture", + ProviderID: "browser", + ContractID: "product-capture.browser.v1", + Version: "v1.0.0", + ConfigRef: "config://providers/product-capture/browser", + }, + Operation: "capture_product", + ImageRef: "ghcr.io/gocodealone/product-capture@sha256:" + testHex64("a"), + Input: mustTaskRawMessage(t, map[string]any{ + "url": "https://example.test/products/sku-1", + "allowed_hosts": []string{"example.test"}, + "capture_mode": string(protocol.ProductCaptureModeBrowser), + }), + }, + } + if err := workload.Validate(); err != nil { + t.Fatalf("workload invalid: %v", err) + } + task := protocol.Task{ + ProtocolVersion: protocol.Version, + ID: "task-product-capture-compat", + ProductID: "product-capture", + OrgID: "org-1", + PoolID: "pool-1", + PolicyID: "policy-1", + Status: protocol.TaskQueued, + Workload: workload, + InputHash: protocol.CanonicalHash(workload), + RequestedAt: validTask(t).RequestedAt, + TimeoutSeconds: 60, + Signature: protocol.SignatureEnvelope{ + Algorithm: "test-sha256", + KeyID: "test-key", + Value: testHex64("b"), + }, + } + if err := task.Validate(); err != nil { + t.Fatalf("task invalid: %v", err) + } + proof := protocol.ProofReceipt{ + ID: "proof-product-capture-compat", + TaskID: task.ID, + InputHash: task.InputHash, + Verifier: protocol.VerifierResult{ + Provider: "shape", + Status: protocol.VerificationAccepted, + }, + } + list := protocol.TaskList{ + Tasks: []protocol.Task{task}, + Stalls: []protocol.TaskStall{{ + TaskID: task.ID, + Reason: "waiting_for_worker", + AgeMS: 100, + }}, + } + data, err := json.Marshal(list) + if err != nil { + t.Fatalf("marshal task list: %v", err) + } + var roundTrip protocol.TaskList + if err := protocol.DecodeStrict(bytes.NewReader(data), &roundTrip); err != nil { + t.Fatalf("strict decode task list: %v", err) + } + if len(roundTrip.Tasks) != 1 || roundTrip.Tasks[0].ID != task.ID { + t.Fatalf("round trip tasks = %+v", roundTrip.Tasks) + } + if _, err := protocol.NewClient(protocol.ClientConfig{ServerURL: "https://wfcompute.example.test", Token: "token"}); err != nil { + t.Fatalf("new client: %v", err) + } + if proof.Verifier.Status != protocol.VerificationAccepted { + t.Fatalf("proof verifier status = %q", proof.Verifier.Status) + } +} + +func testHex64(char string) string { + var buf bytes.Buffer + for range 64 { + buf.WriteString(char) + } + return buf.String() +} From a0dfcbb1527445f21c665eeff0f5b55f0a0377da Mon Sep 17 00:00:00 2001 From: Jon Langevin Date: Sat, 6 Jun 2026 00:42:33 -0400 Subject: [PATCH 11/13] fix: align capability snapshot contract --- protocol/types.go | 73 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 54 insertions(+), 19 deletions(-) diff --git a/protocol/types.go b/protocol/types.go index 2f92e62..5337d1a 100644 --- a/protocol/types.go +++ b/protocol/types.go @@ -3092,6 +3092,8 @@ func (p P2PSessionPolicy) Validate(now time.Time) error { errs = append(errs, errors.New("policy_hash is required")) } else if !validSHA256Digest(p.PolicyHash) { errs = append(errs, errors.New("policy_hash must be sha256 digest")) + } else if p.PolicyHash != CanonicalHash(p.SigningPayload()) { + errs = append(errs, errors.New("policy_hash does not match signing payload")) } if err := validateSignatureEnvelope(p.Signature, "signature"); err != nil { errs = append(errs, err) @@ -3329,26 +3331,59 @@ type GPU struct { MemoryBytes int64 `json:"memory_bytes,omitempty"` } +type CapabilityReportStatus string + +const ( + CapabilitySupported CapabilityReportStatus = "supported" + CapabilityDegraded CapabilityReportStatus = "degraded" + CapabilityUnsupported CapabilityReportStatus = "unsupported" +) + +type CapabilityReport struct { + Provider string `json:"provider"` + Backend string `json:"backend,omitempty"` + Status CapabilityReportStatus `json:"status"` + Reason string `json:"reason,omitempty"` + Constraints []string `json:"constraints,omitempty"` +} + +type SigningCapability struct { + Provider string `json:"provider"` + Algorithm string `json:"algorithm"` + KeyID string `json:"key_id,omitempty"` + HardwareBacked bool `json:"hardware_backed,omitempty"` + Available bool `json:"available"` +} + +type Security struct { + TPM bool `json:"tpm,omitempty"` + SecureEnclave bool `json:"secure_enclave,omitempty"` + TEE []string `json:"tee,omitempty"` + HardwareClasses []string `json:"hardware_classes,omitempty"` + HardwareAttestations []HardwareAttestation `json:"hardware_attestations,omitempty"` + Signing []SigningCapability `json:"signing,omitempty"` +} + type Capabilities struct { - MachineID string `json:"machine_id,omitempty"` - OS string `json:"os,omitempty"` - Arch string `json:"arch,omitempty"` - CPUModel string `json:"cpu_model,omitempty"` - CPUCount int `json:"cpu_count,omitempty"` - MemoryBytes int64 `json:"memory_bytes,omitempty"` - DiskBytes int64 `json:"disk_bytes,omitempty"` - BandwidthMbps int64 `json:"bandwidth_mbps,omitempty"` - IngressCapable bool `json:"ingress_capable,omitempty"` - GPU []GPU `json:"gpu,omitempty"` - ExecutorProviders []string `json:"executor_providers,omitempty"` - Executors []ExecutorRef `json:"executors,omitempty"` - WorkloadKinds []string `json:"workload_kinds,omitempty"` - ExecutionTiers []ExecutionSecurityTier `json:"execution_tiers,omitempty"` - ProofTiers []ProofTier `json:"proof_tiers,omitempty"` - NetworkModes []NetworkMode `json:"network_modes,omitempty"` - CapabilityTags []string `json:"capability_tags,omitempty"` - CapabilityReports []ProviderCapabilityReport `json:"capability_reports,omitempty"` - HardwareSecurity HardwareSecurityCapabilities `json:"hardware_security,omitzero"` + MachineID string `json:"machine_id,omitempty"` + OS string `json:"os,omitempty"` + Arch string `json:"arch,omitempty"` + CPUModel string `json:"cpu_model,omitempty"` + CPUCount int `json:"cpu_count,omitempty"` + MemoryBytes int64 `json:"memory_bytes,omitempty"` + DiskBytes int64 `json:"disk_bytes,omitempty"` + BandwidthMbps int64 `json:"bandwidth_mbps,omitempty"` + IngressCapable bool `json:"ingress_capable,omitempty"` + GPU []GPU `json:"gpu,omitempty"` + ExecutorProviders []string `json:"executor_providers,omitempty"` + Executors []ExecutorRef `json:"executors,omitempty"` + WorkloadKinds []string `json:"workload_kinds,omitempty"` + ExecutionTiers []ExecutionSecurityTier `json:"execution_tiers,omitempty"` + ProofTiers []ProofTier `json:"proof_tiers,omitempty"` + NetworkModes []NetworkMode `json:"network_modes,omitempty"` + CapabilityTags []string `json:"capability_tags,omitempty"` + CapabilityReports []CapabilityReport `json:"capability_reports,omitempty"` + HardwareSecurity Security `json:"hardware_security,omitzero"` } type Lease struct { From 72144c065edf0dc0c464347a05efd84b9164d854 Mon Sep 17 00:00:00 2001 From: Jon Langevin Date: Sat, 6 Jun 2026 00:48:46 -0400 Subject: [PATCH 12/13] fix: validate task signature shape --- protocol/client_test.go | 19 +++++++++++++++++++ protocol/task_test.go | 2 ++ protocol/types.go | 3 +++ 3 files changed, 24 insertions(+) diff --git a/protocol/client_test.go b/protocol/client_test.go index 68130de..a66aa7c 100644 --- a/protocol/client_test.go +++ b/protocol/client_test.go @@ -118,6 +118,25 @@ func TestClientListSnapshotAndProofLookup(t *testing.T) { } } +func TestClientPreservesServerURLBasePath(t *testing.T) { + task := validTask(t) + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost || r.URL.Path != "/api/v1/tasks" { + t.Fatalf("request = %s %s, want POST /api/v1/tasks", r.Method, r.URL.Path) + } + w.WriteHeader(http.StatusCreated) + _ = json.NewEncoder(w).Encode(protocol.TaskResponse{Task: task}) + })) + defer server.Close() + client, err := protocol.NewClient(protocol.ClientConfig{ServerURL: server.URL + "/api"}) + if err != nil { + t.Fatalf("new client: %v", err) + } + if _, err := client.SubmitTask(context.Background(), task); err != nil { + t.Fatalf("submit task: %v", err) + } +} + func TestClientRejectsTokenOverNonLoopbackHTTP(t *testing.T) { if _, err := protocol.NewClient(protocol.ClientConfig{ServerURL: "http://example.test", Token: "secret"}); err == nil { t.Fatal("expected token over non-loopback http to fail") diff --git a/protocol/task_test.go b/protocol/task_test.go index ab78eae..627fea4 100644 --- a/protocol/task_test.go +++ b/protocol/task_test.go @@ -25,6 +25,7 @@ func TestTaskRejectsMalformedPortableContract(t *testing.T) { task.Workload = protocol.WorkloadSpec{Kind: protocol.WorkloadProvider} task.TimeoutSeconds = -1 task.ResourceLimits = protocol.ResourceLimits{RuntimeSeconds: -1} + task.Signature = protocol.SignatureEnvelope{} err := task.Validate() if err == nil { @@ -37,6 +38,7 @@ func TestTaskRejectsMalformedPortableContract(t *testing.T) { "workload", "timeout_seconds", "resource_limits", + "signature", } { if !strings.Contains(err.Error(), want) { t.Fatalf("Validate() = %v, want %q", err, want) diff --git a/protocol/types.go b/protocol/types.go index 5337d1a..df85e96 100644 --- a/protocol/types.go +++ b/protocol/types.go @@ -3307,6 +3307,9 @@ func (t Task) Validate() error { if err := t.ResourceLimits.Validate(); err != nil { errs = append(errs, fmt.Errorf("resource_limits: %w", err)) } + if err := validateSignatureEnvelope(t.Signature, "signature"); err != nil { + errs = append(errs, err) + } if t.RequestedAt.IsZero() { errs = append(errs, errors.New("requested_at is required")) } From 73d8e43b10b005a0a3845e794ff6e58c3ad1f1ae Mon Sep 17 00:00:00 2001 From: Jon Langevin Date: Sat, 6 Jun 2026 00:54:28 -0400 Subject: [PATCH 13/13] chore: complete task proof sdk scope lock --- docs/plans/2026-06-06-task-proof-sdk.md | 2 +- docs/plans/2026-06-06-task-proof-sdk.md.scope-lock | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) delete mode 100644 docs/plans/2026-06-06-task-proof-sdk.md.scope-lock diff --git a/docs/plans/2026-06-06-task-proof-sdk.md b/docs/plans/2026-06-06-task-proof-sdk.md index 88babcf..30f7f73 100644 --- a/docs/plans/2026-06-06-task-proof-sdk.md +++ b/docs/plans/2026-06-06-task-proof-sdk.md @@ -31,7 +31,7 @@ |------|-------|-------|--------| | 1 | feat: add task proof sdk | Task 1, Task 2, Task 3, Task 4, Task 5 | feat/task-proof-sdk | -**Status:** Locked 2026-06-06T04:30:58Z +**Status:** Complete 2026-06-06T04:54:18Z ## Requirements Trace diff --git a/docs/plans/2026-06-06-task-proof-sdk.md.scope-lock b/docs/plans/2026-06-06-task-proof-sdk.md.scope-lock deleted file mode 100644 index 3e0c70a..0000000 --- a/docs/plans/2026-06-06-task-proof-sdk.md.scope-lock +++ /dev/null @@ -1 +0,0 @@ -57f85ca33fb55a9c7279c3149b808ac475f671e2e925a62313587926d7846f4e