OpsOrch · yusufaytas · Jan 7, 2026 · Jan 6, 2026
diff --git a/AGENTS.md b/AGENTS.md
@@ -127,9 +127,46 @@ Providers can be:
 
 OpsOrch Core can launch a local adapter binary as a child process (no network hops) when `OPSORCH_<CAP>_PLUGIN` is set or the provider config includes a `plugin` path. RPC is JSON over stdin/stdout:
 
-- Request: `{ "method": "incident.query" | "alert.query" | "log.query" | "metric.describe" | ..., "config": {...}, "payload": {...} }`
+- Request: `{ "method": "<capability>.<operation>", "config": {...}, "payload": {...} }`
 - Response: `{ "result": <value>, "error": "<msg>" }`
 
+**RPC Methods by Capability:**
+
+| Capability | Method | Payload |
+|------------|--------|---------|
+| incident | `incident.query` | `IncidentQuery` |
+| incident | `incident.list` | `null` |
+| incident | `incident.get` | `{ "id": string }` |
+| incident | `incident.create` | `CreateIncidentInput` |
+| incident | `incident.update` | `{ "id": string, "input": UpdateIncidentInput }` |
+| incident | `incident.timeline.get` | `{ "id": string }` |
+| incident | `incident.timeline.append` | `{ "id": string, "entry": TimelineAppendInput }` |
+| alert | `alert.query` | `AlertQuery` |
+| alert | `alert.get` | `{ "id": string }` |
+| log | `log.query` | `LogQuery` |
+| metric | `metric.query` | `MetricQuery` |
+| metric | `metric.describe` | `QueryScope` |
+| ticket | `ticket.query` | `TicketQuery` |
+| ticket | `ticket.get` | `{ "id": string }` |
+| ticket | `ticket.create` | `CreateTicketInput` |
+| ticket | `ticket.update` | `{ "id": string, "input": UpdateTicketInput }` |
+| messaging | `messaging.send` | `Message` |
+| service | `service.query` | `ServiceQuery` |
+| deployment | `deployment.query` | `DeploymentQuery` |
+| deployment | `deployment.get` | `{ "id": string }` |
+| team | `team.query` | `TeamQuery` |
+| team | `team.get` | `{ "id": string }` |
+| team | `team.members` | `{ "teamID": string }` |
+| orchestration | `orchestration.plans.query` | `OrchestrationPlanQuery` |
+| orchestration | `orchestration.plans.get` | `{ "planId": string }` |
+| orchestration | `orchestration.runs.query` | `OrchestrationRunQuery` |
+| orchestration | `orchestration.runs.get` | `{ "runId": string }` |
+| orchestration | `orchestration.runs.start` | `{ "planId": string }` |
+| orchestration | `orchestration.runs.steps.complete` | `{ "runId": string, "stepId": string, "actor": string, "note": string }` |
+| secret | `secret.get` | `{ "key": string }` |
+| secret | `secret.put` | `{ "key": string, "value": string }` |
+
+
 The plugin process stays alive and receives multiple RPC calls on the same stdio stream.
 
 ### Building Plugins

diff --git a/README.md b/README.md
@@ -5,7 +5,7 @@
 [![License](https://img.shields.io/github/license/opsorch/opsorch-core)](https://github.com/opsorch/opsorch-core/blob/main/LICENSE)
 [![CI](https://github.com/opsorch/opsorch-core/workflows/CI/badge.svg)](https://github.com/opsorch/opsorch-core/actions)
 
-OpsOrch Core is a stateless, open-source orchestration layer that unifies incident, log, metric, ticket, messaging, and deployment workflows behind a single, provider-agnostic API. 
+OpsOrch Core is a stateless, open-source orchestration layer that unifies incident, log, metric, ticket, messaging, deployment, and workflow orchestration behind a single, provider-agnostic API. 
 It does not store operational data, and it does not include any built-in vendor integrations.  
 External adapters implement provider logic and are loaded dynamically by OpsOrch Core.
 
@@ -27,7 +27,7 @@ Adapters live in separate repos such as:
 
 OpsOrch Core never links vendor logic directly. Each capability is resolved at runtime by either importing an **in-process provider** (Go package that registers itself) or by launching a **local plugin binary** that speaks OpsOrch's stdio RPC protocol. At startup OpsOrch checks for environment overrides first, then falls back to any persisted configuration stored via the secret provider.
 
-Environment variables for any capability (`incident`, `alert`, `log`, `metric`, `ticket`, `messaging`, `service`, `deployment`, `team`, `secret`):
+Environment variables for any capability (`incident`, `alert`, `log`, `metric`, `ticket`, `messaging`, `service`, `deployment`, `team`, `orchestration`, `secret`):
 - `OPSORCH_<CAP>_PROVIDER=<registered name>` – name passed to the corresponding registry
 - `OPSORCH_<CAP>_CONFIG=<json>` – decrypted config map forwarded to the constructor
 - `OPSORCH_<CAP>_PLUGIN=/path/to/binary` – optional local plugin that overrides `OPSORCH_<CAP>_PROVIDER`
@@ -148,6 +148,32 @@ curl -s http://localhost:8080/teams/engineering
 
 # Get team members (requires team provider)
 curl -s http://localhost:8080/teams/engineering/members
+
+# Query Orchestration Plans (requires orchestration provider)
+curl -s -X POST http://localhost:8080/orchestration/plans/query \
+  -H "Content-Type: application/json" \
+  -d '{"scope": {"service": "api"}, "limit": 10}'
+
+# Get a specific plan (requires orchestration provider)
+curl -s http://localhost:8080/orchestration/plans/release-checklist
+
+# Query Orchestration Runs (requires orchestration provider)
+curl -s -X POST http://localhost:8080/orchestration/runs/query \
+  -H "Content-Type: application/json" \
+  -d '{"statuses": ["running", "blocked"]}'
+
+# Get a specific run (requires orchestration provider)
+curl -s http://localhost:8080/orchestration/runs/run-123
+
+# Start a new run from a plan (requires orchestration provider)
+curl -s -X POST http://localhost:8080/orchestration/runs \
+  -H "Content-Type: application/json" \
+  -d '{"planId": "release-checklist"}'
+
+# Complete a manual step (requires orchestration provider)
+curl -s -X POST http://localhost:8080/orchestration/runs/run-123/steps/approval/complete \
+  -H "Content-Type: application/json" \
+  -d '{"actor": "ops@example.com", "note": "Approved after review"}'
 ```
 
 Add `-H "Authorization: Bearer <token>"` to each curl when `OPSORCH_BEARER_TOKEN` is set.
@@ -251,6 +277,7 @@ OpsOrch exposes API endpoints for:
 - Services
 - Deployments
 - Teams
+- Orchestration (Plans and Runs)
 
 Schemas live under `schema/` and evolve as the system matures.
 
@@ -293,6 +320,27 @@ OpsOrch uses structured expressions for querying logs and metrics, replacing fre
 ### Provider Deep Links
 Normalized resources now carry optional `url` fields for deep linking back to upstream systems. For individual resources (incidents, alerts, tickets, etc.), the URL links to that specific resource. For collections like log entries and metric series, the URL links to the query results or filtered view in the source system (e.g., Datadog logs dashboard, Grafana metric chart). Adapters should populate these URLs whenever the provider exposes canonical UI links so OpsOrch clients can jump directly to the source system. The field is passthrough only—OpsOrch does not generate, log, or modify these URLs—so adapters remain responsible for ensuring they do not leak secrets.
 
+### Orchestration: Plans and Runs
+
+Ops teams have logs, metrics, tickets, and alerts—but during incidents or releases, the real challenge is knowing what to do next, in what order, and who needs to do it. Playbooks, runbooks, and release checklists encode that operational knowledge, but they often live as docs or tribal knowledge rather than something that actively guides execution.
+
+The orchestration capability provides a unified API for workflow engines. OpsOrch does not replace these engines—it exposes their plans and runs through a normalized interface so clients can query state and complete manual steps without direct integration with each provider. The actual execution remains provider-owned.
+
+**Key concepts:**
+
+- **Plan**: A provider-owned template describing ordered steps for an operational workflow. Plans are read-only—OpsOrch queries them from the upstream engine.
+- **Run**: A live instance of a plan with runtime state for each step.
+- **Step**: A unit of work within a plan. Steps have types (`manual`, `observe`, `invoke`, `verify`, `record`) and can depend on other steps.
+- **Manual Step Completion**: Clients can complete manual/blocked steps via the API, which forwards the completion to the upstream workflow engine.
+
+**API Endpoints:**
+- `POST /orchestration/plans/query` - Query plans with filters (scope, tags, limit)
+- `GET /orchestration/plans/{planId}` - Get a specific plan with all steps
+- `POST /orchestration/runs/query` - Query runs with filters (statuses, planId, scope)
+- `GET /orchestration/runs/{runId}` - Get a run with current step states
+- `POST /orchestration/runs` - Start a new run from a plan
+- `POST /orchestration/runs/{runId}/steps/{stepId}/complete` - Complete a manual step
+
 ### Adapter Architecture
 OpsOrch Core contains **no provider logic**.  
 Adapters implement capability interfaces in their own repos and register with the registry.

diff --git a/api/capability.go b/api/capability.go
@@ -23,6 +23,8 @@ func normalizeCapability(name string) (string, bool) {
 		return "deployment", true
 	case "team", "teams":
 		return "team", true
+	case "orchestration", "orchestrations":
+		return "orchestration", true
 	default:
 		return "", false
 	}

diff --git a/api/orchestration_handler.go b/api/orchestration_handler.go
@@ -0,0 +1,151 @@
+package api
+
+import (
+	"fmt"
+	"net/http"
+	"strings"
+
+	"github.com/opsorch/opsorch-core/orcherr"
+	"github.com/opsorch/opsorch-core/orchestration"
+	"github.com/opsorch/opsorch-core/schema"
+)
+
+// OrchestrationHandler holds the orchestration provider that backs the /orchestration HTTP routes.
+type OrchestrationHandler struct {
+	provider orchestration.Provider // nil when no orchestration provider is configured
+}
+
+// newOrchestrationHandlerFromEnv builds the orchestration handler from the
+// configuration resolved by loadProviderConfig for the "orchestration"
+// capability.
+//
+// Resolution order:
+//   - a configuration error is returned as-is;
+//   - a non-empty plugin path wins and yields a plugin-backed provider;
+//   - with neither plugin path nor provider name, a zero-value handler
+//     (nil provider) is returned with a nil error;
+//   - otherwise the named constructor is looked up in the orchestration
+//     registry and invoked with the resolved config.
+func newOrchestrationHandlerFromEnv(sec SecretProvider) (OrchestrationHandler, error) {
+	providerName, providerCfg, plugin, err := loadProviderConfig(sec, "orchestration", "OPSORCH_ORCHESTRATION_PROVIDER", "OPSORCH_ORCHESTRATION_CONFIG", "OPSORCH_ORCHESTRATION_PLUGIN")
+	if err != nil {
+		return OrchestrationHandler{}, err
+	}
+
+	switch {
+	case plugin != "":
+		// A plugin binary overrides any registered provider name.
+		return OrchestrationHandler{provider: newOrchestrationPluginProvider(plugin, providerCfg)}, nil
+	case providerName == "":
+		// Nothing configured: leave the provider nil.
+		return OrchestrationHandler{}, nil
+	}
+
+	build, found := orchestration.LookupProvider(providerName)
+	if !found {
+		return OrchestrationHandler{}, fmt.Errorf("orchestration provider %s not registered", providerName)
+	}
+
+	impl, err := build(providerCfg)
+	if err != nil {
+		return OrchestrationHandler{}, err
+	}
+	return OrchestrationHandler{provider: impl}, nil
+}
+
+// handleOrchestration serves the orchestration HTTP routes:
+//
+//	POST /orchestration/plans/query
+//	GET  /orchestration/plans/{planId}
+//	POST /orchestration/runs/query
+//	POST /orchestration/runs
+//	GET  /orchestration/runs/{runId}
+//	POST /orchestration/runs/{runId}/steps/{stepId}/complete
+//
+// It returns true once it has written a response, and false (writing nothing)
+// when the first path segment is not exactly "orchestration" or when no route
+// matches the path and method. A single trailing slash on the path is ignored.
+//
+// When no orchestration provider is configured, every request under
+// /orchestration gets 501 with code "orchestration_provider_missing".
+// Request bodies that fail to decode yield 400 "bad_request"; provider errors
+// are reported through writeProviderError. An audit entry is logged only
+// after the provider call has succeeded.
+func (s *Server) handleOrchestration(w http.ResponseWriter, r *http.Request) bool {
+	// Cheap pre-filter so unrelated requests skip the path split below.
+	if !strings.HasPrefix(r.URL.Path, "/orchestration") {
+		return false
+	}
+
+	path := strings.TrimSuffix(r.URL.Path, "/")
+	segments := strings.Split(strings.Trim(path, "/"), "/")
+
+	// HasPrefix alone also accepts sibling paths such as /orchestration-foo,
+	// which would otherwise be answered (or rejected with 501) here. Require
+	// the first segment to match exactly. strings.Split with a non-empty
+	// separator always yields at least one element, so segments[0] is safe.
+	if segments[0] != "orchestration" {
+		return false
+	}
+
+	if s.orchestration.provider == nil {
+		writeError(w, http.StatusNotImplemented, orcherr.OpsOrchError{Code: "orchestration_provider_missing", Message: "orchestration provider not configured"})
+		return true
+	}
+
+	switch {
+	// POST /orchestration/plans/query
+	case len(segments) == 3 && segments[1] == "plans" && segments[2] == "query" && r.Method == http.MethodPost:
+		var query schema.OrchestrationPlanQuery
+		if err := decodeJSON(r, &query); err != nil {
+			writeError(w, http.StatusBadRequest, orcherr.OpsOrchError{Code: "bad_request", Message: err.Error()})
+			return true
+		}
+		plans, err := s.orchestration.provider.QueryPlans(r.Context(), query)
+		if err != nil {
+			writeProviderError(w, err)
+			return true
+		}
+		logAudit(r, "orchestration.plans.query")
+		writeJSON(w, http.StatusOK, plans)
+		return true
+
+	// GET /orchestration/plans/{planId}
+	case len(segments) == 3 && segments[1] == "plans" && r.Method == http.MethodGet:
+		planID := segments[2]
+		plan, err := s.orchestration.provider.GetPlan(r.Context(), planID)
+		if err != nil {
+			writeProviderError(w, err)
+			return true
+		}
+		logAudit(r, "orchestration.plans.get")
+		writeJSON(w, http.StatusOK, plan)
+		return true
+
+	// POST /orchestration/runs/query
+	case len(segments) == 3 && segments[1] == "runs" && segments[2] == "query" && r.Method == http.MethodPost:
+		var query schema.OrchestrationRunQuery
+		if err := decodeJSON(r, &query); err != nil {
+			writeError(w, http.StatusBadRequest, orcherr.OpsOrchError{Code: "bad_request", Message: err.Error()})
+			return true
+		}
+		runs, err := s.orchestration.provider.QueryRuns(r.Context(), query)
+		if err != nil {
+			writeProviderError(w, err)
+			return true
+		}
+		logAudit(r, "orchestration.runs.query")
+		writeJSON(w, http.StatusOK, runs)
+		return true
+
+	// POST /orchestration/runs
+	case len(segments) == 2 && segments[1] == "runs" && r.Method == http.MethodPost:
+		var input struct {
+			PlanID string `json:"planId"`
+		}
+		if err := decodeJSON(r, &input); err != nil {
+			writeError(w, http.StatusBadRequest, orcherr.OpsOrchError{Code: "bad_request", Message: err.Error()})
+			return true
+		}
+		// A run cannot be started without naming the plan it instantiates.
+		if input.PlanID == "" {
+			writeError(w, http.StatusBadRequest, orcherr.OpsOrchError{Code: "bad_request", Message: "planId is required"})
+			return true
+		}
+		run, err := s.orchestration.provider.StartRun(r.Context(), input.PlanID)
+		if err != nil {
+			writeProviderError(w, err)
+			return true
+		}
+		logAudit(r, "orchestration.runs.start")
+		writeJSON(w, http.StatusCreated, run)
+		return true
+
+	// GET /orchestration/runs/{runId}
+	case len(segments) == 3 && segments[1] == "runs" && r.Method == http.MethodGet:
+		runID := segments[2]
+		run, err := s.orchestration.provider.GetRun(r.Context(), runID)
+		if err != nil {
+			writeProviderError(w, err)
+			return true
+		}
+		logAudit(r, "orchestration.runs.get")
+		writeJSON(w, http.StatusOK, run)
+		return true
+
+	// POST /orchestration/runs/{runId}/steps/{stepId}/complete
+	case len(segments) == 6 && segments[1] == "runs" && segments[3] == "steps" && segments[5] == "complete" && r.Method == http.MethodPost:
+		runID := segments[2]
+		stepID := segments[4]
+		var input struct {
+			Actor string `json:"actor"`
+			Note  string `json:"note"`
+		}
+		if err := decodeJSON(r, &input); err != nil {
+			writeError(w, http.StatusBadRequest, orcherr.OpsOrchError{Code: "bad_request", Message: err.Error()})
+			return true
+		}
+		if err := s.orchestration.provider.CompleteStep(r.Context(), runID, stepID, input.Actor, input.Note); err != nil {
+			writeProviderError(w, err)
+			return true
+		}
+		logAudit(r, "orchestration.runs.steps.complete")
+		writeJSON(w, http.StatusOK, map[string]string{"status": "ok"})
+		return true
+
+	default:
+		return false
+	}
+}