Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ func TestExecutionNotesCoverageAdditional(t *testing.T) {
req := httptest.NewRequest(http.MethodPost, "/notes", strings.NewReader(`{"message":" kept "}`))
req.Header.Set("Content-Type", "application/json")
req.Header.Set("X-Execution-ID", "exec-2")
req.Header.Set("X-Agent-Node-ID", "node-2")
rec := httptest.NewRecorder()
router.ServeHTTP(rec, req)

Expand Down
102 changes: 101 additions & 1 deletion control-plane/internal/handlers/execution_notes.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@ package handlers

import (
"context"
"errors"
"fmt"
"net/http"
"strings"
"time"

"github.com/Agent-Field/agentfield/control-plane/internal/events"
"github.com/Agent-Field/agentfield/control-plane/internal/server/middleware"
"github.com/Agent-Field/agentfield/control-plane/pkg/types"

"github.com/gin-gonic/gin"
Expand All @@ -20,6 +22,22 @@ type ExecutionNoteStorage interface {
GetExecutionEventBus() *events.ExecutionEventBus
}

// executionNoteDIDDocumentLookup is an optional capability of the storage
// provider: fetching the DID document record stored for a single DID.
// resolveExecutionNoteAgentIDByDID type-asserts the provider against this
// interface and uses it as the first resolution path when available.
type executionNoteDIDDocumentLookup interface {
GetDIDDocument(ctx context.Context, did string) (*types.DIDDocumentRecord, error)
}

// executionNoteAgentDIDLister is an optional capability of the storage
// provider: listing the known agent DID entries.
// resolveExecutionNoteAgentIDByDID type-asserts the provider against this
// interface and scans the returned list for the caller's DID when the direct
// DID document lookup did not produce a result.
type executionNoteAgentDIDLister interface {
ListAgentDIDs(ctx context.Context) ([]*types.AgentDIDInfo, error)
}

// executionNoteAuthorizationError is the error type returned when a caller is
// not allowed to add a note to an execution. It is a distinct type so that
// callers can tell an authorization failure apart from other update failures
// with errors.As and answer with 403 instead of 500.
type executionNoteAuthorizationError struct {
	// message is the human-readable reason the request was rejected.
	message string
}

// Error implements the error interface and yields the rejection reason
// unchanged.
func (ae *executionNoteAuthorizationError) Error() string {
	return ae.message
}

// AddNoteRequest represents the request body for adding a note to an execution
type AddNoteRequest struct {
Message string `json:"message" binding:"required"`
Expand Down Expand Up @@ -76,12 +94,21 @@ func AddExecutionNoteHandler(storageProvider ExecutionNoteStorage) gin.HandlerFu
}

// Update the execution with the new note
ctx := context.Background()
ctx := c.Request.Context()
callerAgentID, err := executionNoteCallerAgentID(ctx, c, storageProvider)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("Failed to resolve caller identity: %v", err)})
return
}

var runID string
updated, err := storageProvider.UpdateExecutionRecord(ctx, executionID, func(execution *types.Execution) (*types.Execution, error) {
if execution == nil {
return nil, fmt.Errorf("execution with ID %s not found", executionID)
}
if err := ensureExecutionNoteOwnership(callerAgentID, execution); err != nil {
return nil, err
}

// Store run ID for SSE event (run_id is the workflow ID equivalent)
runID = execution.RunID
Expand All @@ -99,6 +126,14 @@ func AddExecutionNoteHandler(storageProvider ExecutionNoteStorage) gin.HandlerFu
})

if err != nil {
var authzErr *executionNoteAuthorizationError
if errors.As(err, &authzErr) {
c.JSON(http.StatusForbidden, gin.H{
"error": "execution_ownership_mismatch",
"message": authzErr.message,
})
return
}
c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("Failed to add note: %v", err)})
return
}
Expand Down Expand Up @@ -130,6 +165,71 @@ func AddExecutionNoteHandler(storageProvider ExecutionNoteStorage) gin.HandlerFu
}
}

// ensureExecutionNoteOwnership decides whether callerAgentID may add a note to
// execution.
//
// The execution's AgentNodeID (with surrounding whitespace removed) is taken
// as the owner. The checks run in this order, and the first one that fails
// determines the message of the returned *executionNoteAuthorizationError:
//
//  1. the execution must have a non-empty owner;
//  2. the caller identity must be non-empty;
//  3. the caller identity must equal the owner exactly.
//
// callerAgentID is compared as given; it is not trimmed here. A nil error
// means the caller owns the execution. execution must be non-nil.
func ensureExecutionNoteOwnership(callerAgentID string, execution *types.Execution) error {
	owner := strings.TrimSpace(execution.AgentNodeID)

	var reason string
	switch {
	case owner == "":
		reason = "execution owner is required to add notes"
	case callerAgentID == "":
		reason = "caller agent identity is required to add notes to this execution"
	case callerAgentID != owner:
		reason = "this execution does not belong to the requesting agent"
	default:
		// Return an untyped nil so the error interface itself is nil.
		return nil
	}

	return &executionNoteAuthorizationError{message: reason}
}

// executionNoteCallerAgentID determines the agent ID of the caller of an
// execution-note request. Sources are consulted in this order and the first
// one that applies wins:
//
//  1. the DID returned by middleware.GetVerifiedCallerDID, resolved to an
//     agent ID through resolveExecutionNoteAgentIDByDID;
//  2. the value stored in the gin context under middleware.CallerAgentIDKey,
//     when it is a string that is non-empty after trimming;
//  3. the X-Caller-Agent-ID request header, then the X-Agent-Node-ID header.
//
// It returns ("", nil) when none of the sources yields an identity. A non-nil
// error can only originate from the DID resolution in step 1.
func executionNoteCallerAgentID(ctx context.Context, c *gin.Context, storageProvider ExecutionNoteStorage) (string, error) {
Copy link
Copy Markdown
Member

@santoshkumarradha santoshkumarradha May 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 [CRITICAL] Raw-header fallback becomes sole identity source under default config

executionNoteCallerAgentID has 3 tiers: (1) verified DID, (2) CallerAgentIDKey context value, (3) raw X-Caller-Agent-ID/X-Agent-Node-ID headers. Tiers 1 & 2 are config-gated. Under defaults (APIKey="", did_auth_enabled=false), both are skipped — tier 3 accepts attacker-controlled headers with zero validation, flowing directly to ensureExecutionNoteOwnership.

Evidence:

  • routes_middleware.go:77 — DID middleware not installed when disabled
  • auth.go:26-28 — APIKeyAuth no-ops when APIKey=="", never sets context key
  • execution_notes.go:196-201 — raw header read with no validation
  • When APIKeyAuth does run, it reads the same headers (auth.go:118-124), so the fallback is either dead code or active bypass — never legitimate

Fix: Delete the raw-header fallback. Add a startup assertion in routes_middleware.go that refuses to register write routes when both auth methods are disabled.


Compound Analysis · confidence 95%

🤖 Reviewed by AgentField PR-AF

Copy link
Copy Markdown
Member

@santoshkumarradha santoshkumarradha May 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 [CRITICAL] Fix-F4-expose-F1 trap: enabling DID auth activates divergent resolution paths

Operator fixing F4 (default-config bypass) by setting did_auth_enabled=true unknowingly activates two DID→AgentNodeID resolution chains that read different stores:

  • resolveExecutionNoteAgentIDByDID (execution_notes.go:206-230) → SQL via GetDIDDocument + ListAgentDIDs from agent_dids table
  • DIDService.ResolveAgentIDByDID (did_service.go:500-518) → in-memory registry.AgentNodes (used by MemoryPermissionMiddleware, memory_permission.go:140)

No transactional sync between SQL and in-memory registry. During register/unregister race windows, the two stores can return different AgentNodeID values for the same DID — causing false 403s or cross-agent writes.

Fix: (1) Unify DID resolution into a single source of truth. (2) Add startup assertion when write routes registered but no auth enabled. (3) Annotate resolved identity with provenance for audit logging.


Compound Analysis · confidence 82%

🤖 Reviewed by AgentField PR-AF

// A caller DID takes precedence over every other source. The resolution
// result is returned as-is: an empty agent ID or an error from the resolver
// does NOT fall through to the context value or the headers below.
if callerDID := strings.TrimSpace(middleware.GetVerifiedCallerDID(c)); callerDID != "" {
return resolveExecutionNoteAgentIDByDID(ctx, storageProvider, callerDID)
}

// Next, use the caller agent ID stored in the gin context, if any.
if callerID, exists := c.Get(string(middleware.CallerAgentIDKey)); exists {
Copy link
Copy Markdown
Member

@santoshkumarradha santoshkumarradha May 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟠 [IMPORTANT] Type-unsafe CallerAgentIDKey enables silent fallback to attacker headers

c.Set(string, any) accepts any value type — only convention enforces string. The handler at line 190 does id, ok := callerID.(string); on a non-string value, ok=false, id="", the inner block is skipped, and control falls through to raw X-Caller-Agent-ID / X-Agent-Node-ID headers (lines 196-201) with no log or error.

Attack path: an authenticated request hits a future middleware bug that writes a non-string to the key → identity silently discarded → handler reads attacker-controlled headers → ownership check passes if attacker knows the execution ID.

Proof: context_helpers_additional_test.go:19-20 shows c.Set(string(CallerAgentIDKey), 42) compiles cleanly and GetCallerAgentID returns "" — the language permits the bug.

Fix: (1) Typed setter SetCallerAgentID(c, agentID string) + forbidigo lint rule banning raw c.Set on this key. (2) When exists==true && ok==false, log WARN and return error — never fall to raw headers. (3) Remove the raw-header fallback entirely (see related comment).


Compound Analysis · confidence 70%

🤖 Reviewed by AgentField PR-AF

// A context value that is not a string, or is blank after trimming, is
// ignored without any error and the header fallback below is tried.
if id, ok := callerID.(string); ok {
if id = strings.TrimSpace(id); id != "" {
return id, nil
}
}
}
// Last resort: take the identity straight from the request headers,
// preferring X-Caller-Agent-ID over X-Agent-Node-ID. The header values are
// only trimmed here; no further validation is applied in this function.
if agentID := strings.TrimSpace(c.GetHeader("X-Caller-Agent-ID")); agentID != "" {
return agentID, nil
}
if agentID := strings.TrimSpace(c.GetHeader("X-Agent-Node-ID")); agentID != "" {
return agentID, nil
}

// No identity could be determined; this is reported as an empty ID, not as
// an error.
return "", nil
}

// resolveExecutionNoteAgentIDByDID maps callerDID to an agent ID using
// whichever optional lookup capabilities storageProvider implements.
//
// Resolution proceeds in two steps:
//
//  1. If the provider implements executionNoteDIDDocumentLookup and
//     GetDIDDocument returns a non-nil record without error, the record's
//     trimmed AgentID is returned.
//  2. Otherwise, if the provider implements executionNoteAgentDIDLister, the
//     list from ListAgentDIDs is scanned and the trimmed AgentNodeID of the
//     first entry whose trimmed DID equals callerDID is returned.
//
// It returns ("", nil) when the DID cannot be resolved, including when the
// provider implements neither capability. The only error returned is a
// wrapped ListAgentDIDs failure.
func resolveExecutionNoteAgentIDByDID(ctx context.Context, storageProvider ExecutionNoteStorage, callerDID string) (string, error) {
Copy link
Copy Markdown
Member

@santoshkumarradha santoshkumarradha May 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟠 [IMPORTANT] DID resolution reads differently-named columns from independent tables

resolveExecutionNoteAgentIDByDID (line 206) has two paths returning what should be the same identifier:

  • Path 1 (line 209): record.AgentID from did_documents.agent_id (migration 019)
  • Path 2 (line 226): info.AgentNodeID from agent_dids.agent_node_id (migration 002)

Both compared against execution.AgentNodeID. They converge today only because nodes_register.go:1027,1044 populates both from newNode.ID — pure code coincidence. No FK, unique constraint, or comment ties them. A future migration touching one but not the other silently breaks resolution, causing false 403s.

The naming divergence (AgentID vs AgentNodeID) obscures the invariant.

Fix: (1) Document the equivalence on DIDDocumentRecord.AgentID. (2) Rename to AgentNodeID for consistency. (3) Add invariant test that creates entries in both tables and verifies both paths return the same result.


Field-name divergence · confidence 85%

🤖 Reviewed by AgentField PR-AF

// First path: direct DID document lookup, used only when the storage
// provider implements the optional lookup interface.
if lookup, ok := storageProvider.(executionNoteDIDDocumentLookup); ok {
Copy link
Copy Markdown
Member

@santoshkumarradha santoshkumarradha May 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[MEDIUM] GetDIDDocument errors swallowed; transient failures masked as not-found

resolveExecutionNoteAgentIDByDID (lines 207-211) checks err == nil && record != nil. GetDIDDocument (local.go:8323-8328) returns the same error type for both not-found and DB failures with no sentinel. Transient DB failure → falls through to ListAgentDIDs → if that also returns empty, client gets 403 "caller agent identity is required" while the real cause is infrastructure. Error info is lost.

Fix: (a) sentinel ErrDIDNotFound so only genuine not-found triggers fallback; or (b) log the GetDIDDocument error before falling through so operators can detect degradation.


Reliability · confidence 85%

🤖 Reviewed by AgentField PR-AF

// A lookup error or a nil record is not reported to the caller; both cases
// simply fall through to the listing path below. No other field of the
// record is inspected before its AgentID is returned.
if record, err := lookup.GetDIDDocument(ctx, callerDID); err == nil && record != nil {
Copy link
Copy Markdown
Member

@santoshkumarradha santoshkumarradha May 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[HIGH] Revoked DIDs still pass ownership check

At line 208, GetDIDDocument result is accepted on err == nil && record != nil with no record.IsRevoked() check. LocalStorage.GetDIDDocument (local.go:8310-8328) returns revoked records with RevokedAt populated but err==nil. Compare GetDIDDocumentByAgentID (local.go:8345) which adds AND revoked_at IS NULL.

A caller holding a revoked DID can still resolve to their old agent ID and pass ownership checks on former executions.

Fix: After successful retrieval, if record.IsRevoked() { /* fall through or error */ }. Or push the filter into the SQL query as GetDIDDocumentByAgentID does.


Security · confidence 95%

🤖 Reviewed by AgentField PR-AF

return strings.TrimSpace(record.AgentID), nil
}
}

// Second path: scan the full agent DID list. A provider without this
// capability yields an empty, error-free result.
lister, ok := storageProvider.(executionNoteAgentDIDLister)
if !ok {
return "", nil
}
agentDIDs, err := lister.ListAgentDIDs(ctx)
if err != nil {
return "", fmt.Errorf("failed to resolve caller DID: %w", err)
}
for _, info := range agentDIDs {
// Skip nil entries so the field accesses below cannot panic.
if info == nil {
continue
}
// The stored DID is trimmed before comparison; callerDID is compared
// exactly as passed in.
if strings.TrimSpace(info.DID) == callerDID {
return strings.TrimSpace(info.AgentNodeID), nil
}
}

// No entry matched the DID.
return "", nil
}

// GetExecutionNotesHandler handles GET /api/v1/executions/:execution_id/notes
// Retrieves notes for a specific execution with optional tag filtering
func GetExecutionNotesHandler(storageProvider ExecutionNoteStorage) gin.HandlerFunc {
Copy link
Copy Markdown
Member

@santoshkumarradha santoshkumarradha May 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟠 [IMPORTANT] Read path leaks execution notes — no ownership check

The PR fixes the IDOR on the write path, but the read path in GetExecutionNotesHandler (line 235) remains unprotected: any API-key-authenticated caller can read any execution's notes by ID. Notes carry workflow state (phase progress, intermediate results, confidence reasoning — see examples/python_agent_nodes/agentic_rag/main.py:912-1097).

storageProvider.GetExecutionRecord() is called with no caller identity resolution or comparison against execution.AgentNodeID — same IDOR pattern just fixed on write.

PR mentions this is "deliberately NOT modified" but provides no rationale, no tests, no code comment. Likely oversight.

Fix: Mirror write path — resolve caller via executionNoteCallerAgentID, then ensureExecutionNoteOwnership, return 403 on mismatch. If intentional, document with code comment + test confirming open-read is the contract.


Authorization Asymmetry · confidence 95%

🤖 Reviewed by AgentField PR-AF

Expand Down
Loading
Loading