Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions policylang/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,17 @@ func (cp *CompiledPolicy) EvaluatePeerExpr(ruleName string, actionIdx int, peerC
return runProgram(prog, peerCtx)
}

// FailClosed reports whether the engine should deny/reject on expression
// evaluation errors. When nil in PolicyDocument, defaults to true
// (fail-closed — more secure). Set fail_closed: false to restore legacy
// fail-open behaviour.
func (cp *CompiledPolicy) FailClosed() bool {
if cp.Doc.FailClosed == nil {
return true // default: fail-closed
}
return *cp.Doc.FailClosed
}

// HasRulesFor returns true if the policy has any rules for the given event type.
func (cp *CompiledPolicy) HasRulesFor(eventType EventType) bool {
for _, cr := range cp.rules {
Expand Down
6 changes: 6 additions & 0 deletions policylang/policy.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,15 @@ type Rule struct {
// DefaultVerdict controls gate-event behavior when no rule produces a
// verdict. Valid values: "allow" (default, backwards-compatible) or "deny"
// (default-deny; write explicit allow rules).
//
// FailClosed controls behavior when an expression evaluation error occurs.
// When nil (default), the engine fails-closed (DENY on gate errors, skip
// actions on cycle/join/leave errors). Set fail_closed: false in the JSON
// to restore legacy fail-open semantics.
type PolicyDocument struct {
Version int `json:"version"`
DefaultVerdict string `json:"default_verdict,omitempty"`
FailClosed *bool `json:"fail_closed,omitempty"`
Config map[string]interface{} `json:"config,omitempty"`
Rules []Rule `json:"rules"`
}
Expand Down
27 changes: 24 additions & 3 deletions runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -215,8 +215,17 @@ func mergeTags(nodeInfoTags, policyTags []string) []string {
func (pr *PolicyRunner) EvaluateGate(eventType EventType, ctx map[string]interface{}) bool {
dirs, err := pr.compiled.Evaluate(eventType, ctx)
if err != nil {
slog.Warn("policy: gate eval error", "network_id", pr.netID, "event", eventType, "err", err)
return true // fail open on error
if pr.compiled.FailClosed() {
slog.Error("policy: gate eval error — denying (fail_closed)", "network_id", pr.netID, "event", eventType, "err", err)
pr.runtime.PublishEvent("policy.eval_error", map[string]interface{}{
"network_id": pr.netID,
"event": eventType,
"error": err.Error(),
})
return false
}
slog.Warn("policy: gate eval error — allowing (fail_open)", "network_id", pr.netID, "event", eventType, "err", err)
return true
}

// Execute side effects (tag, etc.) before the verdict.
Expand Down Expand Up @@ -258,6 +267,9 @@ func (pr *PolicyRunner) EvaluateGate(eventType EventType, ctx map[string]interfa
func (pr *PolicyRunner) evaluatePerPeerCycle(ctx map[string]interface{}) {
dirs, err := pr.compiled.Evaluate(EventCycle, ctx)
if err != nil {
if pr.compiled.FailClosed() {
slog.Error("policy: per-peer cycle eval error — skipping (fail_closed)", "network_id", pr.netID, "err", err)
}
return
}
for _, d := range dirs {
Expand All @@ -272,7 +284,16 @@ func (pr *PolicyRunner) evaluatePerPeerCycle(ctx map[string]interface{}) {
func (pr *PolicyRunner) EvaluateActions(eventType EventType, ctx map[string]interface{}) {
dirs, err := pr.compiled.Evaluate(eventType, ctx)
if err != nil {
slog.Warn("policy: action eval error", "network_id", pr.netID, "event", eventType, "err", err)
if pr.compiled.FailClosed() {
slog.Error("policy: action eval error — skipping directives (fail_closed)", "network_id", pr.netID, "event", eventType, "err", err)
pr.runtime.PublishEvent("policy.eval_error", map[string]interface{}{
"network_id": pr.netID,
"event": eventType,
"error": err.Error(),
})
} else {
slog.Warn("policy: action eval error — skipping directives (fail_open)", "network_id", pr.netID, "event", eventType, "err", err)
}
return
}

Expand Down
70 changes: 49 additions & 21 deletions zz_audit_defensive_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,17 +113,12 @@ func TestPin_DefaultVerdictUnrecognized_RejectedByValidate(t *testing.T) {
}
}

// TestPin_EvaluateGate_FailOpenOnEvalError pins the documented fail-open
// behaviour when the expression engine returns an error mid-evaluation.
// The reasoning is in runner.go: "// fail open on error" — flipping this to
// fail-closed without a deliberate review would brick live connections.
// Pinning the current behaviour so the trade-off is an explicit choice next
// time, not an accident.
func TestPin_EvaluateGate_FailOpenOnEvalError(t *testing.T) {
// TestPin_EvaluateGate_FailClosedOnEvalError pins the fail-closed default
// behaviour (PILOT-262): when the expression engine returns an error
// mid-evaluation and fail_closed is not explicitly set, the gate must
// DENY (fail-closed) — the more secure default.
func TestPin_EvaluateGate_FailClosedOnEvalError(t *testing.T) {
t.Parallel()
// Use a rule whose match references a function that exists at
// compile but blows up at runtime — duration("nope") returns an
// error inside expr, which surfaces from runProgram.
doc := &PolicyDocument{
Version: 1,
Rules: []Rule{
Expand All @@ -136,14 +131,46 @@ func TestPin_EvaluateGate_FailOpenOnEvalError(t *testing.T) {
if err != nil {
t.Fatalf("Compile: %v", err)
}
pr := &PolicyRunner{netID: 1, compiled: cp, peers: map[uint32]*managedPeer{}}
// Default (no fail_closed field) → fail-closed → deny on error.
if cp.FailClosed() != true {
t.Fatal("default FailClosed must be true")
}
pr := &PolicyRunner{netID: 1, compiled: cp, peers: map[uint32]*managedPeer{}, runtime: &fakeRuntime{}}
if pr.EvaluateGate(EventConnect, map[string]interface{}{
"port": 80, "peer_id": 1, "network_id": 1,
"peer_tags": []string{}, "peer_age_s": 0.0, "members": 0,
}) {
t.Fatal("fail-closed default: eval error must deny, got allow")
}
}

// Eval error → EvaluateGate returns true (fail-open).
// TestPin_EvaluateGate_FailOpenExplicit preserves the legacy fail-open
// pathway when fail_closed: false is explicitly set in the policy document.
func TestPin_EvaluateGate_FailOpenExplicit(t *testing.T) {
t.Parallel()
f := false
doc := &PolicyDocument{
Version: 1,
FailClosed: &f,
Rules: []Rule{
{Name: "boom", On: EventConnect, Match: `duration("nope") > 0`, Actions: []Action{
{Type: ActionDeny},
}},
},
}
cp, err := Compile(doc)
if err != nil {
t.Fatalf("Compile: %v", err)
}
if cp.FailClosed() != false {
t.Fatal("explicit fail_closed: false must report false")
}
pr := &PolicyRunner{netID: 1, compiled: cp, peers: map[uint32]*managedPeer{}, runtime: &fakeRuntime{}}
if !pr.EvaluateGate(EventConnect, map[string]interface{}{
"port": 80, "peer_id": 1, "network_id": 1,
"peer_tags": []string{}, "peer_age_s": 0.0, "members": 0,
}) {
t.Fatal("fail-open contract: eval error must allow, got deny")
t.Fatal("explicit fail_open: eval error must allow, got deny")
}
}

Expand Down Expand Up @@ -205,14 +232,15 @@ func TestPin_ExprTimeout_GateBoundedLatency(t *testing.T) {
// than tearing down the goroutine. The pkg-level test in policylang already
// covers this; this is the integration-level guard from the runner side so
// the *whole stack* is exercised, not just the helper.
//
// Uses fail_closed: false so the legacy allow-on-error verdict path stays
// tested. The no-panic invariant is what matters here.
func TestPin_ExprTimeout_PanicSurfacesAsError(t *testing.T) {
t.Parallel()
// Compile a rule whose expression accesses a field absent from ctx — under
// expr.AllowUndefinedVariables this becomes nil and dereference is the
// usual panic candidate. If it doesn't panic in this expr version, the
// test still passes (no crash is the invariant).
failOpen := false
doc := &PolicyDocument{
Version: 1,
Version: 1,
FailClosed: &failOpen,
Rules: []Rule{
{Name: "boom", On: EventConnect,
Match: `peer_tags[10] == "x"`, // OOB on empty peer_tags
Expand All @@ -223,19 +251,19 @@ func TestPin_ExprTimeout_PanicSurfacesAsError(t *testing.T) {
if err != nil {
t.Fatalf("Compile: %v", err)
}
pr := &PolicyRunner{netID: 1, compiled: cp, peers: map[uint32]*managedPeer{}}
pr := &PolicyRunner{netID: 1, compiled: cp, peers: map[uint32]*managedPeer{}, runtime: &fakeRuntime{}}

// Must not panic.
defer func() {
if r := recover(); r != nil {
t.Fatalf("BUG: gate panicked instead of fail-open: %v", r)
t.Fatalf("BUG: gate panicked: %v", r)
}
}()
if !pr.EvaluateGate(EventConnect, map[string]interface{}{
"port": 80, "peer_id": 1, "network_id": 1,
"peer_tags": []string{}, "peer_age_s": 0.0, "members": 0,
}) {
t.Fatal("OOB index → expected fail-open allow")
t.Fatal("explicit fail_open: OOB → must allow")
}
}

Expand Down
Loading