Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkg/raised/caching.go
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ func (self *cacheSlot[K, V]) setVal(ck K, val V) {
return
}

for i := range self.next % maxCached {
for i := range min(self.next, maxCached) {
if ck == self.keys[i] {
self.vals[i] = val
return
Expand Down
35 changes: 8 additions & 27 deletions pkg/raised/doc.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,16 @@
// An optional ERROR(n) prefix embeds a numeric code for lightweight
// classification:
//
// type myPkg struct{}
// type pkg struct{}
//
// var (
// ErrNotFound = raised.NewSentinelError[myPkg]("ERROR(1) not found")
// ErrBadRequest = raised.NewSentinelError[myPkg]("ERROR(2) bad request")
// ErrNotFound = raised.NewSentinelError[pkg]("ERROR(1) not found")
// ErrBadRequest = raised.NewSentinelError[pkg]("ERROR(2) bad request")
// )
//
// # Tracing
//
// Trace and TraceAt wrap any error in a propagation trace. Each call records
// Trace wraps any error in a propagation trace. Each call records
// the call site and an optional message. If the error is already a raised
// Error it is extended in place; otherwise a new trace is created with the
// error as its root cause:
Expand All @@ -56,18 +56,6 @@
// fmt.Printf("%+v\n", err)
// }
//
// TraceAt is a hot-path variant that caches the call site PC, avoiding
// repeated runtime.Callers calls:
//
// const flk = 1 // unique constant within the package
//
// func process(item Item) error {
// if err := validate(item); err != nil {
// return raised.TraceAt(flk, err, "process item")
// }
// // ...
// }
//
// # Classification
//
// When a package receives a foreign error it can assert its own sentinel
Expand All @@ -89,18 +77,18 @@
//
// # Error identity and keying
//
// A Keyer computes a stable ErrorKey for a raised Error, derived from its
// An ErrorKeyer computes a stable ErrorKey for a raised Error, derived from its
// propagation path and terminal root cause. Two errors sharing the same
// ErrorKey represent the same problem: identical code path and equivalent
// root cause. This is useful for error aggregation, deduplication, and
// monitoring.
//
// A Keyer is scoped to the sentinel family T, consistent with the phantom
// An ErrorKeyer is scoped to the sentinel family T, consistent with the phantom
// type used for sentinel declaration:
//
// type myPkg struct {}
// type pkg struct {}
//
// var Keyer, _ = raised.NewKeyer[myPkg]()
// var Keyer, _ = raised.NewErrorKeyer[pkg](nil)
//
// func handle(err error) {
// key, ok := Keyer.Key(err)
Expand All @@ -112,11 +100,4 @@
// The ErrorKey is stable across process restarts and hosts as long as the
// source code has not changed — it is derived from file/line strings rather
// than runtime memory addresses.
//
// # Reliability
//
// raised relies on the Go runtime's pclntab for file/line resolution.
// pclntab survives standard production build flags including -s -w and
// -trimpath. File/line resolution will degrade only under deliberate
// obfuscation tools such as garble.
package raised
48 changes: 47 additions & 1 deletion pkg/raised/error.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,10 @@ type errTrace struct {
// total Trace call count including compressed-out entries.
next int

// epc (entry PC) holds the first module-local program counter recorded for this trace.
// Due to trace compression, epc may no longer be present in pcs.
epc uintptr

// pcs holds the program counter for each recorded Trace call site.
pcs [traceSize]uintptr

Expand Down Expand Up @@ -350,6 +354,7 @@ func (self *errTrace) snapshot(dst *errTraceSnapshot) {
dst.cause = self.cause
dst.class = self.class
dst.next = self.next
dst.epc = self.epc
dst.pcs = self.pcs
}

Expand All @@ -358,6 +363,7 @@ type errTraceSnapshot struct {
cause error
class SentinelError
next int
epc uintptr
pcs [traceSize]uintptr
}

Expand All @@ -383,6 +389,26 @@ func (self *errTraceSnapshot) Unwrap() []error {
return rv
}

// entryPoint reports the program counter to use as the module entry point of
// this trace.
//
// A non-zero epc is returned as is: it is the first module-local call site
// recorded while the error propagated, which is the most efficient location
// for a Classify call to stabilize the error Key.
//
// When epc is zero, the PC stored at index min(next, traceSize)-1 of pcs (the
// most recent one) is returned as a best-effort approximation. If the trace
// holds no PC at all, 0 is returned.
func (self *errTraceSnapshot) entryPoint() uintptr {
	if self.epc != 0 {
		return self.epc
	}

	// No module-local PC was recorded: fall back to the latest recorded PC.
	// A finer heuristic could walk pcs backwards until the package of the PC
	// differs from the package of pcs[last].
	last := min(self.next, traceSize) - 1
	if last < 0 {
		return 0
	}
	return self.pcs[last]
}

// getFileLines resolves a slice of PCs to formatted file/line strings.
func getFileLines(pcs []uintptr) []string {
if 0 == len(pcs) {
Expand Down Expand Up @@ -441,7 +467,7 @@ var buildModPath string

func init() {
if info, ok := debug.ReadBuildInfo(); ok {
buildModPath = info.Main.Path + "/"
buildModPath = info.Main.Path + "/" // if info.Main.Path is "", buildModPath is not a valid pkgpath prefix.
}
}

Expand Down Expand Up @@ -469,13 +495,28 @@ func addCallerInfo[K ~int](err *errTrace, flk K, msg string, skip int) {
err.pcs[pos] = pc
err.msgs[pos] = strings.TrimSpace(msg)

// record module "entry point"
if err.epc == 0 && isLocal(pc) {
err.epc = pc
}

// clear cached summary & trace
err._summary = ""
err._trace = ""

err.next += 1
}

// isLocal reports whether pc resolves to a function whose name starts with
// buildModPath, i.e. a function that belongs to the "project" module.
// A zero pc, or a pc that cannot be resolved to a function, is never local.
// TODO: a heuristic is needed for the case where the module is undefined.
func isLocal(pc uintptr) bool {
	if pc == 0 {
		return false
	}

	// FuncForPC does not allocate whereas CallersFrames does.
	// NOTE(review): pc-1 presumably steps back from a return address into the
	// call instruction itself; confirm against how pcs are captured.
	fn := runtime.FuncForPC(pc - 1)
	if fn == nil {
		return false
	}
	return strings.HasPrefix(fn.Name(), buildModPath)
}

// ---
// program counter resolution caching

Expand Down Expand Up @@ -546,6 +587,11 @@ func init() {
}
}

// L1Key is a stable identifier for an error's propagation path, derived from
// the module entry point, the recorded propagation PCs, and the total Trace
// call count.
//
// Layout, as filled in by sentinelErrorKeyer.Key: index 0 holds the entry PC
// (epc), indices 1..traceSize hold the recorded PCs, and the last index holds
// the total Trace call count converted to uintptr (it is not a valid PC).
type L1Key = [2 + traceSize]uintptr

// traceL1Key is the stable outer key for traceCache, derived from the recorded
// PCs and total turn count. Two errTrace values sharing this key took the same
// code path and map to the same cacheSlot.
Expand Down
121 changes: 98 additions & 23 deletions pkg/raised/keying.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"slices"
)

// keySize is the number of bytes in an ErrorKey.
const keySize = 16

// ErrorKey is a fixed-size hash derived from an error's propagation path and
Expand All @@ -19,35 +20,86 @@ type ErrorKey = [keySize]byte
// The hash must produce at least keySize bytes.
type HashFunc = func() hash.Hash

// Keyer computes a stable ErrorKey for a raised Error.
type Keyer interface {
// Key returns a stable ErrorKey for err and true if err is a raised Error
// with a resolvable terminal cause. Returns a zero ErrorKey and false
// if err is not a raised Error or has no resolvable terminal cause.
// ErrorKeyer computes a stable ErrorKey for a raised Error.
// The key is derived from the error's propagation path and terminal root cause,
// independently of any dynamic context embedded in error messages.
// Key returns true only when a key could be determined.
type ErrorKeyer interface {
// Key returns an ErrorKey and a bool indicating if the key could be determined.
Key(error) (ErrorKey, bool)

isErrorKeyer() bool
}

// NewKeyer returns a Keyer using SHA256 as the default hash function,
// NewErrorKeyer returns an ErrorKeyer using SHA256 as the default hash function,
// scoped to the sentinel family identified by the phantom type T.
func NewKeyer[T any]() (Keyer, error) {
func NewErrorKeyer[T any](ukl UnstableKeyListener) (ErrorKeyer, error) {
// sha256 should provide better collision resistance than fnv128
return NewSentinelKeyer[T](sha256.New)
return NewSentinelErrorKeyer[T](sha256.New, ukl)
}

// UnstableKeyEvent is delivered to an UnstableKeyListener when the ErrorKey
// for a given code path could not be stably determined, indicating that the
// terminal cause varies across calls originating from the same location.
// This typically occurs when a foreign error embeds transient state — such as
// a request ID or a dynamic value — in its message, preventing key stabilisation.
//
// The most efficient fix is to call Classify on the propagating Error at the
// EntryPoint location, asserting a stable sentinel identity that overrides the
// unstable foreign cause.
type UnstableKeyEvent struct {
// Error is the full raised Error for which a stable key could not be determined.
Error Error

// K1 is the stable propagation path key for Error.
// K1 remains constant for a given code path and can be used by the listener to
// track instability frequency per origin site.
K1 L1Key

// Key is the ErrorKey that was derived for Error on this call.
// It may differ across calls originating from the same code path.
Key ErrorKey

// EntryPoint is the program counter of the module entry site for Error.
// This is the recommended location at which to call Classify in order to
// assert a stable sentinel identity and resolve the instability.
// When no module-local call site was recorded, it is a best-effort
// approximation (the most recent recorded PC), or 0 if the trace holds no PC.
EntryPoint uintptr
}

// UnstableKeyListener is implemented by types that wish to observe key instability.
type UnstableKeyListener interface {
// OnUnstableKey is called each time an ErrorKey fluctuates for a given
// code path, with the associated event.
//
// NOTE(review): sentinelErrorKeyer.Key appears to dispatch whenever its cache
// lookup is not a hit, which would include the first key computed for a code
// path — confirm whether listeners must tolerate that initial notification.
OnUnstableKey(UnstableKeyEvent)
}

// UnstableKeyListenerFunc is an adapter type to allow the use of ordinary
// functions as UnstableKeyListener.
type UnstableKeyListenerFunc func(UnstableKeyEvent)

// OnUnstableKey calls self(evt).
//
// A nil UnstableKeyListenerFunc is a no-op. Once a nil function is stored in
// an UnstableKeyListener interface value, that value no longer compares equal
// to nil, so a nil-listener check performed by the caller cannot protect
// against it; the guard here avoids a nil function call panic.
func (self UnstableKeyListenerFunc) OnUnstableKey(evt UnstableKeyEvent) {
	if nil == self {
		return
	}
	self(evt)
}

// sentinelKeyer is the Keyer implementation scoped to sentinel family T.
type sentinelKeyer[T any] struct {
// sentinelErrorKeyer is the ErrorKeyer implementation scoped to sentinel family T.
// It is immutable after construction.
type sentinelErrorKeyer[T any] struct {
// hf is the hash factory used to compute ErrorKeys.
hf HashFunc

// tc caches computed ErrorKeys keyed by code path and terminal cause string,
// amortizing the cost of hash computation and file/line resolution on hot paths.
tc *keyCache

// ukl is the optional listener notified on each cache miss. May be nil.
ukl UnstableKeyListener
}

// NewSentinelKeyer returns a Keyer scoped to the sentinel family identified
// NewSentinelErrorKeyer returns an ErrorKeyer scoped to the sentinel family identified
// by the phantom type T, using hf as the hash function.
// Returns ErrInvalidHash if hf is nil or produces fewer than keySize bytes.
func NewSentinelKeyer[T any](hf HashFunc) (Keyer, error) {
func NewSentinelErrorKeyer[T any](hf HashFunc, ukl UnstableKeyListener) (ErrorKeyer, error) {
// validate hf
if nil == hf {
return nil, Trace(ErrInvalidHash, "nil hash function")
Expand All @@ -57,17 +109,17 @@ func NewSentinelKeyer[T any](hf HashFunc) (Keyer, error) {
return nil, Trace(ErrInvalidHash, "insufficient hash size %d < %d", h.Size(), keySize)
}

sk := sentinelKeyer[T]{hf: hf, tc: &keyCache{clock: ticks}}
sk := sentinelErrorKeyer[T]{hf: hf, tc: &keyCache{clock: ticks}, ukl: ukl}

return &sk, nil
}

// Key computes a stable ErrorKey for err. err must be a raised Error produced
// by Trace or TraceAt. The key is derived from the error's propagation path
// by Trace. The key is derived from the error's propagation path
// (as file/line strings) and the terminal cause resolved via UnwrapTerminal[T].
// Results are cached by code path and terminal cause string.
// Returns false if err is not a raised Error or has no resolvable terminal cause.
func (self *sentinelKeyer[T]) Key(err error) (ErrorKey, bool) {
func (self *sentinelErrorKeyer[T]) Key(err error) (ErrorKey, bool) {
erk := ErrorKey{}

// abort if err is not an *errTrace
Expand All @@ -86,10 +138,21 @@ func (self *sentinelKeyer[T]) Key(err error) (ErrorKey, bool) {
}

// determine caching keys
k1 := traceL1Key{turnCount: snp.next, pcs: snp.pcs}
cause := trm.Error() // less noisy than ert.cause which can be any error...

erk, rs := self.tc.Get(k1, cause)
k1 := L1Key{}
k1[0] = snp.epc
copy(k1[1:(1+traceSize)], snp.pcs[:])
k1[1+traceSize] = uintptr(snp.next) // not a valid PC, used to simplify k1

k2 := ""
stn, ok := trm.(SentinelError)
if ok {
k2 = stn.Fingerprint()
} else {
k2 = trm.Error() // less noisy than ert.cause which can be any error...
}

erk, rs := self.tc.Get(k1, k2)
if cchHit == rs {
return erk, true
}
Expand All @@ -102,9 +165,9 @@ func (self *sentinelKeyer[T]) Key(err error) (ErrorKey, bool) {
// ---
// cause component
hs.Write([]byte("CS"))
binary.BigEndian.PutUint64(ib[:], uint64(len(cause)))
binary.BigEndian.PutUint64(ib[:], uint64(len(k2)))
hs.Write(ib[:])
hs.Write([]byte(cause))
hs.Write([]byte(k2))

// ---
// next component
Expand All @@ -123,12 +186,14 @@ func (self *sentinelKeyer[T]) Key(err error) (ErrorKey, bool) {
case flc > traceSize:
flc = traceSize
}
flc += 1 // pc are read from k1 which is [epc|pcs...|next]
hs.Write([]byte("FLC"))
binary.BigEndian.PutUint64(ib[:], uint64(flc))
hs.Write(ib[:])

// hash each fileline in ert code path
fls := getFileLines(snp.pcs[:flc])
// flc allows excluding next which is not a valid pc
fls := getFileLines(k1[:flc])
for _, fln := range fls {
hs.Write([]byte("FLN"))
binary.BigEndian.PutUint64(ib[:], uint64(len(fln)))
Expand All @@ -141,15 +206,25 @@ func (self *sentinelKeyer[T]) Key(err error) (ErrorKey, bool) {
copy(erk[:], hs.Sum(nil))

if rs == cchMissCacheNew {
self.tc.Set(k1, cause, erk)
self.tc.Set(k1, k2, erk)
}

// dispatch a new UnstableKeyEvent to the listener, if any
if nil != self.ukl {
evt := UnstableKeyEvent{Error: ert, K1: k1, Key: erk, EntryPoint: snp.entryPoint()}
self.ukl.OnUnstableKey(evt)
}

return erk, true

}

// isErrorKeyer is the unexported marker method required by the ErrorKeyer
// interface; it always returns true.
func (self *sentinelErrorKeyer[T]) isErrorKeyer() bool {
return true
}

// keyCache is a timedCache mapping (code path, terminal cause string) to ErrorKey.
type keyCache = timedCache[traceL1Key, string, ErrorKey]
type keyCache = timedCache[L1Key, string, ErrorKey]

// UnwrapTerminal returns "minimal" error obtained by recursively unwrapping err or
// casting err to Sentinel[T], SentinelError...
Expand Down
Loading
Loading