Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
e223b43
fix(covenantsigner): rename misleading test after resilient loading c…
piotr-roslaniec Apr 9, 2026
1ed010f
fix(covenantsigner): use errors.Is for errJobNotFound comparison in Poll
piotr-roslaniec Apr 9, 2026
3157827
fix(covenantsigner): restrict healthz auth bypass to GET method
piotr-roslaniec Apr 9, 2026
b1482fe
docs(covenantsigner): warn against CLI flag for AuthToken
piotr-roslaniec Apr 9, 2026
5cf862d
fix(covenantsigner): add aggregate load summary with skip count
piotr-roslaniec Apr 9, 2026
cc69434
fix(covenantsigner): remove superseded job from byRequestID on dedup
piotr-roslaniec Apr 9, 2026
065519c
fix(covenantsigner): use deterministic tiebreaker when both timestamp…
piotr-roslaniec Apr 9, 2026
495caf3
fix(covenantsigner): poison route keys from skipped jobs to preserve …
piotr-roslaniec Apr 9, 2026
9eb4194
fix(covenantsigner): extract Submit critical section into createOrDedup
piotr-roslaniec Apr 9, 2026
07b9bcc
fix(covenantsigner): cancel service context on init failure and OS si…
piotr-roslaniec Apr 9, 2026
272b661
docs(covenantsigner): document advisory flock limitations and storage…
piotr-roslaniec Apr 9, 2026
795f50b
fix(tbtc): improve error messages and docs for degraded wallet registry
piotr-roslaniec Apr 9, 2026
054e261
fix(tbtc): use canonicaljson.Marshal for handoff payload hash
piotr-roslaniec Apr 9, 2026
5ef7617
fix(covenantsigner): correctly distinguish single vs both unparseable…
piotr-roslaniec Apr 9, 2026
c9f9544
fix(covenantsigner): correct context preservation test to use service…
piotr-roslaniec Apr 9, 2026
0c15ba1
fix(covenantsigner): remove subsystem signal handler that steals proc…
piotr-roslaniec Apr 9, 2026
7615a85
fix(covenantsigner): clear route poison when a valid job loads for sa…
piotr-roslaniec Apr 9, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions pkg/chain/ethereum/tbtc.go
Original file line number Diff line number Diff line change
Expand Up @@ -1478,8 +1478,10 @@ func (tc *TbtcChain) GetWallet(

walletRegistryWallet, err := tc.walletRegistry.GetWallet(wallet.EcdsaWalletID)
if err != nil {
logger.Warnf(
"cannot get wallet registry data for wallet [0x%x]: [%v]",
logger.Errorf(
"wallet registry unavailable for wallet [0x%x]; "+
"MembersIDsHash will be zero -- signer approval "+
"operations will fail until the registry recovers: [%v]",
wallet.EcdsaWalletID,
err,
)
Expand Down
4 changes: 3 additions & 1 deletion pkg/covenantsigner/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ type Config struct {
// binds to. Empty defaults to loopback-only.
ListenAddress string
// AuthToken enables static Bearer authentication for signer endpoints.
// Non-loopback binds must set this.
// Non-loopback binds must set this. Prefer environment variables or
// config files over CLI flags to avoid exposing the token in
// /proc/PID/cmdline.
AuthToken string
// EnableSelfV1 exposes the self_v1 signer HTTP routes. Keep this disabled
// for a qc_v1-first launch unless self_v1 has cleared its own go-live gate.
Expand Down
3 changes: 2 additions & 1 deletion pkg/covenantsigner/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ func Initialize(

listener, err := net.Listen("tcp", server.httpServer.Addr)
if err != nil {
cancelService()
return nil, false, fmt.Errorf("failed to bind covenant signer port [%d]: %w", config.Port, err)
}

Expand Down Expand Up @@ -251,7 +252,7 @@ func newHandler(service *Service, serviceCtx context.Context, authToken string,
}

return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/healthz" {
if r.Method == http.MethodGet && r.URL.Path == "/healthz" {
mux.ServeHTTP(w, r)
return
}
Expand Down
18 changes: 8 additions & 10 deletions pkg/covenantsigner/server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -867,7 +867,7 @@ func TestSubmitHandlerPreCancelledContextStillSucceeds(t *testing.T) {

type contextKey string

func TestSubmitHandlerPreservesContextValues(t *testing.T) {
func TestSubmitHandlerPreservesServiceContextValues(t *testing.T) {
const testKey contextKey = "test-trace-id"
const testValue = "trace-abc-123"

Expand All @@ -889,15 +889,13 @@ func TestSubmitHandlerPreservesContextValues(t *testing.T) {
t.Fatal(err)
}

// Wrap the handler with middleware that injects a value into the request
// context. The detached context should preserve this value.
innerHandler := newHandler(service, context.Background(), "", true)
wrappedHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
enrichedCtx := context.WithValue(r.Context(), testKey, testValue)
innerHandler.ServeHTTP(w, r.WithContext(enrichedCtx))
})
// Inject a value into the service context. The submit handler derives
// its timeout context from serviceCtx (not from the HTTP request), so
// values on the service context must be visible to the engine.
serviceCtx := context.WithValue(context.Background(), testKey, testValue)
handler := newHandler(service, serviceCtx, "", true)

server := httptest.NewServer(wrappedHandler)
server := httptest.NewServer(handler)
defer server.Close()

submitPayload := mustJSON(t, SignerSubmitInput{
Expand Down Expand Up @@ -925,7 +923,7 @@ func TestSubmitHandlerPreservesContextValues(t *testing.T) {
defer mu.Unlock()
if capturedValue != testValue {
t.Fatalf(
"expected context value %q to be preserved through detachment, "+
"expected service context value %q to be visible in engine, "+
"got %v",
testValue,
capturedValue,
Expand Down
109 changes: 63 additions & 46 deletions pkg/covenantsigner/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"crypto/rand"
"encoding/hex"
"errors"
"fmt"
"reflect"
"sync"
Expand Down Expand Up @@ -229,50 +230,28 @@ func (s *Service) loadPollJob(route TemplateID, input SignerPollInput) (*Job, er
return job, nil
}

func (s *Service) Submit(ctx context.Context, route TemplateID, input SignerSubmitInput) (StepResult, error) {
submitValidationOptions := validationOptions{
migrationPlanQuoteTrustRoots: s.migrationPlanQuoteTrustRoots,
depositorTrustRoots: s.depositorTrustRoots,
custodianTrustRoots: s.custodianTrustRoots,
requireFreshMigrationPlanQuote: true,
migrationPlanQuoteVerificationNow: s.now(),
signerApprovalVerifier: s.signerApprovalVerifier,
}
if err := validateSubmitInput(route, input, submitValidationOptions); err != nil {
return StepResult{}, err
}

normalizedRequest, err := normalizeRouteSubmitRequest(
input.Request,
validationOptions{
migrationPlanQuoteTrustRoots: s.migrationPlanQuoteTrustRoots,
depositorTrustRoots: s.depositorTrustRoots,
custodianTrustRoots: s.custodianTrustRoots,
signerApprovalVerifier: s.signerApprovalVerifier,
},
)
if err != nil {
return StepResult{}, err
}

requestDigest, err := requestDigestFromNormalized(normalizedRequest)
if err != nil {
return StepResult{}, err
}

// createOrDedup creates a new job under the service mutex, or returns the
// existing job result if the route request is already known. Returns
// (job, nil, nil) for a new job, or (nil, result, nil) for a dedup hit.
func (s *Service) createOrDedup(
route TemplateID,
input SignerSubmitInput,
normalizedRequest RouteSubmitRequest,
requestDigest string,
) (*Job, *StepResult, error) {
s.mutex.Lock()
defer s.mutex.Unlock()

if existing, ok, err := s.store.GetByRouteRequest(route, input.RouteRequestID); err != nil {
s.mutex.Unlock()
return StepResult{}, err
return nil, nil, err
} else if ok {
if existing.RequestDigest != requestDigest {
s.mutex.Unlock()
return StepResult{}, &inputError{
return nil, nil, &inputError{
"routeRequestId already exists with a different request payload",
}
}
s.mutex.Unlock()
return mapJobResult(existing), nil
result := mapJobResult(existing)
return nil, &result, nil
}

requestIDPrefix := ""
Expand All @@ -282,14 +261,12 @@ func (s *Service) Submit(ctx context.Context, route TemplateID, input SignerSubm
case TemplateSelfV1:
requestIDPrefix = "kcs_self"
default:
s.mutex.Unlock()
return StepResult{}, fmt.Errorf("unsupported route: %s", route)
return nil, nil, fmt.Errorf("unsupported route: %s", route)
}

requestID, err := newRequestID(requestIDPrefix)
if err != nil {
s.mutex.Unlock()
return StepResult{}, err
return nil, nil, err
}

now := s.now()
Expand All @@ -309,10 +286,50 @@ func (s *Service) Submit(ctx context.Context, route TemplateID, input SignerSubm
}

if err := s.store.Put(job); err != nil {
s.mutex.Unlock()
return nil, nil, err
}

return job, nil, nil
}

func (s *Service) Submit(ctx context.Context, route TemplateID, input SignerSubmitInput) (StepResult, error) {
submitValidationOptions := validationOptions{
migrationPlanQuoteTrustRoots: s.migrationPlanQuoteTrustRoots,
depositorTrustRoots: s.depositorTrustRoots,
custodianTrustRoots: s.custodianTrustRoots,
requireFreshMigrationPlanQuote: true,
migrationPlanQuoteVerificationNow: s.now(),
signerApprovalVerifier: s.signerApprovalVerifier,
}
if err := validateSubmitInput(route, input, submitValidationOptions); err != nil {
return StepResult{}, err
}
s.mutex.Unlock()

normalizedRequest, err := normalizeRouteSubmitRequest(
input.Request,
validationOptions{
migrationPlanQuoteTrustRoots: s.migrationPlanQuoteTrustRoots,
depositorTrustRoots: s.depositorTrustRoots,
custodianTrustRoots: s.custodianTrustRoots,
signerApprovalVerifier: s.signerApprovalVerifier,
},
)
if err != nil {
return StepResult{}, err
}

requestDigest, err := requestDigestFromNormalized(normalizedRequest)
if err != nil {
return StepResult{}, err
}

job, existingResult, err := s.createOrDedup(route, input, normalizedRequest, requestDigest)
if err != nil {
return StepResult{}, err
}
if existingResult != nil {
return *existingResult, nil
}

transition, err := s.engine.OnSubmit(ctx, job)
if err != nil {
Expand All @@ -329,7 +346,7 @@ func (s *Service) Submit(ctx context.Context, route TemplateID, input SignerSubm
s.mutex.Lock()
defer s.mutex.Unlock()

currentJob, ok, err := s.store.GetByRequestID(requestID)
currentJob, ok, err := s.store.GetByRequestID(job.RequestID)
if err != nil {
return StepResult{}, err
}
Expand Down Expand Up @@ -378,7 +395,7 @@ func (s *Service) Poll(ctx context.Context, route TemplateID, input SignerPollIn

transition, pollErr := s.engine.OnPoll(ctx, job)
if pollErr != nil {
if pollErr != errJobNotFound {
if !errors.Is(pollErr, errJobNotFound) {
return StepResult{}, pollErr
}
}
Expand All @@ -398,7 +415,7 @@ func (s *Service) Poll(ctx context.Context, route TemplateID, input SignerPollIn
return mapJobResult(currentJob), nil
}

if pollErr == errJobNotFound {
if errors.Is(pollErr, errJobNotFound) {
applyTransition(currentJob, &Transition{
State: JobStateFailed,
Reason: ReasonJobNotFound,
Expand Down
Loading
Loading