lightningnetwork · yyforyongyu · May 25, 2026 · May 25, 2026 · May 25, 2026 · May 25, 2026
diff --git a/docs/release-notes/release-notes-0.21.1.md b/docs/release-notes/release-notes-0.21.1.md
@@ -0,0 +1,62 @@
+# Release Notes
+- [Bug Fixes](#bug-fixes)
+- [New Features](#new-features)
+    - [Functional Enhancements](#functional-enhancements)
+    - [RPC Additions](#rpc-additions)
+    - [lncli Additions](#lncli-additions)
+- [Improvements](#improvements)
+    - [Functional Updates](#functional-updates)
+    - [RPC Updates](#rpc-updates)
+    - [lncli Updates](#lncli-updates)
+    - [Breaking Changes](#breaking-changes)
+    - [Performance Improvements](#performance-improvements)
+    - [Deprecations](#deprecations)
+- [Technical and Architectural Updates](#technical-and-architectural-updates)
+    - [BOLT Spec Updates](#bolt-spec-updates)
+    - [Testing](#testing)
+    - [Database](#database)
+    - [Code Health](#code-health)
+    - [Tooling and Documentation](#tooling-and-documentation)
+- [Contributors (Alphabetical Order)](#contributors-alphabetical-order)
+
+# Bug Fixes
+
+* The sweeper now isolates singleton inputs from a rejected sweep batch when
+  `testmempoolaccept` returns an unattributed non-fee mempool or script error,
+  avoiding fatal failure of the entire batch when only one input is invalid.
+
+# New Features
+
+## Functional Enhancements
+
+## RPC Additions
+
+## lncli Additions
+
+# Improvements
+
+## Functional Updates
+
+## RPC Updates
+
+## lncli Updates
+
+## Breaking Changes
+
+## Performance Improvements
+
+## Deprecations
+
+# Technical and Architectural Updates
+
+## BOLT Spec Updates
+
+## Testing
+
+## Database
+
+## Code Health
+
+## Tooling and Documentation
+
+# Contributors (Alphabetical Order)
diff --git a/sweep/README.md b/sweep/README.md
@@ -111,6 +111,23 @@ perform an RBF again in the coming blocks.
 for the first time, unless its budget has been used up, `TxPublisher` will
 guarantee that the initial publish meets the RBF requirements.
 
+`TxPublisher` is also responsible for diagnosing failures that require access to
+the exact transaction candidate and the chain backend's mempool acceptance
+oracle. If `testmempoolaccept` reports missing inputs, `TxPublisher` uses spend
+notifications to identify inputs spent by another transaction and reports them
+as `TxUnknownSpend`. If a multi-input batch is rejected with an unattributed
+non-fee mempool or script error, `TxPublisher` can build no-broadcast probe
+transactions for subsets of the original inputs to identify any input that fails
+by itself. Probe transactions are only tested for mempool acceptance; they are
+not published, stored, or monitored.
+
+`UtxoSweeper` owns pending input state and applies these diagnoses. Missing or
+unknown-spent inputs are removed through the existing `TxUnknownSpend` flow. For
+diagnosed mempool/script failures, only singleton inputs reported as bad are
+marked fatal; the remaining inputs in the batch are marked publish-failed so
+they can be retried by normal clustering. If probing is skipped or inconclusive,
+the batch remains retryable unless the failure is a singleton fatal error.
+
 #### `FeeFunction`
 
 `FeeFunction` is an interface that specifies a function over a starting fee
@@ -210,4 +227,3 @@ outputs minus the sum of their budgets. By default, 50% of this value is used
 as the budget, to customize it, either use
 `--sweeper.budget.anchorcpfp` to specify sats, or use
 `--sweeper.budget.anchorcpfpratio` to specify a ratio.
-
diff --git a/sweep/fee_bumper.go b/sweep/fee_bumper.go
@@ -47,6 +47,15 @@ var (
 	// ErrInputMissing is returned when a given input no longer exists,
 	// e.g., spending from an orphan tx.
 	ErrInputMissing = errors.New("input no longer exists")
+
+	// errMempoolRejected marks errors that came from mempool acceptance
+	// checks. It is used internally to avoid probing unrelated construction
+	// or signing errors.
+	errMempoolRejected = errors.New("mempool rejected tx")
+
+	// errBadInputNotFound is returned when bad-input diagnosis finishes
+	// without finding a singleton input that fails mempool acceptance.
+	errBadInputNotFound = errors.New("bad input not found")
 )
 
 var (
@@ -281,6 +290,12 @@ type BumpResult struct {
 	// current tx to be failed.
 	SpentInputs map[wire.OutPoint]*wire.MsgTx
 
+	// BadInputs are inputs that failed a singleton mempool acceptance
+	// probe. The fee bumper only diagnoses one bad input per failed batch,
+	// so this slice contains at most one outpoint. A nil slice means no bad
+	// input was diagnosed.
+	BadInputs []wire.OutPoint
+
 	// requestID is the ID of the request that created this record.
 	requestID uint64
 }
@@ -600,10 +615,6 @@ func (t *TxPublisher) createRBFCompliantTx(
 				}
 			}
 
-		// TODO(yy): suppose there's only one bad input, we can do a
-		// binary search to find out which input is causing this error
-		// by recreating a tx using half of the inputs and check its
-		// mempool acceptance.
 		default:
 			log.Debugf("Failed to create RBF-compliant tx: %v", err)
 			return nil, err
@@ -673,8 +684,139 @@ func (t *TxPublisher) createAndCheckTx(r *monitorRecord) (*sweepTxCtx, error) {
 		return sweepCtx, ErrInputMissing
 	}
 
-	return sweepCtx, fmt.Errorf("tx=%v failed mempool check: %w",
-		sweepCtx.tx.TxHash(), err)
+	return sweepCtx, fmt.Errorf("%w: tx=%v failed mempool check: %w",
+		errMempoolRejected, sweepCtx.tx.TxHash(), err)
+}
+
+// shouldDiagnoseBadInputs returns true if a mempool rejection should be
+// isolated with no-broadcast subset probes.
+func (t *TxPublisher) shouldDiagnoseBadInputs(r *monitorRecord,
+	err error) bool {
+
+	// Only mempool rejections can be diagnosed with mempool probes. Other
+	// errors happen during construction or signing and keep their existing
+	// fatal handling.
+	if !errors.Is(err, errMempoolRejected) {
+		return false
+	}
+
+	// A singleton rejection already identifies the only input in the batch,
+	// so there is nothing to isolate with subset probes.
+	if len(r.req.Inputs) <= 1 {
+		return false
+	}
+
+	// Aux sweepers may derive addresses or other sweep details from the
+	// full input set, so probing subsets would bypass their custom logic.
+	if t.cfg.AuxSweeper.IsSome() {
+		log.Infof(
+			"Skipping bad-input diagnosis for requestID=%v: aux "+
+				"sweeper is active", r.requestID,
+		)
+
+		return false
+	}
+
+	return true
+}
+
+// probeInputSet builds and mempool-tests a sweep transaction for the given
+// inputs without publishing, storing, or monitoring it.
+func (t *TxPublisher) probeInputSet(r *monitorRecord,
+	inputs []input.Input) error {
+
+	sweepCtx, err := t.createSweepTx(
+		inputs, r.req.DeliveryAddress, r.feeFunction.FeeRate(),
+	)
+	if err != nil {
+		return fmt.Errorf("create probe sweep tx: %w", err)
+	}
+
+	err = t.cfg.Wallet.CheckMempoolAcceptance(sweepCtx.tx)
+	if err == nil {
+		return nil
+	}
+
+	// Missing-input failures have a dedicated handler that can inspect the
+	// spend and retry the unspent inputs. Keep that flow distinct from the
+	// generic mempool rejection sentinel below.
+	if errors.Is(err, chain.ErrMissingInputs) {
+		log.Debugf("Probe tx %v missing inputs", sweepCtx.tx.TxHash())
+
+		return ErrInputMissing
+	}
+
+	log.Infof("Probe tx=%v with %v inputs failed mempool check: %v",
+		sweepCtx.tx.TxHash(), len(inputs), err)
+
+	return errMempoolRejected
+}
+
+// findBadInput binary-searches a rejected input batch with no-broadcast
+// mempool probes to find a single input that fails by itself. Each iteration
+// probes the left half of the current set. If that half is rejected, the search
+// narrows left; otherwise it searches the right half. The search stops as
+// soon as a singleton mempool rejection is found. If the final singleton is
+// accepted, then no individual bad input could be identified.
+func (t *TxPublisher) findBadInput(r *monitorRecord) (wire.OutPoint, error) {
+	var search func([]input.Input) (wire.OutPoint, error)
+	search = func(inputs []input.Input) (wire.OutPoint, error) {
+		switch len(inputs) {
+		case 0:
+			return wire.OutPoint{}, errBadInputNotFound
+
+		case 1:
+			err := t.probeInputSet(r, inputs)
+			switch {
+			case err == nil:
+				log.Warnf(
+					"Bad-input diagnosis for requestID=%v "+
+						"found no singleton bad input",
+					r.requestID,
+				)
+
+				return wire.OutPoint{}, errBadInputNotFound
+
+			case errors.Is(err, ErrInputMissing):
+				return wire.OutPoint{}, ErrInputMissing
+
+			case errors.Is(err, errMempoolRejected):
+				return inputs[0].OutPoint(), nil
+
+			default:
+				log.Warnf(
+					"Stop diagnosis: requestID=%v, "+
+						"singleton probe err: %v",
+					r.requestID, err,
+				)
+
+				return wire.OutPoint{}, err
+			}
+		}
+
+		mid := len(inputs) / 2
+		left := inputs[:mid]
+		err := t.probeInputSet(r, left)
+		switch {
+		case err == nil:
+			return search(inputs[mid:])
+
+		case errors.Is(err, ErrInputMissing):
+			return wire.OutPoint{}, ErrInputMissing
+
+		case errors.Is(err, errMempoolRejected):
+			return search(left)
+
+		default:
+			log.Warnf("Stopping bad-input diagnosis for "+
+				"requestID=%v: probe for %v inputs failed: %v",
+				r.requestID, len(left), err)
+
+			return wire.OutPoint{}, err
+		}
+	}
+
+	return search(r.req.Inputs)
 }
 
 // handleMissingInputs handles the case when the chain backend reports back a
@@ -714,7 +856,7 @@ func (t *TxPublisher) handleMissingInputs(r *monitorRecord) *BumpResult {
 	// current sweeping tx has been failed due to missing inputs, the
 	// spending tx must be a different tx, thus it should NOT be matched. We
 	// perform a sanity check here to catch the unexpected state.
-	if !t.isUnknownSpent(r, spends) {
+	if r.tx != nil && !t.isUnknownSpent(r, spends) {
 		log.Errorf("Sweeping tx %v has missing inputs, yet the "+
 			"spending tx is the sweeping tx itself: %v",
 			r.tx.TxHash(), r.spentInputs)
@@ -1090,6 +1232,41 @@ func (t *TxPublisher) handleTxConfirmed(r *monitorRecord) {
 	t.handleResult(result)
 }
 
+// handleBadInputs handles a non-fee mempool rejection by trying to identify a
+// single input that fails mempool acceptance by itself.
+func (t *TxPublisher) handleBadInputs(r *monitorRecord,
+	err error) *BumpResult {
+
+	result := &BumpResult{
+		Err:       err,
+		requestID: r.requestID,
+	}
+
+	if !t.shouldDiagnoseBadInputs(r, err) {
+		result.Event = TxFatal
+
+		return result
+	}
+
+	badInput, probeErr := t.findBadInput(r)
+	if errors.Is(probeErr, ErrInputMissing) {
+		return t.handleMissingInputs(r)
+	}
+
+	result.Event = TxFailed
+	if r.feeFunction != nil {
+		result.FeeRate = r.feeFunction.FeeRate()
+	}
+
+	if probeErr != nil {
+		return result
+	}
+
+	result.BadInputs = []wire.OutPoint{badInput}
+
+	return result
+}
+
 // handleInitialTxError takes the error from `initializeTx` and decides the
 // bump event. It will construct a BumpResult and handles it.
 func (t *TxPublisher) handleInitialTxError(r *monitorRecord, err error) {
@@ -1142,15 +1319,11 @@ func (t *TxPublisher) handleInitialTxError(r *monitorRecord, err error) {
 	case errors.Is(err, ErrInputMissing):
 		result = t.handleMissingInputs(r)
 
-	// Otherwise this is not a fee-related error and the tx cannot be
-	// retried. In that case we will fail ALL the inputs in this tx, which
-	// means they will be removed from the sweeper and never be tried
-	// again.
-	//
-	// TODO(yy): Find out which input is causing the failure and fail that
-	// one only.
+	// Otherwise this may be a non-fee mempool rejection. For multi-input
+	// batches, try to isolate singleton bad inputs before deciding whether
+	// the whole set is fatal.
 	default:
-		result.Event = TxFatal
+		result = t.handleBadInputs(r, err)
 	}
 
 	t.handleResult(result)