diff --git a/CHANGELOG.md b/CHANGELOG.md index ef6381f..5d24ba8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,48 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - ARM NEON SIMD support (Go 1.26 `simd/archsimd` intrinsics — [#120](https://github.com/coregx/coregex/issues/120)) - SIMD prefilter for CompositeSequenceDFA (#83) +## [0.12.8] - 2026-03-10 + +### Performance +- **Streaming ReplaceAll — single-pass without `[][]int` allocation** (Issue [#135](https://github.com/coregx/coregex/issues/135)) — + `ReplaceAllStringFunc`, `ReplaceAllFunc`, `ReplaceAllLiteral`, and `ReplaceAllLiteralString` + converted from two-pass (collect all match indices → iterate) to single-pass streaming. + Eliminates `[][]int` allocation for high-match-count inputs (e.g., 800KB for 50K matches). + Returns original string when no matches (Cow-like optimization, avoids copy). + +- **DFA-first FindSubmatchAt — PikeVM on match span only** (Issue [#135](https://github.com/coregx/coregex/issues/135)) — + Implements Rust-style two-phase search for capture extraction: + Phase 1: DFA/strategy finds match boundaries `[start, end]` — O(n) fast scan. + Phase 2: PikeVM runs anchored within `[start..end]` for captures — O(match_len). + Reduces PikeVM work from O(remaining_haystack) to O(match_len) per match. + For 50K matches on 10MB: ~400x less PikeVM work. Also adds `is_capture_search_needed` + optimization: when only group 0 is needed, PikeVM is skipped entirely. + +- **FindAllSubmatch state reuse** — acquires `SearchState` once for entire iteration loop, + eliminating per-match `sync.Pool` get/put overhead. Critical for race detector performance. + +### Fixed +- **FindAllSubmatch context loss** — `FindAllSubmatch` previously sliced the haystack + (`haystack[pos:]`), losing lookbehind context for `\b` word boundary assertions at + match boundaries. Now uses `FindSubmatchAt` with full haystack preservation. 
+ +- **BoundedBacktracker stack overflow on 386/macOS** — two-phase search routed through + `BoundedBacktracker` for Phase 1, causing recursive stack overflow on large inputs + with deep UTF-8 NFA chains (386/macOS 250MB stack limit). Fix: strategies using + BoundedBacktracker and NFA bypass two-phase search, going directly to pooled PikeVM. + +- **`\B` false positive at end of input** — `SearchWithCapturesAt` at `at==len(haystack)` + used `matchesEmpty()` which evaluates with `nil,0`, losing lookbehind context. + For `\B` at position 2 of "xx", left='x' (word char) means word boundary, so `\B` + should NOT match — but context loss caused a false positive. Fix: uses + `matchesEmptyAt(haystack, at)` to preserve full context. + +- **Data race in concurrent FindSubmatch** — strategies `UseDFA`, `UseBoth`, and + `UseDigitPrefilter` access shared mutable state (`e.dfa` lazy DFA, `e.pikevm`) in + their `findIndicesAt` dispatch paths. Concurrent `FindSubmatch` calls raced on this + shared state. Fix: these strategies bypass two-phase search, going directly to + pooled `state.pikevm.SearchWithCapturesAt()` which is thread-safe by design. 
+ ## [0.12.7] - 2026-03-10 ### Performance diff --git a/ROADMAP.md b/ROADMAP.md index 81249e7..89df4a5 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -2,7 +2,7 @@ > **Strategic Focus**: Production-grade regex engine with RE2/rust-regex level optimizations -**Last Updated**: 2026-03-10 | **Current Version**: v0.12.7 | **Target**: v1.0.0 stable +**Last Updated**: 2026-03-10 | **Current Version**: v0.12.8 | **Target**: v1.0.0 stable --- @@ -12,7 +12,7 @@ Build a **production-ready, high-performance regex engine** for Go that matches ### Current State vs Target -| Metric | Current (v0.12.7) | Target (v1.0.0) | +| Metric | Current (v0.12.8) | Target (v1.0.0) | |--------|-------------------|-----------------| | Inner literal speedup | **280-3154x** | ✅ Achieved | | Case-insensitive speedup | **263x** | ✅ Achieved | @@ -70,7 +70,9 @@ v0.12.5 ✅ → Non-greedy quantifier fix, ReverseSuffix correctness (#124) ↓ v0.12.6 ✅ → BoundedBacktracker span-based CanHandle, ReplaceAllStringFunc O(n) (#127) ↓ -v0.12.7 (Current) ✅ → PikeVM sparse-dispatch for dot patterns, 2.8-4.8x speedup (#132) +v0.12.7 ✅ → PikeVM sparse-dispatch for dot patterns, 2.8-4.8x speedup (#132) + ↓ +v0.12.8 (Current) ✅ → Streaming ReplaceAll + DFA-first FindSubmatchAt (#135) ↓ v1.0.0-rc → Feature freeze, API locked ↓ @@ -106,6 +108,8 @@ v1.0.0 STABLE → Production release with API stability guarantee - ✅ **v0.12.5**: Non-greedy quantifier fix, ReverseSuffix forward verification (#124) - ✅ **v0.12.6**: BoundedBacktracker span-based CanHandle, ReplaceAllStringFunc O(n) (#127) - ✅ **v0.12.7**: PikeVM sparse-dispatch for `.` patterns, 2.8-4.8x speedup (#132) +- ✅ **v0.12.8**: Streaming ReplaceAll + DFA-first FindSubmatchAt, Rust-style two-phase search (#135) +- ✅ **v0.12.8**: Streaming ReplaceAll + DFA-first FindSubmatchAt, Rust-style two-phase search (#135) --- @@ -197,7 +201,7 @@ v1.0.0 STABLE → Production release with API stability guarantee ## Feature Comparison Matrix -| Feature | RE2 | rust-regex | coregex 
v0.12.7 | coregex v1.0 | +| Feature | RE2 | rust-regex | coregex v0.12.8 | coregex v1.0 | |---------|-----|------------|-----------------|--------------| | Lazy DFA | ✅ | ✅ | ✅ | ✅ | | Thompson NFA | ✅ | ✅ | ✅ | ✅ | @@ -355,7 +359,8 @@ Reference implementations available locally: | Version | Date | Type | Key Changes | |---------|------|------|-------------| -| **v0.12.7** | 2026-03-10 | Performance | **PikeVM sparse-dispatch for `.` patterns, 2.8-4.8x speedup (#132)** | +| **v0.12.8** | 2026-03-10 | Performance | **Streaming ReplaceAll + DFA-first FindSubmatchAt (#135)** | +| v0.12.7 | 2026-03-10 | Performance | PikeVM sparse-dispatch for `.` patterns, 2.8-4.8x speedup (#132) | | v0.12.6 | 2026-03-08 | Fix | BoundedBacktracker span-based CanHandle, ReplaceAllStringFunc O(n) (#127) | | v0.12.5 | 2026-03-08 | Fix | Non-greedy quantifier fix, ReverseSuffix correctness (#124) | | v0.12.4 | 2026-03-01 | Test | Test coverage 80%+, CI improvements, awesome-go readiness | @@ -396,4 +401,4 @@ Reference implementations available locally: --- -*Current: v0.12.7 | Next: v0.13.0 | Target: v1.0.0* +*Current: v0.12.8 | Target: v1.0.0* diff --git a/meta/findall.go b/meta/findall.go index 10e109d..ba80453 100644 --- a/meta/findall.go +++ b/meta/findall.go @@ -38,31 +38,92 @@ func (e *Engine) FindSubmatch(haystack []byte) *MatchWithCaptures { // This method is used by ReplaceAll* operations to correctly handle anchors like ^. // Unlike FindSubmatch, it takes the FULL haystack and a starting position. // Thread-safe: uses pooled state for both OnePass cache and PikeVM. +// +// Two-phase search (Rust-style optimization): +// +// Phase 1: DFA/strategy finds match boundaries [start, end] — O(n) fast scan +// Phase 2: PikeVM extracts captures within [start, end] — O(match_len) +// +// This reduces PikeVM work from O(remaining_haystack) to O(match_len) per match. +// For 50K matches on 10MB input: ~400x less PikeVM work. 
func (e *Engine) FindSubmatchAt(haystack []byte, at int) *MatchWithCaptures { - // Get pooled state first for thread-safe access + if at > len(haystack) { + return nil + } + + // Get pooled state for thread-safe access state := e.getSearchState() defer e.putSearchState(state) - // For position 0, try OnePass DFA if available (10-20x faster for anchored patterns) + return e.findSubmatchAtWithState(haystack, at, state) +} + +// findSubmatchAtWithState is the state-reusing internal version of FindSubmatchAt. +// Used by FindAllSubmatch to avoid per-match sync.Pool get/put overhead. +func (e *Engine) findSubmatchAtWithState(haystack []byte, at int, state *SearchState) *MatchWithCaptures { + // For position 0, try OnePass DFA if available (10-20x faster for anchored patterns). + // OnePass handles captures natively — no need for two-phase search. if at == 0 && e.onepass != nil && state.onepassCache != nil { atomic.AddUint64(&e.stats.OnePassSearches, 1) slots := e.onepass.Search(haystack, state.onepassCache) if slots != nil { - // Convert flat slots [start0, end0, start1, end1, ...] to nested captures captures := slotsToCaptures(slots) return NewMatchWithCaptures(haystack, captures) } - // OnePass failed (input doesn't match from position 0) - // Fall through to PikeVM which can find match anywhere + // OnePass failed — fall through to two-phase search } - atomic.AddUint64(&e.stats.NFASearches, 1) + // Strategies that must bypass two-phase search and go directly to PikeVM: + // + // Thread-safety: UseDFA, UseBoth, UseDigitPrefilter access shared mutable state + // (e.dfa lazy DFA, e.pikevm) that is NOT safe for concurrent access. + // findSubmatchAtWithState is called with a pooled SearchState, but Phase 1 + // dispatches to findIndicesDFAAt/findIndicesAdaptiveAt/findIndicesDigitPrefilterAt + // which use e.dfa and e.pikevm directly, causing data races. + // + // Performance: UseNFA Phase 1 uses the same PikeVM as Phase 2, so two-phase + // adds overhead without benefit. 
+ // + // Safety: UseBoundedBacktracker's recursive implementation can overflow the + // stack on large inputs with deep UTF-8 NFA chains (386/macOS 250MB limit). + switch e.strategy { + case UseBoundedBacktracker, UseNFA, + UseDFA, UseBoth, UseDigitPrefilter: + atomic.AddUint64(&e.stats.NFASearches, 1) + nfaMatch := state.pikevm.SearchWithCapturesAt(haystack, at) + if nfaMatch == nil { + return nil + } + return NewMatchWithCaptures(haystack, nfaMatch.Captures) + } - nfaMatch := state.pikevm.SearchWithCapturesAt(haystack, at) - if nfaMatch == nil { + // Phase 1: Use DFA/strategy to find match boundaries. + // This is the fast O(n) scan that locates [start, end] without captures. + start, end, found := e.findIndicesAtWithState(haystack, at, state) + if !found { return nil } + // Optimization: if only group 0 is needed (no sub-captures), skip PikeVM. + // The DFA result already provides exact [start, end] boundaries. + if e.nfa.CaptureCount() <= 1 { + captures := [][]int{{start, end}} + return NewMatchWithCaptures(haystack, captures) + } + + // Phase 2: PikeVM extracts captures within the narrow [start, end] span. + // The full haystack is passed for lookbehind context (\b at span boundary), + // but PikeVM only processes bytes within [start, end]. + atomic.AddUint64(&e.stats.NFASearches, 1) + nfaMatch := state.pikevm.SearchWithCapturesInSpan(haystack, start, end) + if nfaMatch == nil { + // Defensive fallback: DFA found a match but PikeVM disagrees. + nfaMatch = state.pikevm.SearchWithCapturesAt(haystack, at) + if nfaMatch == nil { + return nil + } + } + return NewMatchWithCaptures(haystack, nfaMatch.Captures) } @@ -253,6 +314,10 @@ func (e *Engine) Count(haystack []byte, n int) int { // FindAllSubmatch returns all successive matches with capture group information. // If n > 0, returns at most n matches. If n <= 0, returns all matches. // +// Uses DFA-first two-phase search: DFA finds match boundaries, then PikeVM +// extracts captures within the narrow match span. 
This reduces PikeVM work +// from O(remaining_haystack) to O(match_len) per match. +// // Example: // // engine, _ := meta.Compile(`(\w+)@(\w+)\.(\w+)`) @@ -265,35 +330,46 @@ func (e *Engine) FindAllSubmatch(haystack []byte, n int) []*MatchWithCaptures { var matches []*MatchWithCaptures pos := 0 + lastMatchEnd := -1 + + // Get state ONCE for entire iteration — eliminates sync.Pool overhead per match. + // Critical for race detector performance (10+ minute timeout without this). + state := e.getSearchState() + defer e.putSearchState(state) for pos <= len(haystack) { - // Use PikeVM for capture extraction - atomic.AddUint64(&e.stats.NFASearches, 1) - nfaMatch := e.pikevm.SearchWithCaptures(haystack[pos:]) - if nfaMatch == nil { + match := e.findSubmatchAtWithState(haystack, pos, state) + if match == nil { break } - // Adjust captures to absolute positions - // Captures is [][]int where each element is [start, end] for a group - adjustedCaptures := make([][]int, len(nfaMatch.Captures)) - for i, cap := range nfaMatch.Captures { - if len(cap) >= 2 && cap[0] >= 0 { - adjustedCaptures[i] = []int{pos + cap[0], pos + cap[1]} - } else { - adjustedCaptures[i] = nil // Unmatched group + matchStart := match.Start() + matchEnd := match.End() + + // Skip empty matches at the end of previous non-empty match (stdlib behavior) + //nolint:gocritic // badCond: intentional - checking empty match at lastMatchEnd + if matchStart == matchEnd && matchStart == lastMatchEnd { + pos++ + if pos > len(haystack) { + break } + continue } - match := NewMatchWithCaptures(haystack, adjustedCaptures) matches = append(matches, match) + // Track non-empty match ends for the skip rule + if matchStart != matchEnd { + lastMatchEnd = matchEnd + } + // Move position past this match - end := nfaMatch.End - if end > 0 { - pos += end - } else { - // Empty match: advance by 1 to avoid infinite loop + switch { + case matchStart == matchEnd: + pos = matchEnd + 1 + case matchEnd > pos: + pos = matchEnd + default: 
pos++ } diff --git a/nfa/pikevm.go b/nfa/pikevm.go index 7e5e13d..9fcd7ae 100644 --- a/nfa/pikevm.go +++ b/nfa/pikevm.go @@ -910,8 +910,10 @@ func (p *PikeVM) SearchWithCapturesAt(haystack []byte, at int) *MatchWithCapture } if at == len(haystack) { - // At end of input - check if empty string matches - if p.matchesEmpty() { + // At end of input - check if empty string matches at this position. + // Must use matchesEmptyAt with full haystack context for correct + // look assertion evaluation (e.g., \B needs previous byte context). + if p.matchesEmptyAt(haystack, at) { return &MatchWithCaptures{ Start: at, End: at, @@ -922,8 +924,8 @@ func (p *PikeVM) SearchWithCapturesAt(haystack []byte, at int) *MatchWithCapture } if len(haystack) == 0 { - // Check if empty string matches - if p.matchesEmpty() { + // Check if empty string matches (haystack is empty, pos=0) + if p.matchesEmptyAt(haystack, 0) { return &MatchWithCaptures{ Start: 0, End: 0, @@ -1085,6 +1087,97 @@ func (p *PikeVM) searchAtWithCaptures(haystack []byte, startPos int) *MatchWithC return nil } +// SearchWithCapturesInSpan searches for a match anchored at spanStart, +// not exceeding spanEnd. The full haystack is preserved for lookbehind +// context (e.g., \b word boundary assertions at spanStart-1). +// +// This implements Phase 2 of the DFA-first two-phase search: +// +// Phase 1: DFA/strategy finds match boundaries [spanStart, spanEnd] +// Phase 2: PikeVM extracts captures within [spanStart, spanEnd] +// +// The search is anchored: threads are seeded only at spanStart, not at +// every position. This reduces PikeVM work from O(remaining_haystack) +// to O(match_len) per match. +// +// Preconditions: +// - 0 <= spanStart <= spanEnd <= len(haystack) +// - A match is known to exist in [spanStart, spanEnd] (from Phase 1) +// +// Returns nil if no match is found (should not happen if Phase 1 is correct). 
+// +//nolint:gocognit // Merged match-check + step loop (Rust's nexts pattern) is inherently complex +func (p *PikeVM) SearchWithCapturesInSpan(haystack []byte, spanStart, spanEnd int) *MatchWithCaptures { + if spanStart > spanEnd || spanEnd > len(haystack) { + return nil + } + + // Reset state + p.internalState.Queue = p.internalState.Queue[:0] + p.internalState.NextQueue = p.internalState.NextQueue[:0] + p.internalState.Visited.Clear() + + // Seed thread only at spanStart (anchored search within span) + caps := p.newCaptures() + p.addThread(thread{state: p.nfa.StartAnchored(), startPos: spanStart, captures: caps}, haystack, spanStart) + + lastMatchPos := -1 + var lastMatchCaptures []int + + // Process bytes from spanStart to spanEnd (not len(haystack)). + // The full haystack slice is kept so that addThread/step can evaluate + // lookbehind assertions (\b) using bytes before spanStart. + for pos := spanStart; pos <= spanEnd; pos++ { + if pos < spanEnd { + b := haystack[pos] + p.internalState.Visited.Clear() + for _, t := range p.internalState.Queue { + if p.nfa.IsMatch(t.state) { + if pos > lastMatchPos || lastMatchPos == -1 { + lastMatchPos = pos + lastMatchCaptures = t.captures.copyData() + } + if !p.internalState.Longest { + break + } + continue + } + p.step(t, b, haystack, pos+1) + } + } else { + // At spanEnd: only check for match states, don't step further + for _, t := range p.internalState.Queue { + if p.nfa.IsMatch(t.state) { + if pos > lastMatchPos || lastMatchPos == -1 { + lastMatchPos = pos + lastMatchCaptures = t.captures.copyData() + } + break + } + } + } + + if len(p.internalState.NextQueue) == 0 && (pos >= spanEnd || lastMatchPos != -1) { + break + } + + if pos >= spanEnd { + break + } + + p.internalState.Queue, p.internalState.NextQueue = p.internalState.NextQueue, p.internalState.Queue[:0] + } + + if lastMatchPos != -1 { + return &MatchWithCaptures{ + Start: spanStart, + End: lastMatchPos, + Captures: p.buildCapturesResult(lastMatchCaptures, 
spanStart, lastMatchPos), + } + } + return nil +} + // buildCapturesResult converts internal capture slots to the result format func (p *PikeVM) buildCapturesResult(caps []int, matchStart, matchEnd int) [][]int { numGroups := p.nfa.CaptureCount() diff --git a/regex.go b/regex.go index 0a86eb0..4795229 100644 --- a/regex.go +++ b/regex.go @@ -841,38 +841,64 @@ func (r *Regex) FindAllStringIndex(s string, n int) [][]int { // result := re.ReplaceAllLiteral([]byte("age: 42"), []byte("XX")) // // result = []byte("age: XX") func (r *Regex) ReplaceAllLiteral(src, repl []byte) []byte { - indices := r.FindAllIndex(src, -1) - if len(indices) == 0 { - // No matches, return copy of src - result := make([]byte, len(src)) - copy(result, src) - return result - } + var result []byte + lastEnd := 0 + pos := 0 + lastMatchEnd := -1 + matched := false - // Pre-allocate result buffer - // Estimate: len(src) + (len(repl)-avgMatchLen)*numMatches - totalMatchLen := 0 - for _, idx := range indices { - totalMatchLen += idx[1] - idx[0] - } - avgMatchLen := totalMatchLen / len(indices) - estimatedLen := len(src) + (len(repl)-avgMatchLen)*len(indices) - if estimatedLen < 0 { - estimatedLen = len(src) - } + for { + start, end, found := r.engine.FindIndicesAt(src, pos) + if !found { + break + } - result := make([]byte, 0, estimatedLen) - lastEnd := 0 + // Skip empty matches at the position where a non-empty match just ended. + // This matches Go stdlib behavior (see FindAllIndex for details). + //nolint:gocritic // badCond: intentional - checking empty match at lastMatchEnd + if start == end && start == lastMatchEnd { + pos++ + if pos > len(src) { + break + } + continue + } - for _, idx := range indices { - // Append text before match - result = append(result, src[lastEnd:idx[0]]...) - // Append replacement + if !matched { + // Lazy allocation on first match + result = make([]byte, 0, len(src)) + matched = true + } + + result = append(result, src[lastEnd:start]...) 
result = append(result, repl...) - lastEnd = idx[1] + lastEnd = end + + if start != end { + lastMatchEnd = end + } + + switch { + case start == end: + pos = end + 1 + case end > pos: + pos = end + default: + pos++ + } + + if pos > len(src) { + break + } + } + + if !matched { + // No matches: return a copy of src (stdlib compatibility) + out := make([]byte, len(src)) + copy(out, src) + return out } - // Append remaining text result = append(result, src[lastEnd:]...) return result } @@ -887,7 +913,61 @@ func (r *Regex) ReplaceAllLiteral(src, repl []byte) []byte { // result := re.ReplaceAllLiteralString("age: 42", "XX") // // result = "age: XX" func (r *Regex) ReplaceAllLiteralString(src, repl string) string { - return string(r.ReplaceAllLiteral([]byte(src), []byte(repl))) + b := stringToBytes(src) + var buf strings.Builder + lastEnd := 0 + pos := 0 + lastMatchEnd := -1 + matched := false + + for { + start, end, found := r.engine.FindIndicesAt(b, pos) + if !found { + break + } + + //nolint:gocritic // badCond: intentional - checking empty match at lastMatchEnd + if start == end && start == lastMatchEnd { + pos++ + if pos > len(src) { + break + } + continue + } + + if !matched { + buf.Grow(len(src)) + matched = true + } + + buf.WriteString(src[lastEnd:start]) + buf.WriteString(repl) + lastEnd = end + + if start != end { + lastMatchEnd = end + } + + switch { + case start == end: + pos = end + 1 + case end > pos: + pos = end + default: + pos++ + } + + if pos > len(src) { + break + } + } + + if !matched { + return src + } + + buf.WriteString(src[lastEnd:]) + return buf.String() } // Expand appends template to dst and returns the result; during the @@ -1107,27 +1187,61 @@ func (r *Regex) ReplaceAllString(src, repl string) string { // }) // // result = []byte("2 4 6") func (r *Regex) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte { - indices := r.FindAllIndex(src, -1) - if len(indices) == 0 { - // No matches, return copy of src - result := make([]byte, 
len(src)) - copy(result, src) - return result - } - var result []byte lastEnd := 0 + pos := 0 + lastMatchEnd := -1 + matched := false - for _, idx := range indices { - // Append text before match - result = append(result, src[lastEnd:idx[0]]...) - // Apply replacement function - replacement := repl(src[idx[0]:idx[1]]) - result = append(result, replacement...) - lastEnd = idx[1] + for { + start, end, found := r.engine.FindIndicesAt(src, pos) + if !found { + break + } + + //nolint:gocritic // badCond: intentional - checking empty match at lastMatchEnd + if start == end && start == lastMatchEnd { + pos++ + if pos > len(src) { + break + } + continue + } + + if !matched { + result = make([]byte, 0, len(src)) + matched = true + } + + result = append(result, src[lastEnd:start]...) + result = append(result, repl(src[start:end])...) + lastEnd = end + + if start != end { + lastMatchEnd = end + } + + switch { + case start == end: + pos = end + 1 + case end > pos: + pos = end + default: + pos++ + } + + if pos > len(src) { + break + } + } + + if !matched { + // No matches: return a copy of src (stdlib compatibility) + out := make([]byte, len(src)) + copy(out, src) + return out } - // Append remaining text result = append(result, src[lastEnd:]...) 
return result } @@ -1146,19 +1260,57 @@ func (r *Regex) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte { // }) // // result = "2 4 6" func (r *Regex) ReplaceAllStringFunc(src string, repl func(string) string) string { - indices := r.FindAllStringIndex(src, -1) - if len(indices) == 0 { - return src - } - + b := stringToBytes(src) var buf strings.Builder - buf.Grow(len(src)) lastEnd := 0 + pos := 0 + lastMatchEnd := -1 + matched := false - for _, idx := range indices { - buf.WriteString(src[lastEnd:idx[0]]) - buf.WriteString(repl(src[idx[0]:idx[1]])) - lastEnd = idx[1] + for { + start, end, found := r.engine.FindIndicesAt(b, pos) + if !found { + break + } + + //nolint:gocritic // badCond: intentional - checking empty match at lastMatchEnd + if start == end && start == lastMatchEnd { + pos++ + if pos > len(src) { + break + } + continue + } + + if !matched { + buf.Grow(len(src)) + matched = true + } + + buf.WriteString(src[lastEnd:start]) + buf.WriteString(repl(src[start:end])) + lastEnd = end + + if start != end { + lastMatchEnd = end + } + + switch { + case start == end: + pos = end + 1 + case end > pos: + pos = end + default: + pos++ + } + + if pos > len(src) { + break + } + } + + if !matched { + return src } buf.WriteString(src[lastEnd:])