@@ -279,11 +279,12 @@ func buildCharClassSearchers(
279279 strategy Strategy ,
280280 re * syntax.Regexp ,
281281 nfaEngine * nfa.NFA ,
282+ btNFA * nfa.NFA , // NFA for BoundedBacktracker (runeNFA when available, else nfaEngine)
282283) charClassSearcherResult {
283284 result := charClassSearcherResult {finalStrategy : strategy }
284285
285286 if strategy == UseBoundedBacktracker {
286- result .boundedBT = nfa .NewBoundedBacktracker (nfaEngine )
287+ result .boundedBT = nfa .NewBoundedBacktracker (btNFA )
287288 }
288289
289290 if strategy == UseCharClassSearcher {
@@ -298,7 +299,7 @@ func buildCharClassSearchers(
298299 } else {
299300 // Fallback to BoundedBacktracker if extraction fails
300301 result .finalStrategy = UseBoundedBacktracker
301- result .boundedBT = nfa .NewBoundedBacktracker (nfaEngine )
302+ result .boundedBT = nfa .NewBoundedBacktracker (btNFA )
302303 }
303304 }
304305
@@ -309,7 +310,7 @@ func buildCharClassSearchers(
309310 if result .compositeSrch == nil {
310311 // Fallback to BoundedBacktracker if extraction fails
311312 result .finalStrategy = UseBoundedBacktracker
312- result .boundedBT = nfa .NewBoundedBacktracker (nfaEngine )
313+ result .boundedBT = nfa .NewBoundedBacktracker (btNFA )
313314 } else {
314315 // Try to build faster DFA (uses subset construction for overlapping patterns)
315316 result .compositeSeqDFA = nfa .NewCompositeSequenceDFA (re )
@@ -334,7 +335,7 @@ func buildCharClassSearchers(
334335 if result .branchDispatcher == nil {
335336 // Fallback to BoundedBacktracker if dispatch not possible
336337 result .finalStrategy = UseBoundedBacktracker
337- result .boundedBT = nfa .NewBoundedBacktracker (nfaEngine )
338+ result .boundedBT = nfa .NewBoundedBacktracker (btNFA )
338339 }
339340 }
340341
@@ -343,12 +344,63 @@ func buildCharClassSearchers(
343344 // generation-based visited tracking (O(1) reset) vs PikeVM's thread queues.
344345 // This is similar to how stdlib uses backtracking for simple patterns.
345346 if result .finalStrategy == UseNFA && result .boundedBT == nil && nfaEngine .States () < 50 {
346- result .boundedBT = nfa .NewBoundedBacktracker (nfaEngine )
347+ result .boundedBT = nfa .NewBoundedBacktracker (btNFA )
347348 }
348349
349350 return result
350351}
351352
353+ // buildDotOptimizedNFAs compiles optimized NFA variants for patterns with '.'.
354+ // Returns:
355+ // - asciiNFA: NFA with '.' compiled as single ASCII byte range (for ASCII-only input)
356+ // - asciiBT: BoundedBacktracker for asciiNFA
357+ // - runeNFA: NFA with '.' compiled as sparse dispatch (fewer split states for PikeVM)
358+ func buildDotOptimizedNFAs (
359+ re * syntax.Regexp , config Config ,
360+ ) (* nfa.NFA , * nfa.BoundedBacktracker , * nfa.NFA ) {
361+ if ! nfa .ContainsDot (re ) {
362+ return nil , nil , nil
363+ }
364+
365+ // ASCII-only NFA (V11-002 optimization):
366+ // compile '.' as single byte range [0x00-0x7F] for ASCII-only inputs.
367+ var asciiNFAEngine * nfa.NFA
368+ var asciiBT * nfa.BoundedBacktracker
369+ if config .EnableASCIIOptimization {
370+ asciiCompiler := nfa .NewCompiler (nfa.CompilerConfig {
371+ UTF8 : true ,
372+ Anchored : false ,
373+ DotNewline : false ,
374+ ASCIIOnly : true ,
375+ MaxRecursionDepth : config .MaxRecursionDepth ,
376+ })
377+ var err error
378+ asciiNFAEngine , err = asciiCompiler .CompileRegexp (re )
379+ if err == nil {
380+ asciiBT = nfa .NewBoundedBacktracker (asciiNFAEngine )
381+ }
382+ }
383+
384+ // Sparse-dispatch NFA: compile '.' as a single sparse state mapping each
385+ // leading byte range to the correct continuation chain. This eliminates
386+ // ~9 split states per dot, giving PikeVM O(1) dispatch instead of
387+ // O(branches) split-chain DFS. Measured 2.8-4.8x PikeVM speedup.
388+ var runeNFAEngine * nfa.NFA
389+ runeCompiler := nfa .NewCompiler (nfa.CompilerConfig {
390+ UTF8 : true ,
391+ Anchored : false ,
392+ DotNewline : false ,
393+ UseRuneStates : true ,
394+ MaxRecursionDepth : config .MaxRecursionDepth ,
395+ })
396+ runeNFAEngine , err := runeCompiler .CompileRegexp (re )
397+ if err != nil {
398+ runeNFAEngine = nil
399+ }
400+
401+ return asciiNFAEngine , asciiBT , runeNFAEngine
402+ }
403+
352404// CompileRegexp compiles a parsed syntax.Regexp with default configuration.
353405//
354406// This is useful when you already have a parsed regexp from another source.
@@ -373,25 +425,8 @@ func CompileRegexp(re *syntax.Regexp, config Config) (*Engine, error) {
373425 }
374426 }
375427
376- // Compile ASCII-only NFA for patterns with '.' (V11-002 optimization).
377- // This enables runtime ASCII detection: if input is all ASCII, use the faster
378- // ASCII NFA which has ~2.8x fewer states for '.'-heavy patterns.
379- var asciiNFAEngine * nfa.NFA
380- var asciiBT * nfa.BoundedBacktracker
381- if nfa .ContainsDot (re ) && config .EnableASCIIOptimization {
382- asciiCompiler := nfa .NewCompiler (nfa.CompilerConfig {
383- UTF8 : true ,
384- Anchored : false ,
385- DotNewline : false ,
386- ASCIIOnly : true , // Key: compile '.' as single byte range
387- MaxRecursionDepth : config .MaxRecursionDepth ,
388- })
389- asciiNFAEngine , err = asciiCompiler .CompileRegexp (re )
390- if err == nil {
391- asciiBT = nfa .NewBoundedBacktracker (asciiNFAEngine )
392- }
393- // If ASCII NFA compilation fails, we fall back to UTF-8 NFA (asciiNFAEngine stays nil)
394- }
428+ // Compile optimized NFA variants for patterns with '.'
429+ asciiNFAEngine , asciiBT , runeNFAEngine := buildDotOptimizedNFAs (re , config )
395430
396431 // Extract literals for prefiltering
397432 // NOTE: Don't build prefilter for start-anchored patterns (^...).
@@ -418,8 +453,14 @@ func CompileRegexp(re *syntax.Regexp, config Config) (*Engine, error) {
418453 // Select strategy (pass re for anchor detection)
419454 strategy := SelectStrategy (nfaEngine , re , literals , config )
420455
421- // Build PikeVM (always needed for fallback)
422- pikevm := nfa .NewPikeVM (nfaEngine )
456+ // Build PikeVM (always needed for fallback).
457+ // Use runeNFA when available — sparse dispatch replaces ~9 split states
458+ // with a single sparse state, giving PikeVM O(1) byte dispatch per '.'.
459+ pikevmNFA := nfaEngine
460+ if runeNFAEngine != nil {
461+ pikevmNFA = runeNFAEngine
462+ }
463+ pikevm := nfa .NewPikeVM (pikevmNFA )
423464
424465 // Build OnePass DFA for anchored patterns with captures (optional optimization)
425466 onePassRes := buildOnePassDFA (re , nfaEngine , config )
@@ -428,8 +469,9 @@ func CompileRegexp(re *syntax.Regexp, config Config) (*Engine, error) {
428469 engines := buildStrategyEngines (strategy , re , nfaEngine , literals , pf , config )
429470 strategy = engines .finalStrategy
430471
431- // Build specialized searchers for character class patterns
432- charClassResult := buildCharClassSearchers (strategy , re , nfaEngine )
472+ // Build specialized searchers for character class patterns.
473+ // Pass pikevmNFA so BoundedBacktrackers benefit from rune states.
474+ charClassResult := buildCharClassSearchers (strategy , re , nfaEngine , pikevmNFA )
433475 strategy = charClassResult .finalStrategy
434476
435477 // Check if pattern can match empty string.
@@ -497,7 +539,7 @@ func CompileRegexp(re *syntax.Regexp, config Config) (*Engine, error) {
497539 // Fallback if detection fails (shouldn't happen since SelectStrategy checked)
498540 if anchoredLiteralInfo == nil {
499541 strategy = UseBoundedBacktracker
500- charClassResult .boundedBT = nfa .NewBoundedBacktracker (nfaEngine )
542+ charClassResult .boundedBT = nfa .NewBoundedBacktracker (pikevmNFA )
501543 }
502544 }
503545
@@ -506,6 +548,7 @@ func CompileRegexp(re *syntax.Regexp, config Config) (*Engine, error) {
506548
507549 return & Engine {
508550 nfa : nfaEngine ,
551+ runeNFA : runeNFAEngine ,
509552 asciiNFA : asciiNFAEngine ,
510553 asciiBoundedBacktracker : asciiBT ,
511554 dfa : engines .dfa ,
@@ -534,7 +577,7 @@ func CompileRegexp(re *syntax.Regexp, config Config) (*Engine, error) {
534577 canMatchEmpty : canMatchEmpty ,
535578 isStartAnchored : isStartAnchored ,
536579 fatTeddyFallback : fatTeddyFallback ,
537- statePool : newSearchStatePool (nfaEngine , numCaptures ),
580+ statePool : newSearchStatePool (pikevmNFA , numCaptures ),
538581 stats : Stats {},
539582 }, nil
540583}
0 commit comments