From 92ef716bbac2893fcf4b9e858e7db64d8a89cae0 Mon Sep 17 00:00:00 2001 From: Augusto Xavier Date: Thu, 11 Jun 2026 06:59:33 -0300 Subject: [PATCH] Fix polynomial ReDoS in `regexify` alternation matching The `/\((.*?)\)/` pattern used by `Faker::Base#regexify` to expand alternation groups (e.g. `(this|that)`) runs in polynomial time on uncontrolled locale input: because `.*?` can also match `(`, an input such as `(a(a(a...` forces super-linear backtracking. This is the remaining "Polynomial regular expression used on uncontrolled data" code-scanning alert. Restrict the captured content to non-parenthesis characters (`[^()]*`), which cannot backtrack across a delimiter and so matches in linear time. This mirrors the possessive-quantifier hardening applied to the sibling regexes in the same method in #3196, and matches `regexify`'s documented contract that it does not handle nested parentheses. Output is unchanged for all supported patterns. [Fix #3183] --- lib/faker.rb | 2 +- test/test_faker.rb | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/faker.rb b/lib/faker.rb index a5d7d472b9..29eb6d9a7e 100644 --- a/lib/faker.rb +++ b/lib/faker.rb @@ -110,7 +110,7 @@ def regexify(reg) .gsub(/(\[[^\]]++\])\{(\d+),(\d+)\}/) { |_match| Regexp.last_match(1) * sample(Array(Range.new(Regexp.last_match(2).to_i, Regexp.last_match(3).to_i))) } # [12]{1,2} becomes [12] or [12][12] .gsub(/(\([^)]++\))\{(\d+),(\d+)\}/) { |_match| Regexp.last_match(1) * sample(Array(Range.new(Regexp.last_match(2).to_i, Regexp.last_match(3).to_i))) } # (12|34){1,2} becomes (12|34) or (12|34)(12|34) .gsub(/(\\?.)\{(\d+),(\d+)\}/) { |_match| Regexp.last_match(1) * sample(Array(Range.new(Regexp.last_match(2).to_i, Regexp.last_match(3).to_i))) } # A{1,2} becomes A or AA or \d{3} becomes \d\d\d - .gsub(/\((.*?)\)/) { |match| sample(match.gsub(/[()]/, '').split('|')) } # (this|that) becomes 'this' or 'that' + .gsub(/\(([^()]*)\)/) { |match| sample(match.gsub(/[()]/, '').split('|')) } # (this|that) becomes 'this' or 'that' .gsub(/\[([^\]]++)\]/) { |match| match.gsub(/(\w-\w)/) { |range| sample(Array(Range.new(*range.split('-')))) } } # All A-Z inside of [] become C (or X, or whatever) .gsub(/\[([^\]]++)\]/) { |_match| sample(Regexp.last_match(1).chars) } # All [ABC] become B (or A or C) .gsub('\d') { |_match| sample(Numbers) } diff --git a/test/test_faker.rb b/test/test_faker.rb index dc564432ac..7155f5f8aa 100644 --- a/test/test_faker.rb +++ b/test/test_faker.rb @@ -27,7 +27,8 @@ def test_letterify def test_regexify { 'uk post code' => /^([A-PR-UWYZ0-9][A-HK-Y0-9][AEHMNPRTVXY0-9]?[ABEHMNPRVWXY0-9]? {1,2}[0-9][ABD-HJLN-UW-Z]{2}|GIR 0AA)$/, - 'us phone' => /^(1-?)[2-8][0-1][0-9]-\d{3}-\d{4}$/ + 'us phone' => /^(1-?)[2-8][0-1][0-9]-\d{3}-\d{4}$/, + 'alternation group' => /^(foo|bar|baz)$/ }.each do |label, re| deterministically_verify -> { Faker::Base.regexify(re) } do |result| assert_match re, result, "#{result} is not a match for #{label}"