From 3dd2349b20fd5ef360ad29c68ef85799d6c55854 Mon Sep 17 00:00:00 2001
From: uwezkhan06
Date: Fri, 24 Apr 2026 16:29:30 +0530
Subject: [PATCH] Hardening: Implement nesting depth limits and systematic overflow protection

---
Review notes (free text between the "---" separator and the first
"diff --git" line is not part of the commit message):

* pod_array.h: ABSL_CHECK_GE expands to a statement, so it cannot be used as
  an operand of the comma operator inside the member initializer.  The check
  now runs inside an immediately invoked lambda that returns len.
* parse.cc: depth_ is incremented only by DoLeftParen, but DoRightParen
  decremented it for every ')'.  The decrement is now skipped when re was
  replaced by r1.  NOTE(review): this assumes the branch ending in "re = r1;"
  is the non-capturing case; confirm against the full function.
  Non-capturing groups are still not counted towards the limit; if they
  should be, the matching left-paren helper needs the same guard.
* regexp.h: FUZZING_ONLY_set_maximum_nesting_depth is documented like its
  sibling setter.
* repro_decref.cc is a standalone program with its own main(); consider
  moving it out of re2/ or turning it into a parser test.
* This copy was rebuilt from a whitespace-collapsed patch.  Line breaks,
  indentation, the <...> include names and std::allocator<T> are
  reconstructions; verify them or apply with --ignore-whitespace.  The
  post-image blob ids on the "index" lines predate the edits above.

 re2/parse.cc        | 21 ++++++++++++++++++++-
 re2/pod_array.h     |  9 ++++++++-
 re2/regexp.cc       |  4 ++++
 re2/regexp.h        |  5 +++++
 re2/repro_decref.cc | 26 ++++++++++++++++++++++++++
 5 files changed, 63 insertions(+), 2 deletions(-)
 create mode 100644 re2/repro_decref.cc

diff --git a/re2/parse.cc b/re2/parse.cc
index 545b965e9..a34868153 100644
--- a/re2/parse.cc
+++ b/re2/parse.cc
@@ -46,10 +46,17 @@ namespace re2 {
 // Controls the maximum repeat count permitted by the parser.
 static int maximum_repeat_count = 1000;
 
+// Controls the maximum nesting depth permitted by the parser.
+static int maximum_nesting_depth = 1000;
+
 void Regexp::FUZZING_ONLY_set_maximum_repeat_count(int i) {
   maximum_repeat_count = i;
 }
 
+void Regexp::FUZZING_ONLY_set_maximum_nesting_depth(int i) {
+  maximum_nesting_depth = i;
+}
+
 // Regular expression parse state.
 // The list of parsed regexps so far is maintained as a vector of
 // Regexp pointers called the stack.  Left parenthesis and vertical
@@ -179,6 +186,7 @@ class Regexp::ParseState {
   RegexpStatus* status_;
   Regexp* stacktop_;
   int ncap_;  // number of capturing parens seen
+  int depth_;  // nesting depth
   int rune_max_;  // maximum char value for this encoding
 
   ParseState(const ParseState&) = delete;
@@ -193,7 +201,7 @@ Regexp::ParseState::ParseState(ParseFlags flags,
                                absl::string_view whole_regexp,
                                RegexpStatus* status)
   : flags_(flags), whole_regexp_(whole_regexp),
-    status_(status), stacktop_(NULL), ncap_(0) {
+    status_(status), stacktop_(NULL), ncap_(0), depth_(0) {
   if (flags_ & Latin1)
     rune_max_ = 0xFF;
   else
@@ -630,8 +638,14 @@ bool Regexp::ParseState::IsMarker(RegexpOp op) {
 // Processes a left parenthesis in the input.
 // Pushes a marker onto the stack.
 bool Regexp::ParseState::DoLeftParen(absl::string_view name) {
+  if (depth_ >= maximum_nesting_depth) {
+    status_->set_code(kRegexpNestedTooDeep);
+    status_->set_error_arg(whole_regexp_);
+    return false;
+  }
   Regexp* re = new Regexp(kLeftParen, flags_);
   re->cap_ = ++ncap_;
+  depth_++;
   if (name.data() != NULL)
     re->name_ = new std::string(name);
   return PushRegexp(re);
@@ -730,6 +744,11 @@ bool Regexp::ParseState::DoRightParen() {
     re->Decref();
     re = r1;
   }
+  // DoLeftParen is the only place that increments depth_, so undo it only
+  // when the marker was kept as the result; re was swapped for r1 above
+  // otherwise (expected to be the non-capturing case).
+  if (re != r1)
+    depth_--;
   return PushRegexp(re);
 }
 
diff --git a/re2/pod_array.h b/re2/pod_array.h
index f234e976f..4153d09d2 100644
--- a/re2/pod_array.h
+++ b/re2/pod_array.h
@@ -7,6 +7,7 @@
 
 #include <memory>
 #include <type_traits>
+#include "absl/log/absl_check.h"
 
 namespace re2 {
 
@@ -19,7 +20,13 @@ class PODArray {
   PODArray()
       : ptr_() {}
   explicit PODArray(int len)
-      : ptr_(std::allocator<T>().allocate(len), Deleter(len)) {}
+      : ptr_(std::allocator<T>().allocate([len] {
+               // ABSL_CHECK_GE expands to a statement, so it cannot be an
+               // operand of the comma operator; run it inside a lambda.
+               ABSL_CHECK_GE(len, 0);
+               return len;
+             }()),
+             Deleter(len)) {}
 
   T* data() const {
     return ptr_.get();
diff --git a/re2/regexp.cc b/re2/regexp.cc
index f7e5ba297..aeb55a8ff 100644
--- a/re2/regexp.cc
+++ b/re2/regexp.cc
@@ -187,6 +187,9 @@ void Regexp::AddRuneToString(Rune r) {
     runes_ = new Rune[8];
   } else if (nrunes_ >= 8 && (nrunes_ & (nrunes_ - 1)) == 0) {
     // double on powers of two
+    if (nrunes_ >= (1 << 30)) {
+      ABSL_LOG(FATAL) << "Too many runes in LiteralString";
+    }
     Rune *old = runes_;
     runes_ = new Rune[nrunes_ * 2];
     for (int i = 0; i < nrunes_; i++)
@@ -524,6 +527,7 @@ static const char *kErrorStrings[] = {
   "invalid perl operator",
   "invalid UTF-8",
   "invalid named capture group",
+  "expression nested too deeply",
 };
 
 std::string RegexpStatus::CodeText(enum RegexpStatusCode code) {
diff --git a/re2/regexp.h b/re2/regexp.h
index 531b42044..0d9d1920d 100644
--- a/re2/regexp.h
+++ b/re2/regexp.h
@@ -187,6 +187,7 @@ enum RegexpStatusCode {
   kRegexpBadPerlOp,          // bad perl operator
   kRegexpBadUTF8,            // invalid UTF-8 in regexp
   kRegexpBadNamedCapture,    // bad named capture
+  kRegexpNestedTooDeep,      // nested too deeply
 };
 
 // Error status for certain operations.
@@ -480,6 +481,10 @@ class Regexp {
   // Controls the maximum repeat count permitted by the parser.
   // FOR FUZZING ONLY.
   static void FUZZING_ONLY_set_maximum_repeat_count(int i);
+
+  // Controls the maximum nesting depth permitted by the parser.
+  // FOR FUZZING ONLY.
+  static void FUZZING_ONLY_set_maximum_nesting_depth(int i);
 
  private:
   // Constructor allocates vectors as appropriate for operator.
diff --git a/re2/repro_decref.cc b/re2/repro_decref.cc
new file mode 100644
index 000000000..d2d882633
--- /dev/null
+++ b/re2/repro_decref.cc
@@ -0,0 +1,26 @@
+
+#include <iostream>
+#include <string>
+#include "re2/re2.h"
+#include "re2/regexp.h"
+
+int main() {
+  std::string pattern = "a";
+  int depth = 2000;
+  for (int i = 0; i < depth; i++) {
+    pattern = "(" + pattern + ")a";
+  }
+
+  std::cout << "Parsing pattern of depth " << depth << "..." << std::endl;
+  re2::RE2::Options options;
+  options.set_log_errors(false);
+  re2::RE2 re(pattern, options);
+
+  if (re.ok()) {
+    std::cout << "Success!" << std::endl;
+  } else {
+    std::cout << "Failed to parse: " << re.error() << std::endl;
+  }
+
+  return 0;
+}