From e4ab2b1f5535aa35a47a3395dd9064ebc284d510 Mon Sep 17 00:00:00 2001 From: Christoph Weidling Date: Wed, 10 Jun 2026 11:03:16 +0200 Subject: [PATCH 1/3] Reuse the precompiled regex for collection variable selectors resolveRegularExpression() in the collection backends compiled (and JIT-compiled) a fresh Utils::Regex from the pattern string on every call. For a regex variable selector such as TX:/regex/ that is evaluated per transaction, this recompiled the same pattern on every request - even though the calling VariableRegex already holds it compiled once at configuration time in its m_r member. Add a Collection::resolveRegularExpression(Utils::Regex *) overload that accepts the pre-compiled regex. The base class keeps the previous behaviour by default (it compiles from r->pattern and delegates), so backends that do not override it are unaffected; InMemoryPerProcess overrides it to scan the collection with the supplied regex directly. Tx_DictElementRegexp now passes its already-compiled &m_r instead of the pattern string. Behaviour is unchanged: m_r is constructed with the same arguments (Utils::Regex(pattern, /*ignoreCase=*/true)) the backend used, so the identical regex is applied - it is just compiled once instead of per transaction. A regression test covering TX:/regex/ selection is added. 
--- headers/modsecurity/collection/collection.h | 14 ++++ src/Makefile.am | 1 + .../backend/in_memory-per_process.cc | 10 ++- .../backend/in_memory-per_process.h | 3 + src/collection/collection.cc | 39 ++++++++++ src/variables/tx.h | 7 +- .../variable-tx-regex-precompiled.json | 78 +++++++++++++++++++ 7 files changed, 146 insertions(+), 6 deletions(-) create mode 100644 src/collection/collection.cc create mode 100644 test/test-cases/regression/variable-tx-regex-precompiled.json diff --git a/headers/modsecurity/collection/collection.h b/headers/modsecurity/collection/collection.h index 352dfb7702..21878c1f3f 100644 --- a/headers/modsecurity/collection/collection.h +++ b/headers/modsecurity/collection/collection.h @@ -37,6 +37,9 @@ typedef struct Variable_t Variables; #ifdef __cplusplus namespace modsecurity { +namespace Utils { +class Regex; +} namespace variables { class KeyExclusions; } @@ -69,6 +72,17 @@ class Collection { std::vector *l, variables::KeyExclusions &ke) = 0; + /* + * Resolve using a regular expression that was already compiled (e.g. the + * one held by a VariableRegex). This avoids recompiling - and JIT'ing - the + * same pattern on every transaction. The default implementation delegates + * to the string overload using the pattern text, so backends that do not + * override it keep their previous behaviour. 
+ */ + virtual void resolveRegularExpression(Utils::Regex *r, + std::vector *l, + variables::KeyExclusions &ke); + /* storeOrUpdateFirst */ virtual bool storeOrUpdateFirst(const std::string &key, diff --git a/src/Makefile.am b/src/Makefile.am index 7154215633..0a9f48739e 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -240,6 +240,7 @@ UTILS = \ COLLECTION = \ + collection/collection.cc \ collection/collections.cc \ collection/backend/collection_data.cc \ collection/backend/in_memory-per_process.cc \ diff --git a/src/collection/backend/in_memory-per_process.cc b/src/collection/backend/in_memory-per_process.cc index b16ee843ac..d7c8a8f418 100644 --- a/src/collection/backend/in_memory-per_process.cc +++ b/src/collection/backend/in_memory-per_process.cc @@ -194,14 +194,22 @@ void InMemoryPerProcess::resolveMultiMatches(const std::string& var, void InMemoryPerProcess::resolveRegularExpression(const std::string& var, std::vector *l, variables::KeyExclusions &ke) { + // Callers that do not hold a compiled regex (e.g. the compartment-prefixed + // overloads) still pay the compilation cost here. 
Utils::Regex r(var, true); + resolveRegularExpression(&r, l, ke); +} + + +void InMemoryPerProcess::resolveRegularExpression(Utils::Regex *r, + std::vector *l, variables::KeyExclusions &ke) { std::list expiredVars; { const std::shared_lock lock(m_mutex); // read lock (shared access) for (const auto& x : m_map) { - const auto ret = Utils::regex_search(x.first, r); + const auto ret = Utils::regex_search(x.first, *r); if (ret <= 0) { continue; } diff --git a/src/collection/backend/in_memory-per_process.h b/src/collection/backend/in_memory-per_process.h index 4aa7b1d076..29b61b78b3 100644 --- a/src/collection/backend/in_memory-per_process.h +++ b/src/collection/backend/in_memory-per_process.h @@ -99,6 +99,9 @@ class InMemoryPerProcess : void resolveRegularExpression(const std::string& var, std::vector *l, variables::KeyExclusions &ke) override; + void resolveRegularExpression(Utils::Regex *r, + std::vector *l, + variables::KeyExclusions &ke) override; /* store */ virtual void store(const std::string &key, std::string &compartment, diff --git a/src/collection/collection.cc b/src/collection/collection.cc new file mode 100644 index 0000000000..6880c7e52a --- /dev/null +++ b/src/collection/collection.cc @@ -0,0 +1,39 @@ +/* + * ModSecurity, http://www.modsecurity.org/ + * Copyright (c) 2026 OWASP ModSecurity project + * + * You may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * If any of the files related to licensing are missing or if you have any + * other questions related to licensing please contact OWASP. + * directly using the email address modsecurity@owasp.org. 
+ * + */ + + +#include "modsecurity/collection/collection.h" + +#include +#include + +#include "src/utils/regex.h" + + +namespace modsecurity { +namespace collection { + + +void Collection::resolveRegularExpression(Utils::Regex *r, + std::vector *l, variables::KeyExclusions &ke) { + // Default behaviour: fall back to the string-based resolution using the + // pattern carried by the compiled regex. Backends that can take advantage + // of the already-compiled regex (e.g. InMemoryPerProcess) override this. + resolveRegularExpression(r->pattern, l, ke); +} + + +} // namespace collection +} // namespace modsecurity diff --git a/src/variables/tx.h b/src/variables/tx.h index 1fae827f13..31585b3441 100644 --- a/src/variables/tx.h +++ b/src/variables/tx.h @@ -66,17 +66,14 @@ class Tx_NoDictElement : public Variable { class Tx_DictElementRegexp : public VariableRegex { public: explicit Tx_DictElementRegexp(const std::string &dictElement) - : VariableRegex("TX", dictElement), - m_dictElement(dictElement) { } + : VariableRegex("TX", dictElement) { } void evaluate(Transaction *t, RuleWithActions *rule, std::vector *l) override { t->m_collections.m_tx_collection->resolveRegularExpression( - m_dictElement, l, m_keyExclusion); + &m_r, l, m_keyExclusion); } - - std::string m_dictElement; }; diff --git a/test/test-cases/regression/variable-tx-regex-precompiled.json b/test/test-cases/regression/variable-tx-regex-precompiled.json new file mode 100644 index 0000000000..870cb2170c --- /dev/null +++ b/test/test-cases/regression/variable-tx-regex-precompiled.json @@ -0,0 +1,78 @@ +[ + { + "enabled": 1, + "version_min": 300000, + "title": "TX regex variable selector (precompiled regex) - matches in a chained rule", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Content-Length": "0" + }, + "uri": "/test", + "method": "GET" + }, + 
"response": { + "headers": { + "Content-Type": "text/html", + "Content-Length": "8" + }, + "body": ["no need."] + }, + "expected": { + "http_code": 403 + }, + "rules": [ + "SecRuleEngine On", + "SecAction \"id:1,phase:1,nolog,pass,setvar:'tx.score_a=1',setvar:'tx.score_b=1',setvar:'tx.other=1'\"", + "SecRule REQUEST_URI \"@contains /test\" \"id:2,phase:2,deny,status:403,log,chain\"", + " SecRule TX:/^score_/ \"@eq 1\" \"t:none\"" + ] + }, + { + "enabled": 1, + "version_min": 300000, + "title": "TX regex variable selector (precompiled regex) - selector excludes non-matching keys", + "client": { + "ip": "200.249.12.31", + "port": 123 + }, + "server": { + "ip": "200.249.12.31", + "port": 80 + }, + "request": { + "headers": { + "Host": "localhost", + "User-Agent": "curl/7.38.0", + "Content-Length": "0" + }, + "uri": "/test", + "method": "GET" + }, + "response": { + "headers": { + "Content-Type": "text/html", + "Content-Length": "8" + }, + "body": ["no need."] + }, + "expected": { + "http_code": 200 + }, + "rules": [ + "SecRuleEngine On", + "SecAction \"id:1,phase:1,nolog,pass,setvar:'tx.other=1'\"", + "SecRule REQUEST_URI \"@contains /test\" \"id:2,phase:2,deny,status:403,log,chain\"", + " SecRule TX:/^score_/ \"@eq 1\" \"t:none\"" + ] + } +] From fad615344c92174156ac3e3ab387362c10f908c2 Mon Sep 17 00:00:00 2001 From: Christoph Weidling Date: Thu, 18 Jun 2026 17:39:39 +0200 Subject: [PATCH 2/3] Address review: take compiled regex by const pointer; fix license header --- headers/modsecurity/collection/collection.h | 2 +- src/collection/backend/in_memory-per_process.cc | 2 +- src/collection/backend/in_memory-per_process.h | 2 +- src/collection/collection.cc | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/headers/modsecurity/collection/collection.h b/headers/modsecurity/collection/collection.h index 21878c1f3f..6a7244aedc 100644 --- a/headers/modsecurity/collection/collection.h +++ b/headers/modsecurity/collection/collection.h @@ -79,7 +79,7 @@ 
class Collection { * to the string overload using the pattern text, so backends that do not * override it keep their previous behaviour. */ - virtual void resolveRegularExpression(Utils::Regex *r, + virtual void resolveRegularExpression(const Utils::Regex *r, std::vector *l, variables::KeyExclusions &ke); diff --git a/src/collection/backend/in_memory-per_process.cc b/src/collection/backend/in_memory-per_process.cc index d7c8a8f418..bdf3d09e68 100644 --- a/src/collection/backend/in_memory-per_process.cc +++ b/src/collection/backend/in_memory-per_process.cc @@ -201,7 +201,7 @@ void InMemoryPerProcess::resolveRegularExpression(const std::string& var, } -void InMemoryPerProcess::resolveRegularExpression(Utils::Regex *r, +void InMemoryPerProcess::resolveRegularExpression(const Utils::Regex *r, std::vector *l, variables::KeyExclusions &ke) { std::list expiredVars; diff --git a/src/collection/backend/in_memory-per_process.h b/src/collection/backend/in_memory-per_process.h index 29b61b78b3..4faf999c2c 100644 --- a/src/collection/backend/in_memory-per_process.h +++ b/src/collection/backend/in_memory-per_process.h @@ -99,7 +99,7 @@ class InMemoryPerProcess : void resolveRegularExpression(const std::string& var, std::vector *l, variables::KeyExclusions &ke) override; - void resolveRegularExpression(Utils::Regex *r, + void resolveRegularExpression(const Utils::Regex *r, std::vector *l, variables::KeyExclusions &ke) override; diff --git a/src/collection/collection.cc b/src/collection/collection.cc index 6880c7e52a..0a5787cb49 100644 --- a/src/collection/collection.cc +++ b/src/collection/collection.cc @@ -8,7 +8,7 @@ * http://www.apache.org/licenses/LICENSE-2.0 * * If any of the files related to licensing are missing or if you have any - * other questions related to licensing please contact OWASP. + * other questions related to licensing please contact OWASP * directly using the email address modsecurity@owasp.org. 
* */ @@ -26,7 +26,7 @@ namespace modsecurity { namespace collection { -void Collection::resolveRegularExpression(Utils::Regex *r, +void Collection::resolveRegularExpression(const Utils::Regex *r, std::vector *l, variables::KeyExclusions &ke) { // Default behaviour: fall back to the string-based resolution using the // pattern carried by the compiled regex. Backends that can take advantage From 634bb80fdd3e2e92bcdc7cb4a460f3e462fd8b2f Mon Sep 17 00:00:00 2001 From: Christoph Weidling Date: Fri, 19 Jun 2026 08:45:53 +0200 Subject: [PATCH 3/3] Keep the optimization off the Collection ABI Move the precompiled-regex fast path off the public Collection base class (its vtable is unchanged) and onto the concrete `InMemoryPerProcess` backend. `Tx_DictElementRegexp` reaches it via a guarded `dynamic_cast`, falling back to the string-based resolution for any other backend. Drops the previously added Collection `virtual` and `src/collection/collection.cc`. --- headers/modsecurity/collection/collection.h | 14 ------- src/Makefile.am | 1 - .../backend/in_memory-per_process.h | 6 ++- src/collection/collection.cc | 39 ------------------- src/variables/tx.h | 17 +++++++- 5 files changed, 20 insertions(+), 57 deletions(-) delete mode 100644 src/collection/collection.cc diff --git a/headers/modsecurity/collection/collection.h b/headers/modsecurity/collection/collection.h index 6a7244aedc..352dfb7702 100644 --- a/headers/modsecurity/collection/collection.h +++ b/headers/modsecurity/collection/collection.h @@ -37,9 +37,6 @@ typedef struct Variable_t Variables; #ifdef __cplusplus namespace modsecurity { -namespace Utils { -class Regex; -} namespace variables { class KeyExclusions; } @@ -72,17 +69,6 @@ class Collection { std::vector *l, variables::KeyExclusions &ke) = 0; - /* - * Resolve using a regular expression that was already compiled (e.g. the - * one held by a VariableRegex). This avoids recompiling - and JIT'ing - the - * same pattern on every transaction. 
The default implementation delegates - * to the string overload using the pattern text, so backends that do not - * override it keep their previous behaviour. - */ - virtual void resolveRegularExpression(const Utils::Regex *r, - std::vector *l, - variables::KeyExclusions &ke); - /* storeOrUpdateFirst */ virtual bool storeOrUpdateFirst(const std::string &key, diff --git a/src/Makefile.am b/src/Makefile.am index 0a9f48739e..7154215633 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -240,7 +240,6 @@ UTILS = \ COLLECTION = \ - collection/collection.cc \ collection/collections.cc \ collection/backend/collection_data.cc \ collection/backend/in_memory-per_process.cc \ diff --git a/src/collection/backend/in_memory-per_process.h b/src/collection/backend/in_memory-per_process.h index 4faf999c2c..9f7d9506f4 100644 --- a/src/collection/backend/in_memory-per_process.h +++ b/src/collection/backend/in_memory-per_process.h @@ -99,9 +99,13 @@ class InMemoryPerProcess : void resolveRegularExpression(const std::string& var, std::vector *l, variables::KeyExclusions &ke) override; + // Concrete fast path: reuse an already-compiled regex (e.g. the one a + // VariableRegex holds in m_r) instead of recompiling the pattern on every + // call. Intentionally NOT declared on the Collection base class, to keep + // that public interface's vtable - and therefore its ABI - unchanged. void resolveRegularExpression(const Utils::Regex *r, std::vector *l, - variables::KeyExclusions &ke) override; + variables::KeyExclusions &ke); /* store */ virtual void store(const std::string &key, std::string &compartment, diff --git a/src/collection/collection.cc b/src/collection/collection.cc deleted file mode 100644 index 0a5787cb49..0000000000 --- a/src/collection/collection.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * ModSecurity, http://www.modsecurity.org/ - * Copyright (c) 2026 OWASP ModSecurity project - * - * You may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * If any of the files related to licensing are missing or if you have any - * other questions related to licensing please contact OWASP - * directly using the email address modsecurity@owasp.org. - * - */ - - -#include "modsecurity/collection/collection.h" - -#include -#include - -#include "src/utils/regex.h" - - -namespace modsecurity { -namespace collection { - - -void Collection::resolveRegularExpression(const Utils::Regex *r, - std::vector *l, variables::KeyExclusions &ke) { - // Default behaviour: fall back to the string-based resolution using the - // pattern carried by the compiled regex. Backends that can take advantage - // of the already-compiled regex (e.g. InMemoryPerProcess) override this. - resolveRegularExpression(r->pattern, l, ke); -} - - -} // namespace collection -} // namespace modsecurity diff --git a/src/variables/tx.h b/src/variables/tx.h index 31585b3441..59e2103c8e 100644 --- a/src/variables/tx.h +++ b/src/variables/tx.h @@ -25,6 +25,7 @@ #include "src/variables/variable.h" #include "src/run_time_string.h" +#include "src/collection/backend/in_memory-per_process.h" namespace modsecurity { @@ -71,8 +72,20 @@ class Tx_DictElementRegexp : public VariableRegex { void evaluate(Transaction *t, RuleWithActions *rule, std::vector *l) override { - t->m_collections.m_tx_collection->resolveRegularExpression( - &m_r, l, m_keyExclusion); + // TX is always backed by InMemoryPerProcess. Reuse the regex compiled + // once in VariableRegex::m_r via the backend's concrete fast path, + // instead of recompiling the pattern on every transaction. This is kept + // off the Collection base class to avoid an ABI-breaking vtable change; + // any other backend falls back to the string-based resolution. 
+ auto *collection = t->m_collections.m_tx_collection; + if (auto *inMemory = + dynamic_cast<collection::backend::InMemoryPerProcess *>( + collection)) { + inMemory->resolveRegularExpression(&m_r, l, m_keyExclusion); + } else { + collection->resolveRegularExpression(m_r.pattern, l, + m_keyExclusion); + } } };