Skip to content

Commit af100dc

Browse files
committed
Allow to customize how regexes are provided
1 parent 72e3954 commit af100dc

7 files changed

Lines changed: 223 additions & 161 deletions

File tree

engine/src/ast/field_expr.rs

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use crate::{
1111
filter::CompiledExpr,
1212
lex::{Lex, LexErrorKind, LexResult, LexWith, expect, skip_space, span},
1313
range_set::RangeSet,
14-
rhs_types::{Bytes, ExplicitIpRange, ListName, Regex, Wildcard},
14+
rhs_types::{Bytes, ExplicitIpRange, ListName, RegexExpr, Wildcard},
1515
scheme::{Field, Identifier, List},
1616
searcher::{EmptySearcher, TwoWaySearcher},
1717
strict_partial_ord::StrictPartialOrd,
@@ -152,7 +152,7 @@ pub enum ComparisonOpExpr {
152152

153153
/// "matches / ~" comparison
154154
#[serde(serialize_with = "serialize_matches")]
155-
Matches(Regex),
155+
Matches(RegexExpr),
156156

157157
/// "wildcard" comparison
158158
#[serde(serialize_with = "serialize_wildcard")]
@@ -205,7 +205,7 @@ fn serialize_contains<S: Serializer>(rhs: &Bytes, ser: S) -> Result<S::Ok, S::Er
205205
serialize_op_rhs("Contains", rhs, ser)
206206
}
207207

208-
fn serialize_matches<S: Serializer>(rhs: &Regex, ser: S) -> Result<S::Ok, S::Error> {
208+
fn serialize_matches<S: Serializer>(rhs: &RegexExpr, ser: S) -> Result<S::Ok, S::Error> {
209209
serialize_op_rhs("Matches", rhs, ser)
210210
}
211211

@@ -376,7 +376,7 @@ impl ComparisonExpr {
376376
(ComparisonOpExpr::Contains(bytes), input)
377377
}
378378
BytesOp::Matches => {
379-
let (regex, input) = Regex::lex_with(input, parser)?;
379+
let (regex, input) = RegexExpr::lex_with(input, parser)?;
380380
(ComparisonOpExpr::Matches(regex), input)
381381
}
382382
BytesOp::Wildcard => {
@@ -688,7 +688,9 @@ impl Expr for ComparisonExpr {
688688

689689
search!(TwoWaySearcher::new(bytes))
690690
}
691-
ComparisonOpExpr::Matches(regex) => lhs.compile_with(compiler, false, regex),
691+
ComparisonOpExpr::Matches(regex) => {
692+
lhs.compile_with(compiler, false, regex.into_regex())
693+
}
692694
ComparisonOpExpr::Wildcard(wildcard) => lhs.compile_with(compiler, false, wildcard),
693695
ComparisonOpExpr::StrictWildcard(wildcard) => {
694696
lhs.compile_with(compiler, false, wildcard)
@@ -2806,7 +2808,12 @@ mod tests {
28062808

28072809
// Matches operator
28082810
let parser = FilterParser::new(&SCHEME);
2809-
let r = Regex::new("a.b", RegexFormat::Literal, parser.settings()).unwrap();
2811+
let r = RegexExpr::new(
2812+
"a.b",
2813+
RegexFormat::Literal,
2814+
&parser.settings().regex_provider,
2815+
)
2816+
.unwrap();
28102817
let expr = assert_ok!(
28112818
parser.lex_as("http.host matches r###\"a.b\"###"),
28122819
ComparisonExpr {

engine/src/ast/parse.rs

Lines changed: 23 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
use super::{FilterAst, FilterValueAst};
22
use crate::{
3+
RegexProvider,
34
lex::{LexErrorKind, LexResult, LexWith, complete},
5+
rhs_types::DefaultRegexProvider,
46
scheme::Scheme,
57
};
68
use std::cmp::{max, min};
79
use std::error::Error;
810
use std::fmt::{self, Debug, Display, Formatter};
11+
use std::sync::{Arc, LazyLock};
912

1013
/// An opaque filter parsing error associated with the original input.
1114
///
@@ -96,28 +99,37 @@ impl Display for ParseError<'_> {
9699
}
97100
}
98101

102+
static DEFAULT_REGEX_PROVIDER: LazyLock<Arc<dyn RegexProvider>> =
103+
LazyLock::new(|| Arc::new(DefaultRegexProvider::default()));
104+
99105
/// Parser settings.
100-
#[derive(Clone, Debug, PartialEq, Eq)]
106+
#[derive(Clone, Debug)]
101107
pub struct ParserSettings {
102-
/// Approximate size of the cache used by the DFA of a regex.
103-
/// Default: 10MB
104-
pub regex_dfa_size_limit: usize,
105-
/// Approximate size limit of the compiled regular expression.
106-
/// Default: 2MB
107-
pub regex_compiled_size_limit: usize,
108+
/// Regex provider.
109+
pub regex_provider: Arc<dyn RegexProvider>,
108110
/// Maximum number of star metacharacters allowed in a wildcard.
109111
/// Default: unlimited
110112
pub wildcard_star_limit: usize,
111113
}
112114

115+
impl PartialEq for ParserSettings {
116+
fn eq(&self, other: &Self) -> bool {
117+
let Self {
118+
regex_provider,
119+
wildcard_star_limit,
120+
} = self;
121+
Arc::ptr_eq(regex_provider, &other.regex_provider)
122+
&& *wildcard_star_limit == other.wildcard_star_limit
123+
}
124+
}
125+
126+
impl Eq for ParserSettings {}
127+
113128
impl Default for ParserSettings {
114129
#[inline]
115130
fn default() -> Self {
116131
Self {
117-
// Default value extracted from the regex crate.
118-
regex_compiled_size_limit: 10 * (1 << 20),
119-
// Default value extracted from the regex crate.
120-
regex_dfa_size_limit: 2 * (1 << 20),
132+
regex_provider: DEFAULT_REGEX_PROVIDER.clone(),
121133
wildcard_star_limit: usize::MAX,
122134
}
123135
}
@@ -176,30 +188,6 @@ impl<'s> FilterParser<'s> {
176188
&self.settings
177189
}
178190

179-
/// Set the approximate size limit of the compiled regular expression.
180-
#[inline]
181-
pub fn regex_set_compiled_size_limit(&mut self, regex_compiled_size_limit: usize) {
182-
self.settings.regex_compiled_size_limit = regex_compiled_size_limit;
183-
}
184-
185-
/// Get the approximate size limit of the compiled regular expression.
186-
#[inline]
187-
pub fn regex_get_compiled_size_limit(&self) -> usize {
188-
self.settings.regex_compiled_size_limit
189-
}
190-
191-
/// Set the approximate size of the cache used by the DFA of a regex.
192-
#[inline]
193-
pub fn regex_set_dfa_size_limit(&mut self, regex_dfa_size_limit: usize) {
194-
self.settings.regex_dfa_size_limit = regex_dfa_size_limit;
195-
}
196-
197-
/// Get the approximate size of the cache used by the DFA of a regex.
198-
#[inline]
199-
pub fn regex_get_dfa_size_limit(&self) -> usize {
200-
self.settings.regex_dfa_size_limit
201-
}
202-
203191
/// Set the maximum number of star metacharacters allowed in a wildcard.
204192
#[inline]
205193
pub fn wildcard_set_star_limit(&mut self, wildcard_star_limit: usize) {

engine/src/lib.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,8 +112,8 @@ pub use self::{
112112
panic_catcher_get_backtrace, panic_catcher_set_fallback_mode, panic_catcher_set_hook,
113113
},
114114
rhs_types::{
115-
Bytes, BytesFormat, ExplicitIpRange, IntRange, IpCidr, IpRange, Regex, RegexError,
116-
RegexFormat,
115+
Bytes, BytesFormat, DefaultRegexProvider, ExplicitIpRange, IntRange, IpCidr, IpRange,
116+
Regex, RegexError, RegexExpr, RegexFormat, RegexProvider,
117117
},
118118
scheme::{
119119
Field, FieldIndex, FieldRedefinitionError, FieldRef, Function, FunctionRedefinitionError,
@@ -125,3 +125,6 @@ pub use self::{
125125
TypeMismatchError,
126126
},
127127
};
128+
129+
#[cfg(feature = "regex")]
130+
pub use self::rhs_types::RegexSettings;

engine/src/rhs_types/mod.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,11 @@ pub use self::{
1616
ip::{ExplicitIpRange, IpCidr, IpRange},
1717
list::ListName,
1818
map::UninhabitedMap,
19-
regex::{Error as RegexError, Regex, RegexFormat},
19+
regex::{
20+
DefaultRegexProvider, Error as RegexError, Regex, RegexExpr, RegexFormat, RegexProvider,
21+
},
2022
wildcard::{Wildcard, WildcardError},
2123
};
24+
25+
#[cfg(feature = "regex")]
26+
pub use self::regex::RegexSettings;
Lines changed: 62 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,51 @@
1-
use regex_automata::MatchKind;
2-
3-
use super::Error;
4-
use crate::{ParserSettings, RegexFormat};
5-
use std::ops::Deref;
1+
use super::{Error, Regex};
2+
use crate::RegexProvider;
63
use std::sync::Arc;
74

8-
/// Wrapper around [`regex_automata::meta::Regex`]
9-
#[derive(Clone)]
10-
pub struct Regex {
11-
pattern: Arc<str>,
12-
regex: regex_automata::meta::Regex,
13-
format: RegexFormat,
5+
pub(crate) type MetaRegex = regex_automata::meta::Regex;
6+
7+
impl Regex for MetaRegex {
8+
#[inline]
9+
fn is_match(&self, input: &[u8]) -> bool {
10+
MetaRegex::is_match(self, input)
11+
}
12+
}
13+
14+
/// Regex settings.
15+
#[derive(Clone, Debug, PartialEq, Eq)]
16+
pub struct RegexSettings {
17+
/// Approximate size of the cache used by the DFA of a regex.
18+
/// Default: 2MB
19+
pub dfa_size_limit: usize,
20+
/// Approximate size limit of the compiled regular expression.
21+
/// Default: 10MB
22+
pub compiled_size_limit: usize,
23+
}
24+
25+
impl Default for RegexSettings {
26+
#[inline]
27+
fn default() -> Self {
28+
Self {
29+
// Default value extracted from the regex crate.
30+
compiled_size_limit: 10 * (1 << 20),
31+
// Default value extracted from the regex crate.
32+
dfa_size_limit: 2 * (1 << 20),
33+
}
34+
}
35+
}
36+
37+
/// Default regex provider.
38+
#[derive(Debug, Default)]
39+
pub struct DefaultRegexProvider {
40+
settings: RegexSettings,
1441
}
1542

16-
impl Regex {
43+
impl DefaultRegexProvider {
44+
/// Creates a new default regex provider.
45+
pub const fn new(settings: RegexSettings) -> Self {
46+
Self { settings }
47+
}
48+
1749
/// Retrieves the syntax configuration that will be used to build the regex.
1850
#[inline]
1951
fn syntax_config() -> regex_automata::util::syntax::Config {
@@ -24,32 +56,25 @@ impl Regex {
2456

2557
/// Retrieves the meta configuration that will be used to build the regex.
2658
#[inline]
27-
fn meta_config(settings: &ParserSettings) -> regex_automata::meta::Config {
59+
fn meta_config(settings: &RegexSettings) -> regex_automata::meta::Config {
2860
regex_automata::meta::Config::new()
29-
.match_kind(MatchKind::LeftmostFirst)
61+
.match_kind(regex_automata::MatchKind::LeftmostFirst)
3062
.utf8_empty(false)
3163
.dfa(false)
32-
.nfa_size_limit(Some(settings.regex_compiled_size_limit))
33-
.onepass_size_limit(Some(settings.regex_compiled_size_limit))
34-
.dfa_size_limit(Some(settings.regex_compiled_size_limit))
35-
.hybrid_cache_capacity(settings.regex_dfa_size_limit)
64+
.nfa_size_limit(Some(settings.compiled_size_limit))
65+
.onepass_size_limit(Some(settings.compiled_size_limit))
66+
.dfa_size_limit(Some(settings.compiled_size_limit))
67+
.hybrid_cache_capacity(settings.dfa_size_limit)
3668
}
69+
}
3770

38-
/// Compiles a regular expression.
39-
pub fn new(
40-
pattern: &str,
41-
format: RegexFormat,
42-
settings: &ParserSettings,
43-
) -> Result<Self, Error> {
71+
impl RegexProvider for DefaultRegexProvider {
72+
fn lookup_regex(&self, pattern: &str) -> Result<Arc<dyn Regex>, Error> {
4473
::regex_automata::meta::Builder::new()
45-
.configure(Self::meta_config(settings))
74+
.configure(Self::meta_config(&self.settings))
4675
.syntax(Self::syntax_config())
4776
.build(pattern)
48-
.map(|regex| Regex {
49-
pattern: Arc::from(pattern),
50-
regex,
51-
format,
52-
})
77+
.map(|re| Arc::new(re) as Arc<dyn Regex>)
5378
.map_err(|err| {
5479
if let Some(limit) = err.size_limit() {
5580
Error::CompiledTooBig(limit)
@@ -60,45 +85,21 @@ impl Regex {
6085
}
6186
})
6287
}
63-
64-
/// Returns the pattern of this regex.
65-
#[inline]
66-
pub fn as_str(&self) -> &str {
67-
&self.pattern
68-
}
69-
70-
/// Returns the format used by the pattern.
71-
#[inline]
72-
pub fn format(&self) -> RegexFormat {
73-
self.format
74-
}
75-
}
76-
77-
impl From<Regex> for regex_automata::meta::Regex {
78-
#[inline]
79-
fn from(regex: Regex) -> Self {
80-
regex.regex
81-
}
82-
}
83-
84-
impl Deref for Regex {
85-
type Target = regex_automata::meta::Regex;
86-
87-
#[inline]
88-
fn deref(&self) -> &Self::Target {
89-
&self.regex
90-
}
9188
}
9289

9390
#[test]
9491
fn test_compiled_size_limit() {
92+
use super::{DefaultRegexProvider, RegexSettings};
93+
use crate::{RegexExpr, RegexFormat};
94+
9595
const COMPILED_SIZE_LIMIT: usize = 1024 * 1024;
96-
let settings = ParserSettings {
97-
regex_compiled_size_limit: COMPILED_SIZE_LIMIT,
96+
let settings = RegexSettings {
97+
compiled_size_limit: COMPILED_SIZE_LIMIT,
9898
..Default::default()
9999
};
100+
let regex_provider = DefaultRegexProvider::new(settings);
100101
assert_eq!(
101-
Regex::new(".{4079,65535}", RegexFormat::Literal, &settings),
102+
RegexExpr::new(".{4079,65535}", RegexFormat::Literal, &regex_provider),
102103
Err(Error::CompiledTooBig(COMPILED_SIZE_LIMIT))
103104
);
104105
}
Lines changed: 11 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,21 @@
1-
use thiserror::Error;
2-
31
use crate::{FilterParser, RegexFormat};
2+
use thiserror::Error;
43

5-
/// Dummy regex error.
6-
#[derive(Debug, PartialEq, Error)]
7-
pub enum Error {}
8-
9-
/// Dummy regex wrapper that can only store a pattern
10-
/// but not actually be used for matching.
11-
#[derive(Clone)]
12-
pub struct Regex {
13-
pattern: String,
14-
format: RegexFormat,
15-
}
16-
17-
impl Regex {
18-
/// Creates a new dummy regex.
19-
pub fn new(pattern: &str, format: RegexFormat, _: &FilterParser<'_>) -> Result<Self, Error> {
20-
Ok(Self {
21-
pattern: pattern.to_string(),
22-
format,
23-
})
24-
}
4+
pub(crate) struct StubRegex {}
255

6+
impl Regex for StubRegex {
267
/// Not implemented and will panic if called.
27-
pub fn is_match(&self, _text: &[u8]) -> bool {
8+
fn is_match(&self, _text: &[u8]) -> bool {
289
unimplemented!("Engine was built without regex support")
2910
}
11+
}
3012

31-
/// Returns the original string of this dummy regex wrapper.
32-
pub fn as_str(&self) -> &str {
33-
self.pattern.as_str()
34-
}
13+
/// Default regex provider.
14+
#[derive(Debug, Default)]
15+
pub struct DefaultRegexProvider;
3516

36-
/// Returns the format behind the regex
37-
pub fn format(&self) -> RegexFormat {
38-
self.format
17+
impl RegexProvider for DefaultRegexProvider {
18+
fn lookup_regex(&self, _pattern: &str) -> Result<Arc<dyn Regex>, Error> { // same name/signature as the non-stub provider's `lookup_regex`
19+
Ok(Arc::new(StubRegex {})) // pattern is never compiled here; `StubRegex::is_match` panics if called
3920
}
4021
}

0 commit comments

Comments
 (0)