Skip to content

Commit 5b7b7e3

Browse files
Perf: cache condition parsing, type hints, and optimize string operations (#452)
* Improve performance: cache condition parsing, type hints, and optimize string operations Agent-Logs-Url: https://github.com/SigmaHQ/pySigma/sessions/96b970a7-15bc-4b53-bbeb-51ca1c790721 Co-authored-by: thomaspatzke <1845601+thomaspatzke@users.noreply.github.com> * Address review feedback: fix variable shadowing and frozenset creation Agent-Logs-Url: https://github.com/SigmaHQ/pySigma/sessions/96b970a7-15bc-4b53-bbeb-51ca1c790721 Co-authored-by: thomaspatzke <1845601+thomaspatzke@users.noreply.github.com> * Fix mypy type checking errors in sigma/types.py Agent-Logs-Url: https://github.com/SigmaHQ/pySigma/sessions/29062a21-3d71-4f22-b83a-15c994c5ead9 Co-authored-by: thomaspatzke <1845601+thomaspatzke@users.noreply.github.com> * Fix docstring for _parse_condition_string to clarify deep-copy responsibility Agent-Logs-Url: https://github.com/SigmaHQ/pySigma/sessions/29062a21-3d71-4f22-b83a-15c994c5ead9 Co-authored-by: thomaspatzke <1845601+thomaspatzke@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: thomaspatzke <1845601+thomaspatzke@users.noreply.github.com> Co-authored-by: Thomas Patzke <thomas@patzke.org>
1 parent f49862d commit 5b7b7e3

3 files changed

Lines changed: 82 additions & 34 deletions

File tree

sigma/conditions.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
from dataclasses import dataclass, field
44
from abc import ABC
5+
import copy
6+
from functools import lru_cache
57
import re
68
from sigma.processing.tracking import ProcessingItemTrackingMixin
79
from pyparsing import (
@@ -12,11 +14,15 @@
1214
opAssoc,
1315
ParseResults,
1416
ParseException,
17+
ParserElement,
1518
)
1619
from typing import ClassVar, Type, cast, TYPE_CHECKING
1720
from sigma.types import SigmaType
1821
from sigma.exceptions import SigmaConditionError, SigmaRuleLocation
1922

23+
# Enable packrat parsing for faster parsing of complex condition expressions
24+
ParserElement.enable_packrat(cache_size_limit=128)
25+
2026
if TYPE_CHECKING:
2127
from sigma.rule.detection import SigmaDetection, SigmaDetectionItem, SigmaDetections
2228

@@ -282,6 +288,17 @@ class ConditionValueExpression(ParentChainMixin):
282288
)
283289

284290

291+
@lru_cache(maxsize=256)
def _parse_condition_string(condition_str: str) -> ConditionItem:
    """Return the root parse node for a Sigma condition expression.

    Results are memoized per condition string (LRU, at most 256 entries), so
    repeated calls with an identical string hand out the very same object.
    The caller is responsible for deep-copying it before postprocessing,
    because postprocessing mutates the parse tree.
    """
    parse_results = condition.parse_string(condition_str, parse_all=True)
    root = parse_results[0]
    return cast(ConditionItem, root)
300+
301+
285302
@dataclass
286303
class SigmaCondition(ProcessingItemTrackingMixin):
287304
condition: str
@@ -304,7 +321,8 @@ def parse(
304321
"The pipe syntax in Sigma conditions has been deprecated and replaced by Sigma correlations. pySigma doesn't supports this syntax."
305322
)
306323
try:
307-
parsed = cast(ConditionItem, condition.parse_string(self.condition, parse_all=True)[0])
324+
# Use cached parse result, deep-copied since postprocessing mutates the tree
325+
parsed = copy.deepcopy(_parse_condition_string(self.condition))
308326
if postprocess:
309327
return parsed.postprocess(self.detections, source=self.source)
310328
else:

sigma/modifiers.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,22 @@ def __init__(
6666
self.applied_modifiers = applied_modifiers
6767
self.source = source
6868

69+
# Cache for type hints resolved from modify() method, keyed by class
70+
_type_hint_cache: ClassVar[dict[type, Any]] = {}
71+
72+
def _get_modify_type_hint(self) -> Any:
    """Return the annotation of the ``val`` parameter of this modifier's ``modify`` method.

    The resolved hint is memoized in ``SigmaModifier._type_hint_cache`` under
    the concrete class of ``self``; once a class has an entry, later calls
    return it without resolving the type hints again.
    """
    modifier_cls = type(self)
    cache = SigmaModifier._type_hint_cache
    if modifier_cls not in cache:
        # First request for this class: resolve the hints and remember the result.
        cache[modifier_cls] = get_type_hints(self.modify)["val"]
    return cache[modifier_cls]
81+
6982
def type_check(self, val: Any, explicit_type: Type[Any] | None = None) -> bool:
7083
th = (
71-
explicit_type or get_type_hints(self.modify)["val"]
84+
explicit_type or self._get_modify_type_hint()
7285
) # get type annotation from val parameter of modify method or explicit_type parameter
7386
if th is Any:
7487
return True

sigma/types.py

Lines changed: 49 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -338,20 +338,14 @@ def replace_with_placeholder(
338338

339339
def _merge_strs(self) -> "SigmaString":
340340
"""Merge consecutive plain strings in self.s."""
341-
src = list(reversed(self.s))
342-
res: list[SigmaStringPartType] = []
343-
while src:
344-
item = src.pop()
345-
try:
346-
if isinstance(res[-1], str) and isinstance(
347-
item, str
348-
): # append current item to last result element if both are strings
349-
res[-1] += item
350-
else:
351-
res.append(item)
352-
except IndexError: # first element
341+
if not self.s:
342+
return self
343+
res: list[SigmaStringPartType] = [self.s[0]]
344+
for item in self.s[1:]:
345+
if isinstance(res[-1], str) and isinstance(item, str):
346+
res[-1] += item
347+
else:
353348
res.append(item)
354-
355349
self.s = res
356350
return self
357351

@@ -453,7 +447,7 @@ def endswith(self, val: SigmaStringPartType) -> bool:
453447

454448
def contains_special(self) -> bool:
    """Report whether at least one part of self.s is a SpecialChars instance."""
    for part in self.s:
        if isinstance(part, SpecialChars):
            return True
    return False
457451

458452
def contains_placeholder(
459453
self, include: list[str] | None = None, exclude: list[str] | None = None
@@ -575,40 +569,56 @@ def convert(
575569
Setting one of the wildcard or multiple parameters to None indicates that this feature is not supported. Appearance
576570
of these characters in a string will raise a SigmaValueError.
577571
"""
578-
s = ""
572+
result = []
579573
escaped_chars = frozenset((wildcard_multi or "") + (wildcard_single or "") + add_escaped)
580-
581-
for c in iter(self):
582-
if isinstance(c, str): # c is plain character
583-
if c in filter_chars: # Skip filtered characters
584-
continue
585-
if c in escaped_chars:
586-
s += escape_char
587-
s += c
588-
elif isinstance(c, SpecialChars): # special handling for special characters
589-
if c == SpecialChars.WILDCARD_MULTI:
574+
filter_set = frozenset(filter_chars)
575+
576+
for part in self.s:
577+
if isinstance(part, str): # part is a plain string segment
578+
if not filter_set and not escaped_chars:
579+
# Fast path: no escaping or filtering needed
580+
result.append(part)
581+
elif not filter_set and escaped_chars:
582+
# Only escaping needed, process character-by-character only if necessary
583+
if any(c in escaped_chars for c in part):
584+
for c in part:
585+
if c in escaped_chars and escape_char is not None:
586+
result.append(escape_char)
587+
result.append(c)
588+
else:
589+
result.append(part)
590+
else:
591+
# Both filtering and escaping
592+
for c in part:
593+
if c in filter_set:
594+
continue
595+
if c in escaped_chars and escape_char is not None:
596+
result.append(escape_char)
597+
result.append(c)
598+
elif isinstance(part, SpecialChars): # special handling for special characters
599+
if part == SpecialChars.WILDCARD_MULTI:
590600
if wildcard_multi is not None:
591-
s += wildcard_multi
601+
result.append(wildcard_multi)
592602
else:
593603
raise SigmaValueError(
594604
"Multi-character wildcard not specified for conversion"
595605
)
596-
elif c == SpecialChars.WILDCARD_SINGLE:
606+
elif part == SpecialChars.WILDCARD_SINGLE:
597607
if wildcard_single is not None:
598-
s += wildcard_single
608+
result.append(wildcard_single)
599609
else:
600610
raise SigmaValueError(
601611
"Single-character wildcard not specified for conversion"
602612
)
603-
elif isinstance(c, Placeholder):
613+
elif isinstance(part, Placeholder):
604614
raise SigmaPlaceholderError(
605-
f"Attempt to convert unhandled placeholder '{c.name}' into query."
615+
f"Attempt to convert unhandled placeholder '{part.name}' into query."
606616
)
607617
else:
608618
raise SigmaValueError(
609-
f"Trying to convert SigmaString containing part of type '{type(c).__name__}'"
619+
f"Trying to convert SigmaString containing part of type '{type(part).__name__}'"
610620
)
611-
return s
621+
return "".join(result)
612622

613623
def to_regex(self, custom_escaped: str = "") -> "SigmaRegularExpression":
614624
"""Convert SigmaString into a regular expression."""
@@ -1014,6 +1024,13 @@ class SigmaExpansion(NoPlainConversionMixin, SigmaType):
10141024

10151025
def sigma_type(v: (int | float | str | bool) | None) -> SigmaType:
10161026
"""Return Sigma type from Python value"""
1027+
# Check bool before int since bool is a subclass of int in Python
1028+
if isinstance(v, bool):
1029+
return SigmaBool(v)
1030+
matched = type_map.get(type(v))
1031+
if matched is not None:
1032+
return matched(v)
1033+
# Fallback to isinstance checks for subclasses
10171034
for t, st in type_map.items():
10181035
if isinstance(v, t):
10191036
return st(v)

0 commit comments

Comments
 (0)