Skip to content

Commit be4e220

Browse files
committed
expose descriptions for named patterns
1 parent 84a6e6a commit be4e220

6 files changed

Lines changed: 219 additions & 173 deletions

File tree

refinery/lib/batch/emulator.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1027,7 +1027,7 @@ def execute_command(self, cmd: SynCommand, std: IO, in_group: bool):
10271027
if handler is None:
10281028
if self.state.exists_file(verb):
10291029
self.state.ec = 0
1030-
elif not indicators.winfpath.value.fullmatch(verb):
1030+
elif not indicators.wintpath.value.fullmatch(verb):
10311031
if '\uFFFD' in verb or not verb.isprintable():
10321032
self.state.ec = 9009
10331033
cmd.junk = True

refinery/lib/patterns/__init__.py

Lines changed: 164 additions & 138 deletions
Original file line number | Diff line number | Diff line change
@@ -99,10 +99,11 @@ class pattern(PatternMethods):
9999
str_pattern: str
100100
bin_pattern: bytes
101101

102-
def __init__(self, pattern: str, flags: int = 0):
102+
def __init__(self, pattern: str, flags: int = 0, description: str = ''):
103103
self.str_pattern = pattern
104104
self.bin_pattern = pattern.encode('ascii')
105105
self.flags = flags
106+
self.description = description
106107

107108
def __bytes__(self):
108109
return self.bin_pattern
@@ -243,8 +244,27 @@ def __getattr__(self, name):
243244
raise AttributeError
244245

245246
@property
246-
def display(self):
247-
return normalize_to_display(self.name)
247+
def description(self):
248+
return self.value.description
249+
250+
@classmethod
251+
def make_table_with_shorts(cls, name: str):
252+
alias = {p.name: name for name, p in cls.__members__.items() if name != p.name}
253+
for p in cls:
254+
if p.name.endswith('array'):
255+
alias[p.name] = F'[{p.name[:-5]}]'
256+
width = max(len(p.name) for p in cls) + 4
257+
table = [
258+
(name.upper(), 'SHORT', 'DESCRIPTION'), *(
259+
(p.name, alias.get(p.name, ''), p.description) for p in cls)]
260+
return '\n'.join((
261+
F'{"":>8}{n:>{width}} {s:<5} {d}' for n, s, d in table))
262+
263+
@classmethod
264+
def make_table(cls, name: str):
265+
width = max(len(p.name) for p in cls) + 4
266+
table = [(name.upper(), 'DESCRIPTION'), *((p.name, p.description) for p in cls)]
267+
return '\n'.join((F'{"":>8}{n:>{width}} {d}' for n, d in table))
248268

249269

250270
_TLDS = R'(?i:{possible_tld})(?!(?:{dealbreakers}))'.format(
@@ -500,108 +520,114 @@ def make_hexline_pattern(blocksize: int) -> str:
500520

501521

502522
class checks(_PatternEnum):
503-
json = pattern(_pattern_json)
504-
"Data that consists of JSON-like tokens; cannot detect actual JSON data."
505-
path_element_nospace = pattern(_pattern_pathpart_nospace)
506-
"A string that can be a valid file system path component and contains no spaces."
523+
json = pattern(_pattern_json,
524+
description="Data that consists of JSON-like tokens; cannot detect actual JSON data.")
525+
path_element_nospace = pattern(_pattern_pathpart_nospace,
526+
description="A string that can be a valid file system path component and contains no spaces.")
507527

508528

509529
class formats(_PatternEnum):
510530
"""
511531
An enumeration of patterns for certain formats.
512532
"""
513-
int = pattern(_pattern_integer)
514-
"Integer expressions"
515-
flt = pattern(_pattern_float)
516-
"Floating point number expressions"
517-
num = pattern(_pattern_number)
518-
"Either an integer or a float"
519-
str = pattern(_pattern_string)
520-
"C syntax string literal"
521-
cmdstr = pattern(_pattern_cmdstr)
522-
"Windows command line escaped string literal"
523-
ps1str = pattern(_pattern_ps1str, flags=re.DOTALL)
524-
"PowerShell escaped string literal"
525-
vbastr = pattern(_pattern_vbastr)
526-
"VBS/VBA string literal"
527-
vbaint = pattern(_pattern_vbaint)
528-
"VBS/VBA integer literal"
529-
printable = alphabet(R'[\s!-~]')
530-
"Any sequence of printable characters"
531-
urlquote = pattern(_pattern_urlenc)
532-
"Any sequence of url-encoded characters, default char set"
533-
urlhex = pattern(_pattern_urlhex)
534-
"A hex-encoded buffer using URL escape sequences"
535-
htmlesc = pattern(_pattern_htmlesc)
536-
"A sequence of HTML-escape characters"
537-
intarray = tokenize(_pattern_integer, sep=R'[;,]', bound='', unique_sep=True)
538-
"Sequences of integers, separated by commas or semicolons"
539-
strarray = tokenize(_pattern_string, sep=R'[;,]', bound='', unique_sep=True)
540-
"Sequences of strings, separated by commas or semicolons"
541-
numarray = tokenize(_pattern_number, sep=R'[;,]', bound='', unique_sep=True)
542-
"Sequences of numbers, separated by commas or semicolons"
543-
hexarray = tokenize(R'[0-9A-Fa-f]{2}', sep=R'[;,]', bound='', unique_sep=True)
544-
"Arrays of hexadecimal strings, separated by commas or semicolons"
545-
word = alphabet(R'\\w')
546-
"Sequences of word characters"
547-
letters = alphabet(R'[a-zA-Z]')
548-
"Sequences of alphabetic characters"
549-
wshenc = pattern(_pattern_wshenc)
550-
"Encoded Windows Scripting Host Scripts (JS/VBS)"
551-
alnum = alphabet(R'[a-zA-Z0-9]')
552-
"Sequences of alpha-numeric characters"
553-
b32 = pattern('[A-Z2-7]+|[a-z2-7+]')
554-
"Base32 encoded strings"
555-
b58 = alphabet(R'(?:[1-9A-HJ-NP-Za-km-z]')
556-
"Base58 encoded strings"
557-
b62 = alphabet(R'(?:[0-9A-Za-z]')
558-
"Base62 encoded strings"
559-
b64 = alphabet(R'(?:[0-9a-zA-Z\+/]{4})', suffix=R'(?:(?:[0-9a-zA-Z\+/]{2,3})={0,3})?', suffix_max=6, token_size=4)
560-
"Base64 encoded strings"
561-
b85 = alphabet(R'[-!+*()#-&^-~0-9;-Z]')
562-
"Base85 encoded strings"
563-
a85 = alphabet(R'[!-u]')
564-
"Ascii85 encoded strings"
565-
z85 = alphabet(R'[-0-9a-zA-Z.:+=^!/*?&<>()\[\]{}@%$#]')
566-
"Z85 encoded strings"
567-
b92 = pattern(_pattern_b92)
568-
"Base92 encoded strings"
569-
b64u = alphabet(R'[-\w]{4}', suffix=R'(?:[-\w]{2,3}={0,3})?', suffix_max=6)
570-
"Base64 encoded strings using URL-safe alphabet"
571-
hex = alphabet(R'[0-9a-fA-F]{2}', token_size=2)
572-
"Hexadecimal strings"
573-
b16 = alphabet(R'[0-9A-F]{2}', token_size=2)
574-
"Uppercase hexadecimal strings"
575-
b16s = tokenize(R'[0-9a-fA-F]+', R'\s*', bound='')
576-
"Hexadecimal strings"
577-
b64s = alphabet(R'[-\s\w\+/]', suffix=R'(?:={0,3})?', suffix_max=3)
578-
"Base64 encoded strings, separated by whitespace"
579-
b85s = alphabet(R'[-!+*()#-&^-~0-9;-Z\s]')
580-
"Base85 encoded string, separated by whitespace"
581-
a85s = alphabet(R'[!-u\s]')
582-
"Ascii85 encoded string, separated by whitespace"
583-
z85s = alphabet(R'[-\s0-9a-zA-Z.:+=^!/*?&<>()\[\]{}@%$#]')
584-
"Z85 encoded string, separated by whitespace"
585-
utf8 = pattern(_pattern_utf8)
586-
"A sequence of bytes that can be decoded as UTF8."
587-
hexdump = tokenize(_pattern_hexline, bound='', sep=R'\s*\n')
588-
"""
589-
This pattern matches a typical hexdump output where hexadecimally encoded
590-
bytes are followed by a string which contains dots or printable characters
591-
from the dump. For example:
592-
593-
46 4F 4F 0A 42 41 52 0A FOO.BAR.
594-
F0 0B AA BA F0 0B ......
595-
"""
596-
uuenc = pattern(_pattern_uuencode)
597-
"UUEncoded data"
533+
integer = pattern(_pattern_integer,
534+
description="any integer literal expression")
535+
float = pattern(_pattern_float,
536+
description="floating point literals")
537+
number = pattern(_pattern_number,
538+
description="either an integer or a float")
539+
string = pattern(_pattern_string,
540+
description="c-syntax string literal")
541+
cmdstr = pattern(_pattern_cmdstr,
542+
description="Windows command line escaped string literal")
543+
ps1str = pattern(_pattern_ps1str, flags=re.DOTALL,
544+
description="PowerShell escaped string literal")
545+
vbastr = pattern(_pattern_vbastr,
546+
description="VBS/VBA string literal")
547+
vbaint = pattern(_pattern_vbaint,
548+
description="VBS/VBA integer literal")
549+
printable = alphabet(R'[\s!-~]',
550+
description="printable strings (includes whitespace)")
551+
urlquote = pattern(_pattern_urlenc,
552+
description="url-encoded characters, default char set")
553+
urlhex = pattern(_pattern_urlhex,
554+
description="hex-encoded buffer using URL escape sequences")
555+
htmlesc = pattern(_pattern_htmlesc,
556+
description="sequence of HTML-escape characters")
557+
intarray = tokenize(_pattern_integer, sep=R'[;,]', bound='', unique_sep=True,
558+
description="integers separated by commas or semicolons")
559+
strarray = tokenize(_pattern_string, sep=R'[;,]', bound='', unique_sep=True,
560+
description="strings separated by commas or semicolons")
561+
numarray = tokenize(_pattern_number, sep=R'[;,]', bound='', unique_sep=True,
562+
description="numbers separated by commas or semicolons")
563+
hexarray = tokenize(R'[0-9A-Fa-f]{2}', sep=R'[;,]', bound='', unique_sep=True,
564+
description="hex sequences separated by commas or semicolons")
565+
letters = alphabet(R'[a-zA-Z]',
566+
description="alphabetic characters")
567+
wshenc = pattern(_pattern_wshenc,
568+
description="encoded Windows Scripting Host Scripts (JS/VBS)")
569+
alnum = alphabet(R'[a-zA-Z0-9]',
570+
description="alphanumeric characters")
571+
base32 = pattern('[A-Z2-7]+|[a-z2-7+]',
572+
description="Base32 encoded strings")
573+
base58 = alphabet(R'(?:[1-9A-HJ-NP-Za-km-z]',
574+
description="Base58 encoded strings")
575+
base62 = alphabet(R'(?:[0-9A-Za-z]',
576+
description="Base62 encoded strings")
577+
base64 = alphabet(R'(?:[0-9a-zA-Z\+/]{4})', suffix=R'(?:(?:[0-9a-zA-Z\+/]{2,3})={0,3})?', suffix_max=6, token_size=4,
578+
description="Base64 encoded strings")
579+
base85 = alphabet(R'[-!+*()#-&^-~0-9;-Z]',
580+
description="Base85 encoded strings")
581+
ascii85 = alphabet(R'[!-u]',
582+
description="Ascii85 encoded strings")
583+
z85 = alphabet(R'[-0-9a-zA-Z.:+=^!/*?&<>()\[\]{}@%$#]',
584+
description="Z85 encoded strings")
585+
base92 = pattern(_pattern_b92,
586+
description="Base92 encoded strings")
587+
base64u = alphabet(R'[-\w]{4}', suffix=R'(?:[-\w]{2,3}={0,3})?', suffix_max=6,
588+
description="Base64 encoded strings using URL-safe alphabet")
589+
hex = alphabet(R'[0-9a-fA-F]{2}', token_size=2,
590+
description="hexadecimal strings")
591+
base16 = alphabet(R'[0-9A-F]{2}', token_size=2,
592+
description="uppercase hexadecimal strings")
593+
base16s = tokenize(R'[0-9a-fA-F]+', R'\s*', bound='',
594+
description="hexadecimal strings")
595+
base64s = alphabet(R'[-\s\w\+/]', suffix=R'(?:={0,3})?', suffix_max=3,
596+
description="Base64 encoded strings, separated by whitespace")
597+
base85s = alphabet(R'[-!+*()#-&^-~0-9;-Z\s]',
598+
description="Base85 encoded string, separated by whitespace")
599+
a85s = alphabet(R'[!-u\s]',
600+
description="Ascii85 encoded string, separated by whitespace")
601+
z85s = alphabet(R'[-\s0-9a-zA-Z.:+=^!/*?&<>()\[\]{}@%$#]',
602+
description="Z85 encoded string, separated by whitespace")
603+
utf8 = pattern(_pattern_utf8,
604+
description="sequences of bytes that can be decoded as UTF8")
605+
hexdump = tokenize(_pattern_hexline, bound='', sep=R'\s*\n',
606+
description="typical hexdump output")
607+
uuenc = pattern(_pattern_uuencode,
608+
description="UUEncoded data")
598609

599610
# shortcuts
600-
float = flt
601-
integer = int
602-
number = num
603-
string = str
611+
flt = float
612+
int = integer
613+
num = number
614+
str = string
615+
b32 = base32
616+
b58 = base58
617+
b62 = base62
618+
b64 = base64
619+
b85 = base85
620+
b92 = base92
621+
a85 = ascii85
622+
b16 = base16
623+
b64u = base64u
624+
b16s = base16s
625+
b64s = base64s
626+
b85s = base85s
604627
ps = printable
628+
hd = hexdump
629+
uq = urlquote
630+
uh = urlhex
605631

606632
@classmethod
607633
def from_dashname(cls, key: str):
@@ -646,48 +672,48 @@ class indicators(_PatternEnum):
646672
"""
647673
An enumeration of patterns for indicators.
648674
"""
649-
domain = pattern(_pattern_serrated_domain)
650-
"Domain names"
651-
email = pattern(_pattern_email)
652-
"Email addresses"
653-
guid = pattern(_pattern_guid)
654-
"Windows GUID strings"
655-
date = pattern(_pattern_date)
656-
"A date or timestamp value in a common format"
657-
ipv4 = pattern(_pattern_serrated_ipv4)
658-
"String representations of IPv4 addresses"
659-
ipv6 = pattern(_pattern_ipv6)
660-
"String representations of IPv6 addresses"
661-
md5 = alphabet('[0-9A-Fa-f]', lower=32, upper=32)
662-
"Hexadecimal strings of length 32"
663-
sha1 = alphabet('[0-9A-Fa-f]', lower=40, upper=40)
664-
"Hexadecimal strings of length 40"
665-
sha256 = alphabet('[0-9A-Fa-f]', lower=64, upper=64)
666-
"Hexadecimal strings of length 64"
667-
host = pattern(_pattern_serrated_host)
668-
"Any domain name or IPv4 address, optionally followed by a colon and a port number."
669-
socket = pattern(_pattern_serrated_socket)
670-
"Any domain name or IPv4 address followed by a colon and a (port) number"
671-
subdomain = pattern(_pattern_subdomain)
672-
"A domain which contains at least three parts, including the top level"
673-
url = pattern(_pattern_serrated_url)
674-
"Uniform resource locator addresses"
675-
pem = pattern(_pattern_pem)
676-
"A pattern matching PEM encoded cryptographic parameters"
677-
path = pattern(_pattern_any_path)
678-
"Windows and Linux path names"
679-
winpath = pattern(_pattern_win_path)
680-
"Windows path names"
681-
nixpath = pattern(_pattern_nix_path)
682-
"Posix path names"
683-
fpath = pattern(_pattern_any_path_terse)
684-
"Terser pattern for Windows and Linux path names"
685-
winfpath = pattern(_pattern_win_path_terse)
686-
"Terser pattern for Windows path names"
687-
nixfpath = pattern(_pattern_nix_path_terse)
688-
"Terser pattern for Posix path names"
689-
evar = pattern(_pattern_win_env_variable)
690-
"Windows environment variables, i.e. something like `%APPDATA%`"
675+
date = pattern(_pattern_date,
676+
description="date or timestamp value in a common format")
677+
domain = pattern(_pattern_serrated_domain,
678+
description="domain names")
679+
email = pattern(_pattern_email,
680+
description="email addresses")
681+
guid = pattern(_pattern_guid,
682+
description="Windows GUID")
683+
ipv4 = pattern(_pattern_serrated_ipv4,
684+
description="IPv4 address string")
685+
ipv6 = pattern(_pattern_ipv6,
686+
description="IPv6 address string")
687+
host = pattern(_pattern_serrated_host,
688+
description="domain or IPv4 optionally followed by colon and port")
689+
socket = pattern(_pattern_serrated_socket,
690+
description="domain or IPv4 followed by colon and port number")
691+
url = pattern(_pattern_serrated_url,
692+
description="uniform resource locator addresses")
693+
md5 = alphabet('[0-9A-Fa-f]', lower=32, upper=32,
694+
description="hex strings of length 32")
695+
sha1 = alphabet('[0-9A-Fa-f]', lower=40, upper=40,
696+
description="hex strings of length 40")
697+
sha256 = alphabet('[0-9A-Fa-f]', lower=64, upper=64,
698+
description="hex strings of length 64")
699+
subdomain = pattern(_pattern_subdomain,
700+
description="domain containing at least three parts including TLD")
701+
pem = pattern(_pattern_pem,
702+
description="PEM encoded cryptographic parameters")
703+
path = pattern(_pattern_any_path,
704+
description="Windows and Linux file paths")
705+
winpath = pattern(_pattern_win_path,
706+
description="file paths (Windows)")
707+
nixpath = pattern(_pattern_nix_path,
708+
description="file paths (Linux)")
709+
tpath = pattern(_pattern_any_path_terse,
710+
description="terser pattern for file paths")
711+
wintpath = pattern(_pattern_win_path_terse,
712+
description="terser file path pattern (Windows)")
713+
nixtpath = pattern(_pattern_nix_path_terse,
714+
description="terser file path pattern (Linux)")
715+
evar = pattern(_pattern_win_env_variable,
716+
description="Windows environment variable, i.e. %APPDATA%")
691717

692718
hostname = host
693719

refinery/units/__init__.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -385,8 +385,12 @@ def __missing__(self, key):
385385
if isinstance(default, (list, tuple, set)):
386386
if not default:
387387
return 'empty'
388-
elif len(default) == 1:
389-
default = next(iter(default))
388+
else:
389+
sentinel = next(iter(default))
390+
if len(default) == 1:
391+
return sentinel
392+
elif isinstance(sentinel, str):
393+
return '"{}"'.format(' '.join((str(x) for x in default)))
390394
if isinstance(default, slice):
391395
parts = [default.start or '', default.stop or '', default.step]
392396
default = ':'.join(str(x) for x in parts if x is not None)

0 commit comments

Comments
 (0)