Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions regex-filtered/examples/matchindex.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
use clap::Parser;
use std::io::BufRead;

/// Prints, for each user agent, the index of the first regex matching it
/// (or `-` when no regex matches).
#[derive(Parser)]
struct Args {
    /// Path to a file containing one regex per line.
    // `PathBuf` rather than `String` so non-UTF-8 paths are accepted.
    regexes: std::path::PathBuf,
    /// Path to a file containing one user agent per line.
    useragents: std::path::PathBuf,
}

/// Compiles every line of the regexes file, then prints, for every line of the
/// user-agents file, the zero-based index of the first regex (in file order)
/// that matches it, or `-` when none does.
///
/// # Panics
///
/// Panics if either file cannot be opened or read, or if a line of the
/// regexes file is not a valid regex.
fn main() {
    let Args {
        regexes,
        useragents,
    } = Args::parse();

    let regexes: Vec<_> = std::io::BufReader::new(
        std::fs::File::open(&regexes)
            .unwrap_or_else(|e| panic!("failed to open regexes file {regexes:?}: {e}")),
    )
    .lines()
    .map(|l| regex::Regex::new(&l.expect("failed to read regexes file")).unwrap())
    .collect();

    let mut uas = std::io::BufReader::new(
        std::fs::File::open(&useragents)
            .unwrap_or_else(|e| panic!("failed to open user agents file {useragents:?}: {e}")),
    );
    // A single buffer is reused for every line to avoid per-line allocations.
    let mut line = String::with_capacity(150);
    loop {
        line.clear();
        // A read error (e.g. invalid UTF-8) must abort loudly: silently
        // stopping would truncate the output while still exiting successfully.
        let n = uas
            .read_line(&mut line)
            .expect("failed to read user agents file");
        if n == 0 {
            // End of file.
            break;
        }
        // `read_line` keeps the trailing newline; drop it before matching.
        let ua = line.strip_suffix('\n').unwrap_or(&line);
        match regexes.iter().position(|regex| regex.is_match(ua)) {
            Some(i) => println!("{i}"),
            None => println!("-"),
        }
    }
}
40 changes: 40 additions & 0 deletions regex-filtered/examples/matchindex2.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
use clap::Parser;
use std::io::BufRead;

/// Prints, for each user agent, the index reported for its first match in a
/// `regex_filtered` set built from the regexes file (or `-` when there is no
/// match).
#[derive(Parser)]
struct Args {
    /// Path to a file containing one regex per line.
    // `PathBuf` rather than `String` so non-UTF-8 paths are accepted.
    regexes: std::path::PathBuf,
    /// Path to a file containing one user agent per line.
    useragents: std::path::PathBuf,
}

/// Builds a `regex_filtered` matcher from every line of the regexes file, then
/// prints, for every line of the user-agents file, the index carried by the
/// first item yielded by `matching`, or `-` when nothing is yielded.
///
/// NOTE(review): presumably `matching` yields matches in regex insertion
/// order, making the output comparable to a linear first-match scan — confirm
/// against the `regex_filtered` documentation.
///
/// # Panics
///
/// Panics if either file cannot be opened or read, or if the matcher cannot
/// be built from the regexes file.
fn main() {
    let Args {
        regexes,
        useragents,
    } = Args::parse();

    let regexes = regex_filtered::Builder::new()
        .push_all(
            std::io::BufReader::new(
                std::fs::File::open(&regexes)
                    .unwrap_or_else(|e| panic!("failed to open regexes file {regexes:?}: {e}")),
            )
            .lines()
            .map(Result::unwrap),
        )
        .unwrap()
        .build()
        .unwrap();

    let mut uas = std::io::BufReader::new(
        std::fs::File::open(&useragents)
            .unwrap_or_else(|e| panic!("failed to open user agents file {useragents:?}: {e}")),
    );
    // A single buffer is reused for every line to avoid per-line allocations.
    let mut line = String::with_capacity(150);
    loop {
        line.clear();
        // A read error (e.g. invalid UTF-8) must abort loudly: silently
        // stopping would truncate the output while still exiting successfully.
        let n = uas
            .read_line(&mut line)
            .expect("failed to read user agents file");
        if n == 0 {
            // End of file.
            break;
        }
        // `read_line` keeps the trailing newline; drop it before matching.
        let ua = line.strip_suffix('\n').unwrap_or(&line);
        if let Some((i, _)) = regexes.matching(ua).next() {
            println!("{i}");
        } else {
            println!("-");
        }
    }
}
33 changes: 28 additions & 5 deletions scripts/devices
Original file line number Diff line number Diff line change
Expand Up @@ -31,18 +31,41 @@ r"""Compiles regexes.yaml to just the device regexps, with rewriting:
Note that this is only done for a lower bound of `0` or `1`, but
that's the case of all large bounded repetitions in regexes.yaml.
"""
import argparse
import string
import sys
import urllib.request

from yaml import SafeLoader, load

def main() -> None:
with open(sys.argv[1]) as f:
regexes = load(f, Loader=SafeLoader)
for dev in regexes['device_parsers']:
parser = argparse.ArgumentParser()
parser.add_argument(
'regexes_yaml',
help="path or url to a regexes.yaml file, `-` for stdin",
)
parser.add_argument(
'--section',
default='device',
choices=['user_agent', 'os', 'device'],
help="regex section to convert, defaults to `device`"
)
args = parser.parse_args()

regexes = args.regexes_yaml
if regexes == '-':
regexes = load(sys.stdin, Loader=SafeLoader)
elif regexes.startswith('http'):
with urllib.request.urlopen(regexes) as f:
regexes = load(f, Loader=SafeLoader)
else:
with open(regexes) as f:
regexes = load(f, Loader=SafeLoader)

for item in regexes[f'{args.section}_parsers']:
print(
f'(?{f})' if (f := dev.get('regex_flag')) else '',
rewrite(dev['regex']),
f'(?{f})' if (f := item.get('regex_flag')) else '',
rewrite(item['regex']),
sep='',
)

Expand Down
32 changes: 32 additions & 0 deletions scripts/matchindex
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/usr/bin/env python

import argparse
import re

# Command line: a path to the regexes file and an already-opened user agents
# stream (argparse.FileType maps `-` to stdin).
parser = argparse.ArgumentParser()
parser.add_argument('regexes', help="regexes to try on the user agents")
parser.add_argument(
    'useragents',
    type=argparse.FileType(),
    help="user agents to parse, `-` for stdin",
)
args = parser.parse_args()

# One pattern per line of the regexes file, compiled in file order.
with open(args.regexes) as regex_file:
    regexes = list(map(
        re.compile,
        (raw_pattern.rstrip('\n') for raw_pattern in regex_file),
    ))

# For each user agent, print the index of the first pattern that matches
# anywhere in it, or `-` when no pattern matches.
with args.useragents as ua_file:
    for raw_ua in ua_file:
        ua = raw_ua.rstrip('\n')
        first_hit = next(
            (idx for idx, rx in enumerate(regexes) if rx.search(ua)),
            None,
        )
        print('-' if first_hit is None else first_hit)
Loading