Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
/target
**/target
**/corpus
**/artifacts
*.txt
6 changes: 6 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,9 @@ regex = "1.11.1"
[[bench]]
name = "regex_benchmark"
harness = false

[profile.release]
debug = true

[lib]
name = "regex_engine"
60 changes: 60 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,63 @@ Contributions are welcome! Please follow these steps to contribute:
## License

This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.

## Using the fuzzer

### Prerequisites

1. **Install rustup** (if you don't have it):
```bash
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
```

2. **Install cargo-fuzz**:
```bash
cargo install cargo-fuzz
```

### Running the fuzzer

Run the fuzzer with nightly Rust (required for libFuzzer support):

```bash
# Fuzz Thompson construction for 60 seconds
cargo +nightly fuzz run regex_thompson -- -max_total_time=60

# Fuzz Glushkov construction for 60 seconds
cargo +nightly fuzz run regex_glushkov -- -max_total_time=60

# List all available fuzz targets
cargo +nightly fuzz list

# Run indefinitely (stop with Ctrl+C)
cargo +nightly fuzz run regex_thompson
```

### Analyzing crashes

If the fuzzer finds crashes, they'll be saved in `fuzz/artifacts/`:

```bash
# View a crash file
hexdump -C fuzz/artifacts/regex_thompson/crash-<hash>

# Reproduce a specific crash
cargo +nightly fuzz run regex_thompson fuzz/artifacts/regex_thompson/crash-<hash>

# Minimize a crashing input
cargo +nightly fuzz tmin regex_thompson fuzz/artifacts/regex_thompson/crash-<hash>
```

### Useful options

```bash
# Run with multiple workers (parallel fuzzing); libFuzzer only spawns workers when -jobs is set
cargo +nightly fuzz run regex_thompson -- -jobs=4 -workers=4

# Run for a specific number of iterations
cargo +nightly fuzz run regex_thompson -- -runs=10000

# Show final statistics
cargo +nightly fuzz run regex_thompson -- -print_final_stats=1
```
162 changes: 162 additions & 0 deletions fuzz/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

29 changes: 29 additions & 0 deletions fuzz/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
[package]
name = "regex_engine-fuzz"
version = "0.0.0"
publish = false
edition = "2024"

[package.metadata]
cargo-fuzz = true

[dependencies]
arbitrary = { version = "1.4.2", features = ["derive"] }
libfuzzer-sys = "0.4"

[dependencies.regex_engine]
path = ".."

[[bin]]
name = "regex_thompson"
path = "fuzz_targets/regex_thompson.rs"
test = false
doc = false
bench = false

[[bin]]
name = "regex_glushkov"
path = "fuzz_targets/regex_glushkov.rs"
test = false
doc = false
bench = false
40 changes: 40 additions & 0 deletions fuzz/fuzz_targets/regex_glushkov.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use regex_engine::{ConstructionType, Regex};

// Fuzz target: feeds arbitrary short UTF-8 strings to `Regex::new` using the
// Glushkov construction. Returning `Ok` or `Err` are both acceptable outcomes;
// only a panic counts as a bug.
fuzz_target!(|data: &[u8]| {
    // `Regex::new` takes a `&str`, so inputs that are not valid UTF-8 are skipped.
    let Ok(regex_str) = std::str::from_utf8(data) else {
        return;
    };

    // Limit input size (in bytes) to avoid timeouts.
    if regex_str.len() > 50 {
        return;
    }

    // Skip obviously invalid inputs to focus on potentially valid ones.
    if regex_str.is_empty() || regex_str.starts_with(['*', '+', '?', ')']) {
        return;
    }

    // Deliberately NOT wrapped in `catch_unwind`: libFuzzer can only record a
    // crashing input (and write it to `fuzz/artifacts/`) if the panic takes the
    // process down. Catching it here and merely logging would hide the bug.
    let _ = Regex::new(regex_str, ConstructionType::Glushkov);
});
40 changes: 40 additions & 0 deletions fuzz/fuzz_targets/regex_thompson.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use regex_engine::{ConstructionType, Regex};

// Fuzz target: feeds arbitrary short UTF-8 strings to `Regex::new` using the
// Thompson construction. Returning `Ok` or `Err` are both acceptable outcomes;
// only a panic counts as a bug.
fuzz_target!(|data: &[u8]| {
    // `Regex::new` takes a `&str`, so inputs that are not valid UTF-8 are skipped.
    let Ok(regex_str) = std::str::from_utf8(data) else {
        return;
    };

    // Limit input size (in bytes) to avoid timeouts.
    if regex_str.len() > 50 {
        return;
    }

    // Skip obviously invalid inputs to focus on potentially valid ones.
    if regex_str.is_empty() || regex_str.starts_with(['*', '+', '?', ')']) {
        return;
    }

    // Deliberately NOT wrapped in `catch_unwind`: libFuzzer can only record a
    // crashing input (and write it to `fuzz/artifacts/`) if the panic takes the
    // process down. Catching it here and merely logging would hide the bug.
    let _ = Regex::new(regex_str, ConstructionType::Thompson);
});
4 changes: 3 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,9 @@ pub fn normalise_regex(regex: &str) -> String {
}
}
_ => {
normalised.insert(normalised.len() - 1, '(');
if normalised.len() > 0 {
normalised.insert(normalised.len() - 1, '(');
}
}
}
normalised.push_str("|)");
Expand Down