diff --git a/.gitignore b/.gitignore index eed3cc5..dffb4a4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ -/target +**/target +**/corpus +**/artifacts *.txt diff --git a/Cargo.toml b/Cargo.toml index 4cdf3fd..99f6d95 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,3 +10,9 @@ regex = "1.11.1" [[bench]] name = "regex_benchmark" harness = false + +[profile.release] +debug = true + +[lib] +name = "regex_engine" diff --git a/README.md b/README.md index 2630728..9def9f1 100644 --- a/README.md +++ b/README.md @@ -74,3 +74,63 @@ Contributions are welcome! Please follow these steps to contribute: ## License This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details. + +## Using the fuzzer + +### Prerequisites + +1. **Install rustup** (if you don't have it): + ```bash + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh + ``` + +2. **Install cargo-fuzz**: + ```bash + cargo install cargo-fuzz + ``` + +### Running the fuzzer + +Run the fuzzer with nightly Rust (required for libFuzzer support): + +```bash +# Fuzz Thompson construction for 60 seconds +cargo +nightly fuzz run regex_thompson -- -max_total_time=60 + +# Fuzz Glushkov construction for 60 seconds +cargo +nightly fuzz run regex_glushkov -- -max_total_time=60 + +# List all available fuzz targets +cargo +nightly fuzz list + +# Run indefinitely (stop with Ctrl+C) +cargo +nightly fuzz run regex_thompson +``` + +### Analyzing crashes + +If the fuzzer finds crashes, they'll be saved in `fuzz/artifacts/`: + +```bash +# View a crash file +hexdump -C fuzz/artifacts/regex_thompson/crash-<hash> + +# Reproduce a specific crash +cargo +nightly fuzz run regex_thompson fuzz/artifacts/regex_thompson/crash-<hash> + +# Minimize a crashing input +cargo +nightly fuzz tmin regex_thompson fuzz/artifacts/regex_thompson/crash-<hash> +``` + +### Useful options + +```bash +# Run with multiple workers (parallel fuzzing) +cargo +nightly fuzz run regex_thompson -- -jobs=4 -workers=4 + +# Run for a specific number of 
iterations +cargo +nightly fuzz run regex_thompson -- -runs=10000 + +# Show final statistics +cargo +nightly fuzz run regex_thompson -- -print_final_stats=1 +``` diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock new file mode 100644 index 0000000..95cab61 --- /dev/null +++ b/fuzz/Cargo.lock @@ -0,0 +1,162 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1" +dependencies = [ + "derive_arbitrary", +] + +[[package]] +name = "bitflags" +version = "2.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34efbcccd345379ca2868b2b2c9d3782e9cc58ba87bc7d79d5b53d9c9ae6f25d" + +[[package]] +name = "cc" +version = "1.2.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42bc4aea80032b7bf409b0bc7ccad88853858911b7713a8062fdc0623867bedc" +dependencies = [ + "jobserver", + "libc", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" + +[[package]] +name = "derive_arbitrary" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30542c1ad912e0e3d22a1935c290e12e8a29d704a420177a31faad4a601a0800" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "getrandom" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasi", +] + +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom", + "libc", +] + +[[package]] +name = "libc" +version = "0.2.175" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" + +[[package]] +name = "libfuzzer-sys" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5037190e1f70cbeef565bd267599242926f724d3b8a9f510fd7e0b540cfa4404" +dependencies = [ + "arbitrary", + "cc", +] + +[[package]] +name = "proc-macro2" +version = "1.0.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "regex_engine" +version = "0.1.0" + +[[package]] +name = "regex_engine-fuzz" +version = "0.0.0" +dependencies = [ + "arbitrary", + "libfuzzer-sys", + "regex_engine", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "syn" +version = "2.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "wasi" +version = "0.14.2+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" +dependencies = [ + "wit-bindgen-rt", +] + +[[package]] +name = "wit-bindgen-rt" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" +dependencies = [ + "bitflags", +] diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml new file mode 100644 index 0000000..2a15e9f --- /dev/null +++ b/fuzz/Cargo.toml @@ -0,0 +1,29 @@ +[package] +name = "regex_engine-fuzz" +version = "0.0.0" +publish = false +edition = "2024" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +arbitrary = { version = "1.4.2", features = ["derive"] } +libfuzzer-sys = "0.4" + +[dependencies.regex_engine] +path = ".." 
+
+[[bin]]
+name = "regex_thompson"
+path = "fuzz_targets/regex_thompson.rs"
+test = false
+doc = false
+bench = false
+
+[[bin]]
+name = "regex_glushkov"
+path = "fuzz_targets/regex_glushkov.rs"
+test = false
+doc = false
+bench = false
diff --git a/fuzz/fuzz_targets/regex_glushkov.rs b/fuzz/fuzz_targets/regex_glushkov.rs
new file mode 100644
index 0000000..e87f675
--- /dev/null
+++ b/fuzz/fuzz_targets/regex_glushkov.rs
@@ -0,0 +1,28 @@
+#![no_main]
+use libfuzzer_sys::fuzz_target;
+use regex_engine::{ConstructionType, Regex};
+
+// Patterns longer than this many bytes are skipped to avoid timeouts.
+const MAX_PATTERN_LEN: usize = 50;
+
+// Fuzz target: builds a `Regex` with Glushkov construction from each input.
+fuzz_target!(|data: &[u8]| {
+    // `Regex::new` takes a `&str`, so inputs that are not valid UTF-8 are skipped.
+    let Ok(regex_str) = std::str::from_utf8(data) else {
+        return;
+    };
+
+    if regex_str.len() > MAX_PATTERN_LEN {
+        return;
+    }
+
+    // Skip obviously invalid inputs to focus on potentially valid ones.
+    if regex_str.is_empty() || regex_str.starts_with(['*', '+', '?', ')']) {
+        return;
+    }
+
+    // Construction should not panic for any input. Both `Ok` and `Err` are
+    // acceptable outcomes, so the result is discarded. A panic is deliberately
+    // not caught, so that it surfaces as a crash instead of a stderr message.
+    let _ = Regex::new(regex_str, ConstructionType::Glushkov);
+});
diff --git a/fuzz/fuzz_targets/regex_thompson.rs b/fuzz/fuzz_targets/regex_thompson.rs
new file mode 100644
index 0000000..742975a
--- /dev/null
+++ b/fuzz/fuzz_targets/regex_thompson.rs
@@ -0,0 +1,28 @@
+#![no_main]
+use libfuzzer_sys::fuzz_target;
+use regex_engine::{ConstructionType, Regex};
+
+// Patterns longer than this many bytes are skipped to avoid timeouts.
+const MAX_PATTERN_LEN: usize = 50;
+
+// Fuzz target: builds a `Regex` with Thompson construction from each input.
+fuzz_target!(|data: &[u8]| {
+    // `Regex::new` takes a `&str`, so inputs that are not valid UTF-8 are skipped.
+    let Ok(regex_str) = std::str::from_utf8(data) else {
+        return;
+    };
+
+    if regex_str.len() > MAX_PATTERN_LEN {
+        return;
+    }
+
+    // Skip obviously invalid inputs to focus on potentially valid ones.
+    if regex_str.is_empty() || regex_str.starts_with(['*', '+', '?', ')']) {
+        return;
+    }
+
+    // Construction should not panic for any input. Both `Ok` and `Err` are
+    // acceptable outcomes, so the result is discarded. A panic is deliberately
+    // not caught, so that it surfaces as a crash instead of a stderr message.
+    let _ = Regex::new(regex_str, ConstructionType::Thompson);
+});
diff --git a/src/lib.rs b/src/lib.rs
index 482ee9a..fb1bff1 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -451,7 +451,9 @@ pub fn normalise_regex(regex: &str) -> String {
                 }
             }
             _ => {
-                normalised.insert(normalised.len() - 1, '(');
+                if normalised.len() > 0 {
+                    normalised.insert(normalised.len() - 1, '(');
+                }
             }
         }
         normalised.push_str("|)");