From 8ded16eba9c7056b81136a31cd029d6904fd8214 Mon Sep 17 00:00:00 2001 From: oleh Date: Tue, 17 Sep 2024 03:30:15 +0200 Subject: [PATCH 1/3] feat: initial noir support (#1) --- packages/compiler/src/bin/compiler.rs | 12 ++- packages/compiler/src/lib.rs | 11 +++ packages/compiler/src/noir.rs | 115 ++++++++++++++++++++++++++ 3 files changed, 136 insertions(+), 2 deletions(-) create mode 100644 packages/compiler/src/noir.rs diff --git a/packages/compiler/src/bin/compiler.rs b/packages/compiler/src/bin/compiler.rs index ba53749b..9591fabb 100644 --- a/packages/compiler/src/bin/compiler.rs +++ b/packages/compiler/src/bin/compiler.rs @@ -60,12 +60,14 @@ enum Commands { Decomposed { #[arg(short, long)] decomposed_regex_path: String, - #[arg(short, long)] + #[arg(long)] halo2_dir_path: Option, #[arg(short, long)] circom_file_path: Option, #[arg(short, long)] template_name: Option, + #[arg(long)] + noir_file_path: Option, #[arg(short, long)] gen_substrs: Option, }, @@ -74,12 +76,14 @@ enum Commands { raw_regex: String, #[arg(short, long)] substrs_json_path: Option, - #[arg(short, long)] + #[arg(long)] halo2_dir_path: Option, #[arg(short, long)] circom_file_path: Option, #[arg(short, long)] template_name: Option, + #[arg(long)] + noir_file_path: Option, #[arg(short, long)] gen_substrs: Option, }, @@ -99,6 +103,7 @@ fn process_decomposed(cli: Cli) { halo2_dir_path, circom_file_path, template_name, + noir_file_path, gen_substrs, } = cli.command { @@ -107,6 +112,7 @@ fn process_decomposed(cli: Cli) { halo2_dir_path.as_deref(), circom_file_path.as_deref(), template_name.as_deref(), + noir_file_path.as_deref(), gen_substrs, ) { eprintln!("Error: {}", e); @@ -122,6 +128,7 @@ fn process_raw(cli: Cli) { halo2_dir_path, circom_file_path, template_name, + noir_file_path, gen_substrs, } = cli.command { @@ -131,6 +138,7 @@ fn process_raw(cli: Cli) { halo2_dir_path.as_deref(), circom_file_path.as_deref(), template_name.as_deref(), + noir_file_path.as_deref(), gen_substrs, ) { eprintln!("Error: {}", e); diff --git a/packages/compiler/src/lib.rs b/packages/compiler/src/lib.rs index 3a7fa04e..ca1bc042 100644 --- a/packages/compiler/src/lib.rs +++ b/packages/compiler/src/lib.rs @@ -1,6 +1,7 @@ mod circom; mod errors; mod halo2; +mod noir; mod regex; mod structs; mod wasm; @@ -9,6 +10,7 @@ use circom::gen_circom_template; use errors::CompilerError; use halo2::gen_halo2_tables; use itertools::Itertools; +use noir::gen_noir_fn; use regex::{create_regex_and_dfa_from_str_and_defs, get_regex_and_dfa}; use std::{fs::File, path::PathBuf}; use structs::{DecomposedRegexConfig, RegexAndDFA, SubstringDefinitionsJson}; @@ -55,6 +57,7 @@ fn generate_outputs( halo2_dir_path: Option<&str>, circom_file_path: Option<&str>, circom_template_name: Option<&str>, + noir_file_path: Option<&str>, num_public_parts: usize, gen_substrs: bool, ) -> Result<(), CompilerError> { @@ -86,6 +89,10 @@ fn generate_outputs( )?; } + if let Some(noir_file_path) = noir_file_path { + gen_noir_fn(regex_and_dfa, &PathBuf::from(noir_file_path))?; + } + Ok(()) } @@ -107,6 +114,7 @@ pub fn gen_from_decomposed( halo2_dir_path: Option<&str>, circom_file_path: Option<&str>, circom_template_name: Option<&str>, + noir_file_path: Option<&str>, gen_substrs: Option, ) -> Result<(), CompilerError> { let mut decomposed_regex_config: DecomposedRegexConfig = @@ -126,6 +134,7 @@ pub fn gen_from_decomposed( halo2_dir_path, circom_file_path, circom_template_name, + noir_file_path, num_public_parts, gen_substrs, )?; @@ -153,6 +162,7 @@ pub fn gen_from_raw( halo2_dir_path: Option<&str>, circom_file_path: Option<&str>, template_name: Option<&str>, + noir_file_path: Option<&str>, gen_substrs: Option, ) -> Result<(), CompilerError> { let substrs_defs_json = load_substring_definitions_json(substrs_json_path)?; @@ -167,6 +177,7 @@ pub fn gen_from_raw( halo2_dir_path, circom_file_path, template_name, + noir_file_path, num_public_parts, gen_substrs, )?; diff --git a/packages/compiler/src/noir.rs b/packages/compiler/src/noir.rs new file mode 100644 index 00000000..0abc8d1c --- /dev/null +++ b/packages/compiler/src/noir.rs @@ -0,0 +1,115 @@ +use std::{collections::HashSet, fs::File, io::Write, iter::FromIterator, path::Path}; + +use itertools::Itertools; + +use crate::structs::RegexAndDFA; + +const ACCEPT_STATE_ID: &str = "accept"; + +pub fn gen_noir_fn(regex_and_dfa: &RegexAndDFA, path: &Path) -> Result<(), std::io::Error> { + let noir_fn = to_noir_fn(regex_and_dfa); + let mut file = File::create(path)?; + file.write_all(noir_fn.as_bytes())?; + file.flush()?; + Ok(()) +} + +fn to_noir_fn(regex_and_dfa: &RegexAndDFA) -> String { + let accept_state_ids = { + let accept_states = regex_and_dfa + .dfa + .states + .iter() + .filter(|s| s.state_type == ACCEPT_STATE_ID) + .map(|s| s.state_id) + .collect_vec(); + assert!(accept_states.len() > 0, "no accept states"); + accept_states + }; + + const BYTE_SIZE: u32 = 256; // u8 size + let mut lookup_table_body = String::new(); + + // curr_state + char_code -> next_state + let mut rows: Vec<(usize, u8, usize)> = vec![]; + + for state in regex_and_dfa.dfa.states.iter() { + for (&tran_next_state_id, tran) in &state.transitions { + for &char_code in tran { + rows.push((state.state_id, char_code, tran_next_state_id)); + } + } + if state.state_type == ACCEPT_STATE_ID { + let existing_char_codes = &state + .transitions + .iter() + .flat_map(|(_, tran)| tran.iter().copied().collect_vec()) + .collect::>(); + let all_char_codes = HashSet::from_iter(0..=255); + let mut char_codes = all_char_codes.difference(existing_char_codes).collect_vec(); + char_codes.sort(); // to be deterministic + for &char_code in char_codes { + rows.push((state.state_id, char_code, state.state_id)); + } + } + } + + for (curr_state_id, char_code, next_state_id) in rows { + lookup_table_body += + &format!("table[{curr_state_id} * {BYTE_SIZE} + {char_code}] = {next_state_id};\n",); + } + + lookup_table_body = indent(&lookup_table_body); + let table_size = BYTE_SIZE as usize * regex_and_dfa.dfa.states.len(); + let lookup_table = format!( + r#" +comptime fn make_lookup_table() -> [Field; {table_size}] {{ + let mut table = [0; {table_size}]; +{lookup_table_body} + + table +}} + "# + ); + + let final_states_condition_body = accept_state_ids + .iter() + .map(|id| format!("(s == {id})")) + .collect_vec() + .join(" | "); + let fn_body = format!( + r#" +global table = comptime {{ make_lookup_table() }}; +pub fn regex_match(input: [u8; N]) {{ + // regex: {regex_pattern} + let mut s = 0; + for i in 0..input.len() {{ + s = table[s * {BYTE_SIZE} + input[i] as Field]; + }} + assert({final_states_condition_body}, f"no match: {{s}}"); +}} + "#, + regex_pattern = regex_and_dfa.regex_pattern, + ); + format!( + r#" + {fn_body} + {lookup_table} + "# + ) + .trim() + .to_owned() +} + +fn indent(s: &str) -> String { + s.split("\n") + .map(|s| { + if s.trim().is_empty() { + s.to_owned() + } else { + format!("{}{}", " ", s) + } + }) + .collect::>() + .join("\n") +} From 77e4bffcb2a6f4e6e41d5cc02c4d6929db359792 Mon Sep 17 00:00:00 2001 From: Elena Fuentes Bongenaar Date: Mon, 16 Sep 2024 18:19:36 -0600 Subject: [PATCH 2/3] =?UTF-8?q?Mark=20beginning=20of=20input=20byte=20arra?= =?UTF-8?q?y=20with=20255,=20which=20makes=20the=20check=20for=20caret=20a?= =?UTF-8?q?nchor=20(=CB=86)=20works.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Note that ^ is only taken into consideration in the decomposed mode. --- packages/compiler/src/noir.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/compiler/src/noir.rs b/packages/compiler/src/noir.rs index 0abc8d1c..b42ea56c 100644 --- a/packages/compiler/src/noir.rs +++ b/packages/compiler/src/noir.rs @@ -83,6 +83,7 @@ global table = comptime {{ make_lookup_table() }}; pub fn regex_match(input: [u8; N]) {{ // regex: {regex_pattern} let mut s = 0; + s = table[s * 256 + 255 as Field]; for i in 0..input.len() {{ s = table[s * {BYTE_SIZE} + input[i] as Field]; }} From 6623488cffe85b1a15d141ddf7ad80648309b645 Mon Sep 17 00:00:00 2001 From: Elena Fuentes Bongenaar Date: Thu, 19 Sep 2024 14:26:13 -0600 Subject: [PATCH 3/3] Simplified statement. --- packages/compiler/src/noir.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/compiler/src/noir.rs b/packages/compiler/src/noir.rs index b42ea56c..5ae94213 100644 --- a/packages/compiler/src/noir.rs +++ b/packages/compiler/src/noir.rs @@ -83,7 +83,7 @@ global table = comptime {{ make_lookup_table() }}; pub fn regex_match(input: [u8; N]) {{ // regex: {regex_pattern} let mut s = 0; - s = table[s * 256 + 255 as Field]; + s = table[255]; for i in 0..input.len() {{ s = table[s * {BYTE_SIZE} + input[i] as Field]; }}