From 11e624b2212c170c440fc31e953d2b3a5e5b7483 Mon Sep 17 00:00:00 2001 From: Noah Cape Date: Tue, 29 Jul 2025 08:02:50 -0700 Subject: [PATCH 1/2] update chumsky and error handling --- Cargo.lock | 92 ++++---- Cargo.toml | 4 +- src/error.rs | 163 ++++++------- src/execute.rs | 74 +++--- src/geometry/compile/functions.rs | 4 +- src/geometry/compile/reads.rs | 8 +- src/geometry/compile/transformation.rs | 6 +- src/geometry/compile/utils.rs | 6 +- src/geometry/interpret.rs | 18 +- src/geometry/lexer.rs | 23 +- src/geometry/mod.rs | 33 ++- src/geometry/parser.rs | 312 +++++++++++++------------ src/processors/mod.rs | 2 +- tests/common/mod.rs | 1 + tests/common/utils.rs | 12 + tests/compile_tests.rs | 253 ++++---------------- tests/lexer_tests.rs | 37 +-- tests/parser_tests.rs | 274 ++++++++-------------- 18 files changed, 570 insertions(+), 752 deletions(-) create mode 100644 tests/common/mod.rs create mode 100644 tests/common/utils.rs diff --git a/Cargo.lock b/Cargo.lock index 9cc7726..7f4a3c6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "adler2" @@ -8,18 +8,6 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" -[[package]] -name = "ahash" -version = "0.8.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" -dependencies = [ - "cfg-if", - "once_cell", - "version_check", - "zerocopy", -] - [[package]] name = "aho-corasick" version = "1.1.3" @@ -114,9 +102,9 @@ checksum = "6b964d184e89d9b6b67dd2715bc8e74cf3107fb2b529990c90cf517326150bf4" [[package]] name = "ariadne" -version = "0.3.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72fe02fc62033df9ba41cba57ee19acf5e742511a140c7dbc3a873e19a19a1bd" +checksum = "36f5e3dca4e09a6f340a61a0e9c7b61e030c69fc27bf29d73218f7e5e3b7638f" dependencies = [ "unicode-width", "yansi", @@ -226,12 +214,16 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chumsky" -version = "0.9.3" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9" +checksum = "14377e276b2c8300513dff55ba4cc4142b44e5d6de6d00eb5b2307d650bb4ec1" dependencies = [ "hashbrown", + "regex-automata 0.3.9", + "serde", "stacker", + "unicode-ident", + "unicode-segmentation", ] [[package]] @@ -359,6 +351,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + [[package]] name = "errno" version = "0.3.10" @@ -386,6 +384,12 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + [[package]] name = "fuchsia-cprng" version = "0.1.1" @@ -406,12 +410,13 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.14.5" +version = "0.15.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" dependencies = [ - "ahash", "allocator-api2", + "equivalent", + "foldhash", ] [[package]] @@ -713,6 +718,17 @@ dependencies = [ "regex-syntax 0.6.29", ] +[[package]] +name = "regex-automata" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59b23e92ee4318893fa3fe3e6fb365258efbfe6ac6ab30f090cdcbb7aa37efa9" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax 0.7.5", +] + [[package]] name = "regex-automata" version = "0.4.9" @@ -730,6 +746,12 @@ version = "0.6.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" +[[package]] +name = "regex-syntax" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" + [[package]] name = "regex-syntax" version = "0.8.5" @@ -1044,12 +1066,6 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" -[[package]] -name = "version_check" -version = "0.9.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" - [[package]] name = "wait-timeout" version = "0.2.1" @@ -1183,26 +1199,6 @@ dependencies = [ [[package]] name = "yansi" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" - -[[package]] -name = "zerocopy" -version = "0.7.35" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" -dependencies = [ - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.7.35" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] +checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" diff --git a/Cargo.toml b/Cargo.toml index f3d88bc..3ee9c70 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,8 +18,8 @@ path = "src/bin/bin.rs" antisequence = { git = "https://github.com/noahcape/ANTISEQUENCE.git", branch = 'dev' } tracing-subscriber = { version = "0.3.17", features = ["env-filter"] } tracing = "0.1.37" -ariadne = "0.3.0" -chumsky = "0.9.3" +ariadne = "0.5.0" +chumsky = "0.10.1" clap = { version = "4.4.8", features = ["derive"] } anyhow = "1.0" tempfile = "3.5.0" diff --git a/src/error.rs b/src/error.rs index 9040613..b566772 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,130 +1,101 @@ -use std::ops::Range; - -use ariadne::{Color, Fmt, Label, Report, ReportKind, Source}; -use chumsky::{error::SimpleReason, prelude::*}; +use ariadne::{Color, Label, Report, ReportKind, Source}; +use chumsky::{error::RichReason, prelude::*}; use crate::lexer::Token; -pub fn handle_errors(errs: Vec>, source: String) { +pub fn handle_errors(errs: Vec>, source: String) { // error recovery errs.into_iter().for_each(|e| { - let report = Report::build(ReportKind::Error, (), e.span().start); - - let report = match e.reason() { - chumsky::error::SimpleReason::Custom(msg) => report - .with_message("Parsing and Compiling EFGDL") - .with_label( - Label::new(e.span()) - .with_message(format!("{}", msg.fg(Color::Red))) - .with_color(Color::Red), - ), - chumsky::error::SimpleReason::Unclosed { span, delimiter } => report - .with_message(format!( - "Unclosed delimiter {}", - delimiter.fg(Color::Yellow) - )) - .with_label( - Label::new(span.clone()) - .with_message(format!( - "Unclosed delimiter {}", - delimiter.fg(Color::Yellow) - )) - .with_color(Color::Yellow), - ) - .with_label( - Label::new(e.span()) - .with_message(format!( - "Must be closed before this {}", - e.found() - .unwrap_or(&"end of file".to_string()) - .fg(Color::Red) - )) - .with_color(Color::Red), - ), - chumsky::error::SimpleReason::Unexpected => { - report.with_message(format!( - "{}, expected {}", - if e.found().is_some() { - "Unexpected token in input" - } else { - "Unexpected end of input" - }, - if e.expected().len() == 0 { - "something else".to_string() - } else { - e.expected() - .map(|expected| match expected { - Some(expected) => expected.to_string(), - None => "end of input".to_string(), - }) - .collect::>() - .join(", ") - } - )) - } + Report::build(ReportKind::Error, ((), e.span().into_range())) + .with_config(ariadne::Config::new().with_index_type(ariadne::IndexType::Byte)) + .with_message(e.to_string()) .with_label( - Label::new(e.span()) - .with_message(format!( - "Unexpected token {}", - e.found() - .unwrap_or(&"end of file".to_string()) - .fg(Color::Red) - )) + Label::new(((), e.span().into_range())) + .with_message(e.reason().to_string()) .with_color(Color::Red), - ), - }; - - report.finish().print(Source::from(source.clone())).unwrap(); + ) + .with_labels(e.contexts().map(|(label, span)| { + Label::new(((), span.into_range())) + .with_message(format!("while parsing this {label}")) + .with_color(Color::Yellow) + })) + .finish() + .print(Source::from(&source)) + .unwrap(); }); } -pub fn missing_delimiter(token: Token, span: Range, obj: Option<&str>) -> Simple { +pub fn missing_delimiter<'a>(token: Token, span: SimpleSpan, obj: Option<&str>) -> Rich<'a, Token> { let msg = |d1, d2| match obj { Some(obj) => format!("Missing delimitter for {obj} - delimit with '{d1} .. {d2}'."), None => format!("Missing delimtter - delimit with '{d1} .. {d2}'."), }; match token { - Token::RParen | Token::LParen => Simple::custom(span, msg('(', ')')), - Token::RBrace | Token::LBrace => Simple::custom(span, msg('{', '}')), - Token::RBracket | Token::LBracket => Simple::custom(span, msg('[', ']')), - Token::RAngle | Token::LAngle => Simple::custom(span, msg('<', '>')), - _ => Simple::custom(span, "Missing delimitter"), + Token::RParen | Token::LParen => Rich::custom(span, msg('(', ')')), + Token::RBrace | Token::LBrace => Rich::custom(span, msg('{', '}')), + Token::RBracket | Token::LBracket => Rich::custom(span, msg('[', ']')), + Token::RAngle | Token::LAngle => Rich::custom(span, msg('<', '>')), + _ => Rich::custom(span, "Missing delimitter"), } } -pub fn comma(span: Range) -> Simple { - Simple::custom(span, "Expected a ',' to separate arguments.") +pub fn comma<'a>(span: SimpleSpan) -> Rich<'a, Token> { + Rich::custom(span, "Expected a ',' to separate arguments.") } -pub fn throw(prev_err: Simple, next_err: Simple) -> Simple { - let expected = prev_err - .expected() - .map(|expected| match expected { - Some(expected) => expected.to_string(), - None => "end of input".to_string(), - }) - .collect::>(); - - if expected.len() == 1 { - let expected = expected.first().unwrap(); +pub fn throw<'a>(prev_err: Rich<'a, Token>, next_err: Rich<'a, Token>) -> Rich<'a, Token> { + if prev_err.expected().len() > 0 { + let expected = prev_err.clone(); let range = prev_err.span(); let start = range.start; - let msg = match expected.as_str() { - ":" => { - Some("Unfinished interval - add a ':' or specify interval with different length.") - } - _ => None, + let msg = match prev_err.clone().into_reason() { + chumsky::error::RichReason::Custom(msg) => match msg.as_str() { + ":" => Some(String::from( + "Unfinished interval - add a ':' or specify interval with different length.", + )), + _ => None, + }, + chumsky::error::RichReason::ExpectedFound { + expected, + found: o_found, + } => match o_found { + Some(found) => match found { + chumsky::util::Maybe::Ref(r_t) => Some(format!( + "Expected {} but found: {}.", + expected + .iter() + .map(|exp| format!("{exp}")) + .collect::(), + r_t + )), + chumsky::util::Maybe::Val(t) => Some(format!( + "Expected {} but found: {}.", + expected + .iter() + .map(|exp| format!("{exp}")) + .collect::(), + t + )), + }, + None => Some(format!( + "Expected {} but found nothing.", + expected + .iter() + .map(|exp| format!("{exp}")) + .collect::() + )), + }, }; if let Some(msg) = msg { - return Simple::custom(start - 1..start - 1 + expected.len(), msg); + return Rich::custom((start..start + expected.span().end).into(), msg); } } match prev_err.reason() { - SimpleReason::Custom(_) => prev_err, + RichReason::Custom(_) => prev_err, _ => next_err, } } diff --git a/src/execute.rs b/src/execute.rs index 91c5456..fe85b1d 100644 --- a/src/execute.rs +++ b/src/execute.rs @@ -1,10 +1,14 @@ use std::{ - fs::File, io::BufWriter, panic, path::{Path, PathBuf}, thread + fs::File, + io::BufWriter, + panic, + path::{Path, PathBuf}, + thread, }; use antisequence::graph::*; use anyhow::{bail, Result}; -use chumsky::{prelude::Simple, Parser, Stream}; +use chumsky::{error::Rich, Parser}; use nix::sys::stat; use nix::unistd; use tempfile::tempdir; @@ -121,36 +125,54 @@ fn interpret_to_pipes( } } -pub fn compile_geom(geom: String) -> Result>> { - let (tokens, mut errs) = lexer::lexer().parse_recovery(geom); - - let parse_errs = if let Some(tokens) = tokens { - match parser().parse(Stream::from_iter( - tokens.len()..tokens.len() + 1, - tokens.into_iter(), - )) { - Err(errs) => errs, - Ok(description) => { - let res = compile(description.clone()); - - if let Err(e) = res { - errs.push(Simple::custom(e.span, e.msg)); - } else { - return Ok(res.ok().unwrap()); - }; - - vec![] +pub fn compile_geom(geom: String) -> Result>> { + let parse_res = lexer::lexer().parse(&geom); + + let lex_errors = match parse_res.clone().into_result() { + Ok(_) => vec![], + Err(errs) => errs, + }; + + let parse_errors = match parse_res.into_result() { + Ok(tokens) => { + let input_tokens = tokens.iter().map(|(t, _)| t.clone()).collect::>(); + let parser_instance = parser(); + match parser_instance.parse(input_tokens.as_slice()).into_result() { + Err(errs) => errs + .into_iter() + .map(|e| Rich::::custom(*e.span(), e.reason().to_string())) + .collect::>(), + Ok(description) => { + let res = compile(description.clone()); + + if let Err(e) = res { + vec![Rich::::custom(e.span, e.msg)] + } else { + return Ok(res.ok().unwrap()); + }; + + vec![] + } } } - } else { - Vec::new() + Err(err) => err + .iter() + .map(|rich_err| { + Rich::::custom(*rich_err.span(), format!("{}", rich_err.reason())) + }) + .collect::>(), }; - let errors = errs + let errors = lex_errors .into_iter() - .map(|e| e.map(|c| c.to_string())) - .chain(parse_errs.into_iter().map(|e| e.map(|tok| tok.to_string()))) + .map(|e| Rich::custom(*e.span(), e.reason().to_string())) + .chain( + parse_errors + .into_iter() + .map(|e| Rich::custom(*e.span(), e.reason().to_string())), + ) .collect::>(); + Err(errors) } diff --git a/src/geometry/compile/functions.rs b/src/geometry/compile/functions.rs index 78541a3..dec8ce6 100644 --- a/src/geometry/compile/functions.rs +++ b/src/geometry/compile/functions.rs @@ -130,8 +130,8 @@ fn compile_inner_expr( match expr { Expr::Function(fn_, fn_expr) => { expr = fn_expr.unboxed().0; - span = fn_.1.clone(); - let compiled_fn = compile_fn(fn_.clone(), S(expr.clone(), span.clone())); + span = fn_.1; + let compiled_fn = compile_fn(fn_.clone(), S(expr.clone(), span)); if compiled_fn.is_ok() { inner_stack.push(compiled_fn.ok().unwrap()); } else { diff --git a/src/geometry/compile/reads.rs b/src/geometry/compile/reads.rs index 86c2d6d..8d73c46 100644 --- a/src/geometry/compile/reads.rs +++ b/src/geometry/compile/reads.rs @@ -37,7 +37,7 @@ pub fn validate_geometry( if !expect_next.contains(&type_) { return Err(Error { - span: span.clone(), + span: *span, msg: format!("Ambiguous Geometry: expected {expect_next:?}, found: {type_}"), }); } @@ -140,7 +140,7 @@ pub fn compile_reads( Expr::Label(S(ref l, ref span)) => { if labels.contains(l) { err = Some(Error { - span: span.clone(), + span: *span, msg: format!( "`{l}` has already been used. Cannot use same variable more than once." ), @@ -160,7 +160,7 @@ pub fn compile_reads( inner_expr.expr.0.type_, inner_expr.expr.0.size.clone(), ), - inner_expr.expr.1.clone(), + inner_expr.expr.1, ), )?); } @@ -174,7 +174,7 @@ pub fn compile_reads( break 'inner; } else { err = Some(Error { - span: span.clone(), + span: *span, msg: format!("No variable declared with label: {l}"), }); diff --git a/src/geometry/compile/transformation.rs b/src/geometry/compile/transformation.rs index 54bd405..fefdec5 100644 --- a/src/geometry/compile/transformation.rs +++ b/src/geometry/compile/transformation.rs @@ -50,11 +50,11 @@ pub fn compile_transformation( } Expr::LabeledGeomPiece(_, _) | Expr::GeomPiece(_, _) => return Err(Error { span: expr.1, - msg: format!("{} - Cannot construct intervals in a transformation", generic_transformation_msg) + msg: format!("{generic_transformation_msg} - Cannot construct intervals in a transformation") }), Expr::Self_ => return Err(Error { span: expr.1, - msg: format!("{} - Misplaced reference of 'self', this is a reserved token for the 'map' function.", generic_transformation_msg), + msg: format!("{generic_transformation_msg} - Misplaced reference of 'self', this is a reserved token for the 'map' function."), }) } } @@ -88,7 +88,7 @@ pub fn compile_transformation( for fn_ in &gp.stack { if let S(CompiledFunction::Remove, span) = fn_ { return Err(Error { - span: span.clone(), + span: *span, msg: "Cannot reference a void interval after '->' - if you want to keep this interval then remove the 'remove' transformation.".to_string() }); } diff --git a/src/geometry/compile/utils.rs b/src/geometry/compile/utils.rs index f639ab0..c0b1009 100644 --- a/src/geometry/compile/utils.rs +++ b/src/geometry/compile/utils.rs @@ -113,10 +113,10 @@ impl GeometryMeta { } }; - let mut return_type = S(expr_type, expr_span.clone()); + let mut return_type = S(expr_type, *expr_span); for S(fn_, span) in self.stack.iter().rev() { - return_type = validate_composition(S(fn_, span.clone()), return_type, &expr.size)?; + return_type = validate_composition(S(fn_, *span), return_type, &expr.size)?; } Ok(()) @@ -304,7 +304,7 @@ impl GeometryMeta { impl IntervalShape { pub fn update_size_to(&self, n: usize) -> Self { - IntervalShape::FixedLen(S(n, 0..1)) + IntervalShape::FixedLen(S(n, (0..1).into())) } pub fn update_size_add(self, n: usize) -> Self { diff --git a/src/geometry/interpret.rs b/src/geometry/interpret.rs index fb4d682..c2c6ff8 100644 --- a/src/geometry/interpret.rs +++ b/src/geometry/interpret.rs @@ -1,4 +1,4 @@ -use std::{path::PathBuf, str::FromStr, usize}; +use std::{path::PathBuf, str::FromStr}; use antisequence::{ graph::MatchType::{ @@ -6,7 +6,6 @@ use antisequence::{ }, *, }; -use chumsky::chain::Chain; use expr::Expr; use graph::{ Graph, @@ -143,10 +142,7 @@ fn parse_additional_args(arg: String, args: &[&str]) -> PathBuf { let len = args.len(); match arg.parse::() { Ok(n) => PathBuf::from_str(args.get(n).unwrap_or_else(|| { - panic!( - "Expected {n} additional arguments with `--additional` tag. Found only {}.", - len - ) + panic!("Expected {n} additional arguments with `--additional` tag. Found only {len}.",) })) .unwrap_or_else(|_| { panic!("Expected path as argument -- could not parse argument {n} as path.") @@ -171,7 +167,7 @@ fn execute_stack( }; let interval_length = match size { - IntervalShape::FixedSeq(v) => v.len(), + IntervalShape::FixedSeq(S(v, _)) => v.len(), IntervalShape::FixedLen(S(n, _)) => *n, IntervalShape::RangedLen(S((_, b), _)) => *b, IntervalShape::UnboundedLen => 0, @@ -195,7 +191,7 @@ fn execute_stack( execute_stack(fns, label, size, additional_args, &mut fallback_graph); graph.add(SelectOp::new( - Expr::from(expr::attr(&format!("{label}.{MAPPED}"))).not(), + Expr::from(expr::attr(format!("{label}.{MAPPED}"))).not(), fallback_graph, )); } @@ -214,7 +210,7 @@ fn execute_stack( execute_stack(fns, label, size, additional_args, &mut fallback_graph); graph.add(SelectOp::new( - Expr::from(expr::attr(&format!("{label}.{MAPPED}"))).not(), + Expr::from(expr::attr(format!("{label}.{MAPPED}"))).not(), fallback_graph, )); } @@ -278,7 +274,7 @@ impl<'a> GeometryMeta { }; if type_ == IntervalKind::Discard { - stack.push(S(CompiledFunction::Remove, 0..1)); + stack.push(S(CompiledFunction::Remove, (0..1).into())); } // this is only called from `interpret_dual` which is for variable to fixedSeq @@ -313,7 +309,7 @@ impl<'a> GeometryMeta { let next_label = format!("{cur_label}{NEXT_RIGHT}"); if type_ == IntervalKind::Discard { - stack.push(S(CompiledFunction::Remove, 0..1)); + stack.push(S(CompiledFunction::Remove, (0..1).into())); } // execute the requisite process here diff --git a/src/geometry/lexer.rs b/src/geometry/lexer.rs index 1ce41be..2ad1ff7 100644 --- a/src/geometry/lexer.rs +++ b/src/geometry/lexer.rs @@ -160,7 +160,7 @@ impl fmt::Display for Token { } /// Returns a lexer for EFGDL. -pub fn lexer() -> impl Parser, Error = Simple> { +pub fn lexer<'a>() -> impl Parser<'a, &'a str, Vec<(Token, Span)>, extra::Err>> { let int = text::int(10).from_str().unwrapped().map(Token::Num); let ctrl = choice(( @@ -183,7 +183,14 @@ pub fn lexer() -> impl Parser, Error = Simple> { let file = just('"') .ignored() - .then(take_until(just('"').ignored())) + .then( + any() + .and_is(just('"').not()) + .repeated() + .collect::>() + .then(just('"')), + ) + // .then(take_until(just('"').ignored())) .padded() .map(|((), (f, _))| Token::File(f.into_iter().collect::())); @@ -201,7 +208,7 @@ pub fn lexer() -> impl Parser, Error = Simple> { just('U').to(Token::U), )); - let ident = text::ident().map(|s: String| match s.as_str() { + let ident = text::ident().map(|s: &str| match s { "rev" => Token::Reverse, "revcomp" => Token::ReverseComp, "remove" => Token::Remove, @@ -227,13 +234,14 @@ pub fn lexer() -> impl Parser, Error = Simple> { "f" => Token::FixedSeq, _ => { if s.starts_with('_') { - Token::Reserved(s) + Token::Reserved(s.to_owned()) } else { - Token::Label(s) + Token::Label(s.to_owned()) } } }); + // TODO: remove recovery let token = nucs .or(argument) .or(ident) @@ -241,11 +249,10 @@ pub fn lexer() -> impl Parser, Error = Simple> { .or(int) .or(ctrl) .or(special) - .or(file) - .recover_with(skip_then_retry_until([])); + .or(file); token - .map_with_span(|tok, span| (tok, span)) + .map_with(|tok, state| (tok, state.span())) .padded() .repeated() .collect() diff --git a/src/geometry/mod.rs b/src/geometry/mod.rs index f8c8d8f..537ddf2 100644 --- a/src/geometry/mod.rs +++ b/src/geometry/mod.rs @@ -3,12 +3,16 @@ pub mod interpret; pub mod lexer; pub mod parser; +use std::hash::{Hash, Hasher}; + use std::{ fmt::{self, Write}, ops::Range, slice, }; +use chumsky::span::SimpleSpan; + #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] #[repr(u8)] // Necessary for by-ref conversion to `str` pub enum Nucleotide { @@ -54,12 +58,37 @@ impl Nucleotide { } /// A range of characters in the input file. -pub type Span = Range; +pub type Span = SimpleSpan; /// Associates a `T` with a corresponding span in the source file. -#[derive(Clone, Debug, PartialEq, Eq, Hash)] +#[derive(Clone, Debug, Eq)] pub struct S(pub T, pub Span); +impl S { + pub fn new(t: T, s: Range) -> Self { + S(t, SimpleSpan::from(s)) + } +} + +impl PartialEq for S +where + T: PartialEq + Eq, +{ + // TODO: This is a patch fix for testing with the new version of chumsky + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 + } +} + +impl Hash for S +where + T: PartialEq + Eq + Hash, +{ + fn hash(&self, hasher: &mut H) { + self.0.hash(hasher); + } +} + impl S { pub fn boxed(self) -> S> { S(Box::new(self.0), self.1) diff --git a/src/geometry/parser.rs b/src/geometry/parser.rs index 575b8ec..c488448 100644 --- a/src/geometry/parser.rs +++ b/src/geometry/parser.rs @@ -199,7 +199,8 @@ pub struct Description { pub transforms: Option>>>, } -pub fn parser() -> impl Parser> + Clone { +pub fn parser<'src>( +) -> impl Parser<'src, &'src [Token], Description, extra::Err>> + Clone { /* Start with creating combinators and a recursive definition of a geom_piece @@ -234,41 +235,43 @@ pub fn parser() -> impl Parser> + Clon }; let inline_label = label - .map_err_with_span(|t, span| { + .map_err_with_state(|t, span, _| { throw( t, - Simple::custom( + Rich::custom( span, "Found delimiters '<' and '>' which must delimit a label.", ), ) }) .delimited_by(just(Token::LAngle), just(Token::RAngle)) - .map_err_with_span(|t, span| { + .map_err_with_state(|t, span, _| { throw(t, missing_delimiter(Token::RAngle, span, Some("label"))) }) - .map_with_span(|l, span| Expr::Label(S(l, span))) + .map_with(|l, extra| Expr::Label(S(l, extra.span()))) .labelled("label"); - let label = label.map_with_span(S).labelled("label"); + let label = label + .map_with(|s, state| S(s, state.span())) + .labelled("label"); let self_ = just(Token::Self_).to(Expr::Self_).labelled("self"); let range = num .then_ignore(just(Token::Dash)) .then(num) - .map_err_with_span(|t, span| { + .map_err_with_state(|t, span, _| { throw( t, - Simple::custom( + Rich::custom( span, "Expected a numerical literal after '-' for a ranged length interval.", ), ) }) - .map_with_span(|(a, b), span| IntervalShape::RangedLen(S((a, b), span))) + .map_with(|(a, b), state| IntervalShape::RangedLen(S((a, b), state.span()))) .delimited_by(just(Token::LBracket), just(Token::RBracket)) - .map_err_with_span(|t, span| { + .map_err_with_state(|t, span, _| { throw( t, missing_delimiter(Token::RBracket, span, Some("variable length interval")), @@ -276,18 +279,18 @@ pub fn parser() -> impl Parser> + Clon }); let fixed_len = num - .map_err_with_span(|t, span| { + .map_err_with_state(|t, span, _| { throw( t, - Simple::custom( + Rich::custom( span, "Expecting a length specifier '[-]', or '[]'.", ), ) }) - .map_with_span(|n, span| IntervalShape::FixedLen(S(n, span))) + .map_with(|n, state| IntervalShape::FixedLen(S(n, state.span()))) .delimited_by(just(Token::LBracket), just(Token::RBracket)) - .map_err_with_span(|t, span| { + .map_err_with_state(|t, span, _| { throw( t, missing_delimiter(Token::LBracket, span, Some("fixed length interval")), @@ -298,18 +301,18 @@ pub fn parser() -> impl Parser> + Clon let seq = nuc .repeated() .at_least(1) - .map_err_with_span(|t, span| { + .collect::>() + .map_err_with_state(|t, span, _| { throw( t, - Simple::custom(span, "A fragment must contain at least one ATGCU character"), + Rich::custom(span, "A fragment must contain at least one ATGCU character"), ) - }) - .collect::>(); + }); let nucstr = seq - .map_with_span(|nucstr, span| IntervalShape::FixedSeq(S(nucstr, span))) + .map_with(|nucstr, state| IntervalShape::FixedSeq(S(nucstr, state.span()))) .delimited_by(just(Token::LBracket), just(Token::RBracket)) - .map_err_with_span(|t, span| { + .map_err_with_state(|t, span, _| { throw( t, missing_delimiter(Token::LBracket, span, Some("fragment specifier")), @@ -318,18 +321,18 @@ pub fn parser() -> impl Parser> + Clon .labelled("nucstr"); let unbounded = piece_type - .map_err_with_span(|t, span| { + .map_err_with_state(|t, span, _| { throw( t, - Simple::custom(span, "Specify interval with either 'b'/'u'/'f'/'r'/'x'."), + Rich::custom(span, "Specify interval with either 'b'/'u'/'f'/'r'/'x'."), ) }) .then(inline_label.clone().or_not()) .then_ignore(just(Token::Colon)) - .map_with_span(|(type_, label), span| { + .map_with(|(type_, label), state| { let expr = Expr::GeomPiece(type_, IntervalShape::UnboundedLen); if let Some(Expr::Label(label)) = label { - Expr::LabeledGeomPiece(label, S(Box::new(expr), span)) + Expr::LabeledGeomPiece(label, S(Box::new(expr), state.span())) } else { expr } @@ -337,27 +340,27 @@ pub fn parser() -> impl Parser> + Clon .labelled("unbound_seg"); let ranged = piece_type - .map_err_with_span(|t, span| { + .map_err_with_state(|t, span, _| { throw( t, - Simple::custom(span, "Specify interval with either 'b'/'u'/'f'/'r'/'x'."), + Rich::custom(span, "Specify interval with either 'b'/'u'/'f'/'r'/'x'."), ) }) .then(inline_label.clone().or_not()) .then(range) - .map_err_with_span(|t, span| { + .map_err_with_state(|t, span, _| { throw( t, - Simple::custom( + Rich::custom( span, "Expecting a length specifier either ':', '[-]', or '[]'.", ), ) }) - .map_with_span(|((type_, label), range), span| { + .map_with(|((type_, label), range), state| { let expr = Expr::GeomPiece(type_, range); if let Some(Expr::Label(label)) = label { - Expr::LabeledGeomPiece(label, S(Box::new(expr), span)) + Expr::LabeledGeomPiece(label, S(Box::new(expr), state.span())) } else { expr } @@ -365,27 +368,27 @@ pub fn parser() -> impl Parser> + Clon .labelled("ranged_len_seg"); let fixed = piece_type - .map_err_with_span(|t, span| { + .map_err_with_state(|t, span, _| { throw( t, - Simple::custom(span, "Specify interval with either 'b'/'u'/'f'/'r'/'x'."), + Rich::custom(span, "Specify interval with either 'b'/'u'/'f'/'r'/'x'."), ) }) .then(inline_label.clone().or_not()) .then(fixed_len) - .map_err_with_span(|t, span| { + .map_err_with_state(|t, span, _| { throw( t, - Simple::custom( + Rich::custom( span, "Expecting a length specifier either ':', '[-]', or '[]'.", ), ) }) - .map_with_span(|((type_, label), len), span| { + .map_with(|((type_, label), len), state| { let expr = Expr::GeomPiece(type_, len); if let Some(Expr::Label(label)) = label { - Expr::LabeledGeomPiece(label, S(Box::new(expr), span)) + Expr::LabeledGeomPiece(label, S(Box::new(expr), state.span())) } else { expr } @@ -396,16 +399,16 @@ pub fn parser() -> impl Parser> + Clon .to(IntervalKind::FixedSeq) .then(inline_label.clone().or_not()) .then(nucstr) - .map_err_with_span(|t, span| { + .map_err_with_state(|t, span, _| { throw( t, - Simple::custom(span, "Expecting a sequence to match delimited by '[ .. ]'."), + Rich::custom(span, "Expecting a sequence to match delimited by '[ .. ]'."), ) }) - .map_with_span(|((type_, label), nucs), span| { + .map_with(|((type_, label), nucs), state| { let expr = Expr::GeomPiece(type_, nucs); if let Some(Expr::Label(label)) = label { - Expr::LabeledGeomPiece(label, S(Box::new(expr), span)) + Expr::LabeledGeomPiece(label, S(Box::new(expr), state.span())) } else { expr } @@ -424,69 +427,69 @@ pub fn parser() -> impl Parser> + Clon let transformed_pieces = recursive(|transformed_pieces| { let transformed_pieces = transformed_pieces - .map_err_with_span(|t, span| throw(t, Simple::custom(span, "Invalid declaration of interval"))); + .map_err_with_state(|t, span, _| throw(t, Rich::custom(span, "Invalid declaration of interval"))); let recursive_num_arg = transformed_pieces .clone() .then_ignore(just(Token::Comma)) - .map_err_with_span(|t, span| throw(t, Simple::custom(span, "Expected a ',' to separate arguments."))) + .map_err_with_state(|t, span, _| throw(t, Rich::custom(span, "Expected a ',' to separate arguments."))) .then(num) - .map_err_with_span(|t, span| throw(t, Simple::custom(span, "Expected a numerical literal as a second argument."))) - .map_with_span(S) + .map_err_with_state(|t, span, _| throw(t, Rich::custom(span, "Expected a numerical literal as a second argument."))) + .map_with(|s, state| S(s, state.span())) .delimited_by(just(Token::LParen), just(Token::RParen)) - .map_err_with_span(|t, span| throw(t, missing_delimiter(Token::LParen, span, None))); + .map_err_with_state(|t, span, _| throw(t, missing_delimiter(Token::LParen, span, None))); let recursive_num_nuc_args = transformed_pieces .clone() .then_ignore(just(Token::Comma)) - .map_err_with_span(|t, span| throw(t, comma(span))) + .map_err_with_state(|t, span, _| throw(t, comma(span))) .then(num) - .map_err_with_span(|t, span| throw(t, Simple::custom(span, "Expected a numerical literal as a second argument."))) + .map_err_with_state(|t, span, _| throw(t, Rich::custom(span, "Expected a numerical literal as a second argument."))) .then_ignore(just(Token::Comma)) - .map_err_with_span(|t, span| throw(t, comma(span))) + .map_err_with_state(|t, span, _| throw(t, comma(span))) .then(nuc) - .map_err_with_span(|t, span| throw(t, Simple::custom(span, "Expected an ATGCU literal as a third argument."))) - .map_with_span(S) + .map_err_with_state(|t, span, _| throw(t, Rich::custom(span, "Expected an ATGCU literal as a third argument."))) + .map_with(|s, state| S(s, state.span())) .delimited_by(just(Token::LParen), just(Token::RParen)) - .map_err_with_span(|t, span| throw(t, missing_delimiter(Token::LParen, span, None))); + .map_err_with_state(|t, span, _| throw(t, missing_delimiter(Token::LParen, span, None))); let recursive_no_arg = transformed_pieces .clone() - .map_with_span(S) + .map_with(|s, state| S(s, state.span())) .delimited_by(just(Token::LParen), just(Token::RParen)) - .map_err_with_span(|t, span| throw(t, missing_delimiter(Token::LParen, span, None))); + .map_err_with_state(|t, span, _| throw(t, missing_delimiter(Token::LParen, span, None))); choice(( geom_piece.clone() - .map_err_with_span(|t, span| throw(t, Simple::custom(span, "Unexpected error when creating an interval."))), + .map_err_with_state(|t, span, _| throw(t, Rich::custom(span, "Unexpected error when creating an interval."))), just(Token::Remove) - .map_with_span(|_, span| S(Function::Remove, span)) + .map_with(|_, state| S(Function::Remove, state.span())) .then(recursive_no_arg.clone()) .map(|(fn_, tok)| Expr::Function(fn_, tok.boxed())) .labelled("remove"), just(Token::Normalize) - .map_with_span(|_, span| S(Function::Normalize, span)) + .map_with(|_, state| S(Function::Normalize, state.span())) .then(recursive_no_arg.clone()) .map(|(fn_, tok)| Expr::Function(fn_, tok.boxed())) .labelled("norm"), just(Token::Hamming) - .map_with_span(|_, span| span) + .map_with(|_, state| state.span()) .then( geom_piece .clone() - .map_err_with_span(|t, span| { - throw(t, Simple::custom(span, "Expected a fragment specified interval as the first argument - 'hamming' cannot take a transformed interval.")) + .map_err_with_state(|t, span, _| { + throw(t, Rich::custom(span, "Expected a fragment specified interval as the first argument - 'hamming' cannot take a transformed interval.")) }) .then_ignore(just(Token::Comma)) - .then(num).map_err_with_span(|t, span| throw(t, Simple::custom(span, "Expected a numeric literal as a second argument."))) - .map_with_span(S) + .then(num).map_err_with_state(|t, span, _| throw(t, Rich::custom(span, "Expected a numeric literal as a second argument."))) .delimited_by(just(Token::LParen), just(Token::RParen)) - .map_err_with_span(|t, span| throw(t, missing_delimiter(Token::LParen, span, Some("'hamming'")))), + .map_with(|s, state| S(s, state.span())) + .map_err_with_state(|t, span, _| throw(t, missing_delimiter(Token::LParen, span, Some("'hamming'")))), ) - .map_err_with_span(|t, span| { - throw(t, Simple::custom(span, "Missing argument for hamming - ")) + .map_err_with_state(|t, span, _| { + throw(t, Rich::custom(span, "Missing argument for hamming - ")) }) - .map(|(fn_span, S((geom_p, num), span))| { + .map_with(|(fn_span, S((geom_p, num), span)), _| { Expr::Function( S(Function::Hamming(num), fn_span), S(Box::new(geom_p), span), @@ -494,9 +497,9 @@ pub fn parser() -> impl Parser> + Clon }) .labelled("hamming"), just(Token::Truncate) - .map_with_span(|_, span| span) + .map_with(|_, state| state.span()) .then(recursive_num_arg.clone()) - .map(|(fn_span, S((geom_p, num), span))| { + .map_with(|(fn_span, S((geom_p, num), span)), _| { Expr::Function( S(Function::Truncate(num), fn_span), S(Box::new(geom_p), span), @@ -504,9 +507,9 @@ pub fn parser() -> impl Parser> + Clon }) .labelled("trunc"), just(Token::TruncateLeft) - .map_with_span(|_, span| span) + .map_with(|_, state| state.span()) .then(recursive_num_arg.clone()) - .map(|(fn_span, S((geom_p, num), span))| { + .map_with(|(fn_span, S((geom_p, num), span)), _| { Expr::Function( S(Function::TruncateLeft(num), fn_span), S(Box::new(geom_p), span), @@ -514,9 +517,9 @@ pub fn parser() -> impl Parser> + Clon }) .labelled("trunc_left"), just(Token::TruncateTo) - .map_with_span(|_, span| span) + .map_with(|_, state| state.span()) .then(recursive_num_arg.clone()) - .map(|(fn_span, S((geom_p, num), span))| { + .map_with(|(fn_span, S((geom_p, num), span)), _| { Expr::Function( S(Function::TruncateTo(num), fn_span), S(Box::new(geom_p), span), @@ -524,9 +527,9 @@ pub fn parser() -> impl Parser> + Clon }) .labelled("trunc_to"), just(Token::TruncateToLeft) - .map_with_span(|_, span| span) + .map_with(|_, state| state.span()) .then(recursive_num_arg.clone()) - .map(|(fn_span, S((geom_p, num), span))| { + .map_with(|(fn_span, S((geom_p, num), span)), _| { Expr::Function( S(Function::TruncateToLeft(num), fn_span), S(Box::new(geom_p), span), @@ -534,9 +537,9 @@ pub fn parser() -> impl Parser> + Clon }) .labelled("trunc_to_left"), just(Token::Pad) - .map_with_span(|_, span| span) + .map_with(|_, state| state.span()) .then(recursive_num_nuc_args.clone()) - .map(|(fn_span, S(((geom_p, num), nuc), span))| { + .map_with(|(fn_span, S(((geom_p, num), nuc), span)), _| { Expr::Function( S(Function::Pad(num, nuc), fn_span), S(Box::new(geom_p), span), @@ -544,9 +547,9 @@ pub fn parser() -> impl Parser> + Clon }) .labelled("pad"), just(Token::PadLeft) - .map_with_span(|_, span| span) + .map_with(|_, state| state.span()) .then(recursive_num_nuc_args.clone()) - .map(|(fn_span, S(((geom_p, num), nuc), span))| { + .map_with(|(fn_span, S(((geom_p, num), nuc), span)), _| { Expr::Function( S(Function::PadLeft(num, nuc), fn_span), S(Box::new(geom_p), span), @@ -554,9 +557,9 @@ pub fn parser() -> impl Parser> + Clon }) .labelled("pad_left"), just(Token::PadTo) - .map_with_span(|_, span| span) + .map_with(|_, state| state.span()) .then(recursive_num_nuc_args.clone()) - .map(|(fn_span, S(((geom_p, num), nuc), span))| { + .map_with(|(fn_span, S(((geom_p, num), nuc), span)), _| { Expr::Function( S(Function::PadTo(num, nuc), fn_span), S(Box::new(geom_p), span), @@ -564,9 +567,9 @@ pub fn parser() -> impl Parser> + Clon }) .labelled("pad_to"), just(Token::PadToLeft) - .map_with_span(|_, span| span) + .map_with(|_, state| state.span()) .then(recursive_num_nuc_args) - .map(|(fn_span, S(((geom_p, num), nuc), span))| { + .map_with(|(fn_span, S(((geom_p, num), nuc), span)), _| { Expr::Function( S(Function::PadToLeft(num, nuc), fn_span), S(Box::new(geom_p), span), @@ -574,32 +577,32 @@ pub fn parser() -> impl Parser> + Clon }) .labelled("pad_to_left"), just(Token::Reverse) - .map_with_span(|_, span| S(Function::Reverse, span)) + .map_with(|_, state| S(Function::Reverse, state.span())) .then(recursive_no_arg.clone()) .map(|(fn_, tok)| Expr::Function(fn_, tok.boxed())) .labelled("rev"), just(Token::ReverseComp) - .map_with_span(|_, span| S(Function::ReverseComp, span)) + .map_with(|_, state| S(Function::ReverseComp, state.span())) .then(recursive_no_arg.clone()) .map(|(fn_, tok)| Expr::Function(fn_, tok.boxed())) .labelled("revcomp"), just(Token::Map) - .map_with_span(|_, span| span) + .map_with(|_, state| state.span()) .then( transformed_pieces .clone() .then_ignore(just(Token::Comma)) - .map_err_with_span(|t, span| throw(t, comma(span))) + .map_err_with_state(|t, span, _| throw(t, comma(span))) .then(file.or(argument)) - .map_err_with_span(|t, span| throw(t, Simple::custom(span, "Expected a file or $ to be mapped to command line argument as second argument."))) + .map_err_with_state(|t, span, _| throw(t, Rich::custom(span, "Expected a file or $ to be mapped to command line argument as second argument."))) .then_ignore(just(Token::Comma)) - .map_err_with_span(|t, span| throw(t, comma(span))) - .then(transformed_pieces.clone().map_with_span(S)) - .map_with_span(S) + .map_err_with_state(|t, span, _| throw(t, comma(span))) + .then(transformed_pieces.clone().map_with(|s, state| S(s, state.span()))) + .map_with(|s, state| S(s, state.span())) .delimited_by(just(Token::LParen), just(Token::RParen)) - .map_err_with_span(|t, span| throw(t, missing_delimiter(Token::LParen, span, Some("'map'")))), + .map_err_with_state(|t, span, _| throw(t, missing_delimiter(Token::LParen, span, Some("'map'")))), ) - .map(|(fn_span, S(((geom_p, path), self_expr), span))| { + .map_with(|(fn_span, S(((geom_p, path), self_expr), span)), _| { Expr::Function( S(Function::Map(path, self_expr.boxed()), fn_span), S(Box::new(geom_p), span), @@ -607,26 +610,26 @@ pub fn parser() -> impl Parser> + Clon }) .labelled("map"), just(Token::MapWithMismatch) - .map_with_span(|_, span| span) + .map_with(|_, state| state.span()) .then( transformed_pieces .clone() .then_ignore(just(Token::Comma)) - .map_err_with_span(|t, span| throw(t, comma(span))) + .map_err_with_state(|t, span, _| throw(t, comma(span))) .then(file.or(argument)) - .map_err_with_span(|t, span| throw(t, Simple::custom(span, "Expected a file or $ to be mapped to command line argument as second argument."))) + .map_err_with_state(|t, span, _| throw(t, Rich::custom(span, "Expected a file or $ to be mapped to command line argument as second argument."))) .then_ignore(just(Token::Comma)) - .map_err_with_span(|t, span| throw(t, comma(span))) - .then(transformed_pieces.clone().map_with_span(S)) + .map_err_with_state(|t, span, _| throw(t, comma(span))) + .then(transformed_pieces.clone().map_with(|s, state| S(s, state.span()))) .then_ignore(just(Token::Comma)) - .map_err_with_span(|t, span| throw(t, comma(span))) + .map_err_with_state(|t, span, _| throw(t, comma(span))) .then(num) - .map_err_with_span(|t, span| throw(t, Simple::custom(span, "Expected a numerical literal as the allowable mismatch when mapping interval."))) - .map_with_span(S) + .map_err_with_state(|t, span, _| throw(t, Rich::custom(span, "Expected a numerical literal as the allowable mismatch when mapping interval."))) + .map_with(|s, state| S(s, state.span())) .delimited_by(just(Token::LParen), just(Token::RParen)) - .map_err_with_span(|t, span| throw(t, missing_delimiter(Token::LParen, span, Some("'map_with_mismatch'")))), + .map_err_with_state(|t, span, _| throw(t, missing_delimiter(Token::LParen, span, Some("'map_with_mismatch'")))), ) - .map(|(fn_span, S((((geom_p, path), self_expr), num), span))| { + .map_with(|(fn_span, S((((geom_p, path), self_expr), num), span)), _| { Expr::Function( S( Function::MapWithMismatch(path, self_expr.boxed(), num), @@ -637,22 +640,22 @@ pub fn parser() -> impl Parser> + Clon }) .labelled("map_dist"), just(Token::FilterWithinDist) - .map_with_span(|_, span| span) + .map_with(|_, state| state.span()) .then( geom_piece .clone() .then_ignore(just(Token::Comma)) - .map_err_with_span(|t, span| throw(t, comma(span))) + .map_err_with_state(|t, span, _| throw(t, comma(span))) .then(file.or(argument)) - .map_err_with_span(|t, span| throw(t, Simple::custom(span, "Expected a file or $ to be mapped to command line argument as second argument."))) + .map_err_with_state(|t, span, _| throw(t, Rich::custom(span, "Expected a file or $ to be mapped to command line argument as second argument."))) .then_ignore(just(Token::Comma)) - .map_err_with_span(|t, span| throw(t, comma(span))) + .map_err_with_state(|t, span, _| throw(t, comma(span))) .then(num) - .map_err_with_span(|t, span| throw(t, Simple::custom(span, "Expected a numerical literal as the allowable mismatch when filtering interval."))) - .map_with_span(S) - .delimited_by(just(Token::LParen), just(Token::RParen)).map_err_with_span(|t, span| throw(t, missing_delimiter(Token::LParen, span, Some("filter_with_mismatch")))), + .map_err_with_state(|t, span, _| throw(t, Rich::custom(span, "Expected a numerical literal as the allowable mismatch when filtering interval."))) + .map_with(|s, state| S(s, state.span())) + .delimited_by(just(Token::LParen), just(Token::RParen)).map_err_with_state(|t, span, _| throw(t, missing_delimiter(Token::LParen, span, Some("filter_with_mismatch")))), ) - .map(|(fn_span, S(((geom_p, path), num), span))| { + .map_with(|(fn_span, S(((geom_p, path), num), span)), _| { Expr::Function( S(Function::FilterWithinDist(path, num), fn_span), S(Box::new(geom_p), span), @@ -660,17 +663,17 @@ pub fn parser() -> impl Parser> + Clon }) .labelled("filter_dist"), just(Token::Filter) - .map_with_span(|_, span| span) + .map_with(|_, state| state.span()) .then( geom_piece .then_ignore(just(Token::Comma)) - .map_err_with_span(|t, span| throw(t, comma(span))) + .map_err_with_state(|t, span, _| throw(t, comma(span))) .then(file.or(argument)) - .map_err_with_span(|t, span| throw(t, Simple::custom(span, "Expected a file or $ to be mapped to command line argument as second argument."))) - .map_with_span(S) - .delimited_by(just(Token::LParen), just(Token::RParen)).map_err_with_span(|t, span| throw(t, missing_delimiter(Token::LParen, span, Some("'filter'")))), + .map_err_with_state(|t, span, _| throw(t, Rich::custom(span, "Expected a file or $ to be mapped to command line argument as second argument."))) + .map_with(|s, state| S(s, state.span())) + .delimited_by(just(Token::LParen), just(Token::RParen)).map_err_with_state(|t, span, _| throw(t, missing_delimiter(Token::LParen, span, Some("'filter'")))), ) - .map(|(fn_span, S((geom_p, path), span))| { + .map_with(|(fn_span, S((geom_p, path), span)),_| { Expr::Function( S(Function::FilterWithinDist(path, 0), fn_span), S(Box::new(geom_p), span), @@ -679,127 +682,134 @@ pub fn parser() -> impl Parser> + Clon .labelled("filter"), )) }) - .map_err_with_span(|t, span| throw(t, Simple::custom(span, "Invalid construction of an interval"))) - .map_with_span(S); + .map_err_with_state(|t, span, _| throw(t, Rich::custom(span, "Invalid construction of an interval"))) + .map_with(|s, state| S(s, state.span())); let definitions = label - .map_err_with_span(|t, span| { + .map_err_with_state(|t, span, _| { throw( t, - Simple::custom(span, "Expected a label to begin a definition."), + Rich::custom(span, "Expected a label to begin a definition."), ) }) .then_ignore(just(Token::Equals)) .then(transformed_pieces.clone()) - .map_err_with_span(|t, span| { - throw( - t, - Simple::custom(span, "Error creating variable declaration"), - ) + .map_err_with_state(|t, span, _| { + throw(t, Rich::custom(span, "Error creating variable declaration")) }) - .map_with_span(|(label, geom_p), span| { + .map_with(|(label, geom_p), state| { S( Definition { label, expr: geom_p, }, - span, + state.span(), ) }) .repeated() - .map_with_span(S); + .collect() + .map_with(|s, state| S(s, state.span())); let reads = num - .map_err_with_span(|t, span| { - throw(t, Simple::custom(span, "Expected a number to start a read")) + .map_err_with_state(|t, span, _| { + throw(t, Rich::custom(span, "Expected a number to start a read")) }) - .map_with_span(S) + .map_with(|s, state| S(s, state.span())) .then( transformed_pieces .clone() .labelled("transformed_pieces_for_reads") .repeated() .at_least(1) + .collect() .delimited_by(just(Token::LBrace), just(Token::RBrace)) - .map_err_with_span(|t, span| { + .map_err_with_state(|t, span, _| { throw(t, missing_delimiter(Token::LBrace, span, Some("reads"))) }), ) - .map_with_span(|(n, read), span| { + .map_with(|(n, read), state| { S( Read { index: n, exprs: read, }, - span, + state.span(), ) }) .repeated() .exactly(2) - .map_err_with_span(|t, span| { + .collect::>() + .map_err_with_state(|t, span, _| { throw( t, - Simple::custom(span, "Must provide two reads - only found one"), + Rich::custom(span, "Must provide two reads - only found one"), ) - }) - .collect::>(); + }); let transform_read = num - .map_with_span(S) + .map_with(|s, state| S(s, state.span())) .then( transformed_pieces .clone() .repeated() .at_least(1) + .collect() .delimited_by(just(Token::LBrace), just(Token::RBrace)) - .map_err_with_span(|t, span| { + .map_err_with_state(|t, span, _| { throw( t, missing_delimiter(Token::LBrace, span, Some("transformation")), ) }), ) - .map_with_span(|(n, read), span| { + .map_with(|(n, read), state| { S( Read { index: n, exprs: read, }, - span, + state.span(), ) }); let transformation = choice(( end().map(|()| None), just(Token::TransformTo) - .then(transform_read.repeated().at_least(1).at_most(2).then(end())) - .map_with_span(|(_, (val, _)), span| Some(S(val, span))), + .then( + transform_read + .repeated() + .at_least(1) + .at_most(2) + .collect() + .then(end()), + ) + .map_with(|(_, (val, _)), state| Some(S(val, state.span()))), )); definitions - .map_err_with_span(|t, span| { + .map_err_with_state(|t, span, _| { throw( t, - Simple::custom(span, "Error while parsing EFGDL specification."), + Rich::custom(span, "Error while parsing EFGDL specification."), ) }) - .then(reads.map_with_span(S)) + .then(reads.map_with(|s, state| S(s, state.span()))) .then(transformation) - .map_err_with_span(|t, span| { + .map_err_with_state(|t, span, _| { throw( t, - Simple::custom(span, "Error while parsing EFGDL specification."), + Rich::custom(span, "Error while parsing EFGDL specification."), ) }) - .map(|((definitions, reads), transforms)| Description { + .map_with(|((definitions, reads), transforms), _| Description { definitions, reads, transforms, }) - .map_err_with_span(|t, span| { + .map_err_with_state(|t, span, _| { throw( t, - Simple::custom(span, "Error while parsing EFGDL specification."), + Rich::custom(span, "Error while parsing EFGDL specification."), ) }) } diff --git a/src/processors/mod.rs b/src/processors/mod.rs index 1b5ef7f..89b3644 100644 --- a/src/processors/mod.rs +++ b/src/processors/mod.rs @@ -150,7 +150,7 @@ pub fn map(this_label: &str, patterns: Patterns, match_type: MatchType, graph: & Expr::from(expr::attr(format!("{this_label}.{SUB}"))), )); graph.add(SelectOp::new( - Expr::from(expr::attr(&format!("{this_label}.{MAPPED}"))), + Expr::from(expr::attr(format!("{this_label}.{MAPPED}"))), mapping_graph, )); } diff --git a/tests/common/mod.rs b/tests/common/mod.rs new file mode 100644 index 0000000..b5614dd --- /dev/null +++ b/tests/common/mod.rs @@ -0,0 +1 @@ +pub mod utils; diff --git a/tests/common/utils.rs b/tests/common/utils.rs new file mode 100644 index 0000000..26f14c4 --- /dev/null +++ b/tests/common/utils.rs @@ -0,0 +1,12 @@ +use chumsky::{error::Rich, Parser}; +use seqproc::lexer::lexer; + +pub fn into_input_tokens(i: &str) -> (Vec, Vec>) { + let (res, lex_err) = lexer().parse(i).into_output_errors(); + + let res = res.unwrap(); + + let input_tokens = res.iter().map(|(t, _)| t.clone()).collect::>(); + + (input_tokens, lex_err) +} diff --git a/tests/compile_tests.rs b/tests/compile_tests.rs index 60697e0..2c74d73 100644 --- a/tests/compile_tests.rs +++ b/tests/compile_tests.rs @@ -1,10 +1,11 @@ +mod common; + use std::collections::HashMap; -use chumsky::{prelude::*, Stream}; +use chumsky::prelude::*; use seqproc::{ compile::{compile, definitions::compile_definitions, reads::compile_reads, utils::Error}, execute::compile_geom, - lexer::lexer, parser::parser, }; @@ -12,16 +13,9 @@ use seqproc::{ fn no_err() -> Result<(), Error> { let src = "1{remove(hamming(f[CAG], 1))}2{r:}"; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); + let (input_tokens, _) = common::utils::into_input_tokens(src); - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let res = parser().parse(&input_tokens).into_output().unwrap(); compile_reads(res.reads, HashMap::new())?; @@ -32,16 +26,9 @@ fn no_err() -> Result<(), Error> { fn fail_norm() { let src = "1{norm(r:)}2{r:}"; - let (res, _) = lexer().parse_recovery(src); + let (input_tokens, _) = common::utils::into_input_tokens(src); - let res = res.unwrap(); - - let len = res.len(); - - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let res = parser().parse(&input_tokens).into_output().unwrap(); let res = compile_reads(res.reads, HashMap::new()); @@ -52,16 +39,9 @@ fn fail_norm() { fn pass_composition() { let src = "1{trunc_to(rev(r:), 1)}2{r:}"; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); + let (input_tokens, _) = common::utils::into_input_tokens(src); - let len = res.len(); - - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let res = parser().parse(&input_tokens).into_output().unwrap(); let res = compile_reads(res.reads, HashMap::new()); @@ -72,16 +52,9 @@ fn pass_composition() { fn fail_remove() { let src = "1{rev(remove(r:))}2{r:}"; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); + let (input_tokens, _) = common::utils::into_input_tokens(src); - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let res = parser().parse(&input_tokens).into_output().unwrap(); let res = compile_reads(res.reads, HashMap::new()); @@ -92,16 +65,9 @@ fn fail_remove() { fn discard_as_void() { let src = "1{rev(x[10])}2{r:}"; - let (res, _) = lexer().parse_recovery(src); + let (input_tokens, _) = common::utils::into_input_tokens(src); - let res = res.unwrap(); - - let len = res.len(); - - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let res = parser().parse(&input_tokens).into_output().unwrap(); let res = compile_reads(res.reads, HashMap::new()); @@ -115,16 +81,9 @@ brc = b[10] brc1 = b[1-4] 1{}2{r:}"; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); + let (input_tokens, _) = common::utils::into_input_tokens(src); - let len = res.len(); - - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let res = parser().parse(&input_tokens).into_output().unwrap(); let def_map = compile_definitions(res.definitions)?; @@ -140,16 +99,9 @@ brc = b[10] brc = b[1-4] 1{}2{r:}"; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); + let (input_tokens, _) = common::utils::into_input_tokens(src); - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let res = parser().parse(&input_tokens).into_output().unwrap(); let def_map = compile_definitions(res.definitions); @@ -158,19 +110,12 @@ brc = b[1-4] #[test] fn label_replacement() { - let src = "test = r: + let src = "test = r: 1{pad_to(, 5, A)}2{r:}"; - let (res, _) = lexer().parse_recovery(src); + let (input_tokens, _) = common::utils::into_input_tokens(src); - let res = res.unwrap(); - - let len = res.len(); - - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let res = parser().parse(&input_tokens).into_output().unwrap(); let def_map = compile_definitions(res.definitions).unwrap(); @@ -181,19 +126,12 @@ fn label_replacement() { #[test] fn no_variable() { - let src = "testing = r: + let src = "testing = r: 1{pad(, 5, A)}2{r:}"; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); + let (input_tokens, _) = common::utils::into_input_tokens(src); - let len = res.len(); - - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let res = parser().parse(&input_tokens).into_output().unwrap(); let def_map = compile_definitions(res.definitions).unwrap(); @@ -206,16 +144,9 @@ fn no_variable() { fn expr_unwrap() -> Result<(), Error> { let src = "1{pad(norm(b[9-10]), 1, A)remove(f[CAGAGC])u[8]remove(b[10])}2{r:}"; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); + let (input_tokens, _) = common::utils::into_input_tokens(src); - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let res = parser().parse(&input_tokens).into_output().unwrap(); compile(res)?; @@ -228,16 +159,9 @@ fn fail_reuse_label() { brc = b[10] 1{}2{r:}"; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); + let (input_tokens, _) = common::utils::into_input_tokens(src); - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let res = parser().parse(&input_tokens).into_output().unwrap(); let def_map = compile_definitions(res.definitions).unwrap(); @@ -253,16 +177,9 @@ brc = b[10] brc1 = pad(, 1, A) 1{}2{r:}"; - let (res, _) = lexer().parse_recovery(src); + let (input_tokens, _) = common::utils::into_input_tokens(src); - let res = res.unwrap(); - - let len = res.len(); - - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let res = parser().parse(&input_tokens).into_output().unwrap(); let def_map = compile_definitions(res.definitions); @@ -276,16 +193,9 @@ brc = b[10] umi = pad(u[10], 1, A) 1{}2{r:}"; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); + let (input_tokens, _) = common::utils::into_input_tokens(src); - let len = res.len(); - - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let res = parser().parse(&input_tokens).into_output().unwrap(); compile(res)?; @@ -299,16 +209,9 @@ brc = b[10] umi = pad(u[10], 1, A) 1{}2{r:}"; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); + let (input_tokens, _) = common::utils::into_input_tokens(src); - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let res = parser().parse(&input_tokens).into_output().unwrap(); let res = compile(res); @@ -321,16 +224,9 @@ fn fail_label_composition() { brc = remove(trunc(b[10], 3)) 1{pad(, 1, A)}2{r:}"; - let (res, _) = lexer().parse_recovery(src); + let (input_tokens, _) = common::utils::into_input_tokens(src); - let res = res.unwrap(); - - let len = res.len(); - - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let res = parser().parse(&input_tokens).into_output().unwrap(); let res = compile(res); @@ -341,16 +237,9 @@ brc = remove(trunc(b[10], 3)) fn valid_geom() -> Result<(), Error> { let src = "1{b[9-11]remove(f[CAGAGC])u[8]b[10]}2{r:}"; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); + let (input_tokens, _) = common::utils::into_input_tokens(src); - let len = res.len(); - - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let res = parser().parse(&input_tokens).into_output().unwrap(); compile(res)?; @@ -361,16 +250,9 @@ fn valid_geom() -> Result<(), Error> { fn invalid_geom_one() { let src = "1{b[9-11]f[CAGAGC]r:u[8]b[10]}2{r:}"; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); + let (input_tokens, _) = common::utils::into_input_tokens(src); - let len = res.len(); - - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let res = parser().parse(&input_tokens).into_output().unwrap(); let res = compile(res); @@ -381,16 +263,9 @@ fn invalid_geom_one() { fn invalid_geom_two() { let src = "1{f[GAG]b[10-11]b[10]}2{r:}"; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); + let (input_tokens, _) = common::utils::into_input_tokens(src); - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let res = parser().parse(&input_tokens).into_output().unwrap(); let res = compile(res); @@ -406,16 +281,9 @@ test = r: 1{pad(, 1, A)f[CAGAGC]f[CAGA]}2{r:} -> 1{remove()remove()} "; - let (res, _) = lexer().parse_recovery(src); + let (input_tokens, _) = common::utils::into_input_tokens(src); - let res = res.unwrap(); - - let len = res.len(); - - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let res = parser().parse(&input_tokens).into_output().unwrap(); compile(res)?; @@ -430,16 +298,9 @@ umi = norm(u[9-11]) 1{pad(, 1, A)f[CAGAGC]f[CAGA]}2{r:} -> 1{remove()remove(pad(, 1, A))} "; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); + let (input_tokens, _) = common::utils::into_input_tokens(src); - let len = res.len(); - - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let res = parser().parse(&input_tokens).into_output().unwrap(); compile(res)?; @@ -450,16 +311,9 @@ umi = norm(u[9-11]) fn compile_map_arguments() -> Result<(), Error> { let src = "1{map(b[10-11], \"file\", norm(self))}2{r:}"; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); + let (input_tokens, _) = common::utils::into_input_tokens(src); - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let res = parser().parse(&input_tokens).into_output().unwrap(); compile(res)?; @@ -469,19 +323,12 @@ fn compile_map_arguments() -> Result<(), Error> { #[test] fn compile_map_arguments_with_label() -> Result<(), Error> { let src = " -brc = b[10-11] +brc = b[10-11] 1{map(, \"file\", norm(self))}2{r:}"; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); + let (input_tokens, _) = common::utils::into_input_tokens(src); - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let res = parser().parse(&input_tokens).into_output().unwrap(); compile(res)?; diff --git a/tests/lexer_tests.rs b/tests/lexer_tests.rs index c46c7c1..0aa6080 100644 --- a/tests/lexer_tests.rs +++ b/tests/lexer_tests.rs @@ -5,16 +5,23 @@ use seqproc::lexer::{lexer, Token}; fn nucs() { let src = "GCA"; - let expected_res = vec![(Token::G, 0..1), (Token::C, 1..2), (Token::A, 2..3)]; + let expected_res = vec![ + (Token::G, SimpleSpan::from(0..1)), + (Token::C, SimpleSpan::from(1..2)), + (Token::A, SimpleSpan::from(2..3)), + ]; - assert_eq!(expected_res, lexer().parse(src).unwrap()); + assert_eq!(expected_res, lexer().parse(src).into_output().unwrap()); } #[test] fn token() { let src = "1"; - assert_eq!(vec![(Token::Num(1), 0..1)], lexer().parse(src).unwrap()); + assert_eq!( + vec![(Token::Num(1), SimpleSpan::from(0..1))], + lexer().parse(src).unwrap() + ); } #[test] @@ -23,12 +30,12 @@ fn tokens() { assert_eq!( vec![ - (Token::Label("bc1".to_string()), 0..3), - (Token::Equals, 4..5), - (Token::Barcode, 6..7), - (Token::LBracket, 7..8), - (Token::Num(10), 8..10), - (Token::RBracket, 10..11), + (Token::Label("bc1".to_string()), SimpleSpan::from(0..3)), + (Token::Equals, SimpleSpan::from(4..5)), + (Token::Barcode, SimpleSpan::from(6..7)), + (Token::LBracket, SimpleSpan::from(7..8)), + (Token::Num(10), SimpleSpan::from(8..10)), + (Token::RBracket, SimpleSpan::from(10..11)), ], lexer().parse(src).unwrap() ); @@ -38,7 +45,7 @@ fn tokens() { fn fail() { let src = "1 ? 2"; - let (_, err) = lexer().parse_recovery(src); + let (_, err) = lexer().parse(src).into_output_errors(); assert_eq!(err.len(), 1); } @@ -47,11 +54,11 @@ fn fail() { fn label() { let src = "barcode"; - let (res, err) = lexer().parse_recovery(src); + let (res, err) = lexer().parse(src).into_output_errors(); assert_eq!(err.len(), 0); assert_eq!( - vec![(Token::Label("barcode".to_string()), 0..7)], + vec![(Token::Label("barcode".to_string()), SimpleSpan::from(0..7))], res.unwrap() ); } @@ -60,7 +67,7 @@ fn label() { fn precidence() { let src = "b[1-2] -> 1{}"; - let (res, err) = lexer().parse_recovery(src); + let (res, err) = lexer().parse(src).into_output_errors(); assert_eq!(err.len(), 0); @@ -87,7 +94,7 @@ fn precidence() { fn map_vs_with_mismatch() { let src = "map()map_with_mismatch()"; - let (res, err) = lexer().parse_recovery(src); + let (res, err) = lexer().parse(src).into_output_errors(); assert_eq!(err.len(), 0); @@ -110,7 +117,7 @@ fn map_vs_with_mismatch() { fn arguments() { let src = "map(f[ATG], $0, self)"; - let (res, err) = lexer().parse_recovery(src); + let (res, err) = lexer().parse(src).into_output_errors(); dbg!(&err); assert_eq!(err.len(), 0); diff --git a/tests/parser_tests.rs b/tests/parser_tests.rs index 60e81b4..5d23409 100644 --- a/tests/parser_tests.rs +++ b/tests/parser_tests.rs @@ -1,6 +1,7 @@ -use chumsky::{prelude::*, Stream}; +mod common; + +use chumsky::prelude::*; use seqproc::{ - lexer::lexer, parser::{parser, Definition, Expr, Function, IntervalKind, IntervalShape, Read}, Nucleotide, S, }; @@ -9,24 +10,21 @@ use seqproc::{ fn definition() { let src = "brc = b[10] 1{}2{r:}"; - let (res, lex_err) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); + let (input_tokens, lex_err) = common::utils::into_input_tokens(src); - let (Some(res), parser_err) = - parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - else { + let (Some(res), parser_err) = parser().parse(&input_tokens).into_output_errors() else { panic!() }; - let expected_res = S( - vec![S( + let expected_res = S::new( + vec![S::new( Definition { - label: S("brc".to_string(), 0..3), - expr: S( - Expr::GeomPiece(IntervalKind::Barcode, IntervalShape::FixedLen(S(10, 8..10))), + label: S::new("brc".to_string(), 0..3), + expr: S::new( + Expr::GeomPiece( + IntervalKind::Barcode, + IntervalShape::FixedLen(S::new(10, 8..10)), + ), 6..11, ), }, @@ -44,30 +42,26 @@ fn definition() { fn transformation() { let src = "1{b[1]}2{r:} -> 1{}2{r:}"; - let (res, lex_err) = lexer().parse_recovery(src); - - let res = res.unwrap(); + let (input_tokens, lex_err) = common::utils::into_input_tokens(src); - let len = res.len(); + println!("{:?}", input_tokens); - let (Some(res), parser_err) = - parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - else { + let (Some(res), parser_err) = parser().parse(&input_tokens).into_output_errors() else { panic!() }; let expected_res = vec![ - S( + S::new( Read { - index: S(1, 16..17), - exprs: vec![S(Expr::Label(S("t".to_string(), 18..21)), 18..21)], + index: S::new(1, 16..17), + exprs: vec![S::new(Expr::Label(S::new("t".to_string(), 18..21)), 18..21)], }, 16..22, ), - S( + S::new( Read { - index: S(2, 22..23), - exprs: vec![S( + index: S::new(2, 22..23), + exprs: vec![S::new( Expr::GeomPiece(IntervalKind::ReadSeq, IntervalShape::UnboundedLen), 24..26, )], @@ -90,13 +84,9 @@ another = remove(u[9-11]) -> 1{} "; - let (res, lex_err) = lexer().parse_recovery(src); - - let res = res.unwrap(); + let (input_tokens, lex_err) = common::utils::into_input_tokens(src); - let len = res.len(); - - let (_, parser_err) = parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())); + let (_, parser_err) = parser().parse(&input_tokens).into_output_errors(); assert!(lex_err.is_empty()); assert!(parser_err.is_empty()); @@ -106,24 +96,21 @@ another = remove(u[9-11]) fn hamming() { let src = "1{hamming(, 1)}2{r:}"; - let (res, lex_err) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); + let (input_tokens, lex_err) = common::utils::into_input_tokens(src); - let (Some(res), parser_err) = - parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - else { + let (Some(res), parser_err) = parser().parse(&input_tokens).into_output_errors() else { panic!() }; let expected_res = Read { - index: S(1, 0..1), - exprs: vec![S( + index: S::new(1, 0..1), + exprs: vec![S::new( Expr::Function( - S(Function::Hamming(1), 2..9), - S(Box::new(Expr::Label(S("brc".to_string(), 10..15))), 10..18), + S::new(Function::Hamming(1), 2..9), + S::new( + Box::new(Expr::Label(S::new("brc".to_string(), 10..15))), + 10..18, + ), ), 2..19, )], @@ -138,24 +125,21 @@ fn hamming() { fn remove() { let src = "1{remove()}2{r:}"; - let (res, lex_err) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); + let (input_tokens, lex_err) = common::utils::into_input_tokens(src); - let (Some(res), parser_err) = - parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - else { + let (Some(res), parser_err) = parser().parse(&input_tokens).into_output_errors() else { panic!() }; let expected_res = Read { - index: S(1, 0..1), - exprs: vec![S( + index: S::new(1, 0..1), + exprs: vec![S::new( Expr::Function( - S(Function::Remove, 2..8), - S(Box::new(Expr::Label(S("brc".to_string(), 9..14))), 9..14), + S::new(Function::Remove, 2..8), + S::new( + Box::new(Expr::Label(S::new("brc".to_string(), 9..14))), + 9..14, + ), ), 2..15, )], @@ -170,13 +154,9 @@ fn remove() { fn illegal_nest() { let src = "1{hamming(pad(, 1), 1)}"; - let (res, lex_err) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); + let (input_tokens, lex_err) = common::utils::into_input_tokens(src); - let (_, parser_err) = parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())); + let (_, parser_err) = parser().parse(&input_tokens).into_output_errors(); assert!(lex_err.is_empty()); assert_eq!(1, parser_err.len()); @@ -186,27 +166,24 @@ fn illegal_nest() { fn nested() { let src = "1{rev(norm())}2{r:}"; - let (res, lex_err) = lexer().parse_recovery(src); + let (input_tokens, lex_err) = common::utils::into_input_tokens(src); - let res = res.unwrap(); - - let len = res.len(); - - let (Some(res), parser_err) = - parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - else { + let (Some(res), parser_err) = parser().parse(&input_tokens).into_output_errors() else { panic!() }; let expected_res = Read { - index: S(1, 0..1), - exprs: vec![S( + index: S::new(1, 0..1), + exprs: vec![S::new( Expr::Function( - S(Function::Reverse, 2..5), - S( + S::new(Function::Reverse, 2..5), + S::new( Box::new(Expr::Function( - S(Function::Normalize, 6..10), - S(Box::new(Expr::Label(S("brc".to_string(), 11..16))), 11..16), + S::new(Function::Normalize, 6..10), + S::new( + Box::new(Expr::Label(S::new("brc".to_string(), 11..16))), + 11..16, + ), )), 6..17, ), @@ -224,24 +201,18 @@ fn nested() { fn labeled_unbounded() { let src = "1{b:}2{r:}"; - let (res, lex_err) = lexer().parse_recovery(src); - - let res = res.unwrap(); + let (input_tokens, lex_err) = common::utils::into_input_tokens(src); - let len = res.len(); - - let (Some(res), parser_err) = - parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - else { + let (Some(res), parser_err) = parser().parse(&input_tokens).into_output_errors() else { panic!() }; let expected_res = Read { - index: S(1, 0..1), - exprs: vec![S( + index: S::new(1, 0..1), + exprs: vec![S::new( Expr::LabeledGeomPiece( - S("barcode".to_string(), 3..12), - S( + S::new("barcode".to_string(), 3..12), + S::new( Box::new(Expr::GeomPiece( IntervalKind::Barcode, IntervalShape::UnboundedLen, @@ -262,24 +233,18 @@ fn labeled_unbounded() { fn ranged() { let src = "1{b[10-11]}2{r:}"; - let (res, lex_err) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); + let (input_tokens, lex_err) = common::utils::into_input_tokens(src); - let (Some(res), parser_err) = - parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - else { + let (Some(res), parser_err) = parser().parse(&input_tokens).into_output_errors() else { panic!() }; let expected_res = Read { - index: S(1, 0..1), - exprs: vec![S( + index: S::new(1, 0..1), + exprs: vec![S::new( Expr::GeomPiece( IntervalKind::Barcode, - IntervalShape::RangedLen(S((10, 11), 4..9)), + IntervalShape::RangedLen(S::new((10, 11), 4..9)), ), 2..10, )], @@ -294,22 +259,19 @@ fn ranged() { fn fixed() { let src = "1{r[10]}2{r:}"; - let (res, lex_err) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); + let (input_tokens, lex_err) = common::utils::into_input_tokens(src); - let (Some(res), parser_err) = - parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - else { + let (Some(res), parser_err) = parser().parse(&input_tokens).into_output_errors() else { panic!() }; let expected_res = Read { - index: S(1, 0..1), - exprs: vec![S( - Expr::GeomPiece(IntervalKind::ReadSeq, IntervalShape::FixedLen(S(10, 4..6))), + index: S::new(1, 0..1), + exprs: vec![S::new( + Expr::GeomPiece( + IntervalKind::ReadSeq, + IntervalShape::FixedLen(S::new(10, 4..6)), + ), 2..7, )], }; @@ -323,24 +285,18 @@ fn fixed() { fn fixed_seq() { let src = "1{f[GACTU]}2{r:}"; - let (res, lex_err) = lexer().parse_recovery(src); - - let res = res.unwrap(); + let (input_tokens, lex_err) = common::utils::into_input_tokens(src); - let len = res.len(); - - let (Some(res), parser_err) = - parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - else { + let (Some(res), parser_err) = parser().parse(&input_tokens).into_output_errors() else { panic!() }; let expected_res = Read { - index: S(1, 0..1), - exprs: vec![S( + index: S::new(1, 0..1), + exprs: vec![S::new( Expr::GeomPiece( IntervalKind::FixedSeq, - IntervalShape::FixedSeq(S( + IntervalShape::FixedSeq(S::new( vec![ Nucleotide::G, Nucleotide::A, @@ -364,13 +320,9 @@ fn fixed_seq() { fn fail_ranged_seq() { let src = "1{f[1-2]}2{r:}"; - let (res, lex_err) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); + let (input_tokens, lex_err) = common::utils::into_input_tokens(src); - let (_, parser_err) = parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())); + let (_, parser_err) = parser().parse(&input_tokens).into_output_errors(); assert!(lex_err.is_empty()); assert_eq!(1, parser_err.len()); @@ -380,13 +332,9 @@ fn fail_ranged_seq() { fn allow_expr_arg() { let src = "1{map(b[9-10], \"filepath\", norm(self))}2{r:}"; - let (res, lex_err) = lexer().parse_recovery(src); + let (input_tokens, lex_err) = common::utils::into_input_tokens(src); - let res = res.unwrap(); - - let len = res.len(); - - let (_, parser_err) = parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())); + let (_, parser_err) = parser().parse(&input_tokens).into_output_errors(); assert!(lex_err.is_empty()); assert!(parser_err.is_empty()); @@ -396,13 +344,9 @@ fn allow_expr_arg() { fn fail_map() { let src = "1{map(pad(b[9-10], 3), \"filepath\", norm(self))}2{r:}"; - let (res, lex_err) = lexer().parse_recovery(src); - - let res = res.unwrap(); + let (input_tokens, lex_err) = common::utils::into_input_tokens(src); - let len = res.len(); - - let (_, parser_err) = parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())); + let (_, parser_err) = parser().parse(&input_tokens).into_output_errors(); assert!(lex_err.is_empty()); assert_eq!(1, parser_err.len()); @@ -412,13 +356,9 @@ fn fail_map() { fn fail_prefix_label_underscore() { let src = "_brc = b[10] 1{}2{r:}"; - let (res, lex_err) = lexer().parse_recovery(src); - - let res = res.unwrap(); + let (input_tokens, lex_err) = common::utils::into_input_tokens(src); - let len = res.len(); - - let (_, parser_err) = parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())); + let (_, parser_err) = parser().parse(&input_tokens).into_output_errors(); assert!(lex_err.is_empty()); assert_eq!(1, parser_err.len()); @@ -428,13 +368,9 @@ fn fail_prefix_label_underscore() { fn fail_prefix_inlinelabel_underscore() { let src = "1{b<_brc>[10]}2{r:}"; - let (res, lex_err) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); + let (input_tokens, lex_err) = common::utils::into_input_tokens(src); - let (_, parser_err) = parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())); + let (_, parser_err) = parser().parse(&input_tokens).into_output_errors(); assert!(lex_err.is_empty()); assert_eq!(1, parser_err.len()); @@ -444,13 +380,9 @@ fn fail_prefix_inlinelabel_underscore() { fn ok_mid_inlinelabel_underscore() { let src = "1{b[10]}2{r:}"; - let (res, lex_err) = lexer().parse_recovery(src); + let (input_tokens, lex_err) = common::utils::into_input_tokens(src); - let res = res.unwrap(); - - let len = res.len(); - - let (_, parser_err) = parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())); + let (_, parser_err) = parser().parse(&input_tokens).into_output_errors(); assert!(lex_err.is_empty()); assert!(parser_err.is_empty()); @@ -460,13 +392,9 @@ fn ok_mid_inlinelabel_underscore() { fn ok_mid_label_underscore() { let src = "b_rc = b[10] 1{}2{r:}"; - let (res, lex_err) = lexer().parse_recovery(src); - - let res = res.unwrap(); + let (input_tokens, lex_err) = common::utils::into_input_tokens(src); - let len = res.len(); - - let (_, parser_err) = parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())); + let (_, parser_err) = parser().parse(&input_tokens).into_output_errors(); assert!(lex_err.is_empty()); assert!(parser_err.is_empty()); @@ -476,13 +404,9 @@ fn ok_mid_label_underscore() { fn filter_test() { let src = "b_rc = filter(b[10], $0) 1{}2{r:}"; - let (res, lex_err) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); + let (input_tokens, lex_err) = common::utils::into_input_tokens(src); - let (_, parser_err) = parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())); + let (_, parser_err) = parser().parse(&input_tokens).into_output_errors(); assert!(lex_err.is_empty()); assert!(parser_err.is_empty()); @@ -492,13 +416,9 @@ fn filter_test() { fn filter_test_too_many_args() { let src = "b_rc = filter(b[10], $0, 1) 1{}2{r:}"; - let (res, lex_err) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); + let (input_tokens, lex_err) = common::utils::into_input_tokens(src); - let (_, parser_err) = parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())); + let (_, parser_err) = parser().parse(&input_tokens).into_output_errors(); assert!(lex_err.is_empty()); assert_eq!(1, parser_err.len()); From 016005de88dc8d2256515ee36e80279dacd41b2d Mon Sep 17 00:00:00 2001 From: Noah Cape Date: Mon, 29 Dec 2025 09:20:57 -0800 Subject: [PATCH 2/2] updated parser --- Cargo.lock | 52 +- Cargo.toml | 2 +- src/bin/bin.rs | 11 +- src/error.rs | 132 ++--- src/execute.rs | 66 +-- src/geometry/compile/functions.rs | 1 + src/geometry/lexer.rs | 14 +- src/geometry/mod.rs | 12 +- src/geometry/parser.rs | 836 +++++++++++------------------- tests/common/utils.rs | 39 +- tests/compile_tests.rs | 210 +++++--- tests/parser_tests.rs | 276 ++++++---- 12 files changed, 725 insertions(+), 926 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7f4a3c6..3f9b3cc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,9 +4,9 @@ version = 4 [[package]] name = "adler2" -version = "2.0.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" [[package]] name = "aho-corasick" @@ -163,18 +163,18 @@ dependencies = [ [[package]] name = "buffer-redux" -version = "1.0.2" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e8acf87c5b9f5897cd3ebb9a327f420e0cae9dd4e5c1d2e36f2c84c571a58f1" +checksum = "431a9cc8d7efa49bc326729264537f5e60affce816c66edf434350778c9f4f54" dependencies = [ "memchr", ] [[package]] name = "bytecount" -version = "0.6.8" +version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce" +checksum = "175812e0be2bccb6abe50bb8d566126198344f707e304f45c648fd8f2cc0365e" [[package]] name = "bzip2" @@ -188,12 +188,11 @@ dependencies = [ [[package]] name = "bzip2-sys" -version = "0.1.12+1.0.8" +version = "0.1.13+1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72ebc2f1a417f01e1da30ef264ee86ae31d2dcd2d603ea283d3c244a883ca2a9" +checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" dependencies = [ "cc", - "libc", "pkg-config", ] @@ -214,9 +213,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chumsky" -version = "0.10.1" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14377e276b2c8300513dff55ba4cc4142b44e5d6de6d00eb5b2307d650bb4ec1" +checksum = "4ba4a05c9ce83b07de31b31c874e87c069881ac4355db9e752e3a55c11ec75a6" dependencies = [ "hashbrown", "regex-automata 0.3.9", @@ -305,9 +304,9 @@ dependencies = [ [[package]] name = "crc32fast" -version = "1.4.2" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" dependencies = [ "cfg-if", ] @@ -375,9 +374,9 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "flate2" -version = "1.0.35" +version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c936bfdafb507ebbf50b8074c54fa31c5be9a1e7e5f467dd659697041407d07c" +checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb" dependencies = [ "crc32fast", "libz-ng-sys", @@ -451,9 +450,9 @@ checksum = "875b3680cb2f8f71bdcf9a30f38d48282f5d3c95cbf9b3fa57269bb5d5c06828" [[package]] name = "libz-ng-sys" -version = "1.1.21" +version = "1.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cee1488e961a80d172564fd6fcda11d8a4ac6672c06fe008e9213fa60520c2b" +checksum = "7bf914b7dd154ca9193afec311d8e39345c1bd93b48b3faa77329f0db8f553c0" dependencies = [ "cmake", "libc", @@ -508,11 +507,12 @@ dependencies = [ [[package]] name = "miniz_oxide" -version = "0.8.5" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e3e04debbb59698c15bacbb6d93584a8c0ca9cc3213cb423d31f760d8843ce5" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ "adler2", + "simd-adler32", ] [[package]] @@ -578,9 +578,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" [[package]] name = "predicates" @@ -834,9 +834,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.139" +version = "1.0.143" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44f86c3acccc9c65b153fe1b85a3be07fe5515274ec9f0653b4a0875731c72a6" +checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a" dependencies = [ "itoa", "memchr", @@ -859,6 +859,12 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "simd-adler32" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" + [[package]] name = "similar" version = "2.7.0" diff --git a/Cargo.toml b/Cargo.toml index 3ee9c70..9551e8a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ antisequence = { git = "https://github.com/noahcape/ANTISEQUENCE.git", branch = tracing-subscriber = { version = "0.3.17", features = ["env-filter"] } tracing = "0.1.37" ariadne = "0.5.0" -chumsky = "0.10.1" +chumsky = "0.12.0" clap = { version = "4.4.8", features = ["derive"] } anyhow = "1.0" tempfile = "3.5.0" diff --git a/src/bin/bin.rs b/src/bin/bin.rs index 9fe6c9f..2a1b33c 100644 --- a/src/bin/bin.rs +++ b/src/bin/bin.rs @@ -1,12 +1,11 @@ +use std::process::exit; + use clap::arg; use std::io; use std::path::PathBuf; use tracing_subscriber::{filter::LevelFilter, fmt, prelude::*, EnvFilter}; -use seqproc::{ - error::handle_errors, - execute::{compile_geom, interpret}, -}; +use seqproc::execute::{compile_geom, interpret}; /// General puprose sequence preprocessor #[derive(Debug, clap::Parser)] @@ -82,8 +81,6 @@ fn main() { additional_args, geom, ), - Err(e) => { - handle_errors(e, geom); - } + Err(_) => exit(1), } } diff --git a/src/error.rs b/src/error.rs index b566772..7621514 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,101 +1,39 @@ -use ariadne::{Color, Label, Report, ReportKind, Source}; -use chumsky::{error::RichReason, prelude::*}; - -use crate::lexer::Token; - -pub fn handle_errors(errs: Vec>, source: String) { - // error recovery - errs.into_iter().for_each(|e| { - Report::build(ReportKind::Error, ((), e.span().into_range())) - .with_config(ariadne::Config::new().with_index_type(ariadne::IndexType::Byte)) - .with_message(e.to_string()) - .with_label( - Label::new(((), e.span().into_range())) - .with_message(e.reason().to_string()) - .with_color(Color::Red), - ) - .with_labels(e.contexts().map(|(label, span)| { - Label::new(((), span.into_range())) - .with_message(format!("while parsing this {label}")) - .with_color(Color::Yellow) - })) - .finish() - .print(Source::from(&source)) - .unwrap(); - }); -} - -pub fn missing_delimiter<'a>(token: Token, span: SimpleSpan, obj: Option<&str>) -> Rich<'a, Token> { - let msg = |d1, d2| match obj { - Some(obj) => format!("Missing delimitter for {obj} - delimit with '{d1} .. {d2}'."), - None => format!("Missing delimtter - delimit with '{d1} .. {d2}'."), - }; +use std::fmt; - match token { - Token::RParen | Token::LParen => Rich::custom(span, msg('(', ')')), - Token::RBrace | Token::LBrace => Rich::custom(span, msg('{', '}')), - Token::RBracket | Token::LBracket => Rich::custom(span, msg('[', ']')), - Token::RAngle | Token::LAngle => Rich::custom(span, msg('<', '>')), - _ => Rich::custom(span, "Missing delimitter"), - } -} - -pub fn comma<'a>(span: SimpleSpan) -> Rich<'a, Token> { - Rich::custom(span, "Expected a ',' to separate arguments.") +use ariadne::{Color, Label, Report, ReportKind, Source}; +use chumsky::prelude::*; + +pub fn failure( + msg: String, + label: (String, SimpleSpan), + extra_labels: impl IntoIterator, + source: String, +) -> ! { + Report::build(ReportKind::Error, ((), label.1.into_range())) + .with_config(ariadne::Config::new().with_index_type(ariadne::IndexType::Byte)) + .with_message(&msg) + .with_label( + Label::new(((), label.1.into_range())) + .with_message(label.0) + .with_color(Color::Red), + ) + .with_labels(extra_labels.into_iter().map(|label2| { + Label::new(((), label2.1.into_range())) + .with_message(label2.0) + .with_color(Color::Yellow) + })) + .finish() + .print(Source::from(&source)) + .unwrap(); + std::process::exit(1) } -pub fn throw<'a>(prev_err: Rich<'a, Token>, next_err: Rich<'a, Token>) -> Rich<'a, Token> { - if prev_err.expected().len() > 0 { - let expected = prev_err.clone(); - let range = prev_err.span(); - let start = range.start; - - let msg = match prev_err.clone().into_reason() { - chumsky::error::RichReason::Custom(msg) => match msg.as_str() { - ":" => Some(String::from( - "Unfinished interval - add a ':' or specify interval with different length.", - )), - _ => None, - }, - chumsky::error::RichReason::ExpectedFound { - expected, - found: o_found, - } => match o_found { - Some(found) => match found { - chumsky::util::Maybe::Ref(r_t) => Some(format!( - "Expected {} but found: {}.", - expected - .iter() - .map(|exp| format!("{exp}")) - .collect::(), - r_t - )), - chumsky::util::Maybe::Val(t) => Some(format!( - "Expected {} but found: {}.", - expected - .iter() - .map(|exp| format!("{exp}")) - .collect::(), - t - )), - }, - None => Some(format!( - "Expected {} but found nothing.", - expected - .iter() - .map(|exp| format!("{exp}")) - .collect::() - )), - }, - }; - - if let Some(msg) = msg { - return Rich::custom((start..start + expected.span().end).into(), msg); - } - } - - match prev_err.reason() { - RichReason::Custom(_) => prev_err, - _ => next_err, - } +pub fn parse_failure(err: &Rich<'_, impl fmt::Display>, src: String) -> ! { + failure( + err.to_string(), + (err.reason().to_string(), *err.span()), + err.contexts() + .map(|(l, s)| (format!("while parsing this {l}"), *s)), + src, + ) } diff --git a/src/execute.rs b/src/execute.rs index fe85b1d..bea2b1e 100644 --- a/src/execute.rs +++ b/src/execute.rs @@ -8,7 +8,7 @@ use std::{ use antisequence::graph::*; use anyhow::{bail, Result}; -use chumsky::{error::Rich, Parser}; +use chumsky::{error::Rich, input::Input, Parser}; use nix::sys::stat; use nix::unistd; use tempfile::tempdir; @@ -16,6 +16,7 @@ use tracing::info; use crate::{ compile::{compile, CompiledData}, + error::parse_failure, lexer, parser::parser, }; @@ -126,54 +127,27 @@ fn interpret_to_pipes( } pub fn compile_geom(geom: String) -> Result>> { - let parse_res = lexer::lexer().parse(&geom); - - let lex_errors = match parse_res.clone().into_result() { - Ok(_) => vec![], - Err(errs) => errs, - }; - - let parse_errors = match parse_res.into_result() { - Ok(tokens) => { - let input_tokens = tokens.iter().map(|(t, _)| t.clone()).collect::>(); - let parser_instance = parser(); - match parser_instance.parse(input_tokens.as_slice()).into_result() { - Err(errs) => errs - .into_iter() - .map(|e| Rich::::custom(*e.span(), e.reason().to_string())) - .collect::>(), - Ok(description) => { - let res = compile(description.clone()); - - if let Err(e) = res { - vec![Rich::::custom(e.span, e.msg)] - } else { - return Ok(res.ok().unwrap()); - }; - - vec![] - } - } - } - Err(err) => err - .iter() - .map(|rich_err| { - Rich::::custom(*rich_err.span(), format!("{}", rich_err.reason())) - }) - .collect::>(), - }; - - let errors = lex_errors + // lex input + let tokens = lexer::lexer() + .parse(&geom) + .into_result() + .unwrap_or_else(|errs| parse_failure(&errs[0], geom.clone())); + + let tokens = tokens .into_iter() - .map(|e| Rich::custom(*e.span(), e.reason().to_string())) - .chain( - parse_errors - .into_iter() - .map(|e| Rich::custom(*e.span(), e.reason().to_string())), - ) + .map(|(tok, span)| chumsky::span::Spanned { inner: tok, span }) .collect::>(); + let input = tokens[..].split_spanned((0..geom.len()).into()); + + // parse token + let description = parser() + .parse(input) + .into_result() + .unwrap_or_else(|errs| parse_failure(&errs[0], geom.clone())); - Err(errors) + // compile ast + compile(description) + .map_err(|e| parse_failure(&Rich::::custom(e.span, e.msg), geom.clone())) } pub fn read_pairs_to_file( diff --git a/src/geometry/compile/functions.rs b/src/geometry/compile/functions.rs index dec8ce6..ea9295c 100644 --- a/src/geometry/compile/functions.rs +++ b/src/geometry/compile/functions.rs @@ -80,6 +80,7 @@ pub fn compile_fn( path, compile_inner_expr(expr.unboxed(), S(parent_expr, expr_span))?, ), + Function::Filter(path) => CompiledFunction::FilterWithinDist(path, 0), Function::FilterWithinDist(path, mismatch) => { CompiledFunction::FilterWithinDist(path, mismatch) } diff --git a/src/geometry/lexer.rs b/src/geometry/lexer.rs index 2ad1ff7..561686d 100644 --- a/src/geometry/lexer.rs +++ b/src/geometry/lexer.rs @@ -160,7 +160,8 @@ impl fmt::Display for Token { } /// Returns a lexer for EFGDL. -pub fn lexer<'a>() -> impl Parser<'a, &'a str, Vec<(Token, Span)>, extra::Err>> { +pub fn lexer<'src>( +) -> impl Parser<'src, &'src str, Vec<(Token, Span)>, extra::Err>> { let int = text::int(10).from_str().unwrapped().map(Token::Num); let ctrl = choice(( @@ -242,18 +243,11 @@ pub fn lexer<'a>() -> impl Parser<'a, &'a str, Vec<(Token, Span)>, extra::Err(pub T, pub Span); impl S { @@ -70,16 +70,6 @@ impl S { } } -impl PartialEq for S -where - T: PartialEq + Eq, -{ - // TODO: This is a patch fix for testing with the new version of chumsky - fn eq(&self, other: &Self) -> bool { - self.0 == other.0 - } -} - impl Hash for S where T: PartialEq + Eq + Hash, diff --git a/src/geometry/parser.rs b/src/geometry/parser.rs index c488448..309606a 100644 --- a/src/geometry/parser.rs +++ b/src/geometry/parser.rs @@ -2,13 +2,11 @@ use std::fmt::{self, Write}; -use chumsky::prelude::*; +use chumsky::{extra::Err as ExtraErr, input::MappedInput, prelude::*}; -use crate::{ - error::{comma, missing_delimiter, throw}, - lexer::Token, - Nucleotide, S, -}; +use crate::{lexer::Token, Nucleotide, S}; + +use super::Span; /// The length of a nucleotide interval, /// and whether it must match a specific sequence. @@ -73,6 +71,8 @@ pub enum Function { Map(String, S>), /// `map_with_mismatch(I, A, F, n)` MapWithMismatch(String, S>, usize), + /// `filter(I, A)` + Filter(String), /// `filter_within_dist(I, A, n)` FilterWithinDist(String, usize), /// `hamming(F, n)` @@ -103,6 +103,7 @@ impl Function { let S(s, _) = b; write!(f, "map_with_mismatch({first}, {p}, {s}, {n})") } + Filter(p) => write!(f, "filter({first}, {p})"), FilterWithinDist(p, n) => write!(f, "filter_within_dist({first}, {p}, {n})"), Hamming(n) => write!(f, "hamming({first}, {n})"), } @@ -199,617 +200,376 @@ pub struct Description { pub transforms: Option>>>, } -pub fn parser<'src>( -) -> impl Parser<'src, &'src [Token], Description, extra::Err>> + Clone { - /* - Start with creating combinators and - a recursive definition of a geom_piece +fn make_geom_piece( + kind: IntervalKind, + shape: IntervalShape, + label: Option, + span: Span, +) -> Expr { + let expr = Expr::GeomPiece(kind, shape); + if let Some(Expr::Label(lbl)) = label { + Expr::LabeledGeomPiece(lbl, S(Box::new(expr), span)) + } else { + expr + } +} + +type Input<'a> = MappedInput<'a, Token, Span, &'a [Spanned]>; + +macro_rules! function_arguments { + ($base:expr) => {{ + $base + .map_with(|res, state| S(res, state.span())) + .delimited_by( + just(Token::LParen), + just(Token::RParen) + ) + }}; + + ($base:expr, $first:expr $(, $rest:expr)* $(,)?) => {{ + $base + .then_ignore(just(Token::Comma)) + .then($first) + $( + .then_ignore(just(Token::Comma)).then($rest) + )* + .map_with(|res, state| S(res, state.span())) + .delimited_by( + just(Token::LParen), + just(Token::RParen) + ) + }} +} + +macro_rules! unary_function { + ($func:tt, $arg:expr) => {{ + just(Token::$func) + .labelled(stringify!($func)) + .map_with(|_, state| state.span()) + .then($arg) + .map(move |(fn_span, S(geom_p, span))| { + Expr::Function( + S(Function::$func.clone(), fn_span), + S(Box::new(geom_p), span), + ) + }) + .labelled(concat!("Unary function ", stringify!($func))) + .as_context() + }}; +} + +macro_rules! binary_function { + ($func:tt, $arg:expr) => {{ + just(Token::$func) + .labelled(stringify!($func)) + .map_with(|_, state| state.span()) + .then($arg) + .map(move |(fn_span, S((geom_p, arg), span))| { + Expr::Function( + S(Function::$func.clone()(arg), fn_span), + S(Box::new(geom_p), span), + ) + }) + .labelled(concat!("Binary function ", stringify!($func))) + .as_context() + }}; +} - At execution time we will check if it is a valid - geometry without any ambiguity. Here we will - restruct some invalid definitions - */ +macro_rules! ternary_function { + ($func:tt, $arg:expr) => {{ + just(Token::$func) + .labelled(stringify!($func)) + .map_with(|_, state| state.span()) + .then($arg) + .map(move |(fn_span, S(((geom_p, arg_one), arg_two), span))| { + Expr::Function( + S(Function::$func.clone()(arg_one, arg_two), fn_span), + S(Box::new(geom_p), span), + ) + }) + .labelled(concat!("Ternary function ", stringify!($func))) + .as_context() + }}; +} - let label = select! { Token::Label(ident) => ident }; +macro_rules! quaternary_function { + ($func:tt, $arg:expr $(,)?) => {{ + just(Token::$func) + .labelled(stringify!($func)) + .map_with(|_, state| state.span()) + .then($arg) + .map( + move |(fn_span, S((((geom_p, arg_one), arg_two), arg_three), span))| { + Expr::Function( + S( + Function::$func.clone()(arg_one, arg_two, arg_three), + fn_span, + ), + S(Box::new(geom_p), span), + ) + }, + ) + .labelled(concat!("Quaternary function ", stringify!($func))) + .as_context() + }}; +} - let num = select! { Token::Num(n) => n }; +macro_rules! nary_functions { + ($helper:ident, $arg:expr, $($func:tt),* $(,)?) => {{ + choice(( + $( + $helper!($func, $arg.clone()), + )* + )) + }} +} - let file = select! { Token::File(f) => f }; +macro_rules! parse_geometry_piece { + ($piece_type:expr, $inline_label:expr, $kind:expr) => {{ + $piece_type + .then($inline_label.or_not()) + .then($kind) + .map_with(|((kind, label), shape), state| { + make_geom_piece(kind, shape, label, state.span()) + }) + }}; +} - let argument = select! { Token::Arg(n) => n.to_string() }; +// TODO: label everything to add better errors +pub fn parser<'tokens>( +) -> Box, Description, ExtraErr>> + 'tokens> +{ + // begin with defining basic token selectors + let label = select! { Token::Label(x) => x.clone() }; + let num = select! {Token::Num(n) => n }; + let file = select! {Token::File(f) => f.clone() }; + let argument = select! {Token::Arg(n) => n.to_string() }; + let self_ = select! { Token::Self_ => Expr::Self_ }; let piece_type = select! { Token::Barcode => IntervalKind::Barcode, Token::Umi => IntervalKind::Umi, Token::Discard => IntervalKind::Discard, Token::ReadSeq => IntervalKind::ReadSeq, - } - .labelled("specifier"); + }; let nuc = select! { - Token::U => Nucleotide::U, Token::A => Nucleotide::A, Token::T => Nucleotide::T, Token::G => Nucleotide::G, Token::C => Nucleotide::C, + Token::U => Nucleotide::U, }; let inline_label = label - .map_err_with_state(|t, span, _| { - throw( - t, - Rich::custom( - span, - "Found delimiters '<' and '>' which must delimit a label.", - ), - ) - }) - .delimited_by(just(Token::LAngle), just(Token::RAngle)) - .map_err_with_state(|t, span, _| { - throw(t, missing_delimiter(Token::RAngle, span, Some("label"))) - }) - .map_with(|l, extra| Expr::Label(S(l, extra.span()))) - .labelled("label"); - - let label = label - .map_with(|s, state| S(s, state.span())) - .labelled("label"); - - let self_ = just(Token::Self_).to(Expr::Self_).labelled("self"); + .delimited_by( + just(Token::LAngle).labelled("opening '<'"), + just(Token::RAngle).labelled("closing '>'"), + ) + .map_with(|l, span: &mut _| Expr::Label(S(l, span.span()))) + .labelled("inline label"); + // interval shape parsers let range = num + .labelled("number") .then_ignore(just(Token::Dash)) - .then(num) - .map_err_with_state(|t, span, _| { - throw( - t, - Rich::custom( - span, - "Expected a numerical literal after '-' for a ranged length interval.", - ), - ) - }) - .map_with(|(a, b), state| IntervalShape::RangedLen(S((a, b), state.span()))) + .then(num.labelled("number")) + .map_with(|(a, b), span| IntervalShape::RangedLen(S((a, b), span.span()))) .delimited_by(just(Token::LBracket), just(Token::RBracket)) - .map_err_with_state(|t, span, _| { - throw( - t, - missing_delimiter(Token::RBracket, span, Some("variable length interval")), - ) - }); + .labelled("variable length geometry peice shape: [-]"); let fixed_len = num - .map_err_with_state(|t, span, _| { - throw( - t, - Rich::custom( - span, - "Expecting a length specifier '[-]', or '[]'.", - ), - ) - }) + .labelled("number") .map_with(|n, state| IntervalShape::FixedLen(S(n, state.span()))) .delimited_by(just(Token::LBracket), just(Token::RBracket)) - .map_err_with_state(|t, span, _| { - throw( - t, - missing_delimiter(Token::LBracket, span, Some("fixed length interval")), - ) - }) - .labelled("fixed_len"); + .labelled("fixed length geometry piece shape: []"); - let seq = nuc + let nuc_seq = nuc + .labelled("nucleotide") .repeated() .at_least(1) .collect::>() - .map_err_with_state(|t, span, _| { - throw( - t, - Rich::custom(span, "A fragment must contain at least one ATGCU character"), - ) - }); - - let nucstr = seq - .map_with(|nucstr, state| IntervalShape::FixedSeq(S(nucstr, state.span()))) + .map_with(|seq, span| IntervalShape::FixedSeq(S(seq, span.span()))) .delimited_by(just(Token::LBracket), just(Token::RBracket)) - .map_err_with_state(|t, span, _| { - throw( - t, - missing_delimiter(Token::LBracket, span, Some("fragment specifier")), - ) - }) - .labelled("nucstr"); + .labelled("nucleotide sequence"); + // geom piece parsers let unbounded = piece_type - .map_err_with_state(|t, span, _| { - throw( - t, - Rich::custom(span, "Specify interval with either 'b'/'u'/'f'/'r'/'x'."), - ) - }) .then(inline_label.clone().or_not()) .then_ignore(just(Token::Colon)) - .map_with(|(type_, label), state| { - let expr = Expr::GeomPiece(type_, IntervalShape::UnboundedLen); - if let Some(Expr::Label(label)) = label { - Expr::LabeledGeomPiece(label, S(Box::new(expr), state.span())) - } else { - expr - } + .map_with(|(kind, label), span| { + make_geom_piece(kind, IntervalShape::UnboundedLen, label, span.span()) }) - .labelled("unbound_seg"); - - let ranged = piece_type - .map_err_with_state(|t, span, _| { - throw( - t, - Rich::custom(span, "Specify interval with either 'b'/'u'/'f'/'r'/'x'."), - ) - }) - .then(inline_label.clone().or_not()) - .then(range) - .map_err_with_state(|t, span, _| { - throw( - t, - Rich::custom( - span, - "Expecting a length specifier either ':', '[-]', or '[]'.", - ), - ) - }) - .map_with(|((type_, label), range), state| { - let expr = Expr::GeomPiece(type_, range); - if let Some(Expr::Label(label)) = label { - Expr::LabeledGeomPiece(label, S(Box::new(expr), state.span())) - } else { - expr - } - }) - .labelled("ranged_len_seg"); - - let fixed = piece_type - .map_err_with_state(|t, span, _| { - throw( - t, - Rich::custom(span, "Specify interval with either 'b'/'u'/'f'/'r'/'x'."), - ) - }) - .then(inline_label.clone().or_not()) - .then(fixed_len) - .map_err_with_state(|t, span, _| { - throw( - t, - Rich::custom( - span, - "Expecting a length specifier either ':', '[-]', or '[]'.", - ), - ) - }) - .map_with(|((type_, label), len), state| { - let expr = Expr::GeomPiece(type_, len); - if let Some(Expr::Label(label)) = label { - Expr::LabeledGeomPiece(label, S(Box::new(expr), state.span())) - } else { - expr - } - }) - .labelled("fixed_len_seg"); - - let fixed_seq = just(Token::FixedSeq) - .to(IntervalKind::FixedSeq) - .then(inline_label.clone().or_not()) - .then(nucstr) - .map_err_with_state(|t, span, _| { - throw( - t, - Rich::custom(span, "Expecting a sequence to match delimited by '[ .. ]'."), - ) - }) - .map_with(|((type_, label), nucs), state| { - let expr = Expr::GeomPiece(type_, nucs); - if let Some(Expr::Label(label)) = label { - Expr::LabeledGeomPiece(label, S(Box::new(expr), state.span())) - } else { - expr - } - }) - .labelled("seq_seg"); - - let geom_piece = choice(( - unbounded.clone(), - ranged.clone(), - fixed.clone(), - fixed_seq.clone(), - inline_label, - self_, - )) - .labelled("geom_piece"); - - let transformed_pieces = recursive(|transformed_pieces| { - let transformed_pieces = transformed_pieces - .map_err_with_state(|t, span, _| throw(t, Rich::custom(span, "Invalid declaration of interval"))); - - let recursive_num_arg = transformed_pieces - .clone() - .then_ignore(just(Token::Comma)) - .map_err_with_state(|t, span, _| throw(t, Rich::custom(span, "Expected a ',' to separate arguments."))) - .then(num) - .map_err_with_state(|t, span, _| throw(t, Rich::custom(span, "Expected a numerical literal as a second argument."))) - .map_with(|s, state| S(s, state.span())) - .delimited_by(just(Token::LParen), just(Token::RParen)) - .map_err_with_state(|t, span, _| throw(t, missing_delimiter(Token::LParen, span, None))); - - let recursive_num_nuc_args = transformed_pieces - .clone() - .then_ignore(just(Token::Comma)) - .map_err_with_state(|t, span, _| throw(t, comma(span))) - .then(num) - .map_err_with_state(|t, span, _| throw(t, Rich::custom(span, "Expected a numerical literal as a second argument."))) - .then_ignore(just(Token::Comma)) - .map_err_with_state(|t, span, _| throw(t, comma(span))) - .then(nuc) - .map_err_with_state(|t, span, _| throw(t, Rich::custom(span, "Expected an ATGCU literal as a third argument."))) - .map_with(|s, state| S(s, state.span())) - .delimited_by(just(Token::LParen), just(Token::RParen)) - .map_err_with_state(|t, span, _| throw(t, missing_delimiter(Token::LParen, span, None))); - - let recursive_no_arg = transformed_pieces - .clone() - .map_with(|s, state| S(s, state.span())) - .delimited_by(just(Token::LParen), just(Token::RParen)) - .map_err_with_state(|t, span, _| throw(t, missing_delimiter(Token::LParen, span, None))); - + .labelled("Unbounded geometry peice: e.g. 'r:'") + .as_context(); + + let ranged = parse_geometry_piece!(piece_type, inline_label.clone(), range) + .labelled("Variable length geometry piece: e.g. 'b[9-10]'") + .as_context(); + let fixed_seq = parse_geometry_piece!( + just(Token::FixedSeq).to(IntervalKind::FixedSeq), + inline_label.clone(), + nuc_seq + ) + .labelled("Fixed sequence geometry piece: e.g. 'f[ATGC]'") + .as_context(); + let fixed = parse_geometry_piece!(piece_type, inline_label.clone(), fixed_len) + .labelled("Fixed length geometry piece: e.g. 'b[10]'") + .as_context(); + + // what constitutes a valid geometry peice + let geom_piece = choice((unbounded, ranged, fixed, fixed_seq, inline_label, self_)); + + // transformed peices + let transformed_pieces = recursive(|tp| { choice(( - geom_piece.clone() - .map_err_with_state(|t, span, _| throw(t, Rich::custom(span, "Unexpected error when creating an interval."))), - just(Token::Remove) - .map_with(|_, state| S(Function::Remove, state.span())) - .then(recursive_no_arg.clone()) - .map(|(fn_, tok)| Expr::Function(fn_, tok.boxed())) - .labelled("remove"), - just(Token::Normalize) - .map_with(|_, state| S(Function::Normalize, state.span())) - .then(recursive_no_arg.clone()) - .map(|(fn_, tok)| Expr::Function(fn_, tok.boxed())) - .labelled("norm"), - just(Token::Hamming) - .map_with(|_, state| state.span()) - .then( - geom_piece - .clone() - .map_err_with_state(|t, span, _| { - throw(t, Rich::custom(span, "Expected a fragment specified interval as the first argument - 'hamming' cannot take a transformed interval.")) - }) - .then_ignore(just(Token::Comma)) - .then(num).map_err_with_state(|t, span, _| throw(t, Rich::custom(span, "Expected a numeric literal as a second argument."))) - .delimited_by(just(Token::LParen), just(Token::RParen)) - .map_with(|s, state| S(s, state.span())) - .map_err_with_state(|t, span, _| throw(t, missing_delimiter(Token::LParen, span, Some("'hamming'")))), - ) - .map_err_with_state(|t, span, _| { - throw(t, Rich::custom(span, "Missing argument for hamming - ")) - }) - .map_with(|(fn_span, S((geom_p, num), span)), _| { - Expr::Function( - S(Function::Hamming(num), fn_span), - S(Box::new(geom_p), span), - ) - }) - .labelled("hamming"), - just(Token::Truncate) - .map_with(|_, state| state.span()) - .then(recursive_num_arg.clone()) - .map_with(|(fn_span, S((geom_p, num), span)), _| { - Expr::Function( - S(Function::Truncate(num), fn_span), - S(Box::new(geom_p), span), - ) - }) - .labelled("trunc"), - just(Token::TruncateLeft) - .map_with(|_, state| state.span()) - .then(recursive_num_arg.clone()) - .map_with(|(fn_span, S((geom_p, num), span)), _| { - Expr::Function( - S(Function::TruncateLeft(num), fn_span), - S(Box::new(geom_p), span), - ) - }) - .labelled("trunc_left"), - just(Token::TruncateTo) - .map_with(|_, state| state.span()) - .then(recursive_num_arg.clone()) - .map_with(|(fn_span, S((geom_p, num), span)), _| { - Expr::Function( - S(Function::TruncateTo(num), fn_span), - S(Box::new(geom_p), span), - ) - }) - .labelled("trunc_to"), - just(Token::TruncateToLeft) - .map_with(|_, state| state.span()) - .then(recursive_num_arg.clone()) - .map_with(|(fn_span, S((geom_p, num), span)), _| { - Expr::Function( - S(Function::TruncateToLeft(num), fn_span), - S(Box::new(geom_p), span), - ) - }) - .labelled("trunc_to_left"), - just(Token::Pad) - .map_with(|_, state| state.span()) - .then(recursive_num_nuc_args.clone()) - .map_with(|(fn_span, S(((geom_p, num), nuc), span)), _| { - Expr::Function( - S(Function::Pad(num, nuc), fn_span), - S(Box::new(geom_p), span), - ) - }) - .labelled("pad"), - just(Token::PadLeft) - .map_with(|_, state| state.span()) - .then(recursive_num_nuc_args.clone()) - .map_with(|(fn_span, S(((geom_p, num), nuc), span)), _| { - Expr::Function( - S(Function::PadLeft(num, nuc), fn_span), - S(Box::new(geom_p), span), - ) - }) - .labelled("pad_left"), - just(Token::PadTo) - .map_with(|_, state| state.span()) - .then(recursive_num_nuc_args.clone()) - .map_with(|(fn_span, S(((geom_p, num), nuc), span)), _| { - Expr::Function( - S(Function::PadTo(num, nuc), fn_span), - S(Box::new(geom_p), span), - ) - }) - .labelled("pad_to"), - just(Token::PadToLeft) - .map_with(|_, state| state.span()) - .then(recursive_num_nuc_args) - .map_with(|(fn_span, S(((geom_p, num), nuc), span)), _| { - Expr::Function( - S(Function::PadToLeft(num, nuc), fn_span), - S(Box::new(geom_p), span), - ) - }) - .labelled("pad_to_left"), - just(Token::Reverse) - .map_with(|_, state| S(Function::Reverse, state.span())) - .then(recursive_no_arg.clone()) - .map(|(fn_, tok)| Expr::Function(fn_, tok.boxed())) - .labelled("rev"), - just(Token::ReverseComp) - .map_with(|_, state| S(Function::ReverseComp, state.span())) - .then(recursive_no_arg.clone()) - .map(|(fn_, tok)| Expr::Function(fn_, tok.boxed())) - .labelled("revcomp"), - just(Token::Map) - .map_with(|_, state| state.span()) - .then( - transformed_pieces - .clone() - .then_ignore(just(Token::Comma)) - .map_err_with_state(|t, span, _| throw(t, comma(span))) - .then(file.or(argument)) - .map_err_with_state(|t, span, _| throw(t, Rich::custom(span, "Expected a file or $ to be mapped to command line argument as second argument."))) - .then_ignore(just(Token::Comma)) - .map_err_with_state(|t, span, _| throw(t, comma(span))) - .then(transformed_pieces.clone().map_with(|s, state| S(s, state.span()))) - .map_with(|s, state| S(s, state.span())) - .delimited_by(just(Token::LParen), just(Token::RParen)) - .map_err_with_state(|t, span, _| throw(t, missing_delimiter(Token::LParen, span, Some("'map'")))), - ) - .map_with(|(fn_span, S(((geom_p, path), self_expr), span)), _| { - Expr::Function( - S(Function::Map(path, self_expr.boxed()), fn_span), - S(Box::new(geom_p), span), - ) - }) - .labelled("map"), - just(Token::MapWithMismatch) - .map_with(|_, state| state.span()) - .then( - transformed_pieces - .clone() - .then_ignore(just(Token::Comma)) - .map_err_with_state(|t, span, _| throw(t, comma(span))) - .then(file.or(argument)) - .map_err_with_state(|t, span, _| throw(t, Rich::custom(span, "Expected a file or $ to be mapped to command line argument as second argument."))) - .then_ignore(just(Token::Comma)) - .map_err_with_state(|t, span, _| throw(t, comma(span))) - .then(transformed_pieces.clone().map_with(|s, state| S(s, state.span()))) - .then_ignore(just(Token::Comma)) - .map_err_with_state(|t, span, _| throw(t, comma(span))) - .then(num) - .map_err_with_state(|t, span, _| throw(t, Rich::custom(span, "Expected a numerical literal as the allowable mismatch when mapping interval."))) - .map_with(|s, state| S(s, state.span())) - .delimited_by(just(Token::LParen), just(Token::RParen)) - .map_err_with_state(|t, span, _| throw(t, missing_delimiter(Token::LParen, span, Some("'map_with_mismatch'")))), - ) - .map_with(|(fn_span, S((((geom_p, path), self_expr), num), span)), _| { - Expr::Function( - S( - Function::MapWithMismatch(path, self_expr.boxed(), num), - fn_span, - ), - S(Box::new(geom_p), span), - ) - }) - .labelled("map_dist"), - just(Token::FilterWithinDist) - .map_with(|_, state| state.span()) - .then( - geom_piece - .clone() - .then_ignore(just(Token::Comma)) - .map_err_with_state(|t, span, _| throw(t, comma(span))) - .then(file.or(argument)) - .map_err_with_state(|t, span, _| throw(t, Rich::custom(span, "Expected a file or $ to be mapped to command line argument as second argument."))) - .then_ignore(just(Token::Comma)) - .map_err_with_state(|t, span, _| throw(t, comma(span))) - .then(num) - .map_err_with_state(|t, span, _| throw(t, Rich::custom(span, "Expected a numerical literal as the allowable mismatch when filtering interval."))) - .map_with(|s, state| S(s, state.span())) - .delimited_by(just(Token::LParen), just(Token::RParen)).map_err_with_state(|t, span, _| throw(t, missing_delimiter(Token::LParen, span, Some("filter_with_mismatch")))), + geom_piece.clone(), + nary_functions!( + unary_function, + function_arguments!(tp + .clone() + .labelled("geometry piece as sole argument to function")), + ReverseComp, + Reverse, + Remove, + Normalize + ), + nary_functions!( + binary_function, + function_arguments!( + tp.clone() + .labelled("geometry peice as argument to binary function"), + num.labelled("numerical argument to binary function") + ), + Hamming, + Truncate, + TruncateLeft, + TruncateTo, + TruncateToLeft + ), + binary_function!( + Filter, + function_arguments!( + tp.clone() + .labelled("geometry piece as argument to 'filter'"), + file.labelled("file name") + .or(argument.labelled("argument from commandline")) ) - .map_with(|(fn_span, S(((geom_p, path), num), span)), _| { - Expr::Function( - S(Function::FilterWithinDist(path, num), fn_span), - S(Box::new(geom_p), span), - ) - }) - .labelled("filter_dist"), - just(Token::Filter) - .map_with(|_, state| state.span()) - .then( - geom_piece - .then_ignore(just(Token::Comma)) - .map_err_with_state(|t, span, _| throw(t, comma(span))) - .then(file.or(argument)) - .map_err_with_state(|t, span, _| throw(t, Rich::custom(span, "Expected a file or $ to be mapped to command line argument as second argument."))) - .map_with(|s, state| S(s, state.span())) - .delimited_by(just(Token::LParen), just(Token::RParen)).map_err_with_state(|t, span, _| throw(t, missing_delimiter(Token::LParen, span, Some("'filter'")))), + ), + nary_functions!( + ternary_function, + function_arguments!( + tp.clone() + .labelled("geometry piece as argument to 'pad'-llike functions"), + num.labelled("numerical argument to 'pad'-like functions"), + nuc.labelled("nucleotide to pad with") + ), + Pad, + PadLeft, + PadTo, + PadToLeft + ), + nary_functions!( + ternary_function, + function_arguments!( + tp.clone().labelled("geometry peice to 'map'"), + file.labelled("file name") + .or(argument.labelled("argument from commandline")), + tp.clone() + .labelled("geometry piece after mapping") + .map_with(|transf_p, state| S(Box::new(transf_p), state.span())) + ), + Map, + ), + ternary_function!( + FilterWithinDist, + function_arguments!( + tp.clone() + .labelled("geometry piece to 'filter_within_dist'"), + file.labelled("file name") + .or(argument.labelled("argument from commandline")), + num.labelled("numerical argument") ) - .map_with(|(fn_span, S((geom_p, path), span)),_| { - Expr::Function( - S(Function::FilterWithinDist(path, 0), fn_span), - S(Box::new(geom_p), span), - ) - }) - .labelled("filter"), + ), + nary_functions!( + quaternary_function, + function_arguments!( + tp.clone().labelled("geometry piece to 'map_with_mismatch'"), + file.labelled("file name") + .or(argument.labelled("argument from commandline")), + tp.clone() + .labelled("geometry piece after mapping") + .map_with(|transf_p, state| S(Box::new(transf_p), state.span())), + num.labelled("numerical argument") + ), + MapWithMismatch, + ), )) }) - .map_err_with_state(|t, span, _| throw(t, Rich::custom(span, "Invalid construction of an interval"))) - .map_with(|s, state| S(s, state.span())); + .map_with(|s, state| S(s, state.span())); + // define the basic peices of an EFGDL description let definitions = label - .map_err_with_state(|t, span, _| { - throw( - t, - Rich::custom(span, "Expected a label to begin a definition."), - ) - }) + .labelled("definition identifier") + .map_with(|l, state| S(l, state.span())) .then_ignore(just(Token::Equals)) .then(transformed_pieces.clone()) - .map_err_with_state(|t, span, _| { - throw(t, Rich::custom(span, "Error creating variable declaration")) - }) - .map_with(|(label, geom_p), state| { - S( - Definition { - label, - expr: geom_p, - }, - state.span(), - ) - }) + .map_with(|(label, expr), span| S(Definition { label, expr }, span.span())) .repeated() .collect() - .map_with(|s, state| S(s, state.span())); + .map_with(|defs, span| S(defs, span.span())); let reads = num - .map_err_with_state(|t, span, _| { - throw(t, Rich::custom(span, "Expected a number to start a read")) - }) - .map_with(|s, state| S(s, state.span())) + .labelled("read number") + .map_with(|n, state| S(n, state.span())) .then( transformed_pieces .clone() - .labelled("transformed_pieces_for_reads") .repeated() .at_least(1) .collect() - .delimited_by(just(Token::LBrace), just(Token::RBrace)) - .map_err_with_state(|t, span, _| { - throw(t, missing_delimiter(Token::LBrace, span, Some("reads"))) - }), + .delimited_by(just(Token::LBrace), just(Token::RBrace)), ) - .map_with(|(n, read), state| { - S( - Read { - index: n, - exprs: read, - }, - state.span(), - ) - }) + .map_with(|(index, exprs), span| S(Read { index, exprs }, span.span())) .repeated() .exactly(2) .collect::>() - .map_err_with_state(|t, span, _| { - throw( - t, - Rich::custom(span, "Must provide two reads - only found one"), - ) - }); + .map_with(|v, span| S(v, span.span())); let transform_read = num - .map_with(|s, state| S(s, state.span())) + .labelled("read number") + .map_with(|n, state| S(n, state.span())) .then( transformed_pieces - .clone() .repeated() .at_least(1) .collect() - .delimited_by(just(Token::LBrace), just(Token::RBrace)) - .map_err_with_state(|t, span, _| { - throw( - t, - missing_delimiter(Token::LBrace, span, Some("transformation")), - ) - }), + .delimited_by(just(Token::LBrace), just(Token::RBrace)), ) - .map_with(|(n, read), state| { - S( - Read { - index: n, - exprs: read, - }, - state.span(), - ) - }); + .map_with(|(index, exprs), state| S(Read { index, exprs }, state.span())); - let transformation = choice(( - end().map(|()| None), + let transformations = choice(( + end().map(|_| None), just(Token::TransformTo) .then( transform_read .repeated() .at_least(1) .at_most(2) - .collect() + .collect::>() .then(end()), ) .map_with(|(_, (val, _)), state| Some(S(val, state.span()))), )); - definitions - .map_err_with_state(|t, span, _| { - throw( - t, - Rich::custom(span, "Error while parsing EFGDL specification."), - ) - }) - .then(reads.map_with(|s, state| S(s, state.span()))) - .then(transformation) - .map_err_with_state(|t, span, _| { - throw( - t, - Rich::custom(span, "Error while parsing EFGDL specification."), - ) - }) - .map_with(|((definitions, reads), transforms), _| Description { - definitions, - reads, - transforms, - }) - .map_err_with_state(|t, span, _| { - throw( - t, - Rich::custom(span, "Error while parsing EFGDL specification."), - ) - }) + Box::new( + definitions + .then(reads) + .then(transformations) + .map(|((defs, reads), transforms)| Description { + definitions: defs, + reads, + transforms, + }), + ) } diff --git a/tests/common/utils.rs b/tests/common/utils.rs index 26f14c4..7185b32 100644 --- a/tests/common/utils.rs +++ b/tests/common/utils.rs @@ -1,12 +1,37 @@ -use chumsky::{error::Rich, Parser}; -use seqproc::lexer::lexer; +use chumsky::{error::Rich, input::Input, Parser}; +use seqproc::{ + lexer::{self, Token}, + parser::{parser, Description}, +}; -pub fn into_input_tokens(i: &str) -> (Vec, Vec>) { - let (res, lex_err) = lexer().parse(i).into_output_errors(); +pub struct ParsedInput<'a> { + pub parse_res: Option, + pub lex_errs: Vec>, + pub parse_errs: Vec>, +} + +pub fn result_with_errs<'a>(input: &'a str) -> ParsedInput<'a> { + // lex input + let (lex_res, lex_errs) = lexer::lexer().parse(&input).into_output_errors(); + let tokens = lex_res.unwrap(); + + let tokens = tokens + .into_iter() + .map(|(tok, span)| chumsky::span::Spanned { inner: tok, span }) + .collect::>(); + let input = tokens[..].split_spanned((0..input.len()).into()); - let res = res.unwrap(); + // parse token + let (parse_res, parse_errs) = parser().parse(input).into_output_errors(); - let input_tokens = res.iter().map(|(t, _)| t.clone()).collect::>(); + let parse_errs = parse_errs + .into_iter() + .map(|r| Rich::custom(*r.span(), r.reason())) + .collect::>(); - (input_tokens, lex_err) + ParsedInput { + parse_res, + lex_errs, + parse_errs, + } } diff --git a/tests/compile_tests.rs b/tests/compile_tests.rs index 2c74d73..3e45686 100644 --- a/tests/compile_tests.rs +++ b/tests/compile_tests.rs @@ -1,21 +1,25 @@ +#[macro_use] mod common; use std::collections::HashMap; -use chumsky::prelude::*; use seqproc::{ compile::{compile, definitions::compile_definitions, reads::compile_reads, utils::Error}, execute::compile_geom, - parser::parser, }; +use crate::common::utils::{result_with_errs, ParsedInput}; + #[test] fn no_err() -> Result<(), Error> { let src = "1{remove(hamming(f[CAG], 1))}2{r:}"; - let (input_tokens, _) = common::utils::into_input_tokens(src); - - let res = parser().parse(&input_tokens).into_output().unwrap(); + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); compile_reads(res.reads, HashMap::new())?; @@ -26,9 +30,12 @@ fn no_err() -> Result<(), Error> { fn fail_norm() { let src = "1{norm(r:)}2{r:}"; - let (input_tokens, _) = common::utils::into_input_tokens(src); - - let res = parser().parse(&input_tokens).into_output().unwrap(); + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); let res = compile_reads(res.reads, HashMap::new()); @@ -39,9 +46,12 @@ fn fail_norm() { fn pass_composition() { let src = "1{trunc_to(rev(r:), 1)}2{r:}"; - let (input_tokens, _) = common::utils::into_input_tokens(src); - - let res = parser().parse(&input_tokens).into_output().unwrap(); + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); let res = compile_reads(res.reads, HashMap::new()); @@ -52,9 +62,12 @@ fn pass_composition() { fn fail_remove() { let src = "1{rev(remove(r:))}2{r:}"; - let (input_tokens, _) = common::utils::into_input_tokens(src); - - let res = parser().parse(&input_tokens).into_output().unwrap(); + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); let res = compile_reads(res.reads, HashMap::new()); @@ -65,9 +78,13 @@ fn fail_remove() { fn discard_as_void() { let src = "1{rev(x[10])}2{r:}"; - let (input_tokens, _) = common::utils::into_input_tokens(src); - - let res = parser().parse(&input_tokens).into_output().unwrap(); + let ParsedInput { + parse_res, + lex_errs, + parse_errs, + } = result_with_errs(src); + println!("{:?} {:?}", lex_errs, parse_errs); + let res = parse_res.unwrap(); let res = compile_reads(res.reads, HashMap::new()); @@ -81,9 +98,12 @@ brc = b[10] brc1 = b[1-4] 1{}2{r:}"; - let (input_tokens, _) = common::utils::into_input_tokens(src); - - let res = parser().parse(&input_tokens).into_output().unwrap(); + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); let def_map = compile_definitions(res.definitions)?; @@ -99,10 +119,12 @@ brc = b[10] brc = b[1-4] 1{}2{r:}"; - let (input_tokens, _) = common::utils::into_input_tokens(src); - - let res = parser().parse(&input_tokens).into_output().unwrap(); - + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); let def_map = compile_definitions(res.definitions); assert!(def_map.is_err()); @@ -113,9 +135,12 @@ fn label_replacement() { let src = "test = r: 1{pad_to(, 5, A)}2{r:}"; - let (input_tokens, _) = common::utils::into_input_tokens(src); - - let res = parser().parse(&input_tokens).into_output().unwrap(); + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); let def_map = compile_definitions(res.definitions).unwrap(); @@ -129,9 +154,12 @@ fn no_variable() { let src = "testing = r: 1{pad(, 5, A)}2{r:}"; - let (input_tokens, _) = common::utils::into_input_tokens(src); - - let res = parser().parse(&input_tokens).into_output().unwrap(); + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); let def_map = compile_definitions(res.definitions).unwrap(); @@ -144,9 +172,12 @@ fn no_variable() { fn expr_unwrap() -> Result<(), Error> { let src = "1{pad(norm(b[9-10]), 1, A)remove(f[CAGAGC])u[8]remove(b[10])}2{r:}"; - let (input_tokens, _) = common::utils::into_input_tokens(src); - - let res = parser().parse(&input_tokens).into_output().unwrap(); + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); compile(res)?; @@ -159,9 +190,12 @@ fn fail_reuse_label() { brc = b[10] 1{}2{r:}"; - let (input_tokens, _) = common::utils::into_input_tokens(src); - - let res = parser().parse(&input_tokens).into_output().unwrap(); + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); let def_map = compile_definitions(res.definitions).unwrap(); @@ -177,9 +211,12 @@ brc = b[10] brc1 = pad(, 1, A) 1{}2{r:}"; - let (input_tokens, _) = common::utils::into_input_tokens(src); - - let res = parser().parse(&input_tokens).into_output().unwrap(); + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); let def_map = compile_definitions(res.definitions); @@ -193,9 +230,12 @@ brc = b[10] umi = pad(u[10], 1, A) 1{}2{r:}"; - let (input_tokens, _) = common::utils::into_input_tokens(src); - - let res = parser().parse(&input_tokens).into_output().unwrap(); + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); compile(res)?; @@ -209,10 +249,12 @@ brc = b[10] umi = pad(u[10], 1, A) 1{}2{r:}"; - let (input_tokens, _) = common::utils::into_input_tokens(src); - - let res = parser().parse(&input_tokens).into_output().unwrap(); - + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); let res = compile(res); assert!(res.is_err()); @@ -224,9 +266,12 @@ fn fail_label_composition() { brc = remove(trunc(b[10], 3)) 1{pad(, 1, A)}2{r:}"; - let (input_tokens, _) = common::utils::into_input_tokens(src); - - let res = parser().parse(&input_tokens).into_output().unwrap(); + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); let res = compile(res); @@ -237,10 +282,12 @@ brc = remove(trunc(b[10], 3)) fn valid_geom() -> Result<(), Error> { let src = "1{b[9-11]remove(f[CAGAGC])u[8]b[10]}2{r:}"; - let (input_tokens, _) = common::utils::into_input_tokens(src); - - let res = parser().parse(&input_tokens).into_output().unwrap(); - + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); compile(res)?; Ok(()) @@ -250,9 +297,12 @@ fn valid_geom() -> Result<(), Error> { fn invalid_geom_one() { let src = "1{b[9-11]f[CAGAGC]r:u[8]b[10]}2{r:}"; - let (input_tokens, _) = common::utils::into_input_tokens(src); - - let res = parser().parse(&input_tokens).into_output().unwrap(); + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); let res = compile(res); @@ -263,10 +313,12 @@ fn invalid_geom_one() { fn invalid_geom_two() { let src = "1{f[GAG]b[10-11]b[10]}2{r:}"; - let (input_tokens, _) = common::utils::into_input_tokens(src); - - let res = parser().parse(&input_tokens).into_output().unwrap(); - + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); let res = compile(res); assert!(res.is_err()); @@ -281,9 +333,12 @@ test = r: 1{pad(, 1, A)f[CAGAGC]f[CAGA]}2{r:} -> 1{remove()remove()} "; - let (input_tokens, _) = common::utils::into_input_tokens(src); - - let res = parser().parse(&input_tokens).into_output().unwrap(); + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); compile(res)?; @@ -298,9 +353,12 @@ umi = norm(u[9-11]) 1{pad(, 1, A)f[CAGAGC]f[CAGA]}2{r:} -> 1{remove()remove(pad(, 1, A))} "; - let (input_tokens, _) = common::utils::into_input_tokens(src); - - let res = parser().parse(&input_tokens).into_output().unwrap(); + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); compile(res)?; @@ -311,10 +369,12 @@ umi = norm(u[9-11]) fn compile_map_arguments() -> Result<(), Error> { let src = "1{map(b[10-11], \"file\", norm(self))}2{r:}"; - let (input_tokens, _) = common::utils::into_input_tokens(src); - - let res = parser().parse(&input_tokens).into_output().unwrap(); - + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); compile(res)?; Ok(()) @@ -326,10 +386,12 @@ fn compile_map_arguments_with_label() -> Result<(), Error> { brc = b[10-11] 1{map(, \"file\", norm(self))}2{r:}"; - let (input_tokens, _) = common::utils::into_input_tokens(src); - - let res = parser().parse(&input_tokens).into_output().unwrap(); - + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); compile(res)?; Ok(()) diff --git a/tests/parser_tests.rs b/tests/parser_tests.rs index 5d23409..21ca77f 100644 --- a/tests/parser_tests.rs +++ b/tests/parser_tests.rs @@ -1,19 +1,25 @@ mod common; -use chumsky::prelude::*; use seqproc::{ - parser::{parser, Definition, Expr, Function, IntervalKind, IntervalShape, Read}, + parser::{Definition, Expr, Function, IntervalKind, IntervalShape, Read}, Nucleotide, S, }; +use crate::common::utils::{result_with_errs, ParsedInput}; + #[test] fn definition() { let src = "brc = b[10] 1{}2{r:}"; - let (input_tokens, lex_err) = common::utils::into_input_tokens(src); + let ParsedInput { + parse_res, + lex_errs, + parse_errs, + } = result_with_errs(src); - let (Some(res), parser_err) = parser().parse(&input_tokens).into_output_errors() else { - panic!() + let res = match parse_res { + Some(res) => res, + None => panic!(), }; let expected_res = S::new( @@ -33,8 +39,8 @@ fn definition() { 0..11, ); - assert!(lex_err.is_empty()); - assert!(parser_err.is_empty()); + assert!(lex_errs.is_empty()); + assert!(parse_errs.is_empty()); assert_eq!(res.definitions, expected_res); } @@ -42,12 +48,15 @@ fn definition() { fn transformation() { let src = "1{b[1]}2{r:} -> 1{}2{r:}"; - let (input_tokens, lex_err) = common::utils::into_input_tokens(src); - - println!("{:?}", input_tokens); + let ParsedInput { + parse_res, + lex_errs, + parse_errs, + } = result_with_errs(src); - let (Some(res), parser_err) = parser().parse(&input_tokens).into_output_errors() else { - panic!() + let res = match parse_res { + Some(res) => res, + None => panic!(), }; let expected_res = vec![ @@ -70,8 +79,8 @@ fn transformation() { ), ]; - assert!(lex_err.is_empty()); - assert!(parser_err.is_empty()); + assert!(lex_errs.is_empty()); + assert!(parse_errs.is_empty()); assert_eq!(res.transforms.unwrap().0, expected_res); } @@ -84,22 +93,29 @@ another = remove(u[9-11]) -> 1{} "; - let (input_tokens, lex_err) = common::utils::into_input_tokens(src); - - let (_, parser_err) = parser().parse(&input_tokens).into_output_errors(); + let ParsedInput { + parse_res: _, + lex_errs, + parse_errs, + } = result_with_errs(src); - assert!(lex_err.is_empty()); - assert!(parser_err.is_empty()); + assert!(lex_errs.is_empty()); + assert!(parse_errs.is_empty()); } #[test] fn hamming() { let src = "1{hamming(, 1)}2{r:}"; - let (input_tokens, lex_err) = common::utils::into_input_tokens(src); + let ParsedInput { + parse_res, + lex_errs, + parse_errs, + } = result_with_errs(src); - let (Some(res), parser_err) = parser().parse(&input_tokens).into_output_errors() else { - panic!() + let res = match parse_res { + Some(res) => res, + None => panic!(), }; let expected_res = Read { @@ -116,8 +132,8 @@ fn hamming() { )], }; - assert!(lex_err.is_empty()); - assert!(parser_err.is_empty()); + assert!(lex_errs.is_empty()); + assert!(parse_errs.is_empty()); assert_eq!(res.reads.0[0].0, expected_res); } @@ -125,10 +141,15 @@ fn hamming() { fn remove() { let src = "1{remove()}2{r:}"; - let (input_tokens, lex_err) = common::utils::into_input_tokens(src); + let ParsedInput { + parse_res, + lex_errs, + parse_errs, + } = result_with_errs(src); - let (Some(res), parser_err) = parser().parse(&input_tokens).into_output_errors() else { - panic!() + let res = match parse_res { + Some(res) => res, + None => panic!(), }; let expected_res = Read { @@ -145,31 +166,38 @@ fn remove() { )], }; - assert!(lex_err.is_empty()); - assert!(parser_err.is_empty()); + assert!(lex_errs.is_empty()); + assert!(parse_errs.is_empty()); assert_eq!(res.reads.0[0].0, expected_res); } #[test] fn illegal_nest() { - let src = "1{hamming(pad(, 1), 1)}"; + let src = "1{hamming(pad(>, 1, A), 1)}2{r:}"; - let (input_tokens, lex_err) = common::utils::into_input_tokens(src); + let ParsedInput { + parse_res: _, + lex_errs, + parse_errs, + } = result_with_errs(src); - let (_, parser_err) = parser().parse(&input_tokens).into_output_errors(); - - assert!(lex_err.is_empty()); - assert_eq!(1, parser_err.len()); + assert!(lex_errs.is_empty()); + assert_eq!(1, parse_errs.len()); } #[test] fn nested() { let src = "1{rev(norm())}2{r:}"; - let (input_tokens, lex_err) = common::utils::into_input_tokens(src); + let ParsedInput { + parse_res, + lex_errs, + parse_errs, + } = result_with_errs(src); - let (Some(res), parser_err) = parser().parse(&input_tokens).into_output_errors() else { - panic!() + let res = match parse_res { + Some(res) => res, + None => panic!(), }; let expected_res = Read { @@ -192,8 +220,8 @@ fn nested() { )], }; - assert!(lex_err.is_empty()); - assert!(parser_err.is_empty()); + assert!(lex_errs.is_empty()); + assert!(parse_errs.is_empty()); assert_eq!(res.reads.0[0].0, expected_res); } @@ -201,10 +229,15 @@ fn nested() { fn labeled_unbounded() { let src = "1{b:}2{r:}"; - let (input_tokens, lex_err) = common::utils::into_input_tokens(src); + let ParsedInput { + parse_res, + lex_errs, + parse_errs, + } = result_with_errs(src); - let (Some(res), parser_err) = parser().parse(&input_tokens).into_output_errors() else { - panic!() + let res = match parse_res { + Some(res) => res, + None => panic!(), }; let expected_res = Read { @@ -224,8 +257,8 @@ fn labeled_unbounded() { )], }; - assert!(lex_err.is_empty()); - assert!(parser_err.is_empty()); + assert!(lex_errs.is_empty()); + assert!(parse_errs.is_empty()); assert_eq!(res.reads.0[0].0, expected_res); } @@ -233,10 +266,15 @@ fn labeled_unbounded() { fn ranged() { let src = "1{b[10-11]}2{r:}"; - let (input_tokens, lex_err) = common::utils::into_input_tokens(src); + let ParsedInput { + parse_res, + lex_errs, + parse_errs, + } = result_with_errs(src); - let (Some(res), parser_err) = parser().parse(&input_tokens).into_output_errors() else { - panic!() + let res = match parse_res { + Some(res) => res, + None => panic!(), }; let expected_res = Read { @@ -250,8 +288,8 @@ fn ranged() { )], }; - assert!(lex_err.is_empty()); - assert!(parser_err.is_empty()); + assert!(lex_errs.is_empty()); + assert!(parse_errs.is_empty()); assert_eq!(res.reads.0[0].0, expected_res); } @@ -259,10 +297,15 @@ fn ranged() { fn fixed() { let src = "1{r[10]}2{r:}"; - let (input_tokens, lex_err) = common::utils::into_input_tokens(src); + let ParsedInput { + parse_res, + lex_errs, + parse_errs, + } = result_with_errs(src); - let (Some(res), parser_err) = parser().parse(&input_tokens).into_output_errors() else { - panic!() + let res = match parse_res { + Some(res) => res, + None => panic!(), }; let expected_res = Read { @@ -276,8 +319,8 @@ fn fixed() { )], }; - assert!(lex_err.is_empty()); - assert!(parser_err.is_empty()); + assert!(lex_errs.is_empty()); + assert!(parse_errs.is_empty()); assert_eq!(res.reads.0[0].0, expected_res); } @@ -285,10 +328,15 @@ fn fixed() { fn fixed_seq() { let src = "1{f[GACTU]}2{r:}"; - let (input_tokens, lex_err) = common::utils::into_input_tokens(src); + let ParsedInput { + parse_res, + lex_errs, + parse_errs, + } = result_with_errs(src); - let (Some(res), parser_err) = parser().parse(&input_tokens).into_output_errors() else { - panic!() + let res = match parse_res { + Some(res) => res, + None => panic!(), }; let expected_res = Read { @@ -311,8 +359,8 @@ fn fixed_seq() { )], }; - assert!(lex_err.is_empty()); - assert!(parser_err.is_empty()); + assert!(lex_errs.is_empty()); + assert!(parse_errs.is_empty()); assert_eq!(res.reads.0[0].0, expected_res); } @@ -320,106 +368,110 @@ fn fixed_seq() { fn fail_ranged_seq() { let src = "1{f[1-2]}2{r:}"; - let (input_tokens, lex_err) = common::utils::into_input_tokens(src); - - let (_, parser_err) = parser().parse(&input_tokens).into_output_errors(); + let ParsedInput { + parse_res: _, + lex_errs, + parse_errs, + } = result_with_errs(src); - assert!(lex_err.is_empty()); - assert_eq!(1, parser_err.len()); + assert!(lex_errs.is_empty()); + assert_eq!(1, parse_errs.len()); } #[test] fn allow_expr_arg() { let src = "1{map(b[9-10], \"filepath\", norm(self))}2{r:}"; - let (input_tokens, lex_err) = common::utils::into_input_tokens(src); - - let (_, parser_err) = parser().parse(&input_tokens).into_output_errors(); - - assert!(lex_err.is_empty()); - assert!(parser_err.is_empty()); -} - -#[test] -fn fail_map() { - let src = "1{map(pad(b[9-10], 3), \"filepath\", norm(self))}2{r:}"; - - let (input_tokens, lex_err) = common::utils::into_input_tokens(src); - - let (_, parser_err) = parser().parse(&input_tokens).into_output_errors(); + let ParsedInput { + parse_res: _, + lex_errs, + parse_errs, + } = result_with_errs(src); - assert!(lex_err.is_empty()); - assert_eq!(1, parser_err.len()); + assert!(lex_errs.is_empty()); + assert!(parse_errs.is_empty()); } #[test] fn fail_prefix_label_underscore() { let src = "_brc = b[10] 1{}2{r:}"; - let (input_tokens, lex_err) = common::utils::into_input_tokens(src); + let ParsedInput { + parse_res: _, + lex_errs, + parse_errs, + } = result_with_errs(src); - let (_, parser_err) = parser().parse(&input_tokens).into_output_errors(); - - assert!(lex_err.is_empty()); - assert_eq!(1, parser_err.len()); + assert!(lex_errs.is_empty()); + assert_eq!(1, parse_errs.len()); } #[test] fn fail_prefix_inlinelabel_underscore() { let src = "1{b<_brc>[10]}2{r:}"; - let (input_tokens, lex_err) = common::utils::into_input_tokens(src); - - let (_, parser_err) = parser().parse(&input_tokens).into_output_errors(); + let ParsedInput { + parse_res: _, + lex_errs, + parse_errs, + } = result_with_errs(src); - assert!(lex_err.is_empty()); - assert_eq!(1, parser_err.len()); + assert!(lex_errs.is_empty()); + assert_eq!(1, parse_errs.len()); } #[test] fn ok_mid_inlinelabel_underscore() { let src = "1{b[10]}2{r:}"; - let (input_tokens, lex_err) = common::utils::into_input_tokens(src); + let ParsedInput { + parse_res: _, + lex_errs, + parse_errs, + } = result_with_errs(src); - let (_, parser_err) = parser().parse(&input_tokens).into_output_errors(); - - assert!(lex_err.is_empty()); - assert!(parser_err.is_empty()); + assert!(lex_errs.is_empty()); + assert!(parse_errs.is_empty()); } #[test] fn ok_mid_label_underscore() { let src = "b_rc = b[10] 1{}2{r:}"; - let (input_tokens, lex_err) = common::utils::into_input_tokens(src); - - let (_, parser_err) = parser().parse(&input_tokens).into_output_errors(); + let ParsedInput { + parse_res: _, + lex_errs, + parse_errs, + } = result_with_errs(src); - assert!(lex_err.is_empty()); - assert!(parser_err.is_empty()); + assert!(lex_errs.is_empty()); + assert!(parse_errs.is_empty()); } #[test] fn filter_test() { let src = "b_rc = filter(b[10], $0) 1{}2{r:}"; - let (input_tokens, lex_err) = common::utils::into_input_tokens(src); + let ParsedInput { + parse_res: _, + lex_errs, + parse_errs, + } = result_with_errs(src); - let (_, parser_err) = parser().parse(&input_tokens).into_output_errors(); - - assert!(lex_err.is_empty()); - assert!(parser_err.is_empty()); + assert!(lex_errs.is_empty()); + assert!(parse_errs.is_empty()); } #[test] fn filter_test_too_many_args() { let src = "b_rc = filter(b[10], $0, 1) 1{}2{r:}"; - let (input_tokens, lex_err) = common::utils::into_input_tokens(src); - - let (_, parser_err) = parser().parse(&input_tokens).into_output_errors(); + let ParsedInput { + parse_res: _, + lex_errs, + parse_errs, + } = result_with_errs(src); - assert!(lex_err.is_empty()); - assert_eq!(1, parser_err.len()); + assert!(lex_errs.is_empty()); + assert_eq!(1, parse_errs.len()); }