diff --git a/Cargo.lock b/Cargo.lock index 9cc7726..3f9b3cc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,24 +1,12 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "adler2" -version = "2.0.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" - -[[package]] -name = "ahash" -version = "0.8.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" -dependencies = [ - "cfg-if", - "once_cell", - "version_check", - "zerocopy", -] +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" [[package]] name = "aho-corasick" @@ -114,9 +102,9 @@ checksum = "6b964d184e89d9b6b67dd2715bc8e74cf3107fb2b529990c90cf517326150bf4" [[package]] name = "ariadne" -version = "0.3.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72fe02fc62033df9ba41cba57ee19acf5e742511a140c7dbc3a873e19a19a1bd" +checksum = "36f5e3dca4e09a6f340a61a0e9c7b61e030c69fc27bf29d73218f7e5e3b7638f" dependencies = [ "unicode-width", "yansi", @@ -175,18 +163,18 @@ dependencies = [ [[package]] name = "buffer-redux" -version = "1.0.2" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e8acf87c5b9f5897cd3ebb9a327f420e0cae9dd4e5c1d2e36f2c84c571a58f1" +checksum = "431a9cc8d7efa49bc326729264537f5e60affce816c66edf434350778c9f4f54" dependencies = [ "memchr", ] [[package]] name = "bytecount" -version = "0.6.8" +version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce" +checksum = "175812e0be2bccb6abe50bb8d566126198344f707e304f45c648fd8f2cc0365e" [[package]] name = "bzip2" @@ -200,12 +188,11 @@ dependencies = [ [[package]] name = "bzip2-sys" -version = "0.1.12+1.0.8" +version = "0.1.13+1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72ebc2f1a417f01e1da30ef264ee86ae31d2dcd2d603ea283d3c244a883ca2a9" +checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" dependencies = [ "cc", - "libc", "pkg-config", ] @@ -226,12 +213,16 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chumsky" -version = "0.9.3" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9" +checksum = "4ba4a05c9ce83b07de31b31c874e87c069881ac4355db9e752e3a55c11ec75a6" dependencies = [ "hashbrown", + "regex-automata 0.3.9", + "serde", "stacker", + "unicode-ident", + "unicode-segmentation", ] [[package]] @@ -313,9 +304,9 @@ dependencies = [ [[package]] name = "crc32fast" -version = "1.4.2" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" dependencies = [ "cfg-if", ] @@ -359,6 +350,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + [[package]] name = "errno" version = "0.3.10" @@ -377,15 +374,21 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "flate2" -version = "1.0.35" +version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c936bfdafb507ebbf50b8074c54fa31c5be9a1e7e5f467dd659697041407d07c" +checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb" dependencies = [ "crc32fast", "libz-ng-sys", "miniz_oxide", ] +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + [[package]] name = "fuchsia-cprng" version = "0.1.1" @@ -406,12 +409,13 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.14.5" +version = "0.15.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" dependencies = [ - "ahash", "allocator-api2", + "equivalent", + "foldhash", ] [[package]] @@ -446,9 +450,9 @@ checksum = "875b3680cb2f8f71bdcf9a30f38d48282f5d3c95cbf9b3fa57269bb5d5c06828" [[package]] name = "libz-ng-sys" -version = "1.1.21" +version = "1.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cee1488e961a80d172564fd6fcda11d8a4ac6672c06fe008e9213fa60520c2b" +checksum = "7bf914b7dd154ca9193afec311d8e39345c1bd93b48b3faa77329f0db8f553c0" dependencies = [ "cmake", "libc", @@ -503,11 +507,12 @@ dependencies = [ [[package]] name = "miniz_oxide" -version = "0.8.5" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e3e04debbb59698c15bacbb6d93584a8c0ca9cc3213cb423d31f760d8843ce5" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ "adler2", + "simd-adler32", ] [[package]] @@ -573,9 +578,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" [[package]] name = "predicates" @@ -713,6 +718,17 @@ dependencies = [ "regex-syntax 0.6.29", ] +[[package]] +name = "regex-automata" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59b23e92ee4318893fa3fe3e6fb365258efbfe6ac6ab30f090cdcbb7aa37efa9" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax 0.7.5", +] + [[package]] name = "regex-automata" version = "0.4.9" @@ -730,6 +746,12 @@ version = "0.6.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" +[[package]] +name = "regex-syntax" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" + [[package]] name = "regex-syntax" version = "0.8.5" @@ -812,9 +834,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.139" +version = "1.0.143" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44f86c3acccc9c65b153fe1b85a3be07fe5515274ec9f0653b4a0875731c72a6" +checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a" dependencies = [ "itoa", "memchr", @@ -837,6 +859,12 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "simd-adler32" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" + [[package]] name = "similar" version = "2.7.0" @@ -1044,12 +1072,6 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" -[[package]] -name = "version_check" -version = "0.9.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" - [[package]] name = "wait-timeout" version = "0.2.1" @@ -1183,26 +1205,6 @@ dependencies = [ [[package]] name = "yansi" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" - -[[package]] -name = "zerocopy" -version = "0.7.35" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" -dependencies = [ - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.7.35" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] +checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" diff --git a/Cargo.toml b/Cargo.toml index f3d88bc..9551e8a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,8 +18,8 @@ path = "src/bin/bin.rs" antisequence = { git = "https://github.com/noahcape/ANTISEQUENCE.git", branch = 'dev' } tracing-subscriber = { version = "0.3.17", features = ["env-filter"] } tracing = "0.1.37" -ariadne = "0.3.0" -chumsky = "0.9.3" +ariadne = "0.5.0" +chumsky = "0.12.0" clap = { version = "4.4.8", features = ["derive"] } anyhow = "1.0" tempfile = "3.5.0" diff --git a/src/bin/bin.rs b/src/bin/bin.rs index 9fe6c9f..2a1b33c 100644 --- a/src/bin/bin.rs +++ b/src/bin/bin.rs @@ -1,12 +1,11 @@ +use std::process::exit; + use clap::arg; use std::io; use std::path::PathBuf; use tracing_subscriber::{filter::LevelFilter, fmt, prelude::*, EnvFilter}; -use seqproc::{ - error::handle_errors, - execute::{compile_geom, interpret}, -}; +use seqproc::execute::{compile_geom, interpret}; /// General puprose sequence preprocessor #[derive(Debug, clap::Parser)] @@ -82,8 +81,6 @@ fn main() { additional_args, geom, ), - Err(e) => { - handle_errors(e, geom); - } + Err(_) => exit(1), } } diff --git a/src/error.rs b/src/error.rs index 9040613..7621514 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,130 +1,39 @@ -use std::ops::Range; - -use ariadne::{Color, Fmt, Label, Report, ReportKind, Source}; -use chumsky::{error::SimpleReason, prelude::*}; - -use crate::lexer::Token; - -pub fn handle_errors(errs: Vec>, source: String) { - // error recovery - errs.into_iter().for_each(|e| { - let report = Report::build(ReportKind::Error, (), e.span().start); - - let report = match e.reason() { - chumsky::error::SimpleReason::Custom(msg) => report - .with_message("Parsing and Compiling EFGDL") - .with_label( - Label::new(e.span()) - .with_message(format!("{}", msg.fg(Color::Red))) - .with_color(Color::Red), - ), - chumsky::error::SimpleReason::Unclosed { span, delimiter } => report - .with_message(format!( - "Unclosed delimiter {}", - delimiter.fg(Color::Yellow) - )) - .with_label( - Label::new(span.clone()) - .with_message(format!( - "Unclosed delimiter {}", - delimiter.fg(Color::Yellow) - )) - .with_color(Color::Yellow), - ) - .with_label( - Label::new(e.span()) - .with_message(format!( - "Must be closed before this {}", - e.found() - .unwrap_or(&"end of file".to_string()) - .fg(Color::Red) - )) - .with_color(Color::Red), - ), - chumsky::error::SimpleReason::Unexpected => { - report.with_message(format!( - "{}, expected {}", - if e.found().is_some() { - "Unexpected token in input" - } else { - "Unexpected end of input" - }, - if e.expected().len() == 0 { - "something else".to_string() - } else { - e.expected() - .map(|expected| match expected { - Some(expected) => expected.to_string(), - None => "end of input".to_string(), - }) - .collect::>() - .join(", ") - } - )) - } - .with_label( - Label::new(e.span()) - .with_message(format!( - "Unexpected token {}", - e.found() - .unwrap_or(&"end of file".to_string()) - .fg(Color::Red) - )) - .with_color(Color::Red), - ), - }; - - report.finish().print(Source::from(source.clone())).unwrap(); - }); -} - -pub fn missing_delimiter(token: Token, span: Range, obj: Option<&str>) -> Simple { - let msg = |d1, d2| match obj { - Some(obj) => format!("Missing delimitter for {obj} - delimit with '{d1} .. {d2}'."), - None => format!("Missing delimtter - delimit with '{d1} .. {d2}'."), - }; - - match token { - Token::RParen | Token::LParen => Simple::custom(span, msg('(', ')')), - Token::RBrace | Token::LBrace => Simple::custom(span, msg('{', '}')), - Token::RBracket | Token::LBracket => Simple::custom(span, msg('[', ']')), - Token::RAngle | Token::LAngle => Simple::custom(span, msg('<', '>')), - _ => Simple::custom(span, "Missing delimitter"), - } -} - -pub fn comma(span: Range) -> Simple { - Simple::custom(span, "Expected a ',' to separate arguments.") +use std::fmt; + +use ariadne::{Color, Label, Report, ReportKind, Source}; +use chumsky::prelude::*; + +pub fn failure( + msg: String, + label: (String, SimpleSpan), + extra_labels: impl IntoIterator, + source: String, +) -> ! { + Report::build(ReportKind::Error, ((), label.1.into_range())) + .with_config(ariadne::Config::new().with_index_type(ariadne::IndexType::Byte)) + .with_message(&msg) + .with_label( + Label::new(((), label.1.into_range())) + .with_message(label.0) + .with_color(Color::Red), + ) + .with_labels(extra_labels.into_iter().map(|label2| { + Label::new(((), label2.1.into_range())) + .with_message(label2.0) + .with_color(Color::Yellow) + })) + .finish() + .print(Source::from(&source)) + .unwrap(); + std::process::exit(1) } -pub fn throw(prev_err: Simple, next_err: Simple) -> Simple { - let expected = prev_err - .expected() - .map(|expected| match expected { - Some(expected) => expected.to_string(), - None => "end of input".to_string(), - }) - .collect::>(); - - if expected.len() == 1 { - let expected = expected.first().unwrap(); - let range = prev_err.span(); - let start = range.start; - - let msg = match expected.as_str() { - ":" => { - Some("Unfinished interval - add a ':' or specify interval with different length.") - } - _ => None, - }; - - if let Some(msg) = msg { - return Simple::custom(start - 1..start - 1 + expected.len(), msg); - } - } - - match prev_err.reason() { - SimpleReason::Custom(_) => prev_err, - _ => next_err, - } +pub fn parse_failure(err: &Rich<'_, impl fmt::Display>, src: String) -> ! { + failure( + err.to_string(), + (err.reason().to_string(), *err.span()), + err.contexts() + .map(|(l, s)| (format!("while parsing this {l}"), *s)), + src, + ) } diff --git a/src/execute.rs b/src/execute.rs index 91c5456..bea2b1e 100644 --- a/src/execute.rs +++ b/src/execute.rs @@ -1,10 +1,14 @@ use std::{ - fs::File, io::BufWriter, panic, path::{Path, PathBuf}, thread + fs::File, + io::BufWriter, + panic, + path::{Path, PathBuf}, + thread, }; use antisequence::graph::*; use anyhow::{bail, Result}; -use chumsky::{prelude::Simple, Parser, Stream}; +use chumsky::{error::Rich, input::Input, Parser}; use nix::sys::stat; use nix::unistd; use tempfile::tempdir; @@ -12,6 +16,7 @@ use tracing::info; use crate::{ compile::{compile, CompiledData}, + error::parse_failure, lexer, parser::parser, }; @@ -121,37 +126,28 @@ fn interpret_to_pipes( } } -pub fn compile_geom(geom: String) -> Result>> { - let (tokens, mut errs) = lexer::lexer().parse_recovery(geom); - - let parse_errs = if let Some(tokens) = tokens { - match parser().parse(Stream::from_iter( - tokens.len()..tokens.len() + 1, - tokens.into_iter(), - )) { - Err(errs) => errs, - Ok(description) => { - let res = compile(description.clone()); - - if let Err(e) = res { - errs.push(Simple::custom(e.span, e.msg)); - } else { - return Ok(res.ok().unwrap()); - }; - - vec![] - } - } - } else { - Vec::new() - }; +pub fn compile_geom(geom: String) -> Result>> { + // lex input + let tokens = lexer::lexer() + .parse(&geom) + .into_result() + .unwrap_or_else(|errs| parse_failure(&errs[0], geom.clone())); - let errors = errs + let tokens = tokens .into_iter() - .map(|e| e.map(|c| c.to_string())) - .chain(parse_errs.into_iter().map(|e| e.map(|tok| tok.to_string()))) + .map(|(tok, span)| chumsky::span::Spanned { inner: tok, span }) .collect::>(); - Err(errors) + let input = tokens[..].split_spanned((0..geom.len()).into()); + + // parse token + let description = parser() + .parse(input) + .into_result() + .unwrap_or_else(|errs| parse_failure(&errs[0], geom.clone())); + + // compile ast + compile(description) + .map_err(|e| parse_failure(&Rich::::custom(e.span, e.msg), geom.clone())) } pub fn read_pairs_to_file( diff --git a/src/geometry/compile/functions.rs b/src/geometry/compile/functions.rs index 78541a3..ea9295c 100644 --- a/src/geometry/compile/functions.rs +++ b/src/geometry/compile/functions.rs @@ -80,6 +80,7 @@ pub fn compile_fn( path, compile_inner_expr(expr.unboxed(), S(parent_expr, expr_span))?, ), + Function::Filter(path) => CompiledFunction::FilterWithinDist(path, 0), Function::FilterWithinDist(path, mismatch) => { CompiledFunction::FilterWithinDist(path, mismatch) } @@ -130,8 +131,8 @@ fn compile_inner_expr( match expr { Expr::Function(fn_, fn_expr) => { expr = fn_expr.unboxed().0; - span = fn_.1.clone(); - let compiled_fn = compile_fn(fn_.clone(), S(expr.clone(), span.clone())); + span = fn_.1; + let compiled_fn = compile_fn(fn_.clone(), S(expr.clone(), span)); if compiled_fn.is_ok() { inner_stack.push(compiled_fn.ok().unwrap()); } else { diff --git a/src/geometry/compile/reads.rs b/src/geometry/compile/reads.rs index 86c2d6d..8d73c46 100644 --- a/src/geometry/compile/reads.rs +++ b/src/geometry/compile/reads.rs @@ -37,7 +37,7 @@ pub fn validate_geometry( if !expect_next.contains(&type_) { return Err(Error { - span: span.clone(), + span: *span, msg: format!("Ambiguous Geometry: expected {expect_next:?}, found: {type_}"), }); } @@ -140,7 +140,7 @@ pub fn compile_reads( Expr::Label(S(ref l, ref span)) => { if labels.contains(l) { err = Some(Error { - span: span.clone(), + span: *span, msg: format!( "`{l}` has already been used. Cannot use same variable more than once." ), @@ -160,7 +160,7 @@ pub fn compile_reads( inner_expr.expr.0.type_, inner_expr.expr.0.size.clone(), ), - inner_expr.expr.1.clone(), + inner_expr.expr.1, ), )?); } @@ -174,7 +174,7 @@ pub fn compile_reads( break 'inner; } else { err = Some(Error { - span: span.clone(), + span: *span, msg: format!("No variable declared with label: {l}"), }); diff --git a/src/geometry/compile/transformation.rs b/src/geometry/compile/transformation.rs index 54bd405..fefdec5 100644 --- a/src/geometry/compile/transformation.rs +++ b/src/geometry/compile/transformation.rs @@ -50,11 +50,11 @@ pub fn compile_transformation( } Expr::LabeledGeomPiece(_, _) | Expr::GeomPiece(_, _) => return Err(Error { span: expr.1, - msg: format!("{} - Cannot construct intervals in a transformation", generic_transformation_msg) + msg: format!("{generic_transformation_msg} - Cannot construct intervals in a transformation") }), Expr::Self_ => return Err(Error { span: expr.1, - msg: format!("{} - Misplaced reference of 'self', this is a reserved token for the 'map' function.", generic_transformation_msg), + msg: format!("{generic_transformation_msg} - Misplaced reference of 'self', this is a reserved token for the 'map' function."), }) } } @@ -88,7 +88,7 @@ pub fn compile_transformation( for fn_ in &gp.stack { if let S(CompiledFunction::Remove, span) = fn_ { return Err(Error { - span: span.clone(), + span: *span, msg: "Cannot reference a void interval after '->' - if you want to keep this interval then remove the 'remove' transformation.".to_string() }); } diff --git a/src/geometry/compile/utils.rs b/src/geometry/compile/utils.rs index f639ab0..c0b1009 100644 --- a/src/geometry/compile/utils.rs +++ b/src/geometry/compile/utils.rs @@ -113,10 +113,10 @@ impl GeometryMeta { } }; - let mut return_type = S(expr_type, expr_span.clone()); + let mut return_type = S(expr_type, *expr_span); for S(fn_, span) in self.stack.iter().rev() { - return_type = validate_composition(S(fn_, span.clone()), return_type, &expr.size)?; + return_type = validate_composition(S(fn_, *span), return_type, &expr.size)?; } Ok(()) @@ -304,7 +304,7 @@ impl GeometryMeta { impl IntervalShape { pub fn update_size_to(&self, n: usize) -> Self { - IntervalShape::FixedLen(S(n, 0..1)) + IntervalShape::FixedLen(S(n, (0..1).into())) } pub fn update_size_add(self, n: usize) -> Self { diff --git a/src/geometry/interpret.rs b/src/geometry/interpret.rs index fb4d682..c2c6ff8 100644 --- a/src/geometry/interpret.rs +++ b/src/geometry/interpret.rs @@ -1,4 +1,4 @@ -use std::{path::PathBuf, str::FromStr, usize}; +use std::{path::PathBuf, str::FromStr}; use antisequence::{ graph::MatchType::{ @@ -6,7 +6,6 @@ use antisequence::{ }, *, }; -use chumsky::chain::Chain; use expr::Expr; use graph::{ Graph, @@ -143,10 +142,7 @@ fn parse_additional_args(arg: String, args: &[&str]) -> PathBuf { let len = args.len(); match arg.parse::() { Ok(n) => PathBuf::from_str(args.get(n).unwrap_or_else(|| { - panic!( - "Expected {n} additional arguments with `--additional` tag. Found only {}.", - len - ) + panic!("Expected {n} additional arguments with `--additional` tag. Found only {len}.",) })) .unwrap_or_else(|_| { panic!("Expected path as argument -- could not parse argument {n} as path.") @@ -171,7 +167,7 @@ fn execute_stack( }; let interval_length = match size { - IntervalShape::FixedSeq(v) => v.len(), + IntervalShape::FixedSeq(S(v, _)) => v.len(), IntervalShape::FixedLen(S(n, _)) => *n, IntervalShape::RangedLen(S((_, b), _)) => *b, IntervalShape::UnboundedLen => 0, @@ -195,7 +191,7 @@ fn execute_stack( execute_stack(fns, label, size, additional_args, &mut fallback_graph); graph.add(SelectOp::new( - Expr::from(expr::attr(&format!("{label}.{MAPPED}"))).not(), + Expr::from(expr::attr(format!("{label}.{MAPPED}"))).not(), fallback_graph, )); } @@ -214,7 +210,7 @@ fn execute_stack( execute_stack(fns, label, size, additional_args, &mut fallback_graph); graph.add(SelectOp::new( - Expr::from(expr::attr(&format!("{label}.{MAPPED}"))).not(), + Expr::from(expr::attr(format!("{label}.{MAPPED}"))).not(), fallback_graph, )); } @@ -278,7 +274,7 @@ impl<'a> GeometryMeta { }; if type_ == IntervalKind::Discard { - stack.push(S(CompiledFunction::Remove, 0..1)); + stack.push(S(CompiledFunction::Remove, (0..1).into())); } // this is only called from `interpret_dual` which is for variable to fixedSeq @@ -313,7 +309,7 @@ impl<'a> GeometryMeta { let next_label = format!("{cur_label}{NEXT_RIGHT}"); if type_ == IntervalKind::Discard { - stack.push(S(CompiledFunction::Remove, 0..1)); + stack.push(S(CompiledFunction::Remove, (0..1).into())); } // execute the requisite process here diff --git a/src/geometry/lexer.rs b/src/geometry/lexer.rs index 1ce41be..561686d 100644 --- a/src/geometry/lexer.rs +++ b/src/geometry/lexer.rs @@ -160,7 +160,8 @@ impl fmt::Display for Token { } /// Returns a lexer for EFGDL. -pub fn lexer() -> impl Parser, Error = Simple> { +pub fn lexer<'src>( +) -> impl Parser<'src, &'src str, Vec<(Token, Span)>, extra::Err>> { let int = text::int(10).from_str().unwrapped().map(Token::Num); let ctrl = choice(( @@ -183,7 +184,14 @@ pub fn lexer() -> impl Parser, Error = Simple> { let file = just('"') .ignored() - .then(take_until(just('"').ignored())) + .then( + any() + .and_is(just('"').not()) + .repeated() + .collect::>() + .then(just('"')), + ) + // .then(take_until(just('"').ignored())) .padded() .map(|((), (f, _))| Token::File(f.into_iter().collect::())); @@ -201,7 +209,7 @@ pub fn lexer() -> impl Parser, Error = Simple> { just('U').to(Token::U), )); - let ident = text::ident().map(|s: String| match s.as_str() { + let ident = text::ident().map(|s: &str| match s { "rev" => Token::Reverse, "revcomp" => Token::ReverseComp, "remove" => Token::Remove, @@ -227,26 +235,19 @@ pub fn lexer() -> impl Parser, Error = Simple> { "f" => Token::FixedSeq, _ => { if s.starts_with('_') { - Token::Reserved(s) + Token::Reserved(s.to_owned()) } else { - Token::Label(s) + Token::Label(s.to_owned()) } } }); - let token = nucs - .or(argument) - .or(ident) - .or(transformto) - .or(int) - .or(ctrl) - .or(special) - .or(file) - .recover_with(skip_then_retry_until([])); + // TODO: remove recovery + let token = choice((nucs, argument, ident, transformto, int, ctrl, special, file)); token - .map_with_span(|tok, span| (tok, span)) - .padded() + .map_with(|tok, state| (tok, state.span())) + .padded_by(text::whitespace()) .repeated() .collect() } diff --git a/src/geometry/mod.rs b/src/geometry/mod.rs index f8c8d8f..a8f54e7 100644 --- a/src/geometry/mod.rs +++ b/src/geometry/mod.rs @@ -3,12 +3,16 @@ pub mod interpret; pub mod lexer; pub mod parser; +use std::hash::{Hash, Hasher}; + use std::{ fmt::{self, Write}, ops::Range, slice, }; +use chumsky::span::SimpleSpan; + #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] #[repr(u8)] // Necessary for by-ref conversion to `str` pub enum Nucleotide { @@ -54,12 +58,27 @@ impl Nucleotide { } /// A range of characters in the input file. -pub type Span = Range; +pub type Span = SimpleSpan; /// Associates a `T` with a corresponding span in the source file. -#[derive(Clone, Debug, PartialEq, Eq, Hash)] +#[derive(Clone, Debug, Eq, PartialEq)] pub struct S(pub T, pub Span); +impl S { + pub fn new(t: T, s: Range) -> Self { + S(t, SimpleSpan::from(s)) + } +} + +impl Hash for S +where + T: PartialEq + Eq + Hash, +{ + fn hash(&self, hasher: &mut H) { + self.0.hash(hasher); + } +} + impl S { pub fn boxed(self) -> S> { S(Box::new(self.0), self.1) diff --git a/src/geometry/parser.rs b/src/geometry/parser.rs index 575b8ec..309606a 100644 --- a/src/geometry/parser.rs +++ b/src/geometry/parser.rs @@ -2,13 +2,11 @@ use std::fmt::{self, Write}; -use chumsky::prelude::*; +use chumsky::{extra::Err as ExtraErr, input::MappedInput, prelude::*}; -use crate::{ - error::{comma, missing_delimiter, throw}, - lexer::Token, - Nucleotide, S, -}; +use crate::{lexer::Token, Nucleotide, S}; + +use super::Span; /// The length of a nucleotide interval, /// and whether it must match a specific sequence. @@ -73,6 +71,8 @@ pub enum Function { Map(String, S>), /// `map_with_mismatch(I, A, F, n)` MapWithMismatch(String, S>, usize), + /// `filter(I, A)` + Filter(String), /// `filter_within_dist(I, A, n)` FilterWithinDist(String, usize), /// `hamming(F, n)` @@ -103,6 +103,7 @@ impl Function { let S(s, _) = b; write!(f, "map_with_mismatch({first}, {p}, {s}, {n})") } + Filter(p) => write!(f, "filter({first}, {p})"), FilterWithinDist(p, n) => write!(f, "filter_within_dist({first}, {p}, {n})"), Hamming(n) => write!(f, "hamming({first}, {n})"), } @@ -199,607 +200,376 @@ pub struct Description { pub transforms: Option>>>, } -pub fn parser() -> impl Parser> + Clone { - /* - Start with creating combinators and - a recursive definition of a geom_piece +fn make_geom_piece( + kind: IntervalKind, + shape: IntervalShape, + label: Option, + span: Span, +) -> Expr { + let expr = Expr::GeomPiece(kind, shape); + if let Some(Expr::Label(lbl)) = label { + Expr::LabeledGeomPiece(lbl, S(Box::new(expr), span)) + } else { + expr + } +} - At execution time we will check if it is a valid - geometry without any ambiguity. Here we will - restruct some invalid definitions - */ +type Input<'a> = MappedInput<'a, Token, Span, &'a [Spanned]>; - let label = select! { Token::Label(ident) => ident }; +macro_rules! function_arguments { + ($base:expr) => {{ + $base + .map_with(|res, state| S(res, state.span())) + .delimited_by( + just(Token::LParen), + just(Token::RParen) + ) + }}; - let num = select! { Token::Num(n) => n }; + ($base:expr, $first:expr $(, $rest:expr)* $(,)?) => {{ + $base + .then_ignore(just(Token::Comma)) + .then($first) + $( + .then_ignore(just(Token::Comma)).then($rest) + )* + .map_with(|res, state| S(res, state.span())) + .delimited_by( + just(Token::LParen), + just(Token::RParen) + ) + }} +} - let file = select! { Token::File(f) => f }; +macro_rules! unary_function { + ($func:tt, $arg:expr) => {{ + just(Token::$func) + .labelled(stringify!($func)) + .map_with(|_, state| state.span()) + .then($arg) + .map(move |(fn_span, S(geom_p, span))| { + Expr::Function( + S(Function::$func.clone(), fn_span), + S(Box::new(geom_p), span), + ) + }) + .labelled(concat!("Unary function ", stringify!($func))) + .as_context() + }}; +} - let argument = select! { Token::Arg(n) => n.to_string() }; +macro_rules! binary_function { + ($func:tt, $arg:expr) => {{ + just(Token::$func) + .labelled(stringify!($func)) + .map_with(|_, state| state.span()) + .then($arg) + .map(move |(fn_span, S((geom_p, arg), span))| { + Expr::Function( + S(Function::$func.clone()(arg), fn_span), + S(Box::new(geom_p), span), + ) + }) + .labelled(concat!("Binary function ", stringify!($func))) + .as_context() + }}; +} + +macro_rules! ternary_function { + ($func:tt, $arg:expr) => {{ + just(Token::$func) + .labelled(stringify!($func)) + .map_with(|_, state| state.span()) + .then($arg) + .map(move |(fn_span, S(((geom_p, arg_one), arg_two), span))| { + Expr::Function( + S(Function::$func.clone()(arg_one, arg_two), fn_span), + S(Box::new(geom_p), span), + ) + }) + .labelled(concat!("Ternary function ", stringify!($func))) + .as_context() + }}; +} + +macro_rules! quaternary_function { + ($func:tt, $arg:expr $(,)?) => {{ + just(Token::$func) + .labelled(stringify!($func)) + .map_with(|_, state| state.span()) + .then($arg) + .map( + move |(fn_span, S((((geom_p, arg_one), arg_two), arg_three), span))| { + Expr::Function( + S( + Function::$func.clone()(arg_one, arg_two, arg_three), + fn_span, + ), + S(Box::new(geom_p), span), + ) + }, + ) + .labelled(concat!("Quaternary function ", stringify!($func))) + .as_context() + }}; +} + +macro_rules! nary_functions { + ($helper:ident, $arg:expr, $($func:tt),* $(,)?) => {{ + choice(( + $( + $helper!($func, $arg.clone()), + )* + )) + }} +} + +macro_rules! parse_geometry_piece { + ($piece_type:expr, $inline_label:expr, $kind:expr) => {{ + $piece_type + .then($inline_label.or_not()) + .then($kind) + .map_with(|((kind, label), shape), state| { + make_geom_piece(kind, shape, label, state.span()) + }) + }}; +} + +// TODO: label everything to add better errors +pub fn parser<'tokens>( +) -> Box, Description, ExtraErr>> + 'tokens> +{ + // begin with defining basic token selectors + let label = select! { Token::Label(x) => x.clone() }; + let num = select! {Token::Num(n) => n }; + let file = select! {Token::File(f) => f.clone() }; + let argument = select! {Token::Arg(n) => n.to_string() }; + let self_ = select! { Token::Self_ => Expr::Self_ }; let piece_type = select! { Token::Barcode => IntervalKind::Barcode, Token::Umi => IntervalKind::Umi, Token::Discard => IntervalKind::Discard, Token::ReadSeq => IntervalKind::ReadSeq, - } - .labelled("specifier"); + }; let nuc = select! { - Token::U => Nucleotide::U, Token::A => Nucleotide::A, Token::T => Nucleotide::T, Token::G => Nucleotide::G, Token::C => Nucleotide::C, + Token::U => Nucleotide::U, }; let inline_label = label - .map_err_with_span(|t, span| { - throw( - t, - Simple::custom( - span, - "Found delimiters '<' and '>' which must delimit a label.", - ), - ) - }) - .delimited_by(just(Token::LAngle), just(Token::RAngle)) - .map_err_with_span(|t, span| { - throw(t, missing_delimiter(Token::RAngle, span, Some("label"))) - }) - .map_with_span(|l, span| Expr::Label(S(l, span))) - .labelled("label"); - - let label = label.map_with_span(S).labelled("label"); - - let self_ = just(Token::Self_).to(Expr::Self_).labelled("self"); + .delimited_by( + just(Token::LAngle).labelled("opening '<'"), + just(Token::RAngle).labelled("closing '>'"), + ) + .map_with(|l, span: &mut _| Expr::Label(S(l, span.span()))) + .labelled("inline label"); + // interval shape parsers let range = num + .labelled("number") .then_ignore(just(Token::Dash)) - .then(num) - .map_err_with_span(|t, span| { - throw( - t, - Simple::custom( - span, - "Expected a numerical literal after '-' for a ranged length interval.", - ), - ) - }) - .map_with_span(|(a, b), span| IntervalShape::RangedLen(S((a, b), span))) + .then(num.labelled("number")) + .map_with(|(a, b), span| IntervalShape::RangedLen(S((a, b), span.span()))) .delimited_by(just(Token::LBracket), just(Token::RBracket)) - .map_err_with_span(|t, span| { - throw( - t, - missing_delimiter(Token::RBracket, span, Some("variable length interval")), - ) - }); + .labelled("variable length geometry peice shape: [-]"); let fixed_len = num - .map_err_with_span(|t, span| { - throw( - t, - Simple::custom( - span, - "Expecting a length specifier '[-]', or '[]'.", - ), - ) - }) - .map_with_span(|n, span| IntervalShape::FixedLen(S(n, span))) + .labelled("number") + .map_with(|n, state| IntervalShape::FixedLen(S(n, state.span()))) .delimited_by(just(Token::LBracket), just(Token::RBracket)) - .map_err_with_span(|t, span| { - throw( - t, - missing_delimiter(Token::LBracket, span, Some("fixed length interval")), - ) - }) - .labelled("fixed_len"); + .labelled("fixed length geometry piece shape: []"); - let seq = nuc + let nuc_seq = nuc + .labelled("nucleotide") .repeated() .at_least(1) - .map_err_with_span(|t, span| { - throw( - t, - Simple::custom(span, "A fragment must contain at least one ATGCU character"), - ) - }) - .collect::>(); - - let nucstr = seq - .map_with_span(|nucstr, span| IntervalShape::FixedSeq(S(nucstr, span))) + .collect::>() + .map_with(|seq, span| IntervalShape::FixedSeq(S(seq, span.span()))) .delimited_by(just(Token::LBracket), just(Token::RBracket)) - .map_err_with_span(|t, span| { - throw( - t, - missing_delimiter(Token::LBracket, span, Some("fragment specifier")), - ) - }) - .labelled("nucstr"); + .labelled("nucleotide sequence"); + // geom piece parsers let unbounded = piece_type - .map_err_with_span(|t, span| { - throw( - t, - Simple::custom(span, "Specify interval with either 'b'/'u'/'f'/'r'/'x'."), - ) - }) .then(inline_label.clone().or_not()) .then_ignore(just(Token::Colon)) - .map_with_span(|(type_, label), span| { - let expr = Expr::GeomPiece(type_, IntervalShape::UnboundedLen); - if let Some(Expr::Label(label)) = label { - Expr::LabeledGeomPiece(label, S(Box::new(expr), span)) - } else { - expr - } - }) - .labelled("unbound_seg"); - - let ranged = piece_type - .map_err_with_span(|t, span| { - throw( - t, - Simple::custom(span, "Specify interval with either 'b'/'u'/'f'/'r'/'x'."), - ) - }) - .then(inline_label.clone().or_not()) - .then(range) - .map_err_with_span(|t, span| { - throw( - t, - Simple::custom( - span, - "Expecting a length specifier either ':', '[-]', or '[]'.", - ), - ) - }) - .map_with_span(|((type_, label), range), span| { - let expr = Expr::GeomPiece(type_, range); - if let Some(Expr::Label(label)) = label { - Expr::LabeledGeomPiece(label, S(Box::new(expr), span)) - } else { - expr - } - }) - .labelled("ranged_len_seg"); - - let fixed = piece_type - .map_err_with_span(|t, span| { - throw( - t, - Simple::custom(span, "Specify interval with either 'b'/'u'/'f'/'r'/'x'."), - ) - }) - .then(inline_label.clone().or_not()) - .then(fixed_len) - .map_err_with_span(|t, span| { - throw( - t, - Simple::custom( - span, - "Expecting a length specifier either ':', '[-]', or '[]'.", - ), - ) - }) - .map_with_span(|((type_, label), len), span| { - let expr = Expr::GeomPiece(type_, len); - if let Some(Expr::Label(label)) = label { - Expr::LabeledGeomPiece(label, S(Box::new(expr), span)) - } else { - expr - } - }) - .labelled("fixed_len_seg"); - - let fixed_seq = just(Token::FixedSeq) - .to(IntervalKind::FixedSeq) - .then(inline_label.clone().or_not()) - .then(nucstr) - .map_err_with_span(|t, span| { - throw( - t, - Simple::custom(span, "Expecting a sequence to match delimited by '[ .. ]'."), - ) + .map_with(|(kind, label), span| { + make_geom_piece(kind, IntervalShape::UnboundedLen, label, span.span()) }) - .map_with_span(|((type_, label), nucs), span| { - let expr = Expr::GeomPiece(type_, nucs); - if let Some(Expr::Label(label)) = label { - Expr::LabeledGeomPiece(label, S(Box::new(expr), span)) - } else { - expr - } - }) - .labelled("seq_seg"); - - let geom_piece = choice(( - unbounded.clone(), - ranged.clone(), - fixed.clone(), - fixed_seq.clone(), - inline_label, - self_, - )) - .labelled("geom_piece"); - - let transformed_pieces = recursive(|transformed_pieces| { - let transformed_pieces = transformed_pieces - .map_err_with_span(|t, span| throw(t, Simple::custom(span, "Invalid declaration of interval"))); - - let recursive_num_arg = transformed_pieces - .clone() - .then_ignore(just(Token::Comma)) - .map_err_with_span(|t, span| throw(t, Simple::custom(span, "Expected a ',' to separate arguments."))) - .then(num) - .map_err_with_span(|t, span| throw(t, Simple::custom(span, "Expected a numerical literal as a second argument."))) - .map_with_span(S) - .delimited_by(just(Token::LParen), just(Token::RParen)) - .map_err_with_span(|t, span| throw(t, missing_delimiter(Token::LParen, span, None))); - - let recursive_num_nuc_args = transformed_pieces - .clone() - .then_ignore(just(Token::Comma)) - .map_err_with_span(|t, span| throw(t, comma(span))) - .then(num) - .map_err_with_span(|t, span| throw(t, Simple::custom(span, "Expected a numerical literal as a second argument."))) - .then_ignore(just(Token::Comma)) - .map_err_with_span(|t, span| throw(t, comma(span))) - .then(nuc) - .map_err_with_span(|t, span| throw(t, Simple::custom(span, "Expected an ATGCU literal as a third argument."))) - .map_with_span(S) - .delimited_by(just(Token::LParen), just(Token::RParen)) - .map_err_with_span(|t, span| throw(t, missing_delimiter(Token::LParen, span, None))); - - let recursive_no_arg = transformed_pieces - .clone() - .map_with_span(S) - .delimited_by(just(Token::LParen), just(Token::RParen)) - .map_err_with_span(|t, span| throw(t, missing_delimiter(Token::LParen, span, None))); - + .labelled("Unbounded geometry peice: e.g. 'r:'") + .as_context(); + + let ranged = parse_geometry_piece!(piece_type, inline_label.clone(), range) + .labelled("Variable length geometry piece: e.g. 'b[9-10]'") + .as_context(); + let fixed_seq = parse_geometry_piece!( + just(Token::FixedSeq).to(IntervalKind::FixedSeq), + inline_label.clone(), + nuc_seq + ) + .labelled("Fixed sequence geometry piece: e.g. 'f[ATGC]'") + .as_context(); + let fixed = parse_geometry_piece!(piece_type, inline_label.clone(), fixed_len) + .labelled("Fixed length geometry piece: e.g. 'b[10]'") + .as_context(); + + // what constitutes a valid geometry peice + let geom_piece = choice((unbounded, ranged, fixed, fixed_seq, inline_label, self_)); + + // transformed peices + let transformed_pieces = recursive(|tp| { choice(( - geom_piece.clone() - .map_err_with_span(|t, span| throw(t, Simple::custom(span, "Unexpected error when creating an interval."))), - just(Token::Remove) - .map_with_span(|_, span| S(Function::Remove, span)) - .then(recursive_no_arg.clone()) - .map(|(fn_, tok)| Expr::Function(fn_, tok.boxed())) - .labelled("remove"), - just(Token::Normalize) - .map_with_span(|_, span| S(Function::Normalize, span)) - .then(recursive_no_arg.clone()) - .map(|(fn_, tok)| Expr::Function(fn_, tok.boxed())) - .labelled("norm"), - just(Token::Hamming) - .map_with_span(|_, span| span) - .then( - geom_piece - .clone() - .map_err_with_span(|t, span| { - throw(t, Simple::custom(span, "Expected a fragment specified interval as the first argument - 'hamming' cannot take a transformed interval.")) - }) - .then_ignore(just(Token::Comma)) - .then(num).map_err_with_span(|t, span| throw(t, Simple::custom(span, "Expected a numeric literal as a second argument."))) - .map_with_span(S) - .delimited_by(just(Token::LParen), just(Token::RParen)) - .map_err_with_span(|t, span| throw(t, missing_delimiter(Token::LParen, span, Some("'hamming'")))), - ) - .map_err_with_span(|t, span| { - throw(t, Simple::custom(span, "Missing argument for hamming - ")) - }) - .map(|(fn_span, S((geom_p, num), span))| { - Expr::Function( - S(Function::Hamming(num), fn_span), - S(Box::new(geom_p), span), - ) - }) - .labelled("hamming"), - just(Token::Truncate) - .map_with_span(|_, span| span) - .then(recursive_num_arg.clone()) - .map(|(fn_span, S((geom_p, num), span))| { - Expr::Function( - S(Function::Truncate(num), fn_span), - S(Box::new(geom_p), span), - ) - }) - .labelled("trunc"), - just(Token::TruncateLeft) - .map_with_span(|_, span| span) - .then(recursive_num_arg.clone()) - .map(|(fn_span, S((geom_p, num), span))| { - Expr::Function( - S(Function::TruncateLeft(num), fn_span), - S(Box::new(geom_p), span), - ) - }) - .labelled("trunc_left"), - just(Token::TruncateTo) - .map_with_span(|_, span| span) - .then(recursive_num_arg.clone()) - .map(|(fn_span, S((geom_p, num), span))| { - Expr::Function( - S(Function::TruncateTo(num), fn_span), - S(Box::new(geom_p), span), - ) - }) - .labelled("trunc_to"), - just(Token::TruncateToLeft) - .map_with_span(|_, span| span) - .then(recursive_num_arg.clone()) - .map(|(fn_span, S((geom_p, num), span))| { - Expr::Function( - S(Function::TruncateToLeft(num), fn_span), - S(Box::new(geom_p), span), - ) - }) - .labelled("trunc_to_left"), - just(Token::Pad) - .map_with_span(|_, span| span) - .then(recursive_num_nuc_args.clone()) - .map(|(fn_span, S(((geom_p, num), nuc), span))| { - Expr::Function( - S(Function::Pad(num, nuc), fn_span), - S(Box::new(geom_p), span), - ) - }) - .labelled("pad"), - just(Token::PadLeft) - .map_with_span(|_, span| span) - .then(recursive_num_nuc_args.clone()) - .map(|(fn_span, S(((geom_p, num), nuc), span))| { - Expr::Function( - S(Function::PadLeft(num, nuc), fn_span), - S(Box::new(geom_p), span), - ) - }) - .labelled("pad_left"), - just(Token::PadTo) - .map_with_span(|_, span| span) - .then(recursive_num_nuc_args.clone()) - .map(|(fn_span, S(((geom_p, num), nuc), span))| { - Expr::Function( - S(Function::PadTo(num, nuc), fn_span), - S(Box::new(geom_p), span), - ) - }) - .labelled("pad_to"), - just(Token::PadToLeft) - .map_with_span(|_, span| span) - .then(recursive_num_nuc_args) - .map(|(fn_span, S(((geom_p, num), nuc), span))| { - Expr::Function( - S(Function::PadToLeft(num, nuc), fn_span), - S(Box::new(geom_p), span), - ) - }) - .labelled("pad_to_left"), - just(Token::Reverse) - .map_with_span(|_, span| S(Function::Reverse, span)) - .then(recursive_no_arg.clone()) - .map(|(fn_, tok)| Expr::Function(fn_, tok.boxed())) - .labelled("rev"), - just(Token::ReverseComp) - .map_with_span(|_, span| S(Function::ReverseComp, span)) - .then(recursive_no_arg.clone()) - .map(|(fn_, tok)| Expr::Function(fn_, tok.boxed())) - .labelled("revcomp"), - just(Token::Map) - .map_with_span(|_, span| span) - .then( - transformed_pieces - .clone() - .then_ignore(just(Token::Comma)) - .map_err_with_span(|t, span| throw(t, comma(span))) - .then(file.or(argument)) - .map_err_with_span(|t, span| throw(t, Simple::custom(span, "Expected a file or $ to be mapped to command line argument as second argument."))) - .then_ignore(just(Token::Comma)) - .map_err_with_span(|t, span| throw(t, comma(span))) - .then(transformed_pieces.clone().map_with_span(S)) - .map_with_span(S) - .delimited_by(just(Token::LParen), just(Token::RParen)) - .map_err_with_span(|t, span| throw(t, missing_delimiter(Token::LParen, span, Some("'map'")))), - ) - .map(|(fn_span, S(((geom_p, path), self_expr), span))| { - Expr::Function( - S(Function::Map(path, self_expr.boxed()), fn_span), - S(Box::new(geom_p), span), - ) - }) - .labelled("map"), - just(Token::MapWithMismatch) - .map_with_span(|_, span| span) - .then( - transformed_pieces - .clone() - .then_ignore(just(Token::Comma)) - .map_err_with_span(|t, span| throw(t, comma(span))) - .then(file.or(argument)) - .map_err_with_span(|t, span| throw(t, Simple::custom(span, "Expected a file or $ to be mapped to command line argument as second argument."))) - .then_ignore(just(Token::Comma)) - .map_err_with_span(|t, span| throw(t, comma(span))) - .then(transformed_pieces.clone().map_with_span(S)) - .then_ignore(just(Token::Comma)) - .map_err_with_span(|t, span| throw(t, comma(span))) - .then(num) - .map_err_with_span(|t, span| throw(t, Simple::custom(span, "Expected a numerical literal as the allowable mismatch when mapping interval."))) - .map_with_span(S) - .delimited_by(just(Token::LParen), just(Token::RParen)) - .map_err_with_span(|t, span| throw(t, missing_delimiter(Token::LParen, span, Some("'map_with_mismatch'")))), - ) - .map(|(fn_span, S((((geom_p, path), self_expr), num), span))| { - Expr::Function( - S( - Function::MapWithMismatch(path, self_expr.boxed(), num), - fn_span, - ), - S(Box::new(geom_p), span), - ) - }) - .labelled("map_dist"), - just(Token::FilterWithinDist) - .map_with_span(|_, span| span) - .then( - geom_piece - .clone() - .then_ignore(just(Token::Comma)) - .map_err_with_span(|t, span| throw(t, comma(span))) - .then(file.or(argument)) - .map_err_with_span(|t, span| throw(t, Simple::custom(span, "Expected a file or $ to be mapped to command line argument as second argument."))) - .then_ignore(just(Token::Comma)) - .map_err_with_span(|t, span| throw(t, comma(span))) - .then(num) - .map_err_with_span(|t, span| throw(t, Simple::custom(span, "Expected a numerical literal as the allowable mismatch when filtering interval."))) - .map_with_span(S) - .delimited_by(just(Token::LParen), just(Token::RParen)).map_err_with_span(|t, span| throw(t, missing_delimiter(Token::LParen, span, Some("filter_with_mismatch")))), + geom_piece.clone(), + nary_functions!( + unary_function, + function_arguments!(tp + .clone() + .labelled("geometry piece as sole argument to function")), + ReverseComp, + Reverse, + Remove, + Normalize + ), + nary_functions!( + binary_function, + function_arguments!( + tp.clone() + .labelled("geometry peice as argument to binary function"), + num.labelled("numerical argument to binary function") + ), + Hamming, + Truncate, + TruncateLeft, + TruncateTo, + TruncateToLeft + ), + binary_function!( + Filter, + function_arguments!( + tp.clone() + .labelled("geometry piece as argument to 'filter'"), + file.labelled("file name") + .or(argument.labelled("argument from commandline")) ) - .map(|(fn_span, S(((geom_p, path), num), span))| { - Expr::Function( - S(Function::FilterWithinDist(path, num), fn_span), - S(Box::new(geom_p), span), - ) - }) - .labelled("filter_dist"), - just(Token::Filter) - .map_with_span(|_, span| span) - .then( - geom_piece - .then_ignore(just(Token::Comma)) - .map_err_with_span(|t, span| throw(t, comma(span))) - .then(file.or(argument)) - .map_err_with_span(|t, span| throw(t, Simple::custom(span, "Expected a file or $ to be mapped to command line argument as second argument."))) - .map_with_span(S) - .delimited_by(just(Token::LParen), just(Token::RParen)).map_err_with_span(|t, span| throw(t, missing_delimiter(Token::LParen, span, Some("'filter'")))), + ), + nary_functions!( + ternary_function, + function_arguments!( + tp.clone() + .labelled("geometry piece as argument to 'pad'-llike functions"), + num.labelled("numerical argument to 'pad'-like functions"), + nuc.labelled("nucleotide to pad with") + ), + Pad, + PadLeft, + PadTo, + PadToLeft + ), + nary_functions!( + ternary_function, + function_arguments!( + tp.clone().labelled("geometry peice to 'map'"), + file.labelled("file name") + .or(argument.labelled("argument from commandline")), + tp.clone() + .labelled("geometry piece after mapping") + .map_with(|transf_p, state| S(Box::new(transf_p), state.span())) + ), + Map, + ), + ternary_function!( + FilterWithinDist, + function_arguments!( + tp.clone() + .labelled("geometry piece to 'filter_within_dist'"), + file.labelled("file name") + .or(argument.labelled("argument from commandline")), + num.labelled("numerical argument") ) - .map(|(fn_span, S((geom_p, path), span))| { - Expr::Function( - S(Function::FilterWithinDist(path, 0), fn_span), - S(Box::new(geom_p), span), - ) - }) - .labelled("filter"), + ), + nary_functions!( + quaternary_function, + function_arguments!( + tp.clone().labelled("geometry piece to 'map_with_mismatch'"), + file.labelled("file name") + .or(argument.labelled("argument from commandline")), + tp.clone() + .labelled("geometry piece after mapping") + .map_with(|transf_p, state| S(Box::new(transf_p), state.span())), + num.labelled("numerical argument") + ), + MapWithMismatch, + ), )) }) - .map_err_with_span(|t, span| throw(t, Simple::custom(span, "Invalid construction of an interval"))) - .map_with_span(S); + .map_with(|s, state| S(s, state.span())); + // define the basic peices of an EFGDL description let definitions = label - .map_err_with_span(|t, span| { - throw( - t, - Simple::custom(span, "Expected a label to begin a definition."), - ) - }) + .labelled("definition identifier") + .map_with(|l, state| S(l, state.span())) .then_ignore(just(Token::Equals)) .then(transformed_pieces.clone()) - .map_err_with_span(|t, span| { - throw( - t, - Simple::custom(span, "Error creating variable declaration"), - ) - }) - .map_with_span(|(label, geom_p), span| { - S( - Definition { - label, - expr: geom_p, - }, - span, - ) - }) + .map_with(|(label, expr), span| S(Definition { label, expr }, span.span())) .repeated() - .map_with_span(S); + .collect() + .map_with(|defs, span| S(defs, span.span())); let reads = num - .map_err_with_span(|t, span| { - throw(t, Simple::custom(span, "Expected a number to start a read")) - }) - .map_with_span(S) + .labelled("read number") + .map_with(|n, state| S(n, state.span())) .then( transformed_pieces .clone() - .labelled("transformed_pieces_for_reads") .repeated() .at_least(1) - .delimited_by(just(Token::LBrace), just(Token::RBrace)) - .map_err_with_span(|t, span| { - throw(t, missing_delimiter(Token::LBrace, span, Some("reads"))) - }), + .collect() + .delimited_by(just(Token::LBrace), just(Token::RBrace)), ) - .map_with_span(|(n, read), span| { - S( - Read { - index: n, - exprs: read, - }, - span, - ) - }) + .map_with(|(index, exprs), span| S(Read { index, exprs }, span.span())) .repeated() .exactly(2) - .map_err_with_span(|t, span| { - throw( - t, - Simple::custom(span, "Must provide two reads - only found one"), - ) - }) - .collect::>(); + .collect::>() + .map_with(|v, span| S(v, span.span())); let transform_read = num - .map_with_span(S) + .labelled("read number") + .map_with(|n, state| S(n, state.span())) .then( transformed_pieces - .clone() .repeated() .at_least(1) - .delimited_by(just(Token::LBrace), just(Token::RBrace)) - .map_err_with_span(|t, span| { - throw( - t, - missing_delimiter(Token::LBrace, span, Some("transformation")), - ) - }), + .collect() + .delimited_by(just(Token::LBrace), just(Token::RBrace)), ) - .map_with_span(|(n, read), span| { - S( - Read { - index: n, - exprs: read, - }, - span, - ) - }); + .map_with(|(index, exprs), state| S(Read { index, exprs }, state.span())); - let transformation = choice(( - end().map(|()| None), + let transformations = choice(( + end().map(|_| None), just(Token::TransformTo) - .then(transform_read.repeated().at_least(1).at_most(2).then(end())) - .map_with_span(|(_, (val, _)), span| Some(S(val, span))), + .then( + transform_read + .repeated() + .at_least(1) + .at_most(2) + .collect::>() + .then(end()), + ) + .map_with(|(_, (val, _)), state| Some(S(val, state.span()))), )); - definitions - .map_err_with_span(|t, span| { - throw( - t, - Simple::custom(span, "Error while parsing EFGDL specification."), - ) - }) - .then(reads.map_with_span(S)) - .then(transformation) - .map_err_with_span(|t, span| { - throw( - t, - Simple::custom(span, "Error while parsing EFGDL specification."), - ) - }) - .map(|((definitions, reads), transforms)| Description { - definitions, - reads, - transforms, - }) - .map_err_with_span(|t, span| { - throw( - t, - Simple::custom(span, "Error while parsing EFGDL specification."), - ) - }) + Box::new( + definitions + .then(reads) + .then(transformations) + .map(|((defs, reads), transforms)| Description { + definitions: defs, + reads, + transforms, + }), + ) } diff --git a/src/processors/mod.rs b/src/processors/mod.rs index 1b5ef7f..89b3644 100644 --- a/src/processors/mod.rs +++ b/src/processors/mod.rs @@ -150,7 +150,7 @@ pub fn map(this_label: &str, patterns: Patterns, match_type: MatchType, graph: & Expr::from(expr::attr(format!("{this_label}.{SUB}"))), )); graph.add(SelectOp::new( - Expr::from(expr::attr(&format!("{this_label}.{MAPPED}"))), + Expr::from(expr::attr(format!("{this_label}.{MAPPED}"))), mapping_graph, )); } diff --git a/tests/common/mod.rs b/tests/common/mod.rs new file mode 100644 index 0000000..b5614dd --- /dev/null +++ b/tests/common/mod.rs @@ -0,0 +1 @@ +pub mod utils; diff --git a/tests/common/utils.rs b/tests/common/utils.rs new file mode 100644 index 0000000..7185b32 --- /dev/null +++ b/tests/common/utils.rs @@ -0,0 +1,37 @@ +use chumsky::{error::Rich, input::Input, Parser}; +use seqproc::{ + lexer::{self, Token}, + parser::{parser, Description}, +}; + +pub struct ParsedInput<'a> { + pub parse_res: Option, + pub lex_errs: Vec>, + pub parse_errs: Vec>, +} + +pub fn result_with_errs<'a>(input: &'a str) -> ParsedInput<'a> { + // lex input + let (lex_res, lex_errs) = lexer::lexer().parse(&input).into_output_errors(); + let tokens = lex_res.unwrap(); + + let tokens = tokens + .into_iter() + .map(|(tok, span)| chumsky::span::Spanned { inner: tok, span }) + .collect::>(); + let input = tokens[..].split_spanned((0..input.len()).into()); + + // parse token + let (parse_res, parse_errs) = parser().parse(input).into_output_errors(); + + let parse_errs = parse_errs + .into_iter() + .map(|r| Rich::custom(*r.span(), r.reason())) + .collect::>(); + + ParsedInput { + parse_res, + lex_errs, + parse_errs, + } +} diff --git a/tests/compile_tests.rs b/tests/compile_tests.rs index 60697e0..3e45686 100644 --- a/tests/compile_tests.rs +++ b/tests/compile_tests.rs @@ -1,27 +1,25 @@ +#[macro_use] +mod common; + use std::collections::HashMap; -use chumsky::{prelude::*, Stream}; use seqproc::{ compile::{compile, definitions::compile_definitions, reads::compile_reads, utils::Error}, execute::compile_geom, - lexer::lexer, - parser::parser, }; +use crate::common::utils::{result_with_errs, ParsedInput}; + #[test] fn no_err() -> Result<(), Error> { let src = "1{remove(hamming(f[CAG], 1))}2{r:}"; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); - - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); compile_reads(res.reads, HashMap::new())?; @@ -32,16 +30,12 @@ fn no_err() -> Result<(), Error> { fn fail_norm() { let src = "1{norm(r:)}2{r:}"; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); - - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); let res = compile_reads(res.reads, HashMap::new()); @@ -52,16 +46,12 @@ fn fail_norm() { fn pass_composition() { let src = "1{trunc_to(rev(r:), 1)}2{r:}"; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); - - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); let res = compile_reads(res.reads, HashMap::new()); @@ -72,16 +62,12 @@ fn pass_composition() { fn fail_remove() { let src = "1{rev(remove(r:))}2{r:}"; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); - - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); let res = compile_reads(res.reads, HashMap::new()); @@ -92,16 +78,13 @@ fn fail_remove() { fn discard_as_void() { let src = "1{rev(x[10])}2{r:}"; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); - - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let ParsedInput { + parse_res, + lex_errs, + parse_errs, + } = result_with_errs(src); + println!("{:?} {:?}", lex_errs, parse_errs); + let res = parse_res.unwrap(); let res = compile_reads(res.reads, HashMap::new()); @@ -115,16 +98,12 @@ brc = b[10] brc1 = b[1-4] 1{}2{r:}"; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); - - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); let def_map = compile_definitions(res.definitions)?; @@ -140,17 +119,12 @@ brc = b[10] brc = b[1-4] 1{}2{r:}"; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); - - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); - + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); let def_map = compile_definitions(res.definitions); assert!(def_map.is_err()); @@ -158,19 +132,15 @@ brc = b[1-4] #[test] fn label_replacement() { - let src = "test = r: + let src = "test = r: 1{pad_to(, 5, A)}2{r:}"; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); - - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); let def_map = compile_definitions(res.definitions).unwrap(); @@ -181,19 +151,15 @@ fn label_replacement() { #[test] fn no_variable() { - let src = "testing = r: + let src = "testing = r: 1{pad(, 5, A)}2{r:}"; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); - - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); let def_map = compile_definitions(res.definitions).unwrap(); @@ -206,16 +172,12 @@ fn no_variable() { fn expr_unwrap() -> Result<(), Error> { let src = "1{pad(norm(b[9-10]), 1, A)remove(f[CAGAGC])u[8]remove(b[10])}2{r:}"; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); - - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); compile(res)?; @@ -228,16 +190,12 @@ fn fail_reuse_label() { brc = b[10] 1{}2{r:}"; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); - - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); let def_map = compile_definitions(res.definitions).unwrap(); @@ -253,16 +211,12 @@ brc = b[10] brc1 = pad(, 1, A) 1{}2{r:}"; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); - - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); let def_map = compile_definitions(res.definitions); @@ -276,16 +230,12 @@ brc = b[10] umi = pad(u[10], 1, A) 1{}2{r:}"; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); - - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); compile(res)?; @@ -299,17 +249,12 @@ brc = b[10] umi = pad(u[10], 1, A) 1{}2{r:}"; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); - - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); - + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); let res = compile(res); assert!(res.is_err()); @@ -321,16 +266,12 @@ fn fail_label_composition() { brc = remove(trunc(b[10], 3)) 1{pad(, 1, A)}2{r:}"; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); - - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); let res = compile(res); @@ -341,17 +282,12 @@ brc = remove(trunc(b[10], 3)) fn valid_geom() -> Result<(), Error> { let src = "1{b[9-11]remove(f[CAGAGC])u[8]b[10]}2{r:}"; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); - - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); - + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); compile(res)?; Ok(()) @@ -361,16 +297,12 @@ fn valid_geom() -> Result<(), Error> { fn invalid_geom_one() { let src = "1{b[9-11]f[CAGAGC]r:u[8]b[10]}2{r:}"; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); - - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); let res = compile(res); @@ -381,17 +313,12 @@ fn invalid_geom_one() { fn invalid_geom_two() { let src = "1{f[GAG]b[10-11]b[10]}2{r:}"; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); - - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); - + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); let res = compile(res); assert!(res.is_err()); @@ -406,16 +333,12 @@ test = r: 1{pad(, 1, A)f[CAGAGC]f[CAGA]}2{r:} -> 1{remove()remove()} "; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); - - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); compile(res)?; @@ -430,16 +353,12 @@ umi = norm(u[9-11]) 1{pad(, 1, A)f[CAGAGC]f[CAGA]}2{r:} -> 1{remove()remove(pad(, 1, A))} "; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); - - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); compile(res)?; @@ -450,17 +369,12 @@ umi = norm(u[9-11]) fn compile_map_arguments() -> Result<(), Error> { let src = "1{map(b[10-11], \"file\", norm(self))}2{r:}"; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); - - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); - + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); compile(res)?; Ok(()) @@ -469,20 +383,15 @@ fn compile_map_arguments() -> Result<(), Error> { #[test] fn compile_map_arguments_with_label() -> Result<(), Error> { let src = " -brc = b[10-11] +brc = b[10-11] 1{map(, \"file\", norm(self))}2{r:}"; - let (res, _) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); - - let res = parser() - .parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - .0 - .unwrap(); - + let ParsedInput { + parse_res, + lex_errs: _, + parse_errs: _, + } = result_with_errs(src); + let res = parse_res.unwrap(); compile(res)?; Ok(()) diff --git a/tests/lexer_tests.rs b/tests/lexer_tests.rs index c46c7c1..0aa6080 100644 --- a/tests/lexer_tests.rs +++ b/tests/lexer_tests.rs @@ -5,16 +5,23 @@ use seqproc::lexer::{lexer, Token}; fn nucs() { let src = "GCA"; - let expected_res = vec![(Token::G, 0..1), (Token::C, 1..2), (Token::A, 2..3)]; + let expected_res = vec![ + (Token::G, SimpleSpan::from(0..1)), + (Token::C, SimpleSpan::from(1..2)), + (Token::A, SimpleSpan::from(2..3)), + ]; - assert_eq!(expected_res, lexer().parse(src).unwrap()); + assert_eq!(expected_res, lexer().parse(src).into_output().unwrap()); } #[test] fn token() { let src = "1"; - assert_eq!(vec![(Token::Num(1), 0..1)], lexer().parse(src).unwrap()); + assert_eq!( + vec![(Token::Num(1), SimpleSpan::from(0..1))], + lexer().parse(src).unwrap() + ); } #[test] @@ -23,12 +30,12 @@ fn tokens() { assert_eq!( vec![ - (Token::Label("bc1".to_string()), 0..3), - (Token::Equals, 4..5), - (Token::Barcode, 6..7), - (Token::LBracket, 7..8), - (Token::Num(10), 8..10), - (Token::RBracket, 10..11), + (Token::Label("bc1".to_string()), SimpleSpan::from(0..3)), + (Token::Equals, SimpleSpan::from(4..5)), + (Token::Barcode, SimpleSpan::from(6..7)), + (Token::LBracket, SimpleSpan::from(7..8)), + (Token::Num(10), SimpleSpan::from(8..10)), + (Token::RBracket, SimpleSpan::from(10..11)), ], lexer().parse(src).unwrap() ); @@ -38,7 +45,7 @@ fn tokens() { fn fail() { let src = "1 ? 2"; - let (_, err) = lexer().parse_recovery(src); + let (_, err) = lexer().parse(src).into_output_errors(); assert_eq!(err.len(), 1); } @@ -47,11 +54,11 @@ fn fail() { fn label() { let src = "barcode"; - let (res, err) = lexer().parse_recovery(src); + let (res, err) = lexer().parse(src).into_output_errors(); assert_eq!(err.len(), 0); assert_eq!( - vec![(Token::Label("barcode".to_string()), 0..7)], + vec![(Token::Label("barcode".to_string()), SimpleSpan::from(0..7))], res.unwrap() ); } @@ -60,7 +67,7 @@ fn label() { fn precidence() { let src = "b[1-2] -> 1{}"; - let (res, err) = lexer().parse_recovery(src); + let (res, err) = lexer().parse(src).into_output_errors(); assert_eq!(err.len(), 0); @@ -87,7 +94,7 @@ fn precidence() { fn map_vs_with_mismatch() { let src = "map()map_with_mismatch()"; - let (res, err) = lexer().parse_recovery(src); + let (res, err) = lexer().parse(src).into_output_errors(); assert_eq!(err.len(), 0); @@ -110,7 +117,7 @@ fn map_vs_with_mismatch() { fn arguments() { let src = "map(f[ATG], $0, self)"; - let (res, err) = lexer().parse_recovery(src); + let (res, err) = lexer().parse(src).into_output_errors(); dbg!(&err); assert_eq!(err.len(), 0); diff --git a/tests/parser_tests.rs b/tests/parser_tests.rs index 60e81b4..21ca77f 100644 --- a/tests/parser_tests.rs +++ b/tests/parser_tests.rs @@ -1,32 +1,36 @@ -use chumsky::{prelude::*, Stream}; +mod common; + use seqproc::{ - lexer::lexer, - parser::{parser, Definition, Expr, Function, IntervalKind, IntervalShape, Read}, + parser::{Definition, Expr, Function, IntervalKind, IntervalShape, Read}, Nucleotide, S, }; +use crate::common::utils::{result_with_errs, ParsedInput}; + #[test] fn definition() { let src = "brc = b[10] 1{}2{r:}"; - let (res, lex_err) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); + let ParsedInput { + parse_res, + lex_errs, + parse_errs, + } = result_with_errs(src); - let (Some(res), parser_err) = - parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - else { - panic!() + let res = match parse_res { + Some(res) => res, + None => panic!(), }; - let expected_res = S( - vec![S( + let expected_res = S::new( + vec![S::new( Definition { - label: S("brc".to_string(), 0..3), - expr: S( - Expr::GeomPiece(IntervalKind::Barcode, IntervalShape::FixedLen(S(10, 8..10))), + label: S::new("brc".to_string(), 0..3), + expr: S::new( + Expr::GeomPiece( + IntervalKind::Barcode, + IntervalShape::FixedLen(S::new(10, 8..10)), + ), 6..11, ), }, @@ -35,8 +39,8 @@ fn definition() { 0..11, ); - assert!(lex_err.is_empty()); - assert!(parser_err.is_empty()); + assert!(lex_errs.is_empty()); + assert!(parse_errs.is_empty()); assert_eq!(res.definitions, expected_res); } @@ -44,30 +48,29 @@ fn definition() { fn transformation() { let src = "1{b[1]}2{r:} -> 1{}2{r:}"; - let (res, lex_err) = lexer().parse_recovery(src); - - let res = res.unwrap(); + let ParsedInput { + parse_res, + lex_errs, + parse_errs, + } = result_with_errs(src); - let len = res.len(); - - let (Some(res), parser_err) = - parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - else { - panic!() + let res = match parse_res { + Some(res) => res, + None => panic!(), }; let expected_res = vec![ - S( + S::new( Read { - index: S(1, 16..17), - exprs: vec![S(Expr::Label(S("t".to_string(), 18..21)), 18..21)], + index: S::new(1, 16..17), + exprs: vec![S::new(Expr::Label(S::new("t".to_string(), 18..21)), 18..21)], }, 16..22, ), - S( + S::new( Read { - index: S(2, 22..23), - exprs: vec![S( + index: S::new(2, 22..23), + exprs: vec![S::new( Expr::GeomPiece(IntervalKind::ReadSeq, IntervalShape::UnboundedLen), 24..26, )], @@ -76,8 +79,8 @@ fn transformation() { ), ]; - assert!(lex_err.is_empty()); - assert!(parser_err.is_empty()); + assert!(lex_errs.is_empty()); + assert!(parse_errs.is_empty()); assert_eq!(res.transforms.unwrap().0, expected_res); } @@ -90,47 +93,47 @@ another = remove(u[9-11]) -> 1{} "; - let (res, lex_err) = lexer().parse_recovery(src); - - let res = res.unwrap(); + let ParsedInput { + parse_res: _, + lex_errs, + parse_errs, + } = result_with_errs(src); - let len = res.len(); - - let (_, parser_err) = parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())); - - assert!(lex_err.is_empty()); - assert!(parser_err.is_empty()); + assert!(lex_errs.is_empty()); + assert!(parse_errs.is_empty()); } #[test] fn hamming() { let src = "1{hamming(, 1)}2{r:}"; - let (res, lex_err) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); + let ParsedInput { + parse_res, + lex_errs, + parse_errs, + } = result_with_errs(src); - let (Some(res), parser_err) = - parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - else { - panic!() + let res = match parse_res { + Some(res) => res, + None => panic!(), }; let expected_res = Read { - index: S(1, 0..1), - exprs: vec![S( + index: S::new(1, 0..1), + exprs: vec![S::new( Expr::Function( - S(Function::Hamming(1), 2..9), - S(Box::new(Expr::Label(S("brc".to_string(), 10..15))), 10..18), + S::new(Function::Hamming(1), 2..9), + S::new( + Box::new(Expr::Label(S::new("brc".to_string(), 10..15))), + 10..18, + ), ), 2..19, )], }; - assert!(lex_err.is_empty()); - assert!(parser_err.is_empty()); + assert!(lex_errs.is_empty()); + assert!(parse_errs.is_empty()); assert_eq!(res.reads.0[0].0, expected_res); } @@ -138,75 +141,77 @@ fn hamming() { fn remove() { let src = "1{remove()}2{r:}"; - let (res, lex_err) = lexer().parse_recovery(src); - - let res = res.unwrap(); + let ParsedInput { + parse_res, + lex_errs, + parse_errs, + } = result_with_errs(src); - let len = res.len(); - - let (Some(res), parser_err) = - parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - else { - panic!() + let res = match parse_res { + Some(res) => res, + None => panic!(), }; let expected_res = Read { - index: S(1, 0..1), - exprs: vec![S( + index: S::new(1, 0..1), + exprs: vec![S::new( Expr::Function( - S(Function::Remove, 2..8), - S(Box::new(Expr::Label(S("brc".to_string(), 9..14))), 9..14), + S::new(Function::Remove, 2..8), + S::new( + Box::new(Expr::Label(S::new("brc".to_string(), 9..14))), + 9..14, + ), ), 2..15, )], }; - assert!(lex_err.is_empty()); - assert!(parser_err.is_empty()); + assert!(lex_errs.is_empty()); + assert!(parse_errs.is_empty()); assert_eq!(res.reads.0[0].0, expected_res); } #[test] fn illegal_nest() { - let src = "1{hamming(pad(, 1), 1)}"; + let src = "1{hamming(pad(>, 1, A), 1)}2{r:}"; - let (res, lex_err) = lexer().parse_recovery(src); + let ParsedInput { + parse_res: _, + lex_errs, + parse_errs, + } = result_with_errs(src); - let res = res.unwrap(); - - let len = res.len(); - - let (_, parser_err) = parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())); - - assert!(lex_err.is_empty()); - assert_eq!(1, parser_err.len()); + assert!(lex_errs.is_empty()); + assert_eq!(1, parse_errs.len()); } #[test] fn nested() { let src = "1{rev(norm())}2{r:}"; - let (res, lex_err) = lexer().parse_recovery(src); - - let res = res.unwrap(); + let ParsedInput { + parse_res, + lex_errs, + parse_errs, + } = result_with_errs(src); - let len = res.len(); - - let (Some(res), parser_err) = - parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - else { - panic!() + let res = match parse_res { + Some(res) => res, + None => panic!(), }; let expected_res = Read { - index: S(1, 0..1), - exprs: vec![S( + index: S::new(1, 0..1), + exprs: vec![S::new( Expr::Function( - S(Function::Reverse, 2..5), - S( + S::new(Function::Reverse, 2..5), + S::new( Box::new(Expr::Function( - S(Function::Normalize, 6..10), - S(Box::new(Expr::Label(S("brc".to_string(), 11..16))), 11..16), + S::new(Function::Normalize, 6..10), + S::new( + Box::new(Expr::Label(S::new("brc".to_string(), 11..16))), + 11..16, + ), )), 6..17, ), @@ -215,8 +220,8 @@ fn nested() { )], }; - assert!(lex_err.is_empty()); - assert!(parser_err.is_empty()); + assert!(lex_errs.is_empty()); + assert!(parse_errs.is_empty()); assert_eq!(res.reads.0[0].0, expected_res); } @@ -224,24 +229,23 @@ fn nested() { fn labeled_unbounded() { let src = "1{b:}2{r:}"; - let (res, lex_err) = lexer().parse_recovery(src); - - let res = res.unwrap(); + let ParsedInput { + parse_res, + lex_errs, + parse_errs, + } = result_with_errs(src); - let len = res.len(); - - let (Some(res), parser_err) = - parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - else { - panic!() + let res = match parse_res { + Some(res) => res, + None => panic!(), }; let expected_res = Read { - index: S(1, 0..1), - exprs: vec![S( + index: S::new(1, 0..1), + exprs: vec![S::new( Expr::LabeledGeomPiece( - S("barcode".to_string(), 3..12), - S( + S::new("barcode".to_string(), 3..12), + S::new( Box::new(Expr::GeomPiece( IntervalKind::Barcode, IntervalShape::UnboundedLen, @@ -253,8 +257,8 @@ fn labeled_unbounded() { )], }; - assert!(lex_err.is_empty()); - assert!(parser_err.is_empty()); + assert!(lex_errs.is_empty()); + assert!(parse_errs.is_empty()); assert_eq!(res.reads.0[0].0, expected_res); } @@ -262,31 +266,30 @@ fn labeled_unbounded() { fn ranged() { let src = "1{b[10-11]}2{r:}"; - let (res, lex_err) = lexer().parse_recovery(src); - - let res = res.unwrap(); + let ParsedInput { + parse_res, + lex_errs, + parse_errs, + } = result_with_errs(src); - let len = res.len(); - - let (Some(res), parser_err) = - parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - else { - panic!() + let res = match parse_res { + Some(res) => res, + None => panic!(), }; let expected_res = Read { - index: S(1, 0..1), - exprs: vec![S( + index: S::new(1, 0..1), + exprs: vec![S::new( Expr::GeomPiece( IntervalKind::Barcode, - IntervalShape::RangedLen(S((10, 11), 4..9)), + IntervalShape::RangedLen(S::new((10, 11), 4..9)), ), 2..10, )], }; - assert!(lex_err.is_empty()); - assert!(parser_err.is_empty()); + assert!(lex_errs.is_empty()); + assert!(parse_errs.is_empty()); assert_eq!(res.reads.0[0].0, expected_res); } @@ -294,28 +297,30 @@ fn ranged() { fn fixed() { let src = "1{r[10]}2{r:}"; - let (res, lex_err) = lexer().parse_recovery(src); - - let res = res.unwrap(); + let ParsedInput { + parse_res, + lex_errs, + parse_errs, + } = result_with_errs(src); - let len = res.len(); - - let (Some(res), parser_err) = - parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - else { - panic!() + let res = match parse_res { + Some(res) => res, + None => panic!(), }; let expected_res = Read { - index: S(1, 0..1), - exprs: vec![S( - Expr::GeomPiece(IntervalKind::ReadSeq, IntervalShape::FixedLen(S(10, 4..6))), + index: S::new(1, 0..1), + exprs: vec![S::new( + Expr::GeomPiece( + IntervalKind::ReadSeq, + IntervalShape::FixedLen(S::new(10, 4..6)), + ), 2..7, )], }; - assert!(lex_err.is_empty()); - assert!(parser_err.is_empty()); + assert!(lex_errs.is_empty()); + assert!(parse_errs.is_empty()); assert_eq!(res.reads.0[0].0, expected_res); } @@ -323,24 +328,23 @@ fn fixed() { fn fixed_seq() { let src = "1{f[GACTU]}2{r:}"; - let (res, lex_err) = lexer().parse_recovery(src); - - let res = res.unwrap(); + let ParsedInput { + parse_res, + lex_errs, + parse_errs, + } = result_with_errs(src); - let len = res.len(); - - let (Some(res), parser_err) = - parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())) - else { - panic!() + let res = match parse_res { + Some(res) => res, + None => panic!(), }; let expected_res = Read { - index: S(1, 0..1), - exprs: vec![S( + index: S::new(1, 0..1), + exprs: vec![S::new( Expr::GeomPiece( IntervalKind::FixedSeq, - IntervalShape::FixedSeq(S( + IntervalShape::FixedSeq(S::new( vec![ Nucleotide::G, Nucleotide::A, @@ -355,8 +359,8 @@ fn fixed_seq() { )], }; - assert!(lex_err.is_empty()); - assert!(parser_err.is_empty()); + assert!(lex_errs.is_empty()); + assert!(parse_errs.is_empty()); assert_eq!(res.reads.0[0].0, expected_res); } @@ -364,142 +368,110 @@ fn fixed_seq() { fn fail_ranged_seq() { let src = "1{f[1-2]}2{r:}"; - let (res, lex_err) = lexer().parse_recovery(src); - - let res = res.unwrap(); + let ParsedInput { + parse_res: _, + lex_errs, + parse_errs, + } = result_with_errs(src); - let len = res.len(); - - let (_, parser_err) = parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())); - - assert!(lex_err.is_empty()); - assert_eq!(1, parser_err.len()); + assert!(lex_errs.is_empty()); + assert_eq!(1, parse_errs.len()); } #[test] fn allow_expr_arg() { let src = "1{map(b[9-10], \"filepath\", norm(self))}2{r:}"; - let (res, lex_err) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); - - let (_, parser_err) = parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())); - - assert!(lex_err.is_empty()); - assert!(parser_err.is_empty()); -} - -#[test] -fn fail_map() { - let src = "1{map(pad(b[9-10], 3), \"filepath\", norm(self))}2{r:}"; - - let (res, lex_err) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); + let ParsedInput { + parse_res: _, + lex_errs, + parse_errs, + } = result_with_errs(src); - let (_, parser_err) = parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())); - - assert!(lex_err.is_empty()); - assert_eq!(1, parser_err.len()); + assert!(lex_errs.is_empty()); + assert!(parse_errs.is_empty()); } #[test] fn fail_prefix_label_underscore() { let src = "_brc = b[10] 1{}2{r:}"; - let (res, lex_err) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); - - let (_, parser_err) = parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())); + let ParsedInput { + parse_res: _, + lex_errs, + parse_errs, + } = result_with_errs(src); - assert!(lex_err.is_empty()); - assert_eq!(1, parser_err.len()); + assert!(lex_errs.is_empty()); + assert_eq!(1, parse_errs.len()); } #[test] fn fail_prefix_inlinelabel_underscore() { let src = "1{b<_brc>[10]}2{r:}"; - let (res, lex_err) = lexer().parse_recovery(src); + let ParsedInput { + parse_res: _, + lex_errs, + parse_errs, + } = result_with_errs(src); - let res = res.unwrap(); - - let len = res.len(); - - let (_, parser_err) = parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())); - - assert!(lex_err.is_empty()); - assert_eq!(1, parser_err.len()); + assert!(lex_errs.is_empty()); + assert_eq!(1, parse_errs.len()); } #[test] fn ok_mid_inlinelabel_underscore() { let src = "1{b[10]}2{r:}"; - let (res, lex_err) = lexer().parse_recovery(src); - - let res = res.unwrap(); + let ParsedInput { + parse_res: _, + lex_errs, + parse_errs, + } = result_with_errs(src); - let len = res.len(); - - let (_, parser_err) = parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())); - - assert!(lex_err.is_empty()); - assert!(parser_err.is_empty()); + assert!(lex_errs.is_empty()); + assert!(parse_errs.is_empty()); } #[test] fn ok_mid_label_underscore() { let src = "b_rc = b[10] 1{}2{r:}"; - let (res, lex_err) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); + let ParsedInput { + parse_res: _, + lex_errs, + parse_errs, + } = result_with_errs(src); - let (_, parser_err) = parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())); - - assert!(lex_err.is_empty()); - assert!(parser_err.is_empty()); + assert!(lex_errs.is_empty()); + assert!(parse_errs.is_empty()); } #[test] fn filter_test() { let src = "b_rc = filter(b[10], $0) 1{}2{r:}"; - let (res, lex_err) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); - - let (_, parser_err) = parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())); + let ParsedInput { + parse_res: _, + lex_errs, + parse_errs, + } = result_with_errs(src); - assert!(lex_err.is_empty()); - assert!(parser_err.is_empty()); + assert!(lex_errs.is_empty()); + assert!(parse_errs.is_empty()); } #[test] fn filter_test_too_many_args() { let src = "b_rc = filter(b[10], $0, 1) 1{}2{r:}"; - let (res, lex_err) = lexer().parse_recovery(src); - - let res = res.unwrap(); - - let len = res.len(); - - let (_, parser_err) = parser().parse_recovery(Stream::from_iter(len..len + 1, res.into_iter())); + let ParsedInput { + parse_res: _, + lex_errs, + parse_errs, + } = result_with_errs(src); - assert!(lex_err.is_empty()); - assert_eq!(1, parser_err.len()); + assert!(lex_errs.is_empty()); + assert_eq!(1, parse_errs.len()); }