Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "noa-parser"
version = "0.5.0"
version = "0.6.0"
edition = "2024"
homepage = "https://github.com/Akanoa/noa-parser"
repository = "https://github.com/Akanoa/noa-parser"
Expand Down
4 changes: 4 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog

**0.6.0**

- Add Recognizer

**0.5.0**

- Add support for separated lists
Expand Down
5 changes: 3 additions & 2 deletions Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,7 @@ use noa_parser::matcher::{Match, MatchSize};
use noa_parser::recognizer::Recognizer;
use noa_parser::scanner::Scanner;

#[derive(Debug)]
enum OperatorTokens {
/// The `==` operator.
Equal,
Expand Down Expand Up @@ -365,7 +366,7 @@ fn main() -> ParseResult<()> {
.finish()
.ok_or(ParseError::UnexpectedToken)?;

println!("{}", String::from_utf8_lossy(recognized)); // ==
println!("{:?}", recognized); // Equal (the recognized `==` operator)

let data = b"!= 2";
let mut scanner = Scanner::new(data);
Expand All @@ -375,7 +376,7 @@ fn main() -> ParseResult<()> {
.finish()
.ok_or(ParseError::UnexpectedToken)?;

println!("{}", String::from_utf8_lossy(recognized)); // !=
println!("{:?}", recognized); // !=

let data = b"> 2";
let mut scanner = Scanner::new(data);
Expand Down
5 changes: 3 additions & 2 deletions examples/operators.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use noa_parser::matcher::{Match, MatchSize};
use noa_parser::recognizer::Recognizer;
use noa_parser::scanner::Scanner;

#[derive(Debug)]
enum OperatorTokens {
/// The `==` operator.
Equal,
Expand Down Expand Up @@ -38,7 +39,7 @@ fn main() -> ParseResult<()> {
.finish()
.ok_or(ParseError::UnexpectedToken)?;

println!("{}", String::from_utf8_lossy(recognized)); // ==
println!("{:?}", recognized); // Equal (the recognized `==` operator)

let data = b"!= 2";
let mut scanner = Scanner::new(data);
Expand All @@ -48,7 +49,7 @@ fn main() -> ParseResult<()> {
.finish()
.ok_or(ParseError::UnexpectedToken)?;

println!("{}", String::from_utf8_lossy(recognized)); // !=
println!("{:?}", recognized); // !=

let data = b"> 2";
let mut scanner = Scanner::new(data);
Expand Down
1 change: 1 addition & 0 deletions src/bytes/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@
pub mod components;
pub mod matchers;
pub mod primitives;
mod recognizer;
pub mod token;
1 change: 1 addition & 0 deletions src/bytes/primitives/whitespace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ pub struct OptionalWhitespaces;
impl<'a> Visitor<'a, u8> for Whitespaces {
fn accept(scanner: &mut Scanner<'a, u8>) -> ParseResult<Self> {
let mut found = false;

while Token::Whitespace.recognize(scanner)?.is_some() {
found = true;
}
Expand Down
28 changes: 28 additions & 0 deletions src/bytes/recognizer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
use crate::errors::{ParseError, ParseResult};
use crate::matcher::{Match, MatchSize};
use crate::recognizer::Recognizable;
use crate::scanner::Scanner;

/// Blanket [`Recognizable`] implementation for byte matchers.
///
/// Every `M: Match<u8> + MatchSize` can be recognized against a
/// `Scanner<'a, u8>`; on success the recognized bytes are handed back as a
/// sub-slice of the scanner's data.
impl<'a, M: Match<u8> + MatchSize> Recognizable<'a, u8, &'a [u8]> for M {
    /// Try to recognize `self` at the scanner's current position.
    ///
    /// # Arguments
    /// * `scanner` - The scanner to recognize the object for.
    ///
    /// # Returns
    /// * `Ok(Some(slice))` if the matcher accepted the remaining input; the
    ///   scanner is advanced by the matched size and `slice` covers exactly
    ///   the matched bytes,
    /// * `Ok(None)` if the matcher rejected the remaining input; the scanner
    ///   is not advanced.
    ///
    /// # Errors
    /// * `ParseError::UnexpectedEndOfInput` if the scanner is empty, or if the
    ///   matcher reports a match size that runs past the end of the data
    ///   (reported as an error instead of panicking on an out-of-range slice).
    fn recognize(self, scanner: &mut Scanner<'a, u8>) -> ParseResult<Option<&'a [u8]>> {
        if scanner.is_empty() {
            return Err(ParseError::UnexpectedEndOfInput);
        }

        let (matched, size) = self.matcher(scanner.remaining());
        if !matched {
            return Ok(None);
        }

        // Resolve the recognized range with checked slicing *before* moving
        // the scanner, so a bogus size never leaves it half-advanced.
        let start = scanner.current_position();
        let recognized = scanner
            .data()
            .get(start..)
            .and_then(|tail| tail.get(..size))
            .ok_or(ParseError::UnexpectedEndOfInput)?;

        // No second emptiness guard is needed: the entry check already ruled
        // out an empty scanner and nothing since has advanced it.
        scanner.bump_by(size);
        Ok(Some(recognized))
    }
}
67 changes: 52 additions & 15 deletions src/recognizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,19 @@ pub trait Recognizable<'a, T, V>: MatchSize {
fn recognize(self, scanner: &mut Scanner<'a, T>) -> ParseResult<Option<V>>;
}

/// Recognition trait mirroring [`Recognizable`], exposed under the distinct
/// method name `recognize_self`.
///
/// The blanket implementation in this module covers every
/// `M: Match<T> + MatchSize` with `V = M`: on a successful match the scanner is
/// advanced and the matcher value itself is returned, rather than a slice of
/// the input.
///
/// # Type Parameters
/// * `T` - The element type of the scanned data.
/// * `V` - The type of the value produced on a successful recognition.
pub trait RecognizeSelf<'a, T, V>: MatchSize {
/// Try to recognize the object for the given scanner.
///
/// Takes `self` by value, so the recognizing object is consumed by the call.
///
/// # Arguments
/// * `scanner` - The scanner to recognize the object for.
///
/// # Returns
/// * `Ok(Some(V))` if the object was recognized,
/// * `Ok(None)` if the object was not recognized,
/// * `Err(ParseError)` if an error occurred
///
/// # Errors
/// The blanket implementation in this module returns
/// `ParseError::UnexpectedEndOfInput` when the scanner is already empty.
fn recognize_self(self, scanner: &mut Scanner<'a, T>) -> ParseResult<Option<V>>;
}

/// Recognize an object for the given scanner.
///
/// # Type Parameters
Expand Down Expand Up @@ -60,8 +73,8 @@ pub fn recognize<'a, T, V, R: Recognizable<'a, T, V>>(

/// Recognize an object for the given scanner.
/// Return a slice of the recognized object.
impl<'a, T, M: Match<T> + MatchSize> Recognizable<'a, T, &'a [T]> for M {
fn recognize(self, scanner: &mut Scanner<'a, T>) -> ParseResult<Option<&'a [T]>> {
impl<'a, T, M: Match<T> + MatchSize> RecognizeSelf<'a, T, M> for M {
fn recognize_self(self, scanner: &mut Scanner<'a, T>) -> ParseResult<Option<M>> {
if scanner.is_empty() {
return Err(ParseError::UnexpectedEndOfInput);
}
Expand All @@ -72,13 +85,10 @@ impl<'a, T, M: Match<T> + MatchSize> Recognizable<'a, T, &'a [T]> for M {
if !result {
return Ok(None);
}
let curent_position = scanner.current_position();
if !scanner.is_empty() {
scanner.bump_by(size);
}
Ok(Some(
&scanner.data()[curent_position..curent_position + size],
))
Ok(Some(self))
}
}

Expand All @@ -100,7 +110,7 @@ pub struct Recognizer<'a, 'container, T, U> {
scanner: &'container mut Scanner<'a, T>,
}

impl<'a, 'b, T, U> Recognizer<'a, 'b, T, U> {
impl<'a, 'b, T, R: RecognizeSelf<'a, T, R>> Recognizer<'a, 'b, T, R> {
/// Create a new `Recognizer` with the given scanner.
///
/// # Arguments
Expand Down Expand Up @@ -131,16 +141,13 @@ impl<'a, 'b, T, U> Recognizer<'a, 'b, T, U> {
/// returns the current recognizer with the current position of the scanner
/// rewound to the position at which the `U` was attempted, and `data` is left
/// `None`.
pub fn try_or<R: Recognizable<'a, T, U>>(
mut self,
element: R,
) -> ParseResult<Recognizer<'a, 'b, T, U>> {
pub fn try_or(mut self, element: R) -> ParseResult<Recognizer<'a, 'b, T, R>> {
// Propagate result
if self.data.is_some() {
return Ok(self);
}
// Or apply current recognizer
if let Some(found) = element.recognize(self.scanner)? {
if let Some(found) = element.recognize_self(self.scanner)? {
self.data = Some(found);
}
Ok(self)
Expand All @@ -153,7 +160,7 @@ impl<'a, 'b, T, U> Recognizer<'a, 'b, T, U> {
///
/// If the recognizer was successful (i.e., `data` is `Some`), returns the
/// `U` that was recognized. Otherwise, returns `None`.
pub fn finish(self) -> Option<U> {
pub fn finish(self) -> Option<R> {
self.data
}

Expand All @@ -172,13 +179,43 @@ impl<'a, 'b, T, U> Recognizer<'a, 'b, T, U> {
/// `U` that was recognized. If the recognizer was not successful, the
/// `closure` is called with the `Scanner` and the result of the closure is
/// returned.
pub fn finish_with<F>(self, closure: F) -> ParseResult<U>
pub fn finish_with<F>(self, closure: F) -> ParseResult<R>
where
F: FnOnce(&mut Scanner<'a, T>) -> ParseResult<U>,
F: FnOnce(&mut Scanner<'a, T>) -> ParseResult<R>,
{
match self.data {
None => closure(self.scanner),
Some(token) => Ok(token),
}
}
}

#[cfg(test)]
mod tests {
    use crate::bytes::token::Token;
    use crate::errors::ParseResult;
    use crate::recognizer::{RecognizeSelf, Recognizer};
    use crate::scanner::Scanner;

    /// A single token recognized directly through `recognize_self` yields the
    /// token itself.
    #[test]
    fn test_recognizer() {
        let input = b">";
        let mut scanner = Scanner::new(input);

        let recognized = Token::GreaterThan
            .recognize_self(&mut scanner)
            .expect("failed to parse");

        assert_eq!(recognized, Some(Token::GreaterThan));
    }

    /// Chained alternatives: the first candidate (`<`) does not match the
    /// input, so the second one (`>`) is the token reported by `finish`.
    #[test]
    fn test_recognizer_multiple() -> ParseResult<()> {
        let input = b">>";
        let mut scanner = Scanner::new(input);

        let recognizer = Recognizer::new(&mut scanner)
            .try_or(Token::LessThan)?
            .try_or(Token::GreaterThan)?;
        let recognized = recognizer.finish().expect("failed to parse");

        assert_eq!(recognized, Token::GreaterThan);
        Ok(())
    }
}