From 03c3970653e36c7d370d11b182125f4b4533faf0 Mon Sep 17 00:00:00 2001 From: benoitlx Date: Sun, 2 Feb 2025 14:05:45 +0100 Subject: [PATCH 01/25] chore: gitignore .vscode/ --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index efe3eb1..82587a5 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,5 @@ Cargo.lock # Added by cargo /target + +.vscode/ \ No newline at end of file From 79693c0181aa76da375f72045eea1e5073563292 Mon Sep 17 00:00:00 2001 From: benoitlx Date: Sun, 2 Feb 2025 14:17:11 +0100 Subject: [PATCH 02/25] wip: token implementation --- Cargo.toml | 2 +- src/lexer.rs | 82 ++++++++++++++++++++++++++++++++++++++++++++++++ src/main.rs | 18 ++++++----- src/tokenizer.rs | 58 ---------------------------------- tests/test.asm | 40 +++++++++++++++++++++++ 5 files changed, 134 insertions(+), 66 deletions(-) create mode 100644 src/lexer.rs delete mode 100644 src/tokenizer.rs create mode 100644 tests/test.asm diff --git a/Cargo.toml b/Cargo.toml index 49d0dc8..b7fae66 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,7 +4,7 @@ version = "0.1.0" edition = "2021" [dependencies] -logos = "0.15.0" +logos = { version = "0.15.0", features = ["debug"] } [dev-dependencies] rusty-hook = "^0.11.2" diff --git a/src/lexer.rs b/src/lexer.rs new file mode 100644 index 0000000..edc5ef9 --- /dev/null +++ b/src/lexer.rs @@ -0,0 +1,82 @@ +use logos::{Lexer, Logos}; + +#[derive(Logos, Debug, PartialEq)] +#[logos(skip r"\s+")] +pub enum Token { + #[regex(r" [+-~&|^=] ", Op::new, priority = 8)] + Operation(Op), + + #[regex(r" [(==)<>(>=)(<=)(!=)] ", |_| Cond::Eq, priority = 7)] + Condition(Cond), + + #[regex(r"[0-9]+", |_| 3, priority = 6)] + #[regex("(0x|0X){1}[a-fA-F0-9]+", |_| 3, priority = 6)] + #[regex("(0b|0B){1}(0|1)+", |_| 3, priority = 6)] + Value(u16), + + #[token("JMP", |_| Inst::Jump, priority = 5)] + Instruction(Inst), + + #[token(":", |_| Dir::Label, priority = 4)] + #[token("DEFINE", |_| Dir::Define, priority = 4)] + Directive(Dir), + + #[token("A", |_| Reg::A, priority = 3)] + #[token("*A", |_| Reg::AStar, priority = 3)] + #[token("V", |_| Reg::V, priority = 3)] + #[token("*V", |_| Reg::VStar, priority = 3)] + #[token("D", |_| Reg::D, priority = 3)] + Register(Reg), + + #[regex(r"[a-zA-Z_]+", |lex| lex.slice().to_string(), priority = 1)] + Identifier(String), + + #[regex(r"\s?;.*")] + Comment, +} + +#[derive(Debug, PartialEq)] +pub enum Op { + Add, + Sub, + And, + Assignement, +} + +impl Op { + fn new(lex: &mut Lexer) -> Option { + match lex.slice().trim() { + "+" => Some(Op::Add), + "-" => Some(Op::Sub), + "&" => Some(Op::And), + "=" => Some(Op::Assignement), + _ => None + } + } +} + +#[derive(Debug, PartialEq)] +pub enum Reg { + A, + V, + AStar, + VStar, + D, +} + +#[derive(Debug, PartialEq)] +pub enum Cond { + Eq, + Neq, +} + +#[derive(Debug, PartialEq)] +pub enum Inst { + Jump, +} + +#[derive(Debug, PartialEq)] +pub enum Dir { + Define, + Label, +} \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 02c85a5..4ba646a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,16 +1,20 @@ mod constants; -mod tokenizer; +mod lexer; -use tokenizer::Token; +use lexer::Token; use logos::Logos; fn main() { - let lex = Token::lexer("A <- 256"); + use std::io::Read; - for result in lex { - match result { - Ok(token) => println!("{:#?}", token), - Err(_) => panic!("Err occured"), + if let Ok(mut file) = std::fs::File::open("tests/test.asm") { + let mut content = String::new(); + let _ = file.read_to_string(&mut content); + + let mut lex = Token::lexer(content.as_str()); + + while let Some(result) = lex.next() { + println!("{:?}", result); } } } diff --git a/src/tokenizer.rs b/src/tokenizer.rs deleted file mode 100644 index 054cd8b..0000000 --- a/src/tokenizer.rs +++ /dev/null @@ -1,58 +0,0 @@ -use logos::{Lexer, Logos}; -use crate::constants::MAX_LOAD_VALUE; - -pub fn parse_int(lex: &mut Lexer) -> Option { - let slice = lex.slice(); - let n: u16 = slice.parse().ok()?; - assert!( - n <= MAX_LOAD_VALUE, - "Can't load data exceeding {} from ram", - MAX_LOAD_VALUE - ); - Some(n) -} - -#[derive(Logos, Debug, PartialEq)] -#[logos(skip r"[ \t\n\f]+")] -pub enum Token { - // Operations - #[token("+")] - #[token("ADD")] - Add, - - #[token("<-")] - Assignment, - - // Registers - #[token("A")] - A, - - #[token("*A")] - StarA, - - #[token("V")] - V, - - #[token("*V")] - StarV, - - #[token("C")] - C, - - // Values - #[regex("[0-9]+", parse_int)] - Number(u16), -} - - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_add_token() { - let mut lex = Token::lexer("+ ADD"); - assert_eq!(lex.next(), Some(Ok(Token::Add))); - assert_eq!(lex.next(), Some(Ok(Token::Add))); - } -} \ No newline at end of file diff --git a/tests/test.asm b/tests/test.asm new file mode 100644 index 0000000..ba9a5d1 --- /dev/null +++ b/tests/test.asm @@ -0,0 +1,40 @@ +DEFINE io_adr 0x7fff +DEFINE ob_detection_mask 0x100 +DEFINE movement_mask 0x600 +DEFINE move_mask 0x4 +DEFINE left 0x8 + + +wait: +A = movement_mask +D = A +A = io_adr +D = D & *A +A = wait +D ; JNE + +check: +A = ob_detection_mask +D = A +A = io_adr +D = D & *A +A = move +D ; JEQ + +turn: +A = left +D = A + +send: +A = io_adr +*A = D +A = wait +JMP + +move: +A = move_mask +D = A +A = send +JMP +D ; this is a comment +D \ No newline at end of file From b433187a48e33dba09e6df9974ff9fe653006ced Mon Sep 17 00:00:00 2001 From: benoitlx Date: Sun, 2 Feb 2025 15:04:25 +0100 Subject: [PATCH 03/25] feat: implement new for register - using regex for register to be able to return the closest Reg --- src/lexer.rs | 27 ++++++++---- tests/test.token_stream | 92 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 110 insertions(+), 9 deletions(-) create mode 100644 tests/test.token_stream diff --git a/src/lexer.rs b/src/lexer.rs index edc5ef9..fc2b4d4 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -10,8 +10,8 @@ pub enum Token { Condition(Cond), #[regex(r"[0-9]+", |_| 3, priority = 6)] - #[regex("(0x|0X){1}[a-fA-F0-9]+", |_| 3, priority = 6)] - #[regex("(0b|0B){1}(0|1)+", |_| 3, priority = 6)] + #[regex("(0x|0X){1}[a-fA-F0-9]+", |_| 3, priority = 6)] + #[regex("(0b|0B){1}(0|1)+", |_| 3, priority = 6)] Value(u16), #[token("JMP", |_| Inst::Jump, priority = 5)] @@ -21,11 +21,7 @@ pub enum Token { #[token("DEFINE", |_| Dir::Define, priority = 4)] Directive(Dir), - #[token("A", |_| Reg::A, priority = 3)] - #[token("*A", |_| Reg::AStar, priority = 3)] - #[token("V", |_| Reg::V, priority = 3)] - #[token("*V", |_| Reg::VStar, priority = 3)] - #[token("D", |_| Reg::D, priority = 3)] + #[regex(r"\*?[A-Z]{1}", Reg::new, priority = 3)] Register(Reg), #[regex(r"[a-zA-Z_]+", |lex| lex.slice().to_string(), priority = 1)] @@ -50,7 +46,7 @@ impl Op { "-" => Some(Op::Sub), "&" => Some(Op::And), "=" => Some(Op::Assignement), - _ => None + _ => None, // todo: return a beautiful error } } } @@ -64,6 +60,19 @@ pub enum Reg { D, } +impl Reg { + fn new(lex: &mut Lexer) -> Option { + match lex.slice() { + "A" => Some(Reg::A), + "V" => Some(Reg::V), + "*A" => Some(Reg::AStar), + "*V" => Some(Reg::VStar), + "D" => Some(Reg::D), + _ => None, // todo: return a beautiful error + } + } +} + #[derive(Debug, PartialEq)] pub enum Cond { Eq, @@ -79,4 +88,4 @@ pub enum Inst { pub enum Dir { Define, Label, -} \ No newline at end of file +} diff --git a/tests/test.token_stream b/tests/test.token_stream new file mode 100644 index 0000000..a4bb09f --- /dev/null +++ b/tests/test.token_stream @@ -0,0 +1,92 @@ +Ok(Directive(Define)) +Ok(Identifier("io_adr")) +Ok(Value(3)) +Ok(Directive(Define)) +Ok(Identifier("ob_detection_mask")) +Ok(Value(3)) +Ok(Directive(Define)) +Ok(Identifier("movement_mask")) +Ok(Value(3)) +Ok(Directive(Define)) +Ok(Identifier("move_mask")) +Ok(Value(3)) +Ok(Directive(Define)) +Ok(Identifier("left")) +Ok(Value(3)) +Ok(Identifier("wait")) +Ok(Directive(Label)) +Ok(Register(A)) +Ok(Operation(Assignement)) +Ok(Identifier("movement_mask")) +Ok(Register(D)) +Ok(Operation(Assignement)) +Ok(Register(A)) +Ok(Register(A)) +Ok(Operation(Assignement)) +Ok(Identifier("io_adr")) +Ok(Register(D)) +Ok(Operation(Assignement)) +Ok(Register(D)) +Ok(Operation(And)) +Ok(Register(AStar)) +Ok(Register(A)) +Ok(Operation(Assignement)) +Ok(Identifier("wait")) +Ok(Register(D)) +Ok(Comment) +Ok(Identifier("check")) +Ok(Directive(Label)) +Ok(Register(A)) +Ok(Operation(Assignement)) +Ok(Identifier("ob_detection_mask")) +Ok(Register(D)) +Ok(Operation(Assignement)) +Ok(Register(A)) +Ok(Register(A)) +Ok(Operation(Assignement)) +Ok(Identifier("io_adr")) +Ok(Register(D)) +Ok(Operation(Assignement)) +Ok(Register(D)) +Ok(Operation(And)) +Ok(Register(AStar)) +Ok(Register(A)) +Ok(Operation(Assignement)) +Ok(Identifier("move")) +Ok(Register(D)) +Ok(Comment) +Ok(Identifier("turn")) +Ok(Directive(Label)) +Ok(Register(A)) +Ok(Operation(Assignement)) +Ok(Identifier("left")) +Ok(Register(D)) +Ok(Operation(Assignement)) +Ok(Register(A)) +Ok(Identifier("send")) +Ok(Directive(Label)) +Ok(Register(A)) +Ok(Operation(Assignement)) +Ok(Identifier("io_adr")) +Ok(Register(AStar)) +Ok(Operation(Assignement)) +Ok(Register(D)) +Ok(Register(A)) +Ok(Operation(Assignement)) +Ok(Identifier("wait")) +Ok(Instruction(Jump)) +Ok(Identifier("move")) +Ok(Directive(Label)) +Ok(Register(A)) +Ok(Operation(Assignement)) +Ok(Identifier("move_mask")) +Ok(Register(D)) +Ok(Operation(Assignement)) +Ok(Register(A)) +Ok(Register(A)) +Ok(Operation(Assignement)) +Ok(Identifier("send")) +Ok(Instruction(Jump)) +Ok(Register(D)) +Ok(Comment) +Ok(Register(D)) From d51b5f4446cd57a171d9b4f69705450ba2a8668a Mon Sep 17 00:00:00 2001 From: benoitlx Date: Sun, 2 Feb 2025 16:30:46 +0100 Subject: [PATCH 04/25] refactor: spec in a separate file from lexer - trait to handle token --- src/constants.rs | 1 - src/lexer.rs | 67 +++++++++------------------------------ src/main.rs | 1 - src/spec.rs | 82 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 96 insertions(+), 55 deletions(-) delete mode 100644 src/constants.rs create mode 100644 src/spec.rs diff --git a/src/constants.rs b/src/constants.rs deleted file mode 100644 index 96fa0e9..0000000 --- a/src/constants.rs +++ /dev/null @@ -1 +0,0 @@ -pub const MAX_LOAD_VALUE: u16 = 2_u16.pow(15); \ No newline at end of file diff --git a/src/lexer.rs b/src/lexer.rs index fc2b4d4..f6e33cb 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -1,5 +1,19 @@ use logos::{Lexer, Logos}; +/* >> Architecture being used << */ +#[path ="spec.rs"] +mod spec; +use spec::arch_v1::*; + +/// This trait is either used by the lexer to produce Token with the new method +/// or by the parser to generate the bit stream from a Token +pub trait HandleToken { + fn bit_stream(&self) -> String; // get the bit stream from an item (Reg, Op, Inst) + + fn new(lex: &mut Lexer) -> Option + where Self: Sized; // todo: default implementation qui renvoit une erreur en spécifiant le type Self +} + #[derive(Logos, Debug, PartialEq)] #[logos(skip r"\s+")] pub enum Token { @@ -31,59 +45,6 @@ pub enum Token { Comment, } -#[derive(Debug, PartialEq)] -pub enum Op { - Add, - Sub, - And, - Assignement, -} - -impl Op { - fn new(lex: &mut Lexer) -> Option { - match lex.slice().trim() { - "+" => Some(Op::Add), - "-" => Some(Op::Sub), - "&" => Some(Op::And), - "=" => Some(Op::Assignement), - _ => None, // todo: return a beautiful error - } - } -} - -#[derive(Debug, PartialEq)] -pub enum Reg { - A, - V, - AStar, - VStar, - D, -} - -impl Reg { - fn new(lex: &mut Lexer) -> Option { - match lex.slice() { - "A" => Some(Reg::A), - "V" => Some(Reg::V), - "*A" => Some(Reg::AStar), - "*V" => Some(Reg::VStar), - "D" => Some(Reg::D), - _ => None, // todo: return a beautiful error - } - } -} - -#[derive(Debug, PartialEq)] -pub enum Cond { - Eq, - Neq, -} - -#[derive(Debug, PartialEq)] -pub enum Inst { - Jump, -} - #[derive(Debug, PartialEq)] pub enum Dir { Define, diff --git a/src/main.rs b/src/main.rs index 4ba646a..b049273 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,3 @@ -mod constants; mod lexer; use lexer::Token; diff --git a/src/spec.rs b/src/spec.rs new file mode 100644 index 0000000..4031c5a --- /dev/null +++ b/src/spec.rs @@ -0,0 +1,82 @@ +use logos::Lexer; + + +pub mod arch_v1 { + use logos::Lexer; + use crate::lexer::HandleToken; + + #[allow(dead_code)] + + pub const MAX_LOAD_VALUE: u16 = 2_u16.pow(15) - 1; + + #[derive(Debug, PartialEq)] + pub enum Op { + Add, + Sub, + And, + Assignement, + } + + impl HandleToken for Op { + fn new(lex: &mut Lexer) -> Option { + match lex.slice().trim() { + "+" => Some(Op::Add), + "-" => Some(Op::Sub), + "&" => Some(Op::And), + "=" => Some(Op::Assignement), + _ => None, // todo: return a beautiful error + } + } + + fn bit_stream(&self) -> String { + match self { + _ => "00011" + } + .to_string() + } + } + + #[derive(Debug, PartialEq)] + pub enum Reg { + A, + V, + AStar, + VStar, + D, + } + + impl HandleToken for Reg { + fn new(lex: &mut Lexer) -> Option { + match lex.slice() { + "A" => Some(Reg::A), + "V" => Some(Reg::V), + "*A" => Some(Reg::AStar), + "*V" => Some(Reg::VStar), + "D" => Some(Reg::D), + _ => None, // todo: return a beautiful error + } + } + + fn bit_stream(&self) -> String { + match self { + Reg::A => "000", + Reg::AStar => "001", + Reg::V => "010", + Reg::VStar => "011", + Reg::D => "100", + } + .to_string() + } // todo: colorize the string depending on the register + } + + #[derive(Debug, PartialEq)] + pub enum Cond { + Eq, + Neq, + } + + #[derive(Debug, PartialEq)] + pub enum Inst { + Jump, + } +} \ No newline at end of file From e05e0108f03a60242040cd66fc6381e315e15ba2 Mon Sep 17 00:00:00 2001 From: benoitlx Date: Sun, 2 Feb 2025 17:35:50 +0100 Subject: [PATCH 05/25] feat!: changed regex and priority - implemented all operations --- src/lexer.rs | 67 +++++++++++++++---- src/main.rs | 2 +- src/spec.rs | 9 ++- tests/realistic_test.asm | 40 +++++++++++ ...ken_stream => realistic_test.token_stream} | 0 tests/test.asm | 12 +++- 6 files changed, 111 insertions(+), 19 deletions(-) create mode 100644 tests/realistic_test.asm rename tests/{test.token_stream => realistic_test.token_stream} (100%) diff --git a/src/lexer.rs b/src/lexer.rs index f6e33cb..0615a91 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -15,33 +15,41 @@ pub trait HandleToken { } #[derive(Logos, Debug, PartialEq)] -#[logos(skip r"\s+")] +#[logos(skip r"[ \t\r\n\f]+")] pub enum Token { - #[regex(r" [+-~&|^=] ", Op::new, priority = 8)] + + // watchout you need to escape the good char + #[regex(r"[\+&=\-|^~]", Op::new)] Operation(Op), - #[regex(r" [(==)<>(>=)(<=)(!=)] ", |_| Cond::Eq, priority = 7)] + // WIP + #[regex(r"==", |_| Cond::Eq)] Condition(Cond), - #[regex(r"[0-9]+", |_| 3, priority = 6)] - #[regex("(0x|0X){1}[a-fA-F0-9]+", |_| 3, priority = 6)] - #[regex("(0b|0B){1}(0|1)+", |_| 3, priority = 6)] + // WIP + #[regex(r"[0-9]+", |_| 3)] + #[regex("(0x|0X){1}[a-fA-F0-9]+", |_| 3)] + #[regex("(0b|0B){1}(0|1)+", |_| 3)] Value(u16), - #[token("JMP", |_| Inst::Jump, priority = 5)] + // WIP + #[token("JMP", |_| Inst::Jump)] Instruction(Inst), - #[token(":", |_| Dir::Label, priority = 4)] - #[token("DEFINE", |_| Dir::Define, priority = 4)] + // No test + #[token(":", |_| Dir::Label)] + #[token("DEFINE", |_| Dir::Define)] Directive(Dir), - #[regex(r"\*?[A-Z]{1}", Reg::new, priority = 3)] + // Register has a higher priority than Identifier + #[regex(r"\*?[A-Z]", Reg::new, priority = 2)] Register(Reg), - - #[regex(r"[a-zA-Z_]+", |lex| lex.slice().to_string(), priority = 1)] + // No test + #[regex(r"[a-z_A-Z]+", |lex| lex.slice().to_string(), priority = 1)] Identifier(String), - #[regex(r"\s?;.*")] + // No test + #[regex(r";[^\n]*")] Comment, } @@ -50,3 +58,36 @@ pub enum Dir { Define, Label, } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_operation() { + let mut lex = Token::lexer("+~-&|^="); + + assert_eq!(lex.next(), Some(Ok(Token::Operation(Op::Add)))); + assert_eq!(lex.next(), Some(Ok(Token::Operation(Op::Not)))); + assert_eq!(lex.next(), Some(Ok(Token::Operation(Op::Sub)))); + assert_eq!(lex.next(), Some(Ok(Token::Operation(Op::And)))); + assert_eq!(lex.next(), Some(Ok(Token::Operation(Op::Or)))); + assert_eq!(lex.next(), Some(Ok(Token::Operation(Op::Xor)))); + assert_eq!(lex.next(), Some(Ok(Token::Operation(Op::Assignement)))); + + let mut lex = Token::lexer(" +"); + assert_eq!(lex.next(), Some(Ok(Token::Operation(Op::Add)))); + + let mut lex = Token::lexer("+ \n"); + assert_eq!(lex.next(), Some(Ok(Token::Operation(Op::Add)))); + + let mut lex = Token::lexer("A+A\n"); + lex.next(); + assert_eq!(lex.next(), Some(Ok(Token::Operation(Op::Add)))); + + let mut lex = Token::lexer("A +A\n"); + lex.next(); + assert_eq!(lex.next(), Some(Ok(Token::Operation(Op::Add)))); + } + +} diff --git a/src/main.rs b/src/main.rs index b049273..1a25b8f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -6,7 +6,7 @@ use logos::Logos; fn main() { use std::io::Read; - if let Ok(mut file) = std::fs::File::open("tests/test.asm") { + if let Ok(mut file) = std::fs::File::open("tests/realistic_test.asm") { let mut content = String::new(); let _ = file.read_to_string(&mut content); diff --git a/src/spec.rs b/src/spec.rs index 4031c5a..12d1929 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -1,6 +1,3 @@ -use logos::Lexer; - - pub mod arch_v1 { use logos::Lexer; use crate::lexer::HandleToken; @@ -14,6 +11,9 @@ pub mod arch_v1 { Add, Sub, And, + Not, + Or, + Xor, Assignement, } @@ -23,6 +23,9 @@ pub mod arch_v1 { "+" => Some(Op::Add), "-" => Some(Op::Sub), "&" => Some(Op::And), + "~" => Some(Op::Not), + "|" => Some(Op::Or), + "^" => Some(Op::Xor), "=" => Some(Op::Assignement), _ => None, // todo: return a beautiful error } diff --git a/tests/realistic_test.asm b/tests/realistic_test.asm new file mode 100644 index 0000000..ba9a5d1 --- /dev/null +++ b/tests/realistic_test.asm @@ -0,0 +1,40 @@ +DEFINE io_adr 0x7fff +DEFINE ob_detection_mask 0x100 +DEFINE movement_mask 0x600 +DEFINE move_mask 0x4 +DEFINE left 0x8 + + +wait: +A = movement_mask +D = A +A = io_adr +D = D & *A +A = wait +D ; JNE + +check: +A = ob_detection_mask +D = A +A = io_adr +D = D & *A +A = move +D ; JEQ + +turn: +A = left +D = A + +send: +A = io_adr +*A = D +A = wait +JMP + +move: +A = move_mask +D = A +A = send +JMP +D ; this is a comment +D \ No newline at end of file diff --git a/tests/test.token_stream b/tests/realistic_test.token_stream similarity index 100% rename from tests/test.token_stream rename to tests/realistic_test.token_stream diff --git a/tests/test.asm b/tests/test.asm index ba9a5d1..bd5b954 100644 --- a/tests/test.asm +++ b/tests/test.asm @@ -36,5 +36,13 @@ A = move_mask D = A A = send JMP -D ; this is a comment -D \ No newline at end of file + +id: +A = A - D +A = A ~D +A = D | *A +A = D ^ *A +A=A+A +; this is a comment + ; comment +A ; comment \ No newline at end of file From 17f80995c348f40fac6a20bcc63008e101383ccf Mon Sep 17 00:00:00 2001 From: benoitlx Date: Sun, 2 Feb 2025 17:51:46 +0100 Subject: [PATCH 06/25] feat: test for register --- src/lexer.rs | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/src/lexer.rs b/src/lexer.rs index 0615a91..0172d0c 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -19,6 +19,7 @@ pub trait HandleToken { pub enum Token { // watchout you need to escape the good char + // tested #[regex(r"[\+&=\-|^~]", Op::new)] Operation(Op), @@ -42,6 +43,7 @@ pub enum Token { Directive(Dir), // Register has a higher priority than Identifier + // tested #[regex(r"\*?[A-Z]", Reg::new, priority = 2)] Register(Reg), // No test @@ -90,4 +92,39 @@ mod tests { assert_eq!(lex.next(), Some(Ok(Token::Operation(Op::Add)))); } + #[test] + fn test_register() { + let mut lex = Token::lexer("A V D *A *V"); + assert_eq!(lex.next(), Some(Ok(Token::Register(Reg::A)))); + assert_eq!(lex.next(), Some(Ok(Token::Register(Reg::V)))); + assert_eq!(lex.next(), Some(Ok(Token::Register(Reg::D)))); + assert_eq!(lex.next(), Some(Ok(Token::Register(Reg::AStar)))); + assert_eq!(lex.next(), Some(Ok(Token::Register(Reg::VStar)))); + + let mut lex = Token::lexer("A="); + assert_eq!(lex.next(), Some(Ok(Token::Register(Reg::A)))); + + let mut lex = Token::lexer("A\n"); + assert_eq!(lex.next(), Some(Ok(Token::Register(Reg::A)))); + + // the wrong syntax below will be catch by the parser + let mut lex = Token::lexer("A:"); + assert_eq!(lex.next(), Some(Ok(Token::Register(Reg::A)))); + + // the wrong syntax below will be catch by the parser + let mut lex = Token::lexer("DEFINE A"); + lex.next(); + assert_eq!(lex.next(), Some(Ok(Token::Register(Reg::A)))); + + // the wrong syntax below will be catch by the parser + let mut lex = Token::lexer("DEFINE *A"); + lex.next(); + assert_eq!(lex.next(), Some(Ok(Token::Register(Reg::AStar)))); + + // Register B doesn't exist + // TODO: return a specific error with helper including the list of valid registers + let mut lex = Token::lexer("B"); + assert_eq!(lex.next(), Some(Err(()))); + } + } From cc3f2f38c645b11ebb6dfaeb1cd233df7e41427d Mon Sep 17 00:00:00 2001 From: benoitlx Date: Sun, 2 Feb 2025 18:31:54 +0100 Subject: [PATCH 07/25] feat-test: condition --- src/lexer.rs | 31 ++++++++++++++++++++++++------- src/spec.rs | 31 ++++++++++++++++++++++++------- tests/realistic_test.token_stream | 4 ++-- 3 files changed, 50 insertions(+), 16 deletions(-) diff --git a/src/lexer.rs b/src/lexer.rs index 0172d0c..7984782 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -18,13 +18,13 @@ pub trait HandleToken { #[logos(skip r"[ \t\r\n\f]+")] pub enum Token { + // Condition has a higher priority than Operation // watchout you need to escape the good char // tested - #[regex(r"[\+&=\-|^~]", Op::new)] + #[regex(r"[\+&=\-|^~]", Op::new, priority = 1)] Operation(Op), - // WIP - #[regex(r"==", |_| Cond::Eq)] + #[regex(r"(==)|(!=)|(<=)|(>=)|<|>|(JMP)", Cond::new, priority = 2)] Condition(Cond), // WIP @@ -33,10 +33,6 @@ pub enum Token { #[regex("(0b|0B){1}(0|1)+", |_| 3)] Value(u16), - // WIP - #[token("JMP", |_| Inst::Jump)] - Instruction(Inst), - // No test #[token(":", |_| Dir::Label)] #[token("DEFINE", |_| Dir::Define)] @@ -127,4 +123,25 @@ mod tests { assert_eq!(lex.next(), Some(Err(()))); } + #[test] + fn test_condition() { + let mut lex = Token::lexer("== >= <= > < != JMP"); + assert_eq!(lex.next(), Some(Ok(Token::Condition(Cond::Eq)))); + assert_eq!(lex.next(), Some(Ok(Token::Condition(Cond::GtEq)))); + assert_eq!(lex.next(), Some(Ok(Token::Condition(Cond::LtEq)))); + assert_eq!(lex.next(), Some(Ok(Token::Condition(Cond::Gt)))); + assert_eq!(lex.next(), Some(Ok(Token::Condition(Cond::Lt)))); + assert_eq!(lex.next(), Some(Ok(Token::Condition(Cond::Neq)))); + assert_eq!(lex.next(), Some(Ok(Token::Condition(Cond::Jump)))); + + let mut lex = Token::lexer("A==A\n"); + lex.next(); + assert_eq!(lex.next(), Some(Ok(Token::Condition(Cond::Eq)))); + + let mut lex = Token::lexer("=A==A\n"); + lex.next(); + lex.next(); + assert_eq!(lex.next(), Some(Ok(Token::Condition(Cond::Eq)))); + } + } diff --git a/src/spec.rs b/src/spec.rs index 12d1929..3b9aa13 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -32,10 +32,7 @@ pub mod arch_v1 { } fn bit_stream(&self) -> String { - match self { - _ => "00011" - } - .to_string() + todo!(); } } @@ -76,10 +73,30 @@ pub mod arch_v1 { pub enum Cond { Eq, Neq, + Gt, + Lt, + GtEq, + LtEq, + Jump, } - #[derive(Debug, PartialEq)] - pub enum Inst { - Jump, + impl HandleToken for Cond { + fn new(lex: &mut Lexer) -> Option + where Self: Sized { + match lex.slice() { + "==" => Some(Cond::Eq), + ">" => Some(Cond::Gt), + "<" => Some(Cond::Lt), + ">=" => Some(Cond::GtEq), + "<=" => Some(Cond::LtEq), + "!=" => Some(Cond::Neq), + "JMP" => Some(Cond::Jump), + _ => None // todo error + } + } + + fn bit_stream(&self) -> String { + todo!(); + } } } \ No newline at end of file diff --git a/tests/realistic_test.token_stream b/tests/realistic_test.token_stream index a4bb09f..6d79854 100644 --- a/tests/realistic_test.token_stream +++ b/tests/realistic_test.token_stream @@ -74,7 +74,7 @@ Ok(Register(D)) Ok(Register(A)) Ok(Operation(Assignement)) Ok(Identifier("wait")) -Ok(Instruction(Jump)) +Ok(Condition(Jump)) Ok(Identifier("move")) Ok(Directive(Label)) Ok(Register(A)) @@ -86,7 +86,7 @@ Ok(Register(A)) Ok(Register(A)) Ok(Operation(Assignement)) Ok(Identifier("send")) -Ok(Instruction(Jump)) +Ok(Condition(Jump)) Ok(Register(D)) Ok(Comment) Ok(Register(D)) From b6e7f894cd26780a62d138fd6a72167167523f59 Mon Sep 17 00:00:00 2001 From: benoitlx Date: Mon, 3 Feb 2025 00:16:13 +0100 Subject: [PATCH 08/25] feat-test: directives and values --- src/lexer.rs | 90 ++++++++++++++++++++++++++----- src/spec.rs | 32 +++++------ tests/realistic_test.token_stream | 10 ++-- 3 files changed, 100 insertions(+), 32 deletions(-) diff --git a/src/lexer.rs b/src/lexer.rs index 7984782..2f805e4 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -1,39 +1,54 @@ use logos::{Lexer, Logos}; /* >> Architecture being used << */ -#[path ="spec.rs"] +#[path = "spec.rs"] mod spec; use spec::arch_v1::*; /// This trait is either used by the lexer to produce Token with the new method /// or by the parser to generate the bit stream from a Token pub trait HandleToken { - fn bit_stream(&self) -> String; // get the bit stream from an item (Reg, Op, Inst) + // get the bit stream from an item (Reg, Op, Inst) + fn bit_stream(&self) -> String { + String::new() + } fn new(lex: &mut Lexer) -> Option - where Self: Sized; // todo: default implementation qui renvoit une erreur en spécifiant le type Self + where + Self: Sized; // todo: default implementation qui renvoit une erreur en spécifiant le type Self +} + +macro_rules! parse_number { + ($name:ident, $prefix:expr, $radix:expr) => { + fn $name(lex: &mut Lexer) -> Option { + let raw_slice = lex.slice().trim_start_matches($prefix); + match u16::from_str_radix(raw_slice, $radix) { + Ok(n) if n <= MAX_LOAD_VALUE => Some(n), + Ok(_) | Err(_) => None, + } + } + }; } #[derive(Logos, Debug, PartialEq)] #[logos(skip r"[ \t\r\n\f]+")] pub enum Token { - // Condition has a higher priority than Operation // watchout you need to escape the good char // tested #[regex(r"[\+&=\-|^~]", Op::new, priority = 1)] Operation(Op), - // WIP + // tested #[regex(r"(==)|(!=)|(<=)|(>=)|<|>|(JMP)", Cond::new, priority = 2)] Condition(Cond), // WIP - #[regex(r"[0-9]+", |_| 3)] - #[regex("(0x|0X){1}[a-fA-F0-9]+", |_| 3)] - #[regex("(0b|0B){1}(0|1)+", |_| 3)] + #[regex(r"[0-9]+", Token::decimal)] + #[regex("0x[a-fA-F0-9]+", Token::hexadecimal)] + #[regex("0b(0|1)+", Token::binary)] Value(u16), - // No test + // tested #[token(":", |_| Dir::Label)] #[token("DEFINE", |_| Dir::Define)] Directive(Dir), @@ -51,12 +66,31 @@ pub enum Token { Comment, } +impl Token { + parse_number!(decimal, "", 10); + parse_number!(hexadecimal, "0x", 16); + parse_number!(binary, "0b", 2); +} + #[derive(Debug, PartialEq)] pub enum Dir { Define, Label, } +impl HandleToken for Dir { + fn new(lex: &mut Lexer) -> Option + where + Self: Sized, + { + match lex.slice() { + "DEFINE" => Some(Dir::Define), + ":" => Some(Dir::Label), + _ => None, + } + } +} + #[cfg(test)] mod tests { use super::*; @@ -64,7 +98,7 @@ mod tests { #[test] fn test_operation() { let mut lex = Token::lexer("+~-&|^="); - + assert_eq!(lex.next(), Some(Ok(Token::Operation(Op::Add)))); assert_eq!(lex.next(), Some(Ok(Token::Operation(Op::Not)))); assert_eq!(lex.next(), Some(Ok(Token::Operation(Op::Sub)))); @@ -82,7 +116,7 @@ mod tests { let mut lex = Token::lexer("A+A\n"); lex.next(); assert_eq!(lex.next(), Some(Ok(Token::Operation(Op::Add)))); - + let mut lex = Token::lexer("A +A\n"); lex.next(); assert_eq!(lex.next(), Some(Ok(Token::Operation(Op::Add)))); @@ -117,7 +151,7 @@ mod tests { lex.next(); assert_eq!(lex.next(), Some(Ok(Token::Register(Reg::AStar)))); - // Register B doesn't exist + // Register B doesn't exist // TODO: return a specific error with helper including the list of valid registers let mut lex = Token::lexer("B"); assert_eq!(lex.next(), Some(Err(()))); @@ -144,4 +178,36 @@ mod tests { assert_eq!(lex.next(), Some(Ok(Token::Condition(Cond::Eq)))); } + #[test] + fn test_directive() { + let mut lex = Token::lexer("DEFINE label 0x0\ntest:"); + assert_eq!(lex.next(), Some(Ok(Token::Directive(Dir::Define)))); + lex.next(); + lex.next(); + lex.next(); + assert_eq!(lex.next(), Some(Ok(Token::Directive(Dir::Label)))); + + let mut lex = Token::lexer("DEFINE:\n"); + assert_eq!(lex.next(), Some(Ok(Token::Directive(Dir::Define)))); + assert_eq!(lex.next(), Some(Ok(Token::Directive(Dir::Label)))); + } + + #[test] + fn test_values() { + let mut lex = Token::lexer("0 1 32767 0x0 0x1 0x7fff 0b0 0b1 0b111111111111111\n"); + assert_eq!(lex.next(), Some(Ok(Token::Value(0)))); + assert_eq!(lex.next(), Some(Ok(Token::Value(1)))); + assert_eq!(lex.next(), Some(Ok(Token::Value(32767)))); + assert_eq!(lex.next(), Some(Ok(Token::Value(0)))); + assert_eq!(lex.next(), Some(Ok(Token::Value(1)))); + assert_eq!(lex.next(), Some(Ok(Token::Value(32767)))); + assert_eq!(lex.next(), Some(Ok(Token::Value(0)))); + assert_eq!(lex.next(), Some(Ok(Token::Value(1)))); + assert_eq!(lex.next(), Some(Ok(Token::Value(32767)))); + + let mut lex = Token::lexer("32768 0x8000 0b1000000000000000"); + assert_eq!(lex.next(), Some(Err(()))); + assert_eq!(lex.next(), Some(Err(()))); + assert_eq!(lex.next(), Some(Err(()))); + } } diff --git a/src/spec.rs b/src/spec.rs index 3b9aa13..a082975 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -1,6 +1,6 @@ pub mod arch_v1 { - use logos::Lexer; use crate::lexer::HandleToken; + use logos::Lexer; #[allow(dead_code)] @@ -29,7 +29,7 @@ pub mod arch_v1 { "=" => Some(Op::Assignement), _ => None, // todo: return a beautiful error } - } + } fn bit_stream(&self) -> String { todo!(); @@ -55,7 +55,7 @@ pub mod arch_v1 { "D" => Some(Reg::D), _ => None, // todo: return a beautiful error } - } + } fn bit_stream(&self) -> String { match self { @@ -82,21 +82,23 @@ pub mod arch_v1 { impl HandleToken for Cond { fn new(lex: &mut Lexer) -> Option - where Self: Sized { - match lex.slice() { - "==" => Some(Cond::Eq), - ">" => Some(Cond::Gt), - "<" => Some(Cond::Lt), - ">=" => Some(Cond::GtEq), - "<=" => Some(Cond::LtEq), - "!=" => Some(Cond::Neq), - "JMP" => Some(Cond::Jump), - _ => None // todo error - } + where + Self: Sized, + { + match lex.slice() { + "==" => Some(Cond::Eq), + ">" => Some(Cond::Gt), + "<" => Some(Cond::Lt), + ">=" => Some(Cond::GtEq), + "<=" => Some(Cond::LtEq), + "!=" => Some(Cond::Neq), + "JMP" => Some(Cond::Jump), + _ => None, // todo error + } } fn bit_stream(&self) -> String { todo!(); } } -} \ No newline at end of file +} diff --git a/tests/realistic_test.token_stream b/tests/realistic_test.token_stream index 6d79854..04b6229 100644 --- a/tests/realistic_test.token_stream +++ b/tests/realistic_test.token_stream @@ -1,18 +1,18 @@ Ok(Directive(Define)) Ok(Identifier("io_adr")) -Ok(Value(3)) +Ok(Value(32767)) Ok(Directive(Define)) Ok(Identifier("ob_detection_mask")) -Ok(Value(3)) +Ok(Value(256)) Ok(Directive(Define)) Ok(Identifier("movement_mask")) -Ok(Value(3)) +Ok(Value(1536)) Ok(Directive(Define)) Ok(Identifier("move_mask")) -Ok(Value(3)) +Ok(Value(4)) Ok(Directive(Define)) Ok(Identifier("left")) -Ok(Value(3)) +Ok(Value(8)) Ok(Identifier("wait")) Ok(Directive(Label)) Ok(Register(A)) From ab87560cb5e83648bb0c8e303ef18e4d0b0a519a Mon Sep 17 00:00:00 2001 From: benoitlx Date: Mon, 3 Feb 2025 09:16:21 +0100 Subject: [PATCH 09/25] test: identifiers --- src/lexer.rs | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/src/lexer.rs b/src/lexer.rs index 2f805e4..9ca05f2 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -42,23 +42,23 @@ pub enum Token { #[regex(r"(==)|(!=)|(<=)|(>=)|<|>|(JMP)", Cond::new, priority = 2)] Condition(Cond), - // WIP + // tested #[regex(r"[0-9]+", Token::decimal)] #[regex("0x[a-fA-F0-9]+", Token::hexadecimal)] #[regex("0b(0|1)+", Token::binary)] Value(u16), // tested - #[token(":", |_| Dir::Label)] - #[token("DEFINE", |_| Dir::Define)] + #[token(":", Dir::new)] + #[token("DEFINE", Dir::new)] Directive(Dir), // Register has a higher priority than Identifier // tested #[regex(r"\*?[A-Z]", Reg::new, priority = 2)] Register(Reg), - // No test - #[regex(r"[a-z_A-Z]+", |lex| lex.slice().to_string(), priority = 1)] + // tested + #[regex(r"[a-z_A-Z]+", Token::text, priority = 1)] Identifier(String), // No test @@ -70,6 +70,10 @@ impl Token { parse_number!(decimal, "", 10); parse_number!(hexadecimal, "0x", 16); parse_number!(binary, "0b", 2); + + fn text(lex: &mut Lexer) -> Option { + Some(lex.slice().to_string()) + } } #[derive(Debug, PartialEq)] @@ -210,4 +214,21 @@ mod tests { assert_eq!(lex.next(), Some(Err(()))); assert_eq!(lex.next(), Some(Err(()))); } + + #[test] + fn test_identifier() { + let mut lex = Token::lexer("DEFINE id 0x0\nid:"); + lex.next(); + assert_eq!(lex.next(), Some(Ok(Token::Identifier("id".to_string())))); + lex.next(); + assert_eq!(lex.next(), Some(Ok(Token::Identifier("id".to_string())))); + + let test_string = "a b c foo bar FOO BAR foo_bar FOO_BAR Foo_Bar"; + let string_iter = test_string.split(" "); + let mut lex = Token::lexer(test_string); + + for word in string_iter { + assert_eq!(lex.next(), Some(Ok(Token::Identifier(word.to_string())))); + } + } } From 3e0fbcaba33323d6700989880cf4f41d4e8ade62 Mon Sep 17 00:00:00 2001 From: benoitlx Date: Mon, 3 Feb 2025 09:25:01 +0100 Subject: [PATCH 10/25] test: comment --- src/lexer.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/lexer.rs b/src/lexer.rs index 9ca05f2..a5c8c7a 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -231,4 +231,14 @@ mod tests { assert_eq!(lex.next(), Some(Ok(Token::Identifier(word.to_string())))); } } + + #[test] + fn test_comment() { + let mut lex = Token::lexer("; this is a comment\nD ; comment\n;;;;\n;comm\n"); + assert_eq!(lex.next(), Some(Ok(Token::Comment))); + lex.next(); + assert_eq!(lex.next(), Some(Ok(Token::Comment))); + assert_eq!(lex.next(), Some(Ok(Token::Comment))); + assert_eq!(lex.next(), Some(Ok(Token::Comment))); + } } From d3787a96fb8392dcfa8f51e1fb5d59a3b6ac29f0 Mon Sep 17 00:00:00 2001 From: benoitlx Date: Mon, 3 Feb 2025 13:12:07 +0100 Subject: [PATCH 11/25] fix: handle directives with the new callback --- src/lexer.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/lexer.rs b/src/lexer.rs index a5c8c7a..310b16f 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -49,8 +49,7 @@ pub enum Token { Value(u16), // tested - #[token(":", Dir::new)] - #[token("DEFINE", Dir::new)] + #[regex(r"(DEFINE|:)", Dir::new)] Directive(Dir), // Register has a higher priority than Identifier @@ -61,7 +60,7 @@ pub enum Token { #[regex(r"[a-z_A-Z]+", Token::text, priority = 1)] Identifier(String), - // No test + // Tested #[regex(r";[^\n]*")] Comment, } From 3017466c4238edcd23014584899fdc4e6a9bca93 Mon Sep 17 00:00:00 2001 From: benoitlx Date: Mon, 3 Feb 2025 17:28:25 +0100 Subject: [PATCH 12/25] test: integration test for lexer --- .gitignore | 3 +- Cargo.toml | 1 + src/lexer.rs | 6 ++- tests/lexer_test.rs | 39 +++++++++++++++ tests/{ => real_test}/realistic_test.asm | 0 .../realistic_test.token_stream | 0 tests/test.asm | 48 ------------------- 7 files changed, 46 insertions(+), 51 deletions(-) create mode 100644 tests/lexer_test.rs rename tests/{ => real_test}/realistic_test.asm (100%) rename tests/{ => real_test}/realistic_test.token_stream (100%) delete mode 100644 tests/test.asm diff --git a/.gitignore b/.gitignore index 82587a5..78f67b6 100644 --- a/.gitignore +++ b/.gitignore @@ -24,4 +24,5 @@ Cargo.lock /target -.vscode/ \ No newline at end of file +.vscode/ +.temp/ \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index b7fae66..2455517 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,3 +8,4 @@ logos = { version = "0.15.0", features = ["debug"] } [dev-dependencies] rusty-hook = "^0.11.2" +pretty_assertions = "1" diff --git a/src/lexer.rs b/src/lexer.rs index 310b16f..1dedc90 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -49,7 +49,8 @@ pub enum Token { Value(u16), // tested - #[regex(r"(DEFINE|:)", Dir::new)] + #[token(":", Dir::new)] + #[token("DEFINE", Dir::new)] Directive(Dir), // Register has a higher priority than Identifier @@ -60,7 +61,7 @@ pub enum Token { #[regex(r"[a-z_A-Z]+", Token::text, priority = 1)] Identifier(String), - // Tested + // No test #[regex(r";[^\n]*")] Comment, } @@ -97,6 +98,7 @@ impl HandleToken for Dir { #[cfg(test)] mod tests { use super::*; + use pretty_assertions::assert_eq; #[test] fn test_operation() { diff --git a/tests/lexer_test.rs b/tests/lexer_test.rs new file mode 100644 index 0000000..17c3470 --- /dev/null +++ b/tests/lexer_test.rs @@ -0,0 +1,39 @@ +use std::fs::{read_to_string, File}; +use pretty_assertions::assert_eq; + +#[path = "../src/lexer.rs"] +mod lexer; + +#[test] +fn test_lexer() { + use lexer::Token; + use logos::Logos; + use std::fs::OpenOptions; + use std::io::{Read, Write}; + + let source_file_path = "tests/real_test/realistic_test.asm"; + let temp_file_path = "tests/.temp/realistic_test.token_stream.temp"; + let expected_file_path = "tests/real_test/realistic_test.token_stream"; + + let mut temp_file = OpenOptions::new() + .create(true) + .write(true) + .open(temp_file_path) + .unwrap(); + + if let Ok(mut source_file) = File::open(source_file_path) { + let mut content = String::new(); + let _ = source_file.read_to_string(&mut content); + + let mut lex = Token::lexer(content.as_str()); + + while let Some(result) = lex.next() { + writeln!(temp_file, "{:?}", result).unwrap(); + } + } + + let content1 = read_to_string(expected_file_path).unwrap(); + let content2 = read_to_string(temp_file_path).unwrap(); + + assert_eq!(content1, content2); +} diff --git a/tests/realistic_test.asm b/tests/real_test/realistic_test.asm similarity index 100% rename from tests/realistic_test.asm rename to tests/real_test/realistic_test.asm diff --git a/tests/realistic_test.token_stream b/tests/real_test/realistic_test.token_stream similarity index 100% rename from tests/realistic_test.token_stream rename to tests/real_test/realistic_test.token_stream diff --git a/tests/test.asm b/tests/test.asm deleted file mode 100644 index bd5b954..0000000 --- a/tests/test.asm +++ /dev/null @@ -1,48 +0,0 @@ -DEFINE io_adr 0x7fff -DEFINE ob_detection_mask 0x100 -DEFINE movement_mask 0x600 -DEFINE move_mask 0x4 -DEFINE left 0x8 - - -wait: -A = movement_mask -D = A -A = io_adr -D = D & *A -A = wait -D ; JNE - -check: -A = ob_detection_mask -D = A -A = io_adr -D = D & *A -A = move -D ; JEQ - -turn: -A = left -D = A - -send: -A = io_adr -*A = D -A = wait -JMP - -move: -A = move_mask -D = A -A = send -JMP - -id: -A = A - D -A = A ~D -A = D | *A -A = D ^ *A -A=A+A -; this is a comment - ; comment -A ; comment \ No newline at end of file From 062e263f44598c6cab84d88fcbcd1b916420d430 Mon Sep 17 00:00:00 2001 From: benoitlx Date: Mon, 3 Feb 2025 19:24:30 +0100 Subject: [PATCH 13/25] refactor-test: separate assignement from operation --- src/lexer.rs | 29 ++++++++++++++--- tests/real_test/realistic_test.token_stream | 36 ++++++++++----------- 2 files changed, 43 insertions(+), 22 deletions(-) diff --git a/src/lexer.rs b/src/lexer.rs index 1dedc90..83ba917 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -2,7 +2,7 @@ use logos::{Lexer, Logos}; /* >> Architecture being used << */ #[path = "spec.rs"] -mod spec; +pub mod spec; use spec::arch_v1::*; /// This trait is either used by the lexer to produce Token with the new method @@ -36,12 +36,15 @@ pub enum Token { // Condition has a higher priority than Operation // watchout you need to escape the good char // tested - #[regex(r"[\+&=\-|^~]", Op::new, priority = 1)] + #[regex(r"[\+&\-|^~]", Op::new, priority = 1)] Operation(Op), // tested #[regex(r"(==)|(!=)|(<=)|(>=)|<|>|(JMP)", Cond::new, priority = 2)] Condition(Cond), + #[token("=")] + Assignement, + // tested #[regex(r"[0-9]+", Token::decimal)] #[regex("0x[a-fA-F0-9]+", Token::hexadecimal)] @@ -102,7 +105,7 @@ mod tests { #[test] fn test_operation() { - let mut lex = Token::lexer("+~-&|^="); + let mut lex = Token::lexer("+~-&|^"); assert_eq!(lex.next(), Some(Ok(Token::Operation(Op::Add)))); assert_eq!(lex.next(), Some(Ok(Token::Operation(Op::Not)))); @@ -110,7 +113,6 @@ mod tests { assert_eq!(lex.next(), Some(Ok(Token::Operation(Op::And)))); assert_eq!(lex.next(), Some(Ok(Token::Operation(Op::Or)))); assert_eq!(lex.next(), Some(Ok(Token::Operation(Op::Xor)))); - assert_eq!(lex.next(), Some(Ok(Token::Operation(Op::Assignement)))); let mut lex = Token::lexer(" +"); assert_eq!(lex.next(), Some(Ok(Token::Operation(Op::Add)))); @@ -127,6 +129,12 @@ mod tests { assert_eq!(lex.next(), Some(Ok(Token::Operation(Op::Add)))); } + #[test] + fn test_assignement() { + let mut lex = Token::lexer("="); + assert_eq!(lex.next(), Some(Ok(Token::Assignement))); + } + #[test] fn test_register() { let mut lex = Token::lexer("A V D *A *V"); @@ -242,4 +250,17 @@ mod tests { assert_eq!(lex.next(), Some(Ok(Token::Comment))); assert_eq!(lex.next(), Some(Ok(Token::Comment))); } + + #[test] + fn test_weird_behavior() { + let mut lex = Token::lexer("move:\nA = move_mask\nD = A\n"); + assert_eq!(lex.next(), Some(Ok(Token::Identifier("move".to_string())))); + assert_eq!(lex.next(), Some(Ok(Token::Directive(Dir::Label)))); + assert_eq!(lex.next(), Some(Ok(Token::Register(Reg::A)))); + assert_eq!(lex.next(), Some(Ok(Token::Assignement))); + assert_eq!(lex.next(), Some(Ok(Token::Identifier("move_mask".to_string())))); + assert_eq!(lex.next(), Some(Ok(Token::Register(Reg::D)))); + assert_eq!(lex.next(), Some(Ok(Token::Assignement))); + assert_eq!(lex.next(), Some(Ok(Token::Register(Reg::A)))); + } } diff --git a/tests/real_test/realistic_test.token_stream b/tests/real_test/realistic_test.token_stream index 04b6229..9cdaf67 100644 --- a/tests/real_test/realistic_test.token_stream +++ b/tests/real_test/realistic_test.token_stream @@ -16,75 +16,75 @@ Ok(Value(8)) Ok(Identifier("wait")) Ok(Directive(Label)) Ok(Register(A)) -Ok(Operation(Assignement)) +Ok(Assignement) Ok(Identifier("movement_mask")) Ok(Register(D)) -Ok(Operation(Assignement)) +Ok(Assignement) Ok(Register(A)) Ok(Register(A)) -Ok(Operation(Assignement)) +Ok(Assignement) Ok(Identifier("io_adr")) Ok(Register(D)) -Ok(Operation(Assignement)) +Ok(Assignement) Ok(Register(D)) Ok(Operation(And)) Ok(Register(AStar)) Ok(Register(A)) -Ok(Operation(Assignement)) +Ok(Assignement) Ok(Identifier("wait")) Ok(Register(D)) Ok(Comment) Ok(Identifier("check")) Ok(Directive(Label)) Ok(Register(A)) -Ok(Operation(Assignement)) +Ok(Assignement) Ok(Identifier("ob_detection_mask")) Ok(Register(D)) -Ok(Operation(Assignement)) +Ok(Assignement) Ok(Register(A)) Ok(Register(A)) -Ok(Operation(Assignement)) +Ok(Assignement) Ok(Identifier("io_adr")) Ok(Register(D)) -Ok(Operation(Assignement)) +Ok(Assignement) Ok(Register(D)) Ok(Operation(And)) Ok(Register(AStar)) Ok(Register(A)) -Ok(Operation(Assignement)) +Ok(Assignement) Ok(Identifier("move")) Ok(Register(D)) Ok(Comment) Ok(Identifier("turn")) Ok(Directive(Label)) Ok(Register(A)) -Ok(Operation(Assignement)) +Ok(Assignement) Ok(Identifier("left")) Ok(Register(D)) -Ok(Operation(Assignement)) +Ok(Assignement) Ok(Register(A)) Ok(Identifier("send")) Ok(Directive(Label)) Ok(Register(A)) -Ok(Operation(Assignement)) +Ok(Assignement) Ok(Identifier("io_adr")) Ok(Register(AStar)) -Ok(Operation(Assignement)) +Ok(Assignement) Ok(Register(D)) Ok(Register(A)) -Ok(Operation(Assignement)) +Ok(Assignement) Ok(Identifier("wait")) Ok(Condition(Jump)) Ok(Identifier("move")) Ok(Directive(Label)) Ok(Register(A)) -Ok(Operation(Assignement)) +Ok(Assignement) Ok(Identifier("move_mask")) Ok(Register(D)) -Ok(Operation(Assignement)) +Ok(Assignement) Ok(Register(A)) Ok(Register(A)) -Ok(Operation(Assignement)) +Ok(Assignement) Ok(Identifier("send")) Ok(Condition(Jump)) Ok(Register(D)) From c6dcab113f4d3bad397864bcff00a5cc5bbe79e2 Mon Sep 17 00:00:00 2001 From: benoitlx Date: Tue, 4 Feb 2025 22:37:33 +0100 Subject: [PATCH 14/25] wip: parser --- Cargo.toml | 1 + src/main.rs | 7 +- src/parser.rs | 204 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/spec.rs | 22 +++++- 4 files changed, 226 insertions(+), 8 deletions(-) create mode 100644 src/parser.rs diff --git a/Cargo.toml b/Cargo.toml index 2455517..59a6bc9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,6 +4,7 @@ version = "0.1.0" edition = "2021" [dependencies] +colored = "3.0.0" logos = { version = "0.15.0", features = ["debug"] } [dev-dependencies] diff --git a/src/main.rs b/src/main.rs index 1a25b8f..fc618d7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,5 @@ mod lexer; +mod parser; use lexer::Token; use logos::Logos; @@ -6,14 +7,12 @@ use logos::Logos; fn main() { use std::io::Read; - if let Ok(mut file) = std::fs::File::open("tests/realistic_test.asm") { + if let Ok(mut file) = std::fs::File::open("tests/real_test/realistic_test.asm") { let mut content = String::new(); let _ = file.read_to_string(&mut content); let mut lex = Token::lexer(content.as_str()); - while let Some(result) = lex.next() { - println!("{:?}", result); - } + println!("{}", parser::generate_bit_stream(&mut lex)); } } diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 0000000..239d1af --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,204 @@ +use crate::lexer::spec::arch_v1::*; +use crate::lexer::{Token::*, *}; +use colored::Colorize; +use logos::Lexer; +use std::collections::HashMap; + +/* +TODO: +- parser les instructions avec un non ~ +- parser les instructions du types A = D (avec lex.peek()) +- tests +- soigner le code +- pour chaque panic donner les bonnes infos l'endroit du token fautif ... + +Futur: +- Error handling with miette +- cli +*/ + +pub fn generate_bit_stream(lex: &mut Lexer) -> String { + let mut bit_stream = String::new(); + + // Hashmap for the identifiers + let mut id_collect: HashMap = HashMap::new(); + let mut adr = 0; + + // let mut lex = lex.peekable(); + + while let Some(Ok(token)) = lex.next() { + let current_bit_stream: Option = match token { + Directive(Dir::Define) => { + match lex.next() { + Some(Ok(Identifier(id))) => { + let start = lex.span().start; + let end = lex.span().end; + match lex.next() { + Some(Ok(Value(v))) => { + let id_ref = id_collect.get(&id); + + if id_ref == None { + id_collect.insert(id, (v, start, end)); + } else { + let start = (*id_ref.unwrap()).1; + let end = (*id_ref.unwrap()).2; + panic!("identifier already used there {}..{}", start, end); + } + } + Some(Ok(t)) => panic!("Expected Token::Identifier, found {:?}", t), + _ => panic!("Found EOF or unknown token, expected Token::Identifier"), + } + } + Some(Ok(t)) => panic!("Expected Token::Identifier, found {:?}", t), + _ => panic!("Found EOF or unknown token, expected Token::Identifier"), + } + None + } + Identifier(id) => { + // todo: replace the code below with a match statement + if let Some(Ok(token)) = lex.next() { + if token != Directive(Dir::Label) { + panic!("Expected Dir::Label found {:?}", token); + } else { + let id_ref = id_collect.get(&id); + + if id_ref == None { + id_collect.insert(id, (adr, lex.span().start, lex.span().end)); + } else { + let start = (*id_ref.unwrap()).1; + let end = (*id_ref.unwrap()).2; + panic!("identifier already used there {}..{}", start, end); + } + } + } else { + panic!("Expected Dir::Label found EOF"); + } + None + } + Condition(cond) if cond == Cond::Jump => { + adr += 16; + Some(format!( + "{}{}000000000000", + "1".green().bold(), + cond.bit_stream().blue() + )) + } + Register(regc) => { + adr += 16; + let mut inst_bits = String::new(); + let mut bits_a = String::new(); + let mut bits_b = String::new(); + let bits_c = regc.bit_stream(); + let mut bits_op = String::new(); + + let mut inst_mode = false; + + match lex.next() { + Some(Ok(Token::Assignement)) => (), + Some(Ok(t)) => panic!("Expected Token::Assignement, found {:?}", t), + _ => panic!("Found EOF or unknown token, expected Token::Assignement"), + } + + match lex.next() { + Some(Ok(Value(integer))) => { + inst_bits.push('1'); + inst_bits.push_str(format!("{:b}", integer).as_str()); + break; // all tokens are consumed for this instruction + } + Some(Ok(Identifier(id))) => { + inst_bits.push('1'); + inst_bits.push_str(id.as_str()); + } + Some(Ok(Register(rega))) => { + inst_mode = true; + bits_a = rega.bit_stream(); + } + Some(Ok(t)) => panic!("Found {:?}, expected one of Token::Value(_) or Token::Register(_)", t), + _ => panic!("Found EOF or unknown token, expected one of Token::Value(_) or Token::Register(_)") + } + + if inst_mode { + match lex.next() { + Some(Ok(Operation(op))) => { + bits_op = op.bit_stream(); + } + Some(Ok(t)) => panic!( + "Expected Token::Operation, found {:?} {}..{}", + t, + lex.span().start, + lex.span().end + ), + _ => panic!("Found EOF or unknown token, expected Token::Operation"), + } + + match lex.next() { + Some(Ok(Register(regb))) => { + bits_b = regb.bit_stream(); + } + Some(Ok(t)) => panic!("Expected Token::Register, found {:?}", t), + _ => panic!("Found EOF or unknown token, expected Token::Register"), + } + inst_bits = format!( + "{}{}{}{}{}000", + "0".green().bold(), + bits_op.blue(), + bits_a.yellow(), + bits_b.purple(), + bits_c.cyan() + ); + } + + Some(inst_bits) + } + Comment => None, + _ => panic!( + "Can't start with something other than label directive register or jump {}..{}", + lex.span().start, + lex.span().end + ), + }; + + if let Some(mut str_to_push) = current_bit_stream { + str_to_push.push('\n'); + bit_stream.push_str(&str_to_push.as_str()); + } + } + + println!( + "{}\n{}\n{}\n{}\n{}\n{}\n", + "15 bits value".red(), + "op/jump code".blue(), + "mode bit".green(), + "source A reg".yellow(), + "source B reg".purple(), + "dest reg".cyan() + ); + + replace_identifiers(&bit_stream, &id_collect) +} + +fn replace_identifiers(input: &str, id_collect: &HashMap) -> String { + let mut output = String::new(); + let mut i = 0; + let chars: Vec = input.chars().collect(); + + while i < chars.len() { + if chars[i].is_alphabetic() || chars[i] == '_' { + let start = i; + while i < chars.len() && (chars[i].is_alphabetic() || chars[i] == '_') { + i += 1; + } + let word: String = chars[start..i].iter().collect(); + if let Some(value) = id_collect.get(&word) { + output.push_str(&format!("{}", format!("{:015b}", value.0).red())); + } else { + output.push_str(&word); + } + } else { + output.push(chars[i]); + i += 1; + } + } + + output +} diff --git a/src/spec.rs b/src/spec.rs index a082975..bc20031 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -14,7 +14,6 @@ pub mod arch_v1 { Not, Or, Xor, - Assignement, } impl HandleToken for Op { @@ -26,13 +25,19 @@ pub mod arch_v1 { "~" => Some(Op::Not), "|" => Some(Op::Or), "^" => Some(Op::Xor), - "=" => Some(Op::Assignement), _ => None, // todo: return a beautiful error } } fn bit_stream(&self) -> String { - todo!(); + match self { + Op::Add => "000", + Op::Sub => "001", + Op::And => "010", + Op::Or => "011", + Op::Xor => "100", + Op::Not => "101" + }.to_string() } } @@ -98,7 +103,16 @@ pub mod arch_v1 { } fn bit_stream(&self) -> String { - todo!(); + match self { + Cond::Eq => "010", + Cond::Neq => "101", + Cond::Gt => "001", + Cond::Lt => "100", + Cond::GtEq => "011", + Cond::LtEq => "110", + Cond::Jump => "111", + } + .to_string() } } } From c427b1998495a7e8de39f6f34f66a7b88220c240 Mon Sep 17 00:00:00 2001 From: benoitlx Date: Wed, 19 Feb 2025 00:13:46 +0100 Subject: [PATCH 15/25] refactor-feat: single pattern matching for all expression - handle instruction of type A = D ; A = ~D, ... --- src/main.rs | 6 +- src/parser.rs | 339 +++++++++++++++++++++++++++----------------------- src/spec.rs | 6 +- 3 files changed, 188 insertions(+), 163 deletions(-) diff --git a/src/main.rs b/src/main.rs index fc618d7..8465482 100644 --- a/src/main.rs +++ b/src/main.rs @@ -11,8 +11,10 @@ fn main() { let mut content = String::new(); let _ = file.read_to_string(&mut content); - let mut lex = Token::lexer(content.as_str()); + let lex = Token::lexer(content.as_str()); - println!("{}", parser::generate_bit_stream(&mut lex)); + let mut tokens: Vec<(Result, std::ops::Range)> = lex.spanned().collect(); + + println!("{}", parser::generate_bit_stream_v2(&mut tokens)); } } diff --git a/src/parser.rs b/src/parser.rs index 239d1af..358ef69 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,15 +1,15 @@ use crate::lexer::spec::arch_v1::*; use crate::lexer::{Token::*, *}; +use std::ops::Range; use colored::Colorize; -use logos::Lexer; use std::collections::HashMap; /* TODO: -- parser les instructions avec un non ~ -- parser les instructions du types A = D (avec lex.peek()) +- should merge Op and Cond since they have the same bit placement in the instruction +- Add default implementation for Reg Cond and Op +- parser les conditions - tests -- soigner le code - pour chaque panic donner les bonnes infos l'endroit du token fautif ... Futur: @@ -17,153 +17,202 @@ Futur: - cli */ -pub fn generate_bit_stream(lex: &mut Lexer) -> String { - let mut bit_stream = String::new(); +fn data_mode_format(val: u16) -> String { + format!("{}{}", "1".green(), format!("{:015b}", val).red()) +} + +fn inst_mode_format(op_or_cond: Op, rega: Reg, regb: Reg, regc: Reg) -> String { + format!( + "{}{}{}{}{}000", + "0".green().bold(), + op_or_cond.bit_stream().blue(), + rega.bit_stream().yellow(), + regb.bit_stream().purple(), + regc.bit_stream().cyan() + ) +} + +pub fn generate_bit_stream_v2(tokens: &mut Vec<(Result, Range)>) -> String { + // colored::control::set_override(false); + + let mut bit_stream_with_id: Vec = vec![]; // Hashmap for the identifiers - let mut id_collect: HashMap = HashMap::new(); + let mut id_collect: HashMap)> = HashMap::new(); let mut adr = 0; - // let mut lex = lex.peekable(); - - while let Some(Ok(token)) = lex.next() { - let current_bit_stream: Option = match token { - Directive(Dir::Define) => { - match lex.next() { - Some(Ok(Identifier(id))) => { - let start = lex.span().start; - let end = lex.span().end; - match lex.next() { - Some(Ok(Value(v))) => { - let id_ref = id_collect.get(&id); - - if id_ref == None { - id_collect.insert(id, (v, start, end)); - } else { - let start = (*id_ref.unwrap()).1; - let end = (*id_ref.unwrap()).2; - panic!("identifier already used there {}..{}", start, end); - } - } - Some(Ok(t)) => panic!("Expected Token::Identifier, found {:?}", t), - _ => panic!("Found EOF or unknown token, expected Token::Identifier"), - } - } - Some(Ok(t)) => panic!("Expected Token::Identifier, found {:?}", t), - _ => panic!("Found EOF or unknown token, expected Token::Identifier"), + let mut i = 0; + let n = tokens.len(); + + // fill the token vec with 5 comments + for _ in 0..5 { + tokens.push((Ok(Token::Comment), 0..0)); + } + + while i < n { + let tokens_window = &tokens[i..(i+5)]; + // println!("{i}, {:?}", tokens_window); + + let inst_word = match tokens_window { + // A <- D & *A + [ + (Ok(Register(regc)), _), + (Ok(Assignement), _), + (Ok(Register(rega)), _), + (Ok(Operation(op)), _), + (Ok(Register(regb)), _), + ] => { + // FIXME: protection rule between the regs + i += 5; + adr += 16; + inst_mode_format(*op, *rega, *regb, *regc) + } + // A <- mask + [ + (Ok(Register(regc)), _), + (Ok(Assignement), _), + (Ok(Identifier(id)), _), + _, + _, + ] => { + if *regc != Reg::A { + panic!("Can't push direct value into an other register than A") } - None + + i += 3; + adr += 16; + id.clone() } - Identifier(id) => { - // todo: replace the code below with a match statement - if let Some(Ok(token)) = lex.next() { - if token != Directive(Dir::Label) { - panic!("Expected Dir::Label found {:?}", token); - } else { - let id_ref = id_collect.get(&id); - - if id_ref == None { - id_collect.insert(id, (adr, lex.span().start, lex.span().end)); - } else { - let start = (*id_ref.unwrap()).1; - let end = (*id_ref.unwrap()).2; - panic!("identifier already used there {}..{}", start, end); - } - } - } else { - panic!("Expected Dir::Label found EOF"); + // A <- 0x7fff + [ + (Ok(Register(regc)), _), + (Ok(Assignement), _), + (Ok(Value(val)), _), + _, + _, + ] => { + if *regc != Reg::A { + panic!("Can't push direct value into an other register than A") } - None + + i += 3; + adr += 16; + data_mode_format(*val) } - Condition(cond) if cond == Cond::Jump => { + // A <- D + [ + (Ok(Register(regc)), _), + (Ok(Assignement), _), + (Ok(Register(rega)), _), + _, + _, + ] => { + i += 3; adr += 16; - Some(format!( - "{}{}000000000000", - "1".green().bold(), - cond.bit_stream().blue() - )) + inst_mode_format(Op::Or, *rega, Reg::A, *regc) // Fixme: add Reg::One and Reg::Zero } - Register(regc) => { + // A <- ~D + [ + (Ok(Register(regc)), _), + (Ok(Assignement), _), + (Ok(Operation(op)), _), + (Ok(Register(rega)), _), + _, + ] => { + i += 4; adr += 16; - let mut inst_bits = String::new(); - let mut bits_a = String::new(); - let mut bits_b = String::new(); - let bits_c = regc.bit_stream(); - let mut bits_op = String::new(); - - let mut inst_mode = false; - - match lex.next() { - Some(Ok(Token::Assignement)) => (), - Some(Ok(t)) => panic!("Expected Token::Assignement, found {:?}", t), - _ => panic!("Found EOF or unknown token, expected Token::Assignement"), - } - - match lex.next() { - Some(Ok(Value(integer))) => { - inst_bits.push('1'); - inst_bits.push_str(format!("{:b}", integer).as_str()); - break; // all tokens are consumed for this instruction - } - Some(Ok(Identifier(id))) => { - inst_bits.push('1'); - inst_bits.push_str(id.as_str()); - } - Some(Ok(Register(rega))) => { - inst_mode = true; - bits_a = rega.bit_stream(); - } - Some(Ok(t)) => panic!("Found {:?}, expected one of Token::Value(_) or Token::Register(_)", t), - _ => panic!("Found EOF or unknown token, expected one of Token::Value(_) or Token::Register(_)") + inst_mode_format(*op, *rega, Reg::A, *regc) // Fixme: add Reg::One and Reg::Zero + } + // A == D + [ + (Ok(Register(rega)), _), + (Ok(Condition(_cond)), _), + (Ok(Register(regb)), _), + _, + _, + ] => { + i += 1; + adr += 16; + inst_mode_format(Op::Add, *rega, *regb, Reg::A) // Fixme: argument type for cond + } + // JMP + [ + (Ok(Condition(Cond::Jump)), _), + _, _, _, _, + ] => { + i += 1; + adr += 16; + // inst_mode_format(Op::Add, Reg::A, Reg::A, Reg::A) // fixme: replace with proper values + format!("0111000000000000") + } + // label: + [ + (Ok(Identifier(id)), span), + (Ok(Directive(Dir::Label)), _), + _, _, _, + ] => { + i += 2; + + let id_ref = id_collect.get(id); + + if id_ref == None { + id_collect.insert(id.clone(), (adr, span.clone())); + } else { + let other_span = (*id_ref.unwrap()).1.clone(); + panic!("identifier already used there {}..{}", other_span.start, other_span.end); } - if inst_mode { - match lex.next() { - Some(Ok(Operation(op))) => { - bits_op = op.bit_stream(); - } - Some(Ok(t)) => panic!( - "Expected Token::Operation, found {:?} {}..{}", - t, - lex.span().start, - lex.span().end - ), - _ => panic!("Found EOF or unknown token, expected Token::Operation"), - } - - match lex.next() { - Some(Ok(Register(regb))) => { - bits_b = regb.bit_stream(); - } - Some(Ok(t)) => panic!("Expected Token::Register, found {:?}", t), - _ => panic!("Found EOF or unknown token, expected Token::Register"), - } - inst_bits = format!( - "{}{}{}{}{}000", - "0".green().bold(), - bits_op.blue(), - bits_a.yellow(), - bits_b.purple(), - bits_c.cyan() - ); + String::new() + } + // DEFINE mask 0x1 + [ + (Ok(Directive(Dir::Define)), _), + (Ok(Identifier(id)), span), + (Ok(Value(val)), _), + _, _, + ] => { + i += 3; + + let id_ref = id_collect.get(id); + + if id_ref == None { + id_collect.insert(id.clone(), (*val, span.clone())); + } else { + let other_span = (*id_ref.unwrap()).1.clone(); + panic!("identifier already used there {}..{}", other_span.start, other_span.end); } - Some(inst_bits) + String::new() } - Comment => None, - _ => panic!( - "Can't start with something other than label directive register or jump {}..{}", - lex.span().start, - lex.span().end - ), + [ + (Ok(Comment), _), + _, _, _, _ + ] => { + i += 1; + String::new() + } + _ => panic!("Unexpected Error") }; - if let Some(mut str_to_push) = current_bit_stream { - str_to_push.push('\n'); - bit_stream.push_str(&str_to_push.as_str()); + if inst_word != "" { + bit_stream_with_id.push(inst_word); + } + } + + fn handle_id(id: String, col: &mut HashMap)>) -> String { + if id.chars().all(|c| c.is_alphabetic() || c == '_') { + return if let Some(value) = col.get(&id) { + data_mode_format(value.0) + } else { + "Error".to_string() + } } + + id } + let bit_stream: Vec = bit_stream_with_id.into_iter().map(|s| handle_id(s, &mut id_collect)).collect(); + println!( "{}\n{}\n{}\n{}\n{}\n{}\n", "15 bits value".red(), @@ -174,31 +223,5 @@ pub fn generate_bit_stream(lex: &mut Lexer) -> String { "dest reg".cyan() ); - replace_identifiers(&bit_stream, &id_collect) -} - -fn replace_identifiers(input: &str, id_collect: &HashMap) -> String { - let mut output = String::new(); - let mut i = 0; - let chars: Vec = input.chars().collect(); - - while i < chars.len() { - if chars[i].is_alphabetic() || chars[i] == '_' { - let start = i; - while i < chars.len() && (chars[i].is_alphabetic() || chars[i] == '_') { - i += 1; - } - let word: String = chars[start..i].iter().collect(); - if let Some(value) = id_collect.get(&word) { - output.push_str(&format!("{}", format!("{:015b}", value.0).red())); - } else { - output.push_str(&word); - } - } else { - output.push(chars[i]); - i += 1; - } - } - - output -} + bit_stream.join("\n") +} \ No newline at end of file diff --git a/src/spec.rs b/src/spec.rs index bc20031..6c5b253 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -6,7 +6,7 @@ pub mod arch_v1 { pub const MAX_LOAD_VALUE: u16 = 2_u16.pow(15) - 1; - #[derive(Debug, PartialEq)] + #[derive(Clone, Copy, Debug, PartialEq)] pub enum Op { Add, Sub, @@ -41,7 +41,7 @@ pub mod arch_v1 { } } - #[derive(Debug, PartialEq)] + #[derive(Clone, Copy, Debug, PartialEq)] pub enum Reg { A, V, @@ -74,7 +74,7 @@ pub mod arch_v1 { } // todo: colorize the string depending on the register } - #[derive(Debug, PartialEq)] + #[derive(Clone, Copy, Debug, PartialEq)] pub enum Cond { Eq, Neq, From c2f6c589333e746b538b9b126364cd50b70af201 Mon Sep 17 00:00:00 2001 From: benoitlx Date: Wed, 19 Feb 2025 12:19:03 +0100 Subject: [PATCH 16/25] feat: output format with colorization debug and separator --- src/main.rs | 5 +- src/parser.rs | 141 +++++++++++++++++++++----------------------------- 2 files changed, 62 insertions(+), 84 deletions(-) diff --git a/src/main.rs b/src/main.rs index 8465482..69a3a88 100644 --- a/src/main.rs +++ b/src/main.rs @@ -15,6 +15,9 @@ fn main() { let mut tokens: Vec<(Result, std::ops::Range)> = lex.spanned().collect(); - println!("{}", parser::generate_bit_stream_v2(&mut tokens)); + println!( + "{}", + parser::generate_bit_stream_v2(&mut tokens, true, false, "\n") + ); } } diff --git a/src/parser.rs b/src/parser.rs index 358ef69..240eb5b 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,8 +1,8 @@ use crate::lexer::spec::arch_v1::*; use crate::lexer::{Token::*, *}; -use std::ops::Range; use colored::Colorize; use std::collections::HashMap; +use std::ops::Range; /* TODO: @@ -32,8 +32,24 @@ fn inst_mode_format(op_or_cond: Op, rega: Reg, regb: Reg, regc: Reg) -> String { ) } -pub fn generate_bit_stream_v2(tokens: &mut Vec<(Result, Range)>) -> String { - // colored::control::set_override(false); +pub fn generate_bit_stream_v2( + tokens: &mut Vec<(Result, Range)>, + colorize: bool, + debug: bool, + sep: &str, +) -> String { + colored::control::set_override(colorize); + if colorize { + println!( + "{}\n{}\n{}\n{}\n{}\n{}\n", + "15 bits value".red(), + "op/jump code".blue(), + "mode bit".green(), + "source A reg".yellow(), + "source B reg".purple(), + "dest reg".cyan() + ); + } let mut bit_stream_with_id: Vec = vec![]; @@ -50,31 +66,23 @@ pub fn generate_bit_stream_v2(tokens: &mut Vec<(Result, Range) } while i < n { - let tokens_window = &tokens[i..(i+5)]; - // println!("{i}, {:?}", tokens_window); + let tokens_window = &tokens[i..(i + 5)]; + + if debug { + println!("{i}, {:?}", tokens_window); + } let inst_word = match tokens_window { // A <- D & *A - [ - (Ok(Register(regc)), _), - (Ok(Assignement), _), - (Ok(Register(rega)), _), - (Ok(Operation(op)), _), - (Ok(Register(regb)), _), - ] => { + [(Ok(Register(regc)), _), (Ok(Assignement), _), (Ok(Register(rega)), _), (Ok(Operation(op)), _), (Ok(Register(regb)), _)] => + { // FIXME: protection rule between the regs i += 5; adr += 16; inst_mode_format(*op, *rega, *regb, *regc) } // A <- mask - [ - (Ok(Register(regc)), _), - (Ok(Assignement), _), - (Ok(Identifier(id)), _), - _, - _, - ] => { + [(Ok(Register(regc)), _), (Ok(Assignement), _), (Ok(Identifier(id)), _), _, _] => { if *regc != Reg::A { panic!("Can't push direct value into an other register than A") } @@ -84,13 +92,7 @@ pub fn generate_bit_stream_v2(tokens: &mut Vec<(Result, Range) id.clone() } // A <- 0x7fff - [ - (Ok(Register(regc)), _), - (Ok(Assignement), _), - (Ok(Value(val)), _), - _, - _, - ] => { + [(Ok(Register(regc)), _), (Ok(Assignement), _), (Ok(Value(val)), _), _, _] => { if *regc != Reg::A { panic!("Can't push direct value into an other register than A") } @@ -100,57 +102,33 @@ pub fn generate_bit_stream_v2(tokens: &mut Vec<(Result, Range) data_mode_format(*val) } // A <- D - [ - (Ok(Register(regc)), _), - (Ok(Assignement), _), - (Ok(Register(rega)), _), - _, - _, - ] => { + [(Ok(Register(regc)), _), (Ok(Assignement), _), (Ok(Register(rega)), _), _, _] => { i += 3; adr += 16; inst_mode_format(Op::Or, *rega, Reg::A, *regc) // Fixme: add Reg::One and Reg::Zero } // A <- ~D - [ - (Ok(Register(regc)), _), - (Ok(Assignement), _), - (Ok(Operation(op)), _), - (Ok(Register(rega)), _), - _, - ] => { + [(Ok(Register(regc)), _), (Ok(Assignement), _), (Ok(Operation(op)), _), (Ok(Register(rega)), _), _] => + { i += 4; adr += 16; inst_mode_format(*op, *rega, Reg::A, *regc) // Fixme: add Reg::One and Reg::Zero } // A == D - [ - (Ok(Register(rega)), _), - (Ok(Condition(_cond)), _), - (Ok(Register(regb)), _), - _, - _, - ] => { + [(Ok(Register(rega)), _), (Ok(Condition(_cond)), _), (Ok(Register(regb)), _), _, _] => { i += 1; adr += 16; inst_mode_format(Op::Add, *rega, *regb, Reg::A) // Fixme: argument type for cond } // JMP - [ - (Ok(Condition(Cond::Jump)), _), - _, _, _, _, - ] => { + [(Ok(Condition(Cond::Jump)), _), _, _, _, _] => { i += 1; adr += 16; // inst_mode_format(Op::Add, Reg::A, Reg::A, Reg::A) // fixme: replace with proper values format!("0111000000000000") } // label: - [ - (Ok(Identifier(id)), span), - (Ok(Directive(Dir::Label)), _), - _, _, _, - ] => { + [(Ok(Identifier(id)), span), (Ok(Directive(Dir::Label)), _), _, _, _] => { i += 2; let id_ref = id_collect.get(id); @@ -159,18 +137,17 @@ pub fn generate_bit_stream_v2(tokens: &mut Vec<(Result, Range) id_collect.insert(id.clone(), (adr, span.clone())); } else { let other_span = (*id_ref.unwrap()).1.clone(); - panic!("identifier already used there {}..{}", other_span.start, other_span.end); + panic!( + "identifier already used there {}..{}", + other_span.start, other_span.end + ); } String::new() } // DEFINE mask 0x1 - [ - (Ok(Directive(Dir::Define)), _), - (Ok(Identifier(id)), span), - (Ok(Value(val)), _), - _, _, - ] => { + [(Ok(Directive(Dir::Define)), _), (Ok(Identifier(id)), span), (Ok(Value(val)), _), _, _] => + { i += 3; let id_ref = id_collect.get(id); @@ -179,19 +156,19 @@ pub fn generate_bit_stream_v2(tokens: &mut Vec<(Result, Range) id_collect.insert(id.clone(), (*val, span.clone())); } else { let other_span = (*id_ref.unwrap()).1.clone(); - panic!("identifier already used there {}..{}", other_span.start, other_span.end); + panic!( + "identifier already used there {}..{}", + other_span.start, other_span.end + ); } String::new() } - [ - (Ok(Comment), _), - _, _, _, _ - ] => { + [(Ok(Comment), _), _, _, _, _] => { i += 1; String::new() } - _ => panic!("Unexpected Error") + _ => panic!("Unexpected Error"), }; if inst_word != "" { @@ -199,29 +176,27 @@ pub fn generate_bit_stream_v2(tokens: &mut Vec<(Result, Range) } } + if debug { + println!("{:.?}", id_collect); + } + fn handle_id(id: String, col: &mut HashMap)>) -> String { if id.chars().all(|c| c.is_alphabetic() || c == '_') { return if let Some(value) = col.get(&id) { data_mode_format(value.0) } else { "Error".to_string() - } + // todo return a proper error and where it happened + }; } id } - let bit_stream: Vec = bit_stream_with_id.into_iter().map(|s| handle_id(s, &mut id_collect)).collect(); + let bit_stream: Vec = bit_stream_with_id + .into_iter() + .map(|s| handle_id(s, &mut id_collect)) + .collect(); - println!( - "{}\n{}\n{}\n{}\n{}\n{}\n", - "15 bits value".red(), - "op/jump code".blue(), - "mode bit".green(), - "source A reg".yellow(), - "source B reg".purple(), - "dest reg".cyan() - ); - - bit_stream.join("\n") -} \ No newline at end of file + bit_stream.join(sep) +} From eb4165fc9075a4a4c9d4d15e7b6b5f93d3367fa1 Mon Sep 17 00:00:00 2001 From: benoitlx Date: Wed, 19 Feb 2025 21:37:48 +0100 Subject: [PATCH 17/25] fix: integration test for lexer --- tests/real_test/realistic_test.asm | 8 +++----- tests/real_test/realistic_test.token_stream | 9 ++++----- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/tests/real_test/realistic_test.asm b/tests/real_test/realistic_test.asm index ba9a5d1..c37b052 100644 --- a/tests/real_test/realistic_test.asm +++ b/tests/real_test/realistic_test.asm @@ -11,7 +11,7 @@ D = A A = io_adr D = D & *A A = wait -D ; JNE +D != A check: A = ob_detection_mask @@ -19,7 +19,7 @@ D = A A = io_adr D = D & *A A = move -D ; JEQ +D == *A turn: A = left @@ -35,6 +35,4 @@ move: A = move_mask D = A A = send -JMP -D ; this is a comment -D \ No newline at end of file +JMP \ No newline at end of file diff --git a/tests/real_test/realistic_test.token_stream b/tests/real_test/realistic_test.token_stream index 9cdaf67..b0c6d40 100644 --- a/tests/real_test/realistic_test.token_stream +++ b/tests/real_test/realistic_test.token_stream @@ -33,7 +33,8 @@ Ok(Register(A)) Ok(Assignement) Ok(Identifier("wait")) Ok(Register(D)) -Ok(Comment) +Ok(Condition(Neq)) +Ok(Register(A)) Ok(Identifier("check")) Ok(Directive(Label)) Ok(Register(A)) @@ -54,7 +55,8 @@ Ok(Register(A)) Ok(Assignement) Ok(Identifier("move")) Ok(Register(D)) -Ok(Comment) +Ok(Condition(Eq)) +Ok(Register(AStar)) Ok(Identifier("turn")) Ok(Directive(Label)) Ok(Register(A)) @@ -87,6 +89,3 @@ Ok(Register(A)) Ok(Assignement) Ok(Identifier("send")) Ok(Condition(Jump)) -Ok(Register(D)) -Ok(Comment) -Ok(Register(D)) From 2c780a60faa0d31dd76f6dcf67c7019d88827a7d Mon Sep 17 00:00:00 2001 From: benoitlx Date: Wed, 19 Feb 2025 21:51:53 +0100 Subject: [PATCH 18/25] build: fix missing .temp/ directory in remote --- .gitignore | 3 +-- tests/.temp/.gitignore | 2 ++ 2 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 tests/.temp/.gitignore diff --git a/.gitignore b/.gitignore index 78f67b6..82587a5 100644 --- a/.gitignore +++ b/.gitignore @@ -24,5 +24,4 @@ Cargo.lock /target -.vscode/ -.temp/ \ No newline at end of file +.vscode/ \ No newline at end of file diff --git a/tests/.temp/.gitignore b/tests/.temp/.gitignore new file mode 100644 index 0000000..c96a04f --- /dev/null +++ b/tests/.temp/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore \ No newline at end of file From df2c74c0d6d210f77bcd15db0bd7e1b38f50898a Mon Sep 17 00:00:00 2001 From: benoitlx Date: Wed, 19 Feb 2025 22:14:53 +0100 Subject: [PATCH 19/25] test: define parsing --- src/main.rs | 2 +- src/parser.rs | 32 ++++++++++++++++++++++++++++---- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/src/main.rs b/src/main.rs index 69a3a88..a5b3b39 100644 --- a/src/main.rs +++ b/src/main.rs @@ -17,7 +17,7 @@ fn main() { println!( "{}", - parser::generate_bit_stream_v2(&mut tokens, true, false, "\n") + parser::generate_bit_stream(&mut tokens, true, true, "\n").0 ); } } diff --git a/src/parser.rs b/src/parser.rs index 240eb5b..ce72e4f 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -32,12 +32,12 @@ fn inst_mode_format(op_or_cond: Op, rega: Reg, regb: Reg, regc: Reg) -> String { ) } -pub fn generate_bit_stream_v2( +pub fn generate_bit_stream( tokens: &mut Vec<(Result, Range)>, colorize: bool, debug: bool, sep: &str, -) -> String { +) -> (String, HashMap)>) { colored::control::set_override(colorize); if colorize { println!( @@ -116,7 +116,7 @@ pub fn generate_bit_stream_v2( } // A == D [(Ok(Register(rega)), _), (Ok(Condition(_cond)), _), (Ok(Register(regb)), _), _, _] => { - i += 1; + i += 3; adr += 16; inst_mode_format(Op::Add, *rega, *regb, Reg::A) // Fixme: argument type for cond } @@ -198,5 +198,29 @@ pub fn generate_bit_stream_v2( .map(|s| handle_id(s, &mut id_collect)) .collect(); - bit_stream.join(sep) + (bit_stream.join(sep), id_collect) } + +#[cfg(test)] +mod tests { + use super::*; + use logos::Logos; + + #[test] + fn test_define() { + let src = "DEFINE foo 0\nDEFINE bar 1\nDEFINE titi 42\nDEFINE tata 73"; + let mut collection: HashMap)> = HashMap::new(); + collection.insert("foo".to_string(), (0, 7..10)); + collection.insert("bar".to_string(), (1, 20..23)); + collection.insert("titi".to_string(), (42, 33..37)); + collection.insert("tata".to_string(), (73, 48..52)); + + let lex = Token::lexer(src); + + let mut tokens: Vec<(Result, std::ops::Range)> = lex.spanned().collect(); + assert_eq!(collection, generate_bit_stream(&mut tokens, false, false, "").1); + assert_eq!(collection, generate_bit_stream(&mut tokens, false, true, "").1); + assert_eq!(collection, generate_bit_stream(&mut tokens, true, false, "").1); + assert_eq!(collection, generate_bit_stream(&mut tokens, true, true, "").1); + } +} \ No newline at end of file From ac8c8b30c43952feb26d178d6b17b9dccf4dc0c2 Mon Sep 17 00:00:00 2001 From: benoitlx Date: Fri, 21 Feb 2025 11:39:37 +0100 Subject: [PATCH 20/25] test: label parsing --- src/parser.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/parser.rs b/src/parser.rs index ce72e4f..4162551 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -223,4 +223,21 @@ mod tests { assert_eq!(collection, generate_bit_stream(&mut tokens, true, false, "").1); assert_eq!(collection, generate_bit_stream(&mut tokens, true, true, "").1); } + + #[test] + fn test_label() { + let src = "main:\nJMP\nlabel:\nJMP\nJMP\nJMP\nJMP\nJMP\nJMP\nJMP\nJMP\ntiti:"; + let mut collection: HashMap)> = HashMap::new(); + collection.insert("main".to_string(), (0, 0..4)); + collection.insert("label".to_string(), (16, 10..15)); + collection.insert("titi".to_string(), (144, 49..53)); + + let lex = Token::lexer(src); + + let mut tokens: Vec<(Result, std::ops::Range)> = lex.spanned().collect(); + assert_eq!(collection, generate_bit_stream(&mut tokens, false, false, "").1); + assert_eq!(collection, generate_bit_stream(&mut tokens, false, true, "").1); + assert_eq!(collection, generate_bit_stream(&mut tokens, true, false, "").1); + assert_eq!(collection, generate_bit_stream(&mut tokens, true, true, "").1); + } } \ No newline at end of file From 7880a50d42f0ec1bee9711a5946660217286a312 Mon Sep 17 00:00:00 2001 From: benoitlx Date: Fri, 21 Feb 2025 12:27:54 +0100 Subject: [PATCH 21/25] test: load value parsing and bit stream generation --- Cargo.toml | 6 ++++++ src/parser.rs | 24 ++++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 59a6bc9..bd3eb78 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,3 +10,9 @@ logos = { version = "0.15.0", features = ["debug"] } [dev-dependencies] rusty-hook = "^0.11.2" pretty_assertions = "1" + +[features] +# this effectively enable the feature `no-color` of colored when testing with +# `cargo test --feature dumb_terminal` +dumb_terminal = ["colored/no-color"] +default = ["dumb_terminal"] diff --git a/src/parser.rs b/src/parser.rs index 4162551..17e59f8 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -240,4 +240,28 @@ mod tests { assert_eq!(collection, generate_bit_stream(&mut tokens, true, false, "").1); assert_eq!(collection, generate_bit_stream(&mut tokens, true, true, "").1); } + + #[test] + fn test_load_value() { + let src = "DEFINE mask 42\nA = 0\nA = 0x7fff\nA = mask"; + let expected = "1000000000000000\n1111111111111111\n1000000000101010"; + + let lex = Token::lexer(src); + + let mut tokens: Vec<(Result, std::ops::Range)> = lex.spanned().collect(); + + assert_eq!(expected, generate_bit_stream(&mut tokens, false, false, "\n").0); + assert_eq!(expected, generate_bit_stream(&mut tokens, false, true, "\n").0); + } + + #[test] + #[should_panic] + fn test_load_value_into_wrong_register() { + let src = "D = 0"; + + let lex = Token::lexer(src); + + let mut tokens: Vec<(Result, std::ops::Range)> = lex.spanned().collect(); + generate_bit_stream(&mut tokens, false, false, ""); + } } \ No newline at end of file From f86277a33d2b0773397d52a0eb5808a179ec2ef8 Mon Sep 17 00:00:00 2001 From: benoitlx Date: Sat, 22 Feb 2025 11:49:28 +0100 Subject: [PATCH 22/25] test: registers transfer - conditions - jump --- src/parser.rs | 196 ++++++++++++++++++++++++++++++++++++++++++-------- src/spec.rs | 6 ++ 2 files changed, 174 insertions(+), 28 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 17e59f8..d966650 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -21,9 +21,27 @@ fn data_mode_format(val: u16) -> String { format!("{}{}", "1".green(), format!("{:015b}", val).red()) } -fn inst_mode_format(op_or_cond: Op, rega: Reg, regb: Reg, regc: Reg) -> String { +trait BitStream { + fn bit_stream(&self) -> String; +} + +enum OpOrCond { + Operation(Op), + Condition(Cond), +} + +impl BitStream for OpOrCond { + fn bit_stream(&self) -> String { + match self { + OpOrCond::Operation(op) => op.bit_stream(), + OpOrCond::Condition(cond) => cond.bit_stream(), + } + } +} + +fn inst_mode_format(op_or_cond: OpOrCond, rega: Reg, regb: Reg, regc: Reg) -> String { format!( - "{}{}{}{}{}000", + "{}{}000{}{}{}", "0".green().bold(), op_or_cond.bit_stream().blue(), rega.bit_stream().yellow(), @@ -76,12 +94,17 @@ pub fn generate_bit_stream( // A <- D & *A [(Ok(Register(regc)), _), (Ok(Assignement), _), (Ok(Register(rega)), _), (Ok(Operation(op)), _), (Ok(Register(regb)), _)] => { - // FIXME: protection rule between the regs + if *regc == Reg::A && (*rega == Reg::AStar || *regb == Reg::AStar) { + panic!("Cannot change A value when reading *A"); + } + if *regc == Reg::V && (*rega == Reg::VStar || *regb == Reg::VStar) { + panic!("Cannot change V value when reading *V"); + } i += 5; adr += 16; - inst_mode_format(*op, *rega, *regb, *regc) + inst_mode_format(OpOrCond::Operation(*op), *rega, *regb, *regc) } - // A <- mask + // A <- mask, tested [(Ok(Register(regc)), _), (Ok(Assignement), _), (Ok(Identifier(id)), _), _, _] => { if *regc != Reg::A { panic!("Can't push direct value into an other register than A") @@ -91,7 +114,7 @@ pub fn generate_bit_stream( adr += 16; id.clone() } - // A <- 0x7fff + // A <- 0x7fff, tested [(Ok(Register(regc)), _), (Ok(Assignement), _), (Ok(Value(val)), _), _, _] => { if *regc != Reg::A { panic!("Can't push direct value into an other register than A") @@ -101,33 +124,49 @@ pub fn generate_bit_stream( adr += 16; data_mode_format(*val) } - // A <- D + // A <- D, tested [(Ok(Register(regc)), _), (Ok(Assignement), _), (Ok(Register(rega)), _), _, _] => { + if *regc == Reg::A && *rega == Reg::AStar { + panic!("Cannot change A value when reading *A"); + } + if *regc == Reg::V && *rega == Reg::VStar { + panic!("Cannot change V value when reading *V"); + } i += 3; adr += 16; - inst_mode_format(Op::Or, *rega, Reg::A, *regc) // Fixme: add Reg::One and Reg::Zero + inst_mode_format(OpOrCond::Operation(Op::Or), *rega, Reg::Zero, *regc) } // A <- ~D [(Ok(Register(regc)), _), (Ok(Assignement), _), (Ok(Operation(op)), _), (Ok(Register(rega)), _), _] => { + if *regc == Reg::A && *rega == Reg::AStar { + panic!("Cannot change A value when reading *A"); + } + if *regc == Reg::V && *rega == Reg::VStar { + panic!("Cannot change V value when reading *V"); + } i += 4; adr += 16; - inst_mode_format(*op, *rega, Reg::A, *regc) // Fixme: add Reg::One and Reg::Zero + inst_mode_format(OpOrCond::Operation(*op), *rega, Reg::A, *regc) // Fixme: add Reg::One and Reg::Zero } - // A == D - [(Ok(Register(rega)), _), (Ok(Condition(_cond)), _), (Ok(Register(regb)), _), _, _] => { - i += 3; + // D>=, tested + /* + A <- main + D>= (<=> D >= 0 ?) + JMP + */ + [(Ok(Register(rega)), _), (Ok(Condition(cond)), _), _, _, _] => { + i += 2; adr += 16; - inst_mode_format(Op::Add, *rega, *regb, Reg::A) // Fixme: argument type for cond + inst_mode_format(OpOrCond::Condition(*cond), *rega, Reg::Zero, Reg::Zero) // Fixme: argument type for cond } - // JMP + // JMP, tested [(Ok(Condition(Cond::Jump)), _), _, _, _, _] => { i += 1; adr += 16; - // inst_mode_format(Op::Add, Reg::A, Reg::A, Reg::A) // fixme: replace with proper values format!("0111000000000000") } - // label: + // label:, tested [(Ok(Identifier(id)), span), (Ok(Directive(Dir::Label)), _), _, _, _] => { i += 2; @@ -145,7 +184,7 @@ pub fn generate_bit_stream( String::new() } - // DEFINE mask 0x1 + // DEFINE mask 0x1, tested [(Ok(Directive(Dir::Define)), _), (Ok(Identifier(id)), span), (Ok(Value(val)), _), _, _] => { i += 3; @@ -218,10 +257,22 @@ mod tests { let lex = Token::lexer(src); let mut tokens: Vec<(Result, std::ops::Range)> = lex.spanned().collect(); - assert_eq!(collection, generate_bit_stream(&mut tokens, false, false, "").1); - assert_eq!(collection, generate_bit_stream(&mut tokens, false, true, "").1); - assert_eq!(collection, generate_bit_stream(&mut tokens, true, false, "").1); - assert_eq!(collection, generate_bit_stream(&mut tokens, true, true, "").1); + assert_eq!( + collection, + generate_bit_stream(&mut tokens, false, false, "").1 + ); + assert_eq!( + collection, + generate_bit_stream(&mut tokens, false, true, "").1 + ); + assert_eq!( + collection, + generate_bit_stream(&mut tokens, true, false, "").1 + ); + assert_eq!( + collection, + generate_bit_stream(&mut tokens, true, true, "").1 + ); } #[test] @@ -235,10 +286,22 @@ mod tests { let lex = Token::lexer(src); let mut tokens: Vec<(Result, std::ops::Range)> = lex.spanned().collect(); - assert_eq!(collection, generate_bit_stream(&mut tokens, false, false, "").1); - assert_eq!(collection, generate_bit_stream(&mut tokens, false, true, "").1); - assert_eq!(collection, generate_bit_stream(&mut tokens, true, false, "").1); - assert_eq!(collection, generate_bit_stream(&mut tokens, true, true, "").1); + assert_eq!( + collection, + generate_bit_stream(&mut tokens, false, false, "").1 + ); + assert_eq!( + collection, + generate_bit_stream(&mut tokens, false, true, "").1 + ); + assert_eq!( + collection, + generate_bit_stream(&mut tokens, true, false, "").1 + ); + assert_eq!( + collection, + generate_bit_stream(&mut tokens, true, true, "").1 + ); } #[test] @@ -250,8 +313,14 @@ mod tests { let mut tokens: Vec<(Result, std::ops::Range)> = lex.spanned().collect(); - assert_eq!(expected, generate_bit_stream(&mut tokens, false, false, "\n").0); - assert_eq!(expected, generate_bit_stream(&mut tokens, false, true, "\n").0); + assert_eq!( + expected, + generate_bit_stream(&mut tokens, false, false, "\n").0 + ); + assert_eq!( + expected, + generate_bit_stream(&mut tokens, false, true, "\n").0 + ); } #[test] @@ -264,4 +333,75 @@ mod tests { let mut tokens: Vec<(Result, std::ops::Range)> = lex.spanned().collect(); generate_bit_stream(&mut tokens, false, false, ""); } -} \ No newline at end of file + + #[test] + fn test_register_transfer() { + let src = "D = A\nD = D\nD = *A\nA = D\n*A = D\n*A = A"; + + let expected = "0011000000110100\n0011000100110100\n0011000001110100\n0011000100110000\n0011000100110001\n0011000000110001"; + + let lex = Token::lexer(src); + + let mut tokens: Vec<(Result, std::ops::Range)> = lex.spanned().collect(); + + assert_eq!( + expected, + generate_bit_stream(&mut tokens, false, false, "\n").0 + ); + assert_eq!( + expected, + generate_bit_stream(&mut tokens, false, true, "\n").0 + ); + } + + #[test] + #[should_panic] + fn test_wrong_register_transfer() { + let src = "A = *A\nV = *V"; + + let lex = Token::lexer(src); + + let mut tokens: Vec<(Result, std::ops::Range)> = lex.spanned().collect(); + generate_bit_stream(&mut tokens, false, false, ""); + } + + #[test] + fn test_condition() { + let src = "D==\nD>=\n*A>="; + + let expected = "0010000100110110\n0011000100110110\n0011000001110110"; + + let lex = Token::lexer(src); + + let mut tokens: Vec<(Result, std::ops::Range)> = lex.spanned().collect(); + + assert_eq!( + expected, + generate_bit_stream(&mut tokens, false, false, "\n").0 + ); + assert_eq!( + expected, + generate_bit_stream(&mut tokens, false, true, "\n").0 + ); + } + + #[test] + fn test_jump() { + let src = "JMP"; + + let expected = "0111000000000000"; + + let lex = Token::lexer(src); + + let mut tokens: Vec<(Result, std::ops::Range)> = lex.spanned().collect(); + + assert_eq!( + expected, + generate_bit_stream(&mut tokens, false, false, "\n").0 + ); + assert_eq!( + expected, + generate_bit_stream(&mut tokens, false, true, "\n").0 + ); + } +} diff --git a/src/spec.rs b/src/spec.rs index 6c5b253..dd8cabf 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -48,6 +48,8 @@ pub mod arch_v1 { AStar, VStar, D, + Zero, + One } impl HandleToken for Reg { @@ -58,6 +60,8 @@ pub mod arch_v1 { "*A" => Some(Reg::AStar), "*V" => Some(Reg::VStar), "D" => Some(Reg::D), + "Z" => Some(Reg::Zero), + "O" => Some(Reg::One), _ => None, // todo: return a beautiful error } } @@ -69,6 +73,8 @@ pub mod arch_v1 { Reg::V => "010", Reg::VStar => "011", Reg::D => "100", + Reg::Zero => "110", + Reg::One => "111", } .to_string() } // todo: colorize the string depending on the register From 4e6bc6090aa934bc060acef75f9dff96fc697920 Mon Sep 17 00:00:00 2001 From: benoitlx Date: Sat, 22 Feb 2025 12:02:01 +0100 Subject: [PATCH 23/25] test: single operand operation --- src/parser.rs | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/src/parser.rs b/src/parser.rs index d966650..aa6ae69 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -136,9 +136,12 @@ pub fn generate_bit_stream( adr += 16; inst_mode_format(OpOrCond::Operation(Op::Or), *rega, Reg::Zero, *regc) } - // A <- ~D + // A <- ~D, tested [(Ok(Register(regc)), _), (Ok(Assignement), _), (Ok(Operation(op)), _), (Ok(Register(rega)), _), _] => { + if *op != Op::Not { + panic!("Expected a not operation"); + } if *regc == Reg::A && *rega == Reg::AStar { panic!("Cannot change A value when reading *A"); } @@ -404,4 +407,36 @@ mod tests { generate_bit_stream(&mut tokens, false, true, "\n").0 ); } + + #[test] + fn test_not() { + let src = "A = ~D\nD = ~V"; + + let expected = "0101000100000000\n0101000010000100"; + + let lex = Token::lexer(src); + + let mut tokens: Vec<(Result, std::ops::Range)> = lex.spanned().collect(); + + assert_eq!( + expected, + generate_bit_stream(&mut tokens, false, false, "\n").0 + ); + assert_eq!( + expected, + generate_bit_stream(&mut tokens, false, true, "\n").0 + ); + } + + #[test] + #[should_panic] + fn test_non_single_operand_operation() { + let src = "A = +D"; + + let lex = Token::lexer(src); + + let mut tokens: Vec<(Result, std::ops::Range)> = lex.spanned().collect(); + + generate_bit_stream(&mut tokens, false, false, ""); + } } From 865ea2d16b9e61a7e9f497822196c4d16ad8606c Mon Sep 17 00:00:00 2001 From: benoitlx Date: Sat, 22 Feb 2025 12:08:16 +0100 Subject: [PATCH 24/25] test: updating syntax for condition in realistic_test --- tests/real_test/realistic_test.asm | 6 ++++-- tests/real_test/realistic_test.token_stream | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/real_test/realistic_test.asm b/tests/real_test/realistic_test.asm index c37b052..31ad529 100644 --- a/tests/real_test/realistic_test.asm +++ b/tests/real_test/realistic_test.asm @@ -11,7 +11,8 @@ D = A A = io_adr D = D & *A A = wait -D != A +D != +JMP check: A = ob_detection_mask @@ -19,7 +20,8 @@ D = A A = io_adr D = D & *A A = move -D == *A +D == +JMP turn: A = left diff --git a/tests/real_test/realistic_test.token_stream b/tests/real_test/realistic_test.token_stream index b0c6d40..00e8d32 100644 --- a/tests/real_test/realistic_test.token_stream +++ b/tests/real_test/realistic_test.token_stream @@ -34,7 +34,7 @@ Ok(Assignement) Ok(Identifier("wait")) Ok(Register(D)) Ok(Condition(Neq)) -Ok(Register(A)) +Ok(Condition(Jump)) Ok(Identifier("check")) Ok(Directive(Label)) Ok(Register(A)) @@ -56,7 +56,7 @@ Ok(Assignement) Ok(Identifier("move")) Ok(Register(D)) Ok(Condition(Eq)) -Ok(Register(AStar)) +Ok(Condition(Jump)) Ok(Identifier("turn")) Ok(Directive(Label)) Ok(Register(A)) From 73916a8cbf7a3fe7924e828c5e4bbde628471838 Mon Sep 17 00:00:00 2001 From: benoitlx Date: Mon, 24 Feb 2025 11:12:19 +0100 Subject: [PATCH 25/25] test: double operand instructions --- src/parser.rs | 55 +++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 49 insertions(+), 6 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index aa6ae69..c88835a 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -6,10 +6,6 @@ use std::ops::Range; /* TODO: -- should merge Op and Cond since they have the same bit placement in the instruction -- Add default implementation for Reg Cond and Op -- parser les conditions -- tests - pour chaque panic donner les bonnes infos l'endroit du token fautif ... Futur: @@ -100,6 +96,9 @@ pub fn generate_bit_stream( if *regc == Reg::V && (*rega == Reg::VStar || *regb == Reg::VStar) { panic!("Cannot change V value when reading *V"); } + if *op == Op::Not { + panic!("Too many operand for ~"); + } i += 5; adr += 16; inst_mode_format(OpOrCond::Operation(*op), *rega, *regb, *regc) @@ -150,7 +149,7 @@ pub fn generate_bit_stream( } i += 4; adr += 16; - inst_mode_format(OpOrCond::Operation(*op), *rega, Reg::A, *regc) // Fixme: add Reg::One and Reg::Zero + inst_mode_format(OpOrCond::Operation(*op), *rega, Reg::A, *regc) } // D>=, tested /* @@ -161,7 +160,7 @@ pub fn generate_bit_stream( [(Ok(Register(rega)), _), (Ok(Condition(cond)), _), _, _, _] => { i += 2; adr += 16; - inst_mode_format(OpOrCond::Condition(*cond), *rega, Reg::Zero, Reg::Zero) // Fixme: argument type for cond + inst_mode_format(OpOrCond::Condition(*cond), *rega, Reg::Zero, Reg::Zero) } // JMP, tested [(Ok(Condition(Cond::Jump)), _), _, _, _, _] => { @@ -439,4 +438,48 @@ mod tests { generate_bit_stream(&mut tokens, false, false, ""); } + + #[test] + fn test_double_operand_operation() { + let src = "A = A + D\nA = A & D\nD = *A | A"; + + let expected = "0000000000100000\n0010000000100000\n0011000001000100"; + + let lex = Token::lexer(src); + + let mut tokens: Vec<(Result, std::ops::Range)> = lex.spanned().collect(); + + assert_eq!( + expected, + generate_bit_stream(&mut tokens, false, false, "\n").0 + ); + assert_eq!( + expected, + generate_bit_stream(&mut tokens, false, true, "\n").0 + ); + } + + #[test] + #[should_panic] + fn test_wrong_double_operand_operation() { + let src = "A = A ~ D"; + + let lex = Token::lexer(src); + + let mut tokens: Vec<(Result, std::ops::Range)> = lex.spanned().collect(); + + generate_bit_stream(&mut tokens, false, false, ""); + } + + #[test] + #[should_panic] + fn test_incompatible_registers() { + let src = "A = A + *A\nV = V + *V\nA = *A & D"; + + let lex = Token::lexer(src); + + let mut tokens: Vec<(Result, std::ops::Range)> = lex.spanned().collect(); + + generate_bit_stream(&mut tokens, false, false, ""); + } }