diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..ad52559 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "makefile.configureOnOpen": true +} diff --git a/Lexer/LexerClasses - Part 1.cpp b/Lexer/LexerClasses - Part 1.cpp deleted file mode 100644 index d7c1ae9..0000000 --- a/Lexer/LexerClasses - Part 1.cpp +++ /dev/null @@ -1,144 +0,0 @@ -#include -#include -#include - -enum class TokenType { - DEF, - RETURN, - IF, - ELSE, - PRINT, - FOR, - WHILE, - - IDENTIFIER, - NUMBER, - STRING, - - PLUS, // + - MINUS, // - - STAR, // * - SLASH, // / - ASSIGN, // = - MODULO, // % - GREATERTHAN, // > - LESSERTHAN, // < - - LPAREN, // ( - RPAREN, // ) - - NEWLINE, - EOF_TOKEN, - - COMMA, // , - COLON, // : - INDENT, - DEDENT, - COMMENT // # -}; - - -class Token{ - public: - // Main part of a token - TokenType type; - std::string value; - - // Used to tell user about error in case error is found - int line; - int column; - - // This constructor will allow us to easily make the tokens while coding - Token(TokenType type, std::string val, int l, int c){ - this->type = type; this->value = val; - this->line = l; this->column = c; - } -}; - - -class Lexer{ - public: - Lexer(std::string input_string){ - this->source_code = preprocess_indents(input_string); - } - std::vector scan_Tokens(); - - private: - std::string source_code; - static inline const std::unordered_map keywords = { - {"def", TokenType::DEF}, - {"return", TokenType::RETURN}, - {"if", TokenType::IF}, - {"else", TokenType::ELSE}, - {"print", TokenType::PRINT}, - {"=", TokenType::ASSIGN}, - {"+", TokenType::PLUS}, - {"-", TokenType::MINUS}, - {"*", TokenType::STAR}, - {"/", TokenType::SLASH}, - {"(", TokenType::LPAREN}, - {")", TokenType::RPAREN}, - {":", TokenType::COLON}, - {",", TokenType::COMMA}, - {"\n", TokenType::NEWLINE}, - {"\t", TokenType::INDENT}, - {"\r", TokenType::DEDENT} - }; - std::vector tokens; - int start = 0; - int current_index = 0; - int line = 1; - - - // FUNCTIONS NEEDED FOR LEXER TO WORK - - bool isAtEnd(); // Checks for last character - char advance(); // Return current char and move forward - char peek(); // Sometimes, we don't actually want to read a character and may only want to peek at it - char peekNext(); // Peak at the next character - - void addToken(TokenType type); - - // Specific scanners for complex types - void scanString(); - void scanNumber(); - void scanIdentifier(); - std::string preprocess_indents(std::string raw); -}; - - -std::string Lexer::preprocess_indents(std::string raw) { - std::string clean_code = ""; - bool atLineStart = true; - - for (int i = 0; i < raw.length(); i++) { - if (atLineStart) { - if (raw[i] == ' ') { - if (i + 3 < raw.length() && raw.substr(i, 4) == " ") { - clean_code += '\t'; - i += 3; - continue; - } - } - else if (raw[i] == '\t') { - clean_code += '\t'; - continue; - } - else if (raw[i] == '\n') { - clean_code += '\n'; - atLineStart = true; - continue; - } - else { - atLineStart = false; - } - } - clean_code += raw[i]; - - if (raw[i] == '\n') { - atLineStart = true; - } - } - - return clean_code; -} \ No newline at end of file diff --git a/src/common/token/token.cpp b/src/common/token/token.cpp new file mode 100644 index 0000000..cc33e7e --- /dev/null +++ b/src/common/token/token.cpp @@ -0,0 +1,5 @@ +#include +#include + +Token::Token(TokenType type, std::string val, int l, int c) : type(type), value(val), line(l), column(c){} + diff --git a/src/common/token/token.hpp b/src/common/token/token.hpp index 7489d7a..8f2196d 100644 --- a/src/common/token/token.hpp +++ b/src/common/token/token.hpp @@ -33,7 +33,11 @@ enum class TokenType { COLON, // : INDENT, DEDENT, - COMMENT // # + COMMENT, // # + PRINT, + AMPERSAND, // & + PIPE, // | + SPACE, }; diff --git a/src/lexer/keywords.hpp b/src/lexer/keywords.hpp index e69de29..838abd5 100644 --- a/src/lexer/keywords.hpp +++ b/src/lexer/keywords.hpp @@ -0,0 +1,27 @@ +#pragma once + +#include +#include +#include + +#include "token.hpp" + +inline const std::unordered_map keywords = { + {"def", TokenType::DEF}, + {"return", TokenType::RETURN}, + {"if", TokenType::IF}, + {"else", TokenType::ELSE}, + {"print", TokenType::PRINT}, + {"=", TokenType::ASSIGN}, + {"+", TokenType::PLUS}, + {"-", TokenType::MINUS}, + {"*", TokenType::STAR}, + {"/", TokenType::SLASH}, + {"(", TokenType::LPAREN}, + {")", TokenType::RPAREN}, + {":", TokenType::COLON}, + {",", TokenType::COMMA}, + {"\n", TokenType::NEWLINE}, + {"\t", TokenType::INDENT}, + {" ", TokenType::SPACE} + }; diff --git a/src/lexer/lexer.cpp b/src/lexer/lexer.cpp index e69de29..54dcd69 100644 --- a/src/lexer/lexer.cpp +++ b/src/lexer/lexer.cpp @@ -0,0 +1,117 @@ +#include +#include +#include +#include +#include + +Lexer::Lexer(std::string input_string) : source_code(input_string){} +bool Lexer::isAtEnd(){ + if (current_index >= source_code.size()){ + return true; + } + return false; +} +char Lexer::peek(){ + if (current_index >= source_code.size()){ + return '\0'; + } + return source_code[current_index]; +} + +char Lexer::peekNext(){ + if (current_index+1 >= source_code.size()){ + return '\0'; + } + return source_code[current_index+1]; +} + +char Lexer::advance(){ + if (current_index >= source_code.size()){ + return '\0'; + } + char c = source_code[current_index++]; + if (c == '\n'){ + line++; + column=1; + } + else {column++;} + return c; +} + +void Lexer::scanNumber(std::string curr){ + int start = column-1; + while (std::isdigit(peek())){ + curr += Lexer::advance(); + } + if (std::isalnum(peek())){ + // throw an error + } + Token token(TokenType::NUMBER, curr, line, start); + tokens.push_back(token); +} + +void Lexer::scanString(std::string quote){ + int start = column-1; + while (!isAtEnd() && peek()!=quote[0] && peek()!='\n'){ + quote += advance(); + } + if (isAtEnd()){ + // throw an error + } + else if (peek()=='\n'){ + // throw an error + } + else{ + quote += advance(); + } + Token token(TokenType::STRING, quote, line, start); + tokens.push_back(token); +} + +void Lexer::scanIdentifier(std::string curr){ + int start = column-1; + while (std::isalnum(peek()) || peek()=='_'){ + curr += advance(); + } + Token token(TokenType::IDENTIFIER, curr, line, start); + if (keywords.count(curr)){ + token.type = keywords.at(curr); + } + tokens.push_back(token); +} + + +std::vector Lexer::scan_Tokens(){ + while (true){ + if (isAtEnd()){ + Token token(TokenType::EOF_TOKEN, "", line, column); + tokens.push_back(token); + break; + } + std::string curr = ""; + curr += advance(); + if (keywords.count(curr)){ + TokenType type = keywords.at(curr); + if (type == TokenType::NEWLINE){ + column = 0; + line++; + } + else if (type == TokenType::INDENT){ + column += 3; + } + else if (type == TokenType::DEDENT){ + column -= 5; + } + Token token(type, curr, line, column); + tokens.push_back(token); + } + else{ + if (std::isdigit(curr[0])){ + scanNumber(curr); + } + else if (std::isalpha(curr[0])){ + scanIdentifier(curr); + } + } +} +} diff --git a/src/lexer/lexer.hpp b/src/lexer/lexer.hpp index cb547a2..6623693 100644 --- a/src/lexer/lexer.hpp +++ b/src/lexer/lexer.hpp @@ -3,51 +3,37 @@ #include #include #include +#include //to check if char is alphanumeric #include "token.hpp" class Lexer { public: Lexer(std::string input_string); - std::vector scan_Tokens(void); + std::vector scan_Tokens(); + private: std::string source_code; - static inline const std::unordered_map keywords = { - {"def", TokenType::DEF}, - {"return", TokenType::RETURN}, - {"if", TokenType::IF}, - {"else", TokenType::ELSE}, - {"print", TokenType::PRINT}, - {"=", TokenType::ASSIGN}, - {"+", TokenType::PLUS}, - {"-", TokenType::MINUS}, - {"*", TokenType::STAR}, - {"/", TokenType::SLASH}, - {"(", TokenType::LPAREN}, - {")", TokenType::RPAREN}, - {":", TokenType::COLON}, - {",", TokenType::COMMA}, - {"\n", TokenType::NEWLINE}, - {"\t", TokenType::INDENT} - }; std::vector tokens; int start = 0; int current_index = 0; int line = 1; + int column = 1; // FUNCTIONS NEEDED FOR LEXER TO WORK - bool isAtEnd(void); // Checks for last character - char advance(void); // Return current char and move forward - char peek(void); // Sometimes, we don't actually want to read a character and may only want to peek at it - char peekNext(void); // Peak at the next character + bool isAtEnd(); // Checks for last character + char advance(); // Return current char and move forward + char peek(); // Sometimes, we don't actually want to read a character and may only want to peek at it + char peekNext(); // Peak at the next character void addToken(TokenType type); // Specific scanners for complex types - void scanString(void); - void scanNumber(void); - void scanIdentifier(void); + void scanString(std::string first); + void scanNumber(std::string first); + void scanIdentifier(std::string first); + std::string preprocess_indents(std::string raw); }; diff --git a/src/parser/parser.cpp b/src/parser/parser.cpp new file mode 100644 index 0000000..e69de29 diff --git a/src/parser/parser.hpp b/src/parser/parser.hpp new file mode 100644 index 0000000..ca5e5bd --- /dev/null +++ b/src/parser/parser.hpp @@ -0,0 +1,3 @@ +#pragma once + +#include "token.hpp"