diff --git a/.gitignore b/.gitignore
index 80a6dbe..b4c3726 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,9 @@
 #build directory
 build/
 
+# Config files
+.vscode
+
 # Prerequisites
 *.d
 
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..ad52559
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+    "makefile.configureOnOpen": true
+}
diff --git a/Lexer/LexerClasses - Part 1.cpp b/Lexer/LexerClasses - Part 1.cpp
deleted file mode 100644
index d7c1ae9..0000000
--- a/Lexer/LexerClasses - Part 1.cpp
+++ /dev/null
@@ -1,144 +0,0 @@
-#include <string>
-#include <vector>
-#include <unordered_map>
-
-enum class TokenType {
-    DEF,
-    RETURN,
-    IF,
-    ELSE,
-    PRINT,
-    FOR,
-    WHILE,
-
-    IDENTIFIER,
-    NUMBER,
-    STRING,
-
-    PLUS, // +
-    MINUS, // -
-    STAR, // *
-    SLASH, // /
-    ASSIGN, // =
-    MODULO, // %
-    GREATERTHAN, // >
-    LESSERTHAN, // <
-
-    LPAREN, // (
-    RPAREN, // )
-
-    NEWLINE,
-    EOF_TOKEN,
-
-    COMMA, // ,
-    COLON, // :
-    INDENT,
-    DEDENT,
-    COMMENT // #
-};
-
-
-class Token{
-    public:
-        // Main part of a token
-        TokenType type;
-        std::string value;
-
-        // Used to tell user about error in case error is found
-        int line;
-        int column;
-
-        // This constructor will allow us to easily make the tokens while coding
-        Token(TokenType type, std::string val, int l, int c){
-            this->type = type; this->value = val;
-            this->line = l; this->column = c;
-        }
-};
-
-
-class Lexer{
-    public:
-        Lexer(std::string input_string){
-            this->source_code = preprocess_indents(input_string);
-        }
-        std::vector<Token> scan_Tokens();
-
-    private:
-        std::string source_code;
-        static inline const std::unordered_map<std::string, TokenType> keywords = {
-            {"def", TokenType::DEF},
-            {"return", TokenType::RETURN},
-            {"if", TokenType::IF},
-            {"else", TokenType::ELSE},
-            {"print", TokenType::PRINT},
-            {"=", TokenType::ASSIGN},
-            {"+", TokenType::PLUS},
-            {"-", TokenType::MINUS},
-            {"*", TokenType::STAR},
-            {"/", TokenType::SLASH},
-            {"(", TokenType::LPAREN},
-            {")", TokenType::RPAREN},
-            {":", TokenType::COLON},
-            {",", TokenType::COMMA},
-            {"\n", TokenType::NEWLINE},
-            {"\t", TokenType::INDENT},
-            {"\r", TokenType::DEDENT}
-        };
-        std::vector<Token> tokens;
-        int start = 0;
-        int current_index = 0;
-        int line = 1;
-
-
-        // FUNCTIONS NEEDED FOR LEXER TO WORK
-
-        bool isAtEnd(); // Checks for last character
-        char advance(); // Return current char and move forward
-        char peek(); // Sometimes, we don't actually want to read a character and may only want to peek at it
-        char peekNext(); // Peak at the next character
-
-        void addToken(TokenType type);
-
-        // Specific scanners for complex types
-        void scanString();
-        void scanNumber();
-        void scanIdentifier();
-        std::string preprocess_indents(std::string raw);
-};
-
-
-std::string Lexer::preprocess_indents(std::string raw) {
-    std::string clean_code = "";
-    bool atLineStart = true;
-
-    for (int i = 0; i < raw.length(); i++) {
-        if (atLineStart) {
-            if (raw[i] == ' ') {
-                if (i + 3 < raw.length() && raw.substr(i, 4) == "    ") {
-                    clean_code += '\t';
-                    i += 3;
-                    continue;
-                }
-            }
-            else if (raw[i] == '\t') {
-                clean_code += '\t';
-                continue;
-            }
-            else if (raw[i] == '\n') {
-                clean_code += '\n';
-                atLineStart = true;
-                continue;
-            }
-            else {
-                atLineStart = false;
-            }
-        }
-        clean_code += raw[i];
-
-        if (raw[i] == '\n') {
-            atLineStart = true;
-        }
-    }
-
-    return clean_code;
-}
\ No newline at end of file
diff --git a/Makefile b/Makefile
index 8c8b5f8..7123bd2 100644
--- a/Makefile
+++ b/Makefile
@@ -33,13 +33,14 @@ $(BUILD_DIR)/%.o: $(SRC_DIR)/%.cpp
 #for change in header files
 -include $(DEPS)
 
-.PHONY: all clean run
+.PHONY: all clean run format check-format
 
 run: all
 	./$(BUILD_DIR)/$(TARGET_EXEC)
 
 clean:
 	rm -rf $(BUILD_DIR)
+
 
 format:
-	find $(SRC_DIR)/ -name '*.cpp' -o -name '*.h' | xargs clang-format -i
+	find $(SRC_DIR)/ \( -name '*.cpp' -o -name '*.hpp' -o -name '*.h' \) | xargs clang-format -i
diff --git a/src/common/ast/astexpr.cpp b/src/common/ast/astexpr.cpp
new file mode 100644
index 0000000..996bafa
--- /dev/null
+++ b/src/common/ast/astexpr.cpp
@@ -0,0 +1,24 @@
+#include "astexpr.hpp"
+
+#include <utility>
+
+ASTExprNode::ASTExprNode(ASTExprNodeType type) : type(type) {}
+
+BinaryOperatorNode::BinaryOperatorNode(OperatorType op, std::unique_ptr<ASTExprNode> lhs,
+                                       std::unique_ptr<ASTExprNode> rhs)
+    : ASTExprNode(ASTExprNodeType::BINARY), op(op), lhs(std::move(lhs)), rhs(std::move(rhs)) {}
+
+UnaryOperatorNode::UnaryOperatorNode(OperatorType op, std::unique_ptr<ASTExprNode> rhs)
+    : ASTExprNode(ASTExprNodeType::UNARY), op(op), rhs(std::move(rhs)) {}
+
+StringNode::StringNode(std::string value)
+    : ASTExprNode(ASTExprNodeType::STRING), value(std::move(value)) {}
+
+NumberNode::NumberNode(double value) : ASTExprNode(ASTExprNodeType::NUMBER), value(value) {}
+
+BooleanNode::BooleanNode(bool value) : ASTExprNode(ASTExprNodeType::BOOLEAN), value(value) {}
+
+NoneNode::NoneNode() : ASTExprNode(ASTExprNodeType::NONE) {}
+
+ReferenceNode::ReferenceNode(std::string name)
+    : ASTExprNode(ASTExprNodeType::REFERENCE), name(std::move(name)) {}
diff --git a/src/common/ast/astexpr.hpp b/src/common/ast/astexpr.hpp
new file mode 100644
index 0000000..83cbb79
--- /dev/null
+++ b/src/common/ast/astexpr.hpp
@@ -0,0 +1,116 @@
+#pragma once
+
+#include <memory>
+#include <string>
+
+// Discriminator stored in every expression node so that code holding an
+// ASTExprNode can tell which concrete node class it is.
+enum class ASTExprNodeType {
+    BINARY,    // binary operators
+    UNARY,     // unary operators
+    STRING,    // string literals
+    NUMBER,    // number literals
+    BOOLEAN,   // boolean literals
+    NONE,      // None
+    REFERENCE  // variable (identifier) references
+};
+
+// Operators an expression node can carry, grouped by the parser's precedence levels.
+enum class OperatorType {
+    // logical
+    OR,   // or
+    AND,  // and
+    NOT,  // not
+
+    // comparisons
+    GREATERTHAN,   // >
+    GREATEREQUAL,  // >=
+    LESSEQUAL,     // <=
+    LESSERTHAN,    // <
+    EQEQUAL,       // ==
+    NOTEQUAL,      // !=
+
+    // additive/unary
+    PLUS,   // +
+    MINUS,  // -
+
+    // multiplicative
+    STAR,         // *
+    SLASH,        // /
+    DOUBLESLASH,  // //
+    MODULO,       // %
+
+    // power
+    POWER,  // **
+};
+
+// Abstract base of all expression nodes. Nodes are held through
+// std::unique_ptr<ASTExprNode>, hence the virtual destructor.
+class ASTExprNode {
+public:
+    ASTExprNode(ASTExprNodeType type);
+    virtual ~ASTExprNode() = default;
+
+    // concrete kind of this node
+    ASTExprNodeType type;
+};
+
+// Binary operation `lhs op rhs`; owns both operands.
+class BinaryOperatorNode : public ASTExprNode {
+public:
+    BinaryOperatorNode(OperatorType op, std::unique_ptr<ASTExprNode> lhs,
+                       std::unique_ptr<ASTExprNode> rhs);
+
+    // operator type
+    OperatorType op;
+    std::unique_ptr<ASTExprNode> lhs;
+    std::unique_ptr<ASTExprNode> rhs;
+};
+
+// Prefix operation `op rhs`; owns its operand.
+class UnaryOperatorNode : public ASTExprNode {
+public:
+    UnaryOperatorNode(OperatorType op, std::unique_ptr<ASTExprNode> rhs);
+
+    // operator type
+    OperatorType op;
+    std::unique_ptr<ASTExprNode> rhs;
+};
+
+// String literal.
+class StringNode : public ASTExprNode {
+public:
+    StringNode(std::string value);
+
+    std::string value;
+};
+
+// Numeric literal, stored as a double.
+class NumberNode : public ASTExprNode {
+public:
+    NumberNode(double value);
+
+    double value;
+};
+
+// Boolean literal.
+class BooleanNode : public ASTExprNode {
+public:
+    BooleanNode(bool value);
+
+    bool value;
+};
+
+// The None literal; carries no payload.
+class NoneNode : public ASTExprNode {
+public:
+    NoneNode();
+};
+
+// Reference to a variable by its identifier name.
+class ReferenceNode : public ASTExprNode {
+public:
+    ReferenceNode(std::string name);
+
+    std::string name;
+};
diff --git a/src/common/token/token.cpp b/src/common/token/token.cpp
new file mode 100644
index 0000000..59c1123
--- /dev/null
+++ b/src/common/token/token.cpp
@@ -0,0 +1,9 @@
+#include "token.hpp"
+
+#include <string>
+#include <utility>
+
+// Builds a token from its type, its source text and the (line, column) position
+// recorded for it. `val` is taken by value and moved into the token.
+Token::Token(TokenType type, std::string val, int l, int c)
+    : type(type), value(std::move(val)), line(l), column(c) {}
diff --git a/src/common/token/token.hpp b/src/common/token/token.hpp
index 7489d7a..8f2196d 100644
--- a/src/common/token/token.hpp
+++ b/src/common/token/token.hpp
@@ -33,7 +33,11 @@ enum class TokenType {
     COLON, // :
     INDENT,
     DEDENT,
-    COMMENT // #
+    COMMENT, // #
+    PRINT,
+    AMPERSAND, // &
+    PIPE, // |
+    SPACE,
 };
 
 
diff --git a/src/lexer/keywords.hpp b/src/lexer/keywords.hpp
index e69de29..838abd5 100644
--- a/src/lexer/keywords.hpp
+++ b/src/lexer/keywords.hpp
@@ -0,0 +1,35 @@
+#pragma once
+
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "token.hpp"
+
+// Maps reserved words and single-character lexemes to their token type.
+// The lexer looks up each one-character string here before trying the number and
+// identifier scanners, and the identifier scanner looks up whole words.
+inline const std::unordered_map<std::string, TokenType> keywords = {
+    {"def", TokenType::DEF},
+    {"return", TokenType::RETURN},
+    {"if", TokenType::IF},
+    {"else", TokenType::ELSE},
+    {"print", TokenType::PRINT},
+    {"=", TokenType::ASSIGN},
+    {"+", TokenType::PLUS},
+    {"-", TokenType::MINUS},
+    {"*", TokenType::STAR},
+    {"/", TokenType::SLASH},
+    {"%", TokenType::MODULO},
+    {">", TokenType::GREATERTHAN},
+    {"<", TokenType::LESSERTHAN},
+    {"&", TokenType::AMPERSAND},
+    {"|", TokenType::PIPE},
+    {"(", TokenType::LPAREN},
+    {")", TokenType::RPAREN},
+    {":", TokenType::COLON},
+    {",", TokenType::COMMA},
+    {"\n", TokenType::NEWLINE},
+    {"\t", TokenType::INDENT},
+    {" ", TokenType::SPACE},
+};
diff --git a/src/lexer/lexer.cpp b/src/lexer/lexer.cpp
index e69de29..bcc8a7a 100644
--- a/src/lexer/lexer.cpp
+++ b/src/lexer/lexer.cpp
@@ -0,0 +1,134 @@
+#include "lexer.hpp"
+
+#include <cctype>
+#include <cstddef>
+#include <stdexcept>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "keywords.hpp"
+
+namespace {
+
+// <cctype> classifiers are undefined for negative char values, so always go
+// through unsigned char.
+bool isDigit(char c) { return std::isdigit(static_cast<unsigned char>(c)) != 0; }
+bool isAlnum(char c) { return std::isalnum(static_cast<unsigned char>(c)) != 0; }
+
+}  // namespace
+
+Lexer::Lexer(std::string input_string) : source_code(std::move(input_string)) {}
+
+// True once every character of the source has been consumed.
+bool Lexer::isAtEnd() {
+    return static_cast<std::size_t>(current_index) >= source_code.size();
+}
+
+// Current character without consuming it; '\0' at end of input.
+char Lexer::peek() {
+    return isAtEnd() ? '\0' : source_code[current_index];
+}
+
+// Character after the current one, without consuming anything; '\0' if there is none.
+char Lexer::peekNext() {
+    const std::size_t next = static_cast<std::size_t>(current_index) + 1;
+    return next >= source_code.size() ? '\0' : source_code[next];
+}
+
+// Consumes and returns the current character, keeping line/column in sync.
+// Returns '\0' without moving when already at the end.
+char Lexer::advance() {
+    if (isAtEnd()) {
+        return '\0';
+    }
+    const char c = source_code[current_index++];
+    if (c == '\n') {
+        line++;
+        column = 1;
+    } else {
+        column++;
+    }
+    return c;
+}
+
+// Scans the rest of a numeric literal whose first digit (`curr`) was already
+// consumed and appends a NUMBER token. An optional fractional part is accepted.
+// Throws std::runtime_error when the literal runs straight into a letter.
+void Lexer::scanNumber(std::string curr) {
+    const int start = column - 1;  // column of the already-consumed first digit
+    while (isDigit(peek())) {
+        curr += advance();
+    }
+    if (peek() == '.' && isDigit(peekNext())) {
+        curr += advance();  // the decimal point
+        while (isDigit(peek())) {
+            curr += advance();
+        }
+    }
+    if (isAlnum(peek())) {
+        throw std::runtime_error("line " + std::to_string(line) + ": invalid number literal '" +
+                                 curr + peek() + "'");
+    }
+    tokens.emplace_back(TokenType::NUMBER, std::move(curr), line, start);
+}
+
+// Scans a string literal whose opening quote (`quote`) was already consumed and
+// appends a STRING token; the token text keeps both quote characters.
+// Throws std::runtime_error if the line or the input ends before the closing quote.
+void Lexer::scanString(std::string quote) {
+    const int start = column - 1;  // column of the opening quote
+    const char delimiter = quote[0];
+    while (!isAtEnd() && peek() != delimiter && peek() != '\n') {
+        quote += advance();
+    }
+    if (isAtEnd() || peek() == '\n') {
+        throw std::runtime_error("line " + std::to_string(line) + ": unterminated string literal");
+    }
+    quote += advance();  // closing quote
+    tokens.emplace_back(TokenType::STRING, std::move(quote), line, start);
+}
+
+// Scans the rest of an identifier whose first character (`curr`) was already
+// consumed. Appends the keyword's token if the word is in `keywords`, otherwise
+// an IDENTIFIER token.
+void Lexer::scanIdentifier(std::string curr) {
+    const int start = column - 1;  // column of the already-consumed first character
+    while (isAlnum(peek()) || peek() == '_') {
+        curr += advance();
+    }
+    const auto keyword = keywords.find(curr);
+    const TokenType type = keyword != keywords.end() ? keyword->second : TokenType::IDENTIFIER;
+    tokens.emplace_back(type, std::move(curr), line, start);
+}
+
+// Tokenises the whole source and returns the token list, which always ends with
+// an EOF_TOKEN (the sentinel the parser falls back to).
+std::vector<Token> Lexer::scan_Tokens() {
+    while (!isAtEnd()) {
+        // advance() already maintains line/column, so record where the token starts
+        // first and never bump the counters a second time for '\n'.
+        const int tokenLine = line;
+        const int tokenColumn = column;
+        const std::string curr(1, advance());
+        const auto first = static_cast<unsigned char>(curr[0]);  // safe for <cctype>
+        const auto entry = keywords.find(curr);
+        if (entry != keywords.end()) {
+            if (entry->second == TokenType::INDENT) {
+                column += 3;  // with advance()'s +1 a tab counts as four columns
+            }
+            tokens.push_back(Token(entry->second, curr, tokenLine, tokenColumn));
+        } else if (std::isdigit(first)) {
+            scanNumber(curr);
+        } else if (std::isalpha(first) || curr[0] == '_') {
+            scanIdentifier(curr);
+        } else if (curr[0] == '"' || curr[0] == '\'') {
+            scanString(curr);
+        }
+        // NOTE(review): any other character is still dropped silently, as before -
+        // decide whether it should become a lexer error.
+    }
+
+    tokens.push_back(Token(TokenType::EOF_TOKEN, "", line, column));
+    return tokens;
+}
diff --git a/src/lexer/lexer.hpp b/src/lexer/lexer.hpp
index cb547a2..6623693 100644
--- a/src/lexer/lexer.hpp
+++ b/src/lexer/lexer.hpp
@@ -3,51 +3,37 @@
 #include <string>
 #include <vector>
 #include <unordered_map>
+#include <cctype> //to check if char is alphanumeric
 #include "token.hpp"
 
 class Lexer {
 public:
     Lexer(std::string input_string);
-    std::vector<Token> scan_Tokens(void);
+    std::vector<Token> scan_Tokens();
+
 private:
     std::string source_code;
-    static inline const std::unordered_map<std::string, TokenType> keywords = {
-        {"def", TokenType::DEF},
-        {"return", TokenType::RETURN},
-        {"if", TokenType::IF},
-        {"else", TokenType::ELSE},
-        {"print", TokenType::PRINT},
-        {"=", TokenType::ASSIGN},
-        {"+", TokenType::PLUS},
-        {"-", TokenType::MINUS},
-        {"*", TokenType::STAR},
-        {"/", TokenType::SLASH},
-        {"(", TokenType::LPAREN},
-        {")", TokenType::RPAREN},
-        {":", TokenType::COLON},
-        {",", TokenType::COMMA},
-        {"\n", TokenType::NEWLINE},
-        {"\t", TokenType::INDENT}
-    };
     std::vector<Token> tokens;
     int start = 0;
     int current_index = 0;
     int line = 1;
+    int column = 1;
 
 
     // FUNCTIONS NEEDED FOR LEXER TO WORK
 
-    bool isAtEnd(void); // Checks for last character
-    char advance(void); // Return current char and move forward
-    char peek(void); // Sometimes, we don't actually want to read a character and may only want to peek at it
-    char peekNext(void); // Peak at the next character
+    bool isAtEnd(); // Checks for last character
+    char advance(); // Return current char and move forward
+    char peek(); // Sometimes, we don't actually want to read a character and may only want to peek at it
+    char peekNext(); // Peek at the next character
 
     void addToken(TokenType type);
 
     // Specific scanners for complex types
-    void scanString(void);
-    void scanNumber(void);
-    void scanIdentifier(void);
+    void scanString(std::string first);
+    void scanNumber(std::string first);
+    void scanIdentifier(std::string first);
+
     std::string preprocess_indents(std::string raw);
 };
 
diff --git a/src/parser/exprparser.cpp b/src/parser/exprparser.cpp
new file mode 100644
index 0000000..4a442cd
--- /dev/null
+++ b/src/parser/exprparser.cpp
@@ -0,0 +1,219 @@
+#include "exprparser.hpp"
+
+#include <stdexcept>
+#include <string>
+#include <utility>
+
+// Takes ownership of the token stream. peek()/peekNext() fall back to the last
+// token (expected to be EOF) once the input is used up, so an empty stream is
+// given an EOF token to keep that fallback valid.
+ExprParser::ExprParser(std::vector<Token> tokens) : tokens(std::move(tokens)) {
+    if (this->tokens.empty()) {
+        this->tokens.emplace_back(TokenType::EOF_TOKEN, "", 0, 0);
+    }
+}
+
+// Current token without consuming it; the last (EOF) token once past the end.
+const Token& ExprParser::peek() const {
+    return index < tokens.size() ? tokens[index] : tokens.back();
+}
+
+// Token after the current one; the last (EOF) token if there is none.
+const Token& ExprParser::peekNext() const {
+    return index + 1 < tokens.size() ? tokens[index + 1] : tokens.back();
+}
+
+// Moves to the next token; does nothing once past the end.
+void ExprParser::advance() {
+    if (index < tokens.size()) {
+        index++;
+    }
+}
+
+// True if the current token has the given type. Never consumes the token.
+bool ExprParser::match(TokenType type) const {
+    return index < tokens.size() && peek().type == type;
+}
+
+// Maps an operator token to the operator stored in the AST.
+// Throws std::runtime_error for any token type not listed below.
+// NOTE(review): GREATEREQUAL, LESSEQUAL, EQEQUAL, NOTEQUAL and DOUBLESLASH (as well
+// as the OR/AND/NOT/TRUE/FALSE/NONE types used further down) are assumed to be
+// declared in token.hpp - confirm, they are not added by this patch.
+OperatorType ExprParser::toOperatorType(TokenType type) {
+    switch (type) {
+        // comparison
+        case TokenType::GREATERTHAN:
+            return OperatorType::GREATERTHAN;
+        case TokenType::LESSERTHAN:
+            return OperatorType::LESSERTHAN;
+        case TokenType::GREATEREQUAL:
+            return OperatorType::GREATEREQUAL;
+        case TokenType::LESSEQUAL:
+            return OperatorType::LESSEQUAL;
+        case TokenType::EQEQUAL:
+            return OperatorType::EQEQUAL;
+        case TokenType::NOTEQUAL:
+            return OperatorType::NOTEQUAL;
+
+        // additive
+        case TokenType::PLUS:
+            return OperatorType::PLUS;
+        case TokenType::MINUS:
+            return OperatorType::MINUS;
+
+        // multiplicative
+        case TokenType::STAR:
+            return OperatorType::STAR;
+        case TokenType::SLASH:
+            return OperatorType::SLASH;
+        case TokenType::DOUBLESLASH:
+            return OperatorType::DOUBLESLASH;
+        case TokenType::MODULO:
+            return OperatorType::MODULO;
+
+        default:
+            // never supposed to happen
+            throw std::runtime_error("unexpected operator");
+    }
+}
+
+// Entry point: parses a full expression starting at the lowest-precedence level.
+std::unique_ptr<ASTExprNode> ExprParser::parseExpr() {
+    return parseLogicalOr();
+}
+
+std::unique_ptr<ASTExprNode> ExprParser::parseLogicalOr() {
+    auto lhs = parseLogicalAnd();
+
+    // left associative
+    while (match(TokenType::OR)) {
+        advance();
+        auto rhs = parseLogicalAnd();
+        lhs = std::make_unique<BinaryOperatorNode>(OperatorType::OR, std::move(lhs),
+                                                   std::move(rhs));
+    }
+
+    return lhs;  // plain return: std::move here would only inhibit copy elision
+}
+
+std::unique_ptr<ASTExprNode> ExprParser::parseLogicalAnd() {
+    auto lhs = parseLogicalNot();
+
+    // left associative
+    while (match(TokenType::AND)) {
+        advance();
+        auto rhs = parseLogicalNot();
+        lhs = std::make_unique<BinaryOperatorNode>(OperatorType::AND, std::move(lhs),
+                                                   std::move(rhs));
+    }
+
+    return lhs;
+}
+
+std::unique_ptr<ASTExprNode> ExprParser::parseLogicalNot() {
+    // right associative
+    if (match(TokenType::NOT)) {
+        advance();
+        auto rhs = parseLogicalNot();
+        return std::make_unique<UnaryOperatorNode>(OperatorType::NOT, std::move(rhs));
+    }
+
+    return parseComparison();
+}
+
+std::unique_ptr<ASTExprNode> ExprParser::parseComparison() {
+    auto lhs = parseAdditive();
+
+    // left associative
+    while (match(TokenType::GREATERTHAN) || match(TokenType::LESSERTHAN) ||
+           match(TokenType::GREATEREQUAL) || match(TokenType::LESSEQUAL) ||
+           match(TokenType::EQEQUAL) || match(TokenType::NOTEQUAL)) {
+        const TokenType type = peek().type;
+        advance();
+        auto rhs = parseAdditive();
+        lhs = std::make_unique<BinaryOperatorNode>(toOperatorType(type), std::move(lhs),
+                                                   std::move(rhs));
+    }
+
+    return lhs;
+}
+
+std::unique_ptr<ASTExprNode> ExprParser::parseAdditive() {
+    auto lhs = parseMultiplicative();
+
+    // left associative
+    while (match(TokenType::PLUS) || match(TokenType::MINUS)) {
+        const TokenType type = peek().type;
+        advance();
+        auto rhs = parseMultiplicative();
+        lhs = std::make_unique<BinaryOperatorNode>(toOperatorType(type), std::move(lhs),
+                                                   std::move(rhs));
+    }
+
+    return lhs;
+}
+
+std::unique_ptr<ASTExprNode> ExprParser::parseMultiplicative() {
+    auto lhs = parseUnary();
+
+    // left associative
+    while (match(TokenType::STAR) || match(TokenType::SLASH) ||
+           match(TokenType::DOUBLESLASH) || match(TokenType::MODULO)) {
+        const TokenType type = peek().type;
+        advance();
+        auto rhs = parseUnary();
+        lhs = std::make_unique<BinaryOperatorNode>(toOperatorType(type), std::move(lhs),
+                                                   std::move(rhs));
+    }
+
+    return lhs;
+}
+
+std::unique_ptr<ASTExprNode> ExprParser::parseUnary() {
+    // right associative
+    if (match(TokenType::PLUS) || match(TokenType::MINUS)) {
+        const TokenType type = peek().type;
+        advance();
+        auto rhs = parseUnary();
+        return std::make_unique<UnaryOperatorNode>(toOperatorType(type), std::move(rhs));
+    }
+
+    return parsePrimary();
+}
+
+// Parses a parenthesised expression or a single literal/identifier.
+// Throws std::runtime_error on a missing ')' or a token that cannot start an operand;
+// std::stod may additionally throw for a NUMBER token whose text is not a valid number.
+std::unique_ptr<ASTExprNode> ExprParser::parsePrimary() {
+    if (match(TokenType::LPAREN)) {
+        advance();
+        auto expr = parseExpr();
+
+        if (!match(TokenType::RPAREN)) {
+            throw std::runtime_error("expected closing parenthesis");
+        }
+        advance();
+
+        return expr;
+    }
+
+    Token token = peek();  // copied so its text can be moved into the node
+    advance();
+    switch (token.type) {
+        case TokenType::NUMBER:
+            return std::make_unique<NumberNode>(std::stod(token.value));
+        case TokenType::STRING:
+            return std::make_unique<StringNode>(std::move(token.value));
+        case TokenType::TRUE:
+            return std::make_unique<BooleanNode>(true);
+        case TokenType::FALSE:
+            return std::make_unique<BooleanNode>(false);
+        case TokenType::NONE:
+            return std::make_unique<NoneNode>();
+        case TokenType::IDENTIFIER:
+            return std::make_unique<ReferenceNode>(std::move(token.value));
+        default:
+            throw std::runtime_error("invalid token found");
+    }
+}
diff --git a/src/parser/exprparser.hpp b/src/parser/exprparser.hpp
new file mode 100644
index 0000000..5694052
--- /dev/null
+++ b/src/parser/exprparser.hpp
@@ -0,0 +1,40 @@
+#pragma once
+
+#include <cstddef>
+#include <memory>
+#include <vector>
+
+#include "token.hpp"
+#include "astexpr.hpp"
+
+// Recursive-descent parser turning a token stream into an expression AST.
+class ExprParser {
+public:
+    ExprParser(std::vector<Token> tokens);
+    // Parses one expression starting at the current token.
+    std::unique_ptr<ASTExprNode> parseExpr();
+
+private:
+    //precedence parsing(from highest precedence to lowest precedence)
+    std::unique_ptr<ASTExprNode> parsePrimary();
+    std::unique_ptr<ASTExprNode> parsePower();  // declared only: exprparser.cpp has no definition yet
+    std::unique_ptr<ASTExprNode> parseUnary();
+    std::unique_ptr<ASTExprNode> parseMultiplicative();
+    std::unique_ptr<ASTExprNode> parseAdditive();
+    std::unique_ptr<ASTExprNode> parseComparison();
+    std::unique_ptr<ASTExprNode> parseLogicalNot();
+    std::unique_ptr<ASTExprNode> parseLogicalAnd();
+    std::unique_ptr<ASTExprNode> parseLogicalOr();
+
+    //helpers
+    const Token& peek() const;
+    const Token& peekNext() const;
+    void advance();
+    bool match(TokenType type) const;
+    OperatorType toOperatorType(TokenType type);
+
+    //tokens being parsed; the last one is expected to be EOF
+    std::vector<Token> tokens;
+    //index of the current token in the token vector
+    std::size_t index = 0;
+};