From 078e26271bd9aeb7711085e6239b4d02c38ffec4 Mon Sep 17 00:00:00 2001 From: Siddharth Maira Date: Mon, 2 Feb 2026 21:50:50 +0530 Subject: [PATCH 1/8] test commit --- test.txt | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 test.txt diff --git a/test.txt b/test.txt new file mode 100644 index 0000000..e69de29 From 8ae90c214b98eb6cc38f7e64d85b8b3beefacb38 Mon Sep 17 00:00:00 2001 From: Sasank <213149805+sash070@users.noreply.github.com> Date: Mon, 2 Feb 2026 21:59:28 +0530 Subject: [PATCH 2/8] cahnge --- madhav.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/madhav.txt b/madhav.txt index 9012a49..51568ea 100644 --- a/madhav.txt +++ b/madhav.txt @@ -1 +1 @@ -i like madhav more than no one +i like madhav more than sujal From b220e9b507fcb9ecf600dc478eb09d3f53c6ffad Mon Sep 17 00:00:00 2001 From: Sasank <213149805+sash070@users.noreply.github.com> Date: Mon, 2 Feb 2026 22:40:44 +0530 Subject: [PATCH 3/8] change --- madhav.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/madhav.txt b/madhav.txt index 51568ea..0a1fc2e 100644 --- a/madhav.txt +++ b/madhav.txt @@ -1 +1,2 @@ i like madhav more than sujal +change From ff5f98cf02428e748ff659572db6f4ba8161b92c Mon Sep 17 00:00:00 2001 From: Siddharth Maira Date: Thu, 12 Feb 2026 12:39:07 +0530 Subject: [PATCH 4/8] Defined structure for lexing and created Makefile --- .gitignore | 3 ++ Makefile | 42 ++++++++++++++++++++++++++ test.txt => src/common/token/token.hpp | 0 src/lexer/indentation.cpp | 0 src/lexer/indentation.hpp | 0 src/lexer/keywords.hpp | 0 src/lexer/lexer.cpp | 0 src/lexer/lexer.hpp | 0 8 files changed, 45 insertions(+) create mode 100644 Makefile rename test.txt => src/common/token/token.hpp (100%) create mode 100644 src/lexer/indentation.cpp create mode 100644 src/lexer/indentation.hpp create mode 100644 src/lexer/keywords.hpp create mode 100644 src/lexer/lexer.cpp create mode 100644 src/lexer/lexer.hpp diff --git a/.gitignore b/.gitignore index d4fb281..80a6dbe 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +#build directory +build/ + # Prerequisites *.d diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..e82cc23 --- /dev/null +++ b/Makefile @@ -0,0 +1,42 @@ +#vars +TARGET_EXEC := executable +BUILD_DIR := build +SRC_DIR := src + +SRCS := $(shell find $(SRC_DIR) -name '*.cpp') +OBJS := $(SRCS:$(SRC_DIR)/%.cpp=$(BUILD_DIR)/%.o) +DEPS := $(OBJS:.o=.d) +INCS := $(shell find $(SRC_DIR) -type d) + +INC_FLAGS := $(addprefix -I,$(INCS)) +CPPFLAGS := $(INC_FLAGS) -MMD -MP +LDFLAGS := + +CXX := g++ + +#all +all: $(BUILD_DIR)/$(TARGET_EXEC) + +#executable dependencies +$(BUILD_DIR)/$(TARGET_EXEC): $(OBJS) + @echo "Linking" + mkdir -p $(BUILD_DIR) + $(CXX) $(OBJS) -o $@ $(LDFLAGS) + +#object dependencies +$(BUILD_DIR)/%.o: $(SRC_DIR)/%.cpp + @echo "Building dependencies" + mkdir -p $(dir $@) + $(CXX) $(CPPFLAGS) -c $< -o $@ + @echo + +#for change in header files +-include $(DEPS) + +.PHONY: all clean run + +run: all + ./$(BUILD_DIR)/$(TARGET_EXEC) + +clean: + rm -rf $(BUILD_DIR) diff --git a/test.txt b/src/common/token/token.hpp similarity index 100% rename from test.txt rename to src/common/token/token.hpp diff --git a/src/lexer/indentation.cpp b/src/lexer/indentation.cpp new file mode 100644 index 0000000..e69de29 diff --git a/src/lexer/indentation.hpp b/src/lexer/indentation.hpp new file mode 100644 index 0000000..e69de29 diff --git a/src/lexer/keywords.hpp b/src/lexer/keywords.hpp new file mode 100644 index 0000000..e69de29 diff --git a/src/lexer/lexer.cpp b/src/lexer/lexer.cpp new file mode 100644 index 0000000..e69de29 diff --git a/src/lexer/lexer.hpp b/src/lexer/lexer.hpp new file mode 100644 index 0000000..e69de29 From b7544260698cb5f61862fbc855d22658e65a7fed Mon Sep 17 00:00:00 2001 From: Siddharth Maira Date: Thu, 12 Feb 2026 22:07:52 +0530 Subject: [PATCH 5/8] filled token.hpp and lexer.hpp with basic declarations(from ankit's commit) --- src/common/token/token.hpp | 57 ++++++++++++++++++++++++++++++++++++++ src/lexer/lexer.hpp | 56 +++++++++++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+) diff --git a/src/common/token/token.hpp b/src/common/token/token.hpp index e69de29..91fb490 100644 --- a/src/common/token/token.hpp +++ b/src/common/token/token.hpp @@ -0,0 +1,57 @@ +#pragma once + +#include + +enum class TokenType { + DEF, + RETURN, + IF, + ELSE, + FOR, + WHILE, + + IDENTIFIER, + NUMBER, + STRING, + + PLUS, // + + MINUS, // - + STAR, // * + SLASH, // / + ASSIGN, // = + MODULO, // % + GREATERTHAN, // > + LESSERTHAN, // < + + LPAREN, // ( + RPAREN, // ) + + NEWLINE, + EOF_TOKEN, + + COMMA, // , + COLON, // : + INDENT, + DEDENT, + COMMENT // # +}; + + +class Token{ +public: + // Main part of a token + TokenType type; + std::string value; + + // Used to tell user about error in case error is found + int line; + int column; + + // This constructor will allow us to easily make the tokens while coding + Token(TokenType type, std::string val, int l, int c) +}; + + + + + diff --git a/src/lexer/lexer.hpp b/src/lexer/lexer.hpp index e69de29..be04547 100644 --- a/src/lexer/lexer.hpp +++ b/src/lexer/lexer.hpp @@ -0,0 +1,56 @@ +#pragma once + +#include +#include +#include + +#include "token.hpp" + +class Lexer{ +public: + Lexer(std::string input_string){ + this->source_code = preprocess_indents(input_string); + } + std::vector scan_Tokens(); + +private: + std::string source_code; + static inline const std::unordered_map keywords = { + {"def", TokenType::DEF}, + {"return", TokenType::RETURN}, + {"if", TokenType::IF}, + {"else", TokenType::ELSE}, + {"print", TokenType::PRINT}, + {"=", TokenType::ASSIGN}, + {"+", TokenType::PLUS}, + {"-", TokenType::MINUS}, + {"*", TokenType::STAR}, + {"/", TokenType::SLASH}, + {"(", TokenType::LPAREN}, + {")", TokenType::RPAREN}, + {":", TokenType::COLON}, + {",", TokenType::COMMA}, + {"\n", TokenType::NEWLINE}, + {"\t", TokenType::INDENT}, + {"\r", TokenType::DEDENT} + }; + std::vector tokens; + int start = 0; + int current_index = 0; + int line = 1; + + + // FUNCTIONS NEEDED FOR LEXER TO WORK + bool isAtEnd(); // Checks for last character + char advance(); // Return current char and move forward + char peek(); // Sometimes, we don't actually want to read a character and may only want to peek at it + char peekNext(); // Peak at the next character + + void addToken(TokenType type); + + // Specific scanners for complex types + void scanString(); + void scanNumber(); + void scanIdentifier(); + std::string preprocess_indents(std::string raw); +}; From 5f4b061b95aec1ae7d5ab44cfeed4b8ac8c7d564 Mon Sep 17 00:00:00 2001 From: Siddharth Maira Date: Thu, 12 Feb 2026 22:10:46 +0530 Subject: [PATCH 6/8] clean up --- src/common/token/token.hpp | 4 ++-- src/lexer/lexer.hpp | 25 +++++++++++-------------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/src/common/token/token.hpp b/src/common/token/token.hpp index 91fb490..7489d7a 100644 --- a/src/common/token/token.hpp +++ b/src/common/token/token.hpp @@ -37,7 +37,7 @@ enum class TokenType { }; -class Token{ +class Token { public: // Main part of a token TokenType type; @@ -48,7 +48,7 @@ class Token{ int column; // This constructor will allow us to easily make the tokens while coding - Token(TokenType type, std::string val, int l, int c) + Token(TokenType type, std::string val, int l, int c); }; diff --git a/src/lexer/lexer.hpp b/src/lexer/lexer.hpp index be04547..cb547a2 100644 --- a/src/lexer/lexer.hpp +++ b/src/lexer/lexer.hpp @@ -6,12 +6,10 @@ #include "token.hpp" -class Lexer{ +class Lexer { public: - Lexer(std::string input_string){ - this->source_code = preprocess_indents(input_string); - } - std::vector scan_Tokens(); + Lexer(std::string input_string); + std::vector scan_Tokens(void); private: std::string source_code; @@ -31,8 +29,7 @@ class Lexer{ {":", TokenType::COLON}, {",", TokenType::COMMA}, {"\n", TokenType::NEWLINE}, - {"\t", TokenType::INDENT}, - {"\r", TokenType::DEDENT} + {"\t", TokenType::INDENT} }; std::vector tokens; int start = 0; @@ -41,16 +38,16 @@ class Lexer{ // FUNCTIONS NEEDED FOR LEXER TO WORK - bool isAtEnd(); // Checks for last character - char advance(); // Return current char and move forward - char peek(); // Sometimes, we don't actually want to read a character and may only want to peek at it - char peekNext(); // Peak at the next character + bool isAtEnd(void); // Checks for last character + char advance(void); // Return current char and move forward + char peek(void); // Sometimes, we don't actually want to read a character and may only want to peek at it + char peekNext(void); // Peak at the next character void addToken(TokenType type); // Specific scanners for complex types - void scanString(); - void scanNumber(); - void scanIdentifier(); + void scanString(void); + void scanNumber(void); + void scanIdentifier(void); std::string preprocess_indents(std::string raw); }; From 81b9a990832d76e8483c3a3fe921bcd1d6673e30 Mon Sep 17 00:00:00 2001 From: Sasank <213149805+sash070@users.noreply.github.com> Date: Sat, 28 Feb 2026 17:03:31 +0530 Subject: [PATCH 7/8] added basic lexer implementation except errors and indent stack --- .vscode/settings.json | 3 + src/common/token/token.cpp | 5 ++ src/common/token/token.hpp | 6 +- src/lexer/keywords.hpp | 27 +++++++++ src/lexer/lexer.cpp | 117 +++++++++++++++++++++++++++++++++++++ src/lexer/lexer.hpp | 37 ++++-------- 6 files changed, 168 insertions(+), 27 deletions(-) create mode 100644 .vscode/settings.json create mode 100644 src/common/token/token.cpp diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..ad52559 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "makefile.configureOnOpen": true +} diff --git a/src/common/token/token.cpp b/src/common/token/token.cpp new file mode 100644 index 0000000..cc33e7e --- /dev/null +++ b/src/common/token/token.cpp @@ -0,0 +1,5 @@ +#include +#include + +Token::Token(TokenType type, std::string val, int l, int c) : type(type), value(val), line(l), column(c){} + diff --git a/src/common/token/token.hpp b/src/common/token/token.hpp index 7489d7a..8f2196d 100644 --- a/src/common/token/token.hpp +++ b/src/common/token/token.hpp @@ -33,7 +33,11 @@ enum class TokenType { COLON, // : INDENT, DEDENT, - COMMENT // # + COMMENT, // # + PRINT, + AMPERSAND, // & + PIPE, // | + SPACE, }; diff --git a/src/lexer/keywords.hpp b/src/lexer/keywords.hpp index e69de29..838abd5 100644 --- a/src/lexer/keywords.hpp +++ b/src/lexer/keywords.hpp @@ -0,0 +1,27 @@ +#pragma once + +#include +#include +#include + +#include "token.hpp" + +inline const std::unordered_map keywords = { + {"def", TokenType::DEF}, + {"return", TokenType::RETURN}, + {"if", TokenType::IF}, + {"else", TokenType::ELSE}, + {"print", TokenType::PRINT}, + {"=", TokenType::ASSIGN}, + {"+", TokenType::PLUS}, + {"-", TokenType::MINUS}, + {"*", TokenType::STAR}, + {"/", TokenType::SLASH}, + {"(", TokenType::LPAREN}, + {")", TokenType::RPAREN}, + {":", TokenType::COLON}, + {",", TokenType::COMMA}, + {"\n", TokenType::NEWLINE}, + {"\t", TokenType::INDENT}, + {" ", TokenType::SPACE} + }; diff --git a/src/lexer/lexer.cpp b/src/lexer/lexer.cpp index e69de29..54dcd69 100644 --- a/src/lexer/lexer.cpp +++ b/src/lexer/lexer.cpp @@ -0,0 +1,117 @@ +#include +#include +#include +#include +#include + +Lexer::Lexer(std::string input_string) : source_code(input_string){} +bool Lexer::isAtEnd(){ + if (current_index >= source_code.size()){ + return true; + } + return false; +} +char Lexer::peek(){ + if (current_index >= source_code.size()){ + return '\0'; + } + return source_code[current_index]; +} + +char Lexer::peekNext(){ + if (current_index+1 >= source_code.size()){ + return '\0'; + } + return source_code[current_index+1]; +} + +char Lexer::advance(){ + if (current_index >= source_code.size()){ + return '\0'; + } + char c = source_code[current_index++]; + if (c == '\n'){ + line++; + column=1; + } + else {column++;} + return c; +} + +void Lexer::scanNumber(std::string curr){ + int start = column-1; + while (std::isdigit(peek())){ + curr += Lexer::advance(); + } + if (std::isalnum(peek())){ + // throw an error + } + Token token(TokenType::NUMBER, curr, line, start); + tokens.push_back(token); +} + +void Lexer::scanString(std::string quote){ + int start = column-1; + while (!isAtEnd() && peek()!=quote[0] && peek()!='\n'){ + quote += advance(); + } + if (isAtEnd()){ + // throw an error + } + else if (peek()=='\n'){ + // throw an error + } + else{ + quote += advance(); + } + Token token(TokenType::STRING, quote, line, start); + tokens.push_back(token); +} + +void Lexer::scanIdentifier(std::string curr){ + int start = column-1; + while (std::isalnum(peek()) || peek()=='_'){ + curr += advance(); + } + Token token(TokenType::IDENTIFIER, curr, line, start); + if (keywords.count(curr)){ + token.type = keywords.at(curr); + } + tokens.push_back(token); +} + + +std::vector Lexer::scan_Tokens(){ + while (true){ + if (isAtEnd()){ + Token token(TokenType::EOF_TOKEN, "", line, column); + tokens.push_back(token); + break; + } + std::string curr = ""; + curr += advance(); + if (keywords.count(curr)){ + TokenType type = keywords.at(curr); + if (type == TokenType::NEWLINE){ + column = 0; + line++; + } + else if (type == TokenType::INDENT){ + column += 3; + } + else if (type == TokenType::DEDENT){ + column -= 5; + } + Token token(type, curr, line, column); + tokens.push_back(token); + } + else{ + if (std::isdigit(curr[0])){ + scanNumber(curr); + } + else if (std::isalpha(curr[0])){ + scanIdentifier(curr); + } + } +} +} diff --git a/src/lexer/lexer.hpp b/src/lexer/lexer.hpp index cb547a2..ae15f7a 100644 --- a/src/lexer/lexer.hpp +++ b/src/lexer/lexer.hpp @@ -3,51 +3,36 @@ #include #include #include +#include //to check if char is alphanumeric #include "token.hpp" class Lexer { public: Lexer(std::string input_string); - std::vector scan_Tokens(void); + std::vector scan_Tokens(); + private: std::string source_code; - static inline const std::unordered_map keywords = { - {"def", TokenType::DEF}, - {"return", TokenType::RETURN}, - {"if", TokenType::IF}, - {"else", TokenType::ELSE}, - {"print", TokenType::PRINT}, - {"=", TokenType::ASSIGN}, - {"+", TokenType::PLUS}, - {"-", TokenType::MINUS}, - {"*", TokenType::STAR}, - {"/", TokenType::SLASH}, - {"(", TokenType::LPAREN}, - {")", TokenType::RPAREN}, - {":", TokenType::COLON}, - {",", TokenType::COMMA}, - {"\n", TokenType::NEWLINE}, - {"\t", TokenType::INDENT} - }; std::vector tokens; int start = 0; int current_index = 0; int line = 1; + int column = 1; // FUNCTIONS NEEDED FOR LEXER TO WORK - bool isAtEnd(void); // Checks for last character - char advance(void); // Return current char and move forward - char peek(void); // Sometimes, we don't actually want to read a character and may only want to peek at it - char peekNext(void); // Peak at the next character + bool isAtEnd(); // Checks for last character + char advance(); // Return current char and move forward + char peek(); // Sometimes, we don't actually want to read a character and may only want to peek at it + char peekNext(); // Peak at the next character void addToken(TokenType type); // Specific scanners for complex types - void scanString(void); - void scanNumber(void); - void scanIdentifier(void); + void scanString(std::string first); + void scanNumber(std::string first); + void scanIdentifier(std::string first); std::string preprocess_indents(std::string raw); }; From fa0e7fa45bd46ee5e48087b18e594071ea02e8b0 Mon Sep 17 00:00:00 2001 From: Siddharth Maira Date: Mon, 2 Mar 2026 14:26:16 +0530 Subject: [PATCH 8/8] initial commit to work on other stuff --- src/parser/parser.cpp | 0 src/parser/parser.hpp | 3 +++ 2 files changed, 3 insertions(+) create mode 100644 src/parser/parser.cpp create mode 100644 src/parser/parser.hpp diff --git a/src/parser/parser.cpp b/src/parser/parser.cpp new file mode 100644 index 0000000..e69de29 diff --git a/src/parser/parser.hpp b/src/parser/parser.hpp new file mode 100644 index 0000000..ca5e5bd --- /dev/null +++ b/src/parser/parser.hpp @@ -0,0 +1,3 @@ +#pragma once + +#include "token.hpp"