129 changes: 27 additions & 102 deletions spice/analyze.py
@@ -1,33 +1,9 @@
import os

# this is the universal token, used by all lexers to know what to output
from lexers.token import TokenType

# these are the individual lexers for all languages we support
from lexers.ruby.rubylexer import RubyLexer
from lexers.python.pythonlexer import PythonLexer
from lexers.javascript.javascriptlexer import JavaScriptLexer
from lexers.golang.golexer import GoLexer

# gustavo testing something
from spice.analyzers.identation import detect_indentation


from spice.identation import detect_indentation

# this will read the file extension and return the correct lexer
def get_lexer_for_file(file_path):
    _, ext = os.path.splitext(file_path)

    if ext == ".rb":
        return RubyLexer
    elif ext == ".py":
        return PythonLexer
    elif ext == ".js":
        return JavaScriptLexer
    elif ext == ".go":
        return GoLexer
    else:
        raise ValueError(f"Unsupported file extension: {ext}")


# this is the analyze function
def analyze_file(file_path: str, selected_stats=None):
@@ -56,21 +32,31 @@ def analyze_file(file_path: str, selected_stats=None):

    # line count if requested
    if "line_count" in selected_stats:
        from spice.analyzers.count_lines import count_lines
        results["line_count"] = count_lines(code)

    # comment line count if requested
    if "comment_line_count" in selected_stats:
        from spice.analyzers.count_comment_lines import count_comment_lines
        results["comment_line_count"] = count_comment_lines(code)

    # @gtins I put your function here because it uses the raw code rather than the tokenized code, so I just pulled it out from below to avoid calling the parser unnecessarily
    # edit: ok I see what's going on, instead of appending the results to the results dict, this will itself print the results to the terminal
    # TODO: make analyze_code_structure return the results, then append those results to the results dict
    if "identation_level" in selected_stats:
        analyze_code_structure(code)

    # only put the code through the lexer and proceed with tokenization if we need function count or comment count (UPDATE THIS WHEN NEEDED PLEASE !!!!!!!!)
    if "function_count" in selected_stats or "comment_line_count" in selected_stats:
    # only put the code through the lexer and proceed with tokenization if we need function count (UPDATE THIS WHEN NEEDED PLEASE !!!!!!!!)
    if "function_count" in selected_stats:

        # get the lexer for the code's language
        from spice.utils.get_lexer import get_lexer_for_file
        LexerClass = get_lexer_for_file(file_path)

        # tokenize the code via lexer
        lexer = LexerClass(code)
        tokens = lexer.tokenize()

        # process comment line count if requested
        if "comment_line_count" in selected_stats:
            results["comment_line_count"] = count_comment_lines(code)

        # only put the code through the parser and proceed with parsing if we need function count (UPDATE THIS WHEN NEEDED PLEASE !!!!!!!!)
        if "function_count" in selected_stats:

@@ -82,83 +68,20 @@ def analyze_file(file_path: str, selected_stats=None):
        ast = parser.parse()

        # count functions
        from spice.analyzers.count_functions import count_functions
        results["function_count"] = count_functions(ast)
    if "identation_level" in selected_stats:
        analyze_code_structure(code)

    return results


# this will count lines straight from the raw code
def count_lines(code):
    return code.count("\n") + 1


# this will count functions in the AST
def count_functions(ast):
    # import function definition from the parser's ast
    from parser.ast import FunctionDefinition, Program

    if not isinstance(ast, Program):
        return 0

    function_count = 0

    # recursive search for function definitions in the AST
    def search_node(node):
        nonlocal function_count

        if isinstance(node, FunctionDefinition):
            function_count += 1

        # process child nodes if they exist
        if hasattr(node, 'statements') and node.statements:
            for statement in node.statements:
                search_node(statement)

        if hasattr(node, 'body') and node.body:
            for body_statement in node.body:
                search_node(body_statement)

        # for binary operation, check both sides
        if hasattr(node, 'left'):
            search_node(node.left)
        if hasattr(node, 'right'):
            search_node(node.right)

        # check the value part of an assignment
        if hasattr(node, 'value'):
            search_node(node.value)

        # check function call arguments
        if hasattr(node, 'arguments') and node.arguments:
            for arg in node.arguments:
                search_node(arg)

    # start recursive search from the root Program node
    search_node(ast)

    return function_count


# this will count comment lines, since our AST/Parser doesn't include comment lines, this needs to be done in the tokenized output of the lexer
# COMMENT LINE IS A LINE THAT EXCLUSIVELY HAS A COMMENT
# so like: y = 5 #sets y to 5 IS NOT A COMMENT LINE!!!!!!!!
def count_comment_lines(code):
    """Count lines that are exclusively comments (no code on the same line)"""
    # split the code into lines
    lines = code.splitlines()
    comment_count = 0

    for line in lines:
        # Remove leading whitespace
        stripped = line.strip()
        # Check if this line consists only of a comment
        if stripped and stripped.startswith('#'):
            comment_count += 1

    return comment_count

# im not sure what to do with this part 😂
# this is the identation analyzer
# but it's not included in the menu?
# im not going to change this since gtins knows better than me how this works
# but this needs to be refactored and included directly into the analyze_file function and the analyze menu
def analyze_code_structure(code):
    indentation_info = detect_indentation(code)

@@ -167,4 +90,6 @@ def analyze_code_structure(code):
    for line, level in indentation_info["levels"]:
        # print(f"Indentation Level {level}: {line}")
        print(f"Detected Indentation Type: {indentation_info['indent_type']}")
        print(f"Detected Indentation Size: {indentation_info['indent_size']}")

# ----------------------------------------------------------------------------------------------------
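
The TODO above suggests making analyze_code_structure return its findings instead of printing them. A minimal sketch of that refactor, assuming detect_indentation keeps returning a dict with "indent_type", "indent_size", and "levels" keys (the shape the current code consumes); the "max_level" key is a hypothetical name for illustration:

def analyze_code_structure(code):
    # hypothetical return-based variant; key names mirror the current print statements
    indentation_info = detect_indentation(code)
    return {
        "indent_type": indentation_info["indent_type"],
        "indent_size": indentation_info["indent_size"],
        "max_level": max((level for _, level in indentation_info["levels"]), default=0),
    }

analyze_file could then merge this dict into results under the "identation_level" key instead of printing to the terminal.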
Empty file added spice/analyzers/__init__.py
Empty file.
18 changes: 18 additions & 0 deletions spice/analyzers/count_comment_lines.py
@@ -0,0 +1,18 @@
# this will count comment lines; since our AST/Parser doesn't include comment lines, they can't be counted from the parsed output
# note: this analyzes the raw code directly, not the tokenized output of the lexer
# COMMENT LINE IS A LINE THAT EXCLUSIVELY HAS A COMMENT
# so like: y = 5 #sets y to 5 IS NOT A COMMENT LINE!!!!!!!!
def count_comment_lines(code):
    """Count lines that are exclusively comments (no code on the same line)"""
    # split the code into lines
    lines = code.splitlines()
    comment_count = 0

    for line in lines:
        # Remove leading whitespace
        stripped = line.strip()
        # Check if this line consists only of a comment
        if stripped and stripped.startswith('#'):
            comment_count += 1

    return comment_count
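
A quick illustration of the "exclusively a comment" rule, using a made-up sample input:

sample = "# header\ny = 5  # sets y to 5\n# footer\n"
count_comment_lines(sample)  # -> 2; the inline comment on the y = 5 line doesn't count

Note that this only recognizes '#'-style comments, so comment-only lines in JavaScript or Go ('//') won't be counted.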
45 changes: 45 additions & 0 deletions spice/analyzers/count_functions.py
@@ -0,0 +1,45 @@
# this will count functions in the AST
def count_functions(ast):
    # import function definition from the parser's ast
    from parser.ast import FunctionDefinition, Program

    if not isinstance(ast, Program):
        return 0

    function_count = 0

    # recursive search for function definitions in the AST
    def search_node(node):
        nonlocal function_count

        if isinstance(node, FunctionDefinition):
            function_count += 1

        # process child nodes if they exist
        if hasattr(node, 'statements') and node.statements:
            for statement in node.statements:
                search_node(statement)

        if hasattr(node, 'body') and node.body:
            for body_statement in node.body:
                search_node(body_statement)

        # for binary operation, check both sides
        if hasattr(node, 'left'):
            search_node(node.left)
        if hasattr(node, 'right'):
            search_node(node.right)

        # check the value part of an assignment
        if hasattr(node, 'value'):
            search_node(node.value)

        # check function call arguments
        if hasattr(node, 'arguments') and node.arguments:
            for arg in node.arguments:
                search_node(arg)

    # start recursive search from the root Program node
    search_node(ast)

    return function_count
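
For context, a sketch of how this is driven end to end; the Parser import path and constructor are assumptions, since the diff only shows parser.parse() being called and the parser.ast module existing:

from lexers.python.pythonlexer import PythonLexer
from parser.parser import Parser  # assumed module path, not shown in this diff
from spice.analyzers.count_functions import count_functions

code = "def f():\n    pass\n\ndef g():\n    return f()\n"
tokens = PythonLexer(code).tokenize()
ast = Parser(tokens).parse()      # assumed constructor signature
print(count_functions(ast))       # expected: 2, if the parser emits FunctionDefinition nodes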
3 changes: 3 additions & 0 deletions spice/analyzers/count_lines.py
@@ -0,0 +1,3 @@
# this will count lines straight from the raw code
def count_lines(code):
    return code.count("\n") + 1
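
Worth noting: counting newlines plus one means the result depends on the trailing newline, e.g.:

count_lines("a\nb")    # -> 2
count_lines("a\nb\n")  # -> 3 (the trailing newline counts as starting a new line)
count_lines("")        # -> 1 (an empty file still reports one line)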
1 change: 0 additions & 1 deletion spice/identation.py → spice/analyzers/identation.py
@@ -1,4 +1,3 @@
import os
import re

def detect_indentation(code):
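
The return shape of detect_indentation isn't shown in this diff, but the way analyze_code_structure consumes it implies a dict like the following (the example values are assumptions for illustration):

info = detect_indentation("def f():\n    return 1\n")
# keys implied by analyze_code_structure in spice/analyze.py:
info["indent_type"]   # e.g. "spaces"
info["indent_size"]   # e.g. 4
info["levels"]        # a list of (line, level) pairs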
Empty file added spice/utils/__init__.py
Empty file.
25 changes: 25 additions & 0 deletions spice/utils/get_lexer.py
@@ -0,0 +1,25 @@
import os


# this will read the file extension and return the correct lexer
def get_lexer_for_file(file_path):
    _, ext = os.path.splitext(file_path)

    if ext == ".rb":
        from lexers.ruby.rubylexer import RubyLexer
        return RubyLexer

    elif ext == ".py":
        from lexers.python.pythonlexer import PythonLexer
        return PythonLexer

    elif ext == ".js":
        from lexers.javascript.javascriptlexer import JavaScriptLexer
        return JavaScriptLexer

    elif ext == ".go":
        from lexers.golang.golexer import GoLexer
        return GoLexer

    else:
        raise ValueError(f"Unsupported file extension: {ext}")
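
A usage sketch (the file names are illustrative):

LexerClass = get_lexer_for_file("script.py")   # -> PythonLexer
lexer = LexerClass("x = 1\n")
tokens = lexer.tokenize()

get_lexer_for_file("notes.txt")                # raises ValueError: Unsupported file extension: .txt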