Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions spice/analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

# gustavo testando alguma coisa
from spice.analyzers.identation import detect_indentation

from spice.utils.get_langague import detect_language


# this is the analyze function
Expand All @@ -26,6 +26,8 @@ def analyze_file(file_path: str, selected_stats=None):
"file_name": os.path.basename(file_path)
}

LANG = detect_language(file_path)

# read the code file only once and load it into memory
with open(file_path, "r", encoding="utf-8") as file:
code = file.read()
Expand All @@ -38,7 +40,7 @@ def analyze_file(file_path: str, selected_stats=None):
# comment line count if requested
if "comment_line_count" in selected_stats:
from spice.analyzers.count_comment_lines import count_comment_lines
results["comment_line_count"] = count_comment_lines(code)
results["comment_line_count"] = count_comment_lines(code, LANG)

# @gtins I put your function here because it uses the raw code and not the tokenized code, so I just moved it up from below to avoid calling the parser unnecessarily
# edit: OK, I see what's going on: instead of appending its output to the results, this prints the results to the terminal itself
Expand Down
59 changes: 52 additions & 7 deletions spice/analyzers/count_comment_lines.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,63 @@
# this will count comment lines, since our AST/Parser doesn't include comment lines, this needs to be done in the tokenized output of the lexer
# not sure about that first line, im pretty sure like about 200% sure this is analyzing the raw code and not the tokenized code but ok
# this will count comment lines for Python, JavaScript, Ruby, and Go
# COMMENT LINE IS A LINE THAT EXCLUSIVELY HAS A COMMENT
# so like: y = 5 #sets y to 5 IS NOT A COMMENT LINE!!!!!!!!
def count_comment_lines(code, lang):
    """Count lines that are exclusively comments (no code on the same line).

    A line counts when, after stripping surrounding whitespace, it starts
    with the language's single-line comment marker, or when it lies inside a
    block comment (the opening and closing lines included). Blank lines are
    never counted, even inside a block comment. A line such as
    ``y = 5  # sets y to 5`` is not a comment line.

    Args:
        code: The raw source text to scan.
        lang: Language name, matched case-insensitively. One of "python",
            "javascript", "go" or "ruby".

    Returns:
        The number of comment lines found in ``code``.

    Raises:
        ValueError: If ``lang`` is not one of the supported languages.
    """
    # Normalize once so the marker selection and the per-line checks agree
    # on the language regardless of the caller's capitalization.
    normalized = lang.lower()
    if normalized == "python":
        single_comment = "#"
        block_pairs = (('"""', '"""'), ("'''", "'''"))
    elif normalized in ("javascript", "go"):
        single_comment = "//"
        block_pairs = (("/*", "*/"),)
    elif normalized == "ruby":
        single_comment = "#"
        block_pairs = (("=begin", "=end"),)
    else:
        raise ValueError(f"Unsupported language: {lang}")

    # Ruby block markers are only recognized when they are the whole line;
    # the other languages' markers may share the line with comment text.
    whole_line_markers = normalized == "ruby"

    comment_count = 0
    # Closing delimiter of the block comment currently open, or None when
    # the scan is outside any block comment.
    closer = None

    for line in code.splitlines():
        stripped = line.strip()

        # Skip empty lines
        if not stripped:
            continue

        # Inside a block comment: every non-blank line counts, and only the
        # delimiter that opened the block is allowed to close it.
        if closer is not None:
            comment_count += 1
            if whole_line_markers:
                block_ends = stripped == closer
            else:
                block_ends = closer in stripped
            if block_ends:
                closer = None
            continue

        # Check for the start of a block comment
        opened = None
        for opener, end_marker in block_pairs:
            if whole_line_markers:
                starts_here = stripped == opener
            else:
                starts_here = stripped.startswith(opener)
            if starts_here:
                opened = (opener, end_marker)
                break

        if opened is not None:
            opener, end_marker = opened
            comment_count += 1
            # The block stays open only when its closing delimiter does not
            # appear after the opener on this same line; otherwise a
            # one-line block such as '/* note */' would swallow the code
            # lines that follow it.
            if whole_line_markers or end_marker not in stripped[len(opener):]:
                closer = end_marker
            continue

        # Check for single-line comments
        if stripped.startswith(single_comment):
            comment_count += 1

    return comment_count
20 changes: 20 additions & 0 deletions spice/utils/get_langague.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import os

def detect_language(file_path):
    """Return the language name implied by a file path's extension.

    The extension is compared case-insensitively, so "main.PY" and "main.py"
    are treated alike.

    Args:
        file_path: Path or file name whose extension identifies the language.

    Returns:
        One of "ruby", "python", "javascript" or "go".

    Raises:
        ValueError: If the extension is missing or not one of the supported
            ones. The message reports the extension as it appears in
            ``file_path``.
    """
    _, ext = os.path.splitext(file_path)

    languages_by_extension = {
        ".rb": "ruby",
        ".py": "python",
        ".js": "javascript",
        ".go": "go",
    }

    try:
        return languages_by_extension[ext.lower()]
    except KeyError:
        # Raise the same error type and message as before; the failed dict
        # lookup is an implementation detail, so it is not chained.
        raise ValueError(f"Unsupported file extension: {ext}") from None

# Example usage: when run as a script, print the detected language for one
# sample file name per supported extension.
if __name__ == "__main__":
    sample_paths = ("example.py", "example.js", "example.rb", "example.go")
    for sample in sample_paths:
        print(f"{sample}: {detect_language(sample)}")