diff --git a/spice/analyze.py b/spice/analyze.py index e8b9ef9..b53e76f 100644 --- a/spice/analyze.py +++ b/spice/analyze.py @@ -2,7 +2,7 @@ # gustavo testando alguma coisa from spice.analyzers.identation import detect_indentation - +from spice.utils.get_langague import detect_language # this is the analyze function @@ -26,6 +26,8 @@ def analyze_file(file_path: str, selected_stats=None): "file_name": os.path.basename(file_path) } + LANG = detect_language(file_path) + # read the code file only once and load it into memory with open(file_path, "r", encoding="utf-8") as file: code = file.read() @@ -38,7 +40,7 @@ def analyze_file(file_path: str, selected_stats=None): # comment line count if requested if "comment_line_count" in selected_stats: from spice.analyzers.count_comment_lines import count_comment_lines - results["comment_line_count"] = count_comment_lines(code) + results["comment_line_count"] = count_comment_lines(code, LANG) # @gtins botei sua funcao aqui pq ela usa o codigo raw e nao o tokenizado, ai so tirei ela ali de baixo pra nao ficar chamando o parser sem precisar # edit: ok i see whats going on, instead of appending the results to the resuls, this will itself print the results to the terminal diff --git a/spice/analyzers/count_comment_lines.py b/spice/analyzers/count_comment_lines.py index 75914e0..9b2f74d 100644 --- a/spice/analyzers/count_comment_lines.py +++ b/spice/analyzers/count_comment_lines.py @@ -1,18 +1,63 @@ -# this will count comment lines, since our AST/Parser doesn't include comment lines, this needs to be done in the tokenized output of the lexer -# not sure about that first line, im pretty sure like about 200% sure this is analyzing the raw code and not the tokenized code but ok +# this will count comment lines for Python, JavaScript, Ruby, and Go # COMMENT LINE IS A LINE THAT EXCLUSIVELY HAS A COMMENT -# so like: y = 5 #sets y to 5 IS NOT A COMMENT LINE!!!!!!!! 
def count_comment_lines(code, lang):
    """Count the lines of *code* that consist exclusively of a comment.

    A line that holds code followed by a trailing comment (``y = 5  # note``)
    is not a comment line.  Blank lines are never counted, including blank
    lines inside a block comment.

    Args:
        code: Raw (untokenized) source text.
        lang: Language name, matched case-insensitively.  One of
            ``"python"``, ``"javascript"``, ``"go"`` or ``"ruby"``.

    Returns:
        The number of comment-only lines.

    Raises:
        ValueError: If *lang* is not one of the supported languages.
    """
    # Per language: (single-line marker, ((block opener, block closer), ...),
    # whether a block delimiter must be the only text on its line).
    syntax = {
        "python": ("#", (('"""', '"""'), ("'''", "'''")), False),
        "javascript": ("//", (("/*", "*/"),), False),
        "go": ("//", (("/*", "*/"),), False),
        "ruby": ("#", (("=begin", "=end"),), True),
    }
    try:
        # Normalize once so marker selection and block detection always agree.
        single_comment, block_pairs, whole_line = syntax[lang.lower()]
    except KeyError:
        raise ValueError(f"Unsupported language: {lang}") from None

    comment_count = 0
    # Closing delimiter of the block comment we are currently inside, or None.
    # Remembering the exact closer keeps a ''' block from being ended by """.
    closer = None

    for line in code.splitlines():
        stripped = line.strip()

        # Skip empty lines
        if not stripped:
            continue

        # Inside a block comment: every non-blank line counts, including the
        # line that carries the closing delimiter.
        if closer is not None:
            comment_count += 1
            closed = stripped == closer if whole_line else closer in stripped
            if closed:
                closer = None
            continue

        for opener, end in block_pairs:
            if whole_line:
                opens = stripped == opener
            else:
                opens = stripped.startswith(opener)
            if not opens:
                continue

            comment_count += 1
            # Only stay "inside" the block when it is not closed on this same
            # line; search after the opener so that '"""' alone (or '/*/')
            # is not mistaken for an opener plus its own closer.
            if whole_line or end not in stripped[len(opener):]:
                closer = end
            break
        else:
            # No block comment starts here: check for a single-line comment.
            if stripped.startswith(single_comment):
                comment_count += 1

    return comment_count
# spice/utils/get_langague.py
import os


def detect_language(file_path):
    """Return the language name that corresponds to *file_path*'s extension.

    The extension is matched case-insensitively, so ``SCRIPT.PY`` and
    ``script.py`` are both reported as ``"python"``.

    Args:
        file_path: Path or bare file name; only its final extension is used.

    Returns:
        One of ``"ruby"``, ``"python"``, ``"javascript"`` or ``"go"``.

    Raises:
        ValueError: If the extension is not one of ``.rb``, ``.py``, ``.js``
            or ``.go`` (a missing extension is reported as an empty string).
    """
    languages_by_extension = {
        ".rb": "ruby",
        ".py": "python",
        ".js": "javascript",
        ".go": "go",
    }

    _, ext = os.path.splitext(file_path)
    try:
        return languages_by_extension[ext.lower()]
    except KeyError:
        # Report the extension exactly as it appears in the path.
        raise ValueError(f"Unsupported file extension: {ext}") from None


# Example usage:
if __name__ == "__main__":
    for path in ["example.py", "example.js", "example.rb", "example.go"]:
        print(f"{path}: {detect_language(path)}")