def _save_stacked_bar_plot(commit_counts, title, xlabel, figsize, output_path):
    """Render *commit_counts* as a stacked bar chart and save it to *output_path*.

    Args:
        commit_counts: DataFrame indexed by period (month or year) with one
            column per author holding the number of commits.
        title: Title drawn above the chart.
        xlabel: Label of the x axis.
        figsize: ``(width, height)`` of the figure in inches.
        output_path: File the chart is written to.
    """
    import matplotlib.pyplot as plt

    ax = commit_counts.plot(
        kind="bar", stacked=True, figsize=figsize, colormap="Dark2", width=0.8
    )
    fig = ax.get_figure()
    try:
        ax.set_title(title)
        ax.set_xlabel(xlabel)
        ax.set_ylabel("Number of Commits")
        ax.legend(title="Contributor", bbox_to_anchor=(1.05, 1), loc="upper left")
        ax.tick_params(axis="x", labelrotation=45)
        fig.tight_layout()
        fig.savefig(output_path)
    finally:
        # pyplot keeps every figure alive until it is explicitly closed; this
        # function runs once per repository, so unclosed figures pile up.
        plt.close(fig)


def generate_commits_distribution_plot(repo_analysis, output_file):
    """Save stacked bar charts of commits per contributor, by month and by year.

    The monthly chart is written to ``output_file``. The yearly chart is
    written next to it with ``_yearly`` inserted before the file extension
    (``repo_plot.png`` -> ``repo_plot_yearly.png``).

    Args:
        repo_analysis: Mapping with a ``"commit_dates"`` entry, an iterable of
            ``(author, timestamp)`` pairs. Timestamps are parsed with
            ``pandas.to_datetime`` (e.g. ``"2022-03-07 17:41:37+01:00"``).
        output_file: Path of the monthly chart image.

    Returns:
        None. When there are no commits, nothing is plotted and no file is
        written.

    Raises:
        KeyError: If ``repo_analysis`` has no ``"commit_dates"`` entry.
    """
    commit_dates = repo_analysis["commit_dates"]
    if not commit_dates:
        # An empty table cannot be drawn as a bar chart, so skip plotting
        # instead of failing the whole report run.
        print("No commits found; skipping commits distribution plot.")
        return

    import os

    import pandas as pd

    # Parse each timestamp once and derive both period labels from it,
    # e.g. "2022-03-07 17:41:37+01:00" -> "2022-03" and "2022".
    rows = []
    for author, timestamp in commit_dates:
        parsed = pd.to_datetime(timestamp)
        rows.append((parsed.strftime("%Y-%m"), parsed.strftime("%Y"), author))
    commits_df = pd.DataFrame(rows, columns=["Month", "Year", "Author"])

    # Commits per contributor per month.
    monthly_counts = (
        commits_df.groupby(["Month", "Author"]).size().unstack(fill_value=0)
    )
    _save_stacked_bar_plot(
        monthly_counts,
        "Commits per Contributor per Month",
        "Month",
        (16, 6),
        output_file,
    )

    # Commits per contributor per year. Only the final extension is touched,
    # so a ".png" elsewhere in the path is left alone and a path without
    # ".png" no longer collides with the monthly chart.
    yearly_counts = (
        commits_df.groupby(["Year", "Author"]).size().unstack(fill_value=0)
    )
    root, ext = os.path.splitext(output_file)
    _save_stacked_bar_plot(
        yearly_counts,
        "Commits per Contributor per Year",
        "Year",
        (16, 3),
        f"{root}_yearly{ext}",
    )
diff --git a/src/generate_md_report.py b/src/generate_md_report.py index e5eab6a..712fb49 100644 --- a/src/generate_md_report.py +++ b/src/generate_md_report.py @@ -27,6 +27,7 @@ def generate_md_report_text(repo_data: dict, account_mapping: dict): report += f"**Repository URL:** {repo_data.get('repository_url', 'N/A')}\n\n" report += "## Commits\n" report += f"**Total Commits:** {repo_data.get('total_commits', 'N/A')}\n\n" + report += f"![Activity](../../{repo_data.get('activity_image_path')})\n\n" if "loc_data" in repo_data: loc_data = repo_data["loc_data"]