From c9b2493c649d2f8ec9849cabeb73ba115533ddd5 Mon Sep 17 00:00:00 2001 From: "tom.mansion" Date: Fri, 31 Jan 2025 14:11:30 +0100 Subject: [PATCH 1/2] Added commit activity plot --- main.py | 8 +++++++- requirements.txt | 2 ++ src/analyze_commits.py | 26 ++++++++++++++++++++++++++ src/generate_md_report.py | 1 + 4 files changed, 36 insertions(+), 1 deletion(-) diff --git a/main.py b/main.py index 20fabdc..2bf95e9 100644 --- a/main.py +++ b/main.py @@ -1,6 +1,6 @@ import argparse from src.clone_repo import clone_repo, clone_repos -from src.analyze_commits import analyze_commits, analyze_multiple_repos_from_json +from src.analyze_commits import analyze_commits, analyze_multiple_repos_from_json, generate_commits_distribution_plot from src.analyze_contributions import generate_reports, generate_loc_report from src.generate_md_report import generate_md_reports, generate_md_report import os @@ -242,6 +242,11 @@ def full_run(yaml_config_file_path: str, skip_clone: bool = False): with open(output_file, "w") as f: json.dump(analysis, f, indent=2) + # Generate commits distribution plot + activity_image_path = os.path.join(commits_reports_dir, f"{project_name}_plot.png") + generate_commits_distribution_plot(analysis, activity_image_path) + project_data["activity_image_path"] = activity_image_path + # Generate LOC reports print("Analyzing lines of code...") loc_reports_dir = config["folders"]["line_of_code_reports"] @@ -267,6 +272,7 @@ def full_run(yaml_config_file_path: str, skip_clone: bool = False): "repository": project_name, "repository_url": project_data["url"], "loc_data": project_data["loc_report"], + "activity_image_path": project_data["activity_image_path"], } report_folder_dir = os.path.join(markdown_reports_dir, project_name) diff --git a/requirements.txt b/requirements.txt index d010d46..df82683 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,5 @@ rich==13.7.0 pandas==2.1.4 numpy==1.26.3 pytest==7.4.3 +pyyaml==6.0.2 +matplotlib==3.10.0 diff --git a/src/analyze_commits.py b/src/analyze_commits.py index 95606cb..ad15008 100644 --- a/src/analyze_commits.py +++ b/src/analyze_commits.py @@ -101,6 +101,32 @@ def get_commit_per_member(repo, account_mapping): return members_commits +def generate_commits_distribution_plot(repo_analysis, output_file): + import pandas as pd + import matplotlib.pyplot as plt + + # Processes commit log data into a DataFrame. + data = [] + for entry in repo_analysis["commit_dates"]: + author, timestamp = entry + # Convert "2022-03-07 17:41:37+01:00' to '2022-03' + month = pd.to_datetime(timestamp).strftime("%Y-%m") + data.append((month, author)) + + df = pd.DataFrame(data, columns=["Month", "Author"]) + + # Generates a bar plot of commits per contributor per month + commit_counts = df.groupby(["Month", "Author"]).size().unstack(fill_value=0) + commit_counts.plot(kind="bar", stacked=True, figsize=(12, 6), colormap="Dark2", width=0.8) + plt.title("Commits per Contributor per Month") + plt.xlabel("Month") + plt.ylabel("Number of Commits") + plt.legend(title="Contributor", bbox_to_anchor=(1.05, 1), loc='upper left') + plt.xticks(rotation=45) + plt.tight_layout() + plt.savefig(output_file) + + def analyze_commits(repo_dir, account_mapping): """ Analyze commit activity for a given repository, applying account mapping. diff --git a/src/generate_md_report.py b/src/generate_md_report.py index e5eab6a..712fb49 100644 --- a/src/generate_md_report.py +++ b/src/generate_md_report.py @@ -27,6 +27,7 @@ def generate_md_report_text(repo_data: dict, account_mapping: dict): report += f"**Repository URL:** {repo_data.get('repository_url', 'N/A')}\n\n" report += "## Commits\n" report += f"**Total Commits:** {repo_data.get('total_commits', 'N/A')}\n\n" + report += f"![Activity](../../{repo_data.get('activity_image_path')})\n\n" if "loc_data" in repo_data: loc_data = repo_data["loc_data"] From 54d13823e3c760553427e17dde744b3e3abcfc79 Mon Sep 17 00:00:00 2001 From: "tom.mansion" Date: Thu, 6 Feb 2025 15:01:24 +0100 Subject: [PATCH 2/2] Added yearly commit nb plot --- src/analyze_commits.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/src/analyze_commits.py b/src/analyze_commits.py index ad15008..a33a8e3 100644 --- a/src/analyze_commits.py +++ b/src/analyze_commits.py @@ -106,18 +106,22 @@ def generate_commits_distribution_plot(repo_analysis, output_file): import matplotlib.pyplot as plt # Processes commit log data into a DataFrame. - data = [] + monthly_data = [] + yearly_data = [] for entry in repo_analysis["commit_dates"]: author, timestamp = entry # Convert "2022-03-07 17:41:37+01:00' to '2022-03' month = pd.to_datetime(timestamp).strftime("%Y-%m") - data.append((month, author)) + monthly_data.append((month, author)) + year = pd.to_datetime(timestamp).strftime("%Y") + yearly_data.append((year, author)) - df = pd.DataFrame(data, columns=["Month", "Author"]) + monthly_df = pd.DataFrame(monthly_data, columns=["Month", "Author"]) + yearly_df = pd.DataFrame(yearly_data, columns=["Year", "Author"]) # Generates a bar plot of commits per contributor per month - commit_counts = df.groupby(["Month", "Author"]).size().unstack(fill_value=0) - commit_counts.plot(kind="bar", stacked=True, figsize=(12, 6), colormap="Dark2", width=0.8) + commit_counts = monthly_df.groupby(["Month", "Author"]).size().unstack(fill_value=0) + commit_counts.plot(kind="bar", stacked=True, figsize=(16, 6), colormap="Dark2", width=0.8) plt.title("Commits per Contributor per Month") plt.xlabel("Month") plt.ylabel("Number of Commits") @@ -126,6 +130,17 @@ def generate_commits_distribution_plot(repo_analysis, output_file): plt.tight_layout() plt.savefig(output_file) + # Generates a bar plot of commits per contributor per year + commit_counts = yearly_df.groupby(["Year", "Author"]).size().unstack(fill_value=0) + commit_counts.plot(kind="bar", stacked=True, figsize=(16, 3), colormap="Dark2", width=0.8) + plt.title("Commits per Contributor per Year") + plt.xlabel("Year") + plt.ylabel("Number of Commits") + plt.legend(title="Contributor", bbox_to_anchor=(1.05, 1), loc='upper left') + plt.xticks(rotation=45) + plt.tight_layout() + plt.savefig(output_file.replace(".png", "_yearly.png")) + def analyze_commits(repo_dir, account_mapping): """