From 4178003f9c3c74b2632a47c38c30b552da0e1adc Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 23 Dec 2025 06:46:03 +0900 Subject: [PATCH 1/5] GH-48623: [CI][Archery][Dev] Add missing headers to email reports --- dev/archery/archery/ci/cli.py | 12 ++++++---- dev/archery/archery/crossbow/cli.py | 23 ++++++++++++------- dev/archery/archery/crossbow/reports.py | 6 +++-- .../crossbow/tests/fixtures/email-report.txt | 4 ++++ .../archery/crossbow/tests/test_reports.py | 7 ++++-- .../templates/email_nightly_report.txt.j2 | 10 ++++---- .../templates/email_token_expiration.txt.j2 | 6 ++++- .../templates/email_workflow_report.txt.j2 | 10 ++++---- 8 files changed, 53 insertions(+), 25 deletions(-) diff --git a/dev/archery/archery/ci/cli.py b/dev/archery/archery/ci/cli.py index bf7b68d5327..5d34fd582ca 100644 --- a/dev/archery/archery/ci/cli.py +++ b/dev/archery/archery/ci/cli.py @@ -16,6 +16,7 @@ # under the License. import click +import email.utils from .core import Workflow from ..crossbow.reports import ChatReport, EmailReport, ReportUtils @@ -105,12 +106,15 @@ def report_email(obj, workflow_id, sender_name, sender_email, recipient_email, """ output = obj['output'] + workflow = Workflow(workflow_id, repository, + ignore_job=ignore, gh_token=obj['github_token']) email_report = EmailReport( - report=Workflow(workflow_id, repository, - ignore_job=ignore, gh_token=obj['github_token']), - sender_name=sender_name, + date=email.utils.formatdate(workflow.datetime), + message_id=email.utils.make_msgid(), + recipient_email=recipient_email, + report=workflow, sender_email=sender_email, - recipient_email=recipient_email + sender_name=sender_name, ) if send: diff --git a/dev/archery/archery/crossbow/cli.py b/dev/archery/archery/crossbow/cli.py index c73c4d1ff7e..48afbee0988 100644 --- a/dev/archery/archery/crossbow/cli.py +++ b/dev/archery/archery/crossbow/cli.py @@ -16,6 +16,7 @@ # under the License. from datetime import date +import email.utils from pathlib import Path import time import sys @@ -382,11 +383,14 @@ def report(obj, job_name, sender_name, sender_email, recipient_email, queue.fetch() job = queue.get(job_name) + report = Report(job) email_report = EmailReport( - report=Report(job), - sender_name=sender_name, + date=email.utils.formatdate(), + message_id=email.utils.make_msgid(), + recipient_email=recipient_email, + report=report, sender_email=sender_email, - recipient_email=recipient_email + sender_name=sender_name, ) if poll: @@ -645,15 +649,18 @@ def __init__(self, token_expiration_date, days_left): self.token_expiration_date = token_expiration_date self.days_left = days_left + report = TokenExpirationReport( + token_expiration_date or "ALREADY_EXPIRED", days_left) email_report = EmailReport( - report=TokenExpirationReport( - token_expiration_date or "ALREADY_EXPIRED", days_left), - sender_name=sender_name, + date=email.utils.formatdate(), + message_id=email.utils.make_msgid(), + recipient_email=recipient_email, + report=report, sender_email=sender_email, - recipient_email=recipient_email + sender_name=sender_name, ) - message = email_report.render("token_expiration").strip() + message = email_report.render("token_expiration") if send: ReportUtils.send_email( smtp_user=smtp_user, diff --git a/dev/archery/archery/crossbow/reports.py b/dev/archery/archery/crossbow/reports.py index 32962410d6e..38d0a66aae6 100644 --- a/dev/archery/archery/crossbow/reports.py +++ b/dev/archery/archery/crossbow/reports.py @@ -277,10 +277,12 @@ class EmailReport(JinjaReport): 'workflow_report': 'email_workflow_report.txt.j2', } fields = [ + 'date', + 'message_id', + 'recipient_email', 'report', - 'sender_name', 'sender_email', - 'recipient_email', + 'sender_name', ] diff --git a/dev/archery/archery/crossbow/tests/fixtures/email-report.txt b/dev/archery/archery/crossbow/tests/fixtures/email-report.txt index c29cafd3938..bab93e6f89d 100644 --- a/dev/archery/archery/crossbow/tests/fixtures/email-report.txt +++ b/dev/archery/archery/crossbow/tests/fixtures/email-report.txt @@ -1,3 +1,7 @@ +MIME-Version: 1.0 +Content-Type: text/plain; charset="utf-8" +Message-Id: +Date: Mon, 22 Dec 2025 22:12:00 -0000 From: Sender Reporter To: recipient@arrow.com Subject: [NIGHTLY] Arrow Build Report for Job ursabot-1: 2 failed, 1 pending diff --git a/dev/archery/archery/crossbow/tests/test_reports.py b/dev/archery/archery/crossbow/tests/test_reports.py index 620b4c78bbc..829c1e833e6 100644 --- a/dev/archery/archery/crossbow/tests/test_reports.py +++ b/dev/archery/archery/crossbow/tests/test_reports.py @@ -76,9 +76,12 @@ def test_crossbow_email_report(load_fixture): job = load_fixture('crossbow-job.yaml', decoder=yaml.load) report = Report(job) assert report.tasks_by_state is not None - email_report = EmailReport(report=report, sender_name="Sender Reporter", + email_report = EmailReport(date="Mon, 22 Dec 2025 22:12:00 -0000", + message_id="", + recipient_email="recipient@arrow.com", + report=report, sender_email="sender@arrow.com", - recipient_email="recipient@arrow.com") + sender_name="Sender Reporter") assert ( email_report.render("nightly_report") == textwrap.dedent(expected_msg) diff --git a/dev/archery/archery/templates/email_nightly_report.txt.j2 b/dev/archery/archery/templates/email_nightly_report.txt.j2 index bc040734b03..959738467f9 100644 --- a/dev/archery/archery/templates/email_nightly_report.txt.j2 +++ b/dev/archery/archery/templates/email_nightly_report.txt.j2 @@ -15,9 +15,11 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -#} -{%- if True -%} -{%- endif -%} +-#} +MIME-Version: 1.0 +Content-Type: text/plain; charset="utf-8" +Message-Id: {{ message_id }} +Date: {{ date }} From: {{ sender_name }} <{{ sender_email }}> To: {{ recipient_email }} Subject: [NIGHTLY] Arrow Build Report for Job {{report.job.branch}}: {{ (report.tasks_by_state["error"] | length) + (report.tasks_by_state["failure"] | length) }} failed, {{ report.tasks_by_state["pending"] | length }} pending @@ -58,4 +60,4 @@ Succeeded Tasks: - {{ task_name }} {{ report.task_url(task) }} {% endfor %} -{%- endif -%} \ No newline at end of file +{%- endif -%} diff --git a/dev/archery/archery/templates/email_token_expiration.txt.j2 b/dev/archery/archery/templates/email_token_expiration.txt.j2 index 54c2005e57e..2af8aaaea85 100644 --- a/dev/archery/archery/templates/email_token_expiration.txt.j2 +++ b/dev/archery/archery/templates/email_token_expiration.txt.j2 @@ -15,7 +15,11 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -#} +-#} +MIME-Version: 1.0 +Content-Type: text/plain; charset="utf-8" +Message-Id: {{ message_id }} +Date: {{ date }} From: {{ sender_name }} <{{ sender_email }}> To: {{ recipient_email }} Subject: [CI] Arrow Crossbow Token Expiration in {{ report.token_expiration_date }} diff --git a/dev/archery/archery/templates/email_workflow_report.txt.j2 b/dev/archery/archery/templates/email_workflow_report.txt.j2 index 193856c1806..370eb557ca3 100644 --- a/dev/archery/archery/templates/email_workflow_report.txt.j2 +++ b/dev/archery/archery/templates/email_workflow_report.txt.j2 @@ -15,9 +15,11 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -#} -{%- if True -%} -{%- endif -%} +-#} +MIME-Version: 1.0 +Content-Type: text/plain; charset="utf-8" +Message-Id: {{ message_id }} +Date: {{ date }} From: {{ sender_name }} <{{ sender_email }}> To: {{ recipient_email }} Subject: [{{ report.datetime.strftime('%Y-%m-%d') }}] Arrow Build Report for {{ report.name }}: {{ report.failed_jobs() | length }} failed @@ -42,4 +44,4 @@ Succeeded Jobs: - {{ job.name }} {{ job.url }} {% endfor %} -{%- endif -%} \ No newline at end of file +{%- endif -%} From 1a1c9d5d0a73a9d460a0f02a5a77b82f9eac1dac Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Wed, 24 Dec 2025 17:08:27 +0900 Subject: [PATCH 2/5] Use email.message.EmailMessage --- dev/archery/archery/ci/cli.py | 28 ++++++---- dev/archery/archery/crossbow/cli.py | 53 +++++++++++-------- dev/archery/archery/crossbow/reports.py | 7 +-- .../crossbow/tests/fixtures/email-report.txt | 8 --- .../archery/crossbow/tests/test_reports.py | 7 +-- .../templates/email_nightly_report.txt.j2 | 8 --- .../templates/email_token_expiration.txt.j2 | 8 --- .../templates/email_workflow_report.txt.j2 | 8 --- 8 files changed, 52 insertions(+), 75 deletions(-) diff --git a/dev/archery/archery/ci/cli.py b/dev/archery/archery/ci/cli.py index 5d34fd582ca..d5c0a2c1f83 100644 --- a/dev/archery/archery/ci/cli.py +++ b/dev/archery/archery/ci/cli.py @@ -16,6 +16,7 @@ # under the License. import click +import email.message import email.utils from .core import Workflow @@ -108,14 +109,23 @@ def report_email(obj, workflow_id, sender_name, sender_email, recipient_email, workflow = Workflow(workflow_id, repository, ignore_job=ignore, gh_token=obj['github_token']) - email_report = EmailReport( - date=email.utils.formatdate(workflow.datetime), - message_id=email.utils.make_msgid(), - recipient_email=recipient_email, - report=workflow, - sender_email=sender_email, - sender_name=sender_name, + email_report = EmailReport(report=workflow) + message = email.message.EmailMessage() + message.set_charset('utf-8') + message['Message-Id'] = email.utils.make_msgid() + message['Date'] = email.utils.formatdate(workflow.datetime) + message['From'] = f'{sender_name} <{sender_email}>' + message['To'] = recipient_email + n_errors = len(workflow.tasks_by_state['error']) + n_failures = len(workflow.tasks_by_state['failure']) + n_pendings = len(workflow.tasks_by_state['pending']) + subject = ( + f'[NIGHTLY] Arrow Build Report for Job {workflow.job.branch}: ' + f'{n_errors + n_failures} failed, ' + f'{n_pendings} pending' ) + message['Subject'] = subject + message.set_content(email_report.render('workflow_report')) if send: ReportUtils.send_email( @@ -124,7 +134,7 @@ def report_email(obj, workflow_id, sender_name, sender_email, recipient_email, smtp_server=smtp_server, smtp_port=smtp_port, recipient_email=recipient_email, - message=email_report.render("workflow_report") + message=message ) else: - output.write(email_report.render("workflow_report")) + output.write(str(message)) diff --git a/dev/archery/archery/crossbow/cli.py b/dev/archery/archery/crossbow/cli.py index 48afbee0988..62f39ad723f 100644 --- a/dev/archery/archery/crossbow/cli.py +++ b/dev/archery/archery/crossbow/cli.py @@ -16,6 +16,7 @@ # under the License. from datetime import date +import email.message import email.utils from pathlib import Path import time @@ -384,14 +385,19 @@ def report(obj, job_name, sender_name, sender_email, recipient_email, job = queue.get(job_name) report = Report(job) - email_report = EmailReport( - date=email.utils.formatdate(), - message_id=email.utils.make_msgid(), - recipient_email=recipient_email, - report=report, - sender_email=sender_email, - sender_name=sender_name, + email_report = EmailReport(report=report) + message = email.message.EmailMessage() + message.set_charset('utf-8') + message['Message-Id'] = email.utils.make_msgid() + message['Date'] = email.utils.formatdate() + message['From'] = f'{sender_name} <{sender_email}>' + message['To'] = recipient_email + date = report.datetime.strftime('%Y-%m-%d') + message['Subject'] = ( + f'[{date}] Arrow Build Report for {report.name}: ' + f'{len(report.failed_jobs())} failed' ) + message.set_content(email_report.render('nightly_report')) if poll: job.wait_until_finished( @@ -406,10 +412,10 @@ def report(obj, job_name, sender_name, sender_email, recipient_email, smtp_server=smtp_server, smtp_port=smtp_port, recipient_email=recipient_email, - message=email_report.render("nightly_report") + message=message ) else: - output.write(email_report.render("nightly_report")) + output.write(str(message)) @crossbow.command() @@ -645,22 +651,25 @@ def notify_token_expiration(obj, days, sender_name, sender_email, return class TokenExpirationReport: - def __init__(self, token_expiration_date, days_left): - self.token_expiration_date = token_expiration_date + def __init__(self, days_left): self.days_left = days_left - report = TokenExpirationReport( - token_expiration_date or "ALREADY_EXPIRED", days_left) - email_report = EmailReport( - date=email.utils.formatdate(), - message_id=email.utils.make_msgid(), - recipient_email=recipient_email, - report=report, - sender_email=sender_email, - sender_name=sender_name, + if not token_expiration_date: + token_expiration_date = 'ALREADY_EXPIRED' + report = TokenExpirationReport(days_left) + email_report = EmailReport(report) + + message = email.message.EmailMessage() + message.set_charset('utf-8') + message['Message-Id'] = email.utils.make_msgid() + message['Date'] = email.utils.formatdate() + message['From'] = f'{sender_name} <{sender_email}>' + message['To'] = recipient_email + message['Subject'] = ( + f'[CI] Arrow Crossbow Token Expiration in {token_expiration_date}' ) + message.set_content(email_report.render('token_expiration')) - message = email_report.render("token_expiration") if send: ReportUtils.send_email( smtp_user=smtp_user, @@ -671,4 +680,4 @@ def __init__(self, token_expiration_date, days_left): message=message ) else: - output.write(message) + output.write(str(message)) diff --git a/dev/archery/archery/crossbow/reports.py b/dev/archery/archery/crossbow/reports.py index 38d0a66aae6..f2efd8623f8 100644 --- a/dev/archery/archery/crossbow/reports.py +++ b/dev/archery/archery/crossbow/reports.py @@ -259,7 +259,7 @@ def send_email(cls, smtp_user, smtp_password, smtp_server, smtp_port, else: smtp.starttls() smtp.login(smtp_user, smtp_password) - smtp.sendmail(smtp_user, recipient_email, message) + smtp.send_message(smtp_user, recipient_email, message) @classmethod def write_csv(cls, report, add_headers=True): @@ -277,12 +277,7 @@ class EmailReport(JinjaReport): 'workflow_report': 'email_workflow_report.txt.j2', } fields = [ - 'date', - 'message_id', - 'recipient_email', 'report', - 'sender_email', - 'sender_name', ] diff --git a/dev/archery/archery/crossbow/tests/fixtures/email-report.txt b/dev/archery/archery/crossbow/tests/fixtures/email-report.txt index bab93e6f89d..16409dc38ef 100644 --- a/dev/archery/archery/crossbow/tests/fixtures/email-report.txt +++ b/dev/archery/archery/crossbow/tests/fixtures/email-report.txt @@ -1,11 +1,3 @@ -MIME-Version: 1.0 -Content-Type: text/plain; charset="utf-8" -Message-Id: -Date: Mon, 22 Dec 2025 22:12:00 -0000 -From: Sender Reporter -To: recipient@arrow.com -Subject: [NIGHTLY] Arrow Build Report for Job ursabot-1: 2 failed, 1 pending - Arrow Build Report for Job ursabot-1 See https://s3.amazonaws.com/arrow-data/index.html for more information. diff --git a/dev/archery/archery/crossbow/tests/test_reports.py b/dev/archery/archery/crossbow/tests/test_reports.py index 829c1e833e6..4ed40a7793c 100644 --- a/dev/archery/archery/crossbow/tests/test_reports.py +++ b/dev/archery/archery/crossbow/tests/test_reports.py @@ -76,12 +76,7 @@ def test_crossbow_email_report(load_fixture): job = load_fixture('crossbow-job.yaml', decoder=yaml.load) report = Report(job) assert report.tasks_by_state is not None - email_report = EmailReport(date="Mon, 22 Dec 2025 22:12:00 -0000", - message_id="", - recipient_email="recipient@arrow.com", - report=report, - sender_email="sender@arrow.com", - sender_name="Sender Reporter") + email_report = EmailReport(report=report) assert ( email_report.render("nightly_report") == textwrap.dedent(expected_msg) diff --git a/dev/archery/archery/templates/email_nightly_report.txt.j2 b/dev/archery/archery/templates/email_nightly_report.txt.j2 index 959738467f9..7b43d7c867e 100644 --- a/dev/archery/archery/templates/email_nightly_report.txt.j2 +++ b/dev/archery/archery/templates/email_nightly_report.txt.j2 @@ -16,14 +16,6 @@ # specific language governing permissions and limitations # under the License. -#} -MIME-Version: 1.0 -Content-Type: text/plain; charset="utf-8" -Message-Id: {{ message_id }} -Date: {{ date }} -From: {{ sender_name }} <{{ sender_email }}> -To: {{ recipient_email }} -Subject: [NIGHTLY] Arrow Build Report for Job {{report.job.branch}}: {{ (report.tasks_by_state["error"] | length) + (report.tasks_by_state["failure"] | length) }} failed, {{ report.tasks_by_state["pending"] | length }} pending - Arrow Build Report for Job {{ report.job.branch }} See https://s3.amazonaws.com/arrow-data/index.html for more information. diff --git a/dev/archery/archery/templates/email_token_expiration.txt.j2 b/dev/archery/archery/templates/email_token_expiration.txt.j2 index 2af8aaaea85..096026fa3a2 100644 --- a/dev/archery/archery/templates/email_token_expiration.txt.j2 +++ b/dev/archery/archery/templates/email_token_expiration.txt.j2 @@ -16,14 +16,6 @@ # specific language governing permissions and limitations # under the License. -#} -MIME-Version: 1.0 -Content-Type: text/plain; charset="utf-8" -Message-Id: {{ message_id }} -Date: {{ date }} -From: {{ sender_name }} <{{ sender_email }}> -To: {{ recipient_email }} -Subject: [CI] Arrow Crossbow Token Expiration in {{ report.token_expiration_date }} - The Arrow Crossbow Token will expire in {{ report.days_left }} days. Please generate a new Token. Send it to Apache INFRA to update the CROSSBOW_GITHUB_TOKEN. diff --git a/dev/archery/archery/templates/email_workflow_report.txt.j2 b/dev/archery/archery/templates/email_workflow_report.txt.j2 index 370eb557ca3..6668d6c67ee 100644 --- a/dev/archery/archery/templates/email_workflow_report.txt.j2 +++ b/dev/archery/archery/templates/email_workflow_report.txt.j2 @@ -16,14 +16,6 @@ # specific language governing permissions and limitations # under the License. -#} -MIME-Version: 1.0 -Content-Type: text/plain; charset="utf-8" -Message-Id: {{ message_id }} -Date: {{ date }} -From: {{ sender_name }} <{{ sender_email }}> -To: {{ recipient_email }} -Subject: [{{ report.datetime.strftime('%Y-%m-%d') }}] Arrow Build Report for {{ report.name }}: {{ report.failed_jobs() | length }} failed - Arrow Build Report for {{ report.name }} Workflow URL: {{ report.url }} From f869412cc644cc9d29b7613dabd9c3560330f703 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 1 Jan 2026 11:22:45 +0900 Subject: [PATCH 3/5] Unify email.message.EmailMessage generation --- dev/archery/archery/ci/cli.py | 16 ++++----- dev/archery/archery/crossbow/cli.py | 33 +++++++++---------- dev/archery/archery/crossbow/reports.py | 14 ++++++++ .../crossbow/tests/fixtures/email-report.txt | 9 +++++ .../archery/crossbow/tests/test_reports.py | 10 +++++- 5 files changed, 54 insertions(+), 28 deletions(-) diff --git a/dev/archery/archery/ci/cli.py b/dev/archery/archery/ci/cli.py index d5c0a2c1f83..9979f576ab8 100644 --- a/dev/archery/archery/ci/cli.py +++ b/dev/archery/archery/ci/cli.py @@ -16,7 +16,6 @@ # under the License. import click -import email.message import email.utils from .core import Workflow @@ -110,12 +109,6 @@ def report_email(obj, workflow_id, sender_name, sender_email, recipient_email, workflow = Workflow(workflow_id, repository, ignore_job=ignore, gh_token=obj['github_token']) email_report = EmailReport(report=workflow) - message = email.message.EmailMessage() - message.set_charset('utf-8') - message['Message-Id'] = email.utils.make_msgid() - message['Date'] = email.utils.formatdate(workflow.datetime) - message['From'] = f'{sender_name} <{sender_email}>' - message['To'] = recipient_email n_errors = len(workflow.tasks_by_state['error']) n_failures = len(workflow.tasks_by_state['failure']) n_pendings = len(workflow.tasks_by_state['pending']) @@ -124,8 +117,13 @@ def report_email(obj, workflow_id, sender_name, sender_email, recipient_email, f'{n_errors + n_failures} failed, ' f'{n_pendings} pending' ) - message['Subject'] = subject - message.set_content(email_report.render('workflow_report')) + headers = { + 'Date': email.utils.formatdate(workflow.datetime), + 'From': f'{sender_name} <{sender_email}>', + 'To': recipient_email, + 'Subject': subject, + } + message = email_report.render('workflow_report', headers) if send: ReportUtils.send_email( diff --git a/dev/archery/archery/crossbow/cli.py b/dev/archery/archery/crossbow/cli.py index 62f39ad723f..3da86943ce8 100644 --- a/dev/archery/archery/crossbow/cli.py +++ b/dev/archery/archery/crossbow/cli.py @@ -16,7 +16,6 @@ # under the License. from datetime import date -import email.message import email.utils from pathlib import Path import time @@ -386,18 +385,18 @@ def report(obj, job_name, sender_name, sender_email, recipient_email, job = queue.get(job_name) report = Report(job) email_report = EmailReport(report=report) - message = email.message.EmailMessage() - message.set_charset('utf-8') - message['Message-Id'] = email.utils.make_msgid() - message['Date'] = email.utils.formatdate() - message['From'] = f'{sender_name} <{sender_email}>' - message['To'] = recipient_email date = report.datetime.strftime('%Y-%m-%d') - message['Subject'] = ( + subject = ( f'[{date}] Arrow Build Report for {report.name}: ' f'{len(report.failed_jobs())} failed' ) - message.set_content(email_report.render('nightly_report')) + headers = { + 'Date': email.utils.formatdate(report.datetime), + 'From': f'{sender_name} <{sender_email}>', + 'To': recipient_email, + 'Subject': subject, + } + message = email_report.render('nightly_report', headers) if poll: job.wait_until_finished( @@ -658,17 +657,15 @@ def __init__(self, days_left): token_expiration_date = 'ALREADY_EXPIRED' report = TokenExpirationReport(days_left) email_report = EmailReport(report) - - message = email.message.EmailMessage() - message.set_charset('utf-8') - message['Message-Id'] = email.utils.make_msgid() - message['Date'] = email.utils.formatdate() - message['From'] = f'{sender_name} <{sender_email}>' - message['To'] = recipient_email - message['Subject'] = ( + subject = ( f'[CI] Arrow Crossbow Token Expiration in {token_expiration_date}' ) - message.set_content(email_report.render('token_expiration')) + headers = { + 'From': f'{sender_name} <{sender_email}>', + 'To': recipient_email, + 'Subject': subject, + } + message = email_report.render('token_expiration', headers) if send: ReportUtils.send_email( diff --git a/dev/archery/archery/crossbow/reports.py b/dev/archery/archery/crossbow/reports.py index f2efd8623f8..fa87c707c91 100644 --- a/dev/archery/archery/crossbow/reports.py +++ b/dev/archery/archery/crossbow/reports.py @@ -17,6 +17,8 @@ import collections import csv +import email.message +import email.utils import operator import fnmatch import functools @@ -280,6 +282,18 @@ class EmailReport(JinjaReport): 'report', ] + def render(self, template_name, headers): + message = email.message.EmailMessage() + message.set_charset('utf-8') + if 'Message-Id' not in headers: + message['Message-Id'] = email.utils.make_msgid() + if 'Date' not in headers: + message['Date'] = email.utils.formatdate() + for (key, value) in headers.items(): + message[key] = value + message.set_content(super().render(template_name)) + return message + class CommentReport(Report): diff --git a/dev/archery/archery/crossbow/tests/fixtures/email-report.txt b/dev/archery/archery/crossbow/tests/fixtures/email-report.txt index 16409dc38ef..c8eaaef72ee 100644 --- a/dev/archery/archery/crossbow/tests/fixtures/email-report.txt +++ b/dev/archery/archery/crossbow/tests/fixtures/email-report.txt @@ -1,3 +1,12 @@ +MIME-Version: 1.0 +Message-Id: +Date: Thu, 01 Jan 2026 02:19:16 -0000 +From: from@example.com +To: to@example.com +Subject: Arrow Build Report +Content-Type: text/plain; charset="utf-8" +Content-Transfer-Encoding: 7bit + Arrow Build Report for Job ursabot-1 See https://s3.amazonaws.com/arrow-data/index.html for more information. diff --git a/dev/archery/archery/crossbow/tests/test_reports.py b/dev/archery/archery/crossbow/tests/test_reports.py index 4ed40a7793c..1711a51bbcd 100644 --- a/dev/archery/archery/crossbow/tests/test_reports.py +++ b/dev/archery/archery/crossbow/tests/test_reports.py @@ -77,9 +77,17 @@ def test_crossbow_email_report(load_fixture): report = Report(job) assert report.tasks_by_state is not None email_report = EmailReport(report=report) + headers = { + 'Message-Id': '', + 'Date': 'Thu, 01 Jan 2026 02:19:16 -0000', + 'From': 'from@example.com', + 'To': 'to@example.com', + 'Subject': 'Arrow Build Report', + } assert ( - email_report.render("nightly_report") == textwrap.dedent(expected_msg) + str(email_report.render("nightly_report", headers)) == + textwrap.dedent(expected_msg) ) From ce6b347ff434aed0f781cf320f3da77a98402d2e Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Sat, 10 Jan 2026 07:01:46 +0900 Subject: [PATCH 4/5] Create EmailReport subclass per report It's for encapsulating header generation code to report an object. --- dev/archery/archery/ci/cli.py | 42 ++++++----- dev/archery/archery/crossbow/cli.py | 71 +++++++++++-------- dev/archery/archery/crossbow/reports.py | 48 +++++++++---- ...il-report.txt => nightly-email-report.txt} | 8 +-- .../token-expiration-email-report.txt | 14 ++++ .../archery/crossbow/tests/test_reports.py | 63 +++++++++++----- .../templates/email_token_expiration.txt.j2 | 5 +- 7 files changed, 164 insertions(+), 87 deletions(-) rename dev/archery/archery/crossbow/tests/fixtures/{email-report.txt => nightly-email-report.txt} (80%) create mode 100644 dev/archery/archery/crossbow/tests/fixtures/token-expiration-email-report.txt diff --git a/dev/archery/archery/ci/cli.py b/dev/archery/archery/ci/cli.py index 9979f576ab8..5597dff733e 100644 --- a/dev/archery/archery/ci/cli.py +++ b/dev/archery/archery/ci/cli.py @@ -16,7 +16,6 @@ # under the License. import click -import email.utils from .core import Workflow from ..crossbow.reports import ChatReport, EmailReport, ReportUtils @@ -74,6 +73,22 @@ def report_chat(obj, workflow_id, send, repository, ignore, webhook, output.write(report_chat.render("workflow_report")) +class WorkflowEmailReport(EmailReport): + def __init__(self, **kwargs): + super().__init__('workflow_report', **kwargs) + + def date(self): + return self.report.datetime + + def subject(self): + workflow = self.report + date = self.date().strftime('%Y-%m-%d') + return ( + f'[{date}] Arrow Build Report for Job {workflow.name}: ' + f'{len(workflow.failed_jobs())} failed' + ) + + @ci.command() @click.argument('workflow_id', required=True) @click.option('--sender-name', '-n', @@ -108,22 +123,12 @@ def report_email(obj, workflow_id, sender_name, sender_email, recipient_email, workflow = Workflow(workflow_id, repository, ignore_job=ignore, gh_token=obj['github_token']) - email_report = EmailReport(report=workflow) - n_errors = len(workflow.tasks_by_state['error']) - n_failures = len(workflow.tasks_by_state['failure']) - n_pendings = len(workflow.tasks_by_state['pending']) - subject = ( - f'[NIGHTLY] Arrow Build Report for Job {workflow.job.branch}: ' - f'{n_errors + n_failures} failed, ' - f'{n_pendings} pending' + email_report = WorkflowEmailReport( + report=workflow, + sender_name=sender_name, + sender_email=sender_email, + recipient_email=recipient_email ) - headers = { - 'Date': email.utils.formatdate(workflow.datetime), - 'From': f'{sender_name} <{sender_email}>', - 'To': recipient_email, - 'Subject': subject, - } - message = email_report.render('workflow_report', headers) if send: ReportUtils.send_email( @@ -131,8 +136,7 @@ def report_email(obj, workflow_id, sender_name, sender_email, recipient_email, smtp_password=smtp_password, smtp_server=smtp_server, smtp_port=smtp_port, - recipient_email=recipient_email, - message=message + report=email_report ) else: - output.write(str(message)) + output.write(str(email_report.render())) diff --git a/dev/archery/archery/crossbow/cli.py b/dev/archery/archery/crossbow/cli.py index 3da86943ce8..10aa3dedf44 100644 --- a/dev/archery/archery/crossbow/cli.py +++ b/dev/archery/archery/crossbow/cli.py @@ -16,7 +16,6 @@ # under the License. from datetime import date -import email.utils from pathlib import Path import time import sys @@ -344,6 +343,22 @@ def latest_prefix(obj, prefix, fetch): click.echo(latest.branch) +class NightlyEmailReport(EmailReport): + def __init__(self, **kwargs): + super().__init__('nightly_report', **kwargs) + + def subject(self): + report = self.report + n_errors = len(report.tasks_by_state['error']) + n_failures = len(report.tasks_by_state['failure']) + n_pendings = len(report.tasks_by_state['pending']) + return ( + f'[NIGHTLY] Arrow Build Report for Job {report.job.branch}: ' + f'{n_errors + n_failures} failed, ' + f'{n_pendings} pending' + ) + + @crossbow.command() @click.argument('job-name', required=True) @click.option('--sender-name', '-n', @@ -384,19 +399,12 @@ def report(obj, job_name, sender_name, sender_email, recipient_email, job = queue.get(job_name) report = Report(job) - email_report = EmailReport(report=report) - date = report.datetime.strftime('%Y-%m-%d') - subject = ( - f'[{date}] Arrow Build Report for {report.name}: ' - f'{len(report.failed_jobs())} failed' + email_report = NightlyEmailReport( + report=report, + sender_name=sender_name, + sender_email=sender_email, + recipient_email=recipient_email ) - headers = { - 'Date': email.utils.formatdate(report.datetime), - 'From': f'{sender_name} <{sender_email}>', - 'To': recipient_email, - 'Subject': subject, - } - message = email_report.render('nightly_report', headers) if poll: job.wait_until_finished( @@ -410,11 +418,10 @@ def report(obj, job_name, sender_name, sender_email, recipient_email, smtp_password=smtp_password, smtp_server=smtp_server, smtp_port=smtp_port, - recipient_email=recipient_email, - message=message + report=email_report ) else: - output.write(str(message)) + output.write(str(email_report.render())) @crossbow.command() @@ -610,6 +617,17 @@ def batch_gen(iterable, step): print(batch) +class TokenExpirationEmailReport(EmailReport): + def __init__(self, **kwargs): + super().__init__('token_expiration', **kwargs) + + def subject(self): + token_expiration_date = self.report.token_expiration_date + return ( + f'[CI] Arrow Crossbow Token Expiration in {token_expiration_date}' + ) + + @crossbow.command() @click.option('--days', default=30, help='Notification will be sent if expiration date is ' @@ -650,22 +668,14 @@ def notify_token_expiration(obj, days, sender_name, sender_email, return class TokenExpirationReport: - def __init__(self, days_left): + def __init__(self, token_expiration_date, days_left): + self.token_expiration_date = token_expiration_date self.days_left = days_left if not token_expiration_date: token_expiration_date = 'ALREADY_EXPIRED' - report = TokenExpirationReport(days_left) - email_report = EmailReport(report) - subject = ( - f'[CI] Arrow Crossbow Token Expiration in {token_expiration_date}' - ) - headers = { - 'From': f'{sender_name} <{sender_email}>', - 'To': recipient_email, - 'Subject': subject, - } - message = email_report.render('token_expiration', headers) + report = TokenExpirationReport(token_expiration_date, days_left) + email_report = TokenExpirationEmailReport(report) if send: ReportUtils.send_email( @@ -673,8 +683,7 @@ def __init__(self, days_left): smtp_password=smtp_password, smtp_server=smtp_server, smtp_port=smtp_port, - recipient_email=recipient_email, - message=message + report=email_report ) else: - output.write(str(message)) + output.write(str(email_report.render())) diff --git a/dev/archery/archery/crossbow/reports.py b/dev/archery/archery/crossbow/reports.py index fa87c707c91..a2c0487a2b1 100644 --- a/dev/archery/archery/crossbow/reports.py +++ b/dev/archery/archery/crossbow/reports.py @@ -17,6 +17,8 @@ import collections import csv +import datetime +import email.headerregistry import email.message import email.utils import operator @@ -248,7 +250,7 @@ def send_message(cls, webhook, message): @classmethod def send_email(cls, smtp_user, smtp_password, smtp_server, smtp_port, - recipient_email, message): + report): from smtplib import SMTP, SMTP_SSL if smtp_port == 465: @@ -261,7 +263,8 @@ def send_email(cls, smtp_user, smtp_password, smtp_server, smtp_port, else: smtp.starttls() smtp.login(smtp_user, smtp_password) - smtp.send_message(smtp_user, recipient_email, message) + message = report.render() + smtp.send_message(smtp_user, report.recipient_email, message) @classmethod def write_csv(cls, report, add_headers=True): @@ -273,25 +276,40 @@ def write_csv(cls, report, add_headers=True): class EmailReport(JinjaReport): - templates = { - 'nightly_report': 'email_nightly_report.txt.j2', - 'token_expiration': 'email_token_expiration.txt.j2', - 'workflow_report': 'email_workflow_report.txt.j2', - } fields = [ 'report', + 'sender_name', + 'sender_email', + 'recipient_email', ] - def render(self, template_name, headers): + def __init__(self, template_name, **kwargs): + self._template_name = template_name + super().__init__(**kwargs) + + @property + def templates(self): + return { + self._template_name: f'email_{self._template_name}.txt.j2', + } + + def date(self): + return None + + def render(self): message = email.message.EmailMessage() message.set_charset('utf-8') - if 'Message-Id' not in headers: - message['Message-Id'] = email.utils.make_msgid() - if 'Date' not in headers: - message['Date'] = email.utils.formatdate() - for (key, value) in headers.items(): - message[key] = value - message.set_content(super().render(template_name)) + message['Message-Id'] = email.utils.make_msgid() + date = self.date() + if isinstance(date, datetime.datetime): + message['Date'] = date + else: + message['Date'] = email.utils.formatdate(date) + message['From'] = email.headerregistry.Address( + self.sender_name, addr_spec=self.sender_email) + message['To'] = email.headerregistry.Address(addr_spec=self.recipient_email) + message['Subject'] = self.subject() + message.set_content(super().render(self._template_name)) return message diff --git a/dev/archery/archery/crossbow/tests/fixtures/email-report.txt b/dev/archery/archery/crossbow/tests/fixtures/nightly-email-report.txt similarity index 80% rename from dev/archery/archery/crossbow/tests/fixtures/email-report.txt rename to dev/archery/archery/crossbow/tests/fixtures/nightly-email-report.txt index c8eaaef72ee..5e7b8e9c67d 100644 --- a/dev/archery/archery/crossbow/tests/fixtures/email-report.txt +++ b/dev/archery/archery/crossbow/tests/fixtures/nightly-email-report.txt @@ -1,9 +1,9 @@ MIME-Version: 1.0 Message-Id: -Date: Thu, 01 Jan 2026 02:19:16 -0000 -From: from@example.com -To: to@example.com -Subject: Arrow Build Report +Date: date +From: Sender Reporter +To: recipient@arrow.com +Subject: [NIGHTLY] Arrow Build Report for Job ursabot-1: 2 failed, 1 pending Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 7bit diff --git a/dev/archery/archery/crossbow/tests/fixtures/token-expiration-email-report.txt b/dev/archery/archery/crossbow/tests/fixtures/token-expiration-email-report.txt new file mode 100644 index 00000000000..1f8ccbf30c6 --- /dev/null +++ b/dev/archery/archery/crossbow/tests/fixtures/token-expiration-email-report.txt @@ -0,0 +1,14 @@ +MIME-Version: 1.0 +Message-Id: +Date: date +From: Sender Reporter +To: recipient@arrow.com +Subject: [CI] Arrow Crossbow Token Expiration in 2026-01-17 +Content-Type: text/plain; charset="utf-8" +Content-Transfer-Encoding: 7bit + +The Arrow Crossbow Token will expire in 7 days. + +Please generate a new Token. Send it to Apache INFRA to update the +CROSSBOW_GITHUB_TOKEN. Update it on the crossbow repository and in +the Azure pipelines. diff --git a/dev/archery/archery/crossbow/tests/test_reports.py b/dev/archery/archery/crossbow/tests/test_reports.py index 1711a51bbcd..02012d2f1be 100644 --- a/dev/archery/archery/crossbow/tests/test_reports.py +++ b/dev/archery/archery/crossbow/tests/test_reports.py @@ -15,11 +15,12 @@ # specific language governing permissions and limitations # under the License. +import re import textwrap +from archery.crossbow.cli import (NightlyEmailReport, TokenExpirationEmailReport) from archery.crossbow.core import yaml -from archery.crossbow.reports import (ChatReport, CommentReport, EmailReport, - Report) +from archery.crossbow.reports import (ChatReport, CommentReport, Report) def test_crossbow_comment_formatter(load_fixture): @@ -71,25 +72,55 @@ def test_crossbow_chat_report_extra_message_success(load_fixture): assert report_chat.render("text") == textwrap.dedent(expected_msg) -def test_crossbow_email_report(load_fixture): - expected_msg = load_fixture('email-report.txt') +def test_crossbow_nightly_email_report(load_fixture): + expected_msg = load_fixture('nightly-email-report.txt') job = load_fixture('crossbow-job.yaml', decoder=yaml.load) report = Report(job) assert report.tasks_by_state is not None - email_report = EmailReport(report=report) - headers = { - 'Message-Id': '', - 'Date': 'Thu, 01 Jan 2026 02:19:16 -0000', - 'From': 'from@example.com', - 'To': 'to@example.com', - 'Subject': 'Arrow Build Report', - } - - assert ( - str(email_report.render("nightly_report", headers)) == - textwrap.dedent(expected_msg) + email_report = NightlyEmailReport( + report=report, + sender_name='Sender Reporter', + sender_email='sender@arrow.com', + recipient_email='recipient@arrow.com' ) + actual = str(email_report.render()) + # Normalize dynamic headers + actual = re.sub(r'(?m)^Message-Id: <.+?>', + 'Message-Id: ', + actual) + actual = re.sub(r'(?m)^Date: [^\n]+ -0000$', + 'Date: date', + actual) + assert actual == textwrap.dedent(expected_msg) + + +def test_crossbow_token_expiration_email_report(load_fixture): + expected_msg = load_fixture('token-expiration-email-report.txt') + + class TokenExpirationReport: + def __init__(self, token_expiration_date, days_left): + self.token_expiration_date = token_expiration_date + self.days_left = days_left + + report = TokenExpirationReport('2026-01-17', 7) + email_report = TokenExpirationEmailReport( + report=report, + sender_name='Sender Reporter', + sender_email='sender@arrow.com', + recipient_email='recipient@arrow.com' + ) + + actual = str(email_report.render()) + # Normalize dynamic headers + actual = re.sub(r'(?m)^Message-Id: <.+?>', + 'Message-Id: ', + actual) + actual = re.sub(r'(?m)^Date: [^\n]+ -0000$', + 'Date: date', + actual) + assert actual == textwrap.dedent(expected_msg) + def test_crossbow_export_report(load_fixture): job = load_fixture('crossbow-job.yaml', decoder=yaml.load) diff --git a/dev/archery/archery/templates/email_token_expiration.txt.j2 b/dev/archery/archery/templates/email_token_expiration.txt.j2 index 096026fa3a2..340cb4a5353 100644 --- a/dev/archery/archery/templates/email_token_expiration.txt.j2 +++ b/dev/archery/archery/templates/email_token_expiration.txt.j2 @@ -18,5 +18,6 @@ -#} The Arrow Crossbow Token will expire in {{ report.days_left }} days. -Please generate a new Token. Send it to Apache INFRA to update the CROSSBOW_GITHUB_TOKEN. -Update it on the crossbow repository and in the Azure pipelines. +Please generate a new Token. Send it to Apache INFRA to update the +CROSSBOW_GITHUB_TOKEN. Update it on the crossbow repository and in +the Azure pipelines. From d666d5f9861193d5ca5da9dc1764140a3a623b46 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Sat, 10 Jan 2026 07:42:32 +0900 Subject: [PATCH 5/5] Add missing paths --- .github/workflows/cpp_extra.yml | 2 ++ .github/workflows/package_linux.yml | 2 ++ .github/workflows/r_extra.yml | 38 +++++++++++++++-------------- 3 files changed, 24 insertions(+), 18 deletions(-) diff --git a/.github/workflows/cpp_extra.yml b/.github/workflows/cpp_extra.yml index 7ab4c73270d..7844b0b0112 100644 --- a/.github/workflows/cpp_extra.yml +++ b/.github/workflows/cpp_extra.yml @@ -39,6 +39,7 @@ on: - 'ci/scripts/util_*' - 'cpp/**' - 'compose.yaml' + - 'dev/archery/archery/**' - 'format/Flight.proto' - 'testing' tags: @@ -61,6 +62,7 @@ on: - 'ci/scripts/util_*' - 'cpp/**' - 'compose.yaml' + - 'dev/archery/archery/**' - 'format/Flight.proto' - 'testing' types: diff --git a/.github/workflows/package_linux.yml b/.github/workflows/package_linux.yml index 3e4b7592153..1d2ae61f1eb 100644 --- a/.github/workflows/package_linux.yml +++ b/.github/workflows/package_linux.yml @@ -29,6 +29,7 @@ on: - '.github/workflows/report_ci.yml' - 'cpp/**' - 'c_glib/**' + - 'dev/archery/archery/**' - 'dev/release/binary-task.rb' - 'dev/release/verify-apt.sh' - 'dev/release/verify-yum.sh' @@ -43,6 +44,7 @@ on: - '.github/workflows/report_ci.yml' - 'cpp/**' - 'c_glib/**' + - 'dev/archery/archery/**' - 'dev/release/binary-task.rb' - 'dev/release/verify-apt.sh' - 'dev/release/verify-yum.sh' diff --git a/.github/workflows/r_extra.yml b/.github/workflows/r_extra.yml index 687a4e0aa05..443d2354d7f 100644 --- a/.github/workflows/r_extra.yml +++ b/.github/workflows/r_extra.yml @@ -27,15 +27,16 @@ on: - '.github/workflows/check_labels.yml' - '.github/workflows/r_extra.yml' - '.github/workflows/report_ci.yml' - - "ci/docker/**" - - "ci/etc/rprofile" - - "ci/scripts/PKGBUILD" - - "ci/scripts/cpp_*.sh" - - "ci/scripts/install_minio.sh" - - "ci/scripts/r_*.sh" - - "cpp/**" - - "compose.yaml" - - "r/**" + - 'ci/docker/**' + - 'ci/etc/rprofile' + - 'ci/scripts/PKGBUILD' + - 'ci/scripts/cpp_*.sh' + - 'ci/scripts/install_minio.sh' + - 'ci/scripts/r_*.sh' + - 'cpp/**' + - 'compose.yaml' + - 'dev/archery/archery/**' + - 'r/**' tags: - '**' pull_request: @@ -44,15 +45,16 @@ on: - '.github/workflows/check_labels.yml' - '.github/workflows/r_extra.yml' - '.github/workflows/report_ci.yml' - - "ci/docker/**" - - "ci/etc/rprofile" - - "ci/scripts/PKGBUILD" - - "ci/scripts/cpp_*.sh" - - "ci/scripts/install_minio.sh" - - "ci/scripts/r_*.sh" - - "cpp/**" - - "compose.yaml" - - "r/**" + - 'ci/docker/**' + - 'ci/etc/rprofile' + - 'ci/scripts/PKGBUILD' + - 'ci/scripts/cpp_*.sh' + - 'ci/scripts/install_minio.sh' + - 'ci/scripts/r_*.sh' + - 'cpp/**' + - 'compose.yaml' + - 'dev/archery/archery/**' + - 'r/**' types: - labeled - opened