33import argparse
44import github
55import github .Workflow
6+ import github .WorkflowRun
67import matplotlib .pyplot as plt
78import os
89import re
4041"""
4142
4243
class Memprof_Run:
    """Record of one memory-profiling GitHub Actions workflow run.

    Captures the branch name and run number of a workflow run and,
    optionally, the downloaded artefact archive attached to it.
    """

    def __init__(self, run: "github.WorkflowRun.WorkflowRun"):
        # Copy only the fields we need instead of holding on to the
        # full PyGithub run object.
        self.head_branch = run.head_branch
        self.run_number = run.run_number
        # Initialise eagerly so attribute access is safe even when no
        # artefact is ever attached via add_artefact() (previously this
        # attribute did not exist until add_artefact was called).
        self.artefact: zipfile.ZipFile | None = None

    def add_artefact(self, artefact: zipfile.ZipFile):
        """Attach the downloaded artefact archive for this run."""
        self.artefact = artefact

    def __str__(self) -> str:
        # Human-readable label used e.g. for plot legends.
        return f"{self.run_number} - {self.head_branch}"
55+
4356def download_artefact (url : str ) -> zipfile .ZipFile | None :
4457 """
4558 PyGithub does not support retrieving artefacts into buffers, so we have to resort
@@ -66,22 +79,21 @@ def download_artefact(url: str) -> zipfile.ZipFile | None:
6679
def get_artefacts(
    nruns: int, workflow: github.Workflow.Workflow, artefact: str, filter: list[str]
) -> dict[int, Memprof_Run]:
    """Collect up to nruns successful runs of the workflow, keyed by run number.

    When filter is non-empty, only runs from the listed branches are
    considered.  A run is recorded only if its artefact named *artefact*
    could be downloaded.
    """
    collected: dict[int, Memprof_Run] = {}
    for run in workflow.get_runs(status="success"):
        # Restrict to the requested branches, if any were given.
        if filter and run.head_branch not in filter:
            continue
        candidate = Memprof_Run(run)
        for gha in run.get_artifacts():
            if gha.name != artefact:
                continue
            payload = download_artefact(gha.archive_download_url)
            if payload:
                candidate.add_artefact(payload)
                collected[run.run_number] = candidate
            # At most one artefact with this name per run; stop scanning.
            break
        if len(collected) == nruns:
            break
    return collected
8799
@@ -107,6 +119,26 @@ def __exit__(self, type, value, traceback):
107119 self .tmpfile .close ()
108120
109121
def check_memory_anomaly(category: str, test_name: str, rss: dict[int, list[float]], times: dict[int, list[float]]):
    """Check for unusually high memory usage in the latest test.

    For every test with a >60 second runtime, compare the maximum memory usage of the most recent
    test and issue a warning via github annotation if the memory usage is more than 20% above the
    average of the n previous runs.

    :param category: test category, used only in the warning message
    :param test_name: test name, used only in the warning message
    :param rss: per-run-number list of memory samples (GB)
    :param times: per-run-number list of sample timestamps (seconds)
    """
    # Skip short-running tests entirely; also bail out if any run has no
    # samples (max() would raise on an empty list).
    if any(not tl or max(tl) <= 60.0 for tl in times.values()):
        return

    max_mems = {i: max(rl) for i, rl in rss.items() if rl}
    # Need at least one *previous* run to form a baseline average —
    # with a single run the divisor (len - 1) would be zero.
    if len(max_mems) < 2:
        return
    latest_run_no = max(max_mems.keys())

    avg_mem = sum(m for i, m in max_mems.items() if i != latest_run_no) / (len(max_mems) - 1)
    if max_mems[latest_run_no] > 1.2 * avg_mem:
        print(
            f"::warning title=High Memory Usage::Latest run of {category}: {test_name} ({latest_run_no}) has memory usage over 20% higher than the average for this test {max_mems[latest_run_no]:.2f} GB > {avg_mem:.2f} GB"
        )
141+
110142def main ():
111143 if gh_token == "BAD_KEY" :
112144 raise KeyError ("GH_TOKEN must be set in environment" )
@@ -159,8 +191,8 @@ def main():
159191 d_cat = {}
160192 d_names = {}
161193
162- for runid , zf in runs .items ():
163- with Zip_to_sql_conn (zf ) as conn :
194+ for runid , mpr in runs .items ():
195+ with Zip_to_sql_conn (mpr . artefact ) as conn :
164196 cur = conn .cursor ()
165197 cur .execute (get_all_cmds_query )
166198 for cmd , cat in cur .fetchall ():
@@ -179,11 +211,12 @@ def main():
179211 d_rss [f"{ cat } _{ cmd } " ][runid ].append (rss )
180212
181213 for k , v in d_rss .items ():
214+ check_memory_anomaly (d_cat [k ], k , v , d_times [k ])
182215 os .makedirs (f"{ ns .outdir } /{ d_cat [k ]} " , exist_ok = True )
183216 fig , ax = plt .subplots ()
184217 for runid in runs :
185218 if runid in v :
186- ax .plot (d_times [k ][runid ], v [runid ], label = f"Run { runid } " )
219+ ax .plot (d_times [k ][runid ], v [runid ], label = f"Run { runs [ runid ] } " )
187220 ax .set_xlabel ("Time (seconds)" )
188221 ax .set_ylabel ("Memory usage (GB)" )
189222 ax .set_ylim (ymin = 0.0 )
0 commit comments