55from microbiorust import gbk
66from codecarbon import OfflineEmissionsTracker
77
8+ #warnings.simplefilter('ignore', BioPythonWarning)
9+
810class PipelineSuite :
911 """
1012 Benchmarks for microbiorust-py vs BioPython.
1113 Measures:
1214 - Parsing Time
1315 - Peak Memory
1416 - Latency
15- - Energy Consumption (CodeCarbon)
17+ - Energy Consumption (CodeCarbon) using loop iterations
1618 """
1719
1820 timeout = 300 # 5 minutes per benchmark
@@ -35,55 +37,53 @@ def setup(self, engine, context):
3537 # Initialize energy dictionary if not already present
3638 if not hasattr (self , "_energy_joules" ):
3739 self ._energy_joules = {}
40+
41+ def _run_once (self , engine , context ):
42+ if context == 'interactive' :
43+ if engine == 'rust' :
44+ return gbk .gbk_to_faa_count (self .filepath )
45+ else :
46+ count = 0
47+ for record in SeqIO .parse (self .filepath , "genbank" ):
48+ for feature in record .features :
49+ if feature .type == "CDS" :
50+ _ = feature .extract (record .seq ).translate ()
51+ count += 1
52+ return count
53+ else : # pipeline context
54+ cli_cmd = self .rust_cli if engine == 'rust' else self .python_cli
55+ return subprocess .run (
56+ ["python" , cli_cmd , self .filepath ],
57+ capture_output = True , check = True , timeout = self .timeout
58+ ).returncode
59+
60+ # --- RUN REPEATEDLY for codecarbon otherwise the script was finishing before it could be measured ---
61+ def _run_repeatedly (self , engine , context , iterations ):
62+ """Calls the logic many times to make energy measurable."""
63+ last_result = None
64+ for _ in range (iterations ):
65+ last_result = self ._run_once (engine , context )
66+ return last_result
3867
3968 # --- CORE LOGIC WITH CODECARBON ---
def track_energy(self, engine, context):
    """
    Measure the average energy of one workload run using CodeCarbon.

    The workload is executed many times inside a single tracker session
    (500 iterations for the 'rust' engine, 50 otherwise) and the total
    is divided by the iteration count.

    Parameters:
    - engine: forwarded to self._run_repeatedly; 'rust' selects the
      higher iteration count.
    - context: forwarded to self._run_repeatedly.

    Returns:
    - Energy per iteration in Joules, computed from the tracker's
      `total_energy` reading interpreted as kWh.

    Any exception raised by the workload propagates to the caller; the
    tracker is always stopped first.
    """
    os.environ["CODECARBON_CARBON_INTENSITY"] = "475"
    tracker = OfflineEmissionsTracker(
        measure_power_secs=0.1, log_level="CRITICAL", country_iso_code="USA"
    )
    # Presumably the Rust path is much faster per run and therefore needs
    # more repetitions to yield a measurable reading -- confirm.
    iterations = 500 if engine == 'rust' else 50

    tracker.start()
    try:
        self._run_repeatedly(engine, context, iterations)
    finally:
        # Stop the tracker even when the workload fails.
        tracker.stop()

    # NOTE(review): if the tracker has no `total_energy` attribute, this
    # default silently reports 0 J -- confirm against the installed
    # CodeCarbon version.
    energy_kwh = getattr(tracker, "total_energy", 0)
    # 1 kWh = 3,600,000 J; average over the repetitions.
    return (energy_kwh * 3_600_000) / iterations
8787
8888 # --- 1. PRIMARY TIME BENCHMARK (ASV automatic) ---
8989 def time_process_all (self , engine , context ):
0 commit comments