diff --git a/compliance/TEST01/run_verification.py b/compliance/TEST01/run_verification.py index 921e46c70b..9b5d46c3c7 100644 --- a/compliance/TEST01/run_verification.py +++ b/compliance/TEST01/run_verification.py @@ -104,24 +104,24 @@ def main(): unixmode_str = unixmode if unixmode == "" else unixmode + " " # run verify accuracy - verify_accuracy_command = ( - sys.executable + " " - + verify_accuracy_binary - + " --dtype " - + args.dtype - + unixmode_str - + " -r " - + os.path.join(results_dir, "accuracy", "mlperf_log_accuracy.json") - + " -t " - + os.path.join(compliance_dir, "mlperf_log_accuracy.json") - ) + verify_accuracy_command = [ + sys.executable + , verify_accuracy_binary + , "--dtype" + , args.dtype + , unixmode_str + , "--reference_accuracy" + , os.path.join(results_dir, "accuracy", "mlperf_log_accuracy.json") + , "--test_accuracy" + , os.path.join(compliance_dir, "mlperf_log_accuracy.json")] + verify_accuracy_command = [arg.strip() for arg in verify_accuracy_command if arg.strip() != ""] + try: with open("verify_accuracy.txt", "w") as f: process = subprocess.Popen( verify_accuracy_command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, - shell=True, text=True ) # Write output to both console and file @@ -145,16 +145,14 @@ def main(): verify_performance_binary = os.path.join( os.path.dirname(__file__), "verify_performance.py" ) - verify_performance_command = ( - sys.executable + " " - + verify_performance_binary - + " -r" - + os.path.join(results_dir, "performance", - "run_1", "mlperf_log_detail.txt") - + " -t" - + os.path.join(compliance_dir, "mlperf_log_detail.txt") - ) - + verify_performance_command = [ + sys.executable, + verify_performance_binary, + "-r", + os.path.join(results_dir, "performance", "run_1", "mlperf_log_detail.txt"), + "-t", + os.path.join(compliance_dir, "mlperf_log_detail.txt") + ] try: with open("verify_performance.txt", "w") as f: process = subprocess.Popen( @@ -162,7 +160,6 @@ def main(): stdout=subprocess.PIPE, 
stderr=subprocess.STDOUT, text=True, - shell=True, ) # Write output to both console and file for line in process.stdout: diff --git a/compliance/TEST04/run_verification.py b/compliance/TEST04/run_verification.py index 4584e59c46..335dbb9f2d 100644 --- a/compliance/TEST04/run_verification.py +++ b/compliance/TEST04/run_verification.py @@ -57,16 +57,14 @@ def main(): verify_performance_binary = os.path.join( os.path.dirname(__file__), "verify_performance.py" ) - verify_performance_command = ( - sys.executable + " " - + verify_performance_binary - + " -r" - + os.path.join(results_dir, "performance", - "run_1", "mlperf_log_summary.txt") - + " -t" - + os.path.join(compliance_dir, "mlperf_log_summary.txt") - ) - + verify_performance_command = [ + sys.executable, + verify_performance_binary, + "-r", + os.path.join(results_dir, "performance", "run_1", "mlperf_log_summary.txt"), + "-t", + os.path.join(compliance_dir, "mlperf_log_summary.txt") + ] try: with open("verify_performance.txt", "w") as f: process = subprocess.Popen( @@ -74,7 +72,6 @@ def main(): stdout=subprocess.PIPE, # capture output stderr=subprocess.STDOUT, text=True, # decode output as text - shell=True, ) # Write output to both console and file for line in process.stdout: diff --git a/language/bert/accuracy-squad.py b/language/bert/accuracy-squad.py index 514258de38..56c26ddb53 100644 --- a/language/bert/accuracy-squad.py +++ b/language/bert/accuracy-squad.py @@ -514,14 +514,19 @@ def append_feature(feature): ) print("Evaluating predictions...") - cmd = "python3 {:}/evaluate_v1.1.py {:} {:} {}".format( - os.path.dirname(os.path.abspath(__file__)), + cmd = [ + "python3", + os.path.join( + os.path.dirname(os.path.abspath(__file__)), + "evaluate_v1.1.py" + ), args.val_data, args.out_file, - "--max_examples {}".format( - args.max_examples) if args.max_examples else "", - ) - subprocess.check_call(cmd, shell=True) + "--max_examples" if args.max_examples else "", + str(args.max_examples) if args.max_examples else "" 
+    ] +    cmd = [arg for arg in cmd if arg.strip() != ""] +    subprocess.check_call(cmd) if __name__ == "__main__": diff --git a/language/bert/run.py b/language/bert/run.py index 5a35148904..58451e6b3b 100644 --- a/language/bert/run.py +++ b/language/bert/run.py @@ -99,7 +99,6 @@ def get_args(): def main(): args = get_args() - sut = None if not args.network or args.network == "sut": @@ -199,15 +198,19 @@ def main(): sut.sut, sut.qsl.qsl, settings, log_settings, args.audit_conf ) if args.accuracy and not os.environ.get("SKIP_VERIFY_ACCURACY"): - cmd = "python3 {:}/accuracy-squad.py {}".format( - os.path.dirname(os.path.abspath(__file__)), - ( - "--max_examples {}".format(args.max_examples) - if args.max_examples - else "" + cmd = [ + "python3", + os.path.join( + os.path.dirname(os.path.abspath(__file__)), "accuracy-squad.py" ), - ) - subprocess.check_call(cmd, shell=True) + "--max_examples" if args.max_examples + else "", + str(args.max_examples) + if args.max_examples + else "", + ] + cmd = [arg for arg in cmd if arg.strip() != ""] + subprocess.check_call(cmd) print("Done!") diff --git a/retired_benchmarks/speech_recognition/rnnt/run.py b/retired_benchmarks/speech_recognition/rnnt/run.py index 1a8e4cef2d..8629c8597c 100644 --- a/retired_benchmarks/speech_recognition/rnnt/run.py +++ b/retired_benchmarks/speech_recognition/rnnt/run.py @@ -115,9 +115,18 @@ def main(): ) if args.accuracy: - cmd = f"python3 accuracy_eval.py --log_dir {log_path} --dataset_dir {args.dataset_dir} --manifest {args.manifest}" + cmd = [ + "python3", + "accuracy_eval.py", + "--log_dir", + log_path, + "--dataset_dir", + args.dataset_dir, + "--manifest", + args.manifest, + ] print(f"Running accuracy script: {cmd}") - subprocess.check_call(cmd, shell=True) + subprocess.check_call(cmd) print("Done!") diff --git a/retired_benchmarks/translation/gnmt/tensorflow/run_task.py b/retired_benchmarks/translation/gnmt/tensorflow/run_task.py index b2c0be68fb..0510a1ab0f 100644 --- 
a/retired_benchmarks/translation/gnmt/tensorflow/run_task.py +++ b/retired_benchmarks/translation/gnmt/tensorflow/run_task.py @@ -88,44 +88,25 @@ outpath = os.path.join(args.output_path, "output", "console_out_gnmt.txt") -cmd = ( - "python -m nmt.nmt \ - --src=en --tgt=de \ - --ckpt=" - + cpk_path - + " \ - --hparams_path=" - + haparams_path - + " \ - --out_dir=" - + out_dir - + " \ - --vocab_prefix=" - + vocab_prefix - + " \ - --inference_input_file=" - + inference_input_file - + " \ - --inference_output_file=" - + inference_output_file - + " \ - --inference_ref_file=" - + inference_ref_file - + " \ - --infer_batch_size=" - + args.batch_size - + " \ - --num_inter_threads=" - + args.num_inter_threads - + " \ - --num_intra_threads=" - + args.num_intra_threads - + " \ - --iterations=" - + str(iterations) - + " \ - --run=" - + args.run -) -return_code = subprocess.call(cmd, shell=True) +cmd = [ + "python", + "-m", + "nmt.nmt", + "--src=en", + "--tgt=de", + "--ckpt={}".format(cpk_path), + "--hparams_path={}".format(haparams_path), + "--out_dir={}".format(out_dir), + "--vocab_prefix={}".format(vocab_prefix), + "--inference_input_file={}".format(inference_input_file), + "--inference_output_file={}".format(inference_output_file), + "--inference_ref_file={}".format(inference_ref_file), + "--infer_batch_size={}".format(args.batch_size), + "--num_inter_threads={}".format(args.num_inter_threads), + "--num_intra_threads={}".format(args.num_intra_threads), + "--iterations={}".format(iterations), + "--run={}".format(args.run) +] + +return_code = subprocess.call(cmd) diff --git a/text_to_video/wan-2.2-t2v-a14b/run_mlperf.py b/text_to_video/wan-2.2-t2v-a14b/run_mlperf.py index 147624b340..ab73c25966 100644 --- a/text_to_video/wan-2.2-t2v-a14b/run_mlperf.py +++ b/text_to_video/wan-2.2-t2v-a14b/run_mlperf.py @@ -46,7 +46,8 @@ def load_prompts(dataset_path): class Model: - def __init__(self, model_path, device, config, prompts, fixed_latent=None, rank=0): + def __init__(self, 
model_path, device, config, + prompts, fixed_latent=None, rank=0): self.device = device self.rank = rank self.height = config["height"] @@ -106,7 +107,8 @@ def flush_queries(self): class DebugModel: - def __init__(self, model_path, device, config, prompts, fixed_latent=None, rank=0): + def __init__(self, model_path, device, config, + prompts, fixed_latent=None, rank=0): self.prompts = prompts def issue_queries(self, query_samples): @@ -186,7 +188,8 @@ def get_args(): parser.add_argument( "--scenario", default="SingleStream", - help="mlperf benchmark scenario, one of " + str(list(SCENARIO_MAP.keys())), + help="mlperf benchmark scenario, one of " + + str(list(SCENARIO_MAP.keys())), ) parser.add_argument( "--user_conf", @@ -202,7 +205,10 @@ def get_args(): help="performance sample count", default=5000, ) - parser.add_argument("--accuracy", action="store_true", help="enable accuracy pass") + parser.add_argument( + "--accuracy", + action="store_true", + help="enable accuracy pass") # Dont overwrite these for official submission parser.add_argument("--count", type=int, help="dataset items to use") parser.add_argument("--time", type=int, help="time to scan in seconds") @@ -271,7 +277,10 @@ def run_mlperf(args, config): audit_config = os.path.abspath(args.audit_conf) if os.path.exists(audit_config): - settings.FromConfig(audit_config, "wan-2.2-t2v-a14b", args.scenario) + settings.FromConfig( + audit_config, + "wan-2.2-t2v-a14b", + args.scenario) settings.scenario = SCENARIO_MAP[args.scenario] settings.mode = lg.TestMode.PerformanceOnly @@ -297,8 +306,10 @@ def run_mlperf(args, config): if args.samples_per_query: settings.multi_stream_samples_per_query = args.samples_per_query if args.max_latency: - settings.server_target_latency_ns = int(args.max_latency * NANO_SEC) - settings.multi_stream_expected_latency_ns = int(args.max_latency * NANO_SEC) + settings.server_target_latency_ns = int( + args.max_latency * NANO_SEC) + settings.multi_stream_expected_latency_ns = int( + 
args.max_latency * NANO_SEC) performance_sample_count = ( args.performance_sample_count @@ -311,7 +322,8 @@ def run_mlperf(args, config): count, performance_sample_count, load_query_samples, unload_query_samples ) - lg.StartTestWithLogSettings(sut, qsl, settings, log_settings, audit_config) + lg.StartTestWithLogSettings( + sut, qsl, settings, log_settings, audit_config) lg.DestroyQSL(qsl) lg.DestroySUT(sut) diff --git a/tools/submission/generate_final_report.py b/tools/submission/generate_final_report.py index f0cf069924..4d949bb6fb 100644 --- a/tools/submission/generate_final_report.py +++ b/tools/submission/generate_final_report.py @@ -101,11 +101,11 @@ def main(): "singlestream": "SingleStream", "multistream": "MultiStream", "server": "Server", - "interactive":"Interactive", + "interactive": "Interactive", "offline": "Offline", } - df["Scenario"] = df["Scenario"].apply(lambda x: scenario_map.get(str(x).lower(), x)) - + df["Scenario"] = df["Scenario"].apply( + lambda x: scenario_map.get(str(x).lower(), x)) output = args.input[:-4] writer = pd.ExcelWriter(output + ".xlsx", engine="xlsxwriter") diff --git a/tools/submission/submission_checker/checks/performance_check.py b/tools/submission/submission_checker/checks/performance_check.py index 6a14c254d4..c66f2e87e6 100644 --- a/tools/submission/submission_checker/checks/performance_check.py +++ b/tools/submission/submission_checker/checks/performance_check.py @@ -85,13 +85,15 @@ def missing_check(self): self.log.error("Performance log missing at %s", self.path) return False return True - + def scenarios_check(self): if self.submission_logs.loader_data.get("check_scenarios", False): return True else: - missing_scenarios = self.submission_logs.loader_data.get("missing_scenarios", []) - unknown_scenarios = self.submission_logs.loader_data.get("unknown_scenarios", []) + missing_scenarios = self.submission_logs.loader_data.get( + "missing_scenarios", []) + unknown_scenarios = self.submission_logs.loader_data.get( + 
"unknown_scenarios", []) if len(missing_scenarios) > 0: self.log.error( "%s does not have all required scenarios, missing %s", @@ -116,7 +118,8 @@ def loadgen_errors_check(self): bool: True if no blocking Loadgen errors are present, False otherwise. """ - compliance_skip = self.submission_logs.loader_data.get("compliance_skip", False) + compliance_skip = self.submission_logs.loader_data.get( + "compliance_skip", False) if self.mlperf_log.has_error(): has_critical_errors = False if self.config.ignore_uncommited: @@ -127,7 +130,7 @@ def loadgen_errors_check(self): ): has_critical_errors = True if ( - not compliance_skip + not compliance_skip and "Multiple conf files are used" in error["value"] ): has_critical_errors = True @@ -454,7 +457,7 @@ def inferred_check(self): ("singlestream", "offline") ] if (self.scenario.lower(), self.scenario_fixed.lower() - ) not in list_inferred: + ) not in list_inferred: self.log.error( "Result for scenario %s can not be inferred from %s for: %s", self.scenario_fixed, @@ -543,12 +546,12 @@ def get_inferred_result(self, res): res = qps_wo_loadgen_overhead if (self.scenario_fixed in ["Offline"] - ) and self.scenario in ["MultiStream"]: + ) and self.scenario in ["MultiStream"]: inferred = True res = samples_per_query * S_TO_MS / (latency_mean / MS_TO_NS) if (self.scenario_fixed in ["MultiStream"] - ) and self.scenario in ["SingleStream"]: + ) and self.scenario in ["SingleStream"]: inferred = True # samples_per_query does not match with the one reported in the logs # when inferring MultiStream from SingleStream @@ -565,6 +568,6 @@ def get_inferred_result(self, res): else: res = (latency_99_percentile * samples_per_query) / MS_TO_NS if (self.scenario_fixed in ["Interactive"] - ) and self.scenario not in ["Server"]: + ) and self.scenario not in ["Server"]: is_valid = False return res, is_valid diff --git a/tools/submission/submission_checker/utils.py b/tools/submission/submission_checker/utils.py index 7d1daf0e72..7ff4fd020a 100644 --- 
a/tools/submission/submission_checker/utils.py +++ b/tools/submission/submission_checker/utils.py @@ -107,17 +107,19 @@ def is_number(s): return True except ValueError: return False - + + def lower_list(l): return [str(e).lower() for e in l] + def contains_list(l1, l2): # Check if l1 contains all elements of l2 missing = [] for e in l2: if e not in l1: missing.append(e) - return missing, len(missing) == 0 + return missing, len(missing) == 0 def get_performance_metric( @@ -317,7 +319,7 @@ def get_power_metric(config, scenario_fixed, log_path, is_valid, res): samples_per_query = 8 if (scenario_fixed in ["MultiStream"] - ) and scenario in ["SingleStream"]: + ) and scenario in ["SingleStream"]: power_metric = ( avg_power * power_duration * samples_per_query * 1000 / num_queries ) diff --git a/vision/medical_imaging/3d-unet-kits19/run.py b/vision/medical_imaging/3d-unet-kits19/run.py index 9d48f99283..1c7fa5ae57 100644 --- a/vision/medical_imaging/3d-unet-kits19/run.py +++ b/vision/medical_imaging/3d-unet-kits19/run.py @@ -133,8 +133,8 @@ def main(): "MultiStream": lg.TestScenario.MultiStream, } - args = get_args() + args = get_args() # instantiate SUT as per requested backend; QSL is also instantiated if args.backend == "pytorch": from pytorch_SUT import get_sut @@ -181,8 +181,13 @@ def main(): # if needed check accuracy if args.accuracy and not os.environ.get("SKIP_VERIFY_ACCURACY", False): print("Checking accuracy...") - cmd = f"python3 accuracy_kits.py --preprocessed_data_dir={args.preprocessed_data_dir} --log_file={os.path.join(log_path, 'mlperf_log_accuracy.json')}" - subprocess.check_call(cmd, shell=True) + cmd = [ + "python3", + "accuracy_kits.py", + "--preprocessed_data_dir={}".format(args.preprocessed_data_dir), + "--log_file={}".format(os.path.join(log_path, "mlperf_log_accuracy.json")), + ] + subprocess.check_call(cmd) # all done print("Done!")