diff --git a/compliance/TEST01/run_verification.py b/compliance/TEST01/run_verification.py index 921e46c70b..9b5d46c3c7 100644 --- a/compliance/TEST01/run_verification.py +++ b/compliance/TEST01/run_verification.py @@ -104,24 +104,24 @@ def main(): unixmode_str = unixmode if unixmode == "" else unixmode + " " # run verify accuracy - verify_accuracy_command = ( - sys.executable + " " - + verify_accuracy_binary - + " --dtype " - + args.dtype - + unixmode_str - + " -r " - + os.path.join(results_dir, "accuracy", "mlperf_log_accuracy.json") - + " -t " - + os.path.join(compliance_dir, "mlperf_log_accuracy.json") - ) + verify_accuracy_command = [ + sys.executable + , verify_accuracy_binary + , "--dtype" + , args.dtype + , unixmode_str + , "--reference_accuracy" + , os.path.join(results_dir, "accuracy", "mlperf_log_accuracy.json") + , "--test_accuracy" + , os.path.join(compliance_dir, "mlperf_log_accuracy.json")] + verify_accuracy_command = [arg.strip() for arg in verify_accuracy_command if arg.strip() != ""] + try: with open("verify_accuracy.txt", "w") as f: process = subprocess.Popen( verify_accuracy_command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, - shell=True, text=True ) # Write output to both console and file @@ -145,16 +145,14 @@ def main(): verify_performance_binary = os.path.join( os.path.dirname(__file__), "verify_performance.py" ) - verify_performance_command = ( - sys.executable + " " - + verify_performance_binary - + " -r" - + os.path.join(results_dir, "performance", - "run_1", "mlperf_log_detail.txt") - + " -t" - + os.path.join(compliance_dir, "mlperf_log_detail.txt") - ) - + verify_performance_command = [ + sys.executable, + verify_performance_binary, + "-r", + os.path.join(results_dir, "performance", "run_1", "mlperf_log_detail.txt"), + "-t", + os.path.join(compliance_dir, "mlperf_log_detail.txt") + ] try: with open("verify_performance.txt", "w") as f: process = subprocess.Popen( @@ -162,7 +160,6 @@ def main(): stdout=subprocess.PIPE, 
stderr=subprocess.STDOUT, text=True, - shell=True, ) # Write output to both console and file for line in process.stdout: diff --git a/compliance/TEST04/run_verification.py b/compliance/TEST04/run_verification.py index 4584e59c46..335dbb9f2d 100644 --- a/compliance/TEST04/run_verification.py +++ b/compliance/TEST04/run_verification.py @@ -57,16 +57,14 @@ def main(): verify_performance_binary = os.path.join( os.path.dirname(__file__), "verify_performance.py" ) - verify_performance_command = ( - sys.executable + " " - + verify_performance_binary - + " -r" - + os.path.join(results_dir, "performance", - "run_1", "mlperf_log_summary.txt") - + " -t" - + os.path.join(compliance_dir, "mlperf_log_summary.txt") - ) - + verify_performance_command = [ + sys.executable, + verify_performance_binary, + "-r", + os.path.join(results_dir, "performance", "run_1", "mlperf_log_summary.txt"), + "-t", + os.path.join(compliance_dir, "mlperf_log_summary.txt") + ] try: with open("verify_performance.txt", "w") as f: process = subprocess.Popen( @@ -74,7 +72,6 @@ def main(): stdout=subprocess.PIPE, # capture output stderr=subprocess.STDOUT, text=True, # decode output as text - shell=True, ) # Write output to both console and file for line in process.stdout: diff --git a/language/bert/accuracy-squad.py b/language/bert/accuracy-squad.py index 514258de38..56c26ddb53 100644 --- a/language/bert/accuracy-squad.py +++ b/language/bert/accuracy-squad.py @@ -514,14 +514,19 @@ def append_feature(feature): ) print("Evaluating predictions...") - cmd = "python3 {:}/evaluate_v1.1.py {:} {:} {}".format( - os.path.dirname(os.path.abspath(__file__)), + cmd = [ + "python3", + os.path.join( + os.path.dirname(os.path.abspath(__file__)), + "evaluate_v1.1.py" + ), args.val_data, args.out_file, - "--max_examples {}".format( - args.max_examples) if args.max_examples else "", - ) - subprocess.check_call(cmd, shell=True) + "--max_examples" if args.max_examples else "", + str(args.max_examples) if args.max_examples else "" 
+    ] +    cmd = [arg for arg in cmd if arg.strip() != ""] +    subprocess.check_call(cmd) if __name__ == "__main__": diff --git a/language/bert/run.py b/language/bert/run.py index 5a35148904..58451e6b3b 100644 --- a/language/bert/run.py +++ b/language/bert/run.py @@ -99,7 +99,6 @@ def get_args(): def main(): args = get_args() - sut = None if not args.network or args.network == "sut": @@ -199,15 +198,19 @@ def main(): sut.sut, sut.qsl.qsl, settings, log_settings, args.audit_conf ) if args.accuracy and not os.environ.get("SKIP_VERIFY_ACCURACY"): - cmd = "python3 {:}/accuracy-squad.py {}".format( - os.path.dirname(os.path.abspath(__file__)), - ( - "--max_examples {}".format(args.max_examples) - if args.max_examples - else "" + cmd = [ + "python3", + os.path.join( + os.path.dirname(os.path.abspath(__file__)), "accuracy-squad.py" ), - ) - subprocess.check_call(cmd, shell=True) + "--max_examples" if args.max_examples + else "", + str(args.max_examples) + if args.max_examples + else "", + ] + cmd = [arg for arg in cmd if arg.strip() != ""] + subprocess.check_call(cmd) print("Done!") diff --git a/retired_benchmarks/speech_recognition/rnnt/run.py b/retired_benchmarks/speech_recognition/rnnt/run.py index 1a8e4cef2d..8629c8597c 100644 --- a/retired_benchmarks/speech_recognition/rnnt/run.py +++ b/retired_benchmarks/speech_recognition/rnnt/run.py @@ -115,9 +115,18 @@ def main(): ) if args.accuracy: - cmd = f"python3 accuracy_eval.py --log_dir {log_path} --dataset_dir {args.dataset_dir} --manifest {args.manifest}" + cmd = [ + "python3", + "accuracy_eval.py", + "--log_dir", + log_path, + "--dataset_dir", + args.dataset_dir, + "--manifest", + args.manifest, + ] print(f"Running accuracy script: {cmd}") - subprocess.check_call(cmd, shell=True) + subprocess.check_call(cmd) print("Done!") diff --git a/retired_benchmarks/translation/gnmt/tensorflow/run_task.py b/retired_benchmarks/translation/gnmt/tensorflow/run_task.py index b2c0be68fb..0510a1ab0f 100644 --- 
a/retired_benchmarks/translation/gnmt/tensorflow/run_task.py +++ b/retired_benchmarks/translation/gnmt/tensorflow/run_task.py @@ -88,44 +88,25 @@ outpath = os.path.join(args.output_path, "output", "console_out_gnmt.txt") -cmd = ( - "python -m nmt.nmt \ - --src=en --tgt=de \ - --ckpt=" - + cpk_path - + " \ - --hparams_path=" - + haparams_path - + " \ - --out_dir=" - + out_dir - + " \ - --vocab_prefix=" - + vocab_prefix - + " \ - --inference_input_file=" - + inference_input_file - + " \ - --inference_output_file=" - + inference_output_file - + " \ - --inference_ref_file=" - + inference_ref_file - + " \ - --infer_batch_size=" - + args.batch_size - + " \ - --num_inter_threads=" - + args.num_inter_threads - + " \ - --num_intra_threads=" - + args.num_intra_threads - + " \ - --iterations=" - + str(iterations) - + " \ - --run=" - + args.run -) -return_code = subprocess.call(cmd, shell=True) +cmd = [ + "python", + "-m", + "nmt.nmt", + "--src=en", + "--tgt=de", + "--ckpt={}".format(cpk_path), + "--hparams_path={}".format(haparams_path), + "--out_dir={}".format(out_dir), + "--vocab_prefix={}".format(vocab_prefix), + "--inference_input_file={}".format(inference_input_file), + "--inference_output_file={}".format(inference_output_file), + "--inference_ref_file={}".format(inference_ref_file), + "--infer_batch_size={}".format(args.batch_size), + "--num_inter_threads={}".format(args.num_inter_threads), + "--num_intra_threads={}".format(args.num_intra_threads), + "--iterations={}".format(iterations), + "--run={}".format(args.run) +] + +return_code = subprocess.call(cmd) diff --git a/text_to_video/wan-2.2-t2v-a14b/run_mlperf.py b/text_to_video/wan-2.2-t2v-a14b/run_mlperf.py index 147624b340..ab73c25966 100644 --- a/text_to_video/wan-2.2-t2v-a14b/run_mlperf.py +++ b/text_to_video/wan-2.2-t2v-a14b/run_mlperf.py @@ -46,7 +46,8 @@ def load_prompts(dataset_path): class Model: - def __init__(self, model_path, device, config, prompts, fixed_latent=None, rank=0): + def __init__(self, 
model_path, device, config, + prompts, fixed_latent=None, rank=0): self.device = device self.rank = rank self.height = config["height"] @@ -106,7 +107,8 @@ def flush_queries(self): class DebugModel: - def __init__(self, model_path, device, config, prompts, fixed_latent=None, rank=0): + def __init__(self, model_path, device, config, + prompts, fixed_latent=None, rank=0): self.prompts = prompts def issue_queries(self, query_samples): @@ -186,7 +188,8 @@ def get_args(): parser.add_argument( "--scenario", default="SingleStream", - help="mlperf benchmark scenario, one of " + str(list(SCENARIO_MAP.keys())), + help="mlperf benchmark scenario, one of " + + str(list(SCENARIO_MAP.keys())), ) parser.add_argument( "--user_conf", @@ -202,7 +205,10 @@ def get_args(): help="performance sample count", default=5000, ) - parser.add_argument("--accuracy", action="store_true", help="enable accuracy pass") + parser.add_argument( + "--accuracy", + action="store_true", + help="enable accuracy pass") # Dont overwrite these for official submission parser.add_argument("--count", type=int, help="dataset items to use") parser.add_argument("--time", type=int, help="time to scan in seconds") @@ -271,7 +277,10 @@ def run_mlperf(args, config): audit_config = os.path.abspath(args.audit_conf) if os.path.exists(audit_config): - settings.FromConfig(audit_config, "wan-2.2-t2v-a14b", args.scenario) + settings.FromConfig( + audit_config, + "wan-2.2-t2v-a14b", + args.scenario) settings.scenario = SCENARIO_MAP[args.scenario] settings.mode = lg.TestMode.PerformanceOnly @@ -297,8 +306,10 @@ def run_mlperf(args, config): if args.samples_per_query: settings.multi_stream_samples_per_query = args.samples_per_query if args.max_latency: - settings.server_target_latency_ns = int(args.max_latency * NANO_SEC) - settings.multi_stream_expected_latency_ns = int(args.max_latency * NANO_SEC) + settings.server_target_latency_ns = int( + args.max_latency * NANO_SEC) + settings.multi_stream_expected_latency_ns = int( + 
args.max_latency * NANO_SEC) performance_sample_count = ( args.performance_sample_count @@ -311,7 +322,8 @@ def run_mlperf(args, config): count, performance_sample_count, load_query_samples, unload_query_samples ) - lg.StartTestWithLogSettings(sut, qsl, settings, log_settings, audit_config) + lg.StartTestWithLogSettings( + sut, qsl, settings, log_settings, audit_config) lg.DestroyQSL(qsl) lg.DestroySUT(sut) diff --git a/tools/submission/generate_final_report.py b/tools/submission/generate_final_report.py index f0cf069924..4d949bb6fb 100644 --- a/tools/submission/generate_final_report.py +++ b/tools/submission/generate_final_report.py @@ -101,11 +101,11 @@ def main(): "singlestream": "SingleStream", "multistream": "MultiStream", "server": "Server", - "interactive":"Interactive", + "interactive": "Interactive", "offline": "Offline", } - df["Scenario"] = df["Scenario"].apply(lambda x: scenario_map.get(str(x).lower(), x)) - + df["Scenario"] = df["Scenario"].apply( + lambda x: scenario_map.get(str(x).lower(), x)) output = args.input[:-4] writer = pd.ExcelWriter(output + ".xlsx", engine="xlsxwriter") diff --git a/tools/submission/submission_checker/checks/performance_check.py b/tools/submission/submission_checker/checks/performance_check.py index 6a14c254d4..c66f2e87e6 100644 --- a/tools/submission/submission_checker/checks/performance_check.py +++ b/tools/submission/submission_checker/checks/performance_check.py @@ -85,13 +85,15 @@ def missing_check(self): self.log.error("Performance log missing at %s", self.path) return False return True - + def scenarios_check(self): if self.submission_logs.loader_data.get("check_scenarios", False): return True else: - missing_scenarios = self.submission_logs.loader_data.get("missing_scenarios", []) - unknown_scenarios = self.submission_logs.loader_data.get("unknown_scenarios", []) + missing_scenarios = self.submission_logs.loader_data.get( + "missing_scenarios", []) + unknown_scenarios = self.submission_logs.loader_data.get( + 
"unknown_scenarios", []) if len(missing_scenarios) > 0: self.log.error( "%s does not have all required scenarios, missing %s", @@ -116,7 +118,8 @@ def loadgen_errors_check(self): bool: True if no blocking Loadgen errors are present, False otherwise. """ - compliance_skip = self.submission_logs.loader_data.get("compliance_skip", False) + compliance_skip = self.submission_logs.loader_data.get( + "compliance_skip", False) if self.mlperf_log.has_error(): has_critical_errors = False if self.config.ignore_uncommited: @@ -127,7 +130,7 @@ def loadgen_errors_check(self): ): has_critical_errors = True if ( - not compliance_skip + not compliance_skip and "Multiple conf files are used" in error["value"] ): has_critical_errors = True @@ -454,7 +457,7 @@ def inferred_check(self): ("singlestream", "offline") ] if (self.scenario.lower(), self.scenario_fixed.lower() - ) not in list_inferred: + ) not in list_inferred: self.log.error( "Result for scenario %s can not be inferred from %s for: %s", self.scenario_fixed, @@ -543,12 +546,12 @@ def get_inferred_result(self, res): res = qps_wo_loadgen_overhead if (self.scenario_fixed in ["Offline"] - ) and self.scenario in ["MultiStream"]: + ) and self.scenario in ["MultiStream"]: inferred = True res = samples_per_query * S_TO_MS / (latency_mean / MS_TO_NS) if (self.scenario_fixed in ["MultiStream"] - ) and self.scenario in ["SingleStream"]: + ) and self.scenario in ["SingleStream"]: inferred = True # samples_per_query does not match with the one reported in the logs # when inferring MultiStream from SingleStream @@ -565,6 +568,6 @@ def get_inferred_result(self, res): else: res = (latency_99_percentile * samples_per_query) / MS_TO_NS if (self.scenario_fixed in ["Interactive"] - ) and self.scenario not in ["Server"]: + ) and self.scenario not in ["Server"]: is_valid = False return res, is_valid diff --git a/tools/submission/submission_checker/utils.py b/tools/submission/submission_checker/utils.py index 7d1daf0e72..7ff4fd020a 100644 --- 
a/tools/submission/submission_checker/utils.py +++ b/tools/submission/submission_checker/utils.py @@ -107,17 +107,19 @@ def is_number(s): return True except ValueError: return False - + + def lower_list(l): return [str(e).lower() for e in l] + def contains_list(l1, l2): # Check if l1 contains all elements of l2 missing = [] for e in l2: if e not in l1: missing.append(e) - return missing, len(missing) == 0 + return missing, len(missing) == 0 def get_performance_metric( @@ -317,7 +319,7 @@ def get_power_metric(config, scenario_fixed, log_path, is_valid, res): samples_per_query = 8 if (scenario_fixed in ["MultiStream"] - ) and scenario in ["SingleStream"]: + ) and scenario in ["SingleStream"]: power_metric = ( avg_power * power_duration * samples_per_query * 1000 / num_queries ) diff --git a/vision/medical_imaging/3d-unet-kits19/run.py b/vision/medical_imaging/3d-unet-kits19/run.py index 9d48f99283..1c7fa5ae57 100644 --- a/vision/medical_imaging/3d-unet-kits19/run.py +++ b/vision/medical_imaging/3d-unet-kits19/run.py @@ -133,8 +133,8 @@ def main(): "MultiStream": lg.TestScenario.MultiStream, } - args = get_args() + args = get_args() # instantiate SUT as per requested backend; QSL is also instantiated if args.backend == "pytorch": from pytorch_SUT import get_sut @@ -181,8 +181,13 @@ def main(): # if needed check accuracy if args.accuracy and not os.environ.get("SKIP_VERIFY_ACCURACY", False): print("Checking accuracy...") - cmd = f"python3 accuracy_kits.py --preprocessed_data_dir={args.preprocessed_data_dir} --log_file={os.path.join(log_path, 'mlperf_log_accuracy.json')}" - subprocess.check_call(cmd, shell=True) + cmd = [ + "python3", + "accuracy_kits.py", + "--preprocessed_data_dir={}".format(args.preprocessed_data_dir), + "--log_file={}".format(os.path.join(log_path, "mlperf_log_accuracy.json")), + ] + subprocess.check_call(cmd) # all done print("Done!")