diff --git a/.github/workflows/auto-update-dev.yml b/.github/workflows/auto-update-dev.yml index f75f9874ba..1eaf1a987e 100644 --- a/.github/workflows/auto-update-dev.yml +++ b/.github/workflows/auto-update-dev.yml @@ -3,7 +3,7 @@ name: Auto-Update Dev Branches from Master on: push: branches: - - master # Trigger workflow on commits to 'master' branch. + - master # Trigger workflow on commits to 'master' branch workflow_dispatch: {} jobs: diff --git a/text_to_video/wan-2.2-t2v-a14b/data/samples.txt b/text_to_video/wan-2.2-t2v-a14b/data/samples.txt new file mode 100644 index 0000000000..bee5b9a7b3 --- /dev/null +++ b/text_to_video/wan-2.2-t2v-a14b/data/samples.txt @@ -0,0 +1,10 @@ +130, A boat sailing leisurely along the Seine River with the Eiffel Tower in background, pixel art +106, A panda drinking coffee in a cafe in Paris, watercolor painting +84, The bund Shanghai, black and white +59, an elephant spraying itself with water using its trunk to cool down +12, a car turning a corner +31, a truck anchored in a tranquil bay +86, The bund Shanghai, in cyberpunk style +122, Gwen Stacy reading a book, in cyberpunk style +233, skyscraper +96, a shark is swimming in the ocean, animated style diff --git a/text_to_video/wan-2.2-t2v-a14b/data/samples_filename_ids.txt b/text_to_video/wan-2.2-t2v-a14b/data/samples_filename_ids.txt new file mode 100644 index 0000000000..8d9f8117a6 --- /dev/null +++ b/text_to_video/wan-2.2-t2v-a14b/data/samples_filename_ids.txt @@ -0,0 +1,10 @@ +A boat sailing leisurely along the Seine River with the Eiffel Tower in background, pixel art-0.mp4, 130.mp4 +A panda drinking coffee in a cafe in Paris, watercolor painting-0.mp4, 106.mp4 +The bund Shanghai, black and white-0.mp4, 84.mp4 +an elephant spraying itself with water using its trunk to cool down-0.mp4, 59.mp4 +a car turning a corner-0.mp4, 12.mp4 +a truck anchored in a tranquil bay-0.mp4, 31.mp4 +The bund Shanghai, in cyberpunk style-0.mp4, 86.mp4 +Gwen Stacy reading a book, in cyberpunk 
style-0.mp4, 122.mp4 +skyscraper-0.mp4, 233.mp4 +a shark is swimming in the ocean, animated style-0.mp4, 96.mp4 diff --git a/tools/submission/generate_final_report.py b/tools/submission/generate_final_report.py index f0cf069924..4d949bb6fb 100644 --- a/tools/submission/generate_final_report.py +++ b/tools/submission/generate_final_report.py @@ -101,11 +101,11 @@ def main(): "singlestream": "SingleStream", "multistream": "MultiStream", "server": "Server", - "interactive":"Interactive", + "interactive": "Interactive", "offline": "Offline", } - df["Scenario"] = df["Scenario"].apply(lambda x: scenario_map.get(str(x).lower(), x)) - + df["Scenario"] = df["Scenario"].apply( + lambda x: scenario_map.get(str(x).lower(), x)) output = args.input[:-4] writer = pd.ExcelWriter(output + ".xlsx", engine="xlsxwriter") diff --git a/tools/submission/submission_checker/checks/accuracy_check.py b/tools/submission/submission_checker/checks/accuracy_check.py index a300aa1ebb..db1b1a7559 100644 --- a/tools/submission/submission_checker/checks/accuracy_check.py +++ b/tools/submission/submission_checker/checks/accuracy_check.py @@ -2,6 +2,7 @@ from ..constants import * from ..loader import SubmissionLogs from ..configuration.configuration import Config +from ..utils import check_extra_files import re import os @@ -25,6 +26,9 @@ class AccuracyCheck(BaseCheck): - `loadgen_errors_check`: Fails if Loadgen reported non-ignored errors. - `dataset_check`: Verifies the reported sample count matches the configured dataset size unless the check is skipped. + - `extra_files_check`: For benchmarks in REQUIRED_ACC_BENCHMARK (e.g. + stable-diffusion-xl, wan-2.2-t2v-a14b), verifies required extra + artifacts (e.g. images/, videos/) exist in the accuracy directory. 
Attributes: submission_logs (SubmissionLogs): Holder for submission log paths @@ -78,6 +82,7 @@ def setup_checks(self): self.checks.append(self.accuracy_json_check) self.checks.append(self.loadgen_errors_check) self.checks.append(self.dataset_check) + self.checks.append(self.extra_files_check) def accuracy_result_check(self): """Validate reported accuracy metrics in `accuracy.txt`. @@ -234,3 +239,34 @@ def dataset_check(self): ) return False return True + + def extra_files_check(self): + """Verify required extra accuracy files for certain benchmarks. + + For models in REQUIRED_ACC_BENCHMARK (e.g. stable-diffusion-xl + images, wan-2.2-t2v-a14b videos), ensures the accuracy directory + contains the required subdirs and files. Skipped if + skip_extra_accuracy_files_check is set. + + Returns: + bool: True if the check is skipped, the model has no extra + requirements, or all required files exist; False otherwise. + """ + if self.config.skip_extra_accuracy_files_check: + return True + if self.model not in REQUIRED_ACC_BENCHMARK: + return True + if self.config.version not in REQUIRED_ACC_BENCHMARK[self.model]: + return True + acc_dir = os.path.dirname(self.path) + target_files = REQUIRED_ACC_BENCHMARK[self.model][self.config.version] + extra_files_pass, missing_files = check_extra_files( + acc_dir, target_files) + if not extra_files_pass: + self.log.error( + "%s expected to have the following extra files (%s)", + acc_dir, + missing_files, + ) + return False + return True diff --git a/tools/submission/submission_checker/checks/performance_check.py b/tools/submission/submission_checker/checks/performance_check.py index 6a14c254d4..c66f2e87e6 100644 --- a/tools/submission/submission_checker/checks/performance_check.py +++ b/tools/submission/submission_checker/checks/performance_check.py @@ -85,13 +85,15 @@ def missing_check(self): self.log.error("Performance log missing at %s", self.path) return False return True - + def scenarios_check(self): if 
self.submission_logs.loader_data.get("check_scenarios", False): return True else: - missing_scenarios = self.submission_logs.loader_data.get("missing_scenarios", []) - unknown_scenarios = self.submission_logs.loader_data.get("unknown_scenarios", []) + missing_scenarios = self.submission_logs.loader_data.get( + "missing_scenarios", []) + unknown_scenarios = self.submission_logs.loader_data.get( + "unknown_scenarios", []) if len(missing_scenarios) > 0: self.log.error( "%s does not have all required scenarios, missing %s", @@ -116,7 +118,8 @@ def loadgen_errors_check(self): bool: True if no blocking Loadgen errors are present, False otherwise. """ - compliance_skip = self.submission_logs.loader_data.get("compliance_skip", False) + compliance_skip = self.submission_logs.loader_data.get( + "compliance_skip", False) if self.mlperf_log.has_error(): has_critical_errors = False if self.config.ignore_uncommited: @@ -127,7 +130,7 @@ def loadgen_errors_check(self): ): has_critical_errors = True if ( - not compliance_skip + not compliance_skip and "Multiple conf files are used" in error["value"] ): has_critical_errors = True @@ -454,7 +457,7 @@ def inferred_check(self): ("singlestream", "offline") ] if (self.scenario.lower(), self.scenario_fixed.lower() - ) not in list_inferred: + ) not in list_inferred: self.log.error( "Result for scenario %s can not be inferred from %s for: %s", self.scenario_fixed, @@ -543,12 +546,12 @@ def get_inferred_result(self, res): res = qps_wo_loadgen_overhead if (self.scenario_fixed in ["Offline"] - ) and self.scenario in ["MultiStream"]: + ) and self.scenario in ["MultiStream"]: inferred = True res = samples_per_query * S_TO_MS / (latency_mean / MS_TO_NS) if (self.scenario_fixed in ["MultiStream"] - ) and self.scenario in ["SingleStream"]: + ) and self.scenario in ["SingleStream"]: inferred = True # samples_per_query does not match with the one reported in the logs # when inferring MultiStream from SingleStream @@ -565,6 +568,6 @@ def 
get_inferred_result(self, res): else: res = (latency_99_percentile * samples_per_query) / MS_TO_NS if (self.scenario_fixed in ["Interactive"] - ) and self.scenario not in ["Server"]: + ) and self.scenario not in ["Server"]: is_valid = False return res, is_valid diff --git a/tools/submission/submission_checker/constants.py b/tools/submission/submission_checker/constants.py index 2d43d03a59..67412ed0ca 100644 --- a/tools/submission/submission_checker/constants.py +++ b/tools/submission/submission_checker/constants.py @@ -1123,6 +1123,22 @@ "2289", ] }, + }, + "wan-2.2-t2v-a14b": { + "v6.0": { + "videos": [ + "130", + "106", + "84", + "59", + "12", + "31", + "86", + "122", + "233", + "96", + ] + }, } } REQUIRED_MEASURE_FILES = ["user.conf", "README.md"] @@ -1695,6 +1711,7 @@ "v6.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST08/verify_accuracy.txt", "default": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST08/verify_accuracy.txt", } + TEST07_ACC_PATH = { "v6.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST07/verify_accuracy.txt", "default": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST07/verify_accuracy.txt", diff --git a/tools/submission/submission_checker/utils.py b/tools/submission/submission_checker/utils.py index 7d1daf0e72..6435b9e165 100644 --- a/tools/submission/submission_checker/utils.py +++ b/tools/submission/submission_checker/utils.py @@ -57,8 +57,13 @@ def check_extra_files(path, target_files): for target_file in target_files[dir]: if target_file not in files: check_pass = False - missing_files.append( - f"{os.path.join(path, dir, target_file)}.png") + if "images" in dir: + missing_files.append( + f"{os.path.join(path, dir, target_file)}.png") + if "videos" in dir: + missing_files.append( + f"{os.path.join(path, dir, target_file)}.mp4") + if "captions" not in files: missing_files.append( f"{os.path.join(path, dir, 'captions.txt')}") @@ -107,17 +112,19 @@ def 
is_number(s): return True except ValueError: return False - + + def lower_list(l): return [str(e).lower() for e in l] + def contains_list(l1, l2): # Check if l1 contains all elements of l2 missing = [] for e in l2: if e not in l1: missing.append(e) - return missing, len(missing) == 0 + return missing, len(missing) == 0 def get_performance_metric( @@ -317,7 +324,7 @@ def get_power_metric(config, scenario_fixed, log_path, is_valid, res): samples_per_query = 8 if (scenario_fixed in ["MultiStream"] - ) and scenario in ["SingleStream"]: + ) and scenario in ["SingleStream"]: power_metric = ( avg_power * power_duration * samples_per_query * 1000 / num_queries )