Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/auto-update-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name: Auto-Update Dev Branches from Master
on:
push:
branches:
- master # Trigger workflow on commits to 'master' branch.
- master # Trigger workflow on commits to 'master' branch
workflow_dispatch: {}

jobs:
Expand Down
10 changes: 10 additions & 0 deletions text_to_video/wan-2.2-t2v-a14b/data/samples.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
130, A boat sailing leisurely along the Seine River with the Eiffel Tower in background, pixel art
106, A panda drinking coffee in a cafe in Paris, watercolor painting
84, The bund Shanghai, black and white
59, an elephant spraying itself with water using its trunk to cool down
12, a car turning a corner
31, a truck anchored in a tranquil bay
86, The bund Shanghai, in cyberpunk style
122, Gwen Stacy reading a book, in cyberpunk style
233, skyscraper
96, a shark is swimming in the ocean, animated style
10 changes: 10 additions & 0 deletions text_to_video/wan-2.2-t2v-a14b/data/samples_filename_ids.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
A boat sailing leisurely along the Seine River with the Eiffel Tower in background, pixel art-0.mp4, 130.mp4
A panda drinking coffee in a cafe in Paris, watercolor painting-0.mp4, 106.mp4
The bund Shanghai, black and white-0.mp4, 84.mp4
an elephant spraying itself with water using its trunk to cool down-0.mp4, 59.mp4
a car turning a corner-0.mp4, 12.mp4
a truck anchored in a tranquil bay-0.mp4, 31.mp4
The bund Shanghai, in cyberpunk style-0.mp4, 86.mp4
Gwen Stacy reading a book, in cyberpunk style-0.mp4, 122.mp4
skyscraper-0.mp4, 233.mp4
a shark is swimming in the ocean, animated style-0.mp4, 96.mp4
6 changes: 3 additions & 3 deletions tools/submission/generate_final_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,11 +101,11 @@ def main():
"singlestream": "SingleStream",
"multistream": "MultiStream",
"server": "Server",
"interactive":"Interactive",
"interactive": "Interactive",
"offline": "Offline",
}
df["Scenario"] = df["Scenario"].apply(lambda x: scenario_map.get(str(x).lower(), x))

df["Scenario"] = df["Scenario"].apply(
lambda x: scenario_map.get(str(x).lower(), x))

output = args.input[:-4]
writer = pd.ExcelWriter(output + ".xlsx", engine="xlsxwriter")
Expand Down
36 changes: 36 additions & 0 deletions tools/submission/submission_checker/checks/accuracy_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from ..constants import *
from ..loader import SubmissionLogs
from ..configuration.configuration import Config
from ..utils import check_extra_files
import re
import os

Expand All @@ -25,6 +26,9 @@ class AccuracyCheck(BaseCheck):
- `loadgen_errors_check`: Fails if Loadgen reported non-ignored errors.
- `dataset_check`: Verifies the reported sample count matches the
configured dataset size unless the check is skipped.
- `extra_files_check`: For benchmarks in REQUIRED_ACC_BENCHMARK (e.g.
stable-diffusion-xl, wan-2.2-t2v-a14b), verifies required extra
artifacts (e.g. images/, videos/) exist in the accuracy directory.

Attributes:
submission_logs (SubmissionLogs): Holder for submission log paths
Expand Down Expand Up @@ -78,6 +82,7 @@ def setup_checks(self):
self.checks.append(self.accuracy_json_check)
self.checks.append(self.loadgen_errors_check)
self.checks.append(self.dataset_check)
self.checks.append(self.extra_files_check)

def accuracy_result_check(self):
"""Validate reported accuracy metrics in `accuracy.txt`.
Expand Down Expand Up @@ -234,3 +239,34 @@ def dataset_check(self):
)
return False
return True

def extra_files_check(self):
    """Verify required extra accuracy files for certain benchmarks.

    For models in REQUIRED_ACC_BENCHMARK (e.g. stable-diffusion-xl
    images, wan-2.2-t2v-a14b videos), ensures the accuracy directory
    contains the required subdirs and files. Skipped if
    skip_extra_accuracy_files_check is set.

    Returns:
        bool: True if the check is skipped, the model has no extra
        requirements, or all required files exist; False otherwise.
    """
    # Explicit opt-out takes precedence over everything else.
    if self.config.skip_extra_accuracy_files_check:
        return True
    # Only a subset of (model, version) pairs carry extra requirements.
    per_version = REQUIRED_ACC_BENCHMARK.get(self.model)
    if per_version is None:
        return True
    target_files = per_version.get(self.config.version)
    if target_files is None:
        return True
    acc_dir = os.path.dirname(self.path)
    files_ok, missing_files = check_extra_files(acc_dir, target_files)
    if files_ok:
        return True
    self.log.error(
        "%s expected to have the following extra files (%s)",
        acc_dir,
        missing_files,
    )
    return False
21 changes: 12 additions & 9 deletions tools/submission/submission_checker/checks/performance_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,15 @@ def missing_check(self):
self.log.error("Performance log missing at %s", self.path)
return False
return True

def scenarios_check(self):
if self.submission_logs.loader_data.get("check_scenarios", False):
return True
else:
missing_scenarios = self.submission_logs.loader_data.get("missing_scenarios", [])
unknown_scenarios = self.submission_logs.loader_data.get("unknown_scenarios", [])
missing_scenarios = self.submission_logs.loader_data.get(
"missing_scenarios", [])
unknown_scenarios = self.submission_logs.loader_data.get(
"unknown_scenarios", [])
if len(missing_scenarios) > 0:
self.log.error(
"%s does not have all required scenarios, missing %s",
Expand All @@ -116,7 +118,8 @@ def loadgen_errors_check(self):
bool: True if no blocking Loadgen errors are present,
False otherwise.
"""
compliance_skip = self.submission_logs.loader_data.get("compliance_skip", False)
compliance_skip = self.submission_logs.loader_data.get(
"compliance_skip", False)
if self.mlperf_log.has_error():
has_critical_errors = False
if self.config.ignore_uncommited:
Expand All @@ -127,7 +130,7 @@ def loadgen_errors_check(self):
):
has_critical_errors = True
if (
not compliance_skip
not compliance_skip
and "Multiple conf files are used" in error["value"]
):
has_critical_errors = True
Expand Down Expand Up @@ -454,7 +457,7 @@ def inferred_check(self):
("singlestream", "offline")
]
if (self.scenario.lower(), self.scenario_fixed.lower()
) not in list_inferred:
) not in list_inferred:
self.log.error(
"Result for scenario %s can not be inferred from %s for: %s",
self.scenario_fixed,
Expand Down Expand Up @@ -543,12 +546,12 @@ def get_inferred_result(self, res):
res = qps_wo_loadgen_overhead

if (self.scenario_fixed in ["Offline"]
) and self.scenario in ["MultiStream"]:
) and self.scenario in ["MultiStream"]:
inferred = True
res = samples_per_query * S_TO_MS / (latency_mean / MS_TO_NS)

if (self.scenario_fixed in ["MultiStream"]
) and self.scenario in ["SingleStream"]:
) and self.scenario in ["SingleStream"]:
inferred = True
# samples_per_query does not match with the one reported in the logs
# when inferring MultiStream from SingleStream
Expand All @@ -565,6 +568,6 @@ def get_inferred_result(self, res):
else:
res = (latency_99_percentile * samples_per_query) / MS_TO_NS
if (self.scenario_fixed in ["Interactive"]
) and self.scenario not in ["Server"]:
) and self.scenario not in ["Server"]:
is_valid = False
return res, is_valid
17 changes: 17 additions & 0 deletions tools/submission/submission_checker/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -1123,6 +1123,22 @@
"2289",
]
},
},
"wan-2.2-t2v-a14b": {
"v6.0": {
"videos": [
"130",
"106",
"84",
"59",
"12",
"31",
"86",
"122",
"233",
"96",
]
},
}
}
REQUIRED_MEASURE_FILES = ["user.conf", "README.md"]
Expand Down Expand Up @@ -1695,6 +1711,7 @@
"v6.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST08/verify_accuracy.txt",
"default": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST08/verify_accuracy.txt",
}

TEST07_ACC_PATH = {
"v6.0": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST07/verify_accuracy.txt",
"default": "{division}/{submitter}/results/{system}/{benchmark}/{scenario}/TEST07/verify_accuracy.txt",
Expand Down
17 changes: 12 additions & 5 deletions tools/submission/submission_checker/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,13 @@ def check_extra_files(path, target_files):
for target_file in target_files[dir]:
if target_file not in files:
check_pass = False
missing_files.append(
f"{os.path.join(path, dir, target_file)}.png")
if "images" in dir:
missing_files.append(
f"{os.path.join(path, dir, target_file)}.png")
if "videos" in dir:
missing_files.append(
f"{os.path.join(path, dir, target_file)}.mp4")

if "captions" not in files:
missing_files.append(
f"{os.path.join(path, dir, 'captions.txt')}")
Expand Down Expand Up @@ -107,17 +112,19 @@ def is_number(s):
return True
except ValueError:
return False



def lower_list(l):
    """Return a new list with each element coerced to its lowercase string form."""
    return list(map(lambda element: str(element).lower(), l))


def contains_list(l1, l2):
    """Check whether l1 contains every element of l2.

    Returns:
        tuple: (missing, all_present) — the elements of l2 absent from
        l1 (in l2 order), and True only when that list is empty.
    """
    missing = [element for element in l2 if element not in l1]
    return missing, not missing


def get_performance_metric(
Expand Down Expand Up @@ -317,7 +324,7 @@ def get_power_metric(config, scenario_fixed, log_path, is_valid, res):
samples_per_query = 8

if (scenario_fixed in ["MultiStream"]
) and scenario in ["SingleStream"]:
) and scenario in ["SingleStream"]:
power_metric = (
avg_power * power_duration * samples_per_query * 1000 / num_queries
)
Expand Down