diff --git a/src/winml/modelkit/optracing/qnn/csv_parser.py b/src/winml/modelkit/optracing/qnn/csv_parser.py index 9c6dcc090..aa467e84f 100644 --- a/src/winml/modelkit/optracing/qnn/csv_parser.py +++ b/src/winml/modelkit/optracing/qnn/csv_parser.py @@ -13,9 +13,11 @@ execute time in cycles/US). NODE SUB-EVENT rows carry per-operator cycle counts. UNKNOWN SUB-EVENT rows (compile stages) are ignored. -Multiple inference samples are separated by ROOT -"Accelerator (execute) time (cycles)" boundaries. +Multiple inference samples are delimited by the ROOT +"Number of HVX threads used" marker (the first ROOT metric of each +inference); every sample carries its own ROOT metadata. """ + from __future__ import annotations import csv @@ -35,26 +37,26 @@ _TOKEN_SUFFIX = re.compile(r"_token_\d+(?:_\d+)?") -def parse_qnn_profiling_csv(csv_path: str | Path) -> dict[str, Any]: - """Parse a QNN basic-mode profiling CSV into a structured dict. +def parse_qnn_profiling_csv(csv_path: str | Path) -> list[dict[str, Any]]: + """Parse a QNN basic-mode profiling CSV into a list of per-sample records. + + Returns one entry per inference sample:: - Returns: - ------- - dict with keys: - metadata : dict -- hvx_threads, accel_execute_cycles, num_samples - operators : list[dict] -- aggregated ops sorted by cycles desc - samples : list[list[dict]] -- per-sample operator lists + [ + { + "metadata": {hvx_threads, accel_execute_cycles, accel_execute_us}, + "samples": [{name, op_id, cycles}, ...], + }, + ... + ] + + Each sample carries its *own* ROOT metadata so per-operator durations can + be derived against the accelerator cycle counts of the same inference + (the cycle->US factor varies slightly between samples). Operator + aggregation across samples is left to the caller. """ rows = _read_csv(csv_path) - metadata = _extract_metadata(rows) - samples = _extract_samples(rows) - metadata["num_samples"] = len(samples) - operators = _aggregate_operators(samples) - return { - "metadata": metadata, - "operators": operators, - "samples": samples, - } + return _extract_samples(rows) # --------------------------------------------------------------------------- @@ -70,103 +72,75 @@ def _read_csv(csv_path: str | Path) -> list[dict[str, str]]: return list(reader) -def _extract_metadata(rows: list[dict[str, str]]) -> dict[str, Any]: - """Extract ROOT-level metadata from the CSV rows. +def _extract_samples(rows: list[dict[str, str]]) -> list[dict[str, Any]]: + """Split the CSV rows into per-sample records. - Captures the *first* occurrence of each metric so the result - reflects the initial inference sample. + A sample begins at the ROOT ``Number of HVX threads used`` marker — the + first ROOT metric QNN emits for each inference — and runs until the next + such marker (or end-of-file). This groups every ROOT metric (HVX threads, + accelerator execute cycles/US) with the NODE rows of the same inference, + so each sample carries its *own* metadata rather than sharing a single + first-occurrence snapshot. + + Returns a list of ``{"metadata": {...}, "samples": [op, ...]}`` dicts; + samples that produced no operator rows are dropped. """ - hvx_threads: int | None = None - accel_execute_cycles: int | None = None - accel_execute_us: int | None = None + samples: list[dict[str, Any]] = [] + current: dict[str, Any] | None = None for row in rows: event_level = row.get("Event Level", "").strip() event_id = row.get("Event Identifier", "").strip() + message = row.get("Message", "").strip() time_val = row.get("Time", "").strip() unit = row.get("Unit of Measurement", "").strip() - if event_level != "ROOT": + # Sample boundary: a new HVX-threads marker starts a fresh sample. + if event_level == "ROOT" and event_id == "Number of HVX threads used" and unit == "COUNT": + if current is not None and current["samples"]: + samples.append(current) + current = { + "metadata": { + "hvx_threads": int(time_val), + "accel_execute_cycles": 0, + "accel_execute_us": 0, + }, + "samples": [], + } continue - if ( - event_id == "Number of HVX threads used" - and unit == "COUNT" - and hvx_threads is None - ): - hvx_threads = int(time_val) - - if ( - event_id == "Accelerator (execute) time (cycles)" - and unit == "CYCLES" - and accel_execute_cycles is None - ): - accel_execute_cycles = int(time_val) - - if ( - event_id == "Accelerator (execute) time" - and unit == "US" - and accel_execute_us is None - ): - accel_execute_us = int(time_val) - - return { - "hvx_threads": hvx_threads or 0, - "accel_execute_cycles": accel_execute_cycles or 0, - "accel_execute_us": accel_execute_us or 0, - } - - -def _extract_samples(rows: list[dict[str, str]]) -> list[list[dict[str, Any]]]: - """Parse NODE SUB-EVENT rows into per-sample operator lists. - - Each sample begins at a ROOT row with - ``Accelerator (execute) time (cycles)`` and ends before the - next such row (or end-of-file). - """ - samples: list[list[dict[str, Any]]] = [] - current_sample: list[dict[str, Any]] | None = None + if current is None: + # Rows before the first HVX marker are compile/finalize noise. + continue - for row in rows: - event_level = row.get("Event Level", "").strip() - event_id = row.get("Event Identifier", "").strip() - message = row.get("Message", "").strip() - time_val = row.get("Time", "").strip() - unit = row.get("Unit of Measurement", "").strip() + meta = current["metadata"] - # Detect sample boundary. if ( event_level == "ROOT" and event_id == "Accelerator (execute) time (cycles)" and unit == "CYCLES" ): - # Close any previous sample before starting a new one. - if current_sample is not None: - samples.append(current_sample) - current_sample = [] + meta["accel_execute_cycles"] = int(time_val) continue - # Only collect NODE SUB-EVENT rows with CYCLES unit. - if ( - current_sample is not None - and message == "NODE" - and event_level == "SUB-EVENT" - and unit == "CYCLES" - ): + if event_level == "ROOT" and event_id == "Accelerator (execute) time" and unit == "US": + meta["accel_execute_us"] = int(time_val) + continue + + # Collect NODE SUB-EVENT rows with CYCLES unit. + if message == "NODE" and event_level == "SUB-EVENT" and unit == "CYCLES": parsed = _parse_node_event(event_id, time_val) if parsed is not None: - current_sample.append(parsed) + current["samples"].append(parsed) # Flush the last sample. - if current_sample is not None and len(current_sample) > 0: - samples.append(current_sample) + if current is not None and current["samples"]: + samples.append(current) return samples -def _parse_node_event( - event_id: str, time_val: str -) -> dict[str, Any] | None: +def _parse_node_event(event_id: str, time_val: str) -> dict[str, Any] | None: """Parse a single NODE SUB-EVENT identifier into name/op_id/cycles.""" m = _OP_PATTERN.match(event_id) if m is None: @@ -180,48 +154,3 @@ def _parse_node_event( name = _TOKEN_SUFFIX.sub("", raw_name) return {"name": name, "op_id": op_id, "cycles": cycles} - - -def _aggregate_operators( - samples: list[list[dict[str, Any]]], -) -> list[dict[str, Any]]: - """Average operator cycles across samples and sort by cycles desc. - - Operators are keyed by ``op_id`` so identically-named ops in - different positions are kept separate. - """ - if not samples: - return [] - - # Accumulate totals keyed by op_id. - totals: dict[int, dict[str, Any]] = {} - counts: dict[int, int] = {} - - for sample in samples: - for op in sample: - oid = op["op_id"] - if oid not in totals: - totals[oid] = { - "name": op["name"], - "op_id": oid, - "cycles": 0, - } - counts[oid] = 0 - totals[oid]["cycles"] += op["cycles"] - counts[oid] += 1 - - # Average. - aggregated: list[dict[str, Any]] = [] - for oid, entry in totals.items(): - avg_cycles = entry["cycles"] / counts[oid] - aggregated.append( - { - "name": entry["name"], - "op_id": entry["op_id"], - "cycles": avg_cycles, - } - ) - - # Sort descending by cycles. - aggregated.sort(key=lambda op: op["cycles"], reverse=True) - return aggregated diff --git a/src/winml/modelkit/optracing/qnn/profiler.py b/src/winml/modelkit/optracing/qnn/profiler.py index 2021c64ff..bf176261f 100644 --- a/src/winml/modelkit/optracing/qnn/profiler.py +++ b/src/winml/modelkit/optracing/qnn/profiler.py @@ -58,6 +58,64 @@ def _resolve_shape(shape: list, default_dim: int = 1) -> list[int]: return [default_dim if not isinstance(d, int) or d <= 0 else d for d in shape] +def _csv_operator_metrics(samples: list[dict[str, Any]]) -> list[OperatorMetrics]: + """Aggregate per-sample CSV operator records into ``OperatorMetrics``. + + Each operator's duration and percentage are computed against the metadata + of the *same* sample — the accelerator cycle total and cycle->US factor + differ slightly between inferences — then averaged across every sample the + operator appears in. Operators are keyed by ``op_id`` so identically-named + ops in different positions stay separate. The result is sorted by duration + descending. + """ + acc: dict[int, dict[str, Any]] = {} + + for sample in samples: + meta = sample["metadata"] + total_cycles = meta.get("accel_execute_cycles", 0) + accel_us = meta.get("accel_execute_us", 0) + cycle_to_us = accel_us / total_cycles if total_cycles > 0 else 0.0 + + for op in sample["samples"]: + oid = op["op_id"] + entry = acc.setdefault( + oid, + {"name": op["name"], "op_id": oid, "duration_us": 0.0, "percent": 0.0, "count": 0}, + ) + entry["duration_us"] += op["cycles"] * cycle_to_us + entry["percent"] += op["cycles"] / total_cycles * 100 if total_cycles > 0 else 0.0 + entry["count"] += 1 + + metrics = [ + OperatorMetrics( + name=entry["name"], + op_path=entry["name"], + op_id=entry["op_id"], + duration_us=entry["duration_us"] / entry["count"], + percent_of_total=entry["percent"] / entry["count"], + ) + for entry in acc.values() + ] + # duration is the headline metric; percent breaks ties when US timing is + # absent (so durations collapse to 0 but cycle shares still differ). + metrics.sort(key=lambda m: (m.duration_us, m.percent_of_total), reverse=True) + return metrics + + +def _csv_summary(samples: list[dict[str, Any]]) -> dict[str, Any]: + """Headline metadata across samples (HVX threads constant; cycles/US averaged).""" + if not samples: + return {"hvx_threads": 0, "accel_execute_cycles": 0, "accel_execute_us": 0} + + n = len(samples) + metas = [s["metadata"] for s in samples] + return { + "hvx_threads": metas[0]["hvx_threads"], + "accel_execute_cycles": round(sum(m["accel_execute_cycles"] for m in metas) / n), + "accel_execute_us": round(sum(m["accel_execute_us"] for m in metas) / n), + } + + @contextlib.contextmanager def _working_directory(path: Path) -> Iterator[None]: """Temporarily change CWD and restore on exit. @@ -153,7 +211,7 @@ def run(self, iterations: int = 5, warmup: int = 2) -> OpTraceResult: del session # ---- Post-processing ---- - return self._collect_results(csv_path, iterations) + return self._collect_results(csv_path, iterations, warmup) # ------------------------------------------------------------------ # ORT configuration builders @@ -201,7 +259,7 @@ def _generate_inputs(session: Any) -> dict[str, np.ndarray]: # Result collection # ------------------------------------------------------------------ - def _collect_results(self, csv_path: Path, iterations: int) -> OpTraceResult: + def _collect_results(self, csv_path: Path, iterations: int, warmup: int) -> OpTraceResult: """Parse profiling artifacts into an ``OpTraceResult``.""" artifacts: dict[str, str] = {} qnn_log = Path(str(csv_path) + "_qnn.log") @@ -224,7 +282,7 @@ def _collect_results(self, csv_path: Path, iterations: int) -> OpTraceResult: # --- Fallback / basic mode: parse CSV --- if csv_path.is_file(): - return self._from_csv(csv_path, iterations, artifacts) + return self._from_csv(csv_path, iterations, warmup, artifacts) # No artifacts at all -- return empty result. logger.warning("No profiling artifacts found in %s", self.output_dir) @@ -303,27 +361,25 @@ def _from_csv( self, csv_path: Path, iterations: int, + warmup: int, artifacts: dict[str, str], ) -> OpTraceResult: - """Build an ``OpTraceResult`` from the basic CSV parser.""" - parsed = parse_qnn_profiling_csv(csv_path) - meta = parsed["metadata"] + """Build an ``OpTraceResult`` from the basic CSV parser. - # Convert cycles to microseconds using the cycle-to-us factor. - total_cycles = meta.get("accel_execute_cycles", 0) - accel_us = meta.get("accel_execute_us", 0) - cycle_to_us = accel_us / total_cycles if total_cycles > 0 else 0.0 + The CSV records every execute call, warmup runs included. Warmup + carries graph-finalization / JIT overhead, so the first ``warmup`` + samples are dropped; the remaining samples — which must number + ``iterations`` — feed the operator metrics. + """ + samples = parse_qnn_profiling_csv(csv_path) - operators = [ - OperatorMetrics( - name=op["name"], - op_path=op["name"], - op_id=op["op_id"], - duration_us=op["cycles"] * cycle_to_us, - percent_of_total=(op["cycles"] / total_cycles * 100 if total_cycles > 0 else 0), - ) - for op in parsed["operators"] - ] + measured = samples[warmup:] + assert len(measured) == iterations, ( + f"Expected {iterations} measured sample(s) after skipping {warmup} " + f"warmup, got {len(measured)} from {len(samples)} total." + ) + + operators = _csv_operator_metrics(measured) return OpTraceResult( model=self.onnx_path.name, @@ -332,11 +388,7 @@ def _from_csv( ep="QNNExecutionProvider", tracing_backend="qnn", operators=operators, - num_samples=meta.get("num_samples", 0), - summary={ - "hvx_threads": meta.get("hvx_threads", 0), - "accel_execute_cycles": meta.get("accel_execute_cycles", 0), - "accel_execute_us": accel_us, - }, + num_samples=len(measured), + summary=_csv_summary(measured), artifacts=artifacts, ) diff --git a/src/winml/modelkit/optracing/result.py b/src/winml/modelkit/optracing/result.py index ae49e5e3b..51617caaa 100644 --- a/src/winml/modelkit/optracing/result.py +++ b/src/winml/modelkit/optracing/result.py @@ -48,8 +48,13 @@ class OperatorMetrics: dims: list[int] | None = None def to_dict(self) -> dict[str, Any]: - """Serialize to dict, preserving None for unavailable fields.""" - return asdict(self) + """Serialize to dict, omitting fields left unset (``None``). + + Basic-mode traces only populate identity + timing, so dropping ``None`` + keeps the output free of the many detail-only fields (DMA/VTCM/roofline) + that would otherwise serialize as ``null``. + """ + return {k: v for k, v in asdict(self).items() if v is not None} @dataclass diff --git a/tests/unit/optracing/fixtures/basic_pipeline_expected.json b/tests/unit/optracing/fixtures/basic_pipeline_expected.json new file mode 100644 index 000000000..bd6400517 --- /dev/null +++ b/tests/unit/optracing/fixtures/basic_pipeline_expected.json @@ -0,0 +1,573 @@ +{ + "metadata": { + "model": "resnet-50", + "device": "npu", + "ep": "", + "tracing_level": "basic", + "tracing_backend": "", + "timestamp": "", + "num_samples": 5 + }, + "summary": { + "hvx_threads": 4, + "accel_execute_cycles": 3455548, + "accel_execute_us": 1041 + }, + "operators": [ + { + "name": "/resnet/embedder/embedder/convolution/Conv", + "op_path": "/resnet/embedder/embedder/convolution/Conv", + "op_id": 24, + "duration_us": 197.87246478452283, + "percent_of_total": 19.10347271565341 + }, + { + "name": "Transpose", + "op_path": "Transpose", + "op_id": 18, + "duration_us": 73.19466283403723, + "percent_of_total": 6.998941887957573 + }, + { + "name": "/resnet/encoder/stages.1/layers.0/layer/layer.1/convolution/Conv", + "op_path": "/resnet/encoder/stages.1/layers.0/layer/layer.1/convolution/Conv", + "op_id": 128, + "duration_us": 46.10237119126056, + "percent_of_total": 4.457262479914476 + }, + { + "name": "/resnet/encoder/stages.3/layers.0/layer/layer.1/convolution/Conv", + "op_path": "/resnet/encoder/stages.3/layers.0/layer/layer.1/convolution/Conv", + "op_id": 394, + "duration_us": 31.058012793824286, + "percent_of_total": 2.907405153173719 + }, + { + "name": "/resnet/encoder/stages.2/layers.0/layer/layer.1/convolution/Conv", + "op_path": "/resnet/encoder/stages.2/layers.0/layer/layer.1/convolution/Conv", + "op_id": 236, + "duration_us": 30.65123868173662, + "percent_of_total": 2.962225510694881 + }, + { + "name": "/resnet/encoder/stages.0/layers.2/Add_3", + "op_path": "/resnet/encoder/stages.0/layers.2/Add_3", + "op_id": 114, + "duration_us": 30.440866176834344, + "percent_of_total": 2.9312860705196546 + }, + { + "name": "/resnet/encoder/stages.0/layers.0/Add_3", + "op_path": "/resnet/encoder/stages.0/layers.0/Add_3", + "op_id": 64, + "duration_us": 29.548920834054616, + "percent_of_total": 2.8544456148553943 + }, + { + "name": "/resnet/encoder/stages.0/layers.1/Add_3", + "op_path": "/resnet/encoder/stages.0/layers.1/Add_3", + "op_id": 89, + "duration_us": 29.265413670871375, + "percent_of_total": 2.8261883414243387 + }, + { + "name": "/resnet/embedder/pooler/MaxPool", + "op_path": "/resnet/embedder/pooler/MaxPool", + "op_id": 30, + "duration_us": 29.195149180575743, + "percent_of_total": 2.8184790172221037 + }, + { + "name": "/resnet/pooler/GlobalAveragePool", + "op_path": "/resnet/pooler/GlobalAveragePool", + "op_id": 467, + "duration_us": 27.237714522681312, + "percent_of_total": 2.540853249884492 + }, + { + "name": "pixel_values_QuantizeLinear_3", + "op_path": "pixel_values_QuantizeLinear_3", + "op_id": 16, + "duration_us": 21.747111533394264, + "percent_of_total": 2.100304420509825 + }, + { + "name": "/resnet/encoder/stages.3/layers.2/layer/layer.1/convolution/Conv", + "op_path": "/resnet/encoder/stages.3/layers.2/layer/layer.1/convolution/Conv", + "op_id": 452, + "duration_us": 21.60046791274286, + "percent_of_total": 2.0017083069469765 + }, + { + "name": "/resnet/encoder/stages.1/layers.0/shortcut/convolution/Conv", + "op_path": "/resnet/encoder/stages.1/layers.0/shortcut/convolution/Conv", + "op_id": 144, + "duration_us": 21.306912762518333, + "percent_of_total": 2.0485773679897754 + }, + { + "name": "/resnet/encoder/stages.1/layers.1/Add_3", + "op_path": "/resnet/encoder/stages.1/layers.1/Add_3", + "op_id": 172, + "duration_us": 20.838009200618526, + "percent_of_total": 2.0141942615027686 + }, + { + "name": "/resnet/encoder/stages.1/layers.2/Add_3", + "op_path": "/resnet/encoder/stages.1/layers.2/Add_3", + "op_id": 197, + "duration_us": 20.39773395992692, + "percent_of_total": 1.9723010557000613 + }, + { + "name": "/resnet/encoder/stages.1/layers.3/Add_3", + "op_path": "/resnet/encoder/stages.1/layers.3/Add_3", + "op_id": 222, + "duration_us": 20.1535252980997, + "percent_of_total": 1.9550192571157254 + }, + { + "name": "/resnet/encoder/stages.1/layers.0/Add_3", + "op_path": "/resnet/encoder/stages.1/layers.0/Add_3", + "op_id": 147, + "duration_us": 19.42867614529926, + "percent_of_total": 1.8827994326557693 + }, + { + "name": "/resnet/encoder/stages.2/layers.0/shortcut/convolution/Conv", + "op_path": "/resnet/encoder/stages.2/layers.0/shortcut/convolution/Conv", + "op_id": 252, + "duration_us": 15.217373226922973, + "percent_of_total": 1.4284918322904068 + }, + { + "name": "/classifier/classifier.1/Gemm_3", + "op_path": "/classifier/classifier.1/Gemm_3", + "op_id": 475, + "duration_us": 14.521111436495826, + "percent_of_total": 1.3878067385948083 + }, + { + "name": "/resnet/encoder/stages.3/layers.1/layer/layer.1/convolution/Conv", + "op_path": "/resnet/encoder/stages.3/layers.1/layer/layer.1/convolution/Conv", + "op_id": 427, + "duration_us": 14.268587468933124, + "percent_of_total": 1.2963922809298931 + }, + { + "name": "/resnet/encoder/stages.2/layers.4/Add_3", + "op_path": "/resnet/encoder/stages.2/layers.4/Add_3", + "op_id": 355, + "duration_us": 12.251838989906956, + "percent_of_total": 1.1847048394748012 + }, + { + "name": "/resnet/encoder/stages.2/layers.5/Add_3", + "op_path": "/resnet/encoder/stages.2/layers.5/Add_3", + "op_id": 380, + "duration_us": 12.2215084974497, + "percent_of_total": 1.1804032258309962 + }, + { + "name": "/resnet/encoder/stages.2/layers.3/Add_3", + "op_path": "/resnet/encoder/stages.2/layers.3/Add_3", + "op_id": 330, + "duration_us": 12.2214420871878, + "percent_of_total": 1.1811211134069852 + }, + { + "name": "/resnet/encoder/stages.2/layers.2/Add_3", + "op_path": "/resnet/encoder/stages.2/layers.2/Add_3", + "op_id": 305, + "duration_us": 12.194539733509043, + "percent_of_total": 1.1790319850803495 + }, + { + "name": "/resnet/encoder/stages.2/layers.1/Add_3", + "op_path": "/resnet/encoder/stages.2/layers.1/Add_3", + "op_id": 280, + "duration_us": 12.019878394576065, + "percent_of_total": 1.1618127565817753 + }, + { + "name": "/resnet/encoder/stages.3/layers.0/shortcut/convolution/Conv", + "op_path": "/resnet/encoder/stages.3/layers.0/shortcut/convolution/Conv", + "op_id": 410, + "duration_us": 11.766383999516531, + "percent_of_total": 1.12692739468823 + }, + { + "name": "/resnet/encoder/stages.2/layers.0/Add_3", + "op_path": "/resnet/encoder/stages.2/layers.0/Add_3", + "op_id": 255, + "duration_us": 11.569498330700938, + "percent_of_total": 1.1186390687705312 + }, + { + "name": "/resnet/encoder/stages.3/layers.2/layer/layer.0/convolution/Conv", + "op_path": "/resnet/encoder/stages.3/layers.2/layer/layer.0/convolution/Conv", + "op_id": 444, + "duration_us": 9.242882354929687, + "percent_of_total": 0.8782059665388087 + }, + { + "name": "/resnet/encoder/stages.2/layers.1/layer/layer.1/convolution/Conv", + "op_path": "/resnet/encoder/stages.2/layers.1/layer/layer.1/convolution/Conv", + "op_id": 269, + "duration_us": 7.142223111561625, + "percent_of_total": 0.6878954513595683 + }, + { + "name": "/resnet/encoder/stages.3/layers.1/layer/layer.2/convolution/Conv", + "op_path": "/resnet/encoder/stages.3/layers.1/layer/layer.2/convolution/Conv", + "op_id": 435, + "duration_us": 6.962569955509719, + "percent_of_total": 0.6552987326945149 + }, + { + "name": "/resnet/encoder/stages.3/layers.0/Add_3", + "op_path": "/resnet/encoder/stages.3/layers.0/Add_3", + "op_id": 413, + "duration_us": 6.651855872823707, + "percent_of_total": 0.6429320284774851 + }, + { + "name": "/resnet/encoder/stages.2/layers.2/layer/layer.1/convolution/Conv", + "op_path": "/resnet/encoder/stages.2/layers.2/layer/layer.1/convolution/Conv", + "op_id": 294, + "duration_us": 6.590697370189571, + "percent_of_total": 0.6370870891192918 + }, + { + "name": "/resnet/encoder/stages.2/layers.4/layer/layer.1/convolution/Conv", + "op_path": "/resnet/encoder/stages.2/layers.4/layer/layer.1/convolution/Conv", + "op_id": 344, + "duration_us": 6.542803607819138, + "percent_of_total": 0.632515563359822 + }, + { + "name": "/resnet/encoder/stages.2/layers.5/layer/layer.1/convolution/Conv", + "op_path": "/resnet/encoder/stages.2/layers.5/layer/layer.1/convolution/Conv", + "op_id": 369, + "duration_us": 6.505610726308722, + "percent_of_total": 0.6288555110688104 + }, + { + "name": "/resnet/encoder/stages.2/layers.3/layer/layer.1/convolution/Conv", + "op_path": "/resnet/encoder/stages.2/layers.3/layer/layer.1/convolution/Conv", + "op_id": 319, + "duration_us": 6.486731779100149, + "percent_of_total": 0.6270870612074704 + }, + { + "name": "/resnet/encoder/stages.3/layers.2/layer/layer.2/convolution/Conv", + "op_path": "/resnet/encoder/stages.3/layers.2/layer/layer.2/convolution/Conv", + "op_id": 460, + "duration_us": 6.483149277138303, + "percent_of_total": 0.6105042897623703 + }, + { + "name": "/resnet/encoder/stages.1/layers.1/layer/layer.1/convolution/Conv", + "op_path": "/resnet/encoder/stages.1/layers.1/layer/layer.1/convolution/Conv", + "op_id": 161, + "duration_us": 6.346914063375384, + "percent_of_total": 0.6140607457272236 + }, + { + "name": "/resnet/encoder/stages.0/layers.0/layer/layer.1/convolution/Conv", + "op_path": "/resnet/encoder/stages.0/layers.0/layer/layer.1/convolution/Conv", + "op_id": 45, + "duration_us": 6.226902312915317, + "percent_of_total": 0.6010213114181033 + }, + { + "name": "/resnet/encoder/stages.3/layers.0/layer/layer.0/convolution/Conv", + "op_path": "/resnet/encoder/stages.3/layers.0/layer/layer.0/convolution/Conv", + "op_id": 386, + "duration_us": 6.1930474729659055, + "percent_of_total": 0.5990901271443959 + }, + { + "name": "/resnet/encoder/stages.1/layers.0/layer/layer.0/convolution/Conv", + "op_path": "/resnet/encoder/stages.1/layers.0/layer/layer.0/convolution/Conv", + "op_id": 120, + "duration_us": 6.1312188431864385, + "percent_of_total": 0.5937526788339612 + }, + { + "name": "/resnet/encoder/stages.1/layers.2/layer/layer.1/convolution/Conv", + "op_path": "/resnet/encoder/stages.1/layers.2/layer/layer.1/convolution/Conv", + "op_id": 186, + "duration_us": 6.006812193410633, + "percent_of_total": 0.5815091396206968 + }, + { + "name": "/resnet/encoder/stages.1/layers.3/layer/layer.1/convolution/Conv", + "op_path": "/resnet/encoder/stages.1/layers.3/layer/layer.1/convolution/Conv", + "op_id": 211, + "duration_us": 5.967575839729966, + "percent_of_total": 0.5776749851481549 + }, + { + "name": "/resnet/encoder/stages.0/layers.2/layer/layer.1/convolution/Conv", + "op_path": "/resnet/encoder/stages.0/layers.2/layer/layer.1/convolution/Conv", + "op_id": 103, + "duration_us": 5.927781443224344, + "percent_of_total": 0.5745074317200402 + }, + { + "name": "/resnet/encoder/stages.0/layers.0/shortcut/convolution/Conv", + "op_path": "/resnet/encoder/stages.0/layers.0/shortcut/convolution/Conv", + "op_id": 61, + "duration_us": 5.876382017722966, + "percent_of_total": 0.5687812421315531 + }, + { + "name": "/resnet/encoder/stages.2/layers.0/layer/layer.0/convolution/Conv", + "op_path": "/resnet/encoder/stages.2/layers.0/layer/layer.0/convolution/Conv", + "op_id": 228, + "duration_us": 5.803414477285924, + "percent_of_total": 0.5617570218005301 + }, + { + "name": "/resnet/encoder/stages.3/layers.2/Add_3", + "op_path": "/resnet/encoder/stages.3/layers.2/Add_3", + "op_id": 463, + "duration_us": 5.729995552020262, + "percent_of_total": 0.5554556898558076 + }, + { + "name": "/resnet/encoder/stages.0/layers.2/layer/layer.2/convolution/Conv", + "op_path": "/resnet/encoder/stages.0/layers.2/layer/layer.2/convolution/Conv", + "op_id": 111, + "duration_us": 5.704461260955877, + "percent_of_total": 0.5519906636996887 + }, + { + "name": "/resnet/encoder/stages.3/layers.1/Add_3", + "op_path": "/resnet/encoder/stages.3/layers.1/Add_3", + "op_id": 438, + "duration_us": 5.691924936505158, + "percent_of_total": 0.5512875174448564 + }, + { + "name": "/resnet/encoder/stages.0/layers.1/layer/layer.1/convolution/Conv", + "op_path": "/resnet/encoder/stages.0/layers.1/layer/layer.1/convolution/Conv", + "op_id": 78, + "duration_us": 5.62348984814917, + "percent_of_total": 0.5449112060933031 + }, + { + "name": "/resnet/encoder/stages.0/layers.0/layer/layer.2/convolution/Conv", + "op_path": "/resnet/encoder/stages.0/layers.0/layer/layer.2/convolution/Conv", + "op_id": 53, + "duration_us": 5.583787059257368, + "percent_of_total": 0.539831537759931 + }, + { + "name": "/resnet/encoder/stages.0/layers.1/layer/layer.2/convolution/Conv", + "op_path": "/resnet/encoder/stages.0/layers.1/layer/layer.2/convolution/Conv", + "op_id": 86, + "duration_us": 5.368215746463642, + "percent_of_total": 0.5200303073528931 + }, + { + "name": "/resnet/encoder/stages.1/layers.1/layer/layer.2/convolution/Conv", + "op_path": "/resnet/encoder/stages.1/layers.1/layer/layer.2/convolution/Conv", + "op_id": 169, + "duration_us": 4.977880755961327, + "percent_of_total": 0.4739194486793705 + }, + { + "name": "/resnet/encoder/stages.1/layers.0/layer/layer.2/convolution/Conv", + "op_path": "/resnet/encoder/stages.1/layers.0/layer/layer.2/convolution/Conv", + "op_id": 136, + "duration_us": 4.883670539448156, + "percent_of_total": 0.47407620644818627 + }, + { + "name": "/resnet/encoder/stages.1/layers.3/layer/layer.2/convolution/Conv", + "op_path": "/resnet/encoder/stages.1/layers.3/layer/layer.2/convolution/Conv", + "op_id": 219, + "duration_us": 4.831734274785914, + "percent_of_total": 0.4608914425754058 + }, + { + "name": "/resnet/encoder/stages.1/layers.2/layer/layer.2/convolution/Conv", + "op_path": "/resnet/encoder/stages.1/layers.2/layer/layer.2/convolution/Conv", + "op_id": 194, + "duration_us": 4.747427664725864, + "percent_of_total": 0.46014565747879477 + }, + { + "name": "/resnet/encoder/stages.2/layers.0/layer/layer.2/convolution/Conv", + "op_path": "/resnet/encoder/stages.2/layers.0/layer/layer.2/convolution/Conv", + "op_id": 244, + "duration_us": 4.431440609836907, + "percent_of_total": 0.4284328340691938 + }, + { + "name": "/resnet/encoder/stages.2/layers.2/layer/layer.2/convolution/Conv", + "op_path": "/resnet/encoder/stages.2/layers.2/layer/layer.2/convolution/Conv", + "op_id": 302, + "duration_us": 4.364718237736733, + "percent_of_total": 0.42200531631855986 + }, + { + "name": "/resnet/encoder/stages.2/layers.1/layer/layer.2/convolution/Conv", + "op_path": "/resnet/encoder/stages.2/layers.1/layer/layer.2/convolution/Conv", + "op_id": 277, + "duration_us": 4.291274689479434, + "percent_of_total": 0.4148917592389642 + }, + { + "name": "/resnet/encoder/stages.2/layers.5/layer/layer.2/convolution/Conv", + "op_path": "/resnet/encoder/stages.2/layers.5/layer/layer.2/convolution/Conv", + "op_id": 377, + "duration_us": 4.181956362182358, + "percent_of_total": 0.4043225440465427 + }, + { + "name": "/resnet/encoder/stages.2/layers.4/layer/layer.2/convolution/Conv", + "op_path": "/resnet/encoder/stages.2/layers.4/layer/layer.2/convolution/Conv", + "op_id": 352, + "duration_us": 4.157854527975575, + "percent_of_total": 0.4019647471226128 + }, + { + "name": "/resnet/encoder/stages.2/layers.3/layer/layer.2/convolution/Conv", + "op_path": "/resnet/encoder/stages.2/layers.3/layer/layer.2/convolution/Conv", + "op_id": 327, + "duration_us": 4.153939852281513, + "percent_of_total": 0.40158167759977903 + }, + { + "name": "/resnet/encoder/stages.3/layers.0/layer/layer.2/convolution/Conv", + "op_path": "/resnet/encoder/stages.3/layers.0/layer/layer.2/convolution/Conv", + "op_id": 402, + "duration_us": 4.152870682788911, + "percent_of_total": 0.4006234492442706 + }, + { + "name": "/resnet/encoder/stages.3/layers.1/layer/layer.0/convolution/Conv", + "op_path": "/resnet/encoder/stages.3/layers.1/layer/layer.0/convolution/Conv", + "op_id": 419, + "duration_us": 3.736920689204999, + "percent_of_total": 0.3434677632174699 + }, + { + "name": "Input", + "op_path": "Input", + "op_id": 2, + "duration_us": 3.4886417789136837, + "percent_of_total": 0.3293112131922725 + }, + { + "name": "/resnet/encoder/stages.0/layers.2/layer/layer.0/convolution/Conv", + "op_path": "/resnet/encoder/stages.0/layers.2/layer/layer.0/convolution/Conv", + "op_id": 95, + "duration_us": 3.2843521858090776, + "percent_of_total": 0.31843331489020626 + }, + { + "name": "/resnet/encoder/stages.1/layers.3/layer/layer.0/convolution/Conv", + "op_path": "/resnet/encoder/stages.1/layers.3/layer/layer.0/convolution/Conv", + "op_id": 203, + "duration_us": 3.2315715644945398, + "percent_of_total": 0.31366716798217453 + }, + { + "name": "/resnet/encoder/stages.0/layers.1/layer/layer.0/convolution/Conv", + "op_path": "/resnet/encoder/stages.0/layers.1/layer/layer.0/convolution/Conv", + "op_id": 70, + "duration_us": 3.137483097980128, + "percent_of_total": 0.3039727569375319 + }, + { + "name": "/resnet/encoder/stages.1/layers.2/layer/layer.0/convolution/Conv", + "op_path": "/resnet/encoder/stages.1/layers.2/layer/layer.0/convolution/Conv", + "op_id": 178, + "duration_us": 3.1293837114400818, + "percent_of_total": 0.30282627231889686 + }, + { + "name": "/resnet/encoder/stages.2/layers.3/layer/layer.0/convolution/Conv", + "op_path": "/resnet/encoder/stages.2/layers.3/layer/layer.0/convolution/Conv", + "op_id": 311, + "duration_us": 3.0421869180545174, + "percent_of_total": 0.29411871445520227 + }, + { + "name": "/resnet/encoder/stages.1/layers.1/layer/layer.0/convolution/Conv", + "op_path": "/resnet/encoder/stages.1/layers.1/layer/layer.0/convolution/Conv", + "op_id": 153, + "duration_us": 3.0420068446330135, + "percent_of_total": 0.29494639979963033 + }, + { + "name": "/resnet/encoder/stages.2/layers.2/layer/layer.0/convolution/Conv", + "op_path": "/resnet/encoder/stages.2/layers.2/layer/layer.0/convolution/Conv", + "op_id": 286, + "duration_us": 3.0300923760956104, + "percent_of_total": 0.29291212656153426 + }, + { + "name": "/resnet/encoder/stages.2/layers.4/layer/layer.0/convolution/Conv", + "op_path": "/resnet/encoder/stages.2/layers.4/layer/layer.0/convolution/Conv", + "op_id": 336, + "duration_us": 3.010174556517658, + "percent_of_total": 0.2910935666887473 + }, + { + "name": "/resnet/encoder/stages.0/layers.0/layer/layer.0/convolution/Conv", + "op_path": "/resnet/encoder/stages.0/layers.0/layer/layer.0/convolution/Conv", + "op_id": 37, + "duration_us": 2.961577678517372, + "percent_of_total": 0.28621977120273234 + }, + { + "name": "/resnet/encoder/stages.2/layers.5/layer/layer.0/convolution/Conv", + "op_path": "/resnet/encoder/stages.2/layers.5/layer/layer.0/convolution/Conv", + "op_id": 361, + "duration_us": 2.9558194669219966, + "percent_of_total": 0.28589910197100743 + }, + { + "name": "/resnet/encoder/stages.2/layers.1/layer/layer.0/convolution/Conv", + "op_path": "/resnet/encoder/stages.2/layers.1/layer/layer.0/convolution/Conv", + "op_id": 261, + "duration_us": 2.929113427064801, + "percent_of_total": 0.28317914475743733 + }, + { + "name": "Output", + "op_path": "Output", + "op_id": 3, + "duration_us": 1.342811228824536, + "percent_of_total": 0.12247875293134429 + }, + { + "name": "/classifier/classifier.0/Flatten_3", + "op_path": "/classifier/classifier.0/Flatten_3", + "op_id": 472, + "duration_us": 0.42123550757852735, + "percent_of_total": 0.03778017156313185 + }, + { + "name": "Transpose", + "op_path": "Transpose", + "op_id": 471, + "duration_us": 0.0, + "percent_of_total": 0.0 + }, + { + "name": "logits_DequantizeLinear_3", + "op_path": "logits_DequantizeLinear_3", + "op_id": 477, + "duration_us": 0.0, + "percent_of_total": 0.0 + } + ], + "statistics": {}, + "artifacts": {} +} diff --git a/tests/unit/optracing/test_csv_parser.py b/tests/unit/optracing/test_csv_parser.py index 14f6aa7cc..04c6b6a86 100644 --- a/tests/unit/optracing/test_csv_parser.py +++ b/tests/unit/optracing/test_csv_parser.py @@ -12,25 +12,25 @@ FIXTURE_DIR = Path(__file__).parent / "fixtures" -def test_parse_csv_returns_dict(): +def test_parse_csv_returns_sample_list(): result = parse_qnn_profiling_csv(FIXTURE_DIR / "optrace_resnet50.csv") - assert isinstance(result, dict) - assert "metadata" in result - assert "operators" in result - assert "samples" in result + assert isinstance(result, list) + assert len(result) >= 1 + assert all({"metadata", "samples"} <= entry.keys() for entry in result) -def test_parse_csv_metadata(): +def test_parse_csv_sample_metadata(): result = parse_qnn_profiling_csv(FIXTURE_DIR / "optrace_resnet50.csv") - meta = result["metadata"] - assert meta["hvx_threads"] == 4 - assert meta["accel_execute_cycles"] > 0 - assert meta["num_samples"] >= 1 + for sample in result: + meta = sample["metadata"] + assert meta["hvx_threads"] == 4 + assert meta["accel_execute_cycles"] > 0 + assert meta["accel_execute_us"] > 0 -def test_parse_csv_operators(): +def test_parse_csv_sample_operators(): result = parse_qnn_profiling_csv(FIXTURE_DIR / "optrace_resnet50.csv") - ops = result["operators"] + ops = result[0]["samples"] assert len(ops) > 0 first = ops[0] assert "name" in first @@ -39,14 +39,23 @@ def test_parse_csv_operators(): assert first["cycles"] > 0 -def test_parse_csv_operators_sorted_by_cycles(): +def test_parse_csv_multi_sample(): result = parse_qnn_profiling_csv(FIXTURE_DIR / "optrace_resnet50.csv") - ops = result["operators"] - cycles = [op["cycles"] for op in ops] - assert cycles == sorted(cycles, reverse=True) + # The fixture captures several inference samples. + assert len(result) > 1 -def test_parse_csv_multi_sample(): +def test_parse_csv_per_sample_cycles_differ(): + """Per-sample accel cycles are captured independently, not a shared snapshot.""" + result = parse_qnn_profiling_csv(FIXTURE_DIR / "optrace_resnet50.csv") + per_sample_cycles = [s["metadata"]["accel_execute_cycles"] for s in result] + # The fixture has distinct accelerator cycle counts across its samples. + assert len(set(per_sample_cycles)) > 1 + + +def test_parse_csv_each_sample_has_operators(): + """No sample is retained without operator rows.""" result = parse_qnn_profiling_csv(FIXTURE_DIR / "optrace_resnet50.csv") - assert result["metadata"]["num_samples"] >= 1 - assert len(result["samples"]) >= 1 + for sample in result: + assert len(sample["samples"]) > 0 + assert all({"name", "op_id", "cycles"} <= op.keys() for op in sample["samples"]) diff --git a/tests/unit/optracing/test_integration.py b/tests/unit/optracing/test_integration.py index 896ffaa26..b6c4545d3 100644 --- a/tests/unit/optracing/test_integration.py +++ b/tests/unit/optracing/test_integration.py @@ -11,6 +11,10 @@ from winml.modelkit.optracing.qnn.csv_parser import ( parse_qnn_profiling_csv, # Testing internal implementation ) +from winml.modelkit.optracing.qnn.profiler import ( + _csv_operator_metrics, # Testing internal implementation + _csv_summary, +) from winml.modelkit.optracing.qnn.qhas_parser import parse_qhas # Testing internal implementation @@ -18,39 +22,25 @@ def test_basic_pipeline_csv_to_json(tmp_path): - """Full basic mode: CSV -> OpTraceResult -> JSON file.""" - csv_data = parse_qnn_profiling_csv(FIXTURE_DIR / "optrace_resnet50.csv") - - total_cycles = sum(op["cycles"] for op in csv_data["operators"]) - - operators = [ - OperatorMetrics( - name=op["name"], - op_path=op["name"], # CSV doesn't distinguish type vs path - op_id=op["op_id"], - duration_us=op["cycles"], # keep raw cycles as duration placeholder - percent_of_total=((op["cycles"] / total_cycles * 100) if total_cycles else 0), - ) - for op in csv_data["operators"] - ] + """Full basic mode: CSV -> OpTraceResult -> JSON matches the golden fixture.""" + samples = parse_qnn_profiling_csv(FIXTURE_DIR / "optrace_resnet50.csv") result = OpTraceResult( model="resnet-50", device="npu", tracing_level="basic", - operators=operators, - num_samples=csv_data["metadata"]["num_samples"], - summary=csv_data["metadata"], + operators=_csv_operator_metrics(samples), + num_samples=len(samples), + summary=_csv_summary(samples), + timestamp="", # pinned: the only otherwise non-deterministic field ) out = tmp_path / "basic_op_trace.json" write_op_trace_json(result, out) - assert out.exists() - data = json.loads(out.read_text()) - assert data["metadata"]["tracing_level"] == "basic" - assert len(data["operators"]) > 0 - assert data["operators"][0]["duration_us"] > 0 + produced = json.loads(out.read_text()) + expected = json.loads((FIXTURE_DIR / "basic_pipeline_expected.json").read_text()) + assert produced == expected def test_detail_pipeline_qhas_to_json(tmp_path): @@ -194,7 +184,7 @@ def test_round_trip_json(): op1 = parsed["operators"][1] assert op1["name"] == "ReLU" - assert op1["dram_read_bytes"] is None # not set => None preserved + assert "dram_read_bytes" not in op1 # unset fields are omitted, not null # Summary round-trip assert parsed["summary"]["time_us"] == 270.5 @@ -202,9 +192,10 @@ def test_round_trip_json(): def test_csv_parser_operator_count(): """CSV parser finds the expected number of operators.""" - data = parse_qnn_profiling_csv(FIXTURE_DIR / "optrace_resnet50.csv") + samples = parse_qnn_profiling_csv(FIXTURE_DIR / "optrace_resnet50.csv") + operators = _csv_operator_metrics(samples) # ResNet-50 produces ~79 aggregated QNN ops from the fixture - assert len(data["operators"]) > 50 + assert len(operators) > 50 def test_qhas_parser_operator_count(): @@ -216,9 +207,9 @@ def test_qhas_parser_operator_count(): def test_cross_parser_top_operator_is_conv(): """Both parsers should show Conv as the top operator for ResNet.""" - # CSV: operators are sorted by cycles descending - csv_data = parse_qnn_profiling_csv(FIXTURE_DIR / "optrace_resnet50.csv") - top_csv = csv_data["operators"][0]["name"].lower() + # CSV: aggregated operators are sorted by duration descending + samples = parse_qnn_profiling_csv(FIXTURE_DIR / "optrace_resnet50.csv") + top_csv = _csv_operator_metrics(samples)[0].name.lower() # QHAS: operators are not pre-sorted; find the one with max duration qhas_raw = json.loads((FIXTURE_DIR / "qhas_resnet50.json").read_text()) diff --git a/tests/unit/optracing/test_qnn_profiler.py b/tests/unit/optracing/test_qnn_profiler.py index bf371bcfe..d12772582 100644 --- a/tests/unit/optracing/test_qnn_profiler.py +++ b/tests/unit/optracing/test_qnn_profiler.py @@ -10,6 +10,7 @@ from unittest.mock import MagicMock, patch import numpy as np +import pytest from winml.modelkit.optracing.qnn.profiler import ( QNNProfiler, @@ -183,10 +184,12 @@ def write_csv_on_del(): po = profiler._build_provider_options(output_dir / "profiling_output.csv") assert po["profiling_level"] == "detailed" - # Now test the CSV parsing path directly. + # Now test the CSV parsing path directly. The fixture holds a single + # sample, so treat it as one measured iteration with no warmup. result = profiler._from_csv( output_dir / "profiling_output.csv", - iterations=5, + iterations=1, + warmup=0, artifacts={"csv": str(output_dir / "profiling_output.csv")}, ) assert result.model == "model.onnx" @@ -197,10 +200,59 @@ def write_csv_on_del(): assert result.summary["hvx_threads"] == 4 +_CSV_HEADER = ( + "Msg Timestamp,Message,Time,Unit of Measurement,Timing Source,Event Level,Event Identifier\n" +) + + +def _make_csv_sample(cycles: int, us: int, conv_cycles: int, add_cycles: int) -> str: + """Build one inference sample block for a basic-mode profiling CSV.""" + return ( + '0,ROOT,4,COUNT,HW,ROOT,"Number of HVX threads used"\n' + f'1,ROOT,{cycles},CYCLES,HW,ROOT,"Accelerator (execute) time (cycles)"\n' + f'2,NODE,{conv_cycles},CYCLES,HW,SUB-EVENT,"Conv2d:OpId_1 (cycles)"\n' + f'3,NODE,{add_cycles},CYCLES,HW,SUB-EVENT,"Add:OpId_2 (cycles)"\n' + f'4,ROOT,{us},US,HW,ROOT,"Accelerator (execute) time"\n' + ) + + +def test_qnn_profiler_from_csv_skips_warmup(tmp_path): + """The first ``warmup`` samples are dropped before computing metrics.""" + # One warmup sample with outlier timing, then two measured samples. + csv_content = ( + _CSV_HEADER + + _make_csv_sample(cycles=900000, us=9000, conv_cycles=5000, add_cycles=3000) + + _make_csv_sample(cycles=100000, us=1000, conv_cycles=500, add_cycles=300) + + _make_csv_sample(cycles=100000, us=1000, conv_cycles=500, add_cycles=300) + ) + csv_path = tmp_path / "profiling_output.csv" + csv_path.write_text(csv_content, encoding="utf-8") + + profiler = QNNProfiler(tmp_path / "model.onnx", output_dir=tmp_path, level="basic") + result = profiler._from_csv(csv_path, iterations=2, warmup=1, artifacts={}) + + # Only the two measured samples survive; the warmup outlier is excluded. + assert result.num_samples == 2 + assert result.summary["accel_execute_cycles"] == 100000 + + +def test_qnn_profiler_from_csv_sample_count_mismatch(tmp_path): + """A measured-sample count that doesn't match ``iterations`` is an error.""" + csv_content = _CSV_HEADER + _make_csv_sample( + cycles=100000, us=1000, conv_cycles=500, add_cycles=300 + ) + csv_path = tmp_path / "profiling_output.csv" + csv_path.write_text(csv_content, encoding="utf-8") + + profiler = QNNProfiler(tmp_path / "model.onnx", output_dir=tmp_path, level="basic") + with pytest.raises(AssertionError): + profiler._from_csv(csv_path, iterations=5, warmup=0, artifacts={}) + + def test_qnn_profiler_empty_artifacts(tmp_path): """Profiler returns empty result when no artifacts exist.""" profiler = QNNProfiler(Path("model.onnx"), output_dir=tmp_path, level="basic") - result = profiler._collect_results(tmp_path / "nonexistent.csv", iterations=5) + result = profiler._collect_results(tmp_path / "nonexistent.csv", iterations=5, warmup=2) assert result.model == "model.onnx" assert len(result.operators) == 0 assert result.num_samples == 0 diff --git a/tests/unit/optracing/test_result.py b/tests/unit/optracing/test_result.py index 15e4cf0b6..9f8e446b2 100644 --- a/tests/unit/optracing/test_result.py +++ b/tests/unit/optracing/test_result.py @@ -15,7 +15,8 @@ def test_operator_metrics_to_dict(): assert d["name"] == "Conv2d" assert d["op_path"] == "/layer1/conv/Conv" assert d["duration_us"] == 45.2 - assert d["dram_read_bytes"] is None + # Unset detail-only fields are omitted rather than serialized as null. + assert "dram_read_bytes" not in d def test_operator_metrics_with_detail_fields():