Skip to content

Commit fa10fde

Browse files
authored
Extensive Refactoring and Bug Fixes (#4)
Refactor and Bug Fix: Enhance Codebase and Address Identified Issues - Refactor main function into modular units across different files. - Fix bugs related to model prediction and benchmarking. - Ensure accurate computation of average times in benchmarks. - Enhance logging to include device details during prediction.
1 parent d06a144 commit fa10fde

5 files changed

Lines changed: 272 additions & 40 deletions

File tree

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,9 @@ OpenVINO is a toolkit from Intel that optimizes deep learning model inference fo
124124
4. Perform inference on the provided image using the OpenVINO model.
125125
5. Benchmark results, including average inference time, are logged for the OpenVINO model.
126126

127+
## Benchmarking and Visualization
128+
The results of the benchmarks for all modes are saved and visualized in a bar chart, showcasing the average inference times across different backends. The visualization aids in comparing the performance gains achieved with different optimizations.
129+
127130
#### Requirements
128131
Ensure you have installed the OpenVINO Toolkit and the necessary dependencies to use OpenVINO's model optimizer and inference engine.
129132

benchmark/benchmark_models.py

Lines changed: 229 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,244 @@
1-
import src.benchmark_class
2-
from benchmark.benchmark_utils import run_benchmark
3-
from src.benchmark_class import PyTorchBenchmark, ONNXBenchmark, OVBenchmark
4-
import openvino as ov
1+
import time
2+
from typing import Tuple
3+
4+
from abc import ABC, abstractmethod
5+
import numpy as np
56
import torch
7+
import torch.backends.cudnn as cudnn
8+
import logging
69
import onnxruntime as ort
10+
import openvino as ov
11+
12+
# Configure logging
13+
logging.basicConfig(filename="model.log", level=logging.INFO)
14+
15+
16+
class Benchmark(ABC):
    """Abstract base class shared by all inference benchmarks.

    Holds the two counters every concrete benchmark uses: the number of
    timed iterations and the number of warm-up iterations run beforehand.
    """

    def __init__(self, nruns: int = 100, nwarmup: int = 50):
        """
        :param nruns: Number of timed iterations.
        :param nwarmup: Number of warm-up iterations performed before timing.
        """
        self.nruns = nruns
        self.nwarmup = nwarmup

    @abstractmethod
    def run(self):
        """Execute the benchmark; concrete subclasses implement the timing."""
31+
32+
33+
class PyTorchBenchmark:
    """Benchmark wall-clock inference latency of a PyTorch model."""

    def __init__(
        self,
        model: torch.nn.Module,
        device: str = "cuda",
        input_shape: Tuple[int, int, int, int] = (32, 3, 224, 224),
        dtype: torch.dtype = torch.float32,
        nwarmup: int = 50,
        nruns: int = 100,
    ) -> None:
        """
        Initialize the Benchmark object.

        :param model: The model to be benchmarked.
        :param device: The device to run the benchmark on ("cpu" or "cuda").
        :param input_shape: The shape of the input data.
        :param dtype: The data type to be used in the benchmark (typically
            torch.float32 or torch.float16).
        :param nwarmup: The number of warmup runs before timing.
        :param nruns: The number of runs for timing.
        """
        self.model = model
        self.device = device
        self.input_shape = input_shape
        self.dtype = dtype
        self.nwarmup = nwarmup
        self.nruns = nruns

        cudnn.benchmark = True  # Enable cuDNN autotuner optimization

    def _synchronize(self) -> None:
        # CUDA kernels launch asynchronously; synchronize so wall-clock
        # timing is accurate. Bug fix: the original called
        # torch.cuda.synchronize() unconditionally, which raises on
        # CPU-only builds / when device="cpu".
        if self.device.startswith("cuda") and torch.cuda.is_available():
            torch.cuda.synchronize()

    def run(self):
        """
        Run the benchmark with the given model, input shape, and other
        parameters. Logs and returns the average batch time in milliseconds.

        :return: Average per-batch inference time in milliseconds.
        """
        # Prepare input data on the target device, in the requested precision.
        input_data = torch.randn(self.input_shape).to(self.device).to(self.dtype)

        # Warm up so lazy initialization / autotuning stays out of the timing.
        print("Warm up ...")
        with torch.no_grad():
            for _ in range(self.nwarmup):
                _ = self.model(input_data)
        self._synchronize()

        # Start timing
        print("Start timing ...")
        timings = []
        with torch.no_grad():
            for i in range(1, self.nruns + 1):
                start_time = time.time()
                _ = self.model(input_data)
                self._synchronize()
                end_time = time.time()
                timings.append(end_time - start_time)

                if i % 10 == 0:
                    print(
                        f"Iteration {i}/{self.nruns}, ave batch time {np.mean(timings) * 1000:.2f} ms"
                    )

        avg_ms = float(np.mean(timings)) * 1000
        # Lazy %-style args avoid formatting when INFO logging is disabled.
        logging.info("Average batch time: %.2f ms", avg_ms)
        return avg_ms
95+
96+
97+
class ONNXBenchmark(Benchmark):
    """
    A class used to benchmark the performance of an ONNX model.
    """

    def __init__(
        self,
        ort_session: ort.InferenceSession,
        input_shape: tuple,
        nruns: int = 100,
        nwarmup: int = 50,
    ):
        """
        :param ort_session: ONNX Runtime session the inference is executed through.
        :param input_shape: Shape of the model input; the batch dimension is
            overridden to 1 at run time to match the exported model.
        :param nruns: The number of runs for timing.
        :param nwarmup: The number of warmup runs before timing.
        """
        # Bug fix: the original called super().__init__(nruns) — dropping
        # nwarmup — and then redundantly re-assigned both counters. Forward
        # both to the base class instead.
        super().__init__(nruns=nruns, nwarmup=nwarmup)
        self.ort_session = ort_session
        self.input_shape = input_shape

    def run(self):
        """
        Warm up, then time `nruns` inference calls and return the average
        inference time in milliseconds.
        """
        print("Warming up ...")
        # Adjusting the batch size in the input shape to match the expected
        # input size of the model.
        input_shape = (1,) + self.input_shape[1:]
        input_data = np.random.randn(*input_shape).astype(np.float32)

        for _ in range(self.nwarmup):  # Warm-up runs
            _ = self.ort_session.run(None, {"input": input_data})

        print("Starting benchmark ...")
        timings = []

        for i in range(1, self.nruns + 1):
            start_time = time.time()
            _ = self.ort_session.run(None, {"input": input_data})
            end_time = time.time()
            timings.append(end_time - start_time)

            if i % 10 == 0:
                print(
                    f"Iteration {i}/{self.nruns}, ave batch time {np.mean(timings) * 1000:.2f} ms"
                )

        avg_time = np.mean(timings) * 1000
        # Lazy %-style args avoid formatting when INFO logging is disabled.
        logging.info("Average ONNX inference time: %.2f ms", avg_time)
        return avg_time
141+
142+
143+
class OVBenchmark(Benchmark):
    def __init__(
        self, model: ov.frontend.FrontEnd, input_shape: Tuple[int, int, int, int]
    ):
        """
        Initialize the OVBenchmark with the OpenVINO model and the input shape.

        :param model: ov.frontend.FrontEnd
            The OpenVINO model.
        :param input_shape: Tuple[int, int, int, int]
            The shape of the model input.
        """
        # Bug fix: the original skipped super().__init__ and set the counters
        # by hand; use the base class defaults (nruns=100, nwarmup=50).
        super().__init__()
        self.ov_model = model
        self.core = ov.Core()
        self.compiled_model = None
        self.input_shape = input_shape
        self.dummy_input = np.random.randn(*input_shape).astype(np.float32)

    def warmup(self):
        """
        Compile the OpenVINO model for optimal execution on available hardware.
        """
        self.compiled_model = self.core.compile_model(self.ov_model, "AUTO")

    def inference(self, input_data) -> dict:
        """
        Perform inference on the input data using the compiled OpenVINO model.

        :param input_data: np.ndarray
            The input data for the model.
        :return: dict
            The model's output as a dictionary.
        """
        outputs = self.compiled_model(inputs={"input": input_data})
        return outputs

    def run(self):
        """
        Run the benchmark on the OpenVINO model: compile it once, warm up with
        real inference passes, then measure the average inference time over
        `nruns` runs.

        :return: Average inference time in milliseconds.
        """
        # Bug fix: the original loop called self.warmup() nwarmup times,
        # re-COMPILING the model 50 times and never warming up inference.
        # Compile once, then warm up with actual inference passes.
        logging.info("Warming up ...")
        self.warmup()
        for _ in range(self.nwarmup):
            _ = self.inference(self.dummy_input)

        # Benchmarking
        total_time = 0.0
        for i in range(1, self.nruns + 1):
            start_time = time.time()
            _ = self.inference(self.dummy_input)
            total_time += time.time() - start_time

            if i % 10 == 0:
                print(
                    f"Iteration {i}/{self.nruns}, ave batch time {total_time / i * 1000:.2f} ms"
                )

        avg_time = total_time / self.nruns
        # Lazy %-style args avoid formatting when INFO logging is disabled.
        logging.info("Average inference time: %.2f ms", avg_time * 1000)
        return avg_time * 1000
7206

8207

9208
def benchmark_onnx_model(ort_session: ort.InferenceSession):
    """Benchmark an ONNX Runtime session via the shared benchmark runner."""
    run_benchmark(model=None, device=None, dtype=None, ort_session=ort_session, onnx=True)
11210

12211

13-
def benchmark_ov_model(ov_model: ov.CompiledModel) -> OVBenchmark:
    """Run the OpenVINO benchmark and return the benchmark object.

    The returned OVBenchmark carries the compiled model, which callers use
    for subsequent prediction.
    """
    benchmark = OVBenchmark(ov_model, input_shape=(1, 3, 224, 224))
    benchmark.run()
    return benchmark
17216

18217

19218
def benchmark_cuda_model(cuda_model: torch.nn.Module, device: str, dtype: torch.dtype):
    """Benchmark a PyTorch model on the given device and precision."""
    run_benchmark(cuda_model, device=device, dtype=dtype)
220+
221+
222+
def run_benchmark(
    model: torch.nn.Module,
    device: str,
    dtype: torch.dtype,
    ort_session: ort.InferenceSession = None,
    onnx: bool = False,
) -> None:
    """
    Run and log the benchmark for the given model, device, and dtype.

    :param model: The model to be benchmarked (ignored when onnx=True).
    :param device: The device to run the benchmark on ("cpu" or "cuda").
    :param dtype: The data type to be used in the benchmark (typically torch.float32 or torch.float16).
    :param ort_session: ONNX Runtime session; required when onnx=True.
    :param onnx: When True, benchmark ort_session instead of the PyTorch model.
    :raises ValueError: If onnx=True but no ort_session was supplied.
    """
    if onnx:
        # Fail fast with a clear message instead of an AttributeError deep
        # inside ONNXBenchmark.run().
        if ort_session is None:
            raise ValueError("ort_session is required when onnx=True")
        # Bug fix: the original used a pointless f-string with no placeholders;
        # lazy %-style args also avoid formatting when INFO is disabled.
        logging.info("Running Benchmark for ONNX")
        benchmark = ONNXBenchmark(ort_session, input_shape=(32, 3, 224, 224))
    else:
        logging.info("Running Benchmark for %s and precision %s", device.upper(), dtype)
        benchmark = PyTorchBenchmark(model, device=device, dtype=dtype)
    benchmark.run()

benchmark/benchmark_utils.py

Lines changed: 8 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -8,32 +8,7 @@
88
import torch
99
import onnxruntime as ort
1010

11-
from src.benchmark_class import PyTorchBenchmark, ONNXBenchmark, OVBenchmark
12-
13-
14-
def run_benchmark(
15-
model: torch.nn.Module,
16-
device: str,
17-
dtype: torch.dtype,
18-
ort_session: ort.InferenceSession = None,
19-
onnx: bool = False,
20-
) -> None:
21-
"""
22-
Run and log the benchmark for the given model, device, and dtype.
23-
24-
:param onnx:
25-
:param ort_session:
26-
:param model: The model to be benchmarked.
27-
:param device: The device to run the benchmark on ("cpu" or "cuda").
28-
:param dtype: The data type to be used in the benchmark (typically torch.float32 or torch.float16).
29-
"""
30-
if onnx:
31-
logging.info(f"Running Benchmark for ONNX")
32-
benchmark = ONNXBenchmark(ort_session, input_shape=(32, 3, 224, 224))
33-
else:
34-
logging.info(f"Running Benchmark for {device.upper()} and precision {dtype}")
35-
benchmark = PyTorchBenchmark(model, device=device, dtype=dtype)
36-
benchmark.run()
11+
from benchmark.benchmark_models import PyTorchBenchmark, ONNXBenchmark, OVBenchmark
3712

3813

3914
def run_all_benchmarks(
@@ -110,7 +85,13 @@ def plot_benchmark_results(results: Dict[str, float]):
11085

11186
# Plot
11287
plt.figure(figsize=(10, 6))
113-
ax = sns.barplot(x=data["Time"], y=data["Model"], hue=data["Model"], palette="rocket", legend=False)
88+
ax = sns.barplot(
89+
x=data["Time"],
90+
y=data["Model"],
91+
hue=data["Model"],
92+
palette="rocket",
93+
legend=False,
94+
)
11495

11596
# Adding the actual values on the bars
11697
for index, value in enumerate(data["Time"]):

main.py

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import logging
22
import os.path
3-
43
import torch_tensorrt
54

65
from benchmark.benchmark_models import benchmark_onnx_model, benchmark_ov_model
@@ -9,11 +8,16 @@
98
parse_arguments,
109
init_onnx_model,
1110
init_ov_model,
12-
init_cuda_model, export_onnx_model,
11+
init_cuda_model,
12+
export_onnx_model,
1313
)
1414
from src.image_processor import ImageProcessor
1515
from prediction.prediction_models import *
1616
from src.model import ModelLoader
17+
import warnings
18+
19+
# Filter out the specific warning from torchvision
20+
warnings.filterwarnings("ignore", category=UserWarning, module="torchvision.io.image")
1721

1822
# Configure logging
1923
logging.basicConfig(filename="model.log", level=logging.INFO)
@@ -38,18 +42,27 @@ def main():
3842
ort_session = init_onnx_model(args.onnx_path, model_loader, device)
3943
if args.mode != "all":
4044
benchmark_onnx_model(ort_session)
41-
predict_onnx_model(ort_session, img_batch, args.topk, model_loader.categories)
45+
predict_onnx_model(
46+
ort_session, img_batch, args.topk, model_loader.categories
47+
)
4248

4349
# OpenVINO
4450
if args.mode in ["ov", "all"]:
4551
# Check if ONNX model wasn't exported previously
4652
if not os.path.isfile(args.onnx_path):
47-
export_onnx_model(onnx_path=args.onnx_path, model_loader=model_loader, device=device)
53+
export_onnx_model(
54+
onnx_path=args.onnx_path, model_loader=model_loader, device=device
55+
)
4856

4957
ov_model = init_ov_model(args.onnx_path)
5058
if args.mode != "all":
5159
ov_benchmark = benchmark_ov_model(ov_model)
52-
predict_ov_model(ov_benchmark.compiled_model, img_batch, args.topk, model_loader.categories)
60+
predict_ov_model(
61+
ov_benchmark.compiled_model,
62+
img_batch,
63+
args.topk,
64+
model_loader.categories,
65+
)
5366

5467
# CUDA
5568
if args.mode in ["cuda", "all"]:
@@ -75,11 +88,13 @@ def main():
7588
img_batch = img_batch.to(device)
7689
else:
7790
print("Compiling TensorRT model")
91+
batch_size = 1 if args.mode == "cuda" else 32
7892
model = torch_tensorrt.compile(
7993
model,
80-
inputs=[torch_tensorrt.Input((32, 3, 224, 224), dtype=precision)],
94+
inputs=[torch_tensorrt.Input((batch_size, 3, 224, 224), dtype=precision)],
8195
enabled_precisions={precision},
8296
truncate_long_and_double=True,
97+
require_full_compilation=True,
8398
)
8499
# If it is for TensorRT, determine the mode (FP32 or FP16) and store under a TensorRT key
85100
mode = "fp32" if precision == torch.float32 else "fp16"

0 commit comments

Comments
 (0)