1- import src .benchmark_class
2- from benchmark .benchmark_utils import run_benchmark
3- from src .benchmark_class import PyTorchBenchmark , ONNXBenchmark , OVBenchmark
4- import openvino as ov
1+ import time
2+ from typing import Tuple
3+
4+ from abc import ABC , abstractmethod
5+ import numpy as np
56import torch
7+ import torch .backends .cudnn as cudnn
8+ import logging
69import onnxruntime as ort
10+ import openvino as ov
11+
12+ # Configure logging
13+ logging .basicConfig (filename = "model.log" , level = logging .INFO )
14+
15+
class Benchmark(ABC):
    """
    Abstract base class shared by all model benchmarks.

    Holds the two run counts every concrete benchmark needs: the number of
    timed runs and the number of untimed warmup runs that precede them.
    """

    def __init__(self, nruns: int = 100, nwarmup: int = 50):
        """
        :param nruns: Number of timed runs used for the average.
        :param nwarmup: Number of warmup runs executed before timing.
        """
        self.nruns = nruns
        self.nwarmup = nwarmup

    @abstractmethod
    def run(self):
        """Execute the benchmark; concrete subclasses must implement this."""
31+
32+
class PyTorchBenchmark:
    """
    Benchmark wall-clock inference time of a PyTorch model.

    NOTE(review): unlike ONNXBenchmark/OVBenchmark this class does not
    inherit from Benchmark — confirm whether that is intentional.
    """

    def __init__(
        self,
        model: torch.nn.Module,
        device: str = "cuda",
        input_shape: Tuple[int, int, int, int] = (32, 3, 224, 224),
        dtype: torch.dtype = torch.float32,
        nwarmup: int = 50,
        nruns: int = 100,
    ) -> None:
        """
        Initialize the Benchmark object.

        :param model: The model to be benchmarked.
        :param device: The device to run the benchmark on ("cpu" or "cuda").
        :param input_shape: The shape of the input data.
        :param dtype: The data type to be used in the benchmark (typically torch.float32 or torch.float16).
        :param nwarmup: The number of warmup runs before timing.
        :param nruns: The number of runs for timing.
        """
        self.model = model
        self.device = device
        self.input_shape = input_shape
        self.dtype = dtype
        self.nwarmup = nwarmup
        self.nruns = nruns

        cudnn.benchmark = True  # Enable cuDNN benchmarking optimization

    def _sync(self) -> None:
        # FIX: the old code called torch.cuda.synchronize() unconditionally,
        # which raises on CPU-only builds or when device="cpu". Only sync
        # when we are actually timing CUDA kernels (they launch async).
        if self.device == "cuda" and torch.cuda.is_available():
            torch.cuda.synchronize()

    def run(self):
        """
        Run the benchmark with the given model, input shape, and other parameters.

        Logs the average batch time and prints progress every 10 iterations.

        :return: Average batch time in milliseconds over the timed runs.
        """
        # Prepare input data on the target device / dtype
        input_data = torch.randn(self.input_shape).to(self.device).to(self.dtype)

        # Warm up (untimed) so lazy initialization and cuDNN autotuning
        # do not pollute the measurements
        print("Warm up ...")
        with torch.no_grad():
            for _ in range(self.nwarmup):
                _ = self.model(input_data)
                self._sync()

        # Timed runs
        print("Start timing ...")
        timings = []
        with torch.no_grad():
            for i in range(1, self.nruns + 1):
                start_time = time.time()
                _ = self.model(input_data)
                self._sync()
                timings.append(time.time() - start_time)

                if i % 10 == 0:
                    print(
                        f"Iteration {i}/{self.nruns}, ave batch time {np.mean(timings) * 1000:.2f} ms"
                    )

        avg_ms = float(np.mean(timings)) * 1000
        # Lazy %-style args avoid formatting when the log level is disabled
        logging.info("Average batch time: %.2f ms", avg_ms)
        return avg_ms
95+
96+
class ONNXBenchmark(Benchmark):
    """
    A class used to benchmark the performance of an ONNX model.
    """

    def __init__(
        self,
        ort_session: ort.InferenceSession,
        input_shape: tuple,
        nruns: int = 100,
        nwarmup: int = 50,
    ):
        """
        :param ort_session: The ONNX Runtime inference session to benchmark.
        :param input_shape: The shape of the model input (batch dim first).
        :param nruns: The number of runs for timing.
        :param nwarmup: The number of warmup runs before timing.
        """
        # FIX: the old code forwarded only nruns to super() and then
        # re-assigned both attributes by hand; forward both counts once.
        super().__init__(nruns=nruns, nwarmup=nwarmup)
        self.ort_session = ort_session
        self.input_shape = input_shape

    def run(self):
        """
        Warm up, then time self.nruns inferences.

        :return: Average inference time in milliseconds.
        """
        print("Warming up ...")
        # Adjusting the batch size in the input shape to match the expected input size of the model.
        input_shape = (1,) + self.input_shape[1:]
        input_data = np.random.randn(*input_shape).astype(np.float32)

        for _ in range(self.nwarmup):  # Warm-up runs
            _ = self.ort_session.run(None, {"input": input_data})

        print("Starting benchmark ...")
        timings = []

        for i in range(1, self.nruns + 1):
            start_time = time.time()
            _ = self.ort_session.run(None, {"input": input_data})
            timings.append(time.time() - start_time)

            if i % 10 == 0:
                print(
                    f"Iteration {i}/{self.nruns}, ave batch time {np.mean(timings) * 1000:.2f} ms"
                )

        avg_time = np.mean(timings) * 1000
        # Lazy %-style args avoid formatting when the log level is disabled
        logging.info("Average ONNX inference time: %.2f ms", avg_time)
        return avg_time
141+
142+
class OVBenchmark(Benchmark):
    """Benchmark wall-clock inference time of an OpenVINO model."""

    def __init__(
        self, model: ov.frontend.FrontEnd, input_shape: Tuple[int, int, int, int]
    ):
        """
        Initialize the OVBenchmark with the OpenVINO model and the input shape.

        :param model: ov.frontend.FrontEnd
            The OpenVINO model.
        :param input_shape: Tuple[int, int, int, int]
            The shape of the model input.
        """
        super().__init__(nruns=100, nwarmup=50)
        self.ov_model = model
        self.core = ov.Core()
        self.compiled_model = None
        self.input_shape = input_shape
        self.dummy_input = np.random.randn(*input_shape).astype(np.float32)

    def warmup(self):
        """
        Compile the OpenVINO model for optimal execution on available hardware.
        """
        self.compiled_model = self.core.compile_model(self.ov_model, "AUTO")

    def inference(self, input_data) -> dict:
        """
        Perform inference on the input data using the compiled OpenVINO model.

        :param input_data: np.ndarray
            The input data for the model.
        :return: dict
            The model's output as a dictionary.
        """
        outputs = self.compiled_model(inputs={"input": input_data})
        return outputs

    def run(self):
        """
        Run the benchmark on the OpenVINO model: compile once, warm up with
        real inference passes, then measure the average inference time.

        :return: Average inference time in milliseconds.
        """
        logging.info("Warming up ...")
        # BUG FIX: the old code re-compiled the model on every warmup
        # iteration (50 compilations). Compile once, then warm up by
        # actually running inference so caches and threads spin up.
        self.warmup()
        for _ in range(self.nwarmup):
            _ = self.inference(self.dummy_input)

        # Benchmarking
        total_time = 0.0
        for i in range(1, self.nruns + 1):
            start_time = time.time()
            _ = self.inference(self.dummy_input)
            total_time += time.time() - start_time

            if i % 10 == 0:
                print(
                    f"Iteration {i}/{self.nruns}, ave batch time {total_time / i * 1000:.2f} ms"
                )

        avg_time = total_time / self.nruns
        # Lazy %-style args avoid formatting when the log level is disabled
        logging.info("Average inference time: %.2f ms", avg_time * 1000)
        return avg_time * 1000
7206
8207
def benchmark_onnx_model(ort_session: ort.InferenceSession):
    """Run the shared benchmark harness against an ONNX Runtime session."""
    run_benchmark(None, None, None, ort_session=ort_session, onnx=True)
11210
12211
def benchmark_ov_model(ov_model: ov.CompiledModel) -> OVBenchmark:
    """
    Benchmark an OpenVINO model and return the populated benchmark object.

    FIX: a merge left two consecutive ``def`` header lines (the old one
    annotated ``src.benchmark_class.OVBenchmark``), which is a syntax
    error; keep only the current header.

    :param ov_model: The compiled OpenVINO model to benchmark.
    :return: The OVBenchmark instance after its run() has completed.
    """
    ov_benchmark = OVBenchmark(ov_model, input_shape=(1, 3, 224, 224))
    ov_benchmark.run()
    return ov_benchmark
17216
18217
def benchmark_cuda_model(cuda_model: torch.nn.Module, device: str, dtype: torch.dtype):
    """Run the shared benchmark harness for a PyTorch model on CUDA."""
    run_benchmark(model=cuda_model, device=device, dtype=dtype)
220+
221+
def run_benchmark(
    model: torch.nn.Module,
    device: str,
    dtype: torch.dtype,
    ort_session: ort.InferenceSession = None,
    onnx: bool = False,
) -> float:
    """
    Run and log the benchmark for the given model, device, and dtype.

    :param model: The model to be benchmarked (ignored when ``onnx`` is True).
    :param device: The device to run the benchmark on ("cpu" or "cuda").
    :param dtype: The data type to be used in the benchmark (typically torch.float32 or torch.float16).
    :param ort_session: The ONNX Runtime session, used when ``onnx`` is True.
    :param onnx: If True, benchmark ``ort_session`` instead of ``model``.
    :return: Average batch time in milliseconds (previously discarded).
    """
    if onnx:
        # FIX: dropped the pointless f-prefix on a placeholder-free string
        logging.info("Running Benchmark for ONNX")
        benchmark = ONNXBenchmark(ort_session, input_shape=(32, 3, 224, 224))
    else:
        # Lazy %-style args avoid formatting when the log level is disabled
        logging.info("Running Benchmark for %s and precision %s", device.upper(), dtype)
        benchmark = PyTorchBenchmark(model, device=device, dtype=dtype)
    # FIX: propagate the measured average instead of discarding it
    return benchmark.run()
0 commit comments