``` py
img = self.precls_trans(cv2.resize(cv2.cvtColor(img, cv2.COLOR_BGR2RGB), (224,224)))

input_shape = torch.ones((1, 3, 224, 224)).cuda()
self.classification_engine = torch2trt(resnet50, [input_shape],
                                       fp16_mode=self.fp16,
                                       max_batch_size=self.cls_trt_max_batchsize,
                                       )
```
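
For orientation, here is a minimal, self-contained version of the same conversion. Loading the pretrained model and the warm-up inference call are assumptions added for completeness; they are not part of the original snippet:

``` py
import torch
import torchvision
from torch2trt import torch2trt

# The module must be in eval mode and on the GPU before conversion
resnet50 = torchvision.models.resnet50(pretrained=True).eval().cuda()

# torch2trt traces the module with an example input of the target shape
dummy_input = torch.ones((1, 3, 224, 224)).cuda()
engine = torch2trt(resnet50, [dummy_input], fp16_mode=True, max_batch_size=4)

# The resulting TRTModule is called like the original module
with torch.no_grad():
    print(engine(dummy_input).shape)  # torch.Size([1, 1000])
```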
``` py
config = torchpipe.parse_toml("resnet50.toml")
self.classification_engine = pipe(config)

self.classification_engine(bin_data)

if TASK_RESULT_KEY not in bin_data.keys():
    print("error decode")
    return results
```
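
To see how this call fits together end to end, here is a hedged sketch of the dict-based calling convention; the image file name and the use of `"node_name"` to pick the entry node are assumptions based on torchpipe's multi-node examples:

``` py
import torchpipe
from torchpipe import pipe, TASK_DATA_KEY, TASK_RESULT_KEY

config = torchpipe.parse_toml("resnet50.toml")
classification_engine = pipe(config)

with open("dog.jpg", "rb") as f:  # hypothetical input image
    raw_bytes = f.read()

# torchpipe passes data through the pipeline as a dict that is
# modified in place; "node_name" selects the entry node of the graph
bin_data = {TASK_DATA_KEY: raw_bytes, "node_name": "cpu_decoder"}
classification_engine(bin_data)

if TASK_RESULT_KEY in bin_data:
    result = bin_data[TASK_RESULT_KEY]  # model output tensor(s)
else:
    print("error decode")
```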

The contents of the toml file are as follows:

``` bash
# Scheduling parameters
batching_timeout = 5
instance_num = 8
precision = "fp16"

## Data decoding
#
# The original decoding output format was BGR
# The DecodeMat backend also defaults to outputting in BGR format
# Since decoding is done on the CPU, DecodeMat is used
# After each node is completed, the name of the next node needs to be
# appended, otherwise the last node is assumed by default
#
[cpu_decoder]
backend = "DecodeMat"
next = "cpu_posdecoder"

## preprocessing: resize, cvtColorMat
# Note:
# The original preprocessing order was resize, cv2.COLOR_BGR2RGB,
# then Normalize.
# However, the normalization step is now integrated into the model
# processing (the [resnet50] node), so the output result after the
# preprocessing in this node is consistent with the preprocessing result
# without normalization.
# After each node is completed, the name of the next node needs to be
# appended, otherwise the last node is assumed by default.
#
[cpu_posdecoder]
next = "resnet50"

#
# This corresponds to 3.1 (3) TensorRT acceleration and 3.1 (2) Normalize
# Note:
# There's a slight difference from the original method of generating
# engines online. Here, the model needs to be first converted to ONNX
# format.
#
# For the conversion method, see [Converting Torch to ONNX].
#
[resnet50]
backend = "SyncTensor[TensorrtTensor]"
min = 1
max = 4
instance_num = 4
model = "/your/model/path/resnet50.onnx"

mean = "123.675, 116.28, 103.53" # 255*"0.485, 0.456, 0.406"
std = "58.395, 57.120, 57.375"   # 255*"0.229, 0.224, 0.225"

# TensorrtTensor
"model::cache" = "/your/model/path/resnet50.trt" # or resnet50.trt.encrypted

```
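
The `[resnet50]` node consumes an ONNX file, so the model must be exported first. Below is a minimal export sketch; the opset version and the dynamic batch axis are assumptions chosen to match the `min = 1` / `max = 4` batch settings above:

``` py
import torch
import torchvision

model = torchvision.models.resnet50(pretrained=True).eval()
dummy = torch.randn(1, 3, 224, 224)

# Export with a dynamic batch dimension so TensorRT can build an
# engine that accepts any batch size between min=1 and max=4
torch.onnx.export(
    model, dummy, "resnet50.onnx",
    opset_version=11,
    input_names=["input"], output_names=["output"],
    dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}},
)
```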

The specific test code can be found at [client_qps.py](https://github.com/torchpipe/torchpipe/blob/develop/examples/resnet50_thrift/client_qps.py).
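
For a rough idea of how such a benchmark is structured, here is an illustrative sketch (it is not the linked script; the `infer` stub stands in for the real Thrift RPC and just sleeps):

``` py
import time
import numpy as np
from concurrent.futures import ThreadPoolExecutor

def infer(raw_bytes):
    # Stand-in for one Thrift RPC to the service; replace with the real client
    time.sleep(0.01)

def timed_call(raw_bytes):
    start = time.perf_counter()
    infer(raw_bytes)
    return (time.perf_counter() - start) * 1000  # per-request latency in ms

raw = b"..."  # raw JPEG bytes of a test image
t0 = time.perf_counter()
with ThreadPoolExecutor(max_workers=10) as ex:  # concurrency of 10
    latencies = list(ex.map(timed_call, [raw] * 1000))
elapsed = time.perf_counter() - t0

print(f"QPS:  {1000 / elapsed:.1f}")
print(f"TP50: {np.percentile(latencies, 50):.2f} ms  "
      f"TP99: {np.percentile(latencies, 99):.2f} ms")
```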

With the same Thrift service interface, testing on a machine with an NVIDIA 3080 GPU and a 36-core CPU at a concurrency of 10, we obtained the following results:

- throughput:

- response time:

| Methods | TP50 | TP99 |
| :-: | :-: | :-: |
| Pure TensorRT | 26.74 | 35.24 |
| Using TorchPipe | 8.89 | 14.28 |