|
| 1 | +import os |
| 2 | +import cv2 |
| 3 | +import sys |
| 4 | +import argparse |
| 5 | +import time |
| 6 | +import numpy as np |
| 7 | +from fastapi import FastAPI, Response |
| 8 | +from fastapi.responses import StreamingResponse |
| 9 | +import uvicorn |
| 10 | +import threading |
| 11 | + |
# Import shared utilities
| 13 | +from py_utils.coco_utils import COCO_test_helper |
| 14 | + |
# Try to import RKNN-Toolkit-Lite2 and record whether it succeeded.
# RKNNLiteModel checks RKNN_LITE_AVAILABLE and raises ImportError when the
# toolkit is missing.
# NOTE(review): the warning below mentions a fallback, but no fallback
# inference path is visible in this file - confirm the wording.
try:
    from rknnlite.api import RKNNLite
    RKNN_LITE_AVAILABLE = True
except ImportError:
    RKNN_LITE_AVAILABLE = False
    print("Warning: RKNN-Toolkit-Lite2 not available, using fallback")
| 22 | + |
# Constant definitions
# Minimum combined score (best class score * box confidence) a candidate
# needs in filter_boxes to be kept.
OBJ_THRESH = 0.25
# Candidates whose overlap ratio with an already kept box exceeds this value
# are discarded in nms_boxes.
NMS_THRESH = 0.45
# Model input size. Presumably (width, height): it is passed to letter_box as
# (IMG_SIZE[1], IMG_SIZE[0]) - confirm against the helper.
IMG_SIZE = (640, 640)
# Label for each class index. Names carry no surrounding whitespace so that
# overlay text is rendered cleanly and exact-name lookups work.
CLASSES = (
    "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat", "traffic light",
    "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant",
    "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite",
    "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife",
    "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa",
    "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave",
    "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
)
| 34 | + |
# FastAPI application object; the route handlers in this file register on it.
app = FastAPI(title="reComputer RK3576 Web Preview")
| 36 | + |
def filter_boxes(boxes, box_confidences, box_class_probs, obj_thresh=None):
    """Filter boxes with object threshold.

    A candidate is kept when its best class score multiplied by its box
    confidence is at least the threshold.

    Args:
        boxes: array with one row per candidate box.
        box_confidences: per-candidate confidence; flattened to 1-D before use.
        box_class_probs: array with one row of class scores per candidate.
        obj_thresh: minimum combined score to keep a candidate. When None
            (the default) the module-level OBJ_THRESH is used.

    Returns:
        Tuple ``(boxes, classes, scores)`` restricted to the surviving
        candidates, where ``classes`` holds the index of the best class and
        ``scores`` the combined score.
    """
    if obj_thresh is None:
        obj_thresh = OBJ_THRESH
    box_confidences = box_confidences.reshape(-1)
    classes = np.argmax(box_class_probs, axis=-1)
    # Combined score is computed once and reused for both the mask and the output.
    combined = np.max(box_class_probs, axis=-1) * box_confidences
    keep = np.where(combined >= obj_thresh)
    return boxes[keep], classes[keep], combined[keep]
| 47 | + |
def nms_boxes(boxes, scores, nms_thresh=None):
    """Suppress non-maximal boxes.

    Boxes are visited from highest to lowest score. Each visited box is kept,
    and every remaining box whose overlap ratio with it exceeds the threshold
    is dropped.

    Args:
        boxes: array of shape (N, 4) whose columns are read as
            (x1, y1, x2, y2).
        scores: array of N scores, one per box.
        nms_thresh: maximum overlap ratio a box may have with a kept box and
            still survive. When None (the default) the module-level
            NMS_THRESH is used.

    Returns:
        List of indices into ``boxes`` for the kept boxes, ordered by
        descending score. Empty when ``boxes`` is empty.
    """
    if nms_thresh is None:
        nms_thresh = NMS_THRESH

    x = boxes[:, 0]
    y = boxes[:, 1]
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]
    areas = w * h

    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        best, rest = order[0], order[1:]
        keep.append(best)

        # Intersection rectangle between the best box and every remaining box.
        left = np.maximum(x[best], x[rest])
        top = np.maximum(y[best], y[rest])
        right = np.minimum(x[best] + w[best], x[rest] + w[rest])
        bottom = np.minimum(y[best] + h[best], y[rest] + h[rest])

        # A small epsilon is added to each extent before clamping at zero.
        overlap_w = np.maximum(0.0, right - left + 0.00001)
        overlap_h = np.maximum(0.0, bottom - top + 0.00001)
        inter = overlap_w * overlap_h

        ovr = inter / (areas[best] + areas[rest] - inter)
        # Only boxes that do not overlap the kept box too much stay in play.
        order = rest[ovr <= nms_thresh]
    return keep
| 71 | + |
def dfl(position):
    """Collapse per-side bin logits into one expected value per side.

    The channel axis of ``position`` (shape ``(n, c, h, w)``) is split into
    4 groups of ``c // 4`` bins. A softmax is taken over the bins of each
    group and the result is the softmax-weighted sum of the bin indices.

    Args:
        position: 4-D array of shape ``(n, c, h, w)``.

    Returns:
        Array of shape ``(n, 4, h, w)``.
    """
    batch, channels, rows, cols = position.shape
    sides = 4
    bins = channels // sides
    logits = position.reshape(batch, sides, bins, rows, cols)

    # Softmax over the bin axis; the per-group maximum is subtracted first.
    exps = np.exp(logits - logits.max(axis=2, keepdims=True))
    probs = exps / exps.sum(axis=2, keepdims=True)

    # Bin indices 0..bins-1, broadcast along the bin axis.
    bin_index = np.arange(bins).astype(np.float32).reshape(1, 1, bins, 1, 1)
    return (probs * bin_index).sum(2)
| 83 | + |
def box_process(position):
    """Turn one branch's raw box tensor into corner coordinates.

    The tensor is decoded with ``dfl`` into four per-cell distances. The first
    two are subtracted from the cell centre and the last two are added to it,
    and both corner pairs are then multiplied by the branch stride.

    Args:
        position: 4-D array whose last two axes are the grid height and width.

    Returns:
        Array of shape ``(n, 4, grid_h, grid_w)`` holding the two corners of
        each box.
    """
    grid_h, grid_w = position.shape[2:4]

    # Column and row index of every cell, stacked as channels (col, row).
    cols, rows = np.meshgrid(np.arange(0, grid_w), np.arange(0, grid_h))
    grid = np.concatenate(
        (cols.reshape(1, 1, grid_h, grid_w), rows.reshape(1, 1, grid_h, grid_w)),
        axis=1,
    )

    # NOTE(review): the first stride entry (IMG_SIZE[1] // grid_h) scales the
    # column channel and the second scales the row channel. This looks swapped
    # but is harmless while both strides are equal - confirm before changing.
    stride = np.array([IMG_SIZE[1] // grid_h, IMG_SIZE[0] // grid_w]).reshape(1, 2, 1, 1)

    distances = dfl(position)
    centers = grid + 0.5
    near_corner = centers - distances[:, 0:2, :, :]
    far_corner = centers + distances[:, 2:4, :, :]
    return np.concatenate((near_corner * stride, far_corner * stride), axis=1)
| 96 | + |
def post_process(input_data):
    """Convert raw model outputs into final boxes, class ids and scores.

    The outputs are treated as three branches of equal length. In each branch
    the first tensor is decoded by ``box_process`` and the second is used as
    the class scores; box confidence is a tensor of ones. Candidates are
    thresholded by ``filter_boxes`` and then suppressed per class by
    ``nms_boxes``.

    Args:
        input_data: sequence of output tensors, or None.

    Returns:
        Tuple ``(boxes, classes, scores)`` of concatenated arrays, or
        ``(None, None, None)`` when ``input_data`` is None or nothing
        survives.
    """
    if input_data is None:
        return None, None, None

    branch_count = 3
    tensors_per_branch = len(input_data) // branch_count

    def to_rows(tensor):
        # Move the channel axis last, then flatten to one row per grid cell.
        channels = tensor.shape[1]
        return tensor.transpose(0, 2, 3, 1).reshape(-1, channels)

    box_parts, conf_parts, score_parts = [], [], []
    for branch in range(branch_count):
        base = tensors_per_branch * branch
        class_map = input_data[base + 1]
        box_parts.append(to_rows(box_process(input_data[base])))
        conf_parts.append(to_rows(class_map))
        score_parts.append(
            to_rows(np.ones_like(class_map[:, :1, :, :], dtype=np.float32))
        )

    boxes, classes, scores = filter_boxes(
        np.concatenate(box_parts),
        np.concatenate(score_parts),
        np.concatenate(conf_parts),
    )

    # Run suppression separately for every class that has candidates.
    kept_boxes, kept_classes, kept_scores = [], [], []
    for class_id in set(classes):
        members = np.where(classes == class_id)
        class_boxes = boxes[members]
        class_labels = classes[members]
        class_scores = scores[members]
        keep = nms_boxes(class_boxes, class_scores)
        if len(keep) != 0:
            kept_boxes.append(class_boxes[keep])
            kept_classes.append(class_labels[keep])
            kept_scores.append(class_scores[keep])

    if not (kept_classes or kept_scores):
        return None, None, None

    return (
        np.concatenate(kept_boxes),
        np.concatenate(kept_classes),
        np.concatenate(kept_scores),
    )
| 137 | + |
def draw(image, boxes, scores, classes):
    """Draw each detection's rectangle and label onto ``image`` in place.

    Args:
        image: image array passed to the OpenCV drawing calls.
        boxes: iterable of 4-value boxes; the first two values are used as one
            corner of the rectangle and the last two as the opposite corner.
        scores: iterable of scores, one per box.
        classes: iterable of indices into CLASSES, one per box.
    """
    for box, score, class_id in zip(boxes, scores, classes):
        x1, y1, x2, y2 = (int(value) for value in box)
        label = f'{CLASSES[class_id]} {score:.2f}'
        cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 2)
        # The label is anchored 6 pixels above the first corner.
        cv2.putText(image, label, (x1, y1 - 6),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
| 144 | + |
class RKNNLiteModel:
    """Wrapper around an RKNNLite runtime loaded from an ``.rknn`` file."""

    def __init__(self, model_path):
        """Load the model file and initialise the runtime.

        Args:
            model_path: path to the ``.rknn`` model file.

        Raises:
            ImportError: RKNN-Toolkit-Lite2 could not be imported.
            FileNotFoundError: ``model_path`` does not exist.
            RuntimeError: loading the model or initialising the runtime
                returned a non-zero code. RuntimeError is a subclass of
                Exception, so callers catching Exception still work.
        """
        if not RKNN_LITE_AVAILABLE:
            raise ImportError("RKNN-Toolkit-Lite2 is not available")
        if not os.path.exists(model_path):
            raise FileNotFoundError(f"RKNN model file not found: {model_path}")
        self.rknn_lite = RKNNLite()
        try:
            print('Loading RKNN model...')
            ret = self.rknn_lite.load_rknn(model_path)
            if ret != 0:
                raise RuntimeError(f"Load RKNN model failed with error code: {ret}")
            print('Initializing runtime...')
            # RK3576 uses the default initialisation
            ret = self.rknn_lite.init_runtime()
            if ret != 0:
                raise RuntimeError(f"Init runtime failed with error code: {ret}")
        except Exception:
            # Do not leave a half-initialised runtime behind on failure.
            self.release()
            raise

    def run(self, input_data):
        """Run inference on one input and return the runtime's outputs."""
        return self.rknn_lite.inference(inputs=[input_data])

    def release(self):
        """Release the underlying runtime; safe to call more than once."""
        rknn_lite = getattr(self, "rknn_lite", None)
        if rknn_lite is not None:
            rknn_lite.release()
            self.rknn_lite = None
| 164 | + |
class VideoStreamer:
    """Read frames from a capture source, run detection and emit MJPEG parts.

    The video_feed route calls ``generate_frames()`` on one shared instance
    for every request, so several generators can run at the same time. The
    capture, the model and the letterbox helper are therefore only used while
    holding ``self.lock``.
    """

    # Consecutive failed reads tolerated before a stream is ended.
    MAX_READ_FAILURES = 50
    # Pause between retries after a failed read, in seconds.
    READ_RETRY_DELAY = 0.02

    def __init__(self, model_path, source):
        """Open the model and the capture source.

        Args:
            model_path: path handed to RKNNLiteModel.
            source: value handed to cv2.VideoCapture.
        """
        self.model = RKNNLiteModel(model_path)
        self.cap = cv2.VideoCapture(source)
        self.co_helper = COCO_test_helper(enable_letter_box=True)
        self.fps_counter = 0
        self.inference_time = 0
        self.lock = threading.Lock()

    def _render_frame(self, frame):
        """Detect on ``frame``, draw the overlays and JPEG-encode the result.

        The caller must hold ``self.lock``.

        Returns:
            The encoded JPEG bytes, or None when encoding failed.
        """
        # Preprocess
        img = self.co_helper.letter_box(im=frame.copy(), new_shape=(IMG_SIZE[1], IMG_SIZE[0]), pad_color=(0, 0, 0))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        input_data = np.expand_dims(img, axis=0)

        # Inference, timed with a monotonic clock so clock adjustments cannot
        # produce a negative or skewed interval.
        started = time.perf_counter()
        outputs = self.model.run(input_data)
        self.inference_time = time.perf_counter() - started

        if self.inference_time > 0:
            inf_fps = 1.0 / self.inference_time
            # Exponential smoothing, seeded with the first measurement.
            if self.fps_counter > 0:
                self.fps_counter = 0.9 * self.fps_counter + 0.1 * inf_fps
            else:
                self.fps_counter = inf_fps

        # Postprocess
        boxes, classes, scores = post_process(outputs)
        if boxes is not None:
            # presumably get_real_box relies on state recorded by letter_box
            # above, which is one reason this runs under the lock - confirm.
            draw(frame, self.co_helper.get_real_box(boxes), scores, classes)

        # Draw stats
        cv2.putText(frame, f'NPU FPS: {self.fps_counter:.1f}', (20, 40),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.putText(frame, f'Inference: {self.inference_time*1000:.1f}ms', (20, 80),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)

        encoded, buffer = cv2.imencode('.jpg', frame)
        return buffer.tobytes() if encoded else None

    def generate_frames(self):
        """Yield multipart MJPEG chunks, one per processed frame.

        After a failed read the capture position is reset to frame 0, as
        before, so a video file loops. The read is retried after a short
        pause. The generator ends once MAX_READ_FAILURES reads have failed in
        a row, instead of spinning forever on a dead source. Frames that fail
        to encode are skipped.
        """
        failed_reads = 0
        while True:
            jpeg = None
            with self.lock:
                success, frame = self.cap.read()
                if success:
                    jpeg = self._render_frame(frame)
                else:
                    self.cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

            if not success:
                failed_reads += 1
                if failed_reads >= self.MAX_READ_FAILURES:
                    break
                # Sleep outside the lock so other streams are not blocked.
                time.sleep(self.READ_RETRY_DELAY)
                continue

            failed_reads = 0
            if jpeg is None:
                continue

            # Yield outside the lock: a slow client must not stall the others.
            yield (b'--frame\r\n'
                   b'Content-Type: image/jpeg\r\n\r\n' + jpeg + b'\r\n')
| 213 | + |
# Shared VideoStreamer instance. It stays None until the __main__ block
# assigns it, which does not happen when the app is served by importing it.
streamer = None


@app.get("/api/video_feed")
async def video_feed():
    """Stream annotated frames as ``multipart/x-mixed-replace``.

    Returns a 503 plain-text response when no streamer has been created yet,
    instead of failing with an AttributeError on None.
    """
    if streamer is None:
        return Response(content="Video streamer is not initialized",
                        status_code=503, media_type="text/plain")
    return StreamingResponse(streamer.generate_frames(),
                             media_type="multipart/x-mixed-replace; boundary=frame")
| 220 | + |
@app.get("/")
async def index():
    """Serve the static viewer page that embeds the ``/api/video_feed`` stream."""
    page = """
    <html>
        <head><title>reComputer RK3576 Web Preview</title></head>
        <body style="background-color: #1a1a1a; color: white; text-align: center; font-family: sans-serif;">
            <h1>reComputer RK3576 Real-time Detection</h1>
            <div style="margin: 20px auto; display: inline-block; border: 5px solid #333; border-radius: 10px; overflow: hidden;">
                <img src="/api/video_feed" style="max-width: 100%; height: auto;">
            </div>
            <p>Streaming via FastAPI + MJPEG</p>
        </body>
    </html>
    """
    return Response(content=page, media_type="text/html")
| 235 | + |
if __name__ == "__main__":
    # Command-line entry point: parse options, build the streamer, start the server.
    cli = argparse.ArgumentParser(description="RK3576 YOLO Web Detection")
    cli.add_argument('--model_path', type=str, default='model/yolo11n.rknn', help='path to rknn model')
    cli.add_argument('--source', type=str, default='1', help='camera id or video path')
    cli.add_argument('--host', type=str, default='0.0.0.0', help='host address')
    cli.add_argument('--port', type=int, default=8000, help='port number')
    args = cli.parse_args()

    # An all-digit source is converted to an int (camera id); anything else
    # is passed through unchanged as a string.
    source = int(args.source) if args.source.isdigit() else args.source

    # Assigned at module level so the video_feed route can reach it.
    streamer = VideoStreamer(args.model_path, source)
    uvicorn.run(app, host=args.host, port=args.port)
0 commit comments