Additional Environment Setup
When setting up the training environment, a few packages were missing, so here are some additions. These are the ones I remember; I have forgotten the rest, so install whatever else turns out to be missing, since they are all plain pip installs anyway.
conda activate rf-detr
pip install onnx
pip install onnxruntime
pip install onnxsim
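After installing, a quick sanity check confirms that onnxruntime imports correctly and shows which execution providers are available (this uses only onnxruntime's public API):
python -c "import onnxruntime; print(onnxruntime.get_available_providers())"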
ONNX Model Export
from rfdetr import RFDETRNano  # RFDETRBase, RFDETRSmall, RFDETRMedium, RFDETRLarge are also available

model = RFDETRNano(pretrain_weights="/home/project_python/rf-detr/runs/nano/checkpoint_best_total.pth")
model.export(output_dir="/home/project_python/rf-detr/runs/nano/")
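The export call above does not use onnxsim, even though it was installed in the environment section. As an optional step, here is a minimal sketch of checking and simplifying the exported graph with onnx and onnxsim; the file name inference_model.onnx is taken from the inference script below, so confirm it matches what your export actually produced:

import onnx
from onnxsim import simplify

# path assumed from the export output directory above
model_path = "/home/project_python/rf-detr/runs/nano/inference_model.onnx"
onnx_model = onnx.load(model_path)

# structural sanity check on the exported graph
onnx.checker.check_model(onnx_model)

# constant-fold and simplify; `ok` reports onnxsim's equivalence check
model_simplified, ok = simplify(onnx_model)
assert ok, "simplified model failed onnxsim's equivalence check"
onnx.save(model_simplified, model_path.replace(".onnx", "_sim.onnx"))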
ONNX Model Inference
I implemented two versions of the preprocessing here, one with torch and one with numpy. In my tests, inference on torch-preprocessed images matches the .pth model exactly, while the numpy version differs slightly from the .pth results, though not enough to matter in practice. Since deployment environments generally do not ship torch, you can pick the numpy preprocessing and port it to C++ for deployment. (A short sketch for comparing the two preprocessing paths directly follows the full script below.)
import cv2
import onnxruntime
import numpy as np
import random
import time
import torchvision.transforms.functional as F
def xywh2xyxy(x):
    """
    Convert bounding boxes from (center x, center y, width, height) to (x1, y1, x2, y2).
    Args:
        x (np.ndarray): input bounding-box array
    Returns:
        np.ndarray: converted bounding-box array
    """
    y = np.copy(x)
    y[..., 0] = x[..., 0] - x[..., 2] / 2  # top-left x
    y[..., 1] = x[..., 1] - x[..., 3] / 2  # top-left y
    y[..., 2] = x[..., 0] + x[..., 2] / 2  # bottom-right x
    y[..., 3] = x[..., 1] + x[..., 3] / 2  # bottom-right y
    return y
def softmax(x):
    """
    Softmax function (defined for reference; the postprocessing below uses sigmoid instead).
    Args:
        x (np.ndarray): input array
    Returns:
        np.ndarray: softmax of the input
    """
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=0)
def sigmoid(x):
    """
    Sigmoid function.
    Args:
        x (np.ndarray): input array
    Returns:
        np.ndarray: element-wise sigmoid of the input
    """
    return 1 / (1 + np.exp(-x))
def get_optimal_font_scale(image_shape, text, font_face=cv2.FONT_HERSHEY_SIMPLEX):
    """
    Pick a font scale automatically based on the image resolution.
    """
    # use the shorter side of the image
    min_dimension = min(image_shape[0], image_shape[1])
    # derive a base font scale from the image size
    base_font_scale = min_dimension / 1000.0
    # clamp the font scale to a sensible range
    font_scale = max(0.5, min(base_font_scale, 2.0))
    return font_scale
def generate_distinct_color():
    """
    Generate a random color with good visual separation.
    """
    # generate the color in HSV space to guarantee high saturation and brightness
    h = random.randint(0, 179)    # hue range in OpenCV is 0-179
    s = random.randint(150, 255)  # saturation 150-255 (avoids washed-out colors)
    v = random.randint(150, 255)  # value 150-255 (avoids overly dark colors)
    # convert HSV to BGR
    hsv = np.uint8([[[h, s, v]]])
    bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
    return tuple(int(x) for x in bgr[0][0])
def get_color_for_class(class_id, color_map):
    """
    Look up the color for a class ID, generating a new one if none exists yet.
    """
    if class_id not in color_map:
        color_map[class_id] = generate_distinct_color()
    return color_map[class_id]
def preprocess_image(image, target_size=(384, 384)):
    """
    Preprocess an image (numpy version).
    Args:
        image (np.ndarray): input image (BGR, as read by cv2)
        target_size (tuple): target size as (width, height)
    Returns:
        tuple: (preprocessed image, scale factors, original size)
    """
    # keep the original size
    h, w = image.shape[:2]
    w_rate = w / target_size[0]
    h_rate = h / target_size[1]
    # resize the image
    resized_image = cv2.resize(image, target_size, interpolation=cv2.INTER_LINEAR)
    # color-space conversion and normalization to [0, 1]
    rgb_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGB)
    normalized_image = rgb_image.astype(np.float32) / 255.0
    # standardize with ImageNet mean and std
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    normalized_image = (normalized_image - mean) / std
    # HWC -> CHW and add the batch dimension
    processed_image = np.transpose(normalized_image, (2, 0, 1))
    processed_image = np.expand_dims(processed_image, axis=0).astype(np.float32)
    return processed_image, (w_rate, h_rate), (w, h)
def preprocess_image_torch(image, target_size=(384, 384)):
    """
    Preprocess an image (torch version; matches the .pth model's results).
    Args:
        image (np.ndarray): input image (BGR, as read by cv2)
        target_size (tuple): target size as (width, height)
    Returns:
        tuple: (preprocessed image, scale factors, original size)
    """
    # note: unlike the numpy version, this normalizes first and resizes afterwards,
    # and performs no explicit BGR -> RGB conversion
    img_tensor = F.to_tensor(image).to("cuda:0")  # change to "cpu" if no GPU is available
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    img_tensor = F.normalize(img_tensor, mean, std)
    img_tensor = F.resize(img_tensor, target_size)
    # keep the original size
    h, w = image.shape[:2]
    w_rate = w / target_size[0]
    h_rate = h / target_size[1]
    return np.expand_dims(img_tensor.cpu().numpy(), axis=0).astype(np.float32), (w_rate, h_rate), (w, h)
def postprocess_detections(bboxes, labels, target_size=(384, 384)):
    """
    Postprocess the raw detections.
    Args:
        bboxes (np.ndarray): bounding-box array (normalized cxcywh)
        labels (np.ndarray): class-logit array
        target_size (tuple): target size as (width, height)
    Returns:
        list: list of processed detections
    """
    results = []
    for i in range(len(bboxes)):
        # convert the bounding-box format
        bbox = xywh2xyxy(bboxes[i])
        # scale the normalized coordinates to the network input size
        bbox[0] *= target_size[0]  # x coordinates times width
        bbox[2] *= target_size[0]  # x coordinates times width
        bbox[1] *= target_size[1]  # y coordinates times height
        bbox[3] *= target_size[1]  # y coordinates times height
        # apply sigmoid and take the best class and its confidence
        label_list = sigmoid(labels[i])
        class_id = np.argmax(label_list)
        conf = label_list[class_id]
        results.append({
            'bbox': bbox,
            'class_id': class_id,
            'confidence': conf
        })
    return results
def draw_detections(image, detections, scale_factors, conf_threshold=0.5):
    """
    Draw the detection results on the image.
    Args:
        image (np.ndarray): input image
        detections (list): list of detections
        scale_factors (tuple): scale factors (w_rate, h_rate)
        conf_threshold (float): confidence threshold
    Returns:
        np.ndarray: image with the detections drawn on it
    """
    w_rate, h_rate = scale_factors
    result_image = image.copy()
    detection_count = 0
    # per-class color cache
    color_map = {}
    # pick the font scale automatically from the image resolution
    font_scale = get_optimal_font_scale(image.shape, "SampleText")
    thickness = max(1, int(font_scale * 2))  # line thickness follows the font scale
    for detection in detections:
        conf = detection['confidence']
        if conf > conf_threshold:
            detection_count += 1
            bbox = detection['bbox']
            class_id = detection['class_id']
            # get a color for this class ID
            color = get_color_for_class(class_id, color_map)
            # map the box back onto the original-size image
            x1 = int(bbox[0] * w_rate)
            y1 = int(bbox[1] * h_rate)
            x2 = int(bbox[2] * w_rate)
            y2 = int(bbox[3] * h_rate)
            # draw the bounding box
            cv2.rectangle(result_image, (x1, y1), (x2, y2), color, thickness)
            # draw the class name and confidence on one line, with a background
            label = f"{class_id} {conf:.2f}"
            # measure the text
            (text_width, text_height), baseline = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, font_scale, thickness)
            # draw the text background
            cv2.rectangle(result_image, (x1, y1 - text_height - baseline - 2),
                          (x1 + text_width, y1), color, -1)
            # draw the text
            cv2.putText(result_image, label, (x1, y1 - baseline),
                        cv2.FONT_HERSHEY_SIMPLEX, font_scale, (255, 255, 255), thickness)
            print(f"detection: class={class_id}, confidence={conf:.2f}")
    print(f"{detection_count} detections above the confidence threshold in total")
    return result_image
def run_detection(model_path, image_path, conf_threshold=0.5):
    """
    Run object detection.
    Args:
        model_path (str): path to the ONNX model
        image_path (str): path to the image
        conf_threshold (float): confidence threshold
    """
    # initialize the model
    model = onnxruntime.InferenceSession(model_path, providers=['CPUExecutionProvider'])
    # read the image
    image = cv2.imread(image_path)
    src_image = image.copy()
    # preprocess the image (swap in preprocess_image_torch for the torch version)
    processed_image, scale_factors, original_size = preprocess_image(image)
    # run inference
    start = time.time()
    output = model.run(["dets", "labels"], {"input": processed_image})
    print(f"inference time: {time.time() - start}")
    bboxes = output[0][0]
    labels = output[1][0]
    # postprocess the detections
    detections = postprocess_detections(bboxes, labels)
    # draw the detections
    result_image = draw_detections(src_image, detections, scale_factors, conf_threshold)
    # save the result
    cv2.imwrite("result_onnx.jpg", result_image)
# entry point
if __name__ == "__main__":
    model_path = "/home/project_python/rf-detr/runs/nano/inference_model.onnx"
    image_path = "/home/project_python/rf-detr/dataset/dtrain_20251030_v1/images/test/gacdbx_1_17424bef_1600_20251029113538995.jpg"
    start = time.time()
    run_detection(model_path, image_path, conf_threshold=0.9)
    print(f"total time: {time.time() - start}")
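To quantify the numpy-vs-torch preprocessing gap mentioned earlier, here is a minimal comparison sketch. It assumes the script above has been saved as infer_onnx.py (a hypothetical module name) so the two functions can be imported, and that any test image is on hand:

import cv2
import numpy as np

# hypothetical module name; point this at wherever the script above is saved
from infer_onnx import preprocess_image, preprocess_image_torch

image = cv2.imread("test.jpg")  # any test image
np_input, _, _ = preprocess_image(image)
torch_input, _, _ = preprocess_image_torch(image)

# the two pipelines differ in operation order (resize-then-normalize vs
# normalize-then-resize) and in channel handling, so some gap is expected
diff = np.abs(np_input - torch_input)
print(f"max abs diff: {diff.max():.6f}, mean abs diff: {diff.mean():.6f}")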
Conclusion
ONNX inference runs at about 72 ms per image; the next article will cover TensorRT inference.