打开微信,使用扫一扫进入页面后,点击右上角菜单,
点击“发送给朋友”或“分享到朋友圈”完成分享
设备:MLU270
网络:FastestDet
下载源码
git clone https://github.com/dog-qiuqiu/FastestDet
参看requirements.txt 文件,由于python版本和torch版本差异,软件包对应的版本会有些区别,供参考:
onnx 1.6.0
onnxruntime 1.10.0
onnxsim 0.4.10
opencv-python 3.4.2.17
tqdm 4.64.1
pycocotools 2.0.6
diff --git a/module/shufflenetv2.py b/module/shufflenetv2.py
index 89d0eb7..80cd875 100644
--- a/module/shufflenetv2.py
+++ b/module/shufflenetv2.py
@@ -54,7 +54,8 @@ class ShuffleV2Block(nn.Module):
return torch.cat((self.branch_proj(x_proj), self.branch_main(x)), 1)
def channel_shuffle(self, x):
- batchsize, num_channels, height, width = x.data.size()
+ #batchsize, num_channels, height, width = x.data.size()
+ batchsize, num_channels, height, width = x.detach().size()
assert (num_channels % 4 == 0)
x = x.reshape(batchsize * num_channels // 2, 2, height * width)
x = x.permute(1, 0, 2)
备注:.data 是较早前的操作,后面版本已经使用detach()进行替换
torchvision.ops.batched_nms pytorch 1.13 对应的是 torchvision.ops.boxes.batched_nms
测试发现 gy, gx dtype存在问题,会导致结果错误,转成float()
diff --git a/utils/tool.py b/utils/tool.py
old mode 100644
new mode 100755
index bc7f07a..9cb945c
--- a/utils/tool.py
+++ b/utils/tool.py
@@ -77,6 +77,9 @@ def handle_preds(preds, device, conf_thresh=0.25, nms_thresh=0.45):
# 检测框的坐标
gy, gx = torch.meshgrid([torch.arange(H), torch.arange(W)])
+ gy = gy.float()
+ gx = gx.float()
+
bw, bh = preg[..., 2].sigmoid(), preg[..., 3].sigmoid()
bcx = (preg[..., 0].tanh() + gx.to(device)) / W
bcy = (preg[..., 1].tanh() + gy.to(device)) / H
@@ -113,7 +116,8 @@ def handle_preds(preds, device, conf_thresh=0.25, nms_thresh=0.45):
b = torch.Tensor(b).to(device)
c = torch.Tensor(c).squeeze(1).to(device)
s = torch.Tensor(s).squeeze(1).to(device)
- keep = torchvision.ops.batched_nms(b, s, c, nms_thresh)
+ # keep = torchvision.ops.batched_nms(b, s, c, nms_thresh)
+ keep = torchvision.ops.boxes.batched_nms(b, s, c, nms_thresh)
for i in keep:
output.append(temp[i])
output_bboxes.append(torch.Tensor(output))
该程序主要是为了将pt文件从1.6以上版本降到低版本pytorch
"""Re-save a FastestDet checkpoint in PyTorch's legacy (non-zipfile) format.

Checkpoints written by PyTorch >= 1.6 use a zipfile container by default,
which older PyTorch builds cannot read.  This script loads such a checkpoint
into the ``Detector`` model on the CPU and writes its ``state_dict`` back out
with ``_use_new_zipfile_serialization=False`` so that it can be loaded by an
older PyTorch.  It has to be run with PyTorch >= 1.6.

Usage:
    python3 mlu/gen_unzipmodel.py --yaml <config.yaml> --weight <checkpoint>

The converted file is written to ``FastestDet_unzip.pt`` in the current
working directory.
"""
import os
import argparse
import sys

# Make the project root (the parent of this script's directory) importable so
# that the ``utils`` and ``module`` packages below can be resolved.
prj_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
print(prj_dir)
sys.path.append(prj_dir)

import torch
from utils.tool import *
from module.detector import Detector

if __name__ == '__main__':
    # Command-line options: the training/config yaml and the checkpoint.
    parser = argparse.ArgumentParser()
    parser.add_argument('--yaml', type=str, default="", help='.yaml config')
    parser.add_argument('--weight', type=str, default=None, help='.weight config')
    opt = parser.parse_args()

    # Fail with a usage message instead of an opaque error from
    # torch.load(None) when the checkpoint path was not supplied.
    if opt.weight is None:
        parser.error("--weight is required: path of the checkpoint to convert")

    # The conversion only touches tensors on the host, so stay on the CPU.
    print("run on cpu...")
    device = torch.device("cpu")

    # Parse the yaml configuration file.
    cfg = LoadYaml(opt.yaml)
    print(cfg)

    # Build the model and load the checkpoint into it.
    print("load weight from:%s"%opt.weight)
    model = Detector(cfg.category_num, True).to(device)
    model.load_state_dict(torch.load(opt.weight, map_location=device))
    # Put the module in eval mode.
    model.eval()

    # Write the weights back out in the legacy (pre-1.6) serialization format.
    pt_file="FastestDet_unzip.pt"
    print("save no zipfile ...")
    torch.save(model.state_dict(), pt_file,_use_new_zipfile_serialization=False)
    print("save %s end ..."%pt_file)
基于test.py 进行修改,添加量化配置参数等:
---
#参数增加
parser.add_argument('--qua_weight', type=str,default='FastestDet_intx.pth', help='model.pt path(s)')
---
#量化参数配置
# Configure the quantization parameters (uses the torch_mlu extension).
import torch_mlu
import torch_mlu.core.mlu_model as ct
import torch_mlu.core.mlu_quantize as mlu_quantize
# Options handed to the quantizer as `qconfig_spec` below.
qconfig={'use_avg':False, 'data_scale':1.0, 'firstconv':False, 'per_channel': False}
# Call the quantization interface on `model` with dtype 'int16'.
# NOTE(review): gen_quant=True presumably switches the wrapper into
# "generate quantization parameters" mode - confirm against the torch_mlu docs.
quantized_net = mlu_quantize.quantize_dynamic_mlu(model,qconfig_spec=qconfig, dtype='int16', gen_quant=True)
# Switch to inference mode and cast the module to float32.
quantized_net = quantized_net.eval().float()
# Rebind `model` so the code that follows runs the quantized wrapper.
model = quantized_net
---
#模型量化模型
# Save the quantized model's state dict to the path given by --qua_weight.
print("\n")
qua_weight = opt.qua_weight
print("SAVE quantize model:",qua_weight)
torch.save(model.state_dict(),qua_weight)
基于test.py 进行修改,主要修改模型加载部分:
---
#模型加载
#注释
# The original float-weight loading is disabled; the quantized weights are
# loaded further down instead.
# model.load_state_dict(torch.load(opt.weight, map_location=device))
# NOTE(review): this listing has lost its indentation. The statements that
# follow each `if` presumably form its body - restore the nesting before
# running this code.
# Configure the number of MLU cores.
ct.set_core_number(opt.core_number)
# Set the channel order of the input image, which decides the channel order
# used to pad the 3-channel input of the first convolution. The default is
# RGBA order.
#ct.set_input_format(0)
# Configure the MLU core type.
ct.set_core_version(opt.mcore)
# Inference only: disable gradient tracking.
torch.set_grad_enabled(False)
if opt.fake_device:
print("fake device mode")
ct.set_device(-1)
mlu_device = ct.mlu_device()
print("run on %s ..."%mlu_device)
# Load the quantized model: wrap `model`, then load the quantized state dict.
weight = opt.weight
quantized_net = torch_mlu.core.mlu_quantize.quantize_dynamic_mlu(model)
print('weight:',weight)
state_dict = torch.load(weight)
# strict=False: keys that do not match between the state dict and the wrapper
# are ignored rather than raising.
quantized_net.load_state_dict(state_dict, strict=False)
# Switch to inference mode, cast to float32 and move to the MLU device.
quantized_net = quantized_net.eval().float()
quantized_net.to(mlu_device)
model = quantized_net
# Set up online fusion (JIT) mode.
if opt.jit:
if opt.save:
ct.save_as_cambricon(opt.mname)
# `example` uses the requested batch size; `trace_input` is a single-image
# batch that is used for tracing.
example = torch.randn(opt.batch_size, 3, cfg.input_height, cfg.input_width,dtype=torch.float)
trace_input = torch.randn(1, 3, cfg.input_height, cfg.input_width,dtype=torch.float)
if opt.half_input:
print('half_input ')
trace_input = trace_input.type(torch.HalfTensor)
example = example.type(torch.HalfTensor)
print("jit trace example shape",example.shape)
model = torch.jit.trace(model,trace_input.to(mlu_device),check_trace=False)
if opt.save or opt.fake_device:
print("save offline model mname: ",opt.mname)
# Run one forward pass with the full-batch example, then call
# save_as_cambricon('') and exit.
# NOTE(review): the empty name presumably ends offline-model saving - confirm.
model(example.to(mlu_device))
ct.save_as_cambricon('')
exit(0)
---
#数据部分
#需要将数据拷贝到mlu device上
img = img.to(mlu_device)
---
#输出部分
preds = preds.cpu().type(torch.FloatTensor) if opt.half_input else preds.cpu()
python3 mlu/gen_unzipmodel.py --yaml configs/coco.yaml --weight weights/weight_AP05\:0.253207_280-epoch.pth
mv FastestDet_unzip.pt mlu/weight/
备注:需要在大于1.6 pytorch 版本进行转换
python3 test.py --yaml configs/coco.yaml --weight mlu/weight/FastestDet_unzip.pt --img data/3.jpg
备注:可以在高版本容器内验证也可以在mlu容器内进行验证
python3 mlu/mlu_quant.py --yaml configs/coco.yaml --weight mlu/weight/FastestDet_unzip.pt --img data/3.jpg
mv FastestDet_intx.pth mlu/weight/
#逐层运行
python3 mlu/mlu_test.py --yaml configs/coco.yaml --weight mlu/weight/FastestDet_intx.pth --img data/3.jpg
#融合模式运行
python3 mlu/mlu_test.py --yaml configs/coco.yaml --weight mlu/weight/FastestDet_intx.pth --img data/3.jpg --jit
#生成270离线模型 4batch
python3 mlu/mlu_test.py --yaml configs/coco.yaml --weight mlu/weight/FastestDet_intx.pth --img data/3.jpg --batch 4 --core 4 --mcore MLU270 --save --jit --mname mlu270_4b4c
热门帖子
精华帖子