1. 关于 .data:.data 是早期 torch 的用法,.data 和 .detach() 在框架内部的运算是不一样的。
2. 生成的离线模型会包含运行指令,会比 .pt 文件大一些。
可以尝试修改下代码将.data 更换成.detach:diff --git a/module/shufflenetv2.py b/module/shufflenetv2.pyindex 89d0eb7..80cd875 100644--- a/module/shufflenetv2.py+++ b/module/shufflenetv2.py@@ -54,7 +54,8 @@ class ShuffleV2Block(nn.Module): return torch.cat((self.branch_proj(x_proj), self.branch_main(x)), 1) def channel_shuffle(self, x):- batchsize, num_channels, height, width = x.data.size()+ #batchsize, num_channels, height, width = x.data.size()+ batchsize, num_channels, height, width = x.detach().size() assert (num_channels % 4 == 0) x = x.reshape(batchsize * num_channels // 2, 2, height * width) x = x.permute(1, 0, 2)展开
按你说的修改,或者把“.data”去掉就能转换成功了,这是为什么?
另外,转换完的.cambricon为啥比原来的.pt文件还大?
我看到社区挺多模型转换错误的帖子,都没有解决方案......建议大佬们合力看看,总不能只用yolo吧。
可以尝试修改下代码将.data 更换成.detach:
diff --git a/module/shufflenetv2.py b/module/shufflenetv2.py
index 89d0eb7..80cd875 100644
--- a/module/shufflenetv2.py
+++ b/module/shufflenetv2.py
@@ -54,7 +54,8 @@ class ShuffleV2Block(nn.Module):
return torch.cat((self.branch_proj(x_proj), self.branch_main(x)), 1)
def channel_shuffle(self, x):
- batchsize, num_channels, height, width = x.data.size()
+ #batchsize, num_channels, height, width = x.data.size()
+ batchsize, num_channels, height, width = x.detach().size()
assert (num_channels % 4 == 0)
x = x.reshape(batchsize * num_channels // 2, 2, height * width)
x = x.permute(1, 0, 2)
模型转换代码如下:def mlu_forward(modelfile): ct.set_core_version("MLU220") ct.set_device(-1) ct.set_input_format(order) ct.set_core_number(4) # set core number device = ct.mlu_device() with torch.no_grad(): model = Detector(1, True) model.eval().float() model = mlu_quantize.quantize_dynamic_mlu(model) model.load_state_dict(torch.load(modelfile)) ct.save_as_cambricon('fastestdet') example_tensor = torch.randn((1, 3, 640, 640), dtype=torch.float).to(device) fusion_model = torch.jit.trace(model.to(device), example_tensor, check_trace=False) fusion_model(example_tensor) ct.save_as_cambricon('') # generate offline model print('fusion success')展开
从日志看是因为没有设备导致的,去掉ct.set_device(-1) ,建议基于有MLU200的设备的环境进行模型验证和转换
您好,看log不止一个错误。请先检查一下量化部分的代码。
量化代码如下:
def quantification(weight, offline, img_path, img_size, save_path):
    """Quantize a Detector model to int8 for Cambricon MLU and save its state dict.

    Loads the float checkpoint from ``weight``, wraps the model with the MLU
    dynamic quantizer in calibration mode, runs calibration over the images in
    ``img_path``, and writes the quantized state dict to ``save_path``.

    NOTE(review): the ``offline`` parameter is not used inside this function —
    kept for interface compatibility; confirm against callers.
    """
    with torch.no_grad():
        checkpoint = torch.load(weight)
        # Some checkpoints wrap the weights under a 'model' key; unwrap if so.
        state_dict = checkpoint.get('model', checkpoint)

        detector = Detector(1, True)
        detector.load_state_dict(state_dict)
        detector.eval().float()

        # Quantization configuration passed to the Cambricon quantizer.
        quant_config = {
            'iteration': 1000,
            'mean': mean,
            'std': std,
            'data_scale': 1.0,
            'firstconv': True,
            'perchannel': True,
            'use_avg': False,
        }
        detector = mlu_quantize.quantize_dynamic_mlu(
            detector, quant_config, dtype='int8', gen_quant=True)

        # Run calibration images through the model to collect quantization stats.
        calibration(model=detector, dir=img_path, img_size=img_size)

        torch.save(detector.state_dict(), save_path)
        print(save_path, " save over")
模型转换代码如下:
def mlu_forward(modelfile):
    """Trace the quantized model on an MLU device and emit a .cambricon offline model.

    Loads the int8-quantized state dict from ``modelfile``, fuses the model via
    ``torch.jit.trace`` on the MLU device, and generates the 'fastestdet'
    offline model (the trailing ``save_as_cambricon('')`` finalizes the file).

    Must be run in an environment with an actual MLU200-series device attached.
    """
    ct.set_core_version("MLU220")
    # FIX: removed ct.set_device(-1) — forcing device index -1 fails with a
    # "no device" error during conversion; let the runtime pick the device.
    ct.set_input_format(order)
    ct.set_core_number(4)  # number of MLU cores used by the offline model
    device = ct.mlu_device()
    with torch.no_grad():
        model = Detector(1, True)
        model.eval().float()
        # Wrap with the quantizer (no gen_quant: weights are already quantized).
        model = mlu_quantize.quantize_dynamic_mlu(model)
        model.load_state_dict(torch.load(modelfile))
        # Name the offline model; generation happens during the traced forward.
        ct.save_as_cambricon('fastestdet')
        example_tensor = torch.randn((1, 3, 640, 640), dtype=torch.float).to(device)
        fusion_model = torch.jit.trace(model.to(device), example_tensor, check_trace=False)
        fusion_model(example_tensor)
        ct.save_as_cambricon('')  # finalize and write the offline model
        print('fusion success')
请登录后评论