lcxzzz (LV.1) · 79 points · 0 likes · 6 posts · 57 replies · 0 favorites

Replies by lcxzzz
Abnormal output after the model generates a static graph in the online fusion stage
My reply: (quoting #8, xiedong2022) "[image] convertmodel.txt. Sorry, the text I uploaded had its line breaks garbled. Please refer to the attachment, mainly the mfus part. Try changing the half conversion, and double-check your Docker version."
Hello, I read the document you provided. In mfus mode, doesn't the model need to be put on the MLU device to run?
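For reference, the mfus flow used later in this thread does move the quantized model to the MLU device before tracing. A minimal sketch of that ordering (TinyNet, the checkpoint-free setup, and the tensor shape are placeholders, not taken from the attachment under discussion):

```python
import torch
import torch.nn as nn
import torch_mlu
import torch_mlu.core.mlu_model as ct
import torch_mlu.core.mlu_quantize as mlu_quantize


class TinyNet(nn.Module):
    """Placeholder model standing in for the real denoising network."""
    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(1, 1, 3, padding=1)

    def forward(self, x):
        return self.conv(x)


# Quantize first, then move the quantized model onto the MLU device,
# then trace it into the fused static graph (the mfus path).
model = mlu_quantize.quantize_dynamic_mlu(TinyNet())
model = model.to(ct.mlu_device())

torch.set_grad_enabled(False)
trace_input = torch.randn(1, 1, 1024, 1024, dtype=torch.float).to(ct.mlu_device())
fused = torch.jit.trace(model, trace_input, check_trace=False)
```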
Abnormal output after the model generates a static graph in the online fusion stage
My reply: (quoting #5, xiedong2022) "Hello. Usually if online mode is correct, fusion is correct as well. First check your fusion code. You can refer to the yolov5 fusion code:"

```python
# Excerpt from the yolov5 demo's mfus (online fusion) branch.
elif self.running_mode == 'mfus':
    device = self.device
    if self.gen_off:
        print('generate offline model')
        ct.save_as_cambricon('yolov5s_int8_1b_4c_640')
    torch.set_grad_enabled(False)
    batch_size = 4
    ct.set_core_number(4)
    ct.set_core_version('MLU270')
    example_mlu = torch.randn(batch_size, 3, 640, 640, dtype=torch.float)
    trace_input = torch.randn(1, 3, 640, 640, dtype=torch.float)
    trace_input = trace_input.half()
    example_mlu = example_mlu.half()
    trace_input = trace_input.to(ct.mlu_device())
    trace_model = torch.jit.trace(self.quantized_net, trace_input, check_trace=False)
    if self.gen_off:
        # Run the traced model once so the offline model file is emitted.
        trace_model(example_mlu.to(device))
        ct.save_as_cambricon('')
        exit(0)
    else:
        img = img.half()
        pred = trace_model(img.to(device))
        pred = pred.to(torch.device('cpu'))
        pred = get_boxes(pred)
        print(pred)
        for i, det in enumerate(pred):
            if det is not None and len(det):
                det[:, :4] = scale_coords(img.shape[2:], torch.Tensor(det[:, :4]), im0.shape).round()
                for c in torch.Tensor(det[:, -1]).unique():
                    n = (torch.Tensor(det[:, -1]) == c).sum()
                for *xyxy, conf, cls in det:
                    if save_img:
                        label = '%s %.2f' % (self.names[int(cls)], conf)
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)
        if save_img:
            cv2.imwrite('./mfus_result.jpg', im0)
```

This is my main script:

```python
import os
import sys
import time
from pathlib import Path

import cv2
import numpy as np
import torch
import torch.nn as nn
import torch_mlu
import torch_mlu.core.mlu_model as ct
import torch_mlu.core.mlu_quantize as mlu_quantize

import nets.basicblock as B
from nets.basicblock import Focus, UnFocus

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

FILE = Path(__file__).resolve()
ROOT = FILE.parents[0]
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))


def time_sync():
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()


class LoadImages:
    def __init__(self, path):
        self.files = [os.path.join(path, f) for f in os.listdir(path)]
        self.file_num = len(self.files)

    def __iter__(self):
        self.count = 0
        return self

    def __next__(self):
        if self.count == self.file_num:
            raise StopIteration
        path = self.files[self.count]
        img = cv2.imread(path, 0)  # read as grayscale
        assert img is not None, 'Image Not Found ' + path
        img = img.astype(np.float32)
        img = np.expand_dims(img, axis=0)
        # img = img.transpose(2, 0, 1)
        img = np.ascontiguousarray(img)
        self.count += 1
        return path, img


class UNET_Z(nn.Module):
    def __init__(self, in_nc=1, out_nc=4, nc=[8, 16, 32, 64], act_mode='R',
                 downsample_mode='strideconv', upsample_mode='upconv', nb=1, bias=True):
        super(UNET_Z, self).__init__()
        # input: b x 1 x w x h
        self.head = Focus(nc[1], nc[1], act_mode=act_mode)

        # downsample
        if downsample_mode == 'avgpool':
            downsample_block = B.downsample_avgpool
        elif downsample_mode == 'maxpool':
            downsample_block = B.downsample_maxpool
        elif downsample_mode == 'strideconv':
            downsample_block = B.downsample_strideconv
        else:
            raise NotImplementedError('downsample mode [{:s}] is not found'.format(downsample_mode))

        self.m_down1 = B.sequential(
            *[B.ResBlock(nc[1], nc[1], bias=bias, mode='C' + act_mode + 'C') for _ in range(nb)],
            downsample_block(nc[1], nc[2], bias=bias, mode='2'))
        self.m_down2 = B.sequential(
            *[B.ResBlock(nc[2], nc[2], bias=bias, mode='C' + act_mode + 'C') for _ in range(nb)],
            downsample_block(nc[2], nc[3], bias=bias, mode='2'))
        self.m_body = B.sequential(
            *[B.ResBlock(nc[3], nc[3], bias=bias, mode='C' + act_mode + 'C') for _ in range(nb)])

        # upsample
        if upsample_mode == 'upconv':
            upsample_block = B.upsample_upconv
        elif upsample_mode == 'pixelshuffle':
            upsample_block = B.upsample_pixelshuffle
        elif upsample_mode == 'convtranspose':
            upsample_block = B.upsample_convtranspose
        else:
            raise NotImplementedError('upsample mode [{:s}] is not found'.format(upsample_mode))

        self.m_up1 = B.sequential(
            upsample_block(nc[3], nc[2], bias=bias, mode='2'),
            *[B.ResBlock(nc[2], nc[2], bias=bias, mode='C' + act_mode + 'C') for _ in range(nb)])
        self.m_up2 = B.sequential(
            upsample_block(nc[2], nc[1], bias=bias, mode='2'),
            *[B.ResBlock(nc[1], nc[1], bias=bias, mode='C' + act_mode + 'C') for _ in range(nb)])
        self.m_up3 = B.sequential(
            upsample_block(nc[1], nc[0], bias=bias, mode='2'),
            *[B.ResBlock(nc[0], nc[0], bias=bias, mode='C' + act_mode + 'C') for _ in range(nb)])
        self.m_tail = B.sequential(B.conv(nc[0], out_nc, bias=bias, mode='C'))  # b x 1 x w x h

    def forward(self, x0):
        h, w = x0.size()[-2:]
        # paddingBottom = int(np.ceil(h / 4) * 4 - h)
        # paddingRight = int(np.ceil(w / 4) * 4 - w)
        # x0 = nn.ReplicationPad2d((0, paddingRight, 0, paddingBottom))(x0)
        x1 = self.head(x0)      # b x 16 x w/4 x h/4
        x2 = self.m_down1(x1)   # b x 32 x w/8 x h/8
        x3 = self.m_down2(x2)   # b x 64 x w/16 x h/16
        x = self.m_body(x3)     # b x 64 x w/16 x h/16
        x = self.m_up1(x + x3)  # b x 32 x w/8 x h/8
        x = self.m_up2(x + x2)  # b x 16 x w/4 x h/4
        x = self.m_up3(x + x1)  # b x 8 x w/2 x h/2
        x = self.m_tail(x)
        return x[..., :int(h / 2), :int(w / 2)]


def final(x):
    # yolov5-Focus inverse: interleave the 4 channels back into one
    # full-resolution image, (b, 4, h/2, w/2) -> (b, 1, h, w).
    b, c, w, h = x.shape
    y = torch.ones((b, int(c / 4), int(w * 2), int(h * 2)))
    y[..., 0::2, 0::2] = x[:, 0:1, :, :]
    y[..., 1::2, 0::2] = x[:, 1:2, :, :]
    y[..., 0::2, 1::2] = x[:, 2:3, :, :]
    y[..., 1::2, 1::2] = x[:, 3:4, :, :]
    return y


def main():
    images_dir = './dataset/data3'
    save_dir = './results/result_mfus'
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)

    # device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model = UNET_Z()
    model = torch_mlu.core.mlu_quantize.quantize_dynamic_mlu(model)
    state_dict = torch.load('qua_unet_int8.pt')
    model.load_state_dict(state_dict, strict=False)

    ct.set_core_number(4)
    ct.set_core_version("MLU270")
    ct.save_as_cambricon("unet-270")
    model = model.to(ct.mlu_device())

    torch.set_grad_enabled(False)
    trace_input = torch.randn(1, 1, 1024, 1024, dtype=torch.float).to(ct.mlu_device())
    model = torch.jit.trace(model, trace_input, check_trace=False)

    dataset = LoadImages(images_dir)
    for path, img in dataset:
        t1 = time_sync()
        img = torch.from_numpy(img)
        # img = img / 255.0
        if len(img.shape) == 3:
            img = img[None]
        pred = model(img.to(ct.mlu_device())).to(torch.device('cpu'))
        pred = torch.squeeze(final(pred)).detach().numpy()
        t2 = time_sync()
        print("Speed per image is ", t2 - t1)
        # torch.save(model.state_dict(), 'qua_unet_int8.pt')
        save_path = os.path.join(save_dir, os.path.basename(path))
        cv2.imwrite(save_path, pred)


if __name__ == "__main__":
    main()
```
Abnormal output after the model generates a static graph in the online fusion stage
My reply: (quoting #5, xiedong2022: the same yolov5 fusion code shown above)
Hello, the second-to-last output of my denoising network has shape 1x4x(h/2)x(w/2). Following the inverse of the Focus operation, I reassemble it into a 1x1xhxw image; the method I use for this step is slice assignment into a tensor. But that kind of assignment cannot be done on the MLU, so I run the assignment part on the CPU instead.
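If the slice assignment is the only blocker, the same Focus-inverse rearrangement can also be written without any in-place assignment. A minimal sketch follows; whether pixel_shuffle is supported on the MLU in this torch_mlu version is an assumption, but on the CPU path it at least removes the assignment:

```python
import torch
import torch.nn.functional as F


def final_no_assign(x: torch.Tensor) -> torch.Tensor:
    # Focus inverse, (b, 4, h/2, w/2) -> (b, 1, h, w), without slice
    # assignment. Reorder channels so that pixel_shuffle reproduces
    # the same interleaving as the assignment-based final():
    #   out[..., 0::2, 0::2] = x[:, 0]   out[..., 1::2, 0::2] = x[:, 1]
    #   out[..., 0::2, 1::2] = x[:, 2]   out[..., 1::2, 1::2] = x[:, 3]
    x = x[:, [0, 2, 1, 3], :, :]
    return F.pixel_shuffle(x, 2)


# Sanity check against the assignment-based version on CPU:
# t = torch.randn(1, 4, 512, 512)
# assert torch.allclose(final(t), final_no_assign(t))
```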
Abnormal output after the model generates a static graph in the online fusion stage
My reply: (quoting #3, xiedong2022) "What were your quantization steps? Did you quantize with the original images? What changes did you make to the code? Usually if online mode is correct, fusion processing is fine as well. Also, in your fused output only part of the image is wrong, so it does not look like a whole-image processing problem. Check your inference pre- and post-processing code."
In the online fusion stage, as soon as I use the torch.jit.trace function the result becomes abnormal: an extra block appears in the upper-right corner. If I remove that call, the result is fine.
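One way to localize this kind of divergence is to feed one identical input through the eager model and its traced version and diff the outputs. This is a debugging sketch, not from the thread, and it assumes the quantized model can also run eagerly on the same device:

```python
import torch

# `model` is the quantized network and `img` one preprocessed input,
# both on the same device; the point is to isolate torch.jit.trace
# itself from the rest of the fusion pipeline.
with torch.no_grad():
    eager_out = model(img)                                    # online-style path
    traced = torch.jit.trace(model, img, check_trace=False)   # fused-style path
    traced_out = traced(img)

# A large max difference implicates trace-time specialization, e.g.
# shape-dependent Python logic frozen in at trace time.
print((eager_out - traced_out).abs().max())
```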
Abnormal output after the model generates a static graph in the online fusion stage
My reply: I am using a denoising network with a UNet-like structure. In the online inference stage the denoised output image is normal, but after fusion processing part of the image is abnormal. [image: left is the online inference result, right is the online fusion result]