lcxzzz (LV.1) — 79 points · 0 likes · 6 posts · 57 replies · 0 favorites

Replies by lcxzzz:
Abnormal output after the model generates a static graph in the online fusion stage

My reply (quoting #8 from xiedong2022: "[image] convertmodel.txt — Sorry, the text I uploaded had its line breaks garbled. Please refer to the attachment, mainly the mfus part. Try adjusting the half conversion, and double-check your Docker version."):

Hello, I read the document you provided. In mfus mode, doesn't the model need to be moved onto the MLU device before running?
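For reference, a minimal sketch of the mfus sequence, distilled from the code quoted later in this thread; the model is indeed moved onto the MLU device with .to(ct.mlu_device()) before tracing. The model and img parameters stand for the quantized network and a CPU input tensor, as in the main() shown below.

import torch
import torch_mlu
import torch_mlu.core.mlu_model as ct

def run_mfus(model, img):
    # model: a torch_mlu-quantized network; img: a CPU float tensor, e.g. (1, 1, 1024, 1024)
    ct.set_core_number(4)                  # core count for the fused graph
    ct.set_core_version('MLU270')          # target hardware
    model = model.to(ct.mlu_device())      # quantized model goes onto the MLU device
    torch.set_grad_enabled(False)
    trace_input = torch.randn(1, 1, 1024, 1024).to(ct.mlu_device())
    traced = torch.jit.trace(model, trace_input, check_trace=False)
    return traced(img.to(ct.mlu_device())).to(torch.device('cpu'))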
Abnormal output after the model generates a static graph in the online fusion stage

My reply (quoting #5 from xiedong2022: "Hello — normally, if online mode is correct, fusion is correct too. First check your fusion code; you can refer to this yolov5 fusion code:"):

elif self.running_mode == 'mfus':
    device = self.device
    if self.gen_off:
        print('generate offline model')
        ct.save_as_cambricon('yolov5s_int8_1b_4c_640')
    torch.set_grad_enabled(False)
    batch_size = 4
    ct.set_core_number(4)
    ct.set_core_version('MLU270')
    example_mlu = torch.randn(batch_size, 3, 640, 640, dtype=torch.float)
    trace_input = torch.randn(1, 3, 640, 640, dtype=torch.float)
    trace_input = trace_input.half()
    example_mlu = example_mlu.half()
    trace_input = trace_input.to(ct.mlu_device())
    trace_model = torch.jit.trace(self.quantized_net, trace_input, check_trace=False)
    if self.gen_off:
        # one forward pass dumps the offline model, then stop
        trace_model(example_mlu.to(device))
        ct.save_as_cambricon('')
        exit(0)
    else:
        img = img.half()
        pred = trace_model(img.to(device))
        pred = pred.to(torch.device('cpu'))
        pred = get_boxes(pred)
        print(pred)
        for i, det in enumerate(pred):
            if det is not None and len(det):
                det[:, :4] = scale_coords(img.shape[2:], torch.Tensor(det[:, :4]), im0.shape).round()
                for c in torch.Tensor(det[:, -1]).unique():
                    n = (torch.Tensor(det[:, -1]) == c).sum()
                for *xyxy, conf, cls in det:
                    if save_img:
                        label = '%s %.2f' % (self.names[int(cls)], conf)
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)
        if save_img:
            cv2.imwrite('./mfus_result.jpg', im0)

This is my main-function code:

import os
import sys
import time
from pathlib import Path

import numpy as np
import cv2
import torch
import torch.nn as nn
import torch_mlu
import torch_mlu.core.mlu_model as ct
import torch_mlu.core.mlu_quantize as mlu_quantize

import nets.basicblock as B
from nets.basicblock import Focus, UnFocus

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

FILE = Path(__file__).resolve()
ROOT = FILE.parents[0]
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))


def time_sync():
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()


class LoadImages:
    def __init__(self, path):
        self.files = [os.path.join(path, f) for f in os.listdir(path)]
        self.file_num = len(self.files)

    def __iter__(self):
        self.count = 0
        return self

    def __next__(self):
        if self.count == self.file_num:
            raise StopIteration
        path = self.files[self.count]
        img = cv2.imread(path, 0)                  # grayscale
        assert img is not None, 'Image Not Found ' + path
        img = img.astype(np.float32)
        img = np.expand_dims(img, axis=0)          # (1, h, w)
        # img = img.transpose(2, 0, 1)
        img = np.ascontiguousarray(img)
        self.count += 1
        return path, img


class UNET_Z(nn.Module):
    def __init__(self, in_nc=1, out_nc=4, nc=[8, 16, 32, 64], act_mode='R',
                 downsample_mode='strideconv', upsample_mode='upconv', nb=1, bias=True):
        super(UNET_Z, self).__init__()
        # input: b x 1 x w x h
        self.head = Focus(nc[1], nc[1], act_mode=act_mode)

        # downsample
        if downsample_mode == 'avgpool':
            downsample_block = B.downsample_avgpool
        elif downsample_mode == 'maxpool':
            downsample_block = B.downsample_maxpool
        elif downsample_mode == 'strideconv':
            downsample_block = B.downsample_strideconv
        else:
            raise NotImplementedError('downsample mode [{:s}] is not found'.format(downsample_mode))

        self.m_down1 = B.sequential(
            *[B.ResBlock(nc[1], nc[1], bias=bias, mode='C' + act_mode + 'C') for _ in range(nb)],
            downsample_block(nc[1], nc[2], bias=bias, mode='2'))
        self.m_down2 = B.sequential(
            *[B.ResBlock(nc[2], nc[2], bias=bias, mode='C' + act_mode + 'C') for _ in range(nb)],
            downsample_block(nc[2], nc[3], bias=bias, mode='2'))
        self.m_body = B.sequential(
            *[B.ResBlock(nc[3], nc[3], bias=bias, mode='C' + act_mode + 'C') for _ in range(nb)])

        # upsample
        if upsample_mode == 'upconv':
            upsample_block = B.upsample_upconv
        elif upsample_mode == 'pixelshuffle':
            upsample_block = B.upsample_pixelshuffle
        elif upsample_mode == 'convtranspose':
            upsample_block = B.upsample_convtranspose
        else:
            raise NotImplementedError('upsample mode [{:s}] is not found'.format(upsample_mode))

        self.m_up1 = B.sequential(
            upsample_block(nc[3], nc[2], bias=bias, mode='2'),
            *[B.ResBlock(nc[2], nc[2], bias=bias, mode='C' + act_mode + 'C') for _ in range(nb)])
        self.m_up2 = B.sequential(
            upsample_block(nc[2], nc[1], bias=bias, mode='2'),
            *[B.ResBlock(nc[1], nc[1], bias=bias, mode='C' + act_mode + 'C') for _ in range(nb)])
        self.m_up3 = B.sequential(
            upsample_block(nc[1], nc[0], bias=bias, mode='2'),
            *[B.ResBlock(nc[0], nc[0], bias=bias, mode='C' + act_mode + 'C') for _ in range(nb)])

        self.m_tail = B.sequential(B.conv(nc[0], out_nc, bias=bias, mode='C'))
        # output: b x 4 x w/2 x h/2

    def forward(self, x0):
        y = x0  # b x 1 x w x h
        h, w = x0.size()[-2:]
        # paddingBottom = int(np.ceil(h / 4) * 4 - h)
        # paddingRight = int(np.ceil(w / 4) * 4 - w)
        # x0 = nn.ReplicationPad2d((0, paddingRight, 0, paddingBottom))(x0)
        x1 = self.head(x0)      # b x 16 x w/4 x h/4
        x2 = self.m_down1(x1)   # b x 32 x w/8 x h/8
        x3 = self.m_down2(x2)   # b x 64 x w/16 x h/16
        x = self.m_body(x3)     # b x 64 x w/16 x h/16
        x = self.m_up1(x + x3)  # b x 32 x w/8 x h/8
        x = self.m_up2(x + x2)  # b x 16 x w/4 x h/4
        x = self.m_up3(x + x1)  # b x 8 x w/2 x h/2
        x = self.m_tail(x)      # b x 4 x w/2 x h/2
        return x[..., :int(h / 2), :int(w / 2)]


def final(x):
    # inverse of the yolov5-style Focus: scatter the 4 channels back to full resolution
    b, c, w, h = x.shape
    y = torch.ones((b, int(c / 4), int(w * 2), int(h * 2)))
    y[..., 0::2, 0::2] = x[:, 0:1, :, :]
    y[..., 1::2, 0::2] = x[:, 1:2, :, :]
    y[..., 0::2, 1::2] = x[:, 2:3, :, :]
    y[..., 1::2, 1::2] = x[:, 3:4, :, :]
    return y


def main():
    images_dir = './dataset/data3'
    save_dir = './results/result_mfus'
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
    # device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # quantize the network, load int8 weights, and prepare for fusion
    model = UNET_Z()
    model = torch_mlu.core.mlu_quantize.quantize_dynamic_mlu(model)
    state_dict = torch.load('qua_unet_int8.pt')
    model.load_state_dict(state_dict, strict=False)

    ct.set_core_number(4)
    ct.set_core_version("MLU270")
    ct.save_as_cambricon("unet-270")
    model = model.to(ct.mlu_device())

    torch.set_grad_enabled(False)
    trace_input = torch.randn(1, 1, 1024, 1024, dtype=torch.float).to(ct.mlu_device())
    model = torch.jit.trace(model, trace_input, check_trace=False)

    dataset = LoadImages(images_dir)
    for path, img in dataset:
        t1 = time_sync()
        img = torch.from_numpy(img)
        # img = img / 255.0
        if len(img.shape) == 3:
            img = img[None]
        pred = model(img.to(ct.mlu_device())).to(torch.device('cpu'))
        pred = torch.squeeze(final(pred)).detach().numpy()
        t2 = time_sync()
        print("Speed per image is ", t2 - t1)
        # torch.save(model.state_dict(), 'qua_unet_int8.pt')
        save_path = os.path.join(save_dir, os.path.basename(path))
        cv2.imwrite(save_path, pred)


if __name__ == "__main__":
    main()
Abnormal output after the model generates a static graph in the online fusion stage

My reply (quoting the same #5 from xiedong2022 as above, with the yolov5 fusion code):

Hello, the second-to-last output of my denoising network has shape 1x4x(h/2)x(w/2). Following the inverse of the Focus operation, I reassemble it into a 1x1xhxw image (I do this step with tensor slice assignment), but slice assignment cannot be performed on the MLU, so I run the assignment part on the CPU.
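If the goal is only to avoid slice assignment, one alternative worth noting (an untested sketch, not from the thread): torch.pixel_shuffle performs the same scatter as the final() function in the script above, up to a channel reordering, so the inverse Focus can be written without any in-place assignment.

import torch

def final_via_pixel_shuffle(x):
    # x: (b, 4, h/2, w/2) in yolov5-Focus order:
    #   ch0 -> (row 0, col 0), ch1 -> (row 1, col 0),
    #   ch2 -> (row 0, col 1), ch3 -> (row 1, col 1)
    # torch.pixel_shuffle expects ch0 -> (0,0), ch1 -> (0,1), ch2 -> (1,0), ch3 -> (1,1),
    # so swap channels 1 and 2 before shuffling.
    x = x[:, [0, 2, 1, 3], :, :]
    return torch.pixel_shuffle(x, 2)   # (b, 1, h, w), no slice assignment needed

On CPU this should match final() exactly (torch.allclose(final(x), final_via_pixel_shuffle(x))); whether pixel_shuffle is supported inside the MLU fusion pass would still need to be checked.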
Abnormal output after the model generates a static graph in the online fusion stage

My reply (quoting #3 from xiedong2022: "What were your quantization steps? Did you use the original images? What changes did you make to the code? Normally, when online mode is correct, fusion processing is fine too. Moreover, in your fused result only part of the image is wrong, so it doesn't look like a whole-pipeline problem. Check your inference pre- and post-processing code."):

As soon as I use torch.jit.trace in the online fusion stage, the result becomes abnormal: an extra block appears in the top-right corner. If I remove that call, the result is fine.
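One way to separate the effect of tracing itself from the effect of MLU fusion (a minimal sketch, assuming the UNET_Z class from the script above is importable): run the same input through the eager model and a CPU-traced copy, then compare. Note that torch.jit.trace freezes shape-derived Python values, such as the h and w computed in forward() and used in its final slice, into constants of the traced graph.

import torch

model = UNET_Z().eval()                    # network defined earlier in this thread
x = torch.randn(1, 1, 1024, 1024)
with torch.no_grad():
    eager = model(x)
    traced = torch.jit.trace(model, x)     # check_trace=True (default) also self-checks
    traced_out = traced(x)
print(torch.allclose(eager, traced_out, atol=1e-5))   # True => tracing alone is faithful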
Abnormal output after the model generates a static graph in the online fusion stage

My reply: I'm using a denoising network with a UNet-like structure. In the online inference stage the denoised output image is normal, but after fusion processing part of the image is abnormal. In the image below, the left side is the online inference result and the right side is the online fusion result.
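A debugging sketch for localizing such a mismatch, using only torch_mlu calls that already appear in this thread (model stands for the quantized network): feed the same input through the model directly (online, layer-by-layer mode) and through the traced version (fused mode), then look at where the difference concentrates.

import torch
import torch_mlu.core.mlu_model as ct

def compare_online_vs_fused(model, shape=(1, 1, 1024, 1024)):
    x = torch.randn(*shape)
    online = model(x.to(ct.mlu_device())).to(torch.device('cpu'))   # online mode
    traced = torch.jit.trace(model, x.to(ct.mlu_device()), check_trace=False)
    fused = traced(x.to(ct.mlu_device())).to(torch.device('cpu'))   # fused static graph
    diff = (online - fused).abs()
    print('max diff:', diff.max().item(), 'mean diff:', diff.mean().item())
    return diff   # inspect spatially, e.g. whether it concentrates in the top-right corner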