def mlu_forward(imgfile, use_mlu=True):
    """Build the re-id network and, on MLU, fuse it and save an offline model.

    The image at ``imgfile`` is loaded and preprocessed into a normalized
    float32 batch whose shape is logged. When ``use_mlu`` is true the
    quantized model is traced with a random example tensor between two
    ``ct.save_as_cambricon`` calls, which writes the 'deepsort' offline model.

    Args:
        imgfile: Path of the image to read with OpenCV.
        use_mlu: Run on the MLU device and export the offline model when
            true; otherwise only build the model on the CPU.

    Raises:
        FileNotFoundError: If ``imgfile`` cannot be read as an image.
    """
    # Build the offline model for MLU220; such a model only runs on MLU220
    # cores. Use "MLU270" instead to get a model that can run in the
    # simulation environment (requires an MLU270 card).
    ct.set_core_version("MLU220")
    ct.set_device(-1)
    # Number of cores the offline model will run on; 4 usually performs well.
    ct.set_core_number(4)

    device = ct.mlu_device() if use_mlu else torch.device('cpu')
    batch_size = 1
    fusion = True

    with torch.no_grad():
        # Build the network in inference mode with float32 weights.
        model = Net(reid=True)
        model.eval().float()
        if use_mlu:
            model = mlu_quantize.quantize_dynamic_mlu(model)
        model.load_state_dict(torch.load(save_path), strict=False)
        model.to(device)

        input_img = _load_input_tensor(imgfile, batch_size).to(device)
        logging.info(input_img.shape)

        # Fusion mode: trace the model and convert it to an offline model.
        if use_mlu and fusion:
            ct.save_as_cambricon('deepsort')
            # Example input laid out as N, C, H, W.
            example_tensor = torch.randn(
                (4, 3, 128, 64), dtype=torch.float
            ).to(device)
            model = torch.jit.trace(model, example_tensor, check_trace=False)
            # Run the traced model once before closing the offline-model save.
            model(example_tensor)
            ct.save_as_cambricon('')  # generate offline model
            print('fusion success')


def _load_input_tensor(imgfile, batch_size):
    """Read ``imgfile`` and return a normalized float32 N, C, H, W CPU tensor."""
    raw = cv2.imread(imgfile)
    if raw is None:
        # cv2.imread signals a missing or undecodable file by returning None.
        raise FileNotFoundError(f"cannot read image: {imgfile!r}")

    # NOTE(review): cv2.imread yields BGR data and Image.fromarray does not
    # reorder channels, so the array is still BGR after this conversion --
    # confirm that `mean`/`std` are meant for that channel order.
    img = Image.fromarray(raw).convert('RGB')
    # cv2.resize takes (width, height): the result is 128 rows x 64 columns.
    img = cv2.resize(np.array(img), (64, 128))

    # Scale to [0, 1], normalize per channel, then reorder HWC -> CHW.
    sized = ((img / 255) - np.array(mean)) / np.array(std)
    sized = sized.transpose((2, 0, 1)).astype(np.float32)
    return torch.from_numpy(np.stack([sized] * batch_size))