非常感谢您的指正,帮助我解决了关键问题:量化时改用实际图片后,MSE 下降了两个数量级。我想知道量化推理的这一步为什么这么关键?对使用的图片有什么要求吗?
请核对一下量化时的参数以及逐层运行时的前处理参数是否正确,尤其要校核预处理设置部分。另外,可以先关闭 firstconv 测试一下看看。
这是量化时的代码
def generate_qua(calib_inputs=None):
    """Generate int8 quantization parameters for D2Net and save them.

    Loads the float weights from 'models/d2_tf.pth', wraps the model with
    ``mlu_quantize.quantize_dynamic_mlu(..., gen_quant=True)``, runs the
    calibration input(s) through it, and writes the resulting state dict to
    'd2-net-8-data.pt'.

    Args:
        calib_inputs: Optional iterable of input tensors to forward through
            the quantized model. They should be real images preprocessed
            exactly as at inference time. When ``None``, a single all-zero
            tensor of shape (1, 3, 480, 640) is used, as before.

    NOTE(review): calibrating on an all-zero tensor was reported to give a
    much larger per-layer MSE than calibrating on a real image, presumably
    because the quantization ranges come from the data seen in this forward
    pass -- confirm against the Cambricon quantization documentation.
    """
    model = D2Net()
    state_dict = torch.load('models/d2_tf.pth', map_location='cpu')['model']
    model.load_state_dict(state_dict)
    model.eval()

    qconfig = {'use_avg': False, 'firstconv': False, 'per_channel': False}
    quantized_model = mlu_quantize.quantize_dynamic_mlu(
        model, qconfig, dtype='int8', gen_quant=True)

    if calib_inputs is None:
        # Backward-compatible fallback; warn because it yields poor results.
        import warnings
        warnings.warn(
            'generate_qua: calibrating with an all-zero tensor; pass real '
            'preprocessed images via calib_inputs for usable quantization '
            'parameters.')
        calib_inputs = [torch.zeros((1, 3, 480, 640))]

    # NOTE(review): with several inputs, how the statistics are combined
    # presumably depends on qconfig['use_avg'] -- verify before relying on it.
    for input_tensor in calib_inputs:
        quantized_model(input_tensor)

    torch.save(quantized_model.state_dict(), 'd2-net-8-data.pt')
    print('quantize finished!')
请核对一下量化时的参数以及逐层运行时的前处理参数是否正确,尤其要校核预处理设置部分。另外,可以先关闭 firstconv 测试一下看看。
感谢您的回复,不过我在量化时并没有使用 firstconv(qconfig 中 'firstconv' 为 False)。图片数据的预处理看起来也没有什么问题。
def loadCPUModel():
    """Build the float D2Net on the CPU with weights from 'models/d2_tf.pth'.

    Returns:
        The model in eval mode.
    """
    model = D2Net()
    model.eval()
    state_dict = torch.load('models/d2_tf.pth', map_location='cpu')['model']
    model.load_state_dict(state_dict)
    print('Load Finished!')
    return model


def loadMlUModel():
    """Build the quantized D2Net, load 'models/d2-net-8.pt', move it to the MLU.

    Returns:
        The quantized model in eval mode on ``ct.mlu_device()``.
    """
    model = D2Net()
    model.eval()
    quantized_net = mlu_quantize.quantize_dynamic_mlu(model)
    # NOTE(review): the quantization script shown with this code saves to
    # 'd2-net-8-data.pt'; confirm that this path points at that output.
    state_dict = torch.load('models/d2-net-8.pt')
    # strict=False tolerates key mismatches, so report missing keys instead
    # of dropping them silently: a wrong checkpoint would otherwise load
    # "successfully" and only show up as a large MSE.
    load_result = quantized_net.load_state_dict(state_dict, strict=False)
    missing_keys = getattr(load_result, 'missing_keys', None)
    if missing_keys:
        print('Warning: keys missing from checkpoint: {}'.format(missing_keys))
    quantized_net.eval()
    quantized_net.to(ct.mlu_device())
    print('Load finished!')
    return quantized_net


def load_image(path):
    """Read an image file and return it as a normalized NCHW float32 tensor.

    The image is read as BGR, converted to RGB, scaled to [0, 1], then
    normalized per channel with the mean/std constants below.

    Args:
        path: Path of the image file to read.

    Returns:
        A ``torch`` float32 tensor of shape (1, 3, H, W).

    Raises:
        IOError: If the image cannot be read.
    """
    image = cv2.imread(path, cv2.IMREAD_COLOR)
    if image is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise IOError('could not read image: {}'.format(path))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = image.astype(np.float32)
    image = np.transpose(image, [2, 0, 1])  # HWC -> CHW

    # preprocess
    image /= 255.0
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    image = (image - mean.reshape([3, 1, 1])) / std.reshape([3, 1, 1])

    # convert to tensor (adds the batch axis, casts back to float32)
    image = torch.tensor(image[np.newaxis, :, :, :].astype(np.float32))
    print(image.shape)
    return image


def dump_test():
    """Dump per-layer outputs of the CPU and MLU models and compare by MSE.

    Runs the same image through both models with dump hooks registered,
    saves each dump under 'dump_res/', then diffs the two dump files.
    """
    test_input = load_image('qualitative/images/pair_1/1.jpg')

    #### CPU Model ####
    model = loadCPUModel()
    dump_utils.register_dump_hook(model)
    model(test_input)
    dump_utils.save_data('dump_res/', 'cpu')

    #### MLU per-layer Model ####
    model = loadMlUModel()
    dump_utils.register_dump_hook(model)
    test_input = test_input.to(ct.mlu_device())
    model(test_input)
    dump_utils.save_data('dump_res/', 'mlu')

    #### Compare the result ####
    dump_utils.diff_data('dump_res/dump_cpu_data.pth',
                         'dump_res/dump_mlu_data.pth', 'MSE')
请登录后评论