打开微信,使用扫一扫进入页面后,点击右上角菜单,
点击“发送给朋友”或“分享到朋友圈”完成分享
【寒武纪硬件产品型号】必填*:MLU370-x8
【MagicMind版本号】必填*:magicmind_v1.5.0
【出错信息】必填*:没有报错,但是性能太差
【操作步骤】选填:
下载onnx 模型文件,链接如下
https://github.com/onnx/models/blob/main/vision/classification/resnet/model/resnet50-v1-12.onnx
将 onnx 模型文件转为 magicmind 格式的模型
转换代码如下
"""Convert an ONNX model into a MagicMind model, with optional int8 calibration.

The script parses the ONNX file, fixes the input shape from the command-line
arguments, builds a MagicMind BuilderConfig, runs calibration when a quantized
precision mode is requested, and serializes the built model to disk.
"""
import argparse
import sys

import numpy as np
import magicmind.python.runtime as mm


def _check(status, what):
    """Raise RuntimeError unless *status* reports success via ``ok()``.

    Used instead of ``assert call().ok()`` so that the SDK call still runs
    (and is still checked) when Python is started with ``-O``.
    """
    if not status.ok():
        raise RuntimeError('%s failed: %s' % (what, status))


def onnx_parser(args):
    """Parse ``args.onnx_path`` into a new network and set its input shape.

    The shape of input 0 is set to
    (batchsize, channel, input_height, input_width) taken from *args*.
    Returns the populated ``mm.Network``.
    """
    from magicmind.python.runtime.parser import Parser

    network = mm.Network()
    parser = Parser(mm.ModelKind.kOnnx)
    _check(parser.parse(network, args.onnx_path), 'parsing ' + args.onnx_path)

    input_dims = mm.Dims(
        (args.batchsize, args.channel, args.input_height, args.input_width))
    _check(network.get_input(0).set_dimension(input_dims),
           'setting input dimension')
    return network


def generate_model_config(args):
    """Build the ``mm.BuilderConfig`` used for calibration and model build.

    Every JSON fragment is applied with ``parse_from_string`` and its status
    is checked; a rejected fragment raises RuntimeError.
    """
    shape = (args.batchsize, args.channel, args.input_height, args.input_width)
    fragments = [
        '{"precision_config":{"precision_mode":"%s"}}' % args.precision,
        '{"opt_config":{"type64to32_conversion":true}}',
        '{"opt_config":{"conv_scale_fold":true}}',
        '{"graph_shape_mutable": false}',
        # Keep the declared range in step with the shape set on the network
        # input; with the default arguments this is [16,3,224,224].
        '{"dim_range": {"0":{"min":[%d,%d,%d,%d], "max":[%d,%d,%d,%d]} } }'
        % (shape + shape),
        # Quantization scheme: symmetric or asymmetric. When the statistics
        # algorithm is EQNM_ALGORITHM only symmetric quantization applies.
        '{"precision_config": {"activation_quant_algo": "symmetric"}}',
        # Quantization granularity: per_tensor or per_axis (per channel).
        '{"precision_config": {"weight_quant_granularity": "per_tensor"}}',
        # Target hardware platform.
        '{"archs":["mtp_372"]}',
    ]

    config = mm.BuilderConfig()
    for fragment in fragments:
        _check(config.parse_from_string(fragment),
               'applying builder config ' + fragment)
    return config


def calibrate(args, network : mm.Network, config : mm.BuilderConfig):
    """Run post-training calibration of *network* on device ``args.device``.

    Calibration samples are read from the ``.npz`` files listed (one path per
    line) in ``args.calibrate_list``; each file must contain a ``'data'``
    array.

    Raises:
        ValueError: the calibration list is empty or the device id is invalid.
        RuntimeError: a MagicMind call reports a non-ok status.
    """

    class MMCalibData(mm.CalibDataInterface):
        """Feeds batches of pre-processed samples to the calibrator."""

        def __init__(self, args):
            super().__init__()
            with open(args.calibrate_list, 'r') as f:
                # Skip blank lines so they are not treated as file paths.
                self.images = [line.strip() for line in f if line.strip()]
            nimages = len(self.images)
            if nimages == 0:
                raise ValueError(
                    'no images in calibrate list[' + args.calibrate_list + ']!')
            # Guarantee at least one full batch by repeating the first sample.
            if nimages < args.batchsize:
                self.images.extend(
                    [self.images[0]] * (args.batchsize - nimages))
            self.shape_ = mm.Dims(
                (args.batchsize, args.channel,
                 args.input_height, args.input_width))
            self.cur_image_index_ = 0
            # Defined up front so get_sample() is safe before the first next().
            self.cur_sample_ = None
            self.args_ = args

        def get_shape(self):
            return self.shape_

        def get_data_type(self):
            return mm.DataType.FLOAT32

        def get_sample(self):
            return self.cur_sample_

        def preprocess_image(self):
            """Load the next sample array, or return None when exhausted."""
            if self.cur_image_index_ == len(self.images):
                return None
            path = self.images[self.cur_image_index_]
            self.cur_image_index_ += 1
            return np.load(path)['data']

        def next(self):
            """Assemble the next batch into ``cur_sample_``."""
            batch_size = self.shape_.GetDimValue(0)
            batch = []
            for _ in range(batch_size):
                image = self.preprocess_image()
                if image is None:
                    # No more data: a partial trailing batch is discarded.
                    return mm.Status(mm.Code.OUT_OF_RANGE, "Data end reached")
                batch.append(image)
            self.cur_sample_ = np.array(batch)
            return mm.Status.OK()

        def reset(self):
            self.cur_sample_ = None
            self.cur_image_index_ = 0
            return mm.Status.OK()

    calib_data = MMCalibData(args)
    calibrator = mm.Calibrator([calib_data])
    if calibrator is None:
        raise RuntimeError('failed to create calibrator')
    # Statistics algorithm: LINEAR_ALGORITHM or the enhanced
    # quantization-noise-minimizing EQNM_ALGORITHM.
    _check(
        calibrator.set_quantization_algorithm(
            mm.QuantizationAlgorithm.LINEAR_ALGORITHM),
        'setting quantization algorithm')

    with mm.System() as mm_sys:
        dev_count = mm_sys.device_count()
        if args.device < 0 or args.device >= dev_count:
            # Previously this only printed a message and carried on.
            raise ValueError(
                'Invalid device set! device %d, available %d'
                % (args.device, dev_count))
        # Activate the MLU device used for calibration.
        dev = mm.Device()
        dev.id = args.device
        _check(dev.active(), 'activating device %d' % args.device)
        _check(calibrator.calibrate(network, config), 'calibration')


def main():
    """Command-line entry point: parse options, calibrate if needed, build."""
    parser = argparse.ArgumentParser(
        description='onnx model to magicmind model')
    parser.add_argument('--onnx_path', dest='onnx_path',
                        default='resnet50-v1-12.onnx', required=False,
                        type=str, help='onnx model path')
    parser.add_argument('--batchsize', dest='batchsize', default=16,
                        type=int, help='batchsize')
    parser.add_argument('--channel', dest='channel', default=3,
                        type=int, help='channel')
    parser.add_argument('--input_width', dest='input_width', default=224,
                        type=int, help='model input width')
    parser.add_argument('--input_height', dest='input_height', default=224,
                        type=int, help='model input height')
    parser.add_argument('--output_model', dest='output_model',
                        default='resnet50-v1-12_qint8_mixed_float32.magicmind',
                        type=str, help='output model path')
    parser.add_argument('--precision', dest='precision',
                        default='qint8_mixed_float32', type=str,
                        help='precision mode, qint8_mixed_float16 qint8_mixed_float32 force_float16 force_float32 are supported')
    parser.add_argument('--calibrate_list', dest='calibrate_list',
                        default='resnet50_int8/calibrate_list.txt', type=str,
                        help='image list file path, file contains input image paths for calibration')
    parser.add_argument('--device', dest='device', default=0, type=int,
                        help='mlu device id, used for calibration')
    args = parser.parse_args()

    supported_precision = ['qint8_mixed_float16', 'qint8_mixed_float32',
                           'force_float16', 'force_float32']
    if args.precision not in supported_precision:
        print('precision mode [' + args.precision + ']', 'not supported')
        # Non-zero status so calling scripts can detect the failure.
        sys.exit(1)

    network = onnx_parser(args)
    config = generate_model_config(args)

    # Only the quantized (qint*) modes need calibration data.
    if 'qint' in args.precision:
        print('do calibrate...')
        calibrate(args, network, config)

    print('build model...')
    builder = mm.Builder()
    model = builder.build_model('magicmind model', network, config)
    if model is None:
        raise RuntimeError('build model failed')
    _check(model.serialize_to_file(args.output_model),
           'serializing model to ' + args.output_model)
    print('build model finish')


if __name__ == "__main__":
    main()
生成 resnet50-v1-12_qint8_mixed_float32.magicmind 模型,使用 mm_run 测试模型性能, 命令如下
mm_run --magicmind_model ./resnet50-v1-12_qint8_mixed_float32.magicmind
QPS如下,只有 1296
ResNet-50(INT8,Batch_Size=16)单次推理的计算量为 66.14 GOP(INT8)
MLU370-X8 执行一次 ResNet-50 推理的耗时为 median:12.881 ms
按连续执行 1 s 换算,Cambricon 实际使用的算力为 66.14 GOP / 0.012881 s ≈ 5.13 TOPS
MLU370-X8 的 INT8 峰值算力为 256 TOPS,相当于算力利用率只有 5.13 / 256 ≈ 2%
这个模型的算力利用率为什么这么低?QPS 为什么这么差?原因是什么?我需要怎么解决这个问题?
热门帖子
精华帖子