打开微信,使用扫一扫进入页面后,点击右上角菜单,
点击“发送给朋友”或“分享到朋友圈”完成分享
【寒武纪硬件产品型号】必填*:MLU370-x8
【MagicMind版本号】必填*:magicmind_v1.5.0
【出错信息】必填*:没有报错,问题是性能太差(QPS 过低)
【操作步骤】选填:
下载onnx 模型文件,链接如下
https://github.com/onnx/models/blob/main/vision/classification/resnet/model/resnet50-v1-12.onnx
将 onnx 模型文件转为 magicmind 格式的模型
转换代码如下
import argparse
import numpy as np
import magicmind.python.runtime as mm
def onnx_parser(args):
    """Parse an ONNX model file into a MagicMind Network with a fixed input shape.

    Args:
        args: parsed CLI namespace providing ``onnx_path``, ``batchsize``,
            ``channel``, ``input_height`` and ``input_width``.

    Returns:
        mm.Network: the parsed network, with input 0 resized to NCHW
        (batchsize, channel, input_height, input_width).
    """
    from magicmind.python.runtime.parser import Parser

    net = mm.Network()
    model_parser = Parser(mm.ModelKind.kOnnx)
    # parser.set_model_param("tf-infer-shape", True)
    parse_status = model_parser.parse(net, args.onnx_path)
    assert parse_status.ok()
    nchw_dims = mm.Dims((args.batchsize, args.channel, args.input_height, args.input_width))
    assert net.get_input(0).set_dimension(nchw_dims).ok()
    # assert network.mark_output(network.get_output(0))
    return net
def generate_model_config(args):
    """Build the MagicMind BuilderConfig for the ONNX conversion.

    Args:
        args: parsed CLI namespace providing ``precision``, ``batchsize``,
            ``channel``, ``input_height`` and ``input_width``.

    Returns:
        mm.BuilderConfig: config with precision, optimization, static-shape,
        quantization and target-architecture options set.
    """
    config = mm.BuilderConfig()
    # Precision mode comes from the CLI (e.g. qint8_mixed_float32).
    assert config.parse_from_string('{"precision_config":{"precision_mode":"%s"}}' % args.precision).ok()
    assert config.parse_from_string('{"opt_config":{"type64to32_conversion":true}}').ok()
    assert config.parse_from_string('{"opt_config":{"conv_scale_fold":true}}').ok()
    # BUG FIX: the original asserted on the Status object itself (always
    # truthy) instead of calling .ok(), so a parse failure here was silently
    # ignored.
    assert config.parse_from_string('{"graph_shape_mutable": false}').ok()
    # Generalized: derive the static dim range from the CLI arguments instead
    # of hard-coding [16,3,224,224], so non-default shapes stay consistent
    # with the network input set in onnx_parser(). Defaults produce the same
    # config as before.
    shape = [args.batchsize, args.channel, args.input_height, args.input_width]
    assert config.parse_from_string('{"dim_range": {"0":{"min":%s, "max":%s} } }' % (shape, shape)).ok()
    # Quantization algorithm: symmetric or asymmetric. (With the EQNM
    # statistics algorithm only symmetric quantization applies.)
    assert config.parse_from_string('{"precision_config": {"activation_quant_algo": "symmetric"}}').ok()
    # Quantization granularity: per_tensor or per_axis.
    assert config.parse_from_string('{"precision_config": {"weight_quant_granularity": "per_tensor"}}').ok()
    # Target hardware architecture (MLU370 -> mtp_372).
    assert config.parse_from_string('{"archs":["mtp_372"]}').ok()
    return config
def calibrate(args, network: mm.Network, config: mm.BuilderConfig):
    """Run INT8 calibration for ``network`` on an MLU device.

    Reads preprocessed sample files listed in ``args.calibrate_list``, feeds
    them to a MagicMind Calibrator in batches, and updates ``config`` /
    ``network`` with the collected quantization statistics.

    Args:
        args: parsed CLI namespace (calibrate_list, batchsize, channel,
            input_height, input_width, device).
        network: parsed network to calibrate.
        config: builder config the calibration results are attached to.

    Raises:
        ValueError: if ``args.device`` is not a valid MLU device id.
    """
    class MMCalibData(mm.CalibDataInterface):
        """Feeds preprocessed image batches (from .npz files) to the calibrator."""

        def __init__(self, args):
            super().__init__()
            with open(args.calibrate_list, 'r') as f:
                image_paths = f.readlines()
            self.images = []
            for image_path in image_paths:
                self.images.append(image_path.strip())
            nimages = len(self.images)
            assert nimages != 0, 'no images in calibrate list[' + args.calibrate_list + ']!'
            # Pad with the first sample so there is at least one full batch.
            if nimages < args.batchsize:
                for i in range(args.batchsize - nimages):
                    self.images.append(self.images[0])
            self.shape_ = mm.Dims((args.batchsize, args.channel, args.input_height, args.input_width))
            self.cur_image_index_ = 0
            self.args_ = args

        def get_shape(self):
            return self.shape_

        def get_data_type(self):
            return mm.DataType.FLOAT32

        def get_sample(self):
            # Returns the batch assembled by the most recent next() call.
            return self.cur_sample_

        def preprocess_image(self):
            """Return the next preprocessed image array, or None when exhausted."""
            if self.cur_image_index_ == len(self.images):
                return None
            img = self.images[self.cur_image_index_]
            # Each list entry is an .npz file holding the tensor under 'data'.
            img = np.load(img)['data']
            self.cur_image_index_ = self.cur_image_index_ + 1
            return img

        def next(self):
            batch_size = self.shape_.GetDimValue(0)
            preprocessed_images = []
            for i in range(batch_size):
                image = self.preprocess_image()
                if image is None:
                    # No more data.
                    return mm.Status(mm.Code.OUT_OF_RANGE, "Data end reached")
                preprocessed_images.append(image)
            self.cur_sample_ = np.array(preprocessed_images)
            return mm.Status.OK()

        def reset(self):
            self.cur_sample_ = None
            self.cur_image_index_ = 0
            return mm.Status.OK()

    calib_data = MMCalibData(args)
    calibrator = mm.Calibrator([calib_data])
    assert calibrator is not None
    # Calibration statistics algorithm: LINEAR_ALGORITHM (linear statistics)
    # or EQM_ALGORITHM (enhanced quantization-noise minimization).
    assert calibrator.set_quantization_algorithm(mm.QuantizationAlgorithm.LINEAR_ALGORITHM).ok()
    # Open the runtime and the target MLU device.
    with mm.System() as mm_sys:
        dev_count = mm_sys.device_count()
        # BUG FIX: the original only printed "Invalid device set!" and then
        # went on to activate the invalid device anyway; fail fast instead.
        if args.device >= dev_count:
            raise ValueError('Invalid device set! device=%d, device_count=%d'
                             % (args.device, dev_count))
        dev = mm.Device()
        dev.id = args.device
        assert dev.active().ok()
        # Run quantization calibration.
        assert calibrator.calibrate(network, config).ok()
def main():
    """Convert an ONNX ResNet-50 to a MagicMind model, calibrating for qint8 modes.

    Parses CLI options, validates the precision mode, optionally runs INT8
    calibration, builds the MagicMind model and serializes it to
    ``args.output_model``.
    """
    # BUG FIX: the original had a garbled keyword `de ion=` — it must be
    # `description=`. The parser is also kept in its own variable instead of
    # being shadowed by the parsed namespace.
    parser = argparse.ArgumentParser(description='onnx model to magicmind model')
    parser.add_argument('--onnx_path', dest='onnx_path', default='resnet50-v1-12.onnx',
                        required=False, type=str, help='onnx model path')
    parser.add_argument('--batchsize', dest='batchsize', default=16,
                        type=int, help='batchsize')
    parser.add_argument('--channel', dest='channel', default=3,
                        type=int, help='channel')
    parser.add_argument('--input_width', dest='input_width', default=224,
                        type=int, help='model input width')
    parser.add_argument('--input_height', dest='input_height', default=224,
                        type=int, help='model input height')
    parser.add_argument('--output_model', dest='output_model', default='resnet50-v1-12_qint8_mixed_float32.magicmind',
                        type=str, help='output model path')
    parser.add_argument('--precision', dest='precision', default='qint8_mixed_float32',
                        type=str, help='precision mode, qint8_mixed_float16 qint8_mixed_float32 force_float16 force_float32 are supported')
    parser.add_argument('--calibrate_list', dest='calibrate_list', default='resnet50_int8/calibrate_list.txt',
                        type=str, help='image list file path, file contains input image paths for calibration')
    parser.add_argument('--device', dest='device', default=0,
                        type=int, help='mlu device id, used for calibration')
    args = parser.parse_args()
    supported_precision = ['qint8_mixed_float16', 'qint8_mixed_float32', 'force_float16', 'force_float32']
    if args.precision not in supported_precision:
        print('precision mode [' + args.precision + ']', 'not supported')
        # `return` instead of the interactive-only `exit()` builtin; same
        # effect when run as a script.
        return
    network = onnx_parser(args)
    config = generate_model_config(args)
    # Quantized modes (qint8_*) require a calibration pass first.
    if args.precision.find('qint') != -1:
        print('do calibrate...')
        calibrate(args, network, config)
    print('build model...')
    builder = mm.Builder()
    model = builder.build_model('magicmind model', network, config)
    assert model is not None
    assert model.serialize_to_file(args.output_model).ok()
    print('build model finish')
if __name__ == "__main__":
    main()

运行上述脚本生成 resnet50-v1-12_qint8_mixed_float32.magicmind 模型,使用 mm_run 测试模型性能,命令如下
mm_run --magicmind_model ./resnet50-v1-12_qint8_mixed_float32.magicmind
QPS如下,只有 1296
Resnet_50(INT8,Batch_Size=16) 需要算力 66.14 G OPS INT8
MLU370-x8 执行一次Resnet_50 需要 median:12.881 ms
Cambricon 连续执行 1s实际使用算力 66.14 GOPS/ 0.012881 s = 5.13 T OPS
MLU370-x8 int8 算力有 256 TOPS, 那相当于算力利用率只有 5.13/256 = 2%
这个模型的算力利用率为什么这么低? QPS这么差? 这是什么原因? 我需要怎么去解决这个问题?
热门帖子
精华帖子