zwb (LV.1)
Points: 115 · Likes: 2 · Posts: 14 · Replies: 75 · Favorites: 2

Their Replies
Problems with yolov5 offline inference
My reply: Since I only have an MLU220 edge device, I cannot inspect fusion-mode results and can only generate the offline model. For the same image with batchsize=4, the output box numbers per batch slot are 56, 27, 0, 0. Why do the detection box counts differ? The post-processing code is as follows:

void writeVisualizeBBox(const vector<cv::Mat>& images,
                        const vector<vector<vector<float>>> detections,
                        const vector<string>& labelToDisplayName,
                        const vector<string>& imageNames,
                        int input_dim) {
  // Retrieve detections for every image in the batch.
  for (int i = 0; i < images.size(); ++i) {
    if (imageNames[i] == "null") continue;
    cv::Mat image = images[i];
    vector<vector<float>> result = detections[i];
    std::string name = imageNames[i];
    // Strip the directory and the extension from the image path.
    int positionMap = imageNames[i].rfind("/");
    if (positionMap > 0 && positionMap < imageNames[i].size()) {
      name = name.substr(positionMap + 1);
    }
    positionMap = name.find(".");
    if (positionMap > 0 && positionMap < name.size()) {
      name = name.substr(0, positionMap);
    }
    string filename = "result/" + name + ".txt";
    std::ofstream fileMap(filename);
    float scaling_factors = std::min(
        static_cast<float>(input_dim) / static_cast<float>(images[i].cols),
        static_cast<float>(input_dim) / static_cast<float>(images[i].rows));
    // Undo the letterbox padding and rescale boxes to the original image.
    for (int j = 0; j < result.size(); j++) {
      result[j][0] -= static_cast<float>(input_dim - scaling_factors * image.cols) / 2.0;
      result[j][2] -= static_cast<float>(input_dim - scaling_factors * image.cols) / 2.0;
      result[j][1] -= static_cast<float>(input_dim - scaling_factors * image.rows) / 2.0;
      result[j][3] -= static_cast<float>(input_dim - scaling_factors * image.rows) / 2.0;
      for (int k = 0; k < 4; k++) {
        result[j][k] = result[j][k] / scaling_factors;
      }
    }
    // Clip the boxes to the image boundary.
    for (int j = 0; j < result.size(); j++) {
      result[j][0] = result[j][0] < 0 ? 0 : result[j][0];
      result[j][2] = result[j][2] < 0 ? 0 : result[j][2];
      result[j][1] = result[j][1] < 0 ? 0 : result[j][1];
      result[j][3] = result[j][3] < 0 ? 0 : result[j][3];
      result[j][0] = result[j][0] > image.cols ? image.cols : result[j][0];
      result[j][2] = result[j][2] > image.cols ? image.cols : result[j][2];
      result[j][1] = result[j][1] > image.rows ? image.rows : result[j][1];
      result[j][3] = result[j][3] > image.rows ? image.rows : result[j][3];
    }
    // Draw the boxes and write "label score x0 y0 x1 y1 w h" per line.
    for (int j = 0; j < result.size(); j++) {
      int x0 = static_cast<int>(result[j][0]);
      int y0 = static_cast<int>(result[j][1]);
      int x1 = static_cast<int>(result[j][2]);
      int y1 = static_cast<int>(result[j][3]);
      cv::rectangle(image, cv::Point(x0, y0), cv::Point(x1, y1),
                    cv::Scalar(0, 0, 255), 2);
      stringstream ss;
      ss << round(result[j][4] * 1000) / 1000.0;  // score, three decimals
      std::string str =
          labelToDisplayName[static_cast<int>(result[j][5])] + ":" + ss.str();
      cv::putText(image, str, cv::Point(x0, y0 - 2), cv::FONT_HERSHEY_SIMPLEX,
                  0.6, cv::Scalar(255, 0, 0), 2);
      fileMap << labelToDisplayName[static_cast<int>(result[j][5])] << " "
              << ss.str() << " "
              << static_cast<float>(result[j][0]) << " "
              << static_cast<float>(result[j][1]) << " "
              << static_cast<float>(result[j][2]) << " "
              << static_cast<float>(result[j][3]) << " "
              << image.cols << " " << image.rows << std::endl;
    }
    fileMap.close();
    stringstream ss;
    string outFile;
    ss << "result/yolov5_" << name << ".jpg";
    ss >> outFile;
    cv::imwrite(outFile.c_str(), image);
  }
}

void readLabels(string filename, vector<string>& labels) {
  std::ifstream file(filename);
  if (file.fail()) std::cerr << "failed to open labels file!";
  std::string line;
  while (getline(file, line)) {
    labels.push_back(line);
  }
  file.close();
}

using std::vector;
using std::string;

// (A commented-out earlier variant of getResults was also posted: it parsed
// the buffer identically, but divided the coordinates by input_size = 640
// and clamped them to [0, 1] before the width/height check.)

vector<vector<vector<float>>> getResults(float* outputData, int dimNumm,
                                         int* dimValues) {
  vector<vector<vector<float>>> detections;
  int batchSize = dimValues[0];
  int count = dimValues[3];  // per-batch stride in the output buffer
  for (int i = 0; i < batchSize; i++) {
    // The first float of each batch slot holds the detected box count.
    int num_boxes = static_cast<int>(outputData[i * count]);
    vector<vector<float>> batch_box;
    for (int k = 0; k < num_boxes; k++) {
      // Each box takes 7 floats, starting 64 floats into the slot;
      // fields 1..6 are used here as [class, score, x1, y1, x2, y2].
      int index = i * count + 64 + k * 7;
      vector<float> single_box;
      float bl = outputData[index + 3];  // x1
      float bt = outputData[index + 4];  // y1
      float br = outputData[index + 5];  // x2
      float bb = outputData[index + 6];  // y2
      single_box.push_back(bl);
      single_box.push_back(bt);
      single_box.push_back(br);
      single_box.push_back(bb);
      single_box.push_back(outputData[index + 2]);  // score
      single_box.push_back(outputData[index + 1]);  // class id
      if ((br - bl) > 0 && (bb - bt) > 0) {
        batch_box.push_back(single_box);
      }
    }
    detections.push_back(batch_box);
  }
  return detections;
}
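A note on debugging this (a hedged suggestion, not part of the original thread): with one image copied into all four batch slots, identical slots must produce identical detections, so counts of 56/27/0/0 usually mean either that the four slots do not actually contain the same preprocessed data, or that the per-slot stride used in getResults (count = dimValues[3]) does not match the real output layout. A minimal Python sketch for checking the first possibility; the letterbox helper is an assumption about yolov5-style preprocessing, not code from the post:

# Sketch only: build a batch of 4 identical letterboxed inputs and confirm
# every slot holds the same data before blaming the post-processing.
import cv2
import numpy as np

def letterbox(img, input_dim=640):
    # Scale the long side to input_dim and pad the rest with gray (114),
    # mirroring yolov5-style preprocessing (an assumption here).
    scale = min(input_dim / img.shape[1], input_dim / img.shape[0])
    w, h = int(round(img.shape[1] * scale)), int(round(img.shape[0] * scale))
    canvas = np.full((input_dim, input_dim, 3), 114, dtype=np.uint8)
    top, left = (input_dim - h) // 2, (input_dim - w) // 2
    canvas[top:top + h, left:left + w] = cv2.resize(img, (w, h))
    return canvas

img = cv2.imread("test.jpg")  # illustrative file name
batch = np.stack([letterbox(img)] * 4).astype(np.float32) / 255.0  # NHWC
batch = batch.transpose(0, 3, 1, 2)  # NCHW, as the offline model expects

# Identical inputs must stay identical; if not, the batch assembly is wrong.
for i in range(1, 4):
    assert np.array_equal(batch[0], batch[i])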
Porting yolov3 with PyTorch
My reply: (quoting #1, lanhao: "See the yolov3 sample in the software stack: cambricon_pytorch/pytorch/examples/offline/c++/yolov3") Hello, does the official release provide a sample for converting yolov3 into an offline model?
Porting yolov3 with PyTorch
My reply:

def mlu_forward(imgfile, use_mlu=True):
    ct.set_core_version("MLU220")  # generate an MLU220 offline model; it runs only on MLU220 cores
    # ct.set_core_version("MLU270")  # an MLU270 model can run in the simulation environment (with an MLU270 card)
    ct.set_device(-1)
    ct.set_core_number(4)  # number of NPU cores the offline model uses at runtime; 4 usually performs best
    if use_mlu:
        device = ct.mlu_device()
    else:
        device = torch.device('cpu')
    Img = cv2.imread(imgfile)                   # read the image
    img = Image.fromarray(Img).convert('RGB')   # convert the format
    img = cv2.resize(np.array(img), (64, 128))  # resize to the model input size
    input_w = img.shape[0]
    input_h = img.shape[1]
    batch_size = 1
    fusion = 1
    with torch.no_grad():
        model = Net(reid=True)  # build the network
        model.eval().float()
        if use_mlu:
            model = mlu_quantize.quantize_dynamic_mlu(model)
            model.load_state_dict(torch.load(save_path), strict=False)
            model.to(device)
        # if not use_mlu:
        #     sized = ((img / 255) - np.array(mean)) / np.array(std)
        #     sized = np.transpose(sized, (2, 0, 1))
        sized = ((np.array(img) / 255) - np.array(mean)) / np.array(std)
        sized = sized.transpose((2, 0, 1))
        sized = sized.astype(np.float32)
        input_img = torch.from_numpy(np.stack([sized] * batch_size))
        input_img = input_img.to(device)
        logging.info(input_img.shape)
        # fusion mode
        if use_mlu and fusion:  # fuse the graph and convert it to an offline model
            ct.save_as_cambricon('deepsort')
            example_tensor = torch.randn((4, 3, 128, 64), dtype=torch.float).to(device)  # N, C, H, W
            model = torch.jit.trace(model, example_tensor, check_trace=False)
            out = model(example_tensor)
            ct.save_as_cambricon('')  # generate the offline model
            print('fusion success')
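One point worth making explicit about the snippet above (hedged, since exact behavior depends on the torch_mlu release): the shape passed to torch.jit.trace, here (4, 3, 128, 64), is what the generated .cambricon model will expect at inference time, even though batch_size = 1 was used for the test input. A small sketch of a variation that traces with the real preprocessed tensor and saves it to disk, so the identical input can later be replayed against the offline model on the MLU220; sized, model, device, and ct refer to the names in the reply above:

# Sketch only: trace with the real preprocessed input instead of random
# data, and keep a copy on disk for replay on the MLU220 side.
import numpy as np
import torch

traced_batch = 4  # the traced batch size is fixed in the offline model
example = torch.from_numpy(np.stack([sized] * traced_batch))  # N, C, H, W
np.save("deepsort_input.npy", example.numpy())  # replay this on the MLU220

ct.save_as_cambricon("deepsort")
traced = torch.jit.trace(model, example.to(device), check_trace=False)
out = traced(example.to(device))
ct.save_as_cambricon("")  # generate the offline model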
Porting yolov3 with PyTorch
My reply: (quoting #3, lanhao: "Without an MLU270 to actually run on, how does PyTorch generate the offline model? If an offline model was generated, the way to judge its accuracy is no different from the online case.") The offline model was generated on the host with the same mlu_forward code as in my previous reply, then deployed to the MLU220 box for inference. Because the host has no MLU270 device, I cannot see any inference results; all I get is a .cambricon file. Is an offline model generated this way correct?
Porting yolov3 with PyTorch
My reply: (quoting #1, lanhao: "After generating the .cambricon offline model, just write offline inference code on the MLU220. When saving an MLU220 offline model it is normal for the online fused inference result to be wrong; you only need to make sure the online fusion result is correct.") Hello, if there is no MLU270 or similar device for online inference and I only have an MLU220 device, how can I tell that the generated offline model is correct?
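A hedged suggestion that is not in the thread: even without an MLU270, the check can be done numerically. Run the float model on CPU (use_mlu=False) with a saved input and dump its output; run the .cambricon model on the MLU220 with the same input and dump the raw output buffer; then compare the two arrays. The file names and threshold below are illustrative assumptions:

# Sketch only: compare a CPU float reference with the output dumped by
# the MLU220 offline run, both produced from the identical input tensor.
import numpy as np

ref = np.load("cpu_reference_output.npy").ravel()
mlu = np.load("mlu220_offline_output.npy").ravel()

cos = np.dot(ref, mlu) / (np.linalg.norm(ref) * np.linalg.norm(mlu))
max_err = np.abs(ref - mlu).max()
print(f"cosine similarity: {cos:.6f}, max abs error: {max_err:.6f}")

# A quantized INT8/INT16 model will not match bit-for-bit; a cosine
# similarity close to 1 (e.g. > 0.99) is a common sanity bar.
assert cos > 0.99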