打开微信,使用扫一扫进入页面后,点击右上角菜单,
点击“发送给朋友”或“分享到朋友圈”完成分享
yolov5s模型在batchsize=4的条件下对一张图片进行离线推理时没有输出结果,但在batchsize=1时结果正常。离线推理所用的部分代码如下:
for (int i = 0; i < inputNum; i++) { CNRT_CHECK(cnrtMalloc(&inputMluPtrS[i],inputSizeS[i])); //分配MLU上内存 inputCpuPtrS[i] = (void *)malloc(inputSizeS[i]); //分配CPU上的内存 //printf("%d\n", inputSizeS[i]); //获取输入的维度信息 NHWC CNRT_CHECK(cnrtGetInputDataShape(&dimValues,&dimNum,i,function)); printf("input shape:\n"); for(int y=0;y<dimNum;y++) { printf("%d ",dimValues[y]); } printf("\n"); input_width=dimValues[2]; input_height=dimValues[1]; batch_size=dimValues[0]; free(dimValues); } //为输出节点 分配CPU/MLU内存 for (int i = 0; i < outputNum; i++) { CNRT_CHECK(cnrtMalloc(&outputMluPtrS[i],outputSizeS[i])); //分配MLU上内存 outputCpuPtrS[i] = (void *)malloc(outputSizeS[i]); //分配CPU上的内存 //printf("%d\n", outputSizeS[i]); //获取输出的维度信息 NHWC CNRT_CHECK(cnrtGetOutputDataShape(&dimValues,&dimNum,i,function)); int count=1; printf("output shape:\n"); for(int y=0;y<dimNum;y++) { printf("%d ",dimValues[y]); count=count*dimValues[y]; } printf("\n"); outputCpuNchwPtrS[i] = (void *)malloc(count*sizeof(float)); //将输出转为float32类型,方便用户后处理 output_count.push_back(count); free(dimValues); } //配置MLU输入/输出 地址的指针 param = (void **)malloc(sizeof(void *) * (inputNum + outputNum)); for (int i = 0; i < inputNum; i++) { param[i] = inputMluPtrS[i]; } for (int i = 0; i < outputNum; i++) { param[i + inputNum] = outputMluPtrS[i]; } //设置输入/输出的节点 索引 int input_idx=0; int output_idx=0; vector<cv::Mat> imgs; vector<string> img_names; unsigned char *ptr=(unsigned char *)inputCpuPtrS[input_idx]; for(int i=0;i<batch_size;i++) { // 选项 2 是yolov5的数据预处理方式 img_names.push_back(image_path); cv::Mat input_image=cv::imread(image_path); imgs.push_back(input_image); cv::Mat input_image_resized; cv::resize(input_image,input_image_resized,cv::Size(input_width,input_height)); if(is_rgb==1) { cv::Mat net_input_data_rgba(input_height,input_width,CV_8UC4,ptr); cv::cvtColor(input_image_resized, net_input_data_rgba, CV_BGR2RGBA); ptr+=(input_height*input_width*4); } else if(is_rgb==0) { cv::Mat net_input_data_rgba(input_height,input_width,CV_8UC4,ptr); 
cv::cvtColor(input_image_resized, net_input_data_rgba, CV_BGR2BGRA); ptr+=(input_height*input_width*4); } else if(is_rgb==2) { cv::Mat sample_temp; float img_w = input_image.cols; float img_h = input_image.rows; cv::Mat sample_temp_bgr(input_image.cols, input_image.rows, CV_32FC3); float img_scale = img_w < img_h ? (input_height / img_h) : (input_width / img_w); int new_w = std::floor(img_w * img_scale); int new_h = std::floor(img_h * img_scale); cv::cvtColor(input_image, sample_temp_bgr, CV_BGR2RGB); cv::resize(sample_temp_bgr, sample_temp, cv::Size(new_w, new_h), CV_INTER_LINEAR); cv::Mat net_input_data_rgba(input_height,input_width,CV_32FC3,ptr); sample_temp.copyTo(net_input_data_rgba( cv::Range((static_cast<float>(input_height) - new_h) / 2, (static_cast<float>(input_height) - new_h) / 2 + new_h), cv::Range((static_cast<float>(input_width) - new_w) / 2, (static_cast<float>(input_width) - new_w) / 2 + new_w))); net_input_data_rgba /= 255.0; ptr+=(input_height*input_width*4); } } auto t0=GetTickCount(); //拷贝输入数据到MLU内存 CNRT_CHECK(cnrtMemcpy(inputMluPtrS[input_idx],inputCpuPtrS[input_idx],inputSizeS[input_idx],CNRT_MEM_TRANS_DIR_HOST2DEV)); //创建事件 cnrtNotifier_t notifier_start; //用来记录硬件时间 cnrtNotifier_t notifier_end; CNRT_CHECK(cnrtRuntimeContextCreateNotifier(ctx,&notifier_start)); CNRT_CHECK(cnrtRuntimeContextCreateNotifier(ctx,&notifier_end)); CNRT_CHECK(cnrtPlaceNotifier(notifier_start, queue)); //设置invoke的参数 unsigned int affinity=1<<dev_channel; //设置通道亲和性,使用指定的MLU cluster做推理 cnrtInvokeParam_t invokeParam; //invoke参数 invokeParam.invoke_param_type=CNRT_INVOKE_PARAM_TYPE_0; invokeParam.cluster_affinity.affinity=&affinity; CNRT_CHECK(cnrtInvokeRuntimeContext_V2(ctx,nullptr,param,queue,&invokeParam)); CNRT_CHECK(cnrtPlaceNotifier(notifier_end, queue)); CNRT_CHECK(cnrtSyncQueue(queue)); //拷贝MLU输出到CPU内存 CNRT_CHECK(cnrtMemcpy(outputCpuPtrS[output_idx],outputMluPtrS[output_idx],outputSizeS[output_idx],CNRT_MEM_TRANS_DIR_DEV2HOST)); auto t1=GetTickCount(); float hwtime; 
CNRT_CHECK(cnrtNotifierDuration(notifier_start, notifier_end, &hwtime)); printf("HardwareTime:%f(ms) E2ETime:%f(ms)\n",hwtime/1000.0,t1-t0); int dim_order[4] = {0, 3, 1, 2}; CNRT_CHECK(cnrtGetOutputDataShape(&dimValues,&dimNum,output_idx,function)); if(dimNum==4) { //NHWC->NCHW half->float32 CNRT_CHECK(cnrtTransOrderAndCast(reinterpret_cast<void*>(outputCpuPtrS[output_idx]), outputTypeS[output_idx], reinterpret_cast<void*>(outputCpuNchwPtrS[output_idx]), CNRT_FLOAT32, nullptr, dimNum, dimValues, dim_order)); } else { //数据类型转换 half->float32 CNRT_CHECK(cnrtCastDataType(reinterpret_cast<void*>(outputCpuPtrS[output_idx]), outputTypeS[output_idx], reinterpret_cast<void*>(outputCpuNchwPtrS[output_idx]), CNRT_FLOAT32, outputSizeS[output_idx]/2,nullptr)); } //打印输出结果 float *output_ptr=(float*)outputCpuNchwPtrS[output_idx]; cout << "boxnum:" << output_ptr[0] << endl; vector<vector<vector<float>>> detections = getResults(output_ptr, dimNum, dimValues); cout << "=========================" << endl; for(auto& d0:detections) for(auto &d1:d0){ for(auto &re:d1) cout << re << " "; cout << endl; } cout << "=========================" << endl; vector<string> labels; readLabels(label_filename, labels); writeVisualizeBBox(imgs, detections,labels, img_names, input_height); free(dimValues); CNRT_CHECK(cnrtSetCurrentDevice(dev)); CNRT_CHECK(cnrtDestroyQueue(queue)); CNRT_CHECK(cnrtDestroyFunction(function)); CNRT_CHECK(cnrtUnloadModel(model)); cnrtDestroyNotifier(&notifier_start); cnrtDestroyNotifier(&notifier_end); for (int i = 0; i < inputNum; i++) { free(inputCpuPtrS[i]); cnrtFree(inputMluPtrS[i]); } for (int i = 0; i < outputNum; i++) { free(outputCpuPtrS[i]); free(outputCpuNchwPtrS[i]); cnrtFree(outputMluPtrS[i]); } free(param); free(inputCpuPtrS); free(outputCpuPtrS); cnrtDestroyRuntimeContext(ctx); return 0; }
热门帖子
精华帖子