打开微信,使用扫一扫进入页面后,点击右上角菜单,
点击“发送给朋友”或“分享到朋友圈”完成分享
// Bilinear resize of one tile of a column-major float plane.
//
// Element (x, y) of a plane with height H is addressed as ptr[x * H + y]; that
// is how this function addresses both ptrIn and ptrOut. The tile covers output
// columns [idX, idX + numX) and output rows
// [BLOCK_SIZE * idY, BLOCK_SIZE * (idY + 1)), both clipped to the output size.
// Output coordinates are mapped to source coordinates with the factor
// (in - 1) / (out - 1), so the first and last samples of each axis coincide.
//
// Parameters:
//   ptrOut               destination plane (copied to with NRAM2GDRAM)
//   ptrIn                source plane (copied from with GDRAM2NRAM)
//   inWidth, inHeight    source dimensions
//   outWidth, outHeight  destination dimensions
//   idX                  first output column of the tile
//   idY                  tile index along y (a tile is BLOCK_SIZE rows high)
//   numX                 number of output columns in the tile
//
// Preconditions (not checked here):
//   - a whole source column (inHeight floats) is staged in a BLOCK_SIZE-sized
//     buffer, so inHeight must not exceed BLOCK_SIZE;
//   - NOTE(review): matc_get_at is defined elsewhere. It is assumed to return
//     source element (x, y), reading the staged column buffer when one is
//     passed and ptrIn when the buffer is NULL -- confirm against its
//     definition.
__mlu_func__ void biliner_block(float* ptrOut, float* ptrIn, int inWidth, int inHeight,
                                int outWidth, int outHeight, int idX, int idY, int numX) {
  __nram__ float temp_py[BLOCK_SIZE], temp_result[2][BLOCK_SIZE];
  __nram__ float src_line_buff1[2][BLOCK_SIZE], src_line_buff2[2][BLOCK_SIZE];
  __nram__ int temp_y[BLOCK_SIZE];
  bang::pipeline pipe(0);

  const int start_x = idX;
  const int start_y = BLOCK_SIZE * idY;
  if (start_x >= outWidth || start_y >= outHeight) {
    return;
  }

  // Scale factors. A 1-pixel output axis maps to source index 0; dividing by
  // (out - 1) == 0 would make every coordinate 0 * inf = NaN.
  const float factor_y = (outHeight > 1) ? (inHeight - 1.0f) / (outHeight - 1.0f) : 0.0f;
  const float factor_x = (outWidth > 1) ? (inWidth - 1.0f) / (outWidth - 1.0f) : 0.0f;

  const int end_x = ((start_x + numX) < outWidth) ? (start_x + numX) : outWidth;
  const int end_y = ((start_y + BLOCK_SIZE) < outHeight) ? (start_y + BLOCK_SIZE) : outHeight;
  // The last output column / row maps onto the last source column / row, which
  // has no right / lower neighbour, so it is excluded from the interpolating
  // loops and handled separately.
  const int end_x2 = (end_x == outWidth) ? (end_x - 1) : end_x;
  const int end_y2 = (end_y == outHeight) ? (end_y - 1) : end_y;

  const int num_cols = end_x2 - start_x;  // columns interpolated along x
  const int num_rows = end_y2 - start_y;  // rows interpolated along y
  const int out_rows = end_y - start_y;   // rows written back per column

  // Source y coordinate (fractional and integer part) of every tile row.
  for (int idy = 0; idy < out_rows; idy++) {
    temp_py[idy] = (idy + start_y) * factor_y;
    temp_y[idy] = (int)(temp_py[idy]);
  }

  // Stage the two source columns needed by the first output column. Nothing is
  // issued when the interpolating loop will not run, so no async load is left
  // pending on a buffer the synchronous tail below reuses.
  int fn_buff_id = 0;
  if (num_cols > 0) {
    const int first_src = (int)(start_x * factor_x);
    bang::memcpy_async(pipe, src_line_buff1[fn_buff_id], ptrIn + first_src * inHeight,
                       inHeight * sizeof(float), GDRAM2NRAM);
    bang::memcpy_async(pipe, src_line_buff2[fn_buff_id], ptrIn + (first_src + 1) * inHeight,
                       inHeight * sizeof(float), GDRAM2NRAM);
  }

  for (int idx = 0; idx < num_cols; idx++) {
    pipe.wait_copy_dram_to_nram();

    // The x coordinate is computed on the fly instead of being cached in a
    // BLOCK_SIZE-sized array, so numX is not limited by BLOCK_SIZE.
    const float px = (idx + start_x) * factor_x;
    const int src_idx = (int)px;
    const float wx0 = src_idx + 1 - px;  // weight of column src_idx
    const float wx1 = px - src_idx;      // weight of column src_idx + 1

    // Prefetch the columns of the NEXT output column into the other buffer
    // while this one is being computed.
    const int async_buff_id = (fn_buff_id + 1) % 2;
    if (idx + 1 < num_cols) {
      const int next_src = (int)((idx + 1 + start_x) * factor_x);
      bang::memcpy_async(pipe, src_line_buff1[async_buff_id], ptrIn + next_src * inHeight,
                         inHeight * sizeof(float), GDRAM2NRAM);
      bang::memcpy_async(pipe, src_line_buff2[async_buff_id], ptrIn + (next_src + 1) * inHeight,
                         inHeight * sizeof(float), GDRAM2NRAM);
    }

    // Interior rows: full four-tap bilinear blend.
    for (int idy = 0; idy < num_rows; idy++) {
      const int y0 = temp_y[idy];
      const float wy0 = y0 + 1 - temp_py[idy];  // weight of row y0
      const float wy1 = temp_py[idy] - y0;      // weight of row y0 + 1

      float val_in = matc_get_at(src_idx, y0, ptrIn, inHeight, src_line_buff1[fn_buff_id]);
      float acc = val_in * wy0 * wx0;
      val_in = matc_get_at(src_idx, y0 + 1, ptrIn, inHeight, src_line_buff1[fn_buff_id]);
      acc += val_in * wy1 * wx0;
      val_in = matc_get_at(src_idx + 1, y0, ptrIn, inHeight, src_line_buff2[fn_buff_id]);
      acc += val_in * wy0 * wx1;
      val_in = matc_get_at(src_idx + 1, y0 + 1, ptrIn, inHeight, src_line_buff2[fn_buff_id]);
      acc += val_in * wy1 * wx1;
      temp_result[fn_buff_id][idy] = acc;
    }

    // Last output row: blend along x only.
    if (end_y == outHeight) {
      const int y_last = temp_y[num_rows];
      float val_in = matc_get_at(src_idx, y_last, ptrIn, inHeight, src_line_buff1[fn_buff_id]);
      float acc = val_in * wx0;
      val_in = matc_get_at(src_idx + 1, y_last, ptrIn, inHeight, src_line_buff2[fn_buff_id]);
      acc += val_in * wx1;
      temp_result[fn_buff_id][num_rows] = acc;
    }

    // Wait for the previous column's store, then write this column back.
    // out_rows (not num_rows) so the last-row value computed above is included.
    pipe.wait_copy_nram_to_dram();
    bang::memcpy_async(pipe, ptrOut + (idx + start_x) * outHeight + start_y,
                       temp_result[fn_buff_id], out_rows * sizeof(float), NRAM2GDRAM);

    fn_buff_id = async_buff_id;  // the prefetched buffer becomes the current one
  }
  pipe.wait_copy_nram_to_dram();

  // Last output column: blend along y only, using a single source column.
  if (end_x == outWidth) {
    const int idx2 = num_cols;
    const int src_idx = (int)((idx2 + start_x) * factor_x);
    __memcpy(src_line_buff1[0], ptrIn + src_idx * inHeight, inHeight * sizeof(float), GDRAM2NRAM);

    for (int idy = 0; idy < num_rows; idy++) {
      const int y0 = temp_y[idy];
      float val_in = matc_get_at(src_idx, y0, ptrIn, inHeight, src_line_buff1[0]);
      float acc = val_in * (y0 + 1 - temp_py[idy]);
      val_in = matc_get_at(src_idx, y0 + 1, ptrIn, inHeight, src_line_buff1[0]);
      acc += val_in * (temp_py[idy] - y0);
      temp_result[0][idy] = acc;
    }

    // Bottom-right corner: taken directly from the last source element.
    if (end_y == outHeight) {
      temp_result[0][num_rows] = matc_get_at(inWidth - 1, inHeight - 1, ptrIn, inHeight, NULL);
    }

    __memcpy(ptrOut + (idx2 + start_x) * outHeight + start_y, temp_result[0],
             out_rows * sizeof(float), NRAM2GDRAM);
  }
}
热门帖子
精华帖子