打开微信,使用扫一扫进入页面后,点击右上角菜单,
点击“发送给朋友”或“分享到朋友圈”完成分享
/**
 * Bilinearly resamples one tile of the output image from the input image.
 *
 * Both images are addressed as consecutive "lines": line x of the input starts at
 * ptrIn + x*inHeight and holds inHeight floats; line x of the output starts at
 * ptrOut + x*outHeight and holds outHeight floats. Output element (x, y) samples the
 * source at (x*(inWidth-1)/(outWidth-1), y*(inHeight-1)/(outHeight-1)), so the first
 * and last output elements of each axis land on the first and last source elements.
 * An output axis of size 1 samples source coordinate 0 on that axis.
 *
 * The tile covers output lines [idX, idX+numX) and rows
 * [BLOCK_SIZE*idY, BLOCK_SIZE*(idY+1)), both clipped to the output size. Nothing is
 * written when the tile starts outside the output.
 *
 * @param ptrOut    output image in GDRAM
 * @param ptrIn     input image in GDRAM
 * @param inWidth   number of input lines
 * @param inHeight  number of elements per input line
 * @param outWidth  number of output lines
 * @param outHeight number of elements per output line
 * @param idX       index of the first output line of this tile
 * @param idY       tile index along the row axis (in units of BLOCK_SIZE rows)
 * @param numX      number of output lines in this tile
 *
 * NOTE(review): whole input lines (inHeight floats) are copied into NRAM buffers of
 * BLOCK_SIZE floats, and temp_px is indexed by up to numX entries; callers presumably
 * guarantee inHeight <= BLOCK_SIZE and numX <= BLOCK_SIZE -- confirm.
 * NOTE(review): matc_get_at is defined elsewhere; it presumably reads element y from
 * the supplied line buffer, or straight from ptrIn when the buffer is NULL -- confirm.
 */
__mlu_func__ void biliner_block(float* ptrOut, float* ptrIn,
    int inWidth, int inHeight, int outWidth, int outHeight, int idX, int idY, int numX) {
  __nram__ float temp_px[BLOCK_SIZE], temp_py[BLOCK_SIZE], temp_result[2][BLOCK_SIZE];
  __nram__ float src_line_buff1[2][BLOCK_SIZE], src_line_buff2[2][BLOCK_SIZE];
  __nram__ int temp_y[BLOCK_SIZE];
  bang::pipeline pipe(0);

  const int start_x = idX;
  const int start_y = BLOCK_SIZE * idY;
  if (start_x >= outWidth || start_y >= outHeight) { return; }

  // Source step per output step. A one-element output axis would divide by zero
  // (NaN coordinates and an undefined float->int cast), so it samples coordinate 0.
  const float factor_y = (outHeight > 1) ? (inHeight - 1.0f) / (outHeight - 1.0f) : 0.0f;
  const float factor_x = (outWidth > 1) ? (inWidth - 1.0f) / (outWidth - 1.0f) : 0.0f;

  const int end_x = ((start_x + numX) < outWidth) ? (start_x + numX) : outWidth;
  const int end_y = ((start_y + BLOCK_SIZE) < outHeight) ? (start_y + BLOCK_SIZE) : outHeight;
  // The very last output line / row maps onto the last source line / row and has no
  // "+1" neighbour, so it is excluded from the interpolating loops and handled apart.
  const int end_x2 = (end_x == outWidth) ? (end_x - 1) : end_x;
  const int end_y2 = (end_y == outHeight) ? (end_y - 1) : end_y;

  const int rows = end_y - start_y;          // rows written per output line
  const int interp_rows = end_y2 - start_y;  // rows that interpolate along y
  const int cols = end_x - start_x;          // output lines in this tile
  const int interp_cols = end_x2 - start_x;  // lines that interpolate along x

  // Fractional source coordinates (and integer row index) for every tile row / line.
  for (int idy = 0; idy < rows; idy++) {
    temp_py[idy] = (idy + start_y) * factor_y;
    temp_y[idy] = (int)(temp_py[idy]);
  }
  for (int idx = 0; idx < cols; idx++) {
    temp_px[idx] = (idx + start_x) * factor_x;
  }

  // Prime the double buffer with the two source lines needed by the first output line.
  // Skipped when there is no interpolating line, so that no async load is left pending
  // on src_line_buff1[0] when the last-line path below reuses it.
  int fn_buff_id = 0;
  if (interp_cols > 0) {
    const int first_src = (int)(temp_px[0]);
    bang::memcpy_async(pipe, src_line_buff1[fn_buff_id], ptrIn + first_src * inHeight,
                       inHeight * sizeof(float), GDRAM2NRAM);
    bang::memcpy_async(pipe, src_line_buff2[fn_buff_id], ptrIn + (first_src + 1) * inHeight,
                       inHeight * sizeof(float), GDRAM2NRAM);
  }

  for (int idx = 0; idx < interp_cols; idx++) {
    pipe.wait_copy_dram_to_nram();  // source lines for this output line are now in NRAM
    const int src_idx = (int)(temp_px[idx]);
    const int async_buff_id = (fn_buff_id + 1) % 2;

    // Prefetch the source lines of the NEXT output line into the other buffer half.
    if (idx + 1 < interp_cols) {
      const int next_src = (int)(temp_px[idx + 1]);
      bang::memcpy_async(pipe, src_line_buff1[async_buff_id], ptrIn + next_src * inHeight,
                         inHeight * sizeof(float), GDRAM2NRAM);
      bang::memcpy_async(pipe, src_line_buff2[async_buff_id], ptrIn + (next_src + 1) * inHeight,
                         inHeight * sizeof(float), GDRAM2NRAM);
    }

    // Weights along x are constant for the whole output line.
    const float wx0 = src_idx + 1 - temp_px[idx];  // weight of source line src_idx
    const float wx1 = temp_px[idx] - src_idx;      // weight of source line src_idx+1

    for (int idy = 0; idy < interp_rows; idy++) {
      const int sy = temp_y[idy];
      const float wy0 = sy + 1 - temp_py[idy];  // weight of source row sy
      const float wy1 = temp_py[idy] - sy;      // weight of source row sy+1
      float acc = matc_get_at(src_idx, sy, ptrIn, inHeight, src_line_buff1[fn_buff_id]) * wy0 * wx0;
      acc += matc_get_at(src_idx, sy + 1, ptrIn, inHeight, src_line_buff1[fn_buff_id]) * wy1 * wx0;
      acc += matc_get_at(src_idx + 1, sy, ptrIn, inHeight, src_line_buff2[fn_buff_id]) * wy0 * wx1;
      acc += matc_get_at(src_idx + 1, sy + 1, ptrIn, inHeight, src_line_buff2[fn_buff_id]) * wy1 * wx1;
      temp_result[fn_buff_id][idy] = acc;
    }
    if (end_y == outHeight) {
      // Last output row: interpolate along x only.
      const int idy2 = interp_rows;
      float acc = matc_get_at(src_idx, temp_y[idy2], ptrIn, inHeight, src_line_buff1[fn_buff_id]) * wx0;
      acc += matc_get_at(src_idx + 1, temp_y[idy2], ptrIn, inHeight, src_line_buff2[fn_buff_id]) * wx1;
      temp_result[fn_buff_id][idy2] = acc;
    }

    // Wait for the previous line's store before queuing this one; the other half of
    // temp_result is then free to be overwritten by the next iteration.
    pipe.wait_copy_nram_to_dram();
    // Store all `rows` elements, including the last-row element computed above.
    bang::memcpy_async(pipe, ptrOut + (idx + start_x) * outHeight + start_y,
                       temp_result[fn_buff_id], rows * sizeof(float), NRAM2GDRAM);
    fn_buff_id = async_buff_id;  // swap buffer halves for the next output line
  }
  pipe.wait_copy_nram_to_dram();

  if (end_x == outWidth) {
    // Last output line: interpolate along y only, from a single source line.
    const int idx2 = interp_cols;
    const int src_idx = (int)(temp_px[idx2]);
    __memcpy(src_line_buff1[0], ptrIn + src_idx * inHeight, inHeight * sizeof(float), GDRAM2NRAM);
    for (int idy = 0; idy < interp_rows; idy++) {
      const int sy = temp_y[idy];
      float acc = matc_get_at(src_idx, sy, ptrIn, inHeight, src_line_buff1[0]) * (sy + 1 - temp_py[idy]);
      acc += matc_get_at(src_idx, sy + 1, ptrIn, inHeight, src_line_buff1[0]) * (temp_py[idy] - sy);
      temp_result[0][idy] = acc;
    }
    if (end_y == outHeight) {
      // Bottom corner of the output takes the bottom corner of the input verbatim.
      temp_result[0][interp_rows] = matc_get_at(inWidth - 1, inHeight - 1, ptrIn, inHeight, NULL);
    }
    // Store all `rows` elements, including the corner element when present.
    __memcpy(ptrOut + (idx2 + start_x) * outHeight + start_y,
             temp_result[0], rows * sizeof(float), NRAM2GDRAM);
  }
}
热门帖子
精华帖子