打开微信,使用“扫一扫”进入页面后,点击右上角菜单,
再点击“发送给朋友”或“分享到朋友圈”即可完成分享。
__bang_conv 函数的计算结果与 CPU 端的结果不一致,代码如下,请问是否有问题:
/* Input feature-map shape. */
#define IN_CHANNEL     64
#define IN_HEIGHT      32
#define IN_WIDTH       32

/* Filter window shape and step. */
#define FILTER_HEIGHT  3
#define FILTER_WIDTH   3
#define STRIDE_HEIGHT  1
#define STRIDE_WIDTH   1

/* Number of output channels (one filter per output channel). */
#define OUT_CHANNEL    64

/* Output extent per axis: (in - filter) / stride + 1, i.e. no padding term. */
#define OUT_HEIGHT       (((IN_HEIGHT) - (FILTER_HEIGHT)) / (STRIDE_HEIGHT) + 1)
#define OUT_WIDTH        (((IN_WIDTH) - (FILTER_WIDTH)) / (STRIDE_WIDTH) + 1)

/* Element counts of the three tensors handled by the kernel. */
#define OUT_DATA_NUM     ((OUT_CHANNEL) * (OUT_HEIGHT) * (OUT_WIDTH))
#define IN_DATA_NUM      ((IN_CHANNEL) * (IN_HEIGHT) * (IN_WIDTH))
#define FILTER_DATA_NUM  ((OUT_CHANNEL) * (IN_CHANNEL) * (FILTER_HEIGHT) * (FILTER_WIDTH))
/*
 * Runs one convolution with __bang_conv on on-chip buffers.
 *
 * Steps: copy the int8 input and filter from GDRAM into NRAM, pass the filter
 * through __bang_reshape_filter, move the reshaped filter into WRAM, call
 * __bang_conv, then copy the half-precision result back to GDRAM.
 *
 * out_data    - destination in GDRAM; receives OUT_DATA_NUM half values.
 * in_data     - source input in GDRAM; IN_DATA_NUM int8 values are read.
 * filter_data - source filter in GDRAM; FILTER_DATA_NUM int8 values are read.
 * pos         - forwarded unchanged as the last argument of __bang_conv
 *               (presumably the fixed-point position of the quantized
 *               operands -- TODO confirm against the intrinsic's docs).
 *
 * NOTE: in_channel, in_height, in_width, filter_height, filter_width,
 * stride_height, stride_width and out_channel are accepted but never read;
 * every size used below comes from the compile-time macros, which also size
 * the on-chip buffers.
 */
__mlu_func__ void ConvKernel(half* out_data, int8_t* in_data, int8_t* filter_data,
                             int in_channel, int in_height, int in_width,
                             int filter_height, int filter_width,
                             int stride_height, int stride_width,
                             int out_channel, int pos) {
  /* On-chip staging buffers, sized from the compile-time shape macros. */
  __nram__ half conv_result[OUT_DATA_NUM];
  __nram__ int8_t input_tile[IN_DATA_NUM];
  __nram__ int8_t filter_raw[FILTER_DATA_NUM];
  __nram__ int8_t filter_reshaped[FILTER_DATA_NUM];
  __wram__ int8_t filter_wram[FILTER_DATA_NUM];

  /* Byte counts of the three transfers. */
  const int input_bytes = IN_DATA_NUM * sizeof(int8_t);
  const int filter_bytes = FILTER_DATA_NUM * sizeof(int8_t);
  const int output_bytes = OUT_DATA_NUM * sizeof(half);

  /* Stage input and filter from GDRAM into NRAM. */
  __memcpy(input_tile, in_data, input_bytes, GDRAM2NRAM);
  __memcpy(filter_raw, filter_data, filter_bytes, GDRAM2NRAM);

  /*
   * Reshape the filter before it is handed to __bang_conv.
   * NOTE(review): presumably this produces the layout __bang_conv expects;
   * confirm that the intrinsic accepts int8_t operands and that the
   * (OUT_CHANNEL, FILTER_HEIGHT, FILTER_WIDTH, IN_CHANNEL) argument order
   * matches the layout of filter_data.
   */
  __bang_reshape_filter(filter_reshaped, filter_raw,
                        OUT_CHANNEL, FILTER_HEIGHT, FILTER_WIDTH, IN_CHANNEL);

  /* The filter operand passed to __bang_conv is the WRAM copy. */
  __memcpy(filter_wram, filter_reshaped, filter_bytes, NRAM2WRAM);

  /*
   * NOTE(review): the stride arguments are passed width first, then height;
   * verify this against the parameter order of __bang_conv.
   */
  __bang_conv(conv_result, input_tile, filter_wram,
              IN_CHANNEL, IN_HEIGHT, IN_WIDTH,
              FILTER_HEIGHT, FILTER_WIDTH,
              STRIDE_WIDTH, STRIDE_HEIGHT,
              OUT_CHANNEL, pos);

  /* Write the result back to GDRAM. */
  __memcpy(out_data, conv_result, output_bytes, NRAM2GDRAM);
}
热门帖子
精华帖子