您好,您用的cncc是什么版本的?
您好,您用的cncc是什么版本的?
/*
 * Element-wise half-precision addition: c[k] = a[k] + b[k] for k in [0, n).
 *
 * The inputs are processed in chunks of at most ONELINE elements: each chunk
 * is copied from GDRAM into NRAM staging buffers, summed with __bang_add,
 * and the result is copied back to GDRAM.
 *
 * c: destination buffer, at least n elements.
 * a: first operand, at least n elements.
 * b: second operand, at least n elements.
 * n: number of elements to add; values <= 0 make the kernel a no-op.
 */
__mlu_entry__ void addKernel(half *c, half *a, half *b, int n) {
  __nram__ half a_tmp[ONELINE];
  __nram__ half b_tmp[ONELINE];
  __nram__ half c_tmp[ONELINE];

  /* A negative n would otherwise produce a negative (i.e. huge) copy size. */
  if (n <= 0) {
    return;
  }

  int full_chunks = n / ONELINE;
  int tail = n % ONELINE;
  /* Only schedule a tail pass when there really are leftover elements, so an
   * n that is an exact multiple of ONELINE does not issue zero-byte copies. */
  int chunks = full_chunks + (tail != 0 ? 1 : 0);

  for (int i = 0; i < chunks; ++i) {
    int count = (i < full_chunks) ? ONELINE : tail;
    int offset = i * ONELINE;

    __memcpy(a_tmp, a + offset, sizeof(half) * count, GDRAM2NRAM);
    __memcpy(b_tmp, b + offset, sizeof(half) * count, GDRAM2NRAM);

    /* The add always covers the whole staging buffer, even for a partial
     * tail; only the first `count` results are written back below.
     * NOTE(review): presumably ONELINE is chosen to satisfy __bang_add's
     * element-count alignment requirement -- confirm against its definition. */
    __bang_add(c_tmp, a_tmp, b_tmp, ONELINE);

    __memcpy(c + offset, c_tmp, sizeof(half) * count, NRAM2GDRAM);
  }
}
↑ 这样写就能通过编译。
请登录后评论