打开微信,使用扫一扫进入页面后,点击右上角菜单,
点击“发送给朋友”或“分享到朋友圈”完成分享
root@localhost:~/test/sample# cncc cuda-test.cu-bang.mlu -o mlu.o
cuda-test.cu-bang.mlu:14:2: error: no matching function for call to '__bang_add'
 __bang_add(c_tmp, a_tmp, b_tmp, ONELINE);
 ^~~~~~~~~~
/opt/AICSE-demo-student/env/neuware/lib/clang/5.0.1/include/__clang_bang_math.h:43:17: note: candidate function not viable: no known conversion from 'float __nram__[1024]' to 'half *' for 1st argument; take the address of the argument with &
__DEVICE__ void __bang_add(half* dst,
                ^
1 error generated.
似乎__bang_add不支持float类型调用?或者是我调用的问题?
下附`cuda-test.cu-bang.mlu`:
#include "mlu.h"
#define ONELINE 1024
__mlu_entry__ void addKernel(float *c, float *a, float *b, int n) {
int level = n / ONELINE, cur_indx = 0;
__nram__ float a_tmp[ONELINE];
__nram__ float b_tmp[ONELINE];
__nram__ float c_tmp[ONELINE];
for (int i = 0; i <= level; ++i) {
int _sz = n - i * ONELINE;
_sz = _sz > ONELINE? ONELINE: _sz;
__memcpy(a_tmp, a + i * ONELINE, sizeof(half) * _sz, GDRAM2NRAM);
__memcpy(b_tmp, b + i * ONELINE, sizeof(half) * _sz, GDRAM2NRAM);
__bang_add(c_tmp, a_tmp, b_tmp, ONELINE);
__memcpy(c + i * ONELINE, c_tmp, sizeof(half) * _sz, NRAM2GDRAM);
}
}感谢大神指正(
热门帖子
精华帖子