打开微信,使用扫一扫进入页面后,点击右上角菜单,
点击“发送给朋友”或“分享到朋友圈”完成分享
/* Number of float elements in the K buffer: 768 rows of 128 values each.
 * Parenthesized so the macro expands safely inside larger expressions
 * (e.g. `x / LEN`). */
#define LEN (768 * 128)
/*
 * Kernel: loads one 128-float vector and a 768 x 128 float buffer from GDRAM,
 * multiplies them with __bang_matmul, and writes the 768 results back.
 *
 * dst      GDRAM output; 768 floats are written to it.
 * source1  GDRAM input; read in pages of PAGE_SIZE * 128 floats
 *          (768 * 128 floats in total) and staged into WRAM.
 * source2  GDRAM input; 128 floats are read from it and staged into NRAM.
 *
 * NOTE(review): the operand layout that __bang_matmul expects for k_wram
 * (the original author noted "128 * 768, column-major") and any alignment
 * requirements on the buffers are not verifiable from this file -- confirm
 * against the BANG C reference for the target architecture.
 */
__mlu_entry__ void Kernel(float* dst, float* source1, float* source2) {
  int PAGE_SIZE = 32;
  /* Number of floats in one page: PAGE_SIZE rows of 128 values. */
  int kv_block_stride = PAGE_SIZE * 128;

  __wram__ float k_wram[LEN];
  __nram__ float q_nram[128];
  __nram__ float qk[768];

  /* Start staging the 128-element vector; completion is awaited by the
   * __sync() that precedes the matmul. */
  __memcpy_async(q_nram, source2, 128 * sizeof(float),
                 GDRAM2NRAM);

  /* Rows still to be copied from source1. */
  int seq_len_tmp = 768;

  for (int i = 0; i < 768 / PAGE_SIZE; i++) {
    const float* k_ptr_tmp = source1 + i * kv_block_stride;
    /* Destination of page i. Derived from the same index as the source so
     * the two can never drift apart; previously the destination was updated
     * before i was incremented, which made it lag one page behind (page 1
     * overwrote page 0 and the last page of k_wram was never filled). */
    float* cur_k_wram = k_wram + i * kv_block_stride;

    __bang_printf("i:%d seq_len_tmp:%d\n", i, seq_len_tmp);

    if (seq_len_tmp == 0) break;

    if (seq_len_tmp < PAGE_SIZE) {
      /* Final, partially filled page: copy only the remaining rows. */
      __memcpy_async(cur_k_wram, k_ptr_tmp, seq_len_tmp * 128 * sizeof(float),
                     GDRAM2WRAM);
      break;
    }

    /* Full page. */
    __memcpy_async(cur_k_wram, k_ptr_tmp, kv_block_stride * sizeof(float),
                   GDRAM2WRAM);
    seq_len_tmp -= PAGE_SIZE;
  }

  /* Wait for the asynchronous copies issued above before using the data. */
  __sync();
  /* qk (768 floats) = q_nram (1 x 128) times k_wram, with m=1, k=128, n=768. */
  __bang_matmul(qk, q_nram, k_wram, 1, 128, 768);
  __sync_compute();
  __memcpy(dst, qk, 768 * sizeof(float), NRAM2GDRAM);
  __sync();
}
2024-06-20 11:13:53.778380: [cnrtError] [112613] [Card: 0] Error occurred during calling 'cnQueueSync' in CNDrv interface.
2024-06-20 11:13:53.778433: [cnrtError] [112613] [Card: 0] Return value is 100124, CN_INVOKE_ERROR_ADDRESS_SPACE.
2024-06-20 11:13:53.778445: [cnrtError] [112613] [Card: 0] cnrtQueueSync: MLU queue sync failed.
热门帖子
精华帖子