打开微信,使用扫一扫进入页面后,点击右上角菜单,
点击“发送给朋友”或“分享到朋友圈”完成分享
使用cmake构建bangc程序!
.
├── CMakeLists.txt
├── build.sh
├── include
│   └── simple.h
├── mlus
│   ├── CMakeLists.txt
│   └── simple.mlu
└── srcs
    └── main.cpp

3 directories, 6 files
./CMakeLists.txt内容
cmake_minimum_required(VERSION 3.3)
project(Simple)

################################################################################
# Neuware Environment and BANG Setup
################################################################################
# NEUWARE_HOME is normally passed as -DNEUWARE_HOME=<path>; when it is not,
# fall back to the environment variable of the same name (expands to an empty
# string if that is unset as well).
if(NOT NEUWARE_HOME)
  set(NEUWARE_HOME "$ENV{NEUWARE_HOME}")
endif()
message("NEUWARE_HOME PATH:${NEUWARE_HOME}")

# The expansion is quoted so that an empty NEUWARE_HOME reaches the
# FATAL_ERROR branch below instead of producing an `if()` syntax error.
if(EXISTS "${NEUWARE_HOME}")
  # Directory-scoped on purpose: these paths must also be visible to the
  # `mlus` subdirectory added below.
  # NOTE(review): bang_add_library() presumably collects include paths from
  # directory properties - confirm before converting to target_* commands.
  include_directories("${NEUWARE_HOME}/include")
  link_directories("${NEUWARE_HOME}/lib64")
  link_directories("${NEUWARE_HOME}/lib")
  set(NEUWARE_ROOT_DIR "${NEUWARE_HOME}")
else()
  message(FATAL_ERROR "NEUWARE directory cannot be found, refer README.md to prepare NEUWARE_HOME environment.")
endif()

# Project headers (simple.h), shared by the executable and the MLU library.
include_directories("${PROJECT_SOURCE_DIR}/include")

# Module search path: project-local modules first, then those shipped with
# Neuware (find_package(BANG) in mlus/ relies on this path).
list(APPEND CMAKE_MODULE_PATH
  "${PROJECT_SOURCE_DIR}/cmake"
  "${NEUWARE_HOME}/cmake"
  "${NEUWARE_HOME}/cmake/modules"
)

# Builds the `mluop` library from the .mlu sources.
add_subdirectory(mlus)

add_executable(${PROJECT_NAME} "${CMAKE_CURRENT_SOURCE_DIR}/srcs/main.cpp")
target_link_libraries(${PROJECT_NAME} PRIVATE mluop)
include/simple.h内容
// Include guard renamed: identifiers containing a double underscore are
// reserved for the implementation in C++.
#ifndef SIMPLE_INCLUDE_SIMPLE_H_
#define SIMPLE_INCLUDE_SIMPLE_H_

/**
 * Adds two host vectors element-wise on an MLU device.
 *
 * The implementation (mlus/simple.mlu) copies both inputs to the device,
 * launches the add kernel and copies the sums back to the host.
 *
 * @param h_a     Host pointer to the first input vector (N floats).
 * @param h_b     Host pointer to the second input vector (N floats).
 * @param result  Host pointer receiving the N sums h_a[i] + h_b[i].
 * @param N       Number of elements in each vector.
 * @param device  Device ordinal handed to the CNRT runtime.
 */
void data_to_mlu(float* h_a, float* h_b, float* result, int N, unsigned int device);

#endif  // SIMPLE_INCLUDE_SIMPLE_H_
mlus/CMakeLists.txt
# Build script for the `mluop` shared library, compiled from the BANG C
# (.mlu) sources in this directory with cncc.

# Place built libraries under <build>/lib.
set(LIBRARY_OUTPUT_PATH "${CMAKE_BINARY_DIR}/lib")

# Host compiler / linker flags.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -fPIC -std=c++11 -pthread -pipe")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${CMAKE_CXX_FLAGS} -O3")
set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} -Wl,--gc-sections -fPIC")

# include FindBANG.cmake and check cncc
find_package(BANG)
if(NOT BANG_FOUND)
  message(FATAL_ERROR "BANG cannot be found.")
elseif(NOT BANG_CNCC_EXECUTABLE)
  message(FATAL_ERROR "cncc not found, please ensure cncc is in your PATH env or set variable BANG_CNCC_EXECUTABLE from cmake. Otherwise you should check path used by find_program(BANG_CNCC_EXECUTABLE) in FindBANG.cmake")
endif()

# setup cncc flags
set(BANG_CNCC_FLAGS "${BANG_CNCC_FLAGS} -fPIC -Wall -Werror -std=c++11 -pthread")
set(BANG_CNCC_FLAGS "${BANG_CNCC_FLAGS} -O3")
# One --bang-mlu-arch entry per architecture passed to cncc.
set(BANG_CNCC_FLAGS "${BANG_CNCC_FLAGS}"
  "--bang-mlu-arch=mtp_220"
  "--bang-mlu-arch=mtp_270"
  "--bang-mlu-arch=mtp_290"
  "--bang-mlu-arch=mtp_372"
)

# Collect every .mlu file below this directory. CMAKE_CURRENT_SOURCE_DIR keeps
# the glob correct when the project is embedded in a larger build.
file(GLOB_RECURSE src_files "${CMAKE_CURRENT_SOURCE_DIR}/*.mlu")
if(NOT src_files)
  message(FATAL_ERROR "No .mlu sources found under ${CMAKE_CURRENT_SOURCE_DIR}.")
endif()

bang_add_library(mluop SHARED ${src_files})
# NOTE(review): the keyword-less signature is kept deliberately -
# bang_add_library() may itself link with the plain signature, and CMake
# rejects mixing plain and keyword signatures on one target. Confirm against
# FindBANG.cmake before adding PRIVATE/PUBLIC.
target_link_libraries(mluop cnrt cndrv)
mlus/simple.mlu
#include "simple.h"

#define BLOCK_SIZE (384 * 1024)
#define ALIGNBYTES 128

// Scratch buffer declared with __nram__: the first half stages `a`, the
// second half stages `b`.
__nram__ char BUFFER[BLOCK_SIZE];

/**
 * Kernel: element-wise a[i] += b[i] for i in [0, n).
 *
 * The vectors are processed in chunks that fit one half of BUFFER, so `n` is
 * not limited by the buffer size.
 *
 * @param a  Device pointer to the first operand; overwritten with the sums.
 * @param b  Device pointer to the second operand.
 * @param n  Number of float elements in each vector.
 */
__mlu_entry__ void add_vector_mlu(float* a, float* b, int n) {
  float* a_buffer = (float*)BUFFER;
  float* b_buffer = (float*)(BUFFER + BLOCK_SIZE / 2);

  const int elem_bytes = (int)sizeof(float);
  // Elements that fit in one half of BUFFER (a multiple of align_elems).
  const int chunk_elems = (BLOCK_SIZE / 2) / elem_bytes;
  // Elements per ALIGNBYTES-sized unit.
  const int align_elems = ALIGNBYTES / elem_bytes;

  for (int offset = 0; offset < n; offset += chunk_elems) {
    int count = n - offset;
    if (count > chunk_elems) {
      count = chunk_elems;
    }
    const int bytes = count * elem_bytes;

    __memcpy(a_buffer, a + offset, bytes, GDRAM2NRAM);
    __memcpy(b_buffer, b + offset, bytes, GDRAM2NRAM);

    // __bang_add takes an ELEMENT count (not bytes); round it up so the
    // processed span is a whole number of ALIGNBYTES units. The padding lanes
    // hold stale data, but only `bytes` are copied back below.
    const int padded = ((count + align_elems - 1) / align_elems) * align_elems;
    __bang_add(a_buffer, a_buffer, b_buffer, padded);

    __memcpy(a + offset, a_buffer, bytes, NRAM2GDRAM);
  }
}

/**
 * Host entry point: computes result[i] = h_a[i] + h_b[i] on the MLU.
 *
 * Initialises CNRT, uploads both inputs, runs add_vector_mlu on one block,
 * waits for completion and downloads the sums. Every CNRT status is checked;
 * on the first failure the remaining steps are skipped, acquired resources
 * are released and `result` is left untouched (the void interface offers no
 * way to report the error to the caller).
 *
 * @param h_a     Host input vector (N floats).
 * @param h_b     Host input vector (N floats).
 * @param result  Host output vector (N floats).
 * @param N       Number of elements; values <= 0 are a no-op.
 * @param device  Device ordinal.
 */
void data_to_mlu(float* h_a, float* h_b, float* result, int N, unsigned int device) {
  if (N <= 0) {
    return;  // nothing to compute
  }

  const auto bytes = N * sizeof(float);
  float* d_a = nullptr;
  float* d_b = nullptr;
  cnrtQueue_t queue;
  bool queue_created = false;

  // NOTE(review): the original passes `device` to cnrtInit; its parameter may
  // be a flags word rather than a device ordinal - confirm against cnrt.h.
  cnrtRet_t ret = cnrtInit(device);
  if (ret != CNRT_RET_SUCCESS) {
    return;  // runtime never came up, so there is nothing to tear down
  }

  cnrtDev_t dev;
  ret = cnrtGetDeviceHandle(&dev, device);
  if (ret == CNRT_RET_SUCCESS) {
    ret = cnrtSetCurrentDevice(dev);
  }

  // Upload the operands.
  if (ret == CNRT_RET_SUCCESS) {
    ret = cnrtMalloc(reinterpret_cast<void**>(&d_a), bytes);
  }
  if (ret == CNRT_RET_SUCCESS) {
    ret = cnrtMemcpy(d_a, h_a, bytes, CNRT_MEM_TRANS_DIR_HOST2DEV);
  }
  if (ret == CNRT_RET_SUCCESS) {
    ret = cnrtMalloc(reinterpret_cast<void**>(&d_b), bytes);
  }
  if (ret == CNRT_RET_SUCCESS) {
    ret = cnrtMemcpy(d_b, h_b, bytes, CNRT_MEM_TRANS_DIR_HOST2DEV);
  }

  if (ret == CNRT_RET_SUCCESS) {
    ret = cnrtCreateQueue(&queue);
    queue_created = (ret == CNRT_RET_SUCCESS);
  }

  if (ret == CNRT_RET_SUCCESS) {
    cnrtDim3_t dim = {1, 1, 1};
    cnrtFunctionType_t c = CNRT_FUNC_TYPE_BLOCK;
    add_vector_mlu<<<dim, c, queue>>>(d_a, d_b, N);
    // The launch only enqueues the kernel; wait for it before reading d_a.
    ret = cnrtSyncQueue(queue);
  }

  if (ret == CNRT_RET_SUCCESS) {
    ret = cnrtMemcpy(result, d_a, bytes, CNRT_MEM_TRANS_DIR_DEV2HOST);
  }

  // Release whatever was acquired, on the success and failure paths alike.
  if (queue_created) {
    cnrtDestroyQueue(queue);
  }
  if (d_a != nullptr) {
    cnrtFree(d_a);
  }
  if (d_b != nullptr) {
    cnrtFree(d_b);
  }
  cnrtDestroy();
}
srcs/main.cpp
#include "simple.h"

#include <algorithm>
#include <iostream>
#include <vector>

// Fills the first N elements of `a` with `value`.
template <typename T>
void initValue(T* a, T value, int N) {
  std::fill_n(a, N, value);
}

// Adds two constant vectors on MLU device 0 and verifies the result.
// Exit status is 0 when every element matches the expected sum, 1 otherwise.
int main() {
  const int N = 128;
  const unsigned int device = 0;

  // std::vector owns the buffers, so no manual delete[] is required.
  std::vector<float> h_a(N);
  std::vector<float> h_b(N);
  // Zero-initialised so that an untouched output is detected as a mismatch.
  std::vector<float> result(N, 0.f);

  initValue(h_a.data(), 1.f, N);
  initValue(h_b.data(), 2.f, N);

  data_to_mlu(h_a.data(), h_b.data(), result.data(), N, device);

  // Show the first few sums.
  for (int i = 0; i < 10 && i < N; i++) {
    std::cout << "data[" << i << "] = " << result[i] << " ";
  }
  std::cout << std::endl;

  // 1.f + 2.f is exactly representable, so an exact comparison is safe here.
  int mismatches = 0;
  for (int i = 0; i < N; i++) {
    if (result[i] != h_a[i] + h_b[i]) {
      mismatches++;
    }
  }
  if (mismatches != 0) {
    std::cerr << mismatches << " of " << N << " elements are wrong" << std::endl;
    return 1;
  }
  return 0;
}
构建脚本build.sh
#!/bin/bash
# Configures and builds the project out-of-source in ./build.
#
# Environment:
#   NEUWARE_HOME  (required) path forwarded to CMake as -DNEUWARE_HOME.
#   JOBS          (optional) parallel make jobs, defaults to 16.

# Stop at the first failing command so `make` never runs after a failed
# configure; treat unset variables as errors.
set -euo pipefail

# Fail early with a clear message instead of passing an empty path to CMake.
: "${NEUWARE_HOME:?NEUWARE_HOME must be set}"

build=build
jobs="${JOBS:-16}"

# mkdir -p is a no-op when the directory already exists.
mkdir -p "${build}"

pushd "${build}"
cmake .. -DNEUWARE_HOME="${NEUWARE_HOME}"
make -j"${jobs}"
popd
热门帖子
精华帖子