×

签到

分享到微信

打开微信,使用扫一扫进入页面后,点击右上角菜单,

点击“发送给朋友”或“分享到朋友圈”完成分享

Bangc CMake程序构建 liushuai2021-11-29 14:25:48 回复 2 查看 技术答疑
Bangc CMake程序构建
分享到:

本文演示如何使用 CMake 构建 BANG C 程序,示例工程的目录结构如下:

.
├── CMakeLists.txt
├── build.sh
├── include
│   └── simple.h
├── mlus
│   ├── CMakeLists.txt
│   └── simple.mlu
└── srcs
    └── main.cpp

3 directories, 6 files

./CMakeLists.txt内容

cmake_minimum_required(VERSION 3.3)
project(Simple)
################################################################################
# Neuware Environment and BANG Setup
################################################################################

# Resolve NEUWARE_HOME. A value given as -DNEUWARE_HOME=<path> wins; when it is
# missing or empty, fall back to the NEUWARE_HOME environment variable so that
# a plain `cmake ..` works in a shell where the variable is exported.
if(NOT NEUWARE_HOME AND NOT "$ENV{NEUWARE_HOME}" STREQUAL "")
  set(NEUWARE_HOME "$ENV{NEUWARE_HOME}")
endif()
message(STATUS "NEUWARE_HOME PATH:${NEUWARE_HOME}")

# The path is quoted so that an empty value or a path containing spaces is
# still passed to EXISTS as exactly one argument.
if(EXISTS "${NEUWARE_HOME}")
  # Directory-scoped on purpose: these settings are inherited by the `mlus`
  # subdirectory added below, where the mluop library is declared.
  include_directories("${NEUWARE_HOME}/include")
  link_directories("${NEUWARE_HOME}/lib64")
  link_directories("${NEUWARE_HOME}/lib")
  # NOTE(review): NEUWARE_ROOT_DIR is not read anywhere in this file; it is
  # presumably consumed by FindBANG.cmake - confirm before removing.
  set(NEUWARE_ROOT_DIR "${NEUWARE_HOME}")
else()
  message(FATAL_ERROR "NEUWARE directory cannot be found, refer README.md to prepare NEUWARE_HOME environment.")
endif()

# setup cmake search path (used by find_package(BANG) in mlus/CMakeLists.txt)
list(APPEND CMAKE_MODULE_PATH
  "${CMAKE_SOURCE_DIR}/cmake"
  "${NEUWARE_HOME}/cmake"
  "${NEUWARE_HOME}/cmake/modules"
)

# Project headers (simple.h). Added once, before add_subdirectory(), so both
# the mluop library and the executable see the same include path.
include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include")
add_subdirectory(mlus)

# Host executable; links against the mluop library built in mlus/.
add_executable(${PROJECT_NAME} "${CMAKE_CURRENT_SOURCE_DIR}/srcs/main.cpp")
target_link_libraries(${PROJECT_NAME} PRIVATE mluop)

include/simple.h内容

// Include guard. The name deliberately avoids a double underscore: identifiers
// containing "__" are reserved for the C++ implementation.
#ifndef SIMPLE_INCLUDE_H_
#define SIMPLE_INCLUDE_H_

/**
 * Adds two host vectors element-wise on an MLU device.
 *
 * Copies h_a and h_b to device memory, launches the add_vector_mlu kernel and
 * copies N floats of the sum back into result.
 *
 * @param h_a     host input vector, at least N floats
 * @param h_b     host input vector, at least N floats
 * @param result  host output buffer, at least N floats; receives h_a + h_b
 * @param N       number of float elements to process
 * @param device  ordinal of the MLU device to run on
 */
void data_to_mlu(float* h_a,float* h_b,float *result,int N,unsigned int device);

#endif  // SIMPLE_INCLUDE_H_

mlus/CMakeLists.txt

# Put the built library under <build>/lib.
set(LIBRARY_OUTPUT_PATH "${CMAKE_BINARY_DIR}/lib")

# Host-compiler and linker flags for this directory.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -fPIC -std=c++11 -pthread -pipe")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${CMAKE_CXX_FLAGS} -O3")
set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} -Wl,--gc-sections -fPIC")

# include FindBANG.cmake and check cncc
find_package(BANG)
if(NOT BANG_FOUND)
  message(FATAL_ERROR "BANG cannot be found.")
elseif(NOT BANG_CNCC_EXECUTABLE)
  # Kept on one logical line: a line break inside the quotes would become part
  # of the printed message.
  message(FATAL_ERROR "cncc not found, please ensure cncc is in your PATH env or set variable BANG_CNCC_EXECUTABLE from cmake. Otherwise you should check path used by find_program(BANG_CNCC_EXECUTABLE) in FindBANG.cmake")
endif()

# setup cncc flags
# The first two set() calls build one space-separated string; the third turns
# the variable into a list whose extra items select the target MLU
# architectures. The shape is left exactly as it was because the way
# FindBANG.cmake splits this variable is not visible from here.
set(BANG_CNCC_FLAGS "${BANG_CNCC_FLAGS} -fPIC -Wall -Werror -std=c++11 -pthread")
set(BANG_CNCC_FLAGS "${BANG_CNCC_FLAGS} -O3")
set(BANG_CNCC_FLAGS "${BANG_CNCC_FLAGS}" "--bang-mlu-arch=mtp_220"
                                         "--bang-mlu-arch=mtp_270"
                                         "--bang-mlu-arch=mtp_290"
                                         "--bang-mlu-arch=mtp_372"
)

# Collect every .mlu source below this directory. The glob is anchored to the
# current directory so it still works when the project is used as a
# sub-project, and no stale variable content is passed in as a glob pattern.
file(GLOB_RECURSE src_files "${CMAKE_CURRENT_SOURCE_DIR}/*.mlu")
if(NOT src_files)
  message(FATAL_ERROR "No .mlu sources found under ${CMAKE_CURRENT_SOURCE_DIR}.")
endif()

bang_add_library(mluop SHARED ${src_files})
# Plain (keyword-less) signature kept on purpose.
# NOTE(review): bang_add_library may itself call target_link_libraries() on
# this target, and CMake forbids mixing both signatures on one target.
target_link_libraries(mluop cnrt cndrv)

mlus/simple.mlu

#include <cstdio>

#include "simple.h"
// Size in bytes of the scratch buffer declared below (384 * 1024).
#define BLOCK_SIZE (384 * 1024)
// Byte granularity to which add_vector_mlu rounds its working size up before
// calling __bang_add.
#define ALIGNBYTES 128
// Scratch buffer declared with the __nram__ qualifier. add_vector_mlu stages
// operand `a` in the first half and operand `b` in the second half.
__nram__ char BUFFER[BLOCK_SIZE];

__mlu_entry__ void add_vector_mlu(float* a,float* b,int n){
    float *a_buffer = (float *)BUFFER;
    float *b_buffer = (float *)(BUFFER + BLOCK_SIZE / 2);
    int data_size = n*sizeof(float);
    __memcpy(a_buffer, a, data_size, GDRAM2NRAM);
    __memcpy(b_buffer, b, data_size, GDRAM2NRAM);
    // Align to 128 bytes
    int align_size = data_size%ALIGNBYTES == 0?data_size:(data_size/ALIGNBYTES+1)*ALIGNBYTES;
    __bang_add(a_buffer,a_buffer,b_buffer,align_size);
    __memcpy(a,a_buffer,data_size,NRAM2GDRAM);

}
void data_to_mlu(float* h_a,float* h_b,float *result,int N,unsigned int device){
    cnrtInit(device);

    cnrtDev_t dev;
    cnrtGetDeviceHandle(&dev,device);
    cnrtSetCurrentDevice(dev);

    float *d_a,*d_b;
    cnrtRet_t ret;
    ret = cnrtMalloc(reinterpret_cast<void**>(&d_a),N*sizeof(float));
    ret = cnrtMemcpy(d_a,h_a,N*sizeof(float),CNRT_MEM_TRANS_DIR_HOST2DEV);

    ret = cnrtMalloc(reinterpret_cast<void**>(&d_b),N*sizeof(float));
    ret = cnrtMemcpy(d_b,h_b,N*sizeof(float),CNRT_MEM_TRANS_DIR_HOST2DEV);
    cnrtQueue_t queue;
    cnrtCreateQueue(&queue);
    cnrtDim3_t dim = {1, 1, 1};
    cnrtFunctionType_t c = CNRT_FUNC_TYPE_BLOCK;
    add_vector_mlu<<<dim, c, queue>>>(d_a,d_b,N);
    ret = cnrtMemcpy(result,d_a,N*sizeof(float),CNRT_MEM_TRANS_DIR_DEV2HOST);
    cnrtFree(d_a);
    cnrtFree(d_b);
    ret = cnrtDestroyQueue(queue);
    cnrtDestroy();
}

srcs/main.cpp

#include "simple.h"
// #include <iostream>
// Fills the first N elements of `a` with `value`.
// A non-positive N leaves the array untouched.
template<typename T>
void initValue(T* a,T value,int N){
  int idx = 0;
  while (idx < N) {
    a[idx] = value;
    ++idx;
  }
}
int main(){
   int N = 128;
   float *h_a = new float[N];  
   float *h_b = new float[N];
   float *result = new float[N];
   unsigned int device = 0;
   initValue(h_a,1.f,N);
   initValue(h_b,2.f,N);

   data_to_mlu(h_a,h_b,result,N,device);
   //for(int i=0;i<10;i++)
   ////    std::cout<<"data["<<i<<"] = "<<result[i]<<" ";
   delete [] h_a;
   delete [] h_b;
   delete [] result;
}

构建脚本build.sh

#!/bin/bash
# Configures and builds the project in ./build next to this script.
#
# Environment:
#   NEUWARE_HOME  path to the Neuware installation, forwarded to CMake
#   JOBS          parallel make jobs (default: 16)

# Stop at the first failing command so `make` never runs after a failed
# `cmake` configure step.
set -e

# Run from the script's own directory so that `cmake ..` always points at the
# project root, whatever the caller's working directory is.
cd "$(dirname "$0")"

build=build
jobs="${JOBS:-16}"

mkdir -p "${build}"
pushd "${build}"
cmake .. -DNEUWARE_HOME="${NEUWARE_HOME}"
make -j"${jobs}"
popd


版权所有 © 2024 寒武纪 Cambricon.com 备案/许可证号:京ICP备17003415号-1
关闭