#include #include #include #include #include #include #include "mm_runtime.h" #include "cnrt.h" #define UNPACK(...) __VA_ARGS__ #define USED_VAR(var) static_cast(var) #define CHECK_STATUS(status) \ do \ { \ auto __ret = (status); \ if (__ret != magicmind::Status::OK()) \ { \ std::cout << __ret.ToString(); \ abort(); \ } \ } while (0) #define CHECK_STATUS_RET(status) \ do \ { \ auto __ret = (status); \ if (__ret != magicmind::Status::OK()) \ { \ return __ret; \ } \ } while (0) #define CHECK_VALID(valid) \ do \ { \ if (!(valid)) \ { \ std::cout << #valid " is null or 0"; \ abort(); \ } \ } while (0) #define CHECK_EQ(a, b) \ do \ { \ if (a != b) \ { \ std::cout << #a "(" << a << ") should be equal to " << #b "(" << b << ")"; \ abort(); \ } \ } while (0) #define CHECK_LE(a, b) \ do \ { \ if (a > b) \ { \ std::cout << #a "(" << a << ") should be less equal to " << #b "(" << b << ")"; \ abort(); \ } \ } while (0) /* * A function to generate len number of uniform distribution int/float nums from begin to end. */ template ::value> struct RandDist { }; template struct RandDist { typedef std::uniform_real_distribution Dist; }; template struct RandDist { typedef std::uniform_int_distribution Dist; }; template std::vector GenRand(uint64_t len, T begin, T end, unsigned int seed) { std::vector ret(len); std::default_random_engine eng(seed); typename RandDist::Dist dist(begin, end); for (size_t idx = 0; idx < len; ++idx) { ret[idx] = dist(eng); } return ret; } void Memcpy(void *dst, void *src, size_t size, bool host_to_mlu) { if (host_to_mlu) { cnrtMemcpy(dst, src, size, CNRT_MEM_TRANS_DIR_HOST2DEV); } else { cnrtMemcpy(dst, src, size, CNRT_MEM_TRANS_DIR_DEV2HOST); } } namespace { bool IsQuant(magicmind::DataType type) { return type == magicmind::DataType::QINT8 || type == magicmind::DataType::QINT16; } // Malloc/Free/Copy memory in local/remote. void *MallocMLUAddr(size_t size) { void *mem = nullptr; cnrtMalloc(&mem, size); return mem; } void FreeMLUAddr(void *ptr) { cnrtFree(ptr); } } // namespace int main(int argc, char *argv[]) { std::string model_name = "/data/model/resnet50_v0.13.0_4b_rgb_uint8.magicmind"; std::vector input_tensors_; std::vector output_tensors_; std::vector input_host_ptrs_; std::vector output_host_ptrs_; cnrtSetDevice(0); magicmind::IModel *model = magicmind::CreateIModel(); model->DeserializeFromFile(model_name.c_str()); { ///////////////////Print info below////////////////////// size_t size = 0; model->GetSerializedModelSize(&size); std::cout << "Create IModel done." << std::endl; std::cout << "Name: " << model_name << " Size: " << size << std::endl; std::cout << "Input num: " << model->GetInputNum() << std::endl; std::cout << "Input info:[ " << std::endl; auto names = model->GetInputNames(); // CHECK_EQ(names.size(), in_dims_.size()); auto dims = model->GetInputDimensions(); auto types = model->GetInputDataTypes(); for (uint32_t i = 0; i < names.size(); ++i) { std::cout << names[i] << ": " << dims[i] << ", " << magicmind::TypeEnumToString(types[i]) << std::endl; } std::cout << "]" << std::endl; std::cout << "Output info:[ " << std::endl; names = model->GetOutputNames(); dims = model->GetOutputDimensions(); types = model->GetOutputDataTypes(); for (uint32_t i = 0; i < names.size(); ++i) { std::cout << names[i] << ": " << dims[i] << ", " << magicmind::TypeEnumToString(types[i]) << std::endl; } std::cout << "]" << std::endl; //////////////////////////////////////////////////////// } magicmind::IEngine *engine = model->CreateIEngine(); magicmind::IContext *context_ = engine->CreateIContext(); context_->CreateInputTensors(&input_tensors_); context_->CreateOutputTensors(&output_tensors_); auto input_dim_vec = model->GetInputDimension(0).GetDims(); if (input_dim_vec[0] == -1) { input_dim_vec[0] = 4; } magicmind::Dims input_dims = magicmind::Dims(input_dim_vec); for (uint32_t i = 0; i < input_tensors_.size(); ++i) { input_tensors_[i]->SetDimensions(input_dims); auto input_size = input_tensors_[i]->GetSize(); std::cout << "input_tensors_[i]->GetSize():" << input_size << std::endl; void *input_ptr = nullptr; cnrtHostMalloc(&input_ptr, input_size); input_host_ptrs_.push_back(input_ptr); // Some network has param as input, Host address will speed them up. if ((input_tensors_[i]->GetMemoryLocation() == magicmind::TensorLocation::kMLU) || (input_tensors_[i]->GetMemoryLocation() == magicmind::TensorLocation::kRemoteMLU)) { // 拷贝数据. input_tensors_[i]->SetData(MallocMLUAddr(input_size)); Memcpy(input_tensors_[i]->GetMutableData(), input_ptr, input_size, true); std::cout << "copy data host2device size=" << input_size << std::endl; } else { input_tensors_[i]->SetData(input_ptr); } } magicmind::Status isOk = context_->InferOutputShape(input_tensors_, output_tensors_); if (magicmind::Status::OK() == isOk) { for (uint32_t i = 0; i < model->GetOutputNum(); ++i) { void *out_ptr = nullptr; auto output_size = output_tensors_[i]->GetSize(); cnrtHostMalloc(&out_ptr, output_size); std::cout << "output_tensors_[i]->GetSize():" << output_size << std::endl; magicmind::Status isOk = output_tensors_[i]->SetData(out_ptr); if (isOk != magicmind::Status::OK()) { std::cout << "SetData message=" << isOk.error_message() << std::endl; } output_host_ptrs_.push_back(out_ptr); } } cnrtQueue_t queue_ = nullptr; cnrtQueueCreate(&queue_); isOk = context_->Enqueue(input_tensors_, output_tensors_, queue_); if (isOk != magicmind::Status::OK()) { std::cout << "Enqueue message=" << isOk.error_message() << std::endl; } cnrtQueueSync(queue_); #if 0 for (uint32_t i = 0; i < output_tensors_.size(); ++i) { auto size = output_tensors_[i]->GetSize(); // fill in input data Memcpy(output_host_ptrs_[i], output_tensors_[i]->GetMutableData(), size, false); } #endif getchar(); return 0; }