forked from ggerganov/llama.cpp
Showing 9 changed files with 458 additions and 21 deletions.
@@ -0,0 +1,132 @@
#include "acl_ops.h"

OpCaller::OpCaller() { attrs = aclopCreateAttr(); }

OpCaller::~OpCaller() {
    for (aclTensorDesc* desc : input_descs) {
        aclDestroyTensorDesc(desc);
    }
    for (aclDataBuffer* buffer : input_buffers) {
        aclDestroyDataBuffer(buffer);
    }
    for (aclTensorDesc* desc : output_descs) {
        aclDestroyTensorDesc(desc);
    }
    for (aclDataBuffer* buffer : output_buffers) {
        aclDestroyDataBuffer(buffer);
    }
    // TODO: may free before use.
    for (void* ptr : ptrs) {
        aclrtFree(ptr);
    }
    aclopDestroyAttr(attrs);
}

OpCaller& OpCaller::name(std::string _op_name) {
    op_name = _op_name;
    return *this;
}

OpCaller& OpCaller::input_no_contiguous(ggml_tensor* tensor, const char* name) {
    aclDataType dtype = type_mapping(tensor->type);
    // TODO
    int64_t ne[] = {tensor->ne[3], tensor->ne[2], tensor->ne[1], tensor->ne[0]};
    aclTensorDesc* tensor_desc =
        aclCreateTensorDesc(dtype, GGML_MAX_DIMS, ne, ACL_FORMAT_ND);
    aclSetTensorDescName(tensor_desc, name);
    input_descs.push_back(tensor_desc);
    aclDataBuffer* data_buffer =
        aclCreateDataBuffer(tensor->data, ggml_nbytes(tensor));
    input_buffers.push_back(data_buffer);
    return *this;
}

OpCaller& OpCaller::input(ggml_tensor* tensor, const char* name) {
    GGML_ASSERT(ggml_is_contiguous(tensor));
    return input_no_contiguous(tensor, name);
}

OpCaller& OpCaller::output(ggml_tensor* tensor, const char* name) {
    aclDataType dtype = type_mapping(tensor->type);
    aclTensorDesc* tensor_desc =
        aclCreateTensorDesc(dtype, GGML_MAX_DIMS, tensor->ne, ACL_FORMAT_ND);
    aclSetTensorDescName(tensor_desc, name);
    output_descs.push_back(tensor_desc);
    aclDataBuffer* data_buffer =
        aclCreateDataBuffer(tensor->data, ggml_nbytes(tensor));
    output_buffers.push_back(data_buffer);
    return *this;
}

OpCaller& OpCaller::attr(int64_t value, const char* name) {
    ACL_CHECK(aclopSetAttrInt(attrs, name, value));
    return *this;
}

OpCaller& OpCaller::attr(bool value, const char* name) {
    ACL_CHECK(aclopSetAttrBool(attrs, name, value));
    return *this;
}

OpCaller& OpCaller::attr(float value, const char* name) {
    ACL_CHECK(aclopSetAttrFloat(attrs, name, value));
    return *this;
}

OpCaller& OpCaller::run(aclrtStream stream) {
    ACL_CHECK(aclSetCompileopt(ACL_OP_JIT_COMPILE, "disable"));
    ACL_CHECK(aclopCompileAndExecute(
        op_name.c_str(), input_descs.size(), input_descs.data(),
        input_buffers.data(), output_buffers.size(), output_descs.data(),
        output_buffers.data(), attrs, ACL_ENGINE_SYS, ACL_COMPILE_SYS, nullptr,
        stream));
    return *this;
}

// Copy a (possibly non-contiguous) src view into the contiguous dst via the
// ViewCopy operator; sizes and strides are passed as extra int64 inputs.
void ggml_cann_cont(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
    ggml_tensor* src = dst->src[0];
    int64_t src_stride[GGML_MAX_DIMS];
    int64_t dst_stride[GGML_MAX_DIMS];

    // ggml stores byte strides in nb[]; convert them to element strides.
    for (int i = 0; i < GGML_MAX_DIMS; i++) {
        src_stride[i] = src->nb[i] / ggml_type_size(src->type);
        dst_stride[i] = dst->nb[i] / ggml_type_size(src->type);
    }

    int64_t storage_offset[] = {0};
    int64_t storage_offset_dim[] = {1};
    int64_t size_stride_dim[] = {GGML_MAX_DIMS};

    OpCaller op;
    op.name("ViewCopy")
        .input_no_contiguous(dst, "dst")
        .input(dst->ne, ACL_INT64, 1, size_stride_dim, "dst_size", ctx.stream())
        .input(dst_stride, ACL_INT64, 1, size_stride_dim, "dst_stride",
               ctx.stream())
        .input(storage_offset, ACL_INT64, 1, storage_offset_dim,
               "dst_storage_offset", ctx.stream())
        .input_no_contiguous(src, "src")
        .input(src->ne, ACL_INT64, 1, size_stride_dim, "src_size", ctx.stream())
        .input(src_stride, ACL_INT64, 1, size_stride_dim, "src_stride",
               ctx.stream())
        .input(storage_offset, ACL_INT64, 1, storage_offset_dim,
               "src_storage_offset", ctx.stream())
        .output(dst, "dst")
        .run(ctx.stream());
    // aclrtSynchronizeStream(ctx.stream());
}

// Pad src up to the shape of dst via the Pad operator; paddings holds one
// (before, after) pair per dimension, listed outermost first, with all
// padding appended at the end of each dimension.
void ggml_cann_pad(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
    ggml_tensor* src = dst->src[0];
    int64_t paddings[] = {
        0, dst->ne[3] - src->ne[3], 0, dst->ne[2] - src->ne[2],
        0, dst->ne[1] - src->ne[1], 0, dst->ne[0] - src->ne[0]};
    int64_t dim[] = {GGML_MAX_DIMS, 2};
    OpCaller op;
    op.name("Pad")
        .input(src, "x")
        .input(paddings, ACL_INT64, 2, dim, "paddings", ctx.stream())
        .output(dst, "y")
        .run(ctx.stream());
    // aclrtSynchronizeStream(ctx.stream());
}
@@ -0,0 +1,79 @@
#ifndef CANN_ACL_OPS
#define CANN_ACL_OPS

#include <acl/acl_op.h>
#include <acl/acl_op_compiler.h>

#include <string>
#include <vector>

#include "bcast.h"
#include "common.h"

struct OpCaller {
    std::string op_name;
    std::vector<aclTensorDesc*> input_descs;
    std::vector<aclDataBuffer*> input_buffers;
    std::vector<aclTensorDesc*> output_descs;
    std::vector<aclDataBuffer*> output_buffers;
    aclopAttr* attrs;
    std::vector<void*> ptrs;

    OpCaller();

    virtual ~OpCaller();

    OpCaller& name(std::string _op_name);

    OpCaller& input_no_contiguous(ggml_tensor* tensor, const char* name);

    OpCaller& input(ggml_tensor* tensor, const char* name);

    OpCaller& output(ggml_tensor* tensor, const char* name);

    OpCaller& attr(int64_t value, const char* name);

    OpCaller& attr(bool value, const char* name);

    OpCaller& attr(float value, const char* name);

    // Upload a small host-side constant: the values are copied into a newly
    // allocated device buffer (tracked in `ptrs` and freed in the destructor)
    // and registered as an additional op input.
    template <typename T>
    OpCaller& input(T* values, aclDataType dtype, size_t dims, int64_t* dim,
                    const char* name, aclrtStream stream = nullptr) {
        void* device_ptr = nullptr;
        size_t n_elem = 1;
        for (size_t i = 0; i < dims; i++) {
            n_elem *= dim[i];
        }

        size_t n_bytes = n_elem * sizeof(T);
        ACL_CHECK(aclrtMalloc(&device_ptr, n_bytes, ACL_MEM_MALLOC_HUGE_FIRST));
        ptrs.push_back(device_ptr);
        if (stream == nullptr) {
            ACL_CHECK(aclrtMemcpy(device_ptr, n_bytes, values, n_bytes,
                                  ACL_MEMCPY_HOST_TO_DEVICE));
        } else {
            // NOTE: with the async copy, `values` must remain valid until the
            // copy enqueued on `stream` has completed.
            ACL_CHECK(aclrtMemcpyAsync(device_ptr, n_bytes, values, n_bytes,
                                       ACL_MEMCPY_HOST_TO_DEVICE, stream));
        }

        aclTensorDesc* tensor_desc =
            aclCreateTensorDesc(dtype, dims, dim, ACL_FORMAT_ND);
        aclSetTensorDescName(tensor_desc, name);
        input_descs.push_back(tensor_desc);
        aclDataBuffer* data_buffer = aclCreateDataBuffer(device_ptr, n_bytes);
        input_buffers.push_back(data_buffer);

        return *this;
    }

    OpCaller& run(aclrtStream stream = nullptr);
};

void ggml_cann_cont(ggml_backend_cann_context& ctx, ggml_tensor* dst);

void ggml_cann_pad(ggml_backend_cann_context& ctx, ggml_tensor* dst);

void ggml_cann_upscale(ggml_backend_cann_context& ctx, ggml_tensor* dst);

#endif // CANN_ACL_OPS
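For reference, a minimal sketch of what the templated input() overload handles for host-side constants; the function, names, and values below are illustrative, not part of the backend:

// Sketch: uploading a small host constant through OpCaller::input<T>().
// Assumes the ACL runtime is initialized; names and values are placeholders.
static void example_upload(OpCaller& op) {
    int64_t paddings[] = {0, 1, 0, 0, 0, 0, 0, 0};  // one (before, after) pair per dim
    int64_t dim[] = {4, 2};                         // shape (GGML_MAX_DIMS, 2)
    // Allocates a device buffer, copies the host values with a blocking
    // aclrtMemcpy (passing a stream switches to aclrtMemcpyAsync), and
    // registers the buffer as an input named "paddings"; the device memory
    // is tracked in `ptrs` and released in ~OpCaller().
    op.input(paddings, ACL_INT64, 2, dim, "paddings");
}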