Skip to content

Commit

Permalink
use aclnn pad
Browse files Browse the repository at this point in the history
  • Loading branch information
hipudding committed Apr 10, 2024
1 parent 75d45c8 commit 84c031b
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 35 deletions.
1 change: 0 additions & 1 deletion ggml-cann.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -398,7 +398,6 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
case GGML_OP_CONCAT:
ggml_cann_concat(ctx, dst);
break;
// TODO: Format need NC1HWC0.
case GGML_OP_UPSCALE:
ggml_cann_upsample_nearest2d(ctx, dst);
break;
Expand Down
14 changes: 0 additions & 14 deletions ggml-cann/acl_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,17 +112,3 @@ void ggml_cann_cont(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
.output(dst, "dst")
.run(ctx.stream());
}

// Pad dst->src[0] with zeros up to the shape of dst, via the CANN "Pad"
// operator. Padding is applied only on the trailing side of each dimension.
void ggml_cann_pad(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
    ggml_tensor* src = dst->src[0];
    // (before, after) pad amounts per dimension, highest dimension first —
    // the layout the "Pad" op expects for its `paddings` input.
    int64_t pad_values[] = {
        0, dst->ne[3] - src->ne[3], 0, dst->ne[2] - src->ne[2],
        0, dst->ne[1] - src->ne[1], 0, dst->ne[0] - src->ne[0]};
    // Shape of the paddings tensor: [GGML_MAX_DIMS, 2].
    int64_t pad_shape[] = {GGML_MAX_DIMS, 2};
    OpCaller caller;
    caller.name("Pad")
        .input(src, "x")
        .input(ctx, pad_values, ACL_INT64, 2, pad_shape, "paddings",
               ctx.stream())
        .output(dst, "y")
        .run(ctx.stream());
}
4 changes: 0 additions & 4 deletions ggml-cann/acl_ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,4 @@ struct OpCaller {

void ggml_cann_cont(ggml_backend_cann_context& ctx, ggml_tensor* dst);

void ggml_cann_pad(ggml_backend_cann_context& ctx, ggml_tensor* dst);

void ggml_cann_upscale(ggml_backend_cann_context& ctx, ggml_tensor* dst);

#endif // CANN_ACL_OPS
74 changes: 58 additions & 16 deletions ggml-cann/aclnn_ops.cpp
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
#include "aclnn_ops.h"

#include <aclnnop/aclnn_cast.h>
#include <aclnnop/aclnn_constant_pad_nd.h>
#include <aclnnop/aclnn_group_norm.h>
#include <aclnnop/aclnn_layer_norm.h>
#include <aclnnop/aclnn_reduce_sum.h>
#include <aclnnop/aclnn_repeat.h>
#include <aclnnop/aclnn_softmax.h>
#include <aclnnop/aclnn_upsample_nearest_2d.h>
#include <aclnnop/aclnn_reduce_sum.h>

#include <cmath>
#include <cstring>
Expand Down Expand Up @@ -490,6 +491,10 @@ void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
int64_t reduce_dims_host[] = {3};
aclIntArray* reduce_dims = aclCreateIntArray(reduce_dims_host, 1);

uint64_t workspaceSize = 0;
aclOpExecutor* executor;
void* workspaceAddr = nullptr;

ACL_CHECK(aclnnReduceSumGetWorkspaceSize(acl_src, reduce_dims, true,
type_mapping(src->type), acl_dst,
&workspaceSize, &executor));
Expand All @@ -504,37 +509,74 @@ void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
ACL_CHECK(aclDestroyTensor(acl_dst));
}

void ggml_cann_upsample_nearest2d(ggml_backend_cann_context& ctx,
ggml_tensor* dst) {

void ggml_cann_upsample_nearest2d(ggml_backend_cann_context& ctx,
ggml_tensor* dst) {
ggml_tensor* src = dst->src[0];

aclTensor* acl_src = create_acl_tensor(src, nullptr, nullptr, 0,
ACL_FORMAT_NCHW);
aclTensor* acl_dst = create_acl_tensor(dst, nullptr, nullptr, 0,
ACL_FORMAT_NCHW);
aclTensor* acl_src =
create_acl_tensor(src, nullptr, nullptr, 0, ACL_FORMAT_NCHW);
aclTensor* acl_dst =
create_acl_tensor(dst, nullptr, nullptr, 0, ACL_FORMAT_NCHW);

const int scale_factor = dst->op_params[0];
std::vector<int64_t> output_size{dst->ne[1], dst->ne[0]};
auto output_size_array = aclCreateIntArray(output_size.data(), 2);

uint64_t workspaceSize = 0;
aclOpExecutor* executor;
void* workspaceAddr = nullptr;

aclrtStream stream = ctx.stream();

ACL_CHECK(aclnnUpsampleNearest2dGetWorkspaceSize(acl_src, output_size_array,
acl_dst, &workspaceSize,
&executor));
ACL_CHECK(aclnnUpsampleNearest2dGetWorkspaceSize(
acl_src, output_size_array, acl_dst, &workspaceSize, &executor));
if (workspaceSize > 0) {
workspaceAddr = ctx.alloc_buffer(workspaceSize);
}
ACL_CHECK(aclnnUpsampleNearest2d(workspaceAddr, workspaceSize, executor,
stream));

ACL_CHECK(
aclnnUpsampleNearest2d(workspaceAddr, workspaceSize, executor, stream));

ACL_CHECK(aclDestroyIntArray(output_size_array));
ACL_CHECK(aclDestroyTensor(acl_src));
ACL_CHECK(aclDestroyTensor(acl_dst));
}

// Zero-pad `src` on the trailing side of every dimension so its shape
// matches `dst`, using the aclnnConstantPadNd operator.
void aclnn_pad(ggml_backend_cann_context& ctx, ggml_tensor* src,
               ggml_tensor* dst) {
    // (before, after) pad amounts per dimension, lowest dimension first —
    // the layout aclnnConstantPadNd expects; we never pad the leading side.
    int64_t pad_amounts[] = {
        0, dst->ne[0] - src->ne[0], 0, dst->ne[1] - src->ne[1],
        0, dst->ne[2] - src->ne[2], 0, dst->ne[3] - src->ne[3]};
    float pad_const = 0.0f;  // constant fill value

    aclTensor* acl_src = create_acl_tensor(src);
    aclTensor* acl_dst = create_acl_tensor(dst);
    aclIntArray* acl_pad = aclCreateIntArray(pad_amounts, GGML_MAX_DIMS * 2);
    aclScalar* acl_value = aclCreateScalar(&pad_const, aclDataType::ACL_FLOAT);

    // Standard aclnn two-phase launch: query workspace size, allocate it
    // from the context pool if required, then enqueue the kernel.
    uint64_t ws_size = 0;
    void* ws_addr = nullptr;
    aclOpExecutor* executor;
    ACL_CHECK(aclnnConstantPadNdGetWorkspaceSize(
        acl_src, acl_pad, acl_value, acl_dst, &ws_size, &executor));
    if (ws_size > 0) {
        ws_addr = ctx.alloc_buffer(ws_size);
    }
    ACL_CHECK(aclnnConstantPadNd(ws_addr, ws_size, executor, ctx.stream()));

    // Release the host-side ACL wrappers; the device work is already
    // queued on the stream.
    ACL_CHECK(aclDestroyIntArray(acl_pad));
    ACL_CHECK(aclDestroyScalar(acl_value));
    ACL_CHECK(aclDestroyTensor(acl_src));
    ACL_CHECK(aclDestroyTensor(acl_dst));
}

// GGML_OP_PAD entry point: pads dst->src[0] up to the shape of dst.
void ggml_cann_pad(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
    aclnn_pad(ctx, dst->src[0], dst);
}
2 changes: 2 additions & 0 deletions ggml-cann/aclnn_ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst);

void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);

void ggml_cann_pad(ggml_backend_cann_context& ctx, ggml_tensor* dst);

template <aclnnStatus getWorkspaceSize(const aclTensor*, const aclTensor*,
aclTensor*, uint64_t*, aclOpExecutor**),
aclnnStatus execute(void*, uint64_t, aclOpExecutor*, aclrtStream)>
Expand Down

0 comments on commit 84c031b

Please sign in to comment.