diff --git a/ggml-cann.cpp b/ggml-cann.cpp index c92c01b897c178..0a9554cfb6463b 100644 --- a/ggml-cann.cpp +++ b/ggml-cann.cpp @@ -398,7 +398,6 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx, case GGML_OP_CONCAT: ggml_cann_concat(ctx, dst); break; - // TODO: Format need NC1HWC0. case GGML_OP_UPSCALE: ggml_cann_upsample_nearest2d(ctx, dst); break; diff --git a/ggml-cann/acl_ops.cpp b/ggml-cann/acl_ops.cpp index eb78057d79670a..c3084fe574d4c7 100644 --- a/ggml-cann/acl_ops.cpp +++ b/ggml-cann/acl_ops.cpp @@ -112,17 +112,3 @@ void ggml_cann_cont(ggml_backend_cann_context& ctx, ggml_tensor* dst) { .output(dst, "dst") .run(ctx.stream()); } - -void ggml_cann_pad(ggml_backend_cann_context& ctx, ggml_tensor* dst) { - ggml_tensor* src = dst->src[0]; - int64_t paddings[] = { - 0, dst->ne[3] - src->ne[3], 0, dst->ne[2] - src->ne[2], - 0, dst->ne[1] - src->ne[1], 0, dst->ne[0] - src->ne[0]}; - int64_t dim[] = {GGML_MAX_DIMS, 2}; - OpCaller op; - op.name("Pad") - .input(src, "x") - .input(ctx, paddings, ACL_INT64, 2, dim, "paddings", ctx.stream()) - .output(dst, "y") - .run(ctx.stream()); -} diff --git a/ggml-cann/acl_ops.h b/ggml-cann/acl_ops.h index 77c64df4fae753..17405b1e68b287 100644 --- a/ggml-cann/acl_ops.h +++ b/ggml-cann/acl_ops.h @@ -71,8 +71,4 @@ struct OpCaller { void ggml_cann_cont(ggml_backend_cann_context& ctx, ggml_tensor* dst); -void ggml_cann_pad(ggml_backend_cann_context& ctx, ggml_tensor* dst); - -void ggml_cann_upscale(ggml_backend_cann_context& ctx, ggml_tensor* dst); - #endif // CANN_ACL_OPS \ No newline at end of file diff --git a/ggml-cann/aclnn_ops.cpp b/ggml-cann/aclnn_ops.cpp index 30ef7f98792a0f..f02e856e02b2e1 100644 --- a/ggml-cann/aclnn_ops.cpp +++ b/ggml-cann/aclnn_ops.cpp @@ -1,12 +1,13 @@ #include "aclnn_ops.h" #include +#include #include #include +#include #include #include #include -#include #include #include @@ -490,6 +491,10 @@ void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) { int64_t reduce_dims_host[] = {3}; aclIntArray* reduce_dims = aclCreateIntArray(reduce_dims_host, 1); + uint64_t workspaceSize = 0; + aclOpExecutor* executor; + void* workspaceAddr = nullptr; + ACL_CHECK(aclnnReduceSumGetWorkspaceSize(acl_src, reduce_dims, true, type_mapping(src->type), acl_dst, &workspaceSize, &executor)); @@ -504,37 +509,74 @@ void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) { ACL_CHECK(aclDestroyTensor(acl_dst)); } -void ggml_cann_upsample_nearest2d(ggml_backend_cann_context& ctx, - ggml_tensor* dst) { - +void ggml_cann_upsample_nearest2d(ggml_backend_cann_context& ctx, + ggml_tensor* dst) { ggml_tensor* src = dst->src[0]; - aclTensor* acl_src = create_acl_tensor(src, nullptr, nullptr, 0, - ACL_FORMAT_NCHW); - aclTensor* acl_dst = create_acl_tensor(dst, nullptr, nullptr, 0, - ACL_FORMAT_NCHW); + aclTensor* acl_src = + create_acl_tensor(src, nullptr, nullptr, 0, ACL_FORMAT_NCHW); + aclTensor* acl_dst = + create_acl_tensor(dst, nullptr, nullptr, 0, ACL_FORMAT_NCHW); const int scale_factor = dst->op_params[0]; std::vector output_size{dst->ne[1], dst->ne[0]}; auto output_size_array = aclCreateIntArray(output_size.data(), 2); - + uint64_t workspaceSize = 0; aclOpExecutor* executor; void* workspaceAddr = nullptr; aclrtStream stream = ctx.stream(); - ACL_CHECK(aclnnUpsampleNearest2dGetWorkspaceSize(acl_src, output_size_array, - acl_dst, &workspaceSize, - &executor)); + ACL_CHECK(aclnnUpsampleNearest2dGetWorkspaceSize( + acl_src, output_size_array, acl_dst, &workspaceSize, &executor)); if (workspaceSize > 0) { workspaceAddr = ctx.alloc_buffer(workspaceSize); } - - ACL_CHECK(aclnnUpsampleNearest2d(workspaceAddr, workspaceSize, executor, - stream)); - + + ACL_CHECK( + aclnnUpsampleNearest2d(workspaceAddr, workspaceSize, executor, stream)); + ACL_CHECK(aclDestroyIntArray(output_size_array)); ACL_CHECK(aclDestroyTensor(acl_src)); ACL_CHECK(aclDestroyTensor(acl_dst)); +} + +void aclnn_pad(ggml_backend_cann_context& ctx, ggml_tensor* src, + ggml_tensor* dst) { + aclTensor* acl_src = create_acl_tensor(src); + aclTensor* acl_dst = create_acl_tensor(dst); + + int64_t paddings[] = { + 0, dst->ne[0] - src->ne[0], 0, dst->ne[1] - src->ne[1], + 0, dst->ne[2] - src->ne[2], 0, dst->ne[3] - src->ne[3]}; + float value = 0.0f; + + aclIntArray* acl_pad = aclCreateIntArray(paddings, GGML_MAX_DIMS * 2); + aclScalar* acl_value = aclCreateScalar(&value, aclDataType::ACL_FLOAT); + + uint64_t workspaceSize = 0; + aclOpExecutor* executor; + void* workspaceAddr = nullptr; + + ACL_CHECK(aclnnConstantPadNdGetWorkspaceSize( + acl_src, acl_pad, acl_value, acl_dst, &workspaceSize, &executor)); + + if (workspaceSize > 0) { + workspaceAddr = ctx.alloc_buffer(workspaceSize); + } + + aclrtStream stream = ctx.stream(); + ACL_CHECK( + aclnnConstantPadNd(workspaceAddr, workspaceSize, executor, stream)); + + ACL_CHECK(aclDestroyIntArray(acl_pad)); + ACL_CHECK(aclDestroyScalar(acl_value)); + ACL_CHECK(aclDestroyTensor(acl_src)); + ACL_CHECK(aclDestroyTensor(acl_dst)); +} + +void ggml_cann_pad(ggml_backend_cann_context& ctx, ggml_tensor* dst) { + ggml_tensor* src = dst->src[0]; + aclnn_pad(ctx, src, dst); } \ No newline at end of file diff --git a/ggml-cann/aclnn_ops.h b/ggml-cann/aclnn_ops.h index 0caf3a92ba3120..1e0710d38cd021 100644 --- a/ggml-cann/aclnn_ops.h +++ b/ggml-cann/aclnn_ops.h @@ -47,6 +47,8 @@ void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst); void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst); +void ggml_cann_pad(ggml_backend_cann_context& ctx, ggml_tensor* dst); + template