diff --git a/ggml-cann.cpp b/ggml-cann.cpp
index 2f5376c20be84a..c92c01b897c178 100644
--- a/ggml-cann.cpp
+++ b/ggml-cann.cpp
@@ -400,7 +400,8 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
             break;
         // TODO: Format need NC1HWC0.
         case GGML_OP_UPSCALE:
-            return false;
+            ggml_cann_upsample_nearest2d(ctx, dst);
+            break;
         case GGML_OP_PAD:
             ggml_cann_pad(ctx, dst);
             break;
@@ -687,7 +688,7 @@ GGML_CALL static bool ggml_backend_cann_supports_op(ggml_backend_t backend,
         case GGML_OP_GROUP_NORM:
             return true;
         case GGML_OP_UPSCALE:
-            return false;
+            return true;
         case GGML_OP_PAD:
         case GGML_OP_ARANGE:
             return true;
diff --git a/ggml-cann/aclnn_ops.cpp b/ggml-cann/aclnn_ops.cpp
index 36565db131021a..30ef7f98792a0f 100644
--- a/ggml-cann/aclnn_ops.cpp
+++ b/ggml-cann/aclnn_ops.cpp
@@ -5,6 +5,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -486,10 +487,6 @@ void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     GGML_ASSERT(dst->ne[0] == 1);
     aclTensor* acl_dst = create_acl_tensor(dst);
 
-    uint64_t workspaceSize = 0;
-    aclOpExecutor* executor;
-    void* workspaceAddr = nullptr;
-
     int64_t reduce_dims_host[] = {3};
     aclIntArray* reduce_dims = aclCreateIntArray(reduce_dims_host, 1);
 
@@ -503,6 +500,46 @@ void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     aclrtStream stream = ctx.stream();
     ACL_CHECK(aclnnReduceSum(workspaceAddr, workspaceSize, executor, stream));
 
+    ACL_CHECK(aclDestroyTensor(acl_src));
+    ACL_CHECK(aclDestroyTensor(acl_dst));
+}
+
+// Upscales dst->src[0] into dst with aclnnUpsampleNearest2d.
+// Both tensors are wrapped as ACL_FORMAT_NCHW and the target size handed to
+// the operator is {dst->ne[1], dst->ne[0]}.
+void ggml_cann_upsample_nearest2d(ggml_backend_cann_context& ctx,
+                                  ggml_tensor* dst) {
+    ggml_tensor* src = dst->src[0];
+
+    aclTensor* acl_src = create_acl_tensor(src, nullptr, nullptr, 0,
+                                           ACL_FORMAT_NCHW);
+    aclTensor* acl_dst = create_acl_tensor(dst, nullptr, nullptr, 0,
+                                           ACL_FORMAT_NCHW);
+
+    // Host-side size list kept in a plain array, like reduce_dims_host in
+    // ggml_cann_sum_rows.
+    int64_t output_size_host[] = {dst->ne[1], dst->ne[0]};
+    aclIntArray* output_size_array = aclCreateIntArray(output_size_host, 2);
+
+    uint64_t workspaceSize = 0;
+    aclOpExecutor* executor = nullptr;
+    void* workspaceAddr = nullptr;
+
+    aclrtStream stream = ctx.stream();
+
+    ACL_CHECK(aclnnUpsampleNearest2dGetWorkspaceSize(acl_src, output_size_array,
+                                                     acl_dst, &workspaceSize,
+                                                     &executor));
+    // Scratch memory is requested from the context only when the operator
+    // reports a non-zero workspace size.
+    if (workspaceSize > 0) {
+        workspaceAddr = ctx.alloc_buffer(workspaceSize);
+    }
+
+    ACL_CHECK(aclnnUpsampleNearest2d(workspaceAddr, workspaceSize, executor,
+                                     stream));
+
+    ACL_CHECK(aclDestroyIntArray(output_size_array));
     ACL_CHECK(aclDestroyTensor(acl_src));
     ACL_CHECK(aclDestroyTensor(acl_dst));
 }
\ No newline at end of file
diff --git a/ggml-cann/aclnn_ops.h b/ggml-cann/aclnn_ops.h
index 5de4dabebebe2d..0caf3a92ba3120 100644
--- a/ggml-cann/aclnn_ops.h
+++ b/ggml-cann/aclnn_ops.h
@@ -149,4 +149,6 @@ void ggml_cann_activation(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ACL_CHECK(aclDestroyTensor(acl_dst));
 }
 
+void ggml_cann_upsample_nearest2d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+
 #endif // CANN_ACLNN_OPS
\ No newline at end of file