[CANN] Add Ascend NPU backend
Ascend is a full-stack AI computing infrastructure for industry
applications and services based on Huawei Ascend processors and
software.

CANN (Compute Architecture of Neural Networks), developed by
Huawei, is a heterogeneous computing architecture for AI.

This commit adds Ascend NPU as a new backend, which implements
the following features:
1. Ascend NPU registration;
2. Ascend NPU runtime (device memory, streams, events), illustrated
   in the sketch below;
3. a subset of the GGML ops, implemented through the aclnn library;
4. a new test file named test-backend-runtime, for testing
   runtime functionality.
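
Feature 2 wraps the AscendCL (ACL) runtime. The new ggml-cann sources are not
reproduced in this excerpt, so the following is only an illustrative sketch of
the ACL primitives behind "device memory, streams, events"; the toy function
name is ours, error checking is omitted, and the actual usage inside
ggml-cann.cpp may differ.

// Illustrative sketch only: ACL runtime primitives a CANN backend builds on.
#include <acl/acl.h>

void acl_runtime_demo() {
    aclInit(nullptr);           // initialize ACL once per process
    aclrtSetDevice(0);          // select NPU device 0

    void * dev_ptr = nullptr;
    aclrtMalloc(&dev_ptr, 1024, ACL_MEM_MALLOC_HUGE_FIRST);  // device memory

    aclrtStream stream = nullptr;
    aclrtCreateStream(&stream);                              // async work queue

    aclrtEvent event = nullptr;
    aclrtCreateEvent(&event);                                // sync point
    aclrtRecordEvent(event, stream);
    aclrtSynchronizeStream(stream);

    aclrtDestroyEvent(event);
    aclrtDestroyStream(stream);
    aclrtFree(dev_ptr);
    aclrtResetDevice(0);
    aclFinalize();
}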
hipudding committed Mar 28, 2024
1 parent 0642b22 commit 5fec9cb
Showing 18 changed files with 1,935 additions and 9 deletions.
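
For orientation before the diff: the hunks below call four new entry points
(ggml_backend_cann_init, ggml_backend_cann_reg_devices,
ggml_backend_cann_get_device_count, ggml_backend_cann_get_device_description).
The new ggml-cann.h is among the 18 changed files but is not shown in this
excerpt; inferred from those call sites, its public surface is roughly the
sketch below. Exact parameter types and the export macros follow the style of
ggml-cuda.h and are assumptions here.

// Sketch of ggml-cann.h inferred from the call sites in this commit; not verbatim.
#pragma once

#include "ggml.h"
#include "ggml-backend.h"

#ifdef __cplusplus
extern "C" {
#endif

// create a backend instance bound to one NPU device
GGML_API GGML_CALL ggml_backend_t ggml_backend_cann_init(int32_t device);

// register all visible NPU devices with the ggml backend registry
GGML_API GGML_CALL int ggml_backend_cann_reg_devices(void);

// device enumeration helpers (used by llama-bench below)
GGML_API GGML_CALL uint32_t ggml_backend_cann_get_device_count(void);
GGML_API GGML_CALL void ggml_backend_cann_get_device_description(
        int32_t device, char * description, size_t description_size);

#ifdef __cplusplus
}
#endif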
94 changes: 94 additions & 0 deletions CMakeLists.txt
@@ -769,6 +769,99 @@ if (LLAMA_CPU_HBM)
    target_link_libraries(ggml PUBLIC memkind)
endif()

if (LLAMA_CANN)
    if ("cann${CANN_INSTALL_DIR}" STREQUAL "cann" AND DEFINED ENV{ASCEND_TOOLKIT_HOME})
        set(CANN_INSTALL_DIR $ENV{ASCEND_TOOLKIT_HOME})
        message(STATUS "CANN: updated CANN_INSTALL_DIR from ASCEND_TOOLKIT_HOME=$ENV{ASCEND_TOOLKIT_HOME}")
    endif()

    if (CANN_INSTALL_DIR)
        # Only Support Linux.
        if (LLAMA_CANN)
            if (NOT UNIX)
                set(LLAMA_CANN OFF)
                message(WARNING "CANN: CANN toolkit supports Unix but not ${CMAKE_SYSTEM_NAME}. Turning off LLAMA_CANN")
            endif()
        endif()

        # Supported platforms: x86-64, arm64
        if (LLAMA_CANN)
            if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
            elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "amd64")
            else()
                set(LLAMA_CANN OFF)
                message(WARNING "CANN: CANN toolkit supports x86-64 and arm64 but not ${CMAKE_SYSTEM_PROCESSOR}. Turning off LLAMA_CANN")
            endif()
        endif()

        # Set headers
        set(CANN_INCLUDE_DIRS "${CANN_INSTALL_DIR}/include" "${CANN_INSTALL_DIR}/include/aclnn")
        # Find libs
        set(CANN_LIBRARIES "")
        # TODO: optimize find libs.
        # * libascendcl.so
        if (LLAMA_CANN)
            set(lib_dir "${CANN_INSTALL_DIR}/acllib/lib64")
            find_library(found_lib_ascendcl NAMES ascendcl PATHS ${lib_dir} NO_DEFAULT_PATH)
            if (found_lib_ascendcl)
                set(lib_ascendcl ${found_lib_ascendcl})
                list(APPEND CANN_LIBRARIES ${lib_ascendcl})
                message(STATUS "CANN: libascendcl.so is found at ${lib_dir}")
            else()
                set(LLAMA_CANN OFF)
                message(WARNING "CANN: Missing libascendcl.so. Turning off LLAMA_CANN")
            endif()
        endif()

        # * libnnopbase.so
        if (LLAMA_CANN)
            set(lib_dir "${CANN_INSTALL_DIR}/acllib/lib64")
            find_library(found_lib_nnopbase NAMES nnopbase PATHS ${lib_dir} NO_DEFAULT_PATH)
            if (found_lib_nnopbase)
                set(lib_nnopbase ${found_lib_nnopbase})
                list(APPEND CANN_LIBRARIES ${lib_nnopbase})
                message(STATUS "CANN: libnnopbase.so is found at ${lib_dir}")
            else()
                set(LLAMA_CANN OFF)
                message(WARNING "CANN: Missing libnnopbase.so. Turning off LLAMA_CANN")
            endif()
        endif()

        # * libopapi.so
        if (LLAMA_CANN)
            set(lib_dir "${CANN_INSTALL_DIR}/lib64")
            find_library(found_lib_opapi NAMES opapi PATHS ${lib_dir} NO_DEFAULT_PATH)
            if (found_lib_opapi)
                set(lib_opapi ${found_lib_opapi})
                list(APPEND CANN_LIBRARIES ${lib_opapi})
                message(STATUS "CANN: libopapi.so is found at ${lib_dir}")
            else()
                set(LLAMA_CANN OFF)
                message(WARNING "CANN: Missing libopapi.so. Turning off LLAMA_CANN")
            endif()
        endif()

        # Set headers and libs
        if (LLAMA_CANN)
            message(STATUS "CANN: CANN_INCLUDE_DIRS = ${CANN_INCLUDE_DIRS}")
            message(STATUS "CANN: CANN_LIBRARIES = ${CANN_LIBRARIES}")
            set(GGML_HEADERS_CANN ggml-cann.h)
            file(GLOB GGML_SOURCES_CANN "ggml-cann/*.cpp")
            list(APPEND GGML_SOURCES_CANN "ggml-cann.cpp")
            set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${CANN_LIBRARIES})
            set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${CANN_INCLUDE_DIRS})
            add_compile_definitions(GGML_USE_CANN)
        endif()
    else()
        set(LLAMA_CANN OFF)
        message(WARNING "CANN: Can't find CANN_INSTALL_DIR, did you forget to source set_var.sh? Turning off LLAMA_CANN")
    endif()

    if (NOT LLAMA_CANN)
        message(WARNING "CANN: LLAMA_CANN is turned OFF, see above for details.")
    endif()
endif()

if (LLAMA_PERF)
    add_compile_definitions(GGML_PERF)
endif()
@@ -1147,6 +1240,7 @@ add_library(ggml OBJECT
            ${GGML_SOURCES_KOMPUTE} ${GGML_HEADERS_KOMPUTE}
            ${GGML_SOURCES_VULKAN}  ${GGML_HEADERS_VULKAN}
            ${GGML_SOURCES_ROCM}    ${GGML_HEADERS_ROCM}
            ${GGML_SOURCES_CANN}    ${GGML_HEADERS_CANN}
            )

target_include_directories(ggml PUBLIC . ${LLAMA_EXTRA_INCLUDES})
10 changes: 7 additions & 3 deletions common/common.cpp
@@ -56,6 +56,10 @@
#define GGML_USE_CUDA_SYCL_VULKAN
#endif

#if defined(GGML_USE_CUDA) || defined(GGML_USE_SYCL) || defined(GGML_USE_CANN)
#define GGML_USE_CUDA_SYCL_CANN
#endif

#if defined(LLAMA_USE_CURL)
#ifdef __linux__
#include <linux/limits.h>
@@ -861,9 +865,9 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
            return true;
        }
        params.main_gpu = std::stoi(argv[i]);
#ifndef GGML_USE_CUDA_SYCL
        fprintf(stderr, "warning: llama.cpp was compiled without CUDA/SYCL. Setting the main GPU has no effect.\n");
#endif // GGML_USE_CUDA_SYCL
#ifndef GGML_USE_CUDA_SYCL_CANN
        fprintf(stderr, "warning: llama.cpp was compiled without CUDA/SYCL/CANN. Setting the main GPU has no effect.\n");
#endif // GGML_USE_CUDA_SYCL_CANN
        return true;
    }
    if (arg == "--split-mode" || arg == "-sm") {
15 changes: 15 additions & 0 deletions examples/llama-bench/llama-bench.cpp
@@ -23,6 +23,10 @@
#include "ggml-cuda.h"
#include "ggml-sycl.h"

#ifdef GGML_USE_CANN
#include "ggml-cann.h"
#endif

// utils
static uint64_t get_time_ns() {
    using clock = std::chrono::high_resolution_clock;
@@ -134,6 +138,17 @@ static std::string get_gpu_info() {
            id += "/";
        }
    }
#endif
#ifdef GGML_USE_CANN
    uint32_t count = ggml_backend_cann_get_device_count();
    for (uint32_t i = 0; i < count; i++) {
        char buf[128];
        ggml_backend_cann_get_device_description(i, buf, sizeof(buf));
        id += buf;
        if (i < count - 1) {
            id += "/";
        }
    }
#endif
    // TODO: other backends
    return id;
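
As a usage note, the enumeration helpers used in the hunk above also work
standalone; a minimal sketch (not part of the commit), assuming a build with
GGML_USE_CANN and the header surface sketched earlier:

// Minimal sketch: list Ascend NPU devices the way get_gpu_info() does above.
#include <cstdio>
#include "ggml-cann.h"

int main() {
    uint32_t count = ggml_backend_cann_get_device_count();
    for (uint32_t i = 0; i < count; i++) {
        char desc[128];
        ggml_backend_cann_get_device_description(i, desc, sizeof(desc));
        printf("NPU %u: %s\n", i, desc);
    }
    return 0;
}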
9 changes: 9 additions & 0 deletions examples/llava/clip.cpp
@@ -15,6 +15,10 @@
#include "ggml-metal.h"
#endif

#ifdef GGML_USE_CANN
#include "ggml-cann.h"
#endif

#define STB_IMAGE_IMPLEMENTATION
#include "stb_image.h"

@@ -978,6 +982,11 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
printf("%s: CLIP using Metal backend\n", __func__);
#endif

#ifdef GGML_USE_CANN
new_clip->backend = ggml_backend_cann_init(0);
printf("%s: CLIP using CANN backend\n", __func__);
#endif


if (!new_clip->backend) {
new_clip->backend = ggml_backend_cpu_init();
6 changes: 3 additions & 3 deletions examples/server/server.cpp
@@ -2539,17 +2539,17 @@ static void server_params_parse(int argc, char ** argv, server_params & sparams,
                }
            }
#else
            LOG_WARNING("llama.cpp was compiled without CUDA. It is not possible to set a tensor split.\n", {});
            LOG_WARNING("llama.cpp was compiled without CUDA/SYCL. It is not possible to set a tensor split.\n", {});
#endif // GGML_USE_CUDA
        } else if (arg == "--main-gpu" || arg == "-mg") {
            if (++i >= argc) {
                invalid_param = true;
                break;
            }
#if defined(GGML_USE_CUDA) || defined(GGML_USE_SYCL)
#if defined(GGML_USE_CUDA) || defined(GGML_USE_SYCL) || defined(GGML_USE_CANN)
            params.main_gpu = std::stoi(argv[i]);
#else
            LOG_WARNING("llama.cpp was compiled without CUDA. It is not possible to set a main GPU.", {});
            LOG_WARNING("llama.cpp was compiled without CUDA/SYCL/CANN. It is not possible to set a main GPU.", {});
#endif
        } else if (arg == "--lora") {
            if (++i >= argc) {
5 changes: 5 additions & 0 deletions ggml-backend.c
@@ -445,6 +445,11 @@ GGML_CALL static void ggml_backend_registry_init(void) {
    extern GGML_CALL void ggml_backend_kompute_reg_devices(void);
    ggml_backend_kompute_reg_devices();
#endif

#ifdef GGML_USE_CANN
    extern GGML_CALL int ggml_backend_cann_reg_devices(void);
    ggml_backend_cann_reg_devices();
#endif
}

GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data) {
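
The hunk above only declares and calls ggml_backend_cann_reg_devices(); its
body lives in ggml-cann.cpp, which is not shown in this excerpt. Modeled on
the pattern other backends use with ggml_backend_register() (signature in the
context line above), a plausible sketch follows; the init callback and
buffer-type helper names are assumptions, not confirmed by this diff.

// Hypothetical sketch of the registration loop (inside ggml-cann.cpp, after
// its usual includes). ggml_backend_reg_cann_init and
// ggml_backend_cann_buffer_type are assumed helper names.
GGML_CALL int ggml_backend_cann_reg_devices(void) {
    uint32_t count = ggml_backend_cann_get_device_count();
    for (uint32_t i = 0; i < count; i++) {
        char name[128];
        snprintf(name, sizeof(name), "CANN%u", i);  // display name is an assumption
        ggml_backend_register(name, ggml_backend_reg_cann_init,
                              ggml_backend_cann_buffer_type(i), (void *)(intptr_t) i);
    }
    return (int) count;
}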