[CANN] Add Ascend NPU backend
Ascend is a full-stack AI computing infrastructure for industry
applications and services based on Huawei Ascend processors and
software.

CANN (Compute Architecture of Neural Networks), developed by
Huawei, is a heterogeneous computing architecture for AI.

This commit adds Ascend NPU as a new backend, which implements
the following features:
1. Ascend NPU registration;
2. Ascend NPU runtime (device memory, streams, events), illustrated
   in the sketch below;
3. a subset of the GGML ops, implemented through the aclnn library;
4. a new test file named test-backend-runtime, for testing
   runtime functionality.
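
Feature 2 wraps the AscendCL (ACL) runtime. The new ggml-cann sources are not
reproduced in this excerpt, so the following is only an illustrative sketch of
the ACL primitives behind "device memory, streams, events"; the toy function
name is ours, error checking is omitted, and the actual usage inside
ggml-cann.cpp may differ.

// Illustrative sketch only: ACL runtime primitives a CANN backend builds on.
#include <acl/acl.h>

void acl_runtime_demo() {
    aclInit(nullptr);           // initialize ACL once per process
    aclrtSetDevice(0);          // select NPU device 0

    void * dev_ptr = nullptr;
    aclrtMalloc(&dev_ptr, 1024, ACL_MEM_MALLOC_HUGE_FIRST);  // device memory

    aclrtStream stream = nullptr;
    aclrtCreateStream(&stream);                              // async work queue

    aclrtEvent event = nullptr;
    aclrtCreateEvent(&event);                                // sync point
    aclrtRecordEvent(event, stream);
    aclrtSynchronizeStream(stream);

    aclrtDestroyEvent(event);
    aclrtDestroyStream(stream);
    aclrtFree(dev_ptr);
    aclrtResetDevice(0);
    aclFinalize();
}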
hipudding committed Mar 28, 2024
1 parent 0642b22 commit 5fec9cb
Showing 18 changed files with 1,935 additions and 9 deletions.
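
For orientation before the diff: the hunks below call four new entry points
(ggml_backend_cann_init, ggml_backend_cann_reg_devices,
ggml_backend_cann_get_device_count, ggml_backend_cann_get_device_description).
The new ggml-cann.h is among the 18 changed files but is not shown in this
excerpt; inferred from those call sites, its public surface is roughly the
sketch below. Exact parameter types and the export macros follow the style of
ggml-cuda.h and are assumptions here.

// Sketch of ggml-cann.h inferred from the call sites in this commit; not verbatim.
#pragma once

#include "ggml.h"
#include "ggml-backend.h"

#ifdef __cplusplus
extern "C" {
#endif

// create a backend instance bound to one NPU device
GGML_API GGML_CALL ggml_backend_t ggml_backend_cann_init(int32_t device);

// register all visible NPU devices with the ggml backend registry
GGML_API GGML_CALL int ggml_backend_cann_reg_devices(void);

// device enumeration helpers (used by llama-bench below)
GGML_API GGML_CALL uint32_t ggml_backend_cann_get_device_count(void);
GGML_API GGML_CALL void ggml_backend_cann_get_device_description(
        int32_t device, char * description, size_t description_size);

#ifdef __cplusplus
}
#endif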
94 changes: 94 additions & 0 deletions CMakeLists.txt
@@ -769,6 +769,99 @@ if (LLAMA_CPU_HBM)
    target_link_libraries(ggml PUBLIC memkind)
endif()

if (LLAMA_CANN)
    if ("cann${CANN_INSTALL_DIR}" STREQUAL "cann" AND DEFINED ENV{ASCEND_TOOLKIT_HOME})
        set(CANN_INSTALL_DIR $ENV{ASCEND_TOOLKIT_HOME})
        message(STATUS "CANN: updated CANN_INSTALL_DIR from ASCEND_TOOLKIT_HOME=$ENV{ASCEND_TOOLKIT_HOME}")
    endif()

    if (CANN_INSTALL_DIR)
        # Only Support Linux.
        if (LLAMA_CANN)
            if (NOT UNIX)
                set(LLAMA_CANN OFF)
                message(WARNING "CANN: CANN toolkit supports Unix but not ${CMAKE_SYSTEM_NAME}. Turning off LLAMA_CANN")
            endif()
        endif()

        # Supported platforms: x86-64, arm64
        if (LLAMA_CANN)
            if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
            elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "amd64")
            else()
                set(LLAMA_CANN OFF)
                message(WARNING "CANN: CANN toolkit supports x86-64 and arm64 but not ${CMAKE_SYSTEM_PROCESSOR}. Turning off LLAMA_CANN")
            endif()
        endif()

        # Set headers
        set(CANN_INCLUDE_DIRS "${CANN_INSTALL_DIR}/include" "${CANN_INSTALL_DIR}/include/aclnn")
        # Find libs
        set(CANN_LIBRARIES "")
        # TODO: optimize find libs.
        # * libascendcl.so
        if (LLAMA_CANN)
            set(lib_dir "${CANN_INSTALL_DIR}/acllib/lib64")
            find_library(found_lib_ascendcl NAMES ascendcl PATHS ${lib_dir} NO_DEFAULT_PATH)
            if (found_lib_ascendcl)
                set(lib_ascendcl ${found_lib_ascendcl})
                list(APPEND CANN_LIBRARIES ${lib_ascendcl})
                message(STATUS "CANN: libascendcl.so is found at ${lib_dir}")
            else()
                set(LLAMA_CANN OFF)
                message(WARNING "CANN: Missing libascendcl.so. Turning off LLAMA_CANN")
            endif()
        endif()

        # * libnnopbase.so
        if (LLAMA_CANN)
            set(lib_dir "${CANN_INSTALL_DIR}/acllib/lib64")
            find_library(found_lib_nnopbase NAMES nnopbase PATHS ${lib_dir} NO_DEFAULT_PATH)
            if (found_lib_nnopbase)
                set(lib_nnopbase ${found_lib_nnopbase})
                list(APPEND CANN_LIBRARIES ${lib_nnopbase})
                message(STATUS "CANN: libnnopbase.so is found at ${lib_dir}")
            else()
                set(LLAMA_CANN OFF)
                message(WARNING "CANN: Missing libnnopbase.so. Turning off LLAMA_CANN")
            endif()
        endif()

        # * libopapi.so
        if (LLAMA_CANN)
            set(lib_dir "${CANN_INSTALL_DIR}/lib64")
            find_library(found_lib_opapi NAMES opapi PATHS ${lib_dir} NO_DEFAULT_PATH)
            if (found_lib_opapi)
                set(lib_opapi ${found_lib_opapi})
                list(APPEND CANN_LIBRARIES ${lib_opapi})
                message(STATUS "CANN: libopapi.so is found at ${lib_dir}")
            else()
                set(LLAMA_CANN OFF)
                message(WARNING "CANN: Missing libopapi.so. Turning off LLAMA_CANN")
            endif()
        endif()

        # Set headers and libs
        if (LLAMA_CANN)
            message(STATUS "CANN: CANN_INCLUDE_DIRS = ${CANN_INCLUDE_DIRS}")
            message(STATUS "CANN: CANN_LIBRARIES = ${CANN_LIBRARIES}")
            set(GGML_HEADERS_CANN ggml-cann.h)
            file(GLOB GGML_SOURCES_CANN "ggml-cann/*.cpp")
            list(APPEND GGML_SOURCES_CANN "ggml-cann.cpp")
            set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${CANN_LIBRARIES})
            set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${CANN_INCLUDE_DIRS})
            add_compile_definitions(GGML_USE_CANN)
        endif()
    else()
        set(LLAMA_CANN OFF)
        message(WARNING "CANN: Can't find CANN_INSTALL_DIR, did you forget to source set_var.sh? Turning off LLAMA_CANN")
    endif()

    if (NOT LLAMA_CANN)
        message(WARNING "CANN: LLAMA_CANN is turned OFF, see above for details.")
    endif()
endif()

if (LLAMA_PERF)
    add_compile_definitions(GGML_PERF)
endif()
@@ -1147,6 +1240,7 @@ add_library(ggml OBJECT
            ${GGML_SOURCES_KOMPUTE} ${GGML_HEADERS_KOMPUTE}
            ${GGML_SOURCES_VULKAN}  ${GGML_HEADERS_VULKAN}
            ${GGML_SOURCES_ROCM}    ${GGML_HEADERS_ROCM}
            ${GGML_SOURCES_CANN}    ${GGML_HEADERS_CANN}
            )

target_include_directories(ggml PUBLIC . ${LLAMA_EXTRA_INCLUDES})
10 changes: 7 additions & 3 deletions common/common.cpp
@@ -56,6 +56,10 @@
#define GGML_USE_CUDA_SYCL_VULKAN
#endif

#if defined(GGML_USE_CUDA) || defined(GGML_USE_SYCL) || defined(GGML_USE_CANN)
#define GGML_USE_CUDA_SYCL_CANN
#endif

#if defined(LLAMA_USE_CURL)
#ifdef __linux__
#include <linux/limits.h>
@@ -861,9 +865,9 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
            return true;
        }
        params.main_gpu = std::stoi(argv[i]);
#ifndef GGML_USE_CUDA_SYCL
        fprintf(stderr, "warning: llama.cpp was compiled without CUDA/SYCL. Setting the main GPU has no effect.\n");
#endif // GGML_USE_CUDA_SYCL
#ifndef GGML_USE_CUDA_SYCL_CANN
        fprintf(stderr, "warning: llama.cpp was compiled without CUDA/SYCL/CANN. Setting the main GPU has no effect.\n");
#endif // GGML_USE_CUDA_SYCL_CANN
        return true;
    }
    if (arg == "--split-mode" || arg == "-sm") {
15 changes: 15 additions & 0 deletions examples/llama-bench/llama-bench.cpp
@@ -23,6 +23,10 @@
#include "ggml-cuda.h"
#include "ggml-sycl.h"

#ifdef GGML_USE_CANN
#include "ggml-cann.h"
#endif

// utils
static uint64_t get_time_ns() {
    using clock = std::chrono::high_resolution_clock;
@@ -134,6 +138,17 @@ static std::string get_gpu_info() {
            id += "/";
        }
    }
#endif
#ifdef GGML_USE_CANN
    uint32_t count = ggml_backend_cann_get_device_count();
    for (uint32_t i = 0; i < count; i++) {
        char buf[128];
        ggml_backend_cann_get_device_description(i, buf, sizeof(buf));
        id += buf;
        if (i < count - 1) {
            id += "/";
        }
    }
#endif
    // TODO: other backends
    return id;
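
As a usage note, the enumeration helpers used in the hunk above also work
standalone; a minimal sketch (not part of the commit), assuming a build with
GGML_USE_CANN and the header surface sketched earlier:

// Minimal sketch: list Ascend NPU devices the way get_gpu_info() does above.
#include <cstdio>
#include "ggml-cann.h"

int main() {
    uint32_t count = ggml_backend_cann_get_device_count();
    for (uint32_t i = 0; i < count; i++) {
        char desc[128];
        ggml_backend_cann_get_device_description(i, desc, sizeof(desc));
        printf("NPU %u: %s\n", i, desc);
    }
    return 0;
}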
9 changes: 9 additions & 0 deletions examples/llava/clip.cpp
@@ -15,6 +15,10 @@
#include "ggml-metal.h"
#endif

#ifdef GGML_USE_CANN
#include "ggml-cann.h"
#endif

#define STB_IMAGE_IMPLEMENTATION
#include "stb_image.h"

@@ -978,6 +982,11 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
printf("%s: CLIP using Metal backend\n", __func__);
#endif

#ifdef GGML_USE_CANN
new_clip->backend = ggml_backend_cann_init(0);
printf("%s: CLIP using CANN backend\n", __func__);
#endif


if (!new_clip->backend) {
new_clip->backend = ggml_backend_cpu_init();
6 changes: 3 additions & 3 deletions examples/server/server.cpp
@@ -2539,17 +2539,17 @@ static void server_params_parse(int argc, char ** argv, server_params & sparams,
                }
            }
#else
            LOG_WARNING("llama.cpp was compiled without CUDA. It is not possible to set a tensor split.\n", {});
            LOG_WARNING("llama.cpp was compiled without CUDA/SYCL. It is not possible to set a tensor split.\n", {});
#endif // GGML_USE_CUDA
        } else if (arg == "--main-gpu" || arg == "-mg") {
            if (++i >= argc) {
                invalid_param = true;
                break;
            }
#if defined(GGML_USE_CUDA) || defined(GGML_USE_SYCL)
#if defined(GGML_USE_CUDA) || defined(GGML_USE_SYCL) || defined(GGML_USE_CANN)
            params.main_gpu = std::stoi(argv[i]);
#else
            LOG_WARNING("llama.cpp was compiled without CUDA. It is not possible to set a main GPU.", {});
            LOG_WARNING("llama.cpp was compiled without CUDA/SYCL/CANN. It is not possible to set a main GPU.", {});
#endif
        } else if (arg == "--lora") {
            if (++i >= argc) {
5 changes: 5 additions & 0 deletions ggml-backend.c
@@ -445,6 +445,11 @@ GGML_CALL static void ggml_backend_registry_init(void) {
    extern GGML_CALL void ggml_backend_kompute_reg_devices(void);
    ggml_backend_kompute_reg_devices();
#endif

#ifdef GGML_USE_CANN
    extern GGML_CALL int ggml_backend_cann_reg_devices(void);
    ggml_backend_cann_reg_devices();
#endif
}

GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data) {
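
The hunk above only declares and calls ggml_backend_cann_reg_devices(); its
body lives in ggml-cann.cpp, which is not shown in this excerpt. Modeled on
the pattern other backends use with ggml_backend_register() (signature in the
context line above), a plausible sketch follows; the init callback and
buffer-type helper names are assumptions, not confirmed by this diff.

// Hypothetical sketch of the registration loop (inside ggml-cann.cpp, after
// its usual includes). ggml_backend_reg_cann_init and
// ggml_backend_cann_buffer_type are assumed helper names.
GGML_CALL int ggml_backend_cann_reg_devices(void) {
    uint32_t count = ggml_backend_cann_get_device_count();
    for (uint32_t i = 0; i < count; i++) {
        char name[128];
        snprintf(name, sizeof(name), "CANN%u", i);  // display name is an assumption
        ggml_backend_register(name, ggml_backend_reg_cann_init,
                              ggml_backend_cann_buffer_type(i), (void *)(intptr_t) i);
    }
    return (int) count;
}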