Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Patch CUDA training logic to enable wasmedge-llmc gpu backend #5

Merged
merged 1 commit into from
Sep 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions .github/workflows/cmake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,29 @@ jobs:
cd build
cmake -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=${{ matrix.target }} ..
cmake --build . -j$(nproc)
# CUDA build job for the wasmedge-llmc GPU backend.
# NOTE(review): the scraped diff stripped all leading indentation, which makes
# this YAML invalid as shown; conventional workflow nesting is restored here.
build-CUDA:
  strategy:
    matrix:
      target:
        - 'Debug'
        - 'Release'
  runs-on: ubuntu-22.04
  # nvcc and the cuDNN development headers come from the container image,
  # not from the runner host.
  container:
    image: nvidia/cuda:12.6.1-cudnn-devel-ubuntu22.04
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: System Info
      run: |
        nvcc --version
        g++ --version

    - name: Build
      run: |
        # Pre-seed the timezone so tzdata (pulled in transitively by apt)
        # cannot block the non-interactive install with a prompt.
        ln -snf /usr/share/zoneinfo/Africa/Abidjan /etc/localtime && echo Africa/Abidjan > /etc/timezone
        apt-get update && apt-get install -y git cmake
        mkdir build
        cd build
        # -DCUDALIB=1 switches CMakeLists.txt to the CUDA training target.
        cmake -DCMAKE_BUILD_TYPE=${{ matrix.target }} .. -DCUDALIB=1
        cmake --build . -j$(nproc)
117 changes: 63 additions & 54 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,30 +1,37 @@
# NOTE(review): this chunk is rendered from a PR diff with removed and added
# lines interleaved — the unconditional project() just below is the OLD form;
# the if(CUDALIB)-guarded project() at the end is the NEW form. The real file
# keeps only one of them; both appear here as diff residue.
cmake_minimum_required(VERSION 3.15)
project(llm.c LANGUAGES C)
# project(llm.c LANGUAGES C CXX CUDA)

# Put binaries and libraries in the same location.
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/bin)
# User-facing switch: also build the CUDA training library (off by default).
option(CUDALIB "Enable CUDA training" OFF)

# Floating-point mode for the CUDA kernels; the dispatch further down
# recognizes FP32 and FP16 and falls back to BF16 for anything else.
set(PRECISION "BF16" CACHE STRING "Precision Settings")

# Always export compile_commands.json for lsp like clangd.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

set(CMAKE_POSITION_INDEPENDENT_CODE ON)

# We don't support this compiler.
if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
message(FATAL_ERROR "This compiler is not supported")
endif()

# Enable the CXX/CUDA toolchains only when the CUDA backend is requested;
# the plain C project configures without needing nvcc installed.
if (CUDALIB)
project(llm.c LANGUAGES C CXX CUDA)
else()
project(llm.c LANGUAGES C)
endif()

# Put binaries and libraries in the same location.
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/bin)

# Release by default if not specified.
if (NOT EXISTS ${CMAKE_BINARY_DIR}/CMakeCache.txt)
if (NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "Release" CACHE STRING "" FORCE)
endif()
endif()

# option(PRECISION "Precision settings" BF16)
# option(USE_CUDNN "Use cudnn" ON)

add_library(train_gpt2_cpu train_gpt2.c)
target_include_directories(train_gpt2_cpu PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/llmc)
target_link_libraries(train_gpt2_cpu PRIVATE m)
Expand All @@ -42,51 +49,53 @@ else()
endif()
target_compile_options(train_gpt2_cpu PRIVATE -Ofast -Wno-unused-result -Wno-ignored-pragmas -Wno-unknown-attributes -march=native)

# set_source_files_properties(llmc/cudnn_att.cpp PROPERTIES LANGUAGE CUDA)
# add_library(train_gpt2_cuda SHARED train_gpt2.cu llmc/cudnn_att.cpp)
# target_include_directories(train_gpt2_cuda PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/llmc)
# target_compile_options(train_gpt2_cuda PRIVATE -O3 -t=0 --use_fast_math)
# set_target_properties(train_gpt2_cuda PROPERTIES CXX_STANDARD 17)
# if (PRECISION EQUAL "FP32")
# target_compile_definitions(train_gpt2_cuda PRIVATE -DENABLE_FP32)
# elseif(PRECISION EQUAL "FP16")
# target_compile_definitions(train_gpt2_cuda PRIVATE -DENABLE_FP16)
# else()
# target_compile_definitions(train_gpt2_cuda PRIVATE -DENABLE_BF16)
# endif()
# Training GPT2 with CUDA.
if (CUDALIB)
# cudnn_att.cpp contains device code despite its .cpp extension; force it
# through the CUDA compiler.
set_source_files_properties(llmc/cudnn_att.cpp PROPERTIES LANGUAGE CUDA)
add_library(train_gpt2_cuda train_gpt2.cu llmc/cudnn_att.cpp)
target_include_directories(train_gpt2_cuda PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/llmc)
# -t=0 lets nvcc use all host CPU threads for device compilation.
target_compile_options(train_gpt2_cuda PRIVATE -O3 -t=0 --use_fast_math)
# LLMC_LIB presumably selects the library (no main()) build of train_gpt2.cu
# — NOTE(review): inferred from the name; confirm against train_gpt2.cu.
target_compile_definitions(train_gpt2_cuda PRIVATE -DLLMC_LIB=1)
set_target_properties(train_gpt2_cuda PROPERTIES CXX_STANDARD 17)
# Generate device code for compute capability 7.2 and 8.0 only.
set_target_properties(train_gpt2_cuda PROPERTIES CUDA_ARCHITECTURES "72;80")

# Map the PRECISION cache string onto the matching compile definition;
# any value other than FP32/FP16 falls back to BF16.
if (PRECISION STREQUAL "FP32")
target_compile_definitions(train_gpt2_cuda PRIVATE -DENABLE_FP32)
elseif(PRECISION STREQUAL "FP16")
target_compile_definitions(train_gpt2_cuda PRIVATE -DENABLE_FP16)
else()
target_compile_definitions(train_gpt2_cuda PRIVATE -DENABLE_BF16)
endif()

# cuDNN was previously disabled because of some bugs in its CMake setup;
# the commented block below is that older FetchContent attempt, superseded
# by the pinned cudnn-frontend fetch that follows.
# if (USE_CUDNN)
# include(FetchContent)
# FetchContent_Declare(cudnn-frontend URL https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v1.5.2.tar.gz)
# FetchContent_MakeAvailable(cudnn-frontend)
# target_compile_definitions(train_gpt2_cuda PRIVATE -DENABLE_CUDNN)
# target_link_libraries(train_gpt2_cuda PRIVATE cudnn)
# endif()
# Turn off cudnn-frontend's samples and unit tests before fetching it.
# NOTE(review): a plain set() only overrides the subproject's option() when
# policy CMP0077 is NEW there; consider `set(... CACHE BOOL "" )` if these
# are ever ignored.
set(CUDNN_FRONTEND_BUILD_SAMPLES OFF)
set(CUDNN_FRONTEND_BUILD_UNIT_TESTS OFF)
message(STATUS "Fetching cudnn-frontend")
include(FetchContent)
FetchContent_Declare(
cf
URL https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v1.6.1.tar.gz
URL_HASH MD5=c131914d8007318ec7b5b5f792458cb4
)
FetchContent_MakeAvailable(cf)
FetchContent_GetProperties(cf)
target_include_directories(train_gpt2_cuda PRIVATE ${cf_SOURCE_DIR}/include)
target_compile_definitions(train_gpt2_cuda PRIVATE -DENABLE_CUDNN)
# Links libcudnn by bare name (resolved on the container's default linker
# path); there is no imported CUDNN:: target available here.
target_link_libraries(train_gpt2_cuda PRIVATE cudnn)

find_package(CUDAToolkit REQUIRED)
target_link_libraries(train_gpt2_cuda PRIVATE CUDA::cublas CUDA::cublasLt CUDA::cudart CUDA::nvrtc)

# Optional multi-process support: define USE_MPI only when an MPI toolchain
# is found, unless the user opts out with -DNO_USE_MPI=1.
if (NO_USE_MPI)
message(STATUS "→ MPI is manually disabled")
else()
find_package(MPI)
if (MPI_FOUND)
message(STATUS "✓ MPI found")
target_compile_definitions(train_gpt2_cuda PRIVATE -DUSE_MPI)
target_link_libraries(train_gpt2_cuda PRIVATE MPI::MPI_C)
else()
message(STATUS "✗ MPI not found")
endif()
endif()
endif()

# if (NO_USE_MPI)
# message(STATUS "→ MPI is manually disabled")
# else()
# find_package(MPI)
# if (MPI_FOUND)
# message(STATUS "✓ MPI found")
# target_compile_definitions(train_gpt2_cuda PRIVATE -DUSE_MPI)
# target_link_libraries(train_gpt2_cuda PRIVATE MPI::MPI_C)
# else()
# message(STATUS "✗ MPI not found")
# endif()
# endif()
#
# if (NO_MULTI_GPU)
# message(STATUS "→ Multi-GPU (NCCL) is manually disabled")
# else()
# find_package(NCCL)
# if (NCCL_FOUND)
# message(STATUS "✓ NCCL found, OK to train with multiple GPUs")
# target_compile_definitions(train_gpt2_cuda PRIVATE -DMULTI_GPU)
# target_link_libraries(train_gpt2_cuda PRIVATE NCCL::NCCL_C)
# else()
# message(STATUS "✗ NCCL is not found, disabling multi-GPU support")
# endif()
# endif()
Loading