Skip to content

Commit

Permalink
Merge pull request #5 from junaire/jun/cuda
Browse files Browse the repository at this point in the history
Patch CUDA training logic to enable wasmedge-llmc gpu backend
  • Loading branch information
hydai authored Sep 11, 2024
2 parents fa07917 + a41ee03 commit abbf08f
Show file tree
Hide file tree
Showing 3 changed files with 2,692 additions and 1,674 deletions.
26 changes: 26 additions & 0 deletions .github/workflows/cmake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,29 @@ jobs:
cd build
cmake -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=${{ matrix.target }} ..
cmake --build . -j$(nproc)
# CI job that compiles the project with the CUDA backend enabled (-DCUDALIB=1).
# It only configures and builds; no tests or GPU workloads are executed here.
# The Build step writes /etc/localtime and /etc/timezone before calling apt-get,
# presumably so that package installation does not prompt for a time zone
# inside the container (TODO confirm).
build-CUDA:
# One job per CMake build type.
strategy:
matrix:
target:
- 'Debug'
- 'Release'
runs-on: ubuntu-22.04
# The steps run inside NVIDIA's CUDA 12.6.1 + cuDNN development image, which
# is expected to provide nvcc (queried by the System Info step).
container:
image: nvidia/cuda:12.6.1-cudnn-devel-ubuntu22.04
steps:
- name: Checkout code
uses: actions/checkout@v4

- name: System Info
run: |
nvcc --version
g++ --version
- name: Build
run: |
ln -snf /usr/share/zoneinfo/Africa/Abidjan /etc/localtime && echo Africa/Abidjan > /etc/timezone
apt-get update && apt-get install -y git cmake
mkdir build
cd build
cmake -DCMAKE_BUILD_TYPE=${{ matrix.target }} .. -DCUDALIB=1
cmake --build . -j$(nproc)
123 changes: 66 additions & 57 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,32 +1,39 @@
cmake_minimum_required(VERSION 3.15)
project(llm.c LANGUAGES C)
# project(llm.c LANGUAGES C CXX CUDA)

# Put binaries and libraries in the same location.
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/bin)
# Build the CUDA training library (train_gpt2_cuda). OFF by default; when ON,
# the project is declared with the CXX and CUDA languages in addition to C.
option(CUDALIB "Enable CUDA training" OFF)

# Precision of the CUDA build. "FP32" defines ENABLE_FP32, "FP16" defines
# ENABLE_FP16, and any other value (including the default "BF16") defines
# ENABLE_BF16 on train_gpt2_cuda. Only consulted when CUDALIB is ON.
set(PRECISION "BF16" CACHE STRING "Precision Settings")

set(CMAKE_POSITION_INDEPENDENT_CODE ON)

# Always export compile_commands.json for lsp like clangd.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

set(CMAKE_POSITION_INDEPENDENT_CODE ON)

# Enable only the languages this configuration needs: C++ and CUDA are
# required solely by the optional CUDA training library.
if (CUDALIB)
  project(llm.c LANGUAGES C CXX CUDA)
else()
  project(llm.c LANGUAGES C)
endif()

# We don't support this compiler.
# The check has to run after project(): CMAKE_<LANG>_COMPILER_ID is only
# populated once the language has been enabled. The C++ ID stays empty in the
# C-only configuration, so the C compiler is inspected as well.
if ("${CMAKE_C_COMPILER_ID}" STREQUAL "MSVC" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
  message(FATAL_ERROR "This compiler is not supported")
endif()

# Put binaries and libraries in the same location.
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/bin)

# Release by default if not specified.
# Multi-config generators choose the configuration at build time and ignore
# CMAKE_BUILD_TYPE, so the default is applied to single-config generators only.
# The test looks at the value itself rather than at the presence of
# CMakeCache.txt, so the default is also applied when an earlier configure run
# left a cache behind without a build type.
get_property(llmc_is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if (NOT llmc_is_multi_config AND NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE "Release" CACHE STRING "" FORCE)
endif()

# option(PRECISION "Precision settings" BF16)
# option(USE_CUDNN "Use cudnn" ON)

add_library(train_gpt2_cpu train_gpt2.c)
target_include_directories(train_gpt2_cpu PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/llmc)
target_link_libraries(train_gpt2_cpu PRIVATE m)
Expand All @@ -44,51 +51,53 @@ else()
endif()
target_compile_options(train_gpt2_cpu PRIVATE -Ofast -Wno-unused-result -Wno-ignored-pragmas -Wno-unknown-attributes -march=native)

# set_source_files_properties(llmc/cudnn_att.cpp PROPERTIES LANGUAGE CUDA)
# add_library(train_gpt2_cuda SHARED train_gpt2.cu llmc/cudnn_att.cpp)
# target_include_directories(train_gpt2_cuda PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/llmc)
# target_compile_options(train_gpt2_cuda PRIVATE -O3 -t=0 --use_fast_math)
# set_target_properties(train_gpt2_cuda PROPERTIES CXX_STANDARD 17)
# if (PRECISION EQUAL "FP32")
# target_compile_definitions(train_gpt2_cuda PRIVATE -DENABLE_FP32)
# elseif(PRECISION EQUAL "FP16")
# target_compile_definitions(train_gpt2_cuda PRIVATE -DENABLE_FP16)
# else()
# target_compile_definitions(train_gpt2_cuda PRIVATE -DENABLE_BF16)
# endif()


# Disable cudnn for now, it has some bugs in its cmake.
# if (USE_CUDNN)
# include(FetchContent)
# FetchContent_Declare(cudnn-frontend URL https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v1.5.2.tar.gz)
# FetchContent_MakeAvailable(cudnn-frontend)
# target_compile_definitions(train_gpt2_cuda PRIVATE -DENABLE_CUDNN)
# target_link_libraries(train_gpt2_cuda PRIVATE cudnn)
# endif()

# if (NO_USE_MPI)
# message(STATUS "→ MPI is manually disabled")
# else()
# find_package(MPI)
# if (MPI_FOUND)
# message(STATUS "✓ MPI found")
# target_compile_definitions(train_gpt2_cuda PRIVATE -DUSE_MPI)
# target_link_libraries(train_gpt2_cuda PRIVATE MPI::MPI_C)
# else()
# message(STATUS "✗ MPI not found")
# endif()
# endif()
#
# if (NO_MULTI_GPU)
# message(STATUS "→ Multi-GPU (NCCL) is manually disabled")
# else()
# find_package(NCCL)
# if (NCCL_FOUND)
# message(STATUS "✓ NCCL found, OK to train with multiple GPUs")
# target_compile_definitions(train_gpt2_cuda PRIVATE -DMULTI_GPU)
# target_link_libraries(train_gpt2_cuda PRIVATE NCCL::NCCL_C)
# else()
# message(STATUS "✗ NCCL is not found, disabling multi-GPU support")
# endif()
# endif()
# Training GPT2 with CUDA.
# Defines the train_gpt2_cuda library; everything below is configured only
# when CUDALIB is ON.
if (CUDALIB)
  # find_package(CUDAToolkit) first shipped with CMake 3.17. Fail with a clear
  # message instead of a missing-module error on older releases.
  if (CMAKE_VERSION VERSION_LESS 3.17)
    message(FATAL_ERROR "CUDALIB requires CMake 3.17 or newer (found ${CMAKE_VERSION})")
  endif()

  # cudnn_att.cpp is handed to the CUDA compiler together with train_gpt2.cu.
  set_source_files_properties(llmc/cudnn_att.cpp PROPERTIES LANGUAGE CUDA)
  add_library(train_gpt2_cuda train_gpt2.cu llmc/cudnn_att.cpp)
  target_include_directories(train_gpt2_cuda
    PUBLIC
      ${CMAKE_CURRENT_SOURCE_DIR}
      ${PROJECT_SOURCE_DIR}/llmc
  )
  target_compile_options(train_gpt2_cuda PRIVATE -O3 -t=0 --use_fast_math)
  target_compile_definitions(train_gpt2_cuda PRIVATE LLMC_LIB=1)
  set_target_properties(train_gpt2_cuda PROPERTIES
    CXX_STANDARD 17
    CUDA_ARCHITECTURES "72;80"
  )
  # Every source of this target is compiled as CUDA, so the language level has
  # to be requested through CUDA_STANDARD; CXX_STANDARD alone does not reach
  # those compilations. CMake releases before 3.18 cannot map C++17 onto a
  # CUDA compiler flag, so the property is left unset there.
  if (NOT CMAKE_VERSION VERSION_LESS 3.18)
    set_target_properties(train_gpt2_cuda PROPERTIES CUDA_STANDARD 17)
  endif()

  # Translate PRECISION into exactly one ENABLE_* definition. Unrecognised
  # values keep falling back to BF16, but no longer silently.
  if ("${PRECISION}" STREQUAL "FP32")
    target_compile_definitions(train_gpt2_cuda PRIVATE ENABLE_FP32)
  elseif ("${PRECISION}" STREQUAL "FP16")
    target_compile_definitions(train_gpt2_cuda PRIVATE ENABLE_FP16)
  else()
    if (NOT "${PRECISION}" STREQUAL "BF16")
      message(WARNING "Unknown PRECISION '${PRECISION}', falling back to BF16")
    endif()
    target_compile_definitions(train_gpt2_cuda PRIVATE ENABLE_BF16)
  endif()

  # Fetch the cudnn-frontend sources, with its samples and unit tests switched
  # off, and add its include directory to the target.
  set(CUDNN_FRONTEND_BUILD_SAMPLES OFF)
  set(CUDNN_FRONTEND_BUILD_UNIT_TESTS OFF)
  message(STATUS "Fetching cudnn-frontend")
  include(FetchContent)
  FetchContent_Declare(
    cf
    URL https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v1.6.1.tar.gz
    URL_HASH MD5=c131914d8007318ec7b5b5f792458cb4
  )
  FetchContent_MakeAvailable(cf)
  FetchContent_GetProperties(cf)
  target_include_directories(train_gpt2_cuda PRIVATE ${cf_SOURCE_DIR}/include)
  target_compile_definitions(train_gpt2_cuda PRIVATE ENABLE_CUDNN)
  target_link_libraries(train_gpt2_cuda PRIVATE cudnn)

  # CUDA runtime libraries shipped with the toolkit.
  find_package(CUDAToolkit REQUIRED)
  target_link_libraries(train_gpt2_cuda
    PRIVATE
      CUDA::cublas
      CUDA::cublasLt
      CUDA::cudart
      CUDA::nvrtc
  )

  # MPI is optional: it is used when found unless NO_USE_MPI is set.
  if (NO_USE_MPI)
    message(STATUS "→ MPI is manually disabled")
  else()
    # Only the C bindings are linked, so only the C component is requested;
    # a missing C++ MPI wrapper must not disable MPI support.
    find_package(MPI COMPONENTS C)
    if (MPI_C_FOUND)
      message(STATUS "✓ MPI found")
      target_compile_definitions(train_gpt2_cuda PRIVATE USE_MPI)
      target_link_libraries(train_gpt2_cuda PRIVATE MPI::MPI_C)
    else()
      message(STATUS "✗ MPI not found")
    endif()
  endif()
endif()

Loading

0 comments on commit abbf08f

Please sign in to comment.