[refactor] Make llm.c modular
Signed-off-by: Jun Zhang <[email protected]>
junaire committed Aug 1, 2024
1 parent bdb0fb5 commit 7147edc
Showing 5 changed files with 1,328 additions and 1,022 deletions.
65 changes: 65 additions & 0 deletions .github/workflows/cmake.yml
@@ -0,0 +1,65 @@
name: CMake
on:
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]

jobs:
  build-Clang-Linux:
    strategy:
      matrix:
        target:
          - 'Debug'
          - 'Release'
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: install clang
        run: |
          sudo apt update && sudo apt install build-essential software-properties-common clang
      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
      - name: Build
        run: |
          echo "/usr/lib/ccache:/usr/local/opt/ccache/libexec" >> $GITHUB_PATH
          mkdir build
          cd build
          cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=${{ matrix.target }} ..
          cmake --build . -j$(nproc)
          ctest -j$(nproc)
  build-GCC-Linux:
    strategy:
      matrix:
        target:
          - 'Debug'
          - 'Release'
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: install GCC
        run: |
          sudo apt update && sudo apt install build-essential software-properties-common
      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
      - name: Build
        run: |
          echo "/usr/lib/ccache:/usr/local/opt/ccache/libexec" >> $GITHUB_PATH
          mkdir build
          cd build
          cmake -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=${{ matrix.target }} ..
          cmake --build . -j$(nproc)
  build-windows:
    strategy:
      matrix:
        target:
          - 'Debug'
          - 'Release'
    runs-on: windows-latest
    steps:
      - uses: actions/checkout@v4
      - name: Build
        run: |
          mkdir build
          cd build
          cmake -DCMAKE_BUILD_TYPE=${{ matrix.target }} ..
          cmake --build . --config ${{ matrix.target }} -j $Env:NUMBER_OF_PROCESSORS
91 changes: 91 additions & 0 deletions CMakeLists.txt
@@ -0,0 +1,91 @@
cmake_minimum_required(VERSION 3.24)
# Only C/C++ for now: the CUDA targets below are commented out, and listing CUDA
# in LANGUAGES would fail to configure on machines without the CUDA toolkit.
project(llm.c LANGUAGES C CXX)

# Put binaries and libraries in the same location.
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/bin)

# Always export compile_commands.json for language servers like clangd.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Release by default if not specified.
if (NOT EXISTS ${CMAKE_BINARY_DIR}/CMakeCache.txt)
  if (NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE "Release" CACHE STRING "" FORCE)
  endif()
endif()

# option(PRECISION "Precision settings" BF16)
# option(USE_CUDNN "Use cudnn" ON)

add_library(train_gpt2_cpu SHARED train_gpt2.c)
target_include_directories(train_gpt2_cpu PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/llmc)
target_link_libraries(train_gpt2_cpu PRIVATE m)
target_compile_definitions(train_gpt2_cpu PRIVATE -DLLMC_LIB=1)
if (NO_OMP)
  message(STATUS "OpenMP is manually disabled")
else()
  find_package(OpenMP)
  if (OpenMP_FOUND)
    message(STATUS "✓ OpenMP found")
    target_link_libraries(train_gpt2_cpu PRIVATE OpenMP::OpenMP_C)
  else()
    message(STATUS "✗ OpenMP not found")
  endif()
endif()
if (MSVC)
  target_include_directories(train_gpt2_cpu PUBLIC dev)
  target_compile_options(train_gpt2_cpu PRIVATE /Zi /nologo /W4 /WX- /diagnostics:column /sdl /O2 /Oi /Ot /GL /D _DEBUG /D _CONSOLE /D _UNICODE /D UNICODE /Gm- /EHsc /MD /GS /Gy /fp:fast /Zc:wchar_t /Zc:forScope /Zc:inline /permissive- /external:W3 /Gd /TP /wd4996 /FC /openmp:llvm)
else()
  target_compile_options(train_gpt2_cpu PRIVATE -Ofast -Wno-unused-result -Wno-ignored-pragmas -Wno-unknown-attributes -march=native)
endif()

# set_source_files_properties(llmc/cudnn_att.cpp PROPERTIES LANGUAGE CUDA)
# add_library(train_gpt2_cuda SHARED train_gpt2.cu llmc/cudnn_att.cpp)
# target_include_directories(train_gpt2_cuda PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/llmc)
# target_compile_options(train_gpt2_cuda PRIVATE -O3 -t=0 --use_fast_math)
# set_target_properties(train_gpt2_cuda PROPERTIES CXX_STANDARD 17)
# if (PRECISION STREQUAL "FP32")
# target_compile_definitions(train_gpt2_cuda PRIVATE -DENABLE_FP32)
# elseif(PRECISION STREQUAL "FP16")
# target_compile_definitions(train_gpt2_cuda PRIVATE -DENABLE_FP16)
# else()
# target_compile_definitions(train_gpt2_cuda PRIVATE -DENABLE_BF16)
# endif()


# Disable cuDNN for now; it has some bugs in its CMake integration.
# if (USE_CUDNN)
# include(FetchContent)
# FetchContent_Declare(cudnn-frontend URL https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v1.5.2.tar.gz)
# FetchContent_MakeAvailable(cudnn-frontend)
# target_compile_definitions(train_gpt2_cuda PRIVATE -DENABLE_CUDNN)
# target_link_libraries(train_gpt2_cuda PRIVATE cudnn)
# endif()

# if (NO_USE_MPI)
# message(STATUS "→ MPI is manually disabled")
# else()
# find_package(MPI)
# if (MPI_FOUND)
# message(STATUS "✓ MPI found")
# target_compile_definitions(train_gpt2_cuda PRIVATE -DUSE_MPI)
# target_link_libraries(train_gpt2_cuda PRIVATE MPI::MPI_C)
# else()
# message(STATUS "✗ MPI not found")
# endif()
# endif()
#
# if (NO_MULTI_GPU)
# message(STATUS "→ Multi-GPU (NCCL) is manually disabled")
# else()
# find_package(NCCL)
# if (NCCL_FOUND)
# message(STATUS "✓ NCCL found, OK to train with multiple GPUs")
# target_compile_definitions(train_gpt2_cuda PRIVATE -DMULTI_GPU)
# target_link_libraries(train_gpt2_cuda PRIVATE NCCL::NCCL_C)
# else()
# message(STATUS "✗ NCCL is not found, disabling multi-GPU support")
# endif()
# endif()
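Because train_gpt2_cpu is now built as a SHARED library with LLMC_LIB=1, the new create/destroy entry points can be consumed without compiling against the llm.c sources at all, e.g. via dlopen. A minimal sketch, not part of the commit itself — it assumes a Linux build whose default output layout puts the library at build/lib/libtrain_gpt2_cpu.so, and that tokenizer_create (added in llmc/tokenizer.h below) is exported:

#include <stdio.h>
#include <dlfcn.h>  // POSIX dynamic loading

int main(void) {
    // Path is an assumption: CMAKE_LIBRARY_OUTPUT_DIRECTORY places shared libs in build/lib.
    void *lib = dlopen("./build/lib/libtrain_gpt2_cpu.so", RTLD_NOW);
    if (!lib) {
        fprintf(stderr, "dlopen failed: %s\n", dlerror());
        return 1;
    }
    // Resolve one of the new heap-allocating constructors by name.
    void *(*create)(const char *) = (void *(*)(const char *))dlsym(lib, "tokenizer_create");
    if (!create) {
        fprintf(stderr, "dlsym failed: %s\n", dlerror());
    }
    dlclose(lib);
    return 0;
}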
21 changes: 20 additions & 1 deletion llmc/dataloader.h
@@ -200,6 +200,19 @@ void dataloader_init(DataLoader *loader,
    dataloader_reset(loader);
}

DataLoader* dataloader_create(const char* filename_pattern,
                              size_t B,
                              size_t T,
                              int process_rank,
                              int num_processes,
                              int should_shuffle) {
    DataLoader* loader = (DataLoader*)mallocCheck(sizeof(DataLoader));
    dataloader_init(loader, filename_pattern, B, T, process_rank, num_processes, should_shuffle);
    return loader;
}

void dataloader_load_batch(DataLoader* loader) {
    assert(!loader->should_shuffle || (loader->should_shuffle && loader->intra_shard_indices != NULL));
    assert(loader->current_sample_idx < loader->shard_num_samples);
@@ -248,6 +261,12 @@ void dataloader_free(DataLoader *loader) {
    globfree(&loader->glob_result);
}

void dataloader_destroy(DataLoader* loader) {
    dataloader_free(loader);
    free(loader);
}

// ----------------------------------------------------------------------------
// Distributed Eval Loader
// Many evals (like HellaSwag and MMLU) are multiple-choice
@@ -511,4 +530,4 @@ void evalloader_free(EvalLoader *loader) {
    fcloseCheck(loader->eval_file);
}

#endif // DATALOADER_H
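The dataloader_create/dataloader_destroy pair added above wraps the existing dataloader_init/dataloader_free, so a caller can hold an opaque heap pointer instead of embedding the struct (useful for FFI bindings that don't know sizeof(DataLoader)). A minimal usage sketch, assuming compilation from the repo root, a hypothetical shard pattern data/train_*.bin, and the existing dataloader_next_batch API:

#include "llmc/dataloader.h"

int main(void) {
    // Hypothetical settings: batch size 4, sequence length 64, rank 0 of 1, no shuffling.
    DataLoader *loader = dataloader_create("data/train_*.bin", 4, 64, 0, 1, 0);
    dataloader_next_batch(loader);  // fills loader->inputs and loader->targets
    dataloader_destroy(loader);     // dataloader_free() for the internals, then free() on the struct
    return 0;
}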
12 changes: 12 additions & 0 deletions llmc/tokenizer.h
@@ -83,6 +83,12 @@ void tokenizer_init(Tokenizer *tokenizer, const char *filename) {
    tokenizer->init_ok = 1;
}

Tokenizer* tokenizer_create(const char *filename) {
    Tokenizer* tokenizer = (Tokenizer*)mallocCheck(sizeof(Tokenizer));
    tokenizer_init(tokenizer, filename);
    return tokenizer;
}

const char *tokenizer_decode(Tokenizer *tokenizer, uint32_t token_id) {
    if (tokenizer->init_ok == 0) {
        return NULL;
@@ -103,3 +109,9 @@ void tokenizer_free(Tokenizer *tokenizer) {
        free(tokenizer->token_table);
    }
}

void tokenizer_destroy(Tokenizer* tokenizer) {
    tokenizer_free(tokenizer);
    free(tokenizer);
}
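tokenizer_create/tokenizer_destroy follow the same constructor/destructor pattern. A short sketch, assuming the gpt2_tokenizer.bin file produced by the repo's download scripts and token id 50256 being GPT-2's <|endoftext|>:

#include <stdio.h>
#include "llmc/tokenizer.h"

int main(void) {
    Tokenizer *tok = tokenizer_create("gpt2_tokenizer.bin");
    const char *piece = tokenizer_decode(tok, 50256);  // decode a single token id to its string
    if (piece != NULL) printf("%s\n", piece);
    tokenizer_destroy(tok);  // tokenizer_free() plus free() on the struct
    return 0;
}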
