Skip to content

Commit

Permalink
Use pthreads on Linux to avoid depending on GLIBCXX_3.4.29
Browse files Browse the repository at this point in the history
This is a partial revert of 332530b.
  • Loading branch information
akx committed Mar 7, 2024
1 parent 87e029b commit a8025b8
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 2 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ CMakeFiles/
bitsandbytes.dir/
Debug/
Release/
csrc/config.h

# IDE local files
.vs/
Expand Down
11 changes: 9 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,22 @@ list(APPEND SRC_FILES ${CPP_FILES})

# User-selectable compute backend, stored in the cache so it can be chosen with
# -DCOMPUTE_BACKEND=<value>. The STRINGS property records the accepted choices
# (cpu, cuda, mps) for CMake front-ends that present a selection list.
set(COMPUTE_BACKEND "cpu" CACHE STRING "The compute backend to use (cpu, cuda, mps)")
set_property(CACHE COMPUTE_BACKEND PROPERTY STRINGS cpu cuda mps)

# Threading implementation switch, OFF by default. The value is forwarded to
# the C++ sources through the generated config.h (#cmakedefine), where an
# undefined BNB_USE_STD_THREADS selects the pthread code path.
set(BNB_USE_STD_THREADS OFF CACHE BOOL "Use std::thread for parallelism")

# Optional verbose output from the PTX assembler.
option(PTXAS_VERBOSE "Pass through -v flag to PTX Assembler" OFF)

# Deployment target applied on Apple platforms.
if(APPLE)
set(CMAKE_OSX_DEPLOYMENT_TARGET 13.1)
endif()

# Force std::thread on Windows. This sets a normal (non-cache) variable, which
# takes precedence over the cache entry above for the rest of this scope, so a
# user-supplied -DBNB_USE_STD_THREADS=OFF is overridden on Windows.
if(WIN32)
set(BNB_USE_STD_THREADS ON) # no pthread on Windows
endif()

# Base name used for the built library.
set(BNB_OUTPUT_NAME "bitsandbytes")

# NOTE(review): two status lines are emitted here and the second is a superset
# of the first. Presumably the first is the pre-change line kept by the diff
# rendering; confirm against the real CMakeLists.txt before removing it.
message(STATUS "Configuring ${PROJECT_NAME} (Backend: ${COMPUTE_BACKEND})")
message(STATUS "Configuring ${PROJECT_NAME} (Backend: ${COMPUTE_BACKEND}, std::thread: ${BNB_USE_STD_THREADS})")

if(${COMPUTE_BACKEND} STREQUAL "cuda")
if(APPLE)
Expand Down Expand Up @@ -188,7 +195,7 @@ set_source_files_properties(${CPP_FILES} PROPERTIES LANGUAGE CXX)
add_library(bitsandbytes SHARED ${SRC_FILES})
target_compile_features(bitsandbytes PUBLIC cxx_std_14)
# Public headers live in the source tree.
target_include_directories(bitsandbytes PUBLIC csrc include)

# Generate config.h from its template. A relative output path is resolved
# against CMAKE_CURRENT_BINARY_DIR, so both paths are spelled out explicitly
# and the generated directory is added to the include path. Without this, an
# out-of-source build writes config.h where `#include "config.h"` cannot find
# it. For an in-source build the two csrc directories coincide, so behaviour
# is unchanged.
configure_file(
  "${CMAKE_CURRENT_SOURCE_DIR}/csrc/config.h.in"
  "${CMAKE_CURRENT_BINARY_DIR}/csrc/config.h"
)
target_include_directories(bitsandbytes PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/csrc")

if(BUILD_CUDA)
target_include_directories(bitsandbytes PUBLIC ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
Expand Down
1 change: 1 addition & 0 deletions csrc/config.h.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#cmakedefine BNB_USE_STD_THREADS
28 changes: 28 additions & 0 deletions csrc/cpu_ops.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,18 @@
#include <BinSearch.h>
#include <common.h>
#include "config.h"

#ifdef BNB_USE_STD_THREADS
#include <thread>
#else
#include <pthread.h>

// Wrapper for `pthread_start` to match the signature of `std::thread`
static void* quantize_block_w(void* arg) {
quantize_block(*(quantize_block_args*)arg);
return nullptr;
}
#endif

using namespace BinSearch;

Expand Down Expand Up @@ -31,7 +43,11 @@ void quantize_cpu(float *code, float *A, float *absmax, unsigned char *out, long
for(long long offset = 0; offset < num_blocks; offset+=thread_wave_size)
{
long long valid_chunks = num_blocks - offset >= thread_wave_size ? thread_wave_size : num_blocks - offset;
#ifdef BNB_USE_STD_THREADS
std::vector<std::thread> threads(valid_chunks);
#else
std::vector<pthread_t> threads(valid_chunks);
#endif
std::vector<quantize_block_args> args(valid_chunks);

int chunks_processed = 0;
Expand All @@ -51,13 +67,25 @@ void quantize_cpu(float *code, float *A, float *absmax, unsigned char *out, long
arg.threadidx = block_idx / blocksize;
arg.blocksize = blocksize;

#ifdef BNB_USE_STD_THREADS
threads[chunks_processed] = std::thread([arg] { quantize_block(arg); });
#else
pthread_create(&threads[chunks_processed], NULL, quantize_block_w, &arg);
// TODO: handle error from pthread_create
#endif
chunks_processed += 1;
if(chunks_processed == valid_chunks){ break; }
}

for (int i = 0; i < valid_chunks; i++)
{
#ifdef BNB_USE_STD_THREADS
threads[i].join();
#else
int err = pthread_join(threads[i], NULL);
#endif
}

}

}

0 comments on commit a8025b8

Please sign in to comment.