From d8da97b6922c15f363357f2b7ba580432ddd3eb4 Mon Sep 17 00:00:00 2001 From: dhuangnm <74931910+dhuangnm@users.noreply.github.com> Date: Wed, 19 Jun 2024 09:13:43 -0400 Subject: [PATCH] Add githash to nm-vllm (#299) Add git hash information to nm-vllm: ``` >>> import vllm >>> vllm.githash() '106796861914146372aba9386aeff9361edfb34d' ``` --------- Co-authored-by: dhuangnm --- CMakeLists.txt | 10 +++++++--- cmake/dep.cmake | 6 ++++++ collect_env.py | 13 +++++++++++++ csrc/cpu/torch_bindings.cpp | 7 +++++++ csrc/torch_bindings.cpp | 7 +++++++ vllm/__init__.py | 6 ++++++ 6 files changed, 46 insertions(+), 3 deletions(-) create mode 100644 cmake/dep.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index ad6736c47f459..fab73ebc4a101 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,6 +8,7 @@ message(STATUS "Build type: ${CMAKE_BUILD_TYPE}") message(STATUS "Target device: ${VLLM_TARGET_DEVICE}") include(${CMAKE_CURRENT_LIST_DIR}/cmake/utils.cmake) +include(${CMAKE_CURRENT_LIST_DIR}/cmake/dep.cmake) # # Supported python versions. These versions will be searched in order, the @@ -206,7 +207,8 @@ define_gpu_extension_target( ARCHITECTURES ${VLLM_GPU_ARCHES} INCLUDE_DIRECTORIES ${CUTLASS_INCLUDE_DIR};${CUTLASS_TOOLS_UTIL_INCLUDE_DIR} USE_SABI 3 - WITH_SOABI) + WITH_SOABI + LIBRARIES cmake_git_version_tracking) # # _moe_C extension @@ -224,7 +226,8 @@ define_gpu_extension_target( COMPILE_FLAGS ${VLLM_GPU_FLAGS} ARCHITECTURES ${VLLM_GPU_ARCHES} USE_SABI 3 - WITH_SOABI) + WITH_SOABI + LIBRARIES cmake_git_version_tracking) # # _punica_C extension @@ -276,7 +279,8 @@ if (VLLM_PUNICA_GPU_ARCHES) COMPILE_FLAGS ${VLLM_PUNICA_GPU_FLAGS} ARCHITECTURES ${VLLM_PUNICA_GPU_ARCHES} USE_SABI 3 - WITH_SOABI) + WITH_SOABI + LIBRARIES cmake_git_version_tracking) else() message(WARNING "Unable to create _punica_C target because none of the " "requested architectures (${VLLM_GPU_ARCHES}) are supported, i.e. 
>= 8.0") diff --git a/cmake/dep.cmake b/cmake/dep.cmake new file mode 100644 index 0000000000000..006fa6904aac7 --- /dev/null +++ b/cmake/dep.cmake @@ -0,0 +1,6 @@ +include(FetchContent) +FetchContent_Declare(cmake_git_version_tracking + GIT_REPOSITORY https://github.com/andrew-hardin/cmake-git-version-tracking.git + GIT_TAG 6c0cb87edd029ddfb403a8e24577c144a03605a6 +) +FetchContent_MakeAvailable(cmake_git_version_tracking) diff --git a/collect_env.py b/collect_env.py index a1fee64d39e8a..0dbeaf5f8ad66 100644 --- a/collect_env.py +++ b/collect_env.py @@ -15,6 +15,8 @@ try: import torch TORCH_AVAILABLE = True + installed_path = os.path.dirname(torch.__file__) + sys.path.insert(0, os.path.dirname(installed_path)) except (ImportError, NameError, AttributeError, OSError): TORCH_AVAILABLE = False @@ -22,6 +24,7 @@ SystemEnv = namedtuple( 'SystemEnv', [ + 'vllm_git_hash', 'torch_version', 'is_debug_build', 'cuda_compiled_version', @@ -140,6 +143,14 @@ def get_conda_packages(run_lambda, patterns=None): for name in patterns)) +def get_vllm_git_hash(): + try: + import vllm + return vllm.githash() + except ImportError: + return None + + def get_gcc_version(run_lambda): return run_and_parse_first_match(run_lambda, 'gcc --version', r'gcc (.*)') @@ -538,6 +549,7 @@ def get_version_or_na(cfg, prefix): gpu_topo = get_gpu_topo(run_lambda) return SystemEnv( + vllm_git_hash=get_vllm_git_hash(), torch_version=version_str, is_debug_build=debug_mode_str, python_version='{} ({}-bit runtime)'.format( @@ -614,6 +626,7 @@ def get_version_or_na(cfg, prefix): ROCM Version: {rocm_version} Neuron SDK Version: {neuron_sdk_version} vLLM Version: {vllm_version} +vLLM Git Hash: {vllm_git_hash} vLLM Build Flags: {vllm_build_flags} GPU Topology: diff --git a/csrc/cpu/torch_bindings.cpp b/csrc/cpu/torch_bindings.cpp index a2bf0d49adba5..c3fb4bc0b408c 100644 --- a/csrc/cpu/torch_bindings.cpp +++ b/csrc/cpu/torch_bindings.cpp @@ -2,11 +2,18 @@ #include "ops.h" #include "registration.h" +#include <git.h>
#include <torch/library.h> +std::string githash() { return std::string{git::CommitSHA1()}; } + TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) { // vLLM custom ops + // Show vllm git hash + ops.def("githash", &githash); + ops.impl("githash", torch::kCPU, &githash); + // Attention ops // Compute the attention between an input query and the cached keys/values // using PagedAttention. diff --git a/csrc/torch_bindings.cpp b/csrc/torch_bindings.cpp index df2603544c85a..c240360d6d203 100644 --- a/csrc/torch_bindings.cpp +++ b/csrc/torch_bindings.cpp @@ -3,8 +3,11 @@ #include "ops.h" #include "registration.h" +#include <git.h> #include <torch/library.h> +std::string githash() { return std::string{git::CommitSHA1()}; } + // Note on op signatures: // The X_meta signatures are for the meta functions corresponding to op X. // They must be kept in sync with the signature for X. Generally, only @@ -18,6 +21,10 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) { // vLLM custom ops + // Show vllm git hash + ops.def("githash", &githash); + ops.impl("githash", torch::kCUDA, &githash); + // Attention ops // Compute the attention between an input query and the cached // keys/values using PagedAttention. diff --git a/vllm/__init__.py b/vllm/__init__.py index b3fc5cc26f19b..84dc6399e83ae 100644 --- a/vllm/__init__.py +++ b/vllm/__init__.py @@ -16,6 +16,7 @@ __version__ = "0.5.0" __all__ = [ + "githash", "LLM", "ModelRegistry", "PromptStrictInputs", @@ -33,3 +34,8 @@ "initialize_ray_cluster", "PoolingParams", ] + + +def githash(): + import torch + return torch.ops._C.githash()