From d8da97b6922c15f363357f2b7ba580432ddd3eb4 Mon Sep 17 00:00:00 2001
From: dhuangnm <74931910+dhuangnm@users.noreply.github.com>
Date: Wed, 19 Jun 2024 09:13:43 -0400
Subject: [PATCH] Add githash to nm-vllm (#299)

Expose the git commit hash that nm-vllm was built from via `vllm.githash()`:

```
>>> import vllm
>>> vllm.githash()
'106796861914146372aba9386aeff9361edfb34d'
```

---------

Co-authored-by: dhuangnm <dhuang@MacBook-Pro-2.local>
---
 CMakeLists.txt              | 10 +++++++---
 cmake/dep.cmake             |  6 ++++++
 collect_env.py              | 13 +++++++++++++
 csrc/cpu/torch_bindings.cpp |  7 +++++++
 csrc/torch_bindings.cpp     |  7 +++++++
 vllm/__init__.py            |  6 ++++++
 6 files changed, 46 insertions(+), 3 deletions(-)
 create mode 100644 cmake/dep.cmake

diff --git a/CMakeLists.txt b/CMakeLists.txt
index ad6736c47f459..fab73ebc4a101 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -8,6 +8,7 @@ message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")
 message(STATUS "Target device: ${VLLM_TARGET_DEVICE}")
 
 include(${CMAKE_CURRENT_LIST_DIR}/cmake/utils.cmake)
+include(${CMAKE_CURRENT_LIST_DIR}/cmake/dep.cmake)
 
 #
 # Supported python versions.  These versions will be searched in order, the
@@ -206,7 +207,8 @@ define_gpu_extension_target(
   ARCHITECTURES ${VLLM_GPU_ARCHES}
   INCLUDE_DIRECTORIES ${CUTLASS_INCLUDE_DIR};${CUTLASS_TOOLS_UTIL_INCLUDE_DIR}
   USE_SABI 3
-  WITH_SOABI)
+  WITH_SOABI
+  LIBRARIES cmake_git_version_tracking)
 
 #
 # _moe_C extension
@@ -224,7 +226,8 @@ define_gpu_extension_target(
   COMPILE_FLAGS ${VLLM_GPU_FLAGS}
   ARCHITECTURES ${VLLM_GPU_ARCHES}
   USE_SABI 3
-  WITH_SOABI)
+  WITH_SOABI
+  LIBRARIES cmake_git_version_tracking)
 
 #
 # _punica_C extension
@@ -276,7 +279,8 @@ if (VLLM_PUNICA_GPU_ARCHES)
     COMPILE_FLAGS ${VLLM_PUNICA_GPU_FLAGS}
     ARCHITECTURES ${VLLM_PUNICA_GPU_ARCHES}
     USE_SABI 3
-    WITH_SOABI)
+    WITH_SOABI
+    LIBRARIES cmake_git_version_tracking)
 else()
   message(WARNING "Unable to create _punica_C target because none of the "
     "requested architectures (${VLLM_GPU_ARCHES}) are supported, i.e. >= 8.0")
diff --git a/cmake/dep.cmake b/cmake/dep.cmake
new file mode 100644
index 0000000000000..006fa6904aac7
--- /dev/null
+++ b/cmake/dep.cmake
@@ -0,0 +1,6 @@
+include(FetchContent)  # fetch the git-version-tracking helper at configure time
+FetchContent_Declare(cmake_git_version_tracking
+  GIT_REPOSITORY https://github.com/andrew-hardin/cmake-git-version-tracking.git
+  GIT_TAG 6c0cb87edd029ddfb403a8e24577c144a03605a6  # pinned to an exact commit
+)
+FetchContent_MakeAvailable(cmake_git_version_tracking)
diff --git a/collect_env.py b/collect_env.py
index a1fee64d39e8a..0dbeaf5f8ad66 100644
--- a/collect_env.py
+++ b/collect_env.py
@@ -15,6 +15,8 @@
 try:
     import torch
     TORCH_AVAILABLE = True
+    installed_path = os.path.dirname(torch.__file__)  # torch package dir
+    sys.path.insert(0, os.path.dirname(installed_path))  # search parent first
 except (ImportError, NameError, AttributeError, OSError):
     TORCH_AVAILABLE = False
 
@@ -22,6 +24,7 @@
 SystemEnv = namedtuple(
     'SystemEnv',
     [
+        'vllm_git_hash',
         'torch_version',
         'is_debug_build',
         'cuda_compiled_version',
@@ -140,6 +143,14 @@ def get_conda_packages(run_lambda, patterns=None):
                                                          for name in patterns))
 
 
+def get_vllm_git_hash():
+    try:
+        import vllm
+        return vllm.githash()
+    except ImportError:
+        return None
+
+
 def get_gcc_version(run_lambda):
     return run_and_parse_first_match(run_lambda, 'gcc --version', r'gcc (.*)')
 
@@ -538,6 +549,7 @@ def get_version_or_na(cfg, prefix):
     gpu_topo = get_gpu_topo(run_lambda)
 
     return SystemEnv(
+        vllm_git_hash=get_vllm_git_hash(),
         torch_version=version_str,
         is_debug_build=debug_mode_str,
         python_version='{} ({}-bit runtime)'.format(
@@ -614,6 +626,7 @@ def get_version_or_na(cfg, prefix):
 ROCM Version: {rocm_version}
 Neuron SDK Version: {neuron_sdk_version}
 vLLM Version: {vllm_version}
+vLLM Git Hash: {vllm_git_hash}
 vLLM Build Flags:
 {vllm_build_flags}
 GPU Topology:
diff --git a/csrc/cpu/torch_bindings.cpp b/csrc/cpu/torch_bindings.cpp
index a2bf0d49adba5..c3fb4bc0b408c 100644
--- a/csrc/cpu/torch_bindings.cpp
+++ b/csrc/cpu/torch_bindings.cpp
@@ -2,11 +2,18 @@
 #include "ops.h"
 #include "registration.h"
 
+#include <git.h>
 #include <torch/library.h>
 
+std::string githash() { return std::string{git::CommitSHA1()}; }  // commit SHA1
+
 TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
   // vLLM custom ops
 
+  // Expose the git commit SHA1 that this vLLM extension was built from.
+  ops.def("githash", &githash);
+  ops.impl("githash", torch::kCPU, &githash);
+
   // Attention ops
   // Compute the attention between an input query and the cached keys/values
   // using PagedAttention.
diff --git a/csrc/torch_bindings.cpp b/csrc/torch_bindings.cpp
index df2603544c85a..c240360d6d203 100644
--- a/csrc/torch_bindings.cpp
+++ b/csrc/torch_bindings.cpp
@@ -3,8 +3,11 @@
 #include "ops.h"
 #include "registration.h"
 
+#include <git.h>
 #include <torch/library.h>
 
+std::string githash() { return std::string{git::CommitSHA1()}; }  // commit SHA1
+
 // Note on op signatures:
 // The X_meta signatures are for the meta functions corresponding to op X.
 // They must be kept in sync with the signature for X. Generally, only
@@ -18,6 +21,10 @@
 TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
   // vLLM custom ops
 
+  // Expose the git commit SHA1 that this vLLM extension was built from.
+  ops.def("githash", &githash);
+  ops.impl("githash", torch::kCUDA, &githash);
+
   // Attention ops
   // Compute the attention between an input query and the cached
   // keys/values using PagedAttention.
diff --git a/vllm/__init__.py b/vllm/__init__.py
index b3fc5cc26f19b..84dc6399e83ae 100644
--- a/vllm/__init__.py
+++ b/vllm/__init__.py
@@ -16,6 +16,7 @@
 __version__ = "0.5.0"
 
 __all__ = [
+    "githash",
     "LLM",
     "ModelRegistry",
     "PromptStrictInputs",
@@ -33,3 +34,8 @@
     "initialize_ray_cluster",
     "PoolingParams",
 ]
+
+
+def githash():  # git commit hash string reported by the `_C` extension op
+    import torch
+    return torch.ops._C.githash()  # NOTE(review): assumes _C is loaded; confirm