
Commit

…ebui into Chinese
Touch-Night committed Jul 14, 2024
2 parents 01d67be + 0315122 commit f1560ba
Showing 3 changed files with 42 additions and 60 deletions.
86 changes: 34 additions & 52 deletions modules/llama_cpp_python_hijack.py
@@ -1,4 +1,5 @@
import importlib
import platform
from typing import Sequence

from tqdm import tqdm
@@ -13,58 +14,39 @@
def llama_cpp_lib():
global imported_module

def module_to_purpose(module_name):
if module_name == 'llama_cpp':
return 'CPU'
elif module_name == 'llama_cpp_cuda_tensorcores':
return 'tensorcores'
elif module_name == 'llama_cpp_cuda':
return 'default'

return 'unknown'

return_lib = None

if shared.args.cpu:
if imported_module and imported_module != 'llama_cpp':
raise Exception(f"当前已加载 {module_to_purpose(imported_module)} 版本的 llama-cpp-python。目前要切换到CPU版本需要重启服务器。")
try:
return_lib = importlib.import_module('llama_cpp')
imported_module = 'llama_cpp'
except:
pass

if shared.args.tensorcores and return_lib is None:
if imported_module and imported_module != 'llama_cpp_cuda_tensorcores':
raise Exception(f"当前已加载 {module_to_purpose(imported_module)} 版本的 llama-cpp-python。目前要切换到 tensorcores 版本需要重启服务器。")
try:
return_lib = importlib.import_module('llama_cpp_cuda_tensorcores')
imported_module = 'llama_cpp_cuda_tensorcores'
except:
pass

if return_lib is None:
if imported_module and imported_module != 'llama_cpp_cuda':
raise Exception(f"当前已加载 {module_to_purpose(imported_module)} 版本的 llama-cpp-python。目前要切换到默认版本需要重启服务器。")
try:
return_lib = importlib.import_module('llama_cpp_cuda')
imported_module = 'llama_cpp_cuda'
except:
pass

if return_lib is None and not shared.args.cpu:
if imported_module and imported_module != 'llama_cpp':
raise Exception(f"当前已加载 {module_to_purpose(imported_module)} 版本的 llama-cpp-python。目前要切换到CPU版本需要重启服务器。")
try:
return_lib = importlib.import_module('llama_cpp')
imported_module = 'llama_cpp'
except:
pass

if return_lib is not None:
monkey_patch_llama_cpp_python(return_lib)

return return_lib
# Determine the platform
is_macos = platform.system() == 'Darwin'

# Define the library names based on the platform
if is_macos:
lib_names = [
(None, 'llama_cpp')
]
else:
lib_names = [
('cpu', 'llama_cpp'),
('tensorcores', 'llama_cpp_cuda_tensorcores'),
(None, 'llama_cpp_cuda'),
(None, 'llama_cpp')
]

for arg, lib_name in lib_names:
should_import = (arg is None or getattr(shared.args, arg))

if should_import:
if imported_module and imported_module != lib_name:
# Conflict detected, raise an exception
raise Exception(f"由于`{imported_module}`已被导入,无法导入`{lib_name}`。目前要切换 llama-cpp-python 的变种需要重启服务器。")

try:
return_lib = importlib.import_module(lib_name)
imported_module = lib_name
monkey_patch_llama_cpp_python(return_lib)
return return_lib
except ImportError:
continue

return None


def eval_with_progress(self, tokens: Sequence[int]):
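The rewritten llama_cpp_lib() collapses the four near-duplicate import blocks into one ordered candidate list: on macOS only llama_cpp is tried, while on other platforms the loop walks llama_cpp (when --cpu is set), llama_cpp_cuda_tensorcores (when --tensorcores is set), llama_cpp_cuda, and finally llama_cpp, returning the first variant that imports and raising if a different variant is already loaded. The sketch below reproduces only that ordering; it is a minimal standalone illustration, not code from the commit, and the Args dataclass plus the printed expectations are assumptions for demonstration.

import platform
from dataclasses import dataclass

@dataclass
class Args:
    # Illustrative stand-in for shared.args; only the two flags that matter here.
    cpu: bool = False
    tensorcores: bool = False

def candidate_modules(args: Args) -> list[str]:
    # Mirror the priority list from the diff above: macOS gets only the CPU build,
    # other platforms fall through the CUDA variants to plain llama_cpp.
    if platform.system() == 'Darwin':
        pairs = [(None, 'llama_cpp')]
    else:
        pairs = [
            ('cpu', 'llama_cpp'),
            ('tensorcores', 'llama_cpp_cuda_tensorcores'),
            (None, 'llama_cpp_cuda'),
            (None, 'llama_cpp'),
        ]
    # Keep a candidate when it is unconditional (flag is None) or its flag is set.
    return [name for flag, name in pairs if flag is None or getattr(args, flag)]

if __name__ == '__main__':
    # Expected output on a non-macOS machine:
    print(candidate_modules(Args()))                  # ['llama_cpp_cuda', 'llama_cpp']
    print(candidate_modules(Args(tensorcores=True)))  # ['llama_cpp_cuda_tensorcores', 'llama_cpp_cuda', 'llama_cpp']
    print(candidate_modules(Args(cpu=True)))          # ['llama_cpp', 'llama_cpp_cuda', 'llama_cpp']

In the real function, the first candidate that imports is monkey-patched and returned; an ImportError simply moves on to the next name.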
8 changes: 4 additions & 4 deletions requirements.txt
@@ -58,8 +58,8 @@ https://mirror.ghproxy.com/https://github.com/oobabooga/exllamav2/releases/downl
https://mirror.ghproxy.com/https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://mirror.ghproxy.com/https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://mirror.ghproxy.com/https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
https://mirror.ghproxy.com/https://github.com/oobabooga/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://mirror.ghproxy.com/https://github.com/oobabooga/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://mirror.ghproxy.com/https://github.com/Dao-AILab/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu122torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://mirror.ghproxy.com/https://github.com/Dao-AILab/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu122torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://mirror.ghproxy.com/https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://mirror.ghproxy.com/https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://mirror.ghproxy.com/https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://mirror.ghproxy.com/https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
autoawq==0.2.5; platform_system == "Linux" or platform_system == "Windows"
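In requirements.txt the flash_attn wheels are bumped from 2.5.9.post1 to 2.6.1, and the Linux builds move from cu122 to cu123; each line's trailing environment marker decides which single wheel pip installs for a given OS, architecture, and Python version. As a rough illustration (not part of the commit), the packaging library that pip builds on can evaluate such a marker directly; the sample environment values below are assumptions:

from packaging.markers import Marker

# One of the markers used above, checked against an explicit (illustrative) environment.
marker = Marker('platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"')
env = {"platform_system": "Linux", "platform_machine": "x86_64", "python_version": "3.11"}
print(marker.evaluate(env))  # True -> the cp311 Linux x86_64 flash_attn 2.6.1 wheel applies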
8 changes: 4 additions & 4 deletions requirements_noavx2.txt
@@ -58,8 +58,8 @@ https://mirror.ghproxy.com/https://github.com/oobabooga/exllamav2/releases/downl
https://mirror.ghproxy.com/https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://mirror.ghproxy.com/https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://mirror.ghproxy.com/https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
https://mirror.ghproxy.com/https://github.com/oobabooga/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://mirror.ghproxy.com/https://github.com/oobabooga/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://mirror.ghproxy.com/https://github.com/Dao-AILab/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu122torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://mirror.ghproxy.com/https://github.com/Dao-AILab/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu122torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://mirror.ghproxy.com/https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://mirror.ghproxy.com/https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://mirror.ghproxy.com/https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://mirror.ghproxy.com/https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
autoawq==0.2.5; platform_system == "Linux" or platform_system == "Windows"
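requirements_noavx2.txt receives the same flash_attn bump. The wheel filenames themselves also encode the interpreter and platform (the cp311/cp310 and win_amd64/linux_x86_64 segments), which is why each environment marker maps to exactly one URL. A small decoding example, again using the packaging library and not taken from the commit:

from packaging.utils import parse_wheel_filename

# Split one of the updated filenames into project name, version, and compatibility tag.
name, version, build, tags = parse_wheel_filename(
    "flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl"
)
print(name)     # flash-attn
print(version)  # 2.6.1+cu122torch2.2.2cxx11abifalse (the local segment is lowercased on parse)
print([str(t) for t in tags])  # ['cp311-cp311-win_amd64']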
