Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge dev branch #6491

Merged
merged 16 commits into from
Oct 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docker/.env.example
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# by default the Dockerfile specifies these versions: 3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX
# however, for it to work on my card ( 2060 ) I had to specify the exact version, which was 7.5
# https://developer.nvidia.com/cuda-gpus you can find the version for your card here
# Or for a programmatic approach run `nvidia-smi --query-gpu=name,compute_cap --format=csv`
TORCH_CUDA_ARCH_LIST=7.5
# the port the webui binds to on the host
HOST_PORT=7860
Expand Down
2 changes: 1 addition & 1 deletion extensions/whisper_stt/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def ui():
with gr.Accordion("Settings", open=False):
auto_submit = gr.Checkbox(label='Submit the transcribed audio automatically', value=params['auto_submit'])
device_dropd = gr.Dropdown(label='Device', value=str(startup_device), choices=["cuda", "cpu", "none"])
whisper_model_dropd = gr.Dropdown(label='Whisper Model', value=params['whipser_model'], choices=["tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "large"])
whisper_model_dropd = gr.Dropdown(label='Whisper Model', value=params['whipser_model'], choices=["tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "large", "turbo"])
whisper_language = gr.Dropdown(label='Whisper Language', value=params['whipser_language'], choices=["english", "chinese", "german", "spanish", "russian", "korean", "french", "japanese", "portuguese", "turkish", "polish", "catalan", "dutch", "arabic", "swedish", "italian", "indonesian", "hindi", "finnish", "vietnamese", "hebrew", "ukrainian", "greek", "malay", "czech", "romanian", "danish", "hungarian", "tamil", "norwegian", "thai", "urdu", "croatian", "bulgarian", "lithuanian", "latin", "maori", "malayalam", "welsh", "slovak", "telugu", "persian", "latvian", "bengali", "serbian", "azerbaijani", "slovenian", "kannada", "estonian", "macedonian", "breton", "basque", "icelandic", "armenian", "nepali", "mongolian", "bosnian", "kazakh", "albanian", "swahili", "galician", "marathi", "punjabi", "sinhala", "khmer", "shona", "yoruba", "somali", "afrikaans", "occitan", "georgian", "belarusian", "tajik", "sindhi", "gujarati", "amharic", "yiddish", "lao", "uzbek", "faroese", "haitian creole", "pashto", "turkmen", "nynorsk", "maltese", "sanskrit", "luxembourgish", "myanmar", "tibetan", "tagalog", "malagasy", "assamese", "tatar", "hawaiian", "lingala", "hausa", "bashkir", "javanese", "sundanese"])

audio.change(
Expand Down
25 changes: 25 additions & 0 deletions instruction-templates/RWKV-World.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
instruction_template: |-
{%- set ns = namespace(found=false) -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{%- set ns.found = true -%}
{%- endif -%}
{%- endfor -%}
{%- if not ns.found -%}
{{- '' + '' + '' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- '' + message['content'] + '' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'User: ' + message['content'] + '\n\n'-}}
{%- else -%}
{{-'Assistant: ' + message['content'] + '\n\n' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-'Assistant:'-}}
{%- endif -%}
7 changes: 6 additions & 1 deletion modules/llama_cpp_python_hijack.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@
from modules.cache_utils import process_llamacpp_cache

imported_module = None
not_available_modules = set()


def llama_cpp_lib():
global imported_module
global imported_module, not_available_modules

# Determine the platform
is_macos = platform.system() == 'Darwin'
Expand All @@ -31,6 +32,9 @@ def llama_cpp_lib():
]

for arg, lib_name in lib_names:
if lib_name in not_available_modules:
continue

should_import = (arg is None or getattr(shared.args, arg))

if should_import:
Expand All @@ -44,6 +48,7 @@ def llama_cpp_lib():
monkey_patch_llama_cpp_python(return_lib)
return return_lib
except ImportError:
not_available_modules.add(lib_name)
continue

return None
Expand Down
2 changes: 1 addition & 1 deletion modules/llamacpp_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def generate(self, prompt, state, callback=None):
prompt=prompt,
max_tokens=state['max_new_tokens'],
temperature=state['temperature'],
top_p=state['top_p'],
top_p=state['top_p'] if state['top_p'] < 1 else 0.999,
min_p=state['min_p'],
typical_p=state['typical_p'],
frequency_penalty=state['frequency_penalty'],
Expand Down
11 changes: 5 additions & 6 deletions modules/sampler_hijack.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,7 @@ def get_logits_processor_patch(self, **kwargs):
)

# Stuff we don't need
elif warpers[i].__class__.__name__ in ['SuppressTokensLogitsProcessor', 'RepetitionPenaltyLogitsProcessor']:
elif warpers[i].__class__.__name__ in ['RepetitionPenaltyLogitsProcessor']:
del warpers[i]

# Add custom warpers
Expand Down Expand Up @@ -571,11 +571,10 @@ def get_logits_processor_patch(self, **kwargs):
if generation_config.temperature_last:
for param_name in ['temperature', 'dynamic_temperature', 'quadratic_sampling']:
if param_name in sampler_priority:
if param_name in sampler_priority:
index = sampler_priority.index(param_name)
sampler_priority.append(sampler_priority.pop(index))
else:
sampler_priority.append(param_name)
index = sampler_priority.index(param_name)
sampler_priority.append(sampler_priority.pop(index))
else:
sampler_priority.append(param_name)

class_name_to_nickname = {
'DynamicTemperatureLogitsWarper': 'dynamic_temperature',
Expand Down
8 changes: 4 additions & 4 deletions one_click.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,8 @@ def run_cmd(cmd, assert_success=False, environment=False, capture_output=False,
conda_sh_path = os.path.join(script_dir, "installer_files", "conda", "etc", "profile.d", "conda.sh")
cmd = f'. "{conda_sh_path}" && conda activate "{conda_env_path}" && {cmd}'

# Set executable to None for Windows, /bin/bash for everything else
executable = None if is_windows() else '/bin/bash'
# Set executable to None for Windows, bash for everything else
executable = None if is_windows() else 'bash'

# Run shell commands
result = subprocess.run(cmd, shell=True, capture_output=capture_output, env=env, executable=executable)
Expand Down Expand Up @@ -313,7 +313,7 @@ def install_webui():
if selected_gpu == "INTEL":
# Install oneAPI dependencies via conda
print_big_message("Installing Intel oneAPI runtime libraries.")
run_cmd("conda install -y -c intel dpcpp-cpp-rt=2024.0 mkl-dpcpp=2024.0")
run_cmd("conda install -y -c https://software.repos.intel.com/python/conda/ -c conda-forge dpcpp-cpp-rt=2024.0 mkl-dpcpp=2024.0")
# Install libuv required by Intel-patched torch
run_cmd("conda install -y libuv")

Expand All @@ -329,7 +329,7 @@ def install_extensions_requirements():
print_big_message("Installing extensions requirements.\nSome of these may fail on Windows.\nDon\'t worry if you see error messages, as they will not affect the main program.")
extensions = get_extensions_names()
for i, extension in enumerate(extensions):
print(f"\n\n--- [{i+1}/{len(extensions)}]: {extension}\n\n")
print(f"\n\n--- [{i + 1}/{len(extensions)}]: {extension}\n\n")
extension_req_path = os.path.join("extensions", extension, "requirements.txt")
run_cmd(f"python -m pip install -r {extension_req_path} --upgrade", assert_success=False, environment=True)

Expand Down
6 changes: 2 additions & 4 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,16 +1,14 @@
accelerate==0.33.*
accelerate==1.0.*
bitsandbytes==0.44.*
colorama
datasets
einops
fastapi==0.112.4
gradio==4.26.*
jinja2==3.1.4
lm_eval==0.3.0
markdown
numba==0.59.*
numpy==1.26.*
optimum==1.17.*
pandas
peft==0.12.*
Pillow>=9.5.0
Expand All @@ -23,7 +21,7 @@ safetensors==0.4.*
scipy
sentencepiece
tensorboard
transformers==4.45.*
transformers==4.46.*
tqdm
wandb

Expand Down
6 changes: 2 additions & 4 deletions requirements_amd.txt
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
accelerate==0.33.*
accelerate==1.0.*
colorama
datasets
einops
fastapi==0.112.4
gradio==4.26.*
jinja2==3.1.4
lm_eval==0.3.0
markdown
numba==0.59.*
numpy==1.26.*
optimum==1.17.*
pandas
peft==0.12.*
Pillow>=9.5.0
Expand All @@ -22,7 +20,7 @@ safetensors==0.4.*
scipy
sentencepiece
tensorboard
transformers==4.45.*
transformers==4.46.*
tqdm
wandb

Expand Down
6 changes: 2 additions & 4 deletions requirements_amd_noavx2.txt
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
accelerate==0.33.*
accelerate==1.0.*
colorama
datasets
einops
fastapi==0.112.4
gradio==4.26.*
jinja2==3.1.4
lm_eval==0.3.0
markdown
numba==0.59.*
numpy==1.26.*
optimum==1.17.*
pandas
peft==0.12.*
Pillow>=9.5.0
Expand All @@ -22,7 +20,7 @@ safetensors==0.4.*
scipy
sentencepiece
tensorboard
transformers==4.45.*
transformers==4.46.*
tqdm
wandb

Expand Down
6 changes: 2 additions & 4 deletions requirements_apple_intel.txt
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
accelerate==0.33.*
accelerate==1.0.*
colorama
datasets
einops
fastapi==0.112.4
gradio==4.26.*
jinja2==3.1.4
lm_eval==0.3.0
markdown
numba==0.59.*
numpy==1.26.*
optimum==1.17.*
pandas
peft==0.12.*
Pillow>=9.5.0
Expand All @@ -22,7 +20,7 @@ safetensors==0.4.*
scipy
sentencepiece
tensorboard
transformers==4.45.*
transformers==4.46.*
tqdm
wandb

Expand Down
6 changes: 2 additions & 4 deletions requirements_apple_silicon.txt
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
accelerate==0.33.*
accelerate==1.0.*
colorama
datasets
einops
fastapi==0.112.4
gradio==4.26.*
jinja2==3.1.4
lm_eval==0.3.0
markdown
numba==0.59.*
numpy==1.26.*
optimum==1.17.*
pandas
peft==0.12.*
Pillow>=9.5.0
Expand All @@ -22,7 +20,7 @@ safetensors==0.4.*
scipy
sentencepiece
tensorboard
transformers==4.45.*
transformers==4.46.*
tqdm
wandb

Expand Down
6 changes: 2 additions & 4 deletions requirements_cpu_only.txt
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
accelerate==0.33.*
accelerate==1.0.*
colorama
datasets
einops
fastapi==0.112.4
gradio==4.26.*
jinja2==3.1.4
lm_eval==0.3.0
markdown
numba==0.59.*
numpy==1.26.*
optimum==1.17.*
pandas
peft==0.12.*
Pillow>=9.5.0
Expand All @@ -22,7 +20,7 @@ safetensors==0.4.*
scipy
sentencepiece
tensorboard
transformers==4.45.*
transformers==4.46.*
tqdm
wandb

Expand Down
6 changes: 2 additions & 4 deletions requirements_cpu_only_noavx2.txt
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
accelerate==0.33.*
accelerate==1.0.*
colorama
datasets
einops
fastapi==0.112.4
gradio==4.26.*
jinja2==3.1.4
lm_eval==0.3.0
markdown
numba==0.59.*
numpy==1.26.*
optimum==1.17.*
pandas
peft==0.12.*
Pillow>=9.5.0
Expand All @@ -22,7 +20,7 @@ safetensors==0.4.*
scipy
sentencepiece
tensorboard
transformers==4.45.*
transformers==4.46.*
tqdm
wandb

Expand Down
6 changes: 2 additions & 4 deletions requirements_noavx2.txt
Original file line number Diff line number Diff line change
@@ -1,16 +1,14 @@
accelerate==0.33.*
accelerate==1.0.*
bitsandbytes==0.44.*
colorama
datasets
einops
fastapi==0.112.4
gradio==4.26.*
jinja2==3.1.4
lm_eval==0.3.0
markdown
numba==0.59.*
numpy==1.26.*
optimum==1.17.*
pandas
peft==0.12.*
Pillow>=9.5.0
Expand All @@ -23,7 +21,7 @@ safetensors==0.4.*
scipy
sentencepiece
tensorboard
transformers==4.45.*
transformers==4.46.*
tqdm
wandb

Expand Down
6 changes: 2 additions & 4 deletions requirements_nowheels.txt
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
accelerate==0.33.*
accelerate==1.0.*
colorama
datasets
einops
fastapi==0.112.4
gradio==4.26.*
jinja2==3.1.4
lm_eval==0.3.0
markdown
numba==0.59.*
numpy==1.26.*
optimum==1.17.*
pandas
peft==0.12.*
Pillow>=9.5.0
Expand All @@ -22,7 +20,7 @@ safetensors==0.4.*
scipy
sentencepiece
tensorboard
transformers==4.45.*
transformers==4.46.*
tqdm
wandb

Expand Down