diff --git a/docker/.env.example b/docker/.env.example
index 2de9f0ab6f..bd0f8bcc5e 100644
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -1,6 +1,7 @@
 # by default the Dockerfile specifies these versions: 3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX
 # however for me to work i had to specify the exact version for my card ( 2060 ) it was 7.5
 # https://developer.nvidia.com/cuda-gpus you can find the version for your card here
+# Or for a programmatic approach run `nvidia-smi --query-gpu=name,compute_cap --format=csv`
 TORCH_CUDA_ARCH_LIST=7.5
 # the port the webui binds to on the host
 HOST_PORT=7860
diff --git a/extensions/whisper_stt/script.py b/extensions/whisper_stt/script.py
index e45c8b1e7c..d949e93f73 100644
--- a/extensions/whisper_stt/script.py
+++ b/extensions/whisper_stt/script.py
@@ -96,7 +96,7 @@ def ui():
     with gr.Accordion("Settings", open=False):
         auto_submit = gr.Checkbox(label='Submit the transcribed audio automatically', value=params['auto_submit'])
         device_dropd = gr.Dropdown(label='Device', value=str(startup_device), choices=["cuda", "cpu", "none"])
-        whisper_model_dropd = gr.Dropdown(label='Whisper Model', value=params['whipser_model'], choices=["tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "large"])
+        whisper_model_dropd = gr.Dropdown(label='Whisper Model', value=params['whipser_model'], choices=["tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "large", "turbo"])
         whisper_language = gr.Dropdown(label='Whisper Language', value=params['whipser_language'], choices=["english", "chinese", "german", "spanish", "russian", "korean", "french", "japanese", "portuguese", "turkish", "polish", "catalan", "dutch", "arabic", "swedish", "italian", "indonesian", "hindi", "finnish", "vietnamese", "hebrew", "ukrainian", "greek", "malay", "czech", "romanian", "danish", "hungarian", "tamil", "norwegian", "thai", "urdu", "croatian", "bulgarian", "lithuanian", "latin", "maori", "malayalam", "welsh", "slovak", "telugu", "persian", "latvian", "bengali", "serbian", "azerbaijani", "slovenian", "kannada", "estonian", "macedonian", "breton", "basque", "icelandic", "armenian", "nepali", "mongolian", "bosnian", "kazakh", "albanian", "swahili", "galician", "marathi", "punjabi", "sinhala", "khmer", "shona", "yoruba", "somali", "afrikaans", "occitan", "georgian", "belarusian", "tajik", "sindhi", "gujarati", "amharic", "yiddish", "lao", "uzbek", "faroese", "haitian creole", "pashto", "turkmen", "nynorsk", "maltese", "sanskrit", "luxembourgish", "myanmar", "tibetan", "tagalog", "malagasy", "assamese", "tatar", "hawaiian", "lingala", "hausa", "bashkir", "javanese", "sundanese"])
 
     audio.change(
diff --git a/instruction-templates/RWKV-World.yaml b/instruction-templates/RWKV-World.yaml
new file mode 100644
index 0000000000..bf65511b8e
--- /dev/null
+++ b/instruction-templates/RWKV-World.yaml
@@ -0,0 +1,25 @@
+instruction_template: |-
+  {%- set ns = namespace(found=false) -%}
+  {%- for message in messages -%}
+      {%- if message['role'] == 'system' -%}
+          {%- set ns.found = true -%}
+      {%- endif -%}
+  {%- endfor -%}
+  {%- if not ns.found -%}
+      {{- '' + '' + '' -}}
+  {%- endif %}
+  {%- for message in messages %}
+      {%- if message['role'] == 'system' -%}
+          {{- '' + message['content'] + '' -}}
+      {%- else -%}
+          {%- if message['role'] == 'user' -%}
+              {{-'User: ' + message['content'] + '\n\n'-}}
+          {%- else -%}
+              {{-'Assistant: ' + message['content'] + '\n\n' -}}
+          {%- endif -%}
+      {%- endif -%}
+  {%- endfor -%}
+  {%- if add_generation_prompt -%}
+      {{-'Assistant:'-}}
+  {%- endif -%}
+
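For reference, the new template renders to the RWKV World chat format (`User: ...\n\nAssistant: ...`). A minimal sketch of the rendering outside the webui (a hypothetical standalone harness; the webui itself applies the template through its chat pipeline, and the file path is assumed relative to the repo root):

```python
# Render the new instruction template in isolation to inspect its output.
import yaml
from jinja2 import Template

with open('instruction-templates/RWKV-World.yaml') as f:
    template_str = yaml.safe_load(f)['instruction_template']

prompt = Template(template_str).render(
    messages=[{'role': 'user', 'content': 'What is RWKV?'}],
    add_generation_prompt=True,
)
print(repr(prompt))  # 'User: What is RWKV?\n\nAssistant:'
```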
diff --git a/modules/llama_cpp_python_hijack.py b/modules/llama_cpp_python_hijack.py
index 2a9c10da2e..f3872a7446 100644
--- a/modules/llama_cpp_python_hijack.py
+++ b/modules/llama_cpp_python_hijack.py
@@ -9,10 +9,11 @@
 from modules.cache_utils import process_llamacpp_cache
 
 imported_module = None
+not_available_modules = set()
 
 
 def llama_cpp_lib():
-    global imported_module
+    global imported_module, not_available_modules
 
     # Determine the platform
     is_macos = platform.system() == 'Darwin'
@@ -31,6 +32,9 @@ def llama_cpp_lib():
     ]
 
     for arg, lib_name in lib_names:
+        if lib_name in not_available_modules:
+            continue
+
         should_import = (arg is None or getattr(shared.args, arg))
 
         if should_import:
@@ -44,6 +48,7 @@
                 monkey_patch_llama_cpp_python(return_lib)
                 return return_lib
             except ImportError:
+                not_available_modules.add(lib_name)
                 continue
 
     return None
diff --git a/modules/llamacpp_model.py b/modules/llamacpp_model.py
index a16230caf3..96f7ed56b5 100644
--- a/modules/llamacpp_model.py
+++ b/modules/llamacpp_model.py
@@ -136,7 +136,7 @@ def generate(self, prompt, state, callback=None):
             prompt=prompt,
             max_tokens=state['max_new_tokens'],
             temperature=state['temperature'],
-            top_p=state['top_p'],
+            top_p=state['top_p'] if state['top_p'] < 1 else 0.999,
             min_p=state['min_p'],
             typical_p=state['typical_p'],
             frequency_penalty=state['frequency_penalty'],
diff --git a/modules/sampler_hijack.py b/modules/sampler_hijack.py
index 87f0b25e8e..24dbcf2ee1 100644
--- a/modules/sampler_hijack.py
+++ b/modules/sampler_hijack.py
@@ -454,7 +454,7 @@ def get_logits_processor_patch(self, **kwargs):
             )
 
         # Stuff we don't need
-        elif warpers[i].__class__.__name__ in ['SuppressTokensLogitsProcessor', 'RepetitionPenaltyLogitsProcessor']:
+        elif warpers[i].__class__.__name__ in ['RepetitionPenaltyLogitsProcessor']:
             del warpers[i]
 
         # Add custom warpers
@@ -571,11 +571,10 @@ def get_logits_processor_patch(self, **kwargs):
     if generation_config.temperature_last:
         for param_name in ['temperature', 'dynamic_temperature', 'quadratic_sampling']:
             if param_name in sampler_priority:
-                if param_name in sampler_priority:
-                    index = sampler_priority.index(param_name)
-                    sampler_priority.append(sampler_priority.pop(index))
-                else:
-                    sampler_priority.append(param_name)
+                index = sampler_priority.index(param_name)
+                sampler_priority.append(sampler_priority.pop(index))
+            else:
+                sampler_priority.append(param_name)
 
     class_name_to_nickname = {
         'DynamicTemperatureLogitsWarper': 'dynamic_temperature',
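Two notes on the sampler changes above. The `top_p` change caps the value at 0.999 so llama-cpp-python never receives exactly 1.0. In `sampler_hijack.py`, the second hunk removes a copy-pasted inner `if` whose `else` branch was unreachable, so a parameter missing from `sampler_priority` was never appended. The corrected loop behaves like this in isolation (plain-Python illustration with made-up list contents):

```python
# Samplers already in the priority list are rotated to the end (applied last);
# samplers missing from the list are appended.
sampler_priority = ['top_p', 'temperature', 'min_p']
for param_name in ['temperature', 'dynamic_temperature', 'quadratic_sampling']:
    if param_name in sampler_priority:
        index = sampler_priority.index(param_name)
        sampler_priority.append(sampler_priority.pop(index))
    else:
        sampler_priority.append(param_name)

print(sampler_priority)
# ['top_p', 'min_p', 'temperature', 'dynamic_temperature', 'quadratic_sampling']
```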
"{conda_sh_path}" && conda activate "{conda_env_path}" && {cmd}' - # Set executable to None for Windows, /bin/bash for everything else - executable = None if is_windows() else '/bin/bash' + # Set executable to None for Windows, bash for everything else + executable = None if is_windows() else 'bash' # Run shell commands result = subprocess.run(cmd, shell=True, capture_output=capture_output, env=env, executable=executable) @@ -313,7 +313,7 @@ def install_webui(): if selected_gpu == "INTEL": # Install oneAPI dependencies via conda print_big_message("Installing Intel oneAPI runtime libraries.") - run_cmd("conda install -y -c intel dpcpp-cpp-rt=2024.0 mkl-dpcpp=2024.0") + run_cmd("conda install -y -c https://software.repos.intel.com/python/conda/ -c conda-forge dpcpp-cpp-rt=2024.0 mkl-dpcpp=2024.0") # Install libuv required by Intel-patched torch run_cmd("conda install -y libuv") @@ -329,7 +329,7 @@ def install_extensions_requirements(): print_big_message("Installing extensions requirements.\nSome of these may fail on Windows.\nDon\'t worry if you see error messages, as they will not affect the main program.") extensions = get_extensions_names() for i, extension in enumerate(extensions): - print(f"\n\n--- [{i+1}/{len(extensions)}]: {extension}\n\n") + print(f"\n\n--- [{i + 1}/{len(extensions)}]: {extension}\n\n") extension_req_path = os.path.join("extensions", extension, "requirements.txt") run_cmd(f"python -m pip install -r {extension_req_path} --upgrade", assert_success=False, environment=True) diff --git a/requirements.txt b/requirements.txt index 5623f1f7ac..2549c64864 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -accelerate==0.33.* +accelerate==1.0.* bitsandbytes==0.44.* colorama datasets @@ -6,11 +6,9 @@ einops fastapi==0.112.4 gradio==4.26.* jinja2==3.1.4 -lm_eval==0.3.0 markdown numba==0.59.* numpy==1.26.* -optimum==1.17.* pandas peft==0.12.* Pillow>=9.5.0 @@ -23,7 +21,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.45.* +transformers==4.46.* tqdm wandb diff --git a/requirements_amd.txt b/requirements_amd.txt index d203c157c2..78bdd3ca84 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -1,15 +1,13 @@ -accelerate==0.33.* +accelerate==1.0.* colorama datasets einops fastapi==0.112.4 gradio==4.26.* jinja2==3.1.4 -lm_eval==0.3.0 markdown numba==0.59.* numpy==1.26.* -optimum==1.17.* pandas peft==0.12.* Pillow>=9.5.0 @@ -22,7 +20,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.45.* +transformers==4.46.* tqdm wandb diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 3a7a950d54..9420e861f8 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -1,15 +1,13 @@ -accelerate==0.33.* +accelerate==1.0.* colorama datasets einops fastapi==0.112.4 gradio==4.26.* jinja2==3.1.4 -lm_eval==0.3.0 markdown numba==0.59.* numpy==1.26.* -optimum==1.17.* pandas peft==0.12.* Pillow>=9.5.0 @@ -22,7 +20,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard -transformers==4.45.* +transformers==4.46.* tqdm wandb diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 61c5367ffb..625021ee1f 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -1,15 +1,13 @@ -accelerate==0.33.* +accelerate==1.0.* colorama datasets einops fastapi==0.112.4 gradio==4.26.* jinja2==3.1.4 -lm_eval==0.3.0 markdown numba==0.59.* numpy==1.26.* -optimum==1.17.* pandas peft==0.12.* Pillow>=9.5.0 @@ -22,7 +20,7 @@ safetensors==0.4.* scipy sentencepiece tensorboard 
diff --git a/requirements.txt b/requirements.txt
index 5623f1f7ac..2549c64864 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-accelerate==0.33.*
+accelerate==1.0.*
 bitsandbytes==0.44.*
 colorama
 datasets
@@ -6,11 +6,9 @@ einops
 fastapi==0.112.4
 gradio==4.26.*
 jinja2==3.1.4
-lm_eval==0.3.0
 markdown
 numba==0.59.*
 numpy==1.26.*
-optimum==1.17.*
 pandas
 peft==0.12.*
 Pillow>=9.5.0
@@ -23,7 +21,7 @@ safetensors==0.4.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.45.*
+transformers==4.46.*
 tqdm
 wandb
 
diff --git a/requirements_amd.txt b/requirements_amd.txt
index d203c157c2..78bdd3ca84 100644
--- a/requirements_amd.txt
+++ b/requirements_amd.txt
@@ -1,15 +1,13 @@
-accelerate==0.33.*
+accelerate==1.0.*
 colorama
 datasets
 einops
 fastapi==0.112.4
 gradio==4.26.*
 jinja2==3.1.4
-lm_eval==0.3.0
 markdown
 numba==0.59.*
 numpy==1.26.*
-optimum==1.17.*
 pandas
 peft==0.12.*
 Pillow>=9.5.0
@@ -22,7 +20,7 @@ safetensors==0.4.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.45.*
+transformers==4.46.*
 tqdm
 wandb
 
diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt
index 3a7a950d54..9420e861f8 100644
--- a/requirements_amd_noavx2.txt
+++ b/requirements_amd_noavx2.txt
@@ -1,15 +1,13 @@
-accelerate==0.33.*
+accelerate==1.0.*
 colorama
 datasets
 einops
 fastapi==0.112.4
 gradio==4.26.*
 jinja2==3.1.4
-lm_eval==0.3.0
 markdown
 numba==0.59.*
 numpy==1.26.*
-optimum==1.17.*
 pandas
 peft==0.12.*
 Pillow>=9.5.0
@@ -22,7 +20,7 @@ safetensors==0.4.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.45.*
+transformers==4.46.*
 tqdm
 wandb
 
diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt
index 61c5367ffb..625021ee1f 100644
--- a/requirements_apple_intel.txt
+++ b/requirements_apple_intel.txt
@@ -1,15 +1,13 @@
-accelerate==0.33.*
+accelerate==1.0.*
 colorama
 datasets
 einops
 fastapi==0.112.4
 gradio==4.26.*
 jinja2==3.1.4
-lm_eval==0.3.0
 markdown
 numba==0.59.*
 numpy==1.26.*
-optimum==1.17.*
 pandas
 peft==0.12.*
 Pillow>=9.5.0
@@ -22,7 +20,7 @@ safetensors==0.4.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.45.*
+transformers==4.46.*
 tqdm
 wandb
 
diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt
index 494c127afe..3cb66cbc81 100644
--- a/requirements_apple_silicon.txt
+++ b/requirements_apple_silicon.txt
@@ -1,15 +1,13 @@
-accelerate==0.33.*
+accelerate==1.0.*
 colorama
 datasets
 einops
 fastapi==0.112.4
 gradio==4.26.*
 jinja2==3.1.4
-lm_eval==0.3.0
 markdown
 numba==0.59.*
 numpy==1.26.*
-optimum==1.17.*
 pandas
 peft==0.12.*
 Pillow>=9.5.0
@@ -22,7 +20,7 @@ safetensors==0.4.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.45.*
+transformers==4.46.*
 tqdm
 wandb
 
diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt
index 5838ac47c2..fbd6447bdf 100644
--- a/requirements_cpu_only.txt
+++ b/requirements_cpu_only.txt
@@ -1,15 +1,13 @@
-accelerate==0.33.*
+accelerate==1.0.*
 colorama
 datasets
 einops
 fastapi==0.112.4
 gradio==4.26.*
 jinja2==3.1.4
-lm_eval==0.3.0
 markdown
 numba==0.59.*
 numpy==1.26.*
-optimum==1.17.*
 pandas
 peft==0.12.*
 Pillow>=9.5.0
@@ -22,7 +20,7 @@ safetensors==0.4.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.45.*
+transformers==4.46.*
 tqdm
 wandb
 
diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt
index 97d4eb9063..e9ab0fbad3 100644
--- a/requirements_cpu_only_noavx2.txt
+++ b/requirements_cpu_only_noavx2.txt
@@ -1,15 +1,13 @@
-accelerate==0.33.*
+accelerate==1.0.*
 colorama
 datasets
 einops
 fastapi==0.112.4
 gradio==4.26.*
 jinja2==3.1.4
-lm_eval==0.3.0
 markdown
 numba==0.59.*
 numpy==1.26.*
-optimum==1.17.*
 pandas
 peft==0.12.*
 Pillow>=9.5.0
@@ -22,7 +20,7 @@ safetensors==0.4.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.45.*
+transformers==4.46.*
 tqdm
 wandb
 
diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt
index d34a598d65..99791ea9b4 100644
--- a/requirements_noavx2.txt
+++ b/requirements_noavx2.txt
@@ -1,4 +1,4 @@
-accelerate==0.33.*
+accelerate==1.0.*
 bitsandbytes==0.44.*
 colorama
 datasets
@@ -6,11 +6,9 @@ einops
 fastapi==0.112.4
 gradio==4.26.*
 jinja2==3.1.4
-lm_eval==0.3.0
 markdown
 numba==0.59.*
 numpy==1.26.*
-optimum==1.17.*
 pandas
 peft==0.12.*
 Pillow>=9.5.0
@@ -23,7 +21,7 @@ safetensors==0.4.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.45.*
+transformers==4.46.*
 tqdm
 wandb
 
diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt
index 5c840f56de..f5c3966eb3 100644
--- a/requirements_nowheels.txt
+++ b/requirements_nowheels.txt
@@ -1,15 +1,13 @@
-accelerate==0.33.*
+accelerate==1.0.*
 colorama
 datasets
 einops
 fastapi==0.112.4
 gradio==4.26.*
 jinja2==3.1.4
-lm_eval==0.3.0
 markdown
 numba==0.59.*
 numpy==1.26.*
-optimum==1.17.*
 pandas
 peft==0.12.*
 Pillow>=9.5.0
@@ -22,7 +20,7 @@ safetensors==0.4.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.45.*
+transformers==4.46.*
 tqdm
 wandb
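Every requirements file gets the same three changes: `accelerate` bumped to 1.0.*, `transformers` bumped to 4.46.*, and the unused `lm_eval` and `optimum` pins dropped. A quick post-upgrade sanity check (an assumed helper snippet to run inside the webui's environment, not part of the patch):

```python
# Confirm the bumped pins took and the dropped packages are gone.
import importlib.metadata as md

for pkg in ('accelerate', 'transformers'):
    print(pkg, md.version(pkg))  # expect 1.0.x and 4.46.x

for pkg in ('lm_eval', 'optimum'):  # removed from the requirements files
    try:
        print(pkg, md.version(pkg), '(still installed)')
    except md.PackageNotFoundError:
        print(pkg, 'not installed')
```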