diff --git a/services/text_generation_webui.py b/services/text_generation_webui.py
index f7ca181..97dd8ae 100644
--- a/services/text_generation_webui.py
+++ b/services/text_generation_webui.py
@@ -2,6 +2,7 @@ from core.stack import Stack
 
 
 class TextGenerationWebui(Stack):
+
     def __init__(self):
         super().__init__(
             'Text Generation',
@@ -10,6 +11,8 @@ class TextGenerationWebui(Stack):
             'https://github.com/oobabooga/text-generation-webui/'
         )
 
+        self.exllama = "0.2.1"
+
     def _install(self):
         # Install LlamaCpp from prebuilt
         self.pip_install("llama-cpp-python", env=["CMAKE_ARGS=\"-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS\""])  # cpu
@@ -41,13 +44,19 @@ class TextGenerationWebui(Stack):
         # Fix llama trying to use cuda version
         self.remove_line_in_file("llama_cpp_cuda", "webui/modules/llama_cpp_python_hijack.py")
 
-        # Install useful packages
+        # Install ExLlamaV2 and auto-gptq
         self.pip_install(
-            "https://github.com/turboderp/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+rocm6.1.torch2.4.0-cp310-cp310-linux_x86_64.whl")
+            f"https://github.com/turboderp/exllamav2/releases/download/v{self.exllama}/exllamav2-{self.exllama}+rocm6.1.torch2.4.0-cp310-cp310-linux_x86_64.whl")
         self.install_from_prebuilt("bitsandbytes")
         self.pip_install("auto-gptq", args=["--no-build-isolation", "--extra-index-url",
                                             "https://huggingface.github.io/autogptq-index/whl/rocm573/"])
 
+    def _update(self):
+        self.pip_install(
+            f"https://github.com/turboderp/exllamav2/releases/download/v{self.exllama}/exllamav2-{self.exllama}+rocm6.1.torch2.4.0-cp310-cp310-linux_x86_64.whl")
+        self.pip_install("auto-gptq", args=["--no-build-isolation", "--extra-index-url",
+                                            "https://huggingface.github.io/autogptq-index/whl/rocm573/"])
+
     def _start(self):
         args = ["--listen", "--listen-port", str(self.port)]
         self.python(f"server.py", args=args, current_dir="webui",