# ai-suite-rocm-local/services/text_generation_webui.py

from core.stack import Stack


class TextGenerationWebui(Stack):
    """Stack definition for oobabooga's text-generation-webui.

    Registers the service with the base ``Stack`` (display name, identifier,
    port 5000 and upstream repository URL) and provides the install and start
    steps. All actual work is delegated to helpers inherited from ``Stack``
    (``pip_install``, ``git_clone``, ``remove_line_in_file``, ...), whose
    exact semantics are defined in ``core.stack`` and are not visible here.
    """

    # Prebuilt wheels pinned by URL. Per their file names these target
    # CPython 3.10 on Linux x86_64 and specific ROCm builds; bump them here
    # when upgrading.
    _AUTOAWQ_KERNELS_WHEEL = (
        "https://github.com/casper-hansen/AutoAWQ_kernels/releases/download/v0.0.7/autoawq_kernels-0.0.7+rocm571-cp310-cp310-linux_x86_64.whl"
    )
    _AUTOAWQ_WHEEL = (
        "https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl"
    )
    _EXLLAMAV2_WHEEL = (
        "https://github.com/turboderp/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+rocm6.1.torch2.4.0-cp310-cp310-linux_x86_64.whl"
    )

    def __init__(self):
        """Register the service's name, identifier, port and repository URL."""
        super().__init__(
            'Text Generation',
            'text_generation_webui',
            5000,
            'https://github.com/oobabooga/text-generation-webui/'
        )

    def _install(self):
        """Install llama-cpp-python, the webui and its extra backends.

        The steps run strictly in order: llama-cpp-python (CPU/OpenBLAS
        build), the webui checkout with a trimmed AMD requirements file,
        the AutoAWQ wheels, and finally exllamav2, bitsandbytes and
        auto-gptq.
        """
        # Install LlamaCpp built against OpenBLAS (CPU only).
        self.pip_install("llama-cpp-python", env=["CMAKE_ARGS=\"-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS\""])  # cpu

        # Alternative: install LlamaCpp for ROCM from source / prebuilt.
        # Both variants only work if the whole ROCm suite is installed.
        # self.pip_install("llama-cpp-python", env=["CMAKE_ARGS=\"-DGGML_HIPBLAS=on\" FORCE_CMAKE=1"]) # manual gpu
        # self.install_from_prebuilt("llama_cpp_python") # gpu

        # Alternative: install Triton for ROCM from prebuilt
        # self.install_from_prebuilt("triton")

        # Alternative: install Triton for ROCM from source
        # self.git_clone(url="https://github.com/ROCmSoftwarePlatform/triton.git")
        # self.pip_install(['ninja', 'cmake'])
        # self.pip("install -e .", path="triton")

        # Install the webui. The listed packages are taken out of the AMD
        # requirements file first (presumably by dropping matching lines -
        # see Stack.remove_line_in_file) because they are installed
        # separately, here and above.
        self.git_clone(url=self.url, dest="webui")
        self.remove_line_in_file(["accelerate", "lm_eval", "optimum", "autoawq", "llama_cpp_python"],
                                 "webui/requirements_amd.txt")
        self.install_requirements("webui/requirements_amd.txt")
        self.pip_install(["accelerate", "optimum"])
        # AutoAWQ wheels are installed with no_deps=True.
        self.pip_install(self._AUTOAWQ_KERNELS_WHEEL, no_deps=True)
        self.pip_install(self._AUTOAWQ_WHEEL, no_deps=True)
        # Fix llama trying to use cuda version
        self.remove_line_in_file("llama_cpp_cuda", "webui/modules/llama_cpp_python_hijack.py")

        # Install useful packages
        self.pip_install(self._EXLLAMAV2_WHEEL)
        self.install_from_prebuilt("bitsandbytes")
        self.pip_install("auto-gptq", args=["--no-build-isolation", "--extra-index-url",
                                            "https://huggingface.github.io/autogptq-index/whl/rocm573/"])

    def _start(self):
        """Launch the webui's ``server.py`` as a daemon on ``self.port``.

        The server is started from the ``webui`` directory with ``--listen``
        and ``--listen-port``, and with ``TORCH_BLAS_PREFER_HIPBLASLT=0`` in
        its environment.
        """
        args = ["--listen", "--listen-port", str(self.port)]
        # Plain string literal: the original f-string had no placeholders.
        self.python("server.py", args=args, current_dir="webui",
                    env=["TORCH_BLAS_PREFER_HIPBLASLT=0"], daemon=True)
massive refactor, added terminal interface, daemon system (still not working) 2024-08-28 22:20:36 +02:00			`from core.stack import Stack`
add all the old services 2024-08-28 10:39:27 +02:00

massive refactor, added terminal interface, daemon system (still not working) 2024-08-28 22:20:36 +02:00			`class TextGenerationWebui(Stack):`
add all the old services 2024-08-28 10:39:27 +02:00			`def __init__(self):`
			`super().__init__(`
			`'Text Generation',`
massive refactor, added terminal interface, daemon system (still not working) 2024-08-28 22:20:36 +02:00			`'text_generation_webui',`
add all the old services 2024-08-28 10:39:27 +02:00			`5000,`
			`'https://github.com/oobabooga/text-generation-webui/'`
			`)`

improved/fixed start/install/update system 2024-08-29 12:26:37 +02:00			`def _install(self):`
add all the old services 2024-08-28 10:39:27 +02:00			`# Install LlamaCpp from prebuilt`
add daemon system and began working on ui 2024-08-28 20:05:46 +02:00			`self.pip_install("llama-cpp-python", env=["CMAKE_ARGS=\"-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS\""]) # cpu`
add all the old services 2024-08-28 10:39:27 +02:00
			`# Install LlamaCpp for ROCM from source`
add daemon system and began working on ui 2024-08-28 20:05:46 +02:00			`# self.pip_install("llama-cpp-python", env=["CMAKE_ARGS=\"-DGGML_HIPBLAS=on\" FORCE_CMAKE=1"]) # manual gpu (only works if whole rocm suite installed)`
add all the old services 2024-08-28 10:39:27 +02:00			`# self.install_from_prebuilt("llama_cpp_python") # gpu (only works if whole rocm suite installed)`

			`# Install Triton for ROCM from prebuilt`
			`# self.install_from_prebuilt("triton")`

			`# Install Triton for ROCM from source`
			`# self.git_clone(url="https://github.com/ROCmSoftwarePlatform/triton.git")`
			`# self.pip_install(['ninja', 'cmake'])`
			`# self.pip("install -e .", path="triton")`

			`# Install the webui`
			`self.git_clone(url=self.url, dest="webui")`
			`self.remove_line_in_file(["accelerate", "lm_eval", "optimum", "autoawq", "llama_cpp_python"],`
fix paths for text gen 2024-08-30 12:38:18 +02:00			`"webui/requirements_amd.txt")`
			`self.install_requirements("webui/requirements_amd.txt")`
add all the old services 2024-08-28 10:39:27 +02:00			`self.pip_install(["accelerate", "optimum"])`
			`self.pip_install(`
			`"https://github.com/casper-hansen/AutoAWQ_kernels/releases/download/v0.0.7/autoawq_kernels-0.0.7+rocm571-cp310-cp310-linux_x86_64.whl",`
			`no_deps=True)`
			`self.pip_install(`
			`"https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl",`
			`no_deps=True)`
			`# Fix llama trying to use cuda version`
fix paths for text gen 2024-08-30 12:38:18 +02:00			`self.remove_line_in_file("llama_cpp_cuda", "webui/modules/llama_cpp_python_hijack.py")`
add all the old services 2024-08-28 10:39:27 +02:00
			`# Install useful packages`
			`self.pip_install(`
			`"https://github.com/turboderp/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+rocm6.1.torch2.4.0-cp310-cp310-linux_x86_64.whl")`
			`self.install_from_prebuilt("bitsandbytes")`
add daemon system and began working on ui 2024-08-28 20:05:46 +02:00			`self.pip_install("auto-gptq", args=["--no-build-isolation", "--extra-index-url",`
			`"https://huggingface.github.io/autogptq-index/whl/rocm573/"])`
add all the old services 2024-08-28 10:39:27 +02:00
improved/fixed start/install/update system 2024-08-29 12:26:37 +02:00			`def _start(self):`
add all the old services 2024-08-28 10:39:27 +02:00			`args = ["--listen", "--listen-port", str(self.port)]`
fixed daemon running system 2024-08-29 12:03:37 +02:00			`self.python(f"server.py", args=args, current_dir="webui",`
add daemon system and began working on ui 2024-08-28 20:05:46 +02:00			`env=["TORCH_BLAS_PREFER_HIPBLASLT=0"], daemon=True)`