diff --git a/Dockerfile b/Dockerfile
index 71ff1fa72028..242b22b6c673 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -14,7 +14,7 @@ ARG TARGETARCH
 ARG TARGETVARIANT
 
 ENV BUILD_TYPE=${BUILD_TYPE}
-ENV EXTERNAL_GRPC_BACKENDS="huggingface-embeddings:/build/extra/grpc/huggingface/run.sh,autogptq:/build/extra/grpc/autogptq/run.sh,bark:/build/extra/grpc/bark/run.sh,diffusers:/build/extra/grpc/diffusers/run.sh,exllama:/build/extra/grpc/exllama/exllama.py,vall-e-x:/build/extra/grpc/vall-e-x/ttsvalle.py,vllm:/build/extra/grpc/vllm/run.sh"
+ENV EXTERNAL_GRPC_BACKENDS="huggingface-embeddings:/build/extra/grpc/huggingface/run.sh,autogptq:/build/extra/grpc/autogptq/run.sh,bark:/build/extra/grpc/bark/run.sh,diffusers:/build/extra/grpc/diffusers/run.sh,exllama:/build/extra/grpc/exllama/exllama.py,vall-e-x:/build/extra/grpc/vall-e-x/run.sh,vllm:/build/extra/grpc/vllm/run.sh"
 ENV GALLERIES='[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}, {"url": "github:go-skynet/model-gallery/huggingface.yaml","name":"huggingface"}]'
 
 ARG GO_TAGS="stablediffusion tts"
diff --git a/Makefile b/Makefile
index ac3e8ba6ae3a..5a5a5fa541aa 100644
--- a/Makefile
+++ b/Makefile
@@ -415,6 +415,8 @@ prepare-extra-conda-environments:
 	$(MAKE) -C extra/grpc/diffusers
 	$(MAKE) -C extra/grpc/vllm
 	$(MAKE) -C extra/grpc/huggingface
+	$(MAKE) -C extra/grpc/vall-e-x
+
 
 backend-assets/grpc:
 	mkdir -p backend-assets/grpc
diff --git a/extra/grpc/vall-e-x/Makefile b/extra/grpc/vall-e-x/Makefile
new file mode 100644
index 000000000000..7216967d5a54
--- /dev/null
+++ b/extra/grpc/vall-e-x/Makefile
@@ -0,0 +1,11 @@
+.PHONY: ttsvalle
+ttsvalle:
+	@echo "Creating virtual environment..."
+	@conda env create --name ttsvalle --file ttsvalle.yml
+	@echo "Virtual environment created."
+
+.PHONY: run
+run:
+	@echo "Running ttsvalle..."
+	bash run.sh
+	@echo "ttsvalle run."
\ No newline at end of file
diff --git a/extra/grpc/vall-e-x/README.md b/extra/grpc/vall-e-x/README.md
new file mode 100644
index 000000000000..a3a93361bfb3
--- /dev/null
+++ b/extra/grpc/vall-e-x/README.md
@@ -0,0 +1,5 @@
+# Creating a separate environment for the ttsvalle project
+
+```
+make ttsvalle
+```
\ No newline at end of file
diff --git a/extra/grpc/vall-e-x/run.sh b/extra/grpc/vall-e-x/run.sh
new file mode 100644
index 000000000000..f42c79e60c37
--- /dev/null
+++ b/extra/grpc/vall-e-x/run.sh
@@ -0,0 +1,10 @@
+##
+## A bash script wrapper that runs the ttsvalle server with conda
+
+# Activate conda environment
+source activate ttsvalle
+
+# get the directory where the bash script is located
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+python $DIR/ttsvalle.py
\ No newline at end of file
diff --git a/extra/grpc/vall-e-x/ttsvalle.py b/extra/grpc/vall-e-x/ttsvalle.py
index be7f3cab1033..d7c5d700fe1b 100644
--- a/extra/grpc/vall-e-x/ttsvalle.py
+++ b/extra/grpc/vall-e-x/ttsvalle.py
@@ -1,14 +1,15 @@
 #!/usr/bin/env python3
-import grpc
+
 from concurrent import futures
-import time
-import backend_pb2
-import backend_pb2_grpc
 import argparse
 import signal
 import sys
 import os
-from pathlib import Path
+import time
+import backend_pb2
+import backend_pb2_grpc
+
+import grpc
 
 from utils.generation import SAMPLE_RATE, generate_audio, preload_models
 from scipy.io.wavfile import write as write_wav
@@ -21,9 +22,34 @@
 # Implement the BackendServicer class with the service methods
 class BackendServicer(backend_pb2_grpc.BackendServicer):
+    """
+    gRPC servicer for backend services.
+    """
     def Health(self, request, context):
+        """
+        Health check service.
+
+        Args:
+            request: A backend_pb2.HealthRequest instance.
+            context: A grpc.ServicerContext instance.
+
+        Returns:
+            A backend_pb2.Reply instance with message "OK".
+        """
         return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
 
+    def LoadModel(self, request, context):
+        """
+        Load model service.
+
+        Args:
+            request: A backend_pb2.LoadModelRequest instance.
+            context: A grpc.ServicerContext instance.
+
+        Returns:
+            A backend_pb2.Result instance with message "Model loaded successfully" and success=True if successful.
+            A backend_pb2.Result instance with success=False and error message if unsuccessful.
+        """
         model_name = request.Model
         try:
             print("Preparing models, please wait", file=sys.stderr)
@@ -49,6 +75,17 @@ def LoadModel(self, request, context):
         return backend_pb2.Result(message="Model loaded successfully", success=True)
 
     def TTS(self, request, context):
+        """
+        Text-to-speech service.
+
+        Args:
+            request: A backend_pb2.TTSRequest instance.
+            context: A grpc.ServicerContext instance.
+
+        Returns:
+            A backend_pb2.Result instance with success=True if successful.
+            A backend_pb2.Result instance with success=False and error message if unsuccessful.
+        """
         model = request.model
         print(request, file=sys.stderr)
         try:
@@ -97,4 +134,4 @@ def signal_handler(sig, frame):
     )
     args = parser.parse_args()
 
-    serve(args.addr)
\ No newline at end of file
+    serve(args.addr)
diff --git a/extra/grpc/vall-e-x/ttsvalle.yml b/extra/grpc/vall-e-x/ttsvalle.yml
new file mode 100644
index 000000000000..72f232b5feaa
--- /dev/null
+++ b/extra/grpc/vall-e-x/ttsvalle.yml
@@ -0,0 +1,101 @@
+name: ttsvalle
+channels:
+  - defaults
+dependencies:
+  - _libgcc_mutex=0.1=main
+  - _openmp_mutex=5.1=1_gnu
+  - bzip2=1.0.8=h7b6447c_0
+  - ca-certificates=2023.08.22=h06a4308_0
+  - ld_impl_linux-64=2.38=h1181459_1
+  - libffi=3.4.4=h6a678d5_0
+  - libgcc-ng=11.2.0=h1234567_1
+  - libgomp=11.2.0=h1234567_1
+  - libstdcxx-ng=11.2.0=h1234567_1
+  - libuuid=1.41.5=h5eee18b_0
+  - ncurses=6.4=h6a678d5_0
+  - openssl=3.0.11=h7f8727e_2
+  - pip=23.2.1=py310h06a4308_0
+  - python=3.10.13=h955ad1f_0
+  - readline=8.2=h5eee18b_0
+  - setuptools=68.0.0=py310h06a4308_0
+  - sqlite=3.41.2=h5eee18b_0
+  - tk=8.6.12=h1ccaba5_0
+  - tzdata=2023c=h04d1e81_0
+  - wheel=0.41.2=py310h06a4308_0
+  - xz=5.4.2=h5eee18b_0
+  - zlib=1.2.13=h5eee18b_0
+  - pip:
+      - aiofiles==23.2.1
+      - altair==5.1.2
+      - annotated-types==0.6.0
+      - anyio==3.7.1
+      - click==8.1.7
+      - cn2an==0.5.22
+      - cython==3.0.3
+      - einops==0.7.0
+      - encodec==0.1.1
+      - eng-to-ipa==0.0.2
+      - fastapi==0.103.2
+      - ffmpeg-python==0.2.0
+      - ffmpy==0.3.1
+      - fsspec==2023.9.2
+      - future==0.18.3
+      - gradio==3.47.1
+      - gradio-client==0.6.0
+      - grpcio==1.59.0
+      - h11==0.14.0
+      - httpcore==0.18.0
+      - httpx==0.25.0
+      - huggingface-hub==0.17.3
+      - importlib-resources==6.1.0
+      - inflect==7.0.0
+      - jieba==0.42.1
+      - langid==1.1.6
+      - llvmlite==0.41.0
+      - more-itertools==10.1.0
+      - nltk==3.8.1
+      - numba==0.58.0
+      - numpy==1.25.2
+      - nvidia-cublas-cu12==12.1.3.1
+      - nvidia-cuda-cupti-cu12==12.1.105
+      - nvidia-cuda-nvrtc-cu12==12.1.105
+      - nvidia-cuda-runtime-cu12==12.1.105
+      - nvidia-cudnn-cu12==8.9.2.26
+      - nvidia-cufft-cu12==11.0.2.54
+      - nvidia-curand-cu12==10.3.2.106
+      - nvidia-cusolver-cu12==11.4.5.107
+      - nvidia-cusparse-cu12==12.1.0.106
+      - nvidia-nccl-cu12==2.18.1
+      - nvidia-nvjitlink-cu12==12.2.140
+      - nvidia-nvtx-cu12==12.1.105
+      - openai-whisper==20230306
+      - orjson==3.9.7
+      - proces==0.1.7
+      - protobuf==4.24.4
+      - pydantic==2.4.2
+      - pydantic-core==2.10.1
+      - pydub==0.25.1
+      - pyopenjtalk-prebuilt==0.3.0
+      - pypinyin==0.49.0
+      - python-multipart==0.0.6
+      - regex==2023.10.3
+      - safetensors==0.4.0
+      - semantic-version==2.10.0
+      - soundfile==0.12.1
+      - starlette==0.27.0
+      - sudachidict-core==20230927
+      - sudachipy==0.6.7
+      - tokenizers==0.14.1
+      - toolz==0.12.0
+      - torch==2.1.0
+      - torchaudio==2.1.0
+      - torchvision==0.16.0
+      - tqdm==4.66.1
+      - transformers==4.34.0
+      - triton==2.1.0
+      - unidecode==1.3.7
+      - uvicorn==0.23.2
+      - vocos==0.0.3
+      - websockets==11.0.3
+      - wget==3.2
+prefix: /opt/conda/envs/ttsvalle
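
For a quick sanity check of the servicer wired up above: LocalAI launches this backend through `extra/grpc/vall-e-x/run.sh` (the updated `EXTERNAL_GRPC_BACKENDS` entry) and then talks to `BackendServicer` over gRPC, but `Health` reads no request fields and always returns `Reply(message=b"OK")`, so it can be exercised without a channel. The sketch below is a hypothetical helper, not part of the patch; it assumes the `ttsvalle` conda environment is active and that it runs from `extra/grpc/vall-e-x`, where `ttsvalle.py`, the generated `backend_pb2`/`backend_pb2_grpc` stubs, and the VALL-E X `utils` package are importable.

```
#!/usr/bin/env python3
# smoke_test.py -- hypothetical, minimal check; not part of this patch.
# Importing ttsvalle pulls in grpc, backend_pb2/backend_pb2_grpc and the
# VALL-E X utils package at module level, so this only works inside the
# ttsvalle conda environment with those modules on the path.
from ttsvalle import BackendServicer

servicer = BackendServicer()

# Health() ignores its arguments and always answers Reply(message=b"OK"),
# so no gRPC channel or generated request message is needed here.
reply = servicer.Health(None, None)
assert reply.message == b"OK"
print("vall-e-x backend servicer responded:", reply.message)

# A real client would instead use backend_pb2_grpc.BackendStub against the
# address passed to ttsvalle.py via --addr; the request message names used
# there should be checked against backend.proto rather than taken from the
# docstrings above.
```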