diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..35500d1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,76 @@ +.env +config.yaml + +/service/__init__.pyc +*.pyc +/service/test/__init__.pyc +/service/test/test_call.pyc +_trial_temp/ +*.swp + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg +*.log + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +*.log.* + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + + +*.sqlite3 +*.*~ +*.passwd +.coverage + diff --git a/config.yaml b/config.yaml index 658bb9d..6d82903 100644 --- a/config.yaml +++ b/config.yaml @@ -1,26 +1,16 @@ -redis: - host: redis - port: 6379 - logging: version: 1 formatters: normal: format: "[%(asctime)s] [%(levelname)s@%(name)s] %(message)s" handlers: - file: - class: logging.handlers.RotatingFileHandler - formatter: normal - filename: publisher.log - maxBytes: 10485760 - backupCount: 10 console: class: logging.StreamHandler formatter: normal loggers: publisher: level: DEBUG - handlers: [file, console] + handlers: [console] root: level: DEBUG - handlers: [file, console] + handlers: [console] diff --git a/docker-compose.yml b/docker-compose.yml index cd54beb..2130fd0 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,43 +1,85 @@ version: "3.7" services: - speech-azure: - build: speech-azure - restart: unless-stopped - networks: - - voice-api - ports: - - 8181:8000 - volumes: - - ./speech-azure/src/:/app - - /var/lib/asterisk/sounds/:/sounds - - /etc/localtime:/etc/localtime + # speech-azure: + # image: ikonoim/speech-azure:0.0.1 + # build: + # context: speech-azure + # dockerfile: Dockerfile + # # runtime, or dev, or test for running tests + # target: runtime + # restart: unless-stopped + # ports: + # - 8181:8000 + # environment: + # - AZURE_API_TOKEN + # - DEFAULT_LANG=es-MX-JorgeNeural + # volumes: + # - /etc/localtime:/etc/localtime + # - /var/lib/asterisk/sounds/:/sounds + # ## - ./speech-azure/src/:/app + # logging: + # driver: json-file + # options: + # max-size: 20m + # max-file: "10" + # networks: + # - voice-api - speech-google: - build: speech-google - restart: unless-stopped - networks: - - voice-api - ports: - - 8182:8000 - volumes: - - ./speech-google/src/:/app - - /var/lib/asterisk/sounds/:/sounds - - /etc/localtime:/etc/localtime + # speech-google: + # image: ikonoim/speech-google:0.0.1 + # build: + # context: speech-google + # dockerfile: Dockerfile + # # runtime, or dev, or test for running tests + # target: runtime + # restart: unless-stopped + # ports: + # - 8182:8000 + # environment: + # - GOOGLE_APPLICATION_CREDENTIALS + # volumes: + # - /etc/localtime:/etc/localtime + # - /var/lib/asterisk/sounds/:/sounds + # ## - ./speech-google/src/:/app + # logging: + # driver: json-file + # options: + # max-size: 20m + # max-file: "10" + # networks: + # - voice-api tts-azure: - build: tts-azure + image: ikonoim/tts-azure:0.0.1 + build: + context: tts-azure + dockerfile: Dockerfile + # runtime, or dev, or test for running tests + target: runtime restart: unless-stopped - networks: - - voice-api ports: - 8081:8000 + environment: + - AZURE_API_TOKEN volumes: - - ./tts-azure/src/:/app - - /var/lib/asterisk/sounds/:/sounds + - ./config.yaml:/etc/config.yaml - /etc/localtime:/etc/localtime - #entrypoint: watchmedo auto-restart --recursive --pattern="*.py" --directory="." python main.py + - /var/lib/asterisk/sounds/:/sounds + ## - ./tts-azure/src/:/app + logging: + driver: json-file + options: + max-size: 20m + max-file: "10" + networks: + - voice-api networks: voice-api: - external: true + name: voice-api + # driver: overlay + attachable: true + ipam: + config: + - subnet: 172.31.128.0/24 diff --git a/speech-azure/src/__pycache__/main.cpython-38.pyc b/speech-azure/src/__pycache__/main.cpython-38.pyc deleted file mode 100644 index 55f6aeb..0000000 Binary files a/speech-azure/src/__pycache__/main.cpython-38.pyc and /dev/null differ diff --git a/speech-azure/src/config.yaml b/speech-azure/src/config.yaml deleted file mode 100644 index ddccdc5..0000000 --- a/speech-azure/src/config.yaml +++ /dev/null @@ -1,22 +0,0 @@ -logging: - version: 1 - formatters: - normal: - format: "[%(asctime)s] [%(levelname)s@%(name)s] %(message)s" - handlers: - file: - class: logging.handlers.RotatingFileHandler - formatter: normal - filename: publisher.log - maxBytes: 10485760 - backupCount: 10 - console: - class: logging.StreamHandler - formatter: normal - loggers: - publisher: - level: DEBUG - handlers: [file, console] - root: - level: DEBUG - handlers: [file, console] diff --git a/speech-google/src/__pycache__/main.cpython-38.pyc b/speech-google/src/__pycache__/main.cpython-38.pyc deleted file mode 100644 index a83798e..0000000 Binary files a/speech-google/src/__pycache__/main.cpython-38.pyc and /dev/null differ diff --git a/speech-google/src/config.yaml b/speech-google/src/config.yaml deleted file mode 100644 index ddccdc5..0000000 --- a/speech-google/src/config.yaml +++ /dev/null @@ -1,22 +0,0 @@ -logging: - version: 1 - formatters: - normal: - format: "[%(asctime)s] [%(levelname)s@%(name)s] %(message)s" - handlers: - file: - class: logging.handlers.RotatingFileHandler - formatter: normal - filename: publisher.log - maxBytes: 10485760 - backupCount: 10 - console: - class: logging.StreamHandler - formatter: normal - loggers: - publisher: - level: DEBUG - handlers: [file, console] - root: - level: DEBUG - handlers: [file, console] diff --git a/tts-azure/Dockerfile b/tts-azure/Dockerfile index df9380d..f5bed29 100644 --- a/tts-azure/Dockerfile +++ b/tts-azure/Dockerfile @@ -1,4 +1,4 @@ -FROM tiangolo/uvicorn-gunicorn:python3.8 +FROM tiangolo/uvicorn-gunicorn:python3.8 as dev LABEL name="TTS API" LABEL authors="iKono Telecomunicaciones" @@ -11,9 +11,10 @@ RUN apt-get update \ # copy the dependencies file to the working directory COPY requirements.txt . - +COPY requirements-dev.txt . # install dependencies RUN pip install --no-cache-dir -r requirements.txt +RUN pip install --no-cache-dir -r requirements-dev.txt # Add metadata to the image to describe which port the container is listening on at runtime. EXPOSE 8000 @@ -32,4 +33,23 @@ ENTRYPOINT ["/usr/local/bin/uvicorn"] # app: the object created inside of main.py with the line app = FastAPI(). # --reload: make the server restart after code changes. Only do this for development. # --host 0.0.0.0: Bind socket to this host. -CMD ["main:app", "--host", "0.0.0.0", "--port", "8000"] +CMD ["main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"] + + + +FROM python:3.8-slim as runtime + +RUN apt-get update \ + && apt-get install -y \ + sox \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /code + +COPY ./requirements.txt /code/requirements.txt + +RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt + +COPY ./src /code/app + +CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/tts-azure/requirements-dev.txt b/tts-azure/requirements-dev.txt new file mode 100644 index 0000000..523b880 --- /dev/null +++ b/tts-azure/requirements-dev.txt @@ -0,0 +1,2 @@ +watchdog +argh diff --git a/tts-azure/requirements.txt b/tts-azure/requirements.txt index 7970d9d..ae305a7 100644 --- a/tts-azure/requirements.txt +++ b/tts-azure/requirements.txt @@ -1,6 +1,5 @@ -fastapi pyyaml +fastapi aiohttp sox -watchdog -argh +uvicorn \ No newline at end of file diff --git a/tts-azure/src/__pycache__/main.cpython-38.pyc b/tts-azure/src/__pycache__/main.cpython-38.pyc deleted file mode 100644 index c8469b0..0000000 Binary files a/tts-azure/src/__pycache__/main.cpython-38.pyc and /dev/null differ diff --git a/tts-azure/src/config.yaml b/tts-azure/src/config.yaml deleted file mode 100644 index ddccdc5..0000000 --- a/tts-azure/src/config.yaml +++ /dev/null @@ -1,22 +0,0 @@ -logging: - version: 1 - formatters: - normal: - format: "[%(asctime)s] [%(levelname)s@%(name)s] %(message)s" - handlers: - file: - class: logging.handlers.RotatingFileHandler - formatter: normal - filename: publisher.log - maxBytes: 10485760 - backupCount: 10 - console: - class: logging.StreamHandler - formatter: normal - loggers: - publisher: - level: DEBUG - handlers: [file, console] - root: - level: DEBUG - handlers: [file, console] diff --git a/tts-azure/src/main.py b/tts-azure/src/main.py index eb2fc64..eec75e9 100644 --- a/tts-azure/src/main.py +++ b/tts-azure/src/main.py @@ -16,7 +16,7 @@ import logging.config -with open("config.yaml") as file_stream: +with open("/etc/config.yaml") as file_stream: config = yaml.full_load(file_stream) logging.config.dictConfig(config.get("logging")) @@ -29,27 +29,30 @@ logger.error("Please set the environment variable AZURE_API_TOKEN") sys.exit(1) +DEFAULT_LANG = os.environ.get("DEFAULT_LANG", "es-MX-JorgeNeural") + app = FastAPI() base_url = "https://eastus.tts.speech.microsoft.com/cognitiveservices" api_token = os.environ.get("AZURE_API_TOKEN") output_format = "raw-24khz-16bit-mono-pcm" provider = "azure" +cache_format = "sln24" ssml_string = """ - - {lexicon_tag} {text} - """ +loop = asyncio.get_event_loop() + + @app.get("/voicelist") async def voicelisst(): headers = {"Ocp-Apim-Subscription-Key": api_token} @@ -60,49 +63,62 @@ async def voicelisst(): @app.post("/synthesize") -async def synthesize(text: str, - voice: Optional[str] = "es-MX-JorgeNeural", - rate: Optional[str] = "0%", - pitch: Optional[str] = "0%", - lexicon: Optional[str] = None, - ): - headers = {"Ocp-Apim-Subscription-Key": api_token, - "X-Microsoft-OutputFormat": output_format, - "Content-Type": "application/ssml+xml" - } - lexicon_tag = f'' if lexicon else "" - - data = ssml_string.format(text=text, - voice=voice, - language=voice[0:5], - rate=rate, - pitch=pitch, - lexicon_tag=lexicon_tag - ) +async def synthesize( + text: str, + voice: Optional[str] = DEFAULT_LANG, + exten: Optional[str] = "alaw", + file_type: Optional[str] = "al", + rate: Optional[str] = 8000, +): + headers = { + "Ocp-Apim-Subscription-Key": api_token, + "X-Microsoft-OutputFormat": output_format, + "Content-Type": "application/ssml+xml", + } + + # SSML to send + data = ssml_string.format(text=text, voice=voice, language=voice[0:5]) logger.debug(data) + # Define file name and path - filename = hashlib.md5(text.encode()).hexdigest() - file_dir = f"/sounds/{provider}/{voice}" + md5_data = hashlib.md5(data.encode()).hexdigest() + d1, d2, filename = md5_data[0:2], md5_data[2:4], md5_data + dir_schema = f"{provider}/{voice}/{d1}/{d2}" + file_dir = f"/sounds/{dir_schema}" file_path = f"{file_dir}/{filename}" + # We need to ensure the defined directory ensure_dir(file_dir) - output_filepath = f"{file_path}.sln24" - if not os.path.exists(output_filepath) or os.path.getsize(output_filepath) <= 0: + + # Store the content in plain text + with open(f"{file_path}.txt", "w") as content: + content.write(data) + content.close() + + # This block performs the synthesize using Azure. + # It's done using the best quality. + # After that the final format is obtained using SOX + cache_filepath = f"{file_path}.{cache_format}" + if not os.path.exists(cache_filepath) or os.path.getsize(cache_filepath) <= 0: async with aiohttp.ClientSession(headers=headers) as session: async with session.post(f"{base_url}/v1", data=data) as result: - with open(output_filepath, 'wb') as output_file: + with open(cache_filepath, "wb") as output_file: while True: chunk = await result.content.read(1024) if not chunk: break output_file.write(chunk) - loop = asyncio.get_event_loop() - result = await loop.run_in_executor(None, - partial(sox_converter, file_path, 'alaw', 'al', 8000) - ) + if not os.path.exists(cache_filepath) or os.path.getsize(cache_filepath) <= 0: + result = "azure_error" + else: + # The SOX converter is called in async mode + result = await loop.run_in_executor( + None, partial(sox_converter, file_path, exten, file_type, rate) + ) - return {"sound_path": f"{provider}/{voice}/{filename}"} + # The final result is returned + return {"sound_path": f"{dir_schema}/{filename}", "exten": exten, "result": result} def ensure_dir(directory): @@ -111,20 +127,20 @@ def ensure_dir(directory): def sox_converter(file_path, exten, file_type, rate=16000, bits=16, channels=1): - input_filepath = f'{file_path}.sln24' - output_filepath = f'{file_path}.{exten}' + input_filepath = f"{file_path}.{cache_format}" + output_filepath = f"{file_path}.{exten}" - if not os.path.exists(output_filepath) or os.path.getsize(output_filepath) <= 0: + if os.path.exists(output_filepath) and os.path.getsize(output_filepath) > 0: + return "cached" + else: tfm = sox.Transformer() tfm.silence(location=1) tfm.silence(location=-1) tfm.pad(0.05, 0.05) - tfm.set_input_format(file_type='sln', rate=24000, bits=16, channels=1) + tfm.set_input_format(file_type="sln", rate=24000, bits=16, channels=1) tfm.set_output_format(file_type, rate, bits, channels) result = tfm.build_file(input_filepath=input_filepath, output_filepath=output_filepath) - else: - result = None - return result + return "success" if result else "error"