Initial commit. It uses Azure for TTS and Azure/Google for Recognition

jffrancob · Aug 25, 2021 · a299986 · a299986
1 parent 0bc114a
commit a299986
Show file tree

Hide file tree

Showing 17 changed files with 525 additions and 0 deletions.
diff --git a/config.yaml b/config.yaml
@@ -0,0 +1,26 @@
+# Redis connection settings (host name and TCP port).
+redis:
+  host: redis
+  port: 6379
+
+# Logging configuration in the logging.config.dictConfig schema (version 1).
+logging:
+  version: 1
+  formatters:
+    normal:
+      format: "[%(asctime)s] [%(levelname)s@%(name)s] %(message)s"
+  handlers:
+    # Rotating log file; the relative filename is resolved against the
+    # working directory of the process that loads this configuration.
+    file:
+      class: logging.handlers.RotatingFileHandler
+      formatter: normal
+      filename: publisher.log
+      maxBytes: 10485760  # 10 MiB per file
+      backupCount: 10
+    console:
+      class: logging.StreamHandler
+      formatter: normal
+  loggers:
+    publisher:
+      level: DEBUG
+      handlers: [file, console]
+      # The root logger uses the same two handlers; without this, every
+      # "publisher" record would also propagate to root and be written twice.
+      propagate: false
+  root:
+    level: DEBUG
+    handlers: [file, console]
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -0,0 +1,43 @@
+version: "3.7"
+
+# One container per speech service. Each one listens on port 8000 inside the
+# container and is published on its own host port.
+services:
+  # Speech recognition through Azure (host port 8181).
+  speech-azure:
+    build: speech-azure
+    restart: unless-stopped
+    networks:
+      - voice-api
+    ports:
+      - "8181:8000"
+    environment:
+      # speech-azure/src/main.py exits at startup when this variable is
+      # missing. A key without a value is passed through from the environment
+      # Compose itself runs in.
+      - AZURE_API_TOKEN
+    volumes:
+      # The bind mount replaces the sources copied into the image at build time.
+      - ./speech-azure/src/:/app
+      - /var/lib/asterisk/sounds/:/sounds
+      # Read-only: the container only needs to read the host time zone.
+      - /etc/localtime:/etc/localtime:ro
+
+  # Speech recognition through Google Cloud (host port 8182).
+  # NOTE(review): speech-google/src/main.py reads /GOOGLE_CLOUD_SPEECH_CREDENTIALS
+  # at startup, but nothing below mounts that file - add a bind mount for it.
+  speech-google:
+    build: speech-google
+    restart: unless-stopped
+    networks:
+      - voice-api
+    ports:
+      - "8182:8000"
+    volumes:
+      - ./speech-google/src/:/app
+      - /var/lib/asterisk/sounds/:/sounds
+      - /etc/localtime:/etc/localtime:ro
+
+  # Text-to-speech through Azure (host port 8081).
+  tts-azure:
+    build: tts-azure
+    restart: unless-stopped
+    networks:
+      - voice-api
+    ports:
+      - "8081:8000"
+    volumes:
+      - ./tts-azure/src/:/app
+      - /var/lib/asterisk/sounds/:/sounds
+      - /etc/localtime:/etc/localtime:ro
+    #entrypoint: watchmedo auto-restart --recursive --pattern="*.py" --directory="." python main.py
+
+# The network is not created by this file; it must already exist.
+networks:
+  voice-api:
+    external: true
diff --git a/speech-azure/Dockerfile b/speech-azure/Dockerfile
@@ -0,0 +1,35 @@
+FROM tiangolo/uvicorn-gunicorn:python3.8
+
+LABEL name="speech API" \
+      authors="iKono Telecomunicaciones"
+
+# Install all required dependencies
+# RUN apt-get update \
+#   && apt-get install -y \
+#      sox \
+#   && rm -rf /var/lib/apt/lists/*
+
+# Set the working directory first so that every relative path below resolves
+# to /app, whatever working directory the base image happens to define.
+WORKDIR /app
+
+# Copy only the dependency list and install it before the sources, so this
+# layer is rebuilt only when requirements.txt changes.
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy the content of the local src directory to the working directory.
+COPY ./src/ .
+
+# Documentation only: the port the server listens on inside the container.
+EXPOSE 8000
+
+# NOTE(review): no USER is set, so the service runs as root - confirm that a
+# non-root user could still write to the bind-mounted directories before
+# changing this.
+
+# Start uvicorn directly; the CMD below supplies its default arguments.
+ENTRYPOINT ["/usr/local/bin/uvicorn"]
+
+# Default arguments handed to uvicorn:
+#   main:app        the `app` object created in main.py
+#   --host 0.0.0.0  bind the socket to all interfaces
+#   --port 8000     the port declared with EXPOSE above
+CMD ["main:app", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/speech-azure/requirements.txt b/speech-azure/requirements.txt
@@ -0,0 +1,6 @@
+fastapi  # imported by src/main.py; NOTE(review): nothing in this file is version-pinned
+pyyaml  # provides the `yaml` module imported by src/main.py
+aiohttp  # NOTE(review): not imported by src/main.py - confirm it is needed
+watchdog  # NOTE(review): not imported by src/main.py - presumably for a watchmedo auto-restart dev entrypoint; confirm
+argh  # NOTE(review): not imported by src/main.py - confirm it is needed
+azure-cognitiveservices-speech  # provides azure.cognitiveservices.speech imported by src/main.py
diff --git a/speech-azure/src/__pycache__/main.cpython-38.pyc b/speech-azure/src/__pycache__/main.cpython-38.pyc
diff --git a/speech-azure/src/config.yaml b/speech-azure/src/config.yaml
@@ -0,0 +1,22 @@
+# Logging configuration; main.py passes this mapping to logging.config.dictConfig().
+# NOTE(review): disable_existing_loggers is not set, so it defaults to true and
+# loggers that already exist and are not named here get disabled - confirm
+# that is intended.
+logging:
+  version: 1
+  formatters:
+    normal:
+      format: "[%(asctime)s] [%(levelname)s@%(name)s] %(message)s"
+  handlers:
+    # Rotating log file; the relative filename is resolved against the
+    # working directory of the service process.
+    file:
+      class: logging.handlers.RotatingFileHandler
+      formatter: normal
+      filename: publisher.log
+      maxBytes: 10485760  # 10 MiB per file
+      backupCount: 10
+    console:
+      class: logging.StreamHandler
+      formatter: normal
+  loggers:
+    publisher:
+      level: DEBUG
+      handlers: [file, console]
+      # The root logger uses the same two handlers; without this, every
+      # "publisher" record would also propagate to root and be written twice.
+      propagate: false
+  root:
+    level: DEBUG
+    handlers: [file, console]
diff --git a/speech-azure/src/main.py b/speech-azure/src/main.py
@@ -0,0 +1,78 @@
+from fastapi import FastAPI, Query
+from typing import List, Optional
+
+import azure.cognitiveservices.speech as speechsdk
+
+import os
+import sys
+import yaml
+
+import logging
+import logging.config
+
+import asyncio
+import functools
+
+with open("config.yaml") as file_stream:
+    config = yaml.full_load(file_stream)
+
+logging.config.dictConfig(config.get("logging"))
+logger = logging.getLogger()
+logger.debug("starting config...")
+
+try:
+    os.environ["AZURE_API_TOKEN"]
+except KeyError:
+    logger.error("Please set the environment variable AZURE_API_TOKEN")
+    sys.exit(1)
+
+app = FastAPI()
+loop = asyncio.get_running_loop()
+
+api_region = "eastus"
+api_token = os.environ.get("AZURE_API_TOKEN")
+
+
+def azure_recognize(speech_recognizer):
+    result = speech_recognizer.recognize_once_async().get()
+    return result
+
+
+@app.post("/recognize")
+async def recognize(file_path: str, phrase: Optional[List[str]] = Query(None)):
+    audio_file = os.path.join("/sounds", file_path)
+
+    try:
+        logger.debug(f"Executing Recognition to file: {file_path} and phrase list {phrase}")
+
+        speech_config = speechsdk.SpeechConfig(subscription=api_token, region=api_region)
+        speech_config.speech_recognition_language = "es-MX"
+        audio_input = speechsdk.AudioConfig(filename=audio_file)
+        speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
+                                                       audio_config=audio_input)
+
+        if phrase:
+            phrase_list_grammar = speechsdk.PhraseListGrammar.from_recognizer(speech_recognizer)
+            for sentence in phrase:
+                phrase_list_grammar.addPhrase(sentence)
+
+        result = await loop.run_in_executor(None, functools.partial(azure_recognize,
+                                                                    speech_recognizer))
+        if result.reason == speechsdk.ResultReason.RecognizedSpeech:
+            result_text = result.text.strip(" .")
+            logger.debug(f"Recognition in {file_path}. result: {result_text}")
+            return {"text": result_text}
+        elif result.reason == speechsdk.ResultReason.NoMatch:
+            logger.error(f"No speech could be recognized: {result.no_match_details}")
+        elif result.reason == speechsdk.ResultReason.Canceled:
+            cancellation_details = result.cancellation_details
+            logger.error(f"Speech Recognition canceled: {cancellation_details.reason}")
+            if cancellation_details.reason == speechsdk.CancellationReason.Error:
+                logger.error(f"Error details: {cancellation_details.error_details}")
+        else:
+            logger.error(f"Speech Recognition result: {result.reason}")
+
+        return None
+
+    except Exception as e:
+        logger.debug("Could not request results from Google Cloud Speech service; {0}".format(e))
diff --git a/speech-google/Dockerfile b/speech-google/Dockerfile
@@ -0,0 +1,35 @@
+FROM tiangolo/uvicorn-gunicorn:python3.8
+
+LABEL name="speech API" \
+      authors="iKono Telecomunicaciones"
+
+# Install all required dependencies
+# RUN apt-get update \
+#   && apt-get install -y \
+#      sox \
+#   && rm -rf /var/lib/apt/lists/*
+
+# Set the working directory first so that every relative path below resolves
+# to /app, whatever working directory the base image happens to define.
+WORKDIR /app
+
+# Copy only the dependency list and install it before the sources, so this
+# layer is rebuilt only when requirements.txt changes.
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy the content of the local src directory to the working directory.
+COPY ./src/ .
+
+# Documentation only: the port the server listens on inside the container.
+EXPOSE 8000
+
+# NOTE(review): no USER is set, so the service runs as root - confirm that a
+# non-root user could still write to the bind-mounted directories before
+# changing this.
+
+# Start uvicorn directly; the CMD below supplies its default arguments.
+ENTRYPOINT ["/usr/local/bin/uvicorn"]
+
+# Default arguments handed to uvicorn:
+#   main:app        the `app` object created in main.py
+#   --host 0.0.0.0  bind the socket to all interfaces
+#   --port 8000     the port declared with EXPOSE above
+CMD ["main:app", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/speech-google/requirements.txt b/speech-google/requirements.txt
@@ -0,0 +1,10 @@
+fastapi  # imported by src/main.py; NOTE(review): apart from idna, nothing in this file is version-constrained
+pyyaml  # provides the `yaml` module imported by src/main.py
+aiohttp  # NOTE(review): not imported by src/main.py - confirm it is needed
+watchdog  # NOTE(review): not imported by src/main.py - presumably for a watchmedo auto-restart dev entrypoint; confirm
+argh  # NOTE(review): not imported by src/main.py - confirm it is needed
+SpeechRecognition  # provides the `speech_recognition` module imported by src/main.py
+google-api-python-client  # NOTE(review): not imported directly - presumably needed by recognize_google_cloud; confirm
+google-cloud-speech  # NOTE(review): not imported directly - presumably needed by recognize_google_cloud; confirm
+oauth2client  # NOTE(review): not imported directly - presumably needed for the Google credentials; confirm
+idna<3.0  # NOTE(review): the reason for this upper bound is not recorded - confirm it is still required
diff --git a/speech-google/src/__pycache__/main.cpython-38.pyc b/speech-google/src/__pycache__/main.cpython-38.pyc
diff --git a/speech-google/src/config.yaml b/speech-google/src/config.yaml
@@ -0,0 +1,22 @@
+# Logging configuration; main.py passes this mapping to logging.config.dictConfig().
+# NOTE(review): disable_existing_loggers is not set, so it defaults to true and
+# loggers that already exist and are not named here get disabled - confirm
+# that is intended.
+logging:
+  version: 1
+  formatters:
+    normal:
+      format: "[%(asctime)s] [%(levelname)s@%(name)s] %(message)s"
+  handlers:
+    # Rotating log file; the relative filename is resolved against the
+    # working directory of the service process.
+    file:
+      class: logging.handlers.RotatingFileHandler
+      formatter: normal
+      filename: publisher.log
+      maxBytes: 10485760  # 10 MiB per file
+      backupCount: 10
+    console:
+      class: logging.StreamHandler
+      formatter: normal
+  loggers:
+    publisher:
+      level: DEBUG
+      handlers: [file, console]
+      # The root logger uses the same two handlers; without this, every
+      # "publisher" record would also propagate to root and be written twice.
+      propagate: false
+  root:
+    level: DEBUG
+    handlers: [file, console]
diff --git a/speech-google/src/main.py b/speech-google/src/main.py
@@ -0,0 +1,55 @@
+from fastapi import FastAPI
+
+import speech_recognition as sr
+from os import path
+import yaml
+
+import logging
+import logging.config
+
+import asyncio
+import functools
+import traceback
+
+
+with open("config.yaml") as file_stream:
+    config = yaml.full_load(file_stream)
+
+logging.config.dictConfig(config.get("logging"))
+logger = logging.getLogger()
+logger.debug("starting config...")
+
+
+app = FastAPI()
+
+
+loop = asyncio.get_running_loop()
+
+# recognize speech using Google Cloud Speech
+file = open('/GOOGLE_CLOUD_SPEECH_CREDENTIALS', mode='r')
+GOOGLE_CLOUD_SPEECH_CREDENTIALS = file.read()
+file.close()
+
+
+@app.post("/recognize")
+async def recognize(file_path: str):
+    audio_file = path.join("/sounds", file_path)
+
+    try:
+        logger.debug(f"Executing Recognition to file: {file_path}")
+        r = sr.Recognizer()
+        with sr.AudioFile(audio_file) as source:
+            audio = r.record(source)
+            data = {"audio_data": audio,
+                    "language": 'es_CO',
+                    "credentials_json": GOOGLE_CLOUD_SPEECH_CREDENTIALS
+                    }
+            result = await loop.run_in_executor(None, functools.partial(r.recognize_google_cloud,
+                                                                        **data))
+            result_text = result.strip(" .")
+            logger.debug(f"Recognition in {file_path}. result: {result_text}")
+            return {"text": result_text}
+
+    except Exception as e:
+        logger.error(traceback.format_exc())
+        logger.debug("Could not request results from Google Cloud Speech service; {0}".format(e))
diff --git a/tts-azure/Dockerfile b/tts-azure/Dockerfile
@@ -0,0 +1,35 @@
+FROM tiangolo/uvicorn-gunicorn:python3.8
+
+LABEL name="TTS API" \
+      authors="iKono Telecomunicaciones"
+
+# Install the sox command-line tool; the apt package lists are removed in the
+# same layer so they do not end up in the image.
+# NOTE(review): --no-install-recommends would shrink the image - confirm first
+# that sox keeps every audio format the service needs without the recommended
+# packages.
+RUN apt-get update \
+  && apt-get install -y \
+     sox \
+  && rm -rf /var/lib/apt/lists/*
+
+# Set the working directory first so that every relative path below resolves
+# to /app, whatever working directory the base image happens to define.
+WORKDIR /app
+
+# Copy only the dependency list and install it before the sources, so this
+# layer is rebuilt only when requirements.txt changes.
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy the content of the local src directory to the working directory.
+COPY ./src/ .
+
+# Documentation only: the port the server listens on inside the container.
+EXPOSE 8000
+
+# NOTE(review): no USER is set, so the service runs as root - confirm that a
+# non-root user could still write to the bind-mounted directories before
+# changing this.
+
+# Start uvicorn directly; the CMD below supplies its default arguments.
+ENTRYPOINT ["/usr/local/bin/uvicorn"]
+
+# Default arguments handed to uvicorn:
+#   main:app        the `app` object created in main.py
+#   --host 0.0.0.0  bind the socket to all interfaces
+#   --port 8000     the port declared with EXPOSE above
+CMD ["main:app", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/tts-azure/requirements.txt b/tts-azure/requirements.txt
@@ -0,0 +1,6 @@
+fastapi  # NOTE(review): nothing in this file is version-pinned
+pyyaml
+aiohttp
+sox  # NOTE(review): presumably a Python wrapper around the sox binary that the Dockerfile installs via apt - confirm
+watchdog  # NOTE(review): presumably for the commented-out `watchmedo auto-restart` entrypoint in docker-compose.yml - confirm
+argh
diff --git a/tts-azure/src/__pycache__/main.cpython-38.pyc b/tts-azure/src/__pycache__/main.cpython-38.pyc