Skip to content

Commit

Permalink
Initial commit. It uses Azure for TTS and Azure/Google for Recognition
Browse files Browse the repository at this point in the history
  • Loading branch information
jffrancob committed Aug 25, 2021
1 parent 0bc114a commit a299986
Show file tree
Hide file tree
Showing 17 changed files with 525 additions and 0 deletions.
26 changes: 26 additions & 0 deletions config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Connection settings for the Redis server (hostname and TCP port).
redis:
  host: redis
  port: 6379

# Logging setup. The keys follow the dictionary schema of Python's
# logging.config.dictConfig (version / formatters / handlers / loggers / root).
# NOTE(review): the consumer of this top-level file is not visible here — confirm.
logging:
  version: 1
  formatters:
    normal:
      format: "[%(asctime)s] [%(levelname)s@%(name)s] %(message)s"
  handlers:
    # Rotating log file: 10485760 bytes (10 MiB) per file, 10 backups kept.
    file:
      class: logging.handlers.RotatingFileHandler
      formatter: normal
      filename: publisher.log
      maxBytes: 10485760
      backupCount: 10
    console:
      class: logging.StreamHandler
      formatter: normal
  loggers:
    # NOTE(review): "publisher" propagates to root, which uses the same
    # handlers, so its records may be written twice — consider propagate: false.
    publisher:
      level: DEBUG
      handlers: [file, console]
  # dictConfig reads the root logger from this top-level key, not from "loggers".
  root:
    level: DEBUG
    handlers: [file, console]
43 changes: 43 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
version: "3.7"

services:
  # Azure speech-recognition API, published on host port 8181.
  speech-azure:
    build: speech-azure
    restart: unless-stopped
    networks:
      - voice-api
    ports:
      - "8181:8000"
    environment:
      # Passed through from the host environment / .env file.
      # speech-azure/src/main.py exits at startup when this variable is missing.
      - AZURE_API_TOKEN
    volumes:
      - ./speech-azure/src/:/app
      - /var/lib/asterisk/sounds/:/sounds
      # Read-only: the container only needs the host time zone, never to change it.
      - /etc/localtime:/etc/localtime:ro

  # Google Cloud speech-recognition API, published on host port 8182.
  # NOTE(review): speech-google/src/main.py reads /GOOGLE_CLOUD_SPEECH_CREDENTIALS
  # at startup, but no volume here provides that file — confirm how it is supplied.
  speech-google:
    build: speech-google
    restart: unless-stopped
    networks:
      - voice-api
    ports:
      - "8182:8000"
    volumes:
      - ./speech-google/src/:/app
      - /var/lib/asterisk/sounds/:/sounds
      - /etc/localtime:/etc/localtime:ro

  # Azure text-to-speech API, published on host port 8081.
  # NOTE(review): if this service also reads AZURE_API_TOKEN, pass it through
  # with an "environment" entry like the one on speech-azure — confirm.
  tts-azure:
    build: tts-azure
    restart: unless-stopped
    networks:
      - voice-api
    ports:
      - "8081:8000"
    volumes:
      - ./tts-azure/src/:/app
      - /var/lib/asterisk/sounds/:/sounds
      - /etc/localtime:/etc/localtime:ro
    # Development only: restart the app whenever a *.py file changes.
    # entrypoint: watchmedo auto-restart --recursive --pattern="*.py" --directory="." python main.py

networks:
  # Must already exist (docker network create voice-api); Compose will not create it.
  voice-api:
    external: true
35 changes: 35 additions & 0 deletions speech-azure/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Image for the Azure speech-recognition API (FastAPI app served by uvicorn).
FROM tiangolo/uvicorn-gunicorn:python3.8

LABEL name="speech API"
LABEL authors="iKono Telecomunicaciones"

# System packages: currently disabled, nothing is installed through apt.
# RUN apt-get update \
# && apt-get install -y \
# sox \
# && rm -rf /var/lib/apt/lists/*

# Copy only the dependency list first, so the pip layer below is reused from
# the build cache until requirements.txt itself changes.
COPY requirements.txt .

# Install the Python dependencies without keeping pip's download cache in the image.
RUN pip install --no-cache-dir -r requirements.txt

# Documents the port the server listens on; it does not publish the port by itself.
EXPOSE 8000

# Directory that holds the application code and is the server's working directory.
WORKDIR /app

# Copy the contents of the local src directory into the working directory.
COPY ./src/ .

# Run uvicorn directly; the CMD below supplies its arguments.
ENTRYPOINT ["/usr/local/bin/uvicorn"]

# Default arguments appended to the entrypoint, i.e.
#   uvicorn main:app --host 0.0.0.0 --port 8000
# main: the file main.py (the Python "module").
# app: the object created inside main.py with the line app = FastAPI().
# --host 0.0.0.0: bind to all interfaces so the port is reachable from outside the container.
# --reload is not passed here; it restarts the server on code changes and is for development only.
CMD ["main:app", "--host", "0.0.0.0", "--port", "8000"]
6 changes: 6 additions & 0 deletions speech-azure/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Python dependencies of the speech-azure service.
# No versions are pinned, so each image build installs the latest releases.
fastapi
pyyaml
# NOTE(review): aiohttp is not imported by src/main.py — confirm it is still needed.
aiohttp
# NOTE(review): watchdog/argh presumably back the `watchmedo auto-restart` dev entrypoint — confirm.
watchdog
argh
azure-cognitiveservices-speech
Binary file added speech-azure/src/__pycache__/main.cpython-38.pyc
Binary file not shown.
22 changes: 22 additions & 0 deletions speech-azure/src/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Logging setup; main.py passes this mapping to logging.config.dictConfig.
logging:
  version: 1
  formatters:
    normal:
      format: "[%(asctime)s] [%(levelname)s@%(name)s] %(message)s"
  handlers:
    # Rotating log file: 10485760 bytes (10 MiB) per file, 10 backups kept.
    file:
      class: logging.handlers.RotatingFileHandler
      formatter: normal
      filename: publisher.log
      maxBytes: 10485760
      backupCount: 10
    console:
      class: logging.StreamHandler
      formatter: normal
  loggers:
    # NOTE(review): main.py only uses the root logger; "publisher" looks like a
    # leftover and would log twice through propagation — confirm and remove.
    publisher:
      level: DEBUG
      handlers: [file, console]
  # dictConfig reads the root logger from this top-level key, not from "loggers".
  root:
    level: DEBUG
    handlers: [file, console]
78 changes: 78 additions & 0 deletions speech-azure/src/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
from fastapi import FastAPI, Query
from typing import List, Optional

import azure.cognitiveservices.speech as speechsdk

import os
import sys
import yaml

import logging
import logging.config

import asyncio
import functools

# Load the service configuration; only its "logging" section is read here.
with open("config.yaml") as file_stream:
    # safe_load restricts the document to standard YAML types, which is all
    # this configuration uses.
    config = yaml.safe_load(file_stream)

logging.config.dictConfig(config.get("logging"))
logger = logging.getLogger()
logger.debug("starting config...")

# Azure Speech subscription key. An unset *or empty* value is rejected here so
# the service fails at startup rather than on the first recognition request.
api_token = os.environ.get("AZURE_API_TOKEN")
if not api_token:
    logger.error("Please set the environment variable AZURE_API_TOKEN")
    sys.exit(1)

# Region of the Azure Speech resource. Overridable through AZURE_API_REGION;
# the default is the value that used to be hard-coded.
api_region = os.environ.get("AZURE_API_REGION", "eastus")

app = FastAPI()

# Kept as a module attribute for backward compatibility.
# asyncio.get_running_loop() raises RuntimeError when this module is imported
# outside a running event loop (tests, scripts, other servers), which used to
# make the import itself fail; fall back to None in that case.
try:
    loop = asyncio.get_running_loop()
except RuntimeError:
    loop = None


def azure_recognize(speech_recognizer):
    """Run one recognition pass and block until its result is available.

    Starts ``recognize_once_async()`` on the recognizer and waits on the
    returned future with ``get()``. The call blocks the calling thread, which
    is why the request handler runs it through an executor.

    :param speech_recognizer: object exposing ``recognize_once_async()``
        (here an Azure ``SpeechRecognizer``).
    :return: the value produced by the future's ``get()``.
    """
    return speech_recognizer.recognize_once_async().get()


@app.post("/recognize")
async def recognize(file_path: str, phrase: Optional[List[str]] = Query(None)):
    """Transcribe an audio file stored under /sounds with Azure Speech.

    :param file_path: path of the audio file, relative to ``/sounds``.
    :param phrase: optional phrases added to the recognizer's phrase list.
    :return: ``{"text": <transcript>}`` when speech was recognized, otherwise
        ``None`` (no match, cancellation, rejected path or any error; the
        reason is written to the log).
    """
    sounds_root = "/sounds"
    # file_path comes straight from the request: collapse ".." segments and
    # refuse anything that ends up outside the sounds directory (os.path.join
    # also discards the root when file_path is absolute).
    audio_file = os.path.normpath(os.path.join(sounds_root, file_path))
    if not audio_file.startswith(sounds_root + os.sep):
        logger.error(f"Rejected file path outside {sounds_root}: {file_path}")
        return None

    try:
        logger.debug(f"Executing Recognition to file: {file_path} and phrase list {phrase}")

        speech_config = speechsdk.SpeechConfig(subscription=api_token, region=api_region)
        speech_config.speech_recognition_language = "es-MX"
        audio_input = speechsdk.AudioConfig(filename=audio_file)
        speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
                                                       audio_config=audio_input)

        if phrase:
            phrase_list_grammar = speechsdk.PhraseListGrammar.from_recognizer(speech_recognizer)
            for sentence in phrase:
                phrase_list_grammar.addPhrase(sentence)

        # The SDK call blocks, so run it in the default executor. The loop is
        # looked up per request instead of relying on one captured at import time.
        result = await asyncio.get_running_loop().run_in_executor(
            None, functools.partial(azure_recognize, speech_recognizer))

        if result.reason == speechsdk.ResultReason.RecognizedSpeech:
            result_text = result.text.strip(" .")
            logger.debug(f"Recognition in {file_path}. result: {result_text}")
            return {"text": result_text}
        elif result.reason == speechsdk.ResultReason.NoMatch:
            logger.error(f"No speech could be recognized: {result.no_match_details}")
        elif result.reason == speechsdk.ResultReason.Canceled:
            cancellation_details = result.cancellation_details
            logger.error(f"Speech Recognition canceled: {cancellation_details.reason}")
            if cancellation_details.reason == speechsdk.CancellationReason.Error:
                logger.error(f"Error details: {cancellation_details.error_details}")
        else:
            logger.error(f"Speech Recognition result: {result.reason}")

    except Exception:
        # Logged at ERROR with the traceback; the previous message blamed
        # Google Cloud Speech and was only emitted at DEBUG.
        logger.exception("Could not request results from Azure Speech service")

    return None
35 changes: 35 additions & 0 deletions speech-google/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Image for the Google Cloud speech-recognition API (FastAPI app served by uvicorn).
FROM tiangolo/uvicorn-gunicorn:python3.8

LABEL name="speech API"
LABEL authors="iKono Telecomunicaciones"

# System packages: currently disabled, nothing is installed through apt.
# RUN apt-get update \
# && apt-get install -y \
# sox \
# && rm -rf /var/lib/apt/lists/*

# Copy only the dependency list first, so the pip layer below is reused from
# the build cache until requirements.txt itself changes.
COPY requirements.txt .

# Install the Python dependencies without keeping pip's download cache in the image.
RUN pip install --no-cache-dir -r requirements.txt

# Documents the port the server listens on; it does not publish the port by itself.
EXPOSE 8000

# Directory that holds the application code and is the server's working directory.
WORKDIR /app

# Copy the contents of the local src directory into the working directory.
# NOTE(review): main.py reads /GOOGLE_CLOUD_SPEECH_CREDENTIALS, which this image
# does not contain — it has to be provided at run time.
COPY ./src/ .

# Run uvicorn directly; the CMD below supplies its arguments.
ENTRYPOINT ["/usr/local/bin/uvicorn"]

# Default arguments appended to the entrypoint, i.e.
#   uvicorn main:app --host 0.0.0.0 --port 8000
# main: the file main.py (the Python "module").
# app: the object created inside main.py with the line app = FastAPI().
# --host 0.0.0.0: bind to all interfaces so the port is reachable from outside the container.
# --reload is not passed here; it restarts the server on code changes and is for development only.
CMD ["main:app", "--host", "0.0.0.0", "--port", "8000"]
10 changes: 10 additions & 0 deletions speech-google/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Python dependencies of the speech-google service.
# Apart from idna, no versions are pinned, so each build installs the latest releases.
fastapi
pyyaml
# NOTE(review): aiohttp is not imported by src/main.py — confirm it is still needed.
aiohttp
# NOTE(review): watchdog/argh presumably back a `watchmedo auto-restart` dev entrypoint — confirm.
watchdog
argh
# Imported in main.py as speech_recognition; its recognize_google_cloud() does the transcription.
SpeechRecognition
google-api-python-client
google-cloud-speech
oauth2client
# NOTE(review): upper bound presumably works around a dependency conflict — confirm it is still required.
idna<3.0
Binary file added speech-google/src/__pycache__/main.cpython-38.pyc
Binary file not shown.
22 changes: 22 additions & 0 deletions speech-google/src/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Logging setup; main.py passes this mapping to logging.config.dictConfig.
logging:
  version: 1
  formatters:
    normal:
      format: "[%(asctime)s] [%(levelname)s@%(name)s] %(message)s"
  handlers:
    # Rotating log file: 10485760 bytes (10 MiB) per file, 10 backups kept.
    file:
      class: logging.handlers.RotatingFileHandler
      formatter: normal
      filename: publisher.log
      maxBytes: 10485760
      backupCount: 10
    console:
      class: logging.StreamHandler
      formatter: normal
  loggers:
    # NOTE(review): main.py only uses the root logger; "publisher" looks like a
    # leftover and would log twice through propagation — confirm and remove.
    publisher:
      level: DEBUG
      handlers: [file, console]
  # dictConfig reads the root logger from this top-level key, not from "loggers".
  root:
    level: DEBUG
    handlers: [file, console]
55 changes: 55 additions & 0 deletions speech-google/src/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
from fastapi import FastAPI

import speech_recognition as sr
from os import path
import yaml

import logging
import logging.config

import asyncio
import functools
import traceback


# Load the service configuration; only its "logging" section is read here.
with open("config.yaml") as file_stream:
    # safe_load restricts the document to standard YAML types, which is all
    # this configuration uses.
    config = yaml.safe_load(file_stream)

logging.config.dictConfig(config.get("logging"))
logger = logging.getLogger()
logger.debug("starting config...")


app = FastAPI()


# Kept as a module attribute for backward compatibility.
# asyncio.get_running_loop() raises RuntimeError when this module is imported
# outside a running event loop, which used to make the import itself fail;
# fall back to None in that case.
try:
    loop = asyncio.get_running_loop()
except RuntimeError:
    loop = None

# Credentials for Google Cloud Speech, read once at startup and passed as
# "credentials_json" on every recognition request. The context manager closes
# the file even if reading fails.
with open('/GOOGLE_CLOUD_SPEECH_CREDENTIALS', mode='r') as credentials_file:
    GOOGLE_CLOUD_SPEECH_CREDENTIALS = credentials_file.read()


@app.post("/recognize")
async def recognize(file_path: str):
    """Transcribe an audio file stored under /sounds with Google Cloud Speech.

    :param file_path: path of the audio file, relative to ``/sounds``.
    :return: ``{"text": <transcript>}`` on success, otherwise ``None``
        (rejected path or any error; the reason is written to the log).
    """
    sounds_root = "/sounds"
    # file_path comes straight from the request: collapse ".." segments and
    # refuse anything that ends up outside the sounds directory (path.join
    # also discards the root when file_path is absolute).
    audio_file = path.normpath(path.join(sounds_root, file_path))
    if not audio_file.startswith(sounds_root + path.sep):
        logger.error(f"Rejected file path outside {sounds_root}: {file_path}")
        return None

    try:
        logger.debug(f"Executing Recognition to file: {file_path}")
        recognizer = sr.Recognizer()
        with sr.AudioFile(audio_file) as source:
            audio = recognizer.record(source)

        # NOTE(review): Google language codes are usually written "es-CO";
        # confirm that the underscore form is accepted.
        request = {"audio_data": audio,
                   "language": 'es_CO',
                   "credentials_json": GOOGLE_CLOUD_SPEECH_CREDENTIALS
                   }
        # The recognition call blocks, so run it in the default executor. The
        # loop is looked up per request instead of one captured at import time.
        transcript = await asyncio.get_running_loop().run_in_executor(
            None, functools.partial(recognizer.recognize_google_cloud, **request))

        result_text = transcript.strip(" .")
        logger.debug(f"Recognition in {file_path}. result: {result_text}")
        return {"text": result_text}

    except Exception:
        # One ERROR record carrying the traceback; the summary line used to be
        # emitted only at DEBUG.
        logger.exception("Could not request results from Google Cloud Speech service")

    return None
35 changes: 35 additions & 0 deletions tts-azure/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Image for the Azure text-to-speech API (served by uvicorn).
FROM tiangolo/uvicorn-gunicorn:python3.8

LABEL name="TTS API"
LABEL authors="iKono Telecomunicaciones"

# Install the sox command-line tool and drop the apt package lists in the same
# layer so they do not add to the image size.
RUN apt-get update \
&& apt-get install -y \
sox \
&& rm -rf /var/lib/apt/lists/*

# Copy only the dependency list first, so the pip layer below is reused from
# the build cache until requirements.txt itself changes.
COPY requirements.txt .

# Install the Python dependencies without keeping pip's download cache in the image.
RUN pip install --no-cache-dir -r requirements.txt

# Documents the port the server listens on; it does not publish the port by itself.
EXPOSE 8000

# Directory that holds the application code and is the server's working directory.
WORKDIR /app

# Copy the contents of the local src directory into the working directory.
COPY ./src/ .

# Run uvicorn directly; the CMD below supplies its arguments.
ENTRYPOINT ["/usr/local/bin/uvicorn"]

# Default arguments appended to the entrypoint, i.e.
#   uvicorn main:app --host 0.0.0.0 --port 8000
# main: the file main.py (the Python "module").
# app: NOTE(review) presumably the FastAPI instance created in main.py — that file is not visible here.
# --host 0.0.0.0: bind to all interfaces so the port is reachable from outside the container.
# --reload is not passed here; it restarts the server on code changes and is for development only.
CMD ["main:app", "--host", "0.0.0.0", "--port", "8000"]
6 changes: 6 additions & 0 deletions tts-azure/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Python dependencies of the tts-azure service.
# No versions are pinned, so each image build installs the latest releases.
fastapi
pyyaml
aiohttp
# Python package named sox; the Dockerfile separately installs the sox system package through apt.
sox
# NOTE(review): watchdog/argh presumably back the `watchmedo auto-restart` dev entrypoint
# that is commented out in docker-compose.yml — confirm.
watchdog
argh
Binary file added tts-azure/src/__pycache__/main.cpython-38.pyc
Binary file not shown.
Loading

0 comments on commit a299986

Please sign in to comment.