diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..35500d1
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,76 @@
+.env
+config.yaml
+
+/service/__init__.pyc
+*.pyc
+/service/test/__init__.pyc
+/service/test/test_call.pyc
+_trial_temp/
+*.swp
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+*.log
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*,cover
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+*.log.*
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+
+*.sqlite3
+*.*~
+*.passwd
+.coverage
+
diff --git a/config.yaml b/config.yaml
index 658bb9d..6d82903 100644
--- a/config.yaml
+++ b/config.yaml
@@ -1,26 +1,16 @@
-redis:
- host: redis
- port: 6379
-
logging:
version: 1
formatters:
normal:
format: "[%(asctime)s] [%(levelname)s@%(name)s] %(message)s"
handlers:
- file:
- class: logging.handlers.RotatingFileHandler
- formatter: normal
- filename: publisher.log
- maxBytes: 10485760
- backupCount: 10
console:
class: logging.StreamHandler
formatter: normal
loggers:
publisher:
level: DEBUG
- handlers: [file, console]
+ handlers: [console]
root:
level: DEBUG
- handlers: [file, console]
+ handlers: [console]
diff --git a/docker-compose.yml b/docker-compose.yml
index cd54beb..2130fd0 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,43 +1,85 @@
version: "3.7"
services:
- speech-azure:
- build: speech-azure
- restart: unless-stopped
- networks:
- - voice-api
- ports:
- - 8181:8000
- volumes:
- - ./speech-azure/src/:/app
- - /var/lib/asterisk/sounds/:/sounds
- - /etc/localtime:/etc/localtime
+ # speech-azure:
+ # image: ikonoim/speech-azure:0.0.1
+ # build:
+ # context: speech-azure
+ # dockerfile: Dockerfile
+ # # runtime, or dev, or test for running tests
+ # target: runtime
+ # restart: unless-stopped
+ # ports:
+ # - 8181:8000
+ # environment:
+ # - AZURE_API_TOKEN
+ # - DEFAULT_LANG=es-MX-JorgeNeural
+ # volumes:
+ # - /etc/localtime:/etc/localtime
+ # - /var/lib/asterisk/sounds/:/sounds
+ # ## - ./speech-azure/src/:/app
+ # logging:
+ # driver: json-file
+ # options:
+ # max-size: 20m
+ # max-file: "10"
+ # networks:
+ # - voice-api
- speech-google:
- build: speech-google
- restart: unless-stopped
- networks:
- - voice-api
- ports:
- - 8182:8000
- volumes:
- - ./speech-google/src/:/app
- - /var/lib/asterisk/sounds/:/sounds
- - /etc/localtime:/etc/localtime
+ # speech-google:
+ # image: ikonoim/speech-google:0.0.1
+ # build:
+ # context: speech-google
+ # dockerfile: Dockerfile
+ # # runtime, or dev, or test for running tests
+ # target: runtime
+ # restart: unless-stopped
+ # ports:
+ # - 8182:8000
+ # environment:
+ # - GOOGLE_APPLICATION_CREDENTIALS
+ # volumes:
+ # - /etc/localtime:/etc/localtime
+ # - /var/lib/asterisk/sounds/:/sounds
+ # ## - ./speech-google/src/:/app
+ # logging:
+ # driver: json-file
+ # options:
+ # max-size: 20m
+ # max-file: "10"
+ # networks:
+ # - voice-api
tts-azure:
- build: tts-azure
+ image: ikonoim/tts-azure:0.0.1
+ build:
+ context: tts-azure
+ dockerfile: Dockerfile
+ # runtime, or dev, or test for running tests
+ target: runtime
restart: unless-stopped
- networks:
- - voice-api
ports:
- 8081:8000
+ environment:
+ - AZURE_API_TOKEN
volumes:
- - ./tts-azure/src/:/app
- - /var/lib/asterisk/sounds/:/sounds
+ - ./config.yaml:/etc/config.yaml
- /etc/localtime:/etc/localtime
- #entrypoint: watchmedo auto-restart --recursive --pattern="*.py" --directory="." python main.py
+ - /var/lib/asterisk/sounds/:/sounds
+ ## - ./tts-azure/src/:/app
+ logging:
+ driver: json-file
+ options:
+ max-size: 20m
+ max-file: "10"
+ networks:
+ - voice-api
networks:
voice-api:
- external: true
+ name: voice-api
+ # driver: overlay
+ attachable: true
+ ipam:
+ config:
+ - subnet: 172.31.128.0/24
diff --git a/speech-azure/src/__pycache__/main.cpython-38.pyc b/speech-azure/src/__pycache__/main.cpython-38.pyc
deleted file mode 100644
index 55f6aeb..0000000
Binary files a/speech-azure/src/__pycache__/main.cpython-38.pyc and /dev/null differ
diff --git a/speech-azure/src/config.yaml b/speech-azure/src/config.yaml
deleted file mode 100644
index ddccdc5..0000000
--- a/speech-azure/src/config.yaml
+++ /dev/null
@@ -1,22 +0,0 @@
-logging:
- version: 1
- formatters:
- normal:
- format: "[%(asctime)s] [%(levelname)s@%(name)s] %(message)s"
- handlers:
- file:
- class: logging.handlers.RotatingFileHandler
- formatter: normal
- filename: publisher.log
- maxBytes: 10485760
- backupCount: 10
- console:
- class: logging.StreamHandler
- formatter: normal
- loggers:
- publisher:
- level: DEBUG
- handlers: [file, console]
- root:
- level: DEBUG
- handlers: [file, console]
diff --git a/speech-google/src/__pycache__/main.cpython-38.pyc b/speech-google/src/__pycache__/main.cpython-38.pyc
deleted file mode 100644
index a83798e..0000000
Binary files a/speech-google/src/__pycache__/main.cpython-38.pyc and /dev/null differ
diff --git a/speech-google/src/config.yaml b/speech-google/src/config.yaml
deleted file mode 100644
index ddccdc5..0000000
--- a/speech-google/src/config.yaml
+++ /dev/null
@@ -1,22 +0,0 @@
-logging:
- version: 1
- formatters:
- normal:
- format: "[%(asctime)s] [%(levelname)s@%(name)s] %(message)s"
- handlers:
- file:
- class: logging.handlers.RotatingFileHandler
- formatter: normal
- filename: publisher.log
- maxBytes: 10485760
- backupCount: 10
- console:
- class: logging.StreamHandler
- formatter: normal
- loggers:
- publisher:
- level: DEBUG
- handlers: [file, console]
- root:
- level: DEBUG
- handlers: [file, console]
diff --git a/tts-azure/Dockerfile b/tts-azure/Dockerfile
index df9380d..f5bed29 100644
--- a/tts-azure/Dockerfile
+++ b/tts-azure/Dockerfile
@@ -1,4 +1,4 @@
-FROM tiangolo/uvicorn-gunicorn:python3.8
+FROM tiangolo/uvicorn-gunicorn:python3.8 as dev
LABEL name="TTS API"
LABEL authors="iKono Telecomunicaciones"
@@ -11,9 +11,10 @@ RUN apt-get update \
# copy the dependencies file to the working directory
COPY requirements.txt .
-
+COPY requirements-dev.txt .
# install dependencies
RUN pip install --no-cache-dir -r requirements.txt
+RUN pip install --no-cache-dir -r requirements-dev.txt
# Add metadata to the image to describe which port the container is listening on at runtime.
EXPOSE 8000
@@ -32,4 +33,23 @@ ENTRYPOINT ["/usr/local/bin/uvicorn"]
# app: the object created inside of main.py with the line app = FastAPI().
# --reload: make the server restart after code changes. Only do this for development.
# --host 0.0.0.0: Bind socket to this host.
-CMD ["main:app", "--host", "0.0.0.0", "--port", "8000"]
+CMD ["main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]
+
+
+
+FROM python:3.8-slim as runtime
+
+RUN apt-get update \
+ && apt-get install -y \
+ sox \
+ && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /code
+
+COPY ./requirements.txt /code/requirements.txt
+
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+COPY ./src /code/app
+
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/tts-azure/requirements-dev.txt b/tts-azure/requirements-dev.txt
new file mode 100644
index 0000000..523b880
--- /dev/null
+++ b/tts-azure/requirements-dev.txt
@@ -0,0 +1,2 @@
+watchdog
+argh
diff --git a/tts-azure/requirements.txt b/tts-azure/requirements.txt
index 7970d9d..ae305a7 100644
--- a/tts-azure/requirements.txt
+++ b/tts-azure/requirements.txt
@@ -1,6 +1,5 @@
-fastapi
pyyaml
+fastapi
aiohttp
sox
-watchdog
-argh
+uvicorn
\ No newline at end of file
diff --git a/tts-azure/src/__pycache__/main.cpython-38.pyc b/tts-azure/src/__pycache__/main.cpython-38.pyc
deleted file mode 100644
index c8469b0..0000000
Binary files a/tts-azure/src/__pycache__/main.cpython-38.pyc and /dev/null differ
diff --git a/tts-azure/src/config.yaml b/tts-azure/src/config.yaml
deleted file mode 100644
index ddccdc5..0000000
--- a/tts-azure/src/config.yaml
+++ /dev/null
@@ -1,22 +0,0 @@
-logging:
- version: 1
- formatters:
- normal:
- format: "[%(asctime)s] [%(levelname)s@%(name)s] %(message)s"
- handlers:
- file:
- class: logging.handlers.RotatingFileHandler
- formatter: normal
- filename: publisher.log
- maxBytes: 10485760
- backupCount: 10
- console:
- class: logging.StreamHandler
- formatter: normal
- loggers:
- publisher:
- level: DEBUG
- handlers: [file, console]
- root:
- level: DEBUG
- handlers: [file, console]
diff --git a/tts-azure/src/main.py b/tts-azure/src/main.py
index eb2fc64..eec75e9 100644
--- a/tts-azure/src/main.py
+++ b/tts-azure/src/main.py
@@ -16,7 +16,7 @@
import logging.config
-with open("config.yaml") as file_stream:
+with open("/etc/config.yaml") as file_stream:
config = yaml.full_load(file_stream)
logging.config.dictConfig(config.get("logging"))
@@ -29,27 +29,30 @@
logger.error("Please set the environment variable AZURE_API_TOKEN")
sys.exit(1)
+DEFAULT_LANG = os.environ.get("DEFAULT_LANG", "es-MX-JorgeNeural")
+
app = FastAPI()
base_url = "https://eastus.tts.speech.microsoft.com/cognitiveservices"
api_token = os.environ.get("AZURE_API_TOKEN")
output_format = "raw-24khz-16bit-mono-pcm"
provider = "azure"
+cache_format = "sln24"
ssml_string = """
-
- {lexicon_tag}
{text}
-
"""
+loop = asyncio.get_event_loop()
+
+
@app.get("/voicelist")
async def voicelisst():
headers = {"Ocp-Apim-Subscription-Key": api_token}
@@ -60,49 +63,62 @@ async def voicelisst():
@app.post("/synthesize")
-async def synthesize(text: str,
- voice: Optional[str] = "es-MX-JorgeNeural",
- rate: Optional[str] = "0%",
- pitch: Optional[str] = "0%",
- lexicon: Optional[str] = None,
- ):
- headers = {"Ocp-Apim-Subscription-Key": api_token,
- "X-Microsoft-OutputFormat": output_format,
- "Content-Type": "application/ssml+xml"
- }
- lexicon_tag = f'' if lexicon else ""
-
- data = ssml_string.format(text=text,
- voice=voice,
- language=voice[0:5],
- rate=rate,
- pitch=pitch,
- lexicon_tag=lexicon_tag
- )
+async def synthesize(
+ text: str,
+ voice: Optional[str] = DEFAULT_LANG,
+ exten: Optional[str] = "alaw",
+ file_type: Optional[str] = "al",
+ rate: Optional[str] = 8000,
+):
+ headers = {
+ "Ocp-Apim-Subscription-Key": api_token,
+ "X-Microsoft-OutputFormat": output_format,
+ "Content-Type": "application/ssml+xml",
+ }
+
+ # SSML to send
+ data = ssml_string.format(text=text, voice=voice, language=voice[0:5])
logger.debug(data)
+
# Define file name and path
- filename = hashlib.md5(text.encode()).hexdigest()
- file_dir = f"/sounds/{provider}/{voice}"
+ md5_data = hashlib.md5(data.encode()).hexdigest()
+ d1, d2, filename = md5_data[0:2], md5_data[2:4], md5_data
+ dir_schema = f"{provider}/{voice}/{d1}/{d2}"
+ file_dir = f"/sounds/{dir_schema}"
file_path = f"{file_dir}/{filename}"
+    # Ensure the target directory before any file is written into it
ensure_dir(file_dir)
- output_filepath = f"{file_path}.sln24"
- if not os.path.exists(output_filepath) or os.path.getsize(output_filepath) <= 0:
+
+    # Store the SSML content that is sent to Azure in a plain-text file
+ with open(f"{file_path}.txt", "w") as content:
+ content.write(data)
+ content.close()
+
+    # This block performs the synthesis using Azure.
+    # It is requested in the best quality (raw 24 kHz, 16-bit, mono PCM) and cached.
+    # After that, the final format is obtained using SoX.
+ cache_filepath = f"{file_path}.{cache_format}"
+ if not os.path.exists(cache_filepath) or os.path.getsize(cache_filepath) <= 0:
async with aiohttp.ClientSession(headers=headers) as session:
async with session.post(f"{base_url}/v1", data=data) as result:
- with open(output_filepath, 'wb') as output_file:
+ with open(cache_filepath, "wb") as output_file:
while True:
chunk = await result.content.read(1024)
if not chunk:
break
output_file.write(chunk)
- loop = asyncio.get_event_loop()
- result = await loop.run_in_executor(None,
- partial(sox_converter, file_path, 'alaw', 'al', 8000)
- )
+ if not os.path.exists(cache_filepath) or os.path.getsize(cache_filepath) <= 0:
+ result = "azure_error"
+ else:
+        # The SoX converter runs in the loop's default executor and is awaited here
+ result = await loop.run_in_executor(
+ None, partial(sox_converter, file_path, exten, file_type, rate)
+ )
- return {"sound_path": f"{provider}/{voice}/{filename}"}
+ # The final result is returned
+ return {"sound_path": f"{dir_schema}/{filename}", "exten": exten, "result": result}
def ensure_dir(directory):
@@ -111,20 +127,20 @@ def ensure_dir(directory):
def sox_converter(file_path, exten, file_type, rate=16000, bits=16, channels=1):
- input_filepath = f'{file_path}.sln24'
- output_filepath = f'{file_path}.{exten}'
+ input_filepath = f"{file_path}.{cache_format}"
+ output_filepath = f"{file_path}.{exten}"
- if not os.path.exists(output_filepath) or os.path.getsize(output_filepath) <= 0:
+ if os.path.exists(output_filepath) and os.path.getsize(output_filepath) > 0:
+ return "cached"
+ else:
tfm = sox.Transformer()
tfm.silence(location=1)
tfm.silence(location=-1)
tfm.pad(0.05, 0.05)
- tfm.set_input_format(file_type='sln', rate=24000, bits=16, channels=1)
+ tfm.set_input_format(file_type="sln", rate=24000, bits=16, channels=1)
tfm.set_output_format(file_type, rate, bits, channels)
result = tfm.build_file(input_filepath=input_filepath, output_filepath=output_filepath)
- else:
- result = None
- return result
+ return "success" if result else "error"