[openaitts] OpenAI Text-to-Speech initial contribution (#17733)

Also-by: Wouter Born <[email protected]> Signed-off-by: Artur-Fedjukevits <[email protected]>
openhab · Dec 24, 2024 · 7c6e658 · 7c6e658
1 parent 50e3ca6
commit 7c6e658
Show file tree

Hide file tree

Showing 14 changed files with 408 additions and 0 deletions.
diff --git a/CODEOWNERS b/CODEOWNERS
@@ -465,6 +465,7 @@
 /bundles/org.openhab.voice.mactts/ @kaikreuzer
 /bundles/org.openhab.voice.marytts/ @kaikreuzer
 /bundles/org.openhab.voice.mimictts/ @dalgwen
+/bundles/org.openhab.voice.openaitts/ @Artur-Fedjukevits
 /bundles/org.openhab.voice.picotts/ @FlorianSW
 /bundles/org.openhab.voice.pipertts/ @GiviMAD
 /bundles/org.openhab.voice.pollytts/ @openhab/add-ons-maintainers

diff --git a/bom/openhab-addons/pom.xml b/bom/openhab-addons/pom.xml
@@ -2301,6 +2301,11 @@
       <artifactId>org.openhab.voice.mimictts</artifactId>
       <version>${project.version}</version>
     </dependency>
+    <dependency>
+      <groupId>org.openhab.addons.bundles</groupId>
+      <artifactId>org.openhab.voice.openaitts</artifactId>
+      <version>${project.version}</version>
+    </dependency>
     <dependency>
       <groupId>org.openhab.addons.bundles</groupId>
       <artifactId>org.openhab.voice.picotts</artifactId>

diff --git a/bundles/org.openhab.voice.openaitts/NOTICE b/bundles/org.openhab.voice.openaitts/NOTICE
@@ -0,0 +1,14 @@
+This content is produced and maintained by the openHAB project.
+
+* Project home: https://www.openhab.org
+
+== Declared Project Licenses
+
+This program and the accompanying materials are made available under the terms
+of the Eclipse Public License 2.0 which is available at
+https://www.eclipse.org/legal/epl-2.0/.
+
+== Source Code
+
+https://github.com/openhab/openhab-addons
+
diff --git a/bundles/org.openhab.voice.openaitts/README.md b/bundles/org.openhab.voice.openaitts/README.md
@@ -0,0 +1,23 @@
+# OpenAI Text-to-Speech
+
+The OpenAI TTS (Text-to-Speech) add-on for openHAB allows you to integrate OpenAI's Text-to-Speech capabilities into your openHAB system.
+The advantage of this service over others is that one selected voice can speak different languages.
+This is useful, for example, in conjunction with the ChatGPT binding, which can help with learning foreign languages.
+Pricing for this service is listed at <https://openai.com/api/pricing/>.
+
+## Configuration
+
+To configure OpenAI TTS, go to **Settings / Other Services - OpenAI Text-to-Speech** and set:
+
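+* **apiKey** - The API key to be used for the requests.
+* **apiUrl** - The URL of the text-to-speech API endpoint (default: `https://api.openai.com/v1/audio/speech`).
+* **model** - The ID of the model to use for TTS (default: `tts-1`).
+* **speed** - The speed of the generated audio (default: `1`).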
+
+### Default Text-to-Speech and Voice Configuration
+
+You can set up your preferred default Text-to-Speech service and default voice in the UI:
+
+* Go to **Settings**.
+* Edit **System Services - Voice**.
+* Set **OpenAI TTS Service** as **Default Text-to-Speech**.
+* Choose your preferred **Default Voice** for your setup.
diff --git a/bundles/org.openhab.voice.openaitts/pom.xml b/bundles/org.openhab.voice.openaitts/pom.xml
@@ -0,0 +1,16 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
+
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.openhab.addons.bundles</groupId>
+    <artifactId>org.openhab.addons.reactor.bundles</artifactId>
+    <version>5.0.0-SNAPSHOT</version>
+  </parent>
+
+  <artifactId>org.openhab.voice.openaitts</artifactId>
+
+  <name>openHAB Add-ons :: Bundles :: Voice :: OpenAI Text-to-Speech</name>
+</project>
diff --git a/bundles/org.openhab.voice.openaitts/src/main/feature/feature.xml b/bundles/org.openhab.voice.openaitts/src/main/feature/feature.xml
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<features name="org.openhab.voice.openaitts-${project.version}" xmlns="http://karaf.apache.org/xmlns/features/v1.4.0">
+	<repository>mvn:org.openhab.core.features.karaf/org.openhab.core.features.karaf.openhab-core/${ohc.version}/xml/features</repository>
+
+	<feature name="openhab-voice-openaitts" description="OpenAI Text-to-Speech" version="${project.version}">
+		<feature>openhab-runtime-base</feature>
+		<bundle start-level="80">mvn:org.openhab.addons.bundles/org.openhab.voice.openaitts/${project.version}</bundle>
+	</feature>
+</features>
diff --git a/....openaitts/src/main/java/org/openhab/voice/openaitts/internal/OpenAITTSConfiguration.java b/....openaitts/src/main/java/org/openhab/voice/openaitts/internal/OpenAITTSConfiguration.java
@@ -0,0 +1,27 @@
+/**
+ * Copyright (c) 2010-2024 Contributors to the openHAB project
+ *
+ * See the NOTICE file(s) distributed with this work for additional
+ * information.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ */
+package org.openhab.voice.openaitts.internal;
+
+import org.eclipse.jdt.annotation.NonNullByDefault;
+
+/**
+ * Holds the settings of the OpenAI TTS service.
+ * <p>
+ * The service obtains an instance through {@code new Configuration(config).as(OpenAITTSConfiguration.class)}, so
+ * the field names must match the configuration keys. The initializers below are the values a freshly constructed
+ * instance carries.
+ *
+ * @author Artur Fedjukevits - Initial contribution
+ */
+@NonNullByDefault
+public class OpenAITTSConfiguration {
+
+    /** API key; the service sends it as the bearer token of the {@code Authorization} header. */
+    public String apiKey = "";
+    /** URL the synthesis request is posted to. */
+    public String apiUrl = "https://api.openai.com/v1/audio/speech";
+    /** Model identifier forwarded as the {@code model} field of the request. */
+    public String model = "tts-1";
+    /** Playback speed forwarded as the {@code speed} field of the request; kept as text in this class. */
+    public String speed = "1";
+}
diff --git a/...oice.openaitts/src/main/java/org/openhab/voice/openaitts/internal/OpenAITTSConstants.java b/...oice.openaitts/src/main/java/org/openhab/voice/openaitts/internal/OpenAITTSConstants.java
@@ -0,0 +1,25 @@
+/**
+ * Copyright (c) 2010-2024 Contributors to the openHAB project
+ *
+ * See the NOTICE file(s) distributed with this work for additional
+ * information.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ */
+package org.openhab.voice.openaitts.internal;
+
+import org.eclipse.jdt.annotation.NonNullByDefault;
+
+/**
+ * Identifiers shared by the OpenAI TTS service classes.
+ *
+ * @author Artur Fedjukevits - Initial contribution
+ */
+@NonNullByDefault
+public class OpenAITTSConstants {
+
+    /** Service id returned by the TTS service and used as suffix of its {@code voice:} config description URI. */
+    public static final String TTS_SERVICE_ID = "openaitts";
+    /** Configuration/service PID under which the TTS service component is registered. */
+    public static final String TTS_SERVICE_PID = "org.openhab.voice.openaitts";
+}
diff --git a/....voice.openaitts/src/main/java/org/openhab/voice/openaitts/internal/OpenAITTSService.java b/....voice.openaitts/src/main/java/org/openhab/voice/openaitts/internal/OpenAITTSService.java
@@ -0,0 +1,148 @@
+/**
+ * Copyright (c) 2010-2024 Contributors to the openHAB project
+ *
+ * See the NOTICE file(s) distributed with this work for additional
+ * information.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ */
+package org.openhab.voice.openaitts.internal;
+
+import static org.openhab.voice.openaitts.internal.OpenAITTSConstants.*;
+
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import org.eclipse.jdt.annotation.NonNullByDefault;
+import org.eclipse.jdt.annotation.Nullable;
+import org.eclipse.jetty.client.HttpClient;
+import org.eclipse.jetty.client.api.ContentResponse;
+import org.eclipse.jetty.client.util.StringContentProvider;
+import org.eclipse.jetty.http.HttpMethod;
+import org.eclipse.jetty.http.HttpStatus;
+import org.openhab.core.audio.AudioFormat;
+import org.openhab.core.audio.AudioStream;
+import org.openhab.core.audio.ByteArrayAudioStream;
+import org.openhab.core.config.core.ConfigurableService;
+import org.openhab.core.config.core.Configuration;
+import org.openhab.core.io.net.http.HttpClientFactory;
+import org.openhab.core.voice.AbstractCachedTTSService;
+import org.openhab.core.voice.TTSCache;
+import org.openhab.core.voice.TTSException;
+import org.openhab.core.voice.TTSService;
+import org.openhab.core.voice.Voice;
+import org.osgi.framework.Constants;
+import org.osgi.service.component.annotations.Activate;
+import org.osgi.service.component.annotations.Component;
+import org.osgi.service.component.annotations.Modified;
+import org.osgi.service.component.annotations.Reference;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.gson.Gson;
+import com.google.gson.JsonObject;
+
+/**
+ * Text-to-speech service that posts synthesis requests to the URL configured in
+ * {@link OpenAITTSConfiguration#apiUrl} and returns the response body as an audio stream.
+ * <p>
+ * API documentation: https://platform.openai.com/docs/guides/text-to-speech
+ *
+ * @author Artur Fedjukevits - Initial contribution
+ */
+@Component(configurationPid = TTS_SERVICE_PID, property = Constants.SERVICE_PID + "="
+        + TTS_SERVICE_PID, service = TTSService.class)
+@ConfigurableService(category = "voice", label = "OpenAI TTS Service", description_uri = "voice:" + TTS_SERVICE_ID)
+@NonNullByDefault
+public class OpenAITTSService extends AbstractCachedTTSService {
+
+    /** Maximum time to wait for the HTTP response. */
+    private static final int REQUEST_TIMEOUT_MS = 10_000;
+
+    /** Speed sent when the configured value is not a usable number; matches the configuration default of "1". */
+    private static final double DEFAULT_SPEED = 1.0;
+
+    /** Voices offered by this service; unmodifiable because the same instance is handed to every caller. */
+    private static final Set<Voice> VOICES = Stream.of("nova", "alloy", "echo", "fable", "onyx", "shimmer")
+            .map(OpenAITTSVoice::new).collect(Collectors.toUnmodifiableSet());
+
+    private final Logger logger = LoggerFactory.getLogger(OpenAITTSService.class);
+    private final HttpClient httpClient;
+    private final Gson gson = new Gson();
+    private OpenAITTSConfiguration config = new OpenAITTSConfiguration();
+
+    /**
+     * Creates the service with the shared HTTP client and the TTS cache.
+     *
+     * @param httpClientFactory factory providing the common HTTP client used for all requests
+     * @param ttsCache cache handed to the {@link AbstractCachedTTSService} super class
+     * @param config component configuration; not read here, it is applied in {@link #activate(Map)}
+     */
+    @Activate
+    public OpenAITTSService(@Reference HttpClientFactory httpClientFactory, @Reference TTSCache ttsCache,
+            Map<String, Object> config) {
+        super(ttsCache);
+        this.httpClient = httpClientFactory.getCommonHttpClient();
+    }
+
+    /**
+     * Applies the initial component configuration.
+     *
+     * @param config configuration properties of the component
+     */
+    @Activate
+    protected void activate(Map<String, Object> config) {
+        this.config = new Configuration(config).as(OpenAITTSConfiguration.class);
+    }
+
+    /**
+     * Replaces the current configuration after it has been changed.
+     *
+     * @param config new configuration properties of the component
+     */
+    @Modified
+    protected void modified(Map<String, Object> config) {
+        this.config = new Configuration(config).as(OpenAITTSConfiguration.class);
+    }
+
+    @Override
+    public Set<AudioFormat> getSupportedFormats() {
+        // NOTE(review): the request never sets "response_format", so declaring MP3 relies on the API default.
+        return Set.of(new AudioFormat(AudioFormat.CONTAINER_NONE, AudioFormat.CODEC_MP3, null, 16, 64000, 44100L));
+    }
+
+    @Override
+    public String getId() {
+        return TTS_SERVICE_ID;
+    }
+
+    @Override
+    public String getLabel(@Nullable Locale locale) {
+        return "OpenAI TTS Service";
+    }
+
+    @Override
+    public Set<Voice> getAvailableVoices() {
+        return VOICES;
+    }
+
+    /**
+     * Synthesizes the given text to audio data using the OpenAI API
+     *
+     * @param text The text to synthesize
+     * @param voice The voice to use; its label, lower-cased, is sent as the API voice name
+     * @param requestedFormat The requested audio format; used to describe the returned stream
+     * @return The synthesized audio data
+     * @throws TTSException If the request fails, times out, is interrupted or does not return HTTP 200
+     */
+    @Override
+    public AudioStream synthesizeForCache(String text, Voice voice, AudioFormat requestedFormat) throws TTSException {
+        // Read the field once so the whole request is built from the same configuration instance.
+        OpenAITTSConfiguration currentConfig = config;
+
+        JsonObject content = new JsonObject();
+        content.addProperty("model", currentConfig.model);
+        content.addProperty("input", text);
+        // Locale.ROOT keeps the voice name stable regardless of the JVM default locale.
+        content.addProperty("voice", voice.getLabel().toLowerCase(Locale.ROOT));
+        // The API documents "speed" as a number, so send a JSON number instead of the configured text.
+        content.addProperty("speed", parseSpeed(currentConfig.speed));
+
+        String queryJson = gson.toJson(content);
+
+        try {
+            ContentResponse response = httpClient.newRequest(currentConfig.apiUrl).method(HttpMethod.POST)
+                    .timeout(REQUEST_TIMEOUT_MS, TimeUnit.MILLISECONDS)
+                    .header("Authorization", "Bearer " + currentConfig.apiKey)
+                    .header("Content-Type", "application/json").content(new StringContentProvider(queryJson))
+                    .send();
+
+            if (response.getStatus() == HttpStatus.OK_200) {
+                return new ByteArrayAudioStream(response.getContent(), requestedFormat);
+            } else {
+                logger.error("Request resulted in HTTP {} with message: {}", response.getStatus(),
+                        response.getReason());
+                throw new TTSException("Failed to generate audio data");
+            }
+        } catch (InterruptedException e) {
+            // Restore the interrupt flag so callers further up the stack can still observe the interruption.
+            Thread.currentThread().interrupt();
+            logger.error("Request to OpenAI failed: {}", e.getMessage(), e);
+            throw new TTSException("Failed to generate audio data", e);
+        } catch (TimeoutException | ExecutionException e) {
+            logger.error("Request to OpenAI failed: {}", e.getMessage(), e);
+            throw new TTSException("Failed to generate audio data", e);
+        }
+    }
+
+    /**
+     * Converts the configured speed text to a finite number, falling back to {@link #DEFAULT_SPEED} when the
+     * text cannot be parsed or is not finite (NaN and infinities cannot be written as JSON numbers).
+     */
+    private double parseSpeed(String speed) {
+        try {
+            double parsed = Double.parseDouble(speed.trim());
+            if (Double.isFinite(parsed)) {
+                return parsed;
+            }
+        } catch (NumberFormatException e) {
+            // handled by the fallback below
+        }
+        logger.warn("Invalid speed '{}' configured, using {} instead", speed, DEFAULT_SPEED);
+        return DEFAULT_SPEED;
+    }
+}
diff --git a/...ab.voice.openaitts/src/main/java/org/openhab/voice/openaitts/internal/OpenAITTSVoice.java b/...ab.voice.openaitts/src/main/java/org/openhab/voice/openaitts/internal/OpenAITTSVoice.java
@@ -0,0 +1,61 @@
+/**
+ * Copyright (c) 2010-2024 Contributors to the openHAB project
+ *
+ * See the NOTICE file(s) distributed with this work for additional
+ * information.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ */
+package org.openhab.voice.openaitts.internal;
+
+import java.util.Locale;
+
+import org.eclipse.jdt.annotation.NonNullByDefault;
+import org.openhab.core.voice.Voice;
+
+/**
+ * A single voice of the OpenAI TTS service, described by the name it was constructed with.
+ *
+ * @author Artur Fedjukevits - Initial contribution
+ */
+@NonNullByDefault
+public class OpenAITTSVoice implements Voice {
+
+    /** Prefix placed in front of the voice name to form the UID. */
+    private static final String UID_PREFIX = "openaitts:";
+
+    /** Voice name exactly as passed to the constructor. */
+    private final String name;
+
+    /**
+     * @param label the voice name, e.g. {@code nova}
+     */
+    public OpenAITTSVoice(String label) {
+        name = label;
+    }
+
+    /**
+     * Returns the identifier of this voice: the service prefix followed by the voice name.
+     *
+     * @return the unique identifier of the voice
+     */
+    @Override
+    public String getUID() {
+        return UID_PREFIX + name;
+    }
+
+    /**
+     * Returns the display label: the voice name with its first character upper-cased.
+     *
+     * @return the voice label
+     */
+    @Override
+    public String getLabel() {
+        char initial = Character.toUpperCase(name.charAt(0));
+        StringBuilder display = new StringBuilder(name.length());
+        display.append(initial).append(name, 1, name.length());
+        return display.toString();
+    }
+
+    /**
+     * Returns the locale reported for this voice, which is always English.
+     *
+     * @return the locale of the voice
+     */
+    @Override
+    public Locale getLocale() {
+        return Locale.ENGLISH;
+    }
+}
diff --git a/bundles/org.openhab.voice.openaitts/src/main/resources/OH-INF/addon/addon.xml b/bundles/org.openhab.voice.openaitts/src/main/resources/OH-INF/addon/addon.xml
@@ -0,0 +1,15 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<addon:addon id="openaitts" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+	xmlns:addon="https://openhab.org/schemas/addon/v1.0.0"
+	xsi:schemaLocation="https://openhab.org/schemas/addon/v1.0.0 https://openhab.org/schemas/addon-1.0.0.xsd">
+
+	<type>voice</type>
+	<name>OpenAI Text-to-Speech</name>
+	<description>OpenAI TTS Service provides text-to-speech capabilities for openHAB.</description>
+	<connection>cloud</connection>
+
+	<service-id>org.openhab.voice.openaitts</service-id>
+
+	<config-description-ref uri="voice:openaitts"/>
+
+</addon:addon>