Skip to content

Commit

Permalink
[openaitts] OpenAI Text-to-Speech initial contribution (#17733)
Browse files Browse the repository at this point in the history
Also-by: Wouter Born <[email protected]>
Signed-off-by: Artur-Fedjukevits <[email protected]>
  • Loading branch information
Artur-Fedjukevits authored Dec 24, 2024
1 parent 50e3ca6 commit 7c6e658
Show file tree
Hide file tree
Showing 14 changed files with 408 additions and 0 deletions.
1 change: 1 addition & 0 deletions CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,7 @@
/bundles/org.openhab.voice.mactts/ @kaikreuzer
/bundles/org.openhab.voice.marytts/ @kaikreuzer
/bundles/org.openhab.voice.mimictts/ @dalgwen
/bundles/org.openhab.voice.openaitts/ @Artur-Fedjukevits
/bundles/org.openhab.voice.picotts/ @FlorianSW
/bundles/org.openhab.voice.pipertts/ @GiviMAD
/bundles/org.openhab.voice.pollytts/ @openhab/add-ons-maintainers
Expand Down
5 changes: 5 additions & 0 deletions bom/openhab-addons/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2301,6 +2301,11 @@
<artifactId>org.openhab.voice.mimictts</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.openhab.addons.bundles</groupId>
<artifactId>org.openhab.voice.openaitts</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.openhab.addons.bundles</groupId>
<artifactId>org.openhab.voice.picotts</artifactId>
Expand Down
14 changes: 14 additions & 0 deletions bundles/org.openhab.voice.openaitts/NOTICE
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
This content is produced and maintained by the openHAB project.

* Project home: https://www.openhab.org

== Declared Project Licenses

This program and the accompanying materials are made available under the terms
of the Eclipse Public License 2.0 which is available at
https://www.eclipse.org/legal/epl-2.0/.

== Source Code

https://github.com/openhab/openhab-addons

23 changes: 23 additions & 0 deletions bundles/org.openhab.voice.openaitts/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# OpenAI Text-to-Speech

The OpenAI TTS (Text-to-Speech) add-on for openHAB allows you to integrate OpenAI's Text-to-Speech capabilities into your openHAB system.
The advantage of this service over others is that one selected voice can speak different languages.
This is useful, for example, in conjunction with the ChatGPT binding, which can help with learning foreign languages.
You can find the pricing for this service at <https://openai.com/api/pricing/>.

## Configuration

To configure OpenAI TTS, go to **Settings / Other Services - OpenAI Text-to-Speech** and set:

* **apiKey** - The API key to be used for the requests.
* **apiUrl** - The server API where to reach the AI TTS service.
* **model** - The ID of the model to use for TTS.
* **speed** - The speed of the generated audio (defaults to `1`).

### Default Text-to-Speech and Voice Configuration

You can set up your preferred default Text-to-Speech service and default voice in the UI:

* Go to **Settings**.
* Edit **System Services - Voice**.
* Set **OpenAI TTS Service** as **Default Text-to-Speech**.
* Choose your preferred **Default Voice** for your setup.
16 changes: 16 additions & 0 deletions bundles/org.openhab.voice.openaitts/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Maven module descriptor for the OpenAI Text-to-Speech voice add-on bundle. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">

<modelVersion>4.0.0</modelVersion>

<!-- No groupId or version is declared for this module, so both come from this parent. -->
<parent>
<groupId>org.openhab.addons.bundles</groupId>
<artifactId>org.openhab.addons.reactor.bundles</artifactId>
<version>5.0.0-SNAPSHOT</version>
</parent>

<artifactId>org.openhab.voice.openaitts</artifactId>

<name>openHAB Add-ons :: Bundles :: Voice :: OpenAI Text-to-Speech</name>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Karaf feature descriptor for the OpenAI Text-to-Speech add-on. -->
<features name="org.openhab.voice.openaitts-${project.version}" xmlns="http://karaf.apache.org/xmlns/features/v1.4.0">
<repository>mvn:org.openhab.core.features.karaf/org.openhab.core.features.karaf.openhab-core/${ohc.version}/xml/features</repository>

<!-- The feature depends on openhab-runtime-base and installs the add-on bundle at start level 80. -->
<feature name="openhab-voice-openaitts" description="OpenAI Text-to-Speech" version="${project.version}">
<feature>openhab-runtime-base</feature>
<bundle start-level="80">mvn:org.openhab.addons.bundles/org.openhab.voice.openaitts/${project.version}</bundle>
</feature>
</features>
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/**
* Copyright (c) 2010-2024 Contributors to the openHAB project
*
* See the NOTICE file(s) distributed with this work for additional
* information.
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0
*
* SPDX-License-Identifier: EPL-2.0
*/
package org.openhab.voice.openaitts.internal;

import org.eclipse.jdt.annotation.NonNullByDefault;

/**
* Holds the configuration values of the OpenAI Text-to-Speech service.
* Each field is initialized with the value that applies when the setting is not configured.
*
* @author Artur Fedjukevits - Initial contribution
*/
@NonNullByDefault
public class OpenAITTSConfiguration {

// API key; empty by default.
public String apiKey = "";
// URL of the speech endpoint the synthesis request is sent to.
public String apiUrl = "https://api.openai.com/v1/audio/speech";
// Identifier of the TTS model to request.
public String model = "tts-1";
// Playback speed of the generated audio, kept as text.
public String speed = "1";
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/**
* Copyright (c) 2010-2024 Contributors to the openHAB project
*
* See the NOTICE file(s) distributed with this work for additional
* information.
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0
*
* SPDX-License-Identifier: EPL-2.0
*/
package org.openhab.voice.openaitts.internal;

import org.eclipse.jdt.annotation.NonNullByDefault;

/**
 * Constants shared by the classes of the OpenAI Text-to-Speech add-on.
 *
 * @author Artur Fedjukevits - Initial contribution
 */
@NonNullByDefault
public final class OpenAITTSConstants {

    /** Identifier of the TTS service. */
    public static final String TTS_SERVICE_ID = "openaitts";

    /** Persistent identifier (PID) under which the service and its configuration are registered. */
    public static final String TTS_SERVICE_PID = "org.openhab.voice.openaitts";

    private OpenAITTSConstants() {
        // Constants holder; not meant to be instantiated.
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
/**
* Copyright (c) 2010-2024 Contributors to the openHAB project
*
* See the NOTICE file(s) distributed with this work for additional
* information.
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0
*
* SPDX-License-Identifier: EPL-2.0
*/
package org.openhab.voice.openaitts.internal;

import static org.openhab.voice.openaitts.internal.OpenAITTSConstants.*;

import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.eclipse.jdt.annotation.NonNullByDefault;
import org.eclipse.jdt.annotation.Nullable;
import org.eclipse.jetty.client.HttpClient;
import org.eclipse.jetty.client.api.ContentResponse;
import org.eclipse.jetty.client.util.StringContentProvider;
import org.eclipse.jetty.http.HttpMethod;
import org.eclipse.jetty.http.HttpStatus;
import org.openhab.core.audio.AudioFormat;
import org.openhab.core.audio.AudioStream;
import org.openhab.core.audio.ByteArrayAudioStream;
import org.openhab.core.config.core.ConfigurableService;
import org.openhab.core.config.core.Configuration;
import org.openhab.core.io.net.http.HttpClientFactory;
import org.openhab.core.voice.AbstractCachedTTSService;
import org.openhab.core.voice.TTSCache;
import org.openhab.core.voice.TTSException;
import org.openhab.core.voice.TTSService;
import org.openhab.core.voice.Voice;
import org.osgi.framework.Constants;
import org.osgi.service.component.annotations.Activate;
import org.osgi.service.component.annotations.Component;
import org.osgi.service.component.annotations.Modified;
import org.osgi.service.component.annotations.Reference;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.gson.Gson;
import com.google.gson.JsonObject;

/**
* @author Artur Fedjukevits - Initial contribution
* API documentation: https://platform.openai.com/docs/guides/text-to-speech
*/
@Component(configurationPid = TTS_SERVICE_PID, property = Constants.SERVICE_PID + "="
+ TTS_SERVICE_PID, service = TTSService.class)
@ConfigurableService(category = "voice", label = "OpenAI TTS Service", description_uri = "voice:" + TTS_SERVICE_ID)

@NonNullByDefault
public class OpenAITTSService extends AbstractCachedTTSService {

private static final int REQUEST_TIMEOUT_MS = 10_000;
private final Logger logger = LoggerFactory.getLogger(OpenAITTSService.class);
private OpenAITTSConfiguration config = new OpenAITTSConfiguration();
private final HttpClient httpClient;
private final Gson gson = new Gson();
private static final Set<Voice> VOICES = Stream.of("nova", "alloy", "echo", "fable", "onyx", "shimmer")
.map(OpenAITTSVoice::new).collect(Collectors.toSet());

@Activate
public OpenAITTSService(@Reference HttpClientFactory httpClientFactory, @Reference TTSCache ttsCache,
Map<String, Object> config) {
super(ttsCache);
this.httpClient = httpClientFactory.getCommonHttpClient();
}

@Activate
protected void activate(Map<String, Object> config) {
this.config = new Configuration(config).as(OpenAITTSConfiguration.class);
}

@Modified
protected void modified(Map<String, Object> config) {
this.config = new Configuration(config).as(OpenAITTSConfiguration.class);
}

@Override
public Set<AudioFormat> getSupportedFormats() {
return Set.of(new AudioFormat(AudioFormat.CONTAINER_NONE, AudioFormat.CODEC_MP3, null, 16, 64000, 44100L));
}

@Override
public String getId() {
return TTS_SERVICE_ID;
}

@Override
public String getLabel(@Nullable Locale locale) {
return "OpenAI TTS Service";
}

@Override
public Set<Voice> getAvailableVoices() {
return VOICES;
}

/**
* Synthesizes the given text to audio data using the OpenAI API
*
* @param text The text to synthesize
* @param voice The voice to use
* @param requestedFormat The requested audio format
* @return The synthesized audio data
* @throws TTSException If the synthesis fails
*/
@Override
public AudioStream synthesizeForCache(String text, Voice voice, AudioFormat requestedFormat) throws TTSException {
JsonObject content = new JsonObject();
content.addProperty("model", config.model);
content.addProperty("input", text);
content.addProperty("voice", voice.getLabel().toLowerCase());
content.addProperty("speed", config.speed);

String queryJson = gson.toJson(content);

try {
ContentResponse response = httpClient.newRequest(config.apiUrl).method(HttpMethod.POST)
.timeout(REQUEST_TIMEOUT_MS, TimeUnit.MILLISECONDS)
.header("Authorization", "Bearer " + config.apiKey).header("Content-Type", "application/json")
.content(new StringContentProvider(queryJson)).send();

if (response.getStatus() == HttpStatus.OK_200) {
return new ByteArrayAudioStream(response.getContent(), requestedFormat);
} else {
logger.error("Request resulted in HTTP {} with message: {}", response.getStatus(),
response.getReason());
throw new TTSException("Failed to generate audio data");
}
} catch (InterruptedException | TimeoutException | ExecutionException e) {
logger.error("Request to OpenAI failed: {}", e.getMessage(), e);
throw new TTSException("Failed to generate audio data");
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/**
* Copyright (c) 2010-2024 Contributors to the openHAB project
*
* See the NOTICE file(s) distributed with this work for additional
* information.
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0
*
* SPDX-License-Identifier: EPL-2.0
*/
package org.openhab.voice.openaitts.internal;

import java.util.Locale;

import org.eclipse.jdt.annotation.NonNullByDefault;
import org.openhab.core.voice.Voice;

/**
 * A voice of the OpenAI Text-to-Speech service, described by a single name from which
 * both the unique identifier and the display label are derived.
 *
 * @author Artur Fedjukevits - Initial contribution
 */
@NonNullByDefault
public class OpenAITTSVoice implements Voice {

    private final String label;

    /**
     * Creates a voice.
     *
     * @param label the voice name
     */
    public OpenAITTSVoice(String label) {
        this.label = label;
    }

    /**
     * The unique identifier of the voice, used for internal purposes
     *
     * @return The voice name prefixed with {@code openaitts:}
     */
    @Override
    public String getUID() {
        return "openaitts:" + label;
    }

    /**
     * The voice label, used for GUI's or VUI's
     *
     * @return The voice name with its first character in upper case, or the empty string for an empty name
     */
    @Override
    public String getLabel() {
        // An empty name has no first character to capitalize; return it as is instead of failing.
        if (label.isEmpty()) {
            return label;
        }
        return Character.toUpperCase(label.charAt(0)) + label.substring(1);
    }

    /**
     * The locale of the voice
     *
     * @return Always {@link Locale#ENGLISH}
     */
    @Override
    public Locale getLocale() {
        return Locale.ENGLISH;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Add-on descriptor for the OpenAI Text-to-Speech voice add-on. -->
<addon:addon id="openaitts" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:addon="https://openhab.org/schemas/addon/v1.0.0"
xsi:schemaLocation="https://openhab.org/schemas/addon/v1.0.0 https://openhab.org/schemas/addon-1.0.0.xsd">

<type>voice</type>
<name>OpenAI Text-to-Speech</name>
<description>OpenAI TTS Service provides text-to-speech capabilities for openHAB.</description>
<connection>cloud</connection>

<!-- Same value as the service PID declared in OpenAITTSConstants. -->
<service-id>org.openhab.voice.openaitts</service-id>

<!-- Same URI as the description_uri of the service's @ConfigurableService annotation. -->
<config-description-ref uri="voice:openaitts"/>

</addon:addon>
Loading

0 comments on commit 7c6e658

Please sign in to comment.