diff --git a/Demo/Demo-Info.plist b/Demo/Demo-Info.plist
new file mode 100644
index 00000000..ff579a6c
--- /dev/null
+++ b/Demo/Demo-Info.plist
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>UIFileSharingEnabled</key>
+	<true/>
+</dict>
+</plist>
diff --git a/Demo/Demo.xcodeproj/project.pbxproj b/Demo/Demo.xcodeproj/project.pbxproj
index 60e31dba..edde7d8d 100644
--- a/Demo/Demo.xcodeproj/project.pbxproj
+++ b/Demo/Demo.xcodeproj/project.pbxproj
@@ -18,6 +18,7 @@
/* End PBXBuildFile section */
/* Begin PBXFileReference section */
+ 8CF490312B066F26002C202C /* Demo-Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = "Demo-Info.plist"; sourceTree = "<group>"; };
EFBC533C29DFB4EA00334182 /* Demo.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = Demo.app; sourceTree = BUILT_PRODUCTS_DIR; };
EFBC533F29DFB4EA00334182 /* DemoApp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DemoApp.swift; sourceTree = "<group>"; };
EFBC534329DFB4EB00334182 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
@@ -45,6 +46,7 @@
EFBC533329DFB4EA00334182 = {
isa = PBXGroup;
children = (
+ 8CF490312B066F26002C202C /* Demo-Info.plist */,
EFBC535F29DFCE0700334182 /* Packages */,
EFBC533E29DFB4EA00334182 /* App */,
EFBC533D29DFB4EA00334182 /* Products */,
@@ -299,8 +301,10 @@
CODE_SIGN_ENTITLEMENTS = App/Demo.entitlements;
CODE_SIGN_STYLE = Automatic;
CURRENT_PROJECT_VERSION = 1;
+ DEVELOPMENT_TEAM = "";
ENABLE_PREVIEWS = YES;
GENERATE_INFOPLIST_FILE = YES;
+ INFOPLIST_FILE = "Demo-Info.plist";
"INFOPLIST_KEY_UIApplicationSceneManifest_Generation[sdk=iphoneos*]" = YES;
"INFOPLIST_KEY_UIApplicationSceneManifest_Generation[sdk=iphonesimulator*]" = YES;
"INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents[sdk=iphoneos*]" = YES;
@@ -336,8 +340,10 @@
CODE_SIGN_ENTITLEMENTS = App/Demo.entitlements;
CODE_SIGN_STYLE = Automatic;
CURRENT_PROJECT_VERSION = 1;
+ DEVELOPMENT_TEAM = "";
ENABLE_PREVIEWS = YES;
GENERATE_INFOPLIST_FILE = YES;
+ INFOPLIST_FILE = "Demo-Info.plist";
"INFOPLIST_KEY_UIApplicationSceneManifest_Generation[sdk=iphoneos*]" = YES;
"INFOPLIST_KEY_UIApplicationSceneManifest_Generation[sdk=iphonesimulator*]" = YES;
"INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents[sdk=iphoneos*]" = YES;
diff --git a/Demo/DemoChat/Sources/Extensions/View+RootVC.swift b/Demo/DemoChat/Sources/Extensions/View+RootVC.swift
new file mode 100644
index 00000000..02de7f9e
--- /dev/null
+++ b/Demo/DemoChat/Sources/Extensions/View+RootVC.swift
@@ -0,0 +1,16 @@
+//
+// View+RootVC.swift
+//
+//
+// Created by Ihor Makhnyk on 20.11.2023.
+//
+
+import SwiftUI
+
+extension View {
+ func getCurrentViewController() -> UIViewController? {
+ guard let windowScene = UIApplication.shared.connectedScenes.first as? UIWindowScene,
+ let rootViewController = windowScene.windows.first?.rootViewController else { return nil }
+ return rootViewController
+ }
+}
diff --git a/Demo/DemoChat/Sources/MiscStore.swift b/Demo/DemoChat/Sources/MiscStore.swift
index e2e732e5..8665974b 100644
--- a/Demo/DemoChat/Sources/MiscStore.swift
+++ b/Demo/DemoChat/Sources/MiscStore.swift
@@ -5,7 +5,7 @@
// Created by Aled Samuel on 22/04/2023.
//
-import Foundation
+import UIKit
import OpenAI
public final class MiscStore: ObservableObject {
@@ -19,7 +19,7 @@ public final class MiscStore: ObservableObject {
self.openAIClient = openAIClient
}
- // MARK: Models
+ // MARK: - Models
@MainActor
func getModels() async {
@@ -32,11 +32,11 @@ public final class MiscStore: ObservableObject {
}
}
- // MARK: Moderations
+ // MARK: - Moderations
@Published var moderationConversation = Conversation(id: "", messages: [])
@Published var moderationConversationError: Error?
-
+
@MainActor
func sendModerationMessage(_ message: Message) async {
moderationConversation.messages.append(message)
diff --git a/Demo/DemoChat/Sources/SpeechStore.swift b/Demo/DemoChat/Sources/SpeechStore.swift
new file mode 100644
index 00000000..516d5cb8
--- /dev/null
+++ b/Demo/DemoChat/Sources/SpeechStore.swift
@@ -0,0 +1,59 @@
+//
+// SpeechStore.swift
+//
+//
+// Created by Ihor Makhnyk on 20.11.2023.
+//
+
+import OpenAI
+import SwiftUI
+import AVFAudio
+
+public final class SpeechStore: ObservableObject {
+ public var openAIClient: OpenAIProtocol
+
+ @Published var audioObjects: [AudioObject] = []
+
+ public init(
+ openAIClient: OpenAIProtocol
+ ) {
+ self.openAIClient = openAIClient
+ }
+
+ struct AudioObject: Identifiable {
+ let id = UUID()
+ let prompt: String
+ let audioPlayer: AVAudioPlayer?
+ let originResponse: AudioSpeechResult
+ let format: String
+ }
+
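+ /// Sends the query to the speech endpoint and, on success, wraps the returned audio in an AVAudioPlayer-backed AudioObject for playback in the list.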
+ @MainActor
+ func createSpeech(_ query: AudioSpeechQuery) async {
+ guard let input = query.input, !input.isEmpty else { return }
+ do {
+ let response = try await openAIClient.audioCreateSpeech(query: query)
+ guard let data = response.audioData else { return }
+ let player = try? AVAudioPlayer(data: data)
+ let audioObject = AudioObject(prompt: input,
+ audioPlayer: player,
+ originResponse: response,
+ format: query.responseFormat.rawValue)
+ audioObjects.append(audioObject)
+ } catch {
+ print(error.localizedDescription)
+ }
+ }
+
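+ /// Writes the audio data into the app's Documents directory and hands the resulting file URL to the completion handler (used by the export flow in TextToSpeechView).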
+ func getFileInDocumentsDirectory(_ data: Data, fileName: String, _ completion: @escaping (URL) -> Void) {
+ if let fileURL = try? FileManager.default.url(for: .documentDirectory, in: .userDomainMask, appropriateFor: nil, create: true) {
+ let saveURL = fileURL.appendingPathComponent(fileName)
+ do {
+ try data.write(to: saveURL)
+ completion(saveURL)
+ } catch {
+ print(error.localizedDescription)
+ }
+ }
+ }
+}
diff --git a/Demo/DemoChat/Sources/UI/Misc/MiscView.swift b/Demo/DemoChat/Sources/UI/Misc/MiscView.swift
index cc69b4ac..b8572400 100644
--- a/Demo/DemoChat/Sources/UI/Misc/MiscView.swift
+++ b/Demo/DemoChat/Sources/UI/Misc/MiscView.swift
@@ -24,6 +24,9 @@ public struct MiscView: View {
Section(header: Text("Moderations")) {
NavigationLink("Moderation Chat", destination: ModerationChatView(store: store))
}
+ Section(header: Text("Audio")) {
+ NavigationLink("Create Speech", destination: TextToSpeechView(store: SpeechStore(openAIClient: store.openAIClient)))
+ }
}
.listStyle(.insetGrouped)
.navigationTitle("Misc")
diff --git a/Demo/DemoChat/Sources/UI/TextToSpeechView.swift b/Demo/DemoChat/Sources/UI/TextToSpeechView.swift
new file mode 100644
index 00000000..459a4423
--- /dev/null
+++ b/Demo/DemoChat/Sources/UI/TextToSpeechView.swift
@@ -0,0 +1,148 @@
+//
+// TextToSpeechView.swift
+//
+//
+// Created by Ihor Makhnyk on 16.11.2023.
+//
+
+import SwiftUI
+import OpenAI
+import UIKit
+
+public struct TextToSpeechView: View {
+
+ @ObservedObject var store: SpeechStore
+
+ @State private var prompt: String = ""
+ @State private var voice: AudioSpeechQuery.AudioSpeechVoice = .alloy
+ @State private var speed: Double = 1
+ @State private var responseFormat: AudioSpeechQuery.AudioSpeechResponseFormat = .mp3
+
+ public init(store: SpeechStore) {
+ self.store = store
+ }
+
+ public var body: some View {
+ List {
+ Section {
+ HStack {
+ VStack {
+ Text("Prompt")
+ Spacer()
+ }
+ .padding(.vertical, 8)
+ Spacer()
+ ZStack(alignment: .topTrailing) {
+ TextEditor(text: $prompt)
+ .scrollContentBackground(.hidden)
+ .multilineTextAlignment(.trailing)
+ if prompt.isEmpty {
+ Text("...input")
+ .foregroundStyle(.secondary)
+ .multilineTextAlignment(.trailing)
+ .allowsHitTesting(false)
+ .padding(8)
+ }
+ }
+ }
+ HStack {
+ Picker("Voice", selection: $voice) {
+ let allVoices = AudioSpeechQuery.AudioSpeechVoice.allCases
+ ForEach(allVoices, id: \.self) { voice in
+ Text("\(voice.rawValue.capitalized)")
+ }
+ }
+ }
+ HStack {
+ Text("Speed: ")
+ Spacer()
+ Stepper(value: $speed, in: 0.25...4, step: 0.25) {
+ HStack {
+ Spacer()
+ Text("**\(String(format: "%.2f", speed))**")
+ }
+ }
+ }
+ HStack {
+ Picker("Format", selection: $responseFormat) {
+ let allFormats = AudioSpeechQuery.AudioSpeechResponseFormat.allCases
+ ForEach(allFormats, id: \.self) { format in
+ Text(".\(format.rawValue)")
+ }
+ }
+ }
+ } footer: {
+ if responseFormat == .opus {
+ Text("'.opus' is unsupported by AVFAudio player.").foregroundStyle(.secondary).font(.caption)
+ }
+ }
+ Section {
+ HStack {
+ Button("Create Speech") {
+ let query = AudioSpeechQuery(model: .tts_1,
+ input: prompt,
+ voice: voice,
+ responseFormat: responseFormat,
+ speed: speed)
+ Task {
+ await store.createSpeech(query)
+ }
+ prompt = ""
+ }
+ .foregroundColor(.accentColor)
+ .disabled(prompt.replacingOccurrences(of: " ", with: "").isEmpty)
+ Spacer()
+ }
+ }
+ if !store.audioObjects.isEmpty {
+ Section("Tap to play, swipe to save:") {
+ ForEach(store.audioObjects) { object in
+ HStack {
+ Text(object.prompt.capitalized)
+ Spacer()
+ Button(action: {
+ guard let player = object.audioPlayer,
+ object.format != AudioSpeechQuery.AudioSpeechResponseFormat.opus.rawValue else { return }
+
+ if player.isPlaying {
+ player.stop()
+ } else {
+ player.prepareToPlay()
+ player.volume = 1
+ player.play()
+ }
+ }, label: {
+ Image(systemName: "play.fill").foregroundStyle(object.format == AudioSpeechQuery.AudioSpeechResponseFormat.opus.rawValue ? Color.secondary : Color.accentColor)
+ })
+ }
+ .swipeActions(edge: .trailing, allowsFullSwipe: false) {
+ Button {
+ presentUserDirectoryDocumentPicker(for: object.originResponse.audioData, filename: "GeneratedAudio.\(object.format)")
+ } label: {
+ Image(systemName: "square.and.arrow.down")
+ }
+ .tint(.accentColor)
+ }
+ }
+ }
+ }
+ }
+ .listStyle(.insetGrouped)
+ .scrollDismissesKeyboard(.interactively)
+ .navigationTitle("Create Speech")
+ }
+}
+
+extension TextToSpeechView {
+
+ private func presentUserDirectoryDocumentPicker(for audioData: Data?, filename: String) {
+ guard let audioData else { return }
+ store.getFileInDocumentsDirectory(audioData, fileName: filename) { fileUrl in
+ let filePickerVC = UIDocumentPickerViewController(forExporting: [fileUrl], asCopy: false)
+ filePickerVC.shouldShowFileExtensions = true
+
+ guard let vc = getCurrentViewController() else { return }
+ vc.present(filePickerVC, animated: true, completion: nil)
+ }
+ }
+}
diff --git a/README.md b/README.md
index f8b1eef5..9d42bb4b 100644
--- a/README.md
+++ b/README.md
@@ -24,6 +24,7 @@ This repository contains Swift community-maintained implementation over [OpenAI]
- [Create Image Edit](#create-image-edit)
- [Create Image Variation](#create-image-variation)
- [Audio](#audio)
+ - [Audio Create Speech](#audio-create-speech)
- [Audio Transcriptions](#audio-transcriptions)
- [Audio Translations](#audio-translations)
- [Edits](#edits)
@@ -523,6 +524,48 @@ Transcribe audio into whatever language the audio is in.
Translate and transcribe the audio into english.
File uploads are currently limited to 25 MB and the following input file types are supported: mp3, mp4, mpeg, mpga, m4a, wav, and webm.
+#### Audio Create Speech
+
+This function sends an `AudioSpeechQuery` to the OpenAI API to create audio speech from text using a specific voice and format.
+
+[Learn more about voices.](https://platform.openai.com/docs/guides/text-to-speech/voice-options)
+[Learn more about models.](https://platform.openai.com/docs/models/tts)
+
+**Request:**
+
+```swift
+public struct AudioSpeechQuery: Codable, Equatable {
+    //...
+    public let model: Model // tts-1 or tts-1-hd
+    public let input: String?
+    public let voice: AudioSpeechVoice
+    public let responseFormat: AudioSpeechResponseFormat
+    public let speed: String? // Initialized from a Double? and encoded as a string
+    //...
+}
+```
+
+**Response:**
+
+```swift
+/// Audio data in one of the following formats: `mp3`, `opus`, `aac`, `flac`
+public let audioData: Data?
+```
+
+**Example:**
+
+```swift
+let query = AudioSpeechQuery(model: .tts_1, input: "Hello, world!", voice: .alloy, responseFormat: .mp3, speed: 1.0)
+
+openAI.audioCreateSpeech(query: query) { result in
+ // Handle response here
+}
+//or
+let result = try await openAI.audioCreateSpeech(query: query)
+```
+[OpenAI Create Speech – Documentation](https://platform.openai.com/docs/api-reference/audio/createSpeech)
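+
+The returned `audioData` contains the raw bytes of the requested format. As a minimal sketch (assuming an `.mp3` response and a target where `AVFAudio` is available), the data can be played back directly with `AVAudioPlayer`, which is what the demo app does:
+
+```swift
+import AVFAudio
+
+let speech = try await openAI.audioCreateSpeech(query: query)
+if let data = speech.audioData {
+    // '.opus' is not supported by AVAudioPlayer; pick mp3/aac/flac for in-app playback.
+    let player = try AVAudioPlayer(data: data)
+    player.prepareToPlay()
+    player.play()
+}
+```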
+
+
#### Audio Transcriptions
Transcribes audio into the input language.
diff --git a/Sources/OpenAI/OpenAI.swift b/Sources/OpenAI/OpenAI.swift
index 720f0f23..3dcad3c9 100644
--- a/Sources/OpenAI/OpenAI.swift
+++ b/Sources/OpenAI/OpenAI.swift
@@ -111,13 +111,20 @@ final public class OpenAI: OpenAIProtocol {
public func audioTranslations(query: AudioTranslationQuery, completion: @escaping (Result<AudioTranslationResult, Error>) -> Void) {
performRequest(request: MultipartFormDataRequest(body: query, url: buildURL(path: .audioTranslations)), completion: completion)
}
+
+ public func audioCreateSpeech(query: AudioSpeechQuery, completion: @escaping (Result<AudioSpeechResult, Error>) -> Void) {
+ performSpeechRequest(request: JSONRequest(body: query, url: buildURL(path: .audioSpeech)), completion: completion)
+ }
+
}
extension OpenAI {
func performRequest<ResultType: Codable>(request: any URLRequestBuildable, completion: @escaping (Result<ResultType, Error>) -> Void) {
do {
- let request = try request.build(token: configuration.token, organizationIdentifier: configuration.organizationIdentifier, timeoutInterval: configuration.timeoutInterval)
+ let request = try request.build(token: configuration.token,
+ organizationIdentifier: configuration.organizationIdentifier,
+ timeoutInterval: configuration.timeoutInterval)
let task = session.dataTask(with: request) { data, _, error in
if let error = error {
completion(.failure(error))
@@ -153,7 +160,9 @@ extension OpenAI {
func performSteamingRequest<ResultType: Codable>(request: any URLRequestBuildable, onResult: @escaping (Result<ResultType, Error>) -> Void, completion: ((Error?) -> Void)?) {
do {
- let request = try request.build(token: configuration.token, organizationIdentifier: configuration.organizationIdentifier, timeoutInterval: configuration.timeoutInterval)
+ let request = try request.build(token: configuration.token,
+ organizationIdentifier: configuration.organizationIdentifier,
+ timeoutInterval: configuration.timeoutInterval)
let session = StreamingSession(urlRequest: request)
session.onReceiveContent = {_, object in
onResult(.success(object))
@@ -171,6 +180,40 @@ extension OpenAI {
completion?(error)
}
}
+
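+ // Unlike the JSON endpoints, the speech endpoint responds with raw audio bytes, so the result is wrapped into AudioSpeechResult instead of going through the generic Codable decoding in performRequest.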
+ func performSpeechRequest(request: any URLRequestBuildable, completion: @escaping (Result<AudioSpeechResult, Error>) -> Void) {
+ do {
+ let request = try request.build(token: configuration.token,
+ organizationIdentifier: configuration.organizationIdentifier,
+ timeoutInterval: configuration.timeoutInterval)
+
+ let task = session.dataTask(with: request) { data, _, error in
+ if let error = error {
+ completion(.failure(error))
+ return
+ }
+ guard let data = data else {
+ completion(.failure(OpenAIError.emptyData))
+ return
+ }
+
+ // The speech endpoint returns a JSON error payload on failure and raw audio bytes on success,
+ // so try to decode an API error first and otherwise wrap the data as audio.
+ if let decoded = try? JSONDecoder().decode(APIErrorResponse.self, from: data) {
+ completion(.failure(decoded))
+ return
+ }
+ completion(.success(AudioSpeechResult(audioData: data)))
+ }
+ task.resume()
+ } catch {
+ completion(.failure(error))
+ }
+ }
}
extension OpenAI {
@@ -194,6 +237,7 @@ extension APIPath {
static let models = "/v1/models"
static let moderations = "/v1/moderations"
+ static let audioSpeech = "/v1/audio/speech"
static let audioTranscriptions = "/v1/audio/transcriptions"
static let audioTranslations = "/v1/audio/translations"
diff --git a/Sources/OpenAI/Public/Models/AudioSpeechQuery.swift b/Sources/OpenAI/Public/Models/AudioSpeechQuery.swift
new file mode 100644
index 00000000..36db44a5
--- /dev/null
+++ b/Sources/OpenAI/Public/Models/AudioSpeechQuery.swift
@@ -0,0 +1,93 @@
+//
+// AudioSpeechQuery.swift
+//
+//
+// Created by Ihor Makhnyk on 13.11.2023.
+//
+
+import Foundation
+
+/// Learn more: [OpenAI Speech – Documentation](https://platform.openai.com/docs/api-reference/audio/createSpeech)
+public struct AudioSpeechQuery: Codable, Equatable {
+
+ /// Encapsulates the voices available for audio generation.
+ ///
+ /// To get acquainted with each of the voices and listen to samples, visit:
+ /// [OpenAI Text-to-Speech – Voice Options](https://platform.openai.com/docs/guides/text-to-speech/voice-options)
+ public enum AudioSpeechVoice: String, Codable, CaseIterable {
+ case alloy
+ case echo
+ case fable
+ case onyx
+ case nova
+ case shimmer
+ }
+
+ /// Encapsulates the response formats available for audio data.
+ ///
+ /// **Formats:**
+ /// - mp3
+ /// - opus
+ /// - aac
+ /// - flac
+ public enum AudioSpeechResponseFormat: String, Codable, CaseIterable {
+ case mp3
+ case opus
+ case aac
+ case flac
+ }
+ /// One of the available TTS models: tts-1 or tts-1-hd
+ public let model: Model
+ /// The text to generate audio for. The maximum length is 4096 characters.
+ public let input: String?
+ /// The voice to use when generating the audio. Supported voices are alloy, echo, fable, onyx, nova, and shimmer.
+ public let voice: AudioSpeechVoice
+ /// The audio format of the response. Supported formats are mp3, opus, aac, and flac.
+ public let responseFormat: AudioSpeechResponseFormat
+ /// The speed of the generated audio. Enter a value between **0.25** and **4.0**. Default: **1.0**
+ public let speed: String?
+
+ public enum CodingKeys: String, CodingKey {
+ case model
+ case input
+ case voice
+ case responseFormat = "response_format"
+ case speed
+ }
+
+ private enum Constants {
+ static let normalSpeed = 1.0
+ static let maxSpeed = 4.0
+ static let minSpeed = 0.25
+ }
+
+ public init(model: Model, input: String, voice: AudioSpeechVoice, responseFormat: AudioSpeechResponseFormat = .mp3, speed: Double?) {
+ self.model = AudioSpeechQuery.validateSpeechModel(model)
+ self.speed = AudioSpeechQuery.normalizeSpeechSpeed(speed)
+ self.input = input
+ self.voice = voice
+ self.responseFormat = responseFormat
+ }
+}
+
+private extension AudioSpeechQuery {
+
+ static func validateSpeechModel(_ inputModel: Model) -> Model {
+ let isModelOfIncorrectFormat = inputModel != .tts_1 && inputModel != .tts_1_hd
+ guard !isModelOfIncorrectFormat else {
+ print("[AudioSpeech] 'AudioSpeechQuery' must have a valid Text-To-Speech model, 'tts-1' or 'tts-1-hd'. Setting model to 'tts-1'.")
+ return .tts_1
+ }
+ return inputModel
+ }
+
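+ /// Clamps the speed into the supported 0.25...4.0 range and serializes it as the string the API expects.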
+ static func normalizeSpeechSpeed(_ inputSpeed: Double?) -> String {
+ guard let inputSpeed else { return "\(Constants.normalSpeed)" }
+ let isSpeedOutOfBounds = inputSpeed > Constants.maxSpeed || inputSpeed < Constants.minSpeed
+ guard !isSpeedOutOfBounds else {
+ print("[AudioSpeech] Speed value must be between 0.25 and 4.0. Setting value to closest valid.")
+ return inputSpeed < Constants.minSpeed ? "\(Constants.minSpeed)" : "\(Constants.maxSpeed)"
+ }
+ return "\(inputSpeed)"
+ }
+}
diff --git a/Sources/OpenAI/Public/Models/AudioSpeechResult.swift b/Sources/OpenAI/Public/Models/AudioSpeechResult.swift
new file mode 100644
index 00000000..4d8e62fb
--- /dev/null
+++ b/Sources/OpenAI/Public/Models/AudioSpeechResult.swift
@@ -0,0 +1,14 @@
+//
+// AudioSpeechResult.swift
+//
+//
+// Created by Ihor Makhnyk on 13.11.2023.
+//
+
+import Foundation
+
+public struct AudioSpeechResult {
+
+ /// Audio data in one of the following formats: `mp3`, `opus`, `aac`, `flac`
+ public let audioData: Data?
+}
diff --git a/Sources/OpenAI/Public/Models/Models/Models.swift b/Sources/OpenAI/Public/Models/Models/Models.swift
index 03b86f31..2e9e3099 100644
--- a/Sources/OpenAI/Public/Models/Models/Models.swift
+++ b/Sources/OpenAI/Public/Models/Models/Models.swift
@@ -69,6 +69,13 @@ public extension Model {
static let textDavinci_001 = "text-davinci-001"
static let codeDavinciEdit_001 = "code-davinci-edit-001"
+ // Speech
+
+ /// The latest text to speech model, optimized for speed.
+ static let tts_1 = "tts-1"
+ /// The latest text to speech model, optimized for quality.
+ static let tts_1_hd = "tts-1-hd"
+
// Transcriptions / Translations
static let whisper_1 = "whisper-1"
diff --git a/Sources/OpenAI/Public/Protocols/OpenAIProtocol+Async.swift b/Sources/OpenAI/Public/Protocols/OpenAIProtocol+Async.swift
index 909704ca..b515a234 100644
--- a/Sources/OpenAI/Public/Protocols/OpenAIProtocol+Async.swift
+++ b/Sources/OpenAI/Public/Protocols/OpenAIProtocol+Async.swift
@@ -184,6 +184,21 @@ public extension OpenAIProtocol {
}
}
+ func audioCreateSpeech(
+ query: AudioSpeechQuery
+ ) async throws -> AudioSpeechResult {
+ try await withCheckedThrowingContinuation { continuation in
+ audioCreateSpeech(query: query) { result in
+ switch result {
+ case let .success(success):
+ return continuation.resume(returning: success)
+ case let .failure(failure):
+ return continuation.resume(throwing: failure)
+ }
+ }
+ }
+ }
+
func audioTranscriptions(
query: AudioTranscriptionQuery
) async throws -> AudioTranscriptionResult {
diff --git a/Sources/OpenAI/Public/Protocols/OpenAIProtocol.swift b/Sources/OpenAI/Public/Protocols/OpenAIProtocol.swift
index 6519e8fe..caf97090 100644
--- a/Sources/OpenAI/Public/Protocols/OpenAIProtocol.swift
+++ b/Sources/OpenAI/Public/Protocols/OpenAIProtocol.swift
@@ -213,6 +213,23 @@ public protocol OpenAIProtocol {
**/
func moderations(query: ModerationsQuery, completion: @escaping (Result<ModerationsResult, Error>) -> Void)
+ /**
+ This function sends an `AudioSpeechQuery` to the OpenAI API to create audio speech from text using a specific voice and format.
+
+ Example:
+ ```
+ let query = AudioSpeechQuery(model: .tts_1, input: "Hello, world!", voice: .alloy, responseFormat: .mp3, speed: 1.0)
+ openAI.audioCreateSpeech(query: query) { result in
+ // Handle response here
+ }
+ ```
+
+ - Parameters:
+ - query: An `AudioSpeechQuery` object containing the parameters for the API request. This includes the Text-to-Speech model to be used, input text, voice to be used for generating the audio, the desired audio format, and the speed of the generated audio.
+ - completion: A closure which receives the result. The closure's parameter, `Result<AudioSpeechResult, Error>`, will either contain the `AudioSpeechResult` object with the audio data or an error if the request failed.
+ */
+ func audioCreateSpeech(query: AudioSpeechQuery, completion: @escaping (Result<AudioSpeechResult, Error>) -> Void)
+
/**
Transcribes audio data using OpenAI's audio transcription API and completes the operation asynchronously.
diff --git a/Tests/OpenAITests/OpenAITests.swift b/Tests/OpenAITests/OpenAITests.swift
index f195317e..a66e9a45 100644
--- a/Tests/OpenAITests/OpenAITests.swift
+++ b/Tests/OpenAITests/OpenAITests.swift
@@ -258,6 +258,15 @@ class OpenAITests: XCTestCase {
XCTAssertEqual(inError, apiError)
}
+ func testAudioSpeechError() async throws {
+ let query = AudioSpeechQuery(model: .tts_1, input: "Hello, world!", voice: .alloy, responseFormat: .mp3, speed: 1.0)
+ let inError = APIError(message: "foo", type: "bar", param: "baz", code: "100")
+ self.stub(error: inError)
+
+ let apiError: APIError = try await XCTExpectError { try await openAI.audioCreateSpeech(query: query) }
+ XCTAssertEqual(inError, apiError)
+ }
+
func testAudioTranscriptions() async throws {
let data = Data()
let query = AudioTranscriptionQuery(file: data, fileName: "audio.m4a", model: .whisper_1)