Merge pull request #120 from MacPaw/feat/audio-speech-api-feature
Feat: Create Speech [tts-1, tts-1-hd]
ingvarus-bc authored Nov 20, 2023
2 parents ac5892f + f01e2f7 commit 0db1679
Showing 15 changed files with 488 additions and 6 deletions.
8 changes: 8 additions & 0 deletions Demo/Demo-Info.plist
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>UIFileSharingEnabled</key>
<true/>
</dict>
</plist>
6 changes: 6 additions & 0 deletions Demo/Demo.xcodeproj/project.pbxproj
@@ -18,6 +18,7 @@
/* End PBXBuildFile section */

/* Begin PBXFileReference section */
8CF490312B066F26002C202C /* Demo-Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = "Demo-Info.plist"; sourceTree = "<group>"; };
EFBC533C29DFB4EA00334182 /* Demo.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = Demo.app; sourceTree = BUILT_PRODUCTS_DIR; };
EFBC533F29DFB4EA00334182 /* DemoApp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DemoApp.swift; sourceTree = "<group>"; };
EFBC534329DFB4EB00334182 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
@@ -45,6 +46,7 @@
EFBC533329DFB4EA00334182 = {
isa = PBXGroup;
children = (
8CF490312B066F26002C202C /* Demo-Info.plist */,
EFBC535F29DFCE0700334182 /* Packages */,
EFBC533E29DFB4EA00334182 /* App */,
EFBC533D29DFB4EA00334182 /* Products */,
@@ -299,8 +301,10 @@
CODE_SIGN_ENTITLEMENTS = App/Demo.entitlements;
CODE_SIGN_STYLE = Automatic;
CURRENT_PROJECT_VERSION = 1;
DEVELOPMENT_TEAM = "";
ENABLE_PREVIEWS = YES;
GENERATE_INFOPLIST_FILE = YES;
INFOPLIST_FILE = "Demo-Info.plist";
"INFOPLIST_KEY_UIApplicationSceneManifest_Generation[sdk=iphoneos*]" = YES;
"INFOPLIST_KEY_UIApplicationSceneManifest_Generation[sdk=iphonesimulator*]" = YES;
"INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents[sdk=iphoneos*]" = YES;
@@ -336,8 +340,10 @@
CODE_SIGN_ENTITLEMENTS = App/Demo.entitlements;
CODE_SIGN_STYLE = Automatic;
CURRENT_PROJECT_VERSION = 1;
DEVELOPMENT_TEAM = "";
ENABLE_PREVIEWS = YES;
GENERATE_INFOPLIST_FILE = YES;
INFOPLIST_FILE = "Demo-Info.plist";
"INFOPLIST_KEY_UIApplicationSceneManifest_Generation[sdk=iphoneos*]" = YES;
"INFOPLIST_KEY_UIApplicationSceneManifest_Generation[sdk=iphonesimulator*]" = YES;
"INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents[sdk=iphoneos*]" = YES;
16 changes: 16 additions & 0 deletions Demo/DemoChat/Sources/Extensions/View+RootVC.swift
@@ -0,0 +1,16 @@
//
// View+RootVC.swift
//
//
// Created by Ihor Makhnyk on 20.11.2023.
//

import SwiftUI

extension View {
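    /// Returns the root view controller of the first connected window scene, if one exists.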
func getCurrentViewController() -> UIViewController? {
guard let windowScene = UIApplication.shared.connectedScenes.first as? UIWindowScene,
let rootViewController = windowScene.windows.first?.rootViewController else { return nil }
return rootViewController
}
}
8 changes: 4 additions & 4 deletions Demo/DemoChat/Sources/MiscStore.swift
@@ -5,7 +5,7 @@
// Created by Aled Samuel on 22/04/2023.
//

import Foundation
import UIKit
import OpenAI

public final class MiscStore: ObservableObject {
@@ -19,7 +19,7 @@ public final class MiscStore: ObservableObject {
self.openAIClient = openAIClient
}

// MARK: Models
// MARK: - Models

@MainActor
func getModels() async {
@@ -32,11 +32,11 @@
}
}

// MARK: Moderations
// MARK: - Moderations

@Published var moderationConversation = Conversation(id: "", messages: [])
@Published var moderationConversationError: Error?

@MainActor
func sendModerationMessage(_ message: Message) async {
moderationConversation.messages.append(message)
59 changes: 59 additions & 0 deletions Demo/DemoChat/Sources/SpeechStore.swift
@@ -0,0 +1,59 @@
//
// SpeechStore.swift
//
//
// Created by Ihor Makhnyk on 20.11.2023.
//

import OpenAI
import SwiftUI
import AVFAudio

public final class SpeechStore: ObservableObject {
public var openAIClient: OpenAIProtocol

@Published var audioObjects: [AudioObject] = []

public init(
openAIClient: OpenAIProtocol
) {
self.openAIClient = openAIClient
}

struct AudioObject: Identifiable {
let id = UUID()
let prompt: String
let audioPlayer: AVAudioPlayer?
let originResponse: AudioSpeechResult
let format: String
}

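    /// Requests speech for the query, wraps the returned audio in an AVAudioPlayer, and publishes it for the UI.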
@MainActor
func createSpeech(_ query: AudioSpeechQuery) async {
guard let input = query.input, !input.isEmpty else { return }
do {
let response = try await openAIClient.audioCreateSpeech(query: query)
guard let data = response.audioData else { return }
let player = try? AVAudioPlayer(data: data)
let audioObject = AudioObject(prompt: input,
audioPlayer: player,
originResponse: response,
format: query.responseFormat.rawValue)
audioObjects.append(audioObject)
} catch {
print(error.localizedDescription)
}
}

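    /// Writes the audio data into the user's Documents directory under the given file name and hands the resulting URL to the completion handler.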
func getFileInDocumentsDirectory(_ data: Data, fileName: String, _ completion: @escaping (URL) -> Void) {
if let fileURL = try? FileManager.default.url(for: .documentDirectory, in: .userDomainMask, appropriateFor: nil, create: true) {
let saveURL = fileURL.appendingPathComponent(fileName)
do {
try data.write(to: saveURL)
completion(saveURL)
} catch {
print(error.localizedDescription)
}
}
}
}
3 changes: 3 additions & 0 deletions Demo/DemoChat/Sources/UI/Misc/MiscView.swift
@@ -24,6 +24,9 @@ public struct MiscView: View {
Section(header: Text("Moderations")) {
NavigationLink("Moderation Chat", destination: ModerationChatView(store: store))
}
Section(header: Text("Audio")) {
NavigationLink("Create Speech", destination: TextToSpeechView(store: SpeechStore(openAIClient: store.openAIClient)))
}
}
.listStyle(.insetGrouped)
.navigationTitle("Misc")
148 changes: 148 additions & 0 deletions Demo/DemoChat/Sources/UI/TextToSpeechView.swift
@@ -0,0 +1,148 @@
//
//  TextToSpeechView.swift
//
//
// Created by Ihor Makhnyk on 16.11.2023.
//

import SwiftUI
import OpenAI
import UIKit

public struct TextToSpeechView: View {

@ObservedObject var store: SpeechStore

@State private var prompt: String = ""
@State private var voice: AudioSpeechQuery.AudioSpeechVoice = .alloy
@State private var speed: Double = 1
@State private var responseFormat: AudioSpeechQuery.AudioSpeechResponseFormat = .mp3

public init(store: SpeechStore) {
self.store = store
}

public var body: some View {
List {
Section {
HStack {
VStack {
Text("Prompt")
Spacer()
}
.padding(.vertical, 8)
Spacer()
ZStack(alignment: .topTrailing) {
TextEditor(text: $prompt)
.scrollContentBackground(.hidden)
.multilineTextAlignment(.trailing)
if prompt.isEmpty {
Text("...input")
.foregroundStyle(.secondary)
.multilineTextAlignment(.trailing)
.allowsHitTesting(false)
.padding(8)
}
}
}
HStack {
Picker("Voice", selection: $voice) {
let allVoices = AudioSpeechQuery.AudioSpeechVoice.allCases
ForEach(allVoices, id: \.self) { voice in
Text("\(voice.rawValue.capitalized)")
}
}
}
HStack {
Text("Speed: ")
Spacer()
Stepper(value: $speed, in: 0.25...4, step: 0.25) {
HStack {
Spacer()
Text("**\(String(format: "%.2f", speed))**")
}
}
}
HStack {
Picker("Format", selection: $responseFormat) {
let allFormats = AudioSpeechQuery.AudioSpeechResponseFormat.allCases
ForEach(allFormats, id: \.self) { format in
Text(".\(format.rawValue)")
}
}
}
} footer: {
if responseFormat == .opus {
Text("'.opus' is unsupported by AVFAudio player.").foregroundStyle(.secondary).font(.caption)
}
}
Section {
HStack {
Button("Create Speech") {
let query = AudioSpeechQuery(model: .tts_1,
input: prompt,
voice: voice,
responseFormat: responseFormat,
speed: speed)
Task {
await store.createSpeech(query)
}
prompt = ""
}
.foregroundColor(.accentColor)
.disabled(prompt.replacingOccurrences(of: " ", with: "").isEmpty)
Spacer()
}
}
if !store.audioObjects.isEmpty {
Section("Click to play, swipe to save:") {
ForEach(store.audioObjects) { object in
HStack {
Text(object.prompt.capitalized)
Spacer()
Button(action: {
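// AVAudioPlayer cannot decode Opus, so playback is skipped for .opus results.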
guard let player = object.audioPlayer,
object.format != AudioSpeechQuery.AudioSpeechResponseFormat.opus.rawValue else { return }

if player.isPlaying {
player.stop()
} else {
player.prepareToPlay()
player.volume = 1
player.play()
}
}, label: {
Image(systemName: "play.fill").foregroundStyle(object.format == AudioSpeechQuery.AudioSpeechResponseFormat.opus.rawValue ? Color.secondary : Color.accentColor)
})
}
.swipeActions(edge: .trailing, allowsFullSwipe: false) {
Button {
presentUserDirectoryDocumentPicker(for: object.originResponse.audioData, filename: "GeneratedAudio.\(object.format)")
} label: {
Image(systemName: "square.and.arrow.down")
}
.tint(.accentColor)
}
}
}
}
}
.listStyle(.insetGrouped)
.scrollDismissesKeyboard(.interactively)
.navigationTitle("Create Speech")
}
}

extension TextToSpeechView {

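    /// Saves the audio data to the Documents directory, then presents a document picker so the user can export the file.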
private func presentUserDirectoryDocumentPicker(for audioData: Data?, filename: String) {
guard let audioData else { return }
store.getFileInDocumentsDirectory(audioData, fileName: filename) { fileUrl in
let filePickerVC = UIDocumentPickerViewController(forExporting: [fileUrl], asCopy: false)
filePickerVC.shouldShowFileExtensions = true

guard let vc = getCurrentViewController() else { return }
vc.present(filePickerVC, animated: true, completion: nil)
}
}
}
43 changes: 43 additions & 0 deletions README.md
@@ -24,6 +24,7 @@ This repository contains a community-maintained Swift implementation of the [OpenAI]
- [Create Image Edit](#create-image-edit)
- [Create Image Variation](#create-image-variation)
- [Audio](#audio)
- [Audio Create Speech](#audio-create-speech)
- [Audio Transcriptions](#audio-transcriptions)
- [Audio Translations](#audio-translations)
- [Edits](#edits)
@@ -523,6 +524,48 @@ Transcribe audio into whatever language the audio is in.
Translate and transcribe the audio into English.
File uploads are currently limited to 25 MB and the following input file types are supported: mp3, mp4, mpeg, mpga, m4a, wav, and webm.

#### Audio Create Speech

This function sends an `AudioSpeechQuery` to the OpenAI API and returns synthesized speech for the given text, rendered with the chosen voice and response format.

[Learn more about voices.](https://platform.openai.com/docs/guides/text-to-speech/voice-options)
[Learn more about models.](https://platform.openai.com/docs/models/tts)

**Request:**

```swift
public struct AudioSpeechQuery: Codable, Equatable {
//...
public let model: Model // tts-1 or tts-1-hd
public let input: String
public let voice: AudioSpeechVoice
public let responseFormat: AudioSpeechResponseFormat
public let speed: String? // The initializer accepts a Double? and stores it as a String
//...
}
```

**Response:**

```swift
/// Audio data in one of the following formats: `mp3`, `opus`, `aac`, `flac`
public let audioData: Data?
```

**Example:**

```swift
let query = AudioSpeechQuery(model: .tts_1, input: "Hello, world!", voice: .alloy, responseFormat: .mp3, speed: 1.0)

openAI.audioCreateSpeech(query: query) { result in
// Handle response here
}
// or
let result = try await openAI.audioCreateSpeech(query: query)
```
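Once the result arrives, the returned `audioData` can be handed to `AVAudioPlayer`, much as the demo's `SpeechStore` does. A minimal sketch (the `makeSpeechPlayer` helper is illustrative, and error handling and audio-session configuration are omitted):

```swift
import AVFAudio
import OpenAI

/// Requests speech for the given text and returns a ready-to-play AVAudioPlayer.
/// Illustrative sketch: hold on to the returned player, otherwise playback stops
/// as soon as it is deallocated.
func makeSpeechPlayer(openAI: OpenAI, text: String) async throws -> AVAudioPlayer? {
    let query = AudioSpeechQuery(model: .tts_1,
                                 input: text,
                                 voice: .alloy,
                                 responseFormat: .mp3,
                                 speed: 1.0)
    let result = try await openAI.audioCreateSpeech(query: query)
    guard let data = result.audioData else { return nil }
    let player = try AVAudioPlayer(data: data)
    player.prepareToPlay()
    return player
}
```

Note that `AVAudioPlayer` cannot decode Opus, so prefer `mp3`, `aac`, or `flac` when the goal is playback on Apple platforms.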
[OpenAI Create Speech – Documentation](https://platform.openai.com/docs/api-reference/audio/createSpeech)


#### Audio Transcriptions

Transcribes audio into the input language.