-
Notifications
You must be signed in to change notification settings - Fork 364
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #120 from MacPaw/feat/audio-speech-api-feature
Feat: Create Speech [ tts-1, tts-1-hd ]
- Loading branch information
Showing 15 changed files with 488 additions and 6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> | ||
<plist version="1.0"> | ||
<dict> | ||
<key>UIFileSharingEnabled</key> | ||
<true/> | ||
</dict> | ||
</plist> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
// | ||
// View+RootVC.swift | ||
// | ||
// | ||
// Created by Ihor Makhnyk on 20.11.2023. | ||
// | ||
|
||
import SwiftUI | ||
|
||
extension View {
    /// Returns the view controller that should be used to present modal UI from SwiftUI code.
    ///
    /// The lookup prefers the foreground-active window scene and, within it, the key window;
    /// it falls back to the first window scene / first window when none is marked as such.
    /// Starting from that window's root view controller, the `presentedViewController` chain
    /// is followed so the topmost controller is returned — presenting from a controller that
    /// is already presenting something else would fail.
    ///
    /// - Returns: The topmost view controller, or `nil` when there is no window scene or the
    ///   chosen window has no root view controller.
    func getCurrentViewController() -> UIViewController? {
        // `connectedScenes` is an unordered set, so `.first` alone may yield a non-window or
        // background scene; filter to window scenes and prefer the active one.
        let windowScenes = UIApplication.shared.connectedScenes.compactMap { $0 as? UIWindowScene }
        let preferredScene = windowScenes.first { $0.activationState == .foregroundActive } ?? windowScenes.first
        guard let windowScene = preferredScene else { return nil }

        let window = windowScene.windows.first(where: \.isKeyWindow) ?? windowScene.windows.first
        guard var current = window?.rootViewController else { return nil }

        // Walk up to whatever is currently on top of the presentation stack.
        while let presented = current.presentedViewController {
            current = presented
        }
        return current
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
// | ||
// SpeechStore.swift | ||
// | ||
// | ||
// Created by Ihor Makhnyk on 20.11.2023. | ||
// | ||
|
||
import OpenAI | ||
import SwiftUI | ||
import AVFAudio | ||
|
||
/// Observable store that requests synthesized speech through an `OpenAIProtocol` client and
/// keeps the resulting audio entries for display.
public final class SpeechStore: ObservableObject {
    /// Client used to perform the create-speech requests.
    public var openAIClient: OpenAIProtocol

    /// Audio entries created so far, in creation order.
    @Published var audioObjects: [AudioObject] = []

    public init(
        openAIClient: OpenAIProtocol
    ) {
        self.openAIClient = openAIClient
    }

    /// One generated speech result together with the data needed to play and export it.
    struct AudioObject: Identifiable {
        let id = UUID()
        /// Text that was sent as the query input.
        let prompt: String
        /// Player for the returned audio; `nil` when a player could not be created from the data.
        let audioPlayer: AVAudioPlayer?
        /// The unmodified response returned by the client.
        let originResponse: AudioSpeechResult
        /// Raw value of the requested response format.
        let format: String
    }

    /// Requests speech for `query` and appends the result to `audioObjects`.
    ///
    /// Returns without doing anything when the query input is `nil` or empty, or when the
    /// response carries no audio data. Request errors are printed and otherwise ignored.
    ///
    /// - Parameter query: The speech query forwarded to `openAIClient`.
    @MainActor
    func createSpeech(_ query: AudioSpeechQuery) async {
        guard let input = query.input, !input.isEmpty else { return }
        do {
            let response = try await openAIClient.audioCreateSpeech(query: query)
            guard let data = response.audioData else { return }
            // Best effort: when no player can be built from the data the entry is still
            // stored, so the original response remains available to callers.
            let player = try? AVAudioPlayer(data: data)
            let audioObject = AudioObject(prompt: input,
                                          audioPlayer: player,
                                          originResponse: response,
                                          format: query.responseFormat.rawValue)
            audioObjects.append(audioObject)
        } catch {
            print(error.localizedDescription)
        }
    }

    /// Writes `data` to `fileName` inside the user's documents directory and hands the
    /// resulting file URL to `completion`.
    ///
    /// `completion` is invoked synchronously, and only when both the directory lookup and the
    /// write succeed; any failure is printed instead.
    ///
    /// - Parameters:
    ///   - data: Bytes to store.
    ///   - fileName: Name of the file (including extension) inside the documents directory.
    ///   - completion: Receives the URL of the written file.
    func getFileInDocumentsDirectory(_ data: Data, fileName: String, _ completion: @escaping (URL) -> Void) {
        do {
            // A single `do` covers the directory lookup as well, so that failure is reported
            // rather than silently dropped.
            let documentsURL = try FileManager.default.url(for: .documentDirectory,
                                                           in: .userDomainMask,
                                                           appropriateFor: nil,
                                                           create: true)
            let saveURL = documentsURL.appendingPathComponent(fileName)
            // Atomic write avoids leaving a truncated file behind if the write is interrupted.
            try data.write(to: saveURL, options: .atomic)
            completion(saveURL)
        } catch {
            print(error.localizedDescription)
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,148 @@ | ||
// | ||
// File.swift | ||
// | ||
// | ||
// Created by Ihor Makhnyk on 16.11.2023. | ||
// | ||
|
||
import SwiftUI | ||
import OpenAI | ||
import UIKit | ||
|
||
/// Form for composing a text-to-speech request (prompt, voice, speed, format) and a list of
/// the generated audio entries, which can be played by tapping or exported by swiping.
public struct TextToSpeechView: View {

    /// Store that performs the requests and holds the generated audio entries.
    @ObservedObject var store: SpeechStore

    @State private var prompt: String = ""
    @State private var voice: AudioSpeechQuery.AudioSpeechVoice = .alloy
    @State private var speed: Double = 1
    @State private var responseFormat: AudioSpeechQuery.AudioSpeechResponseFormat = .mp3

    public init(store: SpeechStore) {
        self.store = store
    }

    public var body: some View {
        List {
            Section {
                HStack {
                    VStack {
                        Text("Prompt")
                        Spacer()
                    }
                    .padding(.vertical, 8)
                    Spacer()
                    ZStack(alignment: .topTrailing) {
                        TextEditor(text: $prompt)
                            .scrollContentBackground(.hidden)
                            .multilineTextAlignment(.trailing)
                        // Placeholder overlay; hit testing is disabled so taps reach the editor.
                        if prompt.isEmpty {
                            Text("...input")
                                .foregroundStyle(.secondary)
                                .multilineTextAlignment(.trailing)
                                .allowsHitTesting(false)
                                .padding(8)
                        }
                    }
                }
                HStack {
                    Picker("Voice", selection: $voice) {
                        let allVoices = AudioSpeechQuery.AudioSpeechVoice.allCases
                        ForEach(allVoices, id: \.self) { voice in
                            Text("\(voice.rawValue.capitalized)")
                        }
                    }
                }
                HStack {
                    Text("Speed: ")
                    Spacer()
                    Stepper(value: $speed, in: 0.25...4, step: 0.25) {
                        HStack {
                            Spacer()
                            Text("**\(String(format: "%.2f", speed))**")
                        }
                    }
                }
                HStack {
                    Picker("Format", selection: $responseFormat) {
                        let allFormats = AudioSpeechQuery.AudioSpeechResponseFormat.allCases
                        ForEach(allFormats, id: \.self) { format in
                            Text(".\(format.rawValue)")
                        }
                    }
                }
            } footer: {
                if responseFormat == .opus {
                    Text("'.opus' is unsupported by AVFAudio player.").foregroundStyle(.secondary).font(.caption)
                }
            }
            Section {
                HStack {
                    Button("Create Speech") {
                        // The query captures the current prompt before the field is cleared below.
                        let query = AudioSpeechQuery(model: .tts_1,
                                                     input: prompt,
                                                     voice: voice,
                                                     responseFormat: responseFormat,
                                                     speed: speed)
                        Task {
                            await store.createSpeech(query)
                        }
                        prompt = ""
                    }
                    .foregroundColor(.accentColor)
                    // The editor accepts newlines and tabs, so treat any whitespace-only prompt
                    // as empty rather than stripping spaces alone.
                    .disabled(prompt.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty)
                    Spacer()
                }
            }
            if !store.audioObjects.isEmpty {
                Section("Click to play, swipe to save:") {
                    ForEach(store.audioObjects) { object in
                        HStack {
                            Text(object.prompt.capitalized)
                            Spacer()
                            Button(action: {
                                // Entries without a player, or in opus format, are not playable.
                                guard let player = object.audioPlayer,
                                      object.format != AudioSpeechQuery.AudioSpeechResponseFormat.opus.rawValue else { return }

                                if player.isPlaying {
                                    player.stop()
                                } else {
                                    player.prepareToPlay()
                                    player.volume = 1
                                    player.play()
                                }
                            }, label: {
                                Image(systemName: "play.fill").foregroundStyle(object.format == AudioSpeechQuery.AudioSpeechResponseFormat.opus.rawValue ? Color.secondary : Color.accentColor)
                            })
                        }
                        .swipeActions(edge: .trailing, allowsFullSwipe: false) {
                            Button {
                                presentUserDirectoryDocumentPicker(for: object.originResponse.audioData, filename: "GeneratedAudio.\(object.format)")
                            } label: {
                                Image(systemName: "square.and.arrow.down")
                            }
                            .tint(.accentColor)
                        }
                    }
                }
            }
        }
        .listStyle(.insetGrouped)
        .scrollDismissesKeyboard(.interactively)
        .navigationTitle("Create Speech")
    }
}
|
||
extension TextToSpeechView {

    /// Stores `audioData` under `filename` through the store and, once the file URL is
    /// delivered, presents a document picker that exports that file.
    ///
    /// Nothing happens when `audioData` is `nil` or when no view controller is available to
    /// present from.
    ///
    /// - Parameters:
    ///   - audioData: Audio bytes to export.
    ///   - filename: File name (with extension) used for the stored file.
    private func presentUserDirectoryDocumentPicker(for audioData: Data?, filename: String) {
        guard let payload = audioData else { return }
        store.getFileInDocumentsDirectory(payload, fileName: filename) { exportedURL in
            guard let presenter = getCurrentViewController() else { return }

            let exportPicker = UIDocumentPickerViewController(forExporting: [exportedURL], asCopy: false)
            exportPicker.shouldShowFileExtensions = true
            presenter.present(exportPicker, animated: true, completion: nil)
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.