diff --git a/.gitignore b/.gitignore
index 94a48503..4bf942bb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -83,4 +83,6 @@ react-native-audio-api*.tgz
 
 # Android
 .kotlin
+
+# Envs
 .env
diff --git a/apps/common-app/src/examples/TextToSpeech/TextToSpeech.tsx b/apps/common-app/src/examples/TextToSpeech/TextToSpeech.tsx
new file mode 100644
index 00000000..7dd96d1a
--- /dev/null
+++ b/apps/common-app/src/examples/TextToSpeech/TextToSpeech.tsx
@@ -0,0 +1,125 @@
+import React, { useState, FC } from 'react';
+import { AudioBuffer, AudioContext } from 'react-native-audio-api';
+import { ActivityIndicator, TextInput, StyleSheet } from 'react-native';
+
+import { Container, Button, Spacer } from '../../components';
+import Env from '../../utils/env';
+import { colors } from '../../styles';
+
+// Requests OpenAI text-to-speech audio for `input` and resolves with the raw
+// response bytes: 16-bit signed PCM at 24 kHz (see `response_format: 'pcm'`).
+// NOTE(review): this ships the OpenAI API key inside the client bundle —
+// acceptable for an example app, but real apps should proxy via a backend.
+async function getOpenAIResponse(input: string, voice: string = 'alloy') {
+  return await fetch('https://api.openai.com/v1/audio/speech', {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+      'Authorization': `Bearer ${Env.openAiToken}`,
+    },
+    body: JSON.stringify({
+      model: 'tts-1-hd',
+      voice: voice,
+      input: input,
+      response_format: 'pcm',
+    }),
+  }).then((response) => response.arrayBuffer());
+}
+
+// OpenAI's PCM output is fixed at 24 kHz, 16-bit signed samples.
+const openAISampleRate = 24000;
+const maxInputValue = 32768.0;
+
+// TODO: this should ideally be done using native code through .decodeAudioData
+// Converts 16-bit PCM at 24 kHz into a stereo AudioBuffer at the context's
+// sample rate using linear interpolation. For the common 48 kHz context this
+// yields exactly the classic upsample-by-2 output (each source sample followed
+// by the midpoint with its successor); for other rates it interpolates at the
+// true ratio instead of assuming 2x, so the buffer is filled exactly.
+function goofyResample(
+  audioContext: AudioContext,
+  input: Int16Array
+): AudioBuffer {
+  const scale = audioContext.sampleRate / openAISampleRate;
+  // createBuffer requires an integral frame count; truncate rather than
+  // passing a fractional length when the ratio is not a whole number.
+  const outputLength = Math.floor(input.length * scale);
+
+  const outputBuffer = audioContext.createBuffer(
+    2,
+    outputLength,
+    audioContext.sampleRate
+  );
+
+  const upSampleChannel: Array<number> = new Array(outputLength);
+
+  for (let j = 0; j < outputLength; j += 1) {
+    // Map the output frame back to a (fractional) position in the input.
+    const pos = j / scale;
+    const i = Math.floor(pos);
+    const frac = pos - i;
+
+    // Normalize 16-bit samples to [-1, 1); clamp the lookahead at the end.
+    const current = input[i] / maxInputValue;
+    const next =
+      i + 1 < input.length ? input[i + 1] / maxInputValue : current;
+
+    upSampleChannel[j] = current + (next - current) * frac;
+  }
+
+  // Duplicate the mono signal into both channels of the stereo buffer.
+  outputBuffer.copyToChannel(upSampleChannel, 0);
+  outputBuffer.copyToChannel(upSampleChannel, 1);
+
+  return outputBuffer;
+}
+
+const TextToSpeech: FC = () => {
+  const [isLoading, setIsLoading] = useState(false);
+  const [textToRead, setTextToRead] = useState('');
+
+  const onReadText = async () => {
+    if (isLoading) {
+      return;
+    }
+
+    const aCtx = new AudioContext();
+
+    setIsLoading(true);
+    try {
+      // NOTE(review): a failed fetch previously left `isLoading` stuck true;
+      // the finally block guarantees the spinner is always cleared.
+      const results = await getOpenAIResponse(textToRead, 'alloy');
+
+      const audioBuffer = goofyResample(aCtx, new Int16Array(results));
+      const sourceNode = aCtx.createBufferSource();
+      const duration = audioBuffer.duration;
+      const now = aCtx.currentTime;
+
+      sourceNode.buffer = audioBuffer;
+
+      sourceNode.connect(aCtx.destination);
+
+      sourceNode.start(now);
+      sourceNode.stop(now + duration);
+    } finally {
+      setIsLoading(false);
+    }
+  };
+
+  return (