Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Text to speech example #216

Merged
merged 4 commits into from
Dec 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -83,4 +83,6 @@ react-native-audio-api*.tgz
# Android
.kotlin


# Envs
.env
125 changes: 125 additions & 0 deletions apps/common-app/src/examples/TextToSpeech/TextToSpeech.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import React, { useState, FC } from 'react';
import { AudioBuffer, AudioContext } from 'react-native-audio-api';
import { ActivityIndicator, TextInput, StyleSheet } from 'react-native';

import { Container, Button, Spacer } from '../../components';
import Env from '../../utils/env';
import { colors } from '../../styles';

/**
 * Requests synthesized speech for `input` from the OpenAI speech endpoint and
 * resolves with the raw response bytes (requested as `response_format: 'pcm'`).
 *
 * @param input Text to be spoken.
 * @param voice Voice identifier forwarded to the API; defaults to `'alloy'`.
 * @returns The response body as an `ArrayBuffer`.
 * @throws Error when no API token is configured, or when the server answers
 *   with a non-2xx status (the body is then an error payload, not audio).
 */
async function getOpenAIResponse(
  input: string,
  voice: string = 'alloy'
): Promise<ArrayBuffer> {
  // Fail early instead of sending a literal "Bearer undefined" header.
  if (!Env.openAiToken) {
    throw new Error('OpenAI API token is not configured');
  }

  const response = await fetch('https://api.openai.com/v1/audio/speech', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${Env.openAiToken}`,
    },
    body: JSON.stringify({
      model: 'tts-1-hd',
      voice: voice,
      input: input,
      response_format: 'pcm',
    }),
  });

  // fetch only rejects on network failures; HTTP errors must be checked
  // explicitly, otherwise the error body would be handed back as audio data.
  if (!response.ok) {
    throw new Error(
      `OpenAI speech request failed with status ${response.status}`
    );
  }

  return response.arrayBuffer();
}

/** Sample rate (Hz) assumed for the PCM stream returned by the speech endpoint. */
const openAISampleRate = 24000;
/** Divisor that maps signed 16-bit sample values onto the [-1, 1) float range. */
const maxInputValue = 32768.0;

// TODO: this should ideally be done using native code through .decodeAudioData
/**
 * Converts mono 16-bit PCM sampled at `openAISampleRate` into a stereo
 * `AudioBuffer` at the context's own sample rate, using linear interpolation.
 *
 * The resampling ratio is derived from `audioContext.sampleRate`, so the
 * output length always matches the buffer that is allocated for it. For a
 * 48 kHz context this yields exactly "sample, midpoint, sample, midpoint, ...".
 * No low-pass filtering is applied, so downsampling may alias.
 *
 * @param audioContext Context used to allocate the buffer; its `sampleRate`
 *   is the target rate.
 * @param input Mono signed 16-bit samples.
 * @returns A two-channel buffer holding the same resampled signal in both channels.
 */
function goofyResample(
  audioContext: AudioContext,
  input: Int16Array
): AudioBuffer {
  const scale = audioContext.sampleRate / openAISampleRate;
  // Buffer lengths must be whole frame counts.
  const outputLength = Math.round(input.length * scale);

  const outputBuffer = audioContext.createBuffer(
    2,
    outputLength,
    audioContext.sampleRate
  );

  const lastIndex = input.length - 1;
  const resampledChannel: Array<number> = new Array<number>(outputLength);

  for (let i = 0; i < outputLength; i += 1) {
    // Fractional position of this output frame on the input timeline.
    const position = i / scale;
    // Clamp both neighbours so the tail repeats the final sample.
    const leftIndex = Math.min(Math.floor(position), lastIndex);
    const rightIndex = Math.min(leftIndex + 1, lastIndex);
    const fraction = position - leftIndex;

    const left = input[leftIndex] / maxInputValue;
    const right = input[rightIndex] / maxInputValue;

    resampledChannel[i] = left + (right - left) * fraction;
  }

  // The source is mono: play the same signal on both channels.
  outputBuffer.copyToChannel(resampledChannel, 0);
  outputBuffer.copyToChannel(resampledChannel, 1);

  return outputBuffer;
}

/**
 * Example screen: the user types text, presses "Read Text", and the text is
 * sent to the speech endpoint; the returned PCM audio is resampled and played
 * through an `AudioContext`.
 */
const TextToSpeech: FC = () => {
  const [isLoading, setIsLoading] = useState(false);
  const [textToRead, setTextToRead] = useState('');
  // A single context is reused across presses instead of allocating a new one
  // (and never releasing it) on every request.
  const audioContextRef = React.useRef<AudioContext | null>(null);

  const onReadText = async () => {
    // Ignore presses while a request is in flight or when there is nothing to read.
    if (isLoading || textToRead.trim().length === 0) {
      return;
    }

    if (audioContextRef.current === null) {
      audioContextRef.current = new AudioContext();
    }
    const aCtx = audioContextRef.current;

    setIsLoading(true);

    let results: ArrayBuffer;
    try {
      results = await getOpenAIResponse(textToRead, 'alloy');
    } catch (error: unknown) {
      console.warn('Text to speech request failed', error);
      return;
    } finally {
      // Always clear the spinner, otherwise a failed request would leave the
      // button permanently blocked by the isLoading guard above.
      setIsLoading(false);
    }

    // Int16Array requires a whole number of 2-byte samples; drop a trailing
    // odd byte rather than throwing a RangeError.
    const sampleCount = Math.floor(results.byteLength / 2);
    if (sampleCount === 0) {
      return;
    }

    const audioBuffer = goofyResample(
      aCtx,
      new Int16Array(results, 0, sampleCount)
    );
    const sourceNode = aCtx.createBufferSource();
    const duration = audioBuffer.duration;
    const now = aCtx.currentTime;

    sourceNode.buffer = audioBuffer;

    sourceNode.connect(aCtx.destination);

    sourceNode.start(now);
    sourceNode.stop(now + duration);
  };

  return (
    <Container style={styles.container}>
      <Spacer.Vertical size={60} />
      <TextInput
        value={textToRead}
        onChangeText={setTextToRead}
        style={styles.textInput}
        multiline
      />
      <Spacer.Vertical size={24} />
      <Button onPress={onReadText} title="Read Text" />
      <Spacer.Vertical size={24} />
      {isLoading && <ActivityIndicator />}
    </Container>
  );
};

export default TextToSpeech;

// Styles for the TextToSpeech screen.
const styles = StyleSheet.create({
  // Centers the input, button and spinner horizontally.
  container: {
    alignItems: 'center',
  },
  // Fixed-size, bordered multiline input on a transparent background.
  textInput: {
    // Box geometry
    width: 280,
    height: 200,
    padding: 16,
    // Border
    borderWidth: 1,
    borderRadius: 6,
    borderColor: colors.border,
    // Fill and text
    backgroundColor: 'transparent',
    color: colors.white,
    fontSize: 16,
  },
});
1 change: 1 addition & 0 deletions apps/common-app/src/examples/TextToSpeech/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export { default } from './TextToSpeech';
8 changes: 8 additions & 0 deletions apps/common-app/src/examples/index.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { StackNavigationProp } from '@react-navigation/stack';

import Piano from './Piano';
import TextToSpeech from './TextToSpeech';
import Metronome from './Metronome';
import Oscillator from './Oscillator';
import DrumMachine from './DrumMachine';
Expand All @@ -11,6 +12,7 @@ type NavigationParamList = {
Metronome: undefined;
DrumMachine: undefined;
Piano: undefined;
TextToSpeech: undefined;
AudioFile: undefined;
};

Expand All @@ -37,6 +39,12 @@ export const Examples: Example[] = [
subtitle: 'Play some notes',
screen: Piano,
},
{
key: 'TextToSpeech',
title: 'Text to Speech',
subtitle: 'type some text and hear it spoken',
screen: TextToSpeech,
},
{
key: 'Metronome',
title: 'Metronome',
Expand Down
3 changes: 3 additions & 0 deletions apps/common-app/src/utils/env.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/**
 * Environment-derived configuration for the example app.
 * `openAiToken` is read from `process.env.OPENAI_API_TOKEN` and is
 * `undefined` when that variable is not set.
 */
const env = {
  openAiToken: process.env.OPENAI_API_TOKEN,
};

export default env;
2 changes: 1 addition & 1 deletion apps/fabric-example/babel.config.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
module.exports = {
presets: ['module:@react-native/babel-preset'],
plugins: ['react-native-reanimated/plugin'],
plugins: ['react-native-reanimated/plugin', 'module:react-native-dotenv'],
};
1 change: 1 addition & 0 deletions apps/fabric-example/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
"eslint": "^8.19.0",
"jest": "^29.6.3",
"prettier": "2.8.8",
"react-native-dotenv": "^3.4.11",
"react-test-renderer": "18.3.1",
"typescript": "5.0.4"
},
Expand Down
19 changes: 19 additions & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -5418,6 +5418,13 @@ __metadata:
languageName: node
linkType: hard

"dotenv@npm:^16.4.5":
version: 16.4.5
resolution: "dotenv@npm:16.4.5"
checksum: 10/55a3134601115194ae0f924e54473459ed0d9fc340ae610b676e248cca45aa7c680d86365318ea964e6da4e2ea80c4514c1adab5adb43d6867fb57ff068f95c8
languageName: node
linkType: hard

"eastasianwidth@npm:^0.2.0":
version: 0.2.0
resolution: "eastasianwidth@npm:0.2.0"
Expand Down Expand Up @@ -6150,6 +6157,7 @@ __metadata:
react-dom: "npm:18.2.0"
react-native: "npm:0.76.0"
react-native-audio-api: "workspace:*"
react-native-dotenv: "npm:^3.4.11"
react-native-gesture-handler: "npm:^2.20.2"
react-native-reanimated: "npm:^3.16.1"
react-native-safe-area-context: "npm:^4.12.0"
Expand Down Expand Up @@ -10219,6 +10227,17 @@ __metadata:
languageName: node
linkType: hard

"react-native-dotenv@npm:^3.4.11":
version: 3.4.11
resolution: "react-native-dotenv@npm:3.4.11"
dependencies:
dotenv: "npm:^16.4.5"
peerDependencies:
"@babel/runtime": ^7.20.6
checksum: 10/09e8a7310fcb01ac021e71db9328e9d342d1e117bf68026b12de0392bfe17292ac6a071f03b88e7fb42c82a8f2fdf03bc520c7dedd2f80a1448cb3de5e03d4fb
languageName: node
linkType: hard

"react-native-gesture-handler@npm:^2.20.2":
version: 2.21.1
resolution: "react-native-gesture-handler@npm:2.21.1"
Expand Down
Loading