Skip to content

Commit

Permalink
Text to speech example (#216)
Browse files Browse the repository at this point in the history
* feat: working on open ai example

* feat: cleanup and renaming
  • Loading branch information
michalsek authored Dec 5, 2024
1 parent bdad8eb commit d1038f3
Show file tree
Hide file tree
Showing 8 changed files with 160 additions and 1 deletion.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -83,4 +83,6 @@ react-native-audio-api*.tgz
# Android
.kotlin


# Envs
.env
125 changes: 125 additions & 0 deletions apps/common-app/src/examples/TextToSpeech/TextToSpeech.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import React, { useState, FC } from 'react';
import { AudioBuffer, AudioContext } from 'react-native-audio-api';
import { ActivityIndicator, TextInput, StyleSheet } from 'react-native';

import { Container, Button, Spacer } from '../../components';
import Env from '../../utils/env';
import { colors } from '../../styles';

/**
 * Requests synthesized speech for `input` from the OpenAI speech endpoint.
 *
 * The request asks for `response_format: 'pcm'`, so the resolved buffer holds
 * raw, header-less audio samples rather than a container format.
 *
 * @param input Text that should be spoken.
 * @param voice Name of the voice to use.
 * @returns The raw response body as an `ArrayBuffer`.
 * @throws Error when the API token is not configured or when the server
 *   answers with a non-2xx status (the error body is included in the message).
 */
async function getOpenAIResponse(
  input: string,
  voice: string = 'alloy'
): Promise<ArrayBuffer> {
  // Fail early instead of sending "Bearer undefined" to the API.
  if (!Env.openAiToken) {
    throw new Error('OpenAI API token is not configured');
  }

  const response = await fetch('https://api.openai.com/v1/audio/speech', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${Env.openAiToken}`,
    },
    body: JSON.stringify({
      model: 'tts-1-hd',
      voice: voice,
      input: input,
      response_format: 'pcm',
    }),
  });

  // An error response carries a text/JSON body; returning it would make the
  // caller interpret those bytes as audio samples.
  if (!response.ok) {
    const details = await response.text().catch(() => '');
    const suffix = details ? `: ${details}` : '';

    throw new Error(
      `OpenAI speech request failed with status ${response.status}${suffix}`
    );
  }

  return response.arrayBuffer();
}

/** Sample rate (Hz) the incoming 16-bit PCM data is assumed to use. */
const openAISampleRate = 24000;
/** Divisor mapping signed 16-bit samples into the [-1, 1) float range. */
const maxInputValue = 32768.0;

// TODO: this should ideally be done using native code through .decodeAudioData
/**
 * Converts mono 16-bit PCM recorded at `openAISampleRate` into a two-channel
 * buffer at the sample rate of `audioContext`, using linear interpolation.
 *
 * The conversion ratio is derived from `audioContext.sampleRate`, so the
 * result has the right length and pitch for any context rate (48 kHz gives
 * the plain "insert the midpoint between neighbours" 2x upsample).
 *
 * @param audioContext Context used to create the buffer; its `sampleRate`
 *   determines the output rate.
 * @param input Mono signed 16-bit samples.
 * @returns A buffer with the same resampled data in both channels.
 */
function goofyResample(
  audioContext: AudioContext,
  input: Int16Array
): AudioBuffer {
  const scale = audioContext.sampleRate / openAISampleRate;
  // Buffer lengths must be whole numbers even when the ratio is fractional.
  const outputLength = Math.round(input.length * scale);

  const outputBuffer = audioContext.createBuffer(
    2,
    outputLength,
    audioContext.sampleRate
  );

  const resampled: Array<number> = new Array<number>(outputLength);
  const lastIndex = input.length - 1;

  for (let i = 0; i < outputLength; i += 1) {
    // Position of this output sample on the input time line.
    const position = i / scale;
    const left = Math.min(Math.floor(position), lastIndex);
    // Clamp so the tail repeats the final sample instead of reading past it.
    const right = Math.min(left + 1, lastIndex);
    const fraction = position - left;

    resampled[i] =
      (input[left] * (1 - fraction) + input[right] * fraction) / maxInputValue;
  }

  // The source is mono, so both output channels carry the same signal.
  outputBuffer.copyToChannel(resampled, 0);
  outputBuffer.copyToChannel(resampled, 1);

  return outputBuffer;
}

/**
 * Example screen: lets the user type text, sends it to the speech endpoint
 * and plays the returned audio through a freshly created `AudioContext`.
 *
 * While a request is in flight an activity indicator is shown and further
 * presses of the button are ignored.
 */
const TextToSpeech: FC = () => {
  const [isLoading, setIsLoading] = useState(false);
  const [textToRead, setTextToRead] = useState('');

  const onReadText = async () => {
    // Ignore presses during a pending request or when there is nothing to read.
    if (isLoading || textToRead.trim().length === 0) {
      return;
    }

    setIsLoading(true);

    let results: ArrayBuffer;

    try {
      results = await getOpenAIResponse(textToRead, 'alloy');
    } catch (error: unknown) {
      console.error('Text to speech request failed', error);
      return;
    } finally {
      // Always clear the flag; otherwise a failed request would leave the
      // guard above blocking every later press.
      setIsLoading(false);
    }

    // Created only after a successful response so a failed request does not
    // leave an unused audio context behind.
    const aCtx = new AudioContext();

    // Int16Array requires an even byte length; drop a trailing odd byte
    // rather than throwing on a truncated response.
    const samples = new Int16Array(
      results,
      0,
      Math.floor(results.byteLength / 2)
    );

    const audioBuffer = goofyResample(aCtx, samples);
    const sourceNode = aCtx.createBufferSource();
    const duration = audioBuffer.duration;
    const now = aCtx.currentTime;

    sourceNode.buffer = audioBuffer;

    sourceNode.connect(aCtx.destination);

    // Play the whole buffer once, starting immediately.
    sourceNode.start(now);
    sourceNode.stop(now + duration);
  };

  return (
    <Container style={styles.container}>
      <Spacer.Vertical size={60} />
      <TextInput
        value={textToRead}
        onChangeText={setTextToRead}
        style={styles.textInput}
        multiline
      />
      <Spacer.Vertical size={24} />
      <Button onPress={onReadText} title="Read Text" />
      <Spacer.Vertical size={24} />
      {isLoading && <ActivityIndicator />}
    </Container>
  );
};

export default TextToSpeech;

/** Styles for the text-to-speech example screen. */
const styles = StyleSheet.create({
  // Centers the input, button and spinner horizontally.
  container: {
    alignItems: 'center',
  },
  // Fixed-size, outlined multiline input on a transparent background.
  textInput: {
    // Box
    width: 280,
    height: 200,
    padding: 16,
    // Outline
    borderWidth: 1,
    borderRadius: 6,
    borderColor: colors.border,
    // Fill and text
    backgroundColor: 'transparent',
    color: colors.white,
    fontSize: 16,
  },
});
1 change: 1 addition & 0 deletions apps/common-app/src/examples/TextToSpeech/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export { default } from './TextToSpeech';
8 changes: 8 additions & 0 deletions apps/common-app/src/examples/index.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { StackNavigationProp } from '@react-navigation/stack';

import Piano from './Piano';
import TextToSpeech from './TextToSpeech';
import Metronome from './Metronome';
import Oscillator from './Oscillator';
import DrumMachine from './DrumMachine';
Expand All @@ -11,6 +12,7 @@ type NavigationParamList = {
Metronome: undefined;
DrumMachine: undefined;
Piano: undefined;
TextToSpeech: undefined;
AudioFile: undefined;
};

Expand All @@ -37,6 +39,12 @@ export const Examples: Example[] = [
subtitle: 'Play some notes',
screen: Piano,
},
{
key: 'TextToSpeech',
title: 'Text to Speech',
subtitle: 'type some text and hear it spoken',
screen: TextToSpeech,
},
{
key: 'Metronome',
title: 'Metronome',
Expand Down
3 changes: 3 additions & 0 deletions apps/common-app/src/utils/env.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/**
 * Environment-derived configuration for the example app.
 *
 * `openAiToken` is read from `process.env.OPENAI_API_TOKEN` and is
 * `undefined` when that variable is not set.
 */
const env = {
  openAiToken: process.env.OPENAI_API_TOKEN,
};

export default env;
2 changes: 1 addition & 1 deletion apps/fabric-example/babel.config.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
module.exports = {
presets: ['module:@react-native/babel-preset'],
plugins: ['react-native-reanimated/plugin'],
plugins: ['react-native-reanimated/plugin', 'module:react-native-dotenv'],
};
1 change: 1 addition & 0 deletions apps/fabric-example/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
"eslint": "^8.19.0",
"jest": "^29.6.3",
"prettier": "2.8.8",
"react-native-dotenv": "^3.4.11",
"react-test-renderer": "18.3.1",
"typescript": "5.0.4"
},
Expand Down
19 changes: 19 additions & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -5418,6 +5418,13 @@ __metadata:
languageName: node
linkType: hard

"dotenv@npm:^16.4.5":
version: 16.4.5
resolution: "dotenv@npm:16.4.5"
checksum: 10/55a3134601115194ae0f924e54473459ed0d9fc340ae610b676e248cca45aa7c680d86365318ea964e6da4e2ea80c4514c1adab5adb43d6867fb57ff068f95c8
languageName: node
linkType: hard

"eastasianwidth@npm:^0.2.0":
version: 0.2.0
resolution: "eastasianwidth@npm:0.2.0"
Expand Down Expand Up @@ -6150,6 +6157,7 @@ __metadata:
react-dom: "npm:18.2.0"
react-native: "npm:0.76.0"
react-native-audio-api: "workspace:*"
react-native-dotenv: "npm:^3.4.11"
react-native-gesture-handler: "npm:^2.20.2"
react-native-reanimated: "npm:^3.16.1"
react-native-safe-area-context: "npm:^4.12.0"
Expand Down Expand Up @@ -10219,6 +10227,17 @@ __metadata:
languageName: node
linkType: hard

"react-native-dotenv@npm:^3.4.11":
version: 3.4.11
resolution: "react-native-dotenv@npm:3.4.11"
dependencies:
dotenv: "npm:^16.4.5"
peerDependencies:
"@babel/runtime": ^7.20.6
checksum: 10/09e8a7310fcb01ac021e71db9328e9d342d1e117bf68026b12de0392bfe17292ac6a071f03b88e7fb42c82a8f2fdf03bc520c7dedd2f80a1448cb3de5e03d4fb
languageName: node
linkType: hard

"react-native-gesture-handler@npm:^2.20.2":
version: 2.21.1
resolution: "react-native-gesture-handler@npm:2.21.1"
Expand Down

0 comments on commit d1038f3

Please sign in to comment.