add whisper voice to interact
ChristopherTrimboli committed Dec 10, 2023
1 parent 0bf55b5 commit 3f0def4
Showing 4 changed files with 143 additions and 7 deletions.
33 changes: 30 additions & 3 deletions package-lock.json

Some generated files are not rendered by default.

7 changes: 5 additions & 2 deletions package.json
@@ -1,7 +1,7 @@
{
"name": "lala-companion",
"productName": "lala-companion",
"version": "0.0.4",
"version": "0.0.5",
"description": "3D personified desktop assistants, tuned for you, powered by AI vision and voice.",
"main": ".vite/build/main.js",
"scripts": {
@@ -30,6 +30,7 @@
"@electron-forge/plugin-auto-unpack-natives": "^7.2.0",
"@electron-forge/plugin-vite": "^7.2.0",
"@electron-forge/publisher-github": "^7.2.0",
"@types/hark": "^1.2.5",
"@types/react-dom": "^18.2.17",
"@typescript-eslint/eslint-plugin": "^5.62.0",
"@typescript-eslint/parser": "^5.62.0",
@@ -46,9 +47,11 @@
"ai": "^2.2.28",
"dotenv": "^16.3.1",
"electron-squirrel-startup": "^1.0.0",
"hark": "^1.2.3",
"react": "^18.2.0",
"react-dom": "^18.2.0",
"three": "^0.159.0",
"update-electron-app": "^3.0.0"
"update-electron-app": "^3.0.0",
"wavesurfer.js": "^7.5.1"
}
}
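
(Not part of the commit — a minimal sketch of the voice-capture pattern the two new dependencies enable, condensed from the Overlay change further down: hark does voice-activity detection on the microphone stream, and the wavesurfer.js record plugin captures audio between the "speaking" and "stopped_speaking" events. Names here are illustrative only.)

import hark from "hark";
import WaveSurfer from "wavesurfer.js";
import RecordPlugin from "wavesurfer.js/dist/plugins/record";

const listenForSpeech = async (onClip: (blob: Blob) => void) => {
  const stream = await navigator.mediaDevices.getUserMedia({ audio: true });

  // hark emits "speaking" / "stopped_speaking" based on the stream's audio level.
  const speech = hark(stream);

  // A zero-height wavesurfer instance whose record plugin does the actual capture.
  const wavesurfer = WaveSurfer.create({ container: "#recorder", height: 0 });
  const recorder = wavesurfer.registerPlugin(
    RecordPlugin.create({ renderRecordedAudio: false })
  );

  recorder.on("record-end", onClip); // fires with the recorded Blob
  speech.on("speaking", () => recorder.startRecording());
  speech.on("stopped_speaking", () => recorder.stopRecording());
};
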
10 changes: 9 additions & 1 deletion src/components/VRMCompanion.tsx
@@ -48,12 +48,16 @@ interface VrmCompanionProps {
virtualText: string;
voiceUrl: string;
audioRef?: MutableRefObject<HTMLAudioElement>;
onSpeakStart?: () => void;
onSpeakEnd?: () => void;
}

const VrmCompanion = ({
virtualText,
voiceUrl,
audioRef,
onSpeakStart,
onSpeakEnd,
}: VrmCompanionProps) => {
const [gltf, setGltf] = useState(null);
const [animationMixer, setAnimationMixer] = useState<AnimationMixer>(null);
@@ -199,6 +203,7 @@ const VrmCompanion = ({
useEffect(() => {
const main = async () => {
if (voiceUrl) {
onSpeakStart?.();
audioRef.current.src = voiceUrl;
audioRef.current.play();

@@ -222,6 +227,7 @@ const VrmCompanion = ({
lipsAction.play();

audioRef.current.onended = async () => {
onSpeakEnd?.();
animationMixer.clipAction(talkClip).fadeOut(2);
lipsAction.fadeOut(1);
const randomIdle = getRandomAnimation("idle");
@@ -263,7 +269,7 @@ const VrmCompanion = ({
);
};

- const Scene = ({ virtualText, voiceUrl }: VrmCompanionProps) => {
+ const Scene = ({ virtualText, voiceUrl, onSpeakStart, onSpeakEnd }: VrmCompanionProps) => {
const audioRef = useRef<HTMLAudioElement>(null);

return (
@@ -287,6 +293,8 @@ const Scene = ({ virtualText, voiceUrl }: VrmCompanionProps) => {
virtualText={virtualText}
voiceUrl={voiceUrl}
audioRef={audioRef}
onSpeakStart={onSpeakStart}
onSpeakEnd={onSpeakEnd}
/>
</Canvas>
<audio autoPlay ref={audioRef} src={""} />
100 changes: 99 additions & 1 deletion src/overlay/Overlay.tsx
@@ -1,10 +1,14 @@
import React, { useCallback, useEffect, useState } from "react";
import VRMCompanion from "../components/VRMCompanion";
import { useChat } from "../../node_modules/ai/react/dist/index";
import hark from "hark";
import WaveSurfer from "wavesurfer.js";
import RecordPlugin from "wavesurfer.js/dist/plugins/record";

const Overlay = () => {
const [voiceUrl, setVoiceUrl] = useState<string>("");
const [recentResponse, setRecentResponse] = useState<string>("");
const [isLalaSpeaking, setIsLalaSpeaking] = useState<boolean>(false);

const getVoiceAudio = useCallback(async (text: string) => {
try {
@@ -52,14 +56,108 @@ const Overlay = () => {
});
}, []);

// whisper chunking magic here
useEffect(() => {
let stream: MediaStream = null;
let speechEvents: hark.Harker = null;
let wavesurfer: WaveSurfer = null;
let recorder: RecordPlugin = null;
let isUserSpeaking = false;
let isLoading = false;

const main = async () => {
stream = await navigator.mediaDevices.getUserMedia({ audio: true });
speechEvents = hark(stream);

wavesurfer = WaveSurfer.create({
container: "#recorder",
height: 0,
});

recorder = wavesurfer.registerPlugin(
RecordPlugin.create({
scrollingWaveform: true,
renderRecordedAudio: false,
})
);

speechEvents.on("speaking", () => {
if (isLalaSpeaking || isLoading) return;
isUserSpeaking = true;
recorder.startRecording();
console.log("Started speaking");
});

speechEvents.on("stopped_speaking", () => {
if (isLalaSpeaking) return;
isLoading = true;
recorder.stopRecording();
isUserSpeaking = false;
console.log("Stopped speaking");
});

recorder.on("record-end", async (blob) => {
console.log("recording stopped");
const formData = new FormData();

const file = new File([blob], "voice.wav", {
type: "audio/wav",
});

console.log(file);

formData.append("file", file);

const whisperResp = await fetch(
"https://lalaland.chat/api/magic/whisper",
{
method: "POST",
body: formData,
}
);

if (whisperResp.ok) {
const whisperText = await whisperResp.json();
console.log(whisperText);
await append({
role: "user",
content: whisperText,
});
setTimeout(() => {
isLoading = false;
}, 5000);
} else {
console.log("error whispering", whisperResp);
isLoading = false;
}
});
};
main();

return () => {
stream?.getTracks().forEach((track) => track.stop());
speechEvents?.stop();
wavesurfer?.destroy();
recorder?.destroy();
isUserSpeaking = false;
isLoading = false;
};
}, [isLalaSpeaking]);

return (
<div
style={{
height: "100%",
width: "100%",
}}
>
- <VRMCompanion virtualText={recentResponse} voiceUrl={voiceUrl} />
+ <VRMCompanion
+   virtualText={recentResponse}
+   voiceUrl={voiceUrl}
+   onSpeakStart={() => setIsLalaSpeaking(true)}
+   onSpeakEnd={() => setIsLalaSpeaking(false)}
+ />
<div id="recorder" />
</div>
);
};
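
(Not part of the commit — the overlay above posts multipart form data with a "file" field to https://lalaland.chat/api/magic/whisper and reads the transcription back with whisperResp.json(). The endpoint itself lives outside this repo; assuming it simply proxies to OpenAI's Whisper transcription API, a hypothetical handler could look roughly like this.)

import OpenAI from "openai";

const openai = new OpenAI(); // reads OPENAI_API_KEY from the environment

// Hypothetical handler: `req` is a Fetch-API Request carrying the multipart body.
export const handleWhisper = async (req: Request): Promise<Response> => {
  const form = await req.formData();
  const file = form.get("file") as File; // the "voice.wav" blob sent by the overlay

  const transcription = await openai.audio.transcriptions.create({
    file,
    model: "whisper-1",
  });

  // The overlay appends the parsed JSON directly as the user's chat message,
  // so the transcribed text is returned as a bare JSON string.
  return Response.json(transcription.text);
};
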
