Merge pull request #1039 from syntaxfm/transcript-frontend
Transcript frontend + AI Show notes
wesbos authored Aug 15, 2023
2 parents 85aa0f6 + c3fc6e6 commit 4fdb8dd
Showing 7 changed files with 121 additions and 44 deletions.
4 changes: 3 additions & 1 deletion package.json
@@ -53,7 +53,7 @@
},
"type": "module",
"engines": {
"node": ">=20.5.0",
"node": ">=18.0.0 <19.0.0 || =20.5.0",
"pnpm": ">=8"
},
"dependencies": {
@@ -67,8 +67,10 @@
"@types/emscripten": "^1.39.7",
"date-fns": "^2.30.0",
"flexsearch": "^0.7.31",
"gpt-3-encoder": "^1.1.4",
"gray-matter": "^4.0.3",
"media-chrome": "^1.2.2",
"openai": "^3.3.0",
"rehype-highlight": "^6.0.0",
"rehype-raw": "^6.1.1",
"rehype-stringify": "^9.0.3",
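The two new runtime dependencies back the AI work later in this commit: gpt-3-encoder counts tokens before a transcript is sent to the model, and openai (v3) provides the Configuration/OpenAIApi client used in src/server/ai/openai.ts below. A minimal sketch of the token-counting side, assuming only the library's documented encode export; the limit value and helper name here are illustrative, not from the repo:

import { encode } from 'gpt-3-encoder';

// Illustrative budget; the real TOKEN_INPUT_LIMIT constant lives in src/server/ai/openai.ts.
const TOKEN_INPUT_LIMIT = 4096;

// encode() returns the BPE token ids for a string, so .length approximates the
// number of tokens the transcript plus the prompt will consume.
export function fitsContext(transcript: string, prompt: string): boolean {
	return encode(`${transcript} ${prompt}`).length < TOKEN_INPUT_LIMIT;
}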
74 changes: 74 additions & 0 deletions pnpm-lock.yaml

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion prisma/schema.prisma
@@ -174,7 +174,7 @@ model AiSummaryEntry {

model AiTweet {
id Int @id @default(autoincrement())
content String @db.VarChar(250)
content String @db.VarChar(350)
showNote Int
aiShowNote AiShowNote @relation(fields: [showNote], references: [id])
}
8 changes: 4 additions & 4 deletions src/lib/player/CD.svelte
@@ -126,15 +126,15 @@ class="disc"
border-radius: 10px 10px 30px 30px;
background: linear-gradient(180deg, #000, #111);
}
.case-glass {
border-radius: 10px 10px 30px 30px;
position: absolute;
top: 0;
width: 100%;
height: 95%;
background: linear-gradient(to right, #ffffff33, #ffffff66);
Z-index: 10;
z-index: 10;
left: 0;
box-shadow: inset 0 0 0px 4px #111, inset 0 0 0px 10px #222;
border: solid 2px #222;
@@ -151,7 +151,7 @@ class="disc"
border-bottom: solid 2px #111;
height: 60%;
background: radial-gradient(circle at center, #11111133, #00000033), linear-gradient(140deg, #111, #333);
Z-index: 11;
z-index: 11;
left: 0;
}
@@ -161,7 +161,7 @@ class="disc"
width: 70px;
height: auto;
}
.spinning {
animation: spin .5s linear infinite forwards;
}
60 changes: 26 additions & 34 deletions src/server/ai/openai.ts
@@ -27,43 +27,31 @@ export const CONDENSE_THRESHOLD = 100;
const configuration = new Configuration({
apiKey: process.env.OPENAI_API_KEY
});
// import { findSimilarUtterances } from './embeddings';
import { error } from '@sveltejs/kit';
import { AINoteSelect, AIPodcastSummaryResponse } from './queries';
export const openai = new OpenAIApi(configuration);

async function condense(transcript: string, show: TranscribedShow, inputTokensLength: number) {
console.log(`========== Condensing show ${show.number} ============`);
// See if we have a condensed transcript already saved to file
const path = `./deepgram/condensed-healed/${show.number}.json`;
const localExists = await exists(path);
// Save existing work to Memory
let existing: SlimUtterance[] = [];
if (localExists) {
const condensedFile = await readFile(path, 'utf-8');
console.log(`Using local condensed file for ${show.number}`);
existing = JSON.parse(condensedFile) as SlimUtterance[];
export async function condense(
transcript: string,
show: TranscribedShow
): Promise<SlimUtterance[]> {
// Figure out how large the input is
const inputTokensLength = encode(`${transcript} ${summarizePrompt}`).length;
// If it's under the limit, return the transcript as is
if (inputTokensLength < TOKEN_INPUT_LIMIT) {
console.log(
`========== Skipping condensing show ${show.number} - Size is ${inputTokensLength} and acceptable ============`
);
return show.utterances;
}

console.log(`========== Condensing show ${show.number} ============`);
// Figure out how many hunks we need to split this string into
const size = encode(transcript).length;
console.log(`Transcript size: ${size}`);
console.log(`inputTokensLength size: ${inputTokensLength}`);
console.log(`Token input limit: ${TOKEN_INPUT_LIMIT}`);
const factorSmaller = 1 - TOKEN_INPUT_LIMIT / inputTokensLength;
console.log(`Factor smaller: ${factorSmaller}`);
// Split the transcript into hunks
const utteranceFuncs = show.utterances.map((utterance, index) => {
return async function getCondenseUtterance(): Promise<SlimUtterance> {
// Check if we already have this utterance condensed
const existingUtterance = existing.find(
(eu) => eu.utteranceIndex === utterance.utteranceIndex
);
if (existingUtterance) {
// console.log(utterance.showNumber, utterance.utteranceIndex, `Skipping OpenAI, we already have utterance condensed`);
return existingUtterance;
}

// Wait a random amount of time to avoid rate limiting. Between 0 and 10 seconds
const waitTime = Math.floor(Math.random() * 10000);
await wait(waitTime);
@@ -92,6 +80,7 @@ async function condense(transcript: string, show: TranscribedShow, inputTokensLe
console.dir(err.response.headers);
});
const condensed = completion?.data?.choices?.at(0)?.message?.content;
// Inject the condensed transcript into the utterance
if (condensed) {
utterance.condensedTranscript = condensed;
}
@@ -106,6 +95,7 @@ async function condense(transcript: string, show: TranscribedShow, inputTokensLe
)}% of original`
);
console.timeEnd(`Condensing ${index} of ${show.utterances.length}`);
// Return the modified utterance
return utterance;
};
});
@@ -114,17 +104,12 @@ async function condense(transcript: string, show: TranscribedShow, inputTokensLe
console.log('Done condensing');
// Get the results
const utterances = utteranceResults
.filter((result): result is PromiseSettledResult<SlimUtterance> => {
.filter((result): result is PromiseFulfilledResult<SlimUtterance> => {
return result.status === 'fulfilled';
})
.map((result) => result.value);

console.log(`Saving condensed file for ${show.number}`);
// Write the results to a file
await writeFile(
`./deepgram/condensed-healed/${show.number}.json`,
JSON.stringify(utterances, null, 2)
);
return utterances;
}

@@ -149,8 +134,14 @@ export async function generate_ai_notes(
) {
const slimUtterance = getSlimUtterances(show.transcript?.utterances || [], show.number);
const transcript = formatAsTranscript(slimUtterance);
// See if it's over the token limit
// TODO: move to OpenAI's new functions API
// Condense
const slimUtterancesWithCondensed = await condense(transcript, {
name: show.title,
number: show.number,
utterances: slimUtterance
});
const condensedTranscript = formatAsTranscript(slimUtterancesWithCondensed);

const input: CreateChatCompletionRequest = {
model: MODEL,
messages: [
@@ -161,13 +152,14 @@
},
{ role: 'user', content: `This episode is #${show.number} entitled ${show.title}` },
{ role: 'user', content: summarizePrompt },
{ role: 'user', content: transcript }
{ role: 'user', content: condensedTranscript }
]
};

console.log(`Creating AI notes for ${show.number}`);
const completion = await openai.createChatCompletion(input);
const maybeJSON = completion.data.choices.at(0)?.message?.content;
console.log(maybeJSON);
const parsed = JSON.parse(maybeJSON || '') as AIPodcastSummaryResponse;
return parsed;
}
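One small but meaningful fix above is the type predicate on the Promise.allSettled results: narrowing to PromiseFulfilledResult (rather than PromiseSettledResult) is what lets TypeScript accept the .value access in the following .map. A standalone sketch of the pattern; the helper name and generic signature are illustrative, not from the repo:

// Run a batch of promises and keep only the fulfilled values,
// using a type predicate so `.value` is known to exist.
async function settledValues<T>(tasks: Promise<T>[]): Promise<T[]> {
	const results = await Promise.allSettled(tasks);
	return results
		.filter((result): result is PromiseFulfilledResult<T> => result.status === 'fulfilled')
		.map((result) => result.value);
}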
5 changes: 3 additions & 2 deletions src/server/transcripts/deepgram.ts
@@ -1,4 +1,5 @@
import { Deepgram } from '@deepgram/sdk';
import DeepgramPkg from '@deepgram/sdk';
const { Deepgram } = DeepgramPkg;
import { prisma_client as prisma } from '../../hooks.server';
import { error } from '@sveltejs/kit';
import { keywords } from './fixes';
@@ -7,7 +8,7 @@ import { save_transcript_to_db } from './transcripts';
import type { PrerecordedTranscriptionResponse } from '@deepgram/sdk/dist/types';
const deepgramApiKey = process.env.DEEPGRAM_SECRET;
if (!deepgramApiKey) {
console.log('Please set the DEEPGRAM_SECRET environment variable.');
console.error('Please set the DEEPGRAM_SECRET environment variable.');
process.exit(1);
}

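The changed import reflects how @deepgram/sdk ships as CommonJS: under Node's ESM loader (the package.json sets "type": "module"), named imports from a CJS module can fail to resolve, so the default export is imported and the class is destructured off it. A hedged sketch of the pattern; the constructor usage mirrors the v1 SDK, but the surrounding code is illustrative:

// CommonJS interop under "type": "module": import the package default,
// then pull the named class off of it.
import DeepgramPkg from '@deepgram/sdk';
const { Deepgram } = DeepgramPkg;

// Illustrative client construction; the real call site is in deepgram.ts.
const deepgram = new Deepgram(process.env.DEEPGRAM_SECRET ?? '');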
12 changes: 10 additions & 2 deletions src/server/transcripts/utils.ts
@@ -79,12 +79,20 @@ export function formatTime(secs: number) {
return `${minutesString}:${secondsString}`;
}

function getSpeakerShortName(speaker: string | undefined) {
const shortForms = new Map([
['Scott Tolinski', 'Scott'],
['Wes Bos', 'Wes']
]);
return shortForms.get(speaker) || speaker;
}

export function formatAsTranscript(utterances: SlimUtterance[]) {
return utterances.reduce((acc, utterance) => {
// TODO: We might need to reinstate condensedTranscript here
const timestamp = formatTime(utterance.start);
return `${acc}\n${timestamp} ${utterance.speaker}:\n${
/*condensedTranscript || */ utterance.transcript
return `${acc}\n${timestamp} ${getSpeakerShortName(utterance.speaker)}:\n${
utterance.condensedTranscript || utterance.transcript
}\n`;
}, '');
}
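To make the new formatting concrete, a usage sketch of formatAsTranscript with the speaker short names and the condensed-transcript fallback; the import paths and sample utterances are assumptions, not from the repo:

import { formatAsTranscript } from './utils';
import type { SlimUtterance } from './types'; // hypothetical location of the type

const sample = [
	{ start: 0, speaker: 'Wes Bos', transcript: 'Welcome to Syntax.' },
	{ start: 65, speaker: 'Scott Tolinski', transcript: 'Today: transcripts, end to end.', condensedTranscript: 'Topic: transcripts.' }
] as SlimUtterance[];

// Prefers condensedTranscript when present and shortens known speaker names,
// printing roughly:
//
// 00:00 Wes:
// Welcome to Syntax.
//
// 01:05 Scott:
// Topic: transcripts.
console.log(formatAsTranscript(sample));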

1 comment on commit 4fdb8dd


@vercel vercel bot commented on 4fdb8dd Aug 15, 2023


Successfully deployed to the following URLs:

sytnax-website-v2 – ./

sytnax-website-v2-syntax.vercel.app
sytnax-website-v2-git-v2-syntax.vercel.app
beta.syntax.fm
