Skip to content
This repository has been archived by the owner on Jan 8, 2025. It is now read-only.

Commit

Permalink
feat: add BAA-covered Whisper (#53)
Browse files Browse the repository at this point in the history
* feat: add BAA-covered Whisper

* fix: add dummy environment variables for Azure client
  • Loading branch information
ReinderVosDeWael authored Jun 7, 2024
1 parent 333daf8 commit 32d1863
Show file tree
Hide file tree
Showing 8 changed files with 51 additions and 55 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ jobs:
OPENAI_API_KEY: fake_key
AZURE_DOCUMENT_INTELLIGENCE_KEY: fake_key
AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT: fake_endpoint
AZURE_OPENAI_API_KEY: fake_key
AZURE_OPENAI_ENDPOINT: fake_endpoint

steps:
- uses: actions/checkout@v4
- uses: actions/setup-node@v4
Expand Down
18 changes: 18 additions & 0 deletions src/lib/server/azure.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import { AzureKeyCredential, OpenAIClient } from '@azure/openai';
import {
AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT,
AZURE_DOCUMENT_INTELLIGENCE_KEY,
AZURE_OPENAI_API_KEY,
AZURE_OPENAI_ENDPOINT
} from '$lib/server/secrets';
import { DocumentAnalysisClient } from '@azure/ai-form-recognizer';

// Key-based credential wrapping the Azure OpenAI API key from `$lib/server/secrets`.
const azureOpenaiCredential = new AzureKeyCredential(AZURE_OPENAI_API_KEY);

/**
 * Azure OpenAI client, constructed once when this module is evaluated so that
 * importers share a single instance instead of creating one per request.
 *
 * It targets `AZURE_OPENAI_ENDPOINT` and authenticates with `AZURE_OPENAI_API_KEY`.
 */
export const azureOpenai = new OpenAIClient(AZURE_OPENAI_ENDPOINT, azureOpenaiCredential);

// Key-based credential wrapping the Document Intelligence key from `$lib/server/secrets`.
const azureDocumentIntelligenceCredential = new AzureKeyCredential(
  AZURE_DOCUMENT_INTELLIGENCE_KEY
);

/**
 * Azure Document Intelligence (Form Recognizer) analysis client, constructed once
 * when this module is evaluated so that importers share a single instance.
 *
 * It targets `AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT` and authenticates with
 * `AZURE_DOCUMENT_INTELLIGENCE_KEY`.
 */
export const azureDocumentIntelligence = new DocumentAnalysisClient(
  AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT,
  azureDocumentIntelligenceCredential
);
1 change: 1 addition & 0 deletions src/lib/server/secrets.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,6 @@ export const AZURE_OPENAI_API_KEY = env.AZURE_OPENAI_API_KEY || '';
export const AZURE_OPENAI_GPT_DEPLOYMENT_NAME = env.AZURE_OPENAI_GPT_DEPLOYMENT_NAME || '';
export const AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME =
env.AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME || '';
export const AZURE_OPENAI_WHISPER_DEPLOYMENT_NAME = env.AZURE_OPENAI_WHISPER_DEPLOYMENT_NAME || '';
export const AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT = env.AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT || '';
export const AZURE_DOCUMENT_INTELLIGENCE_KEY = env.AZURE_DOCUMENT_INTELLIGENCE_KEY || '';
22 changes: 5 additions & 17 deletions src/routes/api/document-intelligence/+server.ts
Original file line number Diff line number Diff line change
@@ -1,32 +1,20 @@
import { memoryFileToDiskFile } from '$lib/fileHandling.js';
import { azureDocumentIntelligence } from '$lib/server/azure';
import { logger } from '$lib/server/utils';
import { AzureKeyCredential, DocumentAnalysisClient } from '@azure/ai-form-recognizer';
import { PrebuiltDocumentModel } from './models';
import { type DocumentAnalysisClient } from '@azure/ai-form-recognizer';
import fs, { createReadStream } from 'fs';
import { memoryFileToDiskFile } from '$lib/fileHandling.js';
import {
AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT,
AZURE_DOCUMENT_INTELLIGENCE_KEY
} from '$lib/server/secrets';
import { PrebuiltDocumentModel } from './models';

export const POST = async ({ request }) => {
logger.info('Document Intelligence API called');
if (!AZURE_DOCUMENT_INTELLIGENCE_KEY || !AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT) {
return new Response('Azure document intelligence environment varibales not set correctly.', {
status: 500
});
}
const client = new DocumentAnalysisClient(
AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT,
new AzureKeyCredential(AZURE_DOCUMENT_INTELLIGENCE_KEY)
);

const formData = await request.formData();
const file = formData.get('file') as File;
if (!file) {
return new Response('No file found.', { status: 422 });
}

const pages = await getPages(client, file);
const pages = await getPages(azureDocumentIntelligence, file);
if (!pages) {
return new Response('No pages found in document', { status: 400 });
}
Expand Down
18 changes: 6 additions & 12 deletions src/routes/api/gpt/+server.ts
Original file line number Diff line number Diff line change
@@ -1,19 +1,10 @@
import { OpenAIClient, AzureKeyCredential } from '@azure/openai';
import { azureOpenai } from '$lib/server/azure';
import type { Message } from '$lib/types';
import { logger } from '$lib/server/utils';
import { json } from '@sveltejs/kit';
import {
AZURE_OPENAI_ENDPOINT,
AZURE_OPENAI_API_KEY,
AZURE_OPENAI_GPT_DEPLOYMENT_NAME
} from '$lib/server/secrets';
import { AZURE_OPENAI_GPT_DEPLOYMENT_NAME } from '$lib/server/secrets';

export async function POST({ request }) {
const openai = new OpenAIClient(
AZURE_OPENAI_ENDPOINT,
new AzureKeyCredential(AZURE_OPENAI_API_KEY)
);

const data = await request.json();
const messages = data.messages as Message[];

Expand All @@ -33,6 +24,9 @@ export async function POST({ request }) {
user
});

const completion = await openai.getChatCompletions(AZURE_OPENAI_GPT_DEPLOYMENT_NAME, messages);
const completion = await azureOpenai.getChatCompletions(
AZURE_OPENAI_GPT_DEPLOYMENT_NAME,
messages
);
return json({ message: completion.choices[0].message?.content });
}
16 changes: 4 additions & 12 deletions src/routes/embedding/+page.server.ts
Original file line number Diff line number Diff line change
@@ -1,23 +1,15 @@
import { diskFileToMemoryFile, memoryFileToDiskFile } from '$lib/fileHandling';
import {
AZURE_OPENAI_API_KEY,
AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME,
AZURE_OPENAI_ENDPOINT
} from '$lib/server/secrets';
import { azureOpenai } from '$lib/server/azure';
import { AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME } from '$lib/server/secrets';
import { logger } from '$lib/server/utils';
import { AzureKeyCredential, OpenAIClient, type Embeddings } from '@azure/openai';
import { type Embeddings } from '@azure/openai';
import { fail } from '@sveltejs/kit';
import { spawnSync } from 'child_process';
import fs from 'fs';
import * as pdfjsLib from 'pdfjs-dist';

export const actions = {
default: async (event) => {
const openai = new OpenAIClient(
AZURE_OPENAI_ENDPOINT,
new AzureKeyCredential(AZURE_OPENAI_API_KEY)
);

const formData = await event.request.formData();
const file = formData.get('file') as File;
if (file.size == 0) {
Expand All @@ -39,7 +31,7 @@ export const actions = {

let response: Embeddings;
try {
response = await openai.getEmbeddings(AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME, [input]);
response = await azureOpenai.getEmbeddings(AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME, [input]);
} catch (error) {
// @ts-expect-error as error is unknown.
return fail(500, { message: error.message });
Expand Down
26 changes: 13 additions & 13 deletions src/routes/transcription/+page.server.ts
Original file line number Diff line number Diff line change
@@ -1,22 +1,19 @@
import OpenAI from 'openai';
import { fail } from '@sveltejs/kit';
import ffmpeg from 'fluent-ffmpeg';
import fs from 'fs';
import path from 'node:path';
import { whisperLanguages } from './whisperLanguages';
import { memoryFileToDiskFile, diskFileToMemoryFile } from '$lib/fileHandling';
import type { whisperLanguagesTypes } from '$lib/types';
import { logger } from '$lib/server/utils';
import { OPENAI_API_KEY } from '$lib/server/secrets';
import { azureOpenai } from '$lib/server/azure';
import { AZURE_OPENAI_WHISPER_DEPLOYMENT_NAME } from '$lib/server/secrets';

const VALID_FILE_FORMATS = ['mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm'];
const OPENAI_MAX_SIZE = 24000000; // OpenAI limit is 25MB, but lets keep a margin.
const WHISPER_MAX_SIZE = 24000000; // Whisper size limit is 25MB, but let's keep a margin.
const LOCAL_MAX_SIZE = 500000000; // 500MB

export const actions = {
default: async (event) => {
const openai = new OpenAI({ apiKey: OPENAI_API_KEY });

const formData = await event.request.formData();
let files = [formData.get('file')] as File[];
const language = formData.get('language') as whisperLanguagesTypes;
Expand All @@ -38,7 +35,7 @@ export const actions = {
files[0] = await convertToMp3(files[0]);
}

if (files[0].size > OPENAI_MAX_SIZE) {
if (files[0].size > WHISPER_MAX_SIZE) {
files = await splitIntoMultipleFiles(files[0]);
}

Expand All @@ -53,11 +50,14 @@ export const actions = {
user
});
for (const f of files) {
const transcription = await openai.audio.transcriptions.create({
file: f,
language: whisperLanguages[language],
model
});
const fileContents = new Uint8Array(await f.arrayBuffer());
const transcription = await azureOpenai.getAudioTranscription(
AZURE_OPENAI_WHISPER_DEPLOYMENT_NAME,
fileContents,
{
language
}
);
transcriptions.push(transcription.text);
}

Expand Down Expand Up @@ -95,7 +95,7 @@ async function splitIntoMultipleFiles(
): Promise<File[]> {
const timestamp = new Date().getTime();
const fileExtension = file.name.split('.').pop();
const nFiles = Math.ceil(file.size / OPENAI_MAX_SIZE);
const nFiles = Math.ceil(file.size / WHISPER_MAX_SIZE);
const tempDir = fs.mkdtempSync('temp');
const inputName = path.join(tempDir, `tempInputFile_${timestamp}.${fileExtension}`);
const outputName = path.join(tempDir, `temp_${timestamp}_%d.${targetFormat}`);
Expand Down
2 changes: 1 addition & 1 deletion src/routes/transcription/+page.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
};
</script>

<FormActionPage {title} {description} {enhancer}>
<FormActionPage {title} {description} {enhancer} hasBusinessAssociateAgreemment>
<svelte:fragment slot="form">
<input
class="input"
Expand Down

0 comments on commit 32d1863

Please sign in to comment.