diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 31c3bd6..df07274 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -16,6 +16,9 @@ jobs: OPENAI_API_KEY: fake_key AZURE_DOCUMENT_INTELLIGENCE_KEY: fake_key AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT: fake_endpoint + AZURE_OPENAI_API_KEY: fake_key + AZURE_OPENAI_ENDPOINT: fake_endpoint + steps: - uses: actions/checkout@v4 - uses: actions/setup-node@v4 diff --git a/src/lib/server/azure.ts b/src/lib/server/azure.ts new file mode 100644 index 0000000..00704ec --- /dev/null +++ b/src/lib/server/azure.ts @@ -0,0 +1,18 @@ +import { AzureKeyCredential, OpenAIClient } from '@azure/openai'; +import { + AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT, + AZURE_DOCUMENT_INTELLIGENCE_KEY, + AZURE_OPENAI_API_KEY, + AZURE_OPENAI_ENDPOINT +} from '$lib/server/secrets'; +import { DocumentAnalysisClient } from '@azure/ai-form-recognizer'; + +export const azureOpenai = new OpenAIClient( + AZURE_OPENAI_ENDPOINT, + new AzureKeyCredential(AZURE_OPENAI_API_KEY) +); + +export const azureDocumentIntelligence = new DocumentAnalysisClient( + AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT, + new AzureKeyCredential(AZURE_DOCUMENT_INTELLIGENCE_KEY) +); diff --git a/src/lib/server/secrets.ts b/src/lib/server/secrets.ts index ed6209b..f8b7cdc 100644 --- a/src/lib/server/secrets.ts +++ b/src/lib/server/secrets.ts @@ -7,5 +7,6 @@ export const AZURE_OPENAI_API_KEY = env.AZURE_OPENAI_API_KEY || ''; export const AZURE_OPENAI_GPT_DEPLOYMENT_NAME = env.AZURE_OPENAI_GPT_DEPLOYMENT_NAME || ''; export const AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME = env.AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME || ''; +export const AZURE_OPENAI_WHISPER_DEPLOYMENT_NAME = env.AZURE_OPENAI_WHISPER_DEPLOYMENT_NAME || ''; export const AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT = env.AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT || ''; export const AZURE_DOCUMENT_INTELLIGENCE_KEY = env.AZURE_DOCUMENT_INTELLIGENCE_KEY || ''; diff --git a/src/routes/api/document-intelligence/+server.ts b/src/routes/api/document-intelligence/+server.ts index 601ea33..fd667b6 100644 --- a/src/routes/api/document-intelligence/+server.ts +++ b/src/routes/api/document-intelligence/+server.ts @@ -1,24 +1,12 @@ +import { memoryFileToDiskFile } from '$lib/fileHandling.js'; +import { azureDocumentIntelligence } from '$lib/server/azure'; import { logger } from '$lib/server/utils'; -import { AzureKeyCredential, DocumentAnalysisClient } from '@azure/ai-form-recognizer'; -import { PrebuiltDocumentModel } from './models'; +import { type DocumentAnalysisClient } from '@azure/ai-form-recognizer'; import fs, { createReadStream } from 'fs'; -import { memoryFileToDiskFile } from '$lib/fileHandling.js'; -import { - AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT, - AZURE_DOCUMENT_INTELLIGENCE_KEY -} from '$lib/server/secrets'; +import { PrebuiltDocumentModel } from './models'; export const POST = async ({ request }) => { logger.info('Document Intelligence API called'); - if (!AZURE_DOCUMENT_INTELLIGENCE_KEY || !AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT) { - return new Response('Azure document intelligence environment varibales not set correctly.', { - status: 500 - }); - } - const client = new DocumentAnalysisClient( - AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT, - new AzureKeyCredential(AZURE_DOCUMENT_INTELLIGENCE_KEY) - ); const formData = await request.formData(); const file = formData.get('file') as File; @@ -26,7 +14,7 @@ export const POST = async ({ request }) => { return new Response('No file found.', { status: 422 }); } - const pages = await getPages(client, file); + const pages = await getPages(azureDocumentIntelligence, file); if (!pages) { return new Response('No pages found in document', { status: 400 }); } diff --git a/src/routes/api/gpt/+server.ts b/src/routes/api/gpt/+server.ts index b088687..8780b1c 100644 --- a/src/routes/api/gpt/+server.ts +++ b/src/routes/api/gpt/+server.ts @@ -1,19 +1,10 @@ -import { OpenAIClient, AzureKeyCredential } from '@azure/openai'; +import { azureOpenai } from '$lib/server/azure'; import type { Message } from '$lib/types'; import { logger } from '$lib/server/utils'; import { json } from '@sveltejs/kit'; -import { - AZURE_OPENAI_ENDPOINT, - AZURE_OPENAI_API_KEY, - AZURE_OPENAI_GPT_DEPLOYMENT_NAME -} from '$lib/server/secrets'; +import { AZURE_OPENAI_GPT_DEPLOYMENT_NAME } from '$lib/server/secrets'; export async function POST({ request }) { - const openai = new OpenAIClient( - AZURE_OPENAI_ENDPOINT, - new AzureKeyCredential(AZURE_OPENAI_API_KEY) - ); - const data = await request.json(); const messages = data.messages as Message[]; @@ -33,6 +24,9 @@ export async function POST({ request }) { user }); - const completion = await openai.getChatCompletions(AZURE_OPENAI_GPT_DEPLOYMENT_NAME, messages); + const completion = await azureOpenai.getChatCompletions( + AZURE_OPENAI_GPT_DEPLOYMENT_NAME, + messages + ); return json({ message: completion.choices[0].message?.content }); } diff --git a/src/routes/embedding/+page.server.ts b/src/routes/embedding/+page.server.ts index ea42a6e..2904161 100644 --- a/src/routes/embedding/+page.server.ts +++ b/src/routes/embedding/+page.server.ts @@ -1,11 +1,8 @@ import { diskFileToMemoryFile, memoryFileToDiskFile } from '$lib/fileHandling'; -import { - AZURE_OPENAI_API_KEY, - AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME, - AZURE_OPENAI_ENDPOINT -} from '$lib/server/secrets'; +import { azureOpenai } from '$lib/server/azure'; +import { AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME } from '$lib/server/secrets'; import { logger } from '$lib/server/utils'; -import { AzureKeyCredential, OpenAIClient, type Embeddings } from '@azure/openai'; +import { type Embeddings } from '@azure/openai'; import { fail } from '@sveltejs/kit'; import { spawnSync } from 'child_process'; import fs from 'fs'; @@ -13,11 +10,6 @@ import * as pdfjsLib from 'pdfjs-dist'; export const actions = { default: async (event) => { - const openai = new OpenAIClient( - AZURE_OPENAI_ENDPOINT, - new AzureKeyCredential(AZURE_OPENAI_API_KEY) - ); - const formData = await event.request.formData(); const file = formData.get('file') as File; if (file.size == 0) { @@ -39,7 +31,7 @@ export const actions = { let response: Embeddings; try { - response = await openai.getEmbeddings(AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME, [input]); + response = await azureOpenai.getEmbeddings(AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME, [input]); } catch (error) { // @ts-expect-error as error is unknown. return fail(500, { message: error.message }); diff --git a/src/routes/transcription/+page.server.ts b/src/routes/transcription/+page.server.ts index edb044c..395e920 100644 --- a/src/routes/transcription/+page.server.ts +++ b/src/routes/transcription/+page.server.ts @@ -1,22 +1,19 @@ -import OpenAI from 'openai'; import { fail } from '@sveltejs/kit'; import ffmpeg from 'fluent-ffmpeg'; import fs from 'fs'; import path from 'node:path'; -import { whisperLanguages } from './whisperLanguages'; import { memoryFileToDiskFile, diskFileToMemoryFile } from '$lib/fileHandling'; import type { whisperLanguagesTypes } from '$lib/types'; import { logger } from '$lib/server/utils'; -import { OPENAI_API_KEY } from '$lib/server/secrets'; +import { azureOpenai } from '$lib/server/azure'; +import { AZURE_OPENAI_WHISPER_DEPLOYMENT_NAME } from '$lib/server/secrets'; const VALID_FILE_FORMATS = ['mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm']; -const OPENAI_MAX_SIZE = 24000000; // OpenAI limit is 25MB, but lets keep a margin. +const WHISPER_MAX_SIZE = 24000000; // Whisper size limit is 25MB, but lets keep a margin. const LOCAL_MAX_SIZE = 500000000; // 500MB export const actions = { default: async (event) => { - const openai = new OpenAI({ apiKey: OPENAI_API_KEY }); - const formData = await event.request.formData(); let files = [formData.get('file')] as File[]; const language = formData.get('language') as whisperLanguagesTypes; @@ -38,7 +35,7 @@ export const actions = { files[0] = await convertToMp3(files[0]); } - if (files[0].size > OPENAI_MAX_SIZE) { + if (files[0].size > WHISPER_MAX_SIZE) { files = await splitIntoMultipleFiles(files[0]); } @@ -53,11 +50,14 @@ export const actions = { user }); for (const f of files) { - const transcription = await openai.audio.transcriptions.create({ - file: f, - language: whisperLanguages[language], - model - }); + const fileContents = new Uint8Array(await f.arrayBuffer()); + const transcription = await azureOpenai.getAudioTranscription( + AZURE_OPENAI_WHISPER_DEPLOYMENT_NAME, + fileContents, + { + language + } + ); transcriptions.push(transcription.text); } @@ -95,7 +95,7 @@ async function splitIntoMultipleFiles( ): Promise { const timestamp = new Date().getTime(); const fileExtension = file.name.split('.').pop(); - const nFiles = Math.ceil(file.size / OPENAI_MAX_SIZE); + const nFiles = Math.ceil(file.size / WHISPER_MAX_SIZE); const tempDir = fs.mkdtempSync('temp'); const inputName = path.join(tempDir, `tempInputFile_${timestamp}.${fileExtension}`); const outputName = path.join(tempDir, `temp_${timestamp}_%d.${targetFormat}`); diff --git a/src/routes/transcription/+page.svelte b/src/routes/transcription/+page.svelte index 639201e..6fe64f5 100644 --- a/src/routes/transcription/+page.svelte +++ b/src/routes/transcription/+page.svelte @@ -20,7 +20,7 @@ }; - +