Skip to content
This repository has been archived by the owner on Jan 8, 2025. It is now read-only.

Commit

Permalink
fix: Change document intelligence api version to 2024 preview (#76)
Browse files Browse the repository at this point in the history
* fix: Change document intelligence api version to 2024 preview

* refactor: remove debug statements
  • Loading branch information
ReinderVosDeWael authored Aug 30, 2024
1 parent bef96fb commit 6e60f08
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 199 deletions.
95 changes: 0 additions & 95 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@
"type": "module",
"dependencies": {
"@aws-sdk/client-bedrock-runtime": "^3.624.0",
"@azure/ai-form-recognizer": "^5.0.0",
"@azure/openai": "^1.0.0-beta.12",
"@cmi-dair/skeleton-themes": "^0.1.1",
"@floating-ui/dom": "^1.6.7",
Expand Down
6 changes: 1 addition & 5 deletions src/lib/fileHandling.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,5 @@ export function diskFileToMemoryFile(filepath: string, type: string): File {
}

/**
 * Writes the contents of an in-memory `File` to disk.
 *
 * The write is awaited, so the returned promise settles only once the data has
 * been handed to the filesystem, and any I/O error rejects that promise where
 * the caller can catch it (a `throw` inside a `fs.writeFile` callback cannot be
 * caught by the caller). A single asynchronous write also avoids blocking the
 * event loop the way `fs.writeFileSync` would.
 *
 * @param file - The in-memory file whose bytes should be persisted.
 * @param filepath - Destination path; an existing file is overwritten.
 * @returns A promise that resolves once the file has been written.
 * @throws Rejects with the underlying filesystem error if the write fails.
 */
export async function memoryFileToDiskFile(file: File, filepath: string): Promise<void> {
  const contents = Buffer.from(await file.arrayBuffer());
  await fs.promises.writeFile(filepath, contents);
}
50 changes: 44 additions & 6 deletions src/lib/server/azure.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,53 @@ import {
AZURE_OPENAI_API_KEY,
AZURE_OPENAI_ENDPOINT
} from '$lib/server/secrets';
import { DocumentAnalysisClient } from '@azure/ai-form-recognizer';
import * as fs from 'fs';
import { logger } from '$lib/server/utils';

/**
 * Creates an Azure OpenAI client for the configured endpoint, authenticated
 * with the configured API key.
 *
 * @returns A new `OpenAIClient` instance.
 */
export function getAzureOpenAiClient() {
  const credential = new AzureKeyCredential(AZURE_OPENAI_API_KEY);
  return new OpenAIClient(AZURE_OPENAI_ENDPOINT, credential);
}

export function getAzureDocumentAnalysisClient() {
return new DocumentAnalysisClient(
AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT,
new AzureKeyCredential(AZURE_DOCUMENT_INTELLIGENCE_KEY)
);
/**
 * Minimal REST client for the Azure Document Intelligence `prebuilt-read`
 * model (API version `2024-07-31-preview`).
 *
 * `analyze` submits a document as base64 and then polls the URL returned in the
 * `Operation-Location` header until the operation finishes or the attempt
 * budget is exhausted.
 */
export class DocumentAnalysis {
  private readonly endpoint: string;
  private readonly access_key: string;
  private readonly pollIntervalMs: number;
  private readonly maxAttempts: number;

  /**
   * @param pollIntervalMs - Delay between two status polls, in milliseconds.
   * @param maxAttempts - Maximum number of status polls before giving up.
   */
  constructor(pollIntervalMs = 2000, maxAttempts = 15) {
    // Tolerate a configured endpoint with or without a trailing slash.
    const base = AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT.endsWith('/')
      ? AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT
      : `${AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT}/`;
    this.endpoint =
      base +
      'documentintelligence/documentModels/prebuilt-read:analyze?_overload=analyzeDocument&api-version=2024-07-31-preview';
    this.access_key = AZURE_DOCUMENT_INTELLIGENCE_KEY;
    this.pollIntervalMs = pollIntervalMs;
    this.maxAttempts = maxAttempts;
  }

  /**
   * Analyzes the file at `filepath` with the `prebuilt-read` model.
   *
   * @param filepath - Path of the document to analyze.
   * @returns A `Response` with status 200 whose body is the JSON of the
   *   finished operation when it reports `succeeded`; a `Response` with status
   *   500 and an empty body when the submission is rejected, the operation ends
   *   in any other state, or it does not finish within the attempt budget.
   * @throws Rejects if the file cannot be read or a network request fails.
   */
  public async analyze(filepath: string): Promise<Response> {
    const base64 = await fs.promises.readFile(filepath, { encoding: 'base64' });
    const postResponse = await fetch(this.endpoint, {
      method: 'POST',
      headers: { 'Ocp-Apim-Subscription-Key': this.access_key, 'Content-Type': 'application/json' },
      // JSON.stringify guarantees a valid JSON body (double-quoted keys/strings).
      body: JSON.stringify({ base64Source: base64 })
    });

    const operationLocation = postResponse.headers.get('Operation-Location');
    if (!operationLocation) {
      // Read the body as text: an error payload is not guaranteed to be JSON.
      const detail = await postResponse.text().catch(() => '');
      logger.error([postResponse.status, detail]);
      return new Response('', { status: 500 });
    }

    for (let attempt = 1; attempt <= this.maxAttempts; attempt += 1) {
      const getResponse = await fetch(operationLocation, {
        headers: { 'Ocp-Apim-Subscription-Key': this.access_key }
      });
      const json: unknown = await getResponse.json().catch(() => undefined);
      const status =
        typeof json === 'object' && json !== null
          ? (json as { status?: unknown }).status
          : undefined;

      if (status === 'succeeded') {
        return new Response(JSON.stringify(json), { status: 200 });
      }
      // NOTE(review): 'notStarted' and 'running' are assumed to be the only
      // in-progress states of the operation - confirm against the API reference.
      if (status !== 'running' && status !== 'notStarted') {
        logger.error(json);
        return new Response('', { status: 500 });
      }

      if (attempt < this.maxAttempts) {
        await new Promise<void>((resolve) => {
          setTimeout(resolve, this.pollIntervalMs);
        });
      }
    }

    return new Response('', { status: 500 });
  }
}
46 changes: 14 additions & 32 deletions src/routes/api/document-intelligence/+server.ts
Original file line number Diff line number Diff line change
@@ -1,48 +1,30 @@
import { memoryFileToDiskFile } from '$lib/fileHandling.js';
import { getAzureDocumentAnalysisClient } from '$lib/server/azure';
import { DocumentAnalysis } from '$lib/server/azure';
import { logger } from '$lib/server/utils';
import { type DocumentAnalysisClient } from '@azure/ai-form-recognizer';
import fs, { createReadStream } from 'fs';
import { PrebuiltDocumentModel } from './models';
import fs from 'fs';

/**
 * POST handler for the document-intelligence route.
 *
 * Reads the uploaded `file` field from the multipart form data, stores it in a
 * temporary directory, runs it through `DocumentAnalysis`, and responds with
 * the extracted text content.
 *
 * @returns 200 with the extracted text; 422 when no file was uploaded; 500 when
 *   the analysis fails or yields no text content.
 */
export const POST = async ({ request }) => {
  logger.info('Document Intelligence API called');

  const formData = await request.formData();
  const file = formData.get('file');
  // A form field may also be a plain string; only an actual file is accepted.
  if (!file || typeof file === 'string') {
    return new Response('No file found.', { status: 422 });
  }

  const analysis = new DocumentAnalysis();
  const tempdir = fs.mkdtempSync('temp');
  // Use a fixed name: `file.name` is client-controlled and could contain path
  // separators ("../") that would escape the temporary directory.
  const filename = `${tempdir}/upload`;
  try {
    await memoryFileToDiskFile(file, filename);
    const response = await analysis.analyze(filename);
    if (!response.ok) {
      return new Response('Something went wrong, contact an admin.', { status: 500 });
    }

    const content = (await response.json())?.analyzeResult?.content;
    if (typeof content !== 'string') {
      logger.error('Document Intelligence response did not contain analyzeResult.content');
      return new Response('Something went wrong, contact an admin.', { status: 500 });
    }

    return new Response(content, { status: 200 });
  } finally {
    // Remove the directory together with the file; `force` keeps cleanup from
    // throwing (and masking the real error) if the file was never written.
    fs.rmSync(tempdir, { recursive: true, force: true });
  }
};
60 changes: 0 additions & 60 deletions src/routes/api/document-intelligence/models.ts

This file was deleted.

0 comments on commit 6e60f08

Please sign in to comment.