From 4f6121f99e68ff47bb920de0c365494faa15f54d Mon Sep 17 00:00:00 2001 From: ahmetselman Date: Thu, 12 Dec 2024 10:07:49 +0100 Subject: [PATCH] fix: worker collisions --- package.json | 7 +--- src/domain/contexts/ChatbotContext.tsx | 8 ++-- .../DownloadPortalOperations.tsx | 7 ++-- .../pdf/DownloadPortalOperations.ts | 37 +++++++++++++++++++ yarn.lock | 25 ++++++++----- 5 files changed, 63 insertions(+), 21 deletions(-) create mode 100644 src/operations/pdf/DownloadPortalOperations.ts diff --git a/package.json b/package.json index dbbc666e..6f47cf17 100644 --- a/package.json +++ b/package.json @@ -19,6 +19,7 @@ "@nextui-org/card": "^2.0.34", "@nextui-org/chip": "^2.0.33", "@nextui-org/code": "2.0.33", + "pdfjs-dist": "3.11.174", "@nextui-org/dropdown": "^2.1.31", "@nextui-org/image": "^2.0.32", "@nextui-org/input": "2.2.5", @@ -72,7 +73,6 @@ "react-leaflet-cluster": "^2.1.0", "react-markdown": "^9.0.1", "react-pdf": "^9.1.1", - "react-pdftotext": "^1.3.4", "rehype-sanitize": "^6.0.0", "remark-gfm": "^4.0.0", "sharp": "^0.33.5", @@ -124,8 +124,5 @@ "never" ] } - }, - "resolutions": { - "pdfjs-dist": "3.11.174" } -} \ No newline at end of file +} diff --git a/src/domain/contexts/ChatbotContext.tsx b/src/domain/contexts/ChatbotContext.tsx index 5e647cff..cca22c38 100644 --- a/src/domain/contexts/ChatbotContext.tsx +++ b/src/domain/contexts/ChatbotContext.tsx @@ -3,7 +3,6 @@ import React, { createContext, ReactNode, useContext, useEffect, useMemo, useSta import { IChat } from '@/domain/entities/chatbot/Chatbot'; import { SenderRole } from '@/domain/enums/SenderRole'; import ChatbotOperations from '@/operations/chatbot/Chatbot'; -import { DownloadPortalOperations } from '@/operations/download-portal/DownloadPortalOperations'; import { useMediaQuery } from '@/utils/resolution'; interface ChatbotContextType { @@ -79,12 +78,15 @@ export function ChatbotProvider({ children }: { children: ReactNode }) { return; } - const reportText = await DownloadPortalOperations.extractTextFromPdf(await report); + // Use dynamic import for client-side PDF text extraction + const { extractClientSidePdfText } = await import('@/operations/pdf/DownloadPortalOperations'); + const reportText = await extractClientSidePdfText(report); + // const reportText = await DownloadPortalOperations.extractTextFromPdf(await report); const assistantMessage = { id: crypto.randomUUID(), content: reportText - ? `Hey, how can I help you with this report about ${countryName}?` + ? `Hey, how can I help you with this report about ${reportText}?` : `Hey, unfortunately I'm currently unable to answer questions about this report. You can try it later or chat with me about other things!`, role: SenderRole.ASSISTANT, }; diff --git a/src/operations/download-portal/DownloadPortalOperations.tsx b/src/operations/download-portal/DownloadPortalOperations.tsx index 920b97a1..b02e469b 100644 --- a/src/operations/download-portal/DownloadPortalOperations.tsx +++ b/src/operations/download-portal/DownloadPortalOperations.tsx @@ -1,13 +1,12 @@ import { CalendarDate } from '@internationalized/date'; import { DocumentDownload, SearchNormal1 } from 'iconsax-react'; import { Bot } from 'lucide-react'; -import { pdfjs } from 'react-pdf'; import { CountryCodesData } from '@/domain/entities/country/CountryCodesData'; import { ICountryData } from '@/domain/entities/download/Country'; import { CustomTableColumns } from '@/domain/props/CustomTableProps'; -pdfjs.GlobalWorkerOptions.workerSrc = `//cdnjs.cloudflare.com/ajax/libs/pdf.js/${pdfjs.version}/pdf.worker.min.mjs`; +// pdfjs.GlobalWorkerOptions.workerSrc = `//cdnjs.cloudflare.com/ajax/libs/pdf.js/${pdfjs.version}/pdf.worker.min.mjs`; export class DownloadPortalOperations { static getColumns(): CustomTableColumns { @@ -130,6 +129,7 @@ export class DownloadPortalOperations { toggleModal(); } + /* static async extractTextFromPdf(url: string): Promise { try { const pdf = await pdfjs.getDocument(url).promise; @@ -158,7 +158,8 @@ export class DownloadPortalOperations { return fullText.trim(); } catch (error) { console.error('Error extracting text from PDF:', error); - return ''; + return 'error during extraction'; } } + */ } diff --git a/src/operations/pdf/DownloadPortalOperations.ts b/src/operations/pdf/DownloadPortalOperations.ts new file mode 100644 index 00000000..8d9dfb1c --- /dev/null +++ b/src/operations/pdf/DownloadPortalOperations.ts @@ -0,0 +1,37 @@ +'use client'; + +import { pdfjs } from 'react-pdf'; + +pdfjs.GlobalWorkerOptions.workerSrc = `//cdnjs.cloudflare.com/ajax/libs/pdf.js/${pdfjs.version}/pdf.worker.min.mjs`; + +export const extractClientSidePdfText = async (url: string): Promise => { + try { + const pdf = await pdfjs.getDocument(url).promise; + let fullText = ''; + const pagePromises: Promise[] = []; + + for (let i = 1; i <= pdf.numPages; i += 1) { + pagePromises.push( + pdf.getPage(i).then(async (page) => { + const textContent = await page.getTextContent(); + return textContent.items + .map((item) => { + if ('str' in item) { + return item.str; + } + return ''; + }) + .join(' '); + }) + ); + } + + const pageTexts = await Promise.all(pagePromises); + fullText = pageTexts.join('\n'); + + return fullText.trim(); + } catch (error) { + console.error('Error extracting text from PDF:', error); + return 'error during extraction'; + } +}; diff --git a/yarn.lock b/yarn.lock index 5fe44311..3d1e42f5 100644 --- a/yarn.lock +++ b/yarn.lock @@ -9160,6 +9160,11 @@ path2d@0.1.1: resolved "https://registry.yarnpkg.com/path2d/-/path2d-0.1.1.tgz#d3c3886cd2252fb2a7830c27ea7bb9a862d937ea" integrity sha512-/+S03c8AGsDYKKBtRDqieTJv2GlkMb0bWjnqOgtF6MkjdUQ9a8ARAtxWf9NgKLGm2+WQr6+/tqJdU8HNGsIDoA== +path2d@^0.2.0: + version "0.2.2" + resolved "https://registry.yarnpkg.com/path2d/-/path2d-0.2.2.tgz#cc85d61ed7827e7863a2ee36713d4b5315a3d85d" + integrity sha512-+vnG6S4dYcYxZd+CZxzXCNKdELYZSKfohrk98yajCo1PtRoDgCTrrwOvK1GT0UoAdVszagDVllQc0U1vaX4NUQ== + pathe@1.1.2: version "1.1.2" resolved "https://registry.yarnpkg.com/pathe/-/pathe-1.1.2.tgz#6c4cb47a945692e48a1ddd6e4094d170516437ec" @@ -9173,7 +9178,7 @@ pbf@^3.2.1: ieee754 "^1.1.12" resolve-protobuf-schema "^2.1.0" -pdfjs-dist@3.11.174, pdfjs-dist@4.4.168, pdfjs-dist@^4.6.82: +pdfjs-dist@3.11.174: version "3.11.174" resolved "https://registry.yarnpkg.com/pdfjs-dist/-/pdfjs-dist-3.11.174.tgz#5ff47b80f2d58c8dd0d74f615e7c6a7e7e704c4b" integrity sha512-TdTZPf1trZ8/UFu5Cx/GXB7GZM30LT+wWUNfsi6Bq8ePLnb+woNKtDymI2mxZYBpMbonNFqKmiz684DIfnd8dA== @@ -9181,6 +9186,14 @@ pdfjs-dist@3.11.174, pdfjs-dist@4.4.168, pdfjs-dist@^4.6.82: canvas "^2.11.2" path2d-polyfill "^2.0.1" +pdfjs-dist@4.4.168: + version "4.4.168" + resolved "https://registry.yarnpkg.com/pdfjs-dist/-/pdfjs-dist-4.4.168.tgz#4487716376a33c68753ed37f782ae91d1c9ef8fa" + integrity sha512-MbkAjpwka/dMHaCfQ75RY1FXX3IewBVu6NGZOcxerRFlaBiIkZmUoR0jotX5VUzYZEXAGzSFtknWs5xRKliXPA== + optionalDependencies: + canvas "^2.11.2" + path2d "^0.2.0" + picocolors@^1.0.0, picocolors@^1.1.0, picocolors@^1.1.1: version "1.1.1" resolved "https://registry.yarnpkg.com/picocolors/-/picocolors-1.1.1.tgz#3d321af3eab939b083c8f929a1d12cda81c26b6b" @@ -9465,14 +9478,6 @@ react-pdf@^9.1.1: tiny-invariant "^1.0.0" warning "^4.0.0" -react-pdftotext@^1.3.4: - version "1.3.4" - resolved "https://registry.yarnpkg.com/react-pdftotext/-/react-pdftotext-1.3.4.tgz#513b706c33f577aebb726bd035fab2463bb574ab" - integrity sha512-WRM41aCYueQn8CikwG9OxAur6DqSn5sE5j1yu3oG+WORTCfJLDH/m0dR9RnggxnKB51tfMeRFyOZuKJQZLtYBQ== - dependencies: - pdfjs-dist "^4.6.82" - react-pdf "^9.1.1" - react-remove-scroll-bar@^2.3.6: version "2.3.6" resolved "https://registry.yarnpkg.com/react-remove-scroll-bar/-/react-remove-scroll-bar-2.3.6.tgz#3e585e9d163be84a010180b18721e851ac81a29c" @@ -10796,4 +10801,4 @@ yocto-queue@^1.0.0: zwitch@^2.0.0: version "2.0.4" resolved "https://registry.yarnpkg.com/zwitch/-/zwitch-2.0.4.tgz#c827d4b0acb76fc3e685a4c6ec2902d51070e9d7" - integrity sha512-bXE4cR/kVZhKZX/RjPEflHaKVhUVl85noU3v6b8apfQEc1x4A+zBxjZ4lN8LqGd6WZ3dl98pY4o717VFmoPp+A== \ No newline at end of file + integrity sha512-bXE4cR/kVZhKZX/RjPEflHaKVhUVl85noU3v6b8apfQEc1x4A+zBxjZ4lN8LqGd6WZ3dl98pY4o717VFmoPp+A==