Skip to content

Commit

Permalink
feat(pdf): add gen dual layer pdf sup
Browse files Browse the repository at this point in the history
  • Loading branch information
cir9no committed Jan 9, 2025
1 parent b7219ac commit 94ebbbd
Show file tree
Hide file tree
Showing 8 changed files with 130 additions and 3 deletions.
12 changes: 12 additions & 0 deletions frontend/src/hooks/metadata-ai-operation.js
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,17 @@ export const MetadataAIOperationsProvider = ({
});
}, [repoID]);

/**
 * Ask the metadata API to make a PDF searchable (generate a dual-layer PDF).
 *
 * An in-progress toast is opened before the request and closed once the
 * request settles, whether it succeeds or fails.
 *
 * @param {Object} file
 * @param {string} file.parentDir - Directory containing the PDF.
 * @param {string} file.fileName - Name of the PDF file.
 * @param {Object} [callbacks]
 * @param {Function} [callbacks.success_callback] - Called with no arguments after the request resolves.
 * @param {Function} [callbacks.fail_callback] - Called with no arguments after the request rejects.
 */
const genDualLayerPDF = useCallback(({ parentDir, fileName }, { success_callback, fail_callback } = {}) => {
  const filePath = Utils.joinPath(parentDir, fileName);
  const inProgressToaster = toaster.notifyInProgress(gettext('Making PDF searchable by AI...'), { duration: null });
  metadataAPI.genDualLayerPDF(repoID, filePath).then(() => {
    inProgressToaster.close();
    // Previously the success callback was accepted but never invoked,
    // so callers had no way to react to a finished conversion.
    success_callback && success_callback();
  }).catch(() => {
    inProgressToaster.close();
    const errorMessage = gettext('Failed to make PDF searchable');
    toaster.danger(errorMessage);
    fail_callback && fail_callback();
  });
}, [repoID]);

const extractFilesDetails = useCallback((objIds, { success_callback, fail_callback } = {}) => {
const inProgressToaster = toaster.notifyInProgress(gettext('Extracting file details by AI...'), { duration: null });
metadataAPI.extractFileDetails(repoID, objIds).then(res => {
Expand Down Expand Up @@ -102,6 +113,7 @@ export const MetadataAIOperationsProvider = ({
onOCR,
OCRSuccessCallBack,
generateDescription,
genDualLayerPDF,
extractFilesDetails,
extractFileDetails,
}}>
Expand Down
9 changes: 9 additions & 0 deletions frontend/src/metadata/api.js
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,15 @@ class MetadataManagerAPI {
return this.req.post(url, params);
};

/**
 * POST a request to the dual-layer PDF endpoint for one file.
 *
 * @param {string} repoID - Sent as `repo_id` in the request body.
 * @param {string} filePath - Sent as `path` in the request body.
 * @returns {*} Whatever `this.req.post` returns for the request.
 */
genDualLayerPDF = (repoID, filePath) => {
  const endpoint = `${this.server}/api/v2.1/ai/pdf/to-dual-layer/`;
  return this.req.post(endpoint, { path: filePath, repo_id: repoID });
};

imageCaption = (repoID, filePath, lang) => {
const url = this.server + '/api/v2.1/ai/image-caption/';
const params = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ const OPERATION = {
OCR: 'ocr',
FILE_TAGS: 'file-tags',
FILE_DETAIL: 'file-detail',
GEN_DUAL_LAYER_PDF: 'gen-dual-layer-pdf',
};

const AI = () => {
Expand Down
5 changes: 5 additions & 0 deletions frontend/src/metadata/context.js
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,11 @@ class Context {
return this.metadataAPI.generateDescription(repoID, filePath);
};

/**
 * Request a dual-layer PDF for a file in the repository named by
 * `this.settings.repoID`, delegating to the metadata API.
 *
 * @param {string} filePath - Path of the PDF, passed through unchanged.
 * @returns {*} The result of `metadataAPI.genDualLayerPDF`.
 */
genDualLayerPDF = (filePath) => {
  const { repoID } = this.settings;
  return this.metadataAPI.genDualLayerPDF(repoID, filePath);
};

imageCaption = (filePath) => {
const repoID = this.settings['repoID'];
const lang = this.settings['lang'];
Expand Down
26 changes: 25 additions & 1 deletion frontend/src/metadata/views/table/context-menu/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ const OPERATION = {
FILE_DETAIL: 'file-detail',
FILE_DETAILS: 'file-details',
MOVE: 'move',
GEN_DUAL_LAYER_PDF: 'gen-dual-layer-pdf',
};

const ContextMenu = ({
Expand All @@ -50,7 +51,7 @@ const ContextMenu = ({

const { metadata } = useMetadataView();
const { enableOCR } = useMetadataStatus();
const { onOCR, generateDescription, extractFilesDetails } = useMetadataAIOperations();
const { onOCR, generateDescription, extractFilesDetails, genDualLayerPDF } = useMetadataAIOperations();

const repoID = window.sfMetadataStore.repoId;

Expand Down Expand Up @@ -175,6 +176,7 @@ const ContextMenu = ({
const isDescribableFile = checkIsDescribableFile(record);
const isImage = Utils.imageCheck(fileName);
const isVideo = Utils.videoCheck(fileName);
const isPDF = Utils.pdfCheck(fileName);
if (descriptionColumn && isDescribableFile) {
list.push({
value: OPERATION.GENERATE_DESCRIPTION,
Expand All @@ -183,6 +185,14 @@ const ContextMenu = ({
});
}

if (isPDF) {
list.push({
value: OPERATION.GEN_DUAL_LAYER_PDF,
label: gettext('Make the PDF searchable'),
record
});
}

if (enableOCR && isImage) {
list.push({ value: OPERATION.OCR, label: gettext('OCR'), record });
}
Expand Down Expand Up @@ -247,6 +257,14 @@ const ContextMenu = ({
});
}, [updateRecords, generateDescription]);

/**
 * Context-menu handler: start dual-layer PDF generation for a record.
 *
 * Does nothing unless the record yields both a parent directory and a file
 * name, and the file name passes `Utils.pdfCheck`.
 *
 * @param {Object} record - Row record the menu item was invoked on.
 */
const handleGenerateDualLayerPDF = useCallback((record) => {
  const parentDir = getParentDirFromRecord(record);
  const fileName = getFileNameFromRecord(record);
  // pdfCheck is only evaluated once both path parts are known to be present.
  const canConvert = fileName && parentDir && Utils.pdfCheck(fileName);
  if (canConvert) {
    genDualLayerPDF({ parentDir, fileName });
  }
}, [genDualLayerPDF]);

const toggleFileTagsRecord = useCallback((record = null) => {
setFileTagsRecord(record);
}, []);
Expand Down Expand Up @@ -334,6 +352,12 @@ const ContextMenu = ({
handelGenerateDescription(record);
break;
}
case OPERATION.GEN_DUAL_LAYER_PDF: {
const { record } = option;
if (!record) break;
handleGenerateDualLayerPDF(record);
break;
}
case OPERATION.FILE_TAGS: {
const { record } = option;
if (!record) break;
Expand Down
69 changes: 68 additions & 1 deletion seahub/ai/apis.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
import os.path
import json

from pysearpc import SearpcError
from seahub.repo_metadata.models import RepoMetadata
Expand All @@ -15,7 +16,8 @@
from seahub.api2.authentication import TokenAuthentication, SdocJWTTokenAuthentication
from seahub.utils import get_file_type_and_ext, IMAGE
from seahub.views import check_folder_permission
from seahub.ai.utils import image_caption, translate, writing_assistant, verify_ai_config, generate_summary, generate_file_tags, ocr
from seahub.ai.utils import image_caption, translate, writing_assistant, \
verify_ai_config, generate_summary, generate_file_tags, ocr, generate_dual_layer_pdf

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -343,3 +345,68 @@ def post(self, request):
return api_error(status.HTTP_500_INTERNAL_SERVER_ERROR, error_msg)

return Response(resp_json, resp.status_code)


class GenDualLayerPDF(APIView):
    """API endpoint that asks the AI server to build a dual-layer PDF.

    The view validates the request, issues one-time download and upload
    tokens for the file server, and forwards them together with the file
    path to ``generate_dual_layer_pdf``. The AI server's JSON body and
    status code are relayed to the client.
    """
    authentication_classes = (TokenAuthentication, SessionAuthentication)
    permission_classes = (IsAuthenticated,)
    throttle_classes = (UserRateThrottle,)

    def post(self, request):
        """Handle a dual-layer PDF generation request.

        Request data:
            repo_id: id of the library containing the file.
            path: path of the file inside the library.

        Returns:
            The AI server's JSON response and status code on success,
            otherwise an ``api_error`` response: 400 for a missing argument
            or unconfigured AI server, 403 when the user may not write to
            the parent folder, 404 for an unknown library or file, and 500
            for internal failures.
        """
        if not verify_ai_config():
            return api_error(status.HTTP_400_BAD_REQUEST, 'AI server not configured')

        repo_id = request.data.get('repo_id')
        path = request.data.get('path')
        username = request.user.username

        if not repo_id:
            return api_error(status.HTTP_400_BAD_REQUEST, 'repo_id invalid')
        if not path:
            return api_error(status.HTTP_400_BAD_REQUEST, 'path invalid')

        repo = seafile_api.get_repo(repo_id)
        if not repo:
            error_msg = 'Library %s not found.' % repo_id
            return api_error(status.HTTP_404_NOT_FOUND, error_msg)

        parent_dir = os.path.dirname(path)

        # An upload token for parent_dir is handed out below, so read-only
        # access is not sufficient: require read-write permission.
        # NOTE(review): relies on check_folder_permission returning 'rw'
        # for writable folders - confirm against its implementation.
        if check_folder_permission(request, repo_id, parent_dir) != 'rw':
            error_msg = 'Permission denied.'
            return api_error(status.HTTP_403_FORBIDDEN, error_msg)

        try:
            file_id = seafile_api.get_file_id_by_path(repo_id, path)
        except SearpcError as e:
            logger.error(e)
            return api_error(status.HTTP_500_INTERNAL_SERVER_ERROR, 'Internal Server Error')

        if not file_id:
            return api_error(status.HTTP_404_NOT_FOUND, f"File {path} not found")

        # The upload token's object id is a JSON document naming the
        # directory the upload is bound to.
        obj_id = json.dumps({'parent_dir': parent_dir})
        try:
            download_token = seafile_api.get_fileserver_access_token(
                repo_id, file_id, 'download', username, use_onetime=True)
            upload_token = seafile_api.get_fileserver_access_token(
                repo_id, obj_id, 'upload-link', username, use_onetime=True)
        except SearpcError as e:
            logger.error(e)
            return api_error(status.HTTP_500_INTERNAL_SERVER_ERROR, 'Internal Server Error')

        if not (download_token and upload_token):
            error_msg = 'Internal Server Error'
            return api_error(status.HTTP_500_INTERNAL_SERVER_ERROR, error_msg)

        params = {
            'path': path,
            'download_token': download_token,
            'upload_token': upload_token,
        }
        try:
            resp = generate_dual_layer_pdf(params)
            resp_json = resp.json()
        except Exception as e:
            # Record the failure; previously it was dropped silently.
            logger.error(e)
            error_msg = 'Internal Server Error'
            return api_error(status.HTTP_500_INTERNAL_SERVER_ERROR, error_msg)

        return Response(resp_json, resp.status_code)
7 changes: 7 additions & 0 deletions seahub/ai/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,13 @@ def generate_summary(params):
return resp


def generate_dual_layer_pdf(params):
    """POST ``params`` as JSON to the AI server's dual-layer PDF endpoint.

    The URL is built from ``SEAFILE_AI_SERVER_URL`` and the request carries
    the headers produced by ``gen_headers()``.

    Returns the response object from ``requests.post`` unmodified.
    """
    request_headers = gen_headers()
    endpoint = urljoin(SEAFILE_AI_SERVER_URL, '/api/v1/pdf/generate-dual-layer-pdf/')
    return requests.post(endpoint, json=params, headers=request_headers)


def generate_file_tags(params):
headers = gen_headers()
url = urljoin(SEAFILE_AI_SERVER_URL, '/api/v1/generate-file-tags/')
Expand Down
4 changes: 3 additions & 1 deletion seahub/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
from django.urls import include, path, re_path
from django.views.generic import TemplateView

from seahub.ai.apis import ImageCaption, GenerateSummary, GenerateFileTags, OCR, Translate, WritingAssistant
from seahub.ai.apis import ImageCaption, GenerateSummary, GenerateFileTags, \
OCR, Translate, WritingAssistant, GenDualLayerPDF
from seahub.api2.endpoints.share_link_auth import ShareLinkUserAuthView, ShareLinkEmailAuthView
from seahub.api2.endpoints.internal_api import InternalUserListView, InternalCheckShareLinkAccess, \
InternalCheckFileOperationAccess
Expand Down Expand Up @@ -1058,4 +1059,5 @@
re_path(r'^api/v2.1/ai/ocr/$', OCR.as_view(), name='api-v2.1-ocr'),
re_path(r'^api/v2.1/ai/translate/$', Translate.as_view(), name='api-v2.1-translate'),
re_path(r'^api/v2.1/ai/writing-assistant/$', WritingAssistant.as_view(), name='api-v2.1-writing-assistant'),
re_path(r'^api/v2.1/ai/pdf/to-dual-layer/$', GenDualLayerPDF.as_view(), name='api-v2.1-pdf-to-dual-layer'),
]

0 comments on commit 94ebbbd

Please sign in to comment.