Skip to content

Commit

Permalink
[ADD] attachment_indexation_mupdf
Browse files Browse the repository at this point in the history
  • Loading branch information
len-foss committed Sep 8, 2023
1 parent f31245a commit 7f394be
Show file tree
Hide file tree
Showing 10 changed files with 102 additions and 0 deletions.
5 changes: 5 additions & 0 deletions attachment_indexation_mupdf/README.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
=====================================================
Attachments List and Document Indexation with PyMuPDF
=====================================================

Module to index PDF documents using the state-of-the-art PyMuPDF library.
4 changes: 4 additions & 0 deletions attachment_indexation_mupdf/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Copyright 2023 len-foss/Financial Way
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).

from . import models
17 changes: 17 additions & 0 deletions attachment_indexation_mupdf/__manifest__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Copyright 2023 len-foss/Financial Way
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
# Odoo addon manifest for the PyMuPDF-based PDF indexation module.
{
    "name": "Attachments List and Document Indexation with PyMuPDF",
    "category": "Hidden/Tools",
    "version": "16.0.0.0.0",
    "summary": "Attachments List and Document Indexation with PyMuPDF",
    "author": "len-foss/FinancialWay,Odoo Community Association (OCA)",
    "website": "https://github.com/OCA/knowledge",
    "license": "AGPL-3",
    # Builds on the generic attachment indexation addon.
    "depends": ["attachment_indexation"],
    # NOTE(review): presumably installed automatically together with its
    # dependency -- confirm this is intended given the external Python
    # requirement declared below.
    "auto_install": True,
    "installable": True,
    # No data files or web assets are shipped by this addon.
    "data": [],
    "assets": {},
    # PyMuPDF is the distribution that provides the ``fitz`` import.
    "external_dependencies": {"python": ["PyMuPDF"]},
}
4 changes: 4 additions & 0 deletions attachment_indexation_mupdf/models/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Copyright 2023 len-foss/Financial Way
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).

from . import ir_attachment
36 changes: 36 additions & 0 deletions attachment_indexation_mupdf/models/ir_attachment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Copyright 2023 len-foss/Financial Way
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).

import io
import logging

from odoo import models

_logger = logging.getLogger(__name__)

# PyMuPDF (imported as ``fitz``) is optional at import time: when it is
# missing, ``fitz`` is set to ``None`` so callers can test for it and take
# another code path instead of failing on import.
try:
    import fitz
except ImportError:
    fitz = None
    # The first literal ends with a space so the two adjacent string
    # literals do not run together as "unavailablebecause".
    _logger.warning(
        "Attachment indexation of PDF documents is unavailable "
        "because PyMuPDF cannot be loaded."
    )


class IrAttachment(models.Model):
    """Extend ``ir.attachment`` so PDF text is extracted with PyMuPDF."""

    _inherit = "ir.attachment"

    def _index_pdf(self, bin_data):
        """Index PDF documents with MuPDF if available.

        :param bin_data: raw bytes of the PDF document
        :return: the text of every page that could be read, concatenated in
            page order; an empty string when nothing could be extracted
        """
        if fitz is None:
            # PyMuPDF could not be imported: defer to the inherited indexer.
            return super()._index_pdf(bin_data)
        # Collect page texts in a list so that whatever was read before a
        # failure is still returned, without repeated string concatenation.
        page_texts = []
        try:
            # The context manager closes the document even if a page fails.
            with fitz.open(stream=io.BytesIO(bin_data), filetype="pdf") as doc:
                for page in doc:
                    page_texts.append(page.get_text())
        except Exception:
            # Indexation is best effort: a broken PDF must not prevent the
            # attachment from being stored, but the failure is kept in the
            # log instead of being dropped silently.
            _logger.info(
                "PyMuPDF could not fully index a PDF attachment", exc_info=True
            )
        return "".join(page_texts)
2 changes: 2 additions & 0 deletions attachment_indexation_mupdf/tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
from . import test_indexation
Binary file not shown.
27 changes: 27 additions & 0 deletions attachment_indexation_mupdf/tests/test_indexation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Copyright 2023 len-foss/Financial Way
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).

import os
from unittest import skipIf

from odoo.tests.common import TransactionCase, tagged

# Folder holding this test module; fixture files are resolved against it.
directory = os.path.split(__file__)[0]

# PyMuPDF is optional: start from "not available" and only replace the
# placeholder when the import actually succeeds.
fitz = None
try:
    import fitz
except ImportError:
    # Keep the ``None`` placeholder so tests can be skipped cleanly.
    pass


@tagged("post_install", "-at_install")
class TestCaseIndexation(TransactionCase):
    """Check the text produced when a PDF is indexed through PyMuPDF."""

    @skipIf(fitz is None, "PyMuPDF is not installed")
    def test_attachment_pdf_indexation(self):
        """Indexing the bundled sample PDF yields its exact text content."""
        pdf_path = os.path.join(directory, "files", "test_content.pdf")
        with open(pdf_path, "rb") as file:
            pdf = file.read()
        text = self.env["ir.attachment"]._index(pdf, "application/pdf")
        # note that the whitespace character is not the same as with pdfminer
        self.assertEqual(
            text, "TestContent!!\n", "the index content should be correct"
        )
6 changes: 6 additions & 0 deletions setup/attachment_indexation_mupdf/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import setuptools

# Packaging entry point for the addon. Only the setuptools-odoo hook is
# declared here; presumably the remaining metadata is derived from the
# addon manifest by that plugin -- confirm against setuptools-odoo docs.
setuptools.setup(
    setup_requires=['setuptools-odoo'],
    odoo_addon=True,
)

0 comments on commit 7f394be

Please sign in to comment.