-
-
Notifications
You must be signed in to change notification settings - Fork 334
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
10 changed files
with
102 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
===================================================== | ||
Attachments List and Document Indexation with PyMuPDF | ||
===================================================== | ||
|
||
Module to index PDF documents using a state-of-the-art library (PyMuPDF). |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# Copyright 2023 len-foss/Financial Way | ||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). | ||
|
||
from . import models |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# Copyright 2023 len-foss/Financial Way | ||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). | ||
# Addon manifest: a single dict literal describing this addon.
{
    "name": "Attachments List and Document Indexation with PyMuPDF",
    "category": "Hidden/Tools",
    "version": "16.0.0.0.0",
    "summary": "Attachments List and Document Indexation with PyMuPDF",
    "author": "len-foss/FinancialWay,Odoo Community Association (OCA)",
    "website": "https://github.com/OCA/knowledge",
    "license": "AGPL-3",
    "depends": ["attachment_indexation"],
    # NOTE(review): auto_install is enabled although the addon declares an
    # external Python dependency below -- confirm this combination is intended.
    "auto_install": True,
    "installable": True,
    # No data files or assets are shipped by this addon.
    "data": [],
    "assets": {},
    # PyMuPDF is the distribution name; the code imports it as ``fitz``.
    "external_dependencies": {"python": ["PyMuPDF"]},
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# Copyright 2023 len-foss/Financial Way | ||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). | ||
|
||
from . import ir_attachment |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
# Copyright 2023 len-foss/Financial Way | ||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). | ||
|
||
import io | ||
import logging | ||
|
||
from odoo import models | ||
|
||
_logger = logging.getLogger(__name__)

# PyMuPDF (imported under the module name ``fitz``) is optional: when the
# import fails, ``fitz`` is set to None so the rest of the module can test for
# it, and a warning is logged once at import time.
try:
    import fitz
except ImportError:
    fitz = None
    # Adjacent string literals are concatenated verbatim, so the first part
    # must end with a space to keep the words of the message separated.
    _logger.warning(
        "Attachment indexation of PDF documents is unavailable "
        "because PyMuPDF cannot be loaded."
    )
|
||
|
||
class IrAttachment(models.Model):
    """Extension of ``ir.attachment`` that extracts PDF text with PyMuPDF."""

    _inherit = "ir.attachment"

    def _index_pdf(self, bin_data):
        """Index PDF documents with MuPDF if available.

        :param bin_data: raw binary content of the PDF document
        :return: the concatenated text of the pages that could be read
            (possibly an empty string); when PyMuPDF is not importable,
            whatever the parent implementation returns
        """
        if fitz is None:
            return super()._index_pdf(bin_data)
        # Page texts are collected in a list (rather than joined in one
        # expression) so that the text of pages read before a failure is
        # still returned, as the original best-effort behaviour did.
        page_texts = []
        try:
            # The context manager closes the document even if a page fails.
            with fitz.open(stream=io.BytesIO(bin_data), filetype="pdf") as doc:
                for page in doc:
                    page_texts.append(page.get_text())
        except Exception:
            # Indexation is best effort: a document that cannot be parsed must
            # not break the caller, but the reason is kept for debugging.
            _logger.debug("PyMuPDF could not index the PDF content", exc_info=True)
        return "".join(page_texts)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). | ||
from . import test_indexation |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# Copyright 2023 len-foss/Financial Way | ||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). | ||
|
||
import os | ||
from unittest import skipIf | ||
|
||
from odoo.tests.common import TransactionCase, tagged | ||
|
||
# Directory containing this test module; fixture paths are built from it.
directory = os.path.dirname(__file__)

# ``fitz`` is optional: it is set to None when the import fails so that the
# tests can check for its presence.
try:
    import fitz
except ImportError:
    fitz = None
|
||
|
||
@tagged("post_install", "-at_install")
class TestCaseIndexation(TransactionCase):
    """Check the indexation of PDF attachments when ``fitz`` is importable."""

    @skipIf(fitz is None, "PyMuPDF is not installed")
    def test_attachment_pdf_indexation(self):
        """Index the sample PDF from ``files`` and compare the extracted text."""
        with open(os.path.join(directory, "files", "test_content.pdf"), "rb") as file:
            pdf = file.read()
        text = self.env["ir.attachment"]._index(pdf, "application/pdf")
        # note that the whitespace character is not the same as with pdfminer
        self.assertEqual(
            text, "TestContent!!\n", "the index content should be correct"
        )
1 change: 1 addition & 0 deletions
1
setup/attachment_indexation_mupdf/odoo/addons/attachment_indexation_mupdf
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
../../../../attachment_indexation_mupdf |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
import setuptools | ||
|
||
# Packaging entry point: all arguments are the two keywords below.
# NOTE(review): presumably setuptools-odoo derives the package metadata from
# the addon manifest when ``odoo_addon`` is set -- confirm against its docs.
setuptools.setup(
    setup_requires=['setuptools-odoo'],
    odoo_addon=True,
)