import re
from typing import Mapping, Optional


def multiple_replace(text: str,
                     replacement: Optional[Mapping[str, str]] = None) -> str:
    """Replace several literal substrings of ``text`` in one pass.

    Every key of ``replacement`` found in ``text`` is substituted by its
    associated value. The text is scanned once from left to right, so the
    output of one substitution is never re-examined by another (swapping
    ``'a'`` and ``'b'`` therefore works as expected).

    Args:
        text: The string to process.
        replacement: Mapping of literal substring -> substitute. Keys are
            matched literally (regex metacharacters are escaped). When
            omitted or empty, newlines, tabs and carriage returns are
            stripped.

    Returns:
        A new string with all substitutions applied.
    """
    replacement_dict = replacement
    if not replacement:  # pragma: no cover
        replacement_dict = {
            '\n': '',
            '\t': '',
            '\r': '',
        }

    # Regex alternation picks the first alternative that matches, not the
    # longest one. Put longer keys first so a key that is a prefix of
    # another (e.g. 'a' and 'ab') cannot shadow the longer key.
    ordered_keys = sorted(replacement_dict, key=len, reverse=True)
    pattern = re.compile('|'.join(map(re.escape, ordered_keys)))
    return pattern.sub(lambda match: replacement_dict[match.group(0)], text)