Skip to content

Commit

Permalink
task(api): otimiza piores e médios casos da atualização do db por mei…
Browse files Browse the repository at this point in the history
…o de redis cache, pages fingerprints e multithreading (#183)

* devops(python): django-redis lib and to container

* core(settings): config redis cache with env vars

* utils(scraper): add web page fingerprinter

* utils(commands): separate mult replace into func

* api(commands): add cache verification to update db

* utils(commands): move mult replace from file

* utils(tests): check cache access of finge

* api(commands): add multithreading to db update

* typo(commands): change 'bando' to 'banco'

* core(settings): set up global time constants

* api(decorators): wrapper to del cache keys

* api(commands): move cache set to take effect

* api(models): add cache handle to unique delete

* api(tests): add check to models delete
  • Loading branch information
mateusvrs authored Dec 29, 2023
1 parent f4250bf commit 19b8297
Show file tree
Hide file tree
Showing 13 changed files with 351 additions and 68 deletions.
3 changes: 3 additions & 0 deletions api/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ POSTGRES_DB="postgres"
POSTGRES_USER="suagradeunb"
POSTGRES_PASSWORD="suagradeunb"

# Redis
REDIS_CACHE_LOCATION="redis://redis:6379/1"

# Credenciais de acesso ao admin
ADMIN_NAME="admin"
ADMIN_PASS="admin"
Expand Down
21 changes: 21 additions & 0 deletions api/api/decorators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from django.core.cache import cache
from api.models import cache_error_msg
import functools


def handle_cache_before_delete(query_func: callable) -> callable:
    """Decorate a queryset-returning function so cache entries are purged before deletion.

    The wrapped function is called to obtain a queryset. The cache entry named
    by ``get_cache_key()`` of every object in that queryset is removed first,
    and only when that succeeds is ``queryset.delete()`` executed.

    Args:
        query_func: function returning a queryset whose objects expose
            ``get_cache_key()``.

    Returns:
        The wrapping function; it accepts the same arguments as ``query_func``
        and returns ``None``.

    Raises:
        ValueError: when the cache keys cannot be collected or the cache
            cannot be cleaned. The queryset is not deleted in that case and
            the original error is chained as ``__cause__``.
    """

    @functools.wraps(query_func)
    def wrapper(*args, **kwargs):
        queryset = query_func(*args, **kwargs)

        try:
            # Several objects may map to the same key, so collect the keys in
            # a set and drop them with a single cache call.
            cache_keys = {item.get_cache_key() for item in queryset}
            cache.delete_many(cache_keys)
        except Exception as exc:  # pragma: no cover
            # The database must not change while the cache could still hold
            # stale fingerprints.
            raise ValueError(cache_error_msg) from exc

        queryset.delete()

    return wrapper
123 changes: 98 additions & 25 deletions api/api/management/commands/updatedb.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
from typing import Any
from argparse import ArgumentParser as CommandParser
from django.core.management.base import BaseCommand
from utils import sessions, web_scraping
from utils.db_handler import delete_classes_from_discipline, delete_all_departments_using_year_and_period, get_or_create_department, get_or_create_discipline, create_class
from time import time
from django.db import transaction
from utils import sessions
from utils import db_handler as dbh
from utils.web_scraping import DisciplineWebScraper, get_list_of_departments
from django.core.cache import cache
from time import time, sleep
from collections import deque
from core.settings.base import THIRTY_DAYS_IN_SECS
import threading


class Command(BaseCommand):
Expand All @@ -13,9 +19,12 @@ class Command(BaseCommand):

def add_arguments(self, parser: CommandParser) -> None:
"""Adiciona os argumentos do comando."""
parser.add_argument('-a', '-all', action='store_true', dest='all', default=False,
parser.add_argument('-a', '--all', action='store_true', dest='all', default=False,
help="Atualiza o banco de dados com as disciplinas dos períodos atual e seguinte.")

parser.add_argument('-ds', '--descriptive', action='store_true', default=False,
help="Ativa a opção de uma atualização descritiva com os outputs (print) necessários")

parser.add_argument('-p', '--period', action='store', default=None,
choices=[".".join(sessions.get_current_year_and_period()), ".".join(
sessions.get_next_period())],
Expand All @@ -26,6 +35,7 @@ def add_arguments(self, parser: CommandParser) -> None:

def handle(self, *args: Any, **options: Any):
choices = []
threads = []

if options["all"]:
choices.append(sessions.get_current_year_and_period())
Expand All @@ -37,67 +47,130 @@ def handle(self, *args: Any, **options: Any):
print("Utilize o comando 'updatedb -h' para mais informações.")
return

# Obtem o ano e o período anterior ao período atual
# Obtém o ano e o período anterior ao período atual
previous_period_year, previous_period = sessions.get_previous_period()

# Apaga as disciplinas do período anterior
delete_all_departments_using_year_and_period(
dbh.delete_all_departments_using_year_and_period(
year=previous_period_year, period=previous_period)

if options["delete"]:
for year, period in choices:
self.delete_period(year=year, period=period)
thread = threading.Thread(
target=self.delete_period, args=(year, period,))
threads.append(thread)
thread.start()

for thread in threads:
thread.join()
threads.clear()

return

departments_ids = web_scraping.get_list_of_departments()
departments_ids = get_list_of_departments()

if departments_ids is None:
self.display_error_message("department_ids")
return

print("Atualizando o banco de dados...")

for year, period in choices:
def start_update_year_period(year: str, period: str):
try:
start_time = time()
self.update_departments(
departments_ids=departments_ids, year=year, period=period)
print(f"Começando atualização de {year}/{period}")
with transaction.atomic():
self.update_departments(
departments_ids, year, period, options)

self.display_success_update_message(
operation=f"{year}/{period}", start_time=start_time)
except Exception as exception:
print("Houve um erro na atualização do bando de dados.")
print("Houve um erro na atualização do banco de dados.")
print(f"Error: {exception}")

def update_departments(self, departments_ids: list, year: str, period: str) -> None:
start_tot_time = time()
for year, period in choices:
thread = threading.Thread(
target=start_update_year_period, args=(year, period,))
threads.append(thread)
thread.start()
sleep(0.01) # little time to start print don't overleap

print()

for thread in threads:
thread.join()
threads.clear()

print(f"\nTempo total de execução: {(time() - start_tot_time):.1f}s")

def update_departments(self, departments_ids: list, year: str, period: str, options: Any) -> None:
    """Update the departments of one year/period with their disciplines and classes.

    One worker thread is started per department id. The worker fingerprints
    the department page and compares the result with the value cached under
    ``"<department_id>/<year>.<period>"``. When both match, the department is
    skipped. Otherwise the disciplines are scraped, the classes of every
    discipline are deleted and recreated, and the new fingerprint is cached.

    Args:
        departments_ids: ids of the departments to process.
        year: year of the period being updated.
        period: period being updated.
        options: parsed command options; only ``options['descriptive']`` is
            read here, to enable the per-department progress output.
    """
    descriptive = options['descriptive']

    def execute_update(department_id):
        """Refresh one department unless its cached fingerprint is current."""
        scraper = DisciplineWebScraper(department_id, year, period)
        fingerprint = scraper.create_page_fingerprint()
        cache_key = f"{department_id}/{year}.{period}"

        # Only the cache read is guarded: a failing cache is reported and the
        # department is then handled as if it were outdated.
        try:
            cached_fingerprint = cache.get(cache_key)
        except Exception:
            print("Ocorreu um erro ao tentar acessar o cache")
            cached_fingerprint = None

        if cached_fingerprint and cached_fingerprint == fingerprint:
            if descriptive:
                print(f"Departamento ({department_id}) atualizado, operação não necessária")
            return

        disciplines_list = scraper.get_disciplines()
        department = dbh.get_or_create_department(
            code=department_id, year=year, period=period)

        if descriptive:
            print(f"Departamento ({department_id}) desatualizado, operação necessária")

        # For every discipline of the period, drop the classes stored
        # previously and register the freshly scraped ones.
        for discipline_code, classes_info in disciplines_list.items():
            # Create or fetch the discipline.
            discipline = dbh.get_or_create_discipline(
                name=classes_info[0]["name"], code=discipline_code, department=department)

            # Delete the classes stored previously.
            dbh.delete_classes_from_discipline(discipline=discipline)

            # Register the new classes.
            for class_info in classes_info:
                dbh.create_class(
                    teachers=class_info["teachers"],
                    classroom=class_info["classroom"],
                    schedule=class_info["schedule"],
                    days=class_info["days"],
                    _class=class_info["class_code"],
                    discipline=discipline,
                    special_dates=class_info["special_dates"])

        # The fingerprint is stored only after the database work is done.
        cache.set(cache_key, fingerprint, timeout=THIRTY_DAYS_IN_SECS)

        if descriptive:
            print(f'Operação de atualização finalizada para o departamento ({department_id})')

    # At most three workers are pending at a time: as soon as the third one
    # is started, the oldest is joined before another department is launched.
    threads = deque()
    for department_id in departments_ids:
        thread = threading.Thread(
            target=execute_update, args=(department_id,))
        threads.append(thread)
        thread.start()

        if len(threads) == 3:
            threads.popleft().join()

    for thread in threads:
        thread.join()
    threads.clear()

def delete_period(self, year: str, period: str) -> None:
    """Delete one year/period from the database.

    The deletion is executed exactly once, inside a single database
    transaction; the success message is emitted after the transaction block
    has been left.

    Args:
        year: year of the period to delete.
        period: period to delete.
    """
    start_time = time()

    with transaction.atomic():
        dbh.delete_all_departments_using_year_and_period(
            year=year, period=period)

    self.display_success_delete_message(
        operation=f"{year}/{period}", start_time=start_time)

Expand Down
58 changes: 55 additions & 3 deletions api/api/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,27 @@
from django.contrib.postgres.fields import ArrayField
from users.models import User
from django.utils import timezone
from django.core.cache import cache

class Department(models.Model):
cache_error_msg = "Cache isn't working properly, so database isn't allowed to be modified!"


class CustomModel(models.Model):
    """Abstract base model whose ``delete`` clears a cache entry before removing the row."""

    class Meta:
        abstract = True

    def delete(self, *args, **kwargs):
        """Remove the cache entry named by ``cache_key`` and then delete the instance.

        Args:
            *args: forwarded unchanged to ``Model.delete``.
            **kwargs: must contain ``cache_key`` (the cache entry to drop);
                every other keyword is forwarded to ``Model.delete``.

        Returns:
            Whatever the parent ``delete`` returns.

        Raises:
            ValueError: when ``cache_key`` is missing or the cache entry
                cannot be removed. The row is left untouched and the original
                error is chained as ``__cause__``.
        """
        try:
            # ``cache_key`` is consumed here so it never reaches Model.delete.
            cache.delete(kwargs.pop('cache_key'))
        except Exception as exc:  # pragma: no cover
            raise ValueError(cache_error_msg) from exc

        return super().delete(*args, **kwargs)


class Department(CustomModel):
"""Classe que representa um departamento.
code:str -> Código do departamento
year:str -> Ano do departamento
Expand All @@ -17,8 +36,19 @@ class Department(models.Model):
def __str__(self):
"""Return the department code as the string representation."""
return self.code

def get_cache_key(self):
    """Build this department's cache key in the form ``<code>/<year>.<period>``."""
    return "{}/{}.{}".format(self.code, self.year, self.period)

def delete(self, *args, **kwargs):
    """Delete this department, handing its cache key to the parent ``delete``."""
    kwargs.update(cache_key=self.get_cache_key())
    super().delete(*args, **kwargs)

class Discipline(models.Model):

class Discipline(CustomModel):
"""Classe que representa uma disciplina.
name:str -> Nome da disciplina
unicode_name:str -> Nome da disciplina normalizado
Expand All @@ -38,8 +68,19 @@ def save(self, *args, **kwargs):
self.unicode_name = unidecode(self.name).casefold()
super(Discipline, self).save(*args, **kwargs)

def get_cache_key(self):
    """Build the cache key of the owning department: ``<code>/<year>.<period>``."""
    owner = self.department
    return f"{owner.code}/{owner.year}.{owner.period}"

def delete(self, *args, **kwargs):
    """Delete this discipline, handing its cache key to the parent ``delete``."""
    kwargs.update(cache_key=self.get_cache_key())
    super().delete(*args, **kwargs)


class Class(models.Model):
class Class(CustomModel):
"""Classe que representa uma turma.
teachers:list -> Lista de professores da turma
classroom:str -> Sala da turma
Expand Down Expand Up @@ -67,6 +108,17 @@ class Class(models.Model):
def __str__(self):
"""Return the value of ``_class`` as the string representation."""
return self._class

def get_cache_key(self):
    """Build the cache key of the department that owns this class's discipline.

    The key has the form ``<code>/<year>.<period>``.
    """
    owner = self.discipline.department
    return f"{owner.code}/{owner.year}.{owner.period}"

def delete(self, *args, **kwargs):
    """Delete this class, handing its cache key to the parent ``delete``."""
    kwargs.update(cache_key=self.get_cache_key())
    super().delete(*args, **kwargs)


class Schedule(models.Model):
"""Classe que representa um horário.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from django.test import TestCase
from django.core.cache import cache
from api.models import Department, Discipline, Class


class DisciplineModelsTest(TestCase):
def setUp(self):
class ModelsTest(TestCase):
def create_data(self):
self.department = Department.objects.create(
code='INF',
year="2023",
Expand All @@ -24,6 +25,11 @@ def setUp(self):
discipline=self.discipline
)

cache.set("INF/2023.2", "hash_value")

def setUp(self):
"""Build the test fixtures by delegating to ``create_data``."""
self.create_data()

def test_create_discipline(self):
self.assertEqual(self.discipline.name,
'Métodos de Desenvolvimento de Software')
Expand Down Expand Up @@ -52,3 +58,31 @@ def test_str_method_of_class(self):

def test_str_method_of_department(self):
"""``str(department)`` must equal the department's code."""
self.assertEqual(str(self.department), self.department.code)

def test_delete_department_with_cache_handle(self):
    """Deleting the department must leave no Department rows and no cache keys."""
    self.department.delete()

    remaining_rows = Department.objects.all()
    remaining_keys = cache.keys('*')

    self.assertEqual(len(remaining_rows), 0)
    self.assertEqual(len(remaining_keys), 0)

def test_delete_discipline_with_cache_handle(self):
    """Deleting the discipline must leave no Discipline rows and no cache keys."""
    self.discipline.delete()

    remaining_rows = Discipline.objects.all()
    remaining_keys = cache.keys('*')

    self.assertEqual(len(remaining_rows), 0)
    self.assertEqual(len(remaining_keys), 0)

def test_delete_class_with_cache_handle(self):
    """Deleting the class must leave no Class rows and no cache keys."""
    self._class.delete()

    remaining_rows = Class.objects.all()
    remaining_keys = cache.keys('*')

    self.assertEqual(len(remaining_rows), 0)
    self.assertEqual(len(remaining_keys), 0)

Loading

0 comments on commit 19b8297

Please sign in to comment.