Commit

add more test cases for datalog
philipkcl committed Oct 10, 2023
1 parent 145ff47 commit 6e53abb
Showing 4 changed files with 107 additions and 102 deletions.
12 changes: 7 additions & 5 deletions doajtest/fixtures/v2/journals.py
@@ -1,5 +1,6 @@
# -*- coding: UTF-8 -*-
from copy import deepcopy
from typing import Iterable

import rstr

@@ -16,7 +17,7 @@ def make_journal_source(in_doaj=False):
return template

@staticmethod
def make_many_journal_sources(count=2, in_doaj=False):
def make_many_journal_sources(count=2, in_doaj=False) -> Iterable[dict]:
journal_sources = []
for i in range(0, count):
template = deepcopy(JOURNAL_SOURCE)
@@ -66,10 +67,10 @@ def question_answers():
"editor_group": "editorgroup",
"editor": "associate",
"in_doaj": False,
"notes" : [
{"note" : "Second Note", "date" : "2014-05-22T00:00:00Z", "id" : "1234",
"notes": [
{"note": "Second Note", "date": "2014-05-22T00:00:00Z", "id": "1234",
"author_id": "fake_account_id__b"},
{"note": "First Note", "date": "2014-05-21T14:02:45Z", "id" : "abcd",
{"note": "First Note", "date": "2014-05-21T14:02:45Z", "id": "abcd",
"author_id": "fake_account_id__a"},
],
"owner": "publisher",
@@ -92,6 +93,7 @@ def question_answers():
JOURNAL_FORM_EXPANDED.update(OWNER_FORM_EXPANDED)

from portality.crosswalks.journal_form import JournalFormXWalk

JOURNAL_FORM = JournalFormXWalk.forminfo2multidict(JOURNAL_FORM_EXPANDED)

JOURNAL_BULK_EDIT = {
@@ -157,7 +159,7 @@ def question_answers():
'DOAJ Seal', # (added outside journal2questions)
'Added on Date', # (added outside journal2questions)
'Last updated Date', # (added outside journal2questions)
#'Tick: Accepted after March 2014', Removed 2020-12-11
# 'Tick: Accepted after March 2014', Removed 2020-12-11
"Number of Article Records", # (added outside journal2questions)
"Most Recent Article Added" # (added outside journal2questions)
]
154 changes: 75 additions & 79 deletions doajtest/unit/test_task_datalog_journal_added_update.py
@@ -1,12 +1,17 @@
import time
import unittest
from typing import List
from unittest.mock import MagicMock
from unittest.mock import patch

from doajtest.fixtures import JournalFixtureFactory
from doajtest.helpers import DoajTestCase
from portality.lib import dates
from portality.models import Journal
from portality.models.datalog_journal_added import DatalogJournalAdded
from portality.tasks import datalog_journal_added_update
from portality.tasks.datalog_journal_added_update import DatalogJournalAddedUpdate, to_display_data
from portality.tasks.datalog_journal_added_update import DatalogJournalAddedUpdate, to_display_data, \
to_datalog_journal_added
from portality.tasks.helpers import background_helper

input_filename = 'fake_filename'
@@ -36,15 +41,6 @@


class TestDatalogJournalAddedUpdate(DoajTestCase):
@classmethod
def setUpClass(cls) -> None:
super(TestDatalogJournalAddedUpdate, cls).setUpClass()
for t in testdata_datalog_list:
t.save()

time.sleep(2)
# test_data_list[-1].save(blocking=True)
# print('hihih')

def test_execute__normal(self):
"""
@@ -54,10 +50,15 @@ def test_execute__normal(self):
-------
"""

save_test_datalog()

journals = save_test_journals(3)

worksheet = MagicMock()
worksheet.get_all_values.return_value = [
['Journal Title', ''],
['titlea', '1234-1000'],
[journals[-1].bibjson().title, journals[-1].bibjson().eissn],
]

# Replace the real worksheet with the mock
@@ -72,15 +73,16 @@
google_key_path=input_google_key_path)

worksheet.get_all_values.assert_called()
values, row_idx, *_ = worksheet.insert_rows.call_args.args
new_rows_added_to_excels, row_idx, *_ = worksheet.insert_rows.call_args.args

assert values == [
to_display_data(d) for d in testdata_datalog_list[:-1]
assert new_rows_added_to_excels == [
to_display_data(to_datalog_journal_added(d)) for d in journals[:-1]
]

assert row_idx == 2

def test_find_new_xlsx_rows(self):
save_test_datalog()
values = datalog_journal_added_update.find_new_xlsx_rows(testdata_datalog_list[-1].issn)
assert values == [
to_display_data(d) for d in testdata_datalog_list[:-1]
@@ -110,71 +112,65 @@ def test_to_display_data(self):
),
)

# print('-------------')
# print(values)

# worksheet.get_all_values.assert_not_called()
# # Now, when you use the worksheet object, it will behave like the mock
# result = worksheet.some_method()
#
# # Assertions or further testing using the mock
# assert result == "Mocked result"
# worksheet.some_method.assert_called_once_with()

# run execute

# # prepare test data
# for _ in range(3):
# BackgroundJob().save()
# for _ in range(2):
# Account().save(blocking=True)

# new_background_jobs = list(BackgroundJob.scroll())
# new_accounts = list(Account.scroll())

# # run execute
# background_task = background_helper.execute_by_bg_task_type(AnonExportBackgroundTask)

# # assert audit messages
# self.assertIn('audit', background_task.background_job.data)
# msgs = {l.get('message') for l in background_task.background_job.data['audit']}
# self.assertTrue(any('Compressing temporary file' in m for m in msgs))
# self.assertTrue(any('account.bulk.1' in m for m in msgs))
#
# main_store = StoreLocal(None)
# container_id = self.app_test.config.get("STORE_ANON_DATA_CONTAINER")
# target_names = main_store.list(container_id)
#
# # must have some file in main store
# self.assertGreater(len(target_names), 0)
#
# for target_name in target_names:
#
# # load data from store
# _target_path = Path(main_store.get(container_id, target_name).name)
# data_str = gzip.decompress(_target_path.read_bytes()).decode(errors='ignore')
# if data_str:
# rows = data_str.strip().split('\n')
#
# # Filter out the index: directives, leaving the actual record data
# json_rows = list(filter(lambda j: len(json.loads(j).keys()) > 1, rows))
#
# if target_name.startswith('background_job'):
# test_data_list = new_background_jobs
# elif target_name.startswith('account'):
# test_data_list = new_accounts
# else:
# print(f'unexpected data dump for target_name[{target_name}]')
# continue
#
# print(f'number of rows have been saved to store: [{target_name}] {len(json_rows)}')
# self.assertEqual(len(json_rows), len(test_data_list))
# self.assertIn(test_data_list[0].id, [json.loads(j)['id'] for j in json_rows])
# else:
# print(f'empty archive {target_name}')
#
# def test_prepare__queue_id(self):
# bgtask_tester.test_queue_id_assigned(AnonExportBackgroundTask)
def test_latest_row_index(self):
values = [
['xxxx', 'ddd'],
['Journal Title', ''],
['titlea', '1234-1000'],
['titleb', '1234-2000'],
['titlec', '1234-3000'],
]

assert datalog_journal_added_update.find_latest_row_index(values) == 2

values = [
['Journal Title', ''],
['titlea', '1234-1000'],
]

assert datalog_journal_added_update.find_latest_row_index(values) == 1

def test_find_new_datalog_journals(self):
save_test_journals(3)

def _find_new_datalog_journals(latest_date_str):
datalog_list = datalog_journal_added_update.find_new_datalog_journals(
dates.parse(latest_date_str)
)
datalog_list = list(datalog_list)
return len(datalog_list)

assert _find_new_datalog_journals('2101-01-01') == 2
assert _find_new_datalog_journals('2102-01-01') == 1
assert _find_new_datalog_journals('2103-01-01') == 0
assert _find_new_datalog_journals('2104-01-01') == 0


def save_test_datalog():
for t in testdata_datalog_list:
t.save()

time.sleep(2)


def save_test_journals(n_journals: int) -> List[Journal]:
journals = JournalFixtureFactory.make_many_journal_sources(count=n_journals, in_doaj=True)
journals = map(lambda d: Journal(**d), journals)
journals = list(journals)
assert len(journals) == n_journals
journals[0]['created_date'] = '2103-01-01'
journals[1]['created_date'] = '2102-01-01'
journals[2]['created_date'] = '2101-01-01'
save_and_block_last(journals)

return journals


def save_and_block_last(journals: List[Journal]):
sub_journals, last_journal = journals[:-1], journals[-1]
for j in sub_journals:
j.save()
last_journal.save(blocking=True)


if __name__ == '__main__':
4 changes: 4 additions & 0 deletions portality/background.py
@@ -1,3 +1,4 @@
import logging
import traceback
from copy import deepcopy
from typing import Iterable
@@ -16,6 +17,8 @@
from portality.bll import DOAJ
from portality.core import app

log = logging.getLogger(__name__)


class BackgroundException(Exception):
pass
@@ -75,6 +78,7 @@ def execute(self, background_task: 'BackgroundTask'):
job.save()
raise
except Exception as e:
log.error(f"Error in Background Task: {e}")
job.fail()
job.add_audit_message("Error in Job Run")
job.add_audit_message("Caught in job runner during run: " + traceback.format_exc())
39 changes: 21 additions & 18 deletions portality/tasks/datalog_journal_added_update.py
@@ -3,7 +3,7 @@
import logging
import re
import time
from typing import Callable, NoReturn
from typing import Callable, NoReturn, List, Iterable

import gspread

@@ -52,25 +52,28 @@ def query(self):
}


def find_new_datalog_journals(latest_date):
def find_new_datalog_journals(latest_date: datetime.datetime) -> Iterable[DatalogJournalAdded]:
records = Journal.iterate(NewDatalogJournalQuery(latest_date).query())
for journal in records:
bibjson = journal.bibjson()
title = bibjson.title
issn = bibjson.eissn or bibjson.pissn
has_seal = journal.has_seal()
try:
has_continuations = any([journal.get_future_continuations() + journal.get_past_continuations()])
except RecursionError:
has_continuations = False
return (to_datalog_journal_added(j) for j in records)

record = DatalogJournalAdded(title=title, issn=issn,
date_added=journal.created_timestamp,
has_seal=has_seal,
has_continuations=has_continuations,
journal_id=journal.id)

yield record
def to_datalog_journal_added(journal: Journal) -> DatalogJournalAdded:
bibjson = journal.bibjson()
title = bibjson.title
issn = bibjson.eissn or bibjson.pissn
has_seal = journal.has_seal()
try:
has_continuations = any([journal.get_future_continuations() + journal.get_past_continuations()])
except RecursionError:
has_continuations = False

record = DatalogJournalAdded(title=title, issn=issn,
date_added=journal.created_timestamp,
has_seal=has_seal,
has_continuations=has_continuations,
journal_id=journal.id)

return record


class LastDatalogJournalAddedQuery:
Expand Down Expand Up @@ -101,7 +104,7 @@ def get_latest_date_added():
return dates.parse(record.date_added)


def find_latest_row_index(records):
def find_latest_row_index(records: List[List[str]]):
records = iter(records)
latest_row_index = 0
while next(records, ['Journal Title'])[0] != 'Journal Title':
