
Merge tag '20231013_1124_issue3726' into develop
Logging for journalcsv
Steven-Eardley committed Oct 13, 2023
2 parents 4f2881d + 0f27c23 commit 0a4bae1
Showing 5 changed files with 50 additions and 16 deletions.
31 changes: 24 additions & 7 deletions portality/bll/services/journal.py
@@ -8,7 +8,7 @@
 from portality import lock
 from portality.bll.doaj import DOAJ
 from portality.lib.dates import FMT_DATETIME_SHORT
-from portality.store import StoreFactory, prune_container
+from portality.store import StoreFactory, prune_container, StoreException
 from portality.crosswalks.journal_questions import Journal2QuestionXwalk

 from datetime import datetime
@@ -115,7 +115,7 @@ def journal(self, journal_id, lock_journal=False, lock_account=None, lock_timeou

         return journal, the_lock

-    def csv(self, prune=True):
+    def csv(self, prune=True, logger=None):
         """
         Generate the Journal CSV
@@ -127,39 +127,51 @@ def csv(self, prune=True):
         """
         # first validate the incoming arguments to ensure that we've got the right thing
         argvalidate("csv", [
-            {"arg": prune, "allow_none" : False, "arg_name" : "prune"}
+            {"arg": prune, "allow_none" : False, "arg_name" : "prune"},
+            {"arg": logger, "allow_none": True, "arg_name": "logger"}
         ], exceptions.ArgumentException)

         # ~~->FileStoreTemp:Feature~~
         filename = 'journalcsv__doaj_' + dates.now_str(FMT_DATETIME_SHORT) + '_utf8.csv'
         container_id = app.config.get("STORE_CACHE_CONTAINER")
         tmpStore = StoreFactory.tmp()
-        out = tmpStore.path(container_id, filename, create_container=True, must_exist=False)
+        try:
+            out = tmpStore.path(container_id, filename, create_container=True, must_exist=False)
+            logger("Temporary CSV will be written to {x}".format(x=out))
+        except StoreException as e:
+            logger("Could not create temporary CSV file: {x}".format(x=e))
+            raise e

         with open(out, 'w', encoding='utf-8') as csvfile:
-            self._make_journals_csv(csvfile)
+            self._make_journals_csv(csvfile, logger=logger)
+        logger("Wrote CSV to output file {x}".format(x=out))

         # ~~->FileStore:Feature~~
         mainStore = StoreFactory.get("cache")
         try:
             mainStore.store(container_id, filename, source_path=out)
             url = mainStore.url(container_id, filename)
+            logger("Stored CSV in main cache store at {x}".format(x=url))
         finally:
             tmpStore.delete_file(container_id, filename)    # don't delete the container, just in case someone else is writing to it
+            logger("Deleted file from tmp store")

         action_register = []
         if prune:
+            logger("Pruning old CSVs from store")
             def sort(filelist):
                 rx = "journalcsv__doaj_(.+?)_utf8.csv"
                 return sorted(filelist, key=lambda x: datetime.strptime(re.match(rx, x).groups(1)[0], FMT_DATETIME_SHORT), reverse=True)

             def _filter(f_name):
                 return f_name.startswith("journalcsv__")
-            action_register = prune_container(mainStore, container_id, sort, filter=_filter, keep=2)
+            action_register = prune_container(mainStore, container_id, sort, filter=_filter, keep=2, logger=logger)
+            logger("Pruned old CSVs from store")

         # update the ES record to point to the new file
         # ~~-> Cache:Model~~
         models.Cache.cache_csv(url)
+        logger("Stored CSV URL in ES Cache")
         return url, action_register

@@ -207,11 +219,12 @@ def acc_email(j):
             self._make_journals_csv(f, extra_cols)

     @staticmethod
-    def _make_journals_csv(file_object, additional_columns=None):
+    def _make_journals_csv(file_object, additional_columns=None, logger=None):
         """
         Make a CSV file of information for all journals.
         :param file_object: a utf8 encoded file object.
         """
+        logger = logger if logger is not None else lambda x: x
         YES_NO = {True: 'Yes', False: 'No', None: '', '': ''}

@@ -243,6 +256,8 @@ def _get_article_kvs(journal):
         # ~~!JournalCSV:Feature->Journal:Model~~
         cols = {}
         for j in models.Journal.all_in_doaj(page_size=1000):    #Fixme: limited by ES, this may not be sufficient
+            logger("Exporting journal {x}".format(x=j.id))
+
             bj = j.bibjson()
             issn = bj.get_one_identifier(idtype=bj.P_ISSN)
             if issn is None:
@@ -265,6 +280,7 @@ def _get_article_kvs(journal):
             toc_kv = _get_doaj_toc_kv(j)
             cols[issn].insert(2, toc_kv)

+        logger("All journals exported")
         issns = cols.keys()

         csvwriter = csv.writer(file_object)
@@ -275,4 +291,5 @@ def _get_article_kvs(journal):
             csvwriter.writerow(qs)
             vs = [v for _, v in cols[i]]
             csvwriter.writerow(vs)
+        logger("CSV Written")

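The pattern threaded through this file is an optional logger callback: csv() accepts any single-argument callable, and _make_journals_csv() defaults it to a no-op, so existing call sites that pass nothing are unaffected. A minimal, self-contained sketch of that pattern (export_rows and its messages are invented for illustration, not part of this commit):

    import csv
    import io

    def export_rows(rows, file_object, logger=None):
        # fall back to a no-op so callers without a logger pay no cost
        logger = logger if logger is not None else lambda msg: None
        writer = csv.writer(file_object)
        for row in rows:
            logger("Exporting row {x}".format(x=row[0]))
            writer.writerow(row)
        logger("CSV written")

    buf = io.StringIO()
    export_rows([["a", 1], ["b", 2]], buf, logger=print)

Passing print as the logger gives immediate console output; passing nothing keeps the function silent.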
9 changes: 6 additions & 3 deletions portality/models/background.py
@@ -152,13 +152,16 @@ def pretty_audit(self):

 class StdOutBackgroundJob(BackgroundJob):

-    def __init__(self, inner):
+    def __init__(self, inner, force_logging=False):
         super(StdOutBackgroundJob, self).__init__(**inner.data)
+        self._force_logging = force_logging

     def add_audit_message(self, msg, timestamp=None):
         super(StdOutBackgroundJob, self).add_audit_message(msg, timestamp)
-        if app.config.get("DOAJENV") == 'dev':
-            print(msg)
+        if app.config.get("DOAJENV") == 'dev' or self._force_logging:
+            if timestamp is None:
+                timestamp = dates.now_str_with_microseconds()
+            print("[" + timestamp + "] " + msg)


 # ~~-> DataObj:Library~~
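The force_logging flag makes the wrapper echo audit messages to stdout even outside the dev environment, now stamped to microsecond resolution. A stand-in sketch of the same idea, independent of the portality classes (EchoJob is hypothetical):

    from datetime import datetime, timezone

    class EchoJob:
        """Stand-in for a background job that keeps an audit trail."""
        def __init__(self, force_logging=False):
            self._force_logging = force_logging
            self.audit = []

        def add_audit_message(self, msg, timestamp=None):
            if timestamp is None:
                # microsecond-resolution stamp, analogous to now_str_with_microseconds()
                timestamp = datetime.now(timezone.utc).isoformat()
            self.audit.append((timestamp, msg))
            if self._force_logging:
                print("[" + timestamp + "] " + msg)

    job = EchoJob(force_logging=True)
    job.add_audit_message("task started")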
9 changes: 8 additions & 1 deletion portality/scripts/journalcsv.py
@@ -9,10 +9,17 @@
         exit()

     user = app.config.get("SYSTEM_USERNAME")
+    print("Running journal CSV export for user {}".format(user))
+
     job = journal_csv.JournalCSVBackgroundTask.prepare(user)
-    job = StdOutBackgroundJob(job)
+    job = StdOutBackgroundJob(job, force_logging=True)
+    print("Background Job prepared with id {}".format(job.id))
+
     task = journal_csv.JournalCSVBackgroundTask(job)
+    print("Background task created")
+
     BackgroundApi.execute(task)
+    print("Finished journal CSV export for user {}".format(user))
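Note the ordering the script relies on: the job is wrapped before the task is constructed, so every audit message the task records passes through the stdout-echoing wrapper. A toy version of that prepare-wrap-execute flow, with stand-in classes rather than the portality ones:

    class ToyJob:
        def __init__(self):
            self.audit = []

        def add_audit_message(self, msg):
            self.audit.append(msg)

    class StdOutToyJob(ToyJob):
        """Wraps another job and mirrors its audit messages to stdout."""
        def __init__(self, inner, force_logging=False):
            super().__init__()
            self.audit = inner.audit          # share the wrapped job's audit trail
            self._force_logging = force_logging

        def add_audit_message(self, msg):
            super().add_audit_message(msg)
            if self._force_logging:
                print(msg)

    class ToyTask:
        def __init__(self, job):
            self.job = job

        def run(self):
            self.job.add_audit_message("doing the work")

    job = StdOutToyJob(ToyJob(), force_logging=True)
    task = ToyTask(job)    # the task only ever sees the wrapper
    task.run()             # prints "doing the work" and records it in job.audit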
7 changes: 5 additions & 2 deletions portality/store.py
@@ -292,7 +292,8 @@ def list_container_ids(self):
         return [x for x in os.listdir(self.dir) if os.path.isdir(os.path.join(self.dir, x))]


-def prune_container(storage, container_id, sort, filter=None, keep=1):
+def prune_container(storage, container_id, sort, filter=None, keep=1, logger=None):
+    logger = logger if logger is not None else lambda x: x
     action_register = []

     filelist = storage.list(container_id)
@@ -316,7 +317,9 @@ def prune_container(storage, container_id, sort, filter=None, keep=1):
     #action_register.append("Considering files for retention in the following order: " + ", ".join(filtered_sorted))

     remove = filtered_sorted[keep:]
-    action_register.append("Removed old files: " + ", ".join(remove))
+    msg = "Removed old files: " + ", ".join(remove)
+    action_register.append(msg)
+    logger(msg)

     for fn in remove:
         storage.delete_file(container_id, fn)
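prune_container now reports deletions through the same optional callback, falling back to a no-op. The retention logic itself is: filter the container's files, sort newest-first, keep the first `keep`, remove the rest. An illustrative sketch over an in-memory file list (the prune function here is hypothetical, not portality's storage API):

    def prune(files, sort, filter=None, keep=1, logger=None):
        logger = logger if logger is not None else lambda x: x
        candidates = [f for f in files if filter is None or filter(f)]
        remove = sort(candidates)[keep:]
        logger("Removed old files: " + ", ".join(remove))
        return remove

    files = ["journalcsv__doaj_20231013_utf8.csv", "journalcsv__doaj_20231001_utf8.csv",
             "journalcsv__doaj_20230920_utf8.csv", "other.txt"]
    to_delete = prune(files, sort=lambda fs: sorted(fs, reverse=True),
                      filter=lambda f: f.startswith("journalcsv__"), keep=2,
                      logger=print)
    # logs: Removed old files: journalcsv__doaj_20230920_utf8.csv

With keep=2 in the real call, the two newest journal CSVs survive each run.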
10 changes: 7 additions & 3 deletions portality/tasks/journal_csv.py
@@ -15,12 +15,16 @@ def run(self):
         Execute the task as specified by the background_job
         :return:
         """
+
+        def logger(msg):
+            self.background_job.add_audit_message(msg)
+
         job = self.background_job

         journalService = DOAJ.journalService()
-        url, action_register = journalService.csv()
-        for ar in action_register:
-            job.add_audit_message(ar)
+        url, action_register = journalService.csv(logger=logger)
+        # for ar in action_register:
+        #     job.add_audit_message(ar)
         job.add_audit_message("CSV generated; will be served from {y}".format(y=url))

     def cleanup(self):
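In run(), the closure adapts the job's add_audit_message method to the bare one-argument callable the service layer expects, so progress is recorded as it happens rather than replayed from the action register afterwards (which is why the old loop is retired). The shape of that bridge, with stand-in names invented for the example:

    class AuditJob:
        """Stand-in for a background job with an audit trail."""
        def __init__(self):
            self.messages = []

        def add_audit_message(self, msg):
            self.messages.append(msg)

    def generate(logger=None):
        """Stand-in for the service-layer call that accepts a logger."""
        logger = logger if logger is not None else lambda x: x
        logger("generated")
        return "https://example.com/file.csv"

    job = AuditJob()

    def logger(msg):
        # the closure captures `job`, bridging the two APIs
        job.add_audit_message(msg)

    url = generate(logger=logger)
    assert job.messages == ["generated"]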
