.tar.gz
- title: Test maintenance mode for 'Upload preservation file' tab
context:
role: publisher
@@ -82,4 +122,4 @@ tests:
- step: Set the value of 'PRESERVATION_PAGE_UNDER_MAINTENANCE' to True in the configuration file (app.cfg/dev.cfg/test.cfg)
- step: Go to preservation area /publisher/preservation
results:
- - Maintenance page should be displayed with the content as 'This page is currently offline for maintenance'
\ No newline at end of file
+ - Maintenance page should be displayed with the content as 'This page is currently offline for maintenance'
diff --git a/doajtest/unit/resources/invalid_article.zip b/doajtest/unit/resources/invalid_article.zip
new file mode 120000
index 0000000000..e955a12f14
--- /dev/null
+++ b/doajtest/unit/resources/invalid_article.zip
@@ -0,0 +1 @@
+../../preservation_upload_test_package/invalid_article.zip
\ No newline at end of file
diff --git a/doajtest/unit/resources/multi_journals.zip b/doajtest/unit/resources/multi_journals.zip
new file mode 120000
index 0000000000..ac64455e26
--- /dev/null
+++ b/doajtest/unit/resources/multi_journals.zip
@@ -0,0 +1 @@
+../../preservation_upload_test_package/multi_journals.zip
\ No newline at end of file
diff --git a/doajtest/unit/resources/preservation_multiple_journals.zip b/doajtest/unit/resources/preservation_multiple_journals.zip
new file mode 100644
index 0000000000..a55b9decd2
Binary files /dev/null and b/doajtest/unit/resources/preservation_multiple_journals.zip differ
diff --git a/doajtest/unit/resources/valid_article.zip b/doajtest/unit/resources/valid_article.zip
new file mode 120000
index 0000000000..ebf17a3099
--- /dev/null
+++ b/doajtest/unit/resources/valid_article.zip
@@ -0,0 +1 @@
+../../preservation_upload_test_package/valid_article.zip
\ No newline at end of file
diff --git a/doajtest/unit/test_task_preservation.py b/doajtest/unit/test_task_preservation.py
index 29c170f742..d8d62d87e8 100644
--- a/doajtest/unit/test_task_preservation.py
+++ b/doajtest/unit/test_task_preservation.py
@@ -12,34 +12,80 @@
from portality.models.article import Article
-class TestPreservation(DoajTestCase):
+def mock_pull_by_key(key, value):
+ if value == "http://link.springer.com/article/10.1186/s40478-018-0619-9":
+ article = Article()
+ article.data = PreservationMock.ARTICLE_DATA
+ return article
+ elif value == "https://www.frontiersin.org/articles/10.3389/fcosc.2022.1028295":
+ article = Article()
+ article.data = PreservationMock.ARTICLE_DATA_JOURNAL2
+ return article
- def setUp(self):
- super(TestPreservation, self).setUp()
- articles_zip_path = test_constants.PATH_RESOURCES / "articles.zip"
+
+def mock_requests_post(*args, **kwargs):
+ class MockResponse:
+ def __init__(self, json_data, status_code):
+ self.json_data = json_data
+ self.status_code = status_code
+
+ def json(self):
+ return self.json_data
+
+ if not args[0] == None and kwargs["data"]["org"] == "DOAJ":
+ return MockResponse({
+ "files": [
+ {
+ "name": "name_of_tarball.tar.gz",
+ "sha256": "decafbad"
+ }
+ ]
+ }, 200)
+
+ return MockResponse(None, 404)
+
+
+def mock_owner_of_article(*args, **kwargs):
+ return True
+
+
+class TestPreservationSetup(DoajTestCase):
+
+ def initial_setup(self, package_name):
+ super(TestPreservationSetup, self).setUp()
+ articles_zip_path = test_constants.PATH_RESOURCES / package_name
with open(articles_zip_path, 'rb') as zf:
- self.zip_file = FileStorage(BytesIO(zf.read()), filename="articles.zip")
+ self.zip_file = FileStorage(BytesIO(zf.read()), filename=package_name)
self.upload_dir = app.config.get("UPLOAD_DIR", ".")
created_time = dates.now_str("%Y-%m-%d-%H-%M-%S")
- owner = "rama"
- dir_name = owner + "-" + created_time
+ self.owner = "rama"
+ self.journal_dir = "2051-5960"
+ dir_name = self.owner + "-" + created_time
self.local_dir = os.path.join(preservation.Preservation.UPLOAD_DIR, dir_name)
- self.preserve = preservation.Preservation(self.local_dir, owner)
- self.package = preservation.PreservationPackage(self.preserve.preservation_dir, owner)
- self.local_dir = os.path.join(self.local_dir,"tmp")
+ self.preserve = preservation.Preservation(self.local_dir, self.owner)
+ self.tmp_dir = os.path.join(self.local_dir, "tmp")
self.preservation_collection = app.config.get("PRESERVATION_COLLECTION")
- app.config["PRESERVATION_COLLECTION"] = {"rama":["test","2"]}
+ app.config["PRESERVATION_COLLECTION"] = {"rama": ["test", "2"]}
def tearDown(self):
- super(TestPreservation, self).tearDown()
+ super(TestPreservationSetup, self).tearDown()
preservation.Preservation.delete_local_directory(self.local_dir)
app.config["PRESERVATION_COLLECTION"] = self.preservation_collection
+
+class TestPreservation(TestPreservationSetup):
+
+ def setUp(self):
+ super(TestPreservation, self).initial_setup("articles.zip")
+
+ def tearDown(self):
+ super(TestPreservation, self).tearDown()
+
def test_local_directory(self):
- #Test creation of local directory
- #TestPreservation.preserve.create_local_directories()
+ # Test creation of local directory
+ # TestPreservation.preserve.create_local_directories()
job = preservation.PreservationBackgroundTask.prepare("rama", upload_file=self.zip_file)
params = job.params
local_dir = params["preserve__local_dir"]
@@ -48,81 +94,137 @@ def test_local_directory(self):
assert os.path.isdir(os.path.join(self.upload_dir, dir_name))
assert os.path.isdir(os.path.join(self.upload_dir, dir_name,dir_name))
- #Test deletion of local directory
+ # Test deletion of local directory
preservation.Preservation.delete_local_directory(local_dir)
assert not os.path.exists(os.path.join(self.upload_dir, dir_name))
- def mock_pull_by_key(key, value):
- article = Article()
- article.data = PreservationMock.ARTICLE_DATA
- return article
+ @patch.object(Article, 'pull_by_key', mock_pull_by_key)
+ @patch.object(requests, "post", mock_requests_post)
+ @patch.object(preservation.Preservation, 'owner_of_article', mock_owner_of_article)
+ def test_preservation(self):
+ self.preserve.save_file(self.zip_file)
+
+ assert os.path.exists(os.path.join(self.tmp_dir, self.zip_file.filename))
+
+ # Test extraction of zip file
+ self.preserve.extract_zip_file()
+
+ assert os.path.exists(os.path.join(self.tmp_dir, "articles"))
+ assert os.path.isdir(os.path.join(self.tmp_dir, "articles"))
+ assert os.path.isdir(os.path.join(self.tmp_dir, "articles", "article_1"))
+ assert os.path.exists(os.path.join(self.tmp_dir, "articles",
+ "article_1", "identifier.txt"))
+
+ reader = preservation.CSVReader(os.path.join(self.tmp_dir,
+ "articles", "identifiers.csv"))
+ data = reader.articles_info()
+
+ assert "article_1" in data
+ assert "article/10.1186/s40478-018-0619-9" in data["article_1"][0]
+
+ # Test package structure
+ self.preserve.create_package_structure()
+ package_dir = os.path.join(self.upload_dir,
+ self.preserve.dir_name, self.preserve.dir_name, self.journal_dir)
+ tag_manifest_file = os.path.join(package_dir, "00003741594643f4996e2555a01e03c7", "tagmanifest-sha256.txt")
+ manifest_file = os.path.join(package_dir, "00003741594643f4996e2555a01e03c7", "manifest-sha256.txt")
+ assert os.path.exists(package_dir)
+ assert os.path.exists(tag_manifest_file)
+ assert os.path.exists(manifest_file)
+
+ package = preservation.PreservationPackage(self.preserve.preservation_dir, self.journal_dir, self.owner)
+
+ # Test creation of tar file
+ package.create_package()
+ tar_file = package_dir + "_" + package.created_time + ".tar.gz"
+ assert os.path.exists(tar_file)
+
+ sha256 = package.sha256(tar_file)
+ response = package.upload_package(sha256, tar_file)
+ assert response.status_code == 200
+
+ def test_get_article_info(self):
+ issn, article_id, metadata_json = self.preserve.get_article_info(PreservationMock.ARTICLE_DATA)
- def mock_requests_post(*args, **kwargs):
- class MockResponse:
- def __init__(self, json_data, status_code):
- self.json_data = json_data
- self.status_code = status_code
+ assert issn == "2051-5960"
+ assert article_id == "00003741594643f4996e2555a01e03c7"
+ assert metadata_json["bibjson"]["identifier"][0]["id"] == "10.1186/s40478-018-0619-9"
- def json(self):
- return self.json_data
- if not args[0] == None and kwargs["data"]["org"] == "DOAJ":
- return MockResponse({
- "files": [
- {
- "name": "name_of_tarball.tar.gz",
- "sha256": "decafbad"
- }
- ]
- }, 200)
+class TestPreservationMultipleJournals(TestPreservationSetup):
- return MockResponse(None, 404)
+ def setUp(self):
+ super(TestPreservationMultipleJournals, self).initial_setup("preservation_multiple_journals.zip")
+ self.another_journal_dir = "2673-611X"
- def mock_owner_of_article(*args, **kwargs):
- return True
+ def tearDown(self):
+ super(TestPreservationMultipleJournals, self).tearDown()
@patch.object(Article, 'pull_by_key', mock_pull_by_key)
- @patch.object(requests,"post", mock_requests_post)
+ @patch.object(requests, "post", mock_requests_post)
@patch.object(preservation.Preservation, 'owner_of_article', mock_owner_of_article)
- def test_preservation(self):
+ def test_preservation_multiple_journals(self):
self.preserve.save_file(self.zip_file)
- assert os.path.exists(os.path.join(self.local_dir, self.zip_file.filename))
-
# Test extraction of zip file
self.preserve.extract_zip_file()
- assert os.path.exists(os.path.join(self.local_dir, "articles"))
- assert os.path.isdir(os.path.join(self.local_dir, "articles"))
- assert os.path.isdir(os.path.join(self.local_dir, "articles", "article_1"))
- assert os.path.exists(os.path.join(self.local_dir, "articles",
- "article_1", "identifier.txt"))
+ assert os.path.exists(os.path.join(self.tmp_dir, "articles"))
+ assert os.path.isdir(os.path.join(self.tmp_dir, "articles"))
+ assert os.path.isdir(os.path.join(self.tmp_dir, "articles", "article_1"))
+ assert os.path.exists(os.path.join(self.tmp_dir, "articles",
+ "article_1", "Identifier.txt"))
- reader = preservation.CSVReader(os.path.join(self.local_dir,
- "articles", "identifiers.csv"))
+ reader = preservation.CSVReader(os.path.join(self.tmp_dir,
+ "articles", "Identifiers.csv"))
data = reader.articles_info()
assert "article_1" in data
assert "article/10.1186/s40478-018-0619-9" in data["article_1"][0]
+ assert "article_2" in data
+ assert "10.3389/fcosc.2022.1028295" in data["article_2"][0]
+
# Test package structure
self.preserve.create_package_structure()
package_dir = os.path.join(self.upload_dir,
- self.preserve.dir_name, self.preserve.dir_name)
- tag_manifest_file = os.path.join(package_dir, "2051-5960", "00003741594643f4996e2555a01e03c7", "tagmanifest-sha256.txt")
- manifest_file = os.path.join(package_dir,"2051-5960", "00003741594643f4996e2555a01e03c7", "manifest-sha256.txt")
+ self.preserve.dir_name, self.preserve.dir_name, self.journal_dir)
+ tag_manifest_file = os.path.join(package_dir, "00003741594643f4996e2555a01e03c7", "tagmanifest-sha256.txt")
+ manifest_file = os.path.join(package_dir, "00003741594643f4996e2555a01e03c7", "manifest-sha256.txt")
assert os.path.exists(package_dir)
assert os.path.exists(tag_manifest_file)
assert os.path.exists(manifest_file)
+ package = preservation.PreservationPackage(self.preserve.preservation_dir, self.journal_dir, self.owner)
+
# Test creation of tar file
- self.package.create_package()
- assert os.path.exists(package_dir + ".tar.gz")
+ package.create_package()
+ tar_file = package_dir + "_" + package.created_time + ".tar.gz"
+ assert os.path.exists(tar_file)
- sha256 = self.package.sha256()
- response = self.package.upload_package(sha256)
+ sha256 = package.sha256(tar_file)
+ response = package.upload_package(sha256, tar_file)
assert response.status_code == 200
+ # Test another journal package
+ package_dir = os.path.join(self.upload_dir,
+ self.preserve.dir_name, self.preserve.dir_name, self.another_journal_dir)
+ tag_manifest_file = os.path.join(package_dir, "00005741594643f4996e2666a01e0310", "tagmanifest-sha256.txt")
+ manifest_file = os.path.join(package_dir, "00005741594643f4996e2666a01e0310", "manifest-sha256.txt")
+ assert os.path.exists(package_dir)
+ assert os.path.exists(tag_manifest_file)
+ assert os.path.exists(manifest_file)
+
+ package = preservation.PreservationPackage(self.preserve.preservation_dir, self.another_journal_dir, self.owner)
+
+ # Test creation of tar file for another journal
+ package.create_package()
+ tar_file = package_dir + "_" + package.created_time + ".tar.gz"
+ assert os.path.exists(tar_file)
+
+ sha256 = package.sha256(tar_file)
+ response = package.upload_package(sha256, tar_file)
+ assert response.status_code == 200
def test_get_article_info(self):
issn, article_id, metadata_json = self.preserve.get_article_info(PreservationMock.ARTICLE_DATA)
diff --git a/doajtest/unit/test_toc.py b/doajtest/unit/test_toc.py
index bfe6564d8c..dafe64ac31 100644
--- a/doajtest/unit/test_toc.py
+++ b/doajtest/unit/test_toc.py
@@ -1,16 +1,57 @@
-from doajtest.helpers import DoajTestCase
from doajtest.fixtures import ArticleFixtureFactory, JournalFixtureFactory
+from doajtest.helpers import DoajTestCase
+from portality import app as _app # noqa, make sure route is registered
from portality import models
+from portality.util import url_for
+
+
+def _test_toc_uses_both_issns_when_available(app_test, url_name):
+ j = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
+ pissn = j.bibjson().first_pissn
+ eissn = j.bibjson().first_eissn
+ j.set_last_manual_update()
+ j.save(blocking=True)
+ a = models.Article(**ArticleFixtureFactory.make_article_source(pissn=pissn, eissn=eissn, in_doaj=True))
+ a.save(blocking=True)
+ with app_test.test_client() as t_client:
+ response = t_client.get(url_for(url_name, identifier=j.bibjson().get_preferred_issn()))
+ assert response.status_code == 200
+ assert pissn in response.data.decode()
+ assert eissn in response.data.decode()
+
+
+def _test_toc_correctly_uses_pissn(app_test, url_name):
+ j = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
+ pissn = j.bibjson().first_pissn
+ # remove eissn
+ del j.bibjson().eissn
+ j.set_last_manual_update()
+ j.save(blocking=True)
+ a = models.Article(**ArticleFixtureFactory.make_article_source(pissn=pissn, in_doaj=True))
+ a.save(blocking=True)
+ with app_test.test_client() as t_client:
+ response = t_client.get(url_for(url_name, identifier=j.bibjson().get_preferred_issn()))
+ assert response.status_code == 200
+ assert pissn in response.data.decode()
+
+
+def _test_toc_correctly_uses_eissn(app_test, url_name):
+ j = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
+ eissn = j.bibjson().first_eissn
+ # remove pissn
+ del j.bibjson().pissn
+ j.set_last_manual_update()
+ j.save(blocking=True)
+ a = models.Article(**ArticleFixtureFactory.make_article_source(pissn=eissn, in_doaj=True))
+ a.save(blocking=True)
+ with app_test.test_client() as t_client:
+ response = t_client.get(url_for(url_name, identifier=j.bibjson().get_preferred_issn()))
+ assert response.status_code == 200
+ assert eissn in response.data.decode()
class TestTOC(DoajTestCase):
- def setUp(self):
- super(TestTOC, self).setUp()
-
- def tearDown(self):
- super(TestTOC, self).tearDown()
-
def test_01_article_index_date_parsing(self):
""" The ToC date histogram needs an accurate datestamp in the article's index """
a = models.Article(**ArticleFixtureFactory.make_article_source())
@@ -43,9 +84,9 @@ def test_01_article_index_date_parsing(self):
d = a.bibjson().get_publication_date()
assert d == '2012-03-01T00:00:00Z'
- a.bibjson().year = '86' # beware: this test will give a false negative 70 years from
- a.bibjson().month = '11' # the time of writing; this gives adequate warning (24 years)
- d = a.bibjson().get_publication_date() # to fix hard-coding of centuries in get_publication_date().
+ a.bibjson().year = '86' # beware: this test will give a false negative 70 years from
+ a.bibjson().month = '11' # the time of writing; this gives adequate warning (24 years)
+ d = a.bibjson().get_publication_date() # to fix hard-coding of centuries in get_publication_date().
assert d == '1986-11-01T00:00:00Z'
# Check we can handle numeric months
@@ -90,45 +131,21 @@ def test_02_toc_requirements(self):
assert a.data['index']['date_toc_fv_month'] == a.data['index']['date'] == "1991-01-01T00:00:00Z"
def test_03_toc_uses_both_issns_when_available(self):
- j = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
- pissn = j.bibjson().first_pissn
- eissn = j.bibjson().first_eissn
- j.set_last_manual_update()
- j.save(blocking=True)
- a = models.Article(**ArticleFixtureFactory.make_article_source(pissn=pissn, eissn=eissn, in_doaj=True))
- a.save(blocking=True)
- with self.app_test.test_client() as t_client:
- response = t_client.get('/toc/{}'.format(j.bibjson().get_preferred_issn()))
- assert response.status_code == 200
- assert pissn in response.data.decode()
- assert eissn in response.data.decode()
+ _test_toc_uses_both_issns_when_available(self.app_test, 'doaj.toc')
+
+ def test_04_toc_correctly_uses_pissn(self):
+ _test_toc_correctly_uses_pissn(self.app_test, 'doaj.toc')
+
+ def test_05_toc_correctly_uses_eissn(self):
+ _test_toc_correctly_uses_eissn(self.app_test, 'doaj.toc')
+
+
+class TestTOCArticles(DoajTestCase):
+ def test_03_toc_uses_both_issns_when_available(self):
+ _test_toc_uses_both_issns_when_available(self.app_test, 'doaj.toc_articles')
def test_04_toc_correctly_uses_pissn(self):
- j = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
- pissn = j.bibjson().first_pissn
- # remove eissn
- del j.bibjson().eissn
-
- j.set_last_manual_update()
- j.save(blocking=True)
- a = models.Article(**ArticleFixtureFactory.make_article_source(pissn=pissn, in_doaj=True))
- a.save(blocking=True)
- with self.app_test.test_client() as t_client:
- response = t_client.get('/toc/{}'.format(j.bibjson().get_preferred_issn()))
- assert response.status_code == 200
- assert pissn in response.data.decode()
+ _test_toc_correctly_uses_pissn(self.app_test, 'doaj.toc_articles')
def test_05_toc_correctly_uses_eissn(self):
- j = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
- eissn = j.bibjson().first_eissn
- # remove pissn
- del j.bibjson().pissn
-
- j.set_last_manual_update()
- j.save(blocking=True)
- a = models.Article(**ArticleFixtureFactory.make_article_source(pissn=eissn, in_doaj=True))
- a.save(blocking=True)
- with self.app_test.test_client() as t_client:
- response = t_client.get('/toc/{}'.format(j.bibjson().get_preferred_issn()))
- assert response.status_code == 200
- assert eissn in response.data.decode()
+ _test_toc_correctly_uses_eissn(self.app_test, 'doaj.toc_articles')
diff --git a/portality/bll/services/journal.py b/portality/bll/services/journal.py
index c1d005300d..d27956d700 100644
--- a/portality/bll/services/journal.py
+++ b/portality/bll/services/journal.py
@@ -10,8 +10,9 @@
from portality.lib.dates import FMT_DATETIME_SHORT
from portality.store import StoreFactory, prune_container, StoreException
from portality.crosswalks.journal_questions import Journal2QuestionXwalk
+from portality.util import no_op
-from datetime import datetime
+from datetime import datetime, timedelta
import re, csv, random, string
@@ -131,6 +132,10 @@ def csv(self, prune=True, logger=None):
{"arg": logger, "allow_none": True, "arg_name": "logger"}
], exceptions.ArgumentException)
+ # None isn't executable, so convert logger to NO-OP
+ if logger is None:
+ logger = no_op
+
# ~~->FileStoreTemp:Feature~~
filename = 'journalcsv__doaj_' + dates.now_str(FMT_DATETIME_SHORT) + '_utf8.csv'
container_id = app.config.get("STORE_CACHE_CONTAINER")
@@ -254,42 +259,64 @@ def _get_article_kvs(journal):
return kvs
# ~~!JournalCSV:Feature->Journal:Model~~
- cols = {}
- for j in models.Journal.all_in_doaj(page_size=1000): #Fixme: limited by ES, this may not be sufficient
+ csvwriter = csv.writer(file_object)
+ first = True
+ for j in models.Journal.all_in_doaj(page_size=100):
+ export_start = datetime.utcnow()
logger("Exporting journal {x}".format(x=j.id))
+ time_log = []
bj = j.bibjson()
issn = bj.get_one_identifier(idtype=bj.P_ISSN)
if issn is None:
issn = bj.get_one_identifier(idtype=bj.E_ISSN)
+ time_log.append("{x} - got issn".format(x=datetime.utcnow()))
+
if issn is None:
continue
# ~~!JournalCSV:Feature->JournalQuestions:Crosswalk~~
kvs = Journal2QuestionXwalk.journal2question(j)
+ time_log.append("{x} - crosswalked questions".format(x=datetime.utcnow()))
meta_kvs = _get_doaj_meta_kvs(j)
+ time_log.append("{x} - got meta kvs".format(x=datetime.utcnow()))
article_kvs = _get_article_kvs(j)
+ time_log.append("{x} - got article kvs".format(x=datetime.utcnow()))
additionals = []
if additional_columns is not None:
for col in additional_columns:
additionals += col(j)
- cols[issn] = kvs + meta_kvs + article_kvs + additionals
+ time_log.append("{x} - got additionals".format(x=datetime.utcnow()))
+ row = kvs + meta_kvs + article_kvs + additionals
# Get the toc URL separately from the meta kvs because it needs to be inserted earlier in the CSV
# ~~-> ToC:WebRoute~~
toc_kv = _get_doaj_toc_kv(j)
- cols[issn].insert(2, toc_kv)
+ row.insert(2, toc_kv)
+ time_log.append("{x} - got toc kvs".format(x=datetime.utcnow()))
- logger("All journals exported")
- issns = cols.keys()
-
- csvwriter = csv.writer(file_object)
- qs = None
- for i in sorted(issns):
- if qs is None:
- qs = [q for q, _ in cols[i]]
+ if first is True:
+ qs = [q for q, _ in row]
csvwriter.writerow(qs)
- vs = [v for _, v in cols[i]]
+ first = False
+
+ vs = [v for _, v in row]
csvwriter.writerow(vs)
- logger("CSV Written")
+ time_log.append("{x} - written row to csv".format(x=datetime.utcnow()))
+
+ export_end = datetime.utcnow()
+ if export_end - export_start > timedelta(seconds=10):
+ for l in time_log:
+ logger(l)
+
+ logger("All journals exported and CSV written")
+ # issns = cols.keys()
+ # qs = None
+ # for i in sorted(issns):
+ # if qs is None:
+ # qs = [q for q, _ in cols[i]]
+ # csvwriter.writerow(qs)
+ # vs = [v for _, v in cols[i]]
+ # csvwriter.writerow(vs)
+ # logger("CSV Written")
diff --git a/portality/forms/application_forms.py b/portality/forms/application_forms.py
index d1f8a44d20..e66a8e2ef2 100644
--- a/portality/forms/application_forms.py
+++ b/portality/forms/application_forms.py
@@ -169,18 +169,21 @@ class FieldDefinitions:
"contexts": {
"admin": {
"widgets": [
+ "trim_whitespace", # ~~^-> TrimWhitespace:FormWidget~~
"click_to_copy", # ~~^-> ClickToCopy:FormWidget~~
]
},
"editor": {
"disabled": True,
"widgets": [
+ "trim_whitespace", # ~~^-> TrimWhitespace:FormWidget~~
"click_to_copy", # ~~^-> ClickToCopy:FormWidget~~
]
},
"associate_editor": {
"disabled": True,
"widgets": [
+ "trim_whitespace", # ~~^-> TrimWhitespace:FormWidget~~
"click_to_copy", # ~~^-> ClickToCopy:FormWidget~~
]
},
@@ -212,16 +215,19 @@ class FieldDefinitions:
},
"admin": {
"widgets": [
+ "trim_whitespace", # ~~^-> TrimWhitespace:FormWidget~~
"click_to_copy", # ~~^-> ClickToCopy:FormWidget~~
]
},
"associate_editor": {
"widgets": [
+ "trim_whitespace", # ~~^-> TrimWhitespace:FormWidget~~
"click_to_copy", # ~~^-> ClickToCopy:FormWidget~~
]
},
"editor": {
"widgets": [
+ "trim_whitespace", # ~~^-> TrimWhitespace:FormWidget~~
"click_to_copy", # ~~^-> ClickToCopy:FormWidget~~
]
}
@@ -474,7 +480,7 @@ class FieldDefinitions:
],
"widgets": [
"trim_whitespace", # ~~^-> TrimWhitespace:FormWidget~~
- {"autocomplete": {"type" : "journal", "field": "bibjson.publisher.name.exact"}},
+ {"autocomplete": {"type" : "journal", "field": "bibjson.publisher.name.exact"}}, # ~~^-> Autocomplete:FormWidget~~
"full_contents" # ~~^->FullContents:FormWidget~~
],
"help": {
@@ -486,16 +492,22 @@ class FieldDefinitions:
},
"admin": {
"widgets": [
+ "trim_whitespace", # ~~^-> TrimWhitespace:FormWidget~~
+ {"autocomplete": {"type": "journal", "field": "bibjson.publisher.name.exact"}}, # ~~^-> Autocomplete:FormWidget~~
"click_to_copy", # ~~^-> ClickToCopy:FormWidget~~
]
},
"associate_editor": {
"widgets": [
+ "trim_whitespace", # ~~^-> TrimWhitespace:FormWidget~~
+ {"autocomplete": {"type": "journal", "field": "bibjson.publisher.name.exact"}}, # ~~^-> Autocomplete:FormWidget~~
"click_to_copy", # ~~^-> ClickToCopy:FormWidget~~
]
},
"editor": {
"widgets": [
+ "trim_whitespace", # ~~^-> TrimWhitespace:FormWidget~~
+ {"autocomplete": {"type": "journal", "field": "bibjson.publisher.name.exact"}}, # ~~^-> Autocomplete:FormWidget~~
"click_to_copy", # ~~^-> ClickToCopy:FormWidget~~
]
}
@@ -546,28 +558,37 @@ class FieldDefinitions:
"a society or other type of institution, enter that here."],
"placeholder": "Type or select the society or institution’s name"
},
- "contexts" : {
+ "widgets": [
+ "trim_whitespace", # ~~^-> TrimWhitespace:FormWidget~~
+ {"autocomplete": {"type" : "journal", "field": "bibjson.institution.name.exact"}}, # ~~^-> Autocomplete:FormWidget~~
+ "full_contents" # ~~^->FullContents:FormWidget~~
+ ],
+ "contexts": {
"admin": {
"widgets": [
+ "trim_whitespace", # ~~^-> TrimWhitespace:FormWidget~~
+ {"autocomplete": {"type": "journal", "field": "bibjson.institution.name.exact"}},
+ # ~~^-> Autocomplete:FormWidget~~
"click_to_copy", # ~~^-> ClickToCopy:FormWidget~~
]
},
"associate_editor": {
"widgets": [
+ "trim_whitespace", # ~~^-> TrimWhitespace:FormWidget~~
+ {"autocomplete": {"type": "journal", "field": "bibjson.institution.name.exact"}},
+ # ~~^-> Autocomplete:FormWidget~~
"click_to_copy", # ~~^-> ClickToCopy:FormWidget~~
]
},
"editor": {
"widgets": [
+ "trim_whitespace", # ~~^-> TrimWhitespace:FormWidget~~
+ {"autocomplete": {"type": "journal", "field": "bibjson.institution.name.exact"}},
+ # ~~^-> Autocomplete:FormWidget~~
"click_to_copy", # ~~^-> ClickToCopy:FormWidget~~
]
}
- },
- "widgets": [
- "trim_whitespace", # ~~^-> TrimWhitespace:FormWidget~~
- {"autocomplete": {"type" : "journal", "field": "bibjson.institution.name.exact"}},
- "full_contents" # ~~^->FullContents:FormWidget~~
- ]
+ }
}
# ~~->$ InstitutionCountry:FormField~~
@@ -1639,7 +1660,7 @@ class FieldDefinitions:
"owner_exists"
],
"widgets": [
- {"autocomplete": {"type" : "account", "field": "id", "include" : False}},
+ {"autocomplete": {"type" : "account", "field": "id", "include" : False}}, # ~~^-> Autocomplete:FormWidget~~
"clickable_owner"
],
"contexts" : {
@@ -1697,7 +1718,7 @@ class FieldDefinitions:
"label": "Group",
"input": "text",
"widgets": [
- {"autocomplete": {"type" : "editor_group", "field": "name", "include" : False}}
+ {"autocomplete": {"type" : "editor_group", "field": "name", "include" : False}} # ~~^-> Autocomplete:FormWidget~~
],
"contexts" : {
"editor" : {
@@ -1705,7 +1726,7 @@ class FieldDefinitions:
},
"admin" : {
"widgets" : [
- {"autocomplete": {"type": "editor_group", "field": "name", "include" : False}},
+ {"autocomplete": {"type": "editor_group", "field": "name", "include" : False}}, # ~~^-> Autocomplete:FormWidget~~
{"load_editors" : {"field" : "editor"}}
]
}
diff --git a/portality/models/preservation.py b/portality/models/preservation.py
index c89c2f3f6f..cdaffb2708 100644
--- a/portality/models/preservation.py
+++ b/portality/models/preservation.py
@@ -99,6 +99,10 @@ def no_files_articles(self, articles_list):
if articles_list is not None and len(articles_list) > 0:
self.data["articles_info"]["no_files_articles"] = ", ".join(articles_list)
+ def uploaded_journals(self, uploaded_journals):
+ if uploaded_journals is not None and len(uploaded_journals) > 0:
+ self.data["articles_info"]["uploaded_journals"] = ", ".join(uploaded_journals)
+
@classmethod
def by_owner(cls, owner, size=10):
q = OwnerFileQuery(owner)
diff --git a/portality/scripts/230609_find_articles_with_invalid_issns.py b/portality/scripts/230609_find_articles_with_invalid_issns.py
index 8b857faa01..dd861ea04d 100644
--- a/portality/scripts/230609_find_articles_with_invalid_issns.py
+++ b/portality/scripts/230609_find_articles_with_invalid_issns.py
@@ -22,15 +22,26 @@
parser.add_argument("-o", "--out", help="output file path", required=True)
args = parser.parse_args()
- with open(args.out, "w", encoding="utf-8") as f:
- writer = csv.writer(f)
- writer.writerow(["ID", "PISSN", "EISSN", "Journals found with article's PISSN", "In doaj?", "Journals found with article's EISSN", "In doaj?", "Error"])
+
+ with open(args.out+"notfound.csv", "w", encoding="utf-8") as f_notfound, open(args.out+"-identical.csv", "w", encoding="utf-8") as f_identical, open(args.out+"-others.csv", "w", encoding="utf-8") as f_others:
+ writer_notfound = csv.writer(f_notfound)
+ writer_notfound.writerow(["ID", "PISSN", "EISSN", "Journals found with article's PISSN", "In doaj?",
+ "Journals found with article's EISSN", "In doaj?", "Error"])
+
+ writer_identical = csv.writer(f_identical)
+ writer_identical.writerow(["ID", "PISSN", "EISSN", "Journals found with article's PISSN", "In doaj?",
+ "Journals found with article's EISSN", "In doaj?", "Error"])
+
+ writer_others = csv.writer(f_others)
+ writer_others.writerow(["ID", "PISSN", "EISSN", "Journals found with article's PISSN", "In doaj?",
+ "Journals found with article's EISSN", "In doaj?", "Error"])
for a in models.Article.iterate(q=IN_DOAJ, page_size=100, keepalive='5m'):
article = models.Article(_source=a)
bibjson = article.bibjson()
try:
articlesvc.ArticleService._validate_issns(bibjson)
+ articlesvc.ArticleService.match_journal_with_validation(bibjson)
except exceptions.ArticleNotAcceptable as e:
id = article.id
pissn = bibjson.get_identifiers("pissn")
@@ -53,4 +64,9 @@
j_e_in_doaj.append(jobj.is_in_doaj())
else:
j_e_in_doaj.append("n/a")
- writer.writerow([id, pissn, eissn, j_p, j_p_in_doaj, j_e, j_e_in_doaj, str(e)])
+ if (str(e) == "The Print and Online ISSNs supplied are identical. If you supply 2 ISSNs they must be different."):
+ writer_identical.writerow([id, pissn, eissn, j_p, j_p_in_doaj, j_e, j_e_in_doaj, "Identical ISSNs"])
+ elif (str(e) == "ISSNs provided don't match any journal."):
+ writer_notfound.writerow([id, pissn, eissn, j_p, j_p_in_doaj, j_e, j_e_in_doaj, "No matching journal found."])
+ else:
+ writer_others.writerow([id, pissn, eissn, j_p, j_p_in_doaj, j_e, j_e_in_doaj, str(e)])
\ No newline at end of file
diff --git a/portality/settings.py b/portality/settings.py
index be121858a8..f9f6b46157 100644
--- a/portality/settings.py
+++ b/portality/settings.py
@@ -9,7 +9,7 @@
# Application Version information
# ~~->API:Feature~~
-DOAJ_VERSION = "6.4.3"
+DOAJ_VERSION = "6.4.6"
API_VERSION = "3.0.1"
######################################
@@ -423,11 +423,14 @@
# Crontab for never running a job - February 31st (use to disable tasks)
CRON_NEVER = {"month": "2", "day": "31", "day_of_week": "*", "hour": "*", "minute": "*"}
+# Additional Logging for scheduled JournalCSV
+EXTRA_JOURNALCSV_LOGGING = False
+
# Crontab schedules must be for unique times to avoid delays due to perceived race conditions
HUEY_SCHEDULE = {
"sitemap": {"month": "*", "day": "*", "day_of_week": "*", "hour": "8", "minute": "0"},
"reporting": {"month": "*", "day": "1", "day_of_week": "*", "hour": "0", "minute": "0"},
- "journal_csv": CRON_NEVER, # {"month": "*", "day": "*", "day_of_week": "*", "hour": "*", "minute": "20"},
+ "journal_csv": {"month": "*", "day": "*", "day_of_week": "*", "hour": "*", "minute": "20"},
"read_news": {"month": "*", "day": "*", "day_of_week": "*", "hour": "*", "minute": "30"},
"article_cleanup_sync": {"month": "*", "day": "2", "day_of_week": "*", "hour": "0", "minute": "0"},
"async_workflow_notifications": {"month": "*", "day": "*", "day_of_week": "1", "hour": "5", "minute": "0"},
diff --git a/portality/static/js/edges/public.journal.edge.js b/portality/static/js/edges/public.journal.edge.js
index 47d366d55d..f4494d8c4d 100644
--- a/portality/static/js/edges/public.journal.edge.js
+++ b/portality/static/js/edges/public.journal.edge.js
@@ -274,7 +274,7 @@ $.extend(true, doaj, {
value: false
})
],
- display : "Without article processing charges (APCs)"
+ display : "Without any fees"
}
],
fieldDisplays : {
diff --git a/portality/tasks/journal_csv.py b/portality/tasks/journal_csv.py
index 9b5b74269d..153a13735e 100644
--- a/portality/tasks/journal_csv.py
+++ b/portality/tasks/journal_csv.py
@@ -19,12 +19,18 @@ def run(self):
def logger(msg):
self.background_job.add_audit_message(msg)
+ _l = logger if app.config.get('EXTRA_JOURNALCSV_LOGGING', False) else None
+
job = self.background_job
journalService = DOAJ.journalService()
- url, action_register = journalService.csv(logger=logger)
- # for ar in action_register:
- # job.add_audit_message(ar)
+ url, action_register = journalService.csv(logger=_l)
+
+ # Log directly to the task if we don't have extra logging configured
+ if _l is None:
+ for ar in action_register:
+ job.add_audit_message(ar)
+
job.add_audit_message("CSV generated; will be served from {y}".format(y=url))
def cleanup(self):
diff --git a/portality/tasks/preservation.py b/portality/tasks/preservation.py
index 037ba4c1dd..4fc1cf09b9 100644
--- a/portality/tasks/preservation.py
+++ b/portality/tasks/preservation.py
@@ -5,7 +5,6 @@
import shutil
import tarfile
from copy import deepcopy
-from datetime import datetime
from zipfile import ZipFile
import requests
@@ -125,11 +124,15 @@ def __init__(self):
self.__unbagged_articles = []
self.__not_found_articles = []
self.__no_files_articles = []
+ self.__uploaded_journals = []
self.has_errors = False
def add_successful_article(self, article: ArticlePackage):
self.__successful_articles.append(os.path.basename(article.article_dir))
+ def add_uploaded_journal(self, journal_package):
+ self.__uploaded_journals.append(journal_package)
+
def add_unowned_articles(self, article: ArticlePackage):
self.has_errors = True
self.__unowned_articles.append(os.path.basename(article.article_dir))
@@ -167,6 +170,9 @@ def not_found_articles(self):
def no_files_articles(self):
return self.__no_files_articles
+ def uploaded_journals(self):
+ return self.__uploaded_journals
+
def get_count(self):
return len(self.__successful_articles) + \
len(self.__unowned_articles) + \
@@ -242,24 +248,50 @@ def run(self):
job.add_audit_message("Create Package structure")
articles_list = preserv.create_package_structure()
- self.save_articles_list(articles_list, preserve_model)
+
app.logger.debug("Created package structure")
if len(articles_list.successful_articles()) > 0:
- package = PreservationPackage(preserv.preservation_dir, job.user)
- job.add_audit_message("Create preservation package")
- tar_file = package.create_package()
- app.logger.debug(f"Created tar file {tar_file}")
+ # Each subdirectory is a journal and the directory name is the ISSN of the journal.
+ # Iterate through the directories and upload each journal as an individual package.
+ dirs = [f.name for f in os.scandir(preserv.preservation_dir) if f.is_dir()]
+ upload_failed = False
+ for sub_dir in dirs:
+
+ package = PreservationPackage(preserv.preservation_dir, sub_dir, job.user)
+ job.add_audit_message("Create preservation package for " + sub_dir)
+ tar_file = package.create_package()
+
+ app.logger.debug(f"Created tar file {tar_file}")
+
+ job.add_audit_message("Create shasum for " + sub_dir)
+ sha256 = package.sha256(package.tar_file)
+
+ job.add_audit_message("Upload package " + sub_dir)
+ response = package.upload_package(sha256, package.tar_file)
+ app.logger.debug(f"Uploaded. Response{response.text}")
+
+ job.add_audit_message("Validate response")
+ self.validate_response(response, tar_file, sha256, preserve_model)
+
+ if preserve_model.status == 'failed':
+ upload_failed = True
+ break
+ else:
+ articles_list.add_uploaded_journal(package.tar_file_name)
- job.add_audit_message("Create shasum")
- sha256 = package.sha256()
+ # Upload the identifier file
+ job.add_audit_message("Create shasum for identifier")
+ sha256 = package.sha256(package.identifier_file)
- job.add_audit_message("Upload package")
- response = package.upload_package(sha256)
- app.logger.debug(f"Uploaded. Response{response.text}")
+ identifier_file_name = os.path.basename(package.identifier_file)
+ job.add_audit_message("Upload identifier file " + identifier_file_name)
+ package.upload_package(sha256, package.identifier_file)
+ articles_list.add_uploaded_journal(identifier_file_name)
+ app.logger.debug(f"Uploaded identifier file " + identifier_file_name)
- job.add_audit_message("Validate response")
- self.validate_response(response, tar_file, sha256, preserve_model)
+ if not upload_failed:
+ preserve_model.uploaded_to_ia()
# Check if the only few articles are successful
if articles_list.is_partial_success():
@@ -277,6 +309,8 @@ def run(self):
preserve_model.failed(FailedReasons.no_valid_article_available)
preserve_model.save()
+ self.save_articles_list(articles_list, preserve_model)
+
except (PreservationException, Exception) as exp:
# ~~-> PreservationException:Exception~~
preserve_model.failed(str(exp))
@@ -304,6 +338,8 @@ def save_articles_list(self, articles_list: ArticlesList, model: PreservationSta
model.unbagged_articles(articles_list.unbagged_articles())
if len(articles_list.no_files_articles()) > 0:
model.no_files_articles(articles_list.no_files_articles())
+ if len(articles_list.uploaded_journals()) > 0:
+ model.uploaded_journals(articles_list.uploaded_journals())
model.save()
def cleanup(self):
@@ -344,8 +380,7 @@ def validate_response(self, response, tar_file, sha256, model):
if res_filename and res_filename == tar_file:
if res_shasum and res_shasum == sha256:
- app.logger.info("successfully uploaded")
- model.uploaded_to_ia()
+ app.logger.info("successfully uploaded " + tar_file)
else:
model.failed(FailedReasons.checksum_doesnot_match)
else:
@@ -378,7 +413,7 @@ def validate_response(self, response, tar_file, sha256, model):
model.save()
else:
- app.logger.error(f"Upload failed {response.text}")
+ app.logger.error(f"Upload failed for {tar_file}. Reason - {response.text}")
model.failed(response.text)
model.save()
@@ -534,11 +569,13 @@ def create_package_structure(self) -> ArticlesList:
# Fetch identifiers at the root directory
if os.path.dirname(dir) == self.__local_dir:
- if Preservation.IDENTIFIERS_CSV in files:
- # Get articles info from csv file
- # ~~-> CSVReader:Feature~~
- csv_reader = CSVReader(os.path.join(dir, Preservation.IDENTIFIERS_CSV))
- self.__csv_articles_dict = csv_reader.articles_info()
+ for file in files:
+ if Preservation.IDENTIFIERS_CSV.lower() == file.lower():
+ # Get articles info from csv file
+ # ~~-> CSVReader:Feature~~
+ csv_reader = CSVReader(os.path.join(dir, file))
+ self.__csv_articles_dict = csv_reader.articles_info()
+ break
# process only the directories that has articles
else:
self.__process_article(dir, files, articles_list)
@@ -557,10 +594,12 @@ def __process_article(self, dir_path, files, articles_list):
return
# check if identifier file exist
- if Preservation.IDENTIFIER_FILE in files:
- with open(os.path.join(dir_path, Preservation.IDENTIFIER_FILE)) as file:
- identifiers = file.read().splitlines()
- elif self.__csv_articles_dict:
+ for file in files:
+ if Preservation.IDENTIFIER_FILE.lower() == file.lower():
+ with open(os.path.join(dir_path, file)) as identifier_file:
+ identifiers = identifier_file.read().splitlines()
+
+ if not identifiers and self.__csv_articles_dict:
if dir_name in self.__csv_articles_dict:
identifiers = self.__csv_articles_dict[dir_name]
@@ -570,10 +609,9 @@ def __process_article(self, dir_path, files, articles_list):
if article:
article_data = article.data
- if not self.owner_of_article(article):
- articles_list.add_unowned_articles(package)
+ is_owner = self.owner_of_article(article)
- else:
+ if isinstance(is_owner, bool) and is_owner == True:
issn, article_id, metadata_json = self.get_article_info(article_data)
try:
package = ArticlePackage(dir_path, files)
@@ -584,10 +622,17 @@ def __process_article(self, dir_path, files, articles_list):
package.create_article_bagit_structure()
+ # Append this article's entry (directory name, article id, identifiers) to the journal's identifier file
+ with open(os.path.join(self.__preservation_dir, issn + ".txt"), 'a') as identifier_file:
+ identifier_file.write(os.path.basename(dir_path) + "," + article_id + "," +
+ ','.join(identifiers) + "\n")
+
articles_list.add_successful_article(package)
except Exception:
articles_list.add_unbagged_articles(package)
app.logger.exception(f"Error while create article ( {article_id} ) package")
+ else:
+ articles_list.add_unowned_articles(package)
else:
# skip the article if not found
@@ -677,11 +722,20 @@ class PreservationPackage:
Creates preservation package and upload to Internet Server
"""
- def __init__(self, directory, owner):
- self.package_dir = directory
- self.tar_file = self.package_dir + ".tar.gz"
+ def __init__(self, preservation_dir, journal_dir, owner):
+ self.preservation_dir = preservation_dir
+ self.journal_dir = journal_dir
+ self.package_dir = os.path.join(self.preservation_dir, journal_dir)
+ self.created_time = dates.now_str("%Y-%m-%d-%H-%M-%S")
+ self.tar_file = self.package_dir + "_" + self.created_time + ".tar.gz"
self.tar_file_name = os.path.basename(self.tar_file)
self.__owner = owner
+ self.identifier_file = self.package_dir + "_" + self.created_time + ".txt"
+ try:
+ # Rename the identifier file so it carries the same timestamp suffix as the tar file
+ shutil.move(self.package_dir + ".txt", self.identifier_file)
+ except Exception as e:
+ app.logger.exception(e)
def create_package(self):
"""
@@ -697,7 +751,7 @@ def create_package(self):
return self.tar_file_name
- def upload_package(self, sha256sum):
+ def upload_package(self, sha256sum, file):
url = app.config.get("PRESERVATION_URL")
username = app.config.get("PRESERVATION_USERNAME")
@@ -707,7 +761,7 @@ def upload_package(self, sha256sum):
collection = params[0]
collection_id = params[1]
- file_name = os.path.basename(self.tar_file)
+ file_name = os.path.basename(file)
# payload for upload request
payload = {
@@ -727,7 +781,7 @@ def upload_package(self, sha256sum):
headers = {}
# get the file to upload
try:
- with open(self.tar_file, "rb") as f:
+ with open(file, "rb") as f:
files = {'file_field': (file_name, f)}
response = requests.post(url, headers=headers, auth=(username, password), files=files, data=payload)
except (IOError, Exception) as exp:
@@ -736,13 +790,13 @@ def upload_package(self, sha256sum):
return response
- def sha256(self):
+ def sha256(self, file):
"""
Creates sha256 hash for the tar file
"""
sha256_hash = hashlib.sha256()
- with open(self.tar_file, "rb") as f:
+ with open(file, "rb") as f:
# Read and update hash string value in blocks of 64K
for byte_block in iter(lambda: f.read(65536), b""):
sha256_hash.update(byte_block)
diff --git a/portality/templates/data/sponsors.html b/portality/templates/data/sponsors.html
index 5dfee23989..4cffdbf57b 100644
--- a/portality/templates/data/sponsors.html
+++ b/portality/templates/data/sponsors.html
@@ -1,35 +1,7 @@
-{% if data.sponsors.gold %}
- Premier contributors
-
- {% for i in data.sponsors.gold %}
-
- {% endfor %}
-
-{% endif %}
-
-
- Sustaining contributors
-
- {% for i in data.sponsors.silver %}
-
- {% endfor %}
-
-
-
-
- Basic contributors
- {% for i in data.sponsors.bronze %}
-