diff --git a/doajtest/helpers.py b/doajtest/helpers.py index fe2f585fa2..15fe6524f1 100644 --- a/doajtest/helpers.py +++ b/doajtest/helpers.py @@ -149,6 +149,7 @@ class DoajTestCase(TestCase): @classmethod def create_app_patch(cls): return { + 'AUTOCHECK_INCOMING': False, # old test cases design and depend on work flow of autocheck disabled "STORE_IMPL": "portality.store.StoreLocal", "STORE_LOCAL_DIR": paths.rel2abs(__file__, "..", "tmp", "store", "main", cls.__name__.lower()), "STORE_TMP_DIR": paths.rel2abs(__file__, "..", "tmp", "store", "tmp", cls.__name__.lower()), diff --git a/doajtest/testbook/public_site/public_search.yml b/doajtest/testbook/public_site/public_search.yml index 6ff91b38db..166b02c255 100644 --- a/doajtest/testbook/public_site/public_search.yml +++ b/doajtest/testbook/public_site/public_search.yml @@ -186,4 +186,13 @@ tests: - step: click spacebar to check the filter results: - filter is applied - +- title: Export article in RIS format + context: + role: anonymous + steps: + - step: Go to the DOAJ search page at /search/articles + results: + - Only articles are shown in the results + - step: Click on 'Export RIS' of any article + results: + - A RIS file is downloaded diff --git a/doajtest/unit/test_crosswalks_article_ris.py b/doajtest/unit/test_crosswalks_article_ris.py new file mode 100644 index 0000000000..760d819e1c --- /dev/null +++ b/doajtest/unit/test_crosswalks_article_ris.py @@ -0,0 +1,43 @@ +import unittest + +from doajtest.fixtures import ArticleFixtureFactory +from portality import models +from portality.crosswalks.article_ris import ArticleRisXWalk + + +class TestArticleRisXWalk(unittest.TestCase): + def test_article2ris(self): + article = ArticleFixtureFactory.make_article_source() + article = models.Article(**article) + article.bibjson().abstract = "abstract" + ris = ArticleRisXWalk.article2ris(article) + assert ris.type == 'JOUR' + assert ris['T1'] == [article.data['bibjson']['title']] + assert ris.to_text().split() == """ +TY - JOUR +T1 - Article Title +AU - The Author +PY - 1991 +JF - The Title +PB - The Publisher +VL - 1 +IS - 99 +SP - 3 +EP - 21 +UR - http://www.example.com/article +AB - abstract +KW - word +KW - key +DO - 10.0000/SOME.IDENTIFIER +LA - EN +LA - FR +ER - + """.split() + + def test_article2ris__only_title(self): + ris = ArticleRisXWalk.article2ris({"bibjson": {"title": "Article Title"}}) + assert ris.to_text().split() == """ +TY - JOUR +T1 - Article Title +ER - + """.split() diff --git a/doajtest/unit/test_ris.py b/doajtest/unit/test_ris.py new file mode 100644 index 0000000000..346181c794 --- /dev/null +++ b/doajtest/unit/test_ris.py @@ -0,0 +1,66 @@ +from unittest import TestCase + +from portality.lib.ris import RisEntry + + +class TestRisEntry(TestCase): + + def test_get_set_item(self): + test_value = 'value_a' + entry = RisEntry() + entry['A1'] = test_value + assert entry['A1'] == [test_value] + + def test_append(self): + entry = RisEntry() + entry.append('A1', '1') + entry['A1'].append('2') + assert entry['A1'] == ['1', '2'] + + entry['A1'] = '9' + assert entry['A1'] == ['9'] + + def test_getitem__valid_undefined(self): + entry = RisEntry() + assert entry['A1'] == [] + + def test_setitem__raise_field_not_found(self): + entry = RisEntry() + with self.assertRaises(ValueError): + entry['qoidjqowijdkncoiqw'] = 'value_a' + + def test_getitem__raise_field_not_found(self): + entry = RisEntry() + with self.assertRaises(ValueError): + print(entry['qoidjqowijdkncoiqw']) + + def test_to_text(self): + entry = RisEntry() + entry['A1'] = 'value_a' + entry['A2'] = 'value_b' + entry['TY'] = 'JOUR' + + expected = """ +TY - JOUR +A1 - value_a +A2 - value_b +ER - + """.strip() + ' \n' + + assert entry.to_text() == expected + + def test_from_text(self): + expected = """ + TY - JOUR + A1 - value_a + A2 - value_b + ER - + """.strip() + ' \n' + + entry = RisEntry.from_text(expected) + assert entry.type == 'JOUR' + assert dict(entry.data) == { + 'TY': ['JOUR'], + 'A1': ['value_a'], + 'A2': ['value_b'], + } diff --git a/doajtest/unit/test_view_doajservices.py b/doajtest/unit/test_view_doajservices.py new file mode 100644 index 0000000000..30e41ba17c --- /dev/null +++ b/doajtest/unit/test_view_doajservices.py @@ -0,0 +1,28 @@ +from doajtest.fixtures import ArticleFixtureFactory +from doajtest.helpers import DoajTestCase +from portality.crosswalks.article_ris import ArticleRisXWalk +from portality.models import Article +from portality.util import url_for + + +class TestDoajservices(DoajTestCase): + + def test_export_article_ris(self): + article = Article(**ArticleFixtureFactory.make_article_source()) + article.save(blocking=True) + Article.refresh() + + ris = ArticleRisXWalk.article2ris(article).to_text() + + with self.app_test.test_client() as t_client: + url = url_for('doajservices.export_article_ris', article_id=article.id, fmt='ris') + response = t_client.get(url) + assert response.status_code == 200 + assert response.get_data(as_text=True) == ris + + def test_export_article_ris__not_found(self): + with self.app_test.test_client() as t_client: + url = url_for('doajservices.export_article_ris', + article_id='article_id_that_does_not_exist', fmt='ris') + response = t_client.get(url) + assert response.status_code == 404 diff --git a/portality/crosswalks/article_ris.py b/portality/crosswalks/article_ris.py new file mode 100644 index 0000000000..82d4b04d26 --- /dev/null +++ b/portality/crosswalks/article_ris.py @@ -0,0 +1,50 @@ +from typing import Union + +from portality import models +from portality.lib import jsonpath_utils +from portality.lib.ris import RisEntry + + +def extra_author_names(article) -> list: + query = '$.bibjson.author[*].name' + values = jsonpath_utils.find_values(query, article) + return sorted(set(values)) + + +RIS_ARTICLE_MAPPING = { + 'T1': '$.bibjson.title', + 'AU': extra_author_names, + 'PY': '$.bibjson.year', + 'JF': '$.bibjson.journal.title', + 'PB': '$.bibjson.journal.publisher', + 'VL': '$.bibjson.journal.volume', + 'IS': '$.bibjson.journal.number', + 'SP': '$.bibjson.start_page', + 'EP': '$.bibjson.end_page', + 'UR': '$.bibjson.link[*].url', + 'AB': '$.bibjson.abstract', + 'KW': '$.bibjson.keywords[*]', + 'DO': '$.bibjson.identifier[?(@.type == "doi")].id', + 'SN': '$.bibjson.journal.issns[*]', + 'LA': '$.bibjson.journal.language[*]', +} + + +class ArticleRisXWalk: + + @classmethod + def article2ris(cls, article: Union[models.Article, dict]) -> RisEntry: + if isinstance(article, models.Article): + article = article.data + + entry = RisEntry(type_of_reference='JOUR') + for tag, query in RIS_ARTICLE_MAPPING.items(): + if callable(query): + values = query(article) + else: + values = jsonpath_utils.find_values(query, article) + + for v in values: + entry[tag].append(v) + + return entry diff --git a/portality/lib/jsonpath_utils.py b/portality/lib/jsonpath_utils.py new file mode 100644 index 0000000000..7201c9bfe8 --- /dev/null +++ b/portality/lib/jsonpath_utils.py @@ -0,0 +1,7 @@ +from typing import Iterable + +import jsonpath_ng.ext + + +def find_values(query: str, data: dict) -> Iterable: + return (m.value for m in jsonpath_ng.ext.parse(query).find(data)) diff --git a/portality/lib/ris.py b/portality/lib/ris.py new file mode 100644 index 0000000000..36bb83ab99 --- /dev/null +++ b/portality/lib/ris.py @@ -0,0 +1,225 @@ +""" +very simple library for RIS format + +file format references: https://en.wikipedia.org/wiki/RIS_(file_format) +""" +import collections +import logging +from collections import OrderedDict +from typing import Dict, Optional + +log = logging.getLogger(__name__) + +RTAG_TYPE = 'TY' +RTAG_END = 'ER' +RIS_TAGS = [ + 'A1', # primary_author + 'A2', # secondary_author + 'A3', # tertiary_author + 'A4', # quaternary_author + 'A5', # quinary_author_compiler + 'A6', # website_editor + 'AB', # abstract_synopsis + 'AD', # author_editor_address + 'AN', # accession_number + 'AU', # author_editor_translator + 'AV', # availability_location + 'BT', # primary_secondary_title + 'C1', # custom1 + 'C2', # custom2 + 'C3', # custom3 + 'C4', # custom4 + 'C5', # custom5 + 'C6', # custom6 + 'C7', # custom7 + 'C8', # custom8 + 'CA', # caption + 'CL', # classification + 'CN', # call_number + 'CP', # city_place_publication + 'CR', # cited_references + 'CT', # caption_primary_title + 'CY', # place_published + 'DA', # date + 'DB', # name_of_database + 'DI', # digital_object_identifier + 'DO', # digital_object_identifier2 + 'DOI', # digital_object_identifier3 + 'DP', # database_provider + 'DS', # data_source + 'ED', # secondary_author + 'EP', # end_page + 'ET', # edition + 'FD', # free_form_publication_data + 'H1', # location_library + 'H2', # location_call_number + 'ID', # reference_identifier + 'IP', # identifying_phrase + 'IS', # number_volumes + 'J1', # journal_abbreviation_1 + 'J2', # alternate_title + 'JA', # journal_standard_abbreviation + 'JF', # journal_full_name + 'JO', # journal_abbreviation + 'K1', # keyword1 + 'KW', # keyword_phrase + 'L1', # file_attachments + 'L2', # url_link + 'L3', # doi_link + 'L4', # figure_image_link + 'LA', # language + 'LB', # label + 'LK', # links + 'LL', # sponsoring_library_location + 'M1', # miscellaneous1 + 'M2', # miscellaneous2 + 'M3', # type_of_work + 'N1', # notes1 + 'N2', # abstract_notes + 'NO', # notes + 'NV', # number_of_volumes + 'OL', # output_language + 'OP', # original_publication + 'PA', # personal_notes + 'PB', # publisher + 'PMCID', # pmcid + 'PMID', # pmid + 'PP', # place_of_publication + 'PY', # publication_year + 'RD', # retrieved_date + 'RI', # reviewed_item + 'RN', # research_notes + 'RP', # reprint_status + 'RT', # reference_type + 'SE', # section + 'SF', # subfile_database + 'SL', # sponsoring_library + 'SN', # issn_isbn + 'SP', # start_pages + 'SR', # source_type + 'ST', # short_title + 'SV', # series_volume + 'T1', # primary_title + 'T2', # secondary_title + 'T3', # tertiary_title + 'TA', # translated_author + 'TI', # title + 'TT', # translated_title + RTAG_TYPE, # 'type_of_reference' + 'U1', # user_definable1 + 'U2', # user_definable2 + 'U3', # user_definable3 + 'U4', # user_definable4 + 'U5', # user_definable5 + 'U6', # user_definable6 + 'U7', # user_definable7 + 'U8', # user_definable8 + 'U9', # user_definable9 + 'U10', # user_definable10 + 'U11', # user_definable11 + 'U12', # user_definable12 + 'U13', # user_definable13 + 'U14', # user_definable14 + 'U15', # user_definable15 + 'UR', # web_url + 'VL', # volume + 'VO', # volume_published_standard + 'WP', # date_of_electronic_publication + 'WT', # website_title + 'WV', # website_version + 'Y1', # year_date + 'Y2', # access_date_secondary_date + 'YR', # publication_year_ref +] + + +def find_tag(field_name) -> Optional[str]: + field_name = field_name.upper() + if field_name in RIS_TAGS: + return field_name + raise ValueError(f'Field not found: {field_name}') + + +class RisEntry: + + def __init__(self, type_of_reference: str = None): + self.data: collections.defaultdict[str, list] = collections.defaultdict(list) + if type_of_reference: + self.type = type_of_reference + + def __setitem__(self, field_name, value): + tag = find_tag(field_name) + self.data[tag] = [value] + + def append(self, tag, value) -> list: + tag = find_tag(tag) + self[tag].append(value) + return self[tag] + + def __getitem__(self, field_name) -> list: + tag = find_tag(field_name) + return self.data[tag] + + @property + def type(self): + return self[RTAG_TYPE] and self[RTAG_TYPE][0] + + @type.setter + def type(self, value): + self[RTAG_TYPE] = value + + @classmethod + def from_dict(cls, d: dict): + instance = cls() + for k, v in d.items(): + if isinstance(v, list): + for vv in v: + instance[k].append(vv) + else: + instance[k].append(v) + + return instance + + @classmethod + def from_text(cls, text: str): + def _to_tag_value(line: str): + tag, value = line.split('-', 1) + tag = tag.strip() + value = value.lstrip() + value = value.replace('\\n', '\n') + return tag, value + + text = text.strip() + lines = text.splitlines() + entry = RisEntry() + for line in lines: + tag, val = _to_tag_value(line) + if tag == RTAG_END: + break + entry[tag].append(val) + return entry + + def to_text(self) -> str: + tags = list(self.data.keys()) + if RTAG_TYPE in tags: + tags.remove(RTAG_TYPE) + tags.insert(0, RTAG_TYPE) + + if RTAG_END in tags: + tags.remove(RTAG_END) + + def _to_line(tag, value): + if '\n' in value: + value = value.replace('\n', '\\n') + if value is None: + value = '' + return f'{tag} - {value}\n' + + text = '' + for tag in tags: + values = self.data[tag] + for v in values: + text += _to_line(tag, v) + + text += _to_line(RTAG_END, '') + return text diff --git a/portality/static/doaj/images/feather-icons/download.svg b/portality/static/doaj/images/feather-icons/download.svg new file mode 100644 index 0000000000..76767a9246 --- /dev/null +++ b/portality/static/doaj/images/feather-icons/download.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/portality/static/js/doaj.fieldrender.edges.js b/portality/static/js/doaj.fieldrender.edges.js index dc3138e4f8..7eaa93ebff 100644 --- a/portality/static/js/doaj.fieldrender.edges.js +++ b/portality/static/js/doaj.fieldrender.edges.js @@ -2872,6 +2872,8 @@ $.extend(true, doaj, { published = 'Published ' + name; } + const export_url = this.doaj_url + '/service/export/article/' + resultobj.id + '/ris'; + var frag = '
  • \
    \
  • \ +
  • \ + \ + Export Citation (RIS) ' + if (this.widget){ + frag += 'external-link icon' + } else { + frag += '' + } + frag += '\ +
  • \
  • \ About the journal\
  • \ diff --git a/portality/static/vendor/edges b/portality/static/vendor/edges index 990f422016..9639b871ac 160000 --- a/portality/static/vendor/edges +++ b/portality/static/vendor/edges @@ -1 +1 @@ -Subproject commit 990f4220163a3e18880f0bdc3ad5c80d234d22dd +Subproject commit 9639b871acb1f6590ee78e236f2c9333479c9fe8 diff --git a/portality/view/doajservices.py b/portality/view/doajservices.py index f83a727b6b..4ee10eff11 100644 --- a/portality/view/doajservices.py +++ b/portality/view/doajservices.py @@ -1,13 +1,14 @@ -import json, urllib.request, urllib.parse, urllib.error, requests +import json +from io import BytesIO -from flask import Blueprint, make_response, request, abort, render_template +from flask import Blueprint, make_response, abort, render_template, send_file from flask_login import current_user, login_required -from portality.core import app -from portality.decorators import ssl_required, write_required, restrict_to_role -from portality.util import jsonp from portality import lock, models from portality.bll import DOAJ +from portality.crosswalks.article_ris import ArticleRisXWalk +from portality.decorators import ssl_required, write_required +from portality.util import jsonp from portality.ui import templates blueprint = Blueprint('doajservices', __name__) @@ -41,7 +42,7 @@ def unlock(object_type, object_id): abort(400) # otherwise, return success - resp = make_response(json.dumps({"result" : "success"})) + resp = make_response(json.dumps({"result": "success"})) resp.mimetype = "application/json" return resp @@ -111,7 +112,8 @@ def group_status(group_id): :param group_id: :return: """ - if (not (current_user.has_role("editor") and models.EditorGroup.pull(group_id).editor == current_user.id)) and (not current_user.has_role("admin")): + if (not (current_user.has_role("editor") and models.EditorGroup.pull(group_id).editor == current_user.id)) and ( + not current_user.has_role("admin")): abort(404) svc = DOAJ.todoService() stats = svc.group_stats(group_id) @@ -130,6 +132,7 @@ def dismiss_autocheck(autocheck_set_id, autocheck_id): abort(404) return make_response(json.dumps({"status": "success"})) + @blueprint.route("/autocheck/undismiss//", methods=["GET", "POST"]) @jsonp @login_required @@ -142,3 +145,23 @@ def undismiss_autocheck(autocheck_set_id, autocheck_id): abort(404) return make_response(json.dumps({"status": "success"})) + +@blueprint.route('/export/article//') +def export_article_ris(article_id, fmt): + article = models.Article.pull(article_id) + if not article: + abort(404) + + if fmt != 'ris': + # only support ris for now + abort(404) + + byte_stream = BytesIO() + ris = ArticleRisXWalk.article2ris(article) + byte_stream.write(ris.to_text().encode('utf-8', errors='ignore')) + byte_stream.seek(0) + + filename = f'article-{article_id[:10]}.ris' + + resp = make_response(send_file(byte_stream, as_attachment=True, attachment_filename=filename)) + return resp diff --git a/setup.py b/setup.py index 8fbc87f66a..ddc27e9721 100644 --- a/setup.py +++ b/setup.py @@ -19,6 +19,7 @@ "feedparser==6.0.8", "itsdangerous==2.0.1", # fixme: unpinned dependency of flask, 2.1.0 is causing an import error 'json' "jinja2<3.1.0", # fixme: unpinned dependency of flask, import error on 'escape' + "jsonpath-ng~=1.6", "Flask~=2.1.2", "Flask-Cors==3.0.8", "Flask-DebugToolbar==0.13.1", @@ -63,6 +64,7 @@ 'pandas~=2.0.1', # pandas lets us generate URLs for linkcheck 'gspread-dataframe~=3.3.1', 'gspread-formatting~=1.1.2', + ] + (["setproctitle==1.1.10"] if "linux" in sys.platform else []), extras_require={ # prevent backtracking through all versions