Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
utnapischtim committed Nov 26, 2024
1 parent 9a0ea66 commit 8100b11
Show file tree
Hide file tree
Showing 6 changed files with 120 additions and 60 deletions.
86 changes: 52 additions & 34 deletions invenio_workflows_tugraz/openaccess/convert.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2022-2023 Graz University of Technology.
# Copyright (C) 2022-2024 Graz University of Technology.
#
# invenio-workflows-tugraz is free software; you can redistribute it and/or
# modify it under the terms of the MIT License; see LICENSE file for more
Expand Down Expand Up @@ -43,7 +43,7 @@ def convert_attribute(
record: Marc21Metadata,
) -> None:
"""Traverse first level elements of dictionary and extract attributes."""
convert_function = getattr(self, f"convert_{attribute}", lambda _: None)
convert_function = getattr(self, f"convert_{attribute}", lambda _, __: None)
convert_function(value, record)


Expand All @@ -52,46 +52,65 @@ class Pure2Marc21(Converter):

def convert_abstract(self, value: dict, record: Marc21Metadata) -> None:
"""Add the abstract to the Marc21Metadata."""
for abstract in value["text"]:
record.emplace_field("520...", value=abstract["value"])
try:
abstract = value["de_DE"]
except KeyError:
try:
abstract = value["en_GB"]
except KeyError:
abstract = None

if abstract:
record.emplace_datafield("520...", value=abstract)

def convert_additionalLinks(self, value: list, record: Marc21Metadata) -> None:
"""Add the additionalLinks attribute to the Marc21Metadata."""
for link in value:
if "url" in link:
record.emplace_field("856.4.1.u", value=link["url"])
record.emplace_datafield("856.4.1.u", value=link["url"])

def convert_bibliographicalNote(self, value: dict, record: Marc21Metadata) -> None:
"""Add the bibliographicalNote attribute to the Marc21Metadata."""
for text in value["text"]:
record.emplace_field("500...", value=text["value"])
record.emplace_datafield("500...", value=text["value"])

def convert_edition(self, value: str, record: Marc21Metadata) -> None:
"""Add the edition attribute to the Marc21Metadata."""
record.emplace_field("250...", value=value)
record.emplace_datafield("250...", value=value)

def convert_electronicIsbns(self, value: list, record: Marc21Metadata) -> None:
"""Add the electronicIsbns attribute to the Marc21Metadata."""
record.emplace_field("020...", value=str(value[0]).strip())
record.emplace_datafield("020...", value=str(value[0]).strip())

def convert_event(self, value: dict, record: Marc21Metadata) -> None:
"""Add the event attribute to the Marc21Metadata."""
for event_name in value["name"]["text"]:
record.emplace_field("711.2..", value=event_name["value"])
record.emplace_datafield("711.2..", value=event_name["value"])

def convert_isbns(self, value: list, record: Marc21Metadata) -> None:
"""Add the isbns attribute to the Marc21Metadata."""
for isbn in value:
record.emplace_field("020...", value=isbn)
record.emplace_datafield("020...", value=isbn)

def convert_journalAssociation(self, value: dict, record: Marc21Metadata) -> None:
"""Add the journalAssociation attribute to the Marc21Metadata."""
value = value["title"]["value"]
record.emplace_field("773.0.8.t", value=value)
subfs = {}
try:
subfs["t"] = value["title"]["title"]
except KeyError:
pass

try:
subfs["x"] = value["issn"]["issn"]
except KeyError:
pass

if subfs:
record.emplace_datafield("773.0.8.", subfs=subfs)

def convert_journalNumber(self, value: str, record: Marc21Metadata) -> None:
"""Add the journalNumber attribute to the Marc21Metadata."""
record.emplace_field("773.0.8.g", value=value)
record.emplace_datafield("773.0.8.g", value=value)

def convert_keywordGroups(self, value: list, record: Marc21Metadata) -> None:
"""Add the keywordGroups attribute to the Marc21Metadata."""
Expand All @@ -101,10 +120,9 @@ def convert_keywordGroups(self, value: list, record: Marc21Metadata) -> None:

def convert_language(self, value: dict, record: Marc21Metadata) -> None:
"""Add the language attribute to the Marc21Metadata."""
for locale in value["term"]["text"]:
language = locale["value"]
language_iso6393 = self.languages[language]
record.emplace_field("041...", value=language_iso6393)
language = value["term"]["en_GB"]
language_iso6393 = self.languages[language]
record.emplace_datafield("041...", value=language_iso6393)

def convert_managingOrganisationalUnit(
self,
Expand All @@ -118,7 +136,7 @@ def convert_managingOrganisationalUnit(

def convert_numberOfPages(self, value: int, record: Marc21Metadata) -> None:
"""Add the numberOfPages attribute to the Marc21Metadata."""
record.emplace_field("300...", value=str(value))
record.emplace_datafield("300...", value=str(value))

def convert_organisationalUnits(self, value: list, record: Marc21Metadata) -> None:
"""Add the organisationalUnits attribute to the Marc21Metadata."""
Expand All @@ -130,11 +148,11 @@ def convert_organisationalUnits(self, value: list, record: Marc21Metadata) -> No
def convert_pages(self, value: str, record: Marc21Metadata) -> None:
"""Add the pages attriute to the Marc21Metadata."""
pages = value
record.emplace_field("300...", value=pages)
record.emplace_datafield("300...", value=pages)

def convert_patentNumber(self, value: str, record: Marc21Metadata) -> None:
"""Add the patentNumber attribute to the Marc21Metadata."""
record.emplace_field("013...", value=value)
record.emplace_datafield("013...", value=value)

def convert_peerReview(
self,
Expand All @@ -144,16 +162,16 @@ def convert_peerReview(
"""Add the peerReview attribute to the Marc21Metadata."""
if value:
status = "Refereed/Peer-reviewed"
record.emplace_field("500...", value=status)
record.emplace_datafield("500...", value=status)

def convert_placeOfPublication(self, value: str, record: Marc21Metadata) -> None:
"""Add the placeOfPublication attribute to the Marc21Metadata."""
record.emplace_field("264...", value=value)
record.emplace_datafield("264..1.", value=value)

def convert_publicationSeries(self, value: list, record: Marc21Metadata) -> None:
"""Add the publicationSeries attribute to the Marc21Metadata."""
for series in value:
record.emplace_field("490.0..", value=series["name"])
record.emplace_datafield("490.0..", value=series["name"])

def convert_publicationStatuses(self, value: list, record: Marc21Metadata) -> None:
"""Add the publicationStatuses attribute to the Marc21Metadata."""
Expand All @@ -164,26 +182,26 @@ def convert_publicationStatuses(self, value: list, record: Marc21Metadata) -> No
def convert_publisher(self, value: dict, record: Marc21Metadata) -> None:
"""Add the publisher attribute to the Marc21Metadata."""
for text in value["name"]["text"]:
record.emplace_field("264...b", value=text["value"])
record.emplace_datafield("264..1.b", value=text["value"])

def convert_relatedProjects(self, value: list, record: Marc21Metadata) -> None:
"""Add the relatedProjects attribute to the Marc21Metadata."""
for entry in value:
for locale in entry["name"]["text"]:
record.emplace_field("536...", value=locale["value"])
record.emplace_datafield("536...", value=locale["value"])

def convert_subTitle(self, value: dict, record: Marc21Metadata) -> None:
"""Add the subTitle attribute to the Marc21Metadata."""
record.emplace_field("245.1.0.b", value=value["value"])
record.emplace_datafield("245.1.0.b", value=value["value"])

def convert_title(self, value: dict, record: Marc21Metadata) -> None:
"""Add the title attribute to the Marc21Metadata."""
record.emplace_field("245.1.0.", value=value["value"])
record.emplace_datafield("245.1.0.", value=value["value"])

def convert_volume(self, value: str, record: Marc21Metadata) -> None:
"""Add the volume attribute to the Marc21Metadata."""
record.emplace_field("490.0..", value=value)
record.emplace_field("773.0.8.g", value=value)
record.emplace_datafield("490.0..", value=value)
record.emplace_datafield("773.0.8.g", value=value)


class KeywordGroup(Converter):
Expand All @@ -198,22 +216,22 @@ def convert_freeKeywords(self, value: list, record: Marc21Metadata) -> None:
"""Add free keywords."""
for free_keyword in value:
for word in free_keyword["freeKeywords"]:
record.emplace_field("650..4.g", value=word)
record.emplace_datafield("650..4.g", value=word)

def convert_structuredKeyword(self, value: dict, record: Marc21Metadata) -> None:
"""Add free keywords."""
for word in value["term"]["text"]:
record.emplace_field("650..4.", value=word["value"])
record.emplace_datafield("650..4.", value=word["value"])


class PublicationStatus(Converter):
"""Class to convert publication status."""

def convert_publicationDate(self, value: dict, record: Marc21Metadata) -> None:
"""Add the publication date to the Marc21Metadata."""
record.emplace_field("264...c", value=value["year"])
# cast to str: without the cast the value is not serializable
record.emplace_datafield("264..1.c", value=str(value["year"]))

def convert_publicationStatus(self, value: dict, record: Marc21Metadata) -> None:
"""Add the publication status to the Marc21Metadata."""
for text in value["term"]["text"]:
record.emplace_field("250...", value=text["value"])
record.emplace_datafield("250...", value=value["term"]["de_DE"])
45 changes: 35 additions & 10 deletions invenio_workflows_tugraz/openaccess/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,37 +23,62 @@ def _(value: PureId) -> None:
def access_type(electronic_version: dict) -> str:
"""Get Access type."""
try:
return electronic_version["accessType"]["term"]["text"][0]["value"]
return electronic_version["accessType"]["uri"]
except (KeyError, TypeError):
return False


def license_type(electronic_version: dict) -> str:
"""Get license type."""
try:
return electronic_version["licenseType"]["term"]["text"][0]["value"]
return electronic_version["licenseType"]["uri"]
except (KeyError, TypeError):
return False


def extract_file_urls(pure_record) -> URL:
def extract_files(pure_record) -> URL:
"""Extract file url."""

def condition(item: dict) -> bool:
condition_1 = access_type(item) in ["Open", "Offen"]
condition_2 = license_type(item).startswith("CC BY")
condition_1 = (
access_type(item) == "/dk/atira/pure/core/openaccesspermission/open"
)
condition_2 = "cc_by" in license_type(item)
return condition_1 and condition_2

file_urls = []
files = []
for electronic_version in pure_record["electronicVersions"]:
try:
file_url = electronic_version["file"]["fileURL"]
if condition(electronic_version):
file_urls.append(file_url)
files.append(electronic_version["file"])
except (KeyError, TypeError):
continue

if len(file_urls) == 0:
if len(files) == 0:
raise PureRuntimeError(pure_record)

return file_urls
return files


def change_to_exported(pure_record: dict) -> dict:
"""Replace the keyword group."""

# pure_record["keywordGroups"]
# new_keyword_group = {
# "typeDiscriminator": "ClassificationsKeywordGroup",
# "pureId": 69857868,
# "logicalName": new_logical_name,
# "name": {
# "en_GB": "Export to Repository",
# "de_DE": "Export ins Repository"
# },
# "classifications": [
# {
# "uri": "dk/atira/pure/researchoutput/keywords/export2repo/validated",
# "term": {
# "en_GB": "Validated",
# "de_DE": "Validiert"
# }
# }
# ]
# }
18 changes: 13 additions & 5 deletions invenio_workflows_tugraz/openaccess/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

from .convert import Pure2Marc21
from .types import PureId
from .utils import extract_file_urls
from .utils import change_to_exported, extract_files


def openaccess_filter() -> dict:
Expand All @@ -52,13 +52,15 @@ def import_func(

try:
pure_record = pure_service.get_metadata(identity, pure_id)
file_urls = extract_file_urls(pure_record)
file_path = pure_service.download_file(identity, file_urls)
files = extract_files(pure_record)
file_paths = []
for file_ in files:
file_paths.append(pure_service.download_file(identity, file_))
except (PureRESTError, PureRuntimeError) as error:
raise RuntimeError(str(error)) from error

marc21_record = Marc21Metadata()
converter = Pure2Marc21(marc21_record)
converter = Pure2Marc21()
converter.convert(pure_record, marc21_record)

data = marc21_record.json
Expand All @@ -71,10 +73,12 @@ def import_func(
record = create_record(
marc21_service,
data,
[file_path],
file_paths,
identity,
do_publish=False,
)
# validate the draft here so that the record can be marked as exported:
# a draft that validates will be published without problems
marc21_service.validate_draft(
identity,
id_=record.id,
Expand All @@ -88,6 +92,10 @@ def import_func(
raise RuntimeError(msg) from error

try:
print(f"import_func pure_record: {pure_record}")

change_to_exported(pure_record)

pure_service.mark_as_exported(pure_id, pure_record)
except PureRESTError as error:
raise RuntimeError(str(error)) from error
Expand Down
2 changes: 2 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ flask.commands =
workflows = invenio_workflows_tugraz.cli:workflows
invenio_base.apps =
invenio_workflows_tugraz = invenio_workflows_tugraz:InvenioWorkflowsTugraz
invenio_base.api_apps =
invenio_workflows_tugraz = invenio_workflows_tugraz:InvenioWorkflowsTugraz
invenio_base.blueprints =
invenio_workflows_tugraz_theses = invenio_workflows_tugraz.theses:create_blueprint
invenio_celery.tasks =
Expand Down
27 changes: 17 additions & 10 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import pytest
from flask import Flask
from invenio_app.factory import create_api
from invenio_db import InvenioDB
from invenio_records_marc21 import InvenioRecordsMARC21
from invenio_search import InvenioSearch
Expand All @@ -26,19 +27,25 @@


@pytest.fixture(scope="module")
def create_app(instance_path: str) -> Callable:
def create_app(instance_path, entry_points):
"""Application factory fixture."""
return create_api

def factory(**config: dict) -> Flask:
app = Flask("testapp", instance_path=instance_path)
app.config.update(**config)
InvenioWorkflowsTugraz(app)
InvenioSearch(app)
InvenioDB(app)
InvenioRecordsMARC21(app)
return app

return factory
# @pytest.fixture(scope="module")
# def create_app(instance_path: str) -> Callable:
# """Application factory fixture."""

# def factory(**config: dict) -> Flask:
# app = Flask("testapp", instance_path=instance_path)
# app.config.update(**config)
# InvenioWorkflowsTugraz(app)
# InvenioSearch(app)
# InvenioDB(app)
# InvenioRecordsMARC21(app)
# return app

# return factory


@pytest.fixture()
Expand Down
2 changes: 1 addition & 1 deletion tests/theses/test_theses.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@


def test_update_func(
running_app: Flask, # noqa: ARG001
app: Flask, # noqa: ARG001
embargoed_record_xml: str,
) -> None:
"""Test update func."""
Expand Down

0 comments on commit 8100b11

Please sign in to comment.