Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
utnapischtim committed Jul 26, 2024
1 parent 87d1a95 commit 211e8a2
Show file tree
Hide file tree
Showing 7 changed files with 100 additions and 107 deletions.
22 changes: 11 additions & 11 deletions invenio_workflows_tugraz/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

"""Configuration file."""

from .openaccess import pure_import_func, pure_sieve_func
from .openaccess import import_func, openaccess_filter
from .theses import (
create_func,
duplicate_func,
Expand All @@ -20,31 +20,31 @@
update_func,
)

WORKFLOW_ALMA_REPOSITORY_RECORDS_IMPORT_FUNC = import_from_alma_func
WORKFLOWS_ALMA_REPOSITORY_RECORDS_IMPORT_FUNC = import_from_alma_func
""""""

WORKFLOW_ALMA_REPOSITORY_RECORDS_UPDATE_AGGREGATOR = theses_update_aggregator
WORKFLOWS_ALMA_REPOSITORY_RECORDS_UPDATE_AGGREGATOR = theses_update_aggregator
""""""

WORKFLOW_ALMA_REPOSITORY_RECORDS_UPDATE_FUNC = update_func
WORKFLOWS_ALMA_REPOSITORY_RECORDS_UPDATE_FUNC = update_func
""""""

WORKFLOW_ALMA_ALMA_RECORDS_CREATE_FUNC = create_func
WORKFLOWS_ALMA_ALMA_RECORDS_CREATE_FUNC = create_func
""""""

WORKFLOW_ALMA_ALMA_RECORDS_CREATE_AGGREGATOR = theses_create_aggregator
WORKFLOWS_ALMA_ALMA_RECORDS_CREATE_AGGREGATOR = theses_create_aggregator
""""""

WORKFLOW_CAMPUSONLINE_THESES_FILTER = theses_filter()
WORKFLOWS_CAMPUSONLINE_THESES_FILTER = theses_filter()
""""""

WORKFLOW_CAMPUSONLINE_IMPORT_FUNC = import_from_cms_func
WORKFLOWS_CAMPUSONLINE_IMPORT_FUNC = import_from_cms_func
""""""

WORKFLOW_CAMPUSONLINE_DUPLICATE_FUNC = duplicate_func
WORKFLOWS_CAMPUSONLINE_DUPLICATE_FUNC = duplicate_func
""""""

WORKFLOW_PURE_IMPORT_FUNC = pure_import_func
WORKFLOWS_PURE_IMPORT_FUNC = import_func
"""See corresponding variable in invenio-pure."""

WORKFLOW_PURE_SIEVE_FUNC = pure_sieve_func
WORKFLOWS_PURE_FILTER_RECORDS = openaccess_filter()
26 changes: 4 additions & 22 deletions invenio_workflows_tugraz/ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,30 +33,12 @@ def init_config(self, app: Flask) -> None:
for k in dir(config):
attr = getattr(config, k)

if k == "WORKFLOW_ALMA_REPOSITORY_RECORDS_UPDATE_AGGREGATOR":
app.config["ALMA_REPOSITORY_RECORDS_UPDATE_AGGREGATOR"] = attr

elif k == "WORKFLOW_ALMA_REPOSITORY_RECORDS_UPDATE_FUNC":
app.config["ALMA_REPOSITORY_RECORDS_UPDATE_FUNC"] = attr

elif k == "WORKFLOW_ALMA_ALMA_RECORDS_CREATE_AGGREGATOR":
app.config["ALMA_ALMA_RECORDS_CREATE_AGGREGATOR"] = attr

elif k == "WORKFLOW_ALMA_ALMA_RECORDS_CREATE_FUNC":
app.config["ALMA_ALMA_RECORDS_CREATE_FUNC"] = attr

elif k == "WORKFLOW_CAMPUSONLINE_THESES_FILTER":
app.config["CAMPUSONLINE_THESES_FILTER"] = attr

elif k == "WORKFLOW_CAMPUSONLINE_IMPORT_FUNC":
app.config["CAMPUSONLINE_IMPORT_FUNC"] = attr

elif k == "WORKFLOW_CAMPUSONLINE_DUPLICATE_FUNC":
app.config["CAMPUSONLINE_DUPLICATE_FUNC"] = attr

elif k.startswith("WORKFLOWS_TUGRAZ_"):
if k.startswith("WORKFLOWS_TUGRAZ_"):
app.config.setdefault(k, attr)

elif k.startswith("WORKFLOWS_"):
app.config[k.replace("WORKFLOWS_", "")] = attr

def init_services(self, app: Flask) -> None:
"""Init services."""
theses_config = WorkflowThesesServiceConfig.build(app)
Expand Down
6 changes: 3 additions & 3 deletions invenio_workflows_tugraz/openaccess/__init__.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2022-2023 Graz University of Technology.
# Copyright (C) 2022-2024 Graz University of Technology.
#
# invenio-workflows-tugraz is free software; you can redistribute it and/or
# modify it under the terms of the MIT License; see LICENSE file for more
# details.

"""Open Access Workflow."""

from .workflow import pure_import_func, pure_sieve_func
from .workflow import import_func, openaccess_filter

__all__ = ("pure_import_func", "pure_sieve_func")
__all__ = ("import_func", "openaccess_filter")
9 changes: 9 additions & 0 deletions invenio_workflows_tugraz/openaccess/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2022-2024 Graz University of Technology.
#
# invenio-workflows-tugraz is free software; you can redistribute it and/or
# modify it under the terms of the MIT License; see LICENSE file for more
# details.

"""Open Access Workflows config."""
4 changes: 2 additions & 2 deletions invenio_workflows_tugraz/openaccess/types.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2022 Graz University of Technology.
# Copyright (C) 2022-2024 Graz University of Technology.
#
# invenio-workflows-tugraz is free software; you can redistribute it and/or
# modify it under the terms of the MIT License; see LICENSE file for more
Expand All @@ -14,4 +14,4 @@
class PureId(Marc21Category):
"""Pure ID."""

category: str = "995.subfields.d.keyword"
category: str = "024.subfields.a.keyword"
19 changes: 3 additions & 16 deletions invenio_workflows_tugraz/openaccess/utils.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2022-2023 Graz University of Technology.
# Copyright (C) 2022-2024 Graz University of Technology.
#
# invenio-workflows-tugraz is free software; you can redistribute it and/or
# modify it under the terms of the MIT License; see LICENSE file for more
# details.

"""Openaccess Workflow utils."""

from invenio_pure import URL, PureID, PureRecord, PureRuntimeError
from invenio_pure import URL, PureRuntimeError
from invenio_records_marc21 import check_about_duplicate

from .types import PureId
Expand Down Expand Up @@ -36,20 +36,7 @@ def license_type(electronic_version: dict) -> str:
return False


def workflow(electronic_version: dict) -> str:
    """Get workflow status.

    Looks up ``electronic_version["workflow"]["value"]["text"][0]["value"]``.

    :param electronic_version: nested mapping to read the status from.
    :returns: the value found at that path, or ``False`` when the path
        cannot be resolved (note: despite the ``str`` annotation, the
        failure sentinel is the boolean ``False``).
    """
    try:
        return electronic_version["workflow"]["value"]["text"][0]["value"]
    except (KeyError, TypeError, IndexError):
        # KeyError: a key along the path is missing.
        # TypeError: an intermediate value is not subscriptable (e.g. None).
        # IndexError: the "text" list exists but is empty.
        return False


def extract_pure_id(pure_record: PureRecord) -> PureID:
    """Extract pure id.

    :param pure_record: record mapping that carries a ``"pureId"`` entry.
    :returns: the value stored under ``"pureId"``.
    :raises KeyError: if the record has no ``"pureId"`` key.
    """
    return pure_record["pureId"]


def extract_file_url(pure_record: PureRecord) -> URL:
def extract_file_urls(pure_record) -> URL:
"""Extract file url."""

def condition(item: dict) -> bool:
Expand Down
121 changes: 68 additions & 53 deletions invenio_workflows_tugraz/openaccess/workflow.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2022 Graz University of Technology.
# Copyright (C) 2022-2024 Graz University of Technology.
#
# invenio-workflows-tugraz is free software; you can redistribute it and/or
# modify it under the terms of the MIT License; see LICENSE file for more
# details.

"""Open Access Workflow."""
from collections.abc import Callable

from invenio_config_tugraz import get_identity_from_user_by_email
from invenio_pure import PureConfigs, PureRecord

from flask_principal import Identity
from invenio_pure import PureRuntimeError
from invenio_pure.records.models import PureRESTError
from invenio_pure.services import PureRESTService
from invenio_pure.types import PureID
from invenio_records_marc21 import (
DuplicateRecordError,
Marc21Metadata,
Expand All @@ -19,66 +22,78 @@
current_records_marc21,
)
from invenio_records_resources.services.records.results import RecordItem
from marshmallow.exceptions import ValidationError
from sqlalchemy.orm.exc import StaleDataError

from .convert import Pure2Marc21
from .types import PureId
from .utils import (
access_type,
extract_file_url,
extract_pure_id,
license_type,
workflow,
)
from .utils import extract_file_urls


def openaccess_filter() -> dict:
    """Openaccess filter.

    :returns: a new dict with a single ``"keywordUris"`` entry listing the
        ``export2repo/validated`` keyword URI.
    """
    keyword_uri = "dk/atira/pure/researchoutput/keywords/export2repo/validated"
    return {"keywordUris": [keyword_uri]}

def pure_import_func(
pure_record: PureRecord,
configs: PureConfigs,
download_file: Callable,

def import_func(
identity: Identity,
pure_id: PureID,
pure_service: PureRESTService,
) -> RecordItem:
"""Import record from pure into the repository."""
pure_id = extract_pure_id(pure_record)
file_urls = extract_file_url(pure_record)

file_paths = []
for i, file_url in enumerate(file_urls):
file_path = download_file(
f"{pure_id}-{i}",
file_url,
configs.pure_username,
configs.pure_password,
)
file_paths.append(file_path)
marc21_service = current_records_marc21.records_service

try:
check_about_duplicate(PureId(pure_id))
except DuplicateRecordError as error:
raise RuntimeError(str(error)) from error

try:
pure_record = pure_service.get_metadata(identity, pure_id)
file_urls = extract_file_urls(pure_record)
file_path = pure_service.download_file(identity, file_urls)
except (PureRESTError, PureRuntimeError) as error:
raise RuntimeError(str(error)) from error

marc21_record = Marc21Metadata()
convert = Pure2Marc21()
convert.convert(pure_record, marc21_record)
converter = Pure2Marc21(marc21_record)
converter.convert(pure_record, marc21_record)

identity = get_identity_from_user_by_email(email=configs.user_email)
service = current_records_marc21.records_service
data = marc21_record.json
data["access"] = {"record": "public", "files": "public"}

return create_record(service, data, file_paths, identity, do_publish=True)
data["access"] = {
"record": "public",
"files": "public",
}

try:
record = create_record(
marc21_service,
data,
[file_path],
identity,
do_publish=False,
)
marc21_service.validate_draft(
identity,
id_=record.id,
ignore_field_permissions=True,
)
except StaleDataError as error:
msg = f"ERROR: PureImport StaleDataError pure_id: {pure_id}"
raise RuntimeError(msg) from error
except ValidationError as error:
msg = f"ERROR: PureImport ValidationError pure_id: {pure_id}, error: {error}"
raise RuntimeError(msg) from error

def pure_sieve_func(pure_record: PureRecord) -> bool:
"""Check if the record fulfills the import criteria."""
try:
pure_id = extract_pure_id(pure_record)
check_about_duplicate(PureId(pure_id))
duplicate_sieve = True
except DuplicateRecordError:
return False

file_sieve = False
for electronic_version in pure_record["electronicVersions"]:
if (
"file" in electronic_version
and access_type(electronic_version) in ["Open", "Offen"]
and license_type(electronic_version).startswith("CC BY")
and workflow(pure_record["workflow"]) in ["Valid"]
):
file_sieve = True

return duplicate_sieve and file_sieve
pure_service.mark_as_exported(pure_id, pure_record)
except PureRESTError as error:
raise RuntimeError(str(error)) from error

# since the valid import has been checked directly after creating the draft
# the publish should work without errors.
marc21_service.publish(id_=record.id, identity=identity)

return record

0 comments on commit 211e8a2

Please sign in to comment.