diff --git a/inspire_schemas/utils.py b/inspire_schemas/utils.py
index 20b9f7c1..c664e13a 100644
--- a/inspire_schemas/utils.py
+++ b/inspire_schemas/utils.py
@@ -36,7 +36,7 @@
 import six
 from bleach.linkifier import LinkifyFilter
 from bleach.sanitizer import Cleaner
-from idutils import is_orcid
+from idutils import is_isni
 from inspire_utils.date import PartialDate
 from isbn import ISBN
 from jsonschema import Draft4Validator, RefResolver, draft4_format_checker
@@ -122,7 +122,17 @@
     'econf',
 ]
 
+# Inclusive integer ranges that ``is_orcid`` accepts, expressed as the
+# identifier's leading 15 digits (i.e. the ISNI number without its trailing
+# check digit).
+ORCID_ISNI_RANGES = [
+    (15000000, 35000000),
+    (900000000000, 900100000000),
+]
 
+# URL prefixes that ``is_orcid`` strips from an identifier given in URL form.
+ORCID_URLS = ["http://orcid.org/", "https://orcid.org/"]
+
 # list produced from https://arxiv.org/archive/
 _NEW_CATEGORIES = {
     'acc-phys': 'physics.acc-ph',
@@ -437,6 +447,39 @@ def func_wrapper(self, *args, **kwargs):
     return func_wrapper
 
 
+def is_orcid(val):
+    """Test if argument is an ORCID ID.
+
+    One leading prefix from ``ORCID_URLS`` is dropped and dashes and spaces
+    are ignored. The remainder must pass ``is_isni`` and its number, taken
+    without the check digit, must fall inside one of ``ORCID_ISNI_RANGES``.
+
+    See http://support.orcid.org/knowledgebase/
+    articles/116780-structure-of-the-orcid-identifier
+
+    Args:
+        val: candidate identifier.
+
+    Returns:
+        bool: whether ``val`` is an ORCID ID; always False for non-strings.
+    """
+    if not isinstance(val, six.string_types):
+        return False
+
+    for orcid_url in ORCID_URLS:
+        if val.startswith(orcid_url):
+            val = val[len(orcid_url):]
+            break
+
+    val = val.replace("-", "").replace(" ", "")
+    if not is_isni(val):
+        return False
+
+    # Remove the check digit before the numeric range comparison.
+    number = int(val[:-1], 10)
+    return any(start <= number <= end for start, end in ORCID_ISNI_RANGES)
+
+
 def author_id_normalize_and_schema(uid, schema=None):
     """Detect and normalize an author UID schema.
 
diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py
index 196c3075..8aa38c43 100644
--- a/tests/unit/test_utils.py
+++ b/tests/unit/test_utils.py
@@ -33,7 +33,7 @@
 from inspire_schemas import errors, utils
 from inspire_utils.query import ordered
 
-from inspire_schemas.utils import normalize_collaboration_name
+from inspire_schemas.utils import is_orcid, normalize_collaboration_name
 
 
 def test_classify_field_returns_none_on_falsy_value():
@@ -1443,3 +1443,36 @@ def test_get_references_for_schema_returns_proper_schemas():
 )
 def test_normalize_collaboration_name(collaboration_name, expected_normalized_name):
     assert normalize_collaboration_name(collaboration_name) == expected_normalized_name
+
+
+def test_is_orcid():
+    """Accept valid ORCIDs, bare or with an http(s)://orcid.org/ prefix."""
+    orcids = [
+        "http://orcid.org/0000-0002-1825-0097",
+        "0000-0002-1825-0097",
+        "https://orcid.org/0000-0002-1825-0097",
+        "0000-0002-1694-233X",
+        "http://orcid.org/0000-0002-1694-233X",
+        "0009-0005-6000-7479",
+        "http://orcid.org/0009-0005-6000-7479",
+        "https://orcid.org/0009-0002-4767-9017",
+        "0009-0002-4767-9017",
+        "http://orcid.org/0009-0002-4767-9017",
+    ]
+    for orcid in orcids:
+        assert is_orcid(orcid), orcid
+
+
+def test_is_orcid_returns_false_for_non_orcid_values():
+    """Reject malformed values, bad check digits and non-ORCID ISNIs."""
+    non_orcids = [
+        "",
+        "not-an-orcid",
+        "https://orcid.org/",
+        # Wrong check digit.
+        "0000-0002-1825-0098",
+        # Valid ISNI, but outside the ORCID ranges.
+        "0000-0001-2103-2683",
+    ]
+    for non_orcid in non_orcids:
+        assert not is_orcid(non_orcid), non_orcid