Skip to content

Commit

Permalink
Implement support for *Zenodo* API changes.
Browse files Browse the repository at this point in the history
  • Loading branch information
KelSolaar committed Oct 20, 2023
1 parent 8f556ba commit 71055db
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 81 deletions.
3 changes: 1 addition & 2 deletions colour_datasets/records/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ def datasets() -> Community:
Examples
--------
# Doctests skip for Python 2.x compatibility.
>>> datasets()["3245883"].title # doctest: +SKIP
>>> datasets()["3245883"].title
'Camera Spectral Sensitivity Database - Jiang et al. (2013)'
"""

Expand Down
64 changes: 40 additions & 24 deletions colour_datasets/records/tests/test_zenodo.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,10 +129,10 @@ def test__str__(self):
Record ID : 3245883
Authors : Jiang, Jun; Liu, Dengyu; Gu, Jinwei; Süsstrunk, Sabine
License : CC-BY-NC-SA-4.0
License : cc-by-nc-sa-4.0
DOI : 10.5281/zenodo.3245883
Publication Date : 2019-06-14
URL : https://zenodo.org/record/3245883
URL : https://zenodo.org/records/3245883
Description
-----------
Expand All @@ -154,17 +154,18 @@ def test__str__(self):
Files
-----
- camlist&equipment.txt : https://zenodo.org/api/files/\
a9c418ed-c354-4a90-abc7-5f88c89de741/camlist%26equipment.txt
- camspec_database.txt : https://zenodo.org/api/files/\
a9c418ed-c354-4a90-abc7-5f88c89de741/camspec_database.txt
- urls.txt : https://zenodo.org/api/files/\
a9c418ed-c354-4a90-abc7-5f88c89de741/urls.txt"""
- camlist&equipment.txt : https://zenodo.org/api/records/3245883/files/\
camlist&equipment.txt
- camspec_database.txt : https://zenodo.org/api/records/3245883/files/\
camspec_database.txt
- urls.txt : https://zenodo.org/api/records/3245883/files/urls.txt"""
)[1:],
)

def test__repr__(self):
"""Test :func:`colour_datasets.records.zenodo.Record.__repr__` method."""
"""
Test :func:`colour_datasets.records.zenodo.Record.__repr__` method.
"""

self.assertIsInstance(
eval( # noqa: PGH001, S307
Expand Down Expand Up @@ -219,12 +220,9 @@ def setUp(self):
"""Initialise the common tests attributes."""

community_data = json_open(
"https://zenodo.org/api/communities/colour-science-datasets"
)
records_data = json_open(
"https://zenodo.org/api/records/?q=communities:"
"colour-science-datasets-tests"
"https://zenodo.org/api/communities/colour-science-datasets-tests"
)
records_data = json_open(community_data["links"]["records"])

self._data = {
"community": community_data,
Expand Down Expand Up @@ -275,7 +273,9 @@ def test_configuration(self):
self.assertEqual(self._community.configuration, self._configuration)

def test_data(self):
"""Test :func:colour_datasets.records.zenodo.Community.data` property."""
"""
Test :func:`colour_datasets.records.zenodo.Community.data` property.
"""

self.assertEqual(self._community.data, self._data)

Expand All @@ -290,12 +290,16 @@ def test_repository(self):
)

def test_records(self):
"""Test :func:colour_datasets.records.zenodo.Community.records` property."""
"""
Test :func:`colour_datasets.records.zenodo.Community.records` property.
"""

self.assertIn("3245883", list(self._community.records))

def test__init__(self):
"""Test :func:`colour_datasets.records.zenodo.Community.__init__` method."""
"""
Test :func:`colour_datasets.records.zenodo.Community.__init__` method.
"""

community = Community(self._data, self._configuration)

Expand All @@ -305,7 +309,9 @@ def test__init__(self):
)

def test__str__(self):
"""Test :func:`colour_datasets.records.zenodo.Community.__str__` method."""
"""
Test :func:`colour_datasets.records.zenodo.Community.__str__` method.
"""

self._community.remove()

Expand All @@ -318,7 +324,7 @@ def test__str__(self):
Datasets : 4
Synced : 0
URL : https://zenodo.org/communities/colour-science-datasets/
URL : https://zenodo.org/communities/colour-science-datasets-tests
Datasets
--------
Expand All @@ -332,7 +338,9 @@ def test__str__(self):
)

def test__repr__(self):
"""Test :func:`colour_datasets.records.zenodo.Community.__repr__` method."""
"""
Test :func:`colour_datasets.records.zenodo.Community.__repr__` method.
"""

self.assertIsInstance(
eval( # noqa: PGH001, S307
Expand Down Expand Up @@ -372,7 +380,9 @@ def test__len__(self):
self.assertEqual(len(self._community), len(self._community.records))

def test_from_id(self):
"""Test :func:`colour_datasets.records.zenodo.Community.from_id` method."""
"""
Test :func:`colour_datasets.records.zenodo.Community.from_id` method.
"""

community = Community.from_id("colour-science-datasets")

Expand All @@ -383,22 +393,28 @@ def test_from_id(self):
)

def test_synced(self):
"""Test :func:`colour_datasets.records.zenodo.Community.synced` method."""
"""
Test :func:`colour_datasets.records.zenodo.Community.synced` method.
"""

self._community.pull()
self.assertTrue(self._community.synced())
self._community.remove()
self.assertFalse(self._community.synced())

def test_pull(self):
"""Test :func:`colour_datasets.records.zenodo.Community.pull` method."""
"""
Test :func:`colour_datasets.records.zenodo.Community.pull` method.
"""

self._community.remove()
self._community.pull()
self.assertTrue(self._community.synced())

def test_remove(self):
"""Test :func:`colour_datasets.records.zenodo.Community.remove` method."""
"""
Test :func:`colour_datasets.records.zenodo.Community.remove` method.
"""

self._community.pull()
self._community.remove()
Expand Down
84 changes: 34 additions & 50 deletions colour_datasets/records/zenodo.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,11 +82,9 @@ class Record:
Examples
--------
>>> record = Record(json_open("https://zenodo.org/api/records/3245883"))
# Doctests skip for Python 2.x compatibility.
>>> record.id # doctest: +SKIP
>>> record.id
'3245883'
>>> record.title # doctest: +SKIP
>>> record.title
'Camera Spectral Sensitivity Database - Jiang et al. (2013)'
"""

Expand Down Expand Up @@ -203,10 +201,11 @@ def strip_html(text: str) -> str:
description = "\n".join(
textwrap.wrap(strip_html(metadata["description"]), 79)
)

files = "\n".join(
[
f"- {file_data['key']} : {file_data['links']['self']}"
for file_data in sorted(files, key=lambda x: x["key"])
f"- {file_data['filename']} : {file_data['links']['self']}"
for file_data in sorted(files, key=lambda x: x["filename"])
]
)

Expand All @@ -220,7 +219,7 @@ def strip_html(text: str) -> str:
f'License : {metadata["license"]["id"]}',
f'DOI : {metadata["doi"]}',
f'Publication Date : {metadata["publication_date"]}',
f'URL : {self._data["links"]["html"]}\n',
f'URL : {self._data["links"]["self_html"]}\n',
"Description",
"-----------",
"",
Expand All @@ -247,10 +246,7 @@ def __repr__(self) -> str:
Examples
--------
>>> data = json_open("https://zenodo.org/api/records/3245883")
# Doctests skip for Python 2.x compatibility.
>>> print("\\n".join(repr(Record(data)).splitlines()[:4]))
... # doctest: +SKIP
Record(
{'conceptdoi': '10.5281/zenodo.3245882',
'conceptrecid': '3245882',
Expand Down Expand Up @@ -298,9 +294,7 @@ def from_id(
Examples
--------
# Doctests skip for Python 2.x compatibility.
>>> Record.from_id("3245883").title
... # doctest: +SKIP
'Camera Spectral Sensitivity Database - Jiang et al. (2013)'
"""

Expand Down Expand Up @@ -394,11 +388,11 @@ def pull(self, use_urls_txt_file: bool = True, retries: int = 3):
# given by the content of :attr:`URLS_TXT_FILE` attribute file.
urls_txt = None
for file_data in self.data["files"]:
if file_data["key"] == self._configuration.urls_txt_file:
if file_data["filename"] == self._configuration.urls_txt_file:
urls_txt = file_data
break

def urls_download(urls: Dict):
def urls_download(urls: Dict, is_content_url=False):
"""Download given urls."""

for url, md5 in urls.items():
Expand All @@ -408,14 +402,17 @@ def urls_download(urls: Dict):
url.split("/")[-1]
),
)
url = ( # noqa: PLW2901
f"{url}/content" if is_content_url else url
)
url_download(url, filename, md5.split(":")[-1], retries)

try:
if use_urls_txt_file and urls_txt:
urls = {}
urls_txt_file = tempfile.NamedTemporaryFile(delete=False).name
url_download(
urls_txt["links"]["self"],
urls_txt["links"]["download"],
urls_txt_file,
urls_txt["checksum"].split(":")[-1],
retries,
Expand Down Expand Up @@ -448,14 +445,19 @@ def urls_download(urls: Dict):

urls = {}
for file_data in self.data["files"]:
if file_data["key"] == self._configuration.urls_txt_file:
if file_data["filename"] == self._configuration.urls_txt_file:
continue

urls[file_data["links"]["self"]] = file_data["checksum"].split(
":"
)[-1]
# TODO: Remove the following space escaping: The new Zenodo API
# does not quote filenames properly, thus we are temporarily
# escaping spaces.
# https://github.com/colour-science/colour-datasets/issues/
# 36#issuecomment-1773464695
url = file_data["links"]["self"].replace(" ", "%20")

urls[url] = file_data["checksum"].split(":")[-1]

urls_download(urls)
urls_download(urls, is_content_url=True)

deflate_directory = os.path.join(
self.repository, self._configuration.deflate_directory
Expand Down Expand Up @@ -548,19 +550,14 @@ class Community(Mapping):
>>> community_data = json_open(
... "https://zenodo.org/api/communities/colour-science-datasets"
... )
>>> records_data = json_open(
... "https://zenodo.org/api/records/?q=communities:"
... "colour-science-datasets"
... )
>>> records_data = json_open(community_data["links"]["records"])
>>> community = Community(
... {
... "community": community_data,
... "records": records_data,
... }
... )
# Doctests skip for Python 2.x compatibility.
>>> community["3245883"].title # doctest: +SKIP
>>> community["3245883"].title
'Camera Spectral Sensitivity Database - Jiang et al. (2013)'
"""

Expand Down Expand Up @@ -649,7 +646,7 @@ def __str__(self) -> str:
Datasets : ...
Synced : ...
URL : https://zenodo.org/communities/\
colour-science-datasets-tests/
colour-science-datasets-tests
"""

datasets = "\n".join(
Expand All @@ -673,7 +670,7 @@ def __str__(self) -> str:
"",
f"Datasets : {len(self)}",
f"Synced : {synced}",
f'URL : {self._data["community"]["links"]["html"]}',
f'URL : {self._data["community"]["links"]["self_html"]}',
"",
"Datasets",
"--------",
Expand All @@ -696,14 +693,11 @@ def __repr__(self) -> str:
Examples
--------
>>> community = Community.from_id("colour-science-datasets-tests")
# Doctests skip for Python 2.x compatibility.
>>> print("\\n".join(repr(community).splitlines()[:4]))
... # doctest: +SKIP
Community(
{'community': {'created': '2019-06-09T10:45:47.999975+00:00',
'curation_policy': '',
'description': '',
{'community': {'access': {'member_policy': 'open',
'record_policy': 'open',
'review_policy': 'open',
"""

data = "\n".join(
Expand Down Expand Up @@ -737,9 +731,7 @@ def __getitem__(self, item: str | Any) -> Any:
Examples
--------
>>> community = Community.from_id("colour-science-datasets-tests")
# Doctests skip for Python 2.x compatibility.
>>> community["3245883"].title # doctest: +SKIP
>>> community["3245883"].title
'Camera Spectral Sensitivity Database - Jiang et al. (2013)'
"""

Expand All @@ -756,7 +748,6 @@ def __iter__(self) -> Generator:
Examples
--------
# Doctests skip for Python 2.x compatibility.
>>> for record in Community.from_id("colour-science-datasets-tests"):
... print(record) # doctest: +SKIP
...
Expand Down Expand Up @@ -811,9 +802,7 @@ def from_id(
Examples
--------
>>> community = Community.from_id("colour-science-datasets-tests")
# Doctests skip for Python 2.x compatibility.
>>> community["3245883"].title # doctest: +SKIP
>>> community["3245883"].title
'Camera Spectral Sensitivity Database - Jiang et al. (2013)'
"""

Expand All @@ -828,13 +817,6 @@ def from_id(
community_url = (
f"{configuration.api_url}/communities/{configuration.community}"
)
# NOTE: Retrieving 512 datasets at most. This should cover needs for
# the foreseeable future. There is likely an undocumented hard limit on
# "Zenodo" server side.
records_url = (
f"{configuration.api_url}/records/"
f"?q=communities:{configuration.community}&size=512"
)

community_json_filename = os.path.join(
configuration.repository,
Expand All @@ -846,7 +828,9 @@ def from_id(

try:
community_data = json_open(community_url, retries)
records_data = json_open(records_url, retries)
records_data = json_open(
community_data["links"]["records"], retries
)

for key, value in {
community_json_filename: community_data,
Expand Down
Loading

0 comments on commit 71055db

Please sign in to comment.