From 71055db5827df4e919e80ce517d7c60cc6b959d0 Mon Sep 17 00:00:00 2001 From: Thomas Mansencal Date: Sat, 21 Oct 2023 08:58:42 +1300 Subject: [PATCH] Implement support for *Zenodo* API changes. --- colour_datasets/records/__init__.py | 3 +- colour_datasets/records/tests/test_zenodo.py | 64 +++++++++------ colour_datasets/records/zenodo.py | 84 ++++++++------------ colour_datasets/utilities/common.py | 1 - colour_datasets/utilities/spreadsheet.py | 6 +- 5 files changed, 77 insertions(+), 81 deletions(-) diff --git a/colour_datasets/records/__init__.py b/colour_datasets/records/__init__.py index d321c97..5d2a283 100644 --- a/colour_datasets/records/__init__.py +++ b/colour_datasets/records/__init__.py @@ -33,8 +33,7 @@ def datasets() -> Community: Examples -------- - # Doctests skip for Python 2.x compatibility. - >>> datasets()["3245883"].title # doctest: +SKIP + >>> datasets()["3245883"].title 'Camera Spectral Sensitivity Database - Jiang et al. (2013)' """ diff --git a/colour_datasets/records/tests/test_zenodo.py b/colour_datasets/records/tests/test_zenodo.py index 1bf095f..5fca47a 100644 --- a/colour_datasets/records/tests/test_zenodo.py +++ b/colour_datasets/records/tests/test_zenodo.py @@ -129,10 +129,10 @@ def test__str__(self): Record ID : 3245883 Authors : Jiang, Jun; Liu, Dengyu; Gu, Jinwei; Süsstrunk, Sabine -License : CC-BY-NC-SA-4.0 +License : cc-by-nc-sa-4.0 DOI : 10.5281/zenodo.3245883 Publication Date : 2019-06-14 -URL : https://zenodo.org/record/3245883 +URL : https://zenodo.org/records/3245883 Description ----------- @@ -154,17 +154,18 @@ def test__str__(self): Files ----- -- camlist&equipment.txt : https://zenodo.org/api/files/\ -a9c418ed-c354-4a90-abc7-5f88c89de741/camlist%26equipment.txt -- camspec_database.txt : https://zenodo.org/api/files/\ -a9c418ed-c354-4a90-abc7-5f88c89de741/camspec_database.txt -- urls.txt : https://zenodo.org/api/files/\ -a9c418ed-c354-4a90-abc7-5f88c89de741/urls.txt""" +- camlist&equipment.txt : 
https://zenodo.org/api/records/3245883/files/\ +camlist&equipment.txt +- camspec_database.txt : https://zenodo.org/api/records/3245883/files/\ +camspec_database.txt +- urls.txt : https://zenodo.org/api/records/3245883/files/urls.txt""" )[1:], ) def test__repr__(self): - """Test :func:`colour_datasets.records.zenodo.Record.__repr__` method.""" + """ + Test :func:`colour_datasets.records.zenodo.Record.__repr__` method. + """ self.assertIsInstance( eval( # noqa: PGH001, S307 @@ -219,12 +220,9 @@ def setUp(self): """Initialise the common tests attributes.""" community_data = json_open( - "https://zenodo.org/api/communities/colour-science-datasets" - ) - records_data = json_open( - "https://zenodo.org/api/records/?q=communities:" - "colour-science-datasets-tests" + "https://zenodo.org/api/communities/colour-science-datasets-tests" ) + records_data = json_open(community_data["links"]["records"]) self._data = { "community": community_data, @@ -275,7 +273,9 @@ def test_configuration(self): self.assertEqual(self._community.configuration, self._configuration) def test_data(self): - """Test :func:colour_datasets.records.zenodo.Community.data` property.""" + """ + Test :func:`colour_datasets.records.zenodo.Community.data` property. + """ self.assertEqual(self._community.data, self._data) @@ -290,12 +290,16 @@ def test_repository(self): ) def test_records(self): - """Test :func:colour_datasets.records.zenodo.Community.records` property.""" + """ + Test :func:`colour_datasets.records.zenodo.Community.records` property. + """ self.assertIn("3245883", list(self._community.records)) def test__init__(self): - """Test :func:`colour_datasets.records.zenodo.Community.__init__` method.""" + """ + Test :func:`colour_datasets.records.zenodo.Community.__init__` method.
+ """ community = Community(self._data, self._configuration) @@ -305,7 +309,9 @@ def test__init__(self): ) def test__str__(self): - """Test :func:`colour_datasets.records.zenodo.Community.__str__` method.""" + """ + Test :func:`colour_datasets.records.zenodo.Community.__str__` method. + """ self._community.remove() @@ -318,7 +324,7 @@ def test__str__(self): Datasets : 4 Synced : 0 -URL : https://zenodo.org/communities/colour-science-datasets/ +URL : https://zenodo.org/communities/colour-science-datasets-tests Datasets -------- @@ -332,7 +338,9 @@ def test__str__(self): ) def test__repr__(self): - """Test :func:`colour_datasets.records.zenodo.Community.__repr__` method.""" + """ + Test :func:`colour_datasets.records.zenodo.Community.__repr__` method. + """ self.assertIsInstance( eval( # noqa: PGH001, S307 @@ -372,7 +380,9 @@ def test__len__(self): self.assertEqual(len(self._community), len(self._community.records)) def test_from_id(self): - """Test :func:`colour_datasets.records.zenodo.Community.from_id` method.""" + """ + Test :func:`colour_datasets.records.zenodo.Community.from_id` method. + """ community = Community.from_id("colour-science-datasets") @@ -383,7 +393,9 @@ def test_from_id(self): ) def test_synced(self): - """Test :func:`colour_datasets.records.zenodo.Community.synced` method.""" + """ + Test :func:`colour_datasets.records.zenodo.Community.synced` method. + """ self._community.pull() self.assertTrue(self._community.synced()) @@ -391,14 +403,18 @@ def test_synced(self): self.assertFalse(self._community.synced()) def test_pull(self): - """Test :func:`colour_datasets.records.zenodo.Community.pull` method.""" + """ + Test :func:`colour_datasets.records.zenodo.Community.pull` method. + """ self._community.remove() self._community.pull() self.assertTrue(self._community.synced()) def test_remove(self): - """Test :func:`colour_datasets.records.zenodo.Community.remove` method.""" + """ + Test :func:`colour_datasets.records.zenodo.Community.remove` method. 
+ """ self._community.pull() self._community.remove() diff --git a/colour_datasets/records/zenodo.py b/colour_datasets/records/zenodo.py index 0eaad9b..c462193 100644 --- a/colour_datasets/records/zenodo.py +++ b/colour_datasets/records/zenodo.py @@ -82,11 +82,9 @@ class Record: Examples -------- >>> record = Record(json_open("https://zenodo.org/api/records/3245883")) - - # Doctests skip for Python 2.x compatibility. - >>> record.id # doctest: +SKIP + >>> record.id '3245883' - >>> record.title # doctest: +SKIP + >>> record.title 'Camera Spectral Sensitivity Database - Jiang et al. (2013)' """ @@ -203,10 +201,11 @@ def strip_html(text: str) -> str: description = "\n".join( textwrap.wrap(strip_html(metadata["description"]), 79) ) + files = "\n".join( [ - f"- {file_data['key']} : {file_data['links']['self']}" - for file_data in sorted(files, key=lambda x: x["key"]) + f"- {file_data['filename']} : {file_data['links']['self']}" + for file_data in sorted(files, key=lambda x: x["filename"]) ] ) @@ -220,7 +219,7 @@ def strip_html(text: str) -> str: f'License : {metadata["license"]["id"]}', f'DOI : {metadata["doi"]}', f'Publication Date : {metadata["publication_date"]}', - f'URL : {self._data["links"]["html"]}\n', + f'URL : {self._data["links"]["self_html"]}\n', "Description", "-----------", "", @@ -247,10 +246,7 @@ def __repr__(self) -> str: Examples -------- >>> data = json_open("https://zenodo.org/api/records/3245883") - - # Doctests skip for Python 2.x compatibility. >>> print("\\n".join(repr(Record(data)).splitlines()[:4])) - ... # doctest: +SKIP Record( {'conceptdoi': '10.5281/zenodo.3245882', 'conceptrecid': '3245882', @@ -298,9 +294,7 @@ def from_id( Examples -------- - # Doctests skip for Python 2.x compatibility. >>> Record.from_id("3245883").title - ... # doctest: +SKIP 'Camera Spectral Sensitivity Database - Jiang et al. 
(2013)' """ @@ -394,11 +388,11 @@ def pull(self, use_urls_txt_file: bool = True, retries: int = 3): # given by the content of :attr:`URLS_TXT_FILE` attribute file. urls_txt = None for file_data in self.data["files"]: - if file_data["key"] == self._configuration.urls_txt_file: + if file_data["filename"] == self._configuration.urls_txt_file: urls_txt = file_data break - def urls_download(urls: Dict): + def urls_download(urls: Dict, is_content_url=False): """Download given urls.""" for url, md5 in urls.items(): @@ -408,6 +402,9 @@ def urls_download(urls: Dict): url.split("/")[-1] ), ) + url = ( # noqa: PLW2901 + f"{url}/content" if is_content_url else url + ) url_download(url, filename, md5.split(":")[-1], retries) try: @@ -415,7 +412,7 @@ def urls_download(urls: Dict): urls = {} urls_txt_file = tempfile.NamedTemporaryFile(delete=False).name url_download( - urls_txt["links"]["self"], + urls_txt["links"]["download"], urls_txt_file, urls_txt["checksum"].split(":")[-1], retries, @@ -448,14 +445,19 @@ def urls_download(urls: Dict): urls = {} for file_data in self.data["files"]: - if file_data["key"] == self._configuration.urls_txt_file: + if file_data["filename"] == self._configuration.urls_txt_file: continue - urls[file_data["links"]["self"]] = file_data["checksum"].split( - ":" - )[-1] + # TODO: Remove the following space escaping: The new Zenodo API + # is not quoting filenames properly thus we are temporarily + # escaping spaces for now. + # https://github.com/colour-science/colour-datasets/issues/ + # 36#issuecomment-1773464695 + url = file_data["links"]["self"].replace(" ", "%20") + + urls[url] = file_data["checksum"].split(":")[-1] - urls_download(urls) + urls_download(urls, is_content_url=True) deflate_directory = os.path.join( self.repository, self._configuration.deflate_directory @@ -548,19 +550,14 @@ class Community(Mapping): >>> community_data = json_open( ... "https://zenodo.org/api/communities/colour-science-datasets" ... 
) - >>> records_data = json_open( - ... "https://zenodo.org/api/records/?q=communities:" - ... "colour-science-datasets" - ... ) + >>> records_data = json_open(community_data["links"]["records"]) >>> community = Community( ... { ... "community": community_data, ... "records": records_data, ... } ... ) - - # Doctests skip for Python 2.x compatibility. - >>> community["3245883"].title # doctest: +SKIP + >>> community["3245883"].title 'Camera Spectral Sensitivity Database - Jiang et al. (2013)' """ @@ -649,7 +646,7 @@ def __str__(self) -> str: Datasets : ... Synced : ... URL : https://zenodo.org/communities/\ -colour-science-datasets-tests/ +colour-science-datasets-tests """ datasets = "\n".join( @@ -673,7 +670,7 @@ def __str__(self) -> str: "", f"Datasets : {len(self)}", f"Synced : {synced}", - f'URL : {self._data["community"]["links"]["html"]}', + f'URL : {self._data["community"]["links"]["self_html"]}', "", "Datasets", "--------", @@ -696,14 +693,11 @@ def __repr__(self) -> str: Examples -------- >>> community = Community.from_id("colour-science-datasets-tests") - - # Doctests skip for Python 2.x compatibility. >>> print("\\n".join(repr(community).splitlines()[:4])) - ... # doctest: +SKIP Community( - {'community': {'created': '2019-06-09T10:45:47.999975+00:00', - 'curation_policy': '', - 'description': '', + {'community': {'access': {'member_policy': 'open', + 'record_policy': 'open', + 'review_policy': 'open', """ data = "\n".join( @@ -737,9 +731,7 @@ def __getitem__(self, item: str | Any) -> Any: Examples -------- >>> community = Community.from_id("colour-science-datasets-tests") - - # Doctests skip for Python 2.x compatibility. - >>> community["3245883"].title # doctest: +SKIP + >>> community["3245883"].title 'Camera Spectral Sensitivity Database - Jiang et al. (2013)' """ @@ -756,7 +748,6 @@ def __iter__(self) -> Generator: Examples -------- - # Doctests skip for Python 2.x compatibility. 
>>> for record in Community.from_id("colour-science-datasets-tests"): ... print(record) # doctest: +SKIP ... @@ -811,9 +802,7 @@ def from_id( Examples -------- >>> community = Community.from_id("colour-science-datasets-tests") - - # Doctests skip for Python 2.x compatibility. - >>> community["3245883"].title # doctest: +SKIP + >>> community["3245883"].title 'Camera Spectral Sensitivity Database - Jiang et al. (2013)' """ @@ -828,13 +817,6 @@ def from_id( community_url = ( f"{configuration.api_url}/communities/{configuration.community}" ) - # NOTE: Retrieving 512 datasets at most. This should cover needs for - # the foreseeable future. There is likely an undocumented hard limit on - # "Zenodo" server side. - records_url = ( - f"{configuration.api_url}/records/" - f"?q=communities:{configuration.community}&size=512" - ) community_json_filename = os.path.join( configuration.repository, @@ -846,7 +828,9 @@ def from_id( try: community_data = json_open(community_url, retries) - records_data = json_open(records_url, retries) + records_data = json_open( + community_data["links"]["records"], retries + ) for key, value in { community_json_filename: community_data, diff --git a/colour_datasets/utilities/common.py b/colour_datasets/utilities/common.py index d72ea11..df57b03 100644 --- a/colour_datasets/utilities/common.py +++ b/colour_datasets/utilities/common.py @@ -213,7 +213,6 @@ def json_open(url: str, retries: int = 3) -> Dict: Examples -------- - # Doctests skip for Python 2.x compatibility. >>> json_open("https://zenodo.org/api/records/3245883") ... # doctest: +SKIP '{"conceptdoi":"10.5281/zenodo.3245882"' diff --git a/colour_datasets/utilities/spreadsheet.py b/colour_datasets/utilities/spreadsheet.py index da7237a..1077eb5 100644 --- a/colour_datasets/utilities/spreadsheet.py +++ b/colour_datasets/utilities/spreadsheet.py @@ -142,8 +142,7 @@ def index_to_row(index: int) -> str: Examples -------- - # Doctests skip for Python 2.x compatibility. 
- >>> index_to_row(0) # doctest: +SKIP + >>> index_to_row(0) '1' """ @@ -189,8 +188,7 @@ def index_to_column(index: int) -> str: Examples -------- - # Doctests skip for Python 2.x compatibility. - >>> index_to_column(0) # doctest: +SKIP + >>> index_to_column(0) 'A' """