diff --git a/betty/app/__init__.py b/betty/app/__init__.py index 0f2bd1453..57b394e8e 100644 --- a/betty/app/__init__.py +++ b/betty/app/__init__.py @@ -9,6 +9,7 @@ from typing import TYPE_CHECKING, Mapping, Self, final import aiohttp +from aiohttp import ClientTimeout from reactives.instance import ReactiveInstance from reactives.instance.property import reactive_property @@ -332,7 +333,13 @@ def json_encoder(self) -> type[JSONEncoder]: @reactive_property(on_trigger_delete=True) def http_client(self) -> aiohttp.ClientSession: if not self._http_client: - self._http_client = aiohttp.ClientSession(connector=aiohttp.TCPConnector(limit_per_host=5)) + self._http_client = aiohttp.ClientSession( + timeout=ClientTimeout(9), + connector=aiohttp.TCPConnector(limit_per_host=5), + headers={ + 'User-Agent': f'Betty (https://github.com/bartfeenstra/betty) on behalf of {self._project.configuration.base_url}{self._project.configuration.root_path}', + }, + ) weakref.finalize(self, sync(self._http_client.close)) return self._http_client diff --git a/betty/assets/betty.pot b/betty/assets/betty.pot index eee62c371..cb016aeb9 100644 --- a/betty/assets/betty.pot +++ b/betty/assets/betty.pot @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: Betty VERSION\n" "Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: 2024-01-03 16:22+0000\n" +"POT-Creation-Date: 2024-01-08 19:21+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" @@ -316,6 +316,9 @@ msgstr "" msgid "Descendant names include" msgstr "" +msgid "Description, licensing, and image history" +msgstr "" + msgid "Determine if people can be proven to have died. If not, mark them and their associated entities private." msgstr "" @@ -389,6 +392,9 @@ msgstr "" msgid "Files" msgstr "" +msgid "Find out more about this image on Wikimedia Commons." +msgstr "" + msgid "Follow Betty on Twitter and Github." msgstr "" diff --git a/betty/assets/locale/fr-FR/betty.po b/betty/assets/locale/fr-FR/betty.po index 70f5f758a..bb24b5afa 100644 --- a/betty/assets/locale/fr-FR/betty.po +++ b/betty/assets/locale/fr-FR/betty.po @@ -7,7 +7,7 @@ msgid "" msgstr "" "Project-Id-Version: PROJECT VERSION\n" "Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: 2024-01-03 16:22+0000\n" +"POT-Creation-Date: 2024-01-08 19:21+0000\n" "PO-Revision-Date: 2020-11-27 19:49+0100\n" "Last-Translator: \n" "Language: fr\n" @@ -381,6 +381,9 @@ msgstr "" msgid "Descendant names include" msgstr "Les noms de descendants incluent" +msgid "Description, licensing, and image history" +msgstr "" + msgid "" "Determine if people can be proven to have died. If not, mark them and " "their associated entities private." @@ -462,6 +465,9 @@ msgstr "" msgid "Files" msgstr "" +msgid "Find out more about this image on Wikimedia Commons." +msgstr "" + msgid "" "Follow Betty on Twitter" " and Github." @@ -882,8 +888,8 @@ msgstr "" msgid "This person's name is unavailable to protect their privacy." msgstr "" -"Le nom de cette personne n'est pas disponibles afin de " -"protéger sa vie privée." +"Le nom de cette personne n'est pas disponibles afin de protéger sa vie " +"privée." msgid "This person's name is unknown." msgstr "Le nom de la personne est inconnu." diff --git a/betty/assets/locale/nl-NL/betty.po b/betty/assets/locale/nl-NL/betty.po index 1a0486101..ba658753a 100644 --- a/betty/assets/locale/nl-NL/betty.po +++ b/betty/assets/locale/nl-NL/betty.po @@ -7,7 +7,7 @@ msgid "" msgstr "" "Project-Id-Version: PROJECT VERSION\n" "Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: 2024-01-03 16:22+0000\n" +"POT-Creation-Date: 2024-01-08 19:21+0000\n" "PO-Revision-Date: 2022-04-08 01:58+0100\n" "Last-Translator: \n" "Language: nl\n" @@ -410,6 +410,9 @@ msgstr "Afleiden..." msgid "Descendant names include" msgstr "De nakomelingen heten" +msgid "Description, licensing, and image history" +msgstr "Beschrijving, licentie, en afbeeldingsgeschiedenis" + msgid "" "Determine if people can be proven to have died. If not, mark them and " "their associated entities private." @@ -491,6 +494,9 @@ msgstr "Bestandspad" msgid "Files" msgstr "Bestanden" +msgid "Find out more about this image on Wikimedia Commons." +msgstr "Vind meer informatie over deze afbeelding op Wikimedia Commons." + msgid "" "Follow Betty on Twitter" " and Github." diff --git a/betty/assets/locale/uk/betty.po b/betty/assets/locale/uk/betty.po index 1ceb8dc2d..e94c44932 100644 --- a/betty/assets/locale/uk/betty.po +++ b/betty/assets/locale/uk/betty.po @@ -7,7 +7,7 @@ msgid "" msgstr "" "Project-Id-Version: Betty VERSION\n" "Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: 2024-01-03 16:22+0000\n" +"POT-Creation-Date: 2024-01-08 19:21+0000\n" "PO-Revision-Date: 2020-05-02 22:29+0100\n" "Last-Translator: FULL NAME \n" "Language: uk\n" @@ -382,6 +382,9 @@ msgstr "" msgid "Descendant names include" msgstr "Імена нащадків є" +msgid "Description, licensing, and image history" +msgstr "" + msgid "" "Determine if people can be proven to have died. If not, mark them and " "their associated entities private." @@ -463,6 +466,9 @@ msgstr "" msgid "Files" msgstr "" +msgid "Find out more about this image on Wikimedia Commons." +msgstr "" + msgid "" "Follow Betty on Twitter" " and Github." diff --git a/betty/extension/cotton_candy/assets/betty.extension.npm._Npm/src/search.scss b/betty/extension/cotton_candy/assets/betty.extension.npm._Npm/src/search.scss index a7d6fd220..b70b116ba 100644 --- a/betty/extension/cotton_candy/assets/betty.extension.npm._Npm/src/search.scss +++ b/betty/extension/cotton_candy/assets/betty.extension.npm._Npm/src/search.scss @@ -129,6 +129,8 @@ .search-result-preview .image { border: 1px #eee solid; + height: 45px; + width: 45px; } .search-result-file-type { diff --git a/betty/extension/cotton_candy/assets/templates/entity/page--place.html.j2 b/betty/extension/cotton_candy/assets/templates/entity/page--place.html.j2 index b3938a296..3cb5b60c4 100644 --- a/betty/extension/cotton_candy/assets/templates/entity/page--place.html.j2 +++ b/betty/extension/cotton_candy/assets/templates/entity/page--place.html.j2 @@ -38,4 +38,8 @@ {% set events = (places + [place]) | unique | map(attribute='events') | flatten | select('public') | rejectattr('date', 'none') | selectattr('date.comparable') | list %} {% include 'timeline.html.j2' %} + + {% with files = place.associated_files %} + {% include 'media.html.j2' %} + {% endwith %} {% endblock %} \ No newline at end of file diff --git a/betty/extension/cotton_candy/assets/templates/search/result-person.html.j2 b/betty/extension/cotton_candy/assets/templates/search/result-person.html.j2 index da323bdfd..7dcac1253 100644 --- a/betty/extension/cotton_candy/assets/templates/search/result-person.html.j2 +++ b/betty/extension/cotton_candy/assets/templates/search/result-person.html.j2 @@ -1,16 +1 @@ -{% set embedded=True %} - -
- {% include 'entity/label--person.html.j2' %} - {% include 'entity/meta--person.html.j2' %} -
- {% set files = entity.files | select('public') | list %} - {% if files | length > 0 %} - {% set file = files | first %} - {% if file.media_type and file.media_type.type == 'image' %} -
- {{ file.description }} -
- {% endif %} - {% endif %} -
+{% include 'search/result-with-image.html.j2' %} diff --git a/betty/extension/cotton_candy/assets/templates/search/result-place.html.j2 b/betty/extension/cotton_candy/assets/templates/search/result-place.html.j2 index 1f2717296..7dcac1253 100644 --- a/betty/extension/cotton_candy/assets/templates/search/result-place.html.j2 +++ b/betty/extension/cotton_candy/assets/templates/search/result-place.html.j2 @@ -1,7 +1 @@ -{% set embedded=True %} - -
- {% include 'entity/label--place.html.j2' %} - {% include 'entity/meta--place.html.j2' %} -
-
+{% include 'search/result-with-image.html.j2' %} diff --git a/betty/extension/cotton_candy/assets/templates/search/result-with-image.html.j2 b/betty/extension/cotton_candy/assets/templates/search/result-with-image.html.j2 new file mode 100644 index 000000000..a26f0986d --- /dev/null +++ b/betty/extension/cotton_candy/assets/templates/search/result-with-image.html.j2 @@ -0,0 +1,16 @@ +{% set embedded=True %} + +
+ {% include ['entity/label--' + (entity | entity_type_name | camel_case_to_kebab_case) + '.html.j2', 'entity/label.html.j2'] %} + {% include ['entity/meta--' + (entity | entity_type_name | camel_case_to_kebab_case) + '.html.j2', 'entity/meta.html.j2'] ignore missing %} +
+ {% set files = entity.files | select('public') | list %} + {% if files | length > 0 %} + {% set file = files | first %} + {% if file.media_type and file.media_type.type == 'image' %} +
+ {{ file.description }} +
+ {% endif %} + {% endif %} +
diff --git a/betty/extension/wikipedia/__init__.py b/betty/extension/wikipedia/__init__.py index 581cdc94d..27eedae90 100644 --- a/betty/extension/wikipedia/__init__.py +++ b/betty/extension/wikipedia/__init__.py @@ -8,46 +8,37 @@ from reactives.instance import ReactiveInstance from reactives.instance.property import reactive_property +from betty import wikipedia from betty.app.extension import UserFacingExtension from betty.asyncio import gather from betty.jinja2 import Jinja2Provider, context_localizer from betty.load import PostLoader from betty.locale import negotiate_locale, Str from betty.model.ancestry import Link -from betty.wikipedia import _Retriever, _Populator, Entry, _parse_url, NotAnEntryError, RetrievalError +from betty.wikipedia import Summary, _parse_url, NotAPageError, RetrievalError class _Wikipedia(UserFacingExtension, Jinja2Provider, PostLoader, ReactiveInstance): def __init__(self, *args: Any, **kwargs: Any): super().__init__(*args, **kwargs) - self.__retriever: _Retriever | None = None - self.__populator: _Populator | None = None + self.__retriever: wikipedia._Retriever | None = None + self.__populator: wikipedia._Populator | None = None async def post_load(self) -> None: - await self._populator.populate() + populator = wikipedia._Populator(self.app, self._retriever) + await populator.populate() @property @reactive_property(on_trigger_delete=True) - def _retriever(self) -> _Retriever: + def _retriever(self) -> wikipedia._Retriever: if self.__retriever is None: - self.__retriever = _Retriever(self.app.http_client, self.cache_directory_path) + self.__retriever = wikipedia._Retriever(self.app.http_client, self.cache_directory_path) return self.__retriever @_retriever.deleter def _retriever(self) -> None: self.__retriever = None - @property - @reactive_property(on_trigger_delete=True) - def _populator(self) -> _Populator: - if self.__populator is None: - self.__populator = _Populator(self.app, self._retriever) - return self.__populator - - @_populator.deleter - def _populator(self) -> None: - self.__populator = None - @property def filters(self) -> dict[str, Callable[..., Any]]: return { @@ -55,7 +46,7 @@ def filters(self) -> dict[str, Callable[..., Any]]: } @pass_context - async def _filter_wikipedia_links(self, context: Context, links: Iterable[Link]) -> Iterable[Entry]: + async def _filter_wikipedia_links(self, context: Context, links: Iterable[Link]) -> Iterable[Summary]: return filter( None, await gather(*( @@ -68,15 +59,15 @@ async def _filter_wikipedia_links(self, context: Context, links: Iterable[Link]) )), ) - async def _filter_wikipedia_link(self, locale: str, link: Link) -> Entry | None: + async def _filter_wikipedia_link(self, locale: str, link: Link) -> Summary | None: try: entry_language, entry_name = _parse_url(link.url) - except NotAnEntryError: + except NotAPageError: return None if negotiate_locale(locale, {entry_language}) is None: return None try: - return await self._retriever.get_entry(entry_language, entry_name) + return await self._retriever.get_summary(entry_language, entry_name) except RetrievalError: return None diff --git a/betty/jinja2.py b/betty/jinja2.py index cec66d6ba..6c805d338 100644 --- a/betty/jinja2.py +++ b/betty/jinja2.py @@ -545,6 +545,8 @@ async def _filter_image( if file.media_type: if file.media_type.type == 'image': + if 'svg+xml' == file.media_type.subtype: + return await _filter_file(context, file) task_callable = _execute_filter_image_image destination_name += file.path.suffix elif file.media_type.type == 'application' and file.media_type.subtype == 'pdf': @@ -558,7 +560,7 @@ async def _filter_image( task_id = f'filter_image:{file.id}:{width or ""}:{height or ""}' if task_context is None or task_context.claim(task_id): cache_directory_path = CACHE_DIRECTORY_PATH / 'image' - await task_callable(file.path, cache_directory_path, file_directory_path, destination_name, width, height) + await task_callable(file, cache_directory_path, file_directory_path, destination_name, width, height) destination_public_path = '/file/%s' % destination_name @@ -566,25 +568,26 @@ async def _filter_image( async def _execute_filter_image_image( - file_path: Path, + file: File, cache_directory_path: Path, destination_directory_path: Path, destination_name: str, width: int | None, height: int | None, ) -> None: + assert file.media_type with warnings.catch_warnings(): # Ignore warnings about decompression bombs, because we know where the files come from. warnings.simplefilter('ignore', category=DecompressionBombWarning) - image = Image.open(file_path) + image = Image.open(file.path, formats=[file.media_type.subtype]) try: - await _execute_filter_image(image, file_path, cache_directory_path, destination_directory_path, destination_name, width, height) + await _execute_filter_image(image, file, cache_directory_path, destination_directory_path, destination_name, width, height) finally: image.close() async def _execute_filter_image_application_pdf( - file_path: Path, + file: File, cache_directory_path: Path, destination_directory_path: Path, destination_name: str, @@ -594,24 +597,25 @@ async def _execute_filter_image_application_pdf( with warnings.catch_warnings(): # Ignore warnings about decompression bombs, because we know where the files come from. warnings.simplefilter('ignore', category=DecompressionBombWarning) - image = convert_from_path(file_path, fmt='jpeg')[0] + image = convert_from_path(file.path, fmt='jpeg')[0] try: - await _execute_filter_image(image, file_path, cache_directory_path, destination_directory_path, destination_name, width, height) + await _execute_filter_image(image, file, cache_directory_path, destination_directory_path, destination_name, width, height) finally: image.close() async def _execute_filter_image( image: Image, - file_path: Path, + file: File, cache_directory_path: Path, destination_directory_path: Path, destination_name: str, width: int | None, height: int | None, ) -> None: + assert file.media_type await makedirs(destination_directory_path, exist_ok=True) - cache_file_path = cache_directory_path / ('%s-%s' % (hashfile(file_path), destination_name)) + cache_file_path = cache_directory_path / ('%s-%s' % (hashfile(file.path), destination_name)) destination_file_path = destination_directory_path / destination_name try: @@ -632,7 +636,7 @@ async def _execute_filter_image( converted = _resizeimage.resize_height(image, height) else: raise ValueError('Width and height cannot both be None.') - converted.save(cache_file_path) + converted.save(cache_file_path, format=file.media_type.subtype) await makedirs(destination_directory_path, exist_ok=True) await link_or_copy(cache_file_path, destination_file_path) diff --git a/betty/model/ancestry.py b/betty/model/ancestry.py index c08299649..f35597c51 100644 --- a/betty/model/ancestry.py +++ b/betty/model/ancestry.py @@ -323,7 +323,7 @@ def citations(self) -> None: @many_to_many('entities', 'betty.model.ancestry.HasFiles', 'files') -class File(Described, HasPrivacy, HasMediaType, HasNotes, HasCitations, UserFacingEntity, Entity): +class File(Described, HasPrivacy, HasLinks, HasMediaType, HasNotes, HasCitations, UserFacingEntity, Entity): def __init__( self, path: Path, @@ -336,6 +336,7 @@ def __init__( privacy: Privacy | None = None, public: bool | None = None, private: bool | None = None, + links: set[Link] | None = None, ): super().__init__( id, @@ -346,6 +347,7 @@ def __init__( privacy=privacy, public=public, private=private, + links=links, ) self._path = path @@ -670,7 +672,7 @@ def entity_type_label_plural(cls) -> Str: @one_to_many('events', 'betty.model.ancestry.Event', 'place') @one_to_many('enclosed_by', 'betty.model.ancestry.Enclosure', 'encloses') @one_to_many('encloses', 'betty.model.ancestry.Enclosure', 'enclosed_by') -class Place(HasLinks, UserFacingEntity, Entity): +class Place(HasLinks, HasFiles, UserFacingEntity, Entity): def __init__( self, *, @@ -758,6 +760,12 @@ def label(self) -> Str: return Str.plain(self.names[0].name) return super().label + @property + def associated_files(self) -> Iterable[File]: + yield from self.files + for event in self.events: + yield from event.files + class PresenceRole: @classmethod diff --git a/betty/tests/extension/wikipedia/test___init__.py b/betty/tests/extension/wikipedia/test___init__.py index 2db4e3713..c473da341 100644 --- a/betty/tests/extension/wikipedia/test___init__.py +++ b/betty/tests/extension/wikipedia/test___init__.py @@ -1,44 +1,37 @@ from __future__ import annotations -from typing import Any - -from aioresponses import aioresponses +from pytest_mock import MockerFixture from betty.app import App from betty.extension import Wikipedia from betty.load import load -from betty.media_type import MediaType -from betty.model.ancestry import Source, Link +from betty.model.ancestry import Link from betty.project import ExtensionConfiguration from betty.task import Context from betty.tests import patch_cache +from betty.wikipedia import Summary class TestWikipedia: @patch_cache - async def test_filter(self, aioresponses: aioresponses) -> None: - entry_url = 'https://en.wikipedia.org/wiki/Amsterdam' + async def test_filter(self, mocker: MockerFixture) -> None: + language = 'en' + name = 'Amsterdam' + title = 'Amstelredam' + extract = 'De hoofdstad van Nederland.' + summary = Summary(language, name, title, extract) + + m_get_summary = mocker.patch('betty.wikipedia._Retriever.get_summary') + m_get_summary.return_value = summary + + page_url = f'https://{language}.wikipedia.org/wiki/{name}' links = [ - Link(entry_url), + Link(page_url), # Add a link to Wikipedia, but using a locale that's not used by the app, to test it's ignored. Link('https://nl.wikipedia.org/wiki/Amsterdam'), # Add a link that doesn't point to Wikipedia at all to test it's ignored. Link('https://example.com'), ] - api_url = 'https://en.wikipedia.org/w/api.php?action=query&titles=Amsterdam&prop=extracts&exintro&format=json&formatversion=2' - title = 'Amstelredam' - extract = 'De hoofdstad van Nederland.' - api_response_body = { - 'query': { - 'pages': [ - { - 'title': title, - 'extract': extract, - }, - ], - } - } - aioresponses.get(api_url, payload=api_response_body) async with App() as app: app.project.configuration.extensions.append(ExtensionConfiguration(Wikipedia)) @@ -47,50 +40,16 @@ async def test_filter(self, aioresponses: aioresponses) -> None: task_context=Context(), links=links, ) + + m_get_summary.assert_called_once() assert extract == actual @patch_cache - async def test_post_load(self, aioresponses: aioresponses) -> None: - resource = Source( - id='the_source', - name='The Source', - ) - link = Link('https://en.wikipedia.org/wiki/Amsterdam') - resource.links.add(link) - entry_title = 'Amstelredam' - entry_extract = 'Capitol of the Netherlands' - entry_api_response_body = { - 'query': { - 'pages': [ - { - 'title': entry_title, - 'extract': entry_extract, - }, - ], - } - } - entry_api_url = 'https://en.wikipedia.org/w/api.php?action=query&titles=Amsterdam&prop=extracts&exintro&format=json&formatversion=2' - aioresponses.get(entry_api_url, payload=entry_api_response_body) - translations_api_response_body: Any = { - 'query': { - 'pages': [ - { - 'langlinks': [], - }, - ], - }, - } - translations_api_url = 'https://en.wikipedia.org/w/api.php?action=query&titles=Amsterdam&prop=langlinks&lllimit=500&format=json&formatversion=2' - aioresponses.get(translations_api_url, payload=translations_api_response_body) + async def test_post_load(self, mocker: MockerFixture) -> None: + m_populate = mocker.patch('betty.wikipedia._Populator.populate') async with App() as app: app.project.configuration.extensions.append(ExtensionConfiguration(Wikipedia)) - app.project.ancestry.add(resource) await load(app) - assert 1 == len(resource.links) - assert entry_title == link.label - assert 'en' == link.locale - assert MediaType('text/html') == link.media_type - assert link.description is not None - assert 'external' == link.relationship + m_populate.assert_called_once() diff --git a/betty/tests/test_wikipedia.py b/betty/tests/test_wikipedia.py index 48de5a0ee..557fbb87e 100644 --- a/betty/tests/test_wikipedia.py +++ b/betty/tests/test_wikipedia.py @@ -24,7 +24,7 @@ from betty.app import App from betty.model.ancestry import Source, Link, Citation, Place -from betty.wikipedia import Entry, _Retriever, NotAnEntryError, _parse_url, RetrievalError, _Populator +from betty.wikipedia import Summary, _Retriever, NotAPageError, _parse_url, RetrievalError, _Populator class TestParseUrl: @@ -46,23 +46,23 @@ async def test_should_return(self, expected: tuple[str, str], url: str) -> None: 'https://en.wikipedia.org/w/index.php?title=Amsterdam&action=edit', ]) async def test_should_error(self, url: str) -> None: - with pytest.raises(NotAnEntryError): + with pytest.raises(NotAPageError): _parse_url(url) -class TestEntry: +class TestSummary: async def test_url(self) -> None: - sut = Entry('nl', 'Amsterdam', 'Title for Amsterdam', 'Content for Amsterdam') + sut = Summary('nl', 'Amsterdam', 'Title for Amsterdam', 'Content for Amsterdam') assert 'https://nl.wikipedia.org/wiki/Amsterdam' == sut.url async def test_title(self) -> None: title = 'Title for Amsterdam' - sut = Entry('nl', 'Amsterdam', title, 'Content for Amsterdam') + sut = Summary('nl', 'Amsterdam', title, 'Content for Amsterdam') assert title == sut.title async def test_content(self) -> None: content = 'Content for Amsterdam' - sut = Entry('nl', 'Amsterdam', 'Title for Amsterdam', content) + sut = Summary('nl', 'Amsterdam', 'Title for Amsterdam', content) assert content == sut.content @@ -93,9 +93,9 @@ async def test_get_translations_should_return( mocker: MockerFixture, ) -> None: mocker.patch('sys.stderr') - entry_language = 'en' - entry_name = 'Amsterdam' - api_url = 'https://%s.wikipedia.org/w/api.php?action=query&titles=%s&prop=langlinks&lllimit=500&format=json&formatversion=2' % (entry_language, entry_name) + summary_language = 'en' + summary_name = 'Amsterdam' + api_url = f'https://{summary_language}.wikipedia.org/w/api.php?action=query&titles={summary_name}&prop=langlinks|pageimages|coordinates&lllimit=500&piprop=name&pilicense=free&pilimit=1&coprimary=primary&format=json&formatversion=2' api_response_body = { 'query': { 'pages': [response_pages_json], @@ -104,7 +104,7 @@ async def test_get_translations_should_return( aioresponses.get(api_url, payload=api_response_body) async with TemporaryDirectory() as cache_directory_path_str: async with aiohttp.ClientSession() as session: - translations = await _Retriever(session, Path(cache_directory_path_str)).get_translations(entry_language, entry_name) + translations = await _Retriever(session, Path(cache_directory_path_str)).get_translations(summary_language, summary_name) assert expected == translations async def test_get_translations_with_client_error_should_raise_retrieval_error( @@ -113,14 +113,14 @@ async def test_get_translations_with_client_error_should_raise_retrieval_error( mocker: MockerFixture, ) -> None: mocker.patch('sys.stderr') - entry_language = 'en' - entry_name = 'Amsterdam' - api_url = 'https://%s.wikipedia.org/w/api.php?action=query&titles=%s&prop=langlinks&lllimit=500&format=json&formatversion=2' % (entry_language, entry_name) + summary_language = 'en' + summary_name = 'Amsterdam' + api_url = 'https://%s.wikipedia.org/w/api.php?action=query&titles=%s&prop=langlinks&lllimit=500&format=json&formatversion=2' % (summary_language, summary_name) aioresponses.get(api_url, exception=aiohttp.ClientError()) async with TemporaryDirectory() as cache_directory_path_str: with pytest.raises(RetrievalError): async with aiohttp.ClientSession() as session: - await _Retriever(session, Path(cache_directory_path_str)).get_translations(entry_language, entry_name) + await _Retriever(session, Path(cache_directory_path_str)).get_translations(summary_language, summary_name) async def test_get_translations_with_invalid_json_response_should_raise_retrieval_error( self, @@ -128,14 +128,14 @@ async def test_get_translations_with_invalid_json_response_should_raise_retrieva mocker: MockerFixture, ) -> None: mocker.patch('sys.stderr') - entry_language = 'en' - entry_name = 'Amsterdam' - api_url = 'https://%s.wikipedia.org/w/api.php?action=query&titles=%s&prop=langlinks&lllimit=500&format=json&formatversion=2' % (entry_language, entry_name) + summary_language = 'en' + summary_name = 'Amsterdam' + api_url = 'https://%s.wikipedia.org/w/api.php?action=query&titles=%s&prop=langlinks&lllimit=500&format=json&formatversion=2' % (summary_language, summary_name) aioresponses.get(api_url, body='{Haha Im not rly JSON}') async with TemporaryDirectory() as cache_directory_path_str: with pytest.raises(RetrievalError): async with aiohttp.ClientSession() as session: - await _Retriever(session, Path(cache_directory_path_str)).get_translations(entry_language, entry_name) + await _Retriever(session, Path(cache_directory_path_str)).get_translations(summary_language, summary_name) @pytest.mark.parametrize('response_json', [ {}, @@ -160,42 +160,38 @@ async def test_get_translations_with_unexpected_json_response_should_raise_retri aioresponses: aioresponses, ) -> None: mocker.patch('sys.stderr') - entry_language = 'en' - entry_name = 'Amsterdam' - api_url = 'https://%s.wikipedia.org/w/api.php?action=query&titles=%s&prop=langlinks&lllimit=500&format=json&formatversion=2' % (entry_language, entry_name) + summary_language = 'en' + summary_name = 'Amsterdam' + api_url = 'https://%s.wikipedia.org/w/api.php?action=query&titles=%s&prop=langlinks&lllimit=500&format=json&formatversion=2' % (summary_language, summary_name) aioresponses.get(api_url, payload=response_json) async with TemporaryDirectory() as cache_directory_path_str: with pytest.raises(RetrievalError): async with aiohttp.ClientSession() as session: - await _Retriever(session, Path(cache_directory_path_str)).get_translations(entry_language, entry_name) + await _Retriever(session, Path(cache_directory_path_str)).get_translations(summary_language, summary_name) - async def test_get_entry_should_return(self, aioresponses: aioresponses) -> None: - entry_language = 'en' - entry_name = 'Amsterdam' - api_url = 'https://en.wikipedia.org/w/api.php?action=query&titles=Amsterdam&prop=extracts&exintro&format=json&formatversion=2' - entry_url = 'https://en.wikipedia.org/wiki/Amsterdam' + @pytest.mark.parametrize('extract_key', [ + 'extract', + 'extract_html', + ]) + async def test_get_summary_should_return(self, extract_key: str, aioresponses: aioresponses) -> None: + summary_language = 'en' + summary_name = 'Amsterdam' + api_url = f'https://{summary_language}.wikipedia.org/api/rest_v1/page/summary/{summary_name}' + summary_url = 'https://en.wikipedia.org/wiki/Amsterdam' title = 'Amstelredam' extract_1 = 'De hoofdstad van Nederland.' extract_4 = 'Niet de hoofdstad van Holland.' api_response_body_1 = { - 'query': { - 'pages': [ - { - 'title': title, - 'extract': extract_1, - }, - ], - } + 'titles': { + 'normalized': title, + }, + extract_key: extract_1, } api_response_body_4 = { - 'query': { - 'pages': [ - { - 'title': title, - 'extract': extract_4, - }, - ], - } + 'titles': { + 'normalized': title, + }, + extract_key: extract_4, } aioresponses.get(api_url, payload=api_response_body_1) aioresponses.get(api_url, exception=aiohttp.ClientError()) @@ -204,40 +200,40 @@ async def test_get_entry_should_return(self, aioresponses: aioresponses) -> None async with aiohttp.ClientSession() as session: retriever = _Retriever(session, Path(cache_directory_path_str), 1) # The first retrieval should make a successful request and set the cache. - entry_1 = await retriever.get_entry(entry_language, entry_name) + summary_1 = await retriever.get_summary(summary_language, summary_name) # The second retrieval should hit the cache from the first request. - entry_2 = await retriever.get_entry(entry_language, entry_name) + summary_2 = await retriever.get_summary(summary_language, summary_name) # The third retrieval should result in a failed request, and hit the cache from the first request. sleep(2) - entry_3 = await retriever.get_entry(entry_language, entry_name) + summary_3 = await retriever.get_summary(summary_language, summary_name) # The fourth retrieval should make a successful request and set the cache again. - entry_4 = await retriever.get_entry(entry_language, entry_name) + summary_4 = await retriever.get_summary(summary_language, summary_name) # The fifth retrieval should hit the cache from the fourth request. - entry_5 = await retriever.get_entry(entry_language, entry_name) - for entry in [entry_1, entry_2, entry_3]: - assert entry_url == entry.url - assert title == entry.title - assert extract_1 == entry.content - for entry in [entry_4, entry_5]: - assert entry_url == entry.url - assert title == entry.title - assert extract_4 == entry.content - - async def test_get_entry_with_client_error_should_raise_retrieval_error( + summary_5 = await retriever.get_summary(summary_language, summary_name) + for summary in [summary_1, summary_2, summary_3]: + assert summary_url == summary.url + assert title == summary.title + assert extract_1 == summary.content + for summary in [summary_4, summary_5]: + assert summary_url == summary.url + assert title == summary.title + assert extract_4 == summary.content + + async def test_get_summary_with_client_error_should_raise_retrieval_error( self, aioresponses: aioresponses, mocker: MockerFixture, ) -> None: mocker.patch('sys.stderr') - entry_language = 'en' - entry_name = 'Amsterdam' + summary_language = 'en' + summary_name = 'Amsterdam' api_url = 'https://en.wikipedia.org/w/api.php?action=query&titles=Amsterdam&prop=extracts&exintro&format=json&formatversion=2' aioresponses.get(api_url, exception=aiohttp.ClientError()) async with TemporaryDirectory() as cache_directory_path_str: async with aiohttp.ClientSession() as session: retriever = _Retriever(session, Path(cache_directory_path_str)) with pytest.raises(RetrievalError): - await retriever.get_entry(entry_language, entry_name) + await retriever.get_summary(summary_language, summary_name) @pytest.mark.parametrize('expected, response_pages_json', [ (None, {},), @@ -272,9 +268,9 @@ async def test_get_place_coordinates_should_return( mocker: MockerFixture, ) -> None: mocker.patch('sys.stderr') - entry_language = 'en' - entry_name = 'Amsterdam' - api_url = f'https://{entry_language}.wikipedia.org/w/api.php?action=query&titles={entry_name}&prop=coordinates&coprimary=primary&format=json&formatversion=2' + summary_language = 'en' + summary_name = 'Amsterdam' + api_url = f'https://{summary_language}.wikipedia.org/w/api.php?action=query&titles={summary_name}&prop=langlinks|pageimages|coordinates&lllimit=500&piprop=name&pilicense=free&pilimit=1&coprimary=primary&format=json&formatversion=2' api_response_body = { 'query': { 'pages': [response_pages_json], @@ -283,7 +279,7 @@ async def test_get_place_coordinates_should_return( aioresponses.get(api_url, payload=api_response_body) async with TemporaryDirectory() as cache_directory_path_str: async with aiohttp.ClientSession() as session: - actual = await _Retriever(session, Path(cache_directory_path_str)).get_place_coordinates(entry_language, entry_name) + actual = await _Retriever(session, Path(cache_directory_path_str)).get_place_coordinates(summary_language, summary_name) assert expected == actual @@ -292,10 +288,10 @@ class TestPopulator: async def test_populate_link_should_convert_http_to_https(self, mocker: MockerFixture) -> None: m_retriever = mocker.patch('betty.wikipedia._Retriever') link = Link('http://en.wikipedia.org/wiki/Amsterdam') - entry_language = 'nl' + summary_language = 'nl' async with App() as app: sut = _Populator(app, m_retriever) - await sut.populate_link(link, entry_language) + await sut.populate_link(link, summary_language) assert 'https://en.wikipedia.org/wiki/Amsterdam' == link.url @pytest.mark.parametrize('expected, media_type', [ @@ -340,7 +336,7 @@ async def test_populate_link_should_set_relationship( await sut.populate_link(link, 'en') assert expected == link.relationship - @pytest.mark.parametrize('expected, entry_language, locale', [ + @pytest.mark.parametrize('expected, summary_language, locale', [ ('nl-NL', 'nl', 'nl-NL'), ('nl', 'nl', None), ('nl', 'en', 'nl'), @@ -349,16 +345,16 @@ async def test_populate_link_should_set_relationship( async def test_populate_link_should_set_locale( self, expected: str, - entry_language: str, + summary_language: str, locale: str | None, mocker: MockerFixture, ) -> None: m_retriever = mocker.patch('betty.wikipedia._Retriever') - link = Link('http://%s.wikipedia.org/wiki/Amsterdam' % entry_language) + link = Link('http://%s.wikipedia.org/wiki/Amsterdam' % summary_language) link.locale = locale async with App() as app: sut = _Populator(app, m_retriever) - await sut.populate_link(link, entry_language) + await sut.populate_link(link, summary_language) assert expected == link.locale @pytest.mark.parametrize('expected, description', [ @@ -377,10 +373,10 @@ async def test_populate_link_should_set_description( 'http://en.wikipedia.org/wiki/Amsterdam', description=description, ) - entry_language = 'en' + summary_language = 'en' async with App() as app: sut = _Populator(app, m_retriever) - await sut.populate_link(link, entry_language) + await sut.populate_link(link, summary_language) assert expected == link.description @pytest.mark.parametrize('expected, label', [ @@ -397,10 +393,10 @@ async def test_populate_link_should_set_label( m_retriever = mocker.patch('betty.wikipedia._Retriever') link = Link('http://en.wikipedia.org/wiki/Amsterdam') link.label = label - entry = Entry('en', 'The_city_of_Amsterdam', 'The city of Amsterdam', 'Amsterdam, such a lovely place!') + summary = Summary('en', 'The_city_of_Amsterdam', 'The city of Amsterdam', 'Amsterdam, such a lovely place!') async with App() as app: sut = _Populator(app, m_retriever) - await sut.populate_link(link, 'en', entry) + await sut.populate_link(link, 'en', summary) assert expected == link.label @patch_cache @@ -447,12 +443,13 @@ async def test_populate_should_ignore_non_wikipedia_links(self, mocker: MockerFi @patch_cache async def test_populate_should_populate_existing_link(self, mocker: MockerFixture) -> None: m_retriever = mocker.patch('betty.wikipedia._Retriever', spec=_Retriever, new_callable=AsyncMock) - entry_language = 'en' - entry_name = 'Amsterdam' - entry_title = 'Amsterdam' - entry_content = 'Capitol of the Netherlands' - entry = Entry(entry_language, entry_name, entry_title, entry_content) - m_retriever.get_entry.return_value = entry + summary_language = 'en' + summary_name = 'Amsterdam' + summary_title = 'Amsterdam' + summary_content = 'Capitol of the Netherlands' + summary = Summary(summary_language, summary_name, summary_title, summary_content) + m_retriever.get_summary.return_value = summary + m_retriever.get_image.return_value = None link = Link('https://en.wikipedia.org/wiki/Amsterdam') resource = Source( @@ -464,7 +461,7 @@ async def test_populate_should_populate_existing_link(self, mocker: MockerFixtur app.project.ancestry.add(resource) sut = _Populator(app, m_retriever) await sut.populate() - m_retriever.get_entry.assert_called_once_with(entry_language, entry_name) + m_retriever.get_summary.assert_called_once_with(summary_language, summary_name) assert 1 == len(resource.links) assert 'Amsterdam' == link.label assert 'en' == link.locale @@ -475,24 +472,24 @@ async def test_populate_should_populate_existing_link(self, mocker: MockerFixtur @patch_cache async def test_populate_should_add_translation_links(self, mocker: MockerFixture) -> None: m_retriever = mocker.patch('betty.wikipedia._Retriever', spec=_Retriever, new_callable=AsyncMock) - entry_language = 'en' - entry_name = 'Amsterdam' - entry_title = 'Amsterdam' - entry_content = 'Capitol of the Netherlands' - entry = Entry(entry_language, entry_name, entry_title, entry_content) - added_entry_language = 'nl' - added_entry_name = 'Amsterdam' - added_entry_title = 'Amsterdam' - added_entry_content = 'Hoofdstad van Nederland' - added_entry = Entry(added_entry_language, added_entry_name, added_entry_title, added_entry_content) - m_retriever.get_entry.side_effect = [ - entry, - added_entry + summary_language = 'en' + summary_name = 'Amsterdam' + summary_title = 'Amsterdam' + summary_content = 'Capitol of the Netherlands' + summary = Summary(summary_language, summary_name, summary_title, summary_content) + added_summary_language = 'nl' + added_summary_name = 'Amsterdam' + added_summary_title = 'Amsterdam' + added_summary_content = 'Hoofdstad van Nederland' + added_summary = Summary(added_summary_language, added_summary_name, added_summary_title, added_summary_content) + m_retriever.get_summary.side_effect = [ + summary, + added_summary ] - + m_retriever.get_image.return_value = None m_retriever.get_translations.return_value = { - entry_language: entry_name, - added_entry_language: added_entry_name, + summary_language: summary_name, + added_summary_language: added_summary_name, } link_en = Link('https://en.wikipedia.org/wiki/Amsterdam') @@ -509,11 +506,11 @@ async def test_populate_should_add_translation_links(self, mocker: MockerFixture sut = _Populator(app, m_retriever) await sut.populate() - m_retriever.get_entry.assert_has_calls([ - call(entry_language, entry_name), - call(added_entry_language, added_entry_name), + m_retriever.get_summary.assert_has_calls([ + call(summary_language, summary_name), + call(added_summary_language, added_summary_name), ]) - m_retriever.get_translations.assert_called_once_with(entry_language, entry_name) + m_retriever.get_translations.assert_called_once_with(summary_language, summary_name) assert 2 == len(resource.links) link_nl = resource.links.difference({link_en}).pop() assert 'Amsterdam' == link_nl.label @@ -525,12 +522,13 @@ async def test_populate_should_add_translation_links(self, mocker: MockerFixture @patch_cache async def test_populate_place_should_add_coordinates(self, mocker: MockerFixture) -> None: m_retriever = mocker.patch('betty.wikipedia._Retriever', spec=_Retriever, new_callable=AsyncMock) - entry_language = 'en' - entry_name = 'Almelo' + summary_language = 'en' + summary_name = 'Almelo' coordinates = Point(52.35, 6.66666667) m_retriever.get_place_coordinates.return_value = coordinates + m_retriever.get_image.return_value = None - link = Link(f'https://{entry_language}.wikipedia.org/wiki/{entry_name}') + link = Link(f'https://{summary_language}.wikipedia.org/wiki/{summary_name}') place = Place(links={link}) app = App() async with app: diff --git a/betty/wikipedia.py b/betty/wikipedia.py index b1bf1ecac..e10f4d59d 100644 --- a/betty/wikipedia.py +++ b/betty/wikipedia.py @@ -1,8 +1,10 @@ from __future__ import annotations +import asyncio import hashlib import json import logging +import mimetypes import re from contextlib import suppress from os.path import getmtime @@ -19,15 +21,14 @@ from betty.functools import filter_suppress from betty.locale import Localized, negotiate_locale, to_locale, get_data, LocaleNotFoundError, Localey from betty.media_type import MediaType -from betty.model import Entity -from betty.model.ancestry import Link, HasLinks, Place +from betty.model.ancestry import Link, HasLinks, Place, File, HasFiles class WikipediaError(BaseException): pass -class NotAnEntryError(WikipediaError, ValueError): +class NotAPageError(WikipediaError, ValueError): pass @@ -41,11 +42,11 @@ class RetrievalError(WikipediaError, RuntimeError): def _parse_url(url: str) -> tuple[str, str]: match = _URL_PATTERN.fullmatch(url) if match is None: - raise NotAnEntryError + raise NotAPageError return cast(tuple[str, str], match.groups()) -class Entry(Localized): +class Summary(Localized): def __init__(self, locale: str, name: str, title: str, content: str): super().__init__(locale=locale) self._name = name @@ -69,76 +70,155 @@ def content(self) -> str: return self._content +class Image: + def __init__( + self, + path: Path, + media_type: MediaType, + title: str, + wikimedia_commons_url: str, + ): + self._path = path + self._media_type = media_type + self._title = title + self._wikimedia_commons_url = wikimedia_commons_url + + @property + def path(self) -> Path: + return self._path + + @property + def media_type(self) -> MediaType: + return self._media_type + + @property + def title(self) -> str: + return self._title + + @property + def wikimedia_commons_url(self) -> str: + return self._wikimedia_commons_url + + class _Retriever: - def __init__(self, http_client: aiohttp.ClientSession, cache_directory_path: Path, ttl: int = 86400): + def __init__( + self, + http_client: aiohttp.ClientSession, + cache_directory_path: Path, + # Default to seven days. + ttl: int = 86400 * 7, + ): self._cache_directory_path = cache_directory_path self._cache_directory_path.mkdir(exist_ok=True, parents=True) self._ttl = ttl self._http_client = http_client + self._images: dict[str, Image | None] = {} - async def _request(self, url: str) -> Any: - cache_file_path = self._cache_directory_path / hashlib.md5(url.encode('utf-8')).hexdigest() + async def _request(self, url: str, extension: str | None = None) -> Any: + cache_file_path = self._cache_directory_path / hashlib.md5(url.encode("utf-8")).hexdigest() + if extension: + cache_file_path = cache_file_path.with_suffix(f'.{extension}') response_data = None with suppress(FileNotFoundError): if getmtime(cache_file_path) + self._ttl > time(): - async with aiofiles.open(cache_file_path, encoding='utf-8') as f: - json_data = await f.read() - response_data = json.loads(json_data) + async with aiofiles.open(cache_file_path, mode='r+b') as f: + response_data = await f.read() if response_data is None: logger = logging.getLogger(__name__) try: + logger.debug(f'Fetching {url}...') async with self._http_client.get(url) as response: - response_data = await response.json(encoding='utf-8') - json_data = await response.text() - async with aiofiles.open(cache_file_path, 'w', encoding='utf-8') as f: - await f.write(json_data) - except aiohttp.ClientError as e: - logger.warning('Could not successfully connect to Wikipedia at %s: %s' % (url, e)) - except ValueError as e: - logger.warning('Could not parse JSON content from Wikipedia at %s: %s' % (url, e)) + response_data = await response.read() + async with aiofiles.open(cache_file_path, 'w+b') as f: + await f.write(response_data) + except aiohttp.ClientError as error: + logger.warning(f'Could not successfully connect to Wikipedia at {url}: {error}') + except asyncio.TimeoutError: + logger.warning(f'Timeout when connecting to Wikipedia at {url}') if response_data is None: try: - async with aiofiles.open(cache_file_path, encoding='utf-8') as f: - json_data = await f.read() - response_data = json.loads(json_data) + async with aiofiles.open(cache_file_path, mode='r+b') as f: + response_data = await f.read() except FileNotFoundError: raise RetrievalError('Could neither fetch %s, nor find an old version in the cache.' % url) return response_data - async def _get_page_data(self, url: str) -> Any: - response_data = await self._request(url) + async def _get_query_api_data(self, url: str) -> dict[str, Any]: + api_data = json.loads(await self._request(url)) try: - return response_data['query']['pages'][0] + return api_data['query']['pages'][0] # type: ignore[no-any-return] except (LookupError, TypeError) as e: - raise RetrievalError('Could not successfully parse the JSON format returned by %s: %s' % (url, e)) + raise RetrievalError(f'Could not successfully parse the JSON format returned by {url}: {e}') - async def get_translations(self, entry_language: str, entry_name: str) -> dict[str, str]: - url = f'https://{entry_language}.wikipedia.org/w/api.php?action=query&titles={entry_name}&prop=langlinks&lllimit=500&format=json&formatversion=2' - page_data = await self._get_page_data(url) + async def _get_entry_query_api_data(self, language: str, name: str) -> dict[str, Any]: + return await self._get_query_api_data( + f'https://{language}.wikipedia.org/w/api.php?action=query&titles={name}&prop=langlinks|pageimages|coordinates&lllimit=500&piprop=name&pilicense=free&pilimit=1&coprimary=primary&format=json&formatversion=2' + ) + + async def get_translations(self, language: str, name: str) -> dict[str, str]: + api_data = await self._get_entry_query_api_data(language, name) try: - translations_data = page_data['langlinks'] + translations_data = api_data['langlinks'] except KeyError: # There may not be any translations. return {} return {translation_data['lang']: translation_data['title'] for translation_data in translations_data} - async def get_entry(self, language: str, name: str) -> Entry: - url = f'https://{language}.wikipedia.org/w/api.php?action=query&titles={name}&prop=extracts&exintro&format=json&formatversion=2' - page_data = await self._get_page_data(url) + async def get_summary(self, language: str, name: str) -> Summary: + api_data = json.loads(await self._request(f'https://{language}.wikipedia.org/api/rest_v1/page/summary/{name}')) try: - return Entry(language, name, page_data['title'], page_data['extract']) + return Summary( + language, + name, + api_data['titles']['normalized'], + api_data['extract_html'] if 'extract_html' in api_data else api_data['extract'], + ) except KeyError as e: - raise RetrievalError('Could not successfully parse the JSON content returned by %s: %s' % (url, e)) + raise RetrievalError(f'Could not successfully parse the JSON content: {e}') + + async def get_image(self, language: str, name: str) -> Image | None: + api_data = await self._get_entry_query_api_data(language, name) + try: + page_image_name = api_data['pageimage'] + except KeyError: + # There may not be any images. + return None + + if page_image_name in self._images: + return self._images[page_image_name] + + url = f'https://en.wikipedia.org/w/api.php?action=query&prop=imageinfo&titles=File:{page_image_name}&iiprop=url|mime|canonicaltitle&format=json&formatversion=2' + image_info_api_data = await self._get_query_api_data(url) + + try: + image_info = image_info_api_data['imageinfo'][0] + except KeyError as e: + raise RetrievalError(f'Could not successfully parse the JSON content returned by {url}: {e}') + + extension = None + for mimetypes_extension, mimetypes_media_type in mimetypes.types_map.items(): + if mimetypes_media_type == image_info['mime']: + extension = mimetypes_extension + await self._request(image_info['url'], extension) + + file_path = (self._cache_directory_path / hashlib.md5(image_info['url'].encode("utf-8")).hexdigest()).with_suffix(f'.{extension}') + image = Image( + file_path, + MediaType(image_info['mime']), + image_info['canonicaltitle'], + image_info['descriptionurl'], + ) + + return image async def get_place_coordinates(self, language: str, name: str) -> Point | None: - url = f'https://{language}.wikipedia.org/w/api.php?action=query&titles={name}&prop=coordinates&coprimary=primary&format=json&formatversion=2' - page_data = await self._get_page_data(url) + api_data = await self._get_entry_query_api_data(language, name) try: - coordinates = page_data['coordinates'][0] + coordinates = api_data['coordinates'][0] except KeyError: # There may not be any coordinates. return None @@ -147,13 +227,14 @@ async def get_place_coordinates(self, language: str, name: str) -> Point | None: return None return Point(coordinates['lat'], coordinates['lon']) except KeyError as e: - raise RetrievalError('Could not successfully parse the JSON content returned by %s: %s' % (url, e)) + raise RetrievalError(f'Could not successfully parse the JSON content: {e}') class _Populator: def __init__(self, app: App, retriever: _Retriever): self._app = app self._retriever = retriever + self._image_files: dict[Image, File] = {} async def populate(self) -> None: locales = set(map(lambda x: x.alias, self._app.project.configuration.locales.values())) @@ -161,11 +242,14 @@ async def populate(self) -> None: self._populate_entity(entity, locales) for entity in self._app.project.ancestry + if isinstance(entity, HasLinks) )) - async def _populate_entity(self, entity: Entity, locales: set[str]) -> None: - if isinstance(entity, HasLinks): - await self._populate_has_links(entity, locales) + async def _populate_entity(self, entity: HasLinks, locales: set[str]) -> None: + await self._populate_has_links(entity, locales) + + if isinstance(entity, HasFiles): + await self._populate_has_files(entity) if isinstance(entity, Place): await self._populate_place(entity) @@ -175,7 +259,7 @@ async def _populate_has_links(self, has_links: HasLinks, locales: set[str]) -> N for link in has_links.links: try: entry_locale, entry_name = _parse_url(link.url) - except NotAnEntryError: + except NotAPageError: continue else: try: @@ -188,7 +272,7 @@ async def _populate_has_links(self, has_links: HasLinks, locales: set[str]) -> N entry = None if link.label is None: with suppress(RetrievalError): - entry = await self._retriever.get_entry(entry_locale, entry_name) + entry = await self._retriever.get_summary(entry_locale, entry_name) await self.populate_link(link, entry_locale, entry) for entry_locale, entry_name in list(entry_links): @@ -205,7 +289,7 @@ async def _populate_has_links(self, has_links: HasLinks, locales: set[str]) -> N if (added_entry_locale, added_entry_name) in entry_links: continue try: - added_entry = await self._retriever.get_entry(added_entry_locale, added_entry_name) + added_entry = await self._retriever.get_summary(added_entry_locale, added_entry_name) except RetrievalError: continue added_link = Link(added_entry.url) @@ -213,7 +297,7 @@ async def _populate_has_links(self, has_links: HasLinks, locales: set[str]) -> N has_links.links.add(added_link) entry_links.add((added_entry_locale, added_entry_name)) - async def populate_link(self, link: Link, entry_locale: str, entry: Entry | None = None) -> None: + async def populate_link(self, link: Link, entry_locale: str, entry: Summary | None = None) -> None: if link.url.startswith('http:'): link.url = 'https:' + link.url[5:] if link.media_type is None: @@ -239,13 +323,46 @@ async def _populate_place_coordinates(self, place: Place) -> None: for link in place.links: try: entry_locale, entry_name = _parse_url(link.url) - except NotAnEntryError: + except NotAPageError: continue else: - try: - get_data(entry_locale) - except LocaleNotFoundError: - continue - else: - with suppress(RetrievalError): - place.coordinates = await self._retriever.get_place_coordinates(entry_locale, entry_name) + with suppress(RetrievalError): + place.coordinates = await self._retriever.get_place_coordinates(entry_locale, entry_name) + return + + async def _populate_has_files(self, has_files: HasFiles & HasLinks) -> None: + for link in has_files.links: + try: + entry_locale, entry_name = _parse_url(link.url) + except NotAPageError: + continue + else: + with suppress(RetrievalError): + image = await self._retriever.get_image(entry_locale, entry_name) + if not image: + continue + + try: + file = self._image_files[image] + except KeyError: + file = File( + id=f'wikipedia-{image.title}', + path=image.path, + media_type=image.media_type, + links={ + Link( + f'{image.wikimedia_commons_url}?uselang={locale_configuration.alias}', + label=self._app.localizers[locale_configuration.locale]._('Description, licensing, and image history'), + description=self._app.localizers[locale_configuration.locale]._('Find out more about this image on Wikimedia Commons.'), + locale=locale_configuration.locale, + media_type=MediaType('text/html'), + ) + for locale_configuration + in self._app.project.configuration.locales.values() + }, + ) + self._image_files[image] = file + + has_files.files.add(file) + self._app.project.ancestry.add(file) + return