diff --git a/iiif/profiles/mss.py b/iiif/profiles/mss.py index cc1e177..bd43f8f 100644 --- a/iiif/profiles/mss.py +++ b/iiif/profiles/mss.py @@ -55,6 +55,20 @@ def __init__(self, profile: str, name: str, cause: 'StoreStreamError'): f'{cause.url} due to {cause.cause}') +class AssetIDNotFound(IIIFServerException): + + def __init__(self, asset_id: str): + super().__init__(f"Asset ID {asset_id} not found", status_code=404) + + +class AssetIDDuplicateGUIDs(IIIFServerException): + + def __init__(self, asset_id: str, total: int): + super().__init__(f"Asset ID {asset_id} matched multiple images", status_code=404, + log=f"Asset ID {asset_id} matched multiple {total} GUIDs") + self.total = total + + class MSSStoreNoLength(IIIFServerException): def __init__(self, profile: str, name: str): @@ -84,6 +98,8 @@ def __init__(self, profile_name: str, name: str, doc: dict): self.emu_irn = doc['id'] # the name of the original file as it appears on the actual filesystem EMu is using self.original = doc['file'] + # the old MAM asset ID value, if there is one + self.old_asset_id = doc.get('old_asset_id') # a list of the EMu generated derivatives of the original file. 
The list should already be # in the ascending (width, height) order because the import process sorts it self.derivatives = doc.get('derivatives', []) @@ -205,7 +221,7 @@ async def get_info(self, name: str) -> MSSImageInfo: cause=cause, level=logging.ERROR ) raise e from cause - except asyncio.TimeoutError: + except asyncio.TimeoutError as cause: raise Timeout(cause=cause, log=f'Timeout while waiting for get_info lock on {name} in ' f'profile {self.name}') @@ -279,7 +295,7 @@ async def get_mss_doc(self, name: str) -> dict: except Exception as cause: e = ImageNotFound(self.name, name, cause=cause, level=logging.ERROR) raise e from cause - except asyncio.TimeoutError: + except asyncio.TimeoutError as cause: raise Timeout(cause=cause, log=f'Timeout while waiting for get_mss_doc lock on {name} ' f'in profile {self.name}') @@ -335,6 +351,20 @@ async def stream_original(self, name: str, chunk_size: int = 4096): log_error(e) raise e + async def convert_guid_to_asset_id(self, asset_id: str) -> str: + """ + Given an old MAM asset ID, see if we can convert it into a GUID. + + :param asset_id: the old MAM asset ID + :return: the matching GUID + """ + total, guid = await self.es_handler.lookup_guid(asset_id) + if total == 0: + raise AssetIDNotFound(asset_id) + elif total > 1: + raise AssetIDDuplicateGUIDs(asset_id, total) + return guid + async def close(self): """ Close down this profile. 
@@ -375,12 +405,30 @@ async def get_mss_doc(self, guid: str) -> Tuple[int, Optional[dict]]: search_url = f'{next(self.es_hosts)}/{self.mss_index}/_search' search = Search().filter('term', **{'guid.keyword': guid}).extra(size=1) async with self.es_session.post(search_url, json=search.to_dict()) as response: - text = await response.text(encoding='utf-8') - result = json.loads(text) + result = await response.json(encoding='utf-8') total = result['hits']['total'] first_doc = next((doc['_source'] for doc in result['hits']['hits']), None) return total, first_doc + async def lookup_guid(self, asset_id: str) -> Tuple[int, Optional[str]]: + """ + Given an old MAM asset ID, look up the associated GUID. + + :param asset_id: the old MAM asset ID + :return: the total hits and the GUID (or None if there are no hits) + """ + search_url = f'{next(self.es_hosts)}/{self.mss_index}/_search' + search = Search().filter('term', **{'old_asset_id.keyword': asset_id}).extra(size=1) + async with self.es_session.post(search_url, json=search.to_dict()) as response: + result = await response.json(encoding='utf-8') + total = result['hits']['total'] + first_doc = next((doc['_source'] for doc in result['hits']['hits']), None) + if first_doc: + guid = first_doc['guid'] + else: + guid = None + return total, guid + async def get_status(self) -> dict: """ Returns a dict describing the Elasticsearch cluster health. diff --git a/iiif/routers/mam.py b/iiif/routers/mam.py new file mode 100644 index 0000000..698335f --- /dev/null +++ b/iiif/routers/mam.py @@ -0,0 +1,37 @@ +from fastapi import APIRouter +from starlette.requests import Request +from starlette.responses import RedirectResponse + +from iiif.profiles.mss import MSSProfile +from iiif.state import state + +router = APIRouter() + + +@router.get('/mam/{asset_id}') +async def mam_redirect(request: Request, asset_id: str) -> RedirectResponse: + """ + When an old MAM URL is requested, it is now redirected to this endpoint. 
This endpoint looks up + the old asset ID and then redirects to the base simple image endpoint using the GUID instead of the + asset ID. + If the MSS is the default profile then the mss: is omitted, if not then it is included. + \f + + :param request: the request object + :param asset_id: the MAM asset ID + :return: a RedirectResponse to the MSS preview endpoint + """ + mss_profile: MSSProfile = state.get_profile('mss') + # convert the asset ID into a GUID + guid = await mss_profile.convert_guid_to_asset_id(asset_id) + + if state.config.default_profile_name == 'mss': + # if the default profile is the mss profile, redirect to just guid for nice clean URLs + identifier = guid + else: + # otherwise, create the full identifier with profile name + identifier = f'mss:{guid}' + # this seems to be the easiest way to ensure we redirect to a sensible path given we may be + # under some custom subpath via a proxy + path = request.url.path.replace(f'/mam/{asset_id}', f'/{identifier}') + return RedirectResponse(path) diff --git a/iiif/web.py b/iiif/web.py index 040469b..eb41477 100644 --- a/iiif/web.py +++ b/iiif/web.py @@ -9,7 +9,7 @@ from starlette.responses import JSONResponse, StreamingResponse from iiif.exceptions import handler, IIIFServerException -from iiif.routers import iiif, originals, simple +from iiif.routers import iiif, originals, simple, mam from iiif.state import state from iiif.utils import disable_bomb_errors @@ -87,5 +87,6 @@ async def get(): # order matters here btw! app.include_router(originals.router) +app.include_router(mam.router) app.include_router(simple.router) app.include_router(iiif.router)