From 102e9f681e996d96fa41b017d610d8b1ab10ce25 Mon Sep 17 00:00:00 2001 From: Tim DiLauro Date: Wed, 27 Nov 2024 08:33:25 -0500 Subject: [PATCH] Move isbn lookup to identifier module. (PP-1948) (#2196) --- ...56_playtime_tables_have_no_dependencies.py | 9 +-- .../manager/sqlalchemy/model/identifier.py | 30 ++++++++ .../manager/sqlalchemy/model/time_tracking.py | 39 +--------- .../sqlalchemy/model/test_identifier.py | 72 ++++++++++++++++++ .../sqlalchemy/model/test_time_tracking.py | 76 ------------------- 5 files changed, 108 insertions(+), 118 deletions(-) diff --git a/alembic/versions/20240318_3e43ed59f256_playtime_tables_have_no_dependencies.py b/alembic/versions/20240318_3e43ed59f256_playtime_tables_have_no_dependencies.py index 06de04213..6500074e9 100644 --- a/alembic/versions/20240318_3e43ed59f256_playtime_tables_have_no_dependencies.py +++ b/alembic/versions/20240318_3e43ed59f256_playtime_tables_have_no_dependencies.py @@ -13,11 +13,8 @@ from sqlalchemy.engine import Connection from sqlalchemy.orm.session import Session -from palace.manager.sqlalchemy.model.identifier import Identifier -from palace.manager.sqlalchemy.model.time_tracking import ( - _isbn_for_identifier, - _title_for_identifier, -) +from palace.manager.sqlalchemy.model.identifier import Identifier, isbn_for_identifier +from palace.manager.sqlalchemy.model.time_tracking import _title_for_identifier from palace.manager.sqlalchemy.util import get_one # revision identifiers, used by Alembic. @@ -231,7 +228,7 @@ def update_summary_isbn_and_title(session: Session) -> None: @cache def cached_isbn_lookup(identifier: Identifier) -> str | None: """Given an identifier, return its ISBN.""" - return _isbn_for_identifier(identifier) + return isbn_for_identifier(identifier) @cache diff --git a/src/palace/manager/sqlalchemy/model/identifier.py b/src/palace/manager/sqlalchemy/model/identifier.py index 7bcc8b9ae..6dfb0e7fc 100644 --- a/src/palace/manager/sqlalchemy/model/identifier.py +++ b/src/palace/manager/sqlalchemy/model/identifier.py @@ -1271,3 +1271,33 @@ def equivalent_identifiers( results[parent_identifier] = equivalent.identifier return results + + +def isbn_for_identifier(identifier: Identifier | None) -> str | None: + """Find the strongest ISBN match for the given identifier. + + :param identifier: The identifier to match. + :return: The ISBN string associated with the identifier or None, if no match is found. + """ + if identifier is None: + return None + + if identifier.type == Identifier.ISBN: + return identifier.identifier + + # If our identifier is not an ISBN itself, we'll use our Recursive Equivalency + # mechanism to find the next best one that is, if available. + db = Session.object_session(identifier) + eq_subquery = db.query(RecursiveEquivalencyCache.identifier_id).filter( + RecursiveEquivalencyCache.parent_identifier_id == identifier.id + ) + equivalent_identifiers = ( + db.query(Identifier) + .filter(Identifier.id.in_(eq_subquery)) + .filter(Identifier.type == Identifier.ISBN) + ) + + return next( + map(lambda id_: id_.identifier, equivalent_identifiers), + None, + ) diff --git a/src/palace/manager/sqlalchemy/model/time_tracking.py b/src/palace/manager/sqlalchemy/model/time_tracking.py index 56ea158ab..4309bbcc8 100644 --- a/src/palace/manager/sqlalchemy/model/time_tracking.py +++ b/src/palace/manager/sqlalchemy/model/time_tracking.py @@ -1,7 +1,7 @@ from __future__ import annotations import datetime -from typing import TYPE_CHECKING, Any, cast +from typing import TYPE_CHECKING, Any from sqlalchemy import ( Boolean, @@ -17,10 +17,7 @@ from palace.manager.sqlalchemy.model.base import Base from palace.manager.sqlalchemy.model.edition import Edition -from palace.manager.sqlalchemy.model.identifier import ( - Identifier, - RecursiveEquivalencyCache, -) +from palace.manager.sqlalchemy.model.identifier import Identifier, isbn_for_identifier from palace.manager.sqlalchemy.util import get_one_or_create from palace.manager.util.datetime_helpers import minute_timestamp @@ -215,7 +212,7 @@ def add( if (not playtime.isbn or not playtime.title) and not identifier: identifier, _ = Identifier.parse_urn(_db, identifier_str, autocreate=False) if not playtime.isbn and identifier: - playtime.isbn = _isbn_for_identifier(identifier) + playtime.isbn = isbn_for_identifier(identifier) if not playtime.title and identifier: playtime.title = _title_for_identifier(identifier) @@ -243,33 +240,3 @@ def _title_for_identifier(identifier: Identifier | None) -> str | None: ): return edition.title return None - - -def _isbn_for_identifier(identifier: Identifier | None) -> str | None: - """Find the strongest ISBN match for the given identifier. - - :param identifier: The identifier to match. - :return: The ISBN string associated with the identifier or None, if no match is found. - """ - if identifier is None: - return None - - if identifier.type == Identifier.ISBN: - return cast(str, identifier.identifier) - - # If our identifier is not an ISBN itself, we'll use our Recursive Equivalency - # mechanism to find the next best one that is, if available. - db = Session.object_session(identifier) - eq_subquery = db.query(RecursiveEquivalencyCache.identifier_id).filter( - RecursiveEquivalencyCache.parent_identifier_id == identifier.id - ) - equivalent_identifiers = ( - db.query(Identifier) - .filter(Identifier.id.in_(eq_subquery)) - .filter(Identifier.type == Identifier.ISBN) - ) - - return next( - map(lambda id_: id_.identifier, equivalent_identifiers), - None, - ) diff --git a/tests/manager/sqlalchemy/model/test_identifier.py b/tests/manager/sqlalchemy/model/test_identifier.py index 2ed805a84..b374c1ea8 100644 --- a/tests/manager/sqlalchemy/model/test_identifier.py +++ b/tests/manager/sqlalchemy/model/test_identifier.py @@ -14,6 +14,7 @@ Identifier, ProQuestIdentifierParser, RecursiveEquivalencyCache, + isbn_for_identifier, ) from palace.manager.sqlalchemy.model.resource import Hyperlink from palace.manager.sqlalchemy.presentation import PresentationCalculationPolicy @@ -860,6 +861,77 @@ def test_equivalent_identifiers(self, db: DatabaseTransactionFixture) -> None: proquest_identfier: proquest_gutenberg, } + @pytest.mark.parametrize( + "id_key, equivalents, expected_isbn", + [ + # If the identifier is an ISBN, we will not use an equivalency. + [ + "i1", + (("g1", "g2", 1), ("g2", "i1", 1), ("g1", "i2", 0.5)), + "080442957X", + ], + [ + "i2", + (("g1", "g2", 1), ("g2", "i1", 0.5), ("g1", "i2", 1)), + "9788175257665", + ], + ["i1", (("i1", "i2", 200),), "080442957X"], + ["i2", (("i2", "i1", 200),), "9788175257665"], + # If identifier is not an ISBN, but has an equivalency that is, use the strongest match. + [ + "g2", + (("g1", "g2", 1), ("g2", "i1", 1), ("g1", "i2", 0.5)), + "080442957X", + ], + [ + "g2", + (("g1", "g2", 1), ("g2", "i1", 0.5), ("g1", "i2", 1)), + "9788175257665", + ], + # If we don't find an equivalent ISBN identifier, then we should get None. + ["g2", (), None], + ["g1", (("g1", "g2", 1),), None], + # If identifier is None, expect default value. + [None, (), None], + ], + ) + def test_isbn_for_identifier( + self, + db: DatabaseTransactionFixture, + id_key: str | None, + equivalents: tuple[tuple[str, str, int | float]], + expected_isbn: str, + ): + ids: dict[str, Identifier] = { + "i1": db.identifier( + identifier_type=Identifier.ISBN, foreign_id="080442957X" + ), + "i2": db.identifier( + identifier_type=Identifier.ISBN, foreign_id="9788175257665" + ), + "g1": db.identifier(identifier_type=Identifier.GUTENBERG_ID), + "g2": db.identifier(identifier_type=Identifier.GUTENBERG_ID), + } + equivalencies = [ + Equivalency( + input_id=ids[equivalent[0]].id, + output_id=ids[equivalent[1]].id, + strength=equivalent[2], + ) + for equivalent in equivalents + ] + test_identifier: Identifier | None = ids[id_key] if id_key is not None else None + if test_identifier is not None: + test_identifier.equivalencies = equivalencies + + # We're using the RecursiveEquivalencyCache, so must refresh it. + EquivalentIdentifiersCoverageProvider(db.session).run() + + # Act + result = isbn_for_identifier(test_identifier) + # Assert + assert result == expected_isbn + class TestProQuestIdentifierParser: @pytest.mark.parametrize( diff --git a/tests/manager/sqlalchemy/model/test_time_tracking.py b/tests/manager/sqlalchemy/model/test_time_tracking.py index 3703eeaa3..613df774c 100644 --- a/tests/manager/sqlalchemy/model/test_time_tracking.py +++ b/tests/manager/sqlalchemy/model/test_time_tracking.py @@ -3,14 +3,9 @@ import pytest from sqlalchemy.exc import IntegrityError -from palace.manager.core.equivalents_coverage import ( - EquivalentIdentifiersCoverageProvider, -) -from palace.manager.sqlalchemy.model.identifier import Equivalency, Identifier from palace.manager.sqlalchemy.model.time_tracking import ( PlaytimeEntry, PlaytimeSummary, - _isbn_for_identifier, _title_for_identifier, ) from palace.manager.sqlalchemy.util import create @@ -239,77 +234,6 @@ def test_cascades(self, db: DatabaseTransactionFixture): class TestHelpers: - @pytest.mark.parametrize( - "id_key, equivalents, expected_isbn", - [ - # If the identifier is an ISBN, we will not use an equivalency. - [ - "i1", - (("g1", "g2", 1), ("g2", "i1", 1), ("g1", "i2", 0.5)), - "080442957X", - ], - [ - "i2", - (("g1", "g2", 1), ("g2", "i1", 0.5), ("g1", "i2", 1)), - "9788175257665", - ], - ["i1", (("i1", "i2", 200),), "080442957X"], - ["i2", (("i2", "i1", 200),), "9788175257665"], - # If identifier is not an ISBN, but has an equivalency that is, use the strongest match. - [ - "g2", - (("g1", "g2", 1), ("g2", "i1", 1), ("g1", "i2", 0.5)), - "080442957X", - ], - [ - "g2", - (("g1", "g2", 1), ("g2", "i1", 0.5), ("g1", "i2", 1)), - "9788175257665", - ], - # If we don't find an equivalent ISBN identifier, then we should get None. - ["g2", (), None], - ["g1", (("g1", "g2", 1),), None], - # If identifier is None, expect default value. - [None, (), None], - ], - ) - def test__isbn_for_identifier( - self, - db: DatabaseTransactionFixture, - id_key: str | None, - equivalents: tuple[tuple[str, str, int | float]], - expected_isbn: str, - ): - ids: dict[str, Identifier] = { - "i1": db.identifier( - identifier_type=Identifier.ISBN, foreign_id="080442957X" - ), - "i2": db.identifier( - identifier_type=Identifier.ISBN, foreign_id="9788175257665" - ), - "g1": db.identifier(identifier_type=Identifier.GUTENBERG_ID), - "g2": db.identifier(identifier_type=Identifier.GUTENBERG_ID), - } - equivalencies = [ - Equivalency( - input_id=ids[equivalent[0]].id, - output_id=ids[equivalent[1]].id, - strength=equivalent[2], - ) - for equivalent in equivalents - ] - test_identifier: Identifier | None = ids[id_key] if id_key is not None else None - if test_identifier is not None: - test_identifier.equivalencies = equivalencies - - # We're using the RecursiveEquivalencyCache, so must refresh it. - EquivalentIdentifiersCoverageProvider(db.session).run() - - # Act - result = _isbn_for_identifier(test_identifier) - # Assert - assert result == expected_isbn - def test__title_for_identifier_multiple_editions( self, db: DatabaseTransactionFixture,