Skip to content

Commit

Permalink
Code review feedback
Browse files Browse the repository at this point in the history
  • Loading branch information
jonathangreen committed Oct 31, 2024
1 parent c13d35e commit afaaf51
Show file tree
Hide file tree
Showing 6 changed files with 159 additions and 72 deletions.
7 changes: 6 additions & 1 deletion src/palace/manager/celery/tasks/marc.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@
from palace.manager.service.redis.models.lock import RedisLock
from palace.manager.service.redis.redis import Redis
from palace.manager.sqlalchemy.model.collection import Collection
from palace.manager.sqlalchemy.model.identifier import (
Identifier,
RecursiveEquivalencyCache,
)
from palace.manager.sqlalchemy.model.marcfile import MarcFile
from palace.manager.sqlalchemy.util import create
from palace.manager.util.datetime_helpers import utc_now
Expand Down Expand Up @@ -190,9 +194,10 @@ def marc_export_collection(
]

# Find ISBN for any work that needs it
isbns = MarcExporter.query_isbn_identifiers(
isbns = RecursiveEquivalencyCache.equivalent_identifiers(
session,
{pool.identifier for work, pool in works_with_pools},
Identifier.ISBN,
)

for work, pool in works_with_pools:
Expand Down
2 changes: 1 addition & 1 deletion src/palace/manager/marc/exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ def query_works(
.order_by(Work.id.asc())
.options(
# We set loader options on all the collection properties
# needed to generate the MARC records, so that don't end
# needed to generate the MARC records, so that we don't end
# up doing queries for each work.
selectinload(Work.license_pools).options(
selectinload(LicensePool.identifier),
Expand Down
53 changes: 52 additions & 1 deletion src/palace/manager/sqlalchemy/model/identifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
func,
)
from sqlalchemy.exc import MultipleResultsFound, NoResultFound
from sqlalchemy.orm import Mapped, joinedload, relationship
from sqlalchemy.orm import Mapped, joinedload, relationship, selectinload
from sqlalchemy.orm.session import Session
from sqlalchemy.sql import select
from sqlalchemy.sql.expression import and_, or_
Expand Down Expand Up @@ -1216,3 +1216,54 @@ class RecursiveEquivalencyCache(Base):
is_parent = Column(Boolean, Computed("parent_identifier_id = identifier_id"))

__table_args__ = (UniqueConstraint(parent_identifier_id, identifier_id),)

@staticmethod
def equivalent_identifiers(
session: Session, identifiers: set[Identifier], type: str | None = None
) -> dict[Identifier, Identifier]:
"""
Find all equivalent identifiers for the given Identifiers.
:param session: DB Session
:param identifiers: Set of Identifiers that we need equivalencies for
:param type: An optional type, if given only equivalent identifiers
of this type will be returned.
:return: A dictionary mapping input identifiers to equivalent identifiers.
"""

# Find identifiers that don't need to be looked up
results = (
{i: i for i in identifiers if i.type == type} if type is not None else {}
)
needs_lookup = {i.id: i for i in identifiers - results.keys()}
if not needs_lookup:
return results

query = (
select(RecursiveEquivalencyCache)
.join(
Identifier,
RecursiveEquivalencyCache.identifier_id == Identifier.id,
)
.where(
RecursiveEquivalencyCache.parent_identifier_id.in_(needs_lookup.keys()),
)
.order_by(
RecursiveEquivalencyCache.parent_identifier_id,
RecursiveEquivalencyCache.is_parent.desc(),
RecursiveEquivalencyCache.identifier_id.desc(),
)
.options(
selectinload(RecursiveEquivalencyCache.identifier),
)
)
if type is not None:
query = query.where(Identifier.type == Identifier.ISBN)

equivalents = session.execute(query).scalars().all()

for equivalent in equivalents:
parent_identifier = needs_lookup[equivalent.parent_identifier_id]
results[parent_identifier] = equivalent.identifier

return results
4 changes: 4 additions & 0 deletions tests/fixtures/marc.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,10 @@ def work(self, collection: Collection | None = None) -> Work:
edition = self._db.edition()
self._db.licensepool(edition, collection=collection)
work = self._db.work(presentation_edition=edition)
# We set the works last updated time to 1 day ago, so we know this work
# will only be included in delta exports covering a time range before
# 1 day ago. This lets us easily test works being included / excluded
# based on their `last_update_time`.
work.last_update_time = utc_now() - datetime.timedelta(days=1)
return work

Expand Down
68 changes: 0 additions & 68 deletions tests/manager/marc/test_exporter.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,15 @@
import datetime
from functools import partial
from unittest.mock import MagicMock

import pytest
from freezegun import freeze_time
from sqlalchemy.exc import InvalidRequestError

from palace.manager.core.equivalents_coverage import (
EquivalentIdentifiersCoverageProvider,
)
from palace.manager.marc.exporter import LibraryInfo, MarcExporter
from palace.manager.marc.settings import MarcExporterLibrarySettings
from palace.manager.sqlalchemy.model.discovery_service_registration import (
DiscoveryServiceRegistration,
)
from palace.manager.sqlalchemy.model.identifier import Equivalency, Identifier
from palace.manager.sqlalchemy.model.marcfile import MarcFile
from palace.manager.sqlalchemy.util import create
from palace.manager.util.datetime_helpers import datetime_utc, utc_now
Expand Down Expand Up @@ -305,69 +300,6 @@ def test_query_works(self, marc_exporter_fixture: MarcExporterFixture) -> None:

assert query_works(work_id_offset=works[3].id) == works[4:]

def test_query_isbn_identifiers(self, db: DatabaseTransactionFixture) -> None:
# This is already an isbn so no lookup necessary
isbn_identifier = db.identifier(identifier_type=Identifier.ISBN)

# Overdrive identifier with two ISBN associated
overdrive_identifier = db.identifier(identifier_type=Identifier.OVERDRIVE_ID)
od_isbn_1 = db.identifier(identifier_type=Identifier.ISBN)
od_isbn_2 = db.identifier(identifier_type=Identifier.ISBN)
create(
db.session,
Equivalency,
input_id=overdrive_identifier.id,
output_id=od_isbn_1.id,
strength=5,
)
create(
db.session,
Equivalency,
input_id=overdrive_identifier.id,
output_id=od_isbn_2.id,
strength=1,
)

# Gutenberg ID with one associated ISBN
gutenberg_identifier = db.identifier(identifier_type=Identifier.GUTENBERG_ID)
gutenberg_isbn = db.identifier(identifier_type=Identifier.ISBN)
create(
db.session,
Equivalency,
input_id=gutenberg_identifier.id,
output_id=gutenberg_isbn.id,
strength=5,
)

# Proquest ID with no associated ISBN
proquest_identfier = db.identifier(identifier_type=Identifier.PROQUEST_ID)

# We're using the RecursiveEquivalencyCache, so must refresh it.
EquivalentIdentifiersCoverageProvider(db.session).run()

# Calling with only ISBN doesn't do a query, it just returns the identifiers
assert MarcExporter.query_isbn_identifiers(
MagicMock(side_effect=Exception("Should not be called")),
{
isbn_identifier,
},
) == {isbn_identifier: isbn_identifier}

equivalent_isbns = MarcExporter.query_isbn_identifiers(
db.session,
{
isbn_identifier,
overdrive_identifier,
gutenberg_identifier,
proquest_identfier,
},
)
assert equivalent_isbns == {
isbn_identifier: isbn_identifier,
overdrive_identifier: od_isbn_1,
gutenberg_identifier: gutenberg_isbn,
}

def test_collection(self, marc_exporter_fixture: MarcExporterFixture) -> None:
collection_id = marc_exporter_fixture.collection1.id
assert collection_id is not None
Expand Down
97 changes: 96 additions & 1 deletion tests/manager/sqlalchemy/model/test_identifier.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,23 @@
import datetime
from unittest.mock import PropertyMock, create_autospec
from unittest.mock import MagicMock, PropertyMock, create_autospec

import pytest

from palace.manager.core.equivalents_coverage import (
EquivalentIdentifiersCoverageProvider,
)
from palace.manager.sqlalchemy.constants import MediaTypes
from palace.manager.sqlalchemy.model.datasource import DataSource
from palace.manager.sqlalchemy.model.edition import Edition
from palace.manager.sqlalchemy.model.identifier import (
Equivalency,
Identifier,
ProQuestIdentifierParser,
RecursiveEquivalencyCache,
)
from palace.manager.sqlalchemy.model.resource import Hyperlink
from palace.manager.sqlalchemy.presentation import PresentationCalculationPolicy
from palace.manager.sqlalchemy.util import create
from tests.fixtures.database import DatabaseTransactionFixture
from tests.manager.sqlalchemy.model.test_coverage import (
ExampleEquivalencyCoverageRecordFixture,
Expand Down Expand Up @@ -765,6 +770,96 @@ def test_identifier_delete_cascade_parent(
all_recursives = session.query(RecursiveEquivalencyCache).all()
assert len(all_recursives) == 3

def test_equivalent_identifiers(self, db: DatabaseTransactionFixture) -> None:
# This is already an isbn so no lookup necessary
isbn_identifier = db.identifier(identifier_type=Identifier.ISBN)

# Overdrive identifier with two ISBN associated
overdrive_identifier = db.identifier(identifier_type=Identifier.OVERDRIVE_ID)
od_isbn_1 = db.identifier(identifier_type=Identifier.ISBN)
od_isbn_2 = db.identifier(identifier_type=Identifier.ISBN)
create(
db.session,
Equivalency,
input_id=overdrive_identifier.id,
output_id=od_isbn_1.id,
strength=5,
)
create(
db.session,
Equivalency,
input_id=overdrive_identifier.id,
output_id=od_isbn_2.id,
strength=1,
)

# Gutenberg ID with one associated ISBN
gutenberg_identifier = db.identifier(identifier_type=Identifier.GUTENBERG_ID)
gutenberg_isbn = db.identifier(identifier_type=Identifier.ISBN)
create(
db.session,
Equivalency,
input_id=gutenberg_identifier.id,
output_id=gutenberg_isbn.id,
strength=5,
)

# Proquest ID with no associated ISBN but an associated GUTENBERG_ID
proquest_identfier = db.identifier(identifier_type=Identifier.PROQUEST_ID)
proquest_gutenberg = db.identifier(identifier_type=Identifier.GUTENBERG_ID)
create(
db.session,
Equivalency,
input_id=proquest_identfier.id,
output_id=proquest_gutenberg.id,
strength=2,
)

# We're using the RecursiveEquivalencyCache, so must refresh it.
EquivalentIdentifiersCoverageProvider(db.session).run()

# Calling with only identifiers of the specified type doesn't do a query,
# it just returns the identifiers
assert RecursiveEquivalencyCache.equivalent_identifiers(
MagicMock(side_effect=Exception("Should not be called")),
{
isbn_identifier,
},
type=Identifier.ISBN,
) == {isbn_identifier: isbn_identifier}

equivalent_isbns = RecursiveEquivalencyCache.equivalent_identifiers(
db.session,
{
isbn_identifier,
overdrive_identifier,
gutenberg_identifier,
proquest_identfier,
},
type=Identifier.ISBN,
)
assert equivalent_isbns == {
isbn_identifier: isbn_identifier,
overdrive_identifier: od_isbn_1,
gutenberg_identifier: gutenberg_isbn,
}

equivalents = RecursiveEquivalencyCache.equivalent_identifiers(
db.session,
{
isbn_identifier,
overdrive_identifier,
gutenberg_identifier,
proquest_identfier,
},
)
assert equivalents == {
isbn_identifier: isbn_identifier,
overdrive_identifier: od_isbn_1,
gutenberg_identifier: gutenberg_isbn,
proquest_identfier: proquest_gutenberg,
}


class TestProQuestIdentifierParser:
@pytest.mark.parametrize(
Expand Down

0 comments on commit afaaf51

Please sign in to comment.