Skip to content

Commit

Permalink
Remove unused functions from collection model (PP-503) (#1482)
Browse files Browse the repository at this point in the history
This is a follow-up to #1481. Remove a number of functions that are not used on collections. It looks like they existed to support the metadata wrangler at some point.
  • Loading branch information
jonathangreen authored Oct 26, 2023
1 parent 75085ad commit d784b79
Show file tree
Hide file tree
Showing 7 changed files with 2 additions and 637 deletions.
11 changes: 0 additions & 11 deletions bin/informational/list_collection_metadata_identifiers

This file was deleted.

19 changes: 0 additions & 19 deletions core/coverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -1387,25 +1387,6 @@ def run(self, _db, **kwargs):
provider.finalize_timestampdata(self.progress)


class CatalogCoverageProvider(CollectionCoverageProvider):
    """Provide coverage to Identifiers present in a Collection's catalog.

    Most CollectionCoverageProviders cover Identifiers that are licensed
    through a given Collection; this one instead covers Identifiers that
    merely appear in the Collection's catalog.
    """

    def items_that_need_coverage(self, identifiers=None, **kwargs):
        """Find Identifiers in this Collection's catalog that still lack
        coverage from this CoverageProvider.
        """
        # Deliberately skip CollectionCoverageProvider's implementation
        # (which restricts to licensed Identifiers) by starting the super()
        # chain above it, then restrict to this Collection's catalog.
        base_query = super(CollectionCoverageProvider, self).items_that_need_coverage(
            identifiers, **kwargs
        )
        return base_query.join(Identifier.collections).filter(
            Collection.id == self.collection_id
        )


class BibliographicCoverageProvider(CollectionCoverageProvider):
"""Fill in bibliographic metadata for all books in a Collection.
Expand Down
202 changes: 2 additions & 200 deletions core/model/collection.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

import datetime
from abc import ABCMeta, abstractmethod
from typing import TYPE_CHECKING, Any, Generator, List, Optional, Tuple, TypeVar

Expand All @@ -13,15 +12,14 @@
Unicode,
UniqueConstraint,
exists,
func,
)
from sqlalchemy.orm import Mapped, Query, backref, joinedload, mapper, relationship
from sqlalchemy.orm import Mapped, Query, backref, mapper, relationship
from sqlalchemy.orm.exc import NoResultFound
from sqlalchemy.orm.session import Session
from sqlalchemy.sql.expression import and_, or_

from core.integration.goals import Goals
from core.model import Base, create, get_one, get_one_or_create
from core.model import Base, create, get_one_or_create
from core.model.configuration import ConfigurationSetting, ExternalIntegration
from core.model.constants import EditionConstants
from core.model.coverage import CoverageRecord
Expand All @@ -37,7 +35,6 @@
from core.model.library import Library
from core.model.licensing import LicensePool, LicensePoolDeliveryMechanism
from core.model.work import Work
from core.util.string_helpers import base64

if TYPE_CHECKING:
from core.external_search import ExternalSearchIndex
Expand Down Expand Up @@ -488,29 +485,6 @@ def external_integration(self) -> ExternalIntegration:
)
return self._external_integration

@property
def unique_account_id(self) -> str:
    """Identifier that uniquely represents this Collection of works.

    Top-level collections whose data source is in
    GLOBAL_COLLECTION_DATA_SOURCES all share one catalog, so they are
    identified by the data source name; any other collection is
    identified by its external account ID. A child collection's
    identifier is prefixed with its parent's.

    :raises ValueError: if no identifier can be determined.
    """
    uses_global_source = bool(
        self.data_source
        and self.data_source.name in self.GLOBAL_COLLECTION_DATA_SOURCES
    )
    account_id: str | None = (
        self.data_source.name
        if uses_global_source and not self.parent
        else self.external_account_id
    )

    if not account_id:
        raise ValueError("Unique account identifier not set")

    if self.parent:
        return self.parent.unique_account_id + "+" + account_id
    return account_id

@hybrid_property
def data_source(self) -> DataSource | None:
"""Find the data source associated with this Collection.
Expand Down Expand Up @@ -571,32 +545,6 @@ def parents(self) -> Generator[Collection, None, None]:
yield parent
yield from parent.parents

@property
def metadata_identifier(self) -> str:
    """Identifier that uniquely represents this Collection on the
    metadata wrangler.

    The identifier is the urlsafe-base64 encoding of
    ``encode(protocol) + ":" + encode(account_id)``. A circulation
    manager sends this value during collection registration, and the
    metadata wrangler names its corresponding Collection after it --
    regardless of the collection's name on the circulation manager side.
    """
    account_id = self.unique_account_id
    if self.protocol == ExternalIntegration.OPDS_IMPORT:
        # Trailing slashes on an OPDS feed URL would otherwise produce
        # duplicate collections on the Metadata Wrangler.
        account_id = account_id.rstrip("/")

    encode = base64.urlsafe_b64encode
    combined = encode(self.protocol) + ":" + encode(account_id)
    return encode(combined)  # type: ignore[no-any-return]

def disassociate_library(self, library: Library) -> None:
"""Disassociate a Library from this Collection and delete any relevant
ConfigurationSettings.
Expand Down Expand Up @@ -638,67 +586,6 @@ def disassociate_library(self, library: Library) -> None:

self.libraries.remove(library)

@classmethod
def _decode_metadata_identifier(cls, metadata_identifier: str) -> Tuple[str, str]:
    """Invert the ``metadata_identifier`` property.

    :return: a 2-tuple (protocol, account_id).
    :raises ValueError: if the identifier is missing or malformed.
    """
    if not metadata_identifier:
        raise ValueError("No metadata identifier provided.")
    decode = base64.urlsafe_b64decode
    try:
        outer = decode(metadata_identifier)
        # Split into the (encoded) protocol and account ID, then decode
        # each piece; a missing separator makes the unpack raise ValueError.
        protocol, account_id = (decode(piece) for piece in outer.split(":", 1))
    except (TypeError, ValueError) as e:
        raise ValueError(
            "Metadata identifier '%s' is invalid: %s"
            % (metadata_identifier, str(e))
        )
    return protocol, account_id

@classmethod
def from_metadata_identifier(
    cls,
    _db: Session,
    metadata_identifier: str,
    data_source: DataSource | str | None = None,
) -> Tuple[Collection, bool]:
    """Find or create the Collection named after a unique
    metadata_identifier, as used on the metadata wrangler.

    :return: a 2-tuple (collection, is_new).
    :raises ValueError: if the metadata identifier cannot be decoded.
    """
    # Decoding doubles as validation -- an invalid identifier raises
    # ValueError before we touch the database.
    protocol, account_id = cls._decode_metadata_identifier(metadata_identifier)

    is_new = False
    collection = get_one(_db, Collection, name=metadata_identifier)
    if collection is None:
        # No collection named after this identifier yet. Create one and
        # give it an ExternalIntegration and integration configuration
        # for the decoded protocol.
        collection, is_new = create(_db, Collection, name=metadata_identifier)
        collection.create_external_integration(protocol)
        collection.create_integration_configuration(protocol)

    if protocol == ExternalIntegration.OPDS_IMPORT:
        # Only OPDS Import collections store the feed URL (the
        # "account ID") and, when provided, the data source.
        collection.external_account_id = account_id
        if isinstance(data_source, DataSource):
            collection.data_source = data_source
        elif data_source is not None:
            collection.data_source = DataSource.lookup(
                _db, data_source, autocreate=True
            )

    return collection, is_new

@property
def pools_with_no_delivery_mechanisms(self) -> Query[LicensePool]:
"""Find all LicensePools in this Collection that have no delivery
Expand Down Expand Up @@ -739,91 +626,6 @@ def explain(self, include_secrets: bool = False) -> List[str]:
lines.append(f'Setting "{name}": "{value}"')
return lines

def catalog_identifier(self, identifier: Identifier) -> None:
    """Insert a single identifier into this collection's catalog.

    Convenience wrapper around :meth:`catalog_identifiers`.
    """
    self.catalog_identifiers([identifier])

def catalog_identifiers(self, identifiers: List[Identifier]) -> None:
    """Insert identifiers into this collection's catalog.

    Identifiers already present in the catalog are skipped; the rest are
    added with a single bulk insert, and the session is committed.

    :param identifiers: Identifier objects to add. May be empty.
    """
    if not identifiers:
        # Nothing to do.
        return

    _db = Session.object_session(identifiers[0])
    already_in_catalog = (
        _db.query(Identifier)
        .join(CollectionIdentifier)
        .filter(CollectionIdentifier.collection_id == self.id)  # type: ignore[attr-defined]
        .filter(Identifier.id.in_([x.id for x in identifiers]))
        .all()
    )
    # Membership test by primary key in a set is O(1); the previous
    # `identifier not in already_in_catalog` list scan was O(n) per
    # identifier, i.e. O(n^2) overall. Within one session the identity
    # map guarantees id-equality matches object-equality here.
    already_cataloged_ids = {x.id for x in already_in_catalog}

    new_catalog_entries = [
        dict(collection_id=self.id, identifier_id=identifier.id)
        for identifier in identifiers
        if identifier.id not in already_cataloged_ids
    ]
    _db.bulk_insert_mappings(CollectionIdentifier, new_catalog_entries)
    _db.commit()

def unresolved_catalog(
    self, _db: Session, data_source_name: str, operation: str
) -> Query[Identifier]:
    """Find identifiers in this Collection's catalog whose resolution
    has been attempted but did not succeed (and that have no Work).

    This method is used on the metadata wrangler.

    :return: a sqlalchemy.Query of Identifiers, ordered by id.
    """
    coverage_source = DataSource.lookup(_db, data_source_name)
    failed_resolution = and_(
        CoverageRecord.operation == operation,
        CoverageRecord.data_source_id == coverage_source.id,
        CoverageRecord.status != CoverageRecord.SUCCESS,
    )

    return (
        _db.query(Identifier)
        .outerjoin(Identifier.licensed_through)
        .outerjoin(Identifier.coverage_records)
        .outerjoin(LicensePool.work)
        .outerjoin(Identifier.collections)  # type: ignore[attr-defined]
        .filter(Collection.id == self.id, failed_resolution, Work.id == None)
        .order_by(Identifier.id)
    )

def isbns_updated_since(
    self, _db: Session, timestamp: datetime.datetime | None
) -> Query[Identifier]:
    """Find ISBNs in this collection's catalog with successful coverage
    newer than ``timestamp`` but no Work to show for it.

    Used on the metadata wrangler.

    :param timestamp: only consider coverage after this moment; if None,
        consider all successful coverage.
    :return: a Query of (Identifier, latest-coverage-timestamp) rows,
        ordered by that latest timestamp.
    """
    latest_coverage = func.max(CoverageRecord.timestamp).label("latest")
    query = (
        _db.query(Identifier, latest_coverage)
        .join(Identifier.collections)  # type: ignore[attr-defined]
        .join(Identifier.coverage_records)
        .outerjoin(Identifier.licensed_through)
        .group_by(Identifier.id)
        .order_by("latest")
        .filter(
            Collection.id == self.id,
            LicensePool.work_id == None,
            CoverageRecord.status == CoverageRecord.SUCCESS,
        )
        .enable_eagerloads(False)
        .options(joinedload(Identifier.coverage_records))
    )

    # datetimes are always truthy, so this is the "timestamp given" case.
    if timestamp is not None:
        query = query.filter(CoverageRecord.timestamp > timestamp)

    return query

@classmethod
def restrict_to_ready_deliverable_works(
cls,
Expand Down
49 changes: 0 additions & 49 deletions core/scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -2572,55 +2572,6 @@ def explain_collection(self, collection):
)


class ListCollectionMetadataIdentifiersScript(CollectionInputScript):
    """Print the metadata identifier for Collections in the database.

    Helpful for accounting for and tracking collections on the metadata
    wrangler.
    """

    def __init__(self, _db=None, output=None):
        _db = _db or self._db
        super().__init__(_db)
        # Destination stream for the report; defaults to stdout.
        self.output = output or sys.stdout

    def run(self, cmd_args=None):
        parsed = self.parse_command_line(self._db, cmd_args=cmd_args)
        self.do_run(parsed.collections)

    def do_run(self, collections=None):
        # Restrict to the requested collections, if any were given.
        collection_ids = [c.id for c in collections] if collections else []

        query = self._db.query(Collection).order_by(Collection.id)
        if collection_ids:
            query = query.filter(Collection.id.in_(collection_ids))

        self.output.write("COLLECTIONS\n")
        self.output.write("=" * 50 + "\n")

        def write_row(id, name, protocol, metadata_identifier):
            self.output.write(
                f"({id}) {name}/{protocol} => {metadata_identifier}\n"
            )

        count = 0
        for collection in query:
            if count == 0:
                # Header row, emitted once before the first collection.
                write_row("id", "name", "protocol", "metadata_identifier")
            count += 1
            write_row(
                str(collection.id),
                collection.name,
                collection.protocol,
                collection.metadata_identifier,
            )

        self.output.write("\n%d collections found.\n" % count)


class UpdateLaneSizeScript(LaneSweeperScript):
def __init__(self, _db=None, *args, **kwargs):
super().__init__(_db, *args, **kwargs)
Expand Down
Loading

0 comments on commit d784b79

Please sign in to comment.