Skip to content

Commit

Permalink
Refactor OPDS classes to use HTTP.get_with_timeout (PP-1485) (#1956)
Browse files Browse the repository at this point in the history
* Refactor OPDS classes to use HTTP.get_with_timeout functions instead of resource function.
  • Loading branch information
jonathangreen authored Jul 31, 2024
1 parent ebdeeeb commit d7ab9fb
Show file tree
Hide file tree
Showing 14 changed files with 160 additions and 174 deletions.
21 changes: 11 additions & 10 deletions src/palace/manager/api/odl/importer.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from __future__ import annotations

import datetime
import json
from collections.abc import Callable
from typing import TYPE_CHECKING, Any

import dateutil
from requests import Response
from sqlalchemy.orm import Session
from webpub_manifest_parser.odl import ODLFeedParserFactory
from webpub_manifest_parser.opds2.registry import OPDS2LinkRelationsRegistry
Expand All @@ -27,9 +27,9 @@
LicenseStatus,
RightsStatus,
)
from palace.manager.sqlalchemy.model.resource import HttpResponseTuple
from palace.manager.util import first_or_default
from palace.manager.util.datetime_helpers import to_utc
from palace.manager.util.http import HTTP

if TYPE_CHECKING:
from webpub_manifest_parser.core.ast import Metadata
Expand Down Expand Up @@ -63,7 +63,7 @@ def __init__(
collection: Collection,
parser: RWPMManifestParser | None = None,
data_source_name: str | None = None,
http_get: Callable[..., HttpResponseTuple] | None = None,
http_get: Callable[..., Response] | None = None,
):
"""Initialize a new instance of OPDS2WithODLImporter class.
Expand All @@ -90,9 +90,10 @@ def __init__(
collection,
parser if parser else RWPMManifestParser(ODLFeedParserFactory()),
data_source_name,
http_get,
)

self.http_get = http_get or HTTP.get_with_timeout

def _extract_publication_metadata(
self,
feed: OPDS2Feed,
Expand Down Expand Up @@ -229,16 +230,16 @@ def _extract_publication_metadata(

@classmethod
def fetch_license_info(
cls, document_link: str, do_get: Callable[..., HttpResponseTuple]
cls, document_link: str, do_get: Callable[..., Response]
) -> dict[str, Any] | None:
status_code, _, response = do_get(document_link, headers={})
if status_code in (200, 201):
license_info_document = json.loads(response)
resp = do_get(document_link, headers={})
if resp.status_code in (200, 201):
license_info_document = resp.json()
return license_info_document # type: ignore[no-any-return]
else:
cls.logger().warning(
f"License Info Document is not available. "
f"Status link {document_link} failed with {status_code} code."
f"Status link {document_link} failed with {resp.status_code} code."
)
return None

Expand Down Expand Up @@ -337,7 +338,7 @@ def get_license_data(
feed_license_identifier: str | None,
feed_license_expires: datetime.datetime | None,
feed_concurrency: int | None,
do_get: Callable[..., HttpResponseTuple],
do_get: Callable[..., Response],
) -> LicenseData | None:
license_info_document = cls.fetch_license_info(license_info_link, do_get)

Expand Down
4 changes: 2 additions & 2 deletions src/palace/manager/api/opds_for_distributors.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
RightsStatus,
)
from palace.manager.sqlalchemy.model.patron import Loan, Patron
from palace.manager.sqlalchemy.model.resource import HttpResponseTuple, Hyperlink
from palace.manager.sqlalchemy.model.resource import Hyperlink
from palace.manager.sqlalchemy.util import get_one
from palace.manager.util import base64
from palace.manager.util.datetime_helpers import utc_now
Expand Down Expand Up @@ -446,7 +446,7 @@ def __init__(

self.api = OPDSForDistributorsAPI(_db, collection)

def _get(self, url: str, headers: Mapping[str, str]) -> HttpResponseTuple:
def _get(self, url: str, headers: Mapping[str, str]) -> Response:
"""Make a normal HTTP request for an OPDS feed, but add in an
auth header with the credentials for the collection.
"""
Expand Down
11 changes: 3 additions & 8 deletions src/palace/manager/core/opds2_import.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

import logging
from collections.abc import Callable, Iterable, Mapping
from collections.abc import Iterable, Mapping
from datetime import datetime
from io import BytesIO, StringIO
from typing import TYPE_CHECKING, Any
Expand Down Expand Up @@ -63,11 +63,7 @@
RightsStatus,
)
from palace.manager.sqlalchemy.model.patron import Patron
from palace.manager.sqlalchemy.model.resource import (
HttpResponseTuple,
Hyperlink,
Representation,
)
from palace.manager.sqlalchemy.model.resource import Hyperlink, Representation
from palace.manager.util.datetime_helpers import utc_now
from palace.manager.util.http import HTTP, BadResponseException
from palace.manager.util.opds_writer import OPDSFeed
Expand Down Expand Up @@ -282,7 +278,6 @@ def __init__(
collection: Collection,
parser: RWPMManifestParser,
data_source_name: str | None = None,
http_get: Callable[..., HttpResponseTuple] | None = None,
):
"""Initialize a new instance of OPDS2Importer class.
Expand All @@ -298,7 +293,7 @@ def __init__(
NOTE: If `collection` is provided, its .data_source will take precedence over any value provided here.
This is only for use when you are importing OPDS metadata without any particular Collection in mind.
"""
super().__init__(db, collection, data_source_name, http_get)
super().__init__(db, collection, data_source_name)
self._parser = parser
self.ignored_identifier_types = self.settings.ignored_identifier_types

Expand Down
45 changes: 12 additions & 33 deletions src/palace/manager/core/opds_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from flask_babel import lazy_gettext as _
from lxml import etree
from pydantic import AnyHttpUrl
from requests import Response
from sqlalchemy.orm.session import Session

from palace.manager.api.circulation import (
Expand Down Expand Up @@ -75,11 +76,7 @@
)
from palace.manager.sqlalchemy.model.measurement import Measurement
from palace.manager.sqlalchemy.model.patron import Patron
from palace.manager.sqlalchemy.model.resource import (
HttpResponseTuple,
Hyperlink,
Representation,
)
from palace.manager.sqlalchemy.model.resource import Hyperlink
from palace.manager.sqlalchemy.util import get_one
from palace.manager.util import base64
from palace.manager.util.datetime_helpers import datetime_utc, to_utc, utc_now
Expand Down Expand Up @@ -382,7 +379,6 @@ def __init__(
_db: Session,
collection: Collection,
data_source_name: str | None,
http_get: Callable[..., HttpResponseTuple] | None = None,
):
self._db = _db
if collection.id is None:
Expand All @@ -401,11 +397,6 @@ def __init__(
"Cannot perform an OPDS import on a Collection that has no associated DataSource!"
)
self.data_source_name = data_source_name

# In general, we are cautious when mirroring resources so that
# we don't, e.g. accidentally get our IP banned from
# gutenberg.org.
self.http_get = http_get or Representation.cautious_http_get
self.settings = integration_settings_load(
self.settings_class(), collection.integration_configuration
)
Expand Down Expand Up @@ -690,7 +681,6 @@ def __init__(
_db: Session,
collection: Collection,
data_source_name: str | None = None,
http_get: Callable[..., HttpResponseTuple] | None = None,
):
""":param collection: LicensePools created by this OPDS import
will be associated with the given Collection. If this is None,
Expand All @@ -703,19 +693,11 @@ def __init__(
.data_source will take precedence over any value provided
here. This is only for use when you are importing OPDS
metadata without any particular Collection in mind.
:param http_get: Use this method to make an HTTP GET request. This
can be replaced with a stub method for testing purposes.
"""
super().__init__(_db, collection, data_source_name)

self.primary_identifier_source = self.settings.primary_identifier_source

# In general, we are cautious when mirroring resources so that
# we don't, e.g. accidentally get our IP banned from
# gutenberg.org.
self.http_get = http_get or Representation.cautious_http_get

def extract_next_links(self, feed: str | bytes | FeedParserDict) -> list[str]:
if isinstance(feed, (bytes, str)):
parsed = feedparser.parse(feed)
Expand Down Expand Up @@ -754,7 +736,7 @@ def extract_feed_data(
)
# gets: medium, measurements, links, contributors, etc.
xml_data_meta, xml_failures = self.extract_metadata_from_elementtree(
feed, data_source=data_source, feed_url=feed_url, do_get=self.http_get
feed, data_source=data_source, feed_url=feed_url
)

# translate the id in failures to identifier.urn
Expand Down Expand Up @@ -979,7 +961,6 @@ def extract_metadata_from_elementtree(
feed: bytes | str,
data_source: DataSource,
feed_url: str | None = None,
do_get: Callable[..., HttpResponseTuple] | None = None,
) -> tuple[dict[str, Any], dict[str, CoverageFailure]]:
"""Parse the OPDS as XML and extract all author and subject
information, as well as ratings and medium.
Expand Down Expand Up @@ -1027,7 +1008,7 @@ def extract_metadata_from_elementtree(
# Then turn Atom <entry> tags into Metadata objects.
for entry in parser._xpath(root, "/atom:feed/atom:entry"):
identifier, detail, failure_entry = cls.detail_for_elementtree_entry(
parser, entry, data_source, feed_url, do_get=do_get
parser, entry, data_source, feed_url
)
if identifier:
if failure_entry:
Expand Down Expand Up @@ -1309,7 +1290,6 @@ def detail_for_elementtree_entry(
entry_tag: Element,
data_source: DataSource,
feed_url: str | None = None,
do_get: Callable[..., HttpResponseTuple] | None = None,
) -> tuple[str | None, dict[str, Any] | None, CoverageFailure | None]:
"""Turn an <atom:entry> tag into a dictionary of metadata that can be
used as keyword arguments to the Metadata contructor.
Expand All @@ -1324,9 +1304,7 @@ def detail_for_elementtree_entry(
identifier = identifier.text

try:
data = cls._detail_for_elementtree_entry(
parser, entry_tag, feed_url, do_get=do_get
)
data = cls._detail_for_elementtree_entry(parser, entry_tag, feed_url)
return identifier, data, None

except Exception as e:
Expand All @@ -1343,7 +1321,6 @@ def _detail_for_elementtree_entry(
parser: OPDSXMLParser,
entry_tag: Element,
feed_url: str | None = None,
do_get: Callable[..., HttpResponseTuple] | None = None,
) -> dict[str, Any]:
"""Helper method that extracts metadata and circulation data from an elementtree
entry. This method can be overridden in tests to check that callers handle things
Expand Down Expand Up @@ -1734,7 +1711,7 @@ def __init__(
self._feed_base_url = f"{parsed_url.scheme}://{parsed_url.hostname}{(':' + str(parsed_url.port)) if parsed_url.port else ''}/"
super().__init__(_db, collection)

def _get(self, url: str, headers: Mapping[str, str]) -> HttpResponseTuple:
def _get(self, url: str, headers: Mapping[str, str]) -> Response:
"""Make the sort of HTTP request that's normal for an OPDS feed.
Long timeout, raise error on anything but 2xx or 3xx.
Expand All @@ -1748,8 +1725,7 @@ def _get(self, url: str, headers: Mapping[str, str]) -> HttpResponseTuple:
)
if not url.startswith("http"):
url = urljoin(self._feed_base_url, url)
response = HTTP.get_with_timeout(url, headers=headers, **kwargs)
return response.status_code, response.headers, response.content
return HTTP.get_with_timeout(url, headers=headers, **kwargs)

def _get_accept_header(self) -> str:
return ",".join(
Expand Down Expand Up @@ -1892,7 +1868,7 @@ def _verify_media_type(
raise BadResponseException(url, message=message, status_code=status_code)

def follow_one_link(
self, url: str, do_get: Callable[..., HttpResponseTuple] | None = None
self, url: str, do_get: Callable[..., Response] | None = None
) -> tuple[list[str], bytes | None]:
"""Download a representation of a URL and extract the useful
information.
Expand All @@ -1903,7 +1879,10 @@ def follow_one_link(
"""
self.log.info("Following next link: %s", url)
get = do_get or self._get
status_code, headers, feed = get(url, {})
resp = get(url, {})
feed = resp.content
status_code = resp.status_code
headers = resp.headers

self._verify_media_type(url, status_code, headers, feed)

Expand Down
7 changes: 4 additions & 3 deletions tests/fixtures/odl.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import pytest
from jinja2 import Template
from requests import Response

from palace.manager.api.circulation import LoanInfo
from palace.manager.api.odl.api import OPDS2WithODLApi
Expand All @@ -23,10 +24,10 @@
LicensePoolDeliveryMechanism,
)
from palace.manager.sqlalchemy.model.patron import Loan, Patron
from palace.manager.sqlalchemy.model.resource import HttpResponseTuple
from palace.manager.sqlalchemy.model.work import Work
from tests.fixtures.database import DatabaseTransactionFixture
from tests.fixtures.files import FilesFixture, OPDS2WithODLFilesFixture
from tests.mocks.mock import MockRequestsResponse
from tests.mocks.odl import MockOPDS2WithODLApi


Expand Down Expand Up @@ -264,8 +265,8 @@ def __init__(
http_get=self.get_response,
)

def get_response(self, *args: Any, **kwargs: Any) -> HttpResponseTuple:
return 200, {}, self.responses.pop(0)
def get_response(self, *args: Any, **kwargs: Any) -> Response:
return MockRequestsResponse(200, content=self.responses.pop(0))

def queue_response(self, item: LicenseInfoHelper | str | bytes) -> None:
if isinstance(item, LicenseInfoHelper):
Expand Down
6 changes: 3 additions & 3 deletions tests/manager/api/controller/test_loan.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
from tests.fixtures.redis import RedisFixture
from tests.fixtures.services import ServicesFixture
from tests.mocks.circulation import MockPatronActivityCirculationAPI
from tests.mocks.mock import DummyHTTPClient
from tests.mocks.mock import MockRepresentationHTTPClient


class LoanFixture(CirculationControllerFixture):
Expand Down Expand Up @@ -423,7 +423,7 @@ def test_borrow_success(
# external request to obtain the book.
loan_fixture.pool.open_access = False

http = DummyHTTPClient()
http = MockRepresentationHTTPClient()

fulfillment = FulfillmentInfo(
loan_fixture.pool.collection,
Expand Down Expand Up @@ -602,7 +602,7 @@ def test_borrow_and_fulfill_with_streaming_delivery_mechanism(
assert None == loan.fulfillment

# We can still use the other mechanism too.
http = DummyHTTPClient()
http = MockRepresentationHTTPClient()
http.queue_response(200, content="I am an ACSM file")

loan_fixture.manager.d_circulation.queue_fulfill(
Expand Down
4 changes: 2 additions & 2 deletions tests/manager/api/metadata/test_novelist.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from palace.manager.util.http import HTTP
from tests.fixtures.database import DatabaseTransactionFixture
from tests.fixtures.files import FilesFixture
from tests.mocks.mock import DummyHTTPClient, MockRequestsResponse
from tests.mocks.mock import MockRepresentationHTTPClient, MockRequestsResponse


class NoveListFilesFixture(FilesFixture):
Expand Down Expand Up @@ -216,7 +216,7 @@ def test_get_series_information(self, novelist_fixture: NoveListFixture):

def test_lookup(self, novelist_fixture: NoveListFixture):
# Test the lookup() method.
h = DummyHTTPClient()
h = MockRepresentationHTTPClient()
h.queue_response(200, "text/html", content="yay")

novelist = novelist_fixture.novelist
Expand Down
Loading

0 comments on commit d7ab9fb

Please sign in to comment.