Add feed download command (#21)
* Add feed download commands

* Update README
jonathangreen authored Mar 8, 2024
1 parent 6c6a66d commit 51b95d9
Showing 8 changed files with 470 additions and 1 deletion.
7 changes: 7 additions & 0 deletions README.md
@@ -25,6 +25,13 @@ manifest conforming to the [Audiobook Profile](https://github.com/readium/webpub
a local directory containing audiobook manifests and their associated media files.
- Note: This application uses `python-vlc` which requires VLC to be installed on
the system. The VLC installation can be found [here](https://www.videolan.org/vlc/).
- `download-feed` - Download various feeds for local inspection (example invocations below).
  - `opds2`
    - Download an OPDS 2 or OPDS 2 + ODL feed.
  - `overdrive`
    - Download Overdrive feeds.
  - `axis`
    - Download the B&T Axis 360 availability feed.
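
  A hypothetical invocation sketch (the `download-feed` entry point comes from `pyproject.toml`; the URL, credentials, and file names below are placeholders, not real endpoints):

  ```shell
  # Download an OPDS 2 feed with basic auth (placeholder URL and credentials)
  download-feed opds2 --username USER --password PASS https://example.org/feed.json feed.json

  # Download the Axis 360 availability feed, converted to pretty-printed JSON
  download-feed axis -u USER -p PASS -l LIBRARY_ID --json availability.json
  ```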

### Library Support

24 changes: 23 additions & 1 deletion poetry.lock


3 changes: 3 additions & 0 deletions pyproject.toml
@@ -37,16 +37,19 @@ rich = "^13.7.1"
textual = "^0.52.1"
typer = "^0.9.0"
typing_extensions = {version = "^4.9.0", python = "<3.11"}
xmltodict = "^0.13.0"

[tool.poetry.group.ci.dependencies]
pre-commit = "^3.6.1"

[tool.poetry.group.dev.dependencies]
mypy = "^1.8.0"
types-pytz = "^2024.1.0.20240203"
types-xmltodict = "^0.13.0.3"

[tool.poetry.scripts]
audiobook-manifest-summary = "palace_tools.cli.summarize_rwpm_audio_manifest:main"
download-feed = "palace_tools.cli.download_feed:main"
fetch-lcp = "palace_tools.cli.fetch_lcp:main"
palace-terminal = "palace_tools.cli.palace_terminal:main"
patron-bookshelf = "palace_tools.cli.patron_bookshelf:main"
110 changes: 110 additions & 0 deletions src/palace_tools/cli/download_feed.py
@@ -0,0 +1,110 @@
import asyncio
import json
from pathlib import Path
from xml.dom import minidom

import typer
import xmltodict

from palace_tools.feeds import axis, opds, overdrive
from palace_tools.feeds.opds import write_json
from palace_tools.utils.typer import run_typer_app_as_main

app = typer.Typer()


@app.command("axis")
def download_axis(
username: str = typer.Option(..., "--username", "-u", help="Username"),
password: str = typer.Option(..., "--password", "-p", help="Password"),
library_id: str = typer.Option(..., "-l", "--library-id", help="Library ID"),
output_json: bool = typer.Option(False, "-j", "--json", help="Output JSON file"),
qa_endpoint: bool = typer.Option(False, "-q", "--qa", help="Use QA Endpoint"),
output_file: Path = typer.Argument(
..., help="Output file", writable=True, file_okay=True, dir_okay=False
),
) -> None:
"""Download B&T Axis 360 feed."""

# Find the base URL to use
base_url = axis.PRODUCTION_BASE_URL if not qa_endpoint else axis.QA_BASE_URL

# Fetch the document as XML
xml = axis.availability(base_url, username, password, library_id)

with output_file.open("w") as file:
if output_json:
xml_dict = xmltodict.parse(xml)
file.write(json.dumps(xml_dict, indent=4))
else:
parsed = minidom.parseString(xml)
file.write(parsed.toprettyxml())


@app.command("overdrive")
def download_overdrive(
client_key: str = typer.Option(..., "-k", "--client-key", help="Client Key"),
client_secret: str = typer.Option(
..., "-s", "--client-secret", help="Client Secret"
),
library_id: str = typer.Option(..., "-l", "--library-id", help="Library ID"),
parent_library_id: str = typer.Option(
None,
"-p",
"--parent-library-id",
help="Parent Library ID (for Advantage Accounts)",
),
fetch_metadata: bool = typer.Option(
False, "-m", "--metadata", help="Fetch metadata"
),
fetch_availability: bool = typer.Option(
False, "-a", "--availability", help="Fetch availability"
),
qa_endpoint: bool = typer.Option(False, "-q", "--qa", help="Use QA Endpoint"),
connections: int = typer.Option(
20, "-c", "--connections", help="Number of connections to use"
),
output_file: Path = typer.Argument(
..., help="Output file", writable=True, file_okay=True, dir_okay=False
),
) -> None:
"""Download Overdrive feed."""
base_url = overdrive.QA_BASE_URL if qa_endpoint else overdrive.PROD_BASE_URL
products = asyncio.run(
overdrive.fetch(
base_url,
client_key,
client_secret,
library_id,
parent_library_id,
fetch_metadata,
fetch_availability,
connections,
)
)

with output_file.open("w") as file:
file.write(json.dumps(products, indent=4))


@app.command("opds2")
def download_opds(
username: str = typer.Option(None, "--username", "-u", help="Username"),
password: str = typer.Option(None, "--password", "-p", help="Password"),
url: str = typer.Argument(..., help="URL of feed", metavar="URL"),
output_file: Path = typer.Argument(
..., help="Output file", writable=True, file_okay=True, dir_okay=False
),
) -> None:
"""Download OPDS 2 feed."""
publications = opds.fetch(url, username, password)
with output_file.open("w") as file:
write_json(file, publications)


def main() -> None:
run_typer_app_as_main(app, prog_name="download-feed")


if __name__ == "__main__":
main()
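
For reference, a minimal sketch of the XML-to-JSON conversion performed by the `axis` command above, run against a toy document rather than a real Axis 360 feed (the sample XML string is made up for illustration):

```python
import json
from xml.dom import minidom

import xmltodict

# Toy stand-in for the availability XML returned by the Axis 360 API.
sample_xml = "<availability><title>Example</title><status>ACTIVE</status></availability>"

# --json path: parse the XML into a dict and serialize it as indented JSON.
as_dict = xmltodict.parse(sample_xml)
print(json.dumps(as_dict, indent=4))

# Default path: pretty-print the XML for easier reading.
print(minidom.parseString(sample_xml).toprettyxml())
```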
Empty file.
43 changes: 43 additions & 0 deletions src/palace_tools/feeds/axis.py
@@ -0,0 +1,43 @@
import base64
import json
import sys

import httpx

PRODUCTION_BASE_URL = "https://axis360api.baker-taylor.com/Services/VendorAPI/"
QA_BASE_URL = "https://axis360apiqa.baker-taylor.com/Services/VendorAPI/"

access_token_endpoint = "accesstoken"
availability_endpoint = "availability/v2"


def get_headers(
base_url: str, username: str, password: str, library_id: str
) -> dict[str, str]:
authorization_str = ":".join([username, password, library_id])
authorization_bytes = authorization_str.encode("utf_16_le")
authorization_b64 = base64.standard_b64encode(authorization_bytes)
resp = httpx.post(
base_url + access_token_endpoint,
headers={"Authorization": f"Basic {authorization_b64.decode('utf-8')}"},
)
if resp.status_code != 200:
print(f"Error: {resp.status_code}")
print(f"Headers: {json.dumps(dict(resp.headers), indent=4)}")
print(resp.text)
sys.exit(-1)
return {
"Authorization": "Bearer " + resp.json()["access_token"],
"Library": library_id,
}


def availability(base_url: str, username: str, password: str, library_id: str) -> str:
headers = get_headers(base_url, username, password, library_id)
resp = httpx.get(
base_url + availability_endpoint,
headers=headers,
params={"updatedDate": "1970-01-01 00:00:00"},
timeout=30.0,
)
return resp.text
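
A minimal sketch of how the Basic authorization value in `get_headers` is constructed: the colon-joined credentials are encoded as UTF-16-LE and then base64-encoded (the credentials below are placeholders):

```python
import base64

# Placeholder credentials, not real Axis 360 account values.
username, password, library_id = "user", "secret", "1234"

raw = ":".join([username, password, library_id]).encode("utf_16_le")
token = base64.standard_b64encode(raw).decode("utf-8")

headers = {"Authorization": f"Basic {token}"}
print(headers)
```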
64 changes: 64 additions & 0 deletions src/palace_tools/feeds/opds.py
@@ -0,0 +1,64 @@
from __future__ import annotations

import json
import math
import sys
from typing import Any, TextIO

import httpx
from rich.progress import MofNCompleteColumn, Progress, SpinnerColumn


def make_request(session: httpx.Client, url: str) -> dict[str, Any]:
response = session.get(url)
if response.status_code != 200:
print(f"Error: {response.status_code}")
print(f"Headers: {json.dumps(dict(response.headers), indent=4)}")
print(response.text)
sys.exit(-1)
return response.json() # type: ignore[no-any-return]


def write_json(file: TextIO, data: list[dict[str, Any]]) -> None:
file.write(json.dumps(data, indent=4))


def fetch(url: str, username: str | None, password: str | None) -> list[dict[str, Any]]:
# Create a session to fetch the documents
client = httpx.Client()

client.headers.update({"Accept": "application/opds+json", "User-Agent": "Palace"})
client.timeout = httpx.Timeout(30.0)

if username and password:
client.auth = httpx.BasicAuth(username, password)

publications = []

# Get the first page
response = make_request(client, url)
items = response.get("metadata", {}).get("numberOfItems")
items_per_page = response.get("metadata", {}).get("itemsPerPage")

if items is None or items_per_page is None:
pages = None
else:
pages = math.ceil(items / items_per_page)

# Fetch the rest of the pages:
next_url: str | None = url
with Progress(
SpinnerColumn(), *Progress.get_default_columns(), MofNCompleteColumn()
) as progress:
        download_task = progress.add_task("Downloading Feed", total=pages)
while next_url is not None:
response = make_request(client, next_url)
publications.extend(response["publications"])
next_url = None
for link in response["links"]:
if link["rel"] == "next":
next_url = link["href"]
break
progress.update(download_task, advance=1)

return publications
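
A hypothetical driver for `opds.fetch`, mirroring what the `opds2` CLI command does; the feed URL, credentials, and output path are placeholders:

```python
from palace_tools.feeds import opds
from palace_tools.feeds.opds import write_json

# Placeholder feed URL and credentials, for illustration only.
publications = opds.fetch("https://example.org/opds2/feed.json", "user", "secret")

with open("feed.json", "w") as file:
    write_json(file, publications)

print(f"Downloaded {len(publications)} publications")
```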
src/palace_tools/feeds/overdrive.py (diff not rendered in this view)
