Skip to content

Commit

Permalink
Merge pull request #8 from gwax/gwax/pydantic
Browse files Browse the repository at this point in the history
Updates and switch to pydantic
  • Loading branch information
gwax authored Jun 5, 2022
2 parents ff6760a + 276e05e commit 55f0537
Show file tree
Hide file tree
Showing 38 changed files with 2,205 additions and 1,888 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
/*.xlsx
/*.xlsx.bak*

# Profiling data
*.prof

# Created by https://www.gitignore.io/api/python

### Python ###
Expand Down
1 change: 0 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ repos:
name: Check import order
language: system
entry: isort
args: [--apply, --recursive]
types: [python]
- id: pylint
name: Static analysis from pylint
Expand Down
4 changes: 2 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ script:

jobs:
include:
- python: "3.6"
- python: "3.7"
- python: "3.8"
- python: "3.9"
- python: "3.10"

- stage: lint
python: "3.8.5"
Expand Down
1 change: 1 addition & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ Changelog
Upcoming
--------

- Switch over to pydantic for Scryfall data deserialization
- Bug fixes for newer sets that do not include a numeric component to their
collector numbers.

Expand Down
8 changes: 2 additions & 6 deletions mtg_ssm/containers/bundles.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
"""Data bundle definitions."""

from typing import List
from typing import NamedTuple
from typing import Set
from typing import List, NamedTuple, Set

from mtg_ssm.scryfall.models import ScryCard
from mtg_ssm.scryfall.models import ScrySet
from mtg_ssm.scryfall.models import ScrySetType
from mtg_ssm.scryfall.models import ScryCard, ScrySet, ScrySetType


class ScryfallDataSet(NamedTuple):
Expand Down
11 changes: 4 additions & 7 deletions mtg_ssm/containers/counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,7 @@

import collections
import enum
from typing import Any
from typing import Dict
from typing import Iterable
from typing import MutableMapping
from typing import Any, Dict, Iterable, MutableMapping
from uuid import UUID

from mtg_ssm.containers import legacy
Expand All @@ -15,8 +12,8 @@
class CountType(enum.Enum):
"""Enum for possible card printing types (nonfoil, foil)."""

nonfoil = enum.auto()
foil = enum.auto()
NONFOIL = "nonfoil"
FOIL = "foil"


ScryfallCardCount = Dict[UUID, MutableMapping[CountType, int]]
Expand All @@ -38,7 +35,7 @@ def aggregate_card_counts(
scryfall_id = UUID(scryfall_id)
counts = card_counts.get(scryfall_id, {})
for count_type in CountType:
value = int(card_row.get(count_type.name) or 0)
value = int(card_row.get(count_type.value) or 0)
if value:
counts[count_type] = value + counts.get(count_type, 0)
if counts:
Expand Down
10 changes: 2 additions & 8 deletions mtg_ssm/containers/indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,12 @@

import collections
import string
from typing import Dict
from typing import Iterable
from typing import List
from typing import Optional
from typing import Set
from typing import Tuple
from typing import Dict, Iterable, List, Optional, Set, Tuple
from uuid import UUID

from mtg_ssm.containers.bundles import ScryfallDataSet
from mtg_ssm.mtg import util
from mtg_ssm.scryfall.models import ScryCard
from mtg_ssm.scryfall.models import ScrySet
from mtg_ssm.scryfall.models import ScryCard, ScrySet


def name_card_sort_key(card: ScryCard) -> Tuple[str, int, str]:
Expand Down
7 changes: 1 addition & 6 deletions mtg_ssm/containers/legacy.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
"""Legacy record lookup capabilities for older file versions."""

from typing import Any
from typing import Dict
from typing import List
from typing import Optional
from typing import Set
from typing import Tuple
from typing import Any, Dict, List, Optional, Set, Tuple
from uuid import UUID

from mtg_ssm.containers.indexes import Oracle
Expand Down
3 changes: 1 addition & 2 deletions mtg_ssm/mtg/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@

import functools
import string
from typing import Optional
from typing import Tuple
from typing import Optional, Tuple

from mtg_ssm.scryfall.models import ScryCard

Expand Down
64 changes: 24 additions & 40 deletions mtg_ssm/scryfall/fetcher.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,18 @@
"""Scryfall data fetcher."""

from concurrent.futures import ProcessPoolExecutor
import gzip
import json
import os
import pickle
from typing import Any
from typing import List
from typing import Mapping
from typing import Union
from typing import cast
import uuid
from concurrent.futures import ProcessPoolExecutor
from typing import Any, List, Mapping, Union, cast

import appdirs
import requests

from mtg_ssm.containers.bundles import ScryfallDataSet
from mtg_ssm.scryfall import schema
from mtg_ssm.scryfall.models import ScryBulkData
from mtg_ssm.scryfall.models import ScryCard
from mtg_ssm.scryfall.models import ScryObject
from mtg_ssm.scryfall.models import ScryObjectList
from mtg_ssm.scryfall.models import ScrySet
from mtg_ssm.scryfall.models import ScryBulkData, ScryCard, ScryObjectList, ScrySet

DEBUG = os.getenv("DEBUG", "0")

Expand All @@ -36,20 +27,21 @@

CHUNK_SIZE = 8 * 1024 * 1024
DESERIALIZE_BATCH_SIZE = 50
_OBJECT_SCHEMA = schema.ScryfallUberSchema()

JSON = Union[str, int, float, bool, None, Mapping[str, Any], List[Any]]


def _cache_path(endpoint: str) -> str:
def _cache_path(endpoint: str, extension: str) -> str:
if not extension.startswith("."):
extension = "." + extension
cache_id = uuid.uuid5(uuid.NAMESPACE_URL, endpoint)
return os.path.join(CACHE_DIR, str(cache_id))
return os.path.join(CACHE_DIR, f"{cache_id}{extension}")


def _fetch_endpoint(endpoint: str, *, dirty: bool, write_cache: bool = True) -> JSON:
print(f"Retrieving {endpoint}")
os.makedirs(CACHE_DIR, exist_ok=True)
cache_path = _cache_path(endpoint)
cache_path = _cache_path(endpoint, ".json.gz")
if not os.path.exists(cache_path):
dirty = True
if dirty:
Expand All @@ -59,7 +51,7 @@ def _fetch_endpoint(endpoint: str, *, dirty: bool, write_cache: bool = True) ->
if not write_cache:
return response.json()
print(f"Caching {endpoint}")
with gzip.open(cache_path, "wb") as cache_file:
with gzip.open(cache_path, "wb", compresslevel=1) as cache_file:
for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
cache_file.write(chunk)
else:
Expand All @@ -69,60 +61,52 @@ def _fetch_endpoint(endpoint: str, *, dirty: bool, write_cache: bool = True) ->
return json.load(cache_file)


def _deserialize_object(obj_json: JSON) -> Union[ScryObject, List[ScryObject]]:
return _OBJECT_SCHEMA.load(obj_json).data


def _deserialize_cards(card_jsons: List[JSON]) -> List[ScryCard]:
cards_data: List[ScryCard]
if DEBUG == "1":
print("Process pool disabled")
cards_data = []
for card_json in card_jsons:
try:
cards_data.append(cast(ScryCard, _deserialize_object(card_json)))
cards_data.append(ScryCard.parse_obj(card_json))
except Exception:
print("Failed on: ", repr(card_json))
raise
else:
with ProcessPoolExecutor() as executor:
cards_futures = executor.map(
_deserialize_object, card_jsons, chunksize=DESERIALIZE_BATCH_SIZE
ScryCard.parse_obj, card_jsons, chunksize=DESERIALIZE_BATCH_SIZE
)
cards_data = cast(List[ScryCard], list(cards_futures))
cards_data = list(cards_futures)
return cards_data


def scryfetch() -> ScryfallDataSet:
"""Retrieve and deserialize Scryfall object data."""
cached_bulk_json = None
if os.path.exists(_cache_path(BULK_DATA_ENDPOINT)):
if os.path.exists(_cache_path(BULK_DATA_ENDPOINT, ".json.gz")):
cached_bulk_json = _fetch_endpoint(BULK_DATA_ENDPOINT, dirty=False)
bulk_json = _fetch_endpoint(BULK_DATA_ENDPOINT, dirty=True, write_cache=False)
cache_dirty = bulk_json != cached_bulk_json

bulk_list: ScryObjectList = cast(ScryObjectList, _deserialize_object(bulk_json))
sets_list = cast(
ScryObjectList,
_deserialize_object(_fetch_endpoint(SETS_ENDPOINT, dirty=cache_dirty)),
bulk_list = ScryObjectList[ScryBulkData].parse_obj(bulk_json)
sets_list = ScryObjectList[ScrySet].parse_obj(
_fetch_endpoint(SETS_ENDPOINT, dirty=cache_dirty)
)
sets_data = cast(List[ScrySet], sets_list.data)
sets_data = list(sets_list.data)
while sets_list.has_more:
sets_list = cast(
ScryObjectList,
_deserialize_object(
_fetch_endpoint(str(sets_list.next_page), dirty=cache_dirty)
),
sets_list = ScryObjectList[ScrySet].parse_obj(
_fetch_endpoint(str(sets_list.next_page), dirty=cache_dirty)
)
sets_data += cast(List[ScrySet], sets_list.data)
sets_data += sets_list.data

bulk_data = cast(List[ScryBulkData], bulk_list.data)
bulk_data = bulk_list.data
[cards_endpoint] = [bd.download_uri for bd in bulk_data if bd.type == BULK_TYPE]
cards_json = cast(List[JSON], _fetch_endpoint(cards_endpoint, dirty=cache_dirty))

_fetch_endpoint(BULK_DATA_ENDPOINT, dirty=cache_dirty, write_cache=True)

object_cache_path = _cache_path(OBJECT_CACHE_URL)
object_cache_path = _cache_path(OBJECT_CACHE_URL, ".pickle.gz")
if os.path.exists(object_cache_path):
if cache_dirty or DEBUG == "1":
os.remove(object_cache_path)
Expand All @@ -140,6 +124,6 @@ def scryfetch() -> ScryfallDataSet:
cards_data = _deserialize_cards(cards_json)

scryfall_data = ScryfallDataSet(sets=sets_data, cards=cards_data)
with gzip.open(object_cache_path, "wb") as object_cache:
pickle.dump(scryfall_data, object_cache)
with gzip.open(object_cache_path, "wb", compresslevel=1) as object_cache:
pickle.dump(scryfall_data, object_cache, protocol=pickle.HIGHEST_PROTOCOL)
return scryfall_data
Loading

0 comments on commit 55f0537

Please sign in to comment.