Skip to content

Commit

Permalink
Fixed coverage
Browse files (browse the repository at this point in the history)
  • Loading branch information
mcqueary committed Dec 17, 2023
1 parent 1b7e7e1 commit b4babe6
Show file tree
Hide file tree
Showing 79 changed files with 63,651 additions and 135,684 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ __pycache__/
*.egg-info
data.json
*.log
graver.log.*
.bandit
.env
.idea/
Expand Down
10 changes: 5 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,17 @@ lint: ## run flake8 to check the code
poetry run flake8 $(PACKAGES) tests --count --exit-zero --max-complexity=10 --max-line-length=88 --statistics

install:
poetry install
poetry install --with dev,test

fmt: ## run black to format the code
poetry run isort $(PACKAGES) tests
poetry run black -q --line-length 88 $(PACKAGES) tests

$(VENV)/init: ## init the virtual environment
python3 -m venv $(VENV)
python3.12 -m venv $(VENV)
touch $@
$(VENV)/bin/activate && pip install -U pip
$(VENV)/bin/activate && pip install poetry
. $(VENV)/bin/activate && pip install -U pip
. $(VENV)/bin/activate && pip install poetry

$(VENV)/requirements: requirements.txt $(VENV)/init ## install requirements
$(PIP) install -r $<
Expand All @@ -53,4 +53,4 @@ clean: ## clean up test outputs and other temporary files
rm -f *.db

testclean:
rm -f tests/fixtures/vcr_cassettes/
rm -f tests/fixtures/vcr_cassettes/*
85 changes: 84 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ readme = "README.md"
python = ">=3.8.1,<4.0"
beautifulsoup4 = "4.12.2"
tqdm = "4.66.1"
typer = "0.9.0"
typer = {extras = ["all"], version = "^0.9.0"}


[tool.poetry.group.test.dependencies]
Expand Down
11 changes: 6 additions & 5 deletions src/graver/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,26 @@

__version__ = "0.1.0"

# from .api import MemorialException # noqa
# noinspection PyUnresolvedReferences
from .api import MemorialException # noqa
from .api import (
from graver.api import (
Cemetery,
Driver,
Memorial,
MemorialException,
MemorialMergedException,
MemorialParseException,
MemorialRemovedException,
)

from .constants import * # noqa

__all__ = (
APP_NAME,
FINDAGRAVE_BASE_URL,
MEMORIAL_CANONICAL_URL_FORMAT,
"Cemetery",
"Driver",
"Memorial",
"MemorialException",
"MemorialMergedException",
"MemorialParseException",
"MemorialRemovedException",
)
85 changes: 51 additions & 34 deletions src/graver/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

import requests
from bs4 import BeautifulSoup, Tag
from requests import Response
from requests import RequestException, Response

from .constants import FINDAGRAVE_BASE_URL, FINDAGRAVE_ROWS_PER_PAGE

Expand All @@ -25,7 +25,13 @@


class MemorialException(Exception):
pass
def __init__(self, message):
super().__init__(message)


class MemorialParseException(MemorialException):
def __init__(self, message):
super().__init__(message)


class MemorialMergedException(MemorialException):
Expand Down Expand Up @@ -61,21 +67,24 @@ def __init__(self, **kwargs) -> None:

def get(self, findagrave_url: str, **kwargs) -> Response:
retries = 0
response = self.session.get(findagrave_url, **kwargs)
while (
response.status_code in Driver.recoverable_errors.keys()
and retries < self.max_retries
):
retries += 1
log.warning(
f"Driver: [{response.status_code}: {response.reason}] {findagrave_url} "
f"-- Retrying ({retries} of {self.max_retries}, "
f"timeout={self.retry_ms}ms)"
)
sleep(self.retry_ms / 1000)
try:
response = self.session.get(findagrave_url, **kwargs)
self.num_retries += retries
return response
while (
response.status_code in Driver.recoverable_errors.keys()
and retries < self.max_retries
):
retries += 1
log.warning(
f"Driver: [{response.status_code}: {response.reason}] "
f"{findagrave_url} -- Retrying ({retries} of {self.max_retries}, "
f"timeout={self.retry_ms}ms)"
)
sleep(self.retry_ms / 1000)
response = self.session.get(findagrave_url, **kwargs)
self.num_retries += retries
return response
except requests.exceptions.RequestException as e:
raise e


@dataclass
Expand Down Expand Up @@ -332,17 +341,18 @@ def parse(cls, findagrave_url: str, **kwargs):
return _MemorialParser(findagrave_url, **kwargs).parse()

@classmethod
def get_by_id(cls, grave_id: int):
con = sqlite3.connect(os.getenv("DATABASE_NAME", "graves.db"))
def get_by_id(cls, memorial_id: int):
dbname = os.getenv("DATABASE_NAME", "graves.db")
con = sqlite3.connect(dbname)
con.row_factory = sqlite3.Row

cur = con.cursor()
cur.execute("SELECT * FROM graves WHERE memorial_id=?", (grave_id,))
cur.execute("SELECT * FROM graves WHERE memorial_id=?", (memorial_id,))

record = cur.fetchone()

if record is None:
raise NotFound
raise NotFound(f"memorial_id={memorial_id} not present in {dbname}")

memorial = Memorial(**record) # Row can be unpacked as dict

Expand Down Expand Up @@ -386,23 +396,30 @@ def __init__(self, findagrave_url: str, **kwargs) -> None:
# raise MemorialException(f"Invalid memorial URL: {self.findagrave_url}")

if self.get:
response = self.driver.get(self.findagrave_url)
self.soup = BeautifulSoup(response.content, "html.parser")
try:
response = self.driver.get(self.findagrave_url)
self.soup = BeautifulSoup(response.content, "html.parser")

if response.ok:
self.scrape_canonical_url()
else:
if response.status_code == 404:
if self.check_removed():
msg = f"{self.findagrave_url} has been removed"
raise MemorialRemovedException(msg)
elif (new_url := self.check_merged()) is not None:
msg = f"{self.findagrave_url} has been merged into {new_url}"
raise MemorialMergedException(msg, self.findagrave_url, new_url)
if response.ok:
self.scrape_canonical_url()
else:
if response.status_code == 404:
if self.check_removed():
msg = f"{self.findagrave_url} has been removed"
raise MemorialRemovedException(msg)
elif (new_url := self.check_merged()) is not None:
msg = (
f"{self.findagrave_url} has been merged into {new_url}"
)
raise MemorialMergedException(
msg, self.findagrave_url, new_url
)
else:
response.raise_for_status()
else:
response.raise_for_status()
else:
response.raise_for_status()
except RequestException as ex:
raise MemorialParseException(ex) from ex

if self.scrape:
self.scrape_page()
Expand Down
Loading

0 comments on commit b4babe6

Please sign in to comment.