Skip to content

Commit

Permalink
Merge pull request #19 from mcqueary/feature/coverage
Browse files (browse the repository at this point in the history)
Feature/coverage
  • Loading branch information
mcqueary authored Dec 17, 2023
2 parents ee0c660 + a6fde3f commit c0c04e3
Show file tree
Hide file tree
Showing 81 changed files with 56,761 additions and 36,091 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest,macos-latest, windows-latest]
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
runs-on: ${{ matrix.os }}
steps:
Expand Down Expand Up @@ -45,7 +45,7 @@ jobs:
poetry run flake8 src/graver tests --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Test with pytest and report coverage
run: |
poetry run pytest --cov=graver
poetry run pytest --cov=graver --cov-report term-missing
- name: Publish to coveralls.io
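uses: coverallsapp/github-action@<version>  # tag was obscured by email-obfuscation in this capture; presumably the Coveralls GitHub Action — confirm the pinned version against the repository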
with:
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ __pycache__/
*.egg-info
data.json
*.log
graver.log.*
.bandit
.env
.idea/
Expand Down
13 changes: 8 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.PHONY: test test-unit test-integration run help fmt install-editable lint git-setup clean all commitizen coveralls
.PHONY: test run help fmt install-editable lint git-setup clean testclean all commitizen coveralls

# same as `export PYTHONPATH="$PWD:$PYTHONPATH"`
# see also https://stackoverflow.com/a/18137056
Expand Down Expand Up @@ -29,17 +29,17 @@ lint: ## run flake8 to check the code
poetry run flake8 $(PACKAGES) tests --count --exit-zero --max-complexity=10 --max-line-length=88 --statistics

install:
poetry install
poetry install --with dev,test

fmt: ## run black to format the code
poetry run isort $(PACKAGES) tests
poetry run black -q --line-length 88 $(PACKAGES) tests

$(VENV)/init: ## init the virtual environment
python3 -m venv $(VENV)
python3.12 -m venv $(VENV)
touch $@
$(VENV)/bin/activate && pip install -U pip
$(VENV)/bin/activate && pip install poetry
. $(VENV)/bin/activate && pip install -U pip
. $(VENV)/bin/activate && pip install poetry

$(VENV)/requirements: requirements.txt $(VENV)/init ## install requirements
$(PIP) install -r $<
Expand All @@ -51,3 +51,6 @@ commitizen:
clean: ## clean up test outputs and other temporary files
rm -f *.csv
rm -f *.db

testclean:
rm -f tests/fixtures/vcr_cassettes/*
85 changes: 84 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ readme = "README.md"
python = ">=3.8.1,<4.0"
beautifulsoup4 = "4.12.2"
tqdm = "4.66.1"
typer = "0.9.0"
typer = {extras = ["all"], version = "^0.9.0"}


[tool.poetry.group.test.dependencies]
Expand Down
8 changes: 6 additions & 2 deletions src/graver/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,18 @@

__version__ = "0.1.0"

# from .api import MemorialException # noqa
# noinspection PyUnresolvedReferences
from .api import MemorialException # noqa
from .api import (
from graver.api import (
Cemetery,
Driver,
Memorial,
MemorialException,
MemorialMergedException,
MemorialParseException,
MemorialRemovedException,
)

from .constants import * # noqa

__all__ = (
Expand All @@ -19,5 +22,6 @@
"Memorial",
"MemorialException",
"MemorialMergedException",
"MemorialParseException",
"MemorialRemovedException",
)
94 changes: 55 additions & 39 deletions src/graver/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

import requests
from bs4 import BeautifulSoup, Tag
from requests import Response
from requests import RequestException, Response

from .constants import FINDAGRAVE_BASE_URL, FINDAGRAVE_ROWS_PER_PAGE

Expand All @@ -25,7 +25,13 @@


class MemorialException(Exception):
pass
def __init__(self, message):
super().__init__(message)


class MemorialParseException(MemorialException):
def __init__(self, message):
super().__init__(message)


class MemorialMergedException(MemorialException):
Expand Down Expand Up @@ -61,21 +67,24 @@ def __init__(self, **kwargs) -> None:

def get(self, findagrave_url: str, **kwargs) -> Response:
retries = 0
response = self.session.get(findagrave_url, **kwargs)
while (
response.status_code in Driver.recoverable_errors.keys()
and retries < self.max_retries
):
retries += 1
log.warning(
f"Driver: [{response.status_code}: {response.reason}] {findagrave_url} "
f"-- Retrying ({retries} of {self.max_retries}, "
f"timeout={self.retry_ms}ms)"
)
sleep(self.retry_ms / 1000)
try:
response = self.session.get(findagrave_url, **kwargs)
self.num_retries += retries
return response
while (
response.status_code in Driver.recoverable_errors.keys()
and retries < self.max_retries
):
retries += 1
log.warning(
f"Driver: [{response.status_code}: {response.reason}] "
f"{findagrave_url} -- Retrying ({retries} of {self.max_retries}, "
f"timeout={self.retry_ms}ms)"
)
sleep(self.retry_ms / 1000)
response = self.session.get(findagrave_url, **kwargs)
self.num_retries += retries
return response
except requests.exceptions.RequestException as e:
raise e


@dataclass
Expand Down Expand Up @@ -332,17 +341,18 @@ def parse(cls, findagrave_url: str, **kwargs):
return _MemorialParser(findagrave_url, **kwargs).parse()

@classmethod
def get_by_id(cls, grave_id: int):
con = sqlite3.connect(os.getenv("DATABASE_NAME", "graves.db"))
def get_by_id(cls, memorial_id: int):
dbname = os.getenv("DATABASE_NAME", "graves.db")
con = sqlite3.connect(dbname)
con.row_factory = sqlite3.Row

cur = con.cursor()
cur.execute("SELECT * FROM graves WHERE memorial_id=?", (grave_id,))
cur.execute("SELECT * FROM graves WHERE memorial_id=?", (memorial_id,))

record = cur.fetchone()

if record is None:
raise NotFound
raise NotFound(f"memorial_id={memorial_id} not present in {dbname}")

memorial = Memorial(**record) # Row can be unpacked as dict

Expand Down Expand Up @@ -381,29 +391,35 @@ def __init__(self, findagrave_url: str, **kwargs) -> None:
self.soup = None
self.m: dict = {}

# Valid URL but not a Memorial
# if "/memorial/" not in self.findagrave_url:
# raise MemorialException(f"Invalid memorial URL: {self.findagrave_url}")

if self.get:
response = self.driver.get(self.findagrave_url)
self.soup = BeautifulSoup(response.content, "html.parser")
try:
response = self.driver.get(self.findagrave_url)
self.soup = BeautifulSoup(response.content, "html.parser")

if response.ok:
self.scrape_canonical_url()
# Valid URL but not a Memorial
if "/memorial/" not in self.findagrave_url:
raise MemorialException(
f"Invalid memorial URL: {self.findagrave_url}"
)
else:
if response.status_code == 404:
if self.check_removed():
msg = f"{self.findagrave_url} has been removed"
raise MemorialRemovedException(msg)
elif (new_url := self.check_merged()) is not None:
msg = f"{self.findagrave_url} has been merged into {new_url}"
raise MemorialMergedException(msg, self.findagrave_url, new_url)
if response.ok:
self.scrape_canonical_url()
else:
if response.status_code == 404:
if self.check_removed():
msg = f"{self.findagrave_url} has been removed"
raise MemorialRemovedException(msg)
elif (new_url := self.check_merged()) is not None:
msg = (
f"{self.findagrave_url} has been merged into {new_url}"
)
raise MemorialMergedException(
msg, self.findagrave_url, new_url
)
else:
response.raise_for_status()
else:
response.raise_for_status()
else:
response.raise_for_status()
except RequestException as ex:
raise MemorialParseException(ex) from ex

if self.scrape:
self.scrape_page()
Expand Down
Loading

0 comments on commit c0c04e3

Please sign in to comment.