Skip to content

Commit

Permalink
Adding and running ruff
Browse files Browse the repository at this point in the history
  • Loading branch information
whubsch committed Aug 9, 2024
1 parent a0e1f01 commit 227cf3b
Show file tree
Hide file tree
Showing 12 changed files with 93 additions and 147 deletions.
Binary file removed .DS_Store
Binary file not shown.
10 changes: 0 additions & 10 deletions .github/workflows/black.yml

This file was deleted.

12 changes: 12 additions & 0 deletions .github/workflows/format.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
name: formatter

on: [push, pull_request]

jobs:
ruff:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: chartboost/ruff-action@v1
with:
args: "format --check"
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,5 @@ pip-delete-this-directory.txt
*scratch*.py
scripts/test*.py
.pypirc
.coverage
.coverage
*.DS_Store
1 change: 1 addition & 0 deletions __init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Convert raw address and phone number strings into the OSM format."""
39 changes: 26 additions & 13 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,20 @@ license = "MIT"
keywords = ["osm", "openstreetmap", "mapping", "address", "geocoding"]
authors = [{ name = "Will", email = "[email protected]" }]
classifiers = [
"Development Status :: 4 - Beta",
"Programming Language :: Python",
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
"License :: OSI Approved :: MIT License",
"Typing :: Typed",
"Development Status :: 4 - Beta",
"Programming Language :: Python",
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
"License :: OSI Approved :: MIT License",
"Typing :: Typed",
]
dependencies = ["usaddress", "regex"]
dependencies = ["usaddress>0.5.10", "regex>2024.4.16"]

[project.urls]
Documentation = "https://whubsch.github.io/atlus/index.html"
Expand All @@ -52,3 +52,16 @@ tests = ["tests", "*/atlus/tests"]

[tool.coverage.report]
exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"]

[tool.ruff]
fix = true

[tool.ruff.lint]
select = ["D", "E", "F", "I"]
pydocstyle.convention = "google"
exclude = ["tests/*", "scripts/*"]
isort.split-on-trailing-comma = false

[tool.ruff.format]
docstring-code-format = true
skip-magic-trailing-comma = true
Binary file removed src/.DS_Store
Binary file not shown.
17 changes: 10 additions & 7 deletions src/atlus/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
"""`atlus` is a Python package to convert raw address and phone number strings into the OSM format.
It's designed to be used with US and Canadian phone numbers and addresses.
"""Convert raw address and phone number strings into the OSM format.
`atlus` is a Python package to convert raw address and phone number strings into
the OSM format. It's designed to be used with US and Canadian phone numbers and
addresses.
```python
>>> import atlus
>>> atlus.abbrs("St. Francis")
"Saint Francis"
>>> atlus.get_address("789 Oak Dr, Smallville California, 98765")[0]
{"addr:housenumber": "789", "addr:street": "Oak Drive", "addr:city": "Smallville", "addr:state": "CA", "addr:postcode": "98765"}
{"addr:housenumber": "789", "addr:street": "Oak Drive", "addr:city": "Smallville",
"addr:state": "CA", "addr:postcode": "98765"}
>>> atlus.get_phone("(202) 900-9019")
"+1 202-900-9019"
```
Expand All @@ -17,18 +21,17 @@
#
# SPDX-License-Identifier: MIT

from . import atlus, resources
from .atlus import (
abbrs,
get_address,
get_phone,
abbrs,
get_title,
mc_replace,
us_replace,
ord_replace,
remove_br_unicode,
us_replace,
)
from . import atlus
from . import resources

__all__ = [
"get_address",
Expand Down
66 changes: 22 additions & 44 deletions src/atlus/atlus.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,27 @@
"""Functions and tools to process the raw address strings."""

from collections import Counter
from typing import Union, List, Dict, Tuple
from pydantic import ValidationError
import usaddress
from typing import Dict, List, Tuple, Union

import regex
import usaddress
from pydantic import ValidationError

from .objects import Address
from .resources import (
street_expand,
direction_expand,
name_expand,
state_expand,
saint_comp,
abbr_join_comp,
dir_fill_comp,
sr_comp,
usa_comp,
paren_comp,
direction_expand,
grid_comp,
name_expand,
paren_comp,
post_comp,
saint_comp,
sr_comp,
state_expand,
street_comp,
street_expand,
usa_comp,
)

toss_tags = [
Expand Down Expand Up @@ -179,7 +180,7 @@ def lower_match(match: regex.Match) -> str:
"""Lower-case improperly cased ordinal values.
Args:
value: String to fix.
match: Regex match to fix.
Returns:
str: Fixed string.
Expand Down Expand Up @@ -213,39 +214,22 @@ def abbrs(value: str) -> str:
value = ord_replace(us_replace(mc_replace(get_title(value))))

# change likely 'St' to 'Saint'
value = saint_comp.sub(
"Saint",
value,
)
value = saint_comp.sub("Saint", value)

# expand common street and word abbreviations
value = abbr_join_comp.sub(
name_street_expand,
value,
)
value = abbr_join_comp.sub(name_street_expand, value)

# expand directionals
value = dir_fill_comp.sub(
direct_expand,
value,
)
value = dir_fill_comp.sub(direct_expand, value)

# normalize 'US'
value = us_replace(value)

# uppercase shortened street descriptors
value = regex.sub(
r"\b(C[rh]|S[rh]|[FR]m|Us)\b",
cap_match,
value,
)
value = regex.sub(r"\b(C[rh]|S[rh]|[FR]m|Us)\b", cap_match, value)

# remove unremoved abbr periods
value = regex.sub(
r"([a-zA-Z]{2,})\.",
r"\1",
value,
)
value = regex.sub(r"([a-zA-Z]{2,})\.", r"\1", value)

# expand 'SR' if no other street types
value = sr_comp.sub("State Route", value)
Expand Down Expand Up @@ -311,7 +295,7 @@ def addr_housenumber(tags: Dict[str, str]) -> str:


def _combine_consecutive_tuples(
tuples_list: List[Tuple[str, str]]
tuples_list: List[Tuple[str, str]],
) -> List[Tuple[str, str]]:
"""Join adjacent `usaddress` fields."""
combined_list = []
Expand Down Expand Up @@ -405,9 +389,7 @@ def remove_prefix(text: str, prefix: str) -> str:
return text


def get_address(
address_string: str,
) -> Tuple[Dict[str, str], List[Union[str, None]]]:
def get_address(address_string: str) -> Tuple[Dict[str, str], List[Union[str, None]]]:
"""Process address strings.
```python
Expand Down Expand Up @@ -449,10 +431,7 @@ def get_address(

if "addr:street" in cleaned:
street = abbrs(cleaned["addr:street"])
cleaned["addr:street"] = street_comp.sub(
"Street",
street,
).strip(".")
cleaned["addr:street"] = street_comp.sub("Street", street).strip(".")

if "addr:city" in cleaned:
cleaned["addr:city"] = abbrs(get_title(cleaned["addr:city"], single_word=True))
Expand Down Expand Up @@ -509,8 +488,7 @@ def get_phone(phone: str) -> str:
ValueError: If the phone number is invalid.
"""
phone_valid = regex.search(
r"^\(?(?:\+? ?1?[ -.]*)?(?:\(?(\d{3})\)?[ -.]*)(\d{3})[ -.]*(\d{4})$",
phone,
r"^\(?(?:\+? ?1?[ -.]*)?(?:\(?(\d{3})\)?[ -.]*)(\d{3})[ -.]*(\d{4})$", phone
)
if phone_valid:
return (
Expand Down
3 changes: 2 additions & 1 deletion src/atlus/objects.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Define objects for parsing fields."""

from typing import Union, Optional
from typing import Optional, Union

from pydantic import BaseModel, Field


Expand Down
14 changes: 5 additions & 9 deletions src/atlus/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -486,24 +486,20 @@
# pre-compile regex for speed
ABBR_JOIN = "|".join({**name_expand, **street_expand})
abbr_join_comp = regex.compile(
rf"(\b(?:{ABBR_JOIN})\b\.?)(?!')",
flags=regex.IGNORECASE,
rf"(\b(?:{ABBR_JOIN})\b\.?)(?!')", flags=regex.IGNORECASE
)

DIR_FILL = "|".join(r"\.?".join(list(abbr)) for abbr in direction_expand)
st_ave = r" (?:Street|Avenue)"
dir_fill_comp = regex.compile(
rf"(?<!(?:^(?:Avenue) |[\.']))(\b(?:{DIR_FILL})\b\.?)(?!(?:\.?[a-zA-Z]| (?:Street|Avenue)))",
rf"(?<!(?:^(?:Avenue) |[\.']))(\b(?:{DIR_FILL})\b\.?)(?!(?:\.?[a-zA-Z]|{st_ave}))",
flags=regex.IGNORECASE,
)

sr_comp = regex.compile(
r"(\bS\.?R\b\.?)(?= \d+)",
flags=regex.IGNORECASE,
)
sr_comp = regex.compile(r"(\bS\.?R\b\.?)(?= \d+)", flags=regex.IGNORECASE)

saint_comp = regex.compile(
rf"^(St\.?)(?= )|(\bSt\.?)(?= (?:{'|'.join(saints)}))",
flags=regex.IGNORECASE,
rf"^(St\.?)(?= )|(\bSt\.?)(?= (?:{'|'.join(saints)}))", flags=regex.IGNORECASE
)

street_comp = regex.compile(
Expand Down
Loading

0 comments on commit 227cf3b

Please sign in to comment.