From 38330e615c37cfe4cab597fa5de13084ee901cda Mon Sep 17 00:00:00 2001 From: Will Date: Fri, 26 Jul 2024 06:59:57 -0400 Subject: [PATCH] Improving testing and docs --- src/atlus/__init__.py | 40 ++++++++++++++++++++++++++++++++++++- src/atlus/atlus.py | 22 +++++++++++++-------- src/atlus/objects.py | 46 +++++++++++++++++++++++++++++++++++++++++++ tests/test.py | 41 +++++++++++++++++++++++++++++++------- 4 files changed, 133 insertions(+), 16 deletions(-) create mode 100644 src/atlus/objects.py diff --git a/src/atlus/__init__.py b/src/atlus/__init__.py index 89790d8..0d4344f 100644 --- a/src/atlus/__init__.py +++ b/src/atlus/__init__.py @@ -1,6 +1,44 @@ -"""`atlus` is a Python package to convert raw address strings into the OSM format. +"""`atlus` is a Python package to convert raw address and phone number strings into the OSM format. +It's designed to be used with US and Canadian phone numbers and addresses. + +```python +>> import atlus +>> atlus.abbrs("St. Francis") +# "Saint Francis" +>> atlus.get_address("789 Oak Dr, Smallville California, 98765")[0] +# {"addr:housenumber": "789", "addr:street": "Oak Drive", "addr:city": "Smallville", "addr:state": "CA", "addr:postcode": "98765"} +>> atlus.get_phone("(202) 900-9019") +# "+1 202-900-9019" +``` + """ # SPDX-FileCopyrightText: 2024-present Will # # SPDX-License-Identifier: MIT + +from .atlus import ( + get_address, + get_phone, + abbrs, + get_title, + mc_replace, + us_replace, + ord_replace, + clean, +) +from . import atlus +from . 
import resources + +__all__ = [ + "get_address", + "get_phone", + "abbrs", + "get_title", + "mc_replace", + "us_replace", + "ord_replace", + "clean", + "atlus", + "resources", +] diff --git a/src/atlus/atlus.py b/src/atlus/atlus.py index fc2be9d..b841042 100644 --- a/src/atlus/atlus.py +++ b/src/atlus/atlus.py @@ -1,7 +1,7 @@ """Functions and tools to process the raw address strings.""" from collections import Counter -from typing import OrderedDict, Union, List, Dict, Tuple +from typing import Union, List, Dict, Tuple import usaddress import regex from .resources import ( @@ -362,21 +362,27 @@ def collapse_list(seq: list) -> list: def get_address( address_string: str, -) -> Tuple[OrderedDict[str, Union[str, int]], List[Union[str, None]]]: +) -> Tuple[Dict[str, Union[str, int]], List[Union[str, None]]]: """Process address strings. ```python - >> get_address("345 MAPLE RD, COUNTRYSIDE, PA 24680-0198") - # {"addr:housenumber": "345", "addr:street": "Maple Road", "addr:city": "Countryside", "addr:state": "PA", "addr:postcode": "24680-0198"} - >> get_address("777 Strawberry St.") - # {"addr:housenumber": "777", "addr:street": "Strawberry Street",} + >> get_address("345 MAPLE RD, COUNTRYSIDE, PA 24680-0198")[0] + # {"addr:housenumber": "345", "addr:street": "Maple Road", + "addr:city": "Countryside", "addr:state": "PA", "addr:postcode": "24680-0198"} + >> get_address("777 Strawberry St.")[0] + # {"addr:housenumber": "777", "addr:street": "Strawberry Street"} + >> address = get_address("222 NW Pineapple Ave Suite A Unit B") + >> address[0] + # {"addr:housenumber": "222", "addr:street": "Northwest Pineapple Avenue"} + >> address[1] + # ["addr:unit"] ``` Args: address_string (str): The address string to process. Returns: - Tuple[OrderedDict[str, Union[str, int]], List[Union[str, None]]]: + Tuple[Dict[str, Union[str, int]], List[Union[str, None]]]: The processed address string and the removed fields. 
""" address_string = clean(address_string) @@ -431,7 +437,7 @@ def get_address( r"\1", cleaned["addr:postcode"] ).replace(" ", "-") - return cleaned, removed + return dict(cleaned), removed def get_phone(phone: str) -> str: diff --git a/src/atlus/objects.py b/src/atlus/objects.py new file mode 100644 index 0000000..a67dc4e --- /dev/null +++ b/src/atlus/objects.py @@ -0,0 +1,46 @@ +"""Define objects for parsing fields.""" + +from pydantic import BaseModel, Field + + +class Address(BaseModel): + """Define address parsing fields.""" + + addr_housenumber: int | str | None = Field( + alias="addr:housenumber", + description="The house number that is included in the address.", + examples=[200, "1200-29"], + default=None, + ) + addr_street: str | None = Field( + alias="addr:street", + description="The street that the address is located on.", + examples=["North Spring Street"], + default=None, + ) + addr_unit: str | None = Field( + alias="addr:unit", + description="The unit number or letter that is included in the address.", + examples=["B"], + default=None, + ) + addr_city: str | None = Field( + alias="addr:city", + description="The city that the address is located in.", + examples=["Los Angeles"], + default=None, + ) + addr_state: str | None = Field( + alias="addr:state", + pattern=r"^[A-Z]{2}$", + description="The state or territory of the address.", + examples=["CA"], + default=None, + ) + addr_postcode: str | None = Field( + alias="addr:postcode", + pattern=r"^\d{5}(?:\-\d{4})?$", + description="The postal code of the address.", + examples=["90012", "90012-4801"], + default=None, + ) diff --git a/tests/test.py b/tests/test.py index fb0d47f..d9d5ccd 100644 --- a/tests/test.py +++ b/tests/test.py @@ -201,6 +201,40 @@ def test_complex_data_types(): assert collapse_list([1, "1", 1, "1"]) == [1, "1"] +def test_get_address(): + """Test cases for get address""" + assert get_address("345 MAPLE RD, COUNTRYSIDE, PA 24680-0198")[0] == { + "addr:housenumber": "345", + 
"addr:street": "Maple Road", + "addr:city": "Countryside", + "addr:state": "PA", + "addr:postcode": "24680-0198", + } + assert get_address("777 Strawberry St.")[0] == { + "addr:housenumber": "777", + "addr:street": "Strawberry Street", + } + + +def test_get_address_removed(): + """Test cases for get address""" + add = get_address("222 NW Pineapple Ave Suite A Unit B, Beachville, SC 75309") + assert add[0] == { + "addr:housenumber": "222", + "addr:street": "Northwest Pineapple Avenue", + "addr:city": "Beachville", + "addr:state": "SC", + "addr:postcode": "75309", + } + assert add[1] == ["addr:unit"] + # add = get_address("158 S. Thomas Court 30008 90210") + # assert add[0] == { + # "addr:housenumber": "158", + # "addr:street": "South Thomas Court", + # } + # assert add[1] == ["addr:postcode"] + + def test_valid_phone_number_1(): """Test cases for valid phone numbers""" assert get_phone("2029009019") == "+1 202-900-9019" @@ -232,10 +266,3 @@ def test_invalid_phone_number_4(): """Test cases for blank phone numbers""" with pytest.raises(ValueError, match="Invalid phone number: "): get_phone("") - - -# def test_cap_match(): -# assert cap_match(regex.match("(\w+)", "test")) == "TEST" - - -# Add more tests for other functions in the file