Skip to content

Commit

Permalink
Issue #40 : Organization FertilizerInspection Schema (#77)
Browse files Browse the repository at this point in the history
* Refactor organization data structure and validation in inspection pipeline

* Refactor phone number validation in Organization model and add unit tests

* Add website lowercase validation and update inspection tests for organization fields

* Update version to 0.0.7 and remove unused import in inspection pipeline

* Update phone number assertions in inspection tests to check for None values

* Bump version to 0.0.8 in pyproject.toml

* Update organization model field descriptions and adjust website validation in tests

* Ensure website URLs start with 'www.' and convert to lowercase

* Bump version to 0.0.8 in pyproject.toml
  • Loading branch information
snakedye authored Nov 19, 2024
1 parent 81ee33b commit c3493f7
Show file tree
Hide file tree
Showing 5 changed files with 107 additions and 87 deletions.
22 changes: 14 additions & 8 deletions expected.json
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
{
"company_name": "GreenGrow Inc.",
"company_address": "123 Green Road, Farmville, State, 12345",
"company_website": "https://www.greengrow.com",
"company_phone_number": "123-456-7890",
"manufacturer_name": "AgriSupply Co.",
"manufacturer_address": "456 Supply Lane, AgriTown, State, 67890",
"manufacturer_website": "https://www.agrisupply.com",
"manufacturer_phone_number": "987-654-3210",
"organizations": [
{
"name": "GreenGrow Inc.",
"address": "123 Green Road, Farmville, State, 12345",
"website": "https://www.greengrow.com",
"phone_number": "123-456-7890"
},
{
"name": "AgriSupply Co.",
"address": "456 Supply Lane, AgriTown, State, 67890",
"website": "https://www.agrisupply.com",
"phone_number": "987-654-3210"
}
],
"fertiliser_name": "GreenGrow Fertilizer 20-20-20",
"registration_number": "2018007A",
"lot_number": "LOT20240901",
Expand Down
78 changes: 38 additions & 40 deletions pipeline/inspection.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
import re
from typing import Annotated, List, Optional
from typing import List, Optional

import phonenumbers
from pydantic import (
BaseModel,
ConfigDict,
Field,
StringConstraints,
field_validator,
model_validator,
)
Expand All @@ -23,6 +22,40 @@ def extract_first_number(string: str) -> Optional[str]:
return match.group()
return None

class Organization(BaseModel):
    """
    Represents an organization such as a manufacturer, company, or any entity
    associated with a fertilizer.
    """

    # NOTE(review): the class docstring and the Field descriptions below are
    # kept verbatim on purpose; pydantic can surface both in the generated
    # JSON schema, so they are treated as runtime text rather than comments.
    name: Optional[str] = Field(None, description="The name of the organization.")
    address: Optional[str] = Field(None, description="The address of the organization.")
    website: Optional[str] = Field(None, description="The website of the organization, ensuring 'www.' prefix is added..")
    phone_number: Optional[str] = Field(None, description="The primary phone number of the organization. Return only one.")

    @field_validator("phone_number", mode="before")
    @classmethod
    def validate_phone_number(cls, v):
        """Normalize a phone number to E.164, or return None if unusable.

        Numbers written without a country code are parsed with "CA" as the
        default region. Input that cannot be parsed, or that parses but is
        not a valid number, is replaced by None instead of raising.
        """
        if v is None:
            return None
        # Only the parse step can raise NumberParseException, so it is the
        # only statement kept inside the try block.
        try:
            parsed = phonenumbers.parse(v, "CA", _check_region=False)
        except phonenumbers.phonenumberutil.NumberParseException:
            return None
        if not phonenumbers.is_valid_number(parsed):
            return None
        return phonenumbers.format_number(
            parsed, phonenumbers.PhoneNumberFormat.E164
        )

    @field_validator("website", mode="before")
    @classmethod
    def website_lowercase(cls, v):
        """Lowercase a website and make sure its host starts with 'www.'.

        The value is lowercased *before* the prefix check so that input such
        as "WWW.Example.com" is not turned into "www.www.example.com". When a
        scheme is present ("https://..."), the 'www.' prefix is applied to the
        part after "://" rather than in front of the scheme. Blank strings
        become None; non-string input is returned untouched so that pydantic's
        own type validation reports it.
        """
        if not isinstance(v, str):
            return v
        url = v.strip().lower()
        if not url:
            return None
        scheme, separator, remainder = url.partition("://")
        if not separator:
            # No scheme: partition() put the whole URL in the first slot.
            scheme, remainder = "", url
        if not remainder.startswith("www."):
            remainder = "www." + remainder
        return f"{scheme}{separator}{remainder}"


class NutrientValue(BaseModel):
nutrient: str
Expand Down Expand Up @@ -94,26 +127,7 @@ def convert_specification_values(cls, v):


class FertilizerInspection(BaseModel):
company_name: Optional[str] = None
company_address: Optional[str] = None
company_website: Annotated[str | None, StringConstraints(to_lower=True)] = Field(
None,
description="Return the distributor's website, ensuring 'www.' prefix is added.",
)
company_phone_number: Optional[str] = Field(
None, description="The distributor's primary phone number. Return only one."
)
manufacturer_name: Optional[str] = None
manufacturer_address: Optional[str] = None
manufacturer_website: Annotated[str | None, StringConstraints(to_lower=True)] = (
Field(
None,
description="Return the manufacturer's website, ensuring 'www.' prefix is added.",
)
)
manufacturer_phone_number: Optional[str] = Field(
None, description="The manufacturer's primary phone number. Return only one."
)
organizations: List[Organization] = []
fertiliser_name: Optional[str] = None
registration_number: Optional[str] = None
lot_number: Optional[str] = None
Expand Down Expand Up @@ -146,35 +160,19 @@ def validate_npk(cls, v):
"instructions_fr",
"ingredients_en",
"ingredients_fr",
"organizations",
"weight",
mode="before",
)
def replace_none_with_empty_list(cls, v):
if v is None:
v = []
return v

@field_validator("registration_number", mode="before")
@classmethod
def check_registration_number_format(cls, v):
    """Keep a registration number only if it is seven digits plus one capital.

    Returns the value unchanged when it matches (e.g. "2018007A"); any other
    input, including None and non-string values, yields None.
    """
    pattern = r"^\d{7}[A-Z]$"
    # fullmatch is used instead of match because "$" alone also matches just
    # before a trailing newline, which let values like "2018007A\n" through.
    # The isinstance guard keeps non-string input from raising inside re.
    if isinstance(v, str) and re.fullmatch(pattern, v):
        return v
    return None

@field_validator("company_phone_number", "manufacturer_phone_number", mode="before")
def check_phone_number_format(cls, v):
if v is None:
return

try:
phone_number = phonenumbers.parse(v, "CA")
if not phonenumbers.is_valid_number(phone_number):
return
phone_number = phonenumbers.format_number(
phone_number, phonenumbers.PhoneNumberFormat.E164
)
return phone_number

except phonenumbers.phonenumberutil.NumberParseException:
return
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "fertiscan_pipeline"
version = "0.0.7"
version = "0.0.8"
description = "A pipeline for the FertiScan project"
authors = [
{ name = "Albert Bryan Ndjeutcha", email = "[email protected]" }
Expand Down
72 changes: 46 additions & 26 deletions tests/test_inspection.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
GuaranteedAnalysis,
NutrientValue,
Specification,
Organization,
Value,
)

Expand Down Expand Up @@ -164,7 +165,34 @@ def test_invalid_specification(self):
f"Expected None for solubility with input {data['solubility']}",
)

class TestOrganizations(unittest.TestCase):
    """Checks that Organization keeps its fields and filters phone numbers."""

    def setUp(self):
        # The two fixture organizations share everything except the phone
        # number, which is what distinguishes the valid and invalid sets.
        company = {
            "name": "Test Company",
            "address": "123 Test St",
            "website": "https://test.com",
        }
        manufacturer = {
            "name": "Test Manufacturer",
            "address": "456 Test Blvd",
            "website": "https://manufacturer.com",
        }

        self.valid_organization_data = [
            {**company, "phone_number": "800 640 9605"},
            {**manufacturer, "phone_number": "800-765-4321"},
        ]

        # Phone numbers the validator is expected to reject (become None).
        self.invalid_organization_data = [
            {**company, "phone_number": "123-456-7890"},
            {**manufacturer, "phone_number": "098-765-4321"},
        ]

    def test_valid_organization(self):
        """Every field of a valid organization survives validation."""
        checked_fields = ("name", "address", "website", "phone_number")
        for data in self.valid_organization_data:
            with self.subTest(data=data):
                organization = Organization(**data)
                for field_name in checked_fields:
                    # The raw input value doubles as the failure message.
                    self.assertIsNotNone(
                        getattr(organization, field_name), data[field_name]
                    )

    def test_invalid_organization(self):
        """An unusable phone number is dropped to None rather than raising."""
        for data in self.invalid_organization_data:
            with self.subTest(data=data):
                organization = Organization(**data)
                failure_message = f"Expected None for phone_number with input {data['phone_number']}"
                self.assertIsNone(organization.phone_number, failure_message)
class TestNPKValidation(unittest.TestCase):
def setUp(self):
self.valid_npk_data = ["10-5-20", "0-0-0", "100-200-300", "10.2-5.5-20.3"]
Expand Down Expand Up @@ -237,14 +265,6 @@ def test_is_minimal_in_none(self):
class TestFertilizerInspectionListFields(unittest.TestCase):
def setUp(self):
self.default_data = {
"company_name": "Test Company",
"company_address": "123 Test St",
"company_website": "https://test.com",
"company_phone_number": "123-456-7890",
"manufacturer_name": "Test Manufacturer",
"manufacturer_address": "456 Test Blvd",
"manufacturer_website": "https://manufacturer.com",
"manufacturer_phone_number": "098-765-4321",
"fertiliser_name": "Test Fertilizer",
"registration_number": "ABC123",
"lot_number": "LOT987",
Expand Down Expand Up @@ -301,40 +321,40 @@ def test_registration_number_mixed_format(self):
self.assertIsNone(instance.registration_number)


class TestFertilizerInspectionPhoneNumberFormat(unittest.TestCase):
class TestOrganizationPhoneNumberFormat(unittest.TestCase):
def test_valid_phone_number_with_country_code(self):
instance = FertilizerInspection(company_phone_number="+1 800 640 9605")
self.assertEqual(instance.company_phone_number, "+18006409605")
instance = Organization(phone_number="+1 800 640 9605")
self.assertEqual(instance.phone_number, "+18006409605")

def test_valid_phone_number_without_country_code(self):
instance = FertilizerInspection(company_phone_number="800 640 9605")
self.assertEqual(instance.company_phone_number, "+18006409605")
instance = Organization(phone_number="800 640 9605")
self.assertEqual(instance.phone_number, "+18006409605")

def test_phone_number_with_parentheses(self):
instance = FertilizerInspection(company_phone_number="(757) 321-4567")
self.assertEqual(instance.company_phone_number, "+17573214567")
instance = Organization(phone_number="(757) 321-4567")
self.assertEqual(instance.phone_number, "+17573214567")

def test_phone_number_with_extra_characters(self):
instance = FertilizerInspection(company_phone_number="+1 800 321-9605 FAX")
self.assertIsNone(instance.company_phone_number)
instance = Organization(phone_number="+1 800 321-9605 FAX")
self.assertIsNone(instance.phone_number)

def test_phone_number_with_multiple_numbers(self):
instance = FertilizerInspection(
company_phone_number="(757) 123-4567 (800) 456-7890, 1234567890"
instance = Organization(
phone_number="(757) 123-4567 (800) 456-7890, 1234567890"
)
self.assertIsNone(instance.company_phone_number)
self.assertIsNone(instance.phone_number)

def test_phone_number_from_other_country(self):
instance = FertilizerInspection(manufacturer_phone_number="+44 20 7946 0958")
self.assertEqual(instance.manufacturer_phone_number, "+442079460958")
instance = Organization(phone_number="+44 20 7946 0958")
self.assertEqual(instance.phone_number, "+442079460958")

def test_invalid_phone_number(self):
instance = FertilizerInspection(company_phone_number="invalid phone")
self.assertIsNone(instance.company_phone_number)
instance = Organization(phone_number="invalid phone")
self.assertIsNone(instance.phone_number)

def test_phone_number_with_invalid_format(self):
instance = FertilizerInspection(company_phone_number="12345")
self.assertIsNone(instance.company_phone_number)
instance = Organization(phone_number="12345")
self.assertIsNone(instance.phone_number)


if __name__ == "__main__":
Expand Down
20 changes: 8 additions & 12 deletions tests/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,7 @@ def test_analyze(self):
# Perform assertions
self.assertIsInstance(inspection, FertilizerInspection, inspection)
self.assertIn(Value(value="25", unit="kg"), inspection.weight, inspection)
manufacturer_or_company = (
inspection.manufacturer_name or inspection.company_name
)
manufacturer_or_company = inspection.organizations[0].name
self.assertIsNotNone(manufacturer_or_company, inspection)
self.assertGreater(
levenshtein_similarity(
Expand Down Expand Up @@ -151,8 +149,8 @@ def test_label_008_phone_number_inspection(self):
inspection = analyze(label_storage, self.ocr, self.gpt)

# Assertions
self.assertEqual(inspection.company_phone_number, "+18003279462")
self.assertIsNone(inspection.manufacturer_phone_number)
self.assertIn("+18003279462", str(inspection.organizations), inspection.organizations)
# self.assertIsNone(inspection.manufacturer_phone_number)

def test_label_024_phone_number_inspection(self):
label_folder = "test_data/labels/label_024"
Expand All @@ -166,8 +164,8 @@ def test_label_024_phone_number_inspection(self):
inspection = analyze(label_storage, self.ocr, self.gpt)

# Assertions
self.assertEqual(inspection.company_phone_number, "+14506556147")
self.assertIsNone(inspection.manufacturer_phone_number)
self.assertIn("+14506556147", str(inspection.organizations))
# self.assertIsNone(inspection.manufacturer_phone_number)

def test_label_001_website_inspection(self):
label_folder = "test_data/labels/label_001"
Expand All @@ -181,7 +179,7 @@ def test_label_001_website_inspection(self):
inspection = analyze(label_storage, self.ocr, self.gpt)

# Assertions for website fields
self.assertEqual(inspection.company_website, "www.soil-aid.com")
self.assertIn("www.soil-aid.com", str(inspection.organizations))

def test_label_006_website_inspection(self):
label_folder = "test_data/labels/label_006"
Expand All @@ -195,7 +193,7 @@ def test_label_006_website_inspection(self):
inspection = analyze(label_storage, self.ocr, self.gpt)

# Assertions for website fields
self.assertEqual(inspection.company_website, "www.activeagriscience.com")
self.assertIn("www.activeagriscience.com", str(inspection.organizations))

def test_label_034_website_inspection(self):
label_folder = "test_data/labels/label_034"
Expand All @@ -209,9 +207,7 @@ def test_label_034_website_inspection(self):
inspection = analyze(label_storage, self.ocr, self.gpt)

# Assertions for website fields
self.assertEqual(
inspection.company_website, "www.advancednutrients.com/growersupport"
)
self.assertIn("www.advancednutrients.com/growersupport", str(inspection.organizations))


if __name__ == "__main__":
Expand Down

0 comments on commit c3493f7

Please sign in to comment.