From c3493f7eae6007cd293e06452dec00d0fb892260 Mon Sep 17 00:00:00 2001 From: Bryan Ndjeutcha <49378990+snakedye@users.noreply.github.com> Date: Tue, 19 Nov 2024 08:44:25 -0500 Subject: [PATCH] Issue #40 : Organization FertilizerInspection Schema (#77) * Refactor organization data structure and validation in inspection pipeline * Refactor phone number validation in Organization model and add unit tests * Add website lowercase validation and update inspection tests for organization fields * Update version to 0.0.7 and remove unused import in inspection pipeline * Update phone number assertions in inspection tests to check for None values * Bump version to 0.0.8 in pyproject.toml * Update organization model field descriptions and adjust website validation in tests * Ensure website URLs start with 'www.' and convert to lowercase * Bump version to 0.0.8 in pyproject.toml --- expected.json | 22 +++++++----- pipeline/inspection.py | 78 ++++++++++++++++++++-------------------- pyproject.toml | 2 +- tests/test_inspection.py | 72 +++++++++++++++++++++++-------------- tests/test_pipeline.py | 20 +++++------ 5 files changed, 107 insertions(+), 87 deletions(-) diff --git a/expected.json b/expected.json index c2d639c..95de4ca 100644 --- a/expected.json +++ b/expected.json @@ -1,12 +1,18 @@ { - "company_name": "GreenGrow Inc.", - "company_address": "123 Green Road, Farmville, State, 12345", - "company_website": "https://www.greengrow.com", - "company_phone_number": "123-456-7890", - "manufacturer_name": "AgriSupply Co.", - "manufacturer_address": "456 Supply Lane, AgriTown, State, 67890", - "manufacturer_website": "https://www.agrisupply.com", - "manufacturer_phone_number": "987-654-3210", + "organizations": [ + { + "name": "GreenGrow Inc.", + "address": "123 Green Road, Farmville, State, 12345", + "website": "https://www.greengrow.com", + "phone_number": "123-456-7890" + }, + { + "name": "AgriSupply Co.", + "address": "456 Supply Lane, AgriTown, State, 67890", + "website": "https://www.agrisupply.com", + "phone_number": "987-654-3210" + } + ], "fertiliser_name": "GreenGrow Fertilizer 20-20-20", "registration_number": "2018007A", "lot_number": "LOT20240901", diff --git a/pipeline/inspection.py b/pipeline/inspection.py index 840ab79..28dcf98 100644 --- a/pipeline/inspection.py +++ b/pipeline/inspection.py @@ -1,12 +1,11 @@ import re -from typing import Annotated, List, Optional +from typing import List, Optional import phonenumbers from pydantic import ( BaseModel, ConfigDict, Field, - StringConstraints, field_validator, model_validator, ) @@ -23,6 +22,40 @@ def extract_first_number(string: str) -> Optional[str]: return match.group() return None +class Organization(BaseModel): + """ + Represents an organization such as a manufacturer, company, or any entity + associated with a fertilizer. + """ + name: Optional[str] = Field(None, description="The name of the organization.") + address: Optional[str] = Field(None, description="The address of the organization.") + website: Optional[str] = Field(None, description="The website of the organization, ensuring 'www.' prefix is added..") + phone_number: Optional[str] = Field(None, description="The primary phone number of the organization. Return only one.") + + @field_validator("phone_number", mode="before") + def validate_phone_number(cls, v): + if v is None: + return None + try: + phone_number = phonenumbers.parse(v, "CA", _check_region=False) + if not phonenumbers.is_valid_number(phone_number): + return None + phone_number = phonenumbers.format_number( + phone_number, phonenumbers.PhoneNumberFormat.E164 + ) + return phone_number + + except phonenumbers.phonenumberutil.NumberParseException: + return None + + @field_validator("website", mode="before") + def website_lowercase(cls, v): + if v is not None: + if not v.startswith("www."): + v = "www." + v + return v.lower() + return v + class NutrientValue(BaseModel): nutrient: str @@ -94,26 +127,7 @@ def convert_specification_values(cls, v): class FertilizerInspection(BaseModel): - company_name: Optional[str] = None - company_address: Optional[str] = None - company_website: Annotated[str | None, StringConstraints(to_lower=True)] = Field( - None, - description="Return the distributor's website, ensuring 'www.' prefix is added.", - ) - company_phone_number: Optional[str] = Field( - None, description="The distributor's primary phone number. Return only one." - ) - manufacturer_name: Optional[str] = None - manufacturer_address: Optional[str] = None - manufacturer_website: Annotated[str | None, StringConstraints(to_lower=True)] = ( - Field( - None, - description="Return the manufacturer's website, ensuring 'www.' prefix is added.", - ) - ) - manufacturer_phone_number: Optional[str] = Field( - None, description="The manufacturer's primary phone number. Return only one." - ) + organizations: List[Organization] = [] fertiliser_name: Optional[str] = None registration_number: Optional[str] = None lot_number: Optional[str] = None @@ -146,6 +160,7 @@ def validate_npk(cls, v): "instructions_fr", "ingredients_en", "ingredients_fr", + "organizations", "weight", mode="before", ) @@ -153,7 +168,7 @@ def replace_none_with_empty_list(cls, v): if v is None: v = [] return v - + @field_validator("registration_number", mode="before") def check_registration_number_format(cls, v): if v is not None: @@ -161,20 +176,3 @@ def check_registration_number_format(cls, v): if re.match(pattern, v): return v return None - - @field_validator("company_phone_number", "manufacturer_phone_number", mode="before") - def check_phone_number_format(cls, v): - if v is None: - return - - try: - phone_number = phonenumbers.parse(v, "CA") - if not phonenumbers.is_valid_number(phone_number): - return - phone_number = phonenumbers.format_number( - phone_number, phonenumbers.PhoneNumberFormat.E164 - ) - return phone_number - - except phonenumbers.phonenumberutil.NumberParseException: - return diff --git a/pyproject.toml b/pyproject.toml index e182da5..e70dcfc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "fertiscan_pipeline" -version = "0.0.7" +version = "0.0.8" description = "A pipeline for the FertiScan project" authors = [ { name = "Albert Bryan Ndjeutcha", email = "albert.ndjeutcha@inspection.gc.ca" } diff --git a/tests/test_inspection.py b/tests/test_inspection.py index 11dce0a..2d3e7da 100644 --- a/tests/test_inspection.py +++ b/tests/test_inspection.py @@ -5,6 +5,7 @@ GuaranteedAnalysis, NutrientValue, Specification, + Organization, Value, ) @@ -164,7 +165,34 @@ def test_invalid_specification(self): f"Expected None for solubility with input {data['solubility']}", ) +class TestOrganizations(unittest.TestCase): + def setUp(self): + self.valid_organization_data = [ + {"name": "Test Company", "address": "123 Test St", "website": "https://test.com", "phone_number": "800 640 9605"}, + {"name": "Test Manufacturer", "address": "456 Test Blvd", "website": "https://manufacturer.com", "phone_number": "800-765-4321"}, + ] + + self.invalid_organization_data = [ + {"name": "Test Company", "address": "123 Test St", "website": "https://test.com", "phone_number": "123-456-7890"}, + {"name": "Test Manufacturer", "address": "456 Test Blvd", "website": "https://manufacturer.com", "phone_number": "098-765-4321"}, + ] + def test_valid_organization(self): + for data in self.valid_organization_data: + with self.subTest(data=data): + organization = Organization(**data) + self.assertIsNotNone(organization.name, data["name"]) + self.assertIsNotNone(organization.address, data["address"]) + self.assertIsNotNone(organization.website, data["website"]) + self.assertIsNotNone(organization.phone_number, data["phone_number"]) + + def test_invalid_organization(self): + for data in self.invalid_organization_data: + with self.subTest(data=data): + organization = Organization(**data) + self.assertIsNone( + organization.phone_number, f"Expected None for phone_number with input {data['phone_number']}" + ) class TestNPKValidation(unittest.TestCase): def setUp(self): self.valid_npk_data = ["10-5-20", "0-0-0", "100-200-300", "10.2-5.5-20.3"] @@ -237,14 +265,6 @@ def test_is_minimal_in_none(self): class TestFertilizerInspectionListFields(unittest.TestCase): def setUp(self): self.default_data = { - "company_name": "Test Company", - "company_address": "123 Test St", - "company_website": "https://test.com", - "company_phone_number": "123-456-7890", - "manufacturer_name": "Test Manufacturer", - "manufacturer_address": "456 Test Blvd", - "manufacturer_website": "https://manufacturer.com", - "manufacturer_phone_number": "098-765-4321", "fertiliser_name": "Test Fertilizer", "registration_number": "ABC123", "lot_number": "LOT987", @@ -301,40 +321,40 @@ def test_registration_number_mixed_format(self): self.assertIsNone(instance.registration_number) -class TestFertilizerInspectionPhoneNumberFormat(unittest.TestCase): +class TestOrganizationPhoneNumberFormat(unittest.TestCase): def test_valid_phone_number_with_country_code(self): - instance = FertilizerInspection(company_phone_number="+1 800 640 9605") - self.assertEqual(instance.company_phone_number, "+18006409605") + instance = Organization(phone_number="+1 800 640 9605") + self.assertEqual(instance.phone_number, "+18006409605") def test_valid_phone_number_without_country_code(self): - instance = FertilizerInspection(company_phone_number="800 640 9605") - self.assertEqual(instance.company_phone_number, "+18006409605") + instance = Organization(phone_number="800 640 9605") + self.assertEqual(instance.phone_number, "+18006409605") def test_phone_number_with_parentheses(self): - instance = FertilizerInspection(company_phone_number="(757) 321-4567") - self.assertEqual(instance.company_phone_number, "+17573214567") + instance = Organization(phone_number="(757) 321-4567") + self.assertEqual(instance.phone_number, "+17573214567") def test_phone_number_with_extra_characters(self): - instance = FertilizerInspection(company_phone_number="+1 800 321-9605 FAX") - self.assertIsNone(instance.company_phone_number) + instance = Organization(phone_number="+1 800 321-9605 FAX") + self.assertIsNone(instance.phone_number) def test_phone_number_with_multiple_numbers(self): - instance = FertilizerInspection( - company_phone_number="(757) 123-4567 (800) 456-7890, 1234567890" + instance = Organization( + phone_number="(757) 123-4567 (800) 456-7890, 1234567890" ) - self.assertIsNone(instance.company_phone_number) + self.assertIsNone(instance.phone_number) def test_phone_number_from_other_country(self): - instance = FertilizerInspection(manufacturer_phone_number="+44 20 7946 0958") - self.assertEqual(instance.manufacturer_phone_number, "+442079460958") + instance = Organization(phone_number="+44 20 7946 0958") + self.assertEqual(instance.phone_number, "+442079460958") def test_invalid_phone_number(self): - instance = FertilizerInspection(company_phone_number="invalid phone") - self.assertIsNone(instance.company_phone_number) + instance = Organization(phone_number="invalid phone") + self.assertIsNone(instance.phone_number) def test_phone_number_with_invalid_format(self): - instance = FertilizerInspection(company_phone_number="12345") - self.assertIsNone(instance.company_phone_number) + instance = Organization(phone_number="12345") + self.assertIsNone(instance.phone_number) if __name__ == "__main__": diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 866a7aa..8846ac5 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -66,9 +66,7 @@ def test_analyze(self): # Perform assertions self.assertIsInstance(inspection, FertilizerInspection, inspection) self.assertIn(Value(value="25", unit="kg"), inspection.weight, inspection) - manufacturer_or_company = ( - inspection.manufacturer_name or inspection.company_name - ) + manufacturer_or_company = inspection.organizations[0].name self.assertIsNotNone(manufacturer_or_company, inspection) self.assertGreater( levenshtein_similarity( @@ -151,8 +149,8 @@ def test_label_008_phone_number_inspection(self): inspection = analyze(label_storage, self.ocr, self.gpt) # Assertions - self.assertEqual(inspection.company_phone_number, "+18003279462") - self.assertIsNone(inspection.manufacturer_phone_number) + self.assertIn("+18003279462", str(inspection.organizations), inspection.organizations) + # self.assertIsNone(inspection.manufacturer_phone_number) def test_label_024_phone_number_inspection(self): label_folder = "test_data/labels/label_024" @@ -166,8 +164,8 @@ def test_label_024_phone_number_inspection(self): inspection = analyze(label_storage, self.ocr, self.gpt) # Assertions - self.assertEqual(inspection.company_phone_number, "+14506556147") - self.assertIsNone(inspection.manufacturer_phone_number) + self.assertIn("+14506556147", str(inspection.organizations)) + # self.assertIsNone(inspection.manufacturer_phone_number) def test_label_001_website_inspection(self): label_folder = "test_data/labels/label_001" @@ -181,7 +179,7 @@ def test_label_001_website_inspection(self): inspection = analyze(label_storage, self.ocr, self.gpt) # Assertions for website fields - self.assertEqual(inspection.company_website, "www.soil-aid.com") + self.assertIn("www.soil-aid.com", str(inspection.organizations)) def test_label_006_website_inspection(self): label_folder = "test_data/labels/label_006" @@ -195,7 +193,7 @@ def test_label_006_website_inspection(self): inspection = analyze(label_storage, self.ocr, self.gpt) # Assertions for website fields - self.assertEqual(inspection.company_website, "www.activeagriscience.com") + self.assertIn("www.activeagriscience.com", str(inspection.organizations)) def test_label_034_website_inspection(self): label_folder = "test_data/labels/label_034" @@ -209,9 +207,7 @@ def test_label_034_website_inspection(self): inspection = analyze(label_storage, self.ocr, self.gpt) # Assertions for website fields - self.assertEqual( - inspection.company_website, "www.advancednutrients.com/growersupport" - ) + self.assertIn("www.advancednutrients.com/growersupport", str(inspection.organizations)) if __name__ == "__main__":