From cae598c35d91f028da11b6b590ffa36e76fa0529 Mon Sep 17 00:00:00 2001 From: Amrit Krishnan Date: Thu, 24 Oct 2024 13:21:46 -0400 Subject: [PATCH 01/13] Add support for empower API --- health_rec/api/config.py | 9 +- health_rec/api/data.py | 339 +++++++++--------- health_rec/api/routes.py | 1 + health_rec/load_data.py | 7 +- health_rec/services/rag.py | 11 +- health_rec/services/ranking.py | 23 +- ...{download_data.py => download_211_data.py} | 86 ++++- scripts/download_empower_data.py | 234 ++++++++++++ ui/src/app/components/map.tsx | 126 +++---- ui/src/app/recommendation/page.tsx | 29 +- 10 files changed, 593 insertions(+), 272 deletions(-) rename scripts/{download_data.py => download_211_data.py} (54%) create mode 100644 scripts/download_empower_data.py diff --git a/health_rec/api/config.py b/health_rec/api/config.py index fd04bd4..2d18abf 100644 --- a/health_rec/api/config.py +++ b/health_rec/api/config.py @@ -34,8 +34,6 @@ class Config: Attributes ---------- - TEST_MODE : bool - Flag to indicate if the application is running in test mode. OPENAI_API_KEY : str API key for OpenAI services. OPENAI_MODEL : str @@ -54,14 +52,13 @@ class Config: The weight of the relevancy score in the ranking strategy. """ - TEST_MODE: bool = getenv("TEST_MODE", "False").lower() == "true" OPENAI_API_KEY: str = getenv("OPENAI_API_KEY", "") OPENAI_MODEL: str = getenv("OPENAI_MODEL", "gpt-4o-mini") - OPENAI_EMBEDDING: Optional[str] = ( - getenv("OPENAI_EMBEDDING", "text-embedding-3-small") if not TEST_MODE else None + OPENAI_EMBEDDING: Optional[str] = getenv( + "OPENAI_EMBEDDING", "text-embedding-3-small" ) COHERE_API_KEY: str = getenv("COHERE_API_KEY", "") CHROMA_HOST: str = getenv("CHROMA_HOST", "chromadb-dev") CHROMA_PORT: int = 8000 - COLLECTION_NAME: str = getenv("COLLECTION_NAME", "test") + COLLECTION_NAME: str = getenv("COLLECTION_NAME", "empower") RELEVANCY_WEIGHT: float = float(getenv("RELEVANCY_WEIGHT", "0.5")) diff --git a/health_rec/api/data.py b/health_rec/api/data.py index e310f1d..fc44705 100644 --- a/health_rec/api/data.py +++ b/health_rec/api/data.py @@ -1,192 +1,189 @@ """Data models.""" +from datetime import datetime +from enum import Enum from typing import Any, Dict, List, Optional -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, validator -class PhoneNumber(BaseModel): - """ - Represents a phone number with various attributes. +class ServiceType(str, Enum): + """Standardized service types across different APIs.""" - Attributes - ---------- - phone : str - The phone number. - name : Optional[str] - The name of the phone number. - description : Optional[str] - The description of the phone number. - type : Optional[str] - The type of the phone number. - """ + EMERGENCY_ROOM = "emergency_room" + URGENT_CARE = "urgent_care" + WALK_IN_CLINIC = "walk_in_clinic" + PHARMACY = "pharmacy" + MEDICAL_LAB = "medical_lab" + FAMILY_DOCTOR = "family_doctor" + COMMUNITY_SERVICE = "community_service" + UNKNOWN = "unknown" + + +class AccessibilityLevel(str, Enum): + """Wheelchair accessibility levels.""" + + FULL = "full" + PARTIAL = "partial" + NONE = "none" + UNKNOWN = "unknown" + + +class DayOfWeek(str, Enum): + """Days of the week.""" + + SUNDAY = "sunday" + MONDAY = "monday" + TUESDAY = "tuesday" + WEDNESDAY = "wednesday" + THURSDAY = "thursday" + FRIDAY = "friday" + SATURDAY = "saturday" + + +class OperatingHours(BaseModel): + """Operating hours for a specific day.""" + + day: DayOfWeek + is_open: bool + is_24hour: bool = False + open_time: Optional[str] = None + close_time: Optional[str] = None + + +class HoursException(BaseModel): + """Special hours or holiday schedules.""" + + name: Optional[str] = None + start_date: datetime + end_date: datetime + is_open: bool + is_24hour: bool = False + open_time: Optional[str] = None + close_time: Optional[str] = None + + +class Address(BaseModel): + """Physical address information.""" - phone: Optional[str] = Field(default=None) - name: Optional[str] = Field(default=None) - description: Optional[str] = Field(default=None) - type: Optional[str] = Field(default=None) + street1: Optional[str] = None + street2: Optional[str] = None + city: Optional[str] = None + province: Optional[str] = None + postal_code: Optional[str] = None + country: Optional[str] = None + attention_name: Optional[str] = None + + +class PhoneNumber(BaseModel): + """Phone number with additional metadata.""" + + number: str + type: Optional[str] = None + name: Optional[str] = None + description: Optional[str] = None + extension: Optional[str] = None class Service(BaseModel): """ - Represents a service with various attributes. + Standardized service model that can accommodate data from multiple APIs. - Attributes - ---------- - id : int - The unique identifier of the service. - parent_id : Optional[int] - The ID of the parent service. - public_name : str - The public name of the service. - score : Optional[int] - The score of the service. - service_area : Optional[List[str]] - The areas where the service is available. - distance : Optional[str] - The distance to the service. - description : Optional[str] - The description of the service. - latitude : Optional[float] - The latitude coordinate of the service location. - longitude : Optional[float] - The longitude coordinate of the service location. - physical_address_street1 : Optional[str] - The first line of the physical address. - physical_address_street2 : Optional[str] - The second line of the physical address. - physical_address_city : Optional[str] - The city of the physical address. - physical_address_province : Optional[str] - The province of the physical address. - physical_address_postal_code : Optional[str] - The postal code of the physical address. - physical_address_country : Optional[str] - The country of the physical address. - mailing_attention_name : Optional[str] - The attention name for mailing. - mailing_address_street1 : Optional[str] - The first line of the mailing address. - mailing_address_street2 : Optional[str] - The second line of the mailing address. - mailing_address_city : Optional[str] - The city of the mailing address. - mailing_address_province : Optional[str] - The province of the mailing address. - mailing_address_postal_code : Optional[str] - The postal code of the mailing address. - mailing_address_country : Optional[str] - The country of the mailing address. - phone_numbers : List[PhoneNumber] - The phone numbers associated with the service. - website : Optional[str] - The website of the service. - email : Optional[str] - The email address of the service. - hours : Optional[str] - The hours of operation. - hours2 : Optional[str] - Additional hours of operation. - min_age : Optional[str] - The minimum age for the service. - max_age : Optional[str] - The maximum age for the service. - updated_on : Optional[str] - The date and time the service was last updated. - taxonomy_term : Optional[str] - The taxonomy terms associated with the service. - taxonomy_terms : Optional[str] - Additional taxonomy terms. - taxonomy_codes : Optional[str] - The taxonomy codes associated with the service. - eligibility : Optional[str] - The eligibility criteria for the service. - fee_structure_source : Optional[str] - The source of the fee structure. - official_name : Optional[str] - The official name of the service. - physical_city : Optional[str] - The physical city of the service. - unique_id_prior_system : Optional[str] - The unique ID from a prior system. - record_owner : Optional[str] - The owner of the record. + This model includes fields that might be present in various healthcare and + community service APIs, with optional fields to handle varying data availability. """ + # Core identification id: int - parent_id: Optional[int] = Field(default=None, alias="ParentId") - public_name: str = Field(alias="PublicName") - score: Optional[int] = Field(default=None, alias="Score") - service_area: Optional[List[str]] = Field(default=None, alias="ServiceArea") - distance: Optional[str] = Field(default=None, alias="Distance") - description: Optional[str] = Field(default=None, alias="Description") - latitude: Optional[float] = Field(default=None, alias="Latitude") - longitude: Optional[float] = Field(default=None, alias="Longitude") - physical_address_street1: Optional[str] = Field( - default=None, alias="PhysicalAddressStreet1" - ) - physical_address_street2: Optional[str] = Field( - default=None, alias="PhysicalAddressStreet2" - ) - physical_address_city: Optional[str] = Field( - default=None, alias="PhysicalAddressCity" - ) - physical_address_province: Optional[str] = Field( - default=None, alias="PhysicalAddressProvince" - ) - physical_address_postal_code: Optional[str] = Field( - default=None, alias="PhysicalAddressPostalCode" - ) - physical_address_country: Optional[str] = Field( - default=None, alias="PhysicalAddressCountry" - ) - mailing_attention_name: Optional[str] = Field( - default=None, alias="MailingAttentionName" - ) - mailing_address_street1: Optional[str] = Field( - default=None, alias="MailingAddressStreet1" - ) - mailing_address_street2: Optional[str] = Field( - default=None, alias="MailingAddressStreet2" - ) - mailing_address_city: Optional[str] = Field( - default=None, alias="MailingAddressCity" - ) - mailing_address_province: Optional[str] = Field( - default=None, alias="MailingAddressProvince" - ) - mailing_address_postal_code: Optional[str] = Field( - default=None, alias="MailingAddressPostalCode" - ) - mailing_address_country: Optional[str] = Field( - default=None, alias="MailingAddressCountry" - ) - phone_numbers: List[PhoneNumber] = Field(default_factory=list, alias="PhoneNumbers") - website: Optional[str] = Field(default=None, alias="Website") - email: Optional[str] = Field(default=None, alias="Email") - hours: Optional[str] = Field(default=None, alias="Hours") - hours2: Optional[str] = Field(default=None, alias="Hours2") - min_age: Optional[str] = Field(default=None, alias="MinAge") - max_age: Optional[str] = Field(default=None, alias="MaxAge") - updated_on: Optional[str] = Field(default=None, alias="UpdatedOn") - taxonomy_term: Optional[str] = Field(default=None, alias="TaxonomyTerm") - taxonomy_terms: Optional[str] = Field(default=None, alias="TaxonomyTerms") - taxonomy_codes: Optional[str] = Field(default=None, alias="TaxonomyCodes") - eligibility: Optional[str] = Field(default=None, alias="Eligibility") - fee_structure_source: Optional[str] = Field( - default=None, alias="FeeStructureSource" - ) - official_name: Optional[str] = Field(default=None, alias="OfficialName") - physical_city: Optional[str] = Field(default=None, alias="PhysicalCity") - unique_id_prior_system: Optional[str] = Field( - default=None, alias="UniqueIDPriorSystem" - ) - record_owner: Optional[str] = Field(default=None, alias="RecordOwner") + name: str + service_type: ServiceType = ServiceType.UNKNOWN + source_id: Optional[str] = None # Original ID from source system + official_name: Optional[str] = None + + # Location + latitude: float + longitude: float + distance: Optional[float] = None # Distance from search point in km + physical_address: Optional[Address] = None + mailing_address: Optional[Address] = None + + # Contact information + phone_numbers: List[PhoneNumber] = Field(default_factory=list) + fax: Optional[str] = None + email: Optional[str] = None + website: Optional[str] = None + social_media: Dict[str, str] = Field(default_factory=dict) + + # Service details + description: Optional[str] = None + services: List[str] = Field(default_factory=list) + languages: List[str] = Field(default_factory=list) + taxonomy_terms: List[str] = Field(default_factory=list) + taxonomy_codes: List[str] = Field(default_factory=list) + + # Operating information + status: Optional[str] = None # current operating status (open/closed) + regular_hours: List[OperatingHours] = Field(default_factory=list) + hours_exceptions: List[HoursException] = Field(default_factory=list) + timezone_offset: Optional[str] = None + + # Accessibility and special features + wheelchair_accessible: AccessibilityLevel = AccessibilityLevel.UNKNOWN + parking_type: Optional[str] = None + accepts_new_patients: Optional[bool] = None + wait_time: Optional[int] = None # in minutes + + # Booking capabilities + has_online_booking: bool = False + has_queue_system: bool = False + accepts_walk_ins: bool = False + can_book: bool = False + + # Eligibility and fees + eligibility_criteria: Optional[str] = None + fee_structure: Optional[str] = None + min_age: Optional[int] = None + max_age: Optional[int] = None + + # Metadata + last_updated: Optional[datetime] = None + record_owner: Optional[str] = None + data_source: Optional[str] = None # e.g., "211", "Empower" class Config: - """Override Pydantic configuration.""" + """Pydantic configuration.""" + + use_enum_values = True + + @validator("wheelchair_accessible", pre=True) + def normalize_wheelchair_access(cls, v: str) -> AccessibilityLevel: # noqa: N805 + """Normalize wheelchair accessibility values from different sources.""" + if isinstance(v, str): + mapping = { + "t": AccessibilityLevel.FULL, + "true": AccessibilityLevel.FULL, + "p": AccessibilityLevel.PARTIAL, + "partial": AccessibilityLevel.PARTIAL, + "f": AccessibilityLevel.NONE, + "false": AccessibilityLevel.NONE, + } + return mapping.get(v.lower(), AccessibilityLevel.UNKNOWN) + return AccessibilityLevel.UNKNOWN - populate_by_name = True + @validator("service_type", pre=True) + def normalize_service_type(cls, v: str) -> ServiceType: # noqa: N805 + """Normalize service type values from different sources.""" + if isinstance(v, str): + mapping = { + "Retail Pharmacy": ServiceType.PHARMACY, + "Emergency Rooms": ServiceType.EMERGENCY_ROOM, + "Urgent Care Centre": ServiceType.URGENT_CARE, + "Primary Care Walk-In Clinic": ServiceType.WALK_IN_CLINIC, + "Family Doctor's Office": ServiceType.FAMILY_DOCTOR, + "Medical Labs & Diagnostic Imaging Centres": ServiceType.MEDICAL_LAB, + } + return mapping.get(v, ServiceType.UNKNOWN) + return ServiceType.UNKNOWN class ServiceDocument(BaseModel): @@ -227,7 +224,7 @@ class RecommendationResponse(BaseModel): Whether the request signifies an emergency. is_out_of_scope : bool Whether the request is out of scope. - services : Optional[List[Service]] + services : Optional[List[BaseService]] A list of services ranked by location and relevancy. no_services_found : bool Whether no services were found. diff --git a/health_rec/api/routes.py b/health_rec/api/routes.py index da41210..bc9c5bd 100644 --- a/health_rec/api/routes.py +++ b/health_rec/api/routes.py @@ -47,6 +47,7 @@ async def get_additional_questions( A dictionary with the generated questions. """ try: + logger.info(f"Received query for additional questions: {query}") questions = refine_service.generate_questions(query, recommendation) return {"questions": questions} except Exception as e: diff --git a/health_rec/load_data.py b/health_rec/load_data.py index 712d5ec..d9dedef 100644 --- a/health_rec/load_data.py +++ b/health_rec/load_data.py @@ -66,7 +66,7 @@ def load_json_data(file_path: str) -> List[Dict[str, Any]]: try: with open(file_path, "r") as file: data = json.load(file) - return list(data["Records"]) + return list(data) except FileNotFoundError: logger.error(f"File not found: {file_path}") raise @@ -137,7 +137,10 @@ def get_or_create_collection(host: str, port: int, name: str) -> chromadb.Collec logger.info(f"Retrieved existing collection: {name}") except ValueError: logger.info(f"Creating new collection: {name}") - collection = chroma_client.create_collection(name=name) + collection = chroma_client.create_collection( + name=name, + metadata={"hnsw:space": "cosine"}, + ) return collection diff --git a/health_rec/services/rag.py b/health_rec/services/rag.py index 6fe3eb5..5a80c3e 100644 --- a/health_rec/services/rag.py +++ b/health_rec/services/rag.py @@ -108,9 +108,14 @@ def _retrieve_and_rank_services( service_documents, user_location ) if query.radius: + logger.info(f"Filtering services by radius: {query.radius}") service_documents = [ doc for doc in service_documents if doc.distance <= query.radius ] + for service in service_documents: + logger.info( + f"Service name: {service.metadata['PublicName']}, distance: {service.distance}" + ) return list(service_documents) def _prepare_context( @@ -147,16 +152,16 @@ def _generate_response(self, query: str, context: str) -> RecommendationResponse An object containing the generated recommendation and relevant services. """ generation_template = """ - You are an expert with deep knowledge of health and community services in the Greater Toronto Area (GTA). You will be providing a recommendation to an individual who is seeking help. The individual is seeking help with the following query: + You are an expert with deep knowledge of health and community services. You will be providing a recommendation to an individual who is seeking help. The individual is seeking help with the following query: {discover} If you determine that the individual has an emergency need, respond with only the word "EMERGENCY" (in all caps). - If you determine that the individual's query is not for a health or community service in the GTA, respond with an appropriate out of scope message in relation to the query. Structure your response as follows: + If you determine that the individual's query is not for a health or community service, respond with an appropriate out of scope message in relation to the query. Structure your response as follows: Response: A brief explanation of why the query is out of scope. - Reasoning: Provide more detailed reasoning for why this query cannot be answered within the context of GTA health and community services. + Reasoning: Provide more detailed reasoning for why this query cannot be answered within the context of health and community services. If no services are found within the context, respond with the word "NO_SERVICES_FOUND" (in all caps). If the individual does not need emergency help and the query is within scope, use only the following service context enclosed by the tag to provide a service recommendation. diff --git a/health_rec/services/ranking.py b/health_rec/services/ranking.py index 275c417..8d093ab 100644 --- a/health_rec/services/ranking.py +++ b/health_rec/services/ranking.py @@ -63,14 +63,6 @@ def _rank_by_relevancy_and_distance( float(service.metadata["Longitude"]), ) service.distance = _calculate_distance(service_location, user_location) - - # TODO: Remove the following lines later - scores = { - service.metadata["PublicName"]: self._calculate_ranking_score(service) - for service in services - } - logger.info(f"Services and their scores: \n {scores}") - services.sort( key=lambda service: self._calculate_ranking_score(service), reverse=True ) @@ -90,9 +82,16 @@ def _calculate_ranking_score(self, service: ServiceDocument) -> float: float The ranking score for the service based on the specified strategy. """ + # Convert cosine distance to cosine similarity + cosine_similarity = 1 - service.relevancy_score + + # Normalize distance to a 0-1 range (assuming max distance of 100 km) + normalized_distance = min(service.distance / 100, 1) + + # Calculate the ranking score return float( - self.relevancy_weight * service.relevancy_score - + self.distance_weight * (1 / service.distance) + self.relevancy_weight * cosine_similarity + + self.distance_weight * (1 - normalized_distance) ) @@ -117,8 +116,8 @@ def _calculate_distance( lat1, lon1 = location1 lat2, lon2 = location2 - # Radius of the Earth in kilometers - radius: float = 6371.0 + # Radius of the Earth in kilometers (using double precision) + radius: float = 6371.0088 # Convert latitude and longitude from degrees to radians lat1_rad, lon1_rad = radians(lat1), radians(lon1) diff --git a/scripts/download_data.py b/scripts/download_211_data.py similarity index 54% rename from scripts/download_data.py rename to scripts/download_211_data.py index d066d3f..4a6fa16 100644 --- a/scripts/download_data.py +++ b/scripts/download_211_data.py @@ -7,7 +7,88 @@ import math from pathlib import Path import argparse -from typing import Dict, Any +from typing import Dict, Any, Optional + +from api.data import ServiceType + + +def map_211_data_to_service(data: Dict[str, Any]) -> Dict[str, Any]: + """Map 211 API data to standardized Service format.""" + # Map phone numbers + phone_numbers = [] + if data.get("PhoneNumbers"): + for phone in data["PhoneNumbers"]: + phone_numbers.append( + { + "number": phone.get("Number", ""), + "type": phone.get("Type"), + "name": phone.get("Name"), + "description": phone.get("Description"), + "extension": phone.get("Extension"), + } + ) + + # Map addresses + physical_address = { + "street1": data.get("PhysicalAddressStreet1"), + "street2": data.get("PhysicalAddressStreet2"), + "city": data.get("PhysicalAddressCity"), + "province": data.get("PhysicalAddressProvince"), + "postal_code": data.get("PhysicalAddressPostalCode"), + "country": data.get("PhysicalAddressCountry"), + } + + mailing_address = { + "street1": data.get("MailingAddressStreet1"), + "street2": data.get("MailingAddressStreet2"), + "city": data.get("MailingAddressCity"), + "province": data.get("MailingAddressProvince"), + "postal_code": data.get("MailingAddressPostalCode"), + "country": data.get("MailingAddressCountry"), + "attention_name": data.get("MailingAttentionName"), + } + + # Handle age parsing + def parse_age(age_str: Optional[str]) -> Optional[int]: + if not age_str: + return None + try: + return int(float(age_str)) + except (ValueError, TypeError): + return None + + return { + "id": data["id"], + "name": data["PublicName"], + "service_type": ServiceType.UNKNOWN.value, + "source_id": data.get("UniqueIDPriorSystem"), + "official_name": data.get("OfficialName"), + "latitude": float(data["Latitude"]) if data.get("Latitude") else 0.0, + "longitude": float(data["Longitude"]) if data.get("Longitude") else 0.0, + "physical_address": physical_address, + "mailing_address": mailing_address, + "phone_numbers": phone_numbers, + "email": data.get("Email"), + "website": data.get("Website"), + "description": data.get("Description"), + "taxonomy_terms": [ + term.strip() + for term in data.get("TaxonomyTerms", "").split(";") + if term.strip() + ], + "taxonomy_codes": [ + code.strip() + for code in data.get("TaxonomyCodes", "").split(";") + if code.strip() + ], + "eligibility_criteria": data.get("Eligibility"), + "fee_structure": data.get("FeeStructureSource"), + "min_age": parse_age(data.get("MinAge", "")), + "max_age": parse_age(data.get("MaxAge", "")), + "last_updated": data.get("UpdatedOn"), + "record_owner": data.get("RecordOwner"), + "data_source": "211", + } def create_payload( @@ -70,8 +151,9 @@ def fetch_data( def save_to_file(data: Dict[str, Any], file_name: str) -> None: """Save the data to a JSON file.""" + mapped_services = [map_211_data_to_service(service) for service in data["Records"]] with open(file_name, "w") as f: - json.dump(data, f, indent=2) + json.dump(mapped_services, f, indent=2) def main( diff --git a/scripts/download_empower_data.py b/scripts/download_empower_data.py new file mode 100644 index 0000000..fcc426e --- /dev/null +++ b/scripts/download_empower_data.py @@ -0,0 +1,234 @@ +"""Download data from the Empower API.""" + +import requests +from dotenv import load_dotenv +import argparse +import json +import os +from typing import List, Dict, Any +import time + + +def map_empower_data_to_service(data: Dict[str, Any]) -> Dict[str, Any]: + """Map Empower API data to standardized Service format.""" + # Map operating hours + regular_hours = [] + day_mapping = { + 0: "sunday", + 1: "monday", + 2: "tuesday", + 3: "wednesday", + 4: "thursday", + 5: "friday", + 6: "saturday", + } + + for hour in data.get("hours", []): + regular_hours.append( + { + "day": day_mapping[hour["day"]], + "is_open": hour["is_open"], + "is_24hour": hour["is_24hour"], + "open_time": hour["opentime"], + "close_time": hour["closetime"], + } + ) + + # Map address + physical_address = { + "street1": data.get("address"), + "city": data.get("city"), + "province": data.get("province"), + "postal_code": data.get("postal_code"), + "country": "Canada", + } + + return { + "id": data["id"], + "name": data["name"], + "service_type": data["type"], + "latitude": float(data["lat"]), + "longitude": float(data["long"]), + "physical_address": physical_address, + "phone_numbers": [{"number": data["phone"]}] if data.get("phone") else [], + "fax": data.get("fax"), + "email": data.get("email"), + "website": data.get("website"), + "description": data.get("description"), + "services": data.get("services", []), + "languages": data.get("languages", []), + "status": data.get("status"), + "regular_hours": regular_hours, + "hours_exceptions": data.get("hours_exceptions", []), + "timezone_offset": data.get("tzoffset"), + "wheelchair_accessible": data.get("wheelchair", "unknown"), + "parking_type": data.get("parking"), + "accepts_new_patients": data.get("new_patients"), + "wait_time": data.get("wait_time"), + "has_online_booking": data.get("has_ebooking", False), + "can_book": data.get("can_book", False), + "data_source": "Empower", + } + + +class EmpowerDataFetcher: + def __init__(self, api_key: str, base_url: str): + """Initialize the EmpowerDataFetcher.""" + self.api_key = api_key + self.base_url = base_url + self.headers = { + "Accept": "application/json", + "Content-Type": "application/x-www-form-urlencoded", + "User-Agent": "Mozilla/5.0", + "platform": "web", + "lang_prefs": "en", + } + # Provider type mapping as per API documentation[1] + self.provider_types = { + 1: "Emergency Rooms", + 2: "Urgent Care Centre", + 3: "Primary Care Walk-In Clinic", + 4: "Retail Pharmacy", + 5: "Medical Labs & Diagnostic Imaging Centres", + 11: "Family Doctor's Office", + } + + def map_provider_type(self, type_id: int) -> str: + """Map provider type ID to human-readable string.""" + return self.provider_types.get(type_id, f"Unknown Type ({type_id})") + + def fetch_providers_list( + self, lat: float, long: float, radius: float, page: int + ) -> Dict[str, Any]: + """Fetch list of providers for a given page.""" + url = f"{self.base_url}/providers" + params: Dict[str, Any] = { + "api_key": self.api_key, + "lat": lat, + "long": long, + "radius": radius, + "page": page, + } + + response = requests.get(url, headers=self.headers, params=params) + response.raise_for_status() + data = response.json() + + # Map provider types in the response + for provider in data.get("providers", []): + if "type" in provider: + provider["type"] = self.map_provider_type(provider["type"]) + + return data # type: ignore + + def fetch_provider_details(self, provider_id: int) -> Dict[str, Any]: + """Fetch detailed information for a specific provider.""" + url = f"{self.base_url}/providers/{provider_id}" + params: Dict[str, str] = {"api_key": self.api_key} + + response = requests.get(url, headers=self.headers, params=params) + response.raise_for_status() + data = response.json() + + # Map provider type in the response + if "type" in data: + data["type"] = self.map_provider_type(data["type"]) + + return data # type: ignore + + def collect_provider_ids(self, lat: float, long: float, radius: float) -> List[int]: + """Collect all provider IDs from paginated results.""" + provider_ids = [] + page = 1 + + # Fetch first page to get total pages + initial_response = self.fetch_providers_list(lat, long, radius, page) + total_pages = initial_response["pages"]["total_pages"] + + print(f"Total pages to process: {total_pages}") + + # Process all pages + while page <= total_pages: + print(f"Fetching page {page}/{total_pages}") + response = self.fetch_providers_list(lat, long, radius, page) + + # Extract provider IDs from current page + for provider in response["providers"]: + provider_ids.append(provider["id"]) + + page += 1 + time.sleep(0.5) # Rate limiting + + return provider_ids + + def fetch_all_provider_details( + self, provider_ids: List[int], output_dir: str + ) -> None: + """Fetch and save mapped provider details.""" + os.makedirs(output_dir, exist_ok=True) + output_file = os.path.join(output_dir, "data-00.json") + + mapped_providers = [] + total_providers = len(provider_ids) + + for idx, provider_id in enumerate(provider_ids, 1): + print(f"Fetching provider {idx}/{total_providers} (ID: {provider_id})") + try: + provider_details = self.fetch_provider_details(provider_id) + mapped_provider = map_empower_data_to_service(provider_details) + mapped_providers.append(mapped_provider) + time.sleep(0.25) + except requests.exceptions.RequestException as e: + print(f"Error fetching provider {provider_id}: {e}") + + with open(output_file, "w") as f: + json.dump(mapped_providers, f, indent=2) + + print(f"Saved all provider details to {output_file}") + + +def main() -> None: + load_dotenv("./.env.development") + + parser = argparse.ArgumentParser(description="Download data from the Empower API.") + parser.add_argument( + "--api-key", + default=os.getenv("EMPOWER_API_KEY"), + help="API key for Empower API", + ) + parser.add_argument( + "--base-url", + default="https://empower.ca/api/v4", + help="Base URL for Empower API", + ) + parser.add_argument( + "--data-dir", default="./data/empower", help="Directory to save data" + ) + + args = parser.parse_args() + + if not args.api_key: + raise ValueError("EMPOWER_API_KEY is not set") + # Initialize fetcher + fetcher = EmpowerDataFetcher(args.api_key, args.base_url) + + # Parameters for the search + lat = 44.051507 + long = -79.45811 + radius = 100 # km + output_dir = args.data_dir + + try: + # Collect all provider IDs + provider_ids = fetcher.collect_provider_ids(lat, long, radius) + print(f"Collected {len(provider_ids)} provider IDs") + + # Fetch and save detailed information for all providers + fetcher.fetch_all_provider_details(provider_ids, output_dir) + + except requests.exceptions.RequestException as e: + print(f"Error occurred: {e}") + + +if __name__ == "__main__": + main() diff --git a/ui/src/app/components/map.tsx b/ui/src/app/components/map.tsx index 7f2421f..d50e74d 100644 --- a/ui/src/app/components/map.tsx +++ b/ui/src/app/components/map.tsx @@ -1,5 +1,3 @@ -'use client'; - import React, { useState, useMemo, useEffect, useRef, useCallback } from 'react'; import Map, { Marker, @@ -10,7 +8,6 @@ import Map, { Source, Layer } from 'react-map-gl'; -import mapboxgl from 'mapbox-gl'; import 'mapbox-gl/dist/mapbox-gl.css'; import { Box, Text, VStack, Flex, Badge, Modal, ModalOverlay, ModalContent, ModalHeader, ModalBody, ModalCloseButton, Icon } from '@chakra-ui/react'; import { FaBuilding } from 'react-icons/fa'; @@ -24,6 +21,8 @@ interface MapProps { height: string; width: string; initialViewState?: ViewState; + radius?: number; + center?: [number, number]; } interface ViewState { @@ -40,25 +39,41 @@ export const TORONTO_COORDINATES: ViewState = { }; export const computeViewState = (locations: Location[]): ViewState => { - if (locations.length === 0) return TORONTO_COORDINATES; - const bounds = new mapboxgl.LngLatBounds(); - locations.forEach(location => bounds.extend([location.longitude, location.latitude])); + if (!locations.length) { + return TORONTO_COORDINATES; + } + + const min_longitude = Math.min(...locations.map(loc => loc.longitude)); + const max_longitude = Math.max(...locations.map(loc => loc.longitude)); + const min_latitude = Math.min(...locations.map(loc => loc.latitude)); + const max_latitude = Math.max(...locations.map(loc => loc.latitude)); + + const center_longitude = (min_longitude + max_longitude) / 2; + const center_latitude = (min_latitude + max_latitude) / 2; + + const padding = { top: 40, bottom: 40, left: 40, right: 40 }; + return { - ...bounds.getCenter(), + longitude: center_longitude, + latitude: center_latitude, zoom: 11, - padding: { top: 40, bottom: 40, left: 40, right: 40 } + padding }; }; -const MapComponent: React.FC = ({ locations, onMarkerClick, height, width, initialViewState }) => { - const [viewState, setViewState] = useState(initialViewState || TORONTO_COORDINATES); +const MapComponent: React.FC = ({ locations, onMarkerClick, height, width, initialViewState, radius, center }) => { + const [viewState, setViewState] = useState(computeViewState(locations)); const [selectedLocation, setSelectedLocation] = useState(null); const [isFullScreenMapOpen, setIsFullScreenMapOpen] = useState(false); const mapRef = useRef(null); + useEffect(() => { + setViewState(computeViewState(locations)); + }, [locations]); + const handleMarkerClick = useCallback((location: Location) => { setSelectedLocation(location); - if (mapRef.current) { + if (mapRef.current && isFullScreenMapOpen) { mapRef.current.flyTo({ center: [location.longitude, location.latitude], zoom: 15, @@ -66,27 +81,13 @@ const MapComponent: React.FC = ({ locations, onMarkerClick, height, wi }); } onMarkerClick && onMarkerClick(location); - }, [onMarkerClick]); - - const fitMapToLocations = useCallback(() => { - if (locations.length > 0 && mapRef.current) { - const bounds = new mapboxgl.LngLatBounds(); - locations.forEach(location => bounds.extend([location.longitude, location.latitude])); - mapRef.current.fitBounds(bounds, { - padding: { top: 50, bottom: 50, left: 50, right: 50 }, - maxZoom: 15, - duration: 0 // Set to 0 for initial load to prevent movement - }); - } - }, [locations]); - - useEffect(() => { - fitMapToLocations(); - }, [fitMapToLocations]); + }, [onMarkerClick, isFullScreenMapOpen]); const handleViewStateChange = useCallback((evt: ViewStateChangeEvent) => { - setViewState(evt.viewState); - }, []); + if (isFullScreenMapOpen) { + setViewState(evt.viewState); + } + }, [isFullScreenMapOpen]); const markers = useMemo(() => locations.map((location) => ( = ({ locations, onMarkerClick, height, wi )), [locations, handleMarkerClick]); + const radiusLayer = useMemo(() => { + if (!radius || !center) return null; + + return ( + + + + ); + }, [radius, center]); + const renderLocationsList = useCallback(() => ( {locations.map(location => ( @@ -140,11 +173,7 @@ const MapComponent: React.FC = ({ locations, onMarkerClick, height, wi const handleFullScreenMapOpen = useCallback(() => { setIsFullScreenMapOpen(true); - // Delay the fitMapToLocations call to ensure the map is fully rendered - setTimeout(() => { - fitMapToLocations(); - }, 100); - }, [fitMapToLocations]); + }, []); if (!MAPBOX_TOKEN) return Error: Mapbox token is not set; @@ -160,31 +189,8 @@ const MapComponent: React.FC = ({ locations, onMarkerClick, height, wi interactive={false} reuseMaps > - ({ - type: 'Feature', - geometry: { - type: 'Point', - coordinates: [location.longitude, location.latitude] - }, - properties: { - id: location.id, - name: location.name - } - })) - }}> - - + {markers} + {radiusLayer} diff --git a/ui/src/app/recommendation/page.tsx b/ui/src/app/recommendation/page.tsx index 6f7580a..150130f 100644 --- a/ui/src/app/recommendation/page.tsx +++ b/ui/src/app/recommendation/page.tsx @@ -1,13 +1,13 @@ 'use client'; -import React, { useState, useEffect, useMemo } from 'react'; +import React, { useState, useEffect, useMemo, useCallback } from 'react'; import { Box, Container, Heading, Text, VStack, SimpleGrid, useColorModeValue, Divider, Badge, Flex, Grid, GridItem, Skeleton, SkeletonText, SkeletonCircle } from '@chakra-ui/react'; import ServiceCard from '../components/service-card'; import Header from '../components/header'; -import Map, { computeViewState, TORONTO_COORDINATES } from '../components/map'; +import Map, { TORONTO_COORDINATES, computeViewState } from '../components/map'; import { Service, Location } from '../types/service'; import { useRecommendationStore, Recommendation, Query, RecommendationStore } from '../stores/recommendation-store'; import { useRouter } from 'next/navigation'; @@ -97,9 +97,9 @@ const RecommendationPage: React.FC = () => { } }; - const updateMapViewState = (services: Service[]) => { + const updateMapViewState = useCallback((services: Service[]) => { if (services && services.length > 0) { - const newMapLocations = services + const locations = services .filter((service): service is Service & Required> => typeof service.Latitude === 'number' && typeof service.Longitude === 'number' && @@ -107,19 +107,16 @@ const RecommendationPage: React.FC = () => { !isNaN(service.Longitude) ) .map(service => ({ - id: service.id, - name: service.PublicName, latitude: service.Latitude, longitude: service.Longitude, - description: service.Description || '', - address: service.Address || '', - phone: service.Phone || '', })); - const newViewState = computeViewState(newMapLocations); + const newViewState = computeViewState(locations); setMapViewState(newViewState); + } else { + setMapViewState(TORONTO_COORDINATES); } - }; + }, []); const mapLocations: Location[] = useMemo(() => { if (!recommendation?.services) return []; @@ -143,11 +140,10 @@ const RecommendationPage: React.FC = () => { }, [recommendation]); useEffect(() => { - if (mapLocations.length > 0) { - const newViewState = computeViewState(mapLocations); - setMapViewState(newViewState); + if (recommendation?.services) { + updateMapViewState(recommendation.services); } - }, [mapLocations]); + }, [recommendation, updateMapViewState]); const renderRecommendationCard = (recommendation: Recommendation | null) => { if (!recommendation?.message) return null; @@ -254,7 +250,8 @@ const RecommendationPage: React.FC = () => { locations={mapLocations} height={mapHeight} width={mapWidth} - initialViewState={mapViewState} + radius={originalQuery?.radius} + center={originalQuery?.latitude && originalQuery?.longitude ? [originalQuery.longitude, originalQuery.latitude] : undefined} /> From 66ac816575069e263d092bcb199dc88aba2f9c59 Mon Sep 17 00:00:00 2001 From: Amrit Krishnan Date: Mon, 28 Oct 2024 08:52:38 -0400 Subject: [PATCH 02/13] Fix download scripts --- health_rec/api/data.py | 24 +-- health_rec/services/ranking.py | 4 +- health_rec/services/utils.py | 242 +++++++++++---------- scripts/download_211_data.py | 204 +++++++++++------- scripts/download_empower_data.py | 280 ++++++++++++++++--------- ui/src/app/components/map.tsx | 4 +- ui/src/app/components/service-card.tsx | 10 +- ui/src/app/dev/page.tsx | 24 +-- ui/src/app/recommendation/page.tsx | 45 ++-- ui/src/app/types/service.ts | 124 +++++++---- 10 files changed, 578 insertions(+), 383 deletions(-) diff --git a/health_rec/api/data.py b/health_rec/api/data.py index fc44705..3ae4490 100644 --- a/health_rec/api/data.py +++ b/health_rec/api/data.py @@ -17,7 +17,6 @@ class ServiceType(str, Enum): MEDICAL_LAB = "medical_lab" FAMILY_DOCTOR = "family_doctor" COMMUNITY_SERVICE = "community_service" - UNKNOWN = "unknown" class AccessibilityLevel(str, Enum): @@ -86,24 +85,19 @@ class PhoneNumber(BaseModel): class Service(BaseModel): - """ - Standardized service model that can accommodate data from multiple APIs. - - This model includes fields that might be present in various healthcare and - community service APIs, with optional fields to handle varying data availability. - """ + """Standardized service model that can accommodate data from multiple APIs.""" # Core identification id: int name: str - service_type: ServiceType = ServiceType.UNKNOWN - source_id: Optional[str] = None # Original ID from source system + service_type: ServiceType + source_id: Optional[str] = None official_name: Optional[str] = None # Location latitude: float longitude: float - distance: Optional[float] = None # Distance from search point in km + distance: Optional[float] = None physical_address: Optional[Address] = None mailing_address: Optional[Address] = None @@ -122,7 +116,7 @@ class Service(BaseModel): taxonomy_codes: List[str] = Field(default_factory=list) # Operating information - status: Optional[str] = None # current operating status (open/closed) + status: Optional[str] = None regular_hours: List[OperatingHours] = Field(default_factory=list) hours_exceptions: List[HoursException] = Field(default_factory=list) timezone_offset: Optional[str] = None @@ -131,7 +125,7 @@ class Service(BaseModel): wheelchair_accessible: AccessibilityLevel = AccessibilityLevel.UNKNOWN parking_type: Optional[str] = None accepts_new_patients: Optional[bool] = None - wait_time: Optional[int] = None # in minutes + wait_time: Optional[int] = None # Booking capabilities has_online_booking: bool = False @@ -182,8 +176,8 @@ def normalize_service_type(cls, v: str) -> ServiceType: # noqa: N805 "Family Doctor's Office": ServiceType.FAMILY_DOCTOR, "Medical Labs & Diagnostic Imaging Centres": ServiceType.MEDICAL_LAB, } - return mapping.get(v, ServiceType.UNKNOWN) - return ServiceType.UNKNOWN + return mapping.get(v, ServiceType.COMMUNITY_SERVICE) + return ServiceType.COMMUNITY_SERVICE class ServiceDocument(BaseModel): @@ -224,7 +218,7 @@ class RecommendationResponse(BaseModel): Whether the request signifies an emergency. is_out_of_scope : bool Whether the request is out of scope. - services : Optional[List[BaseService]] + services : Optional[List[Service]] A list of services ranked by location and relevancy. no_services_found : bool Whether no services were found. diff --git a/health_rec/services/ranking.py b/health_rec/services/ranking.py index 8d093ab..b6160ea 100644 --- a/health_rec/services/ranking.py +++ b/health_rec/services/ranking.py @@ -59,8 +59,8 @@ def _rank_by_relevancy_and_distance( """Rank services by relevancy score and distance.""" for service in services: service_location = ( - float(service.metadata["Latitude"]), - float(service.metadata["Longitude"]), + float(service.metadata["latitude"]), + float(service.metadata["longitude"]), ) service.distance = _calculate_distance(service_location, user_location) services.sort( diff --git a/health_rec/services/utils.py b/health_rec/services/utils.py index 2467784..5168734 100644 --- a/health_rec/services/utils.py +++ b/health_rec/services/utils.py @@ -2,11 +2,18 @@ import json import logging -from typing import Any, Dict, List +from typing import Any, Dict, List, Optional from chromadb.api.types import QueryResult -from api.data import PhoneNumber, Service, ServiceDocument +from api.data import ( + AccessibilityLevel, + Address, + PhoneNumber, + Service, + ServiceDocument, + ServiceType, +) logging.basicConfig( @@ -16,19 +23,7 @@ def _parse_chroma_result(chroma_results: QueryResult) -> List[ServiceDocument]: - """ - Parse the results from ChromaDB into a list of Service objects. - - Parameters - ---------- - chroma_results : QueryResult - The results from a ChromaDB query. - - Returns - ------- - List[ServiceDocument] - A list of ServiceDocument objects created from the ChromaDB results. - """ + """Parse the results from ChromaDB into a list of ServiceDocument objects.""" parsed_results: List[ServiceDocument] = [ ServiceDocument(id=id_, document=doc, metadata=meta, relevancy_score=score) for id_, doc, meta, score in zip( @@ -42,97 +37,128 @@ def _parse_chroma_result(chroma_results: QueryResult) -> List[ServiceDocument]: return parsed_results +def _parse_json_field(field: Any, default: Any) -> Any: + """Parse a potentially JSON-encoded field.""" + if isinstance(field, str): + try: + return json.loads(field) + except json.JSONDecodeError: + return default + return field + + +def _parse_coordinates(metadata: Dict[str, Any]) -> tuple[float, float]: + """Parse latitude and longitude coordinates.""" + try: + latitude = float(metadata.get("latitude", 0)) + longitude = float(metadata.get("longitude", 0)) + except (ValueError, TypeError): + latitude = longitude = 0.0 + return latitude, longitude + + +def _parse_phone_numbers(phones: List[Dict[str, Any]]) -> List[PhoneNumber]: + """Parse phone numbers from raw data.""" + phone_numbers = [] + for phone in phones: + number = phone.get("number", "") + extension = None + + if isinstance(number, str) and "ext" in number.lower(): + parts = number.lower().split("ext") + number = parts[0].strip() + extension = parts[1].strip() + + phone_numbers.append( + PhoneNumber( + number=number, + type=phone.get("type"), + name=phone.get("name"), + description=phone.get("description"), + extension=extension, + ) + ) + return phone_numbers + + +def _parse_service_type(service_type: Optional[str]) -> ServiceType: + """Parse and validate service type.""" + if not service_type: + return ServiceType.COMMUNITY_SERVICE + + try: + return ServiceType(service_type) + except ValueError: + return ServiceType.COMMUNITY_SERVICE + + +def _parse_wheelchair_access(access: Optional[str]) -> str: + """Parse and validate wheelchair accessibility.""" + if not access: + return AccessibilityLevel.UNKNOWN.value + + try: + return AccessibilityLevel(access).value + except ValueError: + return AccessibilityLevel.UNKNOWN.value + + def _metadata_to_service(metadata: Dict[str, Any]) -> Service: - """ - Convert metadata to a Service object. - - Parameters - ---------- - metadata : Dict[str, Any] - The metadata dictionary containing service information. - - Returns - ------- - Service - A Service object created from the metadata. - """ - # Handle ServiceArea - if "ServiceArea" in metadata: - if isinstance(metadata["ServiceArea"], str): - metadata["ServiceArea"] = [ - s.strip() for s in metadata["ServiceArea"].split(",") - ] - elif metadata["ServiceArea"] is None: - metadata["ServiceArea"] = [] - else: - metadata["ServiceArea"] = [] - - # Convert numeric fields - metadata["Latitude"] = ( - float(metadata["Latitude"]) if metadata.get("Latitude") else None - ) - metadata["Longitude"] = ( - float(metadata["Longitude"]) if metadata.get("Longitude") else None - ) - metadata["Score"] = int(metadata["Score"]) if metadata.get("Score") else None - metadata["ParentId"] = ( - int(metadata["ParentId"]) if metadata.get("ParentId") else None - ) - - # Handle PhoneNumbers - if "PhoneNumbers" in metadata: - if isinstance(metadata["PhoneNumbers"], str): - try: - phone_numbers = json.loads(metadata["PhoneNumbers"]) - except json.JSONDecodeError: - phone_numbers = [] - elif isinstance(metadata["PhoneNumbers"], list): - phone_numbers = metadata["PhoneNumbers"] - else: - phone_numbers = [] - - metadata["PhoneNumbers"] = [PhoneNumber(**phone) for phone in phone_numbers] - else: - metadata["PhoneNumbers"] = [] - - return Service( - id=int(metadata["id"]), - parent_id=metadata["ParentId"], - public_name=metadata["PublicName"], - score=metadata["Score"], - service_area=metadata["ServiceArea"], - distance=metadata.get("Distance"), - description=metadata.get("Description"), - latitude=metadata["Latitude"], - longitude=metadata["Longitude"], - physical_address_street1=metadata.get("PhysicalAddressStreet1"), - physical_address_street2=metadata.get("PhysicalAddressStreet2"), - physical_address_city=metadata.get("PhysicalAddressCity"), - physical_address_province=metadata.get("PhysicalAddressProvince"), - physical_address_postal_code=metadata.get("PhysicalAddressPostalCode"), - physical_address_country=metadata.get("PhysicalAddressCountry"), - mailing_attention_name=metadata.get("MailingAttentionName"), - mailing_address_street1=metadata.get("MailingAddressStreet1"), - mailing_address_street2=metadata.get("MailingAddressStreet2"), - mailing_address_city=metadata.get("MailingAddressCity"), - mailing_address_province=metadata.get("MailingAddressProvince"), - mailing_address_postal_code=metadata.get("MailingAddressPostalCode"), - mailing_address_country=metadata.get("MailingAddressCountry"), - phone_numbers=metadata["PhoneNumbers"], - website=metadata.get("Website"), - email=metadata.get("Email"), - hours=metadata.get("Hours"), - hours2=metadata.get("Hours2"), - min_age=metadata.get("MinAge"), - max_age=metadata.get("MaxAge"), - updated_on=metadata.get("UpdatedOn"), - taxonomy_term=metadata.get("TaxonomyTerm"), - taxonomy_terms=metadata.get("TaxonomyTerms"), - taxonomy_codes=metadata.get("TaxonomyCodes"), - eligibility=metadata.get("Eligibility"), - fee_structure_source=metadata.get("FeeStructureSource"), - official_name=metadata.get("OfficialName"), - physical_city=metadata.get("PhysicalCity"), - unique_id_prior_system=metadata.get("UniqueIDPriorSystem"), - record_owner=metadata.get("RecordOwner"), - ) + """Convert metadata to a Service object.""" + try: + # Parse coordinates + latitude, longitude = _parse_coordinates(metadata) + + # Ensure service_type is always set to a valid ServiceType enum value + service_type = _parse_service_type(metadata.get("service_type")) + + # Parse complex fields that might be JSON strings + physical_address = _parse_json_field(metadata.get("physical_address"), None) + if physical_address: + physical_address = Address(**physical_address) + + phone_numbers = _parse_json_field(metadata.get("phone_numbers"), []) + if isinstance(phone_numbers, list): + phone_numbers = _parse_phone_numbers(phone_numbers) + + # Create the Service object with parsed fields + service = Service( + id=metadata["id"], + name=metadata["name"], + service_type=service_type, # This will now always be a valid ServiceType + latitude=latitude, + longitude=longitude, + physical_address=physical_address, + phone_numbers=phone_numbers, + fax=metadata.get("fax"), + email=metadata.get("email"), + website=metadata.get("website"), + description=metadata.get("description"), + services=metadata.get("services", []), + languages=metadata.get("languages", []), + status=metadata.get("status"), + regular_hours=metadata.get("regular_hours", []), + hours_exceptions=metadata.get("hours_exceptions", []), + timezone_offset=metadata.get("timezone_offset"), + wheelchair_accessible=metadata.get("wheelchair_accessible"), + parking_type=metadata.get("parking_type"), + accepts_new_patients=metadata.get("accepts_new_patients"), + wait_time=metadata.get("wait_time"), + has_online_booking=metadata.get("has_online_booking", False), + can_book=metadata.get("can_book", False), + data_source=metadata.get("data_source"), + ) + logger.debug(f"Successfully parsed service: {service.id}") + return service + except Exception as e: + logger.error(f"Error converting metadata to Service: {e}") + logger.debug(f"Problematic metadata: {metadata}") + # Return a minimal valid Service object with required fields + return Service( + id=metadata.get("id", 0), + name=metadata.get("name", "Unknown"), + service_type=ServiceType.UNKNOWN, # Always provide a valid ServiceType + latitude=float(metadata.get("latitude", 0)), + longitude=float(metadata.get("longitude", 0)), + data_source=metadata.get("data_source", "unknown"), + ) diff --git a/scripts/download_211_data.py b/scripts/download_211_data.py index 4a6fa16..b230242 100644 --- a/scripts/download_211_data.py +++ b/scripts/download_211_data.py @@ -1,4 +1,4 @@ -"""Download data from the 211 API.""" +"""Download and process data from the 211 API.""" import os import requests @@ -7,90 +7,157 @@ import math from pathlib import Path import argparse -from typing import Dict, Any, Optional +from typing import Dict, Any, Optional, List +from datetime import datetime -from api.data import ServiceType +from api.data import Service, ServiceType, Address, PhoneNumber, AccessibilityLevel -def map_211_data_to_service(data: Dict[str, Any]) -> Dict[str, Any]: - """Map 211 API data to standardized Service format.""" - # Map phone numbers +def validate_service(data: Dict[str, Any]) -> Optional[Service]: + """Validate and create Service object from mapped data.""" + try: + return Service(**data) + except Exception as e: + print(f"Validation error for service {data.get('id')}: {e}") + return None + + +def parse_phone_numbers(phones: List[Dict[str, str]]) -> List[PhoneNumber]: + """Parse phone numbers from 211 data format.""" phone_numbers = [] - if data.get("PhoneNumbers"): - for phone in data["PhoneNumbers"]: - phone_numbers.append( - { - "number": phone.get("Number", ""), - "type": phone.get("Type"), - "name": phone.get("Name"), - "description": phone.get("Description"), - "extension": phone.get("Extension"), - } + for phone in phones: + number = phone.get("Phone", "") + extension = None + + if "ext" in number.lower(): + parts = number.lower().split("ext") + number = parts[0].strip() + extension = parts[1].strip() + + phone_numbers.append( + PhoneNumber( + number=number, + type=phone.get("Type", ""), + name=phone.get("Name", ""), + description=phone.get("Description", ""), + extension=extension, ) + ) + return phone_numbers - # Map addresses - physical_address = { - "street1": data.get("PhysicalAddressStreet1"), - "street2": data.get("PhysicalAddressStreet2"), - "city": data.get("PhysicalAddressCity"), - "province": data.get("PhysicalAddressProvince"), - "postal_code": data.get("PhysicalAddressPostalCode"), - "country": data.get("PhysicalAddressCountry"), - } - mailing_address = { - "street1": data.get("MailingAddressStreet1"), - "street2": data.get("MailingAddressStreet2"), - "city": data.get("MailingAddressCity"), - "province": data.get("MailingAddressProvince"), - "postal_code": data.get("MailingAddressPostalCode"), - "country": data.get("MailingAddressCountry"), - "attention_name": data.get("MailingAttentionName"), - } +def parse_taxonomy(taxonomy_str: str) -> List[str]: + """Parse taxonomy strings into clean list.""" + if not taxonomy_str: + return [] + + terms = [] + for term in taxonomy_str.split(";"): + clean_term = term.split("*")[0].strip() + if clean_term: + terms.append(clean_term) + return terms + + +class DateTimeEncoder(json.JSONEncoder): + """Custom JSON encoder for datetime objects.""" + + def default(self, obj): + if isinstance(obj, datetime): + return obj.isoformat() + return super().default(obj) + + +def parse_datetime(date_str: Optional[str]) -> Optional[str]: + """Parse datetime string to ISO format.""" + if not date_str: + return None + try: + dt = datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S") + return dt.isoformat() + except ValueError: + return None + + +def parse_age(age_str: Optional[str]) -> Optional[int]: + """Parse age string to integer.""" + if not age_str: + return None + try: + return int(float(age_str)) + except (ValueError, TypeError): + return None + + +def map_211_data_to_service(data: Dict[str, Any]) -> Dict[str, Any]: + """Map 211 API data to standardized Service format.""" + try: + latitude = float(data.get("Latitude", 0)) + longitude = float(data.get("Longitude", 0)) + except (ValueError, TypeError): + latitude = longitude = 0.0 - # Handle age parsing - def parse_age(age_str: Optional[str]) -> Optional[int]: - if not age_str: - return None - try: - return int(float(age_str)) - except (ValueError, TypeError): - return None + physical_address = Address( + street1=data.get("PhysicalAddressStreet1"), + street2=data.get("PhysicalAddressStreet2"), + city=data.get("PhysicalAddressCity"), + province=data.get("PhysicalAddressProvince"), + postal_code=data.get("PhysicalAddressPostalCode"), + country=data.get("PhysicalAddressCountry"), + ) + + mailing_address = Address( + street1=data.get("MailingAddressStreet1"), + street2=data.get("MailingAddressStreet2"), + city=data.get("MailingAddressCity"), + province=data.get("MailingAddressProvince"), + postal_code=data.get("MailingAddressPostalCode"), + country=data.get("MailingAddressCountry"), + attention_name=data.get("MailingAttentionName"), + ) return { - "id": data["id"], + "id": int(data["id"]), "name": data["PublicName"], - "service_type": ServiceType.UNKNOWN.value, + "service_type": ServiceType.COMMUNITY_SERVICE, "source_id": data.get("UniqueIDPriorSystem"), "official_name": data.get("OfficialName"), - "latitude": float(data["Latitude"]) if data.get("Latitude") else 0.0, - "longitude": float(data["Longitude"]) if data.get("Longitude") else 0.0, - "physical_address": physical_address, - "mailing_address": mailing_address, - "phone_numbers": phone_numbers, + "latitude": latitude, + "longitude": longitude, + "physical_address": physical_address.dict(exclude_none=True), + "mailing_address": mailing_address.dict(exclude_none=True), + "phone_numbers": [ + p.dict() for p in parse_phone_numbers(data.get("PhoneNumbers", [])) + ], "email": data.get("Email"), "website": data.get("Website"), "description": data.get("Description"), - "taxonomy_terms": [ - term.strip() - for term in data.get("TaxonomyTerms", "").split(";") - if term.strip() - ], - "taxonomy_codes": [ - code.strip() - for code in data.get("TaxonomyCodes", "").split(";") - if code.strip() - ], + "taxonomy_terms": parse_taxonomy(data.get("TaxonomyTerms", "")), + "taxonomy_codes": parse_taxonomy(data.get("TaxonomyCodes", "")), "eligibility_criteria": data.get("Eligibility"), "fee_structure": data.get("FeeStructureSource"), - "min_age": parse_age(data.get("MinAge", "")), - "max_age": parse_age(data.get("MaxAge", "")), - "last_updated": data.get("UpdatedOn"), + "min_age": parse_age(data.get("MinAge")), + "max_age": parse_age(data.get("MaxAge")), + "last_updated": parse_datetime(data.get("UpdatedOn")), "record_owner": data.get("RecordOwner"), "data_source": "211", + "wheelchair_accessible": AccessibilityLevel.UNKNOWN, } +def save_to_file(data: Dict[str, Any], file_name: str) -> None: + """Save the data to a JSON file.""" + mapped_services = [] + for service_data in data["Records"]: + mapped_data = map_211_data_to_service(service_data) + validated_service = validate_service(mapped_data) + if validated_service: + mapped_services.append(validated_service.dict(exclude_none=True)) + + with open(file_name, "w") as f: + json.dump(mapped_services, f, indent=2, cls=DateTimeEncoder) + + def create_payload( page_index: int, dataset: str, is_gta: bool, page_size: int ) -> Dict[str, Any]: @@ -105,7 +172,11 @@ def create_payload( "SortOrder": "distance", "PageIndex": page_index, "PageSize": page_size, - "Fields": "TaxonomyTerm,TaxonomyTerms,TaxonomyCodes,Eligibility,FeeStructureSource,OfficialName,PhysicalCity,UniqueIDPriorSystem", + "Fields": ( + "TaxonomyTerm,TaxonomyTerms,TaxonomyCodes,Eligibility," + "FeeStructureSource,OfficialName,PhysicalCity,DocumentsRequired," + "ApplicationProcess,UniqueIDPriorSystem,DisabilitiesAccess" + ), } if is_gta: @@ -149,13 +220,6 @@ def fetch_data( return response.json() -def save_to_file(data: Dict[str, Any], file_name: str) -> None: - """Save the data to a JSON file.""" - mapped_services = [map_211_data_to_service(service) for service in data["Records"]] - with open(file_name, "w") as f: - json.dump(mapped_services, f, indent=2) - - def main( api_key: str, base_url: str, diff --git a/scripts/download_empower_data.py b/scripts/download_empower_data.py index fcc426e..c23cde5 100644 --- a/scripts/download_empower_data.py +++ b/scripts/download_empower_data.py @@ -5,77 +5,51 @@ import argparse import json import os -from typing import List, Dict, Any +from typing import List, Dict, Any, Optional import time - - -def map_empower_data_to_service(data: Dict[str, Any]) -> Dict[str, Any]: - """Map Empower API data to standardized Service format.""" - # Map operating hours - regular_hours = [] - day_mapping = { - 0: "sunday", - 1: "monday", - 2: "tuesday", - 3: "wednesday", - 4: "thursday", - 5: "friday", - 6: "saturday", - } - - for hour in data.get("hours", []): - regular_hours.append( - { - "day": day_mapping[hour["day"]], - "is_open": hour["is_open"], - "is_24hour": hour["is_24hour"], - "open_time": hour["opentime"], - "close_time": hour["closetime"], - } +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry +from datetime import datetime + + +class RetryableSession: + """Session with retry capabilities.""" + + def __init__( + self, + retries: int = 3, + backoff_factor: float = 0.5, + status_forcelist: Optional[List[int]] = None, + ): + """Initialize session with retry strategy.""" + self.session = requests.Session() + if status_forcelist is None: + status_forcelist = [403, 500, 502, 503, 504] + + retry_strategy = Retry( + total=retries, + backoff_factor=backoff_factor, + status_forcelist=status_forcelist, + allowed_methods=["GET", "POST"], ) - # Map address - physical_address = { - "street1": data.get("address"), - "city": data.get("city"), - "province": data.get("province"), - "postal_code": data.get("postal_code"), - "country": "Canada", - } + adapter = HTTPAdapter(max_retries=retry_strategy) + self.session.mount("http://", adapter) + self.session.mount("https://", adapter) - return { - "id": data["id"], - "name": data["name"], - "service_type": data["type"], - "latitude": float(data["lat"]), - "longitude": float(data["long"]), - "physical_address": physical_address, - "phone_numbers": [{"number": data["phone"]}] if data.get("phone") else [], - "fax": data.get("fax"), - "email": data.get("email"), - "website": data.get("website"), - "description": data.get("description"), - "services": data.get("services", []), - "languages": data.get("languages", []), - "status": data.get("status"), - "regular_hours": regular_hours, - "hours_exceptions": data.get("hours_exceptions", []), - "timezone_offset": data.get("tzoffset"), - "wheelchair_accessible": data.get("wheelchair", "unknown"), - "parking_type": data.get("parking"), - "accepts_new_patients": data.get("new_patients"), - "wait_time": data.get("wait_time"), - "has_online_booking": data.get("has_ebooking", False), - "can_book": data.get("can_book", False), - "data_source": "Empower", - } + def get(self, *args: Any, **kwargs: Any) -> requests.Response: + """Perform GET request with retry capability.""" + return self.session.get(*args, **kwargs) class EmpowerDataFetcher: + """Fetcher for Empower API data.""" + def __init__(self, api_key: str, base_url: str): """Initialize the EmpowerDataFetcher.""" self.api_key = api_key self.base_url = base_url + self.session = RetryableSession() self.headers = { "Accept": "application/json", "Content-Type": "application/x-www-form-urlencoded", @@ -83,7 +57,6 @@ def __init__(self, api_key: str, base_url: str): "platform": "web", "lang_prefs": "en", } - # Provider type mapping as per API documentation[1] self.provider_types = { 1: "Emergency Rooms", 2: "Urgent Care Centre", @@ -93,6 +66,22 @@ def __init__(self, api_key: str, base_url: str): 11: "Family Doctor's Office", } + def _make_request( + self, url: str, params: Dict[str, Any], max_retries: int = 3 + ) -> Dict[str, Any]: + """Make API request with retries and error handling.""" + for attempt in range(max_retries): + try: + response = self.session.get(url, headers=self.headers, params=params) + response.raise_for_status() + return response.json() + except requests.exceptions.RequestException as e: + if attempt == max_retries - 1: + raise + print(f"Attempt {attempt + 1} failed: {e}. Retrying...") + time.sleep((attempt + 1) * 2) # Exponential backoff + raise Exception("Failed to make request after all retries") + def map_provider_type(self, type_id: int) -> str: """Map provider type ID to human-readable string.""" return self.provider_types.get(type_id, f"Unknown Type ({type_id})") @@ -102,7 +91,7 @@ def fetch_providers_list( ) -> Dict[str, Any]: """Fetch list of providers for a given page.""" url = f"{self.base_url}/providers" - params: Dict[str, Any] = { + params = { "api_key": self.api_key, "lat": lat, "long": long, @@ -110,54 +99,47 @@ def fetch_providers_list( "page": page, } - response = requests.get(url, headers=self.headers, params=params) - response.raise_for_status() - data = response.json() + data = self._make_request(url, params) - # Map provider types in the response for provider in data.get("providers", []): if "type" in provider: provider["type"] = self.map_provider_type(provider["type"]) - return data # type: ignore + return data def fetch_provider_details(self, provider_id: int) -> Dict[str, Any]: """Fetch detailed information for a specific provider.""" url = f"{self.base_url}/providers/{provider_id}" - params: Dict[str, str] = {"api_key": self.api_key} + params = {"api_key": self.api_key} - response = requests.get(url, headers=self.headers, params=params) - response.raise_for_status() - data = response.json() + data = self._make_request(url, params) - # Map provider type in the response if "type" in data: data["type"] = self.map_provider_type(data["type"]) - return data # type: ignore + return data def collect_provider_ids(self, lat: float, long: float, radius: float) -> List[int]: """Collect all provider IDs from paginated results.""" - provider_ids = [] + provider_ids: List[int] = [] page = 1 - # Fetch first page to get total pages initial_response = self.fetch_providers_list(lat, long, radius, page) total_pages = initial_response["pages"]["total_pages"] print(f"Total pages to process: {total_pages}") - # Process all pages while page <= total_pages: print(f"Fetching page {page}/{total_pages}") - response = self.fetch_providers_list(lat, long, radius, page) - - # Extract provider IDs from current page - for provider in response["providers"]: - provider_ids.append(provider["id"]) - - page += 1 - time.sleep(0.5) # Rate limiting + try: + response = self.fetch_providers_list(lat, long, radius, page) + provider_ids.extend(p["id"] for p in response["providers"]) + page += 1 + time.sleep(0.5) # Rate limiting + except Exception as e: + print(f"Error on page {page}: {e}. Retrying...") + time.sleep(2) # Wait before retry + continue return provider_ids @@ -167,8 +149,10 @@ def fetch_all_provider_details( """Fetch and save mapped provider details.""" os.makedirs(output_dir, exist_ok=True) output_file = os.path.join(output_dir, "data-00.json") + error_log = os.path.join(output_dir, "errors.log") mapped_providers = [] + failed_providers = [] total_providers = len(provider_ids) for idx, provider_id in enumerate(provider_ids, 1): @@ -178,16 +162,102 @@ def fetch_all_provider_details( mapped_provider = map_empower_data_to_service(provider_details) mapped_providers.append(mapped_provider) time.sleep(0.25) - except requests.exceptions.RequestException as e: + except Exception as e: print(f"Error fetching provider {provider_id}: {e}") + failed_providers.append({"id": provider_id, "error": str(e)}) + # Save successful providers with open(output_file, "w") as f: json.dump(mapped_providers, f, indent=2) - print(f"Saved all provider details to {output_file}") + # Save failed providers + if failed_providers: + with open(error_log, "w") as f: + json.dump(failed_providers, f, indent=2) + + print(f"Saved {len(mapped_providers)} provider details to {output_file}") + if failed_providers: + print(f"Failed to fetch {len(failed_providers)} providers. See {error_log}") + + +def map_empower_data_to_service(data: Dict[str, Any]) -> Dict[str, Any]: + """Map Empower API data to standardized Service format.""" + try: + # Convert coordinates to float + latitude = float(data.get("lat", 0)) + longitude = float(data.get("long", 0)) + except (ValueError, TypeError): + latitude = longitude = 0.0 + + # Map operating hours + regular_hours = [] + day_mapping = { + 0: "sunday", + 1: "monday", + 2: "tuesday", + 3: "wednesday", + 4: "thursday", + 5: "friday", + 6: "saturday", + } + + for hour in data.get("hours", []): + if all(key in hour for key in ["day", "is_open", "opentime", "closetime"]): + regular_hours.append( + { + "day": day_mapping[hour["day"]], + "is_open": hour["is_open"], + "is_24hour": hour.get("is_24hour", False), + "open_time": hour["opentime"], + "close_time": hour["closetime"], + } + ) + + # Map address + physical_address = { + "street1": data.get("address"), + "city": data.get("city"), + "province": data.get("province"), + "postal_code": data.get("postal_code"), + "country": "Canada", + } + + # Map phone numbers + phone_numbers = [] + if data.get("phone"): + phone_numbers.append({"number": data["phone"]}) + + return { + "id": data["id"], + "name": data["name"], + "service_type": data["type"], + "latitude": latitude, + "longitude": longitude, + "physical_address": physical_address, + "phone_numbers": phone_numbers, + "fax": data.get("fax"), + "email": data.get("email"), + "website": data.get("website"), + "description": data.get("description"), + "services": data.get("services", []), + "languages": data.get("languages", []), + "status": data.get("status"), + "regular_hours": regular_hours, + "hours_exceptions": data.get("hours_exceptions", []), + "timezone_offset": data.get("tzoffset"), + "wheelchair_accessible": data.get("wheelchair", "unknown"), + "parking_type": data.get("parking"), + "accepts_new_patients": data.get("new_patients", False), + "wait_time": data.get("wait_time"), + "has_online_booking": data.get("has_ebooking", False), + "can_book": data.get("can_book", False), + "data_source": "Empower", + "last_updated": datetime.now().isoformat(), + } def main() -> None: + """Main function to run the script.""" load_dotenv("./.env.development") parser = argparse.ArgumentParser(description="Download data from the Empower API.") @@ -204,30 +274,38 @@ def main() -> None: parser.add_argument( "--data-dir", default="./data/empower", help="Directory to save data" ) + parser.add_argument( + "--lat", + type=float, + default=44.051507, + help="Latitude for search center", + ) + parser.add_argument( + "--long", + type=float, + default=-79.45811, + help="Longitude for search center", + ) + parser.add_argument( + "--radius", + type=float, + default=100, + help="Search radius in kilometers", + ) args = parser.parse_args() if not args.api_key: raise ValueError("EMPOWER_API_KEY is not set") - # Initialize fetcher - fetcher = EmpowerDataFetcher(args.api_key, args.base_url) - # Parameters for the search - lat = 44.051507 - long = -79.45811 - radius = 100 # km - output_dir = args.data_dir + fetcher = EmpowerDataFetcher(args.api_key, args.base_url) try: - # Collect all provider IDs - provider_ids = fetcher.collect_provider_ids(lat, long, radius) + provider_ids = fetcher.collect_provider_ids(args.lat, args.long, args.radius) print(f"Collected {len(provider_ids)} provider IDs") - - # Fetch and save detailed information for all providers - fetcher.fetch_all_provider_details(provider_ids, output_dir) - - except requests.exceptions.RequestException as e: - print(f"Error occurred: {e}") + fetcher.fetch_all_provider_details(provider_ids, args.data_dir) + except Exception as e: + print(f"Fatal error occurred: {e}") if __name__ == "__main__": diff --git a/ui/src/app/components/map.tsx b/ui/src/app/components/map.tsx index d50e74d..45418e9 100644 --- a/ui/src/app/components/map.tsx +++ b/ui/src/app/components/map.tsx @@ -223,8 +223,8 @@ const MapComponent: React.FC = ({ locations, onMarkerClick, height, wi > {selectedLocation.name} - {selectedLocation.address} - {selectedLocation.phone} + {selectedLocation.physical_address} + {selectedLocation.phone_numbers} )} diff --git a/ui/src/app/components/service-card.tsx b/ui/src/app/components/service-card.tsx index 6759794..dfe2ca2 100644 --- a/ui/src/app/components/service-card.tsx +++ b/ui/src/app/components/service-card.tsx @@ -60,9 +60,9 @@ const ServiceCard: React.FC = ({ service, bgColor }) => { > - {service.PublicName} + {service.name} - {service.Description && ( + {service.description && ( = ({ service, bgColor }) => { WebkitBoxOrient: 'vertical', }} > - {renderHtml(service.Description)} + {renderHtml(service.description)} )} - {service.ServiceArea && ( + {service.physical_address && ( - {Array.isArray(service.ServiceArea) ? service.ServiceArea[0] : service.ServiceArea} + {Array.isArray(service.physical_address) ? service.physical_address[0] : service.physical_address} )} diff --git a/ui/src/app/dev/page.tsx b/ui/src/app/dev/page.tsx index 227229a..1f03dbf 100644 --- a/ui/src/app/dev/page.tsx +++ b/ui/src/app/dev/page.tsx @@ -31,12 +31,12 @@ const DevPage: React.FC = () => { const countData: number = await countResponse.json(); const validServices = servicesData.filter( - (service): service is Service & Required> => - typeof service.Latitude === 'number' && - typeof service.Longitude === 'number' && - !isNaN(service.Latitude) && - !isNaN(service.Longitude) && - !(service.Latitude === 0 && service.Longitude === 0) + (service): service is Service & Required> => + typeof service.latitude === 'number' && + typeof service.longitude === 'number' && + !isNaN(service.latitude) && + !isNaN(service.longitude) && + !(service.latitude === 0 && service.longitude === 0) ); setServices(validServices); @@ -44,8 +44,8 @@ const DevPage: React.FC = () => { if (validServices.length > 0) { const locations = validServices.map(service => ({ - latitude: service.Latitude, - longitude: service.Longitude, + latitude: service.latitude, + longitude: service.longitude, })); const newViewState = computeViewState(locations); setMapViewState(newViewState); @@ -70,10 +70,10 @@ const DevPage: React.FC = () => { const locations = services.map(service => ({ id: service.id, - name: service.PublicName, - latitude: service.Latitude, - longitude: service.Longitude, - service_area: service.ServiceArea, + name: service.name, + latitude: service.latitude, + longitude: service.longitude, + service_area: service.physical_address, })); return ( diff --git a/ui/src/app/recommendation/page.tsx b/ui/src/app/recommendation/page.tsx index 150130f..f0377c9 100644 --- a/ui/src/app/recommendation/page.tsx +++ b/ui/src/app/recommendation/page.tsx @@ -25,6 +25,7 @@ const RecommendationPage: React.FC = () => { const [isLoading, setIsLoading] = useState(true); const [additionalQuestions, setAdditionalQuestions] = useState([]); + // Theme colors const bgColor = useColorModeValue('gray.50', 'gray.900'); const textColor = useColorModeValue('gray.800', 'white'); const cardBgColor = useColorModeValue('white', 'gray.800'); @@ -88,7 +89,6 @@ const RecommendationPage: React.FC = () => { const refinedRecommendation: Recommendation = await response.json(); setRecommendation(refinedRecommendation); - updateMapViewState(refinedRecommendation.services); } catch (error) { console.error('Error refining recommendations:', error); @@ -100,15 +100,15 @@ const RecommendationPage: React.FC = () => { const updateMapViewState = useCallback((services: Service[]) => { if (services && services.length > 0) { const locations = services - .filter((service): service is Service & Required> => - typeof service.Latitude === 'number' && - typeof service.Longitude === 'number' && - !isNaN(service.Latitude) && - !isNaN(service.Longitude) + .filter(service => + typeof service.latitude === 'number' && + typeof service.longitude === 'number' && + !isNaN(service.latitude) && + !isNaN(service.longitude) ) .map(service => ({ - latitude: service.Latitude, - longitude: service.Longitude, + latitude: service.latitude, + longitude: service.longitude, })); const newViewState = computeViewState(locations); @@ -122,20 +122,18 @@ const RecommendationPage: React.FC = () => { if (!recommendation?.services) return []; return recommendation.services - .filter((service): service is Service & Required> => - typeof service.Latitude === 'number' && - typeof service.Longitude === 'number' && - !isNaN(service.Latitude) && - !isNaN(service.Longitude) + .filter(service => + typeof service.latitude === 'number' && + typeof service.longitude === 'number' && + !isNaN(service.latitude) && + !isNaN(service.longitude) ) .map(service => ({ id: service.id, - name: service.PublicName, - latitude: service.Latitude, - longitude: service.Longitude, - description: service.Description || '', - address: service.Address || '', - phone: service.Phone || '', + name: service.name, + latitude: service.latitude, + longitude: service.longitude, + description: service.description || '', })); }, [recommendation]); @@ -152,15 +150,13 @@ const RecommendationPage: React.FC = () => { const overview = overviewWithLabel.replace('Overview:', '').trim(); const reasoning = reasoningParts.join('\n').replace('Reasoning:', '').trim(); - const serviceName = recommendation.services[0]?.PublicName || 'Unknown Service'; + const serviceName = recommendation.services[0]?.name || 'Unknown Service'; const updatedOverview = `${serviceName}

${overview}`; return ( - - - + @@ -251,7 +247,8 @@ const RecommendationPage: React.FC = () => { height={mapHeight} width={mapWidth} radius={originalQuery?.radius} - center={originalQuery?.latitude && originalQuery?.longitude ? [originalQuery.longitude, originalQuery.latitude] : undefined} + center={originalQuery?.latitude && originalQuery?.longitude ? + [originalQuery.longitude, originalQuery.latitude] : undefined} /> diff --git a/ui/src/app/types/service.ts b/ui/src/app/types/service.ts index d692a7e..5ffa19e 100644 --- a/ui/src/app/types/service.ts +++ b/ui/src/app/types/service.ts @@ -1,58 +1,94 @@ +interface Address { + street1: string | null; + street2: string | null; + city: string | null; + province: string | null; + postal_code: string | null; + country: string | null; + attention_name: string | null; +} + interface PhoneNumber { - phone: string | null; + number: string; + type: string | null; name: string | null; description: string | null; - type: string | null; + extension: string | null; } -interface Location { +interface OperatingHours { + day: string; + is_open: boolean; + is_24hour: boolean; + open_time: string | null; + close_time: string | null; +} + +interface Service { id: number; name: string; + service_type: string; + source_id: string | null; + official_name: string | null; + + // Location latitude: number; longitude: number; - description: string; + distance: number | null; + physical_address: Address | null; + mailing_address: Address | null; + + // Contact information + phone_numbers: PhoneNumber[]; + fax: string | null; + email: string | null; + website: string | null; + social_media: Record; + + // Service details + description: string | null; + services: string[]; + languages: string[]; + taxonomy_terms: string[]; + taxonomy_codes: string[]; + + // Operating information + status: string | null; + regular_hours: OperatingHours[]; + hours_exceptions: OperatingHours[]; + timezone_offset: string | null; + + // Accessibility and special features + wheelchair_accessible: string; + parking_type: string | null; + accepts_new_patients: boolean | null; + wait_time: number | null; + + // Booking capabilities + has_online_booking: boolean; + has_queue_system: boolean; + accepts_walk_ins: boolean; + can_book: boolean; + + // Eligibility and fees + eligibility_criteria: string | null; + fee_structure: string | null; + min_age: number | null; + max_age: number | null; + + // Metadata + last_updated: Date | null; + record_owner: string | null; + data_source: string | null; } -interface Service { +// Location interface for map functionality +interface Location { id: number; - ParentId: number | null; - PublicName: string; - Score: number | null; - ServiceArea: string[] | null; - Distance: string | null; - Description: string | null; - Latitude: number | null; - Longitude: number | null; - PhysicalAddressStreet1: string | null; - PhysicalAddressStreet2: string | null; - PhysicalAddressCity: string | null; - PhysicalAddressProvince: string | null; - PhysicalAddressPostalCode: string | null; - PhysicalAddressCountry: string | null; - MailingAttentionName: string | null; - MailingAddressStreet1: string | null; - MailingAddressStreet2: string | null; - MailingAddressCity: string | null; - MailingAddressProvince: string | null; - MailingAddressPostalCode: string | null; - MailingAddressCountry: string | null; - PhoneNumbers: PhoneNumber[]; - Website: string | null; - Email: string | null; - Hours: string | null; - Hours2: string | null; - MinAge: string | null; - MaxAge: string | null; - UpdatedOn: string | null; - TaxonomyTerm: string | null; - TaxonomyTerms: string | null; - TaxonomyCodes: string | null; - Eligibility: string | null; - FeeStructureSource: string | null; - OfficialName: string | null; - PhysicalCity: string | null; - UniqueIDPriorSystem: string | null; - RecordOwner: string | null; + name: string; + latitude: number; + longitude: number; + description: string; } -export type { Service, PhoneNumber, Location }; +export type { Service, PhoneNumber, Location, Address, OperatingHours }; From a0892792e7f43387d30a8a18cfee35077f8c2b63 Mon Sep 17 00:00:00 2001 From: Amrit Krishnan Date: Mon, 28 Oct 2024 08:59:33 -0400 Subject: [PATCH 03/13] Fix code scanning alert no. 1: Clear-text logging of sensitive information Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> --- scripts/download_211_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/download_211_data.py b/scripts/download_211_data.py index b230242..d64029b 100644 --- a/scripts/download_211_data.py +++ b/scripts/download_211_data.py @@ -18,7 +18,7 @@ def validate_service(data: Dict[str, Any]) -> Optional[Service]: try: return Service(**data) except Exception as e: - print(f"Validation error for service {data.get('id')}: {e}") + print(f"Validation error for a service: {e}") return None From 7aef1c7effdb625c2973c447689909cc0ee7db3f Mon Sep 17 00:00:00 2001 From: Amrit Krishnan Date: Wed, 6 Nov 2024 11:17:22 -0500 Subject: [PATCH 04/13] Conform to same as main branch --- health_rec/api/config.py | 6 +++++- health_rec/load_data.py | 14 +++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/health_rec/api/config.py b/health_rec/api/config.py index 2d18abf..6aba149 100644 --- a/health_rec/api/config.py +++ b/health_rec/api/config.py @@ -60,5 +60,9 @@ class Config: COHERE_API_KEY: str = getenv("COHERE_API_KEY", "") CHROMA_HOST: str = getenv("CHROMA_HOST", "chromadb-dev") CHROMA_PORT: int = 8000 - COLLECTION_NAME: str = getenv("COLLECTION_NAME", "empower") + COLLECTION_NAME: str = getenv("COLLECTION_NAME", "211_gta") RELEVANCY_WEIGHT: float = float(getenv("RELEVANCY_WEIGHT", "0.5")) + MAX_CONTEXT_LENGTH: int = 300 + TOP_K: int = 5 + RERANKER_MAX_CONTEXT_LENGTH: int = 150 + RERANKER_MAX_SERVICES: int = 20 diff --git a/health_rec/load_data.py b/health_rec/load_data.py index d9dedef..de234f8 100644 --- a/health_rec/load_data.py +++ b/health_rec/load_data.py @@ -45,7 +45,7 @@ def __call__(self, texts: Documents) -> Embeddings: """ try: response = self.client.embeddings.create(input=texts, model=self.model) - return [data.embedding for data in response.data] + return [data.embedding for data in response.data] # type: ignore except Exception as e: logger.error(f"Error generating embeddings: {e}") raise @@ -66,16 +66,15 @@ def load_json_data(file_path: str) -> List[Dict[str, Any]]: try: with open(file_path, "r") as file: data = json.load(file) - return list(data) + if not isinstance(data, list): + raise ValueError("JSON file must contain a list of services") + return data except FileNotFoundError: logger.error(f"File not found: {file_path}") raise except json.JSONDecodeError: logger.error(f"Invalid JSON in file: {file_path}") raise - except KeyError: - logger.error("JSON structure is incorrect, missing 'Records' key") - raise def prepare_documents( @@ -137,10 +136,7 @@ def get_or_create_collection(host: str, port: int, name: str) -> chromadb.Collec logger.info(f"Retrieved existing collection: {name}") except ValueError: logger.info(f"Creating new collection: {name}") - collection = chroma_client.create_collection( - name=name, - metadata={"hnsw:space": "cosine"}, - ) + collection = chroma_client.create_collection(name=name) return collection From 09523d684e57cfca594249065b208f6a3f0a6d25 Mon Sep 17 00:00:00 2001 From: Amrit Krishnan Date: Tue, 12 Nov 2024 10:51:41 -0500 Subject: [PATCH 05/13] Fixes to backend to unify schema --- health_rec/api/data.py | 237 +++++++----------- health_rec/services/utils.py | 325 ++++++++++++++++-------- scripts/common.py | 36 +++ scripts/download_211_data.py | 332 +++++++++++-------------- scripts/fields.py | 111 +++++++++ ui/src/app/components/map.tsx | 130 +++++----- ui/src/app/components/service-card.tsx | 123 ++++++++- ui/src/app/design/page.tsx | 9 + ui/src/app/dev/page.tsx | 24 +- ui/src/app/recommendation/page.tsx | 72 +++--- ui/src/app/types/service.ts | 110 +++----- 11 files changed, 884 insertions(+), 625 deletions(-) create mode 100644 scripts/common.py create mode 100644 scripts/fields.py diff --git a/health_rec/api/data.py b/health_rec/api/data.py index 3ae4490..2d53777 100644 --- a/health_rec/api/data.py +++ b/health_rec/api/data.py @@ -1,69 +1,47 @@ """Data models.""" +import logging from datetime import datetime -from enum import Enum from typing import Any, Dict, List, Optional from pydantic import BaseModel, Field, validator -class ServiceType(str, Enum): - """Standardized service types across different APIs.""" +# Configure logging +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" +) +logger = logging.getLogger(__name__) - EMERGENCY_ROOM = "emergency_room" - URGENT_CARE = "urgent_care" - WALK_IN_CLINIC = "walk_in_clinic" - PHARMACY = "pharmacy" - MEDICAL_LAB = "medical_lab" - FAMILY_DOCTOR = "family_doctor" - COMMUNITY_SERVICE = "community_service" +class PhoneNumber(BaseModel): + """Phone number with metadata.""" -class AccessibilityLevel(str, Enum): - """Wheelchair accessibility levels.""" - - FULL = "full" - PARTIAL = "partial" - NONE = "none" - UNKNOWN = "unknown" - - -class DayOfWeek(str, Enum): - """Days of the week.""" - - SUNDAY = "sunday" - MONDAY = "monday" - TUESDAY = "tuesday" - WEDNESDAY = "wednesday" - THURSDAY = "thursday" - FRIDAY = "friday" - SATURDAY = "saturday" - - -class OperatingHours(BaseModel): - """Operating hours for a specific day.""" - - day: DayOfWeek - is_open: bool - is_24hour: bool = False - open_time: Optional[str] = None - close_time: Optional[str] = None - - -class HoursException(BaseModel): - """Special hours or holiday schedules.""" - + number: str + type: Optional[str] = None name: Optional[str] = None - start_date: datetime - end_date: datetime - is_open: bool - is_24hour: bool = False - open_time: Optional[str] = None - close_time: Optional[str] = None + description: Optional[str] = None + extension: Optional[str] = None class Address(BaseModel): - """Physical address information.""" + """Physical address information. + + Attributes + ---------- + street1 : Optional[str] + The first line of the street address. + street2 : Optional[str] + The second line of the street address. + city : Optional[str] + The city of the address. + province : Optional[str] + The province of the address. + postal_code : Optional[str] + The postal code of the address. + country : Optional[str] + The country of the address. + """ street1: Optional[str] = None street2: Optional[str] = None @@ -71,113 +49,74 @@ class Address(BaseModel): province: Optional[str] = None postal_code: Optional[str] = None country: Optional[str] = None - attention_name: Optional[str] = None - -class PhoneNumber(BaseModel): - """Phone number with additional metadata.""" - - number: str - type: Optional[str] = None - name: Optional[str] = None - description: Optional[str] = None - extension: Optional[str] = None + def __str__(self) -> str: + """Return formatted address string.""" + parts = [] + if self.street1: + parts.append(self.street1) + if self.street2: + parts.append(self.street2) + if self.city: + parts.append(self.city) + if self.province: + parts.append(self.province) + if self.postal_code: + parts.append(self.postal_code) + if self.country: + parts.append(self.country) + return ", ".join(filter(None, parts)) class Service(BaseModel): - """Standardized service model that can accommodate data from multiple APIs.""" + """Unified service model with required and optional fields. - # Core identification - id: int - name: str - service_type: ServiceType - source_id: Optional[str] = None - official_name: Optional[str] = None + Attributes + ---------- + id : str + The unique identifier of the service. + name : str + The name of the service. + description : str + The description of the service. + latitude : float + The latitude coordinate of the service location. + longitude : float + The longitude coordinate of the service location. + phone_numbers : List[PhoneNumber] + A list of phone numbers associated with the service. + address : Address + The physical address of the service. + email : str + The email address of the service. + metadata : Dict[str, Any] + Additional metadata associated with the service. + last_updated : Optional[datetime] + The last updated timestamp of the service. + """ - # Location + # Required fields + id: str + name: str + description: str latitude: float longitude: float - distance: Optional[float] = None - physical_address: Optional[Address] = None - mailing_address: Optional[Address] = None - - # Contact information - phone_numbers: List[PhoneNumber] = Field(default_factory=list) - fax: Optional[str] = None - email: Optional[str] = None - website: Optional[str] = None - social_media: Dict[str, str] = Field(default_factory=dict) - - # Service details - description: Optional[str] = None - services: List[str] = Field(default_factory=list) - languages: List[str] = Field(default_factory=list) - taxonomy_terms: List[str] = Field(default_factory=list) - taxonomy_codes: List[str] = Field(default_factory=list) - - # Operating information - status: Optional[str] = None - regular_hours: List[OperatingHours] = Field(default_factory=list) - hours_exceptions: List[HoursException] = Field(default_factory=list) - timezone_offset: Optional[str] = None - - # Accessibility and special features - wheelchair_accessible: AccessibilityLevel = AccessibilityLevel.UNKNOWN - parking_type: Optional[str] = None - accepts_new_patients: Optional[bool] = None - wait_time: Optional[int] = None - - # Booking capabilities - has_online_booking: bool = False - has_queue_system: bool = False - accepts_walk_ins: bool = False - can_book: bool = False - - # Eligibility and fees - eligibility_criteria: Optional[str] = None - fee_structure: Optional[str] = None - min_age: Optional[int] = None - max_age: Optional[int] = None - - # Metadata + phone_numbers: List[PhoneNumber] + address: Address + email: str + + # Optional metadata fields stored as key-value pairs + metadata: Dict[str, Any] = Field(default_factory=dict) + + # Source tracking last_updated: Optional[datetime] = None - record_owner: Optional[str] = None - data_source: Optional[str] = None # e.g., "211", "Empower" - - class Config: - """Pydantic configuration.""" - - use_enum_values = True - - @validator("wheelchair_accessible", pre=True) - def normalize_wheelchair_access(cls, v: str) -> AccessibilityLevel: # noqa: N805 - """Normalize wheelchair accessibility values from different sources.""" - if isinstance(v, str): - mapping = { - "t": AccessibilityLevel.FULL, - "true": AccessibilityLevel.FULL, - "p": AccessibilityLevel.PARTIAL, - "partial": AccessibilityLevel.PARTIAL, - "f": AccessibilityLevel.NONE, - "false": AccessibilityLevel.NONE, - } - return mapping.get(v.lower(), AccessibilityLevel.UNKNOWN) - return AccessibilityLevel.UNKNOWN - - @validator("service_type", pre=True) - def normalize_service_type(cls, v: str) -> ServiceType: # noqa: N805 - """Normalize service type values from different sources.""" - if isinstance(v, str): - mapping = { - "Retail Pharmacy": ServiceType.PHARMACY, - "Emergency Rooms": ServiceType.EMERGENCY_ROOM, - "Urgent Care Centre": ServiceType.URGENT_CARE, - "Primary Care Walk-In Clinic": ServiceType.WALK_IN_CLINIC, - "Family Doctor's Office": ServiceType.FAMILY_DOCTOR, - "Medical Labs & Diagnostic Imaging Centres": ServiceType.MEDICAL_LAB, - } - return mapping.get(v, ServiceType.COMMUNITY_SERVICE) - return ServiceType.COMMUNITY_SERVICE + + @validator("phone_numbers") + def validate_phone_numbers(cls, v: List[PhoneNumber]) -> List[PhoneNumber]: # noqa: N805 + """Ensure at least one phone number exists.""" + if not v: + raise ValueError("At least one phone number is required") + return v class ServiceDocument(BaseModel): diff --git a/health_rec/services/utils.py b/health_rec/services/utils.py index 5168734..2714a07 100644 --- a/health_rec/services/utils.py +++ b/health_rec/services/utils.py @@ -1,164 +1,293 @@ """Utility functions for the services module.""" +import ast import json import logging -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Set, Tuple from chromadb.api.types import QueryResult -from api.data import ( - AccessibilityLevel, - Address, - PhoneNumber, - Service, - ServiceDocument, - ServiceType, -) +from api.data import Address, PhoneNumber, Service, ServiceDocument +# Configure logging logging.basicConfig( level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" ) logger = logging.getLogger(__name__) +# Core service fields +CORE_SERVICE_FIELDS: Set[str] = { + "id", + "name", + "description", + "latitude", + "longitude", + "address", + "phone_numbers", + "email", + "metadata", + "last_updated", +} -def _parse_chroma_result(chroma_results: QueryResult) -> List[ServiceDocument]: - """Parse the results from ChromaDB into a list of ServiceDocument objects.""" - parsed_results: List[ServiceDocument] = [ - ServiceDocument(id=id_, document=doc, metadata=meta, relevancy_score=score) - for id_, doc, meta, score in zip( - chroma_results["ids"][0] if chroma_results["ids"] else [], - chroma_results["documents"][0] if chroma_results["documents"] else [], - chroma_results["metadatas"][0] if chroma_results["metadatas"] else [], - chroma_results["distances"][0] if chroma_results["distances"] else [], - ) - ] - return parsed_results +def _safe_json_loads(value: Any) -> Any: + """Safely parse JSON or string representation of data structures.""" + if not isinstance(value, str): + return value + + try: + return json.loads(value) + except json.JSONDecodeError: + try: + result = ast.literal_eval(value) + return list(result) if isinstance(result, tuple) else result + except (ValueError, SyntaxError): + return value def _parse_json_field(field: Any, default: Any) -> Any: """Parse a potentially JSON-encoded field.""" + if not field: + return default + + if isinstance(field, (dict, list)): + return field + if isinstance(field, str): try: - return json.loads(field) - except json.JSONDecodeError: + parsed = _safe_json_loads(field) + if isinstance(parsed, (dict, list)): + return parsed + logger.warning(f"Parsed value is not dict/list: {parsed}") return default - return field + except Exception as e: + logger.warning(f"Error parsing field: {e}") + return default + + return default -def _parse_coordinates(metadata: Dict[str, Any]) -> tuple[float, float]: +def _parse_chroma_result(chroma_results: QueryResult) -> List[ServiceDocument]: + """Parse the results from ChromaDB into a list of ServiceDocument objects.""" + if not chroma_results or not all( + key in chroma_results for key in ["ids", "documents", "metadatas", "distances"] + ): + logger.warning("Invalid or empty ChromaDB results") + return [] + + try: + parsed_results: List[ServiceDocument] = [ + ServiceDocument(id=id_, document=doc, metadata=meta, relevancy_score=score) + for id_, doc, meta, score in zip( + chroma_results["ids"][0] if chroma_results["ids"] else [], + chroma_results["documents"][0] if chroma_results["documents"] else [], + chroma_results["metadatas"][0] if chroma_results["metadatas"] else [], + chroma_results["distances"][0] if chroma_results["distances"] else [], + ) + ] + return parsed_results + except Exception as e: + logger.error(f"Error parsing ChromaDB results: {e}") + return [] + + +def _parse_coordinates(metadata: Dict[str, Any]) -> Tuple[float, float]: """Parse latitude and longitude coordinates.""" try: latitude = float(metadata.get("latitude", 0)) longitude = float(metadata.get("longitude", 0)) - except (ValueError, TypeError): - latitude = longitude = 0.0 - return latitude, longitude + if not (-90 <= latitude <= 90) or not (-180 <= longitude <= 180): + logger.warning( + f"Invalid coordinate values: lat={latitude}, lon={longitude}" + ) + return 0.0, 0.0 + return latitude, longitude + except (ValueError, TypeError) as e: + logger.warning(f"Error parsing coordinates: {e}") + return 0.0, 0.0 -def _parse_phone_numbers(phones: List[Dict[str, Any]]) -> List[PhoneNumber]: - """Parse phone numbers from raw data.""" - phone_numbers = [] - for phone in phones: - number = phone.get("number", "") +def _normalize_phone_data(phones: Any) -> List[Dict[str, Any]]: + """Normalize phone data into a list of dictionaries.""" + # Handle string input + if isinstance(phones, str): + try: + phones = _safe_json_loads(phones) + except Exception: + return [] + + # Convert tuple to list + if isinstance(phones, tuple): + phones = list(phones) + + # Handle single dictionary + if isinstance(phones, dict): + return [phones] + + # Ensure we have a list + if not isinstance(phones, list): + return [] + + return phones + + +def _split_number_extension(number: str) -> Tuple[str, str]: + """Split phone number and extension.""" + parts = number.lower().split("ext") + return parts[0].strip(), parts[1].strip() + + +def _parse_single_phone(phone_data: Any) -> Optional[PhoneNumber]: + """Parse a single phone number entry.""" + try: + # Handle string representation of dict + if isinstance(phone_data, str): + try: + phone_data = _safe_json_loads(phone_data) + except Exception: + return None + + # Ensure we have a dictionary + if not isinstance(phone_data, dict): + return None + + # Get and validate number + number = str(phone_data.get("number", "")).strip() + if not number: + return None + + # Extract extension extension = None + if ext := phone_data.get("extension"): + extension = str(ext).strip() + elif "ext" in number.lower(): + number, extension = _split_number_extension(number) - if isinstance(number, str) and "ext" in number.lower(): - parts = number.lower().split("ext") - number = parts[0].strip() - extension = parts[1].strip() - - phone_numbers.append( - PhoneNumber( - number=number, - type=phone.get("type"), - name=phone.get("name"), - description=phone.get("description"), - extension=extension, - ) + # Create phone number object + return PhoneNumber( + number=number, + type=str(phone_data.get("type", "")).strip(), + name=str(phone_data.get("name", "")).strip(), + description=str(phone_data.get("description", "")).strip(), + extension=extension, ) - return phone_numbers + except Exception as e: + logger.debug(f"Error parsing phone number {phone_data}: {e}") + return None -def _parse_service_type(service_type: Optional[str]) -> ServiceType: - """Parse and validate service type.""" - if not service_type: - return ServiceType.COMMUNITY_SERVICE +def _parse_phone_numbers(phones: Any) -> List[PhoneNumber]: + """Parse phone numbers from raw data.""" + if not phones: + return [PhoneNumber(number="Unknown")] - try: - return ServiceType(service_type) - except ValueError: - return ServiceType.COMMUNITY_SERVICE + # Convert input to list of dictionaries + phone_list = _normalize_phone_data(phones) + if not phone_list: + return [PhoneNumber(number="Unknown")] + + # Parse each phone number + phone_numbers = [] + for phone_data in phone_list: + if parsed_phone := _parse_single_phone(phone_data): + phone_numbers.append(parsed_phone) + return phone_numbers if phone_numbers else [PhoneNumber(number="Unknown")] -def _parse_wheelchair_access(access: Optional[str]) -> str: - """Parse and validate wheelchair accessibility.""" - if not access: - return AccessibilityLevel.UNKNOWN.value + +def _parse_address(address_data: Any) -> Address: + """Parse address from raw data.""" + default_address = Address( + street1="Unknown", city="Unknown", province="Unknown", country="Canada" + ) + + if not address_data: + return default_address try: - return AccessibilityLevel(access).value - except ValueError: - return AccessibilityLevel.UNKNOWN.value + parsed_address = _parse_json_field(address_data, {}) + if not isinstance(parsed_address, dict): + logger.warning(f"Invalid address format: {parsed_address}") + return default_address + + return Address( + street1=parsed_address.get("street1", "Unknown"), + street2=parsed_address.get("street2"), + city=parsed_address.get("city", "Unknown"), + province=parsed_address.get("province", "Unknown"), + postal_code=parsed_address.get("postal_code"), + country=parsed_address.get("country", "Canada"), + ) + except Exception as e: + logger.error(f"Error parsing address: {e}") + return default_address def _metadata_to_service(metadata: Dict[str, Any]) -> Service: """Convert metadata to a Service object.""" try: + # Parse required fields + service_id = str(metadata.get("id", "0")) + name = metadata.get("name", "Unknown Service") + description = metadata.get("description", "No description available") + # Parse coordinates latitude, longitude = _parse_coordinates(metadata) - # Ensure service_type is always set to a valid ServiceType enum value - service_type = _parse_service_type(metadata.get("service_type")) - - # Parse complex fields that might be JSON strings - physical_address = _parse_json_field(metadata.get("physical_address"), None) - if physical_address: - physical_address = Address(**physical_address) + # Parse address + logger.debug(f"Raw address data: {metadata.get('address')}") + address = _parse_address(metadata.get("address")) - phone_numbers = _parse_json_field(metadata.get("phone_numbers"), []) - if isinstance(phone_numbers, list): - phone_numbers = _parse_phone_numbers(phone_numbers) + # Parse phone numbers + phone_numbers = _parse_phone_numbers(metadata.get("phone_numbers")) - # Create the Service object with parsed fields + # Create the Service object service = Service( - id=metadata["id"], - name=metadata["name"], - service_type=service_type, # This will now always be a valid ServiceType + id=service_id, + name=name, + description=description, latitude=latitude, longitude=longitude, - physical_address=physical_address, + address=address, phone_numbers=phone_numbers, - fax=metadata.get("fax"), - email=metadata.get("email"), - website=metadata.get("website"), - description=metadata.get("description"), - services=metadata.get("services", []), - languages=metadata.get("languages", []), - status=metadata.get("status"), - regular_hours=metadata.get("regular_hours", []), - hours_exceptions=metadata.get("hours_exceptions", []), - timezone_offset=metadata.get("timezone_offset"), - wheelchair_accessible=metadata.get("wheelchair_accessible"), - parking_type=metadata.get("parking_type"), - accepts_new_patients=metadata.get("accepts_new_patients"), - wait_time=metadata.get("wait_time"), - has_online_booking=metadata.get("has_online_booking", False), - can_book=metadata.get("can_book", False), - data_source=metadata.get("data_source"), + email=metadata.get("email", ""), + metadata=_extract_metadata(metadata), + last_updated=metadata.get("last_updated"), ) + logger.debug(f"Successfully parsed service: {service.id}") return service + except Exception as e: logger.error(f"Error converting metadata to Service: {e}") logger.debug(f"Problematic metadata: {metadata}") - # Return a minimal valid Service object with required fields return Service( - id=metadata.get("id", 0), + id=str(metadata.get("id", "0")), name=metadata.get("name", "Unknown"), - service_type=ServiceType.UNKNOWN, # Always provide a valid ServiceType + description="No description available", latitude=float(metadata.get("latitude", 0)), longitude=float(metadata.get("longitude", 0)), - data_source=metadata.get("data_source", "unknown"), + address=Address( + street1="Unknown", city="Unknown", province="Unknown", country="Canada" + ), + phone_numbers=[PhoneNumber(number="Unknown")], + email="", + metadata={}, ) + + +def _extract_metadata(metadata: Dict[str, Any]) -> Dict[str, Any]: + """Extract additional fields into metadata.""" + additional_metadata = { + key: value + for key, value in metadata.items() + if key not in CORE_SERVICE_FIELDS and value is not None + } + + # Parse any string values that might be JSON + for key, value in additional_metadata.items(): + if isinstance(value, str): + additional_metadata[key] = _safe_json_loads(value) + + return additional_metadata diff --git a/scripts/common.py b/scripts/common.py new file mode 100644 index 0000000..03258c8 --- /dev/null +++ b/scripts/common.py @@ -0,0 +1,36 @@ +"""Common utilities for the project.""" + +import requests +from typing import Any, List, Optional +from urllib3.util.retry import Retry +from requests.adapters import HTTPAdapter + + +class RetryableSession: + """Session with retry capabilities.""" + + def __init__( + self, + retries: int = 3, + backoff_factor: float = 0.5, + status_forcelist: Optional[List[int]] = None, + ): + """Initialize session with retry strategy.""" + self.session = requests.Session() + if status_forcelist is None: + status_forcelist = [403, 500, 502, 503, 504] + + retry_strategy = Retry( + total=retries, + backoff_factor=backoff_factor, + status_forcelist=status_forcelist, + allowed_methods=["GET", "POST"], + ) + + adapter = HTTPAdapter(max_retries=retry_strategy) + self.session.mount("http://", adapter) + self.session.mount("https://", adapter) + + def post(self, *args: Any, **kwargs: Any) -> requests.Response: + """Perform POST request with retry capability.""" + return self.session.post(*args, **kwargs) diff --git a/scripts/download_211_data.py b/scripts/download_211_data.py index d64029b..eabec78 100644 --- a/scripts/download_211_data.py +++ b/scripts/download_211_data.py @@ -1,25 +1,27 @@ """Download and process data from the 211 API.""" -import os -import requests -from dotenv import load_dotenv import json +import logging import math -from pathlib import Path -import argparse -from typing import Dict, Any, Optional, List +import os from datetime import datetime +from pathlib import Path +from typing import Any, Dict, List -from api.data import Service, ServiceType, Address, PhoneNumber, AccessibilityLevel +import requests +from dotenv import load_dotenv +from pydantic import ValidationError +from api.data import Address, PhoneNumber, Service +from common import RetryableSession +from fields import FIELDS -def validate_service(data: Dict[str, Any]) -> Optional[Service]: - """Validate and create Service object from mapped data.""" - try: - return Service(**data) - except Exception as e: - print(f"Validation error for a service: {e}") - return None + +# Configure logging +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" +) +logger = logging.getLogger(__name__) def parse_phone_numbers(phones: List[Dict[str, str]]) -> List[PhoneNumber]: @@ -27,8 +29,10 @@ def parse_phone_numbers(phones: List[Dict[str, str]]) -> List[PhoneNumber]: phone_numbers = [] for phone in phones: number = phone.get("Phone", "") - extension = None + if not number: + continue + extension = None if "ext" in number.lower(): parts = number.lower().split("ext") number = parts[0].strip() @@ -37,125 +41,98 @@ def parse_phone_numbers(phones: List[Dict[str, str]]) -> List[PhoneNumber]: phone_numbers.append( PhoneNumber( number=number, - type=phone.get("Type", ""), - name=phone.get("Name", ""), - description=phone.get("Description", ""), + type=phone.get("Type"), + name=phone.get("Name"), + description=phone.get("Description"), extension=extension, ) ) - return phone_numbers - -def parse_taxonomy(taxonomy_str: str) -> List[str]: - """Parse taxonomy strings into clean list.""" - if not taxonomy_str: - return [] + # Ensure at least one phone number exists + if not phone_numbers: + phone_numbers.append(PhoneNumber(number="Unknown")) - terms = [] - for term in taxonomy_str.split(";"): - clean_term = term.split("*")[0].strip() - if clean_term: - terms.append(clean_term) - return terms - - -class DateTimeEncoder(json.JSONEncoder): - """Custom JSON encoder for datetime objects.""" - - def default(self, obj): - if isinstance(obj, datetime): - return obj.isoformat() - return super().default(obj) - - -def parse_datetime(date_str: Optional[str]) -> Optional[str]: - """Parse datetime string to ISO format.""" - if not date_str: - return None - try: - dt = datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S") - return dt.isoformat() - except ValueError: - return None - - -def parse_age(age_str: Optional[str]) -> Optional[int]: - """Parse age string to integer.""" - if not age_str: - return None - try: - return int(float(age_str)) - except (ValueError, TypeError): - return None + return phone_numbers -def map_211_data_to_service(data: Dict[str, Any]) -> Dict[str, Any]: +def map_211_data_to_service(data: Dict[str, Any]) -> Service: """Map 211 API data to standardized Service format.""" try: - latitude = float(data.get("Latitude", 0)) - longitude = float(data.get("Longitude", 0)) - except (ValueError, TypeError): - latitude = longitude = 0.0 - - physical_address = Address( - street1=data.get("PhysicalAddressStreet1"), - street2=data.get("PhysicalAddressStreet2"), - city=data.get("PhysicalAddressCity"), - province=data.get("PhysicalAddressProvince"), - postal_code=data.get("PhysicalAddressPostalCode"), - country=data.get("PhysicalAddressCountry"), - ) - - mailing_address = Address( - street1=data.get("MailingAddressStreet1"), - street2=data.get("MailingAddressStreet2"), - city=data.get("MailingAddressCity"), - province=data.get("MailingAddressProvince"), - postal_code=data.get("MailingAddressPostalCode"), - country=data.get("MailingAddressCountry"), - attention_name=data.get("MailingAttentionName"), - ) + # Parse required fields + address = Address( + street1=data.get("PhysicalAddressStreet1", "Unknown"), + street2=data.get("PhysicalAddressStreet2"), + city=data.get("PhysicalAddressCity", "Unknown"), + province=data.get("PhysicalAddressProvince", "Unknown"), + postal_code=data.get("PhysicalAddressPostalCode"), + country=data.get("PhysicalAddressCountry", "Canada"), + ) - return { - "id": int(data["id"]), - "name": data["PublicName"], - "service_type": ServiceType.COMMUNITY_SERVICE, - "source_id": data.get("UniqueIDPriorSystem"), - "official_name": data.get("OfficialName"), - "latitude": latitude, - "longitude": longitude, - "physical_address": physical_address.dict(exclude_none=True), - "mailing_address": mailing_address.dict(exclude_none=True), - "phone_numbers": [ - p.dict() for p in parse_phone_numbers(data.get("PhoneNumbers", [])) - ], - "email": data.get("Email"), - "website": data.get("Website"), - "description": data.get("Description"), - "taxonomy_terms": parse_taxonomy(data.get("TaxonomyTerms", "")), - "taxonomy_codes": parse_taxonomy(data.get("TaxonomyCodes", "")), - "eligibility_criteria": data.get("Eligibility"), - "fee_structure": data.get("FeeStructureSource"), - "min_age": parse_age(data.get("MinAge")), - "max_age": parse_age(data.get("MaxAge")), - "last_updated": parse_datetime(data.get("UpdatedOn")), - "record_owner": data.get("RecordOwner"), - "data_source": "211", - "wheelchair_accessible": AccessibilityLevel.UNKNOWN, - } + # Store additional fields in metadata + metadata = { + "website": data.get("Website"), + "taxonomy_terms": [ + term.strip() + for term in data.get("TaxonomyTerms", "").split(";") + if term.strip() + ], + "eligibility": data.get("Eligibility"), + "fee_structure": data.get("FeeStructureSource"), + "mailing_address": { + "street1": data.get("MailingAddress1"), + "street2": data.get("MailingAddress2"), + "city": data.get("MailingCity"), + "province": data.get("MailingStateProvince"), + "postal_code": data.get("MailingPostalCode"), + "country": data.get("MailingCountry"), + }, + "hours_of_operation": data.get("HoursOfOperation"), + "email": data.get("EmailAddressMain"), + "agency_description": data.get("AgencyDescription"), + "agency_description_site": data.get("AgencyDescription_Site"), + "search_hints": data.get("SearchHints"), + "coverage_area": data.get("CoverageArea"), + "disabilities_access": data.get("DisabilitiesAccess"), + "physical_location_description": data.get("PhysicalLocationDescription"), + "application_process": data.get("ApplicationProcess"), + "documents_required": data.get("DocumentsRequired"), + "languages_offered": data.get("LanguagesOffered"), + "languages_offered_list": data.get("LanguagesOfferedList"), + "language_of_record": data.get("LanguageOfRecord"), + "coverage": data.get("Coverage"), + "hours": data.get("Hours"), + } + + return Service( + id=str(data["id"]), + name=data["PublicName"], + description=data.get("Description", "No description available"), + latitude=float(data.get("Latitude", 0)), + longitude=float(data.get("Longitude", 0)), + phone_numbers=parse_phone_numbers(data.get("PhoneNumbers", [])), + address=address, + email=data.get("Email", ""), + metadata=metadata, + last_updated=datetime.now(), + ) + except (ValueError, ValidationError) as e: + logger.error(f"Error mapping service {data.get('id')}: {str(e)}") + raise -def save_to_file(data: Dict[str, Any], file_name: str) -> None: +def save_to_file(data: Dict[str, Any], file_path: Path) -> None: """Save the data to a JSON file.""" mapped_services = [] for service_data in data["Records"]: - mapped_data = map_211_data_to_service(service_data) - validated_service = validate_service(mapped_data) - if validated_service: - mapped_services.append(validated_service.dict(exclude_none=True)) + try: + service = map_211_data_to_service(service_data) + mapped_services.append(service.dict(exclude_none=True)) + except Exception as e: + logger.error(f"Failed to process service: {e}") + continue - with open(file_name, "w") as f: - json.dump(mapped_services, f, indent=2, cls=DateTimeEncoder) + with open(file_path, "w") as f: + json.dump(mapped_services, f, indent=2, default=str) def create_payload( @@ -172,11 +149,7 @@ def create_payload( "SortOrder": "distance", "PageIndex": page_index, "PageSize": page_size, - "Fields": ( - "TaxonomyTerm,TaxonomyTerms,TaxonomyCodes,Eligibility," - "FeeStructureSource,OfficialName,PhysicalCity,DocumentsRequired," - "ApplicationProcess,UniqueIDPriorSystem,DisabilitiesAccess" - ), + "Fields": FIELDS, } if is_gta: @@ -198,56 +171,12 @@ def create_payload( return payload -def fetch_data( - page_index: int, - api_key: str, - base_url: str, - dataset: str, - is_gta: bool, - page_size: int, -) -> Any: - """Fetch data from the API for a given page index.""" - headers = {"Content-Type": "application/json"} - params = {"key": api_key} - - response = requests.post( - base_url, - headers=headers, - params=params, - json=create_payload(page_index, dataset, is_gta, page_size), - ) - response.raise_for_status() - return response.json() - - -def main( - api_key: str, - base_url: str, - dataset: str, - is_gta: bool, - data_dir: str, - page_size: int, -) -> None: - os.makedirs(data_dir, exist_ok=True) - - first_page = fetch_data(0, api_key, base_url, dataset, is_gta, page_size) - total_records = first_page["RecordCount"] - total_pages = math.ceil(total_records / page_size) - - print(f"Total records: {total_records}") - print(f"Total pages: {total_pages}") - - for page in range(total_pages): - print(f"Fetching page {page + 1} of {total_pages}") - data = fetch_data(page, api_key, base_url, dataset, is_gta, page_size) - file_name = Path(f"data-{page:02d}.json") - save_to_file(data, os.path.join(data_dir, file_name)) - print(f"Saved data to {file_name}") - - -if __name__ == "__main__": +def main() -> None: + """Main function to run the script.""" load_dotenv("./.env.development") + import argparse + parser = argparse.ArgumentParser(description="Download data from the 211 API.") parser.add_argument( "--api-key", default=os.getenv("211_API_KEY"), help="API key for 211 API" @@ -262,7 +191,7 @@ def main( "--is-gta", action="store_true", help="Whether to download GTA data" ) parser.add_argument( - "--data-dir", default="/mnt/data/211", help="Directory to save data" + "--data-dir", default="/mnt/data/211", help="Directory to save data", type=Path ) parser.add_argument( "--page-size", type=int, default=1000, help="Number of records per page" @@ -271,14 +200,55 @@ def main( args = parser.parse_args() if not args.api_key: - raise ValueError("211_API_KEY is not set") + raise ValueError("211_API_KEY environment variable is not set") - data_dir = args.data_dir - if args.is_gta: - data_dir = os.path.join(data_dir, "gta") - else: - data_dir = os.path.join(data_dir, args.dataset) + # Setup HTTP session with retries + session = RetryableSession() - main( - args.api_key, args.base_url, args.dataset, args.is_gta, data_dir, args.page_size - ) + # Create data directory + data_dir = args.data_dir / ("gta" if args.is_gta else args.dataset) + data_dir.mkdir(parents=True, exist_ok=True) + + try: + # Fetch first page to get total count + response = session.post( + args.base_url, + headers={"Content-Type": "application/json"}, + params={"key": args.api_key}, + json=create_payload(0, args.dataset, args.is_gta, args.page_size), + ) + response.raise_for_status() + first_page = response.json() + + total_records = first_page["RecordCount"] + total_pages = math.ceil(total_records / args.page_size) + + logger.info(f"Total records: {total_records}") + logger.info(f"Total pages: {total_pages}") + + # Process all pages + for page in range(total_pages): + logger.info(f"Fetching page {page + 1} of {total_pages}") + + response = session.post( + args.base_url, + headers={"Content-Type": "application/json"}, + params={"key": args.api_key}, + json=create_payload(page, args.dataset, args.is_gta, args.page_size), + ) + response.raise_for_status() + + file_path = data_dir / f"data-{page:02d}.json" + save_to_file(response.json(), file_path) + logger.info(f"Saved data to {file_path}") + + except requests.exceptions.RequestException as e: + logger.error(f"Error fetching data: {e}") + raise + except Exception as e: + logger.error(f"Unexpected error: {e}") + raise + + +if __name__ == "__main__": + main() diff --git a/scripts/fields.py b/scripts/fields.py new file mode 100644 index 0000000..1b29a29 --- /dev/null +++ b/scripts/fields.py @@ -0,0 +1,111 @@ +"""Fields to download from 211 data.""" + +FIELDS = ( + "AlternateName," + "OfficialName," + "ParentAgency," + "RecordOwner," + "UniqueIDPriorSystem," + "MailingAttentionName," + "MailingAddress1," + "MailingAddress2," + "MailingCity," + "MailingStateProvince," + "MailingPostalCode," + "MailingCountry," + "MailingAddressIsPrivate," + "PhysicalAddress1," + "PhysicalAddress2," + "PhysicalCity," + "PhysicalCounty," + "PhysicalStateProvince," + "PhysicalPostalCode," + "PhysicalCountry," + "PhysicalAddressIsPrivate," + "OtherAddress1," + "OtherAddress2," + "OtherCity," + "OtherCounty," + "OtherStateProvince," + "OtherPostalCode," + "OtherCountry," + "Latitude," + "Longitude," + "HoursOfOperation," + "EmailAddressMain," + "WebsiteAddress," + "AgencyStatus," + "AgencyClassification," + "AgencyDescription," + "AgencyDescription_Site," + "SearchHints," + "CoverageArea," + "CoverageAreaText," + "Eligibility," + "EligibilityAdult," + "EligibilityChild," + "EligibilityFamily," + "EligibilityFemale," + "EligibilityMale," + "EligibilityTeen," + "LicenseAccreditation," + "IRSStatus," + "FEIN," + "YearIncorporated," + "AnnualBudgetTotal," + "LegalStatus," + "SourceOfFunds," + "ExcludeFromWebsite," + "ExcludeFromDirectory," + "DisabilitiesAccess," + "PhysicalLocationDescription," + "BusServiceAccess," + "PublicAccessTransportation," + "PaymentMethods," + "FeeStructureSource," + "ApplicationProcess," + "ResourceInfo," + "DocumentsRequired," + "AvailabilityNumberOfTimes," + "AvailabilityFrequency," + "AvailabilityPeriod," + "ServiceNotAlwaysAvailability," + "CapacityType," + "ServiceCapacity," + "NormalWaitTime," + "TemporaryMessage," + "TemporaryMessageAppears," + "TemporaryMessageExpires," + "EnteredOn," + "UpdatedOn," + "MadeInactiveOn," + "HighlightedResource," + "LanguagesOffered," + "LanguagesOfferedList," + "LastVerifiedOn," + "LastVerifiedByName," + "LastVerifiedByTitle," + "LastVerifiedByPhoneNumber," + "LastVerifiedByEmailAddress," + "LastVerificationApprovedBy," + "AvailableForDirectory," + "AvailableForReferral," + "AvailableForResearch," + "PreferredProvider," + "ConnectsToSiteNum," + "ConnectsToProgramNum," + "LanguageOfRecord," + "CurrentWorkflowStepCode," + "VolunteerOpportunities," + "VolunteerDuties," + "IsLinkOnly," + "ProgramAgencyNamePublic," + "SiteAgencyNamePublic," + "Categories," + "TaxonomyTerm," + "TaxonomyTerms," + "TaxonomyTermsNotDeactivated," + "TaxonomyCodes," + "Coverage," + "Hours" +) diff --git a/ui/src/app/components/map.tsx b/ui/src/app/components/map.tsx index 45418e9..7f2421f 100644 --- a/ui/src/app/components/map.tsx +++ b/ui/src/app/components/map.tsx @@ -1,3 +1,5 @@ +'use client'; + import React, { useState, useMemo, useEffect, useRef, useCallback } from 'react'; import Map, { Marker, @@ -8,6 +10,7 @@ import Map, { Source, Layer } from 'react-map-gl'; +import mapboxgl from 'mapbox-gl'; import 'mapbox-gl/dist/mapbox-gl.css'; import { Box, Text, VStack, Flex, Badge, Modal, ModalOverlay, ModalContent, ModalHeader, ModalBody, ModalCloseButton, Icon } from '@chakra-ui/react'; import { FaBuilding } from 'react-icons/fa'; @@ -21,8 +24,6 @@ interface MapProps { height: string; width: string; initialViewState?: ViewState; - radius?: number; - center?: [number, number]; } interface ViewState { @@ -39,41 +40,25 @@ export const TORONTO_COORDINATES: ViewState = { }; export const computeViewState = (locations: Location[]): ViewState => { - if (!locations.length) { - return TORONTO_COORDINATES; - } - - const min_longitude = Math.min(...locations.map(loc => loc.longitude)); - const max_longitude = Math.max(...locations.map(loc => loc.longitude)); - const min_latitude = Math.min(...locations.map(loc => loc.latitude)); - const max_latitude = Math.max(...locations.map(loc => loc.latitude)); - - const center_longitude = (min_longitude + max_longitude) / 2; - const center_latitude = (min_latitude + max_latitude) / 2; - - const padding = { top: 40, bottom: 40, left: 40, right: 40 }; - + if (locations.length === 0) return TORONTO_COORDINATES; + const bounds = new mapboxgl.LngLatBounds(); + locations.forEach(location => bounds.extend([location.longitude, location.latitude])); return { - longitude: center_longitude, - latitude: center_latitude, + ...bounds.getCenter(), zoom: 11, - padding + padding: { top: 40, bottom: 40, left: 40, right: 40 } }; }; -const MapComponent: React.FC = ({ locations, onMarkerClick, height, width, initialViewState, radius, center }) => { - const [viewState, setViewState] = useState(computeViewState(locations)); +const MapComponent: React.FC = ({ locations, onMarkerClick, height, width, initialViewState }) => { + const [viewState, setViewState] = useState(initialViewState || TORONTO_COORDINATES); const [selectedLocation, setSelectedLocation] = useState(null); const [isFullScreenMapOpen, setIsFullScreenMapOpen] = useState(false); const mapRef = useRef(null); - useEffect(() => { - setViewState(computeViewState(locations)); - }, [locations]); - const handleMarkerClick = useCallback((location: Location) => { setSelectedLocation(location); - if (mapRef.current && isFullScreenMapOpen) { + if (mapRef.current) { mapRef.current.flyTo({ center: [location.longitude, location.latitude], zoom: 15, @@ -81,13 +66,27 @@ const MapComponent: React.FC = ({ locations, onMarkerClick, height, wi }); } onMarkerClick && onMarkerClick(location); - }, [onMarkerClick, isFullScreenMapOpen]); + }, [onMarkerClick]); + + const fitMapToLocations = useCallback(() => { + if (locations.length > 0 && mapRef.current) { + const bounds = new mapboxgl.LngLatBounds(); + locations.forEach(location => bounds.extend([location.longitude, location.latitude])); + mapRef.current.fitBounds(bounds, { + padding: { top: 50, bottom: 50, left: 50, right: 50 }, + maxZoom: 15, + duration: 0 // Set to 0 for initial load to prevent movement + }); + } + }, [locations]); + + useEffect(() => { + fitMapToLocations(); + }, [fitMapToLocations]); const handleViewStateChange = useCallback((evt: ViewStateChangeEvent) => { - if (isFullScreenMapOpen) { - setViewState(evt.viewState); - } - }, [isFullScreenMapOpen]); + setViewState(evt.viewState); + }, []); const markers = useMemo(() => locations.map((location) => ( = ({ locations, onMarkerClick, height, wi )), [locations, handleMarkerClick]); - const radiusLayer = useMemo(() => { - if (!radius || !center) return null; - - return ( - - - - ); - }, [radius, center]); - const renderLocationsList = useCallback(() => ( {locations.map(location => ( @@ -173,7 +140,11 @@ const MapComponent: React.FC = ({ locations, onMarkerClick, height, wi const handleFullScreenMapOpen = useCallback(() => { setIsFullScreenMapOpen(true); - }, []); + // Delay the fitMapToLocations call to ensure the map is fully rendered + setTimeout(() => { + fitMapToLocations(); + }, 100); + }, [fitMapToLocations]); if (!MAPBOX_TOKEN) return Error: Mapbox token is not set; @@ -189,8 +160,31 @@ const MapComponent: React.FC = ({ locations, onMarkerClick, height, wi interactive={false} reuseMaps > - {markers} - {radiusLayer} + ({ + type: 'Feature', + geometry: { + type: 'Point', + coordinates: [location.longitude, location.latitude] + }, + properties: { + id: location.id, + name: location.name + } + })) + }}> + + @@ -223,8 +217,8 @@ const MapComponent: React.FC = ({ locations, onMarkerClick, height, wi > {selectedLocation.name} - {selectedLocation.physical_address} - {selectedLocation.phone_numbers} + {selectedLocation.address} + {selectedLocation.phone} )} diff --git a/ui/src/app/components/service-card.tsx b/ui/src/app/components/service-card.tsx index dfe2ca2..2247fd2 100644 --- a/ui/src/app/components/service-card.tsx +++ b/ui/src/app/components/service-card.tsx @@ -1,9 +1,20 @@ import React from 'react'; import parse, { Element, HTMLReactParserOptions, domToReact, DOMNode } from 'html-react-parser'; -import { Box, Heading, Text, VStack, useColorModeValue, useDisclosure, Flex, Badge, Icon } from '@chakra-ui/react'; -import { FaMapMarkerAlt } from 'react-icons/fa'; +import { + Box, + Heading, + Text, + VStack, + useColorModeValue, + useDisclosure, + Flex, + Badge, + Icon, + Link, +} from '@chakra-ui/react'; +import { FaMapMarkerAlt, FaPhone, FaEnvelope, FaGlobe } from 'react-icons/fa'; import ServiceModal from './service-modal'; -import { Service } from '../types/service'; +import { Service, Address } from '../types/service'; interface ServiceCardProps { service: Service; @@ -15,6 +26,19 @@ const ServiceCard: React.FC = ({ service, bgColor }) => { const defaultBgColor = useColorModeValue('white', 'gray.700'); const borderColor = useColorModeValue('gray.200', 'gray.600'); const textColor = useColorModeValue('gray.800', 'white'); + const mutedColor = useColorModeValue('gray.600', 'gray.400'); + + const formatAddress = (address: Address): string => { + const parts = [ + address.street1, + address.street2, + address.city, + address.province, + address.postal_code, + address.country, + ].filter(Boolean); + return parts.join(', '); + }; const renderHtml = (html: string) => { const options: HTMLReactParserOptions = { @@ -40,6 +64,9 @@ const ServiceCard: React.FC = ({ service, bgColor }) => { return parse(html, options); }; + const primaryPhone = service.phone_numbers[0]?.number; + const website = service.metadata?.website; + return ( <> = ({ service, bgColor }) => { flexDirection="column" justifyContent="space-between" > - + {service.name} + {service.description && ( = ({ service, bgColor }) => { {renderHtml(service.description)} )} + + + {service.address && ( + + + + {formatAddress(service.address)} + + + )} + + {primaryPhone && ( + + + e.stopPropagation()} + > + {primaryPhone} + + + )} + + {service.email && ( + + + e.stopPropagation()} + > + {service.email} + + + )} + + {website && ( + + + e.stopPropagation()} + > + Website + + + )} + + - {service.physical_address && ( - - - - {Array.isArray(service.physical_address) ? service.physical_address[0] : service.physical_address} - + {service.metadata?.services && ( + + {(Array.isArray(service.metadata.services) + ? service.metadata.services + : [service.metadata.services] + ).slice(0, 2).map((serviceName, index) => ( + + {serviceName} + + ))} + {(Array.isArray(service.metadata.services) + ? service.metadata.services.length + : 1) > 2 && ( + + +{(Array.isArray(service.metadata.services) + ? service.metadata.services.length + : 1) - 2} more + + )} )} - View Details + View Details diff --git a/ui/src/app/design/page.tsx b/ui/src/app/design/page.tsx index 685aada..90787d2 100644 --- a/ui/src/app/design/page.tsx +++ b/ui/src/app/design/page.tsx @@ -32,6 +32,12 @@ const ArchitectureDiagram: React.FC = () => { position: { x: -120, y: 200 }, style: { background: '#ffffff', width: 200, border: '2px dashed #666' }, }, + { + id: 'google maps', + data: { label: 'Google Maps API' }, + position: { x: -120, y: 0 }, + style: { background: '#ffffff', width: 200, border: '2px dashed #666' }, + }, { id: 'backend', data: { label: 'Backend\n(FastAPI + RAG)' }, @@ -58,6 +64,8 @@ const ArchitectureDiagram: React.FC = () => { { id: 'f-b', source: 'frontend', target: 'backend', label: '', animated: true, style: { stroke: '#007bff' }, markerEnd: { type: MarkerType.ArrowClosed } }, { id: 'f-m', source: 'frontend', target: 'mapbox', label: '', animated: true, style: { stroke: '#800080' }, markerEnd: { type: MarkerType.ArrowClosed } }, { id: 'm-f', source: 'mapbox', target: 'frontend', label: '', animated: true, style: { stroke: '#800080' }, markerEnd: { type: MarkerType.ArrowClosed } }, + { id: 'g-f', source: 'google maps', target: 'frontend', label: '', animated: true, style: { stroke: '#5e45d9' }, markerEnd: { type: MarkerType.ArrowClosed } }, + { id: 'f-g', source: 'frontend', target: 'google maps', label: '', animated: true, style: { stroke: '#5e45d9' }, markerEnd: { type: MarkerType.ArrowClosed } }, { id: 'b-f', source: 'backend', target: 'frontend', label: 'Recommendation\n+ Service List', animated: true, style: { stroke: '#28a745' }, markerEnd: { type: MarkerType.ArrowClosed } }, { id: 'b-c', source: 'backend', target: 'chromadb', label: 'RAG Retrieval', animated: true, style: { stroke: '#ffc107' }, markerEnd: { type: MarkerType.ArrowClosed } }, { id: 'c-b', source: 'chromadb', target: 'backend', label: '', animated: true, style: { stroke: '#ffc107' }, markerEnd: { type: MarkerType.ArrowClosed } }, @@ -84,6 +92,7 @@ const ArchitectureDiagram: React.FC = () => {
  • ● OpenAI API interaction
  • ● Results and recommendations
  • ● Mapbox API interaction
  • +
  • ● Google Maps API interaction
  • The Frontend integrates Mapbox for displaying service locations.

    diff --git a/ui/src/app/dev/page.tsx b/ui/src/app/dev/page.tsx index 1f03dbf..227229a 100644 --- a/ui/src/app/dev/page.tsx +++ b/ui/src/app/dev/page.tsx @@ -31,12 +31,12 @@ const DevPage: React.FC = () => { const countData: number = await countResponse.json(); const validServices = servicesData.filter( - (service): service is Service & Required> => - typeof service.latitude === 'number' && - typeof service.longitude === 'number' && - !isNaN(service.latitude) && - !isNaN(service.longitude) && - !(service.latitude === 0 && service.longitude === 0) + (service): service is Service & Required> => + typeof service.Latitude === 'number' && + typeof service.Longitude === 'number' && + !isNaN(service.Latitude) && + !isNaN(service.Longitude) && + !(service.Latitude === 0 && service.Longitude === 0) ); setServices(validServices); @@ -44,8 +44,8 @@ const DevPage: React.FC = () => { if (validServices.length > 0) { const locations = validServices.map(service => ({ - latitude: service.latitude, - longitude: service.longitude, + latitude: service.Latitude, + longitude: service.Longitude, })); const newViewState = computeViewState(locations); setMapViewState(newViewState); @@ -70,10 +70,10 @@ const DevPage: React.FC = () => { const locations = services.map(service => ({ id: service.id, - name: service.name, - latitude: service.latitude, - longitude: service.longitude, - service_area: service.physical_address, + name: service.PublicName, + latitude: service.Latitude, + longitude: service.Longitude, + service_area: service.ServiceArea, })); return ( diff --git a/ui/src/app/recommendation/page.tsx b/ui/src/app/recommendation/page.tsx index f0377c9..6f7580a 100644 --- a/ui/src/app/recommendation/page.tsx +++ b/ui/src/app/recommendation/page.tsx @@ -1,13 +1,13 @@ 'use client'; -import React, { useState, useEffect, useMemo, useCallback } from 'react'; +import React, { useState, useEffect, useMemo } from 'react'; import { Box, Container, Heading, Text, VStack, SimpleGrid, useColorModeValue, Divider, Badge, Flex, Grid, GridItem, Skeleton, SkeletonText, SkeletonCircle } from '@chakra-ui/react'; import ServiceCard from '../components/service-card'; import Header from '../components/header'; -import Map, { TORONTO_COORDINATES, computeViewState } from '../components/map'; +import Map, { computeViewState, TORONTO_COORDINATES } from '../components/map'; import { Service, Location } from '../types/service'; import { useRecommendationStore, Recommendation, Query, RecommendationStore } from '../stores/recommendation-store'; import { useRouter } from 'next/navigation'; @@ -25,7 +25,6 @@ const RecommendationPage: React.FC = () => { const [isLoading, setIsLoading] = useState(true); const [additionalQuestions, setAdditionalQuestions] = useState([]); - // Theme colors const bgColor = useColorModeValue('gray.50', 'gray.900'); const textColor = useColorModeValue('gray.800', 'white'); const cardBgColor = useColorModeValue('white', 'gray.800'); @@ -89,6 +88,7 @@ const RecommendationPage: React.FC = () => { const refinedRecommendation: Recommendation = await response.json(); setRecommendation(refinedRecommendation); + updateMapViewState(refinedRecommendation.services); } catch (error) { console.error('Error refining recommendations:', error); @@ -97,51 +97,57 @@ const RecommendationPage: React.FC = () => { } }; - const updateMapViewState = useCallback((services: Service[]) => { + const updateMapViewState = (services: Service[]) => { if (services && services.length > 0) { - const locations = services - .filter(service => - typeof service.latitude === 'number' && - typeof service.longitude === 'number' && - !isNaN(service.latitude) && - !isNaN(service.longitude) + const newMapLocations = services + .filter((service): service is Service & Required> => + typeof service.Latitude === 'number' && + typeof service.Longitude === 'number' && + !isNaN(service.Latitude) && + !isNaN(service.Longitude) ) .map(service => ({ - latitude: service.latitude, - longitude: service.longitude, + id: service.id, + name: service.PublicName, + latitude: service.Latitude, + longitude: service.Longitude, + description: service.Description || '', + address: service.Address || '', + phone: service.Phone || '', })); - const newViewState = computeViewState(locations); + const newViewState = computeViewState(newMapLocations); setMapViewState(newViewState); - } else { - setMapViewState(TORONTO_COORDINATES); } - }, []); + }; const mapLocations: Location[] = useMemo(() => { if (!recommendation?.services) return []; return recommendation.services - .filter(service => - typeof service.latitude === 'number' && - typeof service.longitude === 'number' && - !isNaN(service.latitude) && - !isNaN(service.longitude) + .filter((service): service is Service & Required> => + typeof service.Latitude === 'number' && + typeof service.Longitude === 'number' && + !isNaN(service.Latitude) && + !isNaN(service.Longitude) ) .map(service => ({ id: service.id, - name: service.name, - latitude: service.latitude, - longitude: service.longitude, - description: service.description || '', + name: service.PublicName, + latitude: service.Latitude, + longitude: service.Longitude, + description: service.Description || '', + address: service.Address || '', + phone: service.Phone || '', })); }, [recommendation]); useEffect(() => { - if (recommendation?.services) { - updateMapViewState(recommendation.services); + if (mapLocations.length > 0) { + const newViewState = computeViewState(mapLocations); + setMapViewState(newViewState); } - }, [recommendation, updateMapViewState]); + }, [mapLocations]); const renderRecommendationCard = (recommendation: Recommendation | null) => { if (!recommendation?.message) return null; @@ -150,13 +156,15 @@ const RecommendationPage: React.FC = () => { const overview = overviewWithLabel.replace('Overview:', '').trim(); const reasoning = reasoningParts.join('\n').replace('Reasoning:', '').trim(); - const serviceName = recommendation.services[0]?.name || 'Unknown Service'; + const serviceName = recommendation.services[0]?.PublicName || 'Unknown Service'; const updatedOverview = `${serviceName}

    ${overview}`; return ( - + + + @@ -246,9 +254,7 @@ const RecommendationPage: React.FC = () => { locations={mapLocations} height={mapHeight} width={mapWidth} - radius={originalQuery?.radius} - center={originalQuery?.latitude && originalQuery?.longitude ? - [originalQuery.longitude, originalQuery.latitude] : undefined} + initialViewState={mapViewState} /> diff --git a/ui/src/app/types/service.ts b/ui/src/app/types/service.ts index 5ffa19e..bd980bb 100644 --- a/ui/src/app/types/service.ts +++ b/ui/src/app/types/service.ts @@ -1,94 +1,58 @@ -interface Address { - street1: string | null; - street2: string | null; - city: string | null; - province: string | null; - postal_code: string | null; - country: string | null; - attention_name: string | null; -} - interface PhoneNumber { number: string; - type: string | null; - name: string | null; - description: string | null; - extension: string | null; + type?: string | null; + name?: string | null; + description?: string | null; + extension?: string | null; } -interface OperatingHours { - day: string; - is_open: boolean; - is_24hour: boolean; - open_time: string | null; - close_time: string | null; +interface Address { + street1?: string | null; + street2?: string | null; + city?: string | null; + province?: string | null; + postal_code?: string | null; + country?: string | null; } interface Service { - id: number; + // Required fields + id: string; name: string; - service_type: string; - source_id: string | null; - official_name: string | null; - - // Location + description: string; latitude: number; longitude: number; - distance: number | null; - physical_address: Address | null; - mailing_address: Address | null; - - // Contact information phone_numbers: PhoneNumber[]; - fax: string | null; - email: string | null; - website: string | null; - social_media: Record; - - // Service details - description: string | null; - services: string[]; - languages: string[]; - taxonomy_terms: string[]; - taxonomy_codes: string[]; + address: Address; + email: string; - // Operating information - status: string | null; - regular_hours: OperatingHours[]; - hours_exceptions: OperatingHours[]; - timezone_offset: string | null; - - // Accessibility and special features - wheelchair_accessible: string; - parking_type: string | null; - accepts_new_patients: boolean | null; - wait_time: number | null; - - // Booking capabilities - has_online_booking: boolean; - has_queue_system: boolean; - accepts_walk_ins: boolean; - can_book: boolean; - - // Eligibility and fees - eligibility_criteria: string | null; - fee_structure: string | null; - min_age: number | null; - max_age: number | null; - - // Metadata - last_updated: Date | null; - record_owner: string | null; - data_source: string | null; + // Optional fields + metadata: Record; + last_updated?: string | null; } -// Location interface for map functionality interface Location { - id: number; + id: string; name: string; latitude: number; longitude: number; description: string; + address: string; +} + +interface Recommendation { + message: string; + is_emergency: boolean; + is_out_of_scope: boolean; + services?: Service[] | null; + no_services_found: boolean; +} + +interface Query { + query: string; + latitude?: number | null; + longitude?: number | null; + radius?: number | null; } -export type { Service, PhoneNumber, Location, Address, OperatingHours }; +export type { Service, PhoneNumber, Address, Location, Recommendation, Query }; From 5ac66ed90d3a65cd5e60f172a331d83ac3ad5be7 Mon Sep 17 00:00:00 2001 From: Amrit Krishnan Date: Tue, 12 Nov 2024 11:21:18 -0500 Subject: [PATCH 06/13] Fixes to the frontend --- ui/src/app/components/service-modal.tsx | 154 +++++++++++++------ ui/src/app/recommendation/page.tsx | 189 +++++++++++++++++------- 2 files changed, 240 insertions(+), 103 deletions(-) diff --git a/ui/src/app/components/service-modal.tsx b/ui/src/app/components/service-modal.tsx index fcf0b32..a0c7a1c 100644 --- a/ui/src/app/components/service-modal.tsx +++ b/ui/src/app/components/service-modal.tsx @@ -20,9 +20,10 @@ import { Grid, GridItem, Link, + Badge, } from '@chakra-ui/react'; -import { FaMapMarkerAlt, FaPhone, FaGlobe, FaClock } from 'react-icons/fa'; -import { Service } from '../types/service'; +import { FaMapMarkerAlt, FaPhone, FaGlobe, FaClock, FaEnvelope } from 'react-icons/fa'; +import { Service, PhoneNumber, Address } from '../types/service'; interface ServiceModalProps { isOpen: boolean; @@ -38,13 +39,24 @@ const ServiceModal: React.FC = ({ isOpen, onClose, service }) const borderColor = useColorModeValue('gray.200', 'gray.600'); const linkColor = useColorModeValue('pink.600', 'pink.300'); - const formatServiceArea = (serviceArea: string | string[] | undefined): string => { - if (Array.isArray(serviceArea)) { - return serviceArea.join(', '); - } else if (typeof serviceArea === 'string') { - return serviceArea; + const formatAddress = (address: Address): string => { + const parts = [ + address.street1, + address.street2, + address.city, + address.province, + address.postal_code, + address.country, + ].filter(Boolean); + return parts.join(', '); + }; + + const formatPhoneNumber = (phone: PhoneNumber): string => { + let formatted = phone.number; + if (phone.extension) { + formatted += ` ext. ${phone.extension}`; } - return 'Not specified'; + return formatted; }; const renderHtml = (html: string) => { @@ -52,7 +64,6 @@ const ServiceModal: React.FC = ({ isOpen, onClose, service }) replace: (domNode: DOMNode) => { if (domNode instanceof Element && domNode.name === 'a' && domNode.attribs) { let href = domNode.attribs.href || ''; - // Ensure the URL has a protocol if (href && !href.startsWith('http://') && !href.startsWith('https://')) { href = `https://${href}`; } @@ -79,33 +90,42 @@ const ServiceModal: React.FC = ({ isOpen, onClose, service }) return parse(html, options); }; - const renderAdditionalInfo = () => { - const excludedKeys = ['id', 'ParentId', 'Score', 'Hours2', 'RecordOwner', 'UniqueIDPriorSystem', 'Latitude', 'Longitude', 'TaxonomyCodes', 'TaxonomyTerm', 'TaxonomyTerms', 'PublicName', 'Description', 'ServiceArea', 'PhoneNumbers', 'Website', 'Hours']; - const additionalInfo = Object.entries(service).filter(([key]) => !excludedKeys.includes(key)); + const renderMetadata = () => { + if (!service.metadata || Object.keys(service.metadata).length === 0) { + return null; + } return ( - {additionalInfo.map(([key, value]) => ( - - - - - {key} - - - - - {typeof value === 'string' ? value : JSON.stringify(value)} - + {Object.entries(service.metadata).map(([key, value]) => { + if (!value) return null; + + return ( + + + + + {key.charAt(0).toUpperCase() + key.slice(1)} + + + + + {Array.isArray(value) + ? value.join(', ') + : typeof value === 'object' + ? JSON.stringify(value) + : String(value)} + + - - - ))} + + ); + })} ); }; @@ -114,41 +134,72 @@ const ServiceModal: React.FC = ({ isOpen, onClose, service }) - {service.PublicName} + {service.name} - {service.Description && ( + {service.description && ( Description - {renderHtml(service.Description)} + {renderHtml(service.description)} )} - {service.ServiceArea && ( + + {service.address && ( - Service Area + Address - {formatServiceArea(service.ServiceArea)} + {formatAddress(service.address)} )} - {service.PhoneNumbers && service.PhoneNumbers.length > 0 && ( + + {service.phone_numbers && service.phone_numbers.length > 0 && ( - Phone + Contact Numbers - {service.PhoneNumbers[0].phone} + + {service.phone_numbers.map((phone, index) => ( + + + {formatPhoneNumber(phone)} + {phone.name && ` (${phone.name})`} + + {phone.type && ( + + {phone.type} + + )} + + ))} + )} - {service.Website && ( + + {service.email && ( + + + + + Email + + + + {service.email} + + + )} + + {service.metadata?.website && ( @@ -157,15 +208,18 @@ const ServiceModal: React.FC = ({ isOpen, onClose, service }) - {service.Website} + {service.metadata.website} )} - {service.Hours && ( + + {service.metadata?.hours && ( @@ -173,15 +227,21 @@ const ServiceModal: React.FC = ({ isOpen, onClose, service }) Hours - {service.Hours} + + {Array.isArray(service.metadata.hours) + ? service.metadata.hours.join(', ') + : service.metadata.hours} + )} + + Additional Information - {renderAdditionalInfo()} + {renderMetadata()} diff --git a/ui/src/app/recommendation/page.tsx b/ui/src/app/recommendation/page.tsx index 6f7580a..13edb00 100644 --- a/ui/src/app/recommendation/page.tsx +++ b/ui/src/app/recommendation/page.tsx @@ -2,14 +2,32 @@ import React, { useState, useEffect, useMemo } from 'react'; import { - Box, Container, Heading, Text, VStack, SimpleGrid, useColorModeValue, - Divider, Badge, Flex, Grid, GridItem, Skeleton, SkeletonText, SkeletonCircle + Box, + Container, + Heading, + Text, + VStack, + SimpleGrid, + useColorModeValue, + Divider, + Badge, + Flex, + Grid, + GridItem, + Skeleton, + SkeletonText, + SkeletonCircle, } from '@chakra-ui/react'; import ServiceCard from '../components/service-card'; import Header from '../components/header'; import Map, { computeViewState, TORONTO_COORDINATES } from '../components/map'; -import { Service, Location } from '../types/service'; -import { useRecommendationStore, Recommendation, Query, RecommendationStore } from '../stores/recommendation-store'; +import { Service, Location, Address } from '../types/service'; +import { + useRecommendationStore, + Recommendation, + Query, + RecommendationStore, +} from '../stores/recommendation-store'; import { useRouter } from 'next/navigation'; import AdditionalQuestions from '../components/additional-questions'; import EmergencyAlert from '../components/emergency-alert'; @@ -17,9 +35,15 @@ import OutOfScopeAlert from '../components/out-of-scope-alert'; import NoServicesFoundAlert from '../components/no-services-found-alert'; const RecommendationPage: React.FC = () => { - const recommendation = useRecommendationStore((state: RecommendationStore) => state.recommendation); - const setRecommendation = useRecommendationStore((state: RecommendationStore) => state.setRecommendation); - const originalQuery = useRecommendationStore((state: RecommendationStore) => state.query); + const recommendation = useRecommendationStore( + (state: RecommendationStore) => state.recommendation + ); + const setRecommendation = useRecommendationStore( + (state: RecommendationStore) => state.setRecommendation + ); + const originalQuery = useRecommendationStore( + (state: RecommendationStore) => state.query + ); const router = useRouter(); const [mapViewState, setMapViewState] = useState(TORONTO_COORDINATES); const [isLoading, setIsLoading] = useState(true); @@ -35,6 +59,18 @@ const RecommendationPage: React.FC = () => { const mapHeight = '400px'; const mapWidth = '100%'; + const formatAddress = (address: Address): string => { + const parts = [ + address.street1, + address.street2, + address.city, + address.province, + address.postal_code, + address.country, + ].filter(Boolean); + return parts.join(', '); + }; + useEffect(() => { if (!recommendation || !originalQuery) { router.replace('/'); @@ -50,7 +86,11 @@ const RecommendationPage: React.FC = () => { } try { - const response = await fetch(`/api/questions?query=${encodeURIComponent(originalQuery.query)}&recommendation=${encodeURIComponent(recommendation.message)}`); + const response = await fetch( + `/api/questions?query=${encodeURIComponent( + originalQuery.query + )}&recommendation=${encodeURIComponent(recommendation.message)}` + ); const data = await response.json(); setAdditionalQuestions(data.questions); setIsLoading(false); @@ -72,7 +112,7 @@ const RecommendationPage: React.FC = () => { query: originalQuery, recommendation: recommendation.message, questions: additionalQuestions, - answers: answers + answers: answers, }; const response = await fetch('/api/refine_recommendations', { @@ -89,7 +129,9 @@ const RecommendationPage: React.FC = () => { const refinedRecommendation: Recommendation = await response.json(); setRecommendation(refinedRecommendation); - updateMapViewState(refinedRecommendation.services); + if (refinedRecommendation.services) { + updateMapViewState(refinedRecommendation.services); + } } catch (error) { console.error('Error refining recommendations:', error); } finally { @@ -99,22 +141,15 @@ const RecommendationPage: React.FC = () => { const updateMapViewState = (services: Service[]) => { if (services && services.length > 0) { - const newMapLocations = services - .filter((service): service is Service & Required> => - typeof service.Latitude === 'number' && - typeof service.Longitude === 'number' && - !isNaN(service.Latitude) && - !isNaN(service.Longitude) - ) - .map(service => ({ - id: service.id, - name: service.PublicName, - latitude: service.Latitude, - longitude: service.Longitude, - description: service.Description || '', - address: service.Address || '', - phone: service.Phone || '', - })); + const newMapLocations = services.map((service) => ({ + id: service.id, + name: service.name, + latitude: service.latitude, + longitude: service.longitude, + description: service.description, + address: formatAddress(service.address), + phone: service.phone_numbers[0]?.number || '', + })); const newViewState = computeViewState(newMapLocations); setMapViewState(newViewState); @@ -124,22 +159,15 @@ const RecommendationPage: React.FC = () => { const mapLocations: Location[] = useMemo(() => { if (!recommendation?.services) return []; - return recommendation.services - .filter((service): service is Service & Required> => - typeof service.Latitude === 'number' && - typeof service.Longitude === 'number' && - !isNaN(service.Latitude) && - !isNaN(service.Longitude) - ) - .map(service => ({ - id: service.id, - name: service.PublicName, - latitude: service.Latitude, - longitude: service.Longitude, - description: service.Description || '', - address: service.Address || '', - phone: service.Phone || '', - })); + return recommendation.services.map((service) => ({ + id: service.id, + name: service.name, + latitude: service.latitude, + longitude: service.longitude, + description: service.description, + address: formatAddress(service.address), + phone: service.phone_numbers[0]?.number || '', + })); }, [recommendation]); useEffect(() => { @@ -152,15 +180,25 @@ const RecommendationPage: React.FC = () => { const renderRecommendationCard = (recommendation: Recommendation | null) => { if (!recommendation?.message) return null; - const [overviewWithLabel, ...reasoningParts] = recommendation.message.split('\n').filter(part => part.trim() !== ''); + const [overviewWithLabel, ...reasoningParts] = recommendation.message + .split('\n') + .filter((part) => part.trim() !== ''); const overview = overviewWithLabel.replace('Overview:', '').trim(); const reasoning = reasoningParts.join('\n').replace('Reasoning:', '').trim(); - const serviceName = recommendation.services[0]?.PublicName || 'Unknown Service'; + const serviceName = recommendation.services?.[0]?.name || 'Unknown Service'; const updatedOverview = `${serviceName}

    ${overview}`; return ( - + @@ -178,9 +216,11 @@ const RecommendationPage: React.FC = () => { }; const renderRecommendedServices = (services: Service[] | null) => { - const coloredServices = services?.map((service, index) => ({ + if (!services) return null; + + const coloredServices = services.map((service, index) => ({ ...service, - bgColor: index === 0 ? highlightColor : cardBgColor + bgColor: index === 0 ? highlightColor : cardBgColor, })); return ( @@ -191,10 +231,23 @@ const RecommendationPage: React.FC = () => { {isLoading ? ( Array.from({ length: 6 }).map((_, index) => ( - + - + @@ -204,8 +257,12 @@ const RecommendationPage: React.FC = () => { )) ) : ( - coloredServices?.map((service) => ( - + coloredServices.map((service) => ( + )) )} @@ -223,21 +280,41 @@ const RecommendationPage: React.FC = () => { } if (recommendation?.no_services_found) { - return ; + return ( + + ); } return ( <> - + {isLoading ? ( - + - + - + ) : ( From 9a48d17c3a4e2c3bcd010e2690b730ae43a3c4f1 Mon Sep 17 00:00:00 2001 From: Amrit Krishnan Date: Tue, 12 Nov 2024 11:21:49 -0500 Subject: [PATCH 07/13] Fix code scanning alert no. 2: Clear-text logging of sensitive information Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> --- health_rec/services/utils.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/health_rec/services/utils.py b/health_rec/services/utils.py index 2714a07..4d5e05e 100644 --- a/health_rec/services/utils.py +++ b/health_rec/services/utils.py @@ -98,9 +98,8 @@ def _parse_coordinates(metadata: Dict[str, Any]) -> Tuple[float, float]: latitude = float(metadata.get("latitude", 0)) longitude = float(metadata.get("longitude", 0)) if not (-90 <= latitude <= 90) or not (-180 <= longitude <= 180): - logger.warning( - f"Invalid coordinate values: lat={latitude}, lon={longitude}" - ) + logger.warning("Invalid coordinate values detected") + # Sensitive data (latitude, longitude) is not logged return 0.0, 0.0 return latitude, longitude except (ValueError, TypeError) as e: From f389340d0c0f70a3cd4a4177e02d60d9169a02a3 Mon Sep 17 00:00:00 2001 From: Amrit Krishnan Date: Tue, 12 Nov 2024 11:23:59 -0500 Subject: [PATCH 08/13] Fix code scanning alert no. 3: Clear-text logging of sensitive information Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> --- health_rec/services/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/health_rec/services/utils.py b/health_rec/services/utils.py index 4d5e05e..b3e0136 100644 --- a/health_rec/services/utils.py +++ b/health_rec/services/utils.py @@ -172,7 +172,7 @@ def _parse_single_phone(phone_data: Any) -> Optional[PhoneNumber]: extension=extension, ) except Exception as e: - logger.debug(f"Error parsing phone number {phone_data}: {e}") + logger.debug(f"Error parsing phone number: {e}") return None From fcb1368af0e34d031ac2f0f30d5aebb39e3b1a6e Mon Sep 17 00:00:00 2001 From: Amrit Krishnan Date: Tue, 12 Nov 2024 13:10:17 -0500 Subject: [PATCH 09/13] Mypy fixes and fix empower data script --- eval/collect_rag_outputs.py | 15 +- scripts/common.py | 15 +- scripts/download_empower_data.py | 314 +++++++++++++------------------ 3 files changed, 145 insertions(+), 199 deletions(-) diff --git a/eval/collect_rag_outputs.py b/eval/collect_rag_outputs.py index 611e662..900fa5e 100644 --- a/eval/collect_rag_outputs.py +++ b/eval/collect_rag_outputs.py @@ -2,19 +2,18 @@ import asyncio import json import logging -from typing import Dict, Any +from typing import Dict, Any, List, Optional import aiohttp from tqdm.asyncio import tqdm_asyncio - logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) async def fetch_recommendation( session: aiohttp.ClientSession, query: Dict[str, Any], endpoint: str -) -> Dict[str, Any]: +) -> Optional[Dict[str, Any]]: """Fetch recommendation from the RAG system API.""" try: async with session.post( @@ -41,19 +40,19 @@ async def fetch_recommendation( async def process_samples( - samples_file: str, output_file: str, batch_size: int = 5 + samples_file: str, output_file: str, endpoint: str, batch_size: int = 5 ) -> None: """Process samples in batches and save results.""" # Load samples with open(samples_file, "r") as f: samples = json.load(f) - results = [] + results: List[Dict[str, Any]] = [] async with aiohttp.ClientSession() as session: # Process in batches for i in range(0, len(samples), batch_size): batch = samples[i : i + batch_size] - tasks = [fetch_recommendation(session, query) for query in batch] + tasks = [fetch_recommendation(session, query, endpoint) for query in batch] batch_results = await tqdm_asyncio.gather(*tasks) results.extend([r for r in batch_results if r is not None]) @@ -89,7 +88,9 @@ def main() -> None: args = parser.parse_args() - asyncio.run(process_samples(args.input, args.output, args.batch_size)) + asyncio.run( + process_samples(args.input, args.output, args.endpoint, args.batch_size) + ) if __name__ == "__main__": diff --git a/scripts/common.py b/scripts/common.py index 03258c8..2f4b39f 100644 --- a/scripts/common.py +++ b/scripts/common.py @@ -1,12 +1,12 @@ """Common utilities for the project.""" import requests -from typing import Any, List, Optional +from typing import List, Optional from urllib3.util.retry import Retry from requests.adapters import HTTPAdapter -class RetryableSession: +class RetryableSession(requests.Session): """Session with retry capabilities.""" def __init__( @@ -16,7 +16,8 @@ def __init__( status_forcelist: Optional[List[int]] = None, ): """Initialize session with retry strategy.""" - self.session = requests.Session() + super().__init__() + if status_forcelist is None: status_forcelist = [403, 500, 502, 503, 504] @@ -28,9 +29,5 @@ def __init__( ) adapter = HTTPAdapter(max_retries=retry_strategy) - self.session.mount("http://", adapter) - self.session.mount("https://", adapter) - - def post(self, *args: Any, **kwargs: Any) -> requests.Response: - """Perform POST request with retry capability.""" - return self.session.post(*args, **kwargs) + self.mount("http://", adapter) + self.mount("https://", adapter) diff --git a/scripts/download_empower_data.py b/scripts/download_empower_data.py index c23cde5..ec831ec 100644 --- a/scripts/download_empower_data.py +++ b/scripts/download_empower_data.py @@ -1,45 +1,86 @@ """Download data from the Empower API.""" -import requests -from dotenv import load_dotenv import argparse import json +import logging import os -from typing import List, Dict, Any, Optional import time -from requests.adapters import HTTPAdapter -from urllib3.util.retry import Retry from datetime import datetime +from pathlib import Path +from typing import Any, Dict, List + +import requests +from dotenv import load_dotenv + +from api.data import Address, PhoneNumber, Service -class RetryableSession: - """Session with retry capabilities.""" - - def __init__( - self, - retries: int = 3, - backoff_factor: float = 0.5, - status_forcelist: Optional[List[int]] = None, - ): - """Initialize session with retry strategy.""" - self.session = requests.Session() - if status_forcelist is None: - status_forcelist = [403, 500, 502, 503, 504] - - retry_strategy = Retry( - total=retries, - backoff_factor=backoff_factor, - status_forcelist=status_forcelist, - allowed_methods=["GET", "POST"], +# Configure logging +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" +) +logger = logging.getLogger(__name__) + + +def map_empower_data_to_service(data: Dict[str, Any]) -> Service: + """Map Empower API data to unified Service model.""" + try: + # Parse required fields + phones = [] + if data.get("phone"): + phones.append(PhoneNumber(number=data["phone"], type="primary")) + if not phones: + phones = [PhoneNumber(number="Unknown")] + + address = Address( + street1=data.get("address", "Unknown"), + city=data.get("city", "Unknown"), + province=data.get("province", "Unknown"), + postal_code=data.get("postal_code"), + country="Canada", ) - adapter = HTTPAdapter(max_retries=retry_strategy) - self.session.mount("http://", adapter) - self.session.mount("https://", adapter) + # Store additional fields in metadata + metadata = { + "type": data.get("type"), + "services": data.get("services", []), + "languages": data.get("languages", []), + "hours": [ + { + "day": hour["day"], + "is_open": hour["is_open"], + "open_time": hour.get("opentime"), + "close_time": hour.get("closetime"), + "is_24hour": hour.get("is_24hour", False), + } + for hour in data.get("hours", []) + if all(key in hour for key in ["day", "is_open"]) + ], + "website": data.get("website"), + "fax": data.get("fax"), + "wheelchair_accessible": data.get("wheelchair"), + "parking": data.get("parking"), + "accepts_new_patients": data.get("new_patients", False), + "has_online_booking": data.get("has_ebooking", False), + "wait_time": data.get("wait_time"), + "timezone_offset": data.get("tzoffset"), + } - def get(self, *args: Any, **kwargs: Any) -> requests.Response: - """Perform GET request with retry capability.""" - return self.session.get(*args, **kwargs) + return Service( + id=str(data["id"]), + name=data["name"], + description=data.get("description", "No description available"), + latitude=float(data.get("lat", 0)), + longitude=float(data.get("long", 0)), + phone_numbers=phones, + address=address, + email=data.get("email", ""), + metadata=metadata, + last_updated=datetime.now(), + ) + except Exception as e: + logger.error(f"Error mapping service {data.get('id')}: {str(e)}") + raise class EmpowerDataFetcher: @@ -49,7 +90,6 @@ def __init__(self, api_key: str, base_url: str): """Initialize the EmpowerDataFetcher.""" self.api_key = api_key self.base_url = base_url - self.session = RetryableSession() self.headers = { "Accept": "application/json", "Content-Type": "application/x-www-form-urlencoded", @@ -66,22 +106,6 @@ def __init__(self, api_key: str, base_url: str): 11: "Family Doctor's Office", } - def _make_request( - self, url: str, params: Dict[str, Any], max_retries: int = 3 - ) -> Dict[str, Any]: - """Make API request with retries and error handling.""" - for attempt in range(max_retries): - try: - response = self.session.get(url, headers=self.headers, params=params) - response.raise_for_status() - return response.json() - except requests.exceptions.RequestException as e: - if attempt == max_retries - 1: - raise - print(f"Attempt {attempt + 1} failed: {e}. Retrying...") - time.sleep((attempt + 1) * 2) # Exponential backoff - raise Exception("Failed to make request after all retries") - def map_provider_type(self, type_id: int) -> str: """Map provider type ID to human-readable string.""" return self.provider_types.get(type_id, f"Unknown Type ({type_id})") @@ -91,7 +115,7 @@ def fetch_providers_list( ) -> Dict[str, Any]: """Fetch list of providers for a given page.""" url = f"{self.base_url}/providers" - params = { + params: Dict[str, Any] = { "api_key": self.api_key, "lat": lat, "long": long, @@ -99,9 +123,18 @@ def fetch_providers_list( "page": page, } - data = self._make_request(url, params) + response = requests.get(url, headers=self.headers, params=params) + response.raise_for_status() + raw_data: Any = response.json() + + # Create a properly typed dictionary + data: Dict[str, Any] = { + "providers": raw_data.get("providers", []), + "pages": raw_data.get("pages", {}), + } - for provider in data.get("providers", []): + # Map provider types in the response + for provider in data["providers"]: if "type" in provider: provider["type"] = self.map_provider_type(provider["type"]) @@ -110,10 +143,16 @@ def fetch_providers_list( def fetch_provider_details(self, provider_id: int) -> Dict[str, Any]: """Fetch detailed information for a specific provider.""" url = f"{self.base_url}/providers/{provider_id}" - params = {"api_key": self.api_key} + params: Dict[str, str] = {"api_key": self.api_key} - data = self._make_request(url, params) + response = requests.get(url, headers=self.headers, params=params) + response.raise_for_status() + raw_data: Any = response.json() + # Create a properly typed dictionary + data: Dict[str, Any] = dict(raw_data) + + # Map provider type in the response if "type" in data: data["type"] = self.map_provider_type(data["type"]) @@ -121,139 +160,57 @@ def fetch_provider_details(self, provider_id: int) -> Dict[str, Any]: def collect_provider_ids(self, lat: float, long: float, radius: float) -> List[int]: """Collect all provider IDs from paginated results.""" - provider_ids: List[int] = [] + provider_ids = [] page = 1 + # Fetch first page to get total pages initial_response = self.fetch_providers_list(lat, long, radius, page) total_pages = initial_response["pages"]["total_pages"] - print(f"Total pages to process: {total_pages}") + logger.info(f"Total pages to process: {total_pages}") + # Process all pages while page <= total_pages: - print(f"Fetching page {page}/{total_pages}") - try: - response = self.fetch_providers_list(lat, long, radius, page) - provider_ids.extend(p["id"] for p in response["providers"]) - page += 1 - time.sleep(0.5) # Rate limiting - except Exception as e: - print(f"Error on page {page}: {e}. Retrying...") - time.sleep(2) # Wait before retry - continue + logger.info(f"Fetching page {page}/{total_pages}") + response = self.fetch_providers_list(lat, long, radius, page) + + # Extract provider IDs from current page + for provider in response["providers"]: + provider_ids.append(provider["id"]) + + page += 1 + time.sleep(0.5) # Rate limiting return provider_ids def fetch_all_provider_details( - self, provider_ids: List[int], output_dir: str + self, provider_ids: List[int], output_dir: Path ) -> None: """Fetch and save mapped provider details.""" - os.makedirs(output_dir, exist_ok=True) - output_file = os.path.join(output_dir, "data-00.json") - error_log = os.path.join(output_dir, "errors.log") + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + output_file = output_dir / "data-00.json" mapped_providers = [] - failed_providers = [] total_providers = len(provider_ids) for idx, provider_id in enumerate(provider_ids, 1): - print(f"Fetching provider {idx}/{total_providers} (ID: {provider_id})") + logger.info( + f"Fetching provider {idx}/{total_providers} (ID: {provider_id})" + ) try: provider_details = self.fetch_provider_details(provider_id) mapped_provider = map_empower_data_to_service(provider_details) - mapped_providers.append(mapped_provider) - time.sleep(0.25) + mapped_providers.append(mapped_provider.dict(exclude_none=True)) + time.sleep(0.25) # Rate limiting except Exception as e: - print(f"Error fetching provider {provider_id}: {e}") - failed_providers.append({"id": provider_id, "error": str(e)}) + logger.error(f"Failed to process provider {provider_id}: {e}") + continue - # Save successful providers with open(output_file, "w") as f: - json.dump(mapped_providers, f, indent=2) + json.dump(mapped_providers, f, indent=2, default=str) - # Save failed providers - if failed_providers: - with open(error_log, "w") as f: - json.dump(failed_providers, f, indent=2) - - print(f"Saved {len(mapped_providers)} provider details to {output_file}") - if failed_providers: - print(f"Failed to fetch {len(failed_providers)} providers. See {error_log}") - - -def map_empower_data_to_service(data: Dict[str, Any]) -> Dict[str, Any]: - """Map Empower API data to standardized Service format.""" - try: - # Convert coordinates to float - latitude = float(data.get("lat", 0)) - longitude = float(data.get("long", 0)) - except (ValueError, TypeError): - latitude = longitude = 0.0 - - # Map operating hours - regular_hours = [] - day_mapping = { - 0: "sunday", - 1: "monday", - 2: "tuesday", - 3: "wednesday", - 4: "thursday", - 5: "friday", - 6: "saturday", - } - - for hour in data.get("hours", []): - if all(key in hour for key in ["day", "is_open", "opentime", "closetime"]): - regular_hours.append( - { - "day": day_mapping[hour["day"]], - "is_open": hour["is_open"], - "is_24hour": hour.get("is_24hour", False), - "open_time": hour["opentime"], - "close_time": hour["closetime"], - } - ) - - # Map address - physical_address = { - "street1": data.get("address"), - "city": data.get("city"), - "province": data.get("province"), - "postal_code": data.get("postal_code"), - "country": "Canada", - } - - # Map phone numbers - phone_numbers = [] - if data.get("phone"): - phone_numbers.append({"number": data["phone"]}) - - return { - "id": data["id"], - "name": data["name"], - "service_type": data["type"], - "latitude": latitude, - "longitude": longitude, - "physical_address": physical_address, - "phone_numbers": phone_numbers, - "fax": data.get("fax"), - "email": data.get("email"), - "website": data.get("website"), - "description": data.get("description"), - "services": data.get("services", []), - "languages": data.get("languages", []), - "status": data.get("status"), - "regular_hours": regular_hours, - "hours_exceptions": data.get("hours_exceptions", []), - "timezone_offset": data.get("tzoffset"), - "wheelchair_accessible": data.get("wheelchair", "unknown"), - "parking_type": data.get("parking"), - "accepts_new_patients": data.get("new_patients", False), - "wait_time": data.get("wait_time"), - "has_online_booking": data.get("has_ebooking", False), - "can_book": data.get("can_book", False), - "data_source": "Empower", - "last_updated": datetime.now().isoformat(), - } + logger.info(f"Saved {len(mapped_providers)} provider details to {output_file}") def main() -> None: @@ -272,40 +229,31 @@ def main() -> None: help="Base URL for Empower API", ) parser.add_argument( - "--data-dir", default="./data/empower", help="Directory to save data" - ) - parser.add_argument( - "--lat", - type=float, - default=44.051507, - help="Latitude for search center", - ) - parser.add_argument( - "--long", - type=float, - default=-79.45811, - help="Longitude for search center", - ) - parser.add_argument( - "--radius", - type=float, - default=100, - help="Search radius in kilometers", + "--data-dir", + type=Path, + default=Path("./data/empower"), + help="Directory to save data", ) args = parser.parse_args() if not args.api_key: - raise ValueError("EMPOWER_API_KEY is not set") + raise ValueError("EMPOWER_API_KEY environment variable is not set") fetcher = EmpowerDataFetcher(args.api_key, args.base_url) + # Parameters for the search + lat = 44.051507 + long = -79.45811 + radius = 100 # km + try: - provider_ids = fetcher.collect_provider_ids(args.lat, args.long, args.radius) - print(f"Collected {len(provider_ids)} provider IDs") + provider_ids = fetcher.collect_provider_ids(lat, long, radius) + logger.info(f"Collected {len(provider_ids)} provider IDs") fetcher.fetch_all_provider_details(provider_ids, args.data_dir) except Exception as e: - print(f"Fatal error occurred: {e}") + logger.error(f"Fatal error occurred: {e}") + raise if __name__ == "__main__": From 231840baebcf12f4f173f5d36fe2f520a592f1fc Mon Sep 17 00:00:00 2001 From: Amrit Krishnan Date: Tue, 12 Nov 2024 13:32:21 -0500 Subject: [PATCH 10/13] Add docstring for phonenumber dataclass --- health_rec/api/data.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/health_rec/api/data.py b/health_rec/api/data.py index 2d53777..54207ec 100644 --- a/health_rec/api/data.py +++ b/health_rec/api/data.py @@ -15,7 +15,21 @@ class PhoneNumber(BaseModel): - """Phone number with metadata.""" + """Phone number with metadata. + + Attributes + ---------- + number : str + The phone number. + type : Optional[str] + The type of the phone number, e.g., fax, toll-free, primary, secondary, etc. + name : Optional[str] + Any name associated with the phone number. + description : Optional[str] + The description of the phone number. + extension : Optional[str] + The extension of the phone number. + """ number: str type: Optional[str] = None From c3c61593b3d04aaf1b0ae79293913451d771733f Mon Sep 17 00:00:00 2001 From: Amrit Krishnan Date: Tue, 12 Nov 2024 13:39:07 -0500 Subject: [PATCH 11/13] Add rerank attribute to the Query dataclass --- health_rec/api/data.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/health_rec/api/data.py b/health_rec/api/data.py index 54207ec..300c1fb 100644 --- a/health_rec/api/data.py +++ b/health_rec/api/data.py @@ -198,12 +198,15 @@ class Query(BaseModel): The latitude coordinate of the user. radius : Optional[float] The radius of the search. + rerank : Optional[bool] + Whether to rerank the recommendations. """ query: str latitude: Optional[float] = Field(default=None) longitude: Optional[float] = Field(default=None) radius: Optional[float] = Field(default=None) + rerank: Optional[bool] = Field(default=False) class RefineRequest(BaseModel): From 0a28271d5ea0debfd69055314ee785b2b7cc2bc4 Mon Sep 17 00:00:00 2001 From: Amrit Krishnan Date: Tue, 12 Nov 2024 13:51:38 -0500 Subject: [PATCH 12/13] Add script to generate dummy test data --- scripts/generate_test_data.py | 132 ++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 scripts/generate_test_data.py diff --git a/scripts/generate_test_data.py b/scripts/generate_test_data.py new file mode 100644 index 0000000..25c19a3 --- /dev/null +++ b/scripts/generate_test_data.py @@ -0,0 +1,132 @@ +"""Generate test data for the health recommendation system.""" + +import argparse +import json +import logging +import random +from datetime import datetime +from pathlib import Path +from typing import Any, Dict + +from api.data import Address, PhoneNumber, Service + +# Configure logging +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" +) +logger = logging.getLogger(__name__) + + +def generate_dummy_service(service_id: int) -> Service: + """Generate a dummy service with random data around Toronto.""" + # Generate random coordinates around Toronto + latitude = random.uniform(43.5, 43.9) + longitude = random.uniform(-79.7, -79.3) + + # Generate random phone number + phone = PhoneNumber( + number=f"{random.randint(100, 999)}-{random.randint(100, 999)}-{random.randint(1000, 9999)}", + type="primary", + ) + + # Generate random address + address = Address( + street1=f"{random.randint(1, 999)} {random.choice(['Main', 'Queen', 'King', 'Yonge'])} Street", + city="Toronto", + province="ON", + postal_code=f"M{random.randint(1, 9)}{random.choice(['A', 'B', 'C', 'D', 'E'])}{random.randint(0, 9)} {random.choice(['A', 'B', 'C', 'D', 'E'])}{random.randint(0, 9)}{random.randint(0, 9)}", + country="Canada", + ) + + # Generate random service type + service_types = [ + "Medical Clinic", + "Mental Health Center", + "Community Support", + "Emergency Services", + "Addiction Services", + ] + service_type = random.choice(service_types) + + # Generate metadata + metadata: Dict[str, Any] = { + "type": service_type, + "languages": ["English", "French"], + "hours": [ + { + "day": day, + "is_open": True, + "open_time": "09:00", + "close_time": "17:00", + "is_24hour": False, + } + for day in ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"] + ], + "website": f"https://example-{service_id}.com", + "wheelchair_accessible": random.choice([True, False]), + "accepts_new_patients": random.choice([True, False]), + } + + return Service( + id=str(service_id), + name=f"Test Service {service_id}", + description=f"This is a test service {service_id} providing {service_type.lower()} services.", + latitude=latitude, + longitude=longitude, + phone_numbers=[phone], + address=address, + email=f"service{service_id}@example.com", + metadata=metadata, + last_updated=datetime.now(), + ) + + +def create_test_data(num_services: int, output_dir: Path) -> None: + """Create test data and save to JSON file.""" + # Create output directory + output_dir.mkdir(parents=True, exist_ok=True) + output_file = output_dir / "data-00.json" + + # Generate services + logger.info(f"Generating {num_services} test services...") + services = [generate_dummy_service(i) for i in range(1, num_services + 1)] + + # Convert to JSON-serializable format + json_services = [service.dict(exclude_none=True) for service in services] + + # Save to file + with open(output_file, "w") as f: + json.dump(json_services, f, indent=2, default=str) + + logger.info(f"Successfully saved {len(services)} services to {output_file}") + + +def main() -> None: + """Main function to run the script.""" + parser = argparse.ArgumentParser( + description="Generate test data for the health recommendation system." + ) + parser.add_argument( + "--num-services", + type=int, + default=320, + help="Number of test services to generate", + ) + parser.add_argument( + "--output-dir", + type=Path, + default=Path("./data/test_data"), + help="Directory to save the test data", + ) + + args = parser.parse_args() + + try: + create_test_data(args.num_services, args.output_dir) + except Exception as e: + logger.error(f"Error generating test data: {e}") + raise + + +if __name__ == "__main__": + main() From 0f45d8d199a4e3e3abb023ff5aec0f3081b039d4 Mon Sep 17 00:00:00 2001 From: Amrit Krishnan Date: Tue, 12 Nov 2024 13:59:14 -0500 Subject: [PATCH 13/13] Update the documentation about generating test data --- README.md | 27 ++++++++++++++++++++++----- docs/source/index.md | 27 ++++++++++++++++++++++----- 2 files changed, 44 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index f7e4ecb..692dda3 100644 --- a/README.md +++ b/README.md @@ -82,18 +82,35 @@ docker compose --env-file .env.development --profile frontend -f docker-compose. ### 📥 Data setup -#### Download service data (211 API) +#### Test data -**GTA data** +If you want to test the system without real data, you can generate some dummy testing data: ```bash -python3 scripts/download_data.py --api-key $YOUR_211_API_KEY --dataset on --is-gta --data-dir +python3 scripts/generate_test_data.py ``` -**Ontario-wide data** +#### Download service data + +If you are using the 211 API or Empower's API, make sure you check with them to see if the API keys are +configured correctly for the geography of interest. + +**GTA data (211 API)** + +```bash +python3 scripts/download_211_data.py --api-key $YOUR_211_API_KEY --dataset on --is-gta --data-dir +``` + +**Ontario-wide data (211 API)** + +```bash +python3 scripts/download_211_data.py --api-key $YOUR_211_API_KEY --dataset on --data-dir +``` + +**Empower API data** ```bash -python3 scripts/download_data.py --api-key $YOUR_211_API_KEY --dataset on --data-dir +python3 scripts/download_empower_data.py --api-key $YOUR_EMPOWER_API_KEY --data-dir ``` #### Upload data and embeddings diff --git a/docs/source/index.md b/docs/source/index.md index 8655bb2..09f0df3 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -91,18 +91,35 @@ docker compose --env-file .env.development --profile frontend -f docker-compose. ### 📥 Data setup -#### Download service data (211 API) +#### Test data -**GTA data** +If you want to test the system without real data, you can generate some dummy testing data: ```bash -python3 scripts/download_data.py --api-key $YOUR_211_API_KEY --dataset on --is-gta --data-dir +python3 scripts/generate_test_data.py ``` -**Ontario-wide data** +#### Download service data + +If you are using the 211 API or Empower's API, make sure you check with them to see if the API keys are +configured correctly for the geography of interest. + +**GTA data (211 API)** + +```bash +python3 scripts/download_211_data.py --api-key $YOUR_211_API_KEY --dataset on --is-gta --data-dir +``` + +**Ontario-wide data (211 API)** + +```bash +python3 scripts/download_211_data.py --api-key $YOUR_211_API_KEY --dataset on --data-dir +``` + +**Empower API data** ```bash -python3 scripts/download_data.py --api-key $YOUR_211_API_KEY --dataset on --data-dir +python3 scripts/download_empower_data.py --api-key $YOUR_EMPOWER_API_KEY --data-dir ``` #### Upload data and embeddings