Skip to content

Commit

Permalink
format code style
Browse files Browse the repository at this point in the history
  • Loading branch information
lpicci96 committed Jun 5, 2024
1 parent 25c75c3 commit ff151dd
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 23 deletions.
3 changes: 2 additions & 1 deletion src/imf_reader/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# read version from installed package
from importlib.metadata import version
__version__ = version("imf_reader")

__version__ = version("imf_reader")
2 changes: 1 addition & 1 deletion src/imf_reader/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,4 @@ class UnexpectedFileError(Exception):

shell_formatter = logging.Formatter(fmt_shell) # Create formatters
shell_handler.setFormatter(shell_formatter) # Add formatters to handlers
logger.addHandler(shell_handler) # Add handlers to the logger
logger.addHandler(shell_handler) # Add handlers to the logger
48 changes: 36 additions & 12 deletions src/imf_reader/weo/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from imf_reader.config import UnexpectedFileError, logger

# columns to map to labels, and the schema element in which to look up each label
SDMX_FIELDS_TO_MAP = {
"UNIT": "IMF.CL_WEO_UNIT.1.0",
"CONCEPT": "IMF.CL_WEO_CONCEPT.1.0",
Expand All @@ -15,11 +16,19 @@
}

# columns whose values are converted to numeric
SDMX_NUMERIC_COLUMNS = ["REF_AREA_CODE", "OBS_VALUE", "SCALE_CODE", "LASTACTUALDATE", "TIME_PERIOD"]
SDMX_NUMERIC_COLUMNS = [
"REF_AREA_CODE",
"OBS_VALUE",
"SCALE_CODE",
"LASTACTUALDATE",
"TIME_PERIOD",
]


class SDMXParser:
"""Class to parse SDMX data"""
"""Class to parse SDMX data
To use this class, call the parse method with the folder containing the SDMX files.
"""

@staticmethod
def parse_xml(tree: ET.ElementTree) -> pd.DataFrame:
Expand All @@ -42,7 +51,9 @@ def parse_xml(tree: ET.ElementTree) -> pd.DataFrame:
return pd.DataFrame(rows)

@staticmethod
def lookup_schema_element(schema_tree: ET.ElementTree, field_name) -> dict[str, str]:
def lookup_schema_element(
schema_tree: ET.ElementTree, field_name
) -> dict[str, str]:
"""Lookup the elements in the schema and find the label for a given label_name.
Args:
Expand All @@ -64,7 +75,9 @@ def lookup_schema_element(schema_tree: ET.ElementTree, field_name) -> dict[str,
return lookup_dict

@staticmethod
def add_label_columns(data_df: pd.DataFrame, schema_tree: ET.ElementTree) -> pd.DataFrame:
def add_label_columns(
data_df: pd.DataFrame, schema_tree: ET.ElementTree
) -> pd.DataFrame:
"""Maps columns with codes to columns with labels and renames the code columns.
Args:
Expand Down Expand Up @@ -94,10 +107,14 @@ def check_folder(sdmx_folder: ZipFile) -> None:
"""

if len([file for file in sdmx_folder.namelist() if file.endswith(".xml")]) != 1:
raise UnexpectedFileError("There should be exactly one xml file in the folder")
raise UnexpectedFileError(
"There should be exactly one xml file in the folder"
)

if len([file for file in sdmx_folder.namelist() if file.endswith(".xsd")]) != 1:
raise UnexpectedFileError("There should be exactly one xsd file in the folder")
raise UnexpectedFileError(
"There should be exactly one xsd file in the folder"
)

logger.debug("Zip folder check passed")

Expand All @@ -109,10 +126,9 @@ def clean_numeric_columns(df: pd.DataFrame) -> pd.DataFrame:
"""

df[SDMX_NUMERIC_COLUMNS] = (df[SDMX_NUMERIC_COLUMNS]
.replace(["n/a", "--"], pd.NA)
.apply(pd.to_numeric)
)
df[SDMX_NUMERIC_COLUMNS] = (
df[SDMX_NUMERIC_COLUMNS].replace(["n/a", "--"], pd.NA).apply(pd.to_numeric)
)

return df

Expand All @@ -130,8 +146,16 @@ def parse(sdmx_folder: ZipFile) -> pd.DataFrame:
SDMXParser.check_folder(sdmx_folder)

# Get the data and schema trees
data_tree = ET.parse(sdmx_folder.open([file for file in sdmx_folder.namelist() if file.endswith(".xml")][0]))
schema_tree = ET.parse(sdmx_folder.open([file for file in sdmx_folder.namelist() if file.endswith(".xsd")][0]))
data_tree = ET.parse(
sdmx_folder.open(
[file for file in sdmx_folder.namelist() if file.endswith(".xml")][0]
)
)
schema_tree = ET.parse(
sdmx_folder.open(
[file for file in sdmx_folder.namelist() if file.endswith(".xsd")][0]
)
)

# Parse and clean the data
data = SDMXParser.parse_xml(data_tree) # Parse the xml data
Expand Down
23 changes: 18 additions & 5 deletions src/imf_reader/weo/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ def validate_version(version: Tuple) -> Version:
"""

if not isinstance(version, tuple) or len(version) != 2:
raise TypeError("Invalid version. Must be a tuple of month ('April' or 'October') and year")
raise TypeError(
"Invalid version. Must be a tuple of month ('April' or 'October') and year"
)

# check that the month is either April or October
month = version[0].strip().capitalize()
Expand Down Expand Up @@ -94,11 +96,20 @@ def roll_back_version(version: Version) -> Version:

@lru_cache
def _fetch(version: Version) -> pd.DataFrame:
"""Helper function which handles caching and fetching the data from the IMF website"""
"""Helper function which handles caching and fetching the data from the IMF website
Args:
version: The version of the WEO data to fetch
Returns:
A pandas DataFrame containing the WEO data
"""

folder = SDMXScraper.scrape(*version) # scrape the data and get the SDMX files
df = SDMXParser.parse(folder) # parse the SDMX files into a DataFrame
logger.debug(f"Data scraped and parsed successfully for version {version[0]} {version[1]}")
logger.debug(
f"Data scraped and parsed successfully for version {version[0]} {version[1]}"
)
return df


Expand Down Expand Up @@ -144,7 +155,9 @@ def fetch_data(version: Optional[Version] = None) -> pd.DataFrame:

# if no data is found for the expected latest version, roll back once and try again
except NoDataError:
logger.debug(f"No data found for the expected latest version {latest_version[0]} {latest_version[1]}."
f" Rolling back version")
logger.debug(
f"No data found for the expected latest version {latest_version[0]} {latest_version[1]}."
f" Rolling back version"
)
latest_version = roll_back_version(latest_version)
return fetch_data(latest_version)
4 changes: 0 additions & 4 deletions src/imf_reader/weo/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,3 @@ def scrape(month: str, year: str | int) -> ZipFile:
sdmx_folder = SDMXScraper.get_sdmx_folder(sdmx_url)

return sdmx_folder




0 comments on commit ff151dd

Please sign in to comment.