From ff151dd7292ab8cddd42e94cc239f22c0a241bf5 Mon Sep 17 00:00:00 2001 From: Luca Picci Date: Wed, 5 Jun 2024 17:37:29 +0200 Subject: [PATCH] format code style --- src/imf_reader/__init__.py | 3 ++- src/imf_reader/config.py | 2 +- src/imf_reader/weo/parser.py | 48 ++++++++++++++++++++++++++--------- src/imf_reader/weo/reader.py | 23 +++++++++++++---- src/imf_reader/weo/scraper.py | 4 --- 5 files changed, 57 insertions(+), 23 deletions(-) diff --git a/src/imf_reader/__init__.py b/src/imf_reader/__init__.py index 430177e..612ae37 100644 --- a/src/imf_reader/__init__.py +++ b/src/imf_reader/__init__.py @@ -1,3 +1,4 @@ # read version from installed package from importlib.metadata import version -__version__ = version("imf_reader") \ No newline at end of file + +__version__ = version("imf_reader") diff --git a/src/imf_reader/config.py b/src/imf_reader/config.py index 11717be..33073b2 100644 --- a/src/imf_reader/config.py +++ b/src/imf_reader/config.py @@ -30,4 +30,4 @@ class UnexpectedFileError(Exception): shell_formatter = logging.Formatter(fmt_shell) # Create formatters shell_handler.setFormatter(shell_formatter) # Add formatters to handlers -logger.addHandler(shell_handler) # Add handlers to the logger \ No newline at end of file +logger.addHandler(shell_handler) # Add handlers to the logger diff --git a/src/imf_reader/weo/parser.py b/src/imf_reader/weo/parser.py index 3602d20..df13306 100644 --- a/src/imf_reader/weo/parser.py +++ b/src/imf_reader/weo/parser.py @@ -6,6 +6,7 @@ from imf_reader.config import UnexpectedFileError, logger +# columns to map to labels and the schema element to look them up SDMX_FIELDS_TO_MAP = { "UNIT": "IMF.CL_WEO_UNIT.1.0", "CONCEPT": "IMF.CL_WEO_CONCEPT.1.0", @@ -15,11 +16,19 @@ } # numeric columns and the type to convert them to -SDMX_NUMERIC_COLUMNS = ["REF_AREA_CODE", "OBS_VALUE", "SCALE_CODE", "LASTACTUALDATE", "TIME_PERIOD"] +SDMX_NUMERIC_COLUMNS = [ + "REF_AREA_CODE", + "OBS_VALUE", + "SCALE_CODE", + "LASTACTUALDATE", + "TIME_PERIOD", +] class SDMXParser: - """Class to parse SDMX data""" + """Class to parse SDMX data + To use this class, call the parse method with the folder containing the SDMX files. + """ @staticmethod def parse_xml(tree: ET.ElementTree) -> pd.DataFrame: @@ -42,7 +51,9 @@ def parse_xml(tree: ET.ElementTree) -> pd.DataFrame: return pd.DataFrame(rows) @staticmethod - def lookup_schema_element(schema_tree: ET.ElementTree, field_name) -> dict[str, str]: + def lookup_schema_element( + schema_tree: ET.ElementTree, field_name + ) -> dict[str, str]: """Lookup the elements in the schema and find the label for a given label_name. Args: @@ -64,7 +75,9 @@ def lookup_schema_element(schema_tree: ET.ElementTree, field_name) -> dict[str, return lookup_dict @staticmethod - def add_label_columns(data_df: pd.DataFrame, schema_tree: ET.ElementTree) -> pd.DataFrame: + def add_label_columns( + data_df: pd.DataFrame, schema_tree: ET.ElementTree + ) -> pd.DataFrame: """Maps columns with codes to columns with labels and renames the code columns. Args: @@ -94,10 +107,14 @@ def check_folder(sdmx_folder: ZipFile) -> None: """ if len([file for file in sdmx_folder.namelist() if file.endswith(".xml")]) != 1: - raise UnexpectedFileError("There should be exactly one xml file in the folder") + raise UnexpectedFileError( + "There should be exactly one xml file in the folder" + ) if len([file for file in sdmx_folder.namelist() if file.endswith(".xsd")]) != 1: - raise UnexpectedFileError("There should be exactly one xsd file in the folder") + raise UnexpectedFileError( + "There should be exactly one xsd file in the folder" + ) logger.debug("Zip folder check passed") @@ -109,10 +126,9 @@ def clean_numeric_columns(df: pd.DataFrame) -> pd.DataFrame: """ - df[SDMX_NUMERIC_COLUMNS] = (df[SDMX_NUMERIC_COLUMNS] - .replace(["n/a", "--"], pd.NA) - .apply(pd.to_numeric) - ) + df[SDMX_NUMERIC_COLUMNS] = ( + df[SDMX_NUMERIC_COLUMNS].replace(["n/a", "--"], pd.NA).apply(pd.to_numeric) + ) return df @@ -130,8 +146,16 @@ def parse(sdmx_folder: ZipFile) -> pd.DataFrame: SDMXParser.check_folder(sdmx_folder) # Get the data and schema trees - data_tree = ET.parse(sdmx_folder.open([file for file in sdmx_folder.namelist() if file.endswith(".xml")][0])) - schema_tree = ET.parse(sdmx_folder.open([file for file in sdmx_folder.namelist() if file.endswith(".xsd")][0])) + data_tree = ET.parse( + sdmx_folder.open( + [file for file in sdmx_folder.namelist() if file.endswith(".xml")][0] + ) + ) + schema_tree = ET.parse( + sdmx_folder.open( + [file for file in sdmx_folder.namelist() if file.endswith(".xsd")][0] + ) + ) # Parse and clean the data data = SDMXParser.parse_xml(data_tree) # Parse the xml data diff --git a/src/imf_reader/weo/reader.py b/src/imf_reader/weo/reader.py index a6ba56a..d614039 100644 --- a/src/imf_reader/weo/reader.py +++ b/src/imf_reader/weo/reader.py @@ -26,7 +26,9 @@ def validate_version(version: Tuple) -> Version: """ if not isinstance(version, tuple) or len(version) != 2: - raise TypeError("Invalid version. Must be a tuple of month ('April' or 'October') and year") + raise TypeError( + "Invalid version. Must be a tuple of month ('April' or 'October') and year" + ) # check that the month is either April or October month = version[0].strip().capitalize() @@ -94,11 +96,20 @@ def roll_back_version(version: Version) -> Version: @lru_cache def _fetch(version: Version) -> pd.DataFrame: - """Helper function which handles caching and fetching the data from the IMF website""" + """Helper function which handles caching and fetching the data from the IMF website + + Args: + version: The version of the WEO data to fetch + + Returns: + A pandas DataFrame containing the WEO data + """ folder = SDMXScraper.scrape(*version) # scrape the data and get the SDMX files df = SDMXParser.parse(folder) # parse the SDMX files into a DataFrame - logger.debug(f"Data scraped and parsed successfully for version {version[0]} {version[1]}") + logger.debug( + f"Data scraped and parsed successfully for version {version[0]} {version[1]}" + ) return df @@ -144,7 +155,9 @@ def fetch_data(version: Optional[Version] = None) -> pd.DataFrame: # if no data is found for the expected latest version, roll back once and try again except NoDataError: - logger.debug(f"No data found for the expected latest version {latest_version[0]} {latest_version[1]}." - f" Rolling back version") + logger.debug( + f"No data found for the expected latest version {latest_version[0]} {latest_version[1]}." + f" Rolling back version" + ) latest_version = roll_back_version(latest_version) return fetch_data(latest_version) diff --git a/src/imf_reader/weo/scraper.py b/src/imf_reader/weo/scraper.py index 7f0cd10..cdd76e4 100644 --- a/src/imf_reader/weo/scraper.py +++ b/src/imf_reader/weo/scraper.py @@ -121,7 +121,3 @@ def scrape(month: str, year: str | int) -> ZipFile: sdmx_folder = SDMXScraper.get_sdmx_folder(sdmx_url) return sdmx_folder - - - -