From caee7df576a38e23c00e582b4cde2344310fca86 Mon Sep 17 00:00:00 2001 From: Luca Picci Date: Fri, 29 Nov 2024 14:49:32 +0100 Subject: [PATCH 01/13] move make_request function to utils module --- src/imf_reader/utils.py | 25 +++++++++++++++++++++++++ src/imf_reader/weo/scraper.py | 24 +----------------------- 2 files changed, 26 insertions(+), 23 deletions(-) create mode 100644 src/imf_reader/utils.py diff --git a/src/imf_reader/utils.py b/src/imf_reader/utils.py new file mode 100644 index 0000000..6a301cb --- /dev/null +++ b/src/imf_reader/utils.py @@ -0,0 +1,25 @@ +"""Utility functions""" + +import requests + +def make_request(url: str) -> requests.models.Response: + """Make a request to a url. + + Args: + url: url to make request to + + Returns: + requests.models.Response: response object + """ + + try: + response = requests.get(url) + if response.status_code != 200: + raise ConnectionError( + f"Could not connect to {url}. Status code: {response.status_code}" + ) + + return response + + except requests.exceptions.RequestException as e: + raise ConnectionError(f"Could not connect to {url}. Error: {str(e)}") \ No newline at end of file diff --git a/src/imf_reader/weo/scraper.py b/src/imf_reader/weo/scraper.py index cdd76e4..e6db2d5 100644 --- a/src/imf_reader/weo/scraper.py +++ b/src/imf_reader/weo/scraper.py @@ -6,33 +6,11 @@ from zipfile import ZipFile, BadZipFile from imf_reader.config import NoDataError, logger +from imf_reader.utils import make_request BASE_URL = "https://www.imf.org/" -def make_request(url: str) -> requests.models.Response: - """Make a request to a url. - - Args: - url: url to make request to - - Returns: - requests.models.Response: response object - """ - - try: - response = requests.get(url) - if response.status_code != 200: - raise ConnectionError( - f"Could not connect to {url}. Status code: {response.status_code}" - ) - - return response - - except requests.exceptions.RequestException as e: - raise ConnectionError(f"Could not connect to {url}. Error: {str(e)}") - - def get_soup(month: str, year: str | int) -> BeautifulSoup: """Get the BeautifulSoup object of the IMF WEO website. From bea601a6b020ddcb3c9f761b6064a86798c884fd Mon Sep 17 00:00:00 2001 From: Luca Picci Date: Fri, 29 Nov 2024 14:53:32 +0100 Subject: [PATCH 02/13] refactor tests --- tests/test_utils.py | 32 ++++++++++++++++++++++++++++++++ tests/test_weo/test_scraper.py | 23 ----------------------- 2 files changed, 32 insertions(+), 23 deletions(-) create mode 100644 tests/test_utils.py diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..145b21b --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,32 @@ +"""Tests for the utils module""" + +import pytest +from unittest.mock import patch +import requests + +from imf_reader import utils + + +TEST_URL = "https://test.com" + + +def test_make_request(): + """Test make_request""" + + # test successful request + with patch("requests.get") as mock_get: + mock_get.return_value.status_code = 200 + response = utils.make_request(TEST_URL) + assert response == mock_get.return_value + + # test failed request + with patch("requests.get") as mock_get: + mock_get.side_effect = requests.exceptions.RequestException + with pytest.raises(ConnectionError, match="Could not connect to"): + utils.make_request(TEST_URL) + + # test when status code is not 200 + with patch("requests.get") as mock_get: + mock_get.return_value.status_code = 404 + with pytest.raises(ConnectionError, match="Could not connect to"): + utils.make_request(TEST_URL) \ No newline at end of file diff --git a/tests/test_weo/test_scraper.py b/tests/test_weo/test_scraper.py index 52e9f0a..3ef8841 100644 --- a/tests/test_weo/test_scraper.py +++ b/tests/test_weo/test_scraper.py @@ -2,7 +2,6 @@ import pytest from unittest.mock import patch, Mock -import requests from bs4 import BeautifulSoup import io from zipfile import ZipFile, BadZipFile @@ -14,28 +13,6 @@ TEST_URL = "https://test.com" -def test_make_request(): - """Test make_request""" - - # test successful request - with patch("requests.get") as mock_get: - mock_get.return_value.status_code = 200 - response = scraper.make_request(TEST_URL) - assert response == mock_get.return_value - - # test failed request - with patch("requests.get") as mock_get: - mock_get.side_effect = requests.exceptions.RequestException - with pytest.raises(ConnectionError, match="Could not connect to"): - scraper.make_request(TEST_URL) - - # test when status code is not 200 - with patch("requests.get") as mock_get: - mock_get.return_value.status_code = 404 - with pytest.raises(ConnectionError, match="Could not connect to"): - scraper.make_request(TEST_URL) - - def test_get_soup(): """Test get_soup""" From 31316d5ddf976a9019e0243e684460c78100589b Mon Sep 17 00:00:00 2001 From: Luca Picci Date: Fri, 29 Nov 2024 14:55:01 +0100 Subject: [PATCH 03/13] setup sdr module --- src/imf_reader/sdr/__init__.py | 1 + 1 file changed, 1 insertion(+) create mode 100644 src/imf_reader/sdr/__init__.py diff --git a/src/imf_reader/sdr/__init__.py b/src/imf_reader/sdr/__init__.py new file mode 100644 index 0000000..8f8a156 --- /dev/null +++ b/src/imf_reader/sdr/__init__.py @@ -0,0 +1 @@ +"""Special Drawing Rights (SDR) reader module.""" \ No newline at end of file From 97dbfdba36e0906cc583e2a192e62e6d12442237 Mon Sep 17 00:00:00 2001 From: Luca Picci Date: Fri, 29 Nov 2024 17:07:48 +0100 Subject: [PATCH 04/13] add functionality to get sdr announcements --- src/imf_reader/sdr/read.py | 101 +++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 src/imf_reader/sdr/read.py diff --git a/src/imf_reader/sdr/read.py b/src/imf_reader/sdr/read.py new file mode 100644 index 0000000..ece6e2c --- /dev/null +++ b/src/imf_reader/sdr/read.py @@ -0,0 +1,101 @@ +"""Module to get SDR data from the IMF website + + +info: https://www.imf.org/en/About/Factsheets/Sheets/2023/special-drawing-rights-sdr + +""" +from functools import lru_cache +import pandas as pd +import calendar +from bs4 import BeautifulSoup +from datetime import datetime + +from imf_reader.utils import make_request +from imf_reader.config import logger, NoDataError + +BASE_URL = "https://www.imf.org/external/np/fin/tad/" +MAIN_PAGE_URL = "https://www.imf.org/external/np/fin/tad/extsdr1.aspx" + + + +def read_tsv(url: str) -> pd.DataFrame: + """Read a tsv file from a url and return a dataframe""" + + try: + return pd.read_csv(url, delimiter="/t", engine="python") + + except pd.errors.ParserError: + raise ValueError("SDR _data not available for this date") + + +def clean_df(df: pd.DataFrame) -> pd.DataFrame: + """Clean the SDR dataframe""" + + df = df.iloc[3:, 0].str.split("\t", expand=True) + df.columns = ["entity", "holdings", "allocations"] + + return ( + df + .assign(holdings = lambda d: pd.to_numeric(d.holdings.str.replace(r"[^\d.]", "", regex=True), errors="coerce"), + allocations = lambda d: pd.to_numeric(d.allocations.str.replace(r"[^\d.]", "", regex=True), errors="coerce") + ) + .melt(id_vars="entity", value_vars=["holdings", "allocations"], var_name = "indicator") + ) + +def format_date(month: int, year: int) -> str: + """Return a date as year-month-day where day is the last day in the month + """ + + last_day = calendar.monthrange(year, month)[1] + return f"{year}-{month}-{last_day}" + + +@lru_cache +def get_data(year: int, month: int): + """Get sdr allocations and holdings data for a given month and year""" + + date = format_date(month, year) + url = f"{BASE_URL}extsdr2.aspx?date1key={date}&tsvflag=Y" + + logger.info(f"Fetching SDR data for date: {date}") + + df = read_tsv(url) + df = clean_df(df) + df["date"] = pd.to_datetime(date) + + return df + + +@lru_cache +def get_latest_date() -> tuple[int, int]: + """Get the latest date for which SDR data is available""" + + logger.info("Fetching latest date") + + response = make_request(MAIN_PAGE_URL) + soup = BeautifulSoup(response.content, "html.parser") + table = soup.find_all("table")[4] + row = table.find_all("tr")[1] + + date = row.td.text.strip() + date = datetime.strptime(date, '%B %d, %Y') + + # Extract the year and month as a tuple + return date.year, date.month + + +def fetch_data(date: tuple[int, int] | None = None) -> pd.DataFrame: + """Fetch SDR holdings and allocations data for a given date + + Args: + date: A tuple of year and month. If None, the latest date is used + + returns: + pd.DataFrame: A dataframe with the SDR data + """ + + if date is None: + date = get_latest_date() + + return get_data(*date) + From 3dfbf8aa7a49bedf53ef5fa30862b9a21d66519f Mon Sep 17 00:00:00 2001 From: Luca Picci Date: Fri, 29 Nov 2024 17:23:30 +0100 Subject: [PATCH 05/13] rename module --- src/imf_reader/sdr/{read.py => read_announcements.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/imf_reader/sdr/{read.py => read_announcements.py} (100%) diff --git a/src/imf_reader/sdr/read.py b/src/imf_reader/sdr/read_announcements.py similarity index 100% rename from src/imf_reader/sdr/read.py rename to src/imf_reader/sdr/read_announcements.py From 8cf43bf0fb13e40f9c08deb1c942f6bacecd19ac Mon Sep 17 00:00:00 2001 From: Luca Picci Date: Fri, 29 Nov 2024 21:05:45 +0100 Subject: [PATCH 06/13] add interest rate reader --- src/imf_reader/sdr/read_interest_rate.py | 100 +++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 src/imf_reader/sdr/read_interest_rate.py diff --git a/src/imf_reader/sdr/read_interest_rate.py b/src/imf_reader/sdr/read_interest_rate.py new file mode 100644 index 0000000..ecfd500 --- /dev/null +++ b/src/imf_reader/sdr/read_interest_rate.py @@ -0,0 +1,100 @@ +"""Module to read SDR interest and exchange rates from the IMF website + +""" + +import requests +import pandas as pd +import io +from functools import lru_cache + +from imf_reader.config import logger + + +BASE_URL: str = "https://www.imf.org/external/np/fin/data/sdr_ir.aspx" + + +def read_data(): + """Read the data from the IMF website""" + + data = { + '__EVENTTARGET': 'lbnTSV', + } + + try: + response = requests.post(BASE_URL, data=data) + response.raise_for_status() + + except requests.exceptions.RequestException as e: + raise ConnectionError(f"Could not connect to {BASE_URL}. Error: {str(e)}") + + try: + return pd.read_csv(io.BytesIO(response.content), delimiter="/t", engine="python") + + except pd.errors.ParserError as e: + raise ValueError(f"Could not parse data. Error: {str(e)}") + + +def clean_data(df: pd.DataFrame) -> pd.DataFrame: + """Cleaning/parsing steps for the data. split tab separated value into separate columns, rename columns, assign types, and additional formatting + """ + columns = {"Effective from": "effective_from", + "Effective to": "effective_to", + } + + df = df.iloc[:, 0].str.split("\t", expand=True) + df.columns = df.iloc[0] + df = df.iloc[1:] + + return (df + .rename(columns = columns) + .loc[:, columns.values()] + .dropna(subset=["effective_to"]) + .pipe(_format_data) + .assign(interest_rate = lambda d: pd.to_numeric(d.interest_rate, errors="coerce"), + effective_from = lambda d: pd.to_datetime(d.effective_from), + effective_to = lambda d: pd.to_datetime(d.effective_to) + ) + ) + + +def _format_data(df: pd.DataFrame) -> pd.DataFrame: + """Format the data. Only keep the rows for the SDR interest rate and add the dates + """ + + dates_df = (df + .loc[lambda d: ~ d['effective_from'].isin(["SDR Interest Rate", "Total", "Floor for SDR Interest Rate"])] + .drop_duplicates() + .reset_index(drop=True) + ) + + sdr_df = (df + .loc[lambda d: d.effective_from == "SDR Interest Rate"] + .iloc[:, 1:2] + .reset_index(drop=True) + ) + + sdr_df.columns = ['interest_rate'] + + return sdr_df.join(dates_df) + + + +@lru_cache +def fetch_interest_rate() -> pd.DataFrame: + """Fetch the historic SDR interest rates from the IMF + + The SDR interest rate is based on the sum of the multiplicative products in SDR terms of the currency + amounts in the SDR valuation basket, the level of the interest rate on the financial + instrument of each component currency in the basket, and the exchange rate of each currency + against the SDR. The SDR interest rate for the current week is released on Sunday morning, Washington D.C. time. + + returns: + A DataFrame with the historical SDR interest rates + """ + + logger.info("Fetching SDR interest rates") + + df = read_data() + df = clean_data(df) + + return df From a51ff8b0fcf378bb8de8a39e8cdd35ee9f4e6d77 Mon Sep 17 00:00:00 2001 From: Luca Picci Date: Fri, 29 Nov 2024 21:35:26 +0100 Subject: [PATCH 07/13] add exchange rate reader --- src/imf_reader/sdr/read_exchange_rate.py | 91 ++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 src/imf_reader/sdr/read_exchange_rate.py diff --git a/src/imf_reader/sdr/read_exchange_rate.py b/src/imf_reader/sdr/read_exchange_rate.py new file mode 100644 index 0000000..fe93463 --- /dev/null +++ b/src/imf_reader/sdr/read_exchange_rate.py @@ -0,0 +1,91 @@ +"""Module to read exchange rate data from the IMF's Special Drawing Rights (SDR) Valuation dataset. + +Read about SDR valuation at: https://www.imf.org/external/np/fin/data/rms_sdrv.aspx +""" + +import requests +import pandas as pd +import io +from functools import lru_cache +from typing import Literal + +from imf_reader.config import logger + + +BASE_URL = "https://www.imf.org/external/np/fin/data/rms_sdrv.aspx" + + +def read_data(): + """Read the data from the IMF website""" + + data = { + '__EVENTTARGET': 'lbnTSV', + } + + try: + response = requests.post(BASE_URL, data=data) + response.raise_for_status() + + except requests.exceptions.RequestException as e: + raise ConnectionError(f"Could not connect to {BASE_URL}. Error: {str(e)}") + + try: + return pd.read_csv(io.BytesIO(response.content), delimiter="/t", engine="python") + + except pd.errors.ParserError as e: + raise ValueError(f"Could not parse data. Error: {str(e)}") + + +def parse_data(df: pd.DataFrame, unit_basis: Literal["SDR", "USD"]): + """Parse the data from the IMF website""" + + if unit_basis == "USD": + col_val = "U.S.$1.00 = SDR" + elif unit_basis == "SDR": + col_val = "SDR1 = US$" + else: + raise ValueError("unit_basis must be either 'SDR' or 'USD'") + + df = df.iloc[:, 0].str.split("\t", expand=True) + df.columns = df.iloc[0] + df = df.iloc[1:] + + exchange_series = (df + .loc[lambda d: d["Report date"] == col_val] + .iloc[:, 1] + .reset_index(drop=True) + ) + + dates_series = (df + .dropna(subset = df.columns[3]) + .iloc[:, 0] + .drop_duplicates() + .reset_index(drop=True) + ) + + return (pd.DataFrame({'date': dates_series, "exchange_rate": exchange_series}) + .assign(date = lambda d: pd.to_datetime(d.date), + exchange_rate = lambda d: pd.to_numeric(d.exchange_rate, errors="coerce") + ) + ) + + +@lru_cache +def fetch_exchange_rates(unit_basis: Literal["SDR", "USD"] = "SDR") -> pd.DataFrame: + """Fetch the historic SDR exchange rates from the IMF + + The currency value of the SDR is determined by summing the values in U.S. dollars, based on market exchange rates, of a basket of major currencies (the U.S. dollar, Euro, Japanese yen, pound sterling and the Chinese renminbi). The SDR currency value is calculated daily except on IMF holidays, or whenever the IMF is closed for business, or on an ad-hoc basis to facilitate unscheduled IMF operations. The SDR valuation basket is reviewed and adjusted every five years. + + Read more at: https://www.imf.org/en/About/Factsheets/Sheets/2023/special-drawing-rights-sdr + + Args: + unit_basis: The unit basis for the exchange rate. Default is "SDR" i.e. 1 SDR in USD. Other option is "USD" i.e. 1 USD in SDR + + Returns: + A DataFrame with the exchange rate data + """ + + logger.info("Fetching exchange rate data") + + df = read_data() + return parse_data(df, unit_basis) \ No newline at end of file From 25b33eb7f04200676b6496d43ae15d1cd40f3afb Mon Sep 17 00:00:00 2001 From: Luca Picci Date: Fri, 29 Nov 2024 21:51:08 +0100 Subject: [PATCH 08/13] update documentation --- src/imf_reader/sdr/__init__.py | 53 +++++++++++++++++++++++- src/imf_reader/sdr/read_announcements.py | 4 +- src/imf_reader/sdr/read_interest_rate.py | 2 +- 3 files changed, 55 insertions(+), 4 deletions(-) diff --git a/src/imf_reader/sdr/__init__.py b/src/imf_reader/sdr/__init__.py index 8f8a156..c1d0c6b 100644 --- a/src/imf_reader/sdr/__init__.py +++ b/src/imf_reader/sdr/__init__.py @@ -1 +1,52 @@ -"""Special Drawing Rights (SDR) reader module.""" \ No newline at end of file +"""Special Drawing Rights (SDR) reader module. + +This module offers access to the IMF's Special Drawing Rights (SDR) data. +The SDR is an international reserve asset created by the IMF in 1969. +It is not a currency, but the holder of SDRs can exchange them for usable currencies in times of need. + +Read more about SDRs at: https://www.imf.org/en/About/Factsheets/Sheets/2023/special-drawing-rights-sdr + + +Usage: + +Import the module + +```python +from imf_reader import sdr +``` + +Read allocations and holdings data + +```python +sdr.fetch_allocations_holdings() +``` +SDRs holdings and allocations are published at a monthly frequency. The function fetches the latest data available. + +To retrieve SDR holdings and allocations for a specific month and year, eg April 2021, pass the year and month as a tuple + +```python +sdr.fetch_allocations_holdings((2021, 4)) +``` + +Read interest rates + +```python +sdr.fetch_interest_rates() +``` + +Read exchange rates + +```python +sdr.fetch_exchange_rates() +``` +By default, the exchange rate is in USDs per 1 SDR. To get the exchange rate in SDRs per 1 USD, pass the unit basis as "USD" + +```python +sdr.fetch_exchange_rates("USD") +``` + +""" + +from imf_reader.sdr.read_interest_rate import fetch_interest_rates +from imf_reader.sdr.read_exchange_rate import fetch_exchange_rates +from imf_reader.sdr.read_announcements import fetch_allocations_holdings \ No newline at end of file diff --git a/src/imf_reader/sdr/read_announcements.py b/src/imf_reader/sdr/read_announcements.py index ece6e2c..8945f6b 100644 --- a/src/imf_reader/sdr/read_announcements.py +++ b/src/imf_reader/sdr/read_announcements.py @@ -84,11 +84,11 @@ def get_latest_date() -> tuple[int, int]: return date.year, date.month -def fetch_data(date: tuple[int, int] | None = None) -> pd.DataFrame: +def fetch_allocations_holdings(date: tuple[int, int] | None = None) -> pd.DataFrame: """Fetch SDR holdings and allocations data for a given date Args: - date: A tuple of year and month. If None, the latest date is used + date: A tuple of year and month e.g (2024, 11). If None, the latest data announcements released are fetched returns: pd.DataFrame: A dataframe with the SDR data diff --git a/src/imf_reader/sdr/read_interest_rate.py b/src/imf_reader/sdr/read_interest_rate.py index ecfd500..ab9ec24 100644 --- a/src/imf_reader/sdr/read_interest_rate.py +++ b/src/imf_reader/sdr/read_interest_rate.py @@ -80,7 +80,7 @@ def _format_data(df: pd.DataFrame) -> pd.DataFrame: @lru_cache -def fetch_interest_rate() -> pd.DataFrame: +def fetch_interest_rates() -> pd.DataFrame: """Fetch the historic SDR interest rates from the IMF The SDR interest rate is based on the sum of the multiplicative products in SDR terms of the currency From 73525549b02c87c16c3418c9a055740d9d755f66 Mon Sep 17 00:00:00 2001 From: Luca Picci Date: Mon, 2 Dec 2024 09:38:45 +0100 Subject: [PATCH 09/13] black --- src/imf_reader/sdr/__init__.py | 2 +- src/imf_reader/sdr/read_announcements.py | 24 +++++----- src/imf_reader/sdr/read_exchange_rate.py | 39 +++++++-------- src/imf_reader/sdr/read_interest_rate.py | 61 +++++++++++++----------- src/imf_reader/utils.py | 3 +- tests/test_utils.py | 2 +- 6 files changed, 70 insertions(+), 61 deletions(-) diff --git a/src/imf_reader/sdr/__init__.py b/src/imf_reader/sdr/__init__.py index c1d0c6b..87113c4 100644 --- a/src/imf_reader/sdr/__init__.py +++ b/src/imf_reader/sdr/__init__.py @@ -49,4 +49,4 @@ from imf_reader.sdr.read_interest_rate import fetch_interest_rates from imf_reader.sdr.read_exchange_rate import fetch_exchange_rates -from imf_reader.sdr.read_announcements import fetch_allocations_holdings \ No newline at end of file +from imf_reader.sdr.read_announcements import fetch_allocations_holdings diff --git a/src/imf_reader/sdr/read_announcements.py b/src/imf_reader/sdr/read_announcements.py index 8945f6b..be056d9 100644 --- a/src/imf_reader/sdr/read_announcements.py +++ b/src/imf_reader/sdr/read_announcements.py @@ -4,6 +4,7 @@ info: https://www.imf.org/en/About/Factsheets/Sheets/2023/special-drawing-rights-sdr """ + from functools import lru_cache import pandas as pd import calendar @@ -17,7 +18,6 @@ MAIN_PAGE_URL = "https://www.imf.org/external/np/fin/tad/extsdr1.aspx" - def read_tsv(url: str) -> pd.DataFrame: """Read a tsv file from a url and return a dataframe""" @@ -34,17 +34,20 @@ def clean_df(df: pd.DataFrame) -> pd.DataFrame: df = df.iloc[3:, 0].str.split("\t", expand=True) df.columns = ["entity", "holdings", "allocations"] - return ( - df - .assign(holdings = lambda d: pd.to_numeric(d.holdings.str.replace(r"[^\d.]", "", regex=True), errors="coerce"), - allocations = lambda d: pd.to_numeric(d.allocations.str.replace(r"[^\d.]", "", regex=True), errors="coerce") - ) - .melt(id_vars="entity", value_vars=["holdings", "allocations"], var_name = "indicator") + return df.assign( + holdings=lambda d: pd.to_numeric( + d.holdings.str.replace(r"[^\d.]", "", regex=True), errors="coerce" + ), + allocations=lambda d: pd.to_numeric( + d.allocations.str.replace(r"[^\d.]", "", regex=True), errors="coerce" + ), + ).melt( + id_vars="entity", value_vars=["holdings", "allocations"], var_name="indicator" ) + def format_date(month: int, year: int) -> str: - """Return a date as year-month-day where day is the last day in the month - """ + """Return a date as year-month-day where day is the last day in the month""" last_day = calendar.monthrange(year, month)[1] return f"{year}-{month}-{last_day}" @@ -78,7 +81,7 @@ def get_latest_date() -> tuple[int, int]: row = table.find_all("tr")[1] date = row.td.text.strip() - date = datetime.strptime(date, '%B %d, %Y') + date = datetime.strptime(date, "%B %d, %Y") # Extract the year and month as a tuple return date.year, date.month @@ -98,4 +101,3 @@ def fetch_allocations_holdings(date: tuple[int, int] | None = None) -> pd.DataFr date = get_latest_date() return get_data(*date) - diff --git a/src/imf_reader/sdr/read_exchange_rate.py b/src/imf_reader/sdr/read_exchange_rate.py index fe93463..11830e9 100644 --- a/src/imf_reader/sdr/read_exchange_rate.py +++ b/src/imf_reader/sdr/read_exchange_rate.py @@ -19,7 +19,7 @@ def read_data(): """Read the data from the IMF website""" data = { - '__EVENTTARGET': 'lbnTSV', + "__EVENTTARGET": "lbnTSV", } try: @@ -30,7 +30,9 @@ def read_data(): raise ConnectionError(f"Could not connect to {BASE_URL}. Error: {str(e)}") try: - return pd.read_csv(io.BytesIO(response.content), delimiter="/t", engine="python") + return pd.read_csv( + io.BytesIO(response.content), delimiter="/t", engine="python" + ) except pd.errors.ParserError as e: raise ValueError(f"Could not parse data. Error: {str(e)}") @@ -50,24 +52,23 @@ def parse_data(df: pd.DataFrame, unit_basis: Literal["SDR", "USD"]): df.columns = df.iloc[0] df = df.iloc[1:] - exchange_series = (df - .loc[lambda d: d["Report date"] == col_val] - .iloc[:, 1] - .reset_index(drop=True) - ) + exchange_series = ( + df.loc[lambda d: d["Report date"] == col_val].iloc[:, 1].reset_index(drop=True) + ) - dates_series = (df - .dropna(subset = df.columns[3]) - .iloc[:, 0] - .drop_duplicates() - .reset_index(drop=True) - ) + dates_series = ( + df.dropna(subset=df.columns[3]) + .iloc[:, 0] + .drop_duplicates() + .reset_index(drop=True) + ) - return (pd.DataFrame({'date': dates_series, "exchange_rate": exchange_series}) - .assign(date = lambda d: pd.to_datetime(d.date), - exchange_rate = lambda d: pd.to_numeric(d.exchange_rate, errors="coerce") - ) - ) + return pd.DataFrame( + {"date": dates_series, "exchange_rate": exchange_series} + ).assign( + date=lambda d: pd.to_datetime(d.date), + exchange_rate=lambda d: pd.to_numeric(d.exchange_rate, errors="coerce"), + ) @lru_cache @@ -88,4 +89,4 @@ def fetch_exchange_rates(unit_basis: Literal["SDR", "USD"] = "SDR") -> pd.DataFr logger.info("Fetching exchange rate data") df = read_data() - return parse_data(df, unit_basis) \ No newline at end of file + return parse_data(df, unit_basis) diff --git a/src/imf_reader/sdr/read_interest_rate.py b/src/imf_reader/sdr/read_interest_rate.py index ab9ec24..3e662d5 100644 --- a/src/imf_reader/sdr/read_interest_rate.py +++ b/src/imf_reader/sdr/read_interest_rate.py @@ -17,7 +17,7 @@ def read_data(): """Read the data from the IMF website""" data = { - '__EVENTTARGET': 'lbnTSV', + "__EVENTTARGET": "lbnTSV", } try: @@ -28,57 +28,62 @@ def read_data(): raise ConnectionError(f"Could not connect to {BASE_URL}. Error: {str(e)}") try: - return pd.read_csv(io.BytesIO(response.content), delimiter="/t", engine="python") + return pd.read_csv( + io.BytesIO(response.content), delimiter="/t", engine="python" + ) except pd.errors.ParserError as e: raise ValueError(f"Could not parse data. Error: {str(e)}") def clean_data(df: pd.DataFrame) -> pd.DataFrame: - """Cleaning/parsing steps for the data. split tab separated value into separate columns, rename columns, assign types, and additional formatting - """ - columns = {"Effective from": "effective_from", - "Effective to": "effective_to", - } + """Cleaning/parsing steps for the data. split tab separated value into separate columns, rename columns, assign types, and additional formatting""" + columns = { + "Effective from": "effective_from", + "Effective to": "effective_to", + } df = df.iloc[:, 0].str.split("\t", expand=True) df.columns = df.iloc[0] df = df.iloc[1:] - return (df - .rename(columns = columns) - .loc[:, columns.values()] - .dropna(subset=["effective_to"]) - .pipe(_format_data) - .assign(interest_rate = lambda d: pd.to_numeric(d.interest_rate, errors="coerce"), - effective_from = lambda d: pd.to_datetime(d.effective_from), - effective_to = lambda d: pd.to_datetime(d.effective_to) - ) + return ( + df.rename(columns=columns) + .loc[:, columns.values()] + .dropna(subset=["effective_to"]) + .pipe(_format_data) + .assign( + interest_rate=lambda d: pd.to_numeric(d.interest_rate, errors="coerce"), + effective_from=lambda d: pd.to_datetime(d.effective_from), + effective_to=lambda d: pd.to_datetime(d.effective_to), + ) ) def _format_data(df: pd.DataFrame) -> pd.DataFrame: - """Format the data. Only keep the rows for the SDR interest rate and add the dates - """ - - dates_df = (df - .loc[lambda d: ~ d['effective_from'].isin(["SDR Interest Rate", "Total", "Floor for SDR Interest Rate"])] + """Format the data. Only keep the rows for the SDR interest rate and add the dates""" + + dates_df = ( + df.loc[ + lambda d: ~d["effective_from"].isin( + ["SDR Interest Rate", "Total", "Floor for SDR Interest Rate"] + ) + ] .drop_duplicates() .reset_index(drop=True) ) - sdr_df = (df - .loc[lambda d: d.effective_from == "SDR Interest Rate"] - .iloc[:, 1:2] - .reset_index(drop=True) - ) + sdr_df = ( + df.loc[lambda d: d.effective_from == "SDR Interest Rate"] + .iloc[:, 1:2] + .reset_index(drop=True) + ) - sdr_df.columns = ['interest_rate'] + sdr_df.columns = ["interest_rate"] return sdr_df.join(dates_df) - @lru_cache def fetch_interest_rates() -> pd.DataFrame: """Fetch the historic SDR interest rates from the IMF diff --git a/src/imf_reader/utils.py b/src/imf_reader/utils.py index 6a301cb..0acaa1c 100644 --- a/src/imf_reader/utils.py +++ b/src/imf_reader/utils.py @@ -2,6 +2,7 @@ import requests + def make_request(url: str) -> requests.models.Response: """Make a request to a url. @@ -22,4 +23,4 @@ def make_request(url: str) -> requests.models.Response: return response except requests.exceptions.RequestException as e: - raise ConnectionError(f"Could not connect to {url}. Error: {str(e)}") \ No newline at end of file + raise ConnectionError(f"Could not connect to {url}. Error: {str(e)}") diff --git a/tests/test_utils.py b/tests/test_utils.py index 145b21b..a5fd4a5 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -29,4 +29,4 @@ def test_make_request(): with patch("requests.get") as mock_get: mock_get.return_value.status_code = 404 with pytest.raises(ConnectionError, match="Could not connect to"): - utils.make_request(TEST_URL) \ No newline at end of file + utils.make_request(TEST_URL) From 7ecc5f3ccde9f3bb66ca4ae0ba1dc186db69bcb1 Mon Sep 17 00:00:00 2001 From: mharoruiz <88669820+mharoruiz@users.noreply.github.com> Date: Tue, 3 Dec 2024 10:31:56 +0100 Subject: [PATCH 10/13] rename, split functions --- src/imf_reader/sdr/read_announcements.py | 4 +- src/imf_reader/sdr/read_exchange_rate.py | 51 +++++++++++++++++------- src/imf_reader/sdr/read_interest_rate.py | 27 ++++++++----- 3 files changed, 55 insertions(+), 27 deletions(-) diff --git a/src/imf_reader/sdr/read_announcements.py b/src/imf_reader/sdr/read_announcements.py index be056d9..113acb9 100644 --- a/src/imf_reader/sdr/read_announcements.py +++ b/src/imf_reader/sdr/read_announcements.py @@ -54,7 +54,7 @@ def format_date(month: int, year: int) -> str: @lru_cache -def get_data(year: int, month: int): +def get_holdings_and_allocations_data(year: int, month: int): """Get sdr allocations and holdings data for a given month and year""" date = format_date(month, year) @@ -100,4 +100,4 @@ def fetch_allocations_holdings(date: tuple[int, int] | None = None) -> pd.DataFr if date is None: date = get_latest_date() - return get_data(*date) + return get_holdings_and_allocations_data(*date) diff --git a/src/imf_reader/sdr/read_exchange_rate.py b/src/imf_reader/sdr/read_exchange_rate.py index 11830e9..a2a3309 100644 --- a/src/imf_reader/sdr/read_exchange_rate.py +++ b/src/imf_reader/sdr/read_exchange_rate.py @@ -15,7 +15,7 @@ BASE_URL = "https://www.imf.org/external/np/fin/data/rms_sdrv.aspx" -def read_data(): +def get_exchange_rates_data(): """Read the data from the IMF website""" data = { @@ -38,31 +38,52 @@ def read_data(): raise ValueError(f"Could not parse data. Error: {str(e)}") -def parse_data(df: pd.DataFrame, unit_basis: Literal["SDR", "USD"]): - """Parse the data from the IMF website""" - - if unit_basis == "USD": - col_val = "U.S.$1.00 = SDR" - elif unit_basis == "SDR": - col_val = "SDR1 = US$" - else: - raise ValueError("unit_basis must be either 'SDR' or 'USD'") - +def preprocess_dataframe(df: pd.DataFrame): + """ + Preprocess the input DataFrame by splitting columns and setting headers. + """ df = df.iloc[:, 0].str.split("\t", expand=True) df.columns = df.iloc[0] - df = df.iloc[1:] + return df.iloc[1:] - exchange_series = ( + +def extract_exchange_series(df: pd.DataFrame, col_val: str): + """ + Extract the exchange rate series for the given column value. + """ + return ( df.loc[lambda d: d["Report date"] == col_val].iloc[:, 1].reset_index(drop=True) ) - dates_series = ( + +def extract_dates_series(df: pd.DataFrame): + """ + Extract the dates series from the DataFrame. + """ + return ( df.dropna(subset=df.columns[3]) .iloc[:, 0] .drop_duplicates() .reset_index(drop=True) ) + +def parse_data(df: pd.DataFrame, unit_basis: Literal["SDR", "USD"]): + """Parse the data from the IMF website""" + + # Validate unit basis + if unit_basis == "USD": + col_val = "U.S.$1.00 = SDR" + elif unit_basis == "SDR": + col_val = "SDR1 = US$" + else: + raise ValueError("unit_basis must be either 'SDR' or 'USD'") + + # Preprocess dataframe and extract relevant columns + df = preprocess_dataframe(df) + exchange_series = extract_exchange_series(df, col_val) + dates_series = extract_dates_series(df) + return pd.DataFrame( {"date": dates_series, "exchange_rate": exchange_series} ).assign( @@ -88,5 +109,5 @@ def fetch_exchange_rates(unit_basis: Literal["SDR", "USD"] = "SDR") -> pd.DataFr logger.info("Fetching exchange rate data") - df = read_data() + df = get_exchange_rates_data() return parse_data(df, unit_basis) diff --git a/src/imf_reader/sdr/read_interest_rate.py b/src/imf_reader/sdr/read_interest_rate.py index 3e662d5..c13adf2 100644 --- a/src/imf_reader/sdr/read_interest_rate.py +++ b/src/imf_reader/sdr/read_interest_rate.py @@ -13,7 +13,7 @@ BASE_URL: str = "https://www.imf.org/external/np/fin/data/sdr_ir.aspx" -def read_data(): +def get_interest_rates_data(): """Read the data from the IMF website""" data = { @@ -51,6 +51,7 @@ def clean_data(df: pd.DataFrame) -> pd.DataFrame: df.rename(columns=columns) .loc[:, columns.values()] .dropna(subset=["effective_to"]) + .pipe(_filter_data()) .pipe(_format_data) .assign( interest_rate=lambda d: pd.to_numeric(d.interest_rate, errors="coerce"), @@ -60,21 +61,27 @@ def clean_data(df: pd.DataFrame) -> pd.DataFrame: ) -def _format_data(df: pd.DataFrame) -> pd.DataFrame: - """Format the data. Only keep the rows for the SDR interest rate and add the dates""" +def _filter_data(df: pd.DataFrame) -> pd.DataFrame: + """ + Filter the DataFrame to separate rows for dates and SDR interest rates. + """ + return df.loc[ + lambda d: ~d["effective_from"].isin(["Total", "Floor for SDR Interest Rate"]) + ].reset_index(drop=True) + +def _format_data(df: pd.DataFrame) -> pd.DataFrame: + """ + Format the filtered DataFrame into a clean DataFrame with interest rates and dates. + """ dates_df = ( - df.loc[ - lambda d: ~d["effective_from"].isin( - ["SDR Interest Rate", "Total", "Floor for SDR Interest Rate"] - ) - ] + df.loc[lambda d: d["effective_from"] != "SDR Interest Rate"] .drop_duplicates() .reset_index(drop=True) ) sdr_df = ( - df.loc[lambda d: d.effective_from == "SDR Interest Rate"] + df.loc[lambda d: d["effective_from"] == "SDR Interest Rate"] .iloc[:, 1:2] .reset_index(drop=True) ) @@ -99,7 +106,7 @@ def fetch_interest_rates() -> pd.DataFrame: logger.info("Fetching SDR interest rates") - df = read_data() + df = get_interest_rates_data() df = clean_data(df) return df From 6cc222015ceb6faa509e2766bf3c3da150968a32 Mon Sep 17 00:00:00 2001 From: mharoruiz <88669820+mharoruiz@users.noreply.github.com> Date: Thu, 5 Dec 2024 11:30:39 +0100 Subject: [PATCH 11/13] add sdr tests --- src/imf_reader/sdr/read_exchange_rate.py | 10 +- tests/test_sdr/test_read_announcements.py | 102 +++++++++++ tests/test_sdr/test_read_exchange_rate.py | 202 ++++++++++++++++++++++ tests/test_utils.py | 32 ---- 4 files changed, 313 insertions(+), 33 deletions(-) create mode 100644 tests/test_sdr/test_read_announcements.py create mode 100644 tests/test_sdr/test_read_exchange_rate.py delete mode 100644 tests/test_utils.py diff --git a/src/imf_reader/sdr/read_exchange_rate.py b/src/imf_reader/sdr/read_exchange_rate.py index a2a3309..c4bbc74 100644 --- a/src/imf_reader/sdr/read_exchange_rate.py +++ b/src/imf_reader/sdr/read_exchange_rate.py @@ -44,7 +44,15 @@ def preprocess_dataframe(df: pd.DataFrame): """ df = df.iloc[:, 0].str.split("\t", expand=True) df.columns = df.iloc[0] - return df.iloc[1:] + df = df.iloc[1:].reset_index(drop=True) + + # Ensure required columns are present + required_columns = ["Report date"] + for column in required_columns: + if column not in df.columns: + raise KeyError(f"Missing required column: {column}") + + return df def extract_exchange_series(df: pd.DataFrame, col_val: str): diff --git a/tests/test_sdr/test_read_announcements.py b/tests/test_sdr/test_read_announcements.py new file mode 100644 index 0000000..14d3a00 --- /dev/null +++ b/tests/test_sdr/test_read_announcements.py @@ -0,0 +1,102 @@ +import unittest +from unittest.mock import patch, MagicMock +import pandas as pd +from bs4 import BeautifulSoup +from imf_reader.sdr import read_announcements + + +class TestReadAnnouncements(unittest.TestCase): + """Tests functions in the read_announcements module.""" + + @patch("pandas.read_csv") + def test_read_tsv(self, mock_read_csv): + """Ensure read_tsv processes well-formated tsv correctly and raises ValueError on malformed data.""" + # Mock successful TSV read + mock_read_csv.return_value = pd.DataFrame({"A": [1], "B": [2]}) + result = read_announcements.read_tsv("mock_url") + self.assertTrue(isinstance(result, pd.DataFrame)) + + # Mock failure + mock_read_csv.side_effect = pd.errors.ParserError + with self.assertRaises(ValueError): + read_announcements.read_tsv("mock_url") + + def test_clean_df_correct_format(self): + """Test clean_df with the expected format.""" + # Mock input DataFrame + raw_data = pd.DataFrame({0: ["", "", "", "Country A\t$100\t$200"]}) + expected_data = pd.DataFrame( + { + "entity": ["Country A", "Country A"], + "indicator": ["holdings", "allocations"], + "value": [100, 200], + } + ) + + result = read_announcements.clean_df(raw_data) + pd.testing.assert_frame_equal(result, expected_data) + + def test_clean_df_empty(self): + """Test clean_df with an empty DataFrame""" + input_df = pd.DataFrame() + with self.assertRaises(IndexError): + read_announcements.clean_df(input_df) + + def test_format_date(self): + """Test format_date computes last day of a given month/year.""" + self.assertEqual(read_announcements.format_date(2, 2024), "2024-2-29") + self.assertEqual(read_announcements.format_date(1, 2023), "2023-1-31") + + @patch("imf_reader.sdr.read_announcements.read_tsv") + @patch("imf_reader.sdr.read_announcements.clean_df") + def test_get_holdings_and_allocations_data(self, mock_clean_df, mock_read_tsv): + """Test get_holdings_and_allocations_data caches data properly.""" + mock_read_tsv.return_value = pd.DataFrame() + mock_clean_df.return_value = pd.DataFrame({"data": [1]}) + + result = read_announcements.get_holdings_and_allocations_data(2024, 11) + self.assertTrue("data" in result.columns) + + @patch("imf_reader.sdr.read_announcements.make_request") + @patch("bs4.BeautifulSoup") + def test_get_latest_date(self, mock_soup, mock_make_request): + """Test correct extraction of get_latest_date.""" + # Simulate HTML content + html_content = """ + + +
+ + + +
Header
November 30, 2024
+ + + """ + mock_make_request.return_value.content = html_content + mock_soup.return_value = BeautifulSoup(html_content, "html.parser") + + # Call the function + year, month = read_announcements.get_latest_date() + + # Assert expected output + self.assertEqual((year, month), (2024, 11)) + + @patch("imf_reader.sdr.read_announcements.get_latest_date") + @patch("imf_reader.sdr.read_announcements.get_holdings_and_allocations_data") + def test_fetch_allocations_holdings(self, mock_get_data, mock_get_latest_date): + """Ensure fetch_allocations_holdings fetches data for the provided date or the latest date.""" + # Mock latest date and data fetch + mock_get_latest_date.return_value = (2024, 11) + mock_get_data.return_value = pd.DataFrame({"data": [1]}) + + result = read_announcements.fetch_allocations_holdings() + self.assertTrue("data" in result.columns) + + # Test with specific date + result = read_announcements.fetch_allocations_holdings((2023, 10)) + mock_get_data.assert_called_with(2023, 10) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_sdr/test_read_exchange_rate.py b/tests/test_sdr/test_read_exchange_rate.py new file mode 100644 index 0000000..ac0f4cb --- /dev/null +++ b/tests/test_sdr/test_read_exchange_rate.py @@ -0,0 +1,202 @@ +from unittest.mock import patch, MagicMock, ANY +import pytest +import requests +import pandas as pd +from imf_reader.sdr import read_exchange_rate +from imf_reader.sdr.read_exchange_rate import BASE_URL + + +@pytest.fixture +def input_df(): + df = pd.DataFrame( + { + "SDR Valuations": [ + "Report date\tCurrency Unit\tCurrency amount\tExchange Rate", + "2023-11-30\tEuro\t0.456\t-1.234", + "U.S.$1.00 = SDR\t0.123", + "SDR1 = US$\t0.321", + ] + } + ) + return df + + +class TestExchangeRateModule: + + @pytest.fixture(autouse=True) + def clear_cache(self): + """Clear cache before each test.""" + read_exchange_rate.fetch_exchange_rates.cache_clear() + + @patch("requests.post") + def test_get_exchange_rates_data_success(self, mock_post): + """Test successful data retrieval and parsing""" + # Mock the response content with a valid TSV format + mock_response = MagicMock() + mock_response.content = ( + b"Column1\tColumn2\n2023-11-30\t1.234\n2023-12-01\t0.789\n" + ) + mock_response.raise_for_status = MagicMock() + mock_post.return_value = mock_response + + expected_df = pd.DataFrame( + {"Column1\tColumn2": ["2023-11-30\t1.234", "2023-12-01\t0.789"]} + ) + result = read_exchange_rate.get_exchange_rates_data() + + # Assertions + pd.testing.assert_frame_equal(result, expected_df) + mock_post.assert_called_once_with(BASE_URL, data={"__EVENTTARGET": "lbnTSV"}) + + def test_get_exchange_rates_data_connection_error(self): + """Test ConnectionError is raised when requests.post fails.""" + with patch("requests.post") as mock_post: + # Simulate raising a requests.exceptions.RequestException + mock_post.side_effect = requests.exceptions.RequestException( + "Network error" + ) + + # Verify the exception + with pytest.raises( + ConnectionError, + match=f"Could not connect to {read_exchange_rate.BASE_URL}", + ): + read_exchange_rate.get_exchange_rates_data() + + # Verify the mock was called with the expected arguments + mock_post.assert_called_once_with( + read_exchange_rate.BASE_URL, data={"__EVENTTARGET": "lbnTSV"} + ) + + def test_get_exchange_rates_data_parse_error(self): + """Test ValueError is raised when parsing fails.""" + with patch("requests.post") as mock_post, patch( + "pandas.read_csv" + ) as mock_read_csv: + # Mock the response content with invalid data + mock_response = MagicMock() + mock_response.content = b"invalid data" + mock_response.raise_for_status = MagicMock() + mock_post.return_value = mock_response + + # Simulate pd.read_csv raising a ParserError + mock_read_csv.side_effect = pd.errors.ParserError("Parsing error") + + # Use pytest.raises to assert the ValueError + with pytest.raises(ValueError, match="Could not parse data"): + read_exchange_rate.get_exchange_rates_data() + + # Assertions + mock_post.assert_called_once_with( + read_exchange_rate.BASE_URL, data={"__EVENTTARGET": "lbnTSV"} + ) + mock_read_csv.assert_called_once_with(ANY, delimiter="/t", engine="python") + + def test_preprocess_dataframe_success(self, input_df): + """Test preprocessing of the DataFrame""" + + expected_df = pd.DataFrame( + { + "Report date": ["2023-11-30", "U.S.$1.00 = SDR", "SDR1 = US$"], + "Currency Unit": ["Euro", "0.123", "0.321"], + "Currency amount": ["0.456", None, None], + "Exchange Rate": ["-1.234", None, None], + } + ) + expected_df.columns.name = 0 + result = read_exchange_rate.preprocess_dataframe(input_df) + + # Assertion + pd.testing.assert_frame_equal(result, expected_df) + + def test_preprocess_dataframe_missing_column(self): + """Test that KeyError is raised when 'Report date' column is missing.""" + # Create the input DataFrame + input_df = pd.DataFrame( + [ + "Other Column\tCurrency Unit\tCurrency amount\tExchange Rate", + "2023-11-30\tEuro\t0.456\t-1.234", + "U.S.$1.00 = SDR\t0.123", + "SDR1 = US$\t0.321", + ] + ) + + # Assert that KeyError is raised with the correct message + with pytest.raises(KeyError, match="Missing required column: Report date"): + read_exchange_rate.preprocess_dataframe(input_df) + + @pytest.mark.parametrize( + "currency_code, expected_xrate", + [ + ("U.S.$1.00 = SDR", "0.123"), + ("SDR1 = US$", "0.321"), + ], + ) + def test_extract_exchange_series(self, input_df, currency_code, expected_xrate): + """Test extracting the exchange series for a specific column value""" + input_df = read_exchange_rate.preprocess_dataframe(input_df) + result = read_exchange_rate.extract_exchange_series(input_df, currency_code) + expected_series = pd.Series([expected_xrate], name="Currency Unit") + + # Assertion + pd.testing.assert_series_equal(result, expected_series) + + def test_extract_dates_series(self, input_df): + """Test extracting unique dates from the DataFrame""" + preprocessed_df = read_exchange_rate.preprocess_dataframe(input_df) + result = read_exchange_rate.extract_dates_series(preprocessed_df) + expected_series = pd.Series(["2023-11-30"], name="Report date") + + # Assertion + pd.testing.assert_series_equal(result, expected_series) + + @pytest.mark.parametrize( + "currency_code, expected_xrate", + [ + ("USD", 0.123), + ("SDR", 0.321), + ], + ) + def test_parse_data_valid_input(self, input_df, currency_code, expected_xrate): + """Test parsing valid input DataFrame with mocked helpers""" + + expected_df = pd.DataFrame( + {"date": pd.to_datetime(["2023-11-30"]), "exchange_rate": [expected_xrate]}, + ) + result = read_exchange_rate.parse_data(input_df, currency_code) + + # Assertions + pd.testing.assert_frame_equal(result, expected_df) + assert result.date.dtype == "datetime64[ns]" + assert result.exchange_rate.dtype == "float64" + + def test_parse_data_invalid_unit_basis(self, input_df): + """Test parse_data raises error on invalid unit_basis.""" + # Assert that ValueError is raised when passing an invalid unit_basis + with pytest.raises( + ValueError, match="unit_basis must be either 'SDR' or 'USD'" + ): + read_exchange_rate.parse_data(input_df, "INVALID") + + @patch("imf_reader.sdr.read_exchange_rate.get_exchange_rates_data") + @patch("imf_reader.sdr.read_exchange_rate.parse_data") + def test_fetch_exchange_rates(self, mock_parse_data, mock_get_data, input_df): + """Test fetching exchange rates""" + # Mock return values for the patched functions + mock_get_data.return_value = input_df + mock_parse_data.return_value = pd.DataFrame( + {"date": pd.to_datetime(["2023-11-30"]), "exchange_rate": [0.123]} + ) + expected_df = pd.DataFrame( + {"date": pd.to_datetime(["2023-11-30"]), "exchange_rate": [0.123]} + ) + + # Mock the logger + with patch("imf_reader.sdr.read_exchange_rate.logger.info") as mock_logger: + result = read_exchange_rate.fetch_exchange_rates("USD") + + # Assertions + mock_get_data.assert_called_once() + mock_parse_data.assert_called_once_with(mock_get_data.return_value, "USD") + pd.testing.assert_frame_equal(result, expected_df) + mock_logger.assert_called_once_with("Fetching exchange rate data") diff --git a/tests/test_utils.py b/tests/test_utils.py deleted file mode 100644 index a5fd4a5..0000000 --- a/tests/test_utils.py +++ /dev/null @@ -1,32 +0,0 @@ -"""Tests for the utils module""" - -import pytest -from unittest.mock import patch -import requests - -from imf_reader import utils - - -TEST_URL = "https://test.com" - - -def test_make_request(): - """Test make_request""" - - # test successful request - with patch("requests.get") as mock_get: - mock_get.return_value.status_code = 200 - response = utils.make_request(TEST_URL) - assert response == mock_get.return_value - - # test failed request - with patch("requests.get") as mock_get: - mock_get.side_effect = requests.exceptions.RequestException - with pytest.raises(ConnectionError, match="Could not connect to"): - utils.make_request(TEST_URL) - - # test when status code is not 200 - with patch("requests.get") as mock_get: - mock_get.return_value.status_code = 404 - with pytest.raises(ConnectionError, match="Could not connect to"): - utils.make_request(TEST_URL) From 15fd5917d0772bfb9671cf8720af887df0b3d739 Mon Sep 17 00:00:00 2001 From: mharoruiz <88669820+mharoruiz@users.noreply.github.com> Date: Mon, 9 Dec 2024 16:21:52 +0100 Subject: [PATCH 12/13] add interest rate tests --- src/imf_reader/sdr/read_exchange_rate.py | 4 +- src/imf_reader/sdr/read_interest_rate.py | 40 ++-- tests/test_sdr/test_read_exchange_rate.py | 48 +++-- tests/test_sdr/test_read_interest_rate.py | 244 ++++++++++++++++++++++ 4 files changed, 301 insertions(+), 35 deletions(-) create mode 100644 tests/test_sdr/test_read_interest_rate.py diff --git a/src/imf_reader/sdr/read_exchange_rate.py b/src/imf_reader/sdr/read_exchange_rate.py index c4bbc74..23a8eb7 100644 --- a/src/imf_reader/sdr/read_exchange_rate.py +++ b/src/imf_reader/sdr/read_exchange_rate.py @@ -38,7 +38,7 @@ def get_exchange_rates_data(): raise ValueError(f"Could not parse data. Error: {str(e)}") -def preprocess_dataframe(df: pd.DataFrame): +def preprocess_data(df: pd.DataFrame): """ Preprocess the input DataFrame by splitting columns and setting headers. """ @@ -88,7 +88,7 @@ def parse_data(df: pd.DataFrame, unit_basis: Literal["SDR", "USD"]): raise ValueError("unit_basis must be either 'SDR' or 'USD'") # Preprocess dataframe and extract relevant columns - df = preprocess_dataframe(df) + df = preprocess_data(df) exchange_series = extract_exchange_series(df, col_val) dates_series = extract_dates_series(df) diff --git a/src/imf_reader/sdr/read_interest_rate.py b/src/imf_reader/sdr/read_interest_rate.py index c13adf2..7254f9b 100644 --- a/src/imf_reader/sdr/read_interest_rate.py +++ b/src/imf_reader/sdr/read_interest_rate.py @@ -36,28 +36,27 @@ def get_interest_rates_data(): raise ValueError(f"Could not parse data. Error: {str(e)}") -def clean_data(df: pd.DataFrame) -> pd.DataFrame: - """Cleaning/parsing steps for the data. split tab separated value into separate columns, rename columns, assign types, and additional formatting""" +def preprocess_data(df: pd.DataFrame): + """ + Preprocess the input DataFrame by splitting columns and setting headers. + """ + df = df.iloc[:, 0].str.split("\t", expand=True) + df.columns = df.iloc[0] + df = df.iloc[1:] + + # Ensure required columns are present columns = { "Effective from": "effective_from", "Effective to": "effective_to", } - - df = df.iloc[:, 0].str.split("\t", expand=True) - df.columns = df.iloc[0] - df = df.iloc[1:] + for column in columns: + if column not in df.columns: + raise KeyError(f"Missing required column: {column}") return ( df.rename(columns=columns) .loc[:, columns.values()] .dropna(subset=["effective_to"]) - .pipe(_filter_data()) - .pipe(_format_data) - .assign( - interest_rate=lambda d: pd.to_numeric(d.interest_rate, errors="coerce"), - effective_from=lambda d: pd.to_datetime(d.effective_from), - effective_to=lambda d: pd.to_datetime(d.effective_to), - ) ) @@ -91,6 +90,21 @@ def _format_data(df: pd.DataFrame) -> pd.DataFrame: return sdr_df.join(dates_df) +def clean_data(df: pd.DataFrame) -> pd.DataFrame: + """Cleaning/parsing steps for the data. split tab separated value into separate columns, rename columns, assign types, and additional formatting""" + + df = preprocess_data(df) + return ( + df.pipe(_filter_data) + .pipe(_format_data) + .assign( + interest_rate=lambda d: pd.to_numeric(d.interest_rate, errors="coerce"), + effective_from=lambda d: pd.to_datetime(d.effective_from), + effective_to=lambda d: pd.to_datetime(d.effective_to), + ) + ) + + @lru_cache def fetch_interest_rates() -> pd.DataFrame: """Fetch the historic SDR interest rates from the IMF diff --git a/tests/test_sdr/test_read_exchange_rate.py b/tests/test_sdr/test_read_exchange_rate.py index ac0f4cb..0922fd3 100644 --- a/tests/test_sdr/test_read_exchange_rate.py +++ b/tests/test_sdr/test_read_exchange_rate.py @@ -2,8 +2,15 @@ import pytest import requests import pandas as pd -from imf_reader.sdr import read_exchange_rate -from imf_reader.sdr.read_exchange_rate import BASE_URL +from imf_reader.sdr.read_exchange_rate import ( + preprocess_data, + fetch_exchange_rates, + get_exchange_rates_data, + extract_exchange_series, + extract_dates_series, + parse_data, + BASE_URL, +) @pytest.fixture @@ -26,7 +33,7 @@ class TestExchangeRateModule: @pytest.fixture(autouse=True) def clear_cache(self): """Clear cache before each test.""" - read_exchange_rate.fetch_exchange_rates.cache_clear() + fetch_exchange_rates.cache_clear() @patch("requests.post") def test_get_exchange_rates_data_success(self, mock_post): @@ -42,7 +49,7 @@ def test_get_exchange_rates_data_success(self, mock_post): expected_df = pd.DataFrame( {"Column1\tColumn2": ["2023-11-30\t1.234", "2023-12-01\t0.789"]} ) - result = read_exchange_rate.get_exchange_rates_data() + result = get_exchange_rates_data() # Assertions pd.testing.assert_frame_equal(result, expected_df) @@ -59,13 +66,13 @@ def test_get_exchange_rates_data_connection_error(self): # Verify the exception with pytest.raises( ConnectionError, - match=f"Could not connect to {read_exchange_rate.BASE_URL}", + match=f"Could not connect to {BASE_URL}", ): - read_exchange_rate.get_exchange_rates_data() + get_exchange_rates_data() # Verify the mock was called with the expected arguments mock_post.assert_called_once_with( - read_exchange_rate.BASE_URL, data={"__EVENTTARGET": "lbnTSV"} + BASE_URL, data={"__EVENTTARGET": "lbnTSV"} ) def test_get_exchange_rates_data_parse_error(self): @@ -84,15 +91,15 @@ def test_get_exchange_rates_data_parse_error(self): # Use pytest.raises to assert the ValueError with pytest.raises(ValueError, match="Could not parse data"): - read_exchange_rate.get_exchange_rates_data() + get_exchange_rates_data() # Assertions mock_post.assert_called_once_with( - read_exchange_rate.BASE_URL, data={"__EVENTTARGET": "lbnTSV"} + BASE_URL, data={"__EVENTTARGET": "lbnTSV"} ) mock_read_csv.assert_called_once_with(ANY, delimiter="/t", engine="python") - def test_preprocess_dataframe_success(self, input_df): + def test_preprocess_data_success(self, input_df): """Test preprocessing of the DataFrame""" expected_df = pd.DataFrame( @@ -104,12 +111,12 @@ def test_preprocess_dataframe_success(self, input_df): } ) expected_df.columns.name = 0 - result = read_exchange_rate.preprocess_dataframe(input_df) + result = preprocess_data(input_df) # Assertion pd.testing.assert_frame_equal(result, expected_df) - def test_preprocess_dataframe_missing_column(self): + def test_preprocess_data_missing_column(self): """Test that KeyError is raised when 'Report date' column is missing.""" # Create the input DataFrame input_df = pd.DataFrame( @@ -123,7 +130,7 @@ def test_preprocess_dataframe_missing_column(self): # Assert that KeyError is raised with the correct message with pytest.raises(KeyError, match="Missing required column: Report date"): - read_exchange_rate.preprocess_dataframe(input_df) + preprocess_data(input_df) @pytest.mark.parametrize( "currency_code, expected_xrate", @@ -134,8 +141,8 @@ def test_preprocess_dataframe_missing_column(self): ) def test_extract_exchange_series(self, input_df, currency_code, expected_xrate): """Test extracting the exchange series for a specific column value""" - input_df = read_exchange_rate.preprocess_dataframe(input_df) - result = read_exchange_rate.extract_exchange_series(input_df, currency_code) + input_df = preprocess_data(input_df) + result = extract_exchange_series(input_df, currency_code) expected_series = pd.Series([expected_xrate], name="Currency Unit") # Assertion @@ -143,8 +150,8 @@ def test_extract_exchange_series(self, input_df, currency_code, expected_xrate): def test_extract_dates_series(self, input_df): """Test extracting unique dates from the DataFrame""" - preprocessed_df = read_exchange_rate.preprocess_dataframe(input_df) - result = read_exchange_rate.extract_dates_series(preprocessed_df) + preprocessed_df = preprocess_data(input_df) + result = extract_dates_series(preprocessed_df) expected_series = pd.Series(["2023-11-30"], name="Report date") # Assertion @@ -163,7 +170,7 @@ def test_parse_data_valid_input(self, input_df, currency_code, expected_xrate): expected_df = pd.DataFrame( {"date": pd.to_datetime(["2023-11-30"]), "exchange_rate": [expected_xrate]}, ) - result = read_exchange_rate.parse_data(input_df, currency_code) + result = parse_data(input_df, currency_code) # Assertions pd.testing.assert_frame_equal(result, expected_df) @@ -176,7 +183,7 @@ def test_parse_data_invalid_unit_basis(self, input_df): with pytest.raises( ValueError, match="unit_basis must be either 'SDR' or 'USD'" ): - read_exchange_rate.parse_data(input_df, "INVALID") + parse_data(input_df, "INVALID") @patch("imf_reader.sdr.read_exchange_rate.get_exchange_rates_data") @patch("imf_reader.sdr.read_exchange_rate.parse_data") @@ -184,6 +191,7 @@ def test_fetch_exchange_rates(self, mock_parse_data, mock_get_data, input_df): """Test fetching exchange rates""" # Mock return values for the patched functions mock_get_data.return_value = input_df + mock_get_data.return_value = input_df mock_parse_data.return_value = pd.DataFrame( {"date": pd.to_datetime(["2023-11-30"]), "exchange_rate": [0.123]} ) @@ -193,7 +201,7 @@ def test_fetch_exchange_rates(self, mock_parse_data, mock_get_data, input_df): # Mock the logger with patch("imf_reader.sdr.read_exchange_rate.logger.info") as mock_logger: - result = read_exchange_rate.fetch_exchange_rates("USD") + result = fetch_exchange_rates("USD") # Assertions mock_get_data.assert_called_once() diff --git a/tests/test_sdr/test_read_interest_rate.py b/tests/test_sdr/test_read_interest_rate.py new file mode 100644 index 0000000..d7effd7 --- /dev/null +++ b/tests/test_sdr/test_read_interest_rate.py @@ -0,0 +1,244 @@ +import pytest +import pandas as pd +import requests +from unittest.mock import patch, MagicMock, ANY +from io import BytesIO +from imf_reader.sdr.read_interest_rate import ( + BASE_URL, + get_interest_rates_data, + preprocess_data, + _filter_data, + _format_data, + clean_data, + fetch_interest_rates, +) + + +@pytest.fixture +def input_df(): + df = pd.DataFrame( + { + "SDR Interest Rate Calculation": [ + "Effective from\tEffective to\tCurrency Unit\tCurrency amount\tExchange rate", + "01/12/2024\t05/12/2024\tN/A\tN/A", + "SDR Interest Rate\t1.50", + "06/12/2024\t08/12/2024\tN/A\tN/A", + "Total\t2.75", + "09/12/2024\t12/12/2024\tN/A\tN/A", + "Floor for SDR Interest Rate\t3.50", + "empty row", + ] + } + ) + return df + + +class TestReadInterestRate: + + @pytest.fixture(autouse=True) + def clear_cache(self): + """Clear cache before each test.""" + fetch_interest_rates.cache_clear() + + @patch("requests.post") + def test_get_interest_rates_data(self, mock_post): + """Test successful data retrieval and parsing""" + # Mock the response content with a valid TSV format + mock_response = MagicMock() + mock_response.content = ( + b"Column1\tColumn2\n2023-11-30\t1.234\n2023-12-01\t0.789\n" + ) + mock_response.raise_for_status = MagicMock() + mock_post.return_value = mock_response + + expected_df = pd.DataFrame( + {"Column1\tColumn2": ["2023-11-30\t1.234", "2023-12-01\t0.789"]} + ) + result = get_interest_rates_data() + + # Assertions + pd.testing.assert_frame_equal(result, expected_df) + mock_post.assert_called_once_with(BASE_URL, data={"__EVENTTARGET": "lbnTSV"}) + + def test_get_interest_rates_data_connection_error(self): + """Test ConnectionError is raised when requests.post fails.""" + with patch("requests.post") as mock_post: + # Simulate raising a requests.exceptions.RequestException + mock_post.side_effect = requests.exceptions.RequestException( + "Network error" + ) + + # Verify the exception + with pytest.raises( + ConnectionError, + match=f"Could not connect to {BASE_URL}", + ): + get_interest_rates_data() + + # Verify the mock was called with the expected arguments + mock_post.assert_called_once_with( + BASE_URL, data={"__EVENTTARGET": "lbnTSV"} + ) + + def test_get_interest_rates_data_parse_error(self): + """Test ValueError is raised when parsing fails.""" + with patch("requests.post") as mock_post, patch( + "pandas.read_csv" + ) as mock_read_csv: + # Mock the response content with invalid data + mock_response = MagicMock() + mock_response.content = b"invalid data" + mock_response.raise_for_status = MagicMock() + mock_post.return_value = mock_response + + # Simulate pd.read_csv raising a ParserError + mock_read_csv.side_effect = pd.errors.ParserError("Parsing error") + + # Use pytest.raises to assert the ValueError + with pytest.raises(ValueError, match="Could not parse data"): + get_interest_rates_data() + + # Assertions + mock_post.assert_called_once_with( + BASE_URL, data={"__EVENTTARGET": "lbnTSV"} + ) + mock_read_csv.assert_called_once_with(ANY, delimiter="/t", engine="python") + + def test_preprocess_data_success(self, input_df): + """Test preprocess_data function with valid input.""" + expected_df = pd.DataFrame( + { + "effective_from": [ + "01/12/2024", + "SDR Interest Rate", + "06/12/2024", + "Total", + "09/12/2024", + "Floor for SDR Interest Rate", + ], + "effective_to": [ + "05/12/2024", + "1.50", + "08/12/2024", + "2.75", + "12/12/2024", + "3.50", + ], + } + ) + expected_df.columns.name = 0 + result = preprocess_data(input_df).reset_index(drop=True) + + # Validate the structure and content of the DataFrame + pd.testing.assert_frame_equal(result, expected_df) + + def test_preprocess_data_missing_column(self): + """Test preprocess_data function raises KeyError when required columns are missing.""" + invalid_df = pd.DataFrame( + { + "SDR Interest Rate Calculation": [ + "Some column\tAnother column", + "01/12/2024\tN/A", + ] + } + ) + + with pytest.raises(KeyError, match="Missing required column: Effective from"): + preprocess_data(invalid_df) + + def test_filter_data_valid(self, input_df): + """Test _filter_data with a valid DataFrame.""" + + input_df = preprocess_data(input_df) + result = _filter_data(input_df) + + # Expected Output DataFrame + expected_df = pd.DataFrame( + { + "effective_from": ["01/12/2024", "SDR Interest Rate", "06/12/2024", "09/12/2024"], + "effective_to": ["05/12/2024", "1.50", "08/12/2024", "12/12/2024"], + } + ) + expected_df.columns.name = 0 + + # Validate the results + pd.testing.assert_frame_equal(result, expected_df) + + + def test_format_data_valid(self, input_df): + """Test _format_data with valid input.""" + expected_df = pd.DataFrame({ + "interest_rate": ["1.50"], + "effective_from": ["01/12/2024"], + "effective_to": ["05/12/2024"] + }) + + result = (preprocess_data(input_df) + .pipe(_filter_data) + .pipe(_format_data) + .reset_index(drop=True)) + + # Validate the structure and content of the DataFrame + pd.testing.assert_frame_equal(result, expected_df) + + + + def test_clean_data_valid(self, input_df): + """Test clean_data with valid input DataFrame.""" + expected_df = pd.DataFrame( + { + "interest_rate": [1.50], + "effective_from": [ + pd.Timestamp("01/12/2024"), + ], + "effective_to": [ + pd.Timestamp("05/12/2024"), + ], + } + ) + + result = clean_data(input_df).reset_index(drop=True) + + # Validate the structure and content of the resulting DataFrame + pd.testing.assert_frame_equal(result, expected_df) + + @patch("imf_reader.sdr.read_interest_rate.get_interest_rates_data") + @patch("imf_reader.sdr.read_interest_rate.clean_data") + def test_fetch_exchange_rates(self, mock_clean_data, mock_get_data, input_df): + """Test fetching exchange rates""" + # Mock return values for the patched functions + mock_get_data.return_value = input_df + mock_get_data.return_value = input_df + mock_clean_data.return_value = pd.DataFrame( + { + "interest_rate": [1.50], + "effective_from": [ + pd.Timestamp("01/12/2024"), + ], + "effective_to": [ + pd.Timestamp("05/12/2024"), + ], + } + ) + + expected_df = pd.DataFrame( + { + "interest_rate": [1.50], + "effective_from": [ + pd.Timestamp("01/12/2024"), + ], + "effective_to": [ + pd.Timestamp("05/12/2024"), + ], + } + ) + + # Mock the logger + with patch("imf_reader.sdr.read_interest_rate.logger.info") as mock_logger: + result = fetch_interest_rates() + + # Assertions + mock_get_data.assert_called_once() + mock_clean_data.assert_called_once_with(mock_get_data.return_value) + pd.testing.assert_frame_equal(result, expected_df) + mock_logger.assert_called_once_with("Fetching SDR interest rates") From 127056ecc19c3a02541d001745ea90ddc34ca9f9 Mon Sep 17 00:00:00 2001 From: mharoruiz <88669820+mharoruiz@users.noreply.github.com> Date: Mon, 9 Dec 2024 16:22:37 +0100 Subject: [PATCH 13/13] black --- tests/test_sdr/test_read_interest_rate.py | 32 ++++++++++++++--------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/tests/test_sdr/test_read_interest_rate.py b/tests/test_sdr/test_read_interest_rate.py index d7effd7..06693e1 100644 --- a/tests/test_sdr/test_read_interest_rate.py +++ b/tests/test_sdr/test_read_interest_rate.py @@ -155,7 +155,12 @@ def test_filter_data_valid(self, input_df): # Expected Output DataFrame expected_df = pd.DataFrame( { - "effective_from": ["01/12/2024", "SDR Interest Rate", "06/12/2024", "09/12/2024"], + "effective_from": [ + "01/12/2024", + "SDR Interest Rate", + "06/12/2024", + "09/12/2024", + ], "effective_to": ["05/12/2024", "1.50", "08/12/2024", "12/12/2024"], } ) @@ -164,25 +169,26 @@ def test_filter_data_valid(self, input_df): # Validate the results pd.testing.assert_frame_equal(result, expected_df) - def test_format_data_valid(self, input_df): """Test _format_data with valid input.""" - expected_df = pd.DataFrame({ - "interest_rate": ["1.50"], - "effective_from": ["01/12/2024"], - "effective_to": ["05/12/2024"] - }) + expected_df = pd.DataFrame( + { + "interest_rate": ["1.50"], + "effective_from": ["01/12/2024"], + "effective_to": ["05/12/2024"], + } + ) - result = (preprocess_data(input_df) - .pipe(_filter_data) - .pipe(_format_data) - .reset_index(drop=True)) + result = ( + preprocess_data(input_df) + .pipe(_filter_data) + .pipe(_format_data) + .reset_index(drop=True) + ) # Validate the structure and content of the DataFrame pd.testing.assert_frame_equal(result, expected_df) - - def test_clean_data_valid(self, input_df): """Test clean_data with valid input DataFrame.""" expected_df = pd.DataFrame(