diff --git a/src/imf_reader/sdr/__init__.py b/src/imf_reader/sdr/__init__.py new file mode 100644 index 0000000..87113c4 --- /dev/null +++ b/src/imf_reader/sdr/__init__.py @@ -0,0 +1,52 @@ +"""Special Drawing Rights (SDR) reader module. + +This module offers access to the IMF's Special Drawing Rights (SDR) data. +The SDR is an international reserve asset created by the IMF in 1969. +It is not a currency, but the holder of SDRs can exchange them for usable currencies in times of need. + +Read more about SDRs at: https://www.imf.org/en/About/Factsheets/Sheets/2023/special-drawing-rights-sdr + + +Usage: + +Import the module + +```python +from imf_reader import sdr +``` + +Read allocations and holdings data + +```python +sdr.fetch_allocations_holdings() +``` +SDRs holdings and allocations are published at a monthly frequency. The function fetches the latest data available. + +To retrieve SDR holdings and allocations for a specific month and year, eg April 2021, pass the year and month as a tuple + +```python +sdr.fetch_allocations_holdings((2021, 4)) +``` + +Read interest rates + +```python +sdr.fetch_interest_rates() +``` + +Read exchange rates + +```python +sdr.fetch_exchange_rates() +``` +By default, the exchange rate is in USDs per 1 SDR. To get the exchange rate in SDRs per 1 USD, pass the unit basis as "USD" + +```python +sdr.fetch_exchange_rates("USD") +``` + +""" + +from imf_reader.sdr.read_interest_rate import fetch_interest_rates +from imf_reader.sdr.read_exchange_rate import fetch_exchange_rates +from imf_reader.sdr.read_announcements import fetch_allocations_holdings diff --git a/src/imf_reader/sdr/read_announcements.py b/src/imf_reader/sdr/read_announcements.py new file mode 100644 index 0000000..113acb9 --- /dev/null +++ b/src/imf_reader/sdr/read_announcements.py @@ -0,0 +1,103 @@ +"""Module to get SDR data from the IMF website + + +info: https://www.imf.org/en/About/Factsheets/Sheets/2023/special-drawing-rights-sdr + +""" + +from functools import lru_cache +import pandas as pd +import calendar +from bs4 import BeautifulSoup +from datetime import datetime + +from imf_reader.utils import make_request +from imf_reader.config import logger, NoDataError + +BASE_URL = "https://www.imf.org/external/np/fin/tad/" +MAIN_PAGE_URL = "https://www.imf.org/external/np/fin/tad/extsdr1.aspx" + + +def read_tsv(url: str) -> pd.DataFrame: + """Read a tsv file from a url and return a dataframe""" + + try: + return pd.read_csv(url, delimiter="/t", engine="python") + + except pd.errors.ParserError: + raise ValueError("SDR _data not available for this date") + + +def clean_df(df: pd.DataFrame) -> pd.DataFrame: + """Clean the SDR dataframe""" + + df = df.iloc[3:, 0].str.split("\t", expand=True) + df.columns = ["entity", "holdings", "allocations"] + + return df.assign( + holdings=lambda d: pd.to_numeric( + d.holdings.str.replace(r"[^\d.]", "", regex=True), errors="coerce" + ), + allocations=lambda d: pd.to_numeric( + d.allocations.str.replace(r"[^\d.]", "", regex=True), errors="coerce" + ), + ).melt( + id_vars="entity", value_vars=["holdings", "allocations"], var_name="indicator" + ) + + +def format_date(month: int, year: int) -> str: + """Return a date as year-month-day where day is the last day in the month""" + + last_day = calendar.monthrange(year, month)[1] + return f"{year}-{month}-{last_day}" + + +@lru_cache +def get_holdings_and_allocations_data(year: int, month: int): + """Get sdr allocations and holdings data for a given month and year""" + + date = format_date(month, year) + url = f"{BASE_URL}extsdr2.aspx?date1key={date}&tsvflag=Y" + + logger.info(f"Fetching SDR data for date: {date}") + + df = read_tsv(url) + df = clean_df(df) + df["date"] = pd.to_datetime(date) + + return df + + +@lru_cache +def get_latest_date() -> tuple[int, int]: + """Get the latest date for which SDR data is available""" + + logger.info("Fetching latest date") + + response = make_request(MAIN_PAGE_URL) + soup = BeautifulSoup(response.content, "html.parser") + table = soup.find_all("table")[4] + row = table.find_all("tr")[1] + + date = row.td.text.strip() + date = datetime.strptime(date, "%B %d, %Y") + + # Extract the year and month as a tuple + return date.year, date.month + + +def fetch_allocations_holdings(date: tuple[int, int] | None = None) -> pd.DataFrame: + """Fetch SDR holdings and allocations data for a given date + + Args: + date: A tuple of year and month e.g (2024, 11). If None, the latest data announcements released are fetched + + returns: + pd.DataFrame: A dataframe with the SDR data + """ + + if date is None: + date = get_latest_date() + + return get_holdings_and_allocations_data(*date) diff --git a/src/imf_reader/sdr/read_exchange_rate.py b/src/imf_reader/sdr/read_exchange_rate.py new file mode 100644 index 0000000..23a8eb7 --- /dev/null +++ b/src/imf_reader/sdr/read_exchange_rate.py @@ -0,0 +1,121 @@ +"""Module to read exchange rate data from the IMF's Special Drawing Rights (SDR) Valuation dataset. + +Read about SDR valuation at: https://www.imf.org/external/np/fin/data/rms_sdrv.aspx +""" + +import requests +import pandas as pd +import io +from functools import lru_cache +from typing import Literal + +from imf_reader.config import logger + + +BASE_URL = "https://www.imf.org/external/np/fin/data/rms_sdrv.aspx" + + +def get_exchange_rates_data(): + """Read the data from the IMF website""" + + data = { + "__EVENTTARGET": "lbnTSV", + } + + try: + response = requests.post(BASE_URL, data=data) + response.raise_for_status() + + except requests.exceptions.RequestException as e: + raise ConnectionError(f"Could not connect to {BASE_URL}. Error: {str(e)}") + + try: + return pd.read_csv( + io.BytesIO(response.content), delimiter="/t", engine="python" + ) + + except pd.errors.ParserError as e: + raise ValueError(f"Could not parse data. Error: {str(e)}") + + +def preprocess_data(df: pd.DataFrame): + """ + Preprocess the input DataFrame by splitting columns and setting headers. + """ + df = df.iloc[:, 0].str.split("\t", expand=True) + df.columns = df.iloc[0] + df = df.iloc[1:].reset_index(drop=True) + + # Ensure required columns are present + required_columns = ["Report date"] + for column in required_columns: + if column not in df.columns: + raise KeyError(f"Missing required column: {column}") + + return df + + +def extract_exchange_series(df: pd.DataFrame, col_val: str): + """ + Extract the exchange rate series for the given column value. + """ + return ( + df.loc[lambda d: d["Report date"] == col_val].iloc[:, 1].reset_index(drop=True) + ) + + +def extract_dates_series(df: pd.DataFrame): + """ + Extract the dates series from the DataFrame. + """ + return ( + df.dropna(subset=df.columns[3]) + .iloc[:, 0] + .drop_duplicates() + .reset_index(drop=True) + ) + + +def parse_data(df: pd.DataFrame, unit_basis: Literal["SDR", "USD"]): + """Parse the data from the IMF website""" + + # Validate unit basis + if unit_basis == "USD": + col_val = "U.S.$1.00 = SDR" + elif unit_basis == "SDR": + col_val = "SDR1 = US$" + else: + raise ValueError("unit_basis must be either 'SDR' or 'USD'") + + # Preprocess dataframe and extract relevant columns + df = preprocess_data(df) + exchange_series = extract_exchange_series(df, col_val) + dates_series = extract_dates_series(df) + + return pd.DataFrame( + {"date": dates_series, "exchange_rate": exchange_series} + ).assign( + date=lambda d: pd.to_datetime(d.date), + exchange_rate=lambda d: pd.to_numeric(d.exchange_rate, errors="coerce"), + ) + + +@lru_cache +def fetch_exchange_rates(unit_basis: Literal["SDR", "USD"] = "SDR") -> pd.DataFrame: + """Fetch the historic SDR exchange rates from the IMF + + The currency value of the SDR is determined by summing the values in U.S. dollars, based on market exchange rates, of a basket of major currencies (the U.S. dollar, Euro, Japanese yen, pound sterling and the Chinese renminbi). The SDR currency value is calculated daily except on IMF holidays, or whenever the IMF is closed for business, or on an ad-hoc basis to facilitate unscheduled IMF operations. The SDR valuation basket is reviewed and adjusted every five years. + + Read more at: https://www.imf.org/en/About/Factsheets/Sheets/2023/special-drawing-rights-sdr + + Args: + unit_basis: The unit basis for the exchange rate. Default is "SDR" i.e. 1 SDR in USD. Other option is "USD" i.e. 1 USD in SDR + + Returns: + A DataFrame with the exchange rate data + """ + + logger.info("Fetching exchange rate data") + + df = get_exchange_rates_data() + return parse_data(df, unit_basis) diff --git a/src/imf_reader/sdr/read_interest_rate.py b/src/imf_reader/sdr/read_interest_rate.py new file mode 100644 index 0000000..7254f9b --- /dev/null +++ b/src/imf_reader/sdr/read_interest_rate.py @@ -0,0 +1,126 @@ +"""Module to read SDR interest and exchange rates from the IMF website + +""" + +import requests +import pandas as pd +import io +from functools import lru_cache + +from imf_reader.config import logger + + +BASE_URL: str = "https://www.imf.org/external/np/fin/data/sdr_ir.aspx" + + +def get_interest_rates_data(): + """Read the data from the IMF website""" + + data = { + "__EVENTTARGET": "lbnTSV", + } + + try: + response = requests.post(BASE_URL, data=data) + response.raise_for_status() + + except requests.exceptions.RequestException as e: + raise ConnectionError(f"Could not connect to {BASE_URL}. Error: {str(e)}") + + try: + return pd.read_csv( + io.BytesIO(response.content), delimiter="/t", engine="python" + ) + + except pd.errors.ParserError as e: + raise ValueError(f"Could not parse data. Error: {str(e)}") + + +def preprocess_data(df: pd.DataFrame): + """ + Preprocess the input DataFrame by splitting columns and setting headers. + """ + df = df.iloc[:, 0].str.split("\t", expand=True) + df.columns = df.iloc[0] + df = df.iloc[1:] + + # Ensure required columns are present + columns = { + "Effective from": "effective_from", + "Effective to": "effective_to", + } + for column in columns: + if column not in df.columns: + raise KeyError(f"Missing required column: {column}") + + return ( + df.rename(columns=columns) + .loc[:, columns.values()] + .dropna(subset=["effective_to"]) + ) + + +def _filter_data(df: pd.DataFrame) -> pd.DataFrame: + """ + Filter the DataFrame to separate rows for dates and SDR interest rates. + """ + return df.loc[ + lambda d: ~d["effective_from"].isin(["Total", "Floor for SDR Interest Rate"]) + ].reset_index(drop=True) + + +def _format_data(df: pd.DataFrame) -> pd.DataFrame: + """ + Format the filtered DataFrame into a clean DataFrame with interest rates and dates. + """ + dates_df = ( + df.loc[lambda d: d["effective_from"] != "SDR Interest Rate"] + .drop_duplicates() + .reset_index(drop=True) + ) + + sdr_df = ( + df.loc[lambda d: d["effective_from"] == "SDR Interest Rate"] + .iloc[:, 1:2] + .reset_index(drop=True) + ) + + sdr_df.columns = ["interest_rate"] + + return sdr_df.join(dates_df) + + +def clean_data(df: pd.DataFrame) -> pd.DataFrame: + """Cleaning/parsing steps for the data. split tab separated value into separate columns, rename columns, assign types, and additional formatting""" + + df = preprocess_data(df) + return ( + df.pipe(_filter_data) + .pipe(_format_data) + .assign( + interest_rate=lambda d: pd.to_numeric(d.interest_rate, errors="coerce"), + effective_from=lambda d: pd.to_datetime(d.effective_from), + effective_to=lambda d: pd.to_datetime(d.effective_to), + ) + ) + + +@lru_cache +def fetch_interest_rates() -> pd.DataFrame: + """Fetch the historic SDR interest rates from the IMF + + The SDR interest rate is based on the sum of the multiplicative products in SDR terms of the currency + amounts in the SDR valuation basket, the level of the interest rate on the financial + instrument of each component currency in the basket, and the exchange rate of each currency + against the SDR. The SDR interest rate for the current week is released on Sunday morning, Washington D.C. time. + + returns: + A DataFrame with the historical SDR interest rates + """ + + logger.info("Fetching SDR interest rates") + + df = get_interest_rates_data() + df = clean_data(df) + + return df diff --git a/src/imf_reader/utils.py b/src/imf_reader/utils.py new file mode 100644 index 0000000..0acaa1c --- /dev/null +++ b/src/imf_reader/utils.py @@ -0,0 +1,26 @@ +"""Utility functions""" + +import requests + + +def make_request(url: str) -> requests.models.Response: + """Make a request to a url. + + Args: + url: url to make request to + + Returns: + requests.models.Response: response object + """ + + try: + response = requests.get(url) + if response.status_code != 200: + raise ConnectionError( + f"Could not connect to {url}. Status code: {response.status_code}" + ) + + return response + + except requests.exceptions.RequestException as e: + raise ConnectionError(f"Could not connect to {url}. Error: {str(e)}") diff --git a/src/imf_reader/weo/scraper.py b/src/imf_reader/weo/scraper.py index cdd76e4..e6db2d5 100644 --- a/src/imf_reader/weo/scraper.py +++ b/src/imf_reader/weo/scraper.py @@ -6,33 +6,11 @@ from zipfile import ZipFile, BadZipFile from imf_reader.config import NoDataError, logger +from imf_reader.utils import make_request BASE_URL = "https://www.imf.org/" -def make_request(url: str) -> requests.models.Response: - """Make a request to a url. - - Args: - url: url to make request to - - Returns: - requests.models.Response: response object - """ - - try: - response = requests.get(url) - if response.status_code != 200: - raise ConnectionError( - f"Could not connect to {url}. Status code: {response.status_code}" - ) - - return response - - except requests.exceptions.RequestException as e: - raise ConnectionError(f"Could not connect to {url}. Error: {str(e)}") - - def get_soup(month: str, year: str | int) -> BeautifulSoup: """Get the BeautifulSoup object of the IMF WEO website. diff --git a/tests/test_sdr/test_read_announcements.py b/tests/test_sdr/test_read_announcements.py new file mode 100644 index 0000000..14d3a00 --- /dev/null +++ b/tests/test_sdr/test_read_announcements.py @@ -0,0 +1,102 @@ +import unittest +from unittest.mock import patch, MagicMock +import pandas as pd +from bs4 import BeautifulSoup +from imf_reader.sdr import read_announcements + + +class TestReadAnnouncements(unittest.TestCase): + """Tests functions in the read_announcements module.""" + + @patch("pandas.read_csv") + def test_read_tsv(self, mock_read_csv): + """Ensure read_tsv processes well-formated tsv correctly and raises ValueError on malformed data.""" + # Mock successful TSV read + mock_read_csv.return_value = pd.DataFrame({"A": [1], "B": [2]}) + result = read_announcements.read_tsv("mock_url") + self.assertTrue(isinstance(result, pd.DataFrame)) + + # Mock failure + mock_read_csv.side_effect = pd.errors.ParserError + with self.assertRaises(ValueError): + read_announcements.read_tsv("mock_url") + + def test_clean_df_correct_format(self): + """Test clean_df with the expected format.""" + # Mock input DataFrame + raw_data = pd.DataFrame({0: ["", "", "", "Country A\t$100\t$200"]}) + expected_data = pd.DataFrame( + { + "entity": ["Country A", "Country A"], + "indicator": ["holdings", "allocations"], + "value": [100, 200], + } + ) + + result = read_announcements.clean_df(raw_data) + pd.testing.assert_frame_equal(result, expected_data) + + def test_clean_df_empty(self): + """Test clean_df with an empty DataFrame""" + input_df = pd.DataFrame() + with self.assertRaises(IndexError): + read_announcements.clean_df(input_df) + + def test_format_date(self): + """Test format_date computes last day of a given month/year.""" + self.assertEqual(read_announcements.format_date(2, 2024), "2024-2-29") + self.assertEqual(read_announcements.format_date(1, 2023), "2023-1-31") + + @patch("imf_reader.sdr.read_announcements.read_tsv") + @patch("imf_reader.sdr.read_announcements.clean_df") + def test_get_holdings_and_allocations_data(self, mock_clean_df, mock_read_tsv): + """Test get_holdings_and_allocations_data caches data properly.""" + mock_read_tsv.return_value = pd.DataFrame() + mock_clean_df.return_value = pd.DataFrame({"data": [1]}) + + result = read_announcements.get_holdings_and_allocations_data(2024, 11) + self.assertTrue("data" in result.columns) + + @patch("imf_reader.sdr.read_announcements.make_request") + @patch("bs4.BeautifulSoup") + def test_get_latest_date(self, mock_soup, mock_make_request): + """Test correct extraction of get_latest_date.""" + # Simulate HTML content + html_content = """ + + +
+ + + +
Header
November 30, 2024
+ + + """ + mock_make_request.return_value.content = html_content + mock_soup.return_value = BeautifulSoup(html_content, "html.parser") + + # Call the function + year, month = read_announcements.get_latest_date() + + # Assert expected output + self.assertEqual((year, month), (2024, 11)) + + @patch("imf_reader.sdr.read_announcements.get_latest_date") + @patch("imf_reader.sdr.read_announcements.get_holdings_and_allocations_data") + def test_fetch_allocations_holdings(self, mock_get_data, mock_get_latest_date): + """Ensure fetch_allocations_holdings fetches data for the provided date or the latest date.""" + # Mock latest date and data fetch + mock_get_latest_date.return_value = (2024, 11) + mock_get_data.return_value = pd.DataFrame({"data": [1]}) + + result = read_announcements.fetch_allocations_holdings() + self.assertTrue("data" in result.columns) + + # Test with specific date + result = read_announcements.fetch_allocations_holdings((2023, 10)) + mock_get_data.assert_called_with(2023, 10) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_sdr/test_read_exchange_rate.py b/tests/test_sdr/test_read_exchange_rate.py new file mode 100644 index 0000000..0922fd3 --- /dev/null +++ b/tests/test_sdr/test_read_exchange_rate.py @@ -0,0 +1,210 @@ +from unittest.mock import patch, MagicMock, ANY +import pytest +import requests +import pandas as pd +from imf_reader.sdr.read_exchange_rate import ( + preprocess_data, + fetch_exchange_rates, + get_exchange_rates_data, + extract_exchange_series, + extract_dates_series, + parse_data, + BASE_URL, +) + + +@pytest.fixture +def input_df(): + df = pd.DataFrame( + { + "SDR Valuations": [ + "Report date\tCurrency Unit\tCurrency amount\tExchange Rate", + "2023-11-30\tEuro\t0.456\t-1.234", + "U.S.$1.00 = SDR\t0.123", + "SDR1 = US$\t0.321", + ] + } + ) + return df + + +class TestExchangeRateModule: + + @pytest.fixture(autouse=True) + def clear_cache(self): + """Clear cache before each test.""" + fetch_exchange_rates.cache_clear() + + @patch("requests.post") + def test_get_exchange_rates_data_success(self, mock_post): + """Test successful data retrieval and parsing""" + # Mock the response content with a valid TSV format + mock_response = MagicMock() + mock_response.content = ( + b"Column1\tColumn2\n2023-11-30\t1.234\n2023-12-01\t0.789\n" + ) + mock_response.raise_for_status = MagicMock() + mock_post.return_value = mock_response + + expected_df = pd.DataFrame( + {"Column1\tColumn2": ["2023-11-30\t1.234", "2023-12-01\t0.789"]} + ) + result = get_exchange_rates_data() + + # Assertions + pd.testing.assert_frame_equal(result, expected_df) + mock_post.assert_called_once_with(BASE_URL, data={"__EVENTTARGET": "lbnTSV"}) + + def test_get_exchange_rates_data_connection_error(self): + """Test ConnectionError is raised when requests.post fails.""" + with patch("requests.post") as mock_post: + # Simulate raising a requests.exceptions.RequestException + mock_post.side_effect = requests.exceptions.RequestException( + "Network error" + ) + + # Verify the exception + with pytest.raises( + ConnectionError, + match=f"Could not connect to {BASE_URL}", + ): + get_exchange_rates_data() + + # Verify the mock was called with the expected arguments + mock_post.assert_called_once_with( + BASE_URL, data={"__EVENTTARGET": "lbnTSV"} + ) + + def test_get_exchange_rates_data_parse_error(self): + """Test ValueError is raised when parsing fails.""" + with patch("requests.post") as mock_post, patch( + "pandas.read_csv" + ) as mock_read_csv: + # Mock the response content with invalid data + mock_response = MagicMock() + mock_response.content = b"invalid data" + mock_response.raise_for_status = MagicMock() + mock_post.return_value = mock_response + + # Simulate pd.read_csv raising a ParserError + mock_read_csv.side_effect = pd.errors.ParserError("Parsing error") + + # Use pytest.raises to assert the ValueError + with pytest.raises(ValueError, match="Could not parse data"): + get_exchange_rates_data() + + # Assertions + mock_post.assert_called_once_with( + BASE_URL, data={"__EVENTTARGET": "lbnTSV"} + ) + mock_read_csv.assert_called_once_with(ANY, delimiter="/t", engine="python") + + def test_preprocess_data_success(self, input_df): + """Test preprocessing of the DataFrame""" + + expected_df = pd.DataFrame( + { + "Report date": ["2023-11-30", "U.S.$1.00 = SDR", "SDR1 = US$"], + "Currency Unit": ["Euro", "0.123", "0.321"], + "Currency amount": ["0.456", None, None], + "Exchange Rate": ["-1.234", None, None], + } + ) + expected_df.columns.name = 0 + result = preprocess_data(input_df) + + # Assertion + pd.testing.assert_frame_equal(result, expected_df) + + def test_preprocess_data_missing_column(self): + """Test that KeyError is raised when 'Report date' column is missing.""" + # Create the input DataFrame + input_df = pd.DataFrame( + [ + "Other Column\tCurrency Unit\tCurrency amount\tExchange Rate", + "2023-11-30\tEuro\t0.456\t-1.234", + "U.S.$1.00 = SDR\t0.123", + "SDR1 = US$\t0.321", + ] + ) + + # Assert that KeyError is raised with the correct message + with pytest.raises(KeyError, match="Missing required column: Report date"): + preprocess_data(input_df) + + @pytest.mark.parametrize( + "currency_code, expected_xrate", + [ + ("U.S.$1.00 = SDR", "0.123"), + ("SDR1 = US$", "0.321"), + ], + ) + def test_extract_exchange_series(self, input_df, currency_code, expected_xrate): + """Test extracting the exchange series for a specific column value""" + input_df = preprocess_data(input_df) + result = extract_exchange_series(input_df, currency_code) + expected_series = pd.Series([expected_xrate], name="Currency Unit") + + # Assertion + pd.testing.assert_series_equal(result, expected_series) + + def test_extract_dates_series(self, input_df): + """Test extracting unique dates from the DataFrame""" + preprocessed_df = preprocess_data(input_df) + result = extract_dates_series(preprocessed_df) + expected_series = pd.Series(["2023-11-30"], name="Report date") + + # Assertion + pd.testing.assert_series_equal(result, expected_series) + + @pytest.mark.parametrize( + "currency_code, expected_xrate", + [ + ("USD", 0.123), + ("SDR", 0.321), + ], + ) + def test_parse_data_valid_input(self, input_df, currency_code, expected_xrate): + """Test parsing valid input DataFrame with mocked helpers""" + + expected_df = pd.DataFrame( + {"date": pd.to_datetime(["2023-11-30"]), "exchange_rate": [expected_xrate]}, + ) + result = parse_data(input_df, currency_code) + + # Assertions + pd.testing.assert_frame_equal(result, expected_df) + assert result.date.dtype == "datetime64[ns]" + assert result.exchange_rate.dtype == "float64" + + def test_parse_data_invalid_unit_basis(self, input_df): + """Test parse_data raises error on invalid unit_basis.""" + # Assert that ValueError is raised when passing an invalid unit_basis + with pytest.raises( + ValueError, match="unit_basis must be either 'SDR' or 'USD'" + ): + parse_data(input_df, "INVALID") + + @patch("imf_reader.sdr.read_exchange_rate.get_exchange_rates_data") + @patch("imf_reader.sdr.read_exchange_rate.parse_data") + def test_fetch_exchange_rates(self, mock_parse_data, mock_get_data, input_df): + """Test fetching exchange rates""" + # Mock return values for the patched functions + mock_get_data.return_value = input_df + mock_get_data.return_value = input_df + mock_parse_data.return_value = pd.DataFrame( + {"date": pd.to_datetime(["2023-11-30"]), "exchange_rate": [0.123]} + ) + expected_df = pd.DataFrame( + {"date": pd.to_datetime(["2023-11-30"]), "exchange_rate": [0.123]} + ) + + # Mock the logger + with patch("imf_reader.sdr.read_exchange_rate.logger.info") as mock_logger: + result = fetch_exchange_rates("USD") + + # Assertions + mock_get_data.assert_called_once() + mock_parse_data.assert_called_once_with(mock_get_data.return_value, "USD") + pd.testing.assert_frame_equal(result, expected_df) + mock_logger.assert_called_once_with("Fetching exchange rate data") diff --git a/tests/test_sdr/test_read_interest_rate.py b/tests/test_sdr/test_read_interest_rate.py new file mode 100644 index 0000000..06693e1 --- /dev/null +++ b/tests/test_sdr/test_read_interest_rate.py @@ -0,0 +1,250 @@ +import pytest +import pandas as pd +import requests +from unittest.mock import patch, MagicMock, ANY +from io import BytesIO +from imf_reader.sdr.read_interest_rate import ( + BASE_URL, + get_interest_rates_data, + preprocess_data, + _filter_data, + _format_data, + clean_data, + fetch_interest_rates, +) + + +@pytest.fixture +def input_df(): + df = pd.DataFrame( + { + "SDR Interest Rate Calculation": [ + "Effective from\tEffective to\tCurrency Unit\tCurrency amount\tExchange rate", + "01/12/2024\t05/12/2024\tN/A\tN/A", + "SDR Interest Rate\t1.50", + "06/12/2024\t08/12/2024\tN/A\tN/A", + "Total\t2.75", + "09/12/2024\t12/12/2024\tN/A\tN/A", + "Floor for SDR Interest Rate\t3.50", + "empty row", + ] + } + ) + return df + + +class TestReadInterestRate: + + @pytest.fixture(autouse=True) + def clear_cache(self): + """Clear cache before each test.""" + fetch_interest_rates.cache_clear() + + @patch("requests.post") + def test_get_interest_rates_data(self, mock_post): + """Test successful data retrieval and parsing""" + # Mock the response content with a valid TSV format + mock_response = MagicMock() + mock_response.content = ( + b"Column1\tColumn2\n2023-11-30\t1.234\n2023-12-01\t0.789\n" + ) + mock_response.raise_for_status = MagicMock() + mock_post.return_value = mock_response + + expected_df = pd.DataFrame( + {"Column1\tColumn2": ["2023-11-30\t1.234", "2023-12-01\t0.789"]} + ) + result = get_interest_rates_data() + + # Assertions + pd.testing.assert_frame_equal(result, expected_df) + mock_post.assert_called_once_with(BASE_URL, data={"__EVENTTARGET": "lbnTSV"}) + + def test_get_interest_rates_data_connection_error(self): + """Test ConnectionError is raised when requests.post fails.""" + with patch("requests.post") as mock_post: + # Simulate raising a requests.exceptions.RequestException + mock_post.side_effect = requests.exceptions.RequestException( + "Network error" + ) + + # Verify the exception + with pytest.raises( + ConnectionError, + match=f"Could not connect to {BASE_URL}", + ): + get_interest_rates_data() + + # Verify the mock was called with the expected arguments + mock_post.assert_called_once_with( + BASE_URL, data={"__EVENTTARGET": "lbnTSV"} + ) + + def test_get_interest_rates_data_parse_error(self): + """Test ValueError is raised when parsing fails.""" + with patch("requests.post") as mock_post, patch( + "pandas.read_csv" + ) as mock_read_csv: + # Mock the response content with invalid data + mock_response = MagicMock() + mock_response.content = b"invalid data" + mock_response.raise_for_status = MagicMock() + mock_post.return_value = mock_response + + # Simulate pd.read_csv raising a ParserError + mock_read_csv.side_effect = pd.errors.ParserError("Parsing error") + + # Use pytest.raises to assert the ValueError + with pytest.raises(ValueError, match="Could not parse data"): + get_interest_rates_data() + + # Assertions + mock_post.assert_called_once_with( + BASE_URL, data={"__EVENTTARGET": "lbnTSV"} + ) + mock_read_csv.assert_called_once_with(ANY, delimiter="/t", engine="python") + + def test_preprocess_data_success(self, input_df): + """Test preprocess_data function with valid input.""" + expected_df = pd.DataFrame( + { + "effective_from": [ + "01/12/2024", + "SDR Interest Rate", + "06/12/2024", + "Total", + "09/12/2024", + "Floor for SDR Interest Rate", + ], + "effective_to": [ + "05/12/2024", + "1.50", + "08/12/2024", + "2.75", + "12/12/2024", + "3.50", + ], + } + ) + expected_df.columns.name = 0 + result = preprocess_data(input_df).reset_index(drop=True) + + # Validate the structure and content of the DataFrame + pd.testing.assert_frame_equal(result, expected_df) + + def test_preprocess_data_missing_column(self): + """Test preprocess_data function raises KeyError when required columns are missing.""" + invalid_df = pd.DataFrame( + { + "SDR Interest Rate Calculation": [ + "Some column\tAnother column", + "01/12/2024\tN/A", + ] + } + ) + + with pytest.raises(KeyError, match="Missing required column: Effective from"): + preprocess_data(invalid_df) + + def test_filter_data_valid(self, input_df): + """Test _filter_data with a valid DataFrame.""" + + input_df = preprocess_data(input_df) + result = _filter_data(input_df) + + # Expected Output DataFrame + expected_df = pd.DataFrame( + { + "effective_from": [ + "01/12/2024", + "SDR Interest Rate", + "06/12/2024", + "09/12/2024", + ], + "effective_to": ["05/12/2024", "1.50", "08/12/2024", "12/12/2024"], + } + ) + expected_df.columns.name = 0 + + # Validate the results + pd.testing.assert_frame_equal(result, expected_df) + + def test_format_data_valid(self, input_df): + """Test _format_data with valid input.""" + expected_df = pd.DataFrame( + { + "interest_rate": ["1.50"], + "effective_from": ["01/12/2024"], + "effective_to": ["05/12/2024"], + } + ) + + result = ( + preprocess_data(input_df) + .pipe(_filter_data) + .pipe(_format_data) + .reset_index(drop=True) + ) + + # Validate the structure and content of the DataFrame + pd.testing.assert_frame_equal(result, expected_df) + + def test_clean_data_valid(self, input_df): + """Test clean_data with valid input DataFrame.""" + expected_df = pd.DataFrame( + { + "interest_rate": [1.50], + "effective_from": [ + pd.Timestamp("01/12/2024"), + ], + "effective_to": [ + pd.Timestamp("05/12/2024"), + ], + } + ) + + result = clean_data(input_df).reset_index(drop=True) + + # Validate the structure and content of the resulting DataFrame + pd.testing.assert_frame_equal(result, expected_df) + + @patch("imf_reader.sdr.read_interest_rate.get_interest_rates_data") + @patch("imf_reader.sdr.read_interest_rate.clean_data") + def test_fetch_exchange_rates(self, mock_clean_data, mock_get_data, input_df): + """Test fetching exchange rates""" + # Mock return values for the patched functions + mock_get_data.return_value = input_df + mock_get_data.return_value = input_df + mock_clean_data.return_value = pd.DataFrame( + { + "interest_rate": [1.50], + "effective_from": [ + pd.Timestamp("01/12/2024"), + ], + "effective_to": [ + pd.Timestamp("05/12/2024"), + ], + } + ) + + expected_df = pd.DataFrame( + { + "interest_rate": [1.50], + "effective_from": [ + pd.Timestamp("01/12/2024"), + ], + "effective_to": [ + pd.Timestamp("05/12/2024"), + ], + } + ) + + # Mock the logger + with patch("imf_reader.sdr.read_interest_rate.logger.info") as mock_logger: + result = fetch_interest_rates() + + # Assertions + mock_get_data.assert_called_once() + mock_clean_data.assert_called_once_with(mock_get_data.return_value) + pd.testing.assert_frame_equal(result, expected_df) + mock_logger.assert_called_once_with("Fetching SDR interest rates") diff --git a/tests/test_weo/test_scraper.py b/tests/test_weo/test_scraper.py index 52e9f0a..3ef8841 100644 --- a/tests/test_weo/test_scraper.py +++ b/tests/test_weo/test_scraper.py @@ -2,7 +2,6 @@ import pytest from unittest.mock import patch, Mock -import requests from bs4 import BeautifulSoup import io from zipfile import ZipFile, BadZipFile @@ -14,28 +13,6 @@ TEST_URL = "https://test.com" -def test_make_request(): - """Test make_request""" - - # test successful request - with patch("requests.get") as mock_get: - mock_get.return_value.status_code = 200 - response = scraper.make_request(TEST_URL) - assert response == mock_get.return_value - - # test failed request - with patch("requests.get") as mock_get: - mock_get.side_effect = requests.exceptions.RequestException - with pytest.raises(ConnectionError, match="Could not connect to"): - scraper.make_request(TEST_URL) - - # test when status code is not 200 - with patch("requests.get") as mock_get: - mock_get.return_value.status_code = 404 - with pytest.raises(ConnectionError, match="Could not connect to"): - scraper.make_request(TEST_URL) - - def test_get_soup(): """Test get_soup"""