diff --git a/src/imf_reader/sdr/__init__.py b/src/imf_reader/sdr/__init__.py new file mode 100644 index 0000000..87113c4 --- /dev/null +++ b/src/imf_reader/sdr/__init__.py @@ -0,0 +1,52 @@ +"""Special Drawing Rights (SDR) reader module. + +This module offers access to the IMF's Special Drawing Rights (SDR) data. +The SDR is an international reserve asset created by the IMF in 1969. +It is not a currency, but the holder of SDRs can exchange them for usable currencies in times of need. + +Read more about SDRs at: https://www.imf.org/en/About/Factsheets/Sheets/2023/special-drawing-rights-sdr + + +Usage: + +Import the module + +```python +from imf_reader import sdr +``` + +Read allocations and holdings data + +```python +sdr.fetch_allocations_holdings() +``` +SDR holdings and allocations are published at a monthly frequency. The function fetches the latest data available. + +To retrieve SDR holdings and allocations for a specific month and year, e.g. April 2021, pass the year and month as a tuple + +```python +sdr.fetch_allocations_holdings((2021, 4)) +``` + +Read interest rates + +```python +sdr.fetch_interest_rates() +``` + +Read exchange rates + +```python +sdr.fetch_exchange_rates() +``` +By default, the exchange rate is in USD per 1 SDR. 
To get the exchange rate in SDRs per 1 USD, pass the unit basis as "USD" + +```python +sdr.fetch_exchange_rates("USD") +``` + +""" + +from imf_reader.sdr.read_interest_rate import fetch_interest_rates +from imf_reader.sdr.read_exchange_rate import fetch_exchange_rates +from imf_reader.sdr.read_announcements import fetch_allocations_holdings diff --git a/src/imf_reader/sdr/read_announcements.py b/src/imf_reader/sdr/read_announcements.py new file mode 100644 index 0000000..113acb9 --- /dev/null +++ b/src/imf_reader/sdr/read_announcements.py @@ -0,0 +1,103 @@ +"""Module to get SDR data from the IMF website + + +info: https://www.imf.org/en/About/Factsheets/Sheets/2023/special-drawing-rights-sdr + +""" + +from functools import lru_cache +import pandas as pd +import calendar +from bs4 import BeautifulSoup +from datetime import datetime + +from imf_reader.utils import make_request +from imf_reader.config import logger, NoDataError + +BASE_URL = "https://www.imf.org/external/np/fin/tad/" +MAIN_PAGE_URL = "https://www.imf.org/external/np/fin/tad/extsdr1.aspx" + + +def read_tsv(url: str) -> pd.DataFrame: + """Read a tsv file from a url and return a dataframe""" + + try: + return pd.read_csv(url, delimiter="/t", engine="python") + + except pd.errors.ParserError: + raise ValueError("SDR _data not available for this date") + + +def clean_df(df: pd.DataFrame) -> pd.DataFrame: + """Clean the SDR dataframe""" + + df = df.iloc[3:, 0].str.split("\t", expand=True) + df.columns = ["entity", "holdings", "allocations"] + + return df.assign( + holdings=lambda d: pd.to_numeric( + d.holdings.str.replace(r"[^\d.]", "", regex=True), errors="coerce" + ), + allocations=lambda d: pd.to_numeric( + d.allocations.str.replace(r"[^\d.]", "", regex=True), errors="coerce" + ), + ).melt( + id_vars="entity", value_vars=["holdings", "allocations"], var_name="indicator" + ) + + +def format_date(month: int, year: int) -> str: + """Return a date as year-month-day where day is the last day in the month""" + 
+ last_day = calendar.monthrange(year, month)[1] + return f"{year}-{month}-{last_day}" + + +@lru_cache +def get_holdings_and_allocations_data(year: int, month: int): + """Get sdr allocations and holdings data for a given month and year""" + + date = format_date(month, year) + url = f"{BASE_URL}extsdr2.aspx?date1key={date}&tsvflag=Y" + + logger.info(f"Fetching SDR data for date: {date}") + + df = read_tsv(url) + df = clean_df(df) + df["date"] = pd.to_datetime(date) + + return df + + +@lru_cache +def get_latest_date() -> tuple[int, int]: + """Get the latest date for which SDR data is available""" + + logger.info("Fetching latest date") + + response = make_request(MAIN_PAGE_URL) + soup = BeautifulSoup(response.content, "html.parser") + table = soup.find_all("table")[4] + row = table.find_all("tr")[1] + + date = row.td.text.strip() + date = datetime.strptime(date, "%B %d, %Y") + + # Extract the year and month as a tuple + return date.year, date.month + + +def fetch_allocations_holdings(date: tuple[int, int] | None = None) -> pd.DataFrame: + """Fetch SDR holdings and allocations data for a given date + + Args: + date: A tuple of year and month e.g (2024, 11). If None, the latest data announcements released are fetched + + returns: + pd.DataFrame: A dataframe with the SDR data + """ + + if date is None: + date = get_latest_date() + + return get_holdings_and_allocations_data(*date) diff --git a/src/imf_reader/sdr/read_exchange_rate.py b/src/imf_reader/sdr/read_exchange_rate.py new file mode 100644 index 0000000..23a8eb7 --- /dev/null +++ b/src/imf_reader/sdr/read_exchange_rate.py @@ -0,0 +1,121 @@ +"""Module to read exchange rate data from the IMF's Special Drawing Rights (SDR) Valuation dataset. 
# Definitions from src/imf_reader/sdr/read_exchange_rate.py

BASE_URL = "https://www.imf.org/external/np/fin/data/rms_sdrv.aspx"

# Seconds to wait for the IMF server before giving up; without a timeout
# ``requests`` waits indefinitely.
REQUEST_TIMEOUT = 30

# Value of the "Report date" column on the rows that carry the rate for each
# unit basis.
_UNIT_BASIS_LABELS = {"USD": "U.S.$1.00 = SDR", "SDR": "SDR1 = US$"}


def _unit_basis_label(unit_basis: str) -> str:
    """Return the row label for a unit basis, or raise ValueError if unknown."""

    try:
        return _UNIT_BASIS_LABELS[unit_basis]
    except (KeyError, TypeError):
        raise ValueError("unit_basis must be either 'SDR' or 'USD'") from None


def get_exchange_rates_data() -> pd.DataFrame:
    """Read the data from the IMF website.

    A POST with ``__EVENTTARGET=lbnTSV`` is sent to ``BASE_URL`` and the
    response body is parsed with pandas. The separator is the two-character
    string ``"/t"`` (not a tab); ``preprocess_data`` splits the first column
    on real tabs afterwards.  NOTE(review): confirm this is intentional.

    Returns:
        The raw dataframe as parsed by pandas.

    Raises:
        ConnectionError: if the request fails, times out or returns an error
            status.
        ValueError: if the response cannot be parsed.
    """

    data = {
        "__EVENTTARGET": "lbnTSV",
    }

    try:
        response = requests.post(BASE_URL, data=data, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()

    except requests.exceptions.RequestException as e:
        raise ConnectionError(
            f"Could not connect to {BASE_URL}. Error: {str(e)}"
        ) from e

    try:
        return pd.read_csv(
            io.BytesIO(response.content), delimiter="/t", engine="python"
        )

    except pd.errors.ParserError as e:
        raise ValueError(f"Could not parse data. Error: {str(e)}") from e


def preprocess_data(df: pd.DataFrame) -> pd.DataFrame:
    """Preprocess the input DataFrame by splitting columns and setting headers.

    The first column is split on tabs, the first resulting row becomes the
    header and is then dropped from the data.

    Raises:
        KeyError: if the "Report date" column is missing after the split.
    """
    df = df.iloc[:, 0].str.split("\t", expand=True)
    df.columns = df.iloc[0]
    df = df.iloc[1:].reset_index(drop=True)

    # Ensure required columns are present
    required_columns = ["Report date"]
    for column in required_columns:
        if column not in df.columns:
            raise KeyError(f"Missing required column: {column}")

    return df


def extract_exchange_series(df: pd.DataFrame, col_val: str) -> pd.Series:
    """Extract the exchange rate series for the given column value.

    Keeps the rows whose "Report date" equals ``col_val`` and returns their
    second column with a fresh 0-based index.
    """
    return (
        df.loc[lambda d: d["Report date"] == col_val].iloc[:, 1].reset_index(drop=True)
    )


def extract_dates_series(df: pd.DataFrame) -> pd.Series:
    """Extract the dates series from the DataFrame.

    Rows with a missing value in the fourth column are dropped; the
    de-duplicated first column of the remaining rows is returned with a
    fresh 0-based index.
    """
    return (
        df.dropna(subset=[df.columns[3]])
        .iloc[:, 0]
        .drop_duplicates()
        .reset_index(drop=True)
    )


def parse_data(df: pd.DataFrame, unit_basis: Literal["SDR", "USD"]) -> pd.DataFrame:
    """Parse the data from the IMF website.

    Args:
        df: raw dataframe as returned by ``get_exchange_rates_data``.
        unit_basis: "SDR" selects the "SDR1 = US$" rows, "USD" selects the
            "U.S.$1.00 = SDR" rows.

    Returns:
        A dataframe with a datetime ``date`` column and a numeric
        ``exchange_rate`` column; dates and rates are paired by position.

    Raises:
        ValueError: if ``unit_basis`` is neither "SDR" nor "USD".
    """

    # Validate unit basis
    col_val = _unit_basis_label(unit_basis)

    # Preprocess dataframe and extract relevant columns
    df = preprocess_data(df)
    exchange_series = extract_exchange_series(df, col_val)
    dates_series = extract_dates_series(df)

    return pd.DataFrame(
        {"date": dates_series, "exchange_rate": exchange_series}
    ).assign(
        date=lambda d: pd.to_datetime(d.date),
        exchange_rate=lambda d: pd.to_numeric(d.exchange_rate, errors="coerce"),
    )


@lru_cache
def fetch_exchange_rates(unit_basis: Literal["SDR", "USD"] = "SDR") -> pd.DataFrame:
    """Fetch the historic SDR exchange rates from the IMF

    The currency value of the SDR is determined by summing the values in U.S. dollars, based on market exchange rates, of a basket of major currencies (the U.S. dollar, Euro, Japanese yen, pound sterling and the Chinese renminbi). The SDR currency value is calculated daily except on IMF holidays, or whenever the IMF is closed for business, or on an ad-hoc basis to facilitate unscheduled IMF operations. The SDR valuation basket is reviewed and adjusted every five years.

    Read more at: https://www.imf.org/en/About/Factsheets/Sheets/2023/special-drawing-rights-sdr

    Results are memoised per ``unit_basis``; the returned dataframe is the
    cached object itself, so callers should copy it before modifying it.

    Args:
        unit_basis: The unit basis for the exchange rate. Default is "SDR" i.e. 1 SDR in USD. Other option is "USD" i.e. 1 USD in SDR

    Returns:
        A DataFrame with the exchange rate data

    Raises:
        ValueError: if ``unit_basis`` is neither "SDR" nor "USD".
    """

    # Reject an invalid unit basis before spending a network request on it.
    _unit_basis_label(unit_basis)

    logger.info("Fetching exchange rate data")

    df = get_exchange_rates_data()
    return parse_data(df, unit_basis)
# Definitions from src/imf_reader/sdr/read_interest_rate.py

BASE_URL: str = "https://www.imf.org/external/np/fin/data/sdr_ir.aspx"

# Seconds to wait for the IMF server before giving up; without a timeout
# ``requests`` waits indefinitely.
REQUEST_TIMEOUT = 30


def get_interest_rates_data() -> pd.DataFrame:
    """Read the data from the IMF website.

    A POST with ``__EVENTTARGET=lbnTSV`` is sent to ``BASE_URL`` and the
    response body is parsed with pandas. The separator is the two-character
    string ``"/t"`` (not a tab); ``preprocess_data`` splits the first column
    on real tabs afterwards.  NOTE(review): confirm this is intentional.

    Returns:
        The raw dataframe as parsed by pandas.

    Raises:
        ConnectionError: if the request fails, times out or returns an error
            status.
        ValueError: if the response cannot be parsed.
    """

    data = {
        "__EVENTTARGET": "lbnTSV",
    }

    try:
        response = requests.post(BASE_URL, data=data, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()

    except requests.exceptions.RequestException as e:
        raise ConnectionError(
            f"Could not connect to {BASE_URL}. Error: {str(e)}"
        ) from e

    try:
        return pd.read_csv(
            io.BytesIO(response.content), delimiter="/t", engine="python"
        )

    except pd.errors.ParserError as e:
        raise ValueError(f"Could not parse data. Error: {str(e)}") from e


def preprocess_data(df: pd.DataFrame) -> pd.DataFrame:
    """Preprocess the input DataFrame by splitting columns and setting headers.

    The first column is split on tabs, the first resulting row becomes the
    header, and only the two date columns are kept (renamed to
    ``effective_from`` / ``effective_to``). Rows without an ``effective_to``
    value are dropped.

    Raises:
        KeyError: if "Effective from" or "Effective to" is missing.
    """
    df = df.iloc[:, 0].str.split("\t", expand=True)
    df.columns = df.iloc[0]
    df = df.iloc[1:]

    # Ensure required columns are present
    columns = {
        "Effective from": "effective_from",
        "Effective to": "effective_to",
    }
    for column in columns:
        if column not in df.columns:
            raise KeyError(f"Missing required column: {column}")

    return (
        df.rename(columns=columns)
        # An explicit list is a documented indexer type; a dict view is not.
        .loc[:, list(columns.values())]
        .dropna(subset=["effective_to"])
    )


def _filter_data(df: pd.DataFrame) -> pd.DataFrame:
    """Drop the "Total" and "Floor for SDR Interest Rate" rows.

    What remains are the date rows and the "SDR Interest Rate" rows, with a
    fresh 0-based index.
    """
    return df.loc[
        lambda d: ~d["effective_from"].isin(["Total", "Floor for SDR Interest Rate"])
    ].reset_index(drop=True)


def _format_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Format the filtered DataFrame into a clean DataFrame with interest rates and dates.

    Date rows are de-duplicated; on the "SDR Interest Rate" rows the rate
    is taken from the second column (``effective_to``). The two pieces are
    then paired by position.
    """
    dates_df = (
        df.loc[lambda d: d["effective_from"] != "SDR Interest Rate"]
        .drop_duplicates()
        .reset_index(drop=True)
    )

    sdr_df = (
        df.loc[lambda d: d["effective_from"] == "SDR Interest Rate"]
        .iloc[:, 1:2]
        .reset_index(drop=True)
    )

    sdr_df.columns = ["interest_rate"]

    return sdr_df.join(dates_df)


def clean_data(df: pd.DataFrame) -> pd.DataFrame:
    """Cleaning/parsing steps for the data.

    Splits the tab separated values into separate columns, renames columns,
    pairs rates with their dates and assigns types.

    Returns:
        A dataframe with the columns ``interest_rate`` (numeric),
        ``effective_from`` and ``effective_to`` (datetime).
    """

    df = preprocess_data(df)
    return (
        df.pipe(_filter_data)
        .pipe(_format_data)
        .assign(
            interest_rate=lambda d: pd.to_numeric(d.interest_rate, errors="coerce"),
            effective_from=lambda d: pd.to_datetime(d.effective_from),
            effective_to=lambda d: pd.to_datetime(d.effective_to),
        )
    )


@lru_cache
def fetch_interest_rates() -> pd.DataFrame:
    """Fetch the historic SDR interest rates from the IMF

    The SDR interest rate is based on the sum of the multiplicative products in SDR terms of the currency
    amounts in the SDR valuation basket, the level of the interest rate on the financial
    instrument of each component currency in the basket, and the exchange rate of each currency
    against the SDR. The SDR interest rate for the current week is released on Sunday morning, Washington D.C. time.

    The result is memoised; the returned dataframe is the cached object
    itself, so callers should copy it before modifying it.

    Returns:
        A DataFrame with the historical SDR interest rates
    """

    logger.info("Fetching SDR interest rates")

    df = get_interest_rates_data()
    df = clean_data(df)

    return df
# Definition from src/imf_reader/utils.py


def make_request(url: str, timeout: float = 30) -> requests.models.Response:
    """Make a request to a url.

    Args:
        url: url to make request to
        timeout: seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely.

    Returns:
        requests.models.Response: response object

    Raises:
        ConnectionError: if the request fails, times out, or the response
            status code is not 200.
    """

    # Only the network call sits inside the try, so the status check below
    # cannot be mistaken for a transport failure.
    try:
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException as e:
        raise ConnectionError(f"Could not connect to {url}. Error: {str(e)}") from e

    if response.status_code != 200:
        raise ConnectionError(
            f"Could not connect to {url}. Status code: {response.status_code}"
        )

    return response
diff --git a/tests/test_sdr/test_read_announcements.py b/tests/test_sdr/test_read_announcements.py new file mode 100644 index 0000000..14d3a00 --- /dev/null +++ b/tests/test_sdr/test_read_announcements.py @@ -0,0 +1,102 @@ +import unittest +from unittest.mock import patch, MagicMock +import pandas as pd +from bs4 import BeautifulSoup +from imf_reader.sdr import read_announcements + + +class TestReadAnnouncements(unittest.TestCase): + """Tests functions in the read_announcements module.""" + + @patch("pandas.read_csv") + def test_read_tsv(self, mock_read_csv): + """Ensure read_tsv processes well-formated tsv correctly and raises ValueError on malformed data.""" + # Mock successful TSV read + mock_read_csv.return_value = pd.DataFrame({"A": [1], "B": [2]}) + result = read_announcements.read_tsv("mock_url") + self.assertTrue(isinstance(result, pd.DataFrame)) + + # Mock failure + mock_read_csv.side_effect = pd.errors.ParserError + with self.assertRaises(ValueError): + read_announcements.read_tsv("mock_url") + + def test_clean_df_correct_format(self): + """Test clean_df with the expected format.""" + # Mock input DataFrame + raw_data = pd.DataFrame({0: ["", "", "", "Country A\t$100\t$200"]}) + expected_data = pd.DataFrame( + { + "entity": ["Country A", "Country A"], + "indicator": ["holdings", "allocations"], + "value": [100, 200], + } + ) + + result = read_announcements.clean_df(raw_data) + pd.testing.assert_frame_equal(result, expected_data) + + def test_clean_df_empty(self): + """Test clean_df with an empty DataFrame""" + input_df = pd.DataFrame() + with self.assertRaises(IndexError): + read_announcements.clean_df(input_df) + + def test_format_date(self): + """Test format_date computes last day of a given month/year.""" + self.assertEqual(read_announcements.format_date(2, 2024), "2024-2-29") + self.assertEqual(read_announcements.format_date(1, 2023), "2023-1-31") + + @patch("imf_reader.sdr.read_announcements.read_tsv") + 
@patch("imf_reader.sdr.read_announcements.clean_df") + def test_get_holdings_and_allocations_data(self, mock_clean_df, mock_read_tsv): + """Test get_holdings_and_allocations_data caches data properly.""" + mock_read_tsv.return_value = pd.DataFrame() + mock_clean_df.return_value = pd.DataFrame({"data": [1]}) + + result = read_announcements.get_holdings_and_allocations_data(2024, 11) + self.assertTrue("data" in result.columns) + + @patch("imf_reader.sdr.read_announcements.make_request") + @patch("bs4.BeautifulSoup") + def test_get_latest_date(self, mock_soup, mock_make_request): + """Test correct extraction of get_latest_date.""" + # Simulate HTML content + html_content = """ + +
+Header |
November 30, 2024 |