Skip to content

Commit

Permalink
Merge pull request #4 from ONEcampaign/v1-dev
Browse files Browse the repository at this point in the history
V1 dev
  • Loading branch information
jm-rivera authored Jun 6, 2024
2 parents bcb41bf + e723259 commit 689963c
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 11 deletions.
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,16 @@ be either "April" or "October".
df = weo.fetch_data(version=("April", 2020))
```

If the version of the data fetched is needed, it can be
retrieved from the function attribute `last_version_fetched`.

```python
df = weo.fetch_data()
print(weo.fetch_data.last_version_fetched)
# >>> ('April', 2024) or whichever version was just fetched
```


Caching is used to avoid multiple requests to the IMF website for the same data and to enhance performance.
Caching uses the LRU (Least Recently Used) algorithm and stores data in RAM. The cache is cleared when the program terminates.
To clear the cache manually, use the `clear_cache` function.
Expand Down
21 changes: 10 additions & 11 deletions src/imf_reader/weo/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@
}

# numeric columns and the type to convert them to
SDMX_NUMERIC_COLUMNS = [
"REF_AREA_CODE",
"OBS_VALUE",
"SCALE_CODE",
"LASTACTUALDATE",
"TIME_PERIOD",
]
SDMX_NUMERIC_COLUMNS = {
"REF_AREA_CODE": "Int16",
"OBS_VALUE": "Float64",
"SCALE_CODE": "Int16",
"LASTACTUALDATE": "Int16",
"TIME_PERIOD": "Int16",
}


class SDMXParser:
Expand Down Expand Up @@ -122,13 +122,12 @@ def check_folder(sdmx_folder: ZipFile) -> None:
def clean_numeric_columns(df: pd.DataFrame) -> pd.DataFrame:
"""Cleans the numeric columns
Replaces "n/a" and "--" with pd.NA and converts the columns to numeric.
Replaces "n/a" and "--" with pd.NA and converts the columns to numeric and the correct type.
"""

df[SDMX_NUMERIC_COLUMNS] = (
df[SDMX_NUMERIC_COLUMNS].replace(["n/a", "--"], pd.NA).apply(pd.to_numeric)
)
for column, dtype in SDMX_NUMERIC_COLUMNS.items():
df[column] = df[column].replace(["n/a", "--"], pd.NA).astype(dtype)

return df

Expand Down
3 changes: 3 additions & 0 deletions src/imf_reader/weo/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,9 @@ def fetch_data(version: Optional[Version] = None) -> pd.DataFrame:
version = validate_version(version)
df = _fetch(version)
logger.info(f"Data fetched successfully for version {version[0]} {version[1]}")
fetch_data.last_version_fetched = (
version # store the version fetched as function attribute
)
return df

# if no version is passed, generate the latest version and fetch the data
Expand Down
18 changes: 18 additions & 0 deletions tests/test_weo/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,24 @@ def test_fetch_data(mock_fetch):
mock_fetch.assert_called_with(reader.gen_latest_version())


@patch("imf_reader.weo.reader.gen_latest_version")
@patch("imf_reader.weo.reader._fetch")
def test_fetch_data_attribute(mock_fetch, mock_gen_latest_version):
    """Check that fetch_data records the fetched version on itself.

    After each call, ``reader.fetch_data.last_version_fetched`` must equal
    the version that was requested: the explicit one when a version is
    passed, or the value returned by ``gen_latest_version`` when it is not.
    Both ``_fetch`` and ``gen_latest_version`` are patched for this test.
    """
    explicit_version = ("April", 2022)
    latest_version = ("April", 2024)

    # Configure the patched collaborators before exercising fetch_data.
    mock_gen_latest_version.return_value = latest_version
    mock_fetch.return_value = pd.DataFrame(
        {"column1": [1, 2, 3], "column2": [4, 5, 6]}
    )

    # Explicit version: the attribute mirrors what the caller asked for.
    reader.fetch_data(explicit_version)
    assert reader.fetch_data.last_version_fetched == explicit_version

    # No version: the attribute mirrors the generated latest version.
    reader.fetch_data()
    assert reader.fetch_data.last_version_fetched == latest_version


@patch("imf_reader.weo.reader._fetch.cache_clear")
def test_clear_cache(mock_cache_clear):
"""Test for clear_cache method."""
Expand Down

0 comments on commit 689963c

Please sign in to comment.