diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index daeab97..6593236 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -1,4 +1,4 @@
-name: Build and test [Python 3.7, 3.8, 3.9]
+name: Build and test [Python 3.9, 3.10, 3.11]
on: [push, pull_request]
@@ -7,7 +7,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
- python-version: [3.7, 3.8, 3.9]
+ python-version: ["3.9", "3.10", "3.11"]
steps:
- name: Checkout
diff --git a/PSL_catalog.json b/PSL_catalog.json
index d5bc007..f42eb6e 100644
--- a/PSL_catalog.json
+++ b/PSL_catalog.json
@@ -1,51 +1,24 @@
{
- "project_one_line": {
- "start_header": null,
- "end_header": null,
- "source": null,
- "type": "html",
- "data": "
microdf is a Python package for analyzing economic microdata as pandas DataFrames, with special functions for Tax-Calculator.
"
- },
- "project_overview": {
- "start_header": null,
- "end_header": null,
- "source": null,
- "type": "html",
- "data": "What is microdf?"
- },
- "core_maintainers": {
- "start_header": null,
- "end_header": null,
- "source": null,
- "type": "html",
- "data": "- Max Ghenis
"
- },
- "user_documentation": {
- "start_header": null,
- "end_header": null,
- "source": null,
- "type": "html",
- "data": ""
- },
- "contributor_overview": {
- "start_header": null,
- "end_header": null,
- "source": null,
- "type": "html",
- "data": ""
- },
- "user_changelog_recent": {
- "start_header": null,
- "end_header": null,
- "source": null,
- "type": "html",
- "data": ""
- },
- "link_to_webapp": {
- "start_header": null,
- "end_header": null,
- "source": null,
- "type": null,
- "data": null
+ "name": "microdf",
+ "img": "https://github.com/PSLmodels/microdf/blob/master/docs/microdf_logo.png?raw=true",
+ "banner_title": "microdf",
+ "banner_subtitle": "Analysis tools for working with survey microdata as DataFrames",
+ "detailed_description": "microdf is a Python package for analyzing economic microdata as pandas DataFrames, with special functions for handling sampling weights.",
+ "policy_area": "Survey data, data analysis",
+ "geography": "Not specific",
+ "language": "Python",
+ "maintainers": [
+ {
+ "name": "Max Ghenis",
+ "image": "https://policyengine.org/static/media/max-ghenis.536762d4b2439bf591f5.png",
+ "link": "mailto:max@policyengine.org"
+ }
+ ],
+ "links": {
+ "code_repository": "https://github.com/PSLmodels/microdf",
+ "user_documentation": "http://pslmodels.github.io/microdf/",
+ "contributor_documentation": "",
+ "webapp": "",
+ "recent_changes": "https://github.com/PSLmodels/microdf/releases"
}
}
diff --git a/microdf/charts.py b/microdf/charts.py
index 9d64ba4..cab747e 100644
--- a/microdf/charts.py
+++ b/microdf/charts.py
@@ -34,7 +34,7 @@ def quantile_pct_chg_plot(df1, df2, col1, col2, w1=None, w2=None, q=None):
# Plot.
fig, ax = plt.subplots()
markerline, stemlines, baseline = ax.stem(
- df.index_newline, df.pct_chg, use_line_collection=True
+ df.index_newline, df.pct_chg
)
plt.setp(baseline, color="gray", linewidth=0)
ax.yaxis.set_major_locator(mpl.ticker.MaxNLocator(integer=True))
diff --git a/microdf/generic.py b/microdf/generic.py
index a22e2b5..86149b7 100644
--- a/microdf/generic.py
+++ b/microdf/generic.py
@@ -661,7 +661,9 @@ def equals(self, other) -> bool:
@get_args_as_micro_series()
def groupby(self, by: Union[str, list], *args, **kwargs):
- """Returns a GroupBy object with MicroSeriesGroupBy objects for each column
+ """
+ Returns a GroupBy object with MicroSeriesGroupBy objects for
+ each column
:param by: column to group by
:type by: Union[str, list]
@@ -766,7 +768,9 @@ def poverty_count(
income: Union[MicroSeries, str],
threshold: Union[MicroSeries, str],
) -> int:
- """Calculates the number of entities with income below a poverty threshold.
+ """
+ Calculates the number of entities with income below a poverty
+ threshold.
:param income: income array or column name
:type income: Union[MicroSeries, str]
diff --git a/microdf/io.py b/microdf/io.py
index e8573e6..a7d5d67 100644
--- a/microdf/io.py
+++ b/microdf/io.py
@@ -1,9 +1,15 @@
import io
import zipfile
-from urllib.request import urlopen
-
+import requests
import pandas as pd
+HEADER = {
+ "User-Agent":
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) " +
+ "AppleWebKit/537.36 (KHTML, like Gecko) " +
+ "Chrome/50.0.2661.102 Safari/537.36"
+ }
+
def read_stata_zip(url: str, **kwargs) -> pd.DataFrame:
"""Reads zipped Stata file by URL.
@@ -19,8 +25,11 @@ def read_stata_zip(url: str, **kwargs) -> pd.DataFrame:
:returns: DataFrame.
+ :raises requests.HTTPError: If the server responds with a 4xx/5xx status.
"""
- with urlopen(url) as request:
- data = io.BytesIO(request.read())
+ r = requests.get(url, headers=HEADER)
+ # urlopen raised on HTTP errors; requests does not, so check explicitly
+ r.raise_for_status()
+ data = io.BytesIO(r.content)
with zipfile.ZipFile(data) as archive:
with archive.open(archive.namelist()[0]) as stata:
return pd.read_stata(stata, **kwargs)
diff --git a/microdf/tests/test_compare.py b/microdf/tests/test_compare.py
index c5d1235..814ca25 100644
--- a/microdf/tests/test_compare.py
+++ b/microdf/tests/test_compare.py
@@ -14,7 +14,9 @@ def differences(actual, expected, f_actual, f_expected):
:param f_actual: Filename of the actual CSV.
:param f_expected: Filename of the expected CSV.
"""
- if not actual.equals(expected):
+ # Tolerant float comparison; equal_nan=True keeps the previous
+ # DataFrame.equals behavior of treating same-position NaNs as equal.
+ if not np.allclose(actual, expected, equal_nan=True):
msg = "COMPARE RESULTS DIFFER\n"
msg += "-------------------------------------------------\n"
msg += "--- NEW RESULTS IN {} FILE ---\n"