PolicyEngine · MaxGhenis · Mar 23, 2024 · Mar 20, 2024 · Mar 23, 2024 · Mar 23, 2024
diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
@@ -1,4 +1,4 @@
-name: Build and test [Python 3.7, 3.8, 3.9]
+name: Build and test [Python 3.9, 3.10, 3.11]
 
 on: [push, pull_request]
 
@@ -7,7 +7,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.7, 3.8, 3.9]
+        python-version: ["3.9", "3.10", "3.11"]
 
     steps:
       - name: Checkout

diff --git a/PSL_catalog.json b/PSL_catalog.json
@@ -1,51 +1,24 @@
 {
-    "project_one_line": {
-        "start_header": null,
-        "end_header": null,
-        "source": null,
-        "type": "html",
-        "data": "<p>microdf is a Python package for analyzing economic microdata as pandas DataFrames, with special functions for Tax-Calculator.</p>"
-    },
-    "project_overview": {
-        "start_header": null,
-        "end_header": null,
-        "source": null,
-        "type": "html",
-        "data": "<a href=\"https://github.com/PSLmodels/microdf\">What is microdf?</a>"
-    },
-    "core_maintainers": {
-        "start_header": null,
-        "end_header": null,
-        "source": null,
-        "type": "html",
-        "data": "<ul><li>Max Ghenis</li><ul><li>email: [email protected]</li></ul>"
-    },
-    "user_documentation": {
-        "start_header": null,
-        "end_header": null,
-        "source": null,
-        "type": "html",
-        "data": "<a href=\"http://pslmodels.github.io/microdf/\"></a>"
-    },
-    "contributor_overview": {
-        "start_header": null,
-        "end_header": null,
-        "source": null,
-        "type": "html",
-        "data": "<a href=\"http://github.com/PSLmodels/microdf/\"></a>"
-    },
-    "user_changelog_recent": {
-        "start_header": null,
-        "end_header": null,
-        "source": null,
-        "type": "html",
-        "data": "<a href=\"https://github.com/PSLmodels/microdf/releases\"></a>"
-    },
-    "link_to_webapp": {
-        "start_header": null,
-        "end_header": null,
-        "source": null,
-        "type": null,
-        "data": null
+    "name": "microdf",
+    "img": "https://github.com/PSLmodels/microdf/blob/master/docs/microdf_logo.png?raw=true",
+    "banner_title": "microdf",
+    "banner_subtitle": "Analysis tools for working with survey microdata as DataFrames",
+    "detailed_description": "microdf is a Python package for analyzing economic microdata as pandas DataFrames, with special functions for handling sampling weights.",
+    "policy_area": "Survey data, data analysis",
+    "geography": "Not specific",
+    "language": "Python",
+    "maintainers": [
+        {
+            "name": "Max Ghenis",
+            "image": "https://policyengine.org/static/media/max-ghenis.536762d4b2439bf591f5.png",
+            "link": "mailto:[email protected]"
+        }
+    ],
+    "links": {
+      "code_repository": "https://github.com/PSLmodels/microdf",
+      "user_documentation": "https://pslmodels.github.io/microdf/",
+      "contributor_documentation": "",
+      "webapp": "",
+      "recent_changes": "https://github.com/PSLmodels/microdf/releases"
     }
 }
diff --git a/microdf/charts.py b/microdf/charts.py
@@ -34,7 +34,7 @@ def quantile_pct_chg_plot(df1, df2, col1, col2, w1=None, w2=None, q=None):
     # Plot.
     fig, ax = plt.subplots()
     markerline, stemlines, baseline = ax.stem(
-        df.index_newline, df.pct_chg, use_line_collection=True
+        df.index_newline, df.pct_chg
     )
     plt.setp(baseline, color="gray", linewidth=0)
     ax.yaxis.set_major_locator(mpl.ticker.MaxNLocator(integer=True))

diff --git a/microdf/generic.py b/microdf/generic.py
@@ -661,7 +661,9 @@ def equals(self, other) -> bool:
 
     @get_args_as_micro_series()
     def groupby(self, by: Union[str, list], *args, **kwargs):
-        """Returns a GroupBy object with MicroSeriesGroupBy objects for each column
+        """
+        Returns a GroupBy object with MicroSeriesGroupBy objects for
+        each column
 
         :param by: column to group by
         :type by: Union[str, list]
@@ -766,7 +768,9 @@ def poverty_count(
         income: Union[MicroSeries, str],
         threshold: Union[MicroSeries, str],
     ) -> int:
-        """Calculates the number of entities with income below a poverty threshold.
+        """
+        Calculates the number of entities with income below a poverty
+        threshold.
 
         :param income: income array or column name
         :type income: Union[MicroSeries, str]

diff --git a/microdf/io.py b/microdf/io.py
@@ -1,9 +1,15 @@
 import io
 import zipfile
-from urllib.request import urlopen
-
+import requests
 import pandas as pd
 
+HEADER = {
+    "User-Agent":
+    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) " +
+    "AppleWebKit/537.36 (KHTML, like Gecko) " +
+    "Chrome/50.0.2661.102 Safari/537.36"
+    }
+
 
 def read_stata_zip(url: str, **kwargs) -> pd.DataFrame:
     """Reads zipped Stata file by URL.
@@ -19,8 +25,11 @@ def read_stata_zip(url: str, **kwargs) -> pd.DataFrame:
     :returns: DataFrame.
 
     """
-    with urlopen(url) as request:
-        data = io.BytesIO(request.read())
+    # Bound the wait so a stalled server cannot hang the caller forever.
+    r = requests.get(url, headers=HEADER, timeout=60)
+    # Fail on HTTP errors instead of handing an error page to ZipFile.
+    r.raise_for_status()
+    data = io.BytesIO(r.content)
     with zipfile.ZipFile(data) as archive:
         with archive.open(archive.namelist()[0]) as stata:
             return pd.read_stata(stata, **kwargs)
diff --git a/microdf/tests/test_compare.py b/microdf/tests/test_compare.py
@@ -14,7 +14,7 @@ def differences(actual, expected, f_actual, f_expected):
     :param f_actual: Filename of the actual CSV.
     :param f_expected: Filename of the expected CSV.
     """
-    if not actual.equals(expected):
+    if not np.allclose(actual, expected):
         msg = "COMPARE RESULTS DIFFER\n"
         msg += "-------------------------------------------------\n"
         msg += "--- NEW RESULTS IN {} FILE ---\n"