
fix: Specify encoding as utf-8 to enforce PEP 597 (#2007)
* Explicitly specify the encoding as utf-8 when opening files, to comply with PEP 597.
  This is also future-proofing work to a degree, as Python 3.15+ will make utf-8 the
  default encoding (a short illustration follows the file summary below).
   - cf. https://peps.python.org/pep-0597/
* Add the flake8-encodings pre-commit hook to enforce PEP 597.
matthewfeickert authored Sep 15, 2022
1 parent 511d8a9 commit 8c5d930
Showing 32 changed files with 230 additions and 111 deletions.
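[Editor's note] A minimal sketch of the failure mode PEP 597 targets, for readers skimming the diffs below (illustration only, not part of the commit; the filename is hypothetical):

    # Python 3.10+: run with `python -X warn_default_encoding` to surface these.

    # Implicit: decodes with the locale's preferred encoding, so the same file
    # parses as utf-8 on most Linux setups but as cp1252 on Windows. This call
    # emits EncodingWarning under -X warn_default_encoding.
    with open("workspace.json") as read_file:
        contents = read_file.read()

    # Explicit: the pattern this commit applies throughout the codebase.
    with open("workspace.json", encoding="utf-8") as read_file:
        contents = read_file.read()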
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
@@ -58,6 +58,7 @@ repos:
     hooks:
       - id: flake8
         args: ["--count", "--statistics"]
+        additional_dependencies: [flake8-encodings==0.5.0.post1]
 
   - repo: https://github.com/pre-commit/mirrors-mypy
     rev: v0.971
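[Editor's note] What the new flake8-encodings hook catches, sketched below (hedged: see the plugin's documentation for the authoritative rules; the filename is hypothetical):

    open("spec.json")                    # flagged: no explicit encoding
    open("spec.json", encoding=None)     # flagged: still locale-dependent
    open("spec.json", "rb")              # ok: binary mode takes no encoding
    open("spec.json", encoding="utf-8")  # ok: explicit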
2 changes: 1 addition & 1 deletion docs/development.rst
@@ -52,7 +52,7 @@ available by the ``datadir`` fixture. Therefore, one can do:
 .. code-block:: python
 
     def test_patchset(datadir):
-        data_file = open(datadir.join("test.txt"))
+        data_file = open(datadir.join("test.txt"), encoding="utf-8")
         ...
 
 which will load the copy of ``test.txt`` in the temporary directory. This also
8 changes: 6 additions & 2 deletions docs/examples/notebooks/ImpactPlot.ipynb
@@ -76,9 +76,13 @@
    "outputs": [],
    "source": [
     "def make_model(channel_list):\n",
-    "    spec = json.load(open(\"1Lbb-probability-models/RegionA/BkgOnly.json\"))\n",
+    "    spec = json.load(\n",
+    "        open(\"1Lbb-probability-models/RegionA/BkgOnly.json\", encoding=\"utf-8\")\n",
+    "    )\n",
     "    patchset = pyhf.PatchSet(\n",
-    "        json.load(open(\"1Lbb-probability-models/RegionA/patchset.json\"))\n",
+    "        json.load(\n",
+    "            open(\"1Lbb-probability-models/RegionA/patchset.json\", encoding=\"utf-8\")\n",
+    "        )\n",
     "    )\n",
     "    patch = patchset[\"sbottom_750_745_60\"]\n",
     "    spec = jsonpatch.apply_patch(spec, patch)\n",
2 changes: 1 addition & 1 deletion docs/examples/notebooks/multiBinPois.ipynb
@@ -85,7 +85,7 @@
     }
    ],
    "source": [
-    "source = json.load(open(validation_datadir + '/1bin_example1.json'))\n",
+    "source = json.load(open(validation_datadir + \"/1bin_example1.json\", encoding=\"utf-8\"))\n",
     "model = uncorrelated_background(\n",
     "    source['bindata']['sig'], source['bindata']['bkg'], source['bindata']['bkgerr']\n",
     ")\n",
4 changes: 3 additions & 1 deletion docs/examples/notebooks/multichannel-coupled-histo.ipynb
@@ -165,7 +165,9 @@
     }
    ],
    "source": [
-    "with open(validation_datadir + \"/2bin_2channel_coupledhisto.json\") as spec:\n",
+    "with open(\n",
+    "    validation_datadir + \"/2bin_2channel_coupledhisto.json\", encoding=\"utf-8\"\n",
+    ") as spec:\n",
     "    source = json.load(spec)\n",
     "\n",
     "data, pdf = prep_data(source[\"channels\"])\n",
5 changes: 4 additions & 1 deletion docs/examples/notebooks/pullplot.ipynb
@@ -72,7 +72,10 @@
    "outputs": [],
    "source": [
     "def make_model(channel_list):\n",
-    "    spec = json.load(open(\"1Lbb-probability-models/RegionA/BkgOnly.json\"))\n",
+    "    with open(\n",
+    "        \"1Lbb-probability-models/RegionA/BkgOnly.json\", encoding=\"utf-8\"\n",
+    "    ) as spec_file:\n",
+    "        spec = json.load(spec_file)\n",
     "    spec[\"channels\"] = [c for c in spec[\"channels\"] if c[\"name\"] in channel_list]\n",
     "    spec[\"measurements\"][0][\"config\"][\"poi\"] = \"lumi\"\n",
     "\n",
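[Editor's note] Besides adding the encoding, this hunk also replaces a bare open() inside json.load() with a with block, so the file handle is closed deterministically instead of being left to the garbage collector.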
3 changes: 2 additions & 1 deletion docs/likelihood.rst
@@ -28,7 +28,8 @@ check that it conforms to the provided workspace specification as follows:
     import json, requests, jsonschema
 
-    workspace = json.load(open("/path/to/analysis_workspace.json"))
+    with open("/path/to/analysis_workspace.json", encoding="utf-8") as ws_file:
+        workspace = json.load(ws_file)
     # if no exception is raised, it found and parsed the schema
     schema = requests.get("https://scikit-hep.org/pyhf/schemas/1.0.0/workspace.json").json()
     # If no exception is raised by validate(), the instance is valid.
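[Editor's note] A self-contained version of the docs snippet above, runnable end to end (the validate() call is the standard jsonschema API; the workspace path is a placeholder):

    import json

    import jsonschema
    import requests

    with open("/path/to/analysis_workspace.json", encoding="utf-8") as ws_file:
        workspace = json.load(ws_file)

    schema = requests.get(
        "https://scikit-hep.org/pyhf/schemas/1.0.0/workspace.json"
    ).json()

    # Raises jsonschema.exceptions.ValidationError if the workspace does not conform.
    jsonschema.validate(instance=workspace, schema=schema)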
18 changes: 12 additions & 6 deletions src/pyhf/cli/infer.py
@@ -99,10 +99,13 @@ def fit(
     )
     set_backend(tensorlib, new_optimizer(**optconf))
 
-    with click.open_file(workspace, "r") as specstream:
+    with click.open_file(workspace, "r", encoding="utf-8") as specstream:
         spec = json.load(specstream)
     ws = Workspace(spec)
-    patches = [json.loads(click.open_file(pfile, "r").read()) for pfile in patch]
+    patches = [
+        json.loads(click.open_file(pfile, "r", encoding="utf-8").read())
+        for pfile in patch
+    ]
 
     model = ws.model(
         measurement_name=measurement,
@@ -125,7 +128,7 @@ def fit(
     if output_file is None:
         click.echo(json.dumps(result, indent=4, sort_keys=True))
     else:
-        with open(output_file, "w+") as out_file:
+        with open(output_file, "w+", encoding="utf-8") as out_file:
             json.dump(result, out_file, indent=4, sort_keys=True)
         log.debug(f"Written to {output_file:s}")
 
@@ -190,12 +193,15 @@ def cls(
             "CLs_obs": 0.3599845631401915
         }
     """
-    with click.open_file(workspace, 'r') as specstream:
+    with click.open_file(workspace, "r", encoding="utf-8") as specstream:
         spec = json.load(specstream)
 
     ws = Workspace(spec)
 
-    patches = [json.loads(click.open_file(pfile, 'r').read()) for pfile in patch]
+    patches = [
+        json.loads(click.open_file(pfile, "r", encoding="utf-8").read())
+        for pfile in patch
+    ]
     model = ws.model(
         measurement_name=measurement,
         patches=patches,
@@ -241,6 +247,6 @@ def cls(
     if output_file is None:
         click.echo(json.dumps(result, indent=4, sort_keys=True))
     else:
-        with open(output_file, 'w+') as out_file:
+        with open(output_file, "w+", encoding="utf-8") as out_file:
             json.dump(result, out_file, indent=4, sort_keys=True)
         log.debug(f"Written to {output_file:s}")
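[Editor's note] click.open_file appears throughout these CLI diffs because, unlike the built-in open(), it also accepts "-" for stdin/stdout; it forwards encoding just like open(). A minimal sketch:

    import click

    # Reads from a file path, or from stdin when the argument is "-".
    with click.open_file("-", "r", encoding="utf-8") as stream:
        spec_text = stream.read()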
16 changes: 8 additions & 8 deletions src/pyhf/cli/patchset.py
@@ -39,7 +39,7 @@ def extract(patchset, name, output_file, with_metadata):
     Returns:
         jsonpatch (:obj:`list`): A list of jsonpatch operations to apply to a workspace.
     """
-    with click.open_file(patchset, 'r') as fstream:
+    with click.open_file(patchset, "r", encoding="utf-8") as fstream:
         patchset_spec = json.load(fstream)
 
     patchset = PatchSet(patchset_spec)
@@ -52,7 +52,7 @@ def extract(patchset, name, output_file, with_metadata):
             result = patch.patch
 
     if output_file:
-        with open(output_file, 'w+') as out_file:
+        with open(output_file, "w", encoding="utf-8") as out_file:
             json.dump(result, out_file, indent=4, sort_keys=True)
         log.debug(f"Written to {output_file:s}")
     else:
@@ -79,19 +79,19 @@ def apply(background_only, patchset, name, output_file):
     Returns:
         workspace (:class:`~pyhf.workspace.Workspace`): The patched background-only workspace.
     """
-    with click.open_file(background_only, 'r') as specstream:
+    with click.open_file(background_only, "r", encoding="utf-8") as specstream:
         spec = json.load(specstream)
 
     ws = Workspace(spec)
 
-    with click.open_file(patchset, 'r') as fstream:
+    with click.open_file(patchset, "r", encoding="utf-8") as fstream:
         patchset_spec = json.load(fstream)
 
     patchset = PatchSet(patchset_spec)
     patched_ws = patchset.apply(ws, name)
 
     if output_file:
-        with open(output_file, 'w+') as out_file:
+        with open(output_file, "w+", encoding="utf-8") as out_file:
             json.dump(patched_ws, out_file, indent=4, sort_keys=True)
         log.debug(f"Written to {output_file:s}")
     else:
@@ -111,12 +111,12 @@ def verify(background_only, patchset):
     Returns:
         None
     """
-    with click.open_file(background_only, 'r') as specstream:
+    with click.open_file(background_only, "r", encoding="utf-8") as specstream:
         spec = json.load(specstream)
 
     ws = Workspace(spec)
 
-    with click.open_file(patchset, 'r') as fstream:
+    with click.open_file(patchset, "r", encoding="utf-8") as fstream:
         patchset_spec = json.load(fstream)
 
     patchset = PatchSet(patchset_spec)
@@ -134,7 +134,7 @@ def inspect(patchset):
     Returns:
         None
     """
-    with click.open_file(patchset, 'r') as fstream:
+    with click.open_file(patchset, "r", encoding="utf-8") as fstream:
         patchset_spec = json.load(fstream)
 
     patchset = PatchSet(patchset_spec)
8 changes: 4 additions & 4 deletions src/pyhf/cli/rootio.py
@@ -65,7 +65,7 @@ def xml2json(
     if output_file is None:
         click.echo(json.dumps(spec, indent=4, sort_keys=True))
     else:
-        with open(output_file, 'w+') as out_file:
+        with open(output_file, "w+", encoding="utf-8") as out_file:
             json.dump(spec, out_file, indent=4, sort_keys=True)
         log.debug(f"Written to {output_file:s}")
 
@@ -92,15 +92,15 @@ def json2xml(workspace, output_dir, specroot, dataroot, resultprefix, patch):
     from pyhf import writexml
 
     os.makedirs(output_dir, exist_ok=True)
-    with click.open_file(workspace, 'r') as specstream:
+    with click.open_file(workspace, "r", encoding="utf-8") as specstream:
         spec = json.load(specstream)
     for pfile in patch:
-        patch = json.loads(click.open_file(pfile, 'r').read())
+        patch = json.loads(click.open_file(pfile, "r", encoding="utf-8").read())
         spec = jsonpatch.JsonPatch(patch).apply(spec)
     os.makedirs(Path(output_dir).joinpath(specroot), exist_ok=True)
     os.makedirs(Path(output_dir).joinpath(dataroot), exist_ok=True)
     with click.open_file(
-        Path(output_dir).joinpath(f'{resultprefix}.xml'), 'w'
+        Path(output_dir).joinpath(f"{resultprefix}.xml"), "w", encoding="utf-8"
     ) as outstream:
         outstream.write(
             writexml.writexml(
24 changes: 12 additions & 12 deletions src/pyhf/cli/spec.py
@@ -60,7 +60,7 @@ def inspect(workspace, output_file, measurement):
           (*) Measurement mu (none)
     """
-    with click.open_file(workspace, 'r') as specstream:
+    with click.open_file(workspace, "r", encoding="utf-8") as specstream:
         spec = json.load(specstream)
 
     ws = Workspace(spec)
@@ -158,7 +158,7 @@ def inspect(workspace, output_file, measurement):
     click.echo()
 
     if output_file:
-        with open(output_file, 'w+') as out_file:
+        with open(output_file, "w+", encoding="utf-8") as out_file:
             json.dump(result, out_file, indent=4, sort_keys=True)
         log.debug(f"Written to {output_file:s}")
 
@@ -189,7 +189,7 @@ def prune(
     See :func:`pyhf.workspace.Workspace.prune` for more information.
     """
-    with click.open_file(workspace, 'r') as specstream:
+    with click.open_file(workspace, "r", encoding="utf-8") as specstream:
         spec = json.load(specstream)
 
     ws = Workspace(spec)
@@ -204,7 +204,7 @@ def prune(
     if output_file is None:
         click.echo(json.dumps(pruned_ws, indent=4, sort_keys=True))
     else:
-        with open(output_file, 'w+') as out_file:
+        with open(output_file, "w+", encoding="utf-8") as out_file:
             json.dump(pruned_ws, out_file, indent=4, sort_keys=True)
         log.debug(f"Written to {output_file:s}")
 
@@ -253,7 +253,7 @@ def rename(workspace, output_file, channel, sample, modifier, measurement):
     See :func:`pyhf.workspace.Workspace.rename` for more information.
     """
-    with click.open_file(workspace, 'r') as specstream:
+    with click.open_file(workspace, "r", encoding="utf-8") as specstream:
         spec = json.load(specstream)
 
     ws = Workspace(spec)
@@ -267,7 +267,7 @@ def rename(workspace, output_file, channel, sample, modifier, measurement):
     if output_file is None:
         click.echo(json.dumps(renamed_ws, indent=4, sort_keys=True))
     else:
-        with open(output_file, 'w+') as out_file:
+        with open(output_file, "w+", encoding="utf-8") as out_file:
             json.dump(renamed_ws, out_file, indent=4, sort_keys=True)
         log.debug(f"Written to {output_file:s}")
 
@@ -298,10 +298,10 @@ def combine(workspace_one, workspace_two, join, output_file, merge_channels):
     See :func:`pyhf.workspace.Workspace.combine` for more information.
     """
-    with click.open_file(workspace_one, 'r') as specstream:
+    with click.open_file(workspace_one, "r", encoding="utf-8") as specstream:
         spec_one = json.load(specstream)
 
-    with click.open_file(workspace_two, 'r') as specstream:
+    with click.open_file(workspace_two, "r", encoding="utf-8") as specstream:
         spec_two = json.load(specstream)
 
     ws_one = Workspace(spec_one)
@@ -313,7 +313,7 @@ def combine(workspace_one, workspace_two, join, output_file, merge_channels):
     if output_file is None:
         click.echo(json.dumps(combined_ws, indent=4, sort_keys=True))
     else:
-        with open(output_file, 'w+') as out_file:
+        with open(output_file, "w+", encoding="utf-8") as out_file:
             json.dump(combined_ws, out_file, indent=4, sort_keys=True)
         log.debug(f"Written to {output_file:s}")
 
@@ -347,7 +347,7 @@ def digest(workspace, algorithm, output_json):
         $ curl -sL https://raw.githubusercontent.com/scikit-hep/pyhf/master/docs/examples/json/2-bin_1-channel.json | pyhf digest
         sha256:dad8822af55205d60152cbe4303929042dbd9d4839012e055e7c6b6459d68d73
     """
-    with click.open_file(workspace, 'r') as specstream:
+    with click.open_file(workspace, "r", encoding="utf-8") as specstream:
         spec = json.load(specstream)
 
     workspace = Workspace(spec)
@@ -393,7 +393,7 @@ def sort(workspace, output_file):
     """
-    with click.open_file(workspace, 'r') as specstream:
+    with click.open_file(workspace, "r", encoding="utf-8") as specstream:
         spec = json.load(specstream)
 
     workspace = Workspace(spec)
@@ -402,6 +402,6 @@ def sort(workspace, output_file):
     if output_file is None:
         click.echo(json.dumps(sorted_ws, indent=4, sort_keys=True))
     else:
-        with open(output_file, 'w+') as out_file:
+        with open(output_file, "w+", encoding="utf-8") as out_file:
             json.dump(sorted_ws, out_file, indent=4, sort_keys=True)
         log.debug(f"Written to {output_file}")
2 changes: 1 addition & 1 deletion src/pyhf/schema/loader.py
@@ -50,7 +50,7 @@ def load_schema(schema_id: str):
         raise pyhf.exceptions.SchemaNotFound(
             f'The schema {schema_id} was not found. Do you have the right version or the right path? {path}'
         )
-    with path.open() as json_schema:
+    with path.open(encoding="utf-8") as json_schema:
         schema = json.load(json_schema)
         variables.SCHEMA_CACHE[schema['$id']] = schema
     return variables.SCHEMA_CACHE[schema['$id']]
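[Editor's note] pathlib.Path.open forwards its keyword arguments to the built-in open(), so this is the same fix in pathlib form. A small sketch with a hypothetical path:

    from pathlib import Path

    schema_path = Path("schemas/1.0.0/defs.json")
    # Equivalent to open(schema_path, encoding="utf-8")
    with schema_path.open(encoding="utf-8") as json_schema:
        schema_text = json_schema.read()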
2 changes: 1 addition & 1 deletion src/pyhf/writexml.py
@@ -289,7 +289,7 @@ def writexml(spec, specdir, data_rootdir, resultprefix):
         channelfilename = str(
             Path(specdir).joinpath(f'{resultprefix}_{channelspec["name"]}.xml')
         )
-        with open(channelfilename, 'w') as channelfile:
+        with open(channelfilename, "w", encoding="utf-8") as channelfile:
             channel = build_channel(spec, channelspec, spec.get('observations'))
             indent(channel)
             channelfile.write(
2 changes: 1 addition & 1 deletion tests/conftest.py
@@ -13,7 +13,7 @@
 @pytest.fixture
 def get_json_from_tarfile():
     def _get_json_from_tarfile(archive_data_path, json_name):
-        with tarfile.open(archive_data_path, "r:gz") as archive:
+        with tarfile.open(archive_data_path, "r:gz", encoding="utf-8") as archive:
             json_file = (
                 archive.extractfile(archive.getmember(json_name)).read().decode("utf8")
             )
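[Editor's note] One subtlety: in tarfile.open, encoding= governs how member names and pax metadata are decoded, not file contents, which is why the fixture still calls .decode("utf8") on the extracted bytes. A sketch with a hypothetical archive:

    import tarfile

    with tarfile.open("archive.tar.gz", "r:gz", encoding="utf-8") as archive:
        member = archive.getmembers()[0]          # member *name* decoded as utf-8
        raw = archive.extractfile(member).read()  # contents stay raw bytes
        text = raw.decode("utf-8")                # content decoding is explicit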
20 changes: 15 additions & 5 deletions tests/contrib/test_contrib_utils.py
@@ -11,25 +11,35 @@
 
 @pytest.fixture(scope="function")
 def tarfile_path(tmpdir):
-    with open(tmpdir.join("test_file.txt").strpath, "w") as write_file:
+    with open(
+        tmpdir.join("test_file.txt").strpath, "w", encoding="utf-8"
+    ) as write_file:
         write_file.write("test file")
-    with tarfile.open(tmpdir.join("test_tar.tar.gz").strpath, mode="w:gz") as archive:
+    with tarfile.open(
+        tmpdir.join("test_tar.tar.gz").strpath, mode="w:gz", encoding="utf-8"
+    ) as archive:
         archive.add(tmpdir.join("test_file.txt").strpath)
     return Path(tmpdir.join("test_tar.tar.gz").strpath)
 
 
 @pytest.fixture(scope="function")
 def tarfile_uncompressed_path(tmpdir):
-    with open(tmpdir.join("test_file.txt").strpath, "w") as write_file:
+    with open(
+        tmpdir.join("test_file.txt").strpath, "w", encoding="utf-8"
+    ) as write_file:
         write_file.write("test file")
-    with tarfile.open(tmpdir.join("test_tar.tar").strpath, mode="w") as archive:
+    with tarfile.open(
+        tmpdir.join("test_tar.tar").strpath, mode="w", encoding="utf-8"
+    ) as archive:
         archive.add(tmpdir.join("test_file.txt").strpath)
     return Path(tmpdir.join("test_tar.tar").strpath)
 
 
 @pytest.fixture(scope="function")
 def zipfile_path(tmpdir):
-    with open(tmpdir.join("test_file.txt").strpath, "w") as write_file:
+    with open(
+        tmpdir.join("test_file.txt").strpath, "w", encoding="utf-8"
+    ) as write_file:
         write_file.write("test file")
     with zipfile.ZipFile(tmpdir.join("test_zip.zip").strpath, "w") as archive:
         archive.write(tmpdir.join("test_file.txt").strpath)
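[Editor's note] In the last fixture only the open() call gains an encoding: zipfile.ZipFile takes no text encoding parameter when writing (entry names are handled per the ZIP spec), so there is nothing for the hook to flag on that line.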
(Diffs for the remaining 17 of the 32 changed files are not shown.)
