diff --git a/example/go.mod b/example/go.mod index 4b79c94..2a4ec10 100644 --- a/example/go.mod +++ b/example/go.mod @@ -5,10 +5,10 @@ go 1.21 require github.com/artefactual-labs/bagit-gython v0.0.0-00010101000000-000000000000 require ( - github.com/kluctl/go-embed-python v0.0.0-3.12.3-20240415-1 // indirect + github.com/kluctl/go-embed-python v0.0.0-3.12.3-20240415-2 // indirect github.com/rogpeppe/go-internal v1.12.0 // indirect github.com/sirupsen/logrus v1.9.3 // indirect - golang.org/x/sync v0.7.0 // indirect + golang.org/x/sync v0.8.0 // indirect golang.org/x/sys v0.21.0 // indirect ) diff --git a/example/go.sum b/example/go.sum index c18af82..3df32e2 100644 --- a/example/go.sum +++ b/example/go.sum @@ -5,8 +5,8 @@ github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -github.com/kluctl/go-embed-python v0.0.0-3.12.3-20240415-1 h1:IX2O3LJUL0AjYsROGZ4aNENTEb3c/Ll/0b9Yb/8P61Q= -github.com/kluctl/go-embed-python v0.0.0-3.12.3-20240415-1/go.mod h1:9kqX8IjRCNh4ppXxlKGtLN+QFuvsdSsNGKsTLgdSNRw= +github.com/kluctl/go-embed-python v0.0.0-3.12.3-20240415-2 h1:JcYhVgX7jFN8QcoBxx8/kLxUyeUzE/JnGf5ntulNPPM= +github.com/kluctl/go-embed-python v0.0.0-3.12.3-20240415-2/go.mod h1:9kqX8IjRCNh4ppXxlKGtLN+QFuvsdSsNGKsTLgdSNRw= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= @@ -17,8 +17,8 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+ github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= -golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= -golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= +golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws= golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= diff --git a/internal/dist/data/darwin-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/RECORD b/internal/dist/data/darwin-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/RECORD deleted file mode 100644 index ab47624..0000000 --- a/internal/dist/data/darwin-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/RECORD +++ /dev/null @@ -1,12 +0,0 @@ -../../bin/__pycache__/bagit.cpython-312.pyc,, -../../bin/bagit.py,sha256=xU9jk5ZmmrgFvawkXVOp2zWQV5sj65Pv4hAv7G4ORY0,55551 -../../locale/en/LC_MESSAGES/bagit-python.mo,sha256=onTi-42QABidt47Lx6KeFzbP-lQ0HU1X9fW2j1M8UK0,2808 -__pycache__/bagit.cpython-312.pyc,, -bagit-1.8.1.dev26+g5b77243.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 -bagit-1.8.1.dev26+g5b77243.dist-info/METADATA,sha256=crTvH7UCcMB5Ddz3n6u-xJWmtv0NLiga4pmQLNNHNe0,8167 -bagit-1.8.1.dev26+g5b77243.dist-info/RECORD,, 
-bagit-1.8.1.dev26+g5b77243.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 -bagit-1.8.1.dev26+g5b77243.dist-info/WHEEL,sha256=DZajD4pwLWue70CAfc7YaxT1wLUciNBvN_TTcvXpltE,110 -bagit-1.8.1.dev26+g5b77243.dist-info/direct_url.json,sha256=qk6jLuV7knLYcmZsdFksaCxxehTR2eGa8s8jqoewkv8,211 -bagit-1.8.1.dev26+g5b77243.dist-info/top_level.txt,sha256=f288ippe7ERSzETsEftza1dXq61Z4zFSXDgaTF6djpE,6 -bagit.py,sha256=_8AUO2ZmW5PdhTfTpJetie5lkyhFkomhXM_Fn8kzvrU,55530 diff --git a/internal/dist/data/darwin-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/direct_url.json b/internal/dist/data/darwin-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/direct_url.json deleted file mode 100644 index f5592c2..0000000 --- a/internal/dist/data/darwin-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/direct_url.json +++ /dev/null @@ -1 +0,0 @@ -{"url": "https://github.com/LibraryOfCongress/bagit-python", "vcs_info": {"commit_id": "5b7724356d8c5c3a3be9120814d4d996574485a9", "requested_revision": "5b7724356d8c5c3a3be9120814d4d996574485a9", "vcs": "git"}} \ No newline at end of file diff --git a/internal/dist/data/darwin-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/INSTALLER b/internal/dist/data/darwin-amd64/bagit-1.9b3.dev3+gda04180.dist-info/INSTALLER similarity index 100% rename from internal/dist/data/darwin-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/INSTALLER rename to internal/dist/data/darwin-amd64/bagit-1.9b3.dev3+gda04180.dist-info/INSTALLER diff --git a/internal/dist/data/darwin-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/METADATA b/internal/dist/data/darwin-amd64/bagit-1.9b3.dev3+gda04180.dist-info/METADATA similarity index 88% rename from internal/dist/data/darwin-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/METADATA rename to internal/dist/data/darwin-amd64/bagit-1.9b3.dev3+gda04180.dist-info/METADATA index 81a7c6b..d5f5d35 100644 --- a/internal/dist/data/darwin-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/METADATA +++ b/internal/dist/data/darwin-amd64/bagit-1.9b3.dev3+gda04180.dist-info/METADATA @@ -1,34 +1,23 @@ Metadata-Version: 2.1 Name: bagit -Version: 1.8.1.dev26+g5b77243 +Version: 1.9b3.dev3+gda04180 Summary: Create and validate BagIt packages Home-page: https://libraryofcongress.github.io/bagit-python/ Author: Ed Summers -Author-email: ehs@pobox.com +Author-email: Ed Summers +Project-URL: Homepage, https://libraryofcongress.github.io/bagit-python/ Platform: POSIX -Classifier: License :: Public Domain Classifier: Intended Audience :: Developers +Classifier: License :: Public Domain +Classifier: Programming Language :: Python :: 3 Classifier: Topic :: Communications :: File Sharing Classifier: Topic :: Software Development :: Libraries :: Python Modules Classifier: Topic :: System :: Filesystems -Classifier: Programming Language :: Python :: 2 -Classifier: Programming Language :: Python :: 2.7 -Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3.1 -Classifier: Programming Language :: Python :: 3.2 -Classifier: Programming Language :: Python :: 3.3 -Classifier: Programming Language :: Python :: 3.4 -Classifier: Programming Language :: Python :: 3.5 -Classifier: Programming Language :: Python :: 3.6 -Classifier: Programming Language :: Python :: 3.7 -Classifier: Programming Language :: Python :: 3.8 -Requires-Dist: importlib-metadata ; python_version < "3.8" +Description-Content-Type: text/x-rst bagit-python ============ -|Build Status| |Coverage Status| - bagit is a Python library and command line utility for working with `BagIt `__ style packages. 
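
Note: the METADATA hunk above drops the Python 2 classifiers and the conditional `importlib-metadata ; python_version < "3.8"` backport requirement, matching the Python-3-only bagit 1.9b3 snapshot; the `from importlib.metadata import version` import kept as context in the bagit.py hunks below covers the same need from the standard library. A minimal sketch of that stdlib version lookup (the module name and fallback string here are hypothetical, not part of the patch):

    # version_lookup.py -- illustration only, not part of the patch
    from importlib.metadata import PackageNotFoundError, version

    def bagit_version() -> str:
        """Return the installed bagit version, or a placeholder when the
        package metadata is unavailable (e.g. a plain source checkout)."""
        try:
            return version("bagit")
        except PackageNotFoundError:
            return "0.0.dev0"  # hypothetical fallback value

    if __name__ == "__main__":
        print(bagit_version())
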
@@ -42,7 +31,7 @@ project as needed or you can install globally with: pip install bagit -Python v2.7+ is required. +A supported version of Python 3 is required. Command Line Usage ------------------ @@ -252,20 +241,11 @@ Contributing to bagit-python development Running the tests ~~~~~~~~~~~~~~~~~ -You can quickly run the tests by having setuptools install dependencies: - -:: - - python setup.py test - -Once your code is working, you can use -`Tox `__ to run the tests with every -supported version of Python which you have installed on the local -system: +You can quickly run the tests using the built-in unittest framework: :: - tox + python -m unittest discover If you have Docker installed, you can run the tests under Linux inside a container: diff --git a/internal/dist/data/darwin-amd64/bagit-1.9b3.dev3+gda04180.dist-info/RECORD b/internal/dist/data/darwin-amd64/bagit-1.9b3.dev3+gda04180.dist-info/RECORD new file mode 100644 index 0000000..694e91f --- /dev/null +++ b/internal/dist/data/darwin-amd64/bagit-1.9b3.dev3+gda04180.dist-info/RECORD @@ -0,0 +1,12 @@ +../../bin/__pycache__/bagit.cpython-312.pyc,, +../../bin/bagit.py,sha256=eWACe26vquZwx7ISQ7xfZRHvhNLnK6LfOyowanb8a64,54722 +../../locale/en/LC_MESSAGES/bagit-python.mo,sha256=onTi-42QABidt47Lx6KeFzbP-lQ0HU1X9fW2j1M8UK0,2808 +__pycache__/bagit.cpython-312.pyc,, +bagit-1.9b3.dev3+gda04180.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +bagit-1.9b3.dev3+gda04180.dist-info/METADATA,sha256=xeBL-vZhcdQpG_M3AvddWJ-YvQ_BCxJ8UUBmmTfizXA,7527 +bagit-1.9b3.dev3+gda04180.dist-info/RECORD,, +bagit-1.9b3.dev3+gda04180.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +bagit-1.9b3.dev3+gda04180.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91 +bagit-1.9b3.dev3+gda04180.dist-info/direct_url.json,sha256=2hbjksYnB7HjDIZNtDaTebBXsdAYM2ZI_reqL50sFWw,211 +bagit-1.9b3.dev3+gda04180.dist-info/top_level.txt,sha256=f288ippe7ERSzETsEftza1dXq61Z4zFSXDgaTF6djpE,6 +bagit.py,sha256=L3uPbWYK4GPwRlCTWSnzMpOukmVWc-tmLxfHzACB-5w,54701 diff --git a/internal/dist/data/darwin-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/REQUESTED b/internal/dist/data/darwin-amd64/bagit-1.9b3.dev3+gda04180.dist-info/REQUESTED similarity index 100% rename from internal/dist/data/darwin-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/REQUESTED rename to internal/dist/data/darwin-amd64/bagit-1.9b3.dev3+gda04180.dist-info/REQUESTED diff --git a/internal/dist/data/darwin-arm64/bagit-1.8.1.dev26+g5b77243.dist-info/WHEEL b/internal/dist/data/darwin-amd64/bagit-1.9b3.dev3+gda04180.dist-info/WHEEL similarity index 54% rename from internal/dist/data/darwin-arm64/bagit-1.8.1.dev26+g5b77243.dist-info/WHEEL rename to internal/dist/data/darwin-amd64/bagit-1.9b3.dev3+gda04180.dist-info/WHEEL index 832be11..da25d7b 100644 --- a/internal/dist/data/darwin-arm64/bagit-1.8.1.dev26+g5b77243.dist-info/WHEEL +++ b/internal/dist/data/darwin-amd64/bagit-1.9b3.dev3+gda04180.dist-info/WHEEL @@ -1,6 +1,5 @@ Wheel-Version: 1.0 -Generator: bdist_wheel (0.43.0) +Generator: setuptools (75.2.0) Root-Is-Purelib: true -Tag: py2-none-any Tag: py3-none-any diff --git a/internal/dist/data/darwin-amd64/bagit-1.9b3.dev3+gda04180.dist-info/direct_url.json b/internal/dist/data/darwin-amd64/bagit-1.9b3.dev3+gda04180.dist-info/direct_url.json new file mode 100644 index 0000000..71df225 --- /dev/null +++ b/internal/dist/data/darwin-amd64/bagit-1.9b3.dev3+gda04180.dist-info/direct_url.json @@ -0,0 +1 @@ +{"url": "https://github.com/LibraryOfCongress/bagit-python", 
"vcs_info": {"commit_id": "da041808d2453da2209054a9f4c48187dc323c0a", "requested_revision": "da041808d2453da2209054a9f4c48187dc323c0a", "vcs": "git"}} \ No newline at end of file diff --git a/internal/dist/data/darwin-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/top_level.txt b/internal/dist/data/darwin-amd64/bagit-1.9b3.dev3+gda04180.dist-info/top_level.txt similarity index 100% rename from internal/dist/data/darwin-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/top_level.txt rename to internal/dist/data/darwin-amd64/bagit-1.9b3.dev3+gda04180.dist-info/top_level.txt diff --git a/internal/dist/data/darwin-amd64/bagit.py b/internal/dist/data/darwin-amd64/bagit.py index 458fba8..69ea9ab 100644 --- a/internal/dist/data/darwin-amd64/bagit.py +++ b/internal/dist/data/darwin-amd64/bagit.py @@ -1,8 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -from __future__ import absolute_import, division, print_function, unicode_literals - import argparse import codecs import gettext @@ -19,7 +17,6 @@ from collections import defaultdict from datetime import date from functools import partial -from os.path import abspath, isdir, isfile, join try: from importlib.metadata import version @@ -42,10 +39,8 @@ def find_locale_dir(): TRANSLATION_CATALOG = gettext.translation( "bagit-python", localedir=find_locale_dir(), fallback=True ) -if sys.version_info < (3,): - _ = TRANSLATION_CATALOG.ugettext -else: - _ = TRANSLATION_CATALOG.gettext + +_ = TRANSLATION_CATALOG.gettext MODULE_NAME = "bagit" if __name__ == "__main__" else __name__ @@ -140,7 +135,7 @@ def find_locale_dir(): open_text_file = partial(codecs.open, encoding="utf-8", errors="strict") # This is the same as decoding the byte values in codecs.BOM: -UNICODE_BYTE_ORDER_MARK = "\uFEFF" +UNICODE_BYTE_ORDER_MARK = "\ufeff" def make_bag( @@ -282,7 +277,7 @@ class Bag(object): valid_files = ["bagit.txt", "fetch.txt"] valid_directories = ["data"] - def __init__(self, path=None): + def __init__(self, path): super(Bag, self).__init__() self.tags = {} self.info = {} @@ -304,12 +299,8 @@ def __init__(self, path=None): self.algorithms = [] self.tag_file_name = None - self.path = abspath(path) - if path: - # if path ends in a path separator, strip it off - if path[-1] == os.sep: - self.path = path[:-1] - self._open() + self.path = os.path.abspath(path) + self._open() def __str__(self): # FIXME: develop a more informative string representation for a Bag @@ -333,7 +324,7 @@ def _open(self): # the required version and encoding. bagit_file_path = os.path.join(self.path, "bagit.txt") - if not isfile(bagit_file_path): + if not os.path.isfile(bagit_file_path): raise BagError(_("Expected bagit.txt does not exist: %s") % bagit_file_path) self.tags = tags = _load_tag_file(bagit_file_path) @@ -382,13 +373,13 @@ def _open(self): def manifest_files(self): for filename in ["manifest-%s.txt" % a for a in CHECKSUM_ALGOS]: f = os.path.join(self.path, filename) - if isfile(f): + if os.path.isfile(f): yield f def tagmanifest_files(self): for filename in ["tagmanifest-%s.txt" % a for a in CHECKSUM_ALGOS]: f = os.path.join(self.path, filename) - if isfile(f): + if os.path.isfile(f): yield f def compare_manifests_with_fs(self): @@ -422,8 +413,8 @@ def compare_manifests_with_fs(self): def compare_fetch_with_fs(self): """Compares the fetch entries with the files actually - in the payload, and returns a list of all the files - that still need to be fetched. + in the payload, and returns a list of all the files + that still need to be fetched. 
""" files_on_fs = set(self.payload_files()) @@ -449,7 +440,7 @@ def payload_files(self): yield rel_path def payload_entries(self): - """Return a dictionary of items """ + """Return a dictionary of items""" # Don't use dict comprehension (compatibility with Python < 2.7) return dict( (key, value) @@ -562,7 +553,7 @@ def fetch_entries(self): fetch_file_path = os.path.join(self.path, "fetch.txt") - if isfile(fetch_file_path): + if os.path.isfile(fetch_file_path): with open_text_file( fetch_file_path, "r", encoding=self.encoding ) as fetch_file: @@ -618,7 +609,9 @@ def is_valid(self, processes=1, fast=False, completeness_only=False): """ try: - self.validate(processes=processes, fast=fast, completeness_only=completeness_only) + self.validate( + processes=processes, fast=fast, completeness_only=completeness_only + ) except BagError: return False @@ -746,7 +739,7 @@ def _validate_structure(self): def _validate_structure_payload_directory(self): data_dir_path = os.path.join(self.path, "data") - if not isdir(data_dir_path): + if not os.path.isdir(data_dir_path): raise BagValidationError( _("Expected data directory %s does not exist") % data_dir_path ) @@ -776,7 +769,10 @@ def validate_fetch(self): # each parsed url must resolve to a scheme and point to a netloc # if the scheme is file, netloc is not necessary - if not (all((parsed_url.scheme, parsed_url.netloc)) or parsed_url.scheme == "file"): + if not ( + all((parsed_url.scheme, parsed_url.netloc)) + or parsed_url.scheme == "file" + ): raise BagError(_("Malformed URL in fetch.txt: %s") % url) def _validate_contents(self, processes=1, fast=False, completeness_only=False): @@ -851,11 +847,11 @@ def _validate_completeness(self): only_in_manifests, only_on_fs = self.compare_manifests_with_fs() for path in only_in_manifests: e = FileMissing(path) - LOGGER.warning(force_unicode(e)) + LOGGER.warning(str(e)) errors.append(e) for path in only_on_fs: e = UnexpectedFile(path) - LOGGER.warning(force_unicode(e)) + LOGGER.warning(str(e)) errors.append(e) if errors: @@ -886,13 +882,12 @@ def _validate_entries(self, processes): if processes == 1: hash_results = [_calc_hashes(i) for i in args] else: - try: - pool = multiprocessing.Pool( - processes if processes else None, initializer=worker_init - ) - hash_results = pool.map(_calc_hashes, args) - finally: - pool.terminate() + pool = multiprocessing.Pool( + processes if processes else None, initializer=worker_init + ) + hash_results = pool.map(_calc_hashes, args) + pool.close() + pool.join() # Any unhandled exceptions are probably fatal except: @@ -906,7 +901,7 @@ def _validate_entries(self, processes): e = ChecksumMismatch( rel_path, alg, stored_hash.lower(), computed_hash ) - LOGGER.warning(force_unicode(e)) + LOGGER.warning(str(e)) errors.append(e) if errors: @@ -963,7 +958,7 @@ def __init__(self, message, details=None): def __str__(self): if len(self.details) > 0: - details = "; ".join([force_unicode(e) for e in self.details]) + details = "; ".join([str(e) for e in self.details]) return "%s: %s" % (self.message, details) return self.message @@ -988,7 +983,7 @@ def __str__(self): return _( '%(path)s %(algorithm)s validation failed: expected="%(expected)s" found="%(found)s"' ) % { - "path": force_unicode(self.path), + "path": str(self.path), "algorithm": self.algorithm, "expected": self.expected, "found": self.found, @@ -997,9 +992,9 @@ def __str__(self): class FileMissing(ManifestErrorDetail): def __str__(self): - return _( - "%s exists in manifest but was not found on filesystem" - ) % 
force_unicode(self.path) + return _("%s exists in manifest but was not found on filesystem") % str( + self.path + ) class UnexpectedFile(ManifestErrorDetail): @@ -1034,22 +1029,10 @@ def posix_multiprocessing_worker_initializer(): # is consistency since the input value will be preserved: -def normalize_unicode_py3(s): +def normalize_unicode(s): return unicodedata.normalize("NFC", s) -def normalize_unicode_py2(s): - if isinstance(s, str): - s = s.decode("utf-8") - return unicodedata.normalize("NFC", s) - - -if sys.version_info > (3, 0): - normalize_unicode = normalize_unicode_py3 -else: - normalize_unicode = normalize_unicode_py2 - - def build_unicode_normalized_lookup_dict(filenames): """ Return a dictionary mapping unicode-normalized filenames to as-encoded @@ -1138,7 +1121,7 @@ def _calc_hashes(args): try: f_hashes = _calculate_file_hashes(full_path, f_hashers) except BagValidationError as e: - f_hashes = dict((alg, force_unicode(e)) for alg in f_hashers.keys()) + f_hashes = dict((alg, str(e)) for alg in f_hashers.keys()) return rel_path, f_hashes, hashes @@ -1161,7 +1144,7 @@ def _calculate_file_hashes(full_path, f_hashers): except (OSError, IOError) as e: raise BagValidationError( _("Could not read %(filename)s: %(error)s") - % {"filename": full_path, "error": force_unicode(e)} + % {"filename": full_path, "error": str(e)} ) return dict((alg, h.hexdigest()) for alg, h in f_hashers.items()) @@ -1187,11 +1170,11 @@ def _load_tag_file(tag_file_name, encoding="utf-8-sig"): def _parse_tags(tag_file): """Parses a tag file, according to RFC 2822. This - includes line folding, permitting extra-long - field values. + includes line folding, permitting extra-long + field values. - See http://www.faqs.org/rfcs/rfc2822.html for - more information. + See http://www.faqs.org/rfcs/rfc2822.html for + more information. 
""" tag_name = None @@ -1237,7 +1220,7 @@ def _make_tag_file(bag_info_path, bag_info): values = [values] for txt in values: # strip CR, LF and CRLF so they don't mess up the tag file - txt = re.sub(r"\n|\r|(\r\n)", "", force_unicode(txt)) + txt = re.sub(r"\n|\r|(\r\n)", "", str(txt)) f.write("%s: %s\n" % (h, txt)) @@ -1295,14 +1278,14 @@ def make_manifests(data_dir, processes, algorithms=DEFAULT_CHECKSUMS, encoding=" def _make_tagmanifest_file(alg, bag_dir, encoding="utf-8"): - tagmanifest_file = join(bag_dir, "tagmanifest-%s.txt" % alg) + tagmanifest_file = os.path.join(bag_dir, "tagmanifest-%s.txt" % alg) LOGGER.info(_("Creating %s"), tagmanifest_file) checksums = [] for f in _find_tag_files(bag_dir): if re.match(r"^tagmanifest-.+\.txt$", f): continue - with open(join(bag_dir, f), "rb") as fh: + with open(os.path.join(bag_dir, f), "rb") as fh: m = hashlib.new(alg) while True: block = fh.read(HASH_BLOCK_SIZE) @@ -1312,7 +1295,7 @@ def _make_tagmanifest_file(alg, bag_dir, encoding="utf-8"): checksums.append((m.hexdigest(), f)) with open_text_file( - join(bag_dir, tagmanifest_file), mode="w", encoding=encoding + os.path.join(bag_dir, tagmanifest_file), mode="w", encoding=encoding ) as tagmanifest: for digest, filename in checksums: tagmanifest.write("%s %s\n" % (digest, filename)) @@ -1328,7 +1311,7 @@ def _find_tag_files(bag_dir): if filename.startswith("tagmanifest-"): continue # remove everything up to the bag_dir directory - p = join(dir_name, filename) + p = os.path.join(dir_name, filename) yield os.path.relpath(p, bag_dir) @@ -1433,19 +1416,6 @@ def _decode_filename(s): return s -def force_unicode_py2(s): - """Reliably return a Unicode string given a possible unicode or byte string""" - if isinstance(s, str): - return s.decode("utf-8") - else: - return unicode(s) - - -if sys.version_info > (3, 0): - force_unicode = str -else: - force_unicode = force_unicode_py2 - # following code is used for command line program @@ -1531,7 +1501,10 @@ def _make_parser(): metadata_args = parser.add_argument_group(_("Optional Bag Metadata")) for header in STANDARD_BAG_INFO_HEADERS: metadata_args.add_argument( - "--%s" % header.lower(), type=str, action=BagHeaderAction, default=argparse.SUPPRESS + "--%s" % header.lower(), + type=str, + action=BagHeaderAction, + default=argparse.SUPPRESS, ) parser.add_argument( @@ -1574,7 +1547,9 @@ def main(): parser.error(_("--fast is only allowed as an option for --validate!")) if args.completeness_only and not args.validate: - parser.error(_("--completeness-only is only allowed as an option for --validate!")) + parser.error( + _("--completeness-only is only allowed as an option for --validate!") + ) _configure_logging(args) @@ -1593,7 +1568,9 @@ def main(): if args.fast: LOGGER.info(_("%s valid according to Payload-Oxum"), bag_dir) elif args.completeness_only: - LOGGER.info(_("%s is complete and valid according to Payload-Oxum"), bag_dir) + LOGGER.info( + _("%s is complete and valid according to Payload-Oxum"), bag_dir + ) else: LOGGER.info(_("%s is valid"), bag_dir) except BagError as e: diff --git a/internal/dist/data/darwin-amd64/bin/bagit.py b/internal/dist/data/darwin-amd64/bin/bagit.py index fb620d1..a550bb6 100644 --- a/internal/dist/data/darwin-amd64/bin/bagit.py +++ b/internal/dist/data/darwin-amd64/bin/bagit.py @@ -1,8 +1,6 @@ #!/tmp/python-pip-darwin-amd64/bin/python3 # -*- coding: utf-8 -*- -from __future__ import absolute_import, division, print_function, unicode_literals - import argparse import codecs import gettext @@ -19,7 +17,6 @@ from collections 
import defaultdict from datetime import date from functools import partial -from os.path import abspath, isdir, isfile, join try: from importlib.metadata import version @@ -42,10 +39,8 @@ def find_locale_dir(): TRANSLATION_CATALOG = gettext.translation( "bagit-python", localedir=find_locale_dir(), fallback=True ) -if sys.version_info < (3,): - _ = TRANSLATION_CATALOG.ugettext -else: - _ = TRANSLATION_CATALOG.gettext + +_ = TRANSLATION_CATALOG.gettext MODULE_NAME = "bagit" if __name__ == "__main__" else __name__ @@ -140,7 +135,7 @@ def find_locale_dir(): open_text_file = partial(codecs.open, encoding="utf-8", errors="strict") # This is the same as decoding the byte values in codecs.BOM: -UNICODE_BYTE_ORDER_MARK = "\uFEFF" +UNICODE_BYTE_ORDER_MARK = "\ufeff" def make_bag( @@ -282,7 +277,7 @@ class Bag(object): valid_files = ["bagit.txt", "fetch.txt"] valid_directories = ["data"] - def __init__(self, path=None): + def __init__(self, path): super(Bag, self).__init__() self.tags = {} self.info = {} @@ -304,12 +299,8 @@ def __init__(self, path=None): self.algorithms = [] self.tag_file_name = None - self.path = abspath(path) - if path: - # if path ends in a path separator, strip it off - if path[-1] == os.sep: - self.path = path[:-1] - self._open() + self.path = os.path.abspath(path) + self._open() def __str__(self): # FIXME: develop a more informative string representation for a Bag @@ -333,7 +324,7 @@ def _open(self): # the required version and encoding. bagit_file_path = os.path.join(self.path, "bagit.txt") - if not isfile(bagit_file_path): + if not os.path.isfile(bagit_file_path): raise BagError(_("Expected bagit.txt does not exist: %s") % bagit_file_path) self.tags = tags = _load_tag_file(bagit_file_path) @@ -382,13 +373,13 @@ def _open(self): def manifest_files(self): for filename in ["manifest-%s.txt" % a for a in CHECKSUM_ALGOS]: f = os.path.join(self.path, filename) - if isfile(f): + if os.path.isfile(f): yield f def tagmanifest_files(self): for filename in ["tagmanifest-%s.txt" % a for a in CHECKSUM_ALGOS]: f = os.path.join(self.path, filename) - if isfile(f): + if os.path.isfile(f): yield f def compare_manifests_with_fs(self): @@ -422,8 +413,8 @@ def compare_manifests_with_fs(self): def compare_fetch_with_fs(self): """Compares the fetch entries with the files actually - in the payload, and returns a list of all the files - that still need to be fetched. + in the payload, and returns a list of all the files + that still need to be fetched. 
""" files_on_fs = set(self.payload_files()) @@ -449,7 +440,7 @@ def payload_files(self): yield rel_path def payload_entries(self): - """Return a dictionary of items """ + """Return a dictionary of items""" # Don't use dict comprehension (compatibility with Python < 2.7) return dict( (key, value) @@ -562,7 +553,7 @@ def fetch_entries(self): fetch_file_path = os.path.join(self.path, "fetch.txt") - if isfile(fetch_file_path): + if os.path.isfile(fetch_file_path): with open_text_file( fetch_file_path, "r", encoding=self.encoding ) as fetch_file: @@ -618,7 +609,9 @@ def is_valid(self, processes=1, fast=False, completeness_only=False): """ try: - self.validate(processes=processes, fast=fast, completeness_only=completeness_only) + self.validate( + processes=processes, fast=fast, completeness_only=completeness_only + ) except BagError: return False @@ -746,7 +739,7 @@ def _validate_structure(self): def _validate_structure_payload_directory(self): data_dir_path = os.path.join(self.path, "data") - if not isdir(data_dir_path): + if not os.path.isdir(data_dir_path): raise BagValidationError( _("Expected data directory %s does not exist") % data_dir_path ) @@ -776,7 +769,10 @@ def validate_fetch(self): # each parsed url must resolve to a scheme and point to a netloc # if the scheme is file, netloc is not necessary - if not (all((parsed_url.scheme, parsed_url.netloc)) or parsed_url.scheme == "file"): + if not ( + all((parsed_url.scheme, parsed_url.netloc)) + or parsed_url.scheme == "file" + ): raise BagError(_("Malformed URL in fetch.txt: %s") % url) def _validate_contents(self, processes=1, fast=False, completeness_only=False): @@ -851,11 +847,11 @@ def _validate_completeness(self): only_in_manifests, only_on_fs = self.compare_manifests_with_fs() for path in only_in_manifests: e = FileMissing(path) - LOGGER.warning(force_unicode(e)) + LOGGER.warning(str(e)) errors.append(e) for path in only_on_fs: e = UnexpectedFile(path) - LOGGER.warning(force_unicode(e)) + LOGGER.warning(str(e)) errors.append(e) if errors: @@ -886,13 +882,12 @@ def _validate_entries(self, processes): if processes == 1: hash_results = [_calc_hashes(i) for i in args] else: - try: - pool = multiprocessing.Pool( - processes if processes else None, initializer=worker_init - ) - hash_results = pool.map(_calc_hashes, args) - finally: - pool.terminate() + pool = multiprocessing.Pool( + processes if processes else None, initializer=worker_init + ) + hash_results = pool.map(_calc_hashes, args) + pool.close() + pool.join() # Any unhandled exceptions are probably fatal except: @@ -906,7 +901,7 @@ def _validate_entries(self, processes): e = ChecksumMismatch( rel_path, alg, stored_hash.lower(), computed_hash ) - LOGGER.warning(force_unicode(e)) + LOGGER.warning(str(e)) errors.append(e) if errors: @@ -963,7 +958,7 @@ def __init__(self, message, details=None): def __str__(self): if len(self.details) > 0: - details = "; ".join([force_unicode(e) for e in self.details]) + details = "; ".join([str(e) for e in self.details]) return "%s: %s" % (self.message, details) return self.message @@ -988,7 +983,7 @@ def __str__(self): return _( '%(path)s %(algorithm)s validation failed: expected="%(expected)s" found="%(found)s"' ) % { - "path": force_unicode(self.path), + "path": str(self.path), "algorithm": self.algorithm, "expected": self.expected, "found": self.found, @@ -997,9 +992,9 @@ def __str__(self): class FileMissing(ManifestErrorDetail): def __str__(self): - return _( - "%s exists in manifest but was not found on filesystem" - ) % 
force_unicode(self.path) + return _("%s exists in manifest but was not found on filesystem") % str( + self.path + ) class UnexpectedFile(ManifestErrorDetail): @@ -1034,22 +1029,10 @@ def posix_multiprocessing_worker_initializer(): # is consistency since the input value will be preserved: -def normalize_unicode_py3(s): +def normalize_unicode(s): return unicodedata.normalize("NFC", s) -def normalize_unicode_py2(s): - if isinstance(s, str): - s = s.decode("utf-8") - return unicodedata.normalize("NFC", s) - - -if sys.version_info > (3, 0): - normalize_unicode = normalize_unicode_py3 -else: - normalize_unicode = normalize_unicode_py2 - - def build_unicode_normalized_lookup_dict(filenames): """ Return a dictionary mapping unicode-normalized filenames to as-encoded @@ -1138,7 +1121,7 @@ def _calc_hashes(args): try: f_hashes = _calculate_file_hashes(full_path, f_hashers) except BagValidationError as e: - f_hashes = dict((alg, force_unicode(e)) for alg in f_hashers.keys()) + f_hashes = dict((alg, str(e)) for alg in f_hashers.keys()) return rel_path, f_hashes, hashes @@ -1161,7 +1144,7 @@ def _calculate_file_hashes(full_path, f_hashers): except (OSError, IOError) as e: raise BagValidationError( _("Could not read %(filename)s: %(error)s") - % {"filename": full_path, "error": force_unicode(e)} + % {"filename": full_path, "error": str(e)} ) return dict((alg, h.hexdigest()) for alg, h in f_hashers.items()) @@ -1187,11 +1170,11 @@ def _load_tag_file(tag_file_name, encoding="utf-8-sig"): def _parse_tags(tag_file): """Parses a tag file, according to RFC 2822. This - includes line folding, permitting extra-long - field values. + includes line folding, permitting extra-long + field values. - See http://www.faqs.org/rfcs/rfc2822.html for - more information. + See http://www.faqs.org/rfcs/rfc2822.html for + more information. 
""" tag_name = None @@ -1237,7 +1220,7 @@ def _make_tag_file(bag_info_path, bag_info): values = [values] for txt in values: # strip CR, LF and CRLF so they don't mess up the tag file - txt = re.sub(r"\n|\r|(\r\n)", "", force_unicode(txt)) + txt = re.sub(r"\n|\r|(\r\n)", "", str(txt)) f.write("%s: %s\n" % (h, txt)) @@ -1295,14 +1278,14 @@ def make_manifests(data_dir, processes, algorithms=DEFAULT_CHECKSUMS, encoding=" def _make_tagmanifest_file(alg, bag_dir, encoding="utf-8"): - tagmanifest_file = join(bag_dir, "tagmanifest-%s.txt" % alg) + tagmanifest_file = os.path.join(bag_dir, "tagmanifest-%s.txt" % alg) LOGGER.info(_("Creating %s"), tagmanifest_file) checksums = [] for f in _find_tag_files(bag_dir): if re.match(r"^tagmanifest-.+\.txt$", f): continue - with open(join(bag_dir, f), "rb") as fh: + with open(os.path.join(bag_dir, f), "rb") as fh: m = hashlib.new(alg) while True: block = fh.read(HASH_BLOCK_SIZE) @@ -1312,7 +1295,7 @@ def _make_tagmanifest_file(alg, bag_dir, encoding="utf-8"): checksums.append((m.hexdigest(), f)) with open_text_file( - join(bag_dir, tagmanifest_file), mode="w", encoding=encoding + os.path.join(bag_dir, tagmanifest_file), mode="w", encoding=encoding ) as tagmanifest: for digest, filename in checksums: tagmanifest.write("%s %s\n" % (digest, filename)) @@ -1328,7 +1311,7 @@ def _find_tag_files(bag_dir): if filename.startswith("tagmanifest-"): continue # remove everything up to the bag_dir directory - p = join(dir_name, filename) + p = os.path.join(dir_name, filename) yield os.path.relpath(p, bag_dir) @@ -1433,19 +1416,6 @@ def _decode_filename(s): return s -def force_unicode_py2(s): - """Reliably return a Unicode string given a possible unicode or byte string""" - if isinstance(s, str): - return s.decode("utf-8") - else: - return unicode(s) - - -if sys.version_info > (3, 0): - force_unicode = str -else: - force_unicode = force_unicode_py2 - # following code is used for command line program @@ -1531,7 +1501,10 @@ def _make_parser(): metadata_args = parser.add_argument_group(_("Optional Bag Metadata")) for header in STANDARD_BAG_INFO_HEADERS: metadata_args.add_argument( - "--%s" % header.lower(), type=str, action=BagHeaderAction, default=argparse.SUPPRESS + "--%s" % header.lower(), + type=str, + action=BagHeaderAction, + default=argparse.SUPPRESS, ) parser.add_argument( @@ -1574,7 +1547,9 @@ def main(): parser.error(_("--fast is only allowed as an option for --validate!")) if args.completeness_only and not args.validate: - parser.error(_("--completeness-only is only allowed as an option for --validate!")) + parser.error( + _("--completeness-only is only allowed as an option for --validate!") + ) _configure_logging(args) @@ -1593,7 +1568,9 @@ def main(): if args.fast: LOGGER.info(_("%s valid according to Payload-Oxum"), bag_dir) elif args.completeness_only: - LOGGER.info(_("%s is complete and valid according to Payload-Oxum"), bag_dir) + LOGGER.info( + _("%s is complete and valid according to Payload-Oxum"), bag_dir + ) else: LOGGER.info(_("%s is valid"), bag_dir) except BagError as e: diff --git a/internal/dist/data/darwin-amd64/files.json b/internal/dist/data/darwin-amd64/files.json index eb30964..dd0eff5 100644 --- a/internal/dist/data/darwin-amd64/files.json +++ b/internal/dist/data/darwin-amd64/files.json @@ -1,94 +1,79 @@ { - "contentHash": "83a9c8b1872040c578b029df5be45db97dc02b7025aa313c6c69f54850dfc425", + "contentHash": "7176b1710dd2ebb34124b242c7fc1e844cc5053d80901c3eb0966db899f49445", "files": [ { - "name": "bagit-1.8.1.dev26+g5b77243.dist-info", + 
"name": "bagit-1.9b3.dev3+gda04180.dist-info", "size": 0, - "modTime": 1713519592, "perm": 2147484157 }, { - "name": "bagit-1.8.1.dev26+g5b77243.dist-info/INSTALLER", + "name": "bagit-1.9b3.dev3+gda04180.dist-info/INSTALLER", "size": 4, - "modTime": 1713519592, "perm": 436 }, { - "name": "bagit-1.8.1.dev26+g5b77243.dist-info/METADATA", - "size": 8167, - "modTime": 1713519592, + "name": "bagit-1.9b3.dev3+gda04180.dist-info/METADATA", + "size": 7527, "perm": 436 }, { - "name": "bagit-1.8.1.dev26+g5b77243.dist-info/RECORD", - "size": 994, - "modTime": 1713519592, + "name": "bagit-1.9b3.dev3+gda04180.dist-info/RECORD", + "size": 986, "perm": 436 }, { - "name": "bagit-1.8.1.dev26+g5b77243.dist-info/REQUESTED", + "name": "bagit-1.9b3.dev3+gda04180.dist-info/REQUESTED", "size": 0, - "modTime": 1713519592, "perm": 436 }, { - "name": "bagit-1.8.1.dev26+g5b77243.dist-info/WHEEL", - "size": 110, - "modTime": 1713519592, + "name": "bagit-1.9b3.dev3+gda04180.dist-info/WHEEL", + "size": 91, "perm": 436 }, { - "name": "bagit-1.8.1.dev26+g5b77243.dist-info/direct_url.json", + "name": "bagit-1.9b3.dev3+gda04180.dist-info/direct_url.json", "size": 211, - "modTime": 1713519592, "perm": 436 }, { - "name": "bagit-1.8.1.dev26+g5b77243.dist-info/top_level.txt", + "name": "bagit-1.9b3.dev3+gda04180.dist-info/top_level.txt", "size": 6, - "modTime": 1713519592, "perm": 436 }, { "name": "bagit.py", - "size": 55530, - "modTime": 1713519592, + "size": 54701, "perm": 436 }, { "name": "bin", "size": 0, - "modTime": 1713519592, "perm": 2147484157 }, { "name": "bin/bagit.py", - "size": 55551, - "modTime": 1713519592, + "size": 54722, "perm": 509 }, { "name": "locale", "size": 0, - "modTime": 1713519592, "perm": 2147484157 }, { "name": "locale/en", "size": 0, - "modTime": 1713519592, "perm": 2147484157 }, { "name": "locale/en/LC_MESSAGES", "size": 0, - "modTime": 1713519592, "perm": 2147484157 }, { "name": "locale/en/LC_MESSAGES/bagit-python.mo", "size": 2808, - "modTime": 1713519592, "perm": 436, "compressed": true } diff --git a/internal/dist/data/darwin-arm64/bagit-1.8.1.dev26+g5b77243.dist-info/RECORD b/internal/dist/data/darwin-arm64/bagit-1.8.1.dev26+g5b77243.dist-info/RECORD deleted file mode 100644 index 3b071f1..0000000 --- a/internal/dist/data/darwin-arm64/bagit-1.8.1.dev26+g5b77243.dist-info/RECORD +++ /dev/null @@ -1,12 +0,0 @@ -../../bin/__pycache__/bagit.cpython-312.pyc,, -../../bin/bagit.py,sha256=9nFrkYa6ObdgZHcQE6V6BWJXC_UZ-vHQQgG3HBKLde4,55551 -../../locale/en/LC_MESSAGES/bagit-python.mo,sha256=onTi-42QABidt47Lx6KeFzbP-lQ0HU1X9fW2j1M8UK0,2808 -__pycache__/bagit.cpython-312.pyc,, -bagit-1.8.1.dev26+g5b77243.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 -bagit-1.8.1.dev26+g5b77243.dist-info/METADATA,sha256=crTvH7UCcMB5Ddz3n6u-xJWmtv0NLiga4pmQLNNHNe0,8167 -bagit-1.8.1.dev26+g5b77243.dist-info/RECORD,, -bagit-1.8.1.dev26+g5b77243.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 -bagit-1.8.1.dev26+g5b77243.dist-info/WHEEL,sha256=DZajD4pwLWue70CAfc7YaxT1wLUciNBvN_TTcvXpltE,110 -bagit-1.8.1.dev26+g5b77243.dist-info/direct_url.json,sha256=qk6jLuV7knLYcmZsdFksaCxxehTR2eGa8s8jqoewkv8,211 -bagit-1.8.1.dev26+g5b77243.dist-info/top_level.txt,sha256=f288ippe7ERSzETsEftza1dXq61Z4zFSXDgaTF6djpE,6 -bagit.py,sha256=_8AUO2ZmW5PdhTfTpJetie5lkyhFkomhXM_Fn8kzvrU,55530 diff --git a/internal/dist/data/darwin-arm64/bagit-1.8.1.dev26+g5b77243.dist-info/direct_url.json b/internal/dist/data/darwin-arm64/bagit-1.8.1.dev26+g5b77243.dist-info/direct_url.json deleted file mode 100644 
index f5592c2..0000000 --- a/internal/dist/data/darwin-arm64/bagit-1.8.1.dev26+g5b77243.dist-info/direct_url.json +++ /dev/null @@ -1 +0,0 @@ -{"url": "https://github.com/LibraryOfCongress/bagit-python", "vcs_info": {"commit_id": "5b7724356d8c5c3a3be9120814d4d996574485a9", "requested_revision": "5b7724356d8c5c3a3be9120814d4d996574485a9", "vcs": "git"}} \ No newline at end of file diff --git a/internal/dist/data/darwin-arm64/bagit-1.8.1.dev26+g5b77243.dist-info/INSTALLER b/internal/dist/data/darwin-arm64/bagit-1.9b3.dev3+gda04180.dist-info/INSTALLER similarity index 100% rename from internal/dist/data/darwin-arm64/bagit-1.8.1.dev26+g5b77243.dist-info/INSTALLER rename to internal/dist/data/darwin-arm64/bagit-1.9b3.dev3+gda04180.dist-info/INSTALLER diff --git a/internal/dist/data/darwin-arm64/bagit-1.8.1.dev26+g5b77243.dist-info/METADATA b/internal/dist/data/darwin-arm64/bagit-1.9b3.dev3+gda04180.dist-info/METADATA similarity index 88% rename from internal/dist/data/darwin-arm64/bagit-1.8.1.dev26+g5b77243.dist-info/METADATA rename to internal/dist/data/darwin-arm64/bagit-1.9b3.dev3+gda04180.dist-info/METADATA index 81a7c6b..d5f5d35 100644 --- a/internal/dist/data/darwin-arm64/bagit-1.8.1.dev26+g5b77243.dist-info/METADATA +++ b/internal/dist/data/darwin-arm64/bagit-1.9b3.dev3+gda04180.dist-info/METADATA @@ -1,34 +1,23 @@ Metadata-Version: 2.1 Name: bagit -Version: 1.8.1.dev26+g5b77243 +Version: 1.9b3.dev3+gda04180 Summary: Create and validate BagIt packages Home-page: https://libraryofcongress.github.io/bagit-python/ Author: Ed Summers -Author-email: ehs@pobox.com +Author-email: Ed Summers +Project-URL: Homepage, https://libraryofcongress.github.io/bagit-python/ Platform: POSIX -Classifier: License :: Public Domain Classifier: Intended Audience :: Developers +Classifier: License :: Public Domain +Classifier: Programming Language :: Python :: 3 Classifier: Topic :: Communications :: File Sharing Classifier: Topic :: Software Development :: Libraries :: Python Modules Classifier: Topic :: System :: Filesystems -Classifier: Programming Language :: Python :: 2 -Classifier: Programming Language :: Python :: 2.7 -Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3.1 -Classifier: Programming Language :: Python :: 3.2 -Classifier: Programming Language :: Python :: 3.3 -Classifier: Programming Language :: Python :: 3.4 -Classifier: Programming Language :: Python :: 3.5 -Classifier: Programming Language :: Python :: 3.6 -Classifier: Programming Language :: Python :: 3.7 -Classifier: Programming Language :: Python :: 3.8 -Requires-Dist: importlib-metadata ; python_version < "3.8" +Description-Content-Type: text/x-rst bagit-python ============ -|Build Status| |Coverage Status| - bagit is a Python library and command line utility for working with `BagIt `__ style packages. @@ -42,7 +31,7 @@ project as needed or you can install globally with: pip install bagit -Python v2.7+ is required. +A supported version of Python 3 is required. 
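
Note: each platform copy carries the same README update; the "Running the tests" section (see the darwin-amd64 hunk above, repeated for darwin-arm64 below) replaces `python setup.py test` and tox with the built-in runner, `python -m unittest discover`. A minimal programmatic equivalent, assuming the tests live as `test*.py` files in the current directory (the helper name is hypothetical):

    # run_tests.py -- hypothetical equivalent of `python -m unittest discover`
    import sys
    import unittest

    def main() -> int:
        suite = unittest.TestLoader().discover(start_dir=".", pattern="test*.py")
        result = unittest.TextTestRunner(verbosity=2).run(suite)
        return 0 if result.wasSuccessful() else 1

    if __name__ == "__main__":
        sys.exit(main())
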
Command Line Usage ------------------ @@ -252,20 +241,11 @@ Contributing to bagit-python development Running the tests ~~~~~~~~~~~~~~~~~ -You can quickly run the tests by having setuptools install dependencies: - -:: - - python setup.py test - -Once your code is working, you can use -`Tox `__ to run the tests with every -supported version of Python which you have installed on the local -system: +You can quickly run the tests using the built-in unittest framework: :: - tox + python -m unittest discover If you have Docker installed, you can run the tests under Linux inside a container: diff --git a/internal/dist/data/darwin-arm64/bagit-1.9b3.dev3+gda04180.dist-info/RECORD b/internal/dist/data/darwin-arm64/bagit-1.9b3.dev3+gda04180.dist-info/RECORD new file mode 100644 index 0000000..c722418 --- /dev/null +++ b/internal/dist/data/darwin-arm64/bagit-1.9b3.dev3+gda04180.dist-info/RECORD @@ -0,0 +1,12 @@ +../../bin/__pycache__/bagit.cpython-312.pyc,, +../../bin/bagit.py,sha256=MyoXQvpHyW5oOQr0XPh_JMwMDpzcuW3vqSzO8rrP2Fo,54722 +../../locale/en/LC_MESSAGES/bagit-python.mo,sha256=onTi-42QABidt47Lx6KeFzbP-lQ0HU1X9fW2j1M8UK0,2808 +__pycache__/bagit.cpython-312.pyc,, +bagit-1.9b3.dev3+gda04180.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +bagit-1.9b3.dev3+gda04180.dist-info/METADATA,sha256=xeBL-vZhcdQpG_M3AvddWJ-YvQ_BCxJ8UUBmmTfizXA,7527 +bagit-1.9b3.dev3+gda04180.dist-info/RECORD,, +bagit-1.9b3.dev3+gda04180.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +bagit-1.9b3.dev3+gda04180.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91 +bagit-1.9b3.dev3+gda04180.dist-info/direct_url.json,sha256=2hbjksYnB7HjDIZNtDaTebBXsdAYM2ZI_reqL50sFWw,211 +bagit-1.9b3.dev3+gda04180.dist-info/top_level.txt,sha256=f288ippe7ERSzETsEftza1dXq61Z4zFSXDgaTF6djpE,6 +bagit.py,sha256=L3uPbWYK4GPwRlCTWSnzMpOukmVWc-tmLxfHzACB-5w,54701 diff --git a/internal/dist/data/darwin-arm64/bagit-1.8.1.dev26+g5b77243.dist-info/REQUESTED b/internal/dist/data/darwin-arm64/bagit-1.9b3.dev3+gda04180.dist-info/REQUESTED similarity index 100% rename from internal/dist/data/darwin-arm64/bagit-1.8.1.dev26+g5b77243.dist-info/REQUESTED rename to internal/dist/data/darwin-arm64/bagit-1.9b3.dev3+gda04180.dist-info/REQUESTED diff --git a/internal/dist/data/linux-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/WHEEL b/internal/dist/data/darwin-arm64/bagit-1.9b3.dev3+gda04180.dist-info/WHEEL similarity index 54% rename from internal/dist/data/linux-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/WHEEL rename to internal/dist/data/darwin-arm64/bagit-1.9b3.dev3+gda04180.dist-info/WHEEL index 832be11..da25d7b 100644 --- a/internal/dist/data/linux-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/WHEEL +++ b/internal/dist/data/darwin-arm64/bagit-1.9b3.dev3+gda04180.dist-info/WHEEL @@ -1,6 +1,5 @@ Wheel-Version: 1.0 -Generator: bdist_wheel (0.43.0) +Generator: setuptools (75.2.0) Root-Is-Purelib: true -Tag: py2-none-any Tag: py3-none-any diff --git a/internal/dist/data/darwin-arm64/bagit-1.9b3.dev3+gda04180.dist-info/direct_url.json b/internal/dist/data/darwin-arm64/bagit-1.9b3.dev3+gda04180.dist-info/direct_url.json new file mode 100644 index 0000000..71df225 --- /dev/null +++ b/internal/dist/data/darwin-arm64/bagit-1.9b3.dev3+gda04180.dist-info/direct_url.json @@ -0,0 +1 @@ +{"url": "https://github.com/LibraryOfCongress/bagit-python", "vcs_info": {"commit_id": "da041808d2453da2209054a9f4c48187dc323c0a", "requested_revision": "da041808d2453da2209054a9f4c48187dc323c0a", "vcs": "git"}} \ No newline at end of 
file diff --git a/internal/dist/data/darwin-arm64/bagit-1.8.1.dev26+g5b77243.dist-info/top_level.txt b/internal/dist/data/darwin-arm64/bagit-1.9b3.dev3+gda04180.dist-info/top_level.txt similarity index 100% rename from internal/dist/data/darwin-arm64/bagit-1.8.1.dev26+g5b77243.dist-info/top_level.txt rename to internal/dist/data/darwin-arm64/bagit-1.9b3.dev3+gda04180.dist-info/top_level.txt diff --git a/internal/dist/data/darwin-arm64/bagit.py b/internal/dist/data/darwin-arm64/bagit.py index 458fba8..69ea9ab 100644 --- a/internal/dist/data/darwin-arm64/bagit.py +++ b/internal/dist/data/darwin-arm64/bagit.py @@ -1,8 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -from __future__ import absolute_import, division, print_function, unicode_literals - import argparse import codecs import gettext @@ -19,7 +17,6 @@ from collections import defaultdict from datetime import date from functools import partial -from os.path import abspath, isdir, isfile, join try: from importlib.metadata import version @@ -42,10 +39,8 @@ def find_locale_dir(): TRANSLATION_CATALOG = gettext.translation( "bagit-python", localedir=find_locale_dir(), fallback=True ) -if sys.version_info < (3,): - _ = TRANSLATION_CATALOG.ugettext -else: - _ = TRANSLATION_CATALOG.gettext + +_ = TRANSLATION_CATALOG.gettext MODULE_NAME = "bagit" if __name__ == "__main__" else __name__ @@ -140,7 +135,7 @@ def find_locale_dir(): open_text_file = partial(codecs.open, encoding="utf-8", errors="strict") # This is the same as decoding the byte values in codecs.BOM: -UNICODE_BYTE_ORDER_MARK = "\uFEFF" +UNICODE_BYTE_ORDER_MARK = "\ufeff" def make_bag( @@ -282,7 +277,7 @@ class Bag(object): valid_files = ["bagit.txt", "fetch.txt"] valid_directories = ["data"] - def __init__(self, path=None): + def __init__(self, path): super(Bag, self).__init__() self.tags = {} self.info = {} @@ -304,12 +299,8 @@ def __init__(self, path=None): self.algorithms = [] self.tag_file_name = None - self.path = abspath(path) - if path: - # if path ends in a path separator, strip it off - if path[-1] == os.sep: - self.path = path[:-1] - self._open() + self.path = os.path.abspath(path) + self._open() def __str__(self): # FIXME: develop a more informative string representation for a Bag @@ -333,7 +324,7 @@ def _open(self): # the required version and encoding. bagit_file_path = os.path.join(self.path, "bagit.txt") - if not isfile(bagit_file_path): + if not os.path.isfile(bagit_file_path): raise BagError(_("Expected bagit.txt does not exist: %s") % bagit_file_path) self.tags = tags = _load_tag_file(bagit_file_path) @@ -382,13 +373,13 @@ def _open(self): def manifest_files(self): for filename in ["manifest-%s.txt" % a for a in CHECKSUM_ALGOS]: f = os.path.join(self.path, filename) - if isfile(f): + if os.path.isfile(f): yield f def tagmanifest_files(self): for filename in ["tagmanifest-%s.txt" % a for a in CHECKSUM_ALGOS]: f = os.path.join(self.path, filename) - if isfile(f): + if os.path.isfile(f): yield f def compare_manifests_with_fs(self): @@ -422,8 +413,8 @@ def compare_manifests_with_fs(self): def compare_fetch_with_fs(self): """Compares the fetch entries with the files actually - in the payload, and returns a list of all the files - that still need to be fetched. + in the payload, and returns a list of all the files + that still need to be fetched. 
""" files_on_fs = set(self.payload_files()) @@ -449,7 +440,7 @@ def payload_files(self): yield rel_path def payload_entries(self): - """Return a dictionary of items """ + """Return a dictionary of items""" # Don't use dict comprehension (compatibility with Python < 2.7) return dict( (key, value) @@ -562,7 +553,7 @@ def fetch_entries(self): fetch_file_path = os.path.join(self.path, "fetch.txt") - if isfile(fetch_file_path): + if os.path.isfile(fetch_file_path): with open_text_file( fetch_file_path, "r", encoding=self.encoding ) as fetch_file: @@ -618,7 +609,9 @@ def is_valid(self, processes=1, fast=False, completeness_only=False): """ try: - self.validate(processes=processes, fast=fast, completeness_only=completeness_only) + self.validate( + processes=processes, fast=fast, completeness_only=completeness_only + ) except BagError: return False @@ -746,7 +739,7 @@ def _validate_structure(self): def _validate_structure_payload_directory(self): data_dir_path = os.path.join(self.path, "data") - if not isdir(data_dir_path): + if not os.path.isdir(data_dir_path): raise BagValidationError( _("Expected data directory %s does not exist") % data_dir_path ) @@ -776,7 +769,10 @@ def validate_fetch(self): # each parsed url must resolve to a scheme and point to a netloc # if the scheme is file, netloc is not necessary - if not (all((parsed_url.scheme, parsed_url.netloc)) or parsed_url.scheme == "file"): + if not ( + all((parsed_url.scheme, parsed_url.netloc)) + or parsed_url.scheme == "file" + ): raise BagError(_("Malformed URL in fetch.txt: %s") % url) def _validate_contents(self, processes=1, fast=False, completeness_only=False): @@ -851,11 +847,11 @@ def _validate_completeness(self): only_in_manifests, only_on_fs = self.compare_manifests_with_fs() for path in only_in_manifests: e = FileMissing(path) - LOGGER.warning(force_unicode(e)) + LOGGER.warning(str(e)) errors.append(e) for path in only_on_fs: e = UnexpectedFile(path) - LOGGER.warning(force_unicode(e)) + LOGGER.warning(str(e)) errors.append(e) if errors: @@ -886,13 +882,12 @@ def _validate_entries(self, processes): if processes == 1: hash_results = [_calc_hashes(i) for i in args] else: - try: - pool = multiprocessing.Pool( - processes if processes else None, initializer=worker_init - ) - hash_results = pool.map(_calc_hashes, args) - finally: - pool.terminate() + pool = multiprocessing.Pool( + processes if processes else None, initializer=worker_init + ) + hash_results = pool.map(_calc_hashes, args) + pool.close() + pool.join() # Any unhandled exceptions are probably fatal except: @@ -906,7 +901,7 @@ def _validate_entries(self, processes): e = ChecksumMismatch( rel_path, alg, stored_hash.lower(), computed_hash ) - LOGGER.warning(force_unicode(e)) + LOGGER.warning(str(e)) errors.append(e) if errors: @@ -963,7 +958,7 @@ def __init__(self, message, details=None): def __str__(self): if len(self.details) > 0: - details = "; ".join([force_unicode(e) for e in self.details]) + details = "; ".join([str(e) for e in self.details]) return "%s: %s" % (self.message, details) return self.message @@ -988,7 +983,7 @@ def __str__(self): return _( '%(path)s %(algorithm)s validation failed: expected="%(expected)s" found="%(found)s"' ) % { - "path": force_unicode(self.path), + "path": str(self.path), "algorithm": self.algorithm, "expected": self.expected, "found": self.found, @@ -997,9 +992,9 @@ def __str__(self): class FileMissing(ManifestErrorDetail): def __str__(self): - return _( - "%s exists in manifest but was not found on filesystem" - ) % 
force_unicode(self.path) + return _("%s exists in manifest but was not found on filesystem") % str( + self.path + ) class UnexpectedFile(ManifestErrorDetail): @@ -1034,22 +1029,10 @@ def posix_multiprocessing_worker_initializer(): # is consistency since the input value will be preserved: -def normalize_unicode_py3(s): +def normalize_unicode(s): return unicodedata.normalize("NFC", s) -def normalize_unicode_py2(s): - if isinstance(s, str): - s = s.decode("utf-8") - return unicodedata.normalize("NFC", s) - - -if sys.version_info > (3, 0): - normalize_unicode = normalize_unicode_py3 -else: - normalize_unicode = normalize_unicode_py2 - - def build_unicode_normalized_lookup_dict(filenames): """ Return a dictionary mapping unicode-normalized filenames to as-encoded @@ -1138,7 +1121,7 @@ def _calc_hashes(args): try: f_hashes = _calculate_file_hashes(full_path, f_hashers) except BagValidationError as e: - f_hashes = dict((alg, force_unicode(e)) for alg in f_hashers.keys()) + f_hashes = dict((alg, str(e)) for alg in f_hashers.keys()) return rel_path, f_hashes, hashes @@ -1161,7 +1144,7 @@ def _calculate_file_hashes(full_path, f_hashers): except (OSError, IOError) as e: raise BagValidationError( _("Could not read %(filename)s: %(error)s") - % {"filename": full_path, "error": force_unicode(e)} + % {"filename": full_path, "error": str(e)} ) return dict((alg, h.hexdigest()) for alg, h in f_hashers.items()) @@ -1187,11 +1170,11 @@ def _load_tag_file(tag_file_name, encoding="utf-8-sig"): def _parse_tags(tag_file): """Parses a tag file, according to RFC 2822. This - includes line folding, permitting extra-long - field values. + includes line folding, permitting extra-long + field values. - See http://www.faqs.org/rfcs/rfc2822.html for - more information. + See http://www.faqs.org/rfcs/rfc2822.html for + more information. 
""" tag_name = None @@ -1237,7 +1220,7 @@ def _make_tag_file(bag_info_path, bag_info): values = [values] for txt in values: # strip CR, LF and CRLF so they don't mess up the tag file - txt = re.sub(r"\n|\r|(\r\n)", "", force_unicode(txt)) + txt = re.sub(r"\n|\r|(\r\n)", "", str(txt)) f.write("%s: %s\n" % (h, txt)) @@ -1295,14 +1278,14 @@ def make_manifests(data_dir, processes, algorithms=DEFAULT_CHECKSUMS, encoding=" def _make_tagmanifest_file(alg, bag_dir, encoding="utf-8"): - tagmanifest_file = join(bag_dir, "tagmanifest-%s.txt" % alg) + tagmanifest_file = os.path.join(bag_dir, "tagmanifest-%s.txt" % alg) LOGGER.info(_("Creating %s"), tagmanifest_file) checksums = [] for f in _find_tag_files(bag_dir): if re.match(r"^tagmanifest-.+\.txt$", f): continue - with open(join(bag_dir, f), "rb") as fh: + with open(os.path.join(bag_dir, f), "rb") as fh: m = hashlib.new(alg) while True: block = fh.read(HASH_BLOCK_SIZE) @@ -1312,7 +1295,7 @@ def _make_tagmanifest_file(alg, bag_dir, encoding="utf-8"): checksums.append((m.hexdigest(), f)) with open_text_file( - join(bag_dir, tagmanifest_file), mode="w", encoding=encoding + os.path.join(bag_dir, tagmanifest_file), mode="w", encoding=encoding ) as tagmanifest: for digest, filename in checksums: tagmanifest.write("%s %s\n" % (digest, filename)) @@ -1328,7 +1311,7 @@ def _find_tag_files(bag_dir): if filename.startswith("tagmanifest-"): continue # remove everything up to the bag_dir directory - p = join(dir_name, filename) + p = os.path.join(dir_name, filename) yield os.path.relpath(p, bag_dir) @@ -1433,19 +1416,6 @@ def _decode_filename(s): return s -def force_unicode_py2(s): - """Reliably return a Unicode string given a possible unicode or byte string""" - if isinstance(s, str): - return s.decode("utf-8") - else: - return unicode(s) - - -if sys.version_info > (3, 0): - force_unicode = str -else: - force_unicode = force_unicode_py2 - # following code is used for command line program @@ -1531,7 +1501,10 @@ def _make_parser(): metadata_args = parser.add_argument_group(_("Optional Bag Metadata")) for header in STANDARD_BAG_INFO_HEADERS: metadata_args.add_argument( - "--%s" % header.lower(), type=str, action=BagHeaderAction, default=argparse.SUPPRESS + "--%s" % header.lower(), + type=str, + action=BagHeaderAction, + default=argparse.SUPPRESS, ) parser.add_argument( @@ -1574,7 +1547,9 @@ def main(): parser.error(_("--fast is only allowed as an option for --validate!")) if args.completeness_only and not args.validate: - parser.error(_("--completeness-only is only allowed as an option for --validate!")) + parser.error( + _("--completeness-only is only allowed as an option for --validate!") + ) _configure_logging(args) @@ -1593,7 +1568,9 @@ def main(): if args.fast: LOGGER.info(_("%s valid according to Payload-Oxum"), bag_dir) elif args.completeness_only: - LOGGER.info(_("%s is complete and valid according to Payload-Oxum"), bag_dir) + LOGGER.info( + _("%s is complete and valid according to Payload-Oxum"), bag_dir + ) else: LOGGER.info(_("%s is valid"), bag_dir) except BagError as e: diff --git a/internal/dist/data/darwin-arm64/bin/bagit.py b/internal/dist/data/darwin-arm64/bin/bagit.py index 920193e..f9b8b51 100644 --- a/internal/dist/data/darwin-arm64/bin/bagit.py +++ b/internal/dist/data/darwin-arm64/bin/bagit.py @@ -1,8 +1,6 @@ #!/tmp/python-pip-darwin-arm64/bin/python3 # -*- coding: utf-8 -*- -from __future__ import absolute_import, division, print_function, unicode_literals - import argparse import codecs import gettext @@ -19,7 +17,6 @@ from collections 
import defaultdict from datetime import date from functools import partial -from os.path import abspath, isdir, isfile, join try: from importlib.metadata import version @@ -42,10 +39,8 @@ def find_locale_dir(): TRANSLATION_CATALOG = gettext.translation( "bagit-python", localedir=find_locale_dir(), fallback=True ) -if sys.version_info < (3,): - _ = TRANSLATION_CATALOG.ugettext -else: - _ = TRANSLATION_CATALOG.gettext + +_ = TRANSLATION_CATALOG.gettext MODULE_NAME = "bagit" if __name__ == "__main__" else __name__ @@ -140,7 +135,7 @@ def find_locale_dir(): open_text_file = partial(codecs.open, encoding="utf-8", errors="strict") # This is the same as decoding the byte values in codecs.BOM: -UNICODE_BYTE_ORDER_MARK = "\uFEFF" +UNICODE_BYTE_ORDER_MARK = "\ufeff" def make_bag( @@ -282,7 +277,7 @@ class Bag(object): valid_files = ["bagit.txt", "fetch.txt"] valid_directories = ["data"] - def __init__(self, path=None): + def __init__(self, path): super(Bag, self).__init__() self.tags = {} self.info = {} @@ -304,12 +299,8 @@ def __init__(self, path=None): self.algorithms = [] self.tag_file_name = None - self.path = abspath(path) - if path: - # if path ends in a path separator, strip it off - if path[-1] == os.sep: - self.path = path[:-1] - self._open() + self.path = os.path.abspath(path) + self._open() def __str__(self): # FIXME: develop a more informative string representation for a Bag @@ -333,7 +324,7 @@ def _open(self): # the required version and encoding. bagit_file_path = os.path.join(self.path, "bagit.txt") - if not isfile(bagit_file_path): + if not os.path.isfile(bagit_file_path): raise BagError(_("Expected bagit.txt does not exist: %s") % bagit_file_path) self.tags = tags = _load_tag_file(bagit_file_path) @@ -382,13 +373,13 @@ def _open(self): def manifest_files(self): for filename in ["manifest-%s.txt" % a for a in CHECKSUM_ALGOS]: f = os.path.join(self.path, filename) - if isfile(f): + if os.path.isfile(f): yield f def tagmanifest_files(self): for filename in ["tagmanifest-%s.txt" % a for a in CHECKSUM_ALGOS]: f = os.path.join(self.path, filename) - if isfile(f): + if os.path.isfile(f): yield f def compare_manifests_with_fs(self): @@ -422,8 +413,8 @@ def compare_manifests_with_fs(self): def compare_fetch_with_fs(self): """Compares the fetch entries with the files actually - in the payload, and returns a list of all the files - that still need to be fetched. + in the payload, and returns a list of all the files + that still need to be fetched. 
""" files_on_fs = set(self.payload_files()) @@ -449,7 +440,7 @@ def payload_files(self): yield rel_path def payload_entries(self): - """Return a dictionary of items """ + """Return a dictionary of items""" # Don't use dict comprehension (compatibility with Python < 2.7) return dict( (key, value) @@ -562,7 +553,7 @@ def fetch_entries(self): fetch_file_path = os.path.join(self.path, "fetch.txt") - if isfile(fetch_file_path): + if os.path.isfile(fetch_file_path): with open_text_file( fetch_file_path, "r", encoding=self.encoding ) as fetch_file: @@ -618,7 +609,9 @@ def is_valid(self, processes=1, fast=False, completeness_only=False): """ try: - self.validate(processes=processes, fast=fast, completeness_only=completeness_only) + self.validate( + processes=processes, fast=fast, completeness_only=completeness_only + ) except BagError: return False @@ -746,7 +739,7 @@ def _validate_structure(self): def _validate_structure_payload_directory(self): data_dir_path = os.path.join(self.path, "data") - if not isdir(data_dir_path): + if not os.path.isdir(data_dir_path): raise BagValidationError( _("Expected data directory %s does not exist") % data_dir_path ) @@ -776,7 +769,10 @@ def validate_fetch(self): # each parsed url must resolve to a scheme and point to a netloc # if the scheme is file, netloc is not necessary - if not (all((parsed_url.scheme, parsed_url.netloc)) or parsed_url.scheme == "file"): + if not ( + all((parsed_url.scheme, parsed_url.netloc)) + or parsed_url.scheme == "file" + ): raise BagError(_("Malformed URL in fetch.txt: %s") % url) def _validate_contents(self, processes=1, fast=False, completeness_only=False): @@ -851,11 +847,11 @@ def _validate_completeness(self): only_in_manifests, only_on_fs = self.compare_manifests_with_fs() for path in only_in_manifests: e = FileMissing(path) - LOGGER.warning(force_unicode(e)) + LOGGER.warning(str(e)) errors.append(e) for path in only_on_fs: e = UnexpectedFile(path) - LOGGER.warning(force_unicode(e)) + LOGGER.warning(str(e)) errors.append(e) if errors: @@ -886,13 +882,12 @@ def _validate_entries(self, processes): if processes == 1: hash_results = [_calc_hashes(i) for i in args] else: - try: - pool = multiprocessing.Pool( - processes if processes else None, initializer=worker_init - ) - hash_results = pool.map(_calc_hashes, args) - finally: - pool.terminate() + pool = multiprocessing.Pool( + processes if processes else None, initializer=worker_init + ) + hash_results = pool.map(_calc_hashes, args) + pool.close() + pool.join() # Any unhandled exceptions are probably fatal except: @@ -906,7 +901,7 @@ def _validate_entries(self, processes): e = ChecksumMismatch( rel_path, alg, stored_hash.lower(), computed_hash ) - LOGGER.warning(force_unicode(e)) + LOGGER.warning(str(e)) errors.append(e) if errors: @@ -963,7 +958,7 @@ def __init__(self, message, details=None): def __str__(self): if len(self.details) > 0: - details = "; ".join([force_unicode(e) for e in self.details]) + details = "; ".join([str(e) for e in self.details]) return "%s: %s" % (self.message, details) return self.message @@ -988,7 +983,7 @@ def __str__(self): return _( '%(path)s %(algorithm)s validation failed: expected="%(expected)s" found="%(found)s"' ) % { - "path": force_unicode(self.path), + "path": str(self.path), "algorithm": self.algorithm, "expected": self.expected, "found": self.found, @@ -997,9 +992,9 @@ def __str__(self): class FileMissing(ManifestErrorDetail): def __str__(self): - return _( - "%s exists in manifest but was not found on filesystem" - ) % 
force_unicode(self.path) + return _("%s exists in manifest but was not found on filesystem") % str( + self.path + ) class UnexpectedFile(ManifestErrorDetail): @@ -1034,22 +1029,10 @@ def posix_multiprocessing_worker_initializer(): # is consistency since the input value will be preserved: -def normalize_unicode_py3(s): +def normalize_unicode(s): return unicodedata.normalize("NFC", s) -def normalize_unicode_py2(s): - if isinstance(s, str): - s = s.decode("utf-8") - return unicodedata.normalize("NFC", s) - - -if sys.version_info > (3, 0): - normalize_unicode = normalize_unicode_py3 -else: - normalize_unicode = normalize_unicode_py2 - - def build_unicode_normalized_lookup_dict(filenames): """ Return a dictionary mapping unicode-normalized filenames to as-encoded @@ -1138,7 +1121,7 @@ def _calc_hashes(args): try: f_hashes = _calculate_file_hashes(full_path, f_hashers) except BagValidationError as e: - f_hashes = dict((alg, force_unicode(e)) for alg in f_hashers.keys()) + f_hashes = dict((alg, str(e)) for alg in f_hashers.keys()) return rel_path, f_hashes, hashes @@ -1161,7 +1144,7 @@ def _calculate_file_hashes(full_path, f_hashers): except (OSError, IOError) as e: raise BagValidationError( _("Could not read %(filename)s: %(error)s") - % {"filename": full_path, "error": force_unicode(e)} + % {"filename": full_path, "error": str(e)} ) return dict((alg, h.hexdigest()) for alg, h in f_hashers.items()) @@ -1187,11 +1170,11 @@ def _load_tag_file(tag_file_name, encoding="utf-8-sig"): def _parse_tags(tag_file): """Parses a tag file, according to RFC 2822. This - includes line folding, permitting extra-long - field values. + includes line folding, permitting extra-long + field values. - See http://www.faqs.org/rfcs/rfc2822.html for - more information. + See http://www.faqs.org/rfcs/rfc2822.html for + more information. 
""" tag_name = None @@ -1237,7 +1220,7 @@ def _make_tag_file(bag_info_path, bag_info): values = [values] for txt in values: # strip CR, LF and CRLF so they don't mess up the tag file - txt = re.sub(r"\n|\r|(\r\n)", "", force_unicode(txt)) + txt = re.sub(r"\n|\r|(\r\n)", "", str(txt)) f.write("%s: %s\n" % (h, txt)) @@ -1295,14 +1278,14 @@ def make_manifests(data_dir, processes, algorithms=DEFAULT_CHECKSUMS, encoding=" def _make_tagmanifest_file(alg, bag_dir, encoding="utf-8"): - tagmanifest_file = join(bag_dir, "tagmanifest-%s.txt" % alg) + tagmanifest_file = os.path.join(bag_dir, "tagmanifest-%s.txt" % alg) LOGGER.info(_("Creating %s"), tagmanifest_file) checksums = [] for f in _find_tag_files(bag_dir): if re.match(r"^tagmanifest-.+\.txt$", f): continue - with open(join(bag_dir, f), "rb") as fh: + with open(os.path.join(bag_dir, f), "rb") as fh: m = hashlib.new(alg) while True: block = fh.read(HASH_BLOCK_SIZE) @@ -1312,7 +1295,7 @@ def _make_tagmanifest_file(alg, bag_dir, encoding="utf-8"): checksums.append((m.hexdigest(), f)) with open_text_file( - join(bag_dir, tagmanifest_file), mode="w", encoding=encoding + os.path.join(bag_dir, tagmanifest_file), mode="w", encoding=encoding ) as tagmanifest: for digest, filename in checksums: tagmanifest.write("%s %s\n" % (digest, filename)) @@ -1328,7 +1311,7 @@ def _find_tag_files(bag_dir): if filename.startswith("tagmanifest-"): continue # remove everything up to the bag_dir directory - p = join(dir_name, filename) + p = os.path.join(dir_name, filename) yield os.path.relpath(p, bag_dir) @@ -1433,19 +1416,6 @@ def _decode_filename(s): return s -def force_unicode_py2(s): - """Reliably return a Unicode string given a possible unicode or byte string""" - if isinstance(s, str): - return s.decode("utf-8") - else: - return unicode(s) - - -if sys.version_info > (3, 0): - force_unicode = str -else: - force_unicode = force_unicode_py2 - # following code is used for command line program @@ -1531,7 +1501,10 @@ def _make_parser(): metadata_args = parser.add_argument_group(_("Optional Bag Metadata")) for header in STANDARD_BAG_INFO_HEADERS: metadata_args.add_argument( - "--%s" % header.lower(), type=str, action=BagHeaderAction, default=argparse.SUPPRESS + "--%s" % header.lower(), + type=str, + action=BagHeaderAction, + default=argparse.SUPPRESS, ) parser.add_argument( @@ -1574,7 +1547,9 @@ def main(): parser.error(_("--fast is only allowed as an option for --validate!")) if args.completeness_only and not args.validate: - parser.error(_("--completeness-only is only allowed as an option for --validate!")) + parser.error( + _("--completeness-only is only allowed as an option for --validate!") + ) _configure_logging(args) @@ -1593,7 +1568,9 @@ def main(): if args.fast: LOGGER.info(_("%s valid according to Payload-Oxum"), bag_dir) elif args.completeness_only: - LOGGER.info(_("%s is complete and valid according to Payload-Oxum"), bag_dir) + LOGGER.info( + _("%s is complete and valid according to Payload-Oxum"), bag_dir + ) else: LOGGER.info(_("%s is valid"), bag_dir) except BagError as e: diff --git a/internal/dist/data/darwin-arm64/files.json b/internal/dist/data/darwin-arm64/files.json index d5d8d55..086f7b0 100644 --- a/internal/dist/data/darwin-arm64/files.json +++ b/internal/dist/data/darwin-arm64/files.json @@ -1,94 +1,79 @@ { - "contentHash": "b070e1f8e1eb39e729a92e1a6defde8dc8f537428fab7c5606fb88412d0b75c8", + "contentHash": "e2ede92fb45de62ded534e75f08da4c2b900e3c76b1773913fa584a367a98ecf", "files": [ { - "name": "bagit-1.8.1.dev26+g5b77243.dist-info", + 
"name": "bagit-1.9b3.dev3+gda04180.dist-info", "size": 0, - "modTime": 1713519596, "perm": 2147484157 }, { - "name": "bagit-1.8.1.dev26+g5b77243.dist-info/INSTALLER", + "name": "bagit-1.9b3.dev3+gda04180.dist-info/INSTALLER", "size": 4, - "modTime": 1713519596, "perm": 436 }, { - "name": "bagit-1.8.1.dev26+g5b77243.dist-info/METADATA", - "size": 8167, - "modTime": 1713519596, + "name": "bagit-1.9b3.dev3+gda04180.dist-info/METADATA", + "size": 7527, "perm": 436 }, { - "name": "bagit-1.8.1.dev26+g5b77243.dist-info/RECORD", - "size": 994, - "modTime": 1713519596, + "name": "bagit-1.9b3.dev3+gda04180.dist-info/RECORD", + "size": 986, "perm": 436 }, { - "name": "bagit-1.8.1.dev26+g5b77243.dist-info/REQUESTED", + "name": "bagit-1.9b3.dev3+gda04180.dist-info/REQUESTED", "size": 0, - "modTime": 1713519596, "perm": 436 }, { - "name": "bagit-1.8.1.dev26+g5b77243.dist-info/WHEEL", - "size": 110, - "modTime": 1713519596, + "name": "bagit-1.9b3.dev3+gda04180.dist-info/WHEEL", + "size": 91, "perm": 436 }, { - "name": "bagit-1.8.1.dev26+g5b77243.dist-info/direct_url.json", + "name": "bagit-1.9b3.dev3+gda04180.dist-info/direct_url.json", "size": 211, - "modTime": 1713519596, "perm": 436 }, { - "name": "bagit-1.8.1.dev26+g5b77243.dist-info/top_level.txt", + "name": "bagit-1.9b3.dev3+gda04180.dist-info/top_level.txt", "size": 6, - "modTime": 1713519596, "perm": 436 }, { "name": "bagit.py", - "size": 55530, - "modTime": 1713519596, + "size": 54701, "perm": 436 }, { "name": "bin", "size": 0, - "modTime": 1713519596, "perm": 2147484157 }, { "name": "bin/bagit.py", - "size": 55551, - "modTime": 1713519596, + "size": 54722, "perm": 509 }, { "name": "locale", "size": 0, - "modTime": 1713519596, "perm": 2147484157 }, { "name": "locale/en", "size": 0, - "modTime": 1713519596, "perm": 2147484157 }, { "name": "locale/en/LC_MESSAGES", "size": 0, - "modTime": 1713519596, "perm": 2147484157 }, { "name": "locale/en/LC_MESSAGES/bagit-python.mo", "size": 2808, - "modTime": 1713519596, "perm": 436, "compressed": true } diff --git a/internal/dist/data/linux-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/RECORD b/internal/dist/data/linux-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/RECORD deleted file mode 100644 index 7d131e9..0000000 --- a/internal/dist/data/linux-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/RECORD +++ /dev/null @@ -1,12 +0,0 @@ -../../bin/__pycache__/bagit.cpython-312.pyc,, -../../bin/bagit.py,sha256=I3UO4uCuTN9ogLyKuA1cD6SU7d43r0IkImBZAVzVokg,55550 -../../locale/en/LC_MESSAGES/bagit-python.mo,sha256=onTi-42QABidt47Lx6KeFzbP-lQ0HU1X9fW2j1M8UK0,2808 -__pycache__/bagit.cpython-312.pyc,, -bagit-1.8.1.dev26+g5b77243.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 -bagit-1.8.1.dev26+g5b77243.dist-info/METADATA,sha256=crTvH7UCcMB5Ddz3n6u-xJWmtv0NLiga4pmQLNNHNe0,8167 -bagit-1.8.1.dev26+g5b77243.dist-info/RECORD,, -bagit-1.8.1.dev26+g5b77243.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 -bagit-1.8.1.dev26+g5b77243.dist-info/WHEEL,sha256=DZajD4pwLWue70CAfc7YaxT1wLUciNBvN_TTcvXpltE,110 -bagit-1.8.1.dev26+g5b77243.dist-info/direct_url.json,sha256=qk6jLuV7knLYcmZsdFksaCxxehTR2eGa8s8jqoewkv8,211 -bagit-1.8.1.dev26+g5b77243.dist-info/top_level.txt,sha256=f288ippe7ERSzETsEftza1dXq61Z4zFSXDgaTF6djpE,6 -bagit.py,sha256=_8AUO2ZmW5PdhTfTpJetie5lkyhFkomhXM_Fn8kzvrU,55530 diff --git a/internal/dist/data/linux-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/direct_url.json b/internal/dist/data/linux-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/direct_url.json deleted file mode 100644 index 
f5592c2..0000000 --- a/internal/dist/data/linux-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/direct_url.json +++ /dev/null @@ -1 +0,0 @@ -{"url": "https://github.com/LibraryOfCongress/bagit-python", "vcs_info": {"commit_id": "5b7724356d8c5c3a3be9120814d4d996574485a9", "requested_revision": "5b7724356d8c5c3a3be9120814d4d996574485a9", "vcs": "git"}} \ No newline at end of file diff --git a/internal/dist/data/linux-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/INSTALLER b/internal/dist/data/linux-amd64/bagit-1.9b3.dev3+gda04180.dist-info/INSTALLER similarity index 100% rename from internal/dist/data/linux-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/INSTALLER rename to internal/dist/data/linux-amd64/bagit-1.9b3.dev3+gda04180.dist-info/INSTALLER diff --git a/internal/dist/data/linux-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/METADATA b/internal/dist/data/linux-amd64/bagit-1.9b3.dev3+gda04180.dist-info/METADATA similarity index 88% rename from internal/dist/data/linux-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/METADATA rename to internal/dist/data/linux-amd64/bagit-1.9b3.dev3+gda04180.dist-info/METADATA index 81a7c6b..d5f5d35 100644 --- a/internal/dist/data/linux-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/METADATA +++ b/internal/dist/data/linux-amd64/bagit-1.9b3.dev3+gda04180.dist-info/METADATA @@ -1,34 +1,23 @@ Metadata-Version: 2.1 Name: bagit -Version: 1.8.1.dev26+g5b77243 +Version: 1.9b3.dev3+gda04180 Summary: Create and validate BagIt packages Home-page: https://libraryofcongress.github.io/bagit-python/ Author: Ed Summers -Author-email: ehs@pobox.com +Author-email: Ed Summers +Project-URL: Homepage, https://libraryofcongress.github.io/bagit-python/ Platform: POSIX -Classifier: License :: Public Domain Classifier: Intended Audience :: Developers +Classifier: License :: Public Domain +Classifier: Programming Language :: Python :: 3 Classifier: Topic :: Communications :: File Sharing Classifier: Topic :: Software Development :: Libraries :: Python Modules Classifier: Topic :: System :: Filesystems -Classifier: Programming Language :: Python :: 2 -Classifier: Programming Language :: Python :: 2.7 -Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3.1 -Classifier: Programming Language :: Python :: 3.2 -Classifier: Programming Language :: Python :: 3.3 -Classifier: Programming Language :: Python :: 3.4 -Classifier: Programming Language :: Python :: 3.5 -Classifier: Programming Language :: Python :: 3.6 -Classifier: Programming Language :: Python :: 3.7 -Classifier: Programming Language :: Python :: 3.8 -Requires-Dist: importlib-metadata ; python_version < "3.8" +Description-Content-Type: text/x-rst bagit-python ============ -|Build Status| |Coverage Status| - bagit is a Python library and command line utility for working with `BagIt `__ style packages. @@ -42,7 +31,7 @@ project as needed or you can install globally with: pip install bagit -Python v2.7+ is required. +A supported version of Python 3 is required. 
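(For orientation, a minimal Python 3 usage sketch of the vendored library; only Bag(path), is_valid() and validate() appear verbatim in this patch, so the make_bag() call, the directory name and the bag-info value below are illustrative assumptions rather than upstream README content)::

    import bagit

    # Turn an existing directory into a bag in place
    # (assumes make_bag() keeps its usual (bag_dir, bag_info) signature
    # and the standard bag-info headers exposed by the CLI).
    bag = bagit.make_bag("my-directory", {"Contact-Name": "Ed Summers"})

    # Re-open the bag later; Bag() now requires the path argument,
    # as changed in the patched bagit.py above.
    bag = bagit.Bag("my-directory")
    print(bag.is_valid())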
Command Line Usage ------------------ @@ -252,20 +241,11 @@ Contributing to bagit-python development Running the tests ~~~~~~~~~~~~~~~~~ -You can quickly run the tests by having setuptools install dependencies: - -:: - - python setup.py test - -Once your code is working, you can use -`Tox `__ to run the tests with every -supported version of Python which you have installed on the local -system: +You can quickly run the tests using the built-in unittest framework: :: - tox + python -m unittest discover If you have Docker installed, you can run the tests under Linux inside a container: diff --git a/internal/dist/data/linux-amd64/bagit-1.9b3.dev3+gda04180.dist-info/RECORD b/internal/dist/data/linux-amd64/bagit-1.9b3.dev3+gda04180.dist-info/RECORD new file mode 100644 index 0000000..3f17715 --- /dev/null +++ b/internal/dist/data/linux-amd64/bagit-1.9b3.dev3+gda04180.dist-info/RECORD @@ -0,0 +1,12 @@ +../../bin/__pycache__/bagit.cpython-312.pyc,, +../../bin/bagit.py,sha256=kD_kG7MQ7cQtEY6eyyI_UN-BGs1FloAUuxnrn9bhioA,54721 +../../locale/en/LC_MESSAGES/bagit-python.mo,sha256=onTi-42QABidt47Lx6KeFzbP-lQ0HU1X9fW2j1M8UK0,2808 +__pycache__/bagit.cpython-312.pyc,, +bagit-1.9b3.dev3+gda04180.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +bagit-1.9b3.dev3+gda04180.dist-info/METADATA,sha256=xeBL-vZhcdQpG_M3AvddWJ-YvQ_BCxJ8UUBmmTfizXA,7527 +bagit-1.9b3.dev3+gda04180.dist-info/RECORD,, +bagit-1.9b3.dev3+gda04180.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +bagit-1.9b3.dev3+gda04180.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91 +bagit-1.9b3.dev3+gda04180.dist-info/direct_url.json,sha256=2hbjksYnB7HjDIZNtDaTebBXsdAYM2ZI_reqL50sFWw,211 +bagit-1.9b3.dev3+gda04180.dist-info/top_level.txt,sha256=f288ippe7ERSzETsEftza1dXq61Z4zFSXDgaTF6djpE,6 +bagit.py,sha256=L3uPbWYK4GPwRlCTWSnzMpOukmVWc-tmLxfHzACB-5w,54701 diff --git a/internal/dist/data/linux-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/REQUESTED b/internal/dist/data/linux-amd64/bagit-1.9b3.dev3+gda04180.dist-info/REQUESTED similarity index 100% rename from internal/dist/data/linux-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/REQUESTED rename to internal/dist/data/linux-amd64/bagit-1.9b3.dev3+gda04180.dist-info/REQUESTED diff --git a/internal/dist/data/darwin-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/WHEEL b/internal/dist/data/linux-amd64/bagit-1.9b3.dev3+gda04180.dist-info/WHEEL similarity index 54% rename from internal/dist/data/darwin-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/WHEEL rename to internal/dist/data/linux-amd64/bagit-1.9b3.dev3+gda04180.dist-info/WHEEL index 832be11..da25d7b 100644 --- a/internal/dist/data/darwin-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/WHEEL +++ b/internal/dist/data/linux-amd64/bagit-1.9b3.dev3+gda04180.dist-info/WHEEL @@ -1,6 +1,5 @@ Wheel-Version: 1.0 -Generator: bdist_wheel (0.43.0) +Generator: setuptools (75.2.0) Root-Is-Purelib: true -Tag: py2-none-any Tag: py3-none-any diff --git a/internal/dist/data/linux-amd64/bagit-1.9b3.dev3+gda04180.dist-info/direct_url.json b/internal/dist/data/linux-amd64/bagit-1.9b3.dev3+gda04180.dist-info/direct_url.json new file mode 100644 index 0000000..71df225 --- /dev/null +++ b/internal/dist/data/linux-amd64/bagit-1.9b3.dev3+gda04180.dist-info/direct_url.json @@ -0,0 +1 @@ +{"url": "https://github.com/LibraryOfCongress/bagit-python", "vcs_info": {"commit_id": "da041808d2453da2209054a9f4c48187dc323c0a", "requested_revision": "da041808d2453da2209054a9f4c48187dc323c0a", "vcs": "git"}} \ No newline at end of file diff 
--git a/internal/dist/data/linux-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/top_level.txt b/internal/dist/data/linux-amd64/bagit-1.9b3.dev3+gda04180.dist-info/top_level.txt similarity index 100% rename from internal/dist/data/linux-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/top_level.txt rename to internal/dist/data/linux-amd64/bagit-1.9b3.dev3+gda04180.dist-info/top_level.txt diff --git a/internal/dist/data/linux-amd64/bagit.py b/internal/dist/data/linux-amd64/bagit.py index 458fba8..69ea9ab 100644 --- a/internal/dist/data/linux-amd64/bagit.py +++ b/internal/dist/data/linux-amd64/bagit.py @@ -1,8 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -from __future__ import absolute_import, division, print_function, unicode_literals - import argparse import codecs import gettext @@ -19,7 +17,6 @@ from collections import defaultdict from datetime import date from functools import partial -from os.path import abspath, isdir, isfile, join try: from importlib.metadata import version @@ -42,10 +39,8 @@ def find_locale_dir(): TRANSLATION_CATALOG = gettext.translation( "bagit-python", localedir=find_locale_dir(), fallback=True ) -if sys.version_info < (3,): - _ = TRANSLATION_CATALOG.ugettext -else: - _ = TRANSLATION_CATALOG.gettext + +_ = TRANSLATION_CATALOG.gettext MODULE_NAME = "bagit" if __name__ == "__main__" else __name__ @@ -140,7 +135,7 @@ def find_locale_dir(): open_text_file = partial(codecs.open, encoding="utf-8", errors="strict") # This is the same as decoding the byte values in codecs.BOM: -UNICODE_BYTE_ORDER_MARK = "\uFEFF" +UNICODE_BYTE_ORDER_MARK = "\ufeff" def make_bag( @@ -282,7 +277,7 @@ class Bag(object): valid_files = ["bagit.txt", "fetch.txt"] valid_directories = ["data"] - def __init__(self, path=None): + def __init__(self, path): super(Bag, self).__init__() self.tags = {} self.info = {} @@ -304,12 +299,8 @@ def __init__(self, path=None): self.algorithms = [] self.tag_file_name = None - self.path = abspath(path) - if path: - # if path ends in a path separator, strip it off - if path[-1] == os.sep: - self.path = path[:-1] - self._open() + self.path = os.path.abspath(path) + self._open() def __str__(self): # FIXME: develop a more informative string representation for a Bag @@ -333,7 +324,7 @@ def _open(self): # the required version and encoding. bagit_file_path = os.path.join(self.path, "bagit.txt") - if not isfile(bagit_file_path): + if not os.path.isfile(bagit_file_path): raise BagError(_("Expected bagit.txt does not exist: %s") % bagit_file_path) self.tags = tags = _load_tag_file(bagit_file_path) @@ -382,13 +373,13 @@ def _open(self): def manifest_files(self): for filename in ["manifest-%s.txt" % a for a in CHECKSUM_ALGOS]: f = os.path.join(self.path, filename) - if isfile(f): + if os.path.isfile(f): yield f def tagmanifest_files(self): for filename in ["tagmanifest-%s.txt" % a for a in CHECKSUM_ALGOS]: f = os.path.join(self.path, filename) - if isfile(f): + if os.path.isfile(f): yield f def compare_manifests_with_fs(self): @@ -422,8 +413,8 @@ def compare_manifests_with_fs(self): def compare_fetch_with_fs(self): """Compares the fetch entries with the files actually - in the payload, and returns a list of all the files - that still need to be fetched. + in the payload, and returns a list of all the files + that still need to be fetched. 
""" files_on_fs = set(self.payload_files()) @@ -449,7 +440,7 @@ def payload_files(self): yield rel_path def payload_entries(self): - """Return a dictionary of items """ + """Return a dictionary of items""" # Don't use dict comprehension (compatibility with Python < 2.7) return dict( (key, value) @@ -562,7 +553,7 @@ def fetch_entries(self): fetch_file_path = os.path.join(self.path, "fetch.txt") - if isfile(fetch_file_path): + if os.path.isfile(fetch_file_path): with open_text_file( fetch_file_path, "r", encoding=self.encoding ) as fetch_file: @@ -618,7 +609,9 @@ def is_valid(self, processes=1, fast=False, completeness_only=False): """ try: - self.validate(processes=processes, fast=fast, completeness_only=completeness_only) + self.validate( + processes=processes, fast=fast, completeness_only=completeness_only + ) except BagError: return False @@ -746,7 +739,7 @@ def _validate_structure(self): def _validate_structure_payload_directory(self): data_dir_path = os.path.join(self.path, "data") - if not isdir(data_dir_path): + if not os.path.isdir(data_dir_path): raise BagValidationError( _("Expected data directory %s does not exist") % data_dir_path ) @@ -776,7 +769,10 @@ def validate_fetch(self): # each parsed url must resolve to a scheme and point to a netloc # if the scheme is file, netloc is not necessary - if not (all((parsed_url.scheme, parsed_url.netloc)) or parsed_url.scheme == "file"): + if not ( + all((parsed_url.scheme, parsed_url.netloc)) + or parsed_url.scheme == "file" + ): raise BagError(_("Malformed URL in fetch.txt: %s") % url) def _validate_contents(self, processes=1, fast=False, completeness_only=False): @@ -851,11 +847,11 @@ def _validate_completeness(self): only_in_manifests, only_on_fs = self.compare_manifests_with_fs() for path in only_in_manifests: e = FileMissing(path) - LOGGER.warning(force_unicode(e)) + LOGGER.warning(str(e)) errors.append(e) for path in only_on_fs: e = UnexpectedFile(path) - LOGGER.warning(force_unicode(e)) + LOGGER.warning(str(e)) errors.append(e) if errors: @@ -886,13 +882,12 @@ def _validate_entries(self, processes): if processes == 1: hash_results = [_calc_hashes(i) for i in args] else: - try: - pool = multiprocessing.Pool( - processes if processes else None, initializer=worker_init - ) - hash_results = pool.map(_calc_hashes, args) - finally: - pool.terminate() + pool = multiprocessing.Pool( + processes if processes else None, initializer=worker_init + ) + hash_results = pool.map(_calc_hashes, args) + pool.close() + pool.join() # Any unhandled exceptions are probably fatal except: @@ -906,7 +901,7 @@ def _validate_entries(self, processes): e = ChecksumMismatch( rel_path, alg, stored_hash.lower(), computed_hash ) - LOGGER.warning(force_unicode(e)) + LOGGER.warning(str(e)) errors.append(e) if errors: @@ -963,7 +958,7 @@ def __init__(self, message, details=None): def __str__(self): if len(self.details) > 0: - details = "; ".join([force_unicode(e) for e in self.details]) + details = "; ".join([str(e) for e in self.details]) return "%s: %s" % (self.message, details) return self.message @@ -988,7 +983,7 @@ def __str__(self): return _( '%(path)s %(algorithm)s validation failed: expected="%(expected)s" found="%(found)s"' ) % { - "path": force_unicode(self.path), + "path": str(self.path), "algorithm": self.algorithm, "expected": self.expected, "found": self.found, @@ -997,9 +992,9 @@ def __str__(self): class FileMissing(ManifestErrorDetail): def __str__(self): - return _( - "%s exists in manifest but was not found on filesystem" - ) % 
force_unicode(self.path) + return _("%s exists in manifest but was not found on filesystem") % str( + self.path + ) class UnexpectedFile(ManifestErrorDetail): @@ -1034,22 +1029,10 @@ def posix_multiprocessing_worker_initializer(): # is consistency since the input value will be preserved: -def normalize_unicode_py3(s): +def normalize_unicode(s): return unicodedata.normalize("NFC", s) -def normalize_unicode_py2(s): - if isinstance(s, str): - s = s.decode("utf-8") - return unicodedata.normalize("NFC", s) - - -if sys.version_info > (3, 0): - normalize_unicode = normalize_unicode_py3 -else: - normalize_unicode = normalize_unicode_py2 - - def build_unicode_normalized_lookup_dict(filenames): """ Return a dictionary mapping unicode-normalized filenames to as-encoded @@ -1138,7 +1121,7 @@ def _calc_hashes(args): try: f_hashes = _calculate_file_hashes(full_path, f_hashers) except BagValidationError as e: - f_hashes = dict((alg, force_unicode(e)) for alg in f_hashers.keys()) + f_hashes = dict((alg, str(e)) for alg in f_hashers.keys()) return rel_path, f_hashes, hashes @@ -1161,7 +1144,7 @@ def _calculate_file_hashes(full_path, f_hashers): except (OSError, IOError) as e: raise BagValidationError( _("Could not read %(filename)s: %(error)s") - % {"filename": full_path, "error": force_unicode(e)} + % {"filename": full_path, "error": str(e)} ) return dict((alg, h.hexdigest()) for alg, h in f_hashers.items()) @@ -1187,11 +1170,11 @@ def _load_tag_file(tag_file_name, encoding="utf-8-sig"): def _parse_tags(tag_file): """Parses a tag file, according to RFC 2822. This - includes line folding, permitting extra-long - field values. + includes line folding, permitting extra-long + field values. - See http://www.faqs.org/rfcs/rfc2822.html for - more information. + See http://www.faqs.org/rfcs/rfc2822.html for + more information. 
""" tag_name = None @@ -1237,7 +1220,7 @@ def _make_tag_file(bag_info_path, bag_info): values = [values] for txt in values: # strip CR, LF and CRLF so they don't mess up the tag file - txt = re.sub(r"\n|\r|(\r\n)", "", force_unicode(txt)) + txt = re.sub(r"\n|\r|(\r\n)", "", str(txt)) f.write("%s: %s\n" % (h, txt)) @@ -1295,14 +1278,14 @@ def make_manifests(data_dir, processes, algorithms=DEFAULT_CHECKSUMS, encoding=" def _make_tagmanifest_file(alg, bag_dir, encoding="utf-8"): - tagmanifest_file = join(bag_dir, "tagmanifest-%s.txt" % alg) + tagmanifest_file = os.path.join(bag_dir, "tagmanifest-%s.txt" % alg) LOGGER.info(_("Creating %s"), tagmanifest_file) checksums = [] for f in _find_tag_files(bag_dir): if re.match(r"^tagmanifest-.+\.txt$", f): continue - with open(join(bag_dir, f), "rb") as fh: + with open(os.path.join(bag_dir, f), "rb") as fh: m = hashlib.new(alg) while True: block = fh.read(HASH_BLOCK_SIZE) @@ -1312,7 +1295,7 @@ def _make_tagmanifest_file(alg, bag_dir, encoding="utf-8"): checksums.append((m.hexdigest(), f)) with open_text_file( - join(bag_dir, tagmanifest_file), mode="w", encoding=encoding + os.path.join(bag_dir, tagmanifest_file), mode="w", encoding=encoding ) as tagmanifest: for digest, filename in checksums: tagmanifest.write("%s %s\n" % (digest, filename)) @@ -1328,7 +1311,7 @@ def _find_tag_files(bag_dir): if filename.startswith("tagmanifest-"): continue # remove everything up to the bag_dir directory - p = join(dir_name, filename) + p = os.path.join(dir_name, filename) yield os.path.relpath(p, bag_dir) @@ -1433,19 +1416,6 @@ def _decode_filename(s): return s -def force_unicode_py2(s): - """Reliably return a Unicode string given a possible unicode or byte string""" - if isinstance(s, str): - return s.decode("utf-8") - else: - return unicode(s) - - -if sys.version_info > (3, 0): - force_unicode = str -else: - force_unicode = force_unicode_py2 - # following code is used for command line program @@ -1531,7 +1501,10 @@ def _make_parser(): metadata_args = parser.add_argument_group(_("Optional Bag Metadata")) for header in STANDARD_BAG_INFO_HEADERS: metadata_args.add_argument( - "--%s" % header.lower(), type=str, action=BagHeaderAction, default=argparse.SUPPRESS + "--%s" % header.lower(), + type=str, + action=BagHeaderAction, + default=argparse.SUPPRESS, ) parser.add_argument( @@ -1574,7 +1547,9 @@ def main(): parser.error(_("--fast is only allowed as an option for --validate!")) if args.completeness_only and not args.validate: - parser.error(_("--completeness-only is only allowed as an option for --validate!")) + parser.error( + _("--completeness-only is only allowed as an option for --validate!") + ) _configure_logging(args) @@ -1593,7 +1568,9 @@ def main(): if args.fast: LOGGER.info(_("%s valid according to Payload-Oxum"), bag_dir) elif args.completeness_only: - LOGGER.info(_("%s is complete and valid according to Payload-Oxum"), bag_dir) + LOGGER.info( + _("%s is complete and valid according to Payload-Oxum"), bag_dir + ) else: LOGGER.info(_("%s is valid"), bag_dir) except BagError as e: diff --git a/internal/dist/data/linux-amd64/bin/bagit.py b/internal/dist/data/linux-amd64/bin/bagit.py index 8ab57d6..a19789d 100644 --- a/internal/dist/data/linux-amd64/bin/bagit.py +++ b/internal/dist/data/linux-amd64/bin/bagit.py @@ -1,8 +1,6 @@ #!/tmp/python-pip-linux-amd64/bin/python3 # -*- coding: utf-8 -*- -from __future__ import absolute_import, division, print_function, unicode_literals - import argparse import codecs import gettext @@ -19,7 +17,6 @@ from collections import 
defaultdict from datetime import date from functools import partial -from os.path import abspath, isdir, isfile, join try: from importlib.metadata import version @@ -42,10 +39,8 @@ def find_locale_dir(): TRANSLATION_CATALOG = gettext.translation( "bagit-python", localedir=find_locale_dir(), fallback=True ) -if sys.version_info < (3,): - _ = TRANSLATION_CATALOG.ugettext -else: - _ = TRANSLATION_CATALOG.gettext + +_ = TRANSLATION_CATALOG.gettext MODULE_NAME = "bagit" if __name__ == "__main__" else __name__ @@ -140,7 +135,7 @@ def find_locale_dir(): open_text_file = partial(codecs.open, encoding="utf-8", errors="strict") # This is the same as decoding the byte values in codecs.BOM: -UNICODE_BYTE_ORDER_MARK = "\uFEFF" +UNICODE_BYTE_ORDER_MARK = "\ufeff" def make_bag( @@ -282,7 +277,7 @@ class Bag(object): valid_files = ["bagit.txt", "fetch.txt"] valid_directories = ["data"] - def __init__(self, path=None): + def __init__(self, path): super(Bag, self).__init__() self.tags = {} self.info = {} @@ -304,12 +299,8 @@ def __init__(self, path=None): self.algorithms = [] self.tag_file_name = None - self.path = abspath(path) - if path: - # if path ends in a path separator, strip it off - if path[-1] == os.sep: - self.path = path[:-1] - self._open() + self.path = os.path.abspath(path) + self._open() def __str__(self): # FIXME: develop a more informative string representation for a Bag @@ -333,7 +324,7 @@ def _open(self): # the required version and encoding. bagit_file_path = os.path.join(self.path, "bagit.txt") - if not isfile(bagit_file_path): + if not os.path.isfile(bagit_file_path): raise BagError(_("Expected bagit.txt does not exist: %s") % bagit_file_path) self.tags = tags = _load_tag_file(bagit_file_path) @@ -382,13 +373,13 @@ def _open(self): def manifest_files(self): for filename in ["manifest-%s.txt" % a for a in CHECKSUM_ALGOS]: f = os.path.join(self.path, filename) - if isfile(f): + if os.path.isfile(f): yield f def tagmanifest_files(self): for filename in ["tagmanifest-%s.txt" % a for a in CHECKSUM_ALGOS]: f = os.path.join(self.path, filename) - if isfile(f): + if os.path.isfile(f): yield f def compare_manifests_with_fs(self): @@ -422,8 +413,8 @@ def compare_manifests_with_fs(self): def compare_fetch_with_fs(self): """Compares the fetch entries with the files actually - in the payload, and returns a list of all the files - that still need to be fetched. + in the payload, and returns a list of all the files + that still need to be fetched. 
""" files_on_fs = set(self.payload_files()) @@ -449,7 +440,7 @@ def payload_files(self): yield rel_path def payload_entries(self): - """Return a dictionary of items """ + """Return a dictionary of items""" # Don't use dict comprehension (compatibility with Python < 2.7) return dict( (key, value) @@ -562,7 +553,7 @@ def fetch_entries(self): fetch_file_path = os.path.join(self.path, "fetch.txt") - if isfile(fetch_file_path): + if os.path.isfile(fetch_file_path): with open_text_file( fetch_file_path, "r", encoding=self.encoding ) as fetch_file: @@ -618,7 +609,9 @@ def is_valid(self, processes=1, fast=False, completeness_only=False): """ try: - self.validate(processes=processes, fast=fast, completeness_only=completeness_only) + self.validate( + processes=processes, fast=fast, completeness_only=completeness_only + ) except BagError: return False @@ -746,7 +739,7 @@ def _validate_structure(self): def _validate_structure_payload_directory(self): data_dir_path = os.path.join(self.path, "data") - if not isdir(data_dir_path): + if not os.path.isdir(data_dir_path): raise BagValidationError( _("Expected data directory %s does not exist") % data_dir_path ) @@ -776,7 +769,10 @@ def validate_fetch(self): # each parsed url must resolve to a scheme and point to a netloc # if the scheme is file, netloc is not necessary - if not (all((parsed_url.scheme, parsed_url.netloc)) or parsed_url.scheme == "file"): + if not ( + all((parsed_url.scheme, parsed_url.netloc)) + or parsed_url.scheme == "file" + ): raise BagError(_("Malformed URL in fetch.txt: %s") % url) def _validate_contents(self, processes=1, fast=False, completeness_only=False): @@ -851,11 +847,11 @@ def _validate_completeness(self): only_in_manifests, only_on_fs = self.compare_manifests_with_fs() for path in only_in_manifests: e = FileMissing(path) - LOGGER.warning(force_unicode(e)) + LOGGER.warning(str(e)) errors.append(e) for path in only_on_fs: e = UnexpectedFile(path) - LOGGER.warning(force_unicode(e)) + LOGGER.warning(str(e)) errors.append(e) if errors: @@ -886,13 +882,12 @@ def _validate_entries(self, processes): if processes == 1: hash_results = [_calc_hashes(i) for i in args] else: - try: - pool = multiprocessing.Pool( - processes if processes else None, initializer=worker_init - ) - hash_results = pool.map(_calc_hashes, args) - finally: - pool.terminate() + pool = multiprocessing.Pool( + processes if processes else None, initializer=worker_init + ) + hash_results = pool.map(_calc_hashes, args) + pool.close() + pool.join() # Any unhandled exceptions are probably fatal except: @@ -906,7 +901,7 @@ def _validate_entries(self, processes): e = ChecksumMismatch( rel_path, alg, stored_hash.lower(), computed_hash ) - LOGGER.warning(force_unicode(e)) + LOGGER.warning(str(e)) errors.append(e) if errors: @@ -963,7 +958,7 @@ def __init__(self, message, details=None): def __str__(self): if len(self.details) > 0: - details = "; ".join([force_unicode(e) for e in self.details]) + details = "; ".join([str(e) for e in self.details]) return "%s: %s" % (self.message, details) return self.message @@ -988,7 +983,7 @@ def __str__(self): return _( '%(path)s %(algorithm)s validation failed: expected="%(expected)s" found="%(found)s"' ) % { - "path": force_unicode(self.path), + "path": str(self.path), "algorithm": self.algorithm, "expected": self.expected, "found": self.found, @@ -997,9 +992,9 @@ def __str__(self): class FileMissing(ManifestErrorDetail): def __str__(self): - return _( - "%s exists in manifest but was not found on filesystem" - ) % 
force_unicode(self.path) + return _("%s exists in manifest but was not found on filesystem") % str( + self.path + ) class UnexpectedFile(ManifestErrorDetail): @@ -1034,22 +1029,10 @@ def posix_multiprocessing_worker_initializer(): # is consistency since the input value will be preserved: -def normalize_unicode_py3(s): +def normalize_unicode(s): return unicodedata.normalize("NFC", s) -def normalize_unicode_py2(s): - if isinstance(s, str): - s = s.decode("utf-8") - return unicodedata.normalize("NFC", s) - - -if sys.version_info > (3, 0): - normalize_unicode = normalize_unicode_py3 -else: - normalize_unicode = normalize_unicode_py2 - - def build_unicode_normalized_lookup_dict(filenames): """ Return a dictionary mapping unicode-normalized filenames to as-encoded @@ -1138,7 +1121,7 @@ def _calc_hashes(args): try: f_hashes = _calculate_file_hashes(full_path, f_hashers) except BagValidationError as e: - f_hashes = dict((alg, force_unicode(e)) for alg in f_hashers.keys()) + f_hashes = dict((alg, str(e)) for alg in f_hashers.keys()) return rel_path, f_hashes, hashes @@ -1161,7 +1144,7 @@ def _calculate_file_hashes(full_path, f_hashers): except (OSError, IOError) as e: raise BagValidationError( _("Could not read %(filename)s: %(error)s") - % {"filename": full_path, "error": force_unicode(e)} + % {"filename": full_path, "error": str(e)} ) return dict((alg, h.hexdigest()) for alg, h in f_hashers.items()) @@ -1187,11 +1170,11 @@ def _load_tag_file(tag_file_name, encoding="utf-8-sig"): def _parse_tags(tag_file): """Parses a tag file, according to RFC 2822. This - includes line folding, permitting extra-long - field values. + includes line folding, permitting extra-long + field values. - See http://www.faqs.org/rfcs/rfc2822.html for - more information. + See http://www.faqs.org/rfcs/rfc2822.html for + more information. 
""" tag_name = None @@ -1237,7 +1220,7 @@ def _make_tag_file(bag_info_path, bag_info): values = [values] for txt in values: # strip CR, LF and CRLF so they don't mess up the tag file - txt = re.sub(r"\n|\r|(\r\n)", "", force_unicode(txt)) + txt = re.sub(r"\n|\r|(\r\n)", "", str(txt)) f.write("%s: %s\n" % (h, txt)) @@ -1295,14 +1278,14 @@ def make_manifests(data_dir, processes, algorithms=DEFAULT_CHECKSUMS, encoding=" def _make_tagmanifest_file(alg, bag_dir, encoding="utf-8"): - tagmanifest_file = join(bag_dir, "tagmanifest-%s.txt" % alg) + tagmanifest_file = os.path.join(bag_dir, "tagmanifest-%s.txt" % alg) LOGGER.info(_("Creating %s"), tagmanifest_file) checksums = [] for f in _find_tag_files(bag_dir): if re.match(r"^tagmanifest-.+\.txt$", f): continue - with open(join(bag_dir, f), "rb") as fh: + with open(os.path.join(bag_dir, f), "rb") as fh: m = hashlib.new(alg) while True: block = fh.read(HASH_BLOCK_SIZE) @@ -1312,7 +1295,7 @@ def _make_tagmanifest_file(alg, bag_dir, encoding="utf-8"): checksums.append((m.hexdigest(), f)) with open_text_file( - join(bag_dir, tagmanifest_file), mode="w", encoding=encoding + os.path.join(bag_dir, tagmanifest_file), mode="w", encoding=encoding ) as tagmanifest: for digest, filename in checksums: tagmanifest.write("%s %s\n" % (digest, filename)) @@ -1328,7 +1311,7 @@ def _find_tag_files(bag_dir): if filename.startswith("tagmanifest-"): continue # remove everything up to the bag_dir directory - p = join(dir_name, filename) + p = os.path.join(dir_name, filename) yield os.path.relpath(p, bag_dir) @@ -1433,19 +1416,6 @@ def _decode_filename(s): return s -def force_unicode_py2(s): - """Reliably return a Unicode string given a possible unicode or byte string""" - if isinstance(s, str): - return s.decode("utf-8") - else: - return unicode(s) - - -if sys.version_info > (3, 0): - force_unicode = str -else: - force_unicode = force_unicode_py2 - # following code is used for command line program @@ -1531,7 +1501,10 @@ def _make_parser(): metadata_args = parser.add_argument_group(_("Optional Bag Metadata")) for header in STANDARD_BAG_INFO_HEADERS: metadata_args.add_argument( - "--%s" % header.lower(), type=str, action=BagHeaderAction, default=argparse.SUPPRESS + "--%s" % header.lower(), + type=str, + action=BagHeaderAction, + default=argparse.SUPPRESS, ) parser.add_argument( @@ -1574,7 +1547,9 @@ def main(): parser.error(_("--fast is only allowed as an option for --validate!")) if args.completeness_only and not args.validate: - parser.error(_("--completeness-only is only allowed as an option for --validate!")) + parser.error( + _("--completeness-only is only allowed as an option for --validate!") + ) _configure_logging(args) @@ -1593,7 +1568,9 @@ def main(): if args.fast: LOGGER.info(_("%s valid according to Payload-Oxum"), bag_dir) elif args.completeness_only: - LOGGER.info(_("%s is complete and valid according to Payload-Oxum"), bag_dir) + LOGGER.info( + _("%s is complete and valid according to Payload-Oxum"), bag_dir + ) else: LOGGER.info(_("%s is valid"), bag_dir) except BagError as e: diff --git a/internal/dist/data/linux-amd64/files.json b/internal/dist/data/linux-amd64/files.json index 0ffe65f..462f683 100644 --- a/internal/dist/data/linux-amd64/files.json +++ b/internal/dist/data/linux-amd64/files.json @@ -1,94 +1,79 @@ { - "contentHash": "4e11aa00d332e3391b2b6542749955552e13875ec747c0ed8c2e4b47016680f7", + "contentHash": "48d7f048401cd37eca1a903f0728299f9d45d2e679c8d3f0dd5a48310dd03e46", "files": [ { - "name": "bagit-1.8.1.dev26+g5b77243.dist-info", + "name": 
"bagit-1.9b3.dev3+gda04180.dist-info", "size": 0, - "modTime": 1713519580, "perm": 2147484157 }, { - "name": "bagit-1.8.1.dev26+g5b77243.dist-info/INSTALLER", + "name": "bagit-1.9b3.dev3+gda04180.dist-info/INSTALLER", "size": 4, - "modTime": 1713519580, "perm": 436 }, { - "name": "bagit-1.8.1.dev26+g5b77243.dist-info/METADATA", - "size": 8167, - "modTime": 1713519580, + "name": "bagit-1.9b3.dev3+gda04180.dist-info/METADATA", + "size": 7527, "perm": 436 }, { - "name": "bagit-1.8.1.dev26+g5b77243.dist-info/RECORD", - "size": 994, - "modTime": 1713519580, + "name": "bagit-1.9b3.dev3+gda04180.dist-info/RECORD", + "size": 986, "perm": 436 }, { - "name": "bagit-1.8.1.dev26+g5b77243.dist-info/REQUESTED", + "name": "bagit-1.9b3.dev3+gda04180.dist-info/REQUESTED", "size": 0, - "modTime": 1713519580, "perm": 436 }, { - "name": "bagit-1.8.1.dev26+g5b77243.dist-info/WHEEL", - "size": 110, - "modTime": 1713519580, + "name": "bagit-1.9b3.dev3+gda04180.dist-info/WHEEL", + "size": 91, "perm": 436 }, { - "name": "bagit-1.8.1.dev26+g5b77243.dist-info/direct_url.json", + "name": "bagit-1.9b3.dev3+gda04180.dist-info/direct_url.json", "size": 211, - "modTime": 1713519580, "perm": 436 }, { - "name": "bagit-1.8.1.dev26+g5b77243.dist-info/top_level.txt", + "name": "bagit-1.9b3.dev3+gda04180.dist-info/top_level.txt", "size": 6, - "modTime": 1713519580, "perm": 436 }, { "name": "bagit.py", - "size": 55530, - "modTime": 1713519580, + "size": 54701, "perm": 436 }, { "name": "bin", "size": 0, - "modTime": 1713519581, "perm": 2147484157 }, { "name": "bin/bagit.py", - "size": 55550, - "modTime": 1713519580, + "size": 54721, "perm": 509 }, { "name": "locale", "size": 0, - "modTime": 1713519580, "perm": 2147484157 }, { "name": "locale/en", "size": 0, - "modTime": 1713519580, "perm": 2147484157 }, { "name": "locale/en/LC_MESSAGES", "size": 0, - "modTime": 1713519580, "perm": 2147484157 }, { "name": "locale/en/LC_MESSAGES/bagit-python.mo", "size": 2808, - "modTime": 1713519580, "perm": 436, "compressed": true } diff --git a/internal/dist/data/linux-arm64/bagit-1.8.1.dev26+g5b77243.dist-info/RECORD b/internal/dist/data/linux-arm64/bagit-1.8.1.dev26+g5b77243.dist-info/RECORD deleted file mode 100644 index 401220b..0000000 --- a/internal/dist/data/linux-arm64/bagit-1.8.1.dev26+g5b77243.dist-info/RECORD +++ /dev/null @@ -1,12 +0,0 @@ -../../bin/__pycache__/bagit.cpython-312.pyc,, -../../bin/bagit.py,sha256=P7SBxKj6U2GwFmGtxIuQDnJTVtjY3OodBfJ2ntzjnHI,55550 -../../locale/en/LC_MESSAGES/bagit-python.mo,sha256=onTi-42QABidt47Lx6KeFzbP-lQ0HU1X9fW2j1M8UK0,2808 -__pycache__/bagit.cpython-312.pyc,, -bagit-1.8.1.dev26+g5b77243.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 -bagit-1.8.1.dev26+g5b77243.dist-info/METADATA,sha256=crTvH7UCcMB5Ddz3n6u-xJWmtv0NLiga4pmQLNNHNe0,8167 -bagit-1.8.1.dev26+g5b77243.dist-info/RECORD,, -bagit-1.8.1.dev26+g5b77243.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 -bagit-1.8.1.dev26+g5b77243.dist-info/WHEEL,sha256=DZajD4pwLWue70CAfc7YaxT1wLUciNBvN_TTcvXpltE,110 -bagit-1.8.1.dev26+g5b77243.dist-info/direct_url.json,sha256=qk6jLuV7knLYcmZsdFksaCxxehTR2eGa8s8jqoewkv8,211 -bagit-1.8.1.dev26+g5b77243.dist-info/top_level.txt,sha256=f288ippe7ERSzETsEftza1dXq61Z4zFSXDgaTF6djpE,6 -bagit.py,sha256=_8AUO2ZmW5PdhTfTpJetie5lkyhFkomhXM_Fn8kzvrU,55530 diff --git a/internal/dist/data/linux-arm64/bagit-1.8.1.dev26+g5b77243.dist-info/direct_url.json b/internal/dist/data/linux-arm64/bagit-1.8.1.dev26+g5b77243.dist-info/direct_url.json deleted file mode 100644 index 
f5592c2..0000000 --- a/internal/dist/data/linux-arm64/bagit-1.8.1.dev26+g5b77243.dist-info/direct_url.json +++ /dev/null @@ -1 +0,0 @@ -{"url": "https://github.com/LibraryOfCongress/bagit-python", "vcs_info": {"commit_id": "5b7724356d8c5c3a3be9120814d4d996574485a9", "requested_revision": "5b7724356d8c5c3a3be9120814d4d996574485a9", "vcs": "git"}} \ No newline at end of file diff --git a/internal/dist/data/linux-arm64/bagit-1.8.1.dev26+g5b77243.dist-info/INSTALLER b/internal/dist/data/linux-arm64/bagit-1.9b3.dev3+gda04180.dist-info/INSTALLER similarity index 100% rename from internal/dist/data/linux-arm64/bagit-1.8.1.dev26+g5b77243.dist-info/INSTALLER rename to internal/dist/data/linux-arm64/bagit-1.9b3.dev3+gda04180.dist-info/INSTALLER diff --git a/internal/dist/data/linux-arm64/bagit-1.8.1.dev26+g5b77243.dist-info/METADATA b/internal/dist/data/linux-arm64/bagit-1.9b3.dev3+gda04180.dist-info/METADATA similarity index 88% rename from internal/dist/data/linux-arm64/bagit-1.8.1.dev26+g5b77243.dist-info/METADATA rename to internal/dist/data/linux-arm64/bagit-1.9b3.dev3+gda04180.dist-info/METADATA index 81a7c6b..d5f5d35 100644 --- a/internal/dist/data/linux-arm64/bagit-1.8.1.dev26+g5b77243.dist-info/METADATA +++ b/internal/dist/data/linux-arm64/bagit-1.9b3.dev3+gda04180.dist-info/METADATA @@ -1,34 +1,23 @@ Metadata-Version: 2.1 Name: bagit -Version: 1.8.1.dev26+g5b77243 +Version: 1.9b3.dev3+gda04180 Summary: Create and validate BagIt packages Home-page: https://libraryofcongress.github.io/bagit-python/ Author: Ed Summers -Author-email: ehs@pobox.com +Author-email: Ed Summers +Project-URL: Homepage, https://libraryofcongress.github.io/bagit-python/ Platform: POSIX -Classifier: License :: Public Domain Classifier: Intended Audience :: Developers +Classifier: License :: Public Domain +Classifier: Programming Language :: Python :: 3 Classifier: Topic :: Communications :: File Sharing Classifier: Topic :: Software Development :: Libraries :: Python Modules Classifier: Topic :: System :: Filesystems -Classifier: Programming Language :: Python :: 2 -Classifier: Programming Language :: Python :: 2.7 -Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3.1 -Classifier: Programming Language :: Python :: 3.2 -Classifier: Programming Language :: Python :: 3.3 -Classifier: Programming Language :: Python :: 3.4 -Classifier: Programming Language :: Python :: 3.5 -Classifier: Programming Language :: Python :: 3.6 -Classifier: Programming Language :: Python :: 3.7 -Classifier: Programming Language :: Python :: 3.8 -Requires-Dist: importlib-metadata ; python_version < "3.8" +Description-Content-Type: text/x-rst bagit-python ============ -|Build Status| |Coverage Status| - bagit is a Python library and command line utility for working with `BagIt `__ style packages. @@ -42,7 +31,7 @@ project as needed or you can install globally with: pip install bagit -Python v2.7+ is required. +A supported version of Python 3 is required. 
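(A second hedged sketch, this time of the validation entry points whose signatures appear in the patched bagit.py; the bag path and worker count are placeholders, and each call raises a BagError subclass rather than returning False when validation fails)::

    import bagit

    bag = bagit.Bag("/path/to/existing-bag")

    # Full fixity check, hashing payload files with several worker processes.
    bag.validate(processes=4)

    # Cheaper modes mirroring the --fast and --completeness-only CLI options.
    bag.validate(fast=True)               # Payload-Oxum comparison only
    bag.validate(completeness_only=True)  # file presence/absence, no hashing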
Command Line Usage ------------------ @@ -252,20 +241,11 @@ Contributing to bagit-python development Running the tests ~~~~~~~~~~~~~~~~~ -You can quickly run the tests by having setuptools install dependencies: - -:: - - python setup.py test - -Once your code is working, you can use -`Tox `__ to run the tests with every -supported version of Python which you have installed on the local -system: +You can quickly run the tests using the built-in unittest framework: :: - tox + python -m unittest discover If you have Docker installed, you can run the tests under Linux inside a container: diff --git a/internal/dist/data/linux-arm64/bagit-1.9b3.dev3+gda04180.dist-info/RECORD b/internal/dist/data/linux-arm64/bagit-1.9b3.dev3+gda04180.dist-info/RECORD new file mode 100644 index 0000000..a997373 --- /dev/null +++ b/internal/dist/data/linux-arm64/bagit-1.9b3.dev3+gda04180.dist-info/RECORD @@ -0,0 +1,12 @@ +../../bin/__pycache__/bagit.cpython-312.pyc,, +../../bin/bagit.py,sha256=-Rayt_GMLiVliaFvqTE1RKoPBU5PcqdnTUlW3ctCIZk,54721 +../../locale/en/LC_MESSAGES/bagit-python.mo,sha256=onTi-42QABidt47Lx6KeFzbP-lQ0HU1X9fW2j1M8UK0,2808 +__pycache__/bagit.cpython-312.pyc,, +bagit-1.9b3.dev3+gda04180.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +bagit-1.9b3.dev3+gda04180.dist-info/METADATA,sha256=xeBL-vZhcdQpG_M3AvddWJ-YvQ_BCxJ8UUBmmTfizXA,7527 +bagit-1.9b3.dev3+gda04180.dist-info/RECORD,, +bagit-1.9b3.dev3+gda04180.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +bagit-1.9b3.dev3+gda04180.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91 +bagit-1.9b3.dev3+gda04180.dist-info/direct_url.json,sha256=2hbjksYnB7HjDIZNtDaTebBXsdAYM2ZI_reqL50sFWw,211 +bagit-1.9b3.dev3+gda04180.dist-info/top_level.txt,sha256=f288ippe7ERSzETsEftza1dXq61Z4zFSXDgaTF6djpE,6 +bagit.py,sha256=L3uPbWYK4GPwRlCTWSnzMpOukmVWc-tmLxfHzACB-5w,54701 diff --git a/internal/dist/data/linux-arm64/bagit-1.8.1.dev26+g5b77243.dist-info/REQUESTED b/internal/dist/data/linux-arm64/bagit-1.9b3.dev3+gda04180.dist-info/REQUESTED similarity index 100% rename from internal/dist/data/linux-arm64/bagit-1.8.1.dev26+g5b77243.dist-info/REQUESTED rename to internal/dist/data/linux-arm64/bagit-1.9b3.dev3+gda04180.dist-info/REQUESTED diff --git a/internal/dist/data/linux-arm64/bagit-1.8.1.dev26+g5b77243.dist-info/WHEEL b/internal/dist/data/linux-arm64/bagit-1.9b3.dev3+gda04180.dist-info/WHEEL similarity index 54% rename from internal/dist/data/linux-arm64/bagit-1.8.1.dev26+g5b77243.dist-info/WHEEL rename to internal/dist/data/linux-arm64/bagit-1.9b3.dev3+gda04180.dist-info/WHEEL index 832be11..da25d7b 100644 --- a/internal/dist/data/linux-arm64/bagit-1.8.1.dev26+g5b77243.dist-info/WHEEL +++ b/internal/dist/data/linux-arm64/bagit-1.9b3.dev3+gda04180.dist-info/WHEEL @@ -1,6 +1,5 @@ Wheel-Version: 1.0 -Generator: bdist_wheel (0.43.0) +Generator: setuptools (75.2.0) Root-Is-Purelib: true -Tag: py2-none-any Tag: py3-none-any diff --git a/internal/dist/data/linux-arm64/bagit-1.9b3.dev3+gda04180.dist-info/direct_url.json b/internal/dist/data/linux-arm64/bagit-1.9b3.dev3+gda04180.dist-info/direct_url.json new file mode 100644 index 0000000..71df225 --- /dev/null +++ b/internal/dist/data/linux-arm64/bagit-1.9b3.dev3+gda04180.dist-info/direct_url.json @@ -0,0 +1 @@ +{"url": "https://github.com/LibraryOfCongress/bagit-python", "vcs_info": {"commit_id": "da041808d2453da2209054a9f4c48187dc323c0a", "requested_revision": "da041808d2453da2209054a9f4c48187dc323c0a", "vcs": "git"}} \ No newline at end of file diff 
--git a/internal/dist/data/linux-arm64/bagit-1.8.1.dev26+g5b77243.dist-info/top_level.txt b/internal/dist/data/linux-arm64/bagit-1.9b3.dev3+gda04180.dist-info/top_level.txt similarity index 100% rename from internal/dist/data/linux-arm64/bagit-1.8.1.dev26+g5b77243.dist-info/top_level.txt rename to internal/dist/data/linux-arm64/bagit-1.9b3.dev3+gda04180.dist-info/top_level.txt diff --git a/internal/dist/data/linux-arm64/bagit.py b/internal/dist/data/linux-arm64/bagit.py index 458fba8..69ea9ab 100644 --- a/internal/dist/data/linux-arm64/bagit.py +++ b/internal/dist/data/linux-arm64/bagit.py @@ -1,8 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -from __future__ import absolute_import, division, print_function, unicode_literals - import argparse import codecs import gettext @@ -19,7 +17,6 @@ from collections import defaultdict from datetime import date from functools import partial -from os.path import abspath, isdir, isfile, join try: from importlib.metadata import version @@ -42,10 +39,8 @@ def find_locale_dir(): TRANSLATION_CATALOG = gettext.translation( "bagit-python", localedir=find_locale_dir(), fallback=True ) -if sys.version_info < (3,): - _ = TRANSLATION_CATALOG.ugettext -else: - _ = TRANSLATION_CATALOG.gettext + +_ = TRANSLATION_CATALOG.gettext MODULE_NAME = "bagit" if __name__ == "__main__" else __name__ @@ -140,7 +135,7 @@ def find_locale_dir(): open_text_file = partial(codecs.open, encoding="utf-8", errors="strict") # This is the same as decoding the byte values in codecs.BOM: -UNICODE_BYTE_ORDER_MARK = "\uFEFF" +UNICODE_BYTE_ORDER_MARK = "\ufeff" def make_bag( @@ -282,7 +277,7 @@ class Bag(object): valid_files = ["bagit.txt", "fetch.txt"] valid_directories = ["data"] - def __init__(self, path=None): + def __init__(self, path): super(Bag, self).__init__() self.tags = {} self.info = {} @@ -304,12 +299,8 @@ def __init__(self, path=None): self.algorithms = [] self.tag_file_name = None - self.path = abspath(path) - if path: - # if path ends in a path separator, strip it off - if path[-1] == os.sep: - self.path = path[:-1] - self._open() + self.path = os.path.abspath(path) + self._open() def __str__(self): # FIXME: develop a more informative string representation for a Bag @@ -333,7 +324,7 @@ def _open(self): # the required version and encoding. bagit_file_path = os.path.join(self.path, "bagit.txt") - if not isfile(bagit_file_path): + if not os.path.isfile(bagit_file_path): raise BagError(_("Expected bagit.txt does not exist: %s") % bagit_file_path) self.tags = tags = _load_tag_file(bagit_file_path) @@ -382,13 +373,13 @@ def _open(self): def manifest_files(self): for filename in ["manifest-%s.txt" % a for a in CHECKSUM_ALGOS]: f = os.path.join(self.path, filename) - if isfile(f): + if os.path.isfile(f): yield f def tagmanifest_files(self): for filename in ["tagmanifest-%s.txt" % a for a in CHECKSUM_ALGOS]: f = os.path.join(self.path, filename) - if isfile(f): + if os.path.isfile(f): yield f def compare_manifests_with_fs(self): @@ -422,8 +413,8 @@ def compare_manifests_with_fs(self): def compare_fetch_with_fs(self): """Compares the fetch entries with the files actually - in the payload, and returns a list of all the files - that still need to be fetched. + in the payload, and returns a list of all the files + that still need to be fetched. 
""" files_on_fs = set(self.payload_files()) @@ -449,7 +440,7 @@ def payload_files(self): yield rel_path def payload_entries(self): - """Return a dictionary of items """ + """Return a dictionary of items""" # Don't use dict comprehension (compatibility with Python < 2.7) return dict( (key, value) @@ -562,7 +553,7 @@ def fetch_entries(self): fetch_file_path = os.path.join(self.path, "fetch.txt") - if isfile(fetch_file_path): + if os.path.isfile(fetch_file_path): with open_text_file( fetch_file_path, "r", encoding=self.encoding ) as fetch_file: @@ -618,7 +609,9 @@ def is_valid(self, processes=1, fast=False, completeness_only=False): """ try: - self.validate(processes=processes, fast=fast, completeness_only=completeness_only) + self.validate( + processes=processes, fast=fast, completeness_only=completeness_only + ) except BagError: return False @@ -746,7 +739,7 @@ def _validate_structure(self): def _validate_structure_payload_directory(self): data_dir_path = os.path.join(self.path, "data") - if not isdir(data_dir_path): + if not os.path.isdir(data_dir_path): raise BagValidationError( _("Expected data directory %s does not exist") % data_dir_path ) @@ -776,7 +769,10 @@ def validate_fetch(self): # each parsed url must resolve to a scheme and point to a netloc # if the scheme is file, netloc is not necessary - if not (all((parsed_url.scheme, parsed_url.netloc)) or parsed_url.scheme == "file"): + if not ( + all((parsed_url.scheme, parsed_url.netloc)) + or parsed_url.scheme == "file" + ): raise BagError(_("Malformed URL in fetch.txt: %s") % url) def _validate_contents(self, processes=1, fast=False, completeness_only=False): @@ -851,11 +847,11 @@ def _validate_completeness(self): only_in_manifests, only_on_fs = self.compare_manifests_with_fs() for path in only_in_manifests: e = FileMissing(path) - LOGGER.warning(force_unicode(e)) + LOGGER.warning(str(e)) errors.append(e) for path in only_on_fs: e = UnexpectedFile(path) - LOGGER.warning(force_unicode(e)) + LOGGER.warning(str(e)) errors.append(e) if errors: @@ -886,13 +882,12 @@ def _validate_entries(self, processes): if processes == 1: hash_results = [_calc_hashes(i) for i in args] else: - try: - pool = multiprocessing.Pool( - processes if processes else None, initializer=worker_init - ) - hash_results = pool.map(_calc_hashes, args) - finally: - pool.terminate() + pool = multiprocessing.Pool( + processes if processes else None, initializer=worker_init + ) + hash_results = pool.map(_calc_hashes, args) + pool.close() + pool.join() # Any unhandled exceptions are probably fatal except: @@ -906,7 +901,7 @@ def _validate_entries(self, processes): e = ChecksumMismatch( rel_path, alg, stored_hash.lower(), computed_hash ) - LOGGER.warning(force_unicode(e)) + LOGGER.warning(str(e)) errors.append(e) if errors: @@ -963,7 +958,7 @@ def __init__(self, message, details=None): def __str__(self): if len(self.details) > 0: - details = "; ".join([force_unicode(e) for e in self.details]) + details = "; ".join([str(e) for e in self.details]) return "%s: %s" % (self.message, details) return self.message @@ -988,7 +983,7 @@ def __str__(self): return _( '%(path)s %(algorithm)s validation failed: expected="%(expected)s" found="%(found)s"' ) % { - "path": force_unicode(self.path), + "path": str(self.path), "algorithm": self.algorithm, "expected": self.expected, "found": self.found, @@ -997,9 +992,9 @@ def __str__(self): class FileMissing(ManifestErrorDetail): def __str__(self): - return _( - "%s exists in manifest but was not found on filesystem" - ) % 
force_unicode(self.path) + return _("%s exists in manifest but was not found on filesystem") % str( + self.path + ) class UnexpectedFile(ManifestErrorDetail): @@ -1034,22 +1029,10 @@ def posix_multiprocessing_worker_initializer(): # is consistency since the input value will be preserved: -def normalize_unicode_py3(s): +def normalize_unicode(s): return unicodedata.normalize("NFC", s) -def normalize_unicode_py2(s): - if isinstance(s, str): - s = s.decode("utf-8") - return unicodedata.normalize("NFC", s) - - -if sys.version_info > (3, 0): - normalize_unicode = normalize_unicode_py3 -else: - normalize_unicode = normalize_unicode_py2 - - def build_unicode_normalized_lookup_dict(filenames): """ Return a dictionary mapping unicode-normalized filenames to as-encoded @@ -1138,7 +1121,7 @@ def _calc_hashes(args): try: f_hashes = _calculate_file_hashes(full_path, f_hashers) except BagValidationError as e: - f_hashes = dict((alg, force_unicode(e)) for alg in f_hashers.keys()) + f_hashes = dict((alg, str(e)) for alg in f_hashers.keys()) return rel_path, f_hashes, hashes @@ -1161,7 +1144,7 @@ def _calculate_file_hashes(full_path, f_hashers): except (OSError, IOError) as e: raise BagValidationError( _("Could not read %(filename)s: %(error)s") - % {"filename": full_path, "error": force_unicode(e)} + % {"filename": full_path, "error": str(e)} ) return dict((alg, h.hexdigest()) for alg, h in f_hashers.items()) @@ -1187,11 +1170,11 @@ def _load_tag_file(tag_file_name, encoding="utf-8-sig"): def _parse_tags(tag_file): """Parses a tag file, according to RFC 2822. This - includes line folding, permitting extra-long - field values. + includes line folding, permitting extra-long + field values. - See http://www.faqs.org/rfcs/rfc2822.html for - more information. + See http://www.faqs.org/rfcs/rfc2822.html for + more information. 
""" tag_name = None @@ -1237,7 +1220,7 @@ def _make_tag_file(bag_info_path, bag_info): values = [values] for txt in values: # strip CR, LF and CRLF so they don't mess up the tag file - txt = re.sub(r"\n|\r|(\r\n)", "", force_unicode(txt)) + txt = re.sub(r"\n|\r|(\r\n)", "", str(txt)) f.write("%s: %s\n" % (h, txt)) @@ -1295,14 +1278,14 @@ def make_manifests(data_dir, processes, algorithms=DEFAULT_CHECKSUMS, encoding=" def _make_tagmanifest_file(alg, bag_dir, encoding="utf-8"): - tagmanifest_file = join(bag_dir, "tagmanifest-%s.txt" % alg) + tagmanifest_file = os.path.join(bag_dir, "tagmanifest-%s.txt" % alg) LOGGER.info(_("Creating %s"), tagmanifest_file) checksums = [] for f in _find_tag_files(bag_dir): if re.match(r"^tagmanifest-.+\.txt$", f): continue - with open(join(bag_dir, f), "rb") as fh: + with open(os.path.join(bag_dir, f), "rb") as fh: m = hashlib.new(alg) while True: block = fh.read(HASH_BLOCK_SIZE) @@ -1312,7 +1295,7 @@ def _make_tagmanifest_file(alg, bag_dir, encoding="utf-8"): checksums.append((m.hexdigest(), f)) with open_text_file( - join(bag_dir, tagmanifest_file), mode="w", encoding=encoding + os.path.join(bag_dir, tagmanifest_file), mode="w", encoding=encoding ) as tagmanifest: for digest, filename in checksums: tagmanifest.write("%s %s\n" % (digest, filename)) @@ -1328,7 +1311,7 @@ def _find_tag_files(bag_dir): if filename.startswith("tagmanifest-"): continue # remove everything up to the bag_dir directory - p = join(dir_name, filename) + p = os.path.join(dir_name, filename) yield os.path.relpath(p, bag_dir) @@ -1433,19 +1416,6 @@ def _decode_filename(s): return s -def force_unicode_py2(s): - """Reliably return a Unicode string given a possible unicode or byte string""" - if isinstance(s, str): - return s.decode("utf-8") - else: - return unicode(s) - - -if sys.version_info > (3, 0): - force_unicode = str -else: - force_unicode = force_unicode_py2 - # following code is used for command line program @@ -1531,7 +1501,10 @@ def _make_parser(): metadata_args = parser.add_argument_group(_("Optional Bag Metadata")) for header in STANDARD_BAG_INFO_HEADERS: metadata_args.add_argument( - "--%s" % header.lower(), type=str, action=BagHeaderAction, default=argparse.SUPPRESS + "--%s" % header.lower(), + type=str, + action=BagHeaderAction, + default=argparse.SUPPRESS, ) parser.add_argument( @@ -1574,7 +1547,9 @@ def main(): parser.error(_("--fast is only allowed as an option for --validate!")) if args.completeness_only and not args.validate: - parser.error(_("--completeness-only is only allowed as an option for --validate!")) + parser.error( + _("--completeness-only is only allowed as an option for --validate!") + ) _configure_logging(args) @@ -1593,7 +1568,9 @@ def main(): if args.fast: LOGGER.info(_("%s valid according to Payload-Oxum"), bag_dir) elif args.completeness_only: - LOGGER.info(_("%s is complete and valid according to Payload-Oxum"), bag_dir) + LOGGER.info( + _("%s is complete and valid according to Payload-Oxum"), bag_dir + ) else: LOGGER.info(_("%s is valid"), bag_dir) except BagError as e: diff --git a/internal/dist/data/linux-arm64/bin/bagit.py b/internal/dist/data/linux-arm64/bin/bagit.py index 04fc5b3..7f5630c 100644 --- a/internal/dist/data/linux-arm64/bin/bagit.py +++ b/internal/dist/data/linux-arm64/bin/bagit.py @@ -1,8 +1,6 @@ #!/tmp/python-pip-linux-arm64/bin/python3 # -*- coding: utf-8 -*- -from __future__ import absolute_import, division, print_function, unicode_literals - import argparse import codecs import gettext @@ -19,7 +17,6 @@ from collections import 
defaultdict from datetime import date from functools import partial -from os.path import abspath, isdir, isfile, join try: from importlib.metadata import version @@ -42,10 +39,8 @@ def find_locale_dir(): TRANSLATION_CATALOG = gettext.translation( "bagit-python", localedir=find_locale_dir(), fallback=True ) -if sys.version_info < (3,): - _ = TRANSLATION_CATALOG.ugettext -else: - _ = TRANSLATION_CATALOG.gettext + +_ = TRANSLATION_CATALOG.gettext MODULE_NAME = "bagit" if __name__ == "__main__" else __name__ @@ -140,7 +135,7 @@ def find_locale_dir(): open_text_file = partial(codecs.open, encoding="utf-8", errors="strict") # This is the same as decoding the byte values in codecs.BOM: -UNICODE_BYTE_ORDER_MARK = "\uFEFF" +UNICODE_BYTE_ORDER_MARK = "\ufeff" def make_bag( @@ -282,7 +277,7 @@ class Bag(object): valid_files = ["bagit.txt", "fetch.txt"] valid_directories = ["data"] - def __init__(self, path=None): + def __init__(self, path): super(Bag, self).__init__() self.tags = {} self.info = {} @@ -304,12 +299,8 @@ def __init__(self, path=None): self.algorithms = [] self.tag_file_name = None - self.path = abspath(path) - if path: - # if path ends in a path separator, strip it off - if path[-1] == os.sep: - self.path = path[:-1] - self._open() + self.path = os.path.abspath(path) + self._open() def __str__(self): # FIXME: develop a more informative string representation for a Bag @@ -333,7 +324,7 @@ def _open(self): # the required version and encoding. bagit_file_path = os.path.join(self.path, "bagit.txt") - if not isfile(bagit_file_path): + if not os.path.isfile(bagit_file_path): raise BagError(_("Expected bagit.txt does not exist: %s") % bagit_file_path) self.tags = tags = _load_tag_file(bagit_file_path) @@ -382,13 +373,13 @@ def _open(self): def manifest_files(self): for filename in ["manifest-%s.txt" % a for a in CHECKSUM_ALGOS]: f = os.path.join(self.path, filename) - if isfile(f): + if os.path.isfile(f): yield f def tagmanifest_files(self): for filename in ["tagmanifest-%s.txt" % a for a in CHECKSUM_ALGOS]: f = os.path.join(self.path, filename) - if isfile(f): + if os.path.isfile(f): yield f def compare_manifests_with_fs(self): @@ -422,8 +413,8 @@ def compare_manifests_with_fs(self): def compare_fetch_with_fs(self): """Compares the fetch entries with the files actually - in the payload, and returns a list of all the files - that still need to be fetched. + in the payload, and returns a list of all the files + that still need to be fetched. 
""" files_on_fs = set(self.payload_files()) @@ -449,7 +440,7 @@ def payload_files(self): yield rel_path def payload_entries(self): - """Return a dictionary of items """ + """Return a dictionary of items""" # Don't use dict comprehension (compatibility with Python < 2.7) return dict( (key, value) @@ -562,7 +553,7 @@ def fetch_entries(self): fetch_file_path = os.path.join(self.path, "fetch.txt") - if isfile(fetch_file_path): + if os.path.isfile(fetch_file_path): with open_text_file( fetch_file_path, "r", encoding=self.encoding ) as fetch_file: @@ -618,7 +609,9 @@ def is_valid(self, processes=1, fast=False, completeness_only=False): """ try: - self.validate(processes=processes, fast=fast, completeness_only=completeness_only) + self.validate( + processes=processes, fast=fast, completeness_only=completeness_only + ) except BagError: return False @@ -746,7 +739,7 @@ def _validate_structure(self): def _validate_structure_payload_directory(self): data_dir_path = os.path.join(self.path, "data") - if not isdir(data_dir_path): + if not os.path.isdir(data_dir_path): raise BagValidationError( _("Expected data directory %s does not exist") % data_dir_path ) @@ -776,7 +769,10 @@ def validate_fetch(self): # each parsed url must resolve to a scheme and point to a netloc # if the scheme is file, netloc is not necessary - if not (all((parsed_url.scheme, parsed_url.netloc)) or parsed_url.scheme == "file"): + if not ( + all((parsed_url.scheme, parsed_url.netloc)) + or parsed_url.scheme == "file" + ): raise BagError(_("Malformed URL in fetch.txt: %s") % url) def _validate_contents(self, processes=1, fast=False, completeness_only=False): @@ -851,11 +847,11 @@ def _validate_completeness(self): only_in_manifests, only_on_fs = self.compare_manifests_with_fs() for path in only_in_manifests: e = FileMissing(path) - LOGGER.warning(force_unicode(e)) + LOGGER.warning(str(e)) errors.append(e) for path in only_on_fs: e = UnexpectedFile(path) - LOGGER.warning(force_unicode(e)) + LOGGER.warning(str(e)) errors.append(e) if errors: @@ -886,13 +882,12 @@ def _validate_entries(self, processes): if processes == 1: hash_results = [_calc_hashes(i) for i in args] else: - try: - pool = multiprocessing.Pool( - processes if processes else None, initializer=worker_init - ) - hash_results = pool.map(_calc_hashes, args) - finally: - pool.terminate() + pool = multiprocessing.Pool( + processes if processes else None, initializer=worker_init + ) + hash_results = pool.map(_calc_hashes, args) + pool.close() + pool.join() # Any unhandled exceptions are probably fatal except: @@ -906,7 +901,7 @@ def _validate_entries(self, processes): e = ChecksumMismatch( rel_path, alg, stored_hash.lower(), computed_hash ) - LOGGER.warning(force_unicode(e)) + LOGGER.warning(str(e)) errors.append(e) if errors: @@ -963,7 +958,7 @@ def __init__(self, message, details=None): def __str__(self): if len(self.details) > 0: - details = "; ".join([force_unicode(e) for e in self.details]) + details = "; ".join([str(e) for e in self.details]) return "%s: %s" % (self.message, details) return self.message @@ -988,7 +983,7 @@ def __str__(self): return _( '%(path)s %(algorithm)s validation failed: expected="%(expected)s" found="%(found)s"' ) % { - "path": force_unicode(self.path), + "path": str(self.path), "algorithm": self.algorithm, "expected": self.expected, "found": self.found, @@ -997,9 +992,9 @@ def __str__(self): class FileMissing(ManifestErrorDetail): def __str__(self): - return _( - "%s exists in manifest but was not found on filesystem" - ) % 
force_unicode(self.path) + return _("%s exists in manifest but was not found on filesystem") % str( + self.path + ) class UnexpectedFile(ManifestErrorDetail): @@ -1034,22 +1029,10 @@ def posix_multiprocessing_worker_initializer(): # is consistency since the input value will be preserved: -def normalize_unicode_py3(s): +def normalize_unicode(s): return unicodedata.normalize("NFC", s) -def normalize_unicode_py2(s): - if isinstance(s, str): - s = s.decode("utf-8") - return unicodedata.normalize("NFC", s) - - -if sys.version_info > (3, 0): - normalize_unicode = normalize_unicode_py3 -else: - normalize_unicode = normalize_unicode_py2 - - def build_unicode_normalized_lookup_dict(filenames): """ Return a dictionary mapping unicode-normalized filenames to as-encoded @@ -1138,7 +1121,7 @@ def _calc_hashes(args): try: f_hashes = _calculate_file_hashes(full_path, f_hashers) except BagValidationError as e: - f_hashes = dict((alg, force_unicode(e)) for alg in f_hashers.keys()) + f_hashes = dict((alg, str(e)) for alg in f_hashers.keys()) return rel_path, f_hashes, hashes @@ -1161,7 +1144,7 @@ def _calculate_file_hashes(full_path, f_hashers): except (OSError, IOError) as e: raise BagValidationError( _("Could not read %(filename)s: %(error)s") - % {"filename": full_path, "error": force_unicode(e)} + % {"filename": full_path, "error": str(e)} ) return dict((alg, h.hexdigest()) for alg, h in f_hashers.items()) @@ -1187,11 +1170,11 @@ def _load_tag_file(tag_file_name, encoding="utf-8-sig"): def _parse_tags(tag_file): """Parses a tag file, according to RFC 2822. This - includes line folding, permitting extra-long - field values. + includes line folding, permitting extra-long + field values. - See http://www.faqs.org/rfcs/rfc2822.html for - more information. + See http://www.faqs.org/rfcs/rfc2822.html for + more information. 
""" tag_name = None @@ -1237,7 +1220,7 @@ def _make_tag_file(bag_info_path, bag_info): values = [values] for txt in values: # strip CR, LF and CRLF so they don't mess up the tag file - txt = re.sub(r"\n|\r|(\r\n)", "", force_unicode(txt)) + txt = re.sub(r"\n|\r|(\r\n)", "", str(txt)) f.write("%s: %s\n" % (h, txt)) @@ -1295,14 +1278,14 @@ def make_manifests(data_dir, processes, algorithms=DEFAULT_CHECKSUMS, encoding=" def _make_tagmanifest_file(alg, bag_dir, encoding="utf-8"): - tagmanifest_file = join(bag_dir, "tagmanifest-%s.txt" % alg) + tagmanifest_file = os.path.join(bag_dir, "tagmanifest-%s.txt" % alg) LOGGER.info(_("Creating %s"), tagmanifest_file) checksums = [] for f in _find_tag_files(bag_dir): if re.match(r"^tagmanifest-.+\.txt$", f): continue - with open(join(bag_dir, f), "rb") as fh: + with open(os.path.join(bag_dir, f), "rb") as fh: m = hashlib.new(alg) while True: block = fh.read(HASH_BLOCK_SIZE) @@ -1312,7 +1295,7 @@ def _make_tagmanifest_file(alg, bag_dir, encoding="utf-8"): checksums.append((m.hexdigest(), f)) with open_text_file( - join(bag_dir, tagmanifest_file), mode="w", encoding=encoding + os.path.join(bag_dir, tagmanifest_file), mode="w", encoding=encoding ) as tagmanifest: for digest, filename in checksums: tagmanifest.write("%s %s\n" % (digest, filename)) @@ -1328,7 +1311,7 @@ def _find_tag_files(bag_dir): if filename.startswith("tagmanifest-"): continue # remove everything up to the bag_dir directory - p = join(dir_name, filename) + p = os.path.join(dir_name, filename) yield os.path.relpath(p, bag_dir) @@ -1433,19 +1416,6 @@ def _decode_filename(s): return s -def force_unicode_py2(s): - """Reliably return a Unicode string given a possible unicode or byte string""" - if isinstance(s, str): - return s.decode("utf-8") - else: - return unicode(s) - - -if sys.version_info > (3, 0): - force_unicode = str -else: - force_unicode = force_unicode_py2 - # following code is used for command line program @@ -1531,7 +1501,10 @@ def _make_parser(): metadata_args = parser.add_argument_group(_("Optional Bag Metadata")) for header in STANDARD_BAG_INFO_HEADERS: metadata_args.add_argument( - "--%s" % header.lower(), type=str, action=BagHeaderAction, default=argparse.SUPPRESS + "--%s" % header.lower(), + type=str, + action=BagHeaderAction, + default=argparse.SUPPRESS, ) parser.add_argument( @@ -1574,7 +1547,9 @@ def main(): parser.error(_("--fast is only allowed as an option for --validate!")) if args.completeness_only and not args.validate: - parser.error(_("--completeness-only is only allowed as an option for --validate!")) + parser.error( + _("--completeness-only is only allowed as an option for --validate!") + ) _configure_logging(args) @@ -1593,7 +1568,9 @@ def main(): if args.fast: LOGGER.info(_("%s valid according to Payload-Oxum"), bag_dir) elif args.completeness_only: - LOGGER.info(_("%s is complete and valid according to Payload-Oxum"), bag_dir) + LOGGER.info( + _("%s is complete and valid according to Payload-Oxum"), bag_dir + ) else: LOGGER.info(_("%s is valid"), bag_dir) except BagError as e: diff --git a/internal/dist/data/linux-arm64/files.json b/internal/dist/data/linux-arm64/files.json index 86c5809..928ceb0 100644 --- a/internal/dist/data/linux-arm64/files.json +++ b/internal/dist/data/linux-arm64/files.json @@ -1,94 +1,79 @@ { - "contentHash": "7f3c7144b775739328ebc5b9efefb711dbcf6760bbc6ec98a488c1c519a57646", + "contentHash": "12745b7f951baf45d6fce763bf63bd82be73e0469675918fe34961bfd95f16c7", "files": [ { - "name": "bagit-1.8.1.dev26+g5b77243.dist-info", + "name": 
"bagit-1.9b3.dev3+gda04180.dist-info", "size": 0, - "modTime": 1713519584, "perm": 2147484157 }, { - "name": "bagit-1.8.1.dev26+g5b77243.dist-info/INSTALLER", + "name": "bagit-1.9b3.dev3+gda04180.dist-info/INSTALLER", "size": 4, - "modTime": 1713519584, "perm": 436 }, { - "name": "bagit-1.8.1.dev26+g5b77243.dist-info/METADATA", - "size": 8167, - "modTime": 1713519584, + "name": "bagit-1.9b3.dev3+gda04180.dist-info/METADATA", + "size": 7527, "perm": 436 }, { - "name": "bagit-1.8.1.dev26+g5b77243.dist-info/RECORD", - "size": 994, - "modTime": 1713519584, + "name": "bagit-1.9b3.dev3+gda04180.dist-info/RECORD", + "size": 986, "perm": 436 }, { - "name": "bagit-1.8.1.dev26+g5b77243.dist-info/REQUESTED", + "name": "bagit-1.9b3.dev3+gda04180.dist-info/REQUESTED", "size": 0, - "modTime": 1713519584, "perm": 436 }, { - "name": "bagit-1.8.1.dev26+g5b77243.dist-info/WHEEL", - "size": 110, - "modTime": 1713519584, + "name": "bagit-1.9b3.dev3+gda04180.dist-info/WHEEL", + "size": 91, "perm": 436 }, { - "name": "bagit-1.8.1.dev26+g5b77243.dist-info/direct_url.json", + "name": "bagit-1.9b3.dev3+gda04180.dist-info/direct_url.json", "size": 211, - "modTime": 1713519584, "perm": 436 }, { - "name": "bagit-1.8.1.dev26+g5b77243.dist-info/top_level.txt", + "name": "bagit-1.9b3.dev3+gda04180.dist-info/top_level.txt", "size": 6, - "modTime": 1713519584, "perm": 436 }, { "name": "bagit.py", - "size": 55530, - "modTime": 1713519584, + "size": 54701, "perm": 436 }, { "name": "bin", "size": 0, - "modTime": 1713519584, "perm": 2147484157 }, { "name": "bin/bagit.py", - "size": 55550, - "modTime": 1713519584, + "size": 54721, "perm": 509 }, { "name": "locale", "size": 0, - "modTime": 1713519584, "perm": 2147484157 }, { "name": "locale/en", "size": 0, - "modTime": 1713519584, "perm": 2147484157 }, { "name": "locale/en/LC_MESSAGES", "size": 0, - "modTime": 1713519584, "perm": 2147484157 }, { "name": "locale/en/LC_MESSAGES/bagit-python.mo", "size": 2808, - "modTime": 1713519584, "perm": 436, "compressed": true } diff --git a/internal/dist/data/windows-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/METADATA b/internal/dist/data/windows-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/METADATA deleted file mode 100644 index 81a7c6b..0000000 --- a/internal/dist/data/windows-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/METADATA +++ /dev/null @@ -1,299 +0,0 @@ -Metadata-Version: 2.1 -Name: bagit -Version: 1.8.1.dev26+g5b77243 -Summary: Create and validate BagIt packages -Home-page: https://libraryofcongress.github.io/bagit-python/ -Author: Ed Summers -Author-email: ehs@pobox.com -Platform: POSIX -Classifier: License :: Public Domain -Classifier: Intended Audience :: Developers -Classifier: Topic :: Communications :: File Sharing -Classifier: Topic :: Software Development :: Libraries :: Python Modules -Classifier: Topic :: System :: Filesystems -Classifier: Programming Language :: Python :: 2 -Classifier: Programming Language :: Python :: 2.7 -Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3.1 -Classifier: Programming Language :: Python :: 3.2 -Classifier: Programming Language :: Python :: 3.3 -Classifier: Programming Language :: Python :: 3.4 -Classifier: Programming Language :: Python :: 3.5 -Classifier: Programming Language :: Python :: 3.6 -Classifier: Programming Language :: Python :: 3.7 -Classifier: Programming Language :: Python :: 3.8 -Requires-Dist: importlib-metadata ; python_version < "3.8" - -bagit-python -============ - -|Build Status| |Coverage Status| - -bagit is a Python 
library and command line utility for working with -`BagIt `__ style packages. - -Installation ------------- - -bagit.py is a single-file python module that you can drop into your -project as needed or you can install globally with: - -:: - - pip install bagit - -Python v2.7+ is required. - -Command Line Usage ------------------- - -When you install bagit you should get a command-line program called -bagit.py which you can use to turn an existing directory into a bag: - -:: - - bagit.py --contact-name 'John Kunze' /directory/to/bag - -Finding Bagit on your system -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The ``bagit.py`` program should be available in your normal command-line -window (Terminal on OS X, Command Prompt or Powershell on Windows, -etc.). If you are unsure where it was installed you can also request -that Python search for ``bagit`` as a Python module: simply replace -``bagit.py`` with ``python -m bagit``: - -:: - - python -m bagit --help - -On some systems Python may have been installed as ``python3``, ``py``, -etc. – simply use the same name you use to start an interactive Python -shell: - -:: - - py -m bagit --help - python3 -m bagit --help - -Configuring BagIt -~~~~~~~~~~~~~~~~~ - -You can pass in key/value metadata for the bag using options like -``--contact-name`` above, which get persisted to the bag-info.txt. For a -complete list of bag-info.txt properties you can use as commmand line -arguments see ``--help``. - -Since calculating checksums can take a while when creating a bag, you -may want to calculate them in parallel if you are on a multicore -machine. You can do that with the ``--processes`` option: - -:: - - bagit.py --processes 4 /directory/to/bag - -To specify which checksum algorithm(s) to use when generating the -manifest, use the --md5, --sha1, --sha256 and/or --sha512 flags (MD5 is -generated by default). - -:: - - bagit.py --sha1 /path/to/bag - bagit.py --sha256 /path/to/bag - bagit.py --sha512 /path/to/bag - -If you would like to validate a bag you can use the --validate flag. - -:: - - bagit.py --validate /path/to/bag - -If you would like to take a quick look at the bag to see if it seems -valid by just examining the structure of the bag, and comparing its -payload-oxum (byte count and number of files) then use the ``--fast`` -flag. - -:: - - bagit.py --validate --fast /path/to/bag - -And finally, if you'd like to parallelize validation to take advantage -of multiple CPUs you can: - -:: - - bagit.py --validate --processes 4 /path/to/bag - -Using BagIt in your programs ----------------------------- - -You can also use BagIt programatically in your own Python programs by -importing the ``bagit`` module. - -Create -~~~~~~ - -To create a bag you would do this: - -.. code:: python - - bag = bagit.make_bag('mydir', {'Contact-Name': 'John Kunze'}) - -``make_bag`` returns a Bag instance. If you have a bag already on disk -and would like to create a Bag instance for it, simply call the -constructor directly: - -.. code:: python - - bag = bagit.Bag('/path/to/bag') - -Update Bag Metadata -~~~~~~~~~~~~~~~~~~~ - -You can change the metadata persisted to the bag-info.txt by using the -``info`` property on a ``Bag``. - -.. code:: python - - # load the bag - bag = bagit.Bag('/path/to/bag') - - # update bag info metadata - bag.info['Internal-Sender-Description'] = 'Updated on 2014-06-28.' - bag.info['Authors'] = ['John Kunze', 'Andy Boyko'] - bag.save() - -Update Bag Manifests -~~~~~~~~~~~~~~~~~~~~ - -By default ``save`` will not update manifests. 
This guards against a -situation where a call to ``save`` to persist bag metadata accidentally -regenerates manifests for an invalid bag. If you have modified the -payload of a bag by adding, modifying or deleting files in the data -directory, and wish to regenerate the manifests set the ``manifests`` -parameter to True when calling ``save``. - -.. code:: python - - - import shutil, os - - # add a file - shutil.copyfile('newfile', '/path/to/bag/data/newfile') - - # remove a file - os.remove('/path/to/bag/data/file') - - # persist changes - bag.save(manifests=True) - -The save method takes an optional processes parameter which will -determine how many processes are used to regenerate the checksums. This -can be handy on multicore machines. - -Validation -~~~~~~~~~~ - -If you would like to see if a bag is valid, use its ``is_valid`` method: - -.. code:: python - - bag = bagit.Bag('/path/to/bag') - if bag.is_valid(): - print("yay :)") - else: - print("boo :(") - -If you'd like to get a detailed list of validation errors, execute the -``validate`` method and catch the ``BagValidationError`` exception. If -the bag's manifest was invalid (and it wasn't caught by the payload -oxum) the exception's ``details`` property will contain a list of -``ManifestError``\ s that you can introspect on. Each ManifestError, -will be of type ``ChecksumMismatch``, ``FileMissing``, -``UnexpectedFile``. - -So for example if you want to print out checksums that failed to -validate you can do this: - -.. code:: python - - - bag = bagit.Bag("/path/to/bag") - - try: - bag.validate() - - except bagit.BagValidationError as e: - for d in e.details: - if isinstance(d, bagit.ChecksumMismatch): - print("expected %s to have %s checksum of %s but found %s" % - (d.path, d.algorithm, d.expected, d.found)) - -To iterate through a bag's manifest and retrieve checksums for the -payload files use the bag's entries dictionary: - -.. code:: python - - bag = bagit.Bag("/path/to/bag") - - for path, fixity in bag.entries.items(): - print("path:%s md5:%s" % (path, fixity["md5"])) - -Contributing to bagit-python development ----------------------------------------- - -:: - - % git clone git://github.com/LibraryOfCongress/bagit-python.git - % cd bagit-python - # MAKE CHANGES - % python test.py - -Running the tests -~~~~~~~~~~~~~~~~~ - -You can quickly run the tests by having setuptools install dependencies: - -:: - - python setup.py test - -Once your code is working, you can use -`Tox `__ to run the tests with every -supported version of Python which you have installed on the local -system: - -:: - - tox - -If you have Docker installed, you can run the tests under Linux inside a -container: - -:: - - % docker build -t bagit:latest . && docker run -it bagit:latest - -Benchmarks ----------- - -If you'd like to see how increasing parallelization of bag creation on -your system effects the time to create a bag try using the included -bench utility: - -:: - - % ./bench.py - -License -------- - -|cc0| - -Note: By contributing to this project, you agree to license your work -under the same terms as those that govern this project's distribution. - -.. |Coverage Status| image:: https://coveralls.io/repos/github/LibraryOfCongress/bagit-python/badge.svg?branch=master - :target: https://coveralls.io/github/LibraryOfCongress/bagit-python?branch=master -.. 
|cc0| image:: http://i.creativecommons.org/p/zero/1.0/88x31.png - :target: http://creativecommons.org/publicdomain/zero/1.0/ diff --git a/internal/dist/data/windows-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/RECORD b/internal/dist/data/windows-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/RECORD deleted file mode 100644 index f6b1c88..0000000 --- a/internal/dist/data/windows-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/RECORD +++ /dev/null @@ -1,12 +0,0 @@ -../../bin/__pycache__/bagit.cpython-312.pyc,, -../../bin/bagit.py,sha256=9ebaJNJqrnn9IQjfFcP6IJ6aM7iJUwLcfm2-dzjZRAQ,55552 -../../locale/en/LC_MESSAGES/bagit-python.mo,sha256=onTi-42QABidt47Lx6KeFzbP-lQ0HU1X9fW2j1M8UK0,2808 -__pycache__/bagit.cpython-312.pyc,, -bagit-1.8.1.dev26+g5b77243.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 -bagit-1.8.1.dev26+g5b77243.dist-info/METADATA,sha256=crTvH7UCcMB5Ddz3n6u-xJWmtv0NLiga4pmQLNNHNe0,8167 -bagit-1.8.1.dev26+g5b77243.dist-info/RECORD,, -bagit-1.8.1.dev26+g5b77243.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 -bagit-1.8.1.dev26+g5b77243.dist-info/WHEEL,sha256=DZajD4pwLWue70CAfc7YaxT1wLUciNBvN_TTcvXpltE,110 -bagit-1.8.1.dev26+g5b77243.dist-info/direct_url.json,sha256=qk6jLuV7knLYcmZsdFksaCxxehTR2eGa8s8jqoewkv8,211 -bagit-1.8.1.dev26+g5b77243.dist-info/top_level.txt,sha256=f288ippe7ERSzETsEftza1dXq61Z4zFSXDgaTF6djpE,6 -bagit.py,sha256=_8AUO2ZmW5PdhTfTpJetie5lkyhFkomhXM_Fn8kzvrU,55530 diff --git a/internal/dist/data/windows-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/WHEEL b/internal/dist/data/windows-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/WHEEL deleted file mode 100644 index 832be11..0000000 --- a/internal/dist/data/windows-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/WHEEL +++ /dev/null @@ -1,6 +0,0 @@ -Wheel-Version: 1.0 -Generator: bdist_wheel (0.43.0) -Root-Is-Purelib: true -Tag: py2-none-any -Tag: py3-none-any - diff --git a/internal/dist/data/windows-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/direct_url.json b/internal/dist/data/windows-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/direct_url.json deleted file mode 100644 index f5592c2..0000000 --- a/internal/dist/data/windows-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/direct_url.json +++ /dev/null @@ -1 +0,0 @@ -{"url": "https://github.com/LibraryOfCongress/bagit-python", "vcs_info": {"commit_id": "5b7724356d8c5c3a3be9120814d4d996574485a9", "requested_revision": "5b7724356d8c5c3a3be9120814d4d996574485a9", "vcs": "git"}} \ No newline at end of file diff --git a/internal/dist/data/windows-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/INSTALLER b/internal/dist/data/windows-amd64/bagit-1.9b3.dev3+gda04180.dist-info/INSTALLER similarity index 100% rename from internal/dist/data/windows-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/INSTALLER rename to internal/dist/data/windows-amd64/bagit-1.9b3.dev3+gda04180.dist-info/INSTALLER diff --git a/internal/dist/data/windows-amd64/bagit-1.9b3.dev3+gda04180.dist-info/METADATA b/internal/dist/data/windows-amd64/bagit-1.9b3.dev3+gda04180.dist-info/METADATA new file mode 100644 index 0000000..d5f5d35 --- /dev/null +++ b/internal/dist/data/windows-amd64/bagit-1.9b3.dev3+gda04180.dist-info/METADATA @@ -0,0 +1,279 @@ +Metadata-Version: 2.1 +Name: bagit +Version: 1.9b3.dev3+gda04180 +Summary: Create and validate BagIt packages +Home-page: https://libraryofcongress.github.io/bagit-python/ +Author: Ed Summers +Author-email: Ed Summers +Project-URL: Homepage, https://libraryofcongress.github.io/bagit-python/ +Platform: POSIX +Classifier: Intended Audience :: Developers 
+Classifier: License :: Public Domain +Classifier: Programming Language :: Python :: 3 +Classifier: Topic :: Communications :: File Sharing +Classifier: Topic :: Software Development :: Libraries :: Python Modules +Classifier: Topic :: System :: Filesystems +Description-Content-Type: text/x-rst + +bagit-python +============ + +bagit is a Python library and command line utility for working with +`BagIt `__ style packages. + +Installation +------------ + +bagit.py is a single-file python module that you can drop into your +project as needed or you can install globally with: + +:: + + pip install bagit + +A supported version of Python 3 is required. + +Command Line Usage +------------------ + +When you install bagit you should get a command-line program called +bagit.py which you can use to turn an existing directory into a bag: + +:: + + bagit.py --contact-name 'John Kunze' /directory/to/bag + +Finding Bagit on your system +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The ``bagit.py`` program should be available in your normal command-line +window (Terminal on OS X, Command Prompt or Powershell on Windows, +etc.). If you are unsure where it was installed you can also request +that Python search for ``bagit`` as a Python module: simply replace +``bagit.py`` with ``python -m bagit``: + +:: + + python -m bagit --help + +On some systems Python may have been installed as ``python3``, ``py``, +etc. – simply use the same name you use to start an interactive Python +shell: + +:: + + py -m bagit --help + python3 -m bagit --help + +Configuring BagIt +~~~~~~~~~~~~~~~~~ + +You can pass in key/value metadata for the bag using options like +``--contact-name`` above, which get persisted to the bag-info.txt. For a +complete list of bag-info.txt properties you can use as commmand line +arguments see ``--help``. + +Since calculating checksums can take a while when creating a bag, you +may want to calculate them in parallel if you are on a multicore +machine. You can do that with the ``--processes`` option: + +:: + + bagit.py --processes 4 /directory/to/bag + +To specify which checksum algorithm(s) to use when generating the +manifest, use the --md5, --sha1, --sha256 and/or --sha512 flags (MD5 is +generated by default). + +:: + + bagit.py --sha1 /path/to/bag + bagit.py --sha256 /path/to/bag + bagit.py --sha512 /path/to/bag + +If you would like to validate a bag you can use the --validate flag. + +:: + + bagit.py --validate /path/to/bag + +If you would like to take a quick look at the bag to see if it seems +valid by just examining the structure of the bag, and comparing its +payload-oxum (byte count and number of files) then use the ``--fast`` +flag. + +:: + + bagit.py --validate --fast /path/to/bag + +And finally, if you'd like to parallelize validation to take advantage +of multiple CPUs you can: + +:: + + bagit.py --validate --processes 4 /path/to/bag + +Using BagIt in your programs +---------------------------- + +You can also use BagIt programatically in your own Python programs by +importing the ``bagit`` module. + +Create +~~~~~~ + +To create a bag you would do this: + +.. code:: python + + bag = bagit.make_bag('mydir', {'Contact-Name': 'John Kunze'}) + +``make_bag`` returns a Bag instance. If you have a bag already on disk +and would like to create a Bag instance for it, simply call the +constructor directly: + +.. code:: python + + bag = bagit.Bag('/path/to/bag') + +Update Bag Metadata +~~~~~~~~~~~~~~~~~~~ + +You can change the metadata persisted to the bag-info.txt by using the +``info`` property on a ``Bag``. 
+ +.. code:: python + + # load the bag + bag = bagit.Bag('/path/to/bag') + + # update bag info metadata + bag.info['Internal-Sender-Description'] = 'Updated on 2014-06-28.' + bag.info['Authors'] = ['John Kunze', 'Andy Boyko'] + bag.save() + +Update Bag Manifests +~~~~~~~~~~~~~~~~~~~~ + +By default ``save`` will not update manifests. This guards against a +situation where a call to ``save`` to persist bag metadata accidentally +regenerates manifests for an invalid bag. If you have modified the +payload of a bag by adding, modifying or deleting files in the data +directory, and wish to regenerate the manifests set the ``manifests`` +parameter to True when calling ``save``. + +.. code:: python + + + import shutil, os + + # add a file + shutil.copyfile('newfile', '/path/to/bag/data/newfile') + + # remove a file + os.remove('/path/to/bag/data/file') + + # persist changes + bag.save(manifests=True) + +The save method takes an optional processes parameter which will +determine how many processes are used to regenerate the checksums. This +can be handy on multicore machines. + +Validation +~~~~~~~~~~ + +If you would like to see if a bag is valid, use its ``is_valid`` method: + +.. code:: python + + bag = bagit.Bag('/path/to/bag') + if bag.is_valid(): + print("yay :)") + else: + print("boo :(") + +If you'd like to get a detailed list of validation errors, execute the +``validate`` method and catch the ``BagValidationError`` exception. If +the bag's manifest was invalid (and it wasn't caught by the payload +oxum) the exception's ``details`` property will contain a list of +``ManifestError``\ s that you can introspect on. Each ManifestError, +will be of type ``ChecksumMismatch``, ``FileMissing``, +``UnexpectedFile``. + +So for example if you want to print out checksums that failed to +validate you can do this: + +.. code:: python + + + bag = bagit.Bag("/path/to/bag") + + try: + bag.validate() + + except bagit.BagValidationError as e: + for d in e.details: + if isinstance(d, bagit.ChecksumMismatch): + print("expected %s to have %s checksum of %s but found %s" % + (d.path, d.algorithm, d.expected, d.found)) + +To iterate through a bag's manifest and retrieve checksums for the +payload files use the bag's entries dictionary: + +.. code:: python + + bag = bagit.Bag("/path/to/bag") + + for path, fixity in bag.entries.items(): + print("path:%s md5:%s" % (path, fixity["md5"])) + +Contributing to bagit-python development +---------------------------------------- + +:: + + % git clone git://github.com/LibraryOfCongress/bagit-python.git + % cd bagit-python + # MAKE CHANGES + % python test.py + +Running the tests +~~~~~~~~~~~~~~~~~ + +You can quickly run the tests using the built-in unittest framework: + +:: + + python -m unittest discover + +If you have Docker installed, you can run the tests under Linux inside a +container: + +:: + + % docker build -t bagit:latest . && docker run -it bagit:latest + +Benchmarks +---------- + +If you'd like to see how increasing parallelization of bag creation on +your system effects the time to create a bag try using the included +bench utility: + +:: + + % ./bench.py + +License +------- + +|cc0| + +Note: By contributing to this project, you agree to license your work +under the same terms as those that govern this project's distribution. + +.. |Coverage Status| image:: https://coveralls.io/repos/github/LibraryOfCongress/bagit-python/badge.svg?branch=master + :target: https://coveralls.io/github/LibraryOfCongress/bagit-python?branch=master +.. 
|cc0| image:: http://i.creativecommons.org/p/zero/1.0/88x31.png + :target: http://creativecommons.org/publicdomain/zero/1.0/ diff --git a/internal/dist/data/windows-amd64/bagit-1.9b3.dev3+gda04180.dist-info/RECORD b/internal/dist/data/windows-amd64/bagit-1.9b3.dev3+gda04180.dist-info/RECORD new file mode 100644 index 0000000..1527b3c --- /dev/null +++ b/internal/dist/data/windows-amd64/bagit-1.9b3.dev3+gda04180.dist-info/RECORD @@ -0,0 +1,12 @@ +../../bin/__pycache__/bagit.cpython-312.pyc,, +../../bin/bagit.py,sha256=uZC0jWU00G4xfXYJl43FR6PSVmIq57au6gWIhawON4U,54723 +../../locale/en/LC_MESSAGES/bagit-python.mo,sha256=onTi-42QABidt47Lx6KeFzbP-lQ0HU1X9fW2j1M8UK0,2808 +__pycache__/bagit.cpython-312.pyc,, +bagit-1.9b3.dev3+gda04180.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +bagit-1.9b3.dev3+gda04180.dist-info/METADATA,sha256=xeBL-vZhcdQpG_M3AvddWJ-YvQ_BCxJ8UUBmmTfizXA,7527 +bagit-1.9b3.dev3+gda04180.dist-info/RECORD,, +bagit-1.9b3.dev3+gda04180.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +bagit-1.9b3.dev3+gda04180.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91 +bagit-1.9b3.dev3+gda04180.dist-info/direct_url.json,sha256=2hbjksYnB7HjDIZNtDaTebBXsdAYM2ZI_reqL50sFWw,211 +bagit-1.9b3.dev3+gda04180.dist-info/top_level.txt,sha256=f288ippe7ERSzETsEftza1dXq61Z4zFSXDgaTF6djpE,6 +bagit.py,sha256=L3uPbWYK4GPwRlCTWSnzMpOukmVWc-tmLxfHzACB-5w,54701 diff --git a/internal/dist/data/windows-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/REQUESTED b/internal/dist/data/windows-amd64/bagit-1.9b3.dev3+gda04180.dist-info/REQUESTED similarity index 100% rename from internal/dist/data/windows-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/REQUESTED rename to internal/dist/data/windows-amd64/bagit-1.9b3.dev3+gda04180.dist-info/REQUESTED diff --git a/internal/dist/data/windows-amd64/bagit-1.9b3.dev3+gda04180.dist-info/WHEEL b/internal/dist/data/windows-amd64/bagit-1.9b3.dev3+gda04180.dist-info/WHEEL new file mode 100644 index 0000000..da25d7b --- /dev/null +++ b/internal/dist/data/windows-amd64/bagit-1.9b3.dev3+gda04180.dist-info/WHEEL @@ -0,0 +1,5 @@ +Wheel-Version: 1.0 +Generator: setuptools (75.2.0) +Root-Is-Purelib: true +Tag: py3-none-any + diff --git a/internal/dist/data/windows-amd64/bagit-1.9b3.dev3+gda04180.dist-info/direct_url.json b/internal/dist/data/windows-amd64/bagit-1.9b3.dev3+gda04180.dist-info/direct_url.json new file mode 100644 index 0000000..71df225 --- /dev/null +++ b/internal/dist/data/windows-amd64/bagit-1.9b3.dev3+gda04180.dist-info/direct_url.json @@ -0,0 +1 @@ +{"url": "https://github.com/LibraryOfCongress/bagit-python", "vcs_info": {"commit_id": "da041808d2453da2209054a9f4c48187dc323c0a", "requested_revision": "da041808d2453da2209054a9f4c48187dc323c0a", "vcs": "git"}} \ No newline at end of file diff --git a/internal/dist/data/windows-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/top_level.txt b/internal/dist/data/windows-amd64/bagit-1.9b3.dev3+gda04180.dist-info/top_level.txt similarity index 100% rename from internal/dist/data/windows-amd64/bagit-1.8.1.dev26+g5b77243.dist-info/top_level.txt rename to internal/dist/data/windows-amd64/bagit-1.9b3.dev3+gda04180.dist-info/top_level.txt diff --git a/internal/dist/data/windows-amd64/bagit.py b/internal/dist/data/windows-amd64/bagit.py index 458fba8..69ea9ab 100644 --- a/internal/dist/data/windows-amd64/bagit.py +++ b/internal/dist/data/windows-amd64/bagit.py @@ -1,8 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -from __future__ import absolute_import, division, 
print_function, unicode_literals - import argparse import codecs import gettext @@ -19,7 +17,6 @@ from collections import defaultdict from datetime import date from functools import partial -from os.path import abspath, isdir, isfile, join try: from importlib.metadata import version @@ -42,10 +39,8 @@ def find_locale_dir(): TRANSLATION_CATALOG = gettext.translation( "bagit-python", localedir=find_locale_dir(), fallback=True ) -if sys.version_info < (3,): - _ = TRANSLATION_CATALOG.ugettext -else: - _ = TRANSLATION_CATALOG.gettext + +_ = TRANSLATION_CATALOG.gettext MODULE_NAME = "bagit" if __name__ == "__main__" else __name__ @@ -140,7 +135,7 @@ def find_locale_dir(): open_text_file = partial(codecs.open, encoding="utf-8", errors="strict") # This is the same as decoding the byte values in codecs.BOM: -UNICODE_BYTE_ORDER_MARK = "\uFEFF" +UNICODE_BYTE_ORDER_MARK = "\ufeff" def make_bag( @@ -282,7 +277,7 @@ class Bag(object): valid_files = ["bagit.txt", "fetch.txt"] valid_directories = ["data"] - def __init__(self, path=None): + def __init__(self, path): super(Bag, self).__init__() self.tags = {} self.info = {} @@ -304,12 +299,8 @@ def __init__(self, path=None): self.algorithms = [] self.tag_file_name = None - self.path = abspath(path) - if path: - # if path ends in a path separator, strip it off - if path[-1] == os.sep: - self.path = path[:-1] - self._open() + self.path = os.path.abspath(path) + self._open() def __str__(self): # FIXME: develop a more informative string representation for a Bag @@ -333,7 +324,7 @@ def _open(self): # the required version and encoding. bagit_file_path = os.path.join(self.path, "bagit.txt") - if not isfile(bagit_file_path): + if not os.path.isfile(bagit_file_path): raise BagError(_("Expected bagit.txt does not exist: %s") % bagit_file_path) self.tags = tags = _load_tag_file(bagit_file_path) @@ -382,13 +373,13 @@ def _open(self): def manifest_files(self): for filename in ["manifest-%s.txt" % a for a in CHECKSUM_ALGOS]: f = os.path.join(self.path, filename) - if isfile(f): + if os.path.isfile(f): yield f def tagmanifest_files(self): for filename in ["tagmanifest-%s.txt" % a for a in CHECKSUM_ALGOS]: f = os.path.join(self.path, filename) - if isfile(f): + if os.path.isfile(f): yield f def compare_manifests_with_fs(self): @@ -422,8 +413,8 @@ def compare_manifests_with_fs(self): def compare_fetch_with_fs(self): """Compares the fetch entries with the files actually - in the payload, and returns a list of all the files - that still need to be fetched. + in the payload, and returns a list of all the files + that still need to be fetched. 
""" files_on_fs = set(self.payload_files()) @@ -449,7 +440,7 @@ def payload_files(self): yield rel_path def payload_entries(self): - """Return a dictionary of items """ + """Return a dictionary of items""" # Don't use dict comprehension (compatibility with Python < 2.7) return dict( (key, value) @@ -562,7 +553,7 @@ def fetch_entries(self): fetch_file_path = os.path.join(self.path, "fetch.txt") - if isfile(fetch_file_path): + if os.path.isfile(fetch_file_path): with open_text_file( fetch_file_path, "r", encoding=self.encoding ) as fetch_file: @@ -618,7 +609,9 @@ def is_valid(self, processes=1, fast=False, completeness_only=False): """ try: - self.validate(processes=processes, fast=fast, completeness_only=completeness_only) + self.validate( + processes=processes, fast=fast, completeness_only=completeness_only + ) except BagError: return False @@ -746,7 +739,7 @@ def _validate_structure(self): def _validate_structure_payload_directory(self): data_dir_path = os.path.join(self.path, "data") - if not isdir(data_dir_path): + if not os.path.isdir(data_dir_path): raise BagValidationError( _("Expected data directory %s does not exist") % data_dir_path ) @@ -776,7 +769,10 @@ def validate_fetch(self): # each parsed url must resolve to a scheme and point to a netloc # if the scheme is file, netloc is not necessary - if not (all((parsed_url.scheme, parsed_url.netloc)) or parsed_url.scheme == "file"): + if not ( + all((parsed_url.scheme, parsed_url.netloc)) + or parsed_url.scheme == "file" + ): raise BagError(_("Malformed URL in fetch.txt: %s") % url) def _validate_contents(self, processes=1, fast=False, completeness_only=False): @@ -851,11 +847,11 @@ def _validate_completeness(self): only_in_manifests, only_on_fs = self.compare_manifests_with_fs() for path in only_in_manifests: e = FileMissing(path) - LOGGER.warning(force_unicode(e)) + LOGGER.warning(str(e)) errors.append(e) for path in only_on_fs: e = UnexpectedFile(path) - LOGGER.warning(force_unicode(e)) + LOGGER.warning(str(e)) errors.append(e) if errors: @@ -886,13 +882,12 @@ def _validate_entries(self, processes): if processes == 1: hash_results = [_calc_hashes(i) for i in args] else: - try: - pool = multiprocessing.Pool( - processes if processes else None, initializer=worker_init - ) - hash_results = pool.map(_calc_hashes, args) - finally: - pool.terminate() + pool = multiprocessing.Pool( + processes if processes else None, initializer=worker_init + ) + hash_results = pool.map(_calc_hashes, args) + pool.close() + pool.join() # Any unhandled exceptions are probably fatal except: @@ -906,7 +901,7 @@ def _validate_entries(self, processes): e = ChecksumMismatch( rel_path, alg, stored_hash.lower(), computed_hash ) - LOGGER.warning(force_unicode(e)) + LOGGER.warning(str(e)) errors.append(e) if errors: @@ -963,7 +958,7 @@ def __init__(self, message, details=None): def __str__(self): if len(self.details) > 0: - details = "; ".join([force_unicode(e) for e in self.details]) + details = "; ".join([str(e) for e in self.details]) return "%s: %s" % (self.message, details) return self.message @@ -988,7 +983,7 @@ def __str__(self): return _( '%(path)s %(algorithm)s validation failed: expected="%(expected)s" found="%(found)s"' ) % { - "path": force_unicode(self.path), + "path": str(self.path), "algorithm": self.algorithm, "expected": self.expected, "found": self.found, @@ -997,9 +992,9 @@ def __str__(self): class FileMissing(ManifestErrorDetail): def __str__(self): - return _( - "%s exists in manifest but was not found on filesystem" - ) % 
force_unicode(self.path) + return _("%s exists in manifest but was not found on filesystem") % str( + self.path + ) class UnexpectedFile(ManifestErrorDetail): @@ -1034,22 +1029,10 @@ def posix_multiprocessing_worker_initializer(): # is consistency since the input value will be preserved: -def normalize_unicode_py3(s): +def normalize_unicode(s): return unicodedata.normalize("NFC", s) -def normalize_unicode_py2(s): - if isinstance(s, str): - s = s.decode("utf-8") - return unicodedata.normalize("NFC", s) - - -if sys.version_info > (3, 0): - normalize_unicode = normalize_unicode_py3 -else: - normalize_unicode = normalize_unicode_py2 - - def build_unicode_normalized_lookup_dict(filenames): """ Return a dictionary mapping unicode-normalized filenames to as-encoded @@ -1138,7 +1121,7 @@ def _calc_hashes(args): try: f_hashes = _calculate_file_hashes(full_path, f_hashers) except BagValidationError as e: - f_hashes = dict((alg, force_unicode(e)) for alg in f_hashers.keys()) + f_hashes = dict((alg, str(e)) for alg in f_hashers.keys()) return rel_path, f_hashes, hashes @@ -1161,7 +1144,7 @@ def _calculate_file_hashes(full_path, f_hashers): except (OSError, IOError) as e: raise BagValidationError( _("Could not read %(filename)s: %(error)s") - % {"filename": full_path, "error": force_unicode(e)} + % {"filename": full_path, "error": str(e)} ) return dict((alg, h.hexdigest()) for alg, h in f_hashers.items()) @@ -1187,11 +1170,11 @@ def _load_tag_file(tag_file_name, encoding="utf-8-sig"): def _parse_tags(tag_file): """Parses a tag file, according to RFC 2822. This - includes line folding, permitting extra-long - field values. + includes line folding, permitting extra-long + field values. - See http://www.faqs.org/rfcs/rfc2822.html for - more information. + See http://www.faqs.org/rfcs/rfc2822.html for + more information. 
""" tag_name = None @@ -1237,7 +1220,7 @@ def _make_tag_file(bag_info_path, bag_info): values = [values] for txt in values: # strip CR, LF and CRLF so they don't mess up the tag file - txt = re.sub(r"\n|\r|(\r\n)", "", force_unicode(txt)) + txt = re.sub(r"\n|\r|(\r\n)", "", str(txt)) f.write("%s: %s\n" % (h, txt)) @@ -1295,14 +1278,14 @@ def make_manifests(data_dir, processes, algorithms=DEFAULT_CHECKSUMS, encoding=" def _make_tagmanifest_file(alg, bag_dir, encoding="utf-8"): - tagmanifest_file = join(bag_dir, "tagmanifest-%s.txt" % alg) + tagmanifest_file = os.path.join(bag_dir, "tagmanifest-%s.txt" % alg) LOGGER.info(_("Creating %s"), tagmanifest_file) checksums = [] for f in _find_tag_files(bag_dir): if re.match(r"^tagmanifest-.+\.txt$", f): continue - with open(join(bag_dir, f), "rb") as fh: + with open(os.path.join(bag_dir, f), "rb") as fh: m = hashlib.new(alg) while True: block = fh.read(HASH_BLOCK_SIZE) @@ -1312,7 +1295,7 @@ def _make_tagmanifest_file(alg, bag_dir, encoding="utf-8"): checksums.append((m.hexdigest(), f)) with open_text_file( - join(bag_dir, tagmanifest_file), mode="w", encoding=encoding + os.path.join(bag_dir, tagmanifest_file), mode="w", encoding=encoding ) as tagmanifest: for digest, filename in checksums: tagmanifest.write("%s %s\n" % (digest, filename)) @@ -1328,7 +1311,7 @@ def _find_tag_files(bag_dir): if filename.startswith("tagmanifest-"): continue # remove everything up to the bag_dir directory - p = join(dir_name, filename) + p = os.path.join(dir_name, filename) yield os.path.relpath(p, bag_dir) @@ -1433,19 +1416,6 @@ def _decode_filename(s): return s -def force_unicode_py2(s): - """Reliably return a Unicode string given a possible unicode or byte string""" - if isinstance(s, str): - return s.decode("utf-8") - else: - return unicode(s) - - -if sys.version_info > (3, 0): - force_unicode = str -else: - force_unicode = force_unicode_py2 - # following code is used for command line program @@ -1531,7 +1501,10 @@ def _make_parser(): metadata_args = parser.add_argument_group(_("Optional Bag Metadata")) for header in STANDARD_BAG_INFO_HEADERS: metadata_args.add_argument( - "--%s" % header.lower(), type=str, action=BagHeaderAction, default=argparse.SUPPRESS + "--%s" % header.lower(), + type=str, + action=BagHeaderAction, + default=argparse.SUPPRESS, ) parser.add_argument( @@ -1574,7 +1547,9 @@ def main(): parser.error(_("--fast is only allowed as an option for --validate!")) if args.completeness_only and not args.validate: - parser.error(_("--completeness-only is only allowed as an option for --validate!")) + parser.error( + _("--completeness-only is only allowed as an option for --validate!") + ) _configure_logging(args) @@ -1593,7 +1568,9 @@ def main(): if args.fast: LOGGER.info(_("%s valid according to Payload-Oxum"), bag_dir) elif args.completeness_only: - LOGGER.info(_("%s is complete and valid according to Payload-Oxum"), bag_dir) + LOGGER.info( + _("%s is complete and valid according to Payload-Oxum"), bag_dir + ) else: LOGGER.info(_("%s is valid"), bag_dir) except BagError as e: diff --git a/internal/dist/data/windows-amd64/bin/bagit.py b/internal/dist/data/windows-amd64/bin/bagit.py index 7763d39..951284b 100644 --- a/internal/dist/data/windows-amd64/bin/bagit.py +++ b/internal/dist/data/windows-amd64/bin/bagit.py @@ -1,8 +1,6 @@ #!/tmp/python-pip-windows-amd64/bin/python3 # -*- coding: utf-8 -*- -from __future__ import absolute_import, division, print_function, unicode_literals - import argparse import codecs import gettext @@ -19,7 +17,6 @@ from 
collections import defaultdict
 from datetime import date
 from functools import partial
-from os.path import abspath, isdir, isfile, join
 
 try:
     from importlib.metadata import version
@@ -42,10 +39,8 @@ def find_locale_dir():
 TRANSLATION_CATALOG = gettext.translation(
     "bagit-python", localedir=find_locale_dir(), fallback=True
 )
-if sys.version_info < (3,):
-    _ = TRANSLATION_CATALOG.ugettext
-else:
-    _ = TRANSLATION_CATALOG.gettext
+
+_ = TRANSLATION_CATALOG.gettext
 
 MODULE_NAME = "bagit" if __name__ == "__main__" else __name__
 
@@ -140,7 +135,7 @@ def find_locale_dir():
 open_text_file = partial(codecs.open, encoding="utf-8", errors="strict")
 
 # This is the same as decoding the byte values in codecs.BOM:
-UNICODE_BYTE_ORDER_MARK = "\uFEFF"
+UNICODE_BYTE_ORDER_MARK = "\ufeff"
 
 
 def make_bag(
@@ -282,7 +277,7 @@ class Bag(object):
     valid_files = ["bagit.txt", "fetch.txt"]
     valid_directories = ["data"]
 
-    def __init__(self, path=None):
+    def __init__(self, path):
         super(Bag, self).__init__()
         self.tags = {}
         self.info = {}
@@ -304,12 +299,8 @@ def __init__(self, path=None):
         self.algorithms = []
         self.tag_file_name = None
 
-        self.path = abspath(path)
-        if path:
-            # if path ends in a path separator, strip it off
-            if path[-1] == os.sep:
-                self.path = path[:-1]
-            self._open()
+        self.path = os.path.abspath(path)
+        self._open()
 
     def __str__(self):
         # FIXME: develop a more informative string representation for a Bag
@@ -333,7 +324,7 @@ def _open(self):
         # the required version and encoding.
         bagit_file_path = os.path.join(self.path, "bagit.txt")
 
-        if not isfile(bagit_file_path):
+        if not os.path.isfile(bagit_file_path):
             raise BagError(_("Expected bagit.txt does not exist: %s") % bagit_file_path)
 
         self.tags = tags = _load_tag_file(bagit_file_path)
@@ -382,13 +373,13 @@ def _open(self):
     def manifest_files(self):
         for filename in ["manifest-%s.txt" % a for a in CHECKSUM_ALGOS]:
             f = os.path.join(self.path, filename)
-            if isfile(f):
+            if os.path.isfile(f):
                 yield f
 
     def tagmanifest_files(self):
         for filename in ["tagmanifest-%s.txt" % a for a in CHECKSUM_ALGOS]:
             f = os.path.join(self.path, filename)
-            if isfile(f):
+            if os.path.isfile(f):
                 yield f
 
     def compare_manifests_with_fs(self):
@@ -422,8 +413,8 @@ def compare_manifests_with_fs(self):
 
     def compare_fetch_with_fs(self):
         """Compares the fetch entries with the files actually
-           in the payload, and returns a list of all the files
-           that still need to be fetched.
+        in the payload, and returns a list of all the files
+        that still need to be fetched.
         """
 
         files_on_fs = set(self.payload_files())
@@ -449,7 +440,7 @@ def payload_files(self):
                 yield rel_path
 
     def payload_entries(self):
-        """Return a dictionary of items """
+        """Return a dictionary of items"""
         # Don't use dict comprehension (compatibility with Python < 2.7)
         return dict(
             (key, value)
@@ -562,7 +553,7 @@ def fetch_entries(self):
 
         fetch_file_path = os.path.join(self.path, "fetch.txt")
 
-        if isfile(fetch_file_path):
+        if os.path.isfile(fetch_file_path):
             with open_text_file(
                 fetch_file_path, "r", encoding=self.encoding
             ) as fetch_file:
@@ -618,7 +609,9 @@ def is_valid(self, processes=1, fast=False, completeness_only=False):
         """
 
         try:
-            self.validate(processes=processes, fast=fast, completeness_only=completeness_only)
+            self.validate(
+                processes=processes, fast=fast, completeness_only=completeness_only
+            )
         except BagError:
             return False
 
@@ -746,7 +739,7 @@ def _validate_structure(self):
     def _validate_structure_payload_directory(self):
         data_dir_path = os.path.join(self.path, "data")
 
-        if not isdir(data_dir_path):
+        if not os.path.isdir(data_dir_path):
             raise BagValidationError(
                 _("Expected data directory %s does not exist") % data_dir_path
             )
@@ -776,7 +769,10 @@ def validate_fetch(self):
 
             # each parsed url must resolve to a scheme and point to a netloc
             # if the scheme is file, netloc is not necessary
-            if not (all((parsed_url.scheme, parsed_url.netloc)) or parsed_url.scheme == "file"):
+            if not (
+                all((parsed_url.scheme, parsed_url.netloc))
+                or parsed_url.scheme == "file"
+            ):
                 raise BagError(_("Malformed URL in fetch.txt: %s") % url)
 
     def _validate_contents(self, processes=1, fast=False, completeness_only=False):
@@ -851,11 +847,11 @@ def _validate_completeness(self):
         only_in_manifests, only_on_fs = self.compare_manifests_with_fs()
         for path in only_in_manifests:
             e = FileMissing(path)
-            LOGGER.warning(force_unicode(e))
+            LOGGER.warning(str(e))
             errors.append(e)
         for path in only_on_fs:
             e = UnexpectedFile(path)
-            LOGGER.warning(force_unicode(e))
+            LOGGER.warning(str(e))
             errors.append(e)
 
         if errors:
@@ -886,13 +882,12 @@ def _validate_entries(self, processes):
             if processes == 1:
                 hash_results = [_calc_hashes(i) for i in args]
             else:
-                try:
-                    pool = multiprocessing.Pool(
-                        processes if processes else None, initializer=worker_init
-                    )
-                    hash_results = pool.map(_calc_hashes, args)
-                finally:
-                    pool.terminate()
+                pool = multiprocessing.Pool(
+                    processes if processes else None, initializer=worker_init
+                )
+                hash_results = pool.map(_calc_hashes, args)
+                pool.close()
+                pool.join()
 
         # Any unhandled exceptions are probably fatal
         except:
@@ -906,7 +901,7 @@ def _validate_entries(self, processes):
                     e = ChecksumMismatch(
                         rel_path, alg, stored_hash.lower(), computed_hash
                     )
-                    LOGGER.warning(force_unicode(e))
+                    LOGGER.warning(str(e))
                     errors.append(e)
 
         if errors:
@@ -963,7 +958,7 @@ def __init__(self, message, details=None):
 
     def __str__(self):
         if len(self.details) > 0:
-            details = "; ".join([force_unicode(e) for e in self.details])
+            details = "; ".join([str(e) for e in self.details])
             return "%s: %s" % (self.message, details)
 
         return self.message
@@ -988,7 +983,7 @@ def __str__(self):
         return _(
             '%(path)s %(algorithm)s validation failed: expected="%(expected)s" found="%(found)s"'
         ) % {
-            "path": force_unicode(self.path),
+            "path": str(self.path),
             "algorithm": self.algorithm,
             "expected": self.expected,
             "found": self.found,
@@ -997,9 +992,9 @@ def __str__(self):
 
 class FileMissing(ManifestErrorDetail):
     def __str__(self):
-        return _(
-            "%s exists in manifest but was not found on filesystem"
-        ) % force_unicode(self.path)
+        return _("%s exists in manifest but was not found on filesystem") % str(
+            self.path
+        )
 
 
 class UnexpectedFile(ManifestErrorDetail):
@@ -1034,22 +1029,10 @@ def posix_multiprocessing_worker_initializer():
 # is consistency since the input value will be preserved:
 
 
-def normalize_unicode_py3(s):
+def normalize_unicode(s):
     return unicodedata.normalize("NFC", s)
 
 
-def normalize_unicode_py2(s):
-    if isinstance(s, str):
-        s = s.decode("utf-8")
-    return unicodedata.normalize("NFC", s)
-
-
-if sys.version_info > (3, 0):
-    normalize_unicode = normalize_unicode_py3
-else:
-    normalize_unicode = normalize_unicode_py2
-
-
 def build_unicode_normalized_lookup_dict(filenames):
     """
     Return a dictionary mapping unicode-normalized filenames to as-encoded
@@ -1138,7 +1121,7 @@ def _calc_hashes(args):
     try:
         f_hashes = _calculate_file_hashes(full_path, f_hashers)
     except BagValidationError as e:
-        f_hashes = dict((alg, force_unicode(e)) for alg in f_hashers.keys())
+        f_hashes = dict((alg, str(e)) for alg in f_hashers.keys())
 
     return rel_path, f_hashes, hashes
 
@@ -1161,7 +1144,7 @@ def _calculate_file_hashes(full_path, f_hashers):
     except (OSError, IOError) as e:
         raise BagValidationError(
             _("Could not read %(filename)s: %(error)s")
-            % {"filename": full_path, "error": force_unicode(e)}
+            % {"filename": full_path, "error": str(e)}
         )
 
     return dict((alg, h.hexdigest()) for alg, h in f_hashers.items())
@@ -1187,11 +1170,11 @@ def _load_tag_file(tag_file_name, encoding="utf-8-sig"):
 
 def _parse_tags(tag_file):
     """Parses a tag file, according to RFC 2822. This
-       includes line folding, permitting extra-long
-       field values.
+    includes line folding, permitting extra-long
+    field values.
 
-       See http://www.faqs.org/rfcs/rfc2822.html for
-       more information.
+    See http://www.faqs.org/rfcs/rfc2822.html for
+    more information.
     """
 
     tag_name = None
@@ -1237,7 +1220,7 @@ def _make_tag_file(bag_info_path, bag_info):
                 values = [values]
             for txt in values:
                 # strip CR, LF and CRLF so they don't mess up the tag file
-                txt = re.sub(r"\n|\r|(\r\n)", "", force_unicode(txt))
+                txt = re.sub(r"\n|\r|(\r\n)", "", str(txt))
                 f.write("%s: %s\n" % (h, txt))
 
 
@@ -1295,14 +1278,14 @@ def make_manifests(data_dir, processes, algorithms=DEFAULT_CHECKSUMS, encoding="
 
 
 def _make_tagmanifest_file(alg, bag_dir, encoding="utf-8"):
-    tagmanifest_file = join(bag_dir, "tagmanifest-%s.txt" % alg)
+    tagmanifest_file = os.path.join(bag_dir, "tagmanifest-%s.txt" % alg)
     LOGGER.info(_("Creating %s"), tagmanifest_file)
 
     checksums = []
     for f in _find_tag_files(bag_dir):
         if re.match(r"^tagmanifest-.+\.txt$", f):
             continue
-        with open(join(bag_dir, f), "rb") as fh:
+        with open(os.path.join(bag_dir, f), "rb") as fh:
             m = hashlib.new(alg)
             while True:
                 block = fh.read(HASH_BLOCK_SIZE)
@@ -1312,7 +1295,7 @@ def _make_tagmanifest_file(alg, bag_dir, encoding="utf-8"):
             checksums.append((m.hexdigest(), f))
 
     with open_text_file(
-        join(bag_dir, tagmanifest_file), mode="w", encoding=encoding
+        os.path.join(bag_dir, tagmanifest_file), mode="w", encoding=encoding
     ) as tagmanifest:
         for digest, filename in checksums:
             tagmanifest.write("%s %s\n" % (digest, filename))
@@ -1328,7 +1311,7 @@ def _find_tag_files(bag_dir):
                     if filename.startswith("tagmanifest-"):
                         continue
                     # remove everything up to the bag_dir directory
-                    p = join(dir_name, filename)
+                    p = os.path.join(dir_name, filename)
                     yield os.path.relpath(p, bag_dir)
 
 
@@ -1433,19 +1416,6 @@ def _decode_filename(s):
     return s
 
 
-def force_unicode_py2(s):
-    """Reliably return a Unicode string given a possible unicode or byte string"""
-    if isinstance(s, str):
-        return s.decode("utf-8")
-    else:
-        return unicode(s)
-
-
-if sys.version_info > (3, 0):
-    force_unicode = str
-else:
-    force_unicode = force_unicode_py2
-
 # following code is used for command line program
 
 
@@ -1531,7 +1501,10 @@ def _make_parser():
     metadata_args = parser.add_argument_group(_("Optional Bag Metadata"))
     for header in STANDARD_BAG_INFO_HEADERS:
         metadata_args.add_argument(
-            "--%s" % header.lower(), type=str, action=BagHeaderAction, default=argparse.SUPPRESS
+            "--%s" % header.lower(),
+            type=str,
+            action=BagHeaderAction,
+            default=argparse.SUPPRESS,
         )
 
     parser.add_argument(
@@ -1574,7 +1547,9 @@ def main():
         parser.error(_("--fast is only allowed as an option for --validate!"))
 
     if args.completeness_only and not args.validate:
-        parser.error(_("--completeness-only is only allowed as an option for --validate!"))
+        parser.error(
+            _("--completeness-only is only allowed as an option for --validate!")
+        )
 
     _configure_logging(args)
 
@@ -1593,7 +1568,9 @@ def main():
                 if args.fast:
                     LOGGER.info(_("%s valid according to Payload-Oxum"), bag_dir)
                 elif args.completeness_only:
-                    LOGGER.info(_("%s is complete and valid according to Payload-Oxum"), bag_dir)
+                    LOGGER.info(
+                        _("%s is complete and valid according to Payload-Oxum"), bag_dir
+                    )
                 else:
                     LOGGER.info(_("%s is valid"), bag_dir)
             except BagError as e:
diff --git a/internal/dist/data/windows-amd64/files.json b/internal/dist/data/windows-amd64/files.json
index e676aa9..6a606f5 100644
--- a/internal/dist/data/windows-amd64/files.json
+++ b/internal/dist/data/windows-amd64/files.json
@@ -1,94 +1,79 @@
 {
-  "contentHash": "9f61ab290903616280e2f88b60680a97ac13a656126f6fa1a0b46bdb3ab5aabf",
+  "contentHash": "8bc5d2a983078cc82b219a9a352f3a22696f4e8b9e42576326ed13b5f97f24e0",
   "files": [
     {
-      "name": "bagit-1.8.1.dev26+g5b77243.dist-info",
+      "name": "bagit-1.9b3.dev3+gda04180.dist-info",
       "size": 0,
-      "modTime": 1713519588,
       "perm": 2147484157
     },
     {
-      "name": "bagit-1.8.1.dev26+g5b77243.dist-info/INSTALLER",
+      "name": "bagit-1.9b3.dev3+gda04180.dist-info/INSTALLER",
       "size": 4,
-      "modTime": 1713519588,
       "perm": 436
     },
     {
-      "name": "bagit-1.8.1.dev26+g5b77243.dist-info/METADATA",
-      "size": 8167,
-      "modTime": 1713519588,
+      "name": "bagit-1.9b3.dev3+gda04180.dist-info/METADATA",
+      "size": 7527,
       "perm": 436
     },
     {
-      "name": "bagit-1.8.1.dev26+g5b77243.dist-info/RECORD",
-      "size": 994,
-      "modTime": 1713519588,
+      "name": "bagit-1.9b3.dev3+gda04180.dist-info/RECORD",
+      "size": 986,
       "perm": 436
     },
     {
-      "name": "bagit-1.8.1.dev26+g5b77243.dist-info/REQUESTED",
+      "name": "bagit-1.9b3.dev3+gda04180.dist-info/REQUESTED",
       "size": 0,
-      "modTime": 1713519588,
       "perm": 436
     },
     {
-      "name": "bagit-1.8.1.dev26+g5b77243.dist-info/WHEEL",
-      "size": 110,
-      "modTime": 1713519588,
+      "name": "bagit-1.9b3.dev3+gda04180.dist-info/WHEEL",
+      "size": 91,
       "perm": 436
     },
     {
-      "name": "bagit-1.8.1.dev26+g5b77243.dist-info/direct_url.json",
+      "name": "bagit-1.9b3.dev3+gda04180.dist-info/direct_url.json",
       "size": 211,
-      "modTime": 1713519588,
       "perm": 436
     },
     {
-      "name": "bagit-1.8.1.dev26+g5b77243.dist-info/top_level.txt",
+      "name": "bagit-1.9b3.dev3+gda04180.dist-info/top_level.txt",
       "size": 6,
-      "modTime": 1713519588,
       "perm": 436
     },
     {
       "name": "bagit.py",
-      "size": 55530,
-      "modTime": 1713519588,
+      "size": 54701,
       "perm": 436
     },
     {
       "name": "bin",
       "size": 0,
-      "modTime": 1713519588,
       "perm": 2147484157
     },
     {
       "name": "bin/bagit.py",
-      "size": 55552,
-      "modTime": 1713519588,
+      "size": 54723,
       "perm": 509
     },
     {
       "name": "locale",
       "size": 0,
-      "modTime": 1713519588,
       "perm": 2147484157
     },
     {
       "name": "locale/en",
       "size": 0,
-      "modTime": 1713519588,
       "perm": 2147484157
     },
     {
       "name": "locale/en/LC_MESSAGES",
       "size": 0,
-      "modTime": 1713519588,
       "perm": 2147484157
     },
     {
       "name": "locale/en/LC_MESSAGES/bagit-python.mo",
       "size": 2808,
-      "modTime": 1713519588,
       "perm": 436,
       "compressed": true
     }
diff --git a/internal/dist/requirements.txt b/internal/dist/requirements.txt
index 9345f4b..fa6062d 100644
--- a/internal/dist/requirements.txt
+++ b/internal/dist/requirements.txt
@@ -1 +1 @@
-bagit @ git+https://github.com/LibraryOfCongress/bagit-python@5b7724356d8c5c3a3be9120814d4d996574485a9
+bagit @ git+https://github.com/LibraryOfCongress/bagit-python@da041808d2453da2209054a9f4c48187dc323c0a