From 01f1957327d0e09e41346410b6667c35eb981c30 Mon Sep 17 00:00:00 2001
From: jiakf
Date: Wed, 7 Feb 2024 11:35:39 -0600
Subject: [PATCH] DEV-2479: py38 upgrade

This updates DTT to run on Python 3.8 and drops support for earlier
Python versions. The change also includes running pyupgrade on the
repository and updating the GitHub Actions workflows to run on currently
supported operating systems.
---
 .github/workflows/ci.yaml            |  62 +++++++++--------
 .gitlab-ci.yml                       |   4 +-
 .travis.yml                          |   8 +--
 bin/package                          |  13 ++--
 dev-requirements.in                  |   1 +
 dev-requirements.txt                 |  32 ++++-----
 gdc_client/auth/parser.py            |   4 +-
 gdc_client/client/client.py          |   6 +-
 gdc_client/client/parser.py          |   4 +-
 gdc_client/common/config.py          |  12 ++--
 gdc_client/download/client.py        |  44 +++++-------
 gdc_client/download/parser.py        |  24 +++----
 gdc_client/parcel/client.py          |  20 +++---
 gdc_client/parcel/download_stream.py |  34 ++++-----
 gdc_client/parcel/http_client.py     |   2 +-
 gdc_client/parcel/manifest.py        |   3 +-
 gdc_client/parcel/portability.py     |   2 +-
 gdc_client/parcel/segment.py         |  42 ++++++-----
 gdc_client/parcel/utils.py           |  22 +++---
 gdc_client/query/index.py            |  10 ++-
 gdc_client/query/versions.py         |   4 +-
 gdc_client/settings/parser.py        |   2 +-
 gdc_client/upload/client.py          | 100 ++++++++++++---------
 requirements.txt                     |   8 ++-
 setup.py                             |   2 +-
 tests/conftest.py                    |  13 ++--
 tests/mock_server.py                 |  10 +--
 tests/test_parcel_utils.py           |   2 +-
 tests/test_query.py                  |   8 +--
 tox.ini                              |   3 +-
 30 files changed, 240 insertions(+), 261 deletions(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 00d6e5ab..8acb1d1a 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -5,9 +5,9 @@ on:
       - develop
       - release/**
       - hotfix/**
-      - feat/dev-583-remove-set-env-in-github-actions
+      - feat/DEV-2479-upgrade-py38
     tags:
-      - "*"
+      - '*'
 
 jobs:
   build:
@@ -15,35 +15,37 @@ jobs:
     strategy:
      matrix:
         os:
-          - macos-10.15
+          - macos-11
+          - macos-12
+          - macos-13
           - ubuntu-20.04
-          - ubuntu-16.04
-          - ubuntu-18.04
-          - windows-latest
+          - ubuntu-22.04
+          - windows-2019
+          - windows-2022
         python:
-          - 3.7
+          - 3.8
     env:
-      GDC_CLIENT_ZIP: "gdc-client.zip"
+      GDC_CLIENT_ZIP: 'gdc-client.zip'
     steps:
-    - uses: actions/checkout@v2
-    - name: Set up Python ${{ matrix.python }}
-      uses: actions/setup-python@v2
-      with:
-        python-version: ${{ matrix.python }}
-    - name: Run Tests
-      if: matrix.os != 'windows-latest'
-      run: |
-        python -m pip install --upgrade pip
-        pip install tox
-        tox -e py
-    - name: Package
-      run: |
-        cd bin
-        pip install virtualenv
-        . ./package
-        echo "GDC_CLIENT_ZIP=$GDC_CLIENT_ZIP" >> $GITHUB_ENV
-      shell: bash
-    - uses: actions/upload-artifact@v2
-      with:
-        name: ${{ env.GDC_CLIENT_ZIP }}-py${{ matrix.python }}-${{ matrix.os }}
-        path: bin/gdc-client_*.zip
+      - uses: actions/checkout@v2
+      - name: Set up Python ${{ matrix.python }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python }}
+      - name: Run Tests
+        if: "!startsWith(matrix.os, 'windows')"
+        run: |
+          pip install --upgrade pip
+          pip install tox
+          tox -e py
+      - name: Package
+        run: |
+          cd bin
+          pip install virtualenv
+          . ./package
+          echo "GDC_CLIENT_ZIP=$GDC_CLIENT_ZIP" >> $GITHUB_ENV
+        shell: bash
+      - uses: actions/upload-artifact@v2
+        with:
+          name: ${{ env.GDC_CLIENT_ZIP }}-py${{ matrix.python }}-${{ matrix.os }}
+          path: bin/gdc-client_*.zip
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index b38c8d84..17861cca 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -11,12 +11,10 @@ tox:
   parallel:
     matrix:
       - BUILD_PY_VERSION:
-          - python3.6
-          - python3.7
+          - python3.8
   script:
     - tox -r -e py
-
 release:
   before_script:
     # unshallow the git repo to resolve version with setuptools_scm.
diff --git a/.travis.yml b/.travis.yml
index 8264f73d..8dd6e0a8 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -12,13 +12,11 @@ install:
   - virtualenv venv
   - source venv/bin/activate
   - pip install tox-travis tox-pyenv
-  - pyenv install 3.5.7
-  - pyenv install 3.6.9
-  - pyenv install 3.7.5
+  - pyenv install 3.8.18
 
 script:
-  - pyenv local 3.5.7 3.6.9 3.7.5
-  - tox -e py35,py36,py37
+  - pyenv local 3.8.18
+  - tox -e py38
 
 after_script:
   - tox -e coverage
diff --git a/bin/package b/bin/package
index 2bce6639..415eb50c 100755
--- a/bin/package
+++ b/bin/package
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 
-set -e
+set -euox pipefail
 
 function cleanup() {
     # get rid of the virtualenv
@@ -69,7 +69,9 @@ esac
 # setup.py is in previous dir
 cd ..
 
-python -m pip install -r requirements.txt
+python -m pip install --upgrade pip
+python -m pip install --no-deps -r requirements.txt
+python -m pip install --no-deps .
 python setup.py install
 
 # go back go the ./gdc_client/bin dir
@@ -78,10 +80,10 @@ cd bin
 echo "Building for ${TARGET_ENVIRONMENT}..."
 
 # Make sure the correct version of pyinstaller and setuptools are installed
-pip install -U "PyInstaller==3.5"
+pip install -U PyInstaller
 
 # Ran into ModuleNotFoundError: No module named 'pkg_resources.py2_warn' with higher versions of setuptools
-pip install --upgrade 'setuptools<45.0.0'
+pip install --upgrade setuptools
 
 # Get version
 VERSION=$(python -c "
@@ -96,6 +98,7 @@ pyinstaller \
     --additional-hooks-dir=. \
     --noconfirm \
     --onefile \
+    --copy-metadata gdc-client \
    -c gdc-client
 
 echo "Testing produced binary..."
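The new --copy-metadata flag works together with the unconditional
importlib_metadata requirement added in setup.py further down: the client
resolves its own version from the installed package's dist-info, and
PyInstaller only bundles that metadata when told to. A minimal sketch of
the lookup this enables (illustrative only; the actual call site in
gdc_client may differ):

    # Resolve the package version from dist-info metadata. On py38 the
    # stdlib importlib.metadata suffices; the importlib_metadata backport
    # exposes the same API. "--copy-metadata gdc-client" ships the
    # dist-info into the frozen binary so the lookup still succeeds there.
    try:
        from importlib.metadata import PackageNotFoundError, version
    except ImportError:
        from importlib_metadata import PackageNotFoundError, version

    try:
        __version__ = version("gdc-client")
    except PackageNotFoundError:  # running from a source tree, not an install
        __version__ = "unknown"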
@@ -106,7 +109,7 @@ echo "Zipping binary..."
 cd dist
 
 PRE_ZIP_NAME="gdc-client_${VERSION}_${TARGET_ENVIRONMENT}_x64"
-if [ -n "${DATE}" ]; then
+if [ -n "${DATE+x}" ]; then
     PRE_ZIP_NAME="gdc-client_${VERSION}_${DATE}_${TARGET_ENVIRONMENT}_x64"
 fi
diff --git a/dev-requirements.in b/dev-requirements.in
index aa683b3e..f263ded9 100644
--- a/dev-requirements.in
+++ b/dev-requirements.in
@@ -1,5 +1,6 @@
 -c requirements.txt
 
+click>=8
 flask~=1.0.2
 pytest~=4.6.2
 pytest-cov~=2.7.1
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 293e51ee..ca6372c7 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -1,6 +1,6 @@
 #
-# This file is autogenerated by pip-compile
-# To update, run:
+# This file is autogenerated by pip-compile with Python 3.8
+# by the following command:
 #
 #    pip-compile dev-requirements.in
 #
@@ -14,12 +14,12 @@ aws-sam-translator==1.11.0
     # via cfn-lint
 aws-xray-sdk==2.11.0
     # via moto
+boto==2.49.0
+    # via moto
 boto3==1.16.63
     # via
     #   aws-sam-translator
     #   moto
-boto==2.49.0
-    # via moto
 botocore==1.19.63
     # via
     #   aws-xray-sdk
@@ -42,8 +42,10 @@ chardet==3.0.4
     # via
     #   -c requirements.txt
     #   requests
-click==7.1.2
-    # via flask
+click==8.1.7
+    # via
+    #   -r dev-requirements.in
+    #   flask
 coverage==5.5
     # via pytest-cov
 cryptography==2.8
@@ -68,9 +70,9 @@ idna==2.8
     # via
     #   -c requirements.txt
     #   moto
     #   requests
-importlib-metadata==2.1.3
+importlib-metadata==7.0.1
     # via
-    #   pluggy
+    #   -c requirements.txt
     #   pytest
 itsdangerous==1.1.0
     # via flask
@@ -107,8 +109,6 @@ moto==1.3.16
     # via -r dev-requirements.in
 packaging==20.9
     # via pytest
-pathlib2==2.3.7.post1
-    # via pytest
 pluggy==0.13.1
     # via pytest
 py==1.11.0
@@ -124,12 +124,12 @@ pycparser==2.21
     #   cffi
 pyparsing==2.4.7
     # via packaging
-pytest-cov==2.7.1
-    # via -r dev-requirements.in
 pytest==4.6.2
     # via
     #   -r dev-requirements.in
     #   pytest-cov
+pytest-cov==2.7.1
+    # via -r dev-requirements.in
 python-dateutil==2.8.2
     # via
     #   botocore
@@ -145,8 +145,6 @@ pyyaml==5.3.1
     #   -c requirements.txt
     #   cfn-lint
     #   moto
-requests-mock==1.5.2
-    # via -r dev-requirements.in
 requests==2.22.0
     # via
     #   -c requirements.txt
     #   cfn-lint
     #   moto
     #   requests-mock
     #   responses
+requests-mock==1.5.2
+    # via -r dev-requirements.in
 responses==0.17.0
     # via moto
 rsa==4.7.2
@@ -172,7 +172,6 @@ six==1.16.0
     #   ecdsa
     #   mock
     #   moto
-    #   pathlib2
     #   pytest
     #   python-dateutil
     #   python-jose
@@ -199,8 +198,9 @@ wrapt==1.14.1
     # via aws-xray-sdk
 xmltodict==0.13.0
     # via moto
-zipp==1.2.0
+zipp==3.17.0
     # via
+    #   -c requirements.txt
     #   importlib-metadata
     #   moto
diff --git a/gdc_client/auth/parser.py b/gdc_client/auth/parser.py
index 48334558..bdd513e7 100644
--- a/gdc_client/auth/parser.py
+++ b/gdc_client/auth/parser.py
@@ -61,8 +61,8 @@ def read_token_file(path):
     #     raise argparse.ArgumentTypeError(permissions_msg)
 
     try:
-        ifs = open(abspath, "r")
-    except IOError as err:
+        ifs = open(abspath)
+    except OSError as err:
         raise argparse.ArgumentTypeError(err)
 
     with closing(ifs):
diff --git a/gdc_client/client/client.py b/gdc_client/client/client.py
index 590742ae..adff3274 100644
--- a/gdc_client/client/client.py
+++ b/gdc_client/client/client.py
@@ -11,7 +11,7 @@ GDC_API_PORT = 443
 
 
-class GDCClient(object):
+class GDCClient:
     """GDC API Requests Client"""
 
     def __init__(self, host=GDC_API_HOST, port=GDC_API_PORT, token=None):
@@ -23,7 +23,7 @@ def __init__(self, host=GDC_API_HOST, port=GDC_API_PORT, token=None):
 
         agent = " ".join(
             [
-                "GDC-Client/{version}".format(version=version.__version__),
+                f"GDC-Client/{version.__version__}",
                 self.session.headers.get("User-Agent", "Unknown"),
             ]
         )
@@ -43,7 +43,7 @@ def request(self, verb, path, **kwargs):
                 path=path,
             ),
             auth=auth.GDCTokenAuth(self.token),
-            **kwargs
+            **kwargs,
         )
 
         with closing(res):
diff --git a/gdc_client/client/parser.py b/gdc_client/client/parser.py
index 2ecc3916..eb1b13b2 100644
--- a/gdc_client/client/parser.py
+++ b/gdc_client/client/parser.py
@@ -10,12 +10,12 @@ def config(parser):
         "-H",
         "--host",
         default=os.environ.get("GDC_API_HOST", GDC_API_HOST),
-        help="GDC API host [{host}]".format(host=GDC_API_HOST),
+        help=f"GDC API host [{GDC_API_HOST}]",
     )
     parser.add_argument(
         "-P",
         "--port",
         default=os.environ.get("GDC_API_PORT", GDC_API_PORT),
-        help="GDC API port [{port}]".format(port=GDC_API_PORT),
+        help=f"GDC API port [{GDC_API_PORT}]",
     )
diff --git a/gdc_client/common/config.py b/gdc_client/common/config.py
index 5db6c93d..824ac606 100644
--- a/gdc_client/common/config.py
+++ b/gdc_client/common/config.py
@@ -26,11 +26,11 @@ class GDCClientArgumentParser(argparse.ArgumentParser):
     def error(self, message):
         self.print_help(sys.stderr)
-        sys.stderr.write("\ngdc-client error: {}\n".format(message))
+        sys.stderr.write(f"\ngdc-client error: {message}\n")
         sys.exit(2)
 
 
-class GDCClientConfigShared(object):
+class GDCClientConfigShared:
     setting_getters = {
         "server": ConfigParser.get,
         "http_chunk_size": ConfigParser.getint,
@@ -106,13 +106,11 @@ def get_setting(self, section, option):
         try:
             return self.setting_getters[option](self.config, section, option)
         except NoOptionError:
-            log.debug(
-                'Setting named "{}" not found in section "{}"'.format(option, section)
-            )
+            log.debug(f'Setting named "{option}" not found in section "{section}"')
         except NoSectionError:
-            log.debug('No section named "{}" found'.format(section))
+            log.debug(f'No section named "{section}" found')
         except KeyError:
-            log.debug('Invalid setting "{}"'.format(option))
+            log.debug(f'Invalid setting "{option}"')
 
         # Return defaults if nothing was provided in config file
diff --git a/gdc_client/download/client.py b/gdc_client/download/client.py
index 4ed53e57..3a2552c9 100644
--- a/gdc_client/download/client.py
+++ b/gdc_client/download/client.py
@@ -26,10 +26,10 @@ def fix_url(url):
         api.gdc.cancer.gov -> https://api.gdc.cancer.gov/
     """
     if not url.endswith("/"):
-        url = "{0}/".format(url)
+        url = f"{url}/"
 
     if not (url.startswith("https://") or url.startswith("http://")):
-        url = "https://{0}".format(url)
+        url = f"https://{url}"
 
     return url
 
@@ -45,7 +45,7 @@ def __init__(
         download_annotations=True,
         index_client=None,
         *args,
-        **kwargs
+        **kwargs,
     ):
         """GDC parcel client that overrides parallel download
         Args:
@@ -67,7 +67,7 @@ def __init__(
         self.base_directory = kwargs.get("directory")
         self.verify = kwargs.get("verify")
 
-        super(GDCHTTPDownloadClient, self).__init__(self.data_uri, *args, **kwargs)
+        super().__init__(self.data_uri, *args, **kwargs)
 
     def download_related_files(self, file_id):
         # type: (str) -> None
@@ -81,12 +81,10 @@ def download_related_files(self, file_id):
         related_files = self.gdc_index_client.get_related_files(file_id)
 
         if related_files:
-            log.debug(
-                "Found {0} related files for {1}.".format(len(related_files), file_id)
-            )
+            log.debug(f"Found {len(related_files)} related files for {file_id}.")
             for related_file in related_files:
-                log.debug("related file {0}".format(related_file))
+                log.debug(f"related file {related_file}")
                 related_file_url = urlparse.urljoin(self.data_uri, related_file)
                 stream = DownloadStream(related_file_url, directory, self.token)
 
@@ -95,7 +93,7 @@ def download_related_files(self, file_id):
                 stream.directory = directory
 
                 # run original parallel download
-                super(GDCHTTPDownloadClient, self).parallel_download(stream)
+                super().parallel_download(stream)
 
                 if os.path.isfile(stream.temp_path):
                     utils.remove_partial_extension(stream.temp_path)
@@ -114,9 +112,7 @@ def download_annotations(self, file_id):
         annotations = self.gdc_index_client.get_annotations(file_id)
 
         if annotations:
-            log.debug(
-                "Found {0} annotations for {1}.".format(len(annotations), file_id)
-            )
+            log.debug(f"Found {len(annotations)} annotations for {file_id}.")
 
             # {'ids': ['id1', 'id2'..., 'idn']}
             ann_ids = {"ids": annotations}
@@ -131,7 +127,7 @@ def download_annotations(self, file_id):
             with open(path, "wb") as f:
                 f.write(ann)
 
-            log.debug("Wrote annotations to {0}.".format(path))
+            log.debug(f"Wrote annotations to {path}.")
 
     def _untar_file(self, tarfile_name):
         # type: (str) -> list[str]
@@ -160,7 +156,7 @@ def _md5_members(self, members):
                 )
                 continue
             member_uuid = m.split("/")[0]
-            log.debug("Validating checksum for {0}...".format(member_uuid))
+            log.debug(f"Validating checksum for {member_uuid}...")
 
             md5sum = hashlib.md5()
             filename = os.path.join(self.base_directory, m)
@@ -168,7 +164,7 @@ def _md5_members(self, members):
                 md5sum.update(f.read())
 
             if self.gdc_index_client.get_md5sum(member_uuid) != md5sum.hexdigest():
-                log.error("UUID {0} has invalid md5sum".format(member_uuid))
+                log.error(f"UUID {member_uuid} has invalid md5sum")
                 errors.append(member_uuid)
 
         return errors
@@ -184,7 +180,7 @@ def _post(self, path, headers=None, json=None, stream=True):
         try:
             # try active
             active = urlparse.urljoin(self.base_uri, path)
-            legacy = urlparse.urljoin(self.base_uri, "legacy/{0}".format(path))
+            legacy = urlparse.urljoin(self.base_uri, f"legacy/{path}")
 
             r = requests.post(
                 active,
@@ -227,7 +223,7 @@ def _download_tarfile(self, small_files):
 
         if r.status_code == requests.codes.bad:
             log.error("Unable to connect to the API")
-            log.error("Is this the correct URL? {0}".format(self.base_uri))
+            log.error(f"Is this the correct URL? {self.base_uri}")
 
         elif r.status_code == requests.codes.forbidden:
             # since the files are grouped by access control, that means
@@ -238,7 +234,7 @@ def _download_tarfile(self, small_files):
             return "", []
 
         if r.status_code not in [200, 203]:
-            log.warning("[{0}] Unable to download group".format(r.status_code))
+            log.warning(f"[{r.status_code}] Unable to download group")
             errors.append(ids["ids"])
             return "", errors
 
@@ -281,7 +277,7 @@ def download_small_groups(self, smalls):
             log.error("There are no files to download")
             return [], 0
 
-        log.debug("Saving grouping {0}/{1}".format(i + 1, groupings_len))
+        log.debug(f"Saving grouping {i + 1}/{groupings_len}")
 
         pbar = get_percentage_pbar(1)
 
@@ -313,15 +309,13 @@ def parallel_download(self, stream):
         # gdc-client calls parcel's parallel_download,
         # which is where most of the downloading takes place
         file_id = stream.url.split("/")[-1]
-        super(GDCHTTPDownloadClient, self).parallel_download(stream)
+        super().parallel_download(stream)
 
         if self.related_files:
             try:
                 self.download_related_files(file_id)
             except Exception as e:
-                log.warning(
-                    "Unable to download related files for {0}: {1}".format(file_id, e)
-                )
+                log.warning(f"Unable to download related files for {file_id}: {e}")
                 if self.debug:
                     raise
@@ -329,8 +323,6 @@ def parallel_download(self, stream):
             try:
                 self.download_annotations(file_id)
             except Exception as e:
-                log.warning(
-                    "Unable to download annotations for {0}: {1}".format(file_id, e)
-                )
+                log.warning(f"Unable to download annotations for {file_id}: {e}")
                 if self.debug:
                     raise
diff --git a/gdc_client/download/parser.py b/gdc_client/download/parser.py
index e66ac156..18cfee59 100644
--- a/gdc_client/download/parser.py
+++ b/gdc_client/download/parser.py
@@ -71,12 +71,10 @@ def download(parser, args):
 
     for file_id, latest_id in ids_map.items():
         if args.latest:
-            log.info("Latest version for {} ==> {}".format(file_id, latest_id))
+            log.info(f"Latest version for {file_id} ==> {latest_id}")
             continue
 
         if latest_id is not None and file_id != latest_id:
-            log.warning(
-                'The file "{}" was superseded by "{}"'.format(file_id, latest_id)
-            )
+            log.warning(f'The file "{file_id}" was superseded by "{latest_id}"')
 
     ids = ids_map.values() if args.latest else ids_map.keys()
 
@@ -135,7 +133,7 @@ def download(parser, args):
 
     if big_errors:
         log.debug(
-            "Big files not downloaded: {0}".format(
+            "Big files not downloaded: {}".format(
                 ", ".join([b.split("/")[-1] for b in big_errors])
             )
         )
@@ -146,7 +144,7 @@ def download(parser, args):
         msg = "Successfully downloaded"
         log.info(
-            "{0}: {1}".format(
+            "{}: {}".format(
                 colored(msg, "green") if not args.color_off else msg, successful_count
             )
         )
@@ -154,7 +152,7 @@ def download(parser, args):
     if unsuccessful_count > 0:
         msg = "Failed downloads"
         log.info(
-            "{0}: {1}".format(
+            "{}: {}".format(
                 colored(msg, "red") if not args.color_off else msg, unsuccessful_count
             )
         )
@@ -164,31 +162,31 @@ def download(parser, args):
 
 def retry_download(client, url, retry_amount, no_auto_retry, wait_time):
-    log.debug("Retrying download {0}".format(url))
+    log.debug(f"Retrying download {url}")
     error = True
     while 0 < retry_amount and error:
         if no_auto_retry:
-            should_retry = input("Retry download for {0}? (y/N): ".format(url))
+            should_retry = input(f"Retry download for {url}? (y/N): ")
         else:
             should_retry = "y"
 
         if should_retry.lower() == "y":
-            log.debug("{0} retries remaining...".format(retry_amount))
-            log.debug("Retrying download... {0} in {1} seconds".format(url, wait_time))
+            log.debug(f"{retry_amount} retries remaining...")
+            log.debug(f"Retrying download... {url} in {wait_time} seconds")
             retry_amount -= 1
             time.sleep(wait_time)
             # client.download_files accepts a list of urls to download
             # but we want to only try one at a time
             _, e = client.download_files([url])
             if not e:
-                log.debug("Successfully downloaded {0}!".format(url))
+                log.debug(f"Successfully downloaded {url}!")
                 return
         else:
             error = False
 
     retry_amount = 0
-    log.error("Unable to download file {0}".format(url))
+    log.error(f"Unable to download file {url}")
     return url
diff --git a/gdc_client/parcel/client.py b/gdc_client/parcel/client.py
index 37a2d54b..ca4597bf 100644
--- a/gdc_client/parcel/client.py
+++ b/gdc_client/parcel/client.py
@@ -23,7 +23,7 @@ log = logging.getLogger("client")
 
 
-class Client(object):
+class Client:
     def __init__(
         self, url, token, n_procs, directory=None, verify=True, debug=False, **kwargs
     ):
@@ -64,15 +64,15 @@ def fix_uri(url):
         """
         if not (url.startswith("https://") or url.startswith("http://")):
-            url = "https://{0}".format(url)
+            url = f"https://{url}"
 
         return url
 
     @staticmethod
     def raise_for_write_permissions(directory):
         try:
             tempfile.NamedTemporaryFile(dir=directory).close()
-        except (OSError, IOError) as e:
-            raise IOError(
+        except OSError as e:
+            raise OSError(
                 utils.STRIP(
                     """Unable to write to download to directory '{directory}':
                     {err}. This
@@ -106,7 +106,7 @@ def stop_timer(self, file_size=None):
         rate_info = ""
         if file_size and file_size > 0:
             rate = (int(file_size) * 8 / 1e9) / (self.stop_time - self.start_time)
-            rate_info = ": {0:.2f} Gbps average".format(rate)
+            rate_info = f": {rate:.2f} Gbps average"
 
         log.debug("Download complete" + rate_info)
 
@@ -127,7 +127,7 @@ def download_files(self, urls, *args, **kwargs):
 
         # Log file ids
         for url in urls:
-            log.debug("Given url: {0}".format(url))
+            log.debug(f"Given url: {url}")
 
         # Download each file
         downloaded, errors = [], {}
@@ -166,7 +166,7 @@ def download_files(self, urls, *args, **kwargs):
         # Print error messages
         for url, error in errors.items():
             file_id = url.split("/")[-1]
-            log.error("{0}: {1}".format(file_id, error))
+            log.error(f"{file_id}: {error}")
 
         return downloaded, errors
 
@@ -222,7 +222,7 @@ def download_worker():
                     if self.debug:
                         raise
                     else:
-                        log.error("Download aborted: {0}".format(str(e)))
+                        log.error(f"Download aborted: {str(e)}")
                         # worker needs to stay alive until final sentinel value
                         # from master process is received
                         continue
@@ -258,11 +258,11 @@ def _standard_tcp_download(self, stream):
 
             else:
                 raise Exception(
-                    "[{0}] Unable to download url {1}".format(r.status_code, stream.url)
+                    f"[{r.status_code}] Unable to download url {stream.url}"
                 )
 
             r.close()
 
         except Exception as e:
             log.error(e)
-            raise Exception("Unable to connect to {0}".format(stream.url))
+            raise Exception(f"Unable to connect to {stream.url}")
diff --git a/gdc_client/parcel/download_stream.py b/gdc_client/parcel/download_stream.py
index 5157c97c..0c546474 100644
--- a/gdc_client/parcel/download_stream.py
+++ b/gdc_client/parcel/download_stream.py
@@ -18,7 +18,7 @@ from urllib.parse import urlparse
 
 
-class DownloadStream(object):
+class DownloadStream:
     http_chunk_size = const.HTTP_CHUNK_SIZE
     check_segment_md5sums = True
 
@@ -69,7 +69,7 @@ def setup_file(self):
                 self.log.warning(
                     utils.STRIP(
                         """Unable to set file length. File appears to
-                        be a {0} file, attempting to proceed.
+                        be a {} file, attempting to proceed.
""".format( utils.get_file_type(self.path) ) @@ -97,7 +97,7 @@ def temp_path(self): :returns: A string specifying the full temp path """ - return os.path.join(self.directory, "{0}.partial".format(self.name)) + return os.path.join(self.directory, f"{self.name}.partial") @property def state_path(self): @@ -105,7 +105,7 @@ def state_path(self): :returns: A string specifying the download state path """ - return os.path.join(self.state_directory, "{0}.parcel".format(self.name)) + return os.path.join(self.state_directory, f"{self.name}.parcel") @property def state_directory(self): @@ -132,7 +132,7 @@ def header(self, start=None, end=None): "X-Auth-Token": self.token, } if start is not None and end is not None: - header["Range"] = "bytes={0}-{1}".format(start, end) + header["Range"] = f"bytes={start}-{end}" # provide host because it's mandatory, range request # may not work otherwise scheme, host, path, params, q, frag = urlparse(self.url) @@ -151,7 +151,7 @@ def request(self, headers=None, verify=True, close=False, max_retries=16): :returns: A `requests` response. """ - self.log.debug("Request to {0}".format(self.url)) + self.log.debug(f"Request to {self.url}") # Set urllib3 retries and mount for session a = requests.adapters.HTTPAdapter(max_retries=max_retries) @@ -170,14 +170,14 @@ def request(self, headers=None, verify=True, close=False, max_retries=16): except Exception as e: raise RuntimeError( ( - "Unable to connect to API: ({0}). Is this url correct: '{1}'? " + "Unable to connect to API: ({}). Is this url correct: '{}'? " "Is there a connection to the API? Is the server running?" ).format(str(e), self.url) ) try: r.raise_for_status() except Exception as e: - raise RuntimeError("{0}: {1}".format(str(e), r.text)) + raise RuntimeError(f"{str(e)}: {r.text}") if close: r.close() @@ -202,10 +202,10 @@ def get_information(self): self.check_file_md5sum = False else: self.size = int(content_length) - self.log.debug("{0} bytes".format(self.size)) + self.log.debug(f"{self.size} bytes") attachment = r.headers.get("content-disposition", None) - self.log.debug("Attachment: : {}".format(attachment)) + self.log.debug(f"Attachment: : {attachment}") # Some of the filenames are set to be equal to an S3 key, which can # contain '/' characters and it breaks saving the file @@ -246,7 +246,7 @@ def write_segment(self, segment, q_complete, retries=5): r = self.request(self.header(start, end)) # Iterate over the data stream - self.log.debug("Initializing segment: {0}-{1}".format(start, end)) + self.log.debug(f"Initializing segment: {start}-{end}") for chunk in r.iter_content(chunk_size=self.http_chunk_size): if not chunk: continue # Empty are keep-alives. 
@@ -273,7 +273,7 @@ def write_segment(self, segment, q_complete, retries=5):
             # TODO FIXME HACK create new segment to avoid duplicate downloads
             segment = Interval(segment.begin + written, segment.end, None)
 
-            self.log.debug("Unable to download part of file: {0}\n.".format(str(e)))
+            self.log.debug(f"Unable to download part of file: {str(e)}\n.")
             if retries > 0:
                 self.log.debug("Retrying download of this segment")
                 return self.write_segment(segment, q_complete, retries - 1)
@@ -287,7 +287,7 @@ def write_segment(self, segment, q_complete, retries=5):
                 segment = Interval(segment.begin + written, segment.end, None)
 
                 self.log.debug(
-                    "Segment corruption: {0}".format(
+                    "Segment corruption: {}".format(
                         "(non-fatal) retrying" if retries else "max retries exceeded"
                     )
                 )
@@ -300,15 +300,15 @@ def write_segment(self, segment, q_complete, retries=5):
         return written
 
     def print_download_information(self):
-        self.log.debug("Starting download   : {0}".format(self.url))
-        self.log.debug("File name           : {0}".format(self.name))
+        self.log.debug(f"Starting download   : {self.url}")
+        self.log.debug(f"File name           : {self.name}")
 
         # some tarfiles will not come with Content-Length in the header
         if self.size:
             self.log.debug(
-                "Download size       : {0} B ({1:.2f} GB)".format(
+                "Download size       : {} B ({:.2f} GB)".format(
                     self.size, (self.size / float(const.GB))
                 )
             )
-            self.log.debug("Downloading file to : {0}".format(self.path))
+            self.log.debug(f"Downloading file to : {self.path}")
diff --git a/gdc_client/parcel/http_client.py b/gdc_client/parcel/http_client.py
index 42e65075..929e726e 100644
--- a/gdc_client/parcel/http_client.py
+++ b/gdc_client/parcel/http_client.py
@@ -11,4 +11,4 @@ class HTTPClient(Client):
 
     def __init__(self, *args, **kwargs):
-        super(HTTPClient, self).__init__(*args, **kwargs)
+        super().__init__(*args, **kwargs)
diff --git a/gdc_client/parcel/manifest.py b/gdc_client/parcel/manifest.py
index bc557fc9..e59da52d 100644
--- a/gdc_client/parcel/manifest.py
+++ b/gdc_client/parcel/manifest.py
@@ -19,8 +19,7 @@ def parse(fd, delimiter="\t", quotechar="#", **kwargs):
     manifest = csv.DictReader(fd, delimiter=delimiter, quotechar=quotechar)
 
-    for row in manifest:
-        yield row
+    yield from manifest
 
 
 argparse_type = lambda x: parse(argparse.FileType("r")(x))
diff --git a/gdc_client/parcel/portability.py b/gdc_client/parcel/portability.py
index 88f6701d..af80dba6 100644
--- a/gdc_client/parcel/portability.py
+++ b/gdc_client/parcel/portability.py
@@ -22,7 +22,7 @@ OS_LINUX = True
 
 # Are we running on windows?
-if OS_WINDOWS:
+if OS_WINDOWS or OS_OSX:
     from threading import Thread as Process
 else:
     # Assume a posix system
diff --git a/gdc_client/parcel/segment.py b/gdc_client/parcel/segment.py
index 7f9f48c7..c908a36e 100644
--- a/gdc_client/parcel/segment.py
+++ b/gdc_client/parcel/segment.py
@@ -44,12 +44,10 @@ log = logging.getLogger("segment")
 
-class SegmentProducer(object):
-
+class SegmentProducer:
     save_interval = SAVE_INTERVAL
 
     def __init__(self, download, n_procs):
-
         assert (
             download.size is not None
         ), "Segment producer passed uninitizalied Download!"
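The portability.py hunk above is the main behavioral change in this patch,
so a short note: since Python 3.8, multiprocessing on macOS defaults to the
"spawn" start method, under which workers no longer inherit the parent's
state the way fork-based workers do on Linux; running the workers as threads
on macOS, as was already done on Windows, sidesteps that. A sketch of the
resulting pattern (standard library only; names are illustrative, not the
module's actual ones):

    import sys

    # Mirror the OS_WINDOWS / OS_OSX switch: threads on Windows and macOS,
    # forked processes elsewhere.
    if sys.platform in ("win32", "darwin"):
        from threading import Thread as Worker
    else:
        from multiprocessing import Process as Worker

    def fetch_segment():
        print("downloading one segment")

    worker = Worker(target=fetch_segment)
    worker.start()
    worker.join()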
@@ -74,7 +72,7 @@ def _setup_work(self):
         work_size = self.integrate(self.work_pool)
         self.block_size = work_size // self.n_procs
         self.total_tasks = math.ceil(work_size / self.block_size)
-        log.debug("Total number of tasks: {0}".format(self.total_tasks))
+        log.debug(f"Total number of tasks: {self.total_tasks}")
 
     def _setup_queues(self):
         if WINDOWS:
@@ -94,13 +92,13 @@ def validate_segment_md5sums(self, path=None):
         corrupt_segments = 0
         intervals = sorted(self.completed.items())
 
-        log.debug("Checksumming {0}:".format(self.download.url))
+        log.debug(f"Checksumming {self.download.url}:")
 
         pbar = get_percentage_pbar(len(intervals))
 
         with mmap_open(path or self.download.path) as data:
             for interval in pbar(intervals):
-                log.debug("Checking segment md5: {0}".format(interval))
+                log.debug(f"Checking segment md5: {interval}")
                 if not interval.data or "md5sum" not in interval.data:
                     log.error(
                         STRIP(
@@ -114,7 +112,7 @@ def validate_segment_md5sums(self, path=None):
                     checksum = md5sum(chunk)
                     if checksum != interval.data.get("md5sum"):
                         log.debug(
-                            "Redownloading corrupt segment {0}, {1}.".format(
+                            "Redownloading corrupt segment {}, {}.".format(
                                 interval, checksum
                             )
                         )
@@ -122,7 +120,7 @@ def validate_segment_md5sums(self, path=None):
                         self.completed.remove(interval)
 
         if corrupt_segments:
-            log.warning("Redownloading {0} corrupt segments.".format(corrupt_segments))
+            log.warning(f"Redownloading {corrupt_segments} corrupt segments.")
 
     def recover_intervals(self) -> bool:
         """Recreate list of completed intervals and calculate remaining work pool
@@ -143,14 +141,14 @@ def recover_intervals(self) -> bool:
         # If the state file does not exist, treat as first time download
         if not state_file_exists:
             log.debug(
-                "State file {0} does not exist. Beginning new download...".format(
+                "State file {} does not exist. Beginning new download...".format(
                     self.download.state_path
                 )
             )
             return False
 
         log.debug(
-            "Found state file {0}, attempting to resume download".format(
+            "Found state file {}, attempting to resume download".format(
                 self.download.state_path
             )
         )
@@ -160,12 +158,12 @@ def recover_intervals(self) -> bool:
                 self.completed = pickle.load(f)
                 assert isinstance(
                     self.completed, IntervalTree
-                ), "Bad save state: {0}".format(self.download.state_path)
+                ), f"Bad save state: {self.download.state_path}"
             except Exception as e:
                 # An error has occured while loading state file.
                 # Treat as entire file download and recreate temporary file
                 log.error(
-                    "Unable to resume file state: {0}, will restart entire download".format(
+                    "Unable to resume file state: {}, will restart entire download".format(
                         str(e)
                     )
                 )
@@ -176,7 +174,7 @@ def recover_intervals(self) -> bool:
         # Recreate the temporary file and return if download_file_exists
         if download_file_exists:
             log.debug(
-                "A file named {0} found, will attempt to validate file".format(
+                "A file named {} found, will attempt to validate file".format(
                     self.download.path
                 )
             )
@@ -191,7 +189,7 @@ def recover_intervals(self) -> bool:
                 validate_file_md5sum(self.download, self.download.path)
             except Exception as e:
                 log.error(
-                    "MD5 check of downloaded file failed due to following reason: {0}. Proceeding to restart entire download".format(
+                    "MD5 check of downloaded file failed due to following reason: {}. Proceeding to restart entire download".format(
                         str(e)
                     )
                 )
@@ -205,14 +203,14 @@ def recover_intervals(self) -> bool:
 
         if not temporary_file_exists:
             log.debug(
-                "State file exists but no previous partial file {0} detected. Restarting entire download.".format(
+                "State file exists but no previous partial file {} detected. Restarting entire download.".format(
                     self.download.temp_path
                 )
             )
             return False
 
         log.debug(
-            "Partial file {0} detected. Validating already downloaded segments".format(
+            "Partial file {} detected. Validating already downloaded segments".format(
                 self.download.temp_path
             )
         )
@@ -223,7 +221,7 @@ def recover_intervals(self) -> bool:
         self.validate_segment_md5sums(self.download.temp_path)
         log.debug("Segments checksum validation complete")
         self.size_complete = self.integrate(self.completed)
-        log.debug("size complete: {0}".format(self.size_complete))
+        log.debug(f"size complete: {self.size_complete}")
         # Remove already completed intervals from work_pool
         for interval in self.completed:
             self.work_pool.chop(interval.begin, interval.end)
@@ -282,7 +280,7 @@ def save_state(self):
                     # if no exception, then delete the old stash
                     os.remove(old_path)
                 except Exception as msg:
-                    log.error("Unable to write state file: {0}".format(msg))
+                    log.error(f"Unable to write state file: {msg}")
                     try:
                         os.rename(old_path, self.download.state_path)
                     except:
@@ -294,17 +292,17 @@ def save_state(self):
                 os.rename(temp.name, self.download.state_path)
 
         except KeyboardInterrupt:
-            log.warning("Keyboard interrupt. removing temp save file".format(temp.name))
+            log.warning(f"Keyboard interrupt, removing temp save file {temp.name}")
             temp.close()
             os.remove(temp.name)
         except Exception as e:
-            log.error("Unable to save state: {0}".format(str(e)))
+            log.error(f"Unable to save state: {str(e)}")
             raise
 
     def schedule(self):
         while True:
             interval = self._get_next_interval()
-            log.debug("Returning interval: {0}".format(interval))
+            log.debug(f"Returning interval: {interval}")
             if not interval:
                 return
             self.q_work.put(interval)
@@ -328,7 +326,7 @@ def print_progress(self):
         try:
             self.pbar.update(pbar_value)
         except Exception as e:
-            log.debug("Unable to update pbar: {}".format(str(e)))
+            log.debug(f"Unable to update pbar: {str(e)}")
 
     def check_file_exists_and_size(self, file_path):
         if self.download.is_regular_file:
diff --git a/gdc_client/parcel/utils.py b/gdc_client/parcel/utils.py
index c60ae03f..25b19118 100644
--- a/gdc_client/parcel/utils.py
+++ b/gdc_client/parcel/utils.py
@@ -34,7 +34,7 @@ try:
     requests.packages.urllib3.disable_warnings()
 except Exception as e:
-    log.debug("Unable to silence requests warnings: {0}".format(str(e)))
+    log.debug(f"Unable to silence requests warnings: {str(e)}")
 
 
 def check_transfer_size(actual, expected):
@@ -65,7 +65,7 @@ def get_file_transfer_pbar(
     Returns:
         ProgressBar: progress bar instance
     """
-    log.debug("{} {}:".format(desc, file_id))
+    log.debug(f"{desc} {file_id}:")
 
     pbar = ProgressBar(
         widgets=[
@@ -103,15 +103,11 @@ def get_percentage_pbar(maxval: int):
 
 def print_opening_header(file_id):
     log.debug("")
-    log.debug(
-        "v{0}v".format("{s:{c}^{n}}".format(s=" {0} ".format(file_id), n=50, c="-"))
-    )
+    log.debug("v{}v".format("{s:{c}^{n}}".format(s=f" {file_id} ", n=50, c="-")))
 
 
 def print_closing_header(file_id):
-    log.debug(
-        "^{0}^".format("{s:{c}^{n}}".format(s=" {0} ".format(file_id), n=50, c="-"))
-    )
+    log.debug("^{}^".format("{s:{c}^{n}}".format(s=f" {file_id} ", n=50, c="-")))
 
 
 def write_offset(path, data, offset):
@@ -140,12 +136,12 @@ def remove_partial_extension(path):
     try:
         if not path.endswith(".partial"):
             log.warning("No partial extension found")
-            log.warning("Got {0}".format(path))
+            log.warning(f"Got {path}")
             return
 
-        log.debug("renaming to {0}".format(path.replace(".partial", "")))
+        log.debug("renaming to {}".format(path.replace(".partial", "")))
         os.rename(path, path.replace(".partial", ""))
     except Exception as e:
-        raise Exception("Unable to remove partial extension: {0}".format(str(e)))
+        raise Exception(f"Unable to remove partial extension: {str(e)}")
 
 
 def check_file_existence_and_size(path, size):
@@ -172,7 +168,7 @@ def get_file_type(path):
         else:
             return "unknown"
     except Exception as e:
-        raise RuntimeError("Unable to get file type: {0}".format(str(e)))
+        raise RuntimeError(f"Unable to get file type: {str(e)}")
 
 
 def calculate_segments(start, stop, block):
@@ -232,7 +228,7 @@ def mmap_open(path):
             mm = mmap.mmap(f.fileno(), 0)
             yield mm
     except Exception as e:
-        raise RuntimeError("Unable to get file type: {0}".format(str(e)))
+        raise RuntimeError(f"Unable to get file type: {str(e)}")
 
 
 def STRIP(comment):
diff --git a/gdc_client/query/index.py b/gdc_client/query/index.py
index 2045e3e3..c2726076 100644
--- a/gdc_client/query/index.py
+++ b/gdc_client/query/index.py
@@ -7,7 +7,7 @@ log = logging.getLogger("query")
 
 
-class GDCIndexClient(object):
+class GDCIndexClient:
     def __init__(self, uri, verify=True):
         self.uri = uri
         self.active_meta_endpoint = "/v0/files"
@@ -245,12 +245,10 @@ def separate_small_files(self, ids, chunk_size):
         total_count = len(bigs) + sum([len(s) for s in smalls])
         if len(potential_smalls) > total_count:
             log.warning("There are less files to download than originally given")
-            log.warning(
-                "Number of files originally given: {0}".format(len(potential_smalls))
-            )
+            log.warning(f"Number of files originally given: {len(potential_smalls)}")
 
-        log.debug("{0} total number of files to download".format(total_count))
-        log.debug("{0} groupings of files".format(len(smalls)))
+        log.debug(f"{total_count} total number of files to download")
+        log.debug(f"{len(smalls)} groupings of files")
 
         smalls = [s for s in smalls if s != []]
diff --git a/gdc_client/query/versions.py b/gdc_client/query/versions.py
index 85f2df60..2f45e001 100644
--- a/gdc_client/query/versions.py
+++ b/gdc_client/query/versions.py
@@ -35,8 +35,8 @@ def get_latest_versions(url, uuids, verify=True):
         if not resp.ok:
             raise HTTPError(
                 (
-                    "The following request {0} for ids {1} returned with "
-                    "status code: {2} and response content: {3}"
+                    "The following request {} for ids {} returned with "
+                    "status code: {} and response content: {}"
                 ).format(
                     versions_url,
                     chunk,
diff --git a/gdc_client/settings/parser.py b/gdc_client/settings/parser.py
index e60f80c2..45d2d68a 100644
--- a/gdc_client/settings/parser.py
+++ b/gdc_client/settings/parser.py
@@ -11,7 +11,7 @@ )
 
 
-class SettingsResolver(object):
+class SettingsResolver:
     def __init__(self, config_file):
         self.config = GDCClientConfigShared(config_file)
diff --git a/gdc_client/upload/client.py b/gdc_client/upload/client.py
index 8e39ba28..e2d77e92 100644
--- a/gdc_client/upload/client.py
+++ b/gdc_client/upload/client.py
@@ -42,7 +42,7 @@ log = logging.getLogger("upload-client")
 
 
-class Stream(object):
+class Stream:
     def __init__(self, file, pbar, filesize: int):
         self._file = file
         self.pbar = pbar
@@ -75,7 +75,7 @@ def upload_multipart(
     tries = MAX_RETRIES
     while tries > 0:
         try:
-            log.debug("Start upload part {}".format(part_number))
+            log.debug(f"Start upload part {part_number}")
             with open(filename, "rb") as source:
                 if OS_WINDOWS:
                     chunk_file = mmap(
@@ -92,29 +92,29 @@ def upload_multipart(
                         prot=PROT_READ,
                     )
 
-            log.debug("Making http request for part {}".format(part_number))
+            log.debug(f"Making http request for part {part_number}")
             res = requests.put(
-                url + "?uploadId={}&partNumber={}".format(upload_id, part_number),
+                url + f"?uploadId={upload_id}&partNumber={part_number}",
                headers=headers,
                 data=chunk_file,
                 verify=verify,
             )
-            log.debug("Done making http request for part {}".format(part_number))
+            log.debug(f"Done making http request for part {part_number}")
 
             chunk_file.close()
 
             if res.status_code == 200:
-                log.debug("Finish upload part {}".format(part_number))
+                log.debug(f"Finish upload part {part_number}")
                 return True
 
             time.sleep(get_sleep_time(tries))
 
             tries -= 1
-            log.debug("Retry upload part {}, {}".format(part_number, res.content))
+            log.debug(f"Retry upload part {part_number}, {res.content}")
 
         except Exception as e:
             if debug:
-                log.exception("Part upload failed: {}. Retrying".format(e))
+                log.exception(f"Part upload failed: {e}. Retrying")
 
             time.sleep(get_sleep_time(tries))
             tries -= 1
@@ -141,7 +141,7 @@ def create_resume_path(file_path):
     return "resume_" + file_path
 
 
-class GDCUploadClient(object):
+class GDCUploadClient:
     def __init__(
         self,
         token,
@@ -172,7 +172,7 @@ def __init__(
             int(max(upload_part_size, MIN_PARTSIZE) / PAGESIZE + 1) * PAGESIZE
         )
         self._metadata = {}
-        self.resume_path = "resume_{}".format(self.manifest_name)
+        self.resume_path = f"resume_{self.manifest_name}"
         self.graphql_url = urlparse.urljoin(self.server, "v0/submission/graphql")
 
     def _get_node_metadata_via_graphql(
@@ -208,7 +208,7 @@ def _get_node_type(self, node_id):
         nodes = result["data"]["node"]
 
         if not nodes:
-            raise Exception("File with id {} not found".format(node_id))
+            raise Exception(f"File with id {node_id} not found")
 
         return nodes[0]["type"]
 
@@ -235,7 +235,7 @@ def get_metadata(self, node_id, field):
         )
 
         if r.status_code != 200:
-            raise Exception("Fail to get project_id, filename: {}".format(r.content))
+            raise Exception(f"Fail to get project_id, filename: {r.content}")
 
         result = r.json()
         if "errors" in result:
@@ -250,7 +250,7 @@ def get_metadata(self, node_id, field):
                 self._metadata[node_id] = result["data"][file_type][0]
                 return self._metadata[node_id][field]
 
-        raise Exception("File with id {} not found".format(node_id))
+        raise Exception(f"File with id {node_id} not found")
 
     def get_files(self, action="download"):
         """Parse file information from manifest"""
@@ -264,16 +264,14 @@ def get_files(self, action="download"):
                 project_id = f.get("project_id") or self.get_metadata(
                     file_id, "project_id"
                 )
-                program, project = [part.upper() for part in project_id.split("-", 1)]
+                program, project = (part.upper() for part in project_id.split("-", 1))
 
                 if not program or not project:
-                    raise RuntimeError(
-                        "Unable to parse project id {}".format(project_id)
-                    )
+                    raise RuntimeError(f"Unable to parse project id {project_id}")
 
                 file_entity.url = urlparse.urljoin(
                     self.server,
-                    "v0/submission/{}/{}/files/{}".format(program, project, file_id),
+                    f"v0/submission/{program}/{project}/files/{file_id}",
                 )
 
                 if action == "delete":
@@ -336,7 +334,7 @@ def get_files(self, action="download"):
                 self.file_entities.append(file_entity)
 
         except KeyError as e:
-            log.error("Please provide {} from manifest or as an argument".format(e))
+            log.error(f"Please provide {e} from manifest or as an argument")
             return False
 
         # this makes things very hard to debug
@@ -358,14 +356,14 @@ def upload(self):
                 )
             )
             if use_resume.lower() not in ["n", "no"]:
-                with open(self.resume_path, "r") as f:
+                with open(self.resume_path) as f:
                     self.files = manifest.load(f)["files"]
 
         self.get_files()
         for f in self.file_entities:
             self.load_file(f)
 
-            log.info("Attempting to upload to {}".format(self.url))
+            log.info(f"Attempting to upload to {self.url}")
             if not self.multipart:
                 self._upload()
             else:
@@ -387,16 +385,14 @@ def abort(self):
             for f in self.file_entities:
                 self.load_file(f)
                 r = requests.delete(
-                    self.url + "?uploadId={}".format(self.upload_id),
+                    self.url + f"?uploadId={self.upload_id}",
                     headers=self.headers,
                     verify=self.verify,
                 )
                 if r.status_code not in [204, 404]:
-                    raise Exception(
-                        "Fail to abort multipart upload: \n{}".format(r.content)
-                    )
+                    raise Exception(f"Fail to abort multipart upload: \n{r.content}")
                 else:
-                    log.warning("Abort multipart upload {}".format(self.upload_id))
+                    log.warning(f"Abort multipart upload {self.upload_id}")
 
     def delete(self):
         """Delete file from object storage"""
@@ -405,11 +401,9 @@ def delete(self):
             self.load_file(f)
             r = requests.delete(self.url, headers=self.headers, verify=self.verify)
             if r.status_code == 204:
-                log.info("Delete file {}".format(self.node_id))
+                log.info(f"Delete file {self.node_id}")
             else:
-                log.warning(
-                    "Fail to delete file {}: {}".format(self.node_id, r.content)
-                )
+                log.warning(f"Fail to delete file {self.node_id}: {r.content}")
 
     def _upload(self):
         """Simple S3 PUT"""
@@ -420,7 +414,7 @@ def _upload(self):
                 self.url + "/_dry_run", headers=self.headers, verify=self.verify
             )
             if r.status_code != 200:
-                log.error("Can't upload: {}".format(r.content))
+                log.error(f"Can't upload: {r.content}")
                 return
 
             pbar = get_file_transfer_pbar(
@@ -434,16 +428,16 @@ def _upload(self):
             )
 
             if r.status_code != 200:
-                log.error("Upload failed {}".format(r.content))
+                log.error(f"Upload failed {r.content}")
                 return
 
             pbar.finish()
             self.cleanup()
-            log.info("Upload finished for file {}".format(self.node_id))
+            log.info(f"Upload finished for file {self.node_id}")
         except Exception as e:
             log.exception(e)
-            log.error("Upload failed {}".format(e))
+            log.error(f"Upload failed {e}")
 
     def multipart_upload(self):
         """S3 Multipart upload"""
@@ -455,9 +449,7 @@ def multipart_upload(self):
                 self.check_multipart()
 
                 if self.debug:
-                    log.debug(
-                        "Completed: {}/{}".format(self.completed, self.total_parts)
-                    )
+                    log.debug(f"Completed: {self.completed}/{self.total_parts}")
 
                 self.complete()
 
@@ -481,13 +473,13 @@ def handle_multipart(self):
                         {"files": list(self.incompleted)}, default_flow_style=False
                     )
                 )
-            log.info("Saved to {}".format(path))
+            log.info(f"Saved to {path}")
 
             if self.debug:
                 log.exception(e)
                 raise
 
-            log.error("Failure: {}".format(e))
+            log.error(f"Failure: {e}")
 
     def check_multipart(self):
         tries = MAX_RETRIES
@@ -499,9 +491,7 @@ def check_multipart(self):
             tries -= 1
             time.sleep(get_sleep_time(tries))
 
-        raise Exception(
-            "Can't find multipart upload with upload id {}".format(self.upload_id)
-        )
+        raise Exception(f"Can't find multipart upload with upload id {self.upload_id}")
 
     def initiate(self):
         if not self.upload_id:
@@ -511,10 +501,10 @@ def initiate(self):
             if r.status_code == 200:
                 xml = XMLResponse(r.content)
                 self.upload_id = xml.get_key("UploadId")
-                log.info("Start multipart upload. UploadId: {}".format(self.upload_id))
+                log.info(f"Start multipart upload. UploadId: {self.upload_id}")
                 return True
             else:
-                log.error("Fail to initiate multipart upload: {}".format(r.content))
+                log.error(f"Fail to initiate multipart upload: {r.content}")
                 return False
         return True
 
@@ -567,16 +557,16 @@ def upload_parts(self):
                 so no exception should be re-raised here
                 """
                 if future.result():
-                    log.debug("Part: {} is done".format(part_number))
+                    log.debug(f"Part: {part_number} is done")
                     self.completed += 1
                     pbar.update(self.completed)
                 else:
-                    log.warning("Part: {} failed".format(part_number))
+                    log.warning(f"Part: {part_number} failed")
 
         pbar.finish()
 
     def list_parts(self):
         r = requests.get(
-            self.url + "?uploadId={}".format(self.upload_id),
+            self.url + f"?uploadId={self.upload_id}",
             headers=self.headers,
             verify=self.verify,
         )
@@ -597,7 +587,7 @@ def complete(self):
             )
         )
 
-        url = self.url + "?uploadId={}".format(self.upload_id)
+        url = self.url + f"?uploadId={self.upload_id}"
         tries = MAX_RETRIES
         while tries > 0:
             r = requests.post(
@@ -611,16 +601,16 @@ def complete(self):
                 time.sleep(get_sleep_time(tries))
 
             else:
-                log.info("Multipart upload finished for file {}".format(self.node_id))
+                log.info(f"Multipart upload finished for file {self.node_id}")
                 return
 
-        raise Exception("Multipart upload complete failed: {}".format(r.content))
+        raise Exception(f"Multipart upload complete failed: {r.content}")
 
     def cleanup(self):
         if os.path.isfile(self.resume_path):
             os.remove(self.resume_path)
 
 
-class FileEntity(object):
+class FileEntity:
     def __init__(self, **kwargs):
         self.__dict__.update(kwargs)
 
@@ -633,7 +623,7 @@ def __init__(self, **kwargs):
         self.upload_id = None
 
 
-class Multiparts(object):
+class Multiparts:
     def __init__(self, xml_string):
         self.xml = XMLResponse(xml_string)
         self.parts = self.xml.parse("Part")
@@ -655,19 +645,19 @@ def uploaded(self, part_number):
         return False
 
 
-class XMLResponse(object):
+class XMLResponse:
     def __init__(self, xml_string):
         self.root = etree.fromstring(xml_string)
         self.namespace = self.root.nsmap[None]
 
     def get_key(self, key):
-        element = self.root.find("{%s}%s" % (self.namespace, key))
+        element = self.root.find("{{{}}}{}".format(self.namespace, key))
         if element is not None:
             return element.text
         return None
 
     def parse(self, key):
-        elements = self.root.findall("{%s}%s" % (self.namespace, key))
+        elements = self.root.findall("{{{}}}{}".format(self.namespace, key))
         keys = []
         for element in elements:
             keys.append({ele.tag.split("}")[-1]: ele.text for ele in element})
diff --git a/requirements.txt b/requirements.txt
index 92e0db53..8f4a00ec 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
 #
-# This file is autogenerated by pip-compile
-# To update, run:
+# This file is autogenerated by pip-compile with Python 3.8
+# by the following command:
 #
 #    pip-compile
 #
@@ -16,6 +16,8 @@ cryptography==2.8
     #   pyopenssl
 idna==2.8
     # via requests
+importlib-metadata==7.0.1
+    # via gdc_client (setup.py)
 intervaltree==3.0.2
     # via gdc_client (setup.py)
 jsonschema==2.6.0
@@ -54,3 +56,5 @@ termcolor==1.1.0
     # via gdc_client (setup.py)
 urllib3==1.25.11
     # via requests
+zipp==3.17.0
+    # via importlib-metadata
diff --git a/setup.py b/setup.py
index 1454119f..4cd1ce33 100644
--- a/setup.py
+++ b/setup.py
@@ -18,7 +18,7 @@
         "pyOpenSSL~=18.0.0",
         "PyYAML>=5.1",
         "intervaltree~=3.0.2",
-        "importlib_metadata; python_version<'3.8'",
+        "importlib_metadata",
         "termcolor~=1.1.0",
         "requests~=2.22.0",
         "progressbar2~=3.43.1",
diff --git a/tests/conftest.py b/tests/conftest.py
index 2c01e9d8..3d0bf553 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,4 +1,5 @@
 import hashlib
+import platform
 from io import BytesIO
 from multiprocessing import Process
 import tarfile
@@ -115,14 +116,18 @@ def get_big_content(n: int) -> str:
 }
 
 
-@pytest.fixture(scope="class")
-def setup_mock_server() -> None:
+def run_mock_server():
     # import mock_server here to avoid cyclic import
     import mock_server
 
-    server = Process(target=mock_server.app.run)
+    mock_server.app.run()
+
+
+@pytest.fixture(scope="class")
+def setup_mock_server() -> None:
+    server = Process(target=run_mock_server)
     server.start()
-    time.sleep(2)
+    time.sleep(5)  # starting with py38, takes longer for process to start on macOS
     yield
 
     server.terminate()
diff --git a/tests/mock_server.py b/tests/mock_server.py
index f9fe8113..1d67d1da 100644
--- a/tests/mock_server.py
+++ b/tests/mock_server.py
@@ -42,7 +42,7 @@ def files_versions():
     files = uuids.keys()
     for i in ids:
         if i not in files:
-            return jsonify({"message": "{0} not found in {1}".format(i, files)}), 404
+            return jsonify({"message": f"{i} not found in {files}"}), 404
         else:
             result.append({"id": i, "latest_id": i})
 
@@ -129,7 +129,7 @@ def files():
             result["data"]["hits"].append(hit)
 
     except Exception as e:
-        print("Error {}".format(e))
+        print(f"Error {e}")
 
     result["data"]["pagination"]["size"] = size
     return jsonify(result)
@@ -162,7 +162,7 @@ def download(ids=""):
     for i in ids:
         if i not in uuids.keys():
             return (
-                jsonify({"message": "{0} not found in {1}".format(i, uuids.keys())}),
+                jsonify({"message": f"{i} not found in {uuids.keys()}"}),
                 404,
             )
 
@@ -198,7 +198,7 @@ def download(ids=""):
         md5sum = uuids[ids[0]]["md5sum"]
 
     resp = Response(data)
-    resp.headers["Content-Disposition"] = "attachment; filename={0}".format(filename)
+    resp.headers["Content-Disposition"] = f"attachment; filename={filename}"
     resp.headers["Content-Type"] = "application/octet-stream"
     resp.headers["Content-Length"] = len(data)
     if md5sum:
@@ -222,7 +222,7 @@ def handle_range_request(
         data = uuids[ids[0]]["contents"][start:end]
 
     resp = Response(data)
-    resp.headers["Content-Disposition"] = "attachment; filename={0}".format(filename)
+    resp.headers["Content-Disposition"] = f"attachment; filename={filename}"
    resp.headers["Content-Type"] = "application/octet-stream"
     resp.headers["Content-Length"] = len(data)
diff --git a/tests/test_parcel_utils.py b/tests/test_parcel_utils.py
index d4002477..db0d0af5 100644
--- a/tests/test_parcel_utils.py
+++ b/tests/test_parcel_utils.py
@@ -76,7 +76,7 @@ def test__validate_file_md5sum_negative_validation_errors(
         "d47b127bc2de2d687ddc82dac354c415"  # pragma: allowlist secret
     )
 
-    with pytest.raises(exceptions.MD5ValidationError, match=r"{}".format(expected)):
+    with pytest.raises(exceptions.MD5ValidationError, match=rf"{expected}"):
         utils.validate_file_md5sum(stream, file_path)
diff --git a/tests/test_query.py b/tests/test_query.py
index 9bb67cd2..3519d734 100644
--- a/tests/test_query.py
+++ b/tests/test_query.py
@@ -137,11 +137,9 @@ def test_get_latest_versions_error(
     versions_response_error(url + "/files/versions")
 
     expected_err_msg = re.escape(
-        (
-            "The following request {0} for ids "
-            "{1} returned with status code: 502 and response content: "
-            "b'502 Bad Gateway'".format(url + "/files/versions", ids)
-        )
+        "The following request {} for ids "
+        "{} returned with status code: 502 and response content: "
+        "b'502 Bad Gateway'".format(url + "/files/versions", ids)
     )
 
     with pytest.raises(HTTPError, match=expected_err_msg):
diff --git a/tox.ini b/tox.ini
index 24a722eb..ed66a342 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,7 +1,8 @@
 [tox]
-envlist=py35,py36
+envlist=py38
 
 [testenv]
+allowlist_externals = pytest
 setenv=
     PYTHONHASHSEED=0
 deps=
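One last note on the tests/conftest.py hunk above: extracting run_mock_server
to module level is what lets the fixture work under py38's "spawn" start
method (the default on macOS since 3.8), because spawn pickles the Process
target by reference, and a Flask app's bound app.run method is not reliably
picklable. A standalone sketch of the pattern (assumes the Flask app from
tests/mock_server.py; the surrounding driver code is illustrative):

    from multiprocessing import get_context

    def run_mock_server():
        # A module-level function is picklable by reference under "spawn".
        import mock_server
        mock_server.app.run()

    if __name__ == "__main__":
        proc = get_context("spawn").Process(target=run_mock_server)
        proc.start()
        # ... exercise the server here ...
        proc.terminate()
        proc.join()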