From d41ebe11ad4b4e76ab63eb75520e9ca5a31e4d53 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 15 Jan 2024 12:18:51 -0500 Subject: [PATCH 1/9] windows fix (temporarily) --- .github/workflows/run-pytest-windows.yml | 32 ++++++++++++++++++++++++ .github/workflows/run-pytest.yml | 2 +- geofetch/geofetch.py | 22 ++++++++++------ geofetch/utils.py | 2 +- 4 files changed, 49 insertions(+), 9 deletions(-) create mode 100644 .github/workflows/run-pytest-windows.yml diff --git a/.github/workflows/run-pytest-windows.yml b/.github/workflows/run-pytest-windows.yml new file mode 100644 index 0000000..54b4c99 --- /dev/null +++ b/.github/workflows/run-pytest-windows.yml @@ -0,0 +1,32 @@ +name: Run pytests windows + +on: + push: + branches: [dev] + pull_request: + branches: [master, dev] + +jobs: + pytest: + runs-on: ${{ matrix.os }} + strategy: + matrix: + python-version: ["3.10"] + os: [windows-latest] + + steps: + - uses: actions/checkout@v2 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Install test dependencies + run: if [ -f requirements/requirements-test.txt ]; then pip install -r requirements/requirements-test.txt; fi + + - name: Install package + run: python -m pip install . 
+ + - name: Run pytest tests + run: pytest tests -x -vv diff --git a/.github/workflows/run-pytest.yml b/.github/workflows/run-pytest.yml index 6184da9..489f0f6 100644 --- a/.github/workflows/run-pytest.yml +++ b/.github/workflows/run-pytest.yml @@ -11,7 +11,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: ["3.8", "3.11"] + python-version: ["3.8", "3.12"] os: [ubuntu-latest] steps: diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index 932fd74..0f904e3 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -372,10 +372,17 @@ def fetch_all(self, input: str, name: str = None) -> Union[NoReturn, peppy.Proje # check to make sure prefetch is callable if not self.just_metadata and not self.processed: if not is_command_callable("prefetch"): - raise SystemExit( - "To download raw data You must first install the sratoolkit, with prefetch in your PATH." - " Installation instruction: http://geofetch.databio.org/en/latest/install/" - ) + if os.name == "nt": + _LOGGER.warning( + "GEOfetch is not checking if prefetch is installed on Windows," + " please make sure it is installed and in your PATH, otherwise " + "it will not be possible to download raw data." + ) + else: + raise SystemExit( + "To download raw data You must first install the sratoolkit, with prefetch in your PATH." 
+ " Installation instruction: http://geofetch.databio.org/en/latest/install/" + ) acc_GSE_list = parse_accessions( input, self.metadata_expanded, self.just_metadata @@ -1036,7 +1043,7 @@ def _write_processed_annotation( ) if not just_object: - with open(file_annotation_path, "w") as m_file: + with open(file_annotation_path, "w", encoding="utf-8") as m_file: dict_writer = csv.DictWriter(m_file, processed_metadata[0].keys()) dict_writer.writeheader() dict_writer.writerows(processed_metadata) @@ -1865,12 +1872,13 @@ def _get_SRA_meta(self, file_gse_content: list, gsm_metadata, file_sra=None): else: # open existing annotation _LOGGER.info("Found SRA metadata, opening..") - with open(file_sra, "r") as m_file: + with open(file_sra, "r", encoding="UTF-8") as m_file: reader = csv.reader(m_file) file_list = [] srp_list = [] for k in reader: - file_list.append(k) + if k: + file_list.append(k) for value_list in file_list[1:]: srp_list.append(dict(zip(file_list[0], value_list))) diff --git a/geofetch/utils.py b/geofetch/utils.py index 850a77e..70570da 100644 --- a/geofetch/utils.py +++ b/geofetch/utils.py @@ -275,7 +275,7 @@ def fetch_metadata( os.makedirs(dirpath) # save file: - with open(outpath, "w") as f: + with open(outpath, "w", encoding="utf-8") as f: f.write(result_text) return result_list From 256f5f5d0123e1f875b0311580f8480dcad4a261 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 15 Jan 2024 12:21:18 -0500 Subject: [PATCH 2/9] updated test dependencies --- .github/workflows/run-pytest-windows.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-pytest-windows.yml b/.github/workflows/run-pytest-windows.yml index 54b4c99..b4839f7 100644 --- a/.github/workflows/run-pytest-windows.yml +++ b/.github/workflows/run-pytest-windows.yml @@ -23,7 +23,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install test dependencies - run: if [ -f requirements/requirements-test.txt ]; then pip install -r 
requirements/requirements-test.txt; fi + run: pip install -r requirements/requirements-test.txt - name: Install package run: python -m pip install . From 0cbb0c2210b2cb182159ff0031fc170e777c3305 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 18 Jan 2024 16:18:44 -0500 Subject: [PATCH 3/9] updated windows support --- .github/workflows/run-pytest-windows.yml | 32 ------------------------ docs/README.md | 4 +++ docs/changelog.md | 3 +++ geofetch/_version.py | 2 +- geofetch/geofetch.py | 25 +++++++++++------- 5 files changed, 24 insertions(+), 42 deletions(-) delete mode 100644 .github/workflows/run-pytest-windows.yml diff --git a/.github/workflows/run-pytest-windows.yml b/.github/workflows/run-pytest-windows.yml deleted file mode 100644 index b4839f7..0000000 --- a/.github/workflows/run-pytest-windows.yml +++ /dev/null @@ -1,32 +0,0 @@ -name: Run pytests windows - -on: - push: - branches: [dev] - pull_request: - branches: [master, dev] - -jobs: - pytest: - runs-on: ${{ matrix.os }} - strategy: - matrix: - python-version: ["3.10"] - os: [windows-latest] - - steps: - - uses: actions/checkout@v2 - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - - name: Install test dependencies - run: pip install -r requirements/requirements-test.txt - - - name: Install package - run: python -m pip install . - - - name: Run pytest tests - run: pytest tests -x -vv diff --git a/docs/README.md b/docs/README.md index 1976829..3d00313 100644 --- a/docs/README.md +++ b/docs/README.md @@ -54,6 +54,10 @@ geofetch -i GSE95654 --just-metadata geofetch -i GSE95654 --processed --just-metadata ``` + +**Note:** We ensure that GEOfetch is compatible with Unix, Linux, and Mac OS X. +However, due to dependencies, some features of GEOfetch may not be available on Windows. 
+ ### Check out what exactly argument you want to use to download data: ![](./img/arguments_outputs.svg) diff --git a/docs/changelog.md b/docs/changelog.md index 2846978..62194c1 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,5 +1,8 @@ # Changelog +## [0.12.6] -- 2024-01-18 +- Updated support for Windows (Some of the functionality could not be available on Windows) + ## [0.12.5] -- 2023-11-29 - Fixed bug, where description was not populated in PEP diff --git a/geofetch/_version.py b/geofetch/_version.py index 8e377d6..8e2394f 100644 --- a/geofetch/_version.py +++ b/geofetch/_version.py @@ -1 +1 @@ -__version__ = "0.12.5" +__version__ = "0.12.6" diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index 0f904e3..07f3ce4 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -1796,15 +1796,22 @@ def _download_processed_file(self, file_url: str, data_folder: str) -> bool: return True except IOError as e: - _LOGGER.error(str(e)) - # The server times out if we are hitting it too frequently, - # so we should sleep a bit to reduce frequency - sleeptime = (ntry + 1) ** 3 - _LOGGER.info(f"Sleeping for {sleeptime} seconds") - time.sleep(sleeptime) - ntry += 1 - if ntry > 4: - raise e + if os.name == "nt": + _LOGGER.error(f"{e}") + raise OSError( + "Windows may not have wget command. " + "Check if `wget` command is installed correctly." 
+ ) + else: + _LOGGER.error(str(e)) + # The server times out if we are hitting it too frequently, + # so we should sleep a bit to reduce frequency + sleeptime = (ntry + 1) ** 3 + _LOGGER.info(f"Sleeping for {sleeptime} seconds") + time.sleep(sleeptime) + ntry += 1 + if ntry > 4: + raise e def _get_SRA_meta(self, file_gse_content: list, gsm_metadata, file_sra=None): """ From dd2c2a72d4b5df354c781b26e31fbbbe3e0fba90 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 18 Jan 2024 16:20:32 -0500 Subject: [PATCH 4/9] updated pypi publish script --- .github/workflows/python-publish.yml | 14 +++++--------- docs_jupyter/python-usage.ipynb | 12 +++++++++--- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 4e1ef42..b120129 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -1,6 +1,3 @@ -# This workflows will upload a Python Package using Twine when a release is created -# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries - name: Upload Python Package on: @@ -9,9 +6,10 @@ on: jobs: deploy: - + name: upload release to PyPI runs-on: ubuntu-latest - + permissions: + id-token: write steps: - uses: actions/checkout@v2 - name: Set up Python @@ -23,9 +21,7 @@ jobs: python -m pip install --upgrade pip pip install setuptools wheel twine - name: Build and publish - env: - TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} - TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} run: | python setup.py sdist bdist_wheel - twine upload dist/* + - name: Publish package distributions to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 \ No newline at end of file diff --git a/docs_jupyter/python-usage.ipynb b/docs_jupyter/python-usage.ipynb index 9b34736..39e6111 100644 --- a/docs_jupyter/python-usage.ipynb +++ b/docs_jupyter/python-usage.ipynb @@ -138,7 
+138,13 @@ } ], "source": [ - "geof = Geofetcher(processed=True, data_source=\"all\", const_limit_project = 20, const_limit_discard = 500, attr_limit_truncate = 10000 )" + "geof = Geofetcher(\n", + " processed=True,\n", + " data_source=\"all\",\n", + " const_limit_project=20,\n", + " const_limit_discard=500,\n", + " attr_limit_truncate=10000,\n", + ")" ] }, { @@ -418,7 +424,7 @@ } ], "source": [ - "len(projects['GSE95654_samples'].samples)" + "len(projects[\"GSE95654_samples\"].samples)" ] }, { @@ -684,7 +690,7 @@ } ], "source": [ - "projects['GSE95654_samples'].sample_table.iloc[:15 , :5]" + "projects[\"GSE95654_samples\"].sample_table.iloc[:15, :5]" ] } ], From 62943bed777d8aa2deceac995b64473b9fac890f Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 18 Jan 2024 16:34:51 -0500 Subject: [PATCH 5/9] updated requirements --- requirements/requirements-all.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 854b024..8cc987b 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -1,10 +1,9 @@ -attmap>=0.1.8 colorama>=0.3.9 logmuse>=0.2.6 ubiquerg>=0.6.2 requests>=2.28.1 xmltodict>=0.13.0 pandas>=1.5.3 -peppy>=0.35.3 +peppy>=0.40.0 rich>=12.5.1 coloredlogs>=15.0.1 From 095b2bc5c1ae1739a8059853a172fee267c7b973 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 18 Jan 2024 16:38:59 -0500 Subject: [PATCH 6/9] deleted codecov --- .github/workflows/run-codecov.yml | 38 ------------------------------- 1 file changed, 38 deletions(-) delete mode 100644 .github/workflows/run-codecov.yml diff --git a/.github/workflows/run-codecov.yml b/.github/workflows/run-codecov.yml deleted file mode 100644 index 364eb68..0000000 --- a/.github/workflows/run-codecov.yml +++ /dev/null @@ -1,38 +0,0 @@ -name: Run codecov - -on: - push: - branches: [dev] - pull_request: - branches: [master] - -jobs: - pytest: - runs-on: ${{ matrix.os }} - strategy: - 
matrix: - python-version: [3.11] - os: [ubuntu-latest] - - steps: - - uses: actions/checkout@v2 - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - - name: Install test dependencies - run: if [ -f requirements/requirements-test.txt ]; then pip install -r requirements/requirements-test.txt; fi - - - name: Install package - run: python -m pip install . - - - name: Run pytest tests - run: pytest tests --cov=./ --cov-report=xml - - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 - with: - file: ./coverage.xml - name: py-${{ matrix.python-version }}-${{ matrix.os }} \ No newline at end of file From cf50094e37e5d6a701486764533c3f7b1531ccf6 Mon Sep 17 00:00:00 2001 From: Peter Hull Date: Fri, 2 Feb 2024 15:10:00 +0000 Subject: [PATCH 7/9] Use internal function to check for prefetch --- geofetch/geofetch.py | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index 07f3ce4..f86ae23 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -1,6 +1,7 @@ import copy import csv import os +import subprocess import sys import requests import xmltodict @@ -11,7 +12,7 @@ from rich.progress import track import re import logmuse -from ubiquerg import expandpath, is_command_callable +from ubiquerg import expandpath from typing import List, Union, Dict, Tuple, NoReturn import peppy import pandas as pd @@ -63,6 +64,20 @@ _LOGGER = logging.getLogger(__name__) +def is_prefetch_callable() -> bool: + """ + Test if the prefetch command can be run. + :return: True if it is available. + """ + try: + # Option -V means display version and then quit. 
+ subprocess.run(["prefetch", "-V"], + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + return True + except (subprocess.SubprocessError, OSError): + return False class Geofetcher: """ @@ -351,7 +366,7 @@ def get_projects( new_pr_dict[pr_key] = project_dict[pr_key] return new_pr_dict - + def fetch_all(self, input: str, name: str = None) -> Union[NoReturn, peppy.Project]: """ Main function driver/workflow @@ -371,18 +386,11 @@ def fetch_all(self, input: str, name: str = None) -> Union[NoReturn, peppy.Proje # check to make sure prefetch is callable if not self.just_metadata and not self.processed: - if not is_command_callable("prefetch"): - if os.name == "nt": - _LOGGER.warning( - "GEOfetch is not checking if prefetch is installed on Windows," - " please make sure it is installed and in your PATH, otherwise " - "it will not be possible to download raw data." - ) - else: - raise SystemExit( - "To download raw data You must first install the sratoolkit, with prefetch in your PATH." - " Installation instruction: http://geofetch.databio.org/en/latest/install/" - ) + if not is_prefetch_callable(): + raise SystemExit( + "To download raw data, you must first install the sratoolkit, with prefetch in your PATH. 
" + "Installation instruction: http://geofetch.databio.org/en/latest/install/" + ) acc_GSE_list = parse_accessions( input, self.metadata_expanded, self.just_metadata From 82f3a1a1178036977dd652886a22bfbaf1b52f2f Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 5 Feb 2024 17:04:35 +0100 Subject: [PATCH 8/9] minor polishing --- geofetch/__init__.py | 1 + geofetch/geofetch.py | 30 ++++++++---------------------- geofetch/utils.py | 19 +++++++++++++++++++ 3 files changed, 28 insertions(+), 22 deletions(-) diff --git a/geofetch/__init__.py b/geofetch/__init__.py index 8e208d1..fcd9139 100644 --- a/geofetch/__init__.py +++ b/geofetch/__init__.py @@ -1,4 +1,5 @@ """ Package-level data """ + import logmuse import coloredlogs diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index f86ae23..bdbd6a1 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -1,7 +1,6 @@ import copy import csv import os -import subprocess import sys import requests import xmltodict @@ -60,24 +59,11 @@ _filter_gsm, _unify_list_keys, gse_content_to_dict, + is_prefetch_callable, ) _LOGGER = logging.getLogger(__name__) -def is_prefetch_callable() -> bool: - """ - Test if the prefetch command can be run. - :return: True if it is available. - """ - try: - # Option -V means display version and then quit. 
- subprocess.run(["prefetch", "-V"], - check=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - return True - except (subprocess.SubprocessError, OSError): - return False class Geofetcher: """ @@ -366,7 +352,7 @@ def get_projects( new_pr_dict[pr_key] = project_dict[pr_key] return new_pr_dict - + def fetch_all(self, input: str, name: str = None) -> Union[NoReturn, peppy.Project]: """ Main function driver/workflow @@ -561,9 +547,9 @@ def fetch_all(self, input: str, name: str = None) -> Union[NoReturn, peppy.Proje name=self.project_name, meta_processed_samples=processed_metadata_samples, meta_processed_series=processed_metadata_series, - gse_meta_dict=file_gse_content_dict - if len(acc_GSE_list.keys()) == 1 - else None, + gse_meta_dict=( + file_gse_content_dict if len(acc_GSE_list.keys()) == 1 else None + ), ) if self.just_object: return return_value @@ -574,9 +560,9 @@ def fetch_all(self, input: str, name: str = None) -> Union[NoReturn, peppy.Proje f"{self.project_name}_PEP", metadata_dict_combined, subannotation_dict_combined, - gse_meta_dict=file_gse_content_dict - if len(acc_GSE_list.keys()) == 1 - else None, + gse_meta_dict=( + file_gse_content_dict if len(acc_GSE_list.keys()) == 1 else None + ), ) if self.just_object: return return_value diff --git a/geofetch/utils.py b/geofetch/utils.py index 70570da..dcab44f 100644 --- a/geofetch/utils.py +++ b/geofetch/utils.py @@ -757,3 +757,22 @@ def gse_content_to_dict(gse_content: List[str]) -> Dict[str, dict]: gse_dict[new_key] = new_value return {"experiment_metadata": gse_dict} + + +def is_prefetch_callable() -> bool: + """ + Test if the prefetch command can be run. + + :return: True if it is available. + """ + try: + # Option -V means display version and then quit. 
+ subprocess.run( + ["prefetch", "-V"], + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + return True + except (subprocess.SubprocessError, OSError): + return False From b88522ccabd93f1b5d5fa42819398d842ea1d8d2 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 5 Feb 2024 17:06:22 +0100 Subject: [PATCH 9/9] updated changelog --- docs/changelog.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 62194c1..e7af384 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,7 +1,7 @@ # Changelog -## [0.12.6] -- 2024-01-18 -- Updated support for Windows (Some of the functionality could not be available on Windows) +## [0.12.6] -- 2024-02-05 +- Updated support for Windows in Prefetch (Note: Some functionality may still be unavailable on Windows) ## [0.12.5] -- 2023-11-29 - Fixed bug, where description was not populated in PEP