Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
refactor: avoid duplicate mappings (#131)
Browse files Browse the repository at this point in the history
Co-authored-by: Boris Jurič <499542@mail.muni.cz>
Co-authored-by: Alex Kanitz <alexander.kanitz@alumni.ethz.ch>
3 people authored and balajtimate committed Nov 9, 2023
1 parent b67c251 commit dc8af23
Showing 14 changed files with 801 additions and 656 deletions.
35 changes: 12 additions & 23 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -12,21 +12,18 @@ jobs:
steps:

- name: check out repository
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: set up miniconda and env
uses: conda-incubator/setup-miniconda@v2
with:
auto-update-conda: true
python-version: "3.9"
mamba-version: "*"
channels: conda-forge,defaults
environment-file: environment.yml
auto-update-conda: true
activate-environment: htsinfer
environment-file: environment-dev.yml
auto-activate-base: false

- name: update env with dev packages
run: mamba env update --file environment-dev.yml

- name: display env info
run: |
conda info -a
@@ -50,7 +47,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: [ '3.7', '3.8', '3.9' ]
python-version: [ '3.8', '3.9', '3.10' ]

name: unit-testing-Python-${{ matrix.python-version }}

@@ -63,16 +60,12 @@ jobs:
uses: conda-incubator/setup-miniconda@v2
with:
python-version: ${{ matrix.python-version }}
auto-update-conda: true
mamba-version: "*"
channels: conda-forge,defaults
environment-file: environment.yml
auto-update-conda: true
activate-environment: htsinfer
environment-file: environment-dev.yml
auto-activate-base: false

- name: update env with dev packages
run: mamba env update --file environment-dev.yml

- name: display env info
run: |
conda info -a
@@ -100,29 +93,25 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: [ '3.7', '3.8', '3.9' ]
python-version: [ '3.8', '3.9', '3.10' ]

name: integration-testing-Python-${{ matrix.python-version }}

steps:

- name: check out repository
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: set up miniconda and env
uses: conda-incubator/setup-miniconda@v2
with:
python-version: ${{ matrix.python-version }}
auto-update-conda: true
mamba-version: "*"
channels: conda-forge,defaults
environment-file: environment.yml
auto-update-conda: true
activate-environment: htsinfer
environment-file: environment-dev.yml
auto-activate-base: false

- name: update env with dev packages
run: mamba env update --file environment-dev.yml

- name: display env info
run: |
conda info -a
@@ -171,4 +160,4 @@ jobs:
run: |
echo "Push indicator: ${{ steps.docker.outputs.push-indicator }}"
echo "# Set to 'true' if image was pushed, empty string otherwise"
test "${{ steps.docker.outputs.push-indicator }}" == "true"
test "${{ steps.docker.outputs.push-indicator }}" == "true"
10 changes: 6 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -123,12 +123,14 @@ dependencies via [Conda][conda]:
git clone https://github.com/zavolanlab/htsinfer
cd htsinfer
conda env create --file environment.yml
conda env update --file environment-dev.yml # optional: install development/testing dependencies
# Alternatively, to install with development dependencies,
# run the following instead
conda env create --file environment-dev.yml
```

Note that creating the environment takes non-trivial time and it is strongly
recommended that you install [Mamba][mamba] and replace `conda` with `mamba`
in the previous commands.
> Note that creating the environment takes non-trivial time and it is strongly
> recommended that you install [Mamba][mamba] and replace `conda` with `mamba`
> in the previous command.

Then, activate the `htsinfer` Conda environment with:

25 changes: 19 additions & 6 deletions environment-dev.yml
Original file line number Diff line number Diff line change
@@ -1,11 +1,24 @@
name: htsinfer
channels:
- defaults
- conda-forge
- bioconda
dependencies:
- coverage>=5.3
- flake8>=3.8.4
- mypy>=0.782
- pylint>=2.4.4
- pytest>=6.1.0
- biopython >=1.78
- coverage >=5.3
- cutadapt >=3.5, <=4.2
- flake8 >=3.8.4
- kallisto >=0.46.1, <= 0.48.0
- mypy >=0.782
- numpy >=1.22, <1.25
- pandas >=1.3.5, <1.4.0
- pip >=20.2.3
- pyahocorasick >=1.4.0
- pydantic >=1.8.1, <2
- pylint >=2.4.4
- pysam >=0.16.0
- pytest >=6.1.0
- python >=3.8, <=3.10
- star >=2.7.6
- pip:
- python-semantic-release>=7.15.0
- -e .
24 changes: 12 additions & 12 deletions environment.yml
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
name: htsinfer
channels:
- defaults
- bioconda
- conda-forge
- bioconda
dependencies:
- biopython>=1.78
- kallisto>=0.46.1, <= 0.48.0
- pandas>=1.0.5
- pip>=20.2.3
- pyahocorasick>=1.4.0
- pydantic>=1.8.1, <2
- pysam>=0.16.0
- python>3.6, <3.10
- star>=2.7.6
- cutadapt>=3.5, <=4.2
- biopython >=1.78
- cutadapt >=3.5, <=4.2
- kallisto >=0.46.1, <= 0.48.0
- numpy >=1.22, <1.25
- pandas >=1.3.5, <1.4.0
- pip >=20.2.3
- pyahocorasick >=1.4.0
- pydantic >=1.8.1, <2
- pysam >=0.16.0
- python >=3.8, <=3.10
- star >=2.7.6
- pip:
- -e .
1 change: 1 addition & 0 deletions htsinfer/cli.py
Original file line number Diff line number Diff line change
@@ -50,6 +50,7 @@ def __call__(
values,
option_string=None,
) -> None:
assert isinstance(values, list)
if len(values) > 2:
parser.print_usage(file=sys.stderr)
sys.stderr.write(
31 changes: 17 additions & 14 deletions htsinfer/get_library_type.py
Original file line number Diff line number Diff line change
@@ -21,9 +21,7 @@
SeqIdFormats,
Config,
)
from htsinfer.get_read_orientation import (
GetOrientation,
)
from htsinfer.mapping import Mapping

LOGGER = logging.getLogger(__name__)

@@ -62,15 +60,15 @@ class GetLibType:
def __init__(
self,
config: Config,
mapping: Mapping,
):
"""Class constructor."""
self.path_1: Path = config.args.path_1_processed
self.path_2: Optional[Path] = config.args.path_2_processed
self.library_source = config.results.library_source
self.results: ResultsType = ResultsType()
self.tmp_dir = config.args.tmp_dir
self.get_read_orientation: \
GetOrientation = GetOrientation(config=config)
self.mapping = mapping
self.max_distance = config.args.lib_type_max_distance
self.cutoff = config.args.lib_type_mates_cutoff

@@ -126,23 +124,24 @@ def _evaluate_mate_relationship(
self.results.relationship = (
StatesTypeRelationship.split_mates
)
self.mapping.library_type.relationship = (
StatesTypeRelationship.split_mates
)
else:
self.get_read_orientation.library_type.relationship \
self.mapping.library_type.relationship \
= StatesTypeRelationship.not_available
self.get_read_orientation.library_source = self.library_source
_ = self.get_read_orientation.evaluate()
self.mapping.library_source = self.library_source
self.mapping.evaluate()
self._align_mates()

def _align_mates(self):
"""Decide mate relationship by alignment."""

alignment_1 = Path(self.tmp_dir) \
/ "alignments" / "file_1" / "Aligned.out.sam"
alignment_2 = Path(self.tmp_dir) \
/ "alignments" / "file_2" / "Aligned.out.sam"
alignment_1 = self.mapping.star_dirs[0] / 'Aligned.out.sam'
alignment_2 = self.mapping.star_dirs[1] / 'Aligned.out.sam'

samfile1 = pysam.AlignmentFile(alignment_1, 'r')
samfile2 = pysam.AlignmentFile(alignment_2, 'r')
samfile1 = pysam.AlignmentFile(str(alignment_1), 'r')
samfile2 = pysam.AlignmentFile(str(alignment_2), 'r')

previous_seq_id1 = None
previous_seq_id2 = None
@@ -184,6 +183,10 @@ def _align_mates(self):
self.results.relationship = (
StatesTypeRelationship.split_mates
)
self.mapping.library_type.relationship \
= StatesTypeRelationship.split_mates
self.mapping.mapped = False
self.mapping.star_dirs = []
else:
self.results.relationship = (
StatesTypeRelationship.not_mates
Loading

0 comments on commit dc8af23

Please sign in to comment.