Skip to content

Commit

Permalink
chore: review release
Browse files Browse the repository at this point in the history
  • Loading branch information
gabriellovate committed Mar 13, 2023
1 parent 8810a38 commit 0abd4ab
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 17 deletions.
9 changes: 7 additions & 2 deletions bin/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def parse_fasta(fasta_file):
fasta_dict[header] = seq
return fasta_dict

def make_combination_array(genome_dict, intra_combinations=False):
def make_combination_array(genome_dict, intra_only=False):
"""
Creates a dictionary of numpy array of all possible genome segment combinations.
Use helper.parse_genome() to create genome_dict.
Expand All @@ -71,8 +71,13 @@ def make_combination_array(genome_dict, intra_combinations=False):
# * While I usually appreciate list comprehensions, the iterator can be converted
# * to a list directly. Alternatively, the iterator could be used as-is in the
# * for loop; that should work as well and is slightly more memory efficient.
if intra_combinations:
if intra_only:
segment_combinations = list(itertools.combinations_with_replacement(segments, 2))
segment_combinations = [
segment_combination
for segment_combination in segment_combinations
if segment_combination[0] == segment_combination[1]
]
else:
segment_combinations = list(itertools.combinations_with_replacement(segments, 2))
segment_combinations = [
Expand Down
17 changes: 11 additions & 6 deletions bin/plot_heatmaps.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
"""plot_heatmaps.py
Usage:
plot_heatmaps.py <trns_file> <trns_file>... -g <genome> [-a <annotation_table>] -o <output_folder>
plot_heatmaps.py <trns_file> -g <genome> [-a <annotation_table>] -o <output_folder>
plot_heatmaps.py <trns_file> <trns_file>... -g <genome> [-a <annotation_table> --intra_only] -o <output_folder>
plot_heatmaps.py <trns_file> -g <genome> [-a <annotation_table> --intra_only] -o <output_folder>
Options:
Expand All @@ -13,6 +13,7 @@
-g --genome=<genome> The genome filepath.
-o --output=<output_folder> The output folder.
-a --annotation_table=<annotation_table> The annotation table filepath.
--intra_only Only plot intra-segment interactions.
"""

Expand Down Expand Up @@ -223,6 +224,10 @@ def main():
trns_files = args["<trns_file>"]
genome_file = args["--genome"]
output_folder = args["--output"]
# Check if --intra_only is given
intra_only = False
if args["--intra_only"]:
intra_only = True
# check if --annotation_table is given
if args["--annotation_table"]:
annotation_table = args["--annotation_table"]
Expand All @@ -243,8 +248,8 @@ def main():
trns_file_name = trns_file_name.split(".")[0]

# Create and fill combination arrays
combination_arrays[trns_file_name] = hp.make_combination_array(genome_dict)
th.segemehlTrans2heatmap(trns_file, combination_arrays[trns_file_name])
combination_arrays[trns_file_name] = hp.make_combination_array(genome_dict, intra_only=intra_only)
th.segemehlTrans2heatmap(trns_file, combination_arrays[trns_file_name], intra_only=intra_only)
merged_combination_arrays = combination_arrays

elif isinstance(trns_files, list):
Expand All @@ -254,8 +259,8 @@ def main():
trns_file_name = trns_file_name.split(".")[0]

# Create and fill combination arrays
combination_arrays[trns_file_name] = hp.make_combination_array(genome_dict)
th.segemehlTrans2heatmap(trns_file, combination_arrays[trns_file_name])
combination_arrays[trns_file_name] = hp.make_combination_array(genome_dict, intra_only=intra_only )
th.segemehlTrans2heatmap(trns_file, combination_arrays[trns_file_name], intra_only=intra_only)

# Merge combination arrays
merged_combination_arrays = ah.combine_arrays(
Expand Down
13 changes: 9 additions & 4 deletions bin/trns_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def __extract_start_stop_segemehl(read):
return [seg, start, stop]


def segemehlTrans2heatmap(trnsFile, interaction_arrays, intra_combinations=False):
def segemehlTrans2heatmap(trnsFile, interaction_arrays, intra_only=False):
"""Parses the trns file and fills the interaction_arrays
Parameters
Expand All @@ -91,14 +91,15 @@ def segemehlTrans2heatmap(trnsFile, interaction_arrays, intra_combinations=False
firstRead
) + __extract_start_stop_segemehl(secondRead)
interaction = __check_interaction(currentRow, interaction_arrays)
if intra_combinations:
fill_heatmap(interaction, interaction_arrays)
if intra_only:
if interaction[0] == interaction[3]:
fill_heatmap(interaction, interaction_arrays, intra=True)
else:
if interaction[0] != interaction[3]:
fill_heatmap(interaction, interaction_arrays)


def fill_heatmap(interaction, interaction_arrays):
def fill_heatmap(interaction, interaction_arrays, intra = False):
"""Fills the interaction_arrays with the interaction
Parameters
Expand All @@ -115,6 +116,10 @@ def fill_heatmap(interaction, interaction_arrays):
interaction_arrays[(firstSegment, secondSegment)][
interaction[1] : interaction[2], interaction[4] : interaction[5]
] += 1
if intra:
interaction_arrays[(secondSegment, firstSegment)][
interaction[4] : interaction[5], interaction[1] : interaction[2]
] += 1
return 1


Expand Down
4 changes: 2 additions & 2 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,10 @@ workflow segemehl_mapping {
segemehlPublish( segemehl.out )
// Converts segemehl's SAM output to BAM file
convertSAMtoBAM(
segemehl.out.map{ it -> [ it[0], it[2], 'segemehl' ] }.view()
segemehl.out.map{ it -> [ it[0], it[4], 'segemehl' ] }
)
// Runs samtools flagstats on the BAM file
getStats( segemehl.out.map{ it -> [ it[0], it[2] ] } )
getStats( segemehl.out.map{ it -> [ it[0], it[4] ] } )
emit:
segemehl.out
convertSAMtoBAM.out
Expand Down
6 changes: 3 additions & 3 deletions modules/map_reads.nf
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ process segemehl {
tuple val(sample_name), path(genome), path(index), path(reads)

output:
tuple val(sample_name), path("${reads.baseName}.trns.txt"), path("${reads.baseName}*_segemehl.sam"), path(genome), val(genome.baseName)
tuple val(sample_name), path("${reads.baseName}.trns.txt"), path("${reads.baseName}.sngl.bed"), path("${reads.baseName}.mult.bed"),path("${reads.baseName}*_segemehl.sam"), path(genome), val(genome.baseName)

script:
"""
Expand All @@ -46,10 +46,10 @@ process segemehlPublish {
label 'mapping_segemehl'

input:
tuple val(name), path(trns_file), path(sam_file), path(genome), val(genome_name)
tuple val(name), path(trns_file), path(sngl_file), path(mult_file), path(sam_file), path(genome), val(genome_name)

output:
tuple val(name), path(trns_file)
tuple val(name), path(trns_file), path(sngl_file), path(mult_file), path(sam_file)

publishDir "${params.output}/02-mappings/segemehl", mode: 'copy'

Expand Down

0 comments on commit 0abd4ab

Please sign in to comment.