Skip to content

Commit

Permalink
feat: recall sequences from positions
Browse files Browse the repository at this point in the history
  • Loading branch information
gabriellovate committed Sep 11, 2024
1 parent 6829cd1 commit e063fb5
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 22 deletions.
1 change: 1 addition & 0 deletions bin/annotate_interactions.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
Usage:
annotate_interactions.py -t <trns_file>... -g <genome> -o <output_file> [-m <min_components> -M <max_components> --step_size <step_size> --sigma <sigma>]
annotate_interactions.py -d <array_dir> -g <genome> -o <output_file> [-m <min_components> -M <max_components> --step_size <step_size> --sigma <sigma>]
annotate_interactions.py --filter --only_partner01 <partner_segment01> --only_partner02 <partner_segment02> -a <annotation_table> -o <output_file>
Options:
-h --help Show this screen.
Expand Down
108 changes: 86 additions & 22 deletions bin/annotation_table_to_viennaRNA_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,72 @@ def annotation_to_sequence(annotation, genome_dict, extension=0):
Returns:
sequences (list): The sequences of the annotation.
"""
sequences = [
genome_dict[annotation["segment01"]][
annotation["start01"] + extension : annotation["end01"] + extension
],
genome_dict[annotation["segment02"]][
annotation["start02"] + extension : annotation["end02"] + extension
],
]
# add checks to make sure things are within range
sequences = []
extension_start01 = extension
extension_start02 = extension
extension_end01 = extension
extension_end02 = extension
if annotation["start01"] - extension <= 0:
extension_start01 = 0
if annotation["start02"] - extension <= 0:
extension_start02 = 0
if annotation["end01"] + extension >= len(genome_dict[annotation["segment01"]]):
extension_end01 = 0
if annotation["end02"] + extension >= len(genome_dict[annotation["segment02"]]):
extension_end02 = 0
if extension:
sequences = [
genome_dict[annotation["segment01"]][
annotation["start01"]
- extension_start01 : annotation["end01"]
+ extension_end01
],
genome_dict[annotation["segment02"]][
annotation["start02"]
- extension_start02 : annotation["end02"]
+ extension_end02
],
]
else:
sequences = [
genome_dict[annotation["segment01"]][
annotation["start01"] : annotation["end01"]
],
genome_dict[annotation["segment02"]][
annotation["start02"] : annotation["end02"]
],
]
return sequences


def create_extended_seqs(
annotation_table_df, genome_dict, extension_window=[5, 50], extension_step=5
):
"""Creates exteded sequences for structure prediction.
Args:
annotation_table_df ()
genome_dict ()
extension_window ()
extension_step ()
Returns:
dict ()
"""
sequences_extended = {}
# iterate over each line of the annotation_table_df
for i in range(
extension_window[0], extension_window[1] + extension_step, extension_step
):
sequences_extended[i] = {}
for index, row in annotation_table_df.iterrows():
# extract the sequences of the annotation from the genome file
interaction = annotation_to_sequence(row, genome_dict, extension=i)
sequences_extended[i][index] = interaction
return sequences_extended


def write_viennaRNA_input(sequences, output_file):
"""Writes the sequences to an input file for viennaRNA's RNAduplex
Expand All @@ -43,37 +98,46 @@ def write_viennaRNA_input(sequences, output_file):


def main():
parser = argparse.ArgumentParser(description="annotation table to fasta.")
parser = argparse.ArgumentParser(description="annotation table to viennaRNA input.")
parser.add_argument("-g", "--genome_file", help="The genome file.")
parser.add_argument("-a", "--annotation_table", help="The annotation table.")
parser.add_argument(
"-e",
"--extension_window",
default=[5, 50],
help="Interval to extend the annotation (inclusive).",
)
parser.add_argument(
"-s", "--extension_step", default=5, help="Step to extend the annotation"
)
parser.add_argument("-o", "--output_folder", help="The output folder.")
args = parser.parse_args()

annotation_table_df = da.parse_annotation_table(args.annotation_table)
genome_dict = hp.parse_fasta(args.genome_file)


sequences = {}
# iterate over each line of the annotation_table_df
for index, row in annotation_table_df.iterrows():
# extract the sequences of the annotation from the genome file
interaction = annotation_to_sequence(row, genome_dict)
sequences[index] = interaction


sequences_extended = {}
# iterate over each line of the annotation_table_df
for index, row in annotation_table_df.iterrows():
# extract the sequences of the annotation from the genome file
interaction = annotation_to_sequence(row, genome_dict, extension=25)
sequences_extended[index] = interaction

sequences_extended = create_extended_seqs(
annotation_table_df, genome_dict, extension_window=[5, 50], extension_step=5
)

# write the sequences to a fasta file
output_file = Path(args.output_folder) / Path(f"{Path(args.genome_file).stem}_annotations.fasta")
output_file_extended = Path(args.output_folder) / Path(f"{Path(args.genome_file).stem}_annotations_extended.fasta")
write_fasta(sequences, output_file)
write_fasta(sequences_extended, output_file_extended)
output_file = Path(args.output_folder) / Path(
f"{Path(args.genome_file).stem}_annotations.fasta"
)

write_viennaRNA_input(sequences, output_file)
for extension, sequences_extended in sequences_extended.items():
output_file_extended = Path(args.output_folder) / Path(
f"{Path(args.genome_file).stem}_annotations_extended_{extension}.fasta"
)
write_viennaRNA_input(sequences_extended, output_file_extended)


if __name__ == "__main__":
Expand Down
5 changes: 5 additions & 0 deletions bin/plot_heatmaps.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ def plot_heatmap(
else:
suffix = f"{suffix}_unannotated"
save_plot(plots_folder, combination, suffix)

plt.close()


Expand Down Expand Up @@ -265,6 +266,10 @@ def save_plot(plots_folder: str, combination: tuple, suffix: str) -> None:
os.path.join(plots_folder, f"{combination[0]}_{combination[1]}{suffix}.pdf"),
format="pdf",
)
plt.savefig(
os.path.join(plots_folder, f"{combination[0]}_{combination[1]}{suffix}.svg"),
format="svg",
)



Expand Down

0 comments on commit e063fb5

Please sign in to comment.