Skip to content

Commit

Permalink
Merge pull request #37 from jjjk123/develop
Browse files Browse the repository at this point in the history
added function to filter transcripts in clusters
  • Loading branch information
cmatKhan authored Aug 31, 2023
2 parents c193533 + 13202d6 commit 9d8e2c8
Showing 1 changed file with 49 additions and 39 deletions.
88 changes: 49 additions & 39 deletions src/isocomp/Compare/compare_isoforms_in_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,43 +125,53 @@ def compare_isoforms_in_cluster(
# TODO parameterize the cases in which isoforms are compared --eg,
# same strand, overlap threshold, different subjects
else:
# this produces a cartesian product of sorts... looks something
# like this:
# vector_crosser(['tx_1','tx_2','tx_3'],['tx_1','tx_2','tx_3'])
# {'V1': ['tx_2', 'tx_2', 'tx_1'], 'V2': ['tx_1', 'tx_3', 'tx_3']}
# the V1 and V2 lists will be the same length, so iterating over the
# length of either list and pairing the elements at the same index
# yields each comparison produced by vector_crosser()

cross_isoforms = vector_crosser(
cluster_gtf.unique_id,
cluster_gtf.unique_id)

# iterate over the comparisons produced by vector_crosser() and
# conduct the sequence alignments
for i in range(len(cross_isoforms['V1'])):

# get the unique_id corresponding to two comparisons in the
# cross_isoforms dict
isoform1_id = cross_isoforms['V1'][i]
isoform2_id = cross_isoforms['V2'][i]

# create window objects which describe the location of the isoforms
# according to the gtf
isoform1_window = isoform_library\
.get_isoform_coord(unique_id=isoform1_id)
isoform2_window = isoform_library\
.get_isoform_coord(unique_id=isoform2_id)

# compare the isoform sequences
aln = align_isoforms(
isoform_library.get_isoform_seq(unique_id=isoform1_id),
isoform_library.get_isoform_seq(unique_id=isoform2_id))

# append the compare_dict as an element to the list out
out.append(__output_dict(cluster,
cluster_window.chr,
isoform1_window,
isoform2_window,
aln))

# group transcripts by coordinates; return unique
cluster_gtf_grouped = cluster_gtf.df.groupby(by=['Start', 'End', 'Strand'], as_index=True)

for group, cluster_gtf_unique in cluster_gtf_grouped:
if len(cluster_gtf_unique) > 1:
# this produces a cartesian product of sorts... looks something
# like this:
# vector_crosser(['tx_1','tx_2','tx_3'],['tx_1','tx_2','tx_3'])
# {'V1': ['tx_2', 'tx_2', 'tx_1'], 'V2': ['tx_1', 'tx_3', 'tx_3']}
# the V1 and V2 lists will be the same length, so iterating over the
# length of either list and pairing the elements at the same index
# yields each comparison produced by vector_crosser()

cross_isoforms = vector_crosser(
cluster_gtf_unique.unique_id,
cluster_gtf_unique.unique_id)

# iterate over the comparisons produced by vector_crosser() and
# conduct the sequence alignments
for i in range(len(cross_isoforms['V1'])):

# get the unique_id corresponding to two comparisons in the
# cross_isoforms dict
isoform1_id = cross_isoforms['V1'][i]
isoform2_id = cross_isoforms['V2'][i]

# create window objects which describe the location of the isoforms
# according to the gtf
isoform1_window = isoform_library\
.get_isoform_coord(unique_id=isoform1_id)
isoform2_window = isoform_library\
.get_isoform_coord(unique_id=isoform2_id)

# compare the isoform sequences
aln = align_isoforms(
isoform_library.get_isoform_seq(unique_id=isoform1_id),
isoform_library.get_isoform_seq(unique_id=isoform2_id))

# append the compare_dict as an element to the list out
out.append(__output_dict(cluster,
cluster_window.chr,
isoform1_window,
isoform2_window,
aln))
else:
tx_id = cluster_gtf_unique['unique_id'].iloc[0]
isoform1_window = isoform_library.get_isoform_coord(unique_id=tx_id)
out.append(__output_dict(cluster,
cluster_window.chr,
isoform1_window))
return out

0 comments on commit 9d8e2c8

Please sign in to comment.