Skip to content

Commit

Permalink
Fix for MEI
Browse files Browse the repository at this point in the history
  • Loading branch information
asylvz committed Sep 10, 2020
1 parent 7c2ba86 commit 1425e5d
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 29 deletions.
39 changes: 22 additions & 17 deletions variants.c
Original file line number Diff line number Diff line change
Expand Up @@ -139,14 +139,14 @@ char* reverseComplement( char* str)

void print_sv_stats()
{
fprintf(logFile,"\n\nTARDIS found %d SVs total\n", del_cnt + ins_cnt + inv_cnt + tandup_cnt + mei_cnt + interdup_cnt + invdup_cnt);
fprintf(logFile,"\n\nTARDIS found %d SVs total\n", del_cnt + ins_cnt + inv_cnt + tandup_cnt + mei_cnt - mei_cnt_filtered + interdup_cnt + invdup_cnt);
fprintf(logFile,"\tDeletion: %d\n", del_cnt);
fprintf(logFile,"\tInsertion: %d\n", ins_cnt);
fprintf(logFile,"\tInversion: %d\n", inv_cnt);
fprintf(logFile,"\tTandem Duplication: %d\n", tandup_cnt);
fprintf(logFile,"\tInterspersed Duplication: %d\n", interdup_cnt);
fprintf(logFile,"\tInterspersed (Inverted) Duplication: %d\n", invdup_cnt);
fprintf(logFile,"\tMEI: %d (%d filtered)\n", mei_cnt, mei_cnt_filtered);
fprintf(logFile,"\tMEI: %d (%d filtered)\n", mei_cnt - mei_cnt_filtered, mei_cnt_filtered);
fprintf(logFile,"\tNUMT: %d\n", numt_cnt);

fprintf(stderr,"\n\nTARDIS is complete. Found %d SVs total\n", del_cnt + ins_cnt + inv_cnt + tandup_cnt + mei_cnt + interdup_cnt + invdup_cnt);
Expand All @@ -156,7 +156,7 @@ void print_sv_stats()
fprintf(stderr,"\tTandem Duplication: %d\n", tandup_cnt);
fprintf(stderr,"\tInterspersed Duplication: %d\n", interdup_cnt);
fprintf(stderr,"\tInterspersed (Inverted) Duplication: %d\n", invdup_cnt);
fprintf(stderr,"\tMEI: %d (%d filtered)\n", mei_cnt, mei_cnt_filtered);
fprintf(stderr,"\tMEI: %d (%d filtered)\n", mei_cnt - mei_cnt_filtered, mei_cnt_filtered);
fprintf(stderr,"\tNUMT: %d\n", numt_cnt);
}

Expand Down Expand Up @@ -250,6 +250,9 @@ void print_strvar( bam_info** in_bams, parameters* params, struct strvar* sv, FI
if( seq != NULL)
free( seq);

if( sv->filtered != false)
mei_cnt_filtered++;

sv_len = abs( ( sv->inner_end - sv->inner_start + 1));
total_inv_length += sv_len;
}
Expand All @@ -258,17 +261,16 @@ void print_strvar( bam_info** in_bams, parameters* params, struct strvar* sv, FI
seq = readRefAltSeqMEI( params, sv->chr_name, sv->mei_name);

if( seq != NULL)
sv_len = strlen(seq);
sv_len = strlen(seq) + abs(sv->inner_start - sv->outer_start);
else
sv_len = abs(sv->inner_end - sv->inner_start) + abs(sv->inner_start - sv->outer_start);

if( params->seq_resolved != 0 || sv->imprecise == NOT_IMPRECISE)
{
if( seq != NULL)
fprintf( fpOut, "%s\t%i\t%s%d\t%c\t%s\t%d\t%s\t", sv->chr_name, ( sv->outer_start) + 1, "vh_mei_", ++mei_cnt, seq[0], seq, 255, ( sv->filtered == false) ? "PASS" : "mfilt");
else
fprintf( fpOut, "%s\t%i\t%s%d\t%s\t%s\t%d\t%s\t", sv->chr_name, ( sv->outer_start) + 1, "vh_mei_", ++mei_cnt, ".", ".", 255, ( sv->filtered == false) ? "PASS" : "mfilt");

if( sv->filtered)
mei_cnt_filtered++;
}
else if( seq != NULL)
fprintf( fpOut, "%s\t%i\t%s%d\t%c\t%s%s%s%s\t%d\t%s\t", sv->chr_name, ( sv->outer_start) + 1, "vh_mei_", ++mei_cnt, seq[0], "<", "INS:ME:", sv->mei_name, ">", 255, ( sv->filtered == false) ? "PASS" : "mfilt");
Expand All @@ -278,26 +280,30 @@ void print_strvar( bam_info** in_bams, parameters* params, struct strvar* sv, FI
if( seq != NULL)
free( seq);

//sv_len = abs( ( sv->outer_end - sv->inner_end + 1));
if( sv->filtered != false)
mei_cnt_filtered++;

total_mei_length += sv_len;
}
else if( sv->svtype == MEIREVERSE)
{
seq = readRefAltSeqMEI( params, sv->chr_name, sv->mei_name);
if(seq != NULL)
sv_len = strlen(seq);
sv_len = strlen(seq) + abs(sv->inner_start - sv->outer_start);
else
sv_len = abs(sv->inner_end - sv->inner_start) + abs(sv->inner_start - sv->outer_start);

if( params->seq_resolved != 0 || sv->imprecise == NOT_IMPRECISE)
{
if( seq != NULL)
{
seq_rev = reverseComplement( seq);
fprintf( fpOut, "%s\t%i\t%s%d\t%c\t%s\t%d\t%s\t", sv->chr_name, ( sv->outer_end) + 1, "vh_mei_", ++mei_cnt, seq[0], seq_rev, 255, ( sv->filtered == false) ? "PASS" : "mfilt");
fprintf( fpOut, "%s\t%i\t%s%d\t%c\t%s\t%d\t%s\t", sv->chr_name, ( sv->outer_start) + 1, "vh_mei_", ++mei_cnt, seq[0], seq_rev, 255, ( sv->filtered == false) ? "PASS" : "mfilt");
}
else
{
seq_rev = NULL;
fprintf( fpOut, "%s\t%i\t%s%d\t%s\t%s\t%d\t%s\t", sv->chr_name, ( sv->outer_end) + 1, "vh_mei_", ++mei_cnt, ".", ".", 255, ( sv->filtered == false) ? "PASS" : "mfilt");
fprintf( fpOut, "%s\t%i\t%s%d\t%s\t%s\t%d\t%s\t", sv->chr_name, ( sv->outer_start) + 1, "vh_mei_", ++mei_cnt, ".", ".", 255, ( sv->filtered == false) ? "PASS" : "mfilt");
}
if( sv->filtered)
mei_cnt_filtered++;
Expand All @@ -306,14 +312,13 @@ void print_strvar( bam_info** in_bams, parameters* params, struct strvar* sv, FI
free( seq_rev);
}
else if( seq != NULL)
fprintf( fpOut, "%s\t%i\t%s%d\t%c\t%s%s%s%s\t%d\t%s\t", sv->chr_name, ( sv->outer_end) + 1, "vh_mei_", ++mei_cnt, seq[0], "<", "INS:ME:", sv->mei_name, ">", 255, ( sv->filtered == false) ? "PASS" : "mfilt");
fprintf( fpOut, "%s\t%i\t%s%d\t%c\t%s%s%s%s\t%d\t%s\t", sv->chr_name, ( sv->outer_start) + 1, "vh_mei_", ++mei_cnt, seq[0], "<", "INS:ME:", sv->mei_name, ">", 255, ( sv->filtered == false) ? "PASS" : "mfilt");
else
fprintf( fpOut, "%s\t%i\t%s%d\t%s\t%s%s%s%s\t%d\t%s\t", sv->chr_name, ( sv->outer_end) + 1, "vh_mei_", ++mei_cnt, ".", "<", "INS:ME:", sv->mei_name, ">", 255, ( sv->filtered == false) ? "PASS" : "mfilt");
fprintf( fpOut, "%s\t%i\t%s%d\t%s\t%s%s%s%s\t%d\t%s\t", sv->chr_name, ( sv->outer_start) + 1, "vh_mei_", ++mei_cnt, ".", "<", "INS:ME:", sv->mei_name, ">", 255, ( sv->filtered == false) ? "PASS" : "mfilt");

if( seq != NULL)
free( seq);

//sv_len = abs( ( sv->outer_end - sv->inner_end + 1));
total_mei_length += sv_len;
}
else if( sv->svtype == NUMTFORWARD || sv->svtype == NUMTREVERSE)
Expand Down Expand Up @@ -445,8 +450,8 @@ void print_strvar( bam_info** in_bams, parameters* params, struct strvar* sv, FI

if( sv->svtype == MEIFORWARD)
fprintf( fpOut, "END=%d;SVLEN=%d;MEINFO=%s;RPSUP=%d;SRSUP=%d;", (sv->outer_start) + sv_len + 1, sv_len, sv->mei_name, rp_total, sr_total);
if( sv->svtype == MEIREVERSE)
fprintf( fpOut, "END=%d;SVLEN=%d;MEINFO=%s;RPSUP=%d;SRSUP=%d;", (sv->outer_end) + sv_len + 1, sv_len, sv->mei_name, rp_total, sr_total);
else if( sv->svtype == MEIREVERSE)
fprintf( fpOut, "END=%d;SVLEN=%d;MEINFO=%s;RPSUP=%d;SRSUP=%d;", (sv->outer_start) + sv_len + 1, sv_len, sv->mei_name, rp_total, sr_total);
else if( sv->svtype == INVERSION)
fprintf( fpOut, "END=%d;SVLEN=%d;RPSUP=%d;SRSUP=%d;", (sv->outer_end) + 1, sv_len, rp_total, sr_total);
else if( sv->svtype == INVDUPLEFT || sv->svtype == INTERDUPLEFT)
Expand Down Expand Up @@ -478,7 +483,7 @@ void print_strvar( bam_info** in_bams, parameters* params, struct strvar* sv, FI
fprintf( fpOut, "SVTYPE=DEL:ME:%s\t", sv->mei_type);
else if( sv->svtype == DELETION)
fprintf( fpOut, "SVTYPE=DEL\t");
else if( sv->svtype == MEIFORWARD || sv->svtype == MEIREVERSE)
else if(sv->svtype == MEIREVERSE || sv->svtype == MEIFORWARD)
fprintf( fpOut, "SVTYPE=INS:ME:%s\t", sv->mei_type);
else if( sv->svtype == NUMTFORWARD || sv->svtype == NUMTREVERSE)
fprintf( fpOut, "SVTYPE=INS:MT\t");
Expand Down
1 change: 1 addition & 0 deletions variants.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,4 +60,5 @@ void free_chr(struct strvar ** variations, char* chr_);
void print_vcf_header( FILE *fpOut, bam_info** in_bams, parameters *params);
int print_chr(struct strvar ** variations, parameters *params, char* chr_, FILE *fpOut);
void print_sv_stats();
char* readRefAltSeqMEI( parameters *params, char* chr_name, char *mei_string);
#endif
50 changes: 38 additions & 12 deletions vh/vh_setcover.c
Original file line number Diff line number Diff line change
Expand Up @@ -716,9 +716,8 @@ void outputCluster( bam_info** in_bams, parameters* params, int cluster_id, FILE
{
int i;
char mobileName[strSize];

float is_start_satellite, is_end_satellite, is_mid_satellite;
sonic_repeat *mei_start, *mei_end, *mei_mid;
float is_start_satellite, is_end_satellite, is_mid_satellite, is_all_satellite;
sonic_repeat *mei_start, *mei_end, *mei_mid, *mei_all;
bool MEI_Filter = false;
struct strvar* var_example = NULL;

Expand All @@ -731,19 +730,32 @@ void outputCluster( bam_info** in_bams, parameters* params, int cluster_id, FILE
in_bams[i]->contribution = true;
}

is_start_satellite = sonic_is_satellite(params->this_sonic, listClusterEl[cluster_id].chromosome_name, listClusterEl[cluster_id].posStartSV_Outer, listClusterEl[cluster_id].posStartSV_Outer + 1);
is_end_satellite = sonic_is_satellite(params->this_sonic, listClusterEl[cluster_id].chromosome_name, listClusterEl[cluster_id].posEndSV_Outer - 1, listClusterEl[cluster_id].posEndSV_Outer);
is_mid_satellite = sonic_is_satellite(params->this_sonic, listClusterEl[cluster_id].chromosome_name, (int)( listClusterEl[cluster_id].posStartSV_Outer + listClusterEl[cluster_id].posEndSV_Outer) / 2,
(int)( listClusterEl[cluster_id].posStartSV_Outer + listClusterEl[cluster_id].posEndSV_Outer) / 2 + 1);
if( listClusterEl[cluster_id].SVtype == MEIFORWARD || listClusterEl[cluster_id].SVtype == MEIREVERSE)
{

is_all_satellite = sonic_is_satellite(params->this_sonic, listClusterEl[cluster_id].chromosome_name, listClusterEl[cluster_id].posStartSV_Outer, listClusterEl[cluster_id].posEndSV_Outer);
is_start_satellite = sonic_is_satellite(params->this_sonic, listClusterEl[cluster_id].chromosome_name, listClusterEl[cluster_id].posStartSV_Outer, listClusterEl[cluster_id].posStartSV_Outer + 1);
is_end_satellite = sonic_is_satellite(params->this_sonic, listClusterEl[cluster_id].chromosome_name, listClusterEl[cluster_id].posEndSV_Outer - 1, listClusterEl[cluster_id].posEndSV_Outer);
is_mid_satellite = sonic_is_satellite(params->this_sonic, listClusterEl[cluster_id].chromosome_name, (int)( listClusterEl[cluster_id].posStartSV_Outer + listClusterEl[cluster_id].posEndSV_Outer) / 2,
(int)( listClusterEl[cluster_id].posStartSV_Outer + listClusterEl[cluster_id].posEndSV_Outer) / 2 + 1);

mei_start = sonic_is_mobile_element( params->this_sonic, listClusterEl[cluster_id].chromosome_name, listClusterEl[cluster_id].posStartSV_Outer, listClusterEl[cluster_id].posStartSV_Outer + 1, params->mei);
mei_end = sonic_is_mobile_element( params->this_sonic, listClusterEl[cluster_id].chromosome_name, listClusterEl[cluster_id].posEndSV_Outer - 1, listClusterEl[cluster_id].posEndSV_Outer, params->mei);
mei_mid = sonic_is_mobile_element( params->this_sonic, listClusterEl[cluster_id].chromosome_name, (int)( listClusterEl[cluster_id].posStartSV_Outer + listClusterEl[cluster_id].posEndSV_Outer) / 2,
(int)( listClusterEl[cluster_id].posStartSV_Outer + listClusterEl[cluster_id].posEndSV_Outer) / 2 + 1, params->mei);
mei_start = sonic_is_mobile_element( params->this_sonic, listClusterEl[cluster_id].chromosome_name, listClusterEl[cluster_id].posStartSV_Outer, listClusterEl[cluster_id].posStartSV_Outer + 1, params->mei);
mei_end = sonic_is_mobile_element( params->this_sonic, listClusterEl[cluster_id].chromosome_name, listClusterEl[cluster_id].posEndSV_Outer - 1, listClusterEl[cluster_id].posEndSV_Outer, params->mei);
mei_mid = sonic_is_mobile_element( params->this_sonic, listClusterEl[cluster_id].chromosome_name, (int)( listClusterEl[cluster_id].posStartSV_Outer + listClusterEl[cluster_id].posEndSV_Outer) / 2,
(int)( listClusterEl[cluster_id].posStartSV_Outer + listClusterEl[cluster_id].posEndSV_Outer) / 2 + 1, params->mei);

mei_all = sonic_is_mobile_element( params->this_sonic, listClusterEl[cluster_id].chromosome_name, listClusterEl[cluster_id].posStartSV_Outer - 1, listClusterEl[cluster_id].posEndSV_Outer, params->mei);
}
if( listClusterEl[cluster_id].SVtype == MEIFORWARD)
{

strcpy( mobileName, listClusterEl[cluster_id].mobileName);
//if(mei_all != NULL)
//fprintf(stderr, "FORWARD: %s, %s - %s, %s, %d\n", mobileName, listClusterEl[cluster_id].mei_type, mei_all->repeat_type, mei_all->repeat_class, mei_all->strand);

if( ( mei_all != NULL && strcmp( mei_all->repeat_class, listClusterEl[cluster_id].mei_type) == 0 &&
mei_all->strand == SONIC_STRAND_FWD) || ( is_all_satellite != 0))
MEI_Filter = true;

if( ( mei_start != NULL && strcmp( mei_start->repeat_type, mobileName) == 0 &&
mei_start->strand == SONIC_STRAND_FWD) || ( is_start_satellite != 0))
Expand All @@ -760,7 +772,8 @@ void outputCluster( bam_info** in_bams, parameters* params, int cluster_id, FILE
else if( listClusterEl[cluster_id].SVtype == MEIREVERSE)
{
strcpy( mobileName, listClusterEl[cluster_id].mobileName);

//if(mei_all != NULL)
//fprintf(stderr, "REVERSE: %s, %s - %s, %s, %d\n", mobileName, listClusterEl[cluster_id].mei_type, mei_all->repeat_type, mei_all->repeat_class, mei_all->strand);
if( ( mei_start != NULL && strcmp( mei_start->repeat_type, mobileName) == 0 &&
mei_start->strand == SONIC_STRAND_REV) || ( is_start_satellite != 0))
MEI_Filter = true;
Expand All @@ -772,6 +785,10 @@ void outputCluster( bam_info** in_bams, parameters* params, int cluster_id, FILE
if( ( mei_mid != NULL && strcmp( mei_mid->repeat_type, mobileName) == 0 &&
mei_mid->strand == SONIC_STRAND_REV) || ( is_mid_satellite != 0))
MEI_Filter = true;

if( ( mei_all != NULL && strcmp( mei_all->repeat_class, listClusterEl[cluster_id].mei_type) == 0 &&
mei_all->strand == SONIC_STRAND_REV) || ( is_all_satellite != 0))
MEI_Filter = true;
}

if( listClusterEl[cluster_id].SVtype == MEIFORWARD || listClusterEl[cluster_id].SVtype == NUMTFORWARD)
Expand Down Expand Up @@ -1299,6 +1316,15 @@ void processTheSV( bam_info **in_bams, parameters *params, int listClusterId, in
tmp_cluster = tmp_cluster->next;
}
}
/*if(listClusterEl[listClusterId].SVtype == MEIREVERSE)
{
fprintf(stderr,"REVERSE: %d - %d - %d - %d\n", posStartSV_Outer, posStartSV, posEndSV, posEndSV_Outer);
}
else if(listClusterEl[listClusterId].SVtype == MEIFORWARD)
{
fprintf(stderr,"FORWARD: %d - %d - %d - %d\n", posStartSV_Outer, posStartSV, posEndSV, posEndSV_Outer);
}*/

listClusterEl[listClusterId].posStartSV = posStartSV;
listClusterEl[listClusterId].posStartSV_Outer = posStartSV_Outer;
listClusterEl[listClusterId].posEndSV = posEndSV;
Expand Down

0 comments on commit 1425e5d

Please sign in to comment.