Skip to content

Commit

Permalink
Precise calls are sequence resolved by default and some bugfixes
Browse files Browse the repository at this point in the history
  • Loading branch information
asylvz authored and calkan committed May 10, 2019
1 parent 6c6e28e commit e64cf62
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 30 deletions.
34 changes: 29 additions & 5 deletions processbam.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@ void load_bam( parameters *params, configuration *cfg, bam_info* in_bam, char* p
int* fragment_size_total;
float* variance;
char* library_name = NULL;
int i, j, library_index, diff, return_value;
int i, j, k, library_index, diff, return_value, temp;
char fai_file[MAX_SEQ];
double count;

fprintf( stderr, "Processing BAM file %s.\n", path);

Expand Down Expand Up @@ -100,10 +101,10 @@ void load_bam( parameters *params, configuration *cfg, bam_info* in_bam, char* p
/* For SAMPLEFRAG number of alignments, store the template length field.
Performed for each different library */
fragment_size = ( int**) getMem( in_bam->num_libraries * sizeof( int*));

for( i = 0; i < in_bam->num_libraries; i++)
{
fragment_size[i] = NULL;
fragment_size[i] = NULL;
fragment_size[i] = ( int*) getMem( SAMPLEFRAG * sizeof( int));
}

Expand Down Expand Up @@ -163,8 +164,23 @@ void load_bam( parameters *params, configuration *cfg, bam_info* in_bam, char* p
/* Sort the fragment sizes */
qsort( fragment_size[i], SAMPLEFRAG, sizeof( int), compare_size_int);

/* When the library does not have sufficient number of reads, we discard the leading 0s when calculating the median */
count = 0;
if( fragments_sampled[i] < SAMPLEFRAG)
{
for( k = 0; k < SAMPLEFRAG; k++)
{
if( fragment_size[i][k] == 0)
count++;
else
break;
}
}
/* Get the medians */
( in_bam->libraries)[i]->frag_med = fragment_size[i][( SAMPLEFRAG / 2) - 1];
temp = ( fragments_sampled[i] / 2) - 1 + (int) count;
( in_bam->libraries)[i]->frag_med = fragment_size[i][temp];

//fprintf(stderr,"There are %d 0s, %d fragments, index %d and median is %d\n", (int) count, fragments_sampled[i], temp, ( in_bam->libraries)[i]->frag_med);
}

/* Find the fragment sizes which pass the second test, and will contribute to the avg and std */
Expand Down Expand Up @@ -195,7 +211,6 @@ void load_bam( parameters *params, configuration *cfg, bam_info* in_bam, char* p
second_test_pass[i] = second_test_pass[i] + 1;
}
}

}

for( i = 0; i < in_bam->num_libraries; i++)
Expand Down Expand Up @@ -404,6 +419,15 @@ void create_fastq( bam_info* in_bam, char* bam_path, parameters* params)
}
}

void is_library_valid( struct library_properties* in_lib)
{
if( in_lib->frag_avg >=0 && in_lib->frag_std >=0 && in_lib->read_length >= 0 && in_lib->conc_min>=0 && in_lib->conc_max >=0)
in_lib->is_valid = true;
else
in_lib->is_valid = false;
}


void set_library_min_max( struct library_properties* in_lib)
{
in_lib->conc_min = in_lib->frag_avg - ( 4 * in_lib->frag_std);
Expand Down
1 change: 1 addition & 0 deletions processfq.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ typedef struct library_properties
char *divet; /* file name for the DIVET file post mrfast mapping */
int read_length; /* length of reads for this library */
int num_sequences; /* number of paired-end sequences for this library */
bool is_valid; /* Checks if the library is valid, i.e., mean, conc_min, conc_max, etc. */

struct softClip *listSoftClip;
struct discordantMapping **mappings_discordant;
Expand Down
70 changes: 55 additions & 15 deletions variants.c
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ void print_strvar( bam_info** in_bams, parameters* params, struct strvar* sv, FI
/* Find ref and alt sequences */
seq = readRefAltSeq( params, sv->chr_name, sv->inner_start, sv->inner_end);

if( params->seq_resolved != 0 && seq != NULL)
if( ( params->seq_resolved != 0 || sv->imprecise == NOT_IMPRECISE) && seq != NULL)
{
fprintf( fpOut, "%s\t%i\t%s%d\t%s\t%c\t255\t%s\t", sv->chr_name, ( sv->inner_start) + 1, "vh_del_", ++del_cnt, seq, seq[0], ( sv->filtered == false) ? "PASS" : "LowQual");
}
Expand Down Expand Up @@ -232,7 +232,7 @@ void print_strvar( bam_info** in_bams, parameters* params, struct strvar* sv, FI
{
seq = readRefAltSeq( params, sv->chr_name, sv->outer_start, sv->outer_end);

if( params->seq_resolved != 0 && seq != NULL)
if( ( params->seq_resolved != 0 || sv->imprecise == NOT_IMPRECISE) && seq != NULL)
{
/* Find ref and alt sequences */
seq_rev = reverseComplement( seq);
Expand All @@ -256,11 +256,11 @@ void print_strvar( bam_info** in_bams, parameters* params, struct strvar* sv, FI
else if( sv->svtype == MEIFORWARD)
{
seq = readRefAltSeqMEI( params, sv->chr_name, sv->mei_name);
if(seq != NULL)
sv_len = strlen(seq);

if( seq != NULL)
sv_len = strlen(seq);

if( params->seq_resolved != 0)
if( params->seq_resolved != 0 || sv->imprecise == NOT_IMPRECISE)
{
if( seq != NULL)
fprintf( fpOut, "%s\t%i\t%s%d\t%c\t%s\t%d\t%s\t", sv->chr_name, ( sv->outer_start) + 1, "vh_mei_", ++mei_cnt, seq[0], seq, 255, ( sv->filtered == false) ? "PASS" : "mfilt");
Expand All @@ -287,7 +287,7 @@ void print_strvar( bam_info** in_bams, parameters* params, struct strvar* sv, FI
if(seq != NULL)
sv_len = strlen(seq);

if( params->seq_resolved != 0)
if( params->seq_resolved != 0 || sv->imprecise == NOT_IMPRECISE)
{
if( seq != NULL)
{
Expand Down Expand Up @@ -332,7 +332,7 @@ void print_strvar( bam_info** in_bams, parameters* params, struct strvar* sv, FI
{
seq = readRefAltSeq( params, sv->chr_name, sv->outer_start, sv->outer_end);

if( params->seq_resolved != 0 && seq != NULL)
if( ( params->seq_resolved != 0 || sv->imprecise == NOT_IMPRECISE) && seq != NULL)
{
fprintf( fpOut, "%s\t%i\t%s%d\t%c\t%s\t%d\t%s\t", sv->chr_name, ( sv->outer_start) + 1," vh_dup_", ++dup_cnt, seq[0], seq, 255, ( sv->filtered == false) ? "PASS" : "LowQual");
}
Expand All @@ -350,52 +350,92 @@ void print_strvar( bam_info** in_bams, parameters* params, struct strvar* sv, FI
}
else if( sv->svtype == INVDUPRIGHT)
{
if( params->seq_resolved != 0)
if( sv->inner_start > sv->outer_start)
seq = readRefAltSeq( params, sv->chr_name, sv->outer_start, sv->inner_start);
else
seq = readRefAltSeq( params, sv->chr_name, sv->inner_start, sv->outer_start);

if( ( params->seq_resolved != 0 || sv->imprecise == NOT_IMPRECISE) && seq != NULL)
{
fprintf( fpOut,"%s\t%i\t%s%d\t%s\t%s\t%d\t%s\t", sv->chr_name, ( sv->outer_start) + 1," vh_dup_", ++dup_cnt, ".", ".", 255, ( sv->filtered == false) ? "PASS" : "LowQual");
fprintf( fpOut,"%s\t%i\t%s%d\t%c\t%s\t%d\t%s\t", sv->chr_name, ( sv->outer_start) + 1," vh_dup_", ++dup_cnt, seq[0], seq, 255, ( sv->filtered == false) ? "PASS" : "LowQual");
}
else if( seq != NULL)
fprintf( fpOut,"%s\t%i\t%s%d\t%c\t%s%s%s\t%d\t%s\t", sv->chr_name, ( sv->outer_start) + 1," vh_dup_", ++dup_cnt, seq[0], "<", "DUP:ISP", ">", 255, ( sv->filtered == false) ? "PASS" : "LowQual");
else
fprintf( fpOut,"%s\t%i\t%s%d\t%s\t%s%s%s\t%d\t%s\t", sv->chr_name, ( sv->outer_start) + 1," vh_dup_", ++dup_cnt, ".", "<", "DUP:ISP", ">", 255, ( sv->filtered == false) ? "PASS" : "LowQual");

if( seq != NULL)
free( seq);

invdup_cnt++;
sv_len = abs( ( sv->inner_start - sv->outer_start + 1));
total_invdup_length += sv_len;
}
else if( sv->svtype == INVDUPLEFT )
{
if( params->seq_resolved != 0)
if( sv->outer_end > sv->inner_end)
seq = readRefAltSeq( params, sv->chr_name, sv->inner_end, sv->outer_end);
else
seq = readRefAltSeq( params, sv->chr_name, sv->outer_end, sv->inner_end);

if( ( params->seq_resolved != 0 || sv->imprecise == NOT_IMPRECISE) && seq != NULL)
{
fprintf( fpOut,"%s\t%i\t%s%d\t%s\t%s\t%d\t%s\t", sv->chr_name, ( sv->inner_end) + 1," vh_dup_", ++dup_cnt, ".", ".", 255, ( sv->filtered == false) ? "PASS" : "LowQual");
fprintf( fpOut,"%s\t%i\t%s%d\t%c\t%s\t%d\t%s\t", sv->chr_name, ( sv->inner_end) + 1," vh_dup_", ++dup_cnt, seq[0], seq, 255, ( sv->filtered == false) ? "PASS" : "LowQual");
}
else if( seq != NULL)
fprintf( fpOut,"%s\t%i\t%s%d\t%c\t%s%s%s\t%d\t%s\t", sv->chr_name, ( sv->inner_end) + 1," vh_dup_", ++dup_cnt, seq[0], "<", "DUP:ISP", ">", 255, ( sv->filtered == false) ? "PASS" : "LowQual");
else
fprintf( fpOut,"%s\t%i\t%s%d\t%s\t%s%s%s\t%d\t%s\t", sv->chr_name, ( sv->inner_end) + 1," vh_dup_", ++dup_cnt, ".", "<", "DUP:ISP", ">", 255, ( sv->filtered == false) ? "PASS" : "LowQual");

if( seq != NULL)
free( seq);

invdup_cnt++;
sv_len = abs( ( sv->inner_end - sv->outer_end + 1));
total_invdup_length += sv_len;
}
else if( sv->svtype == INTERDUPRIGHT)
{
if( params->seq_resolved != 0)
if( sv->inner_start > sv->outer_start)
seq = readRefAltSeq( params, sv->chr_name, sv->outer_start, sv->inner_start);
else
seq = readRefAltSeq( params, sv->chr_name, sv->inner_start, sv->outer_start);

if( ( params->seq_resolved != 0 || sv->imprecise == NOT_IMPRECISE) && seq != NULL)
{
fprintf( fpOut,"%s\t%i\t%s%d\t%s\t%s\t%d\t%s\t", sv->chr_name, ( sv->outer_start) + 1," vh_dup_", ++dup_cnt, ".", ".", 255, ( sv->filtered == false) ? "PASS" : "LowQual");
fprintf( fpOut,"%s\t%i\t%s%d\t%c\t%s\t%d\t%s\t", sv->chr_name, ( sv->outer_start) + 1," vh_dup_", ++dup_cnt, seq[0], seq, 255, ( sv->filtered == false) ? "PASS" : "LowQual");
}
else if( seq!= NULL)
fprintf( fpOut,"%s\t%i\t%s%d\t%c\t%s%s%s\t%d\t%s\t", sv->chr_name, ( sv->outer_start) + 1," vh_dup_", ++dup_cnt, seq[0], "<", "DUP:ISP", ">", 255, ( sv->filtered == false) ? "PASS" : "LowQual");
else
fprintf( fpOut,"%s\t%i\t%s%d\t%s\t%s%s%s\t%d\t%s\t", sv->chr_name, ( sv->outer_start) + 1," vh_dup_", ++dup_cnt, ".", "<", "DUP:ISP", ">", 255, ( sv->filtered == false) ? "PASS" : "LowQual");

if( seq != NULL)
free( seq);

interdup_cnt++;
sv_len = abs( ( sv->inner_start - sv->outer_start + 1));
total_interdup_length += sv_len;
}
else if( sv->svtype == INTERDUPLEFT )
{
if( params->seq_resolved != 0)
if( sv->outer_end > sv->inner_end)
seq = readRefAltSeq( params, sv->chr_name, sv->inner_end, sv->outer_end);
else
seq = readRefAltSeq( params, sv->chr_name, sv->outer_end, sv->inner_end);

if( ( params->seq_resolved != 0 || sv->imprecise == NOT_IMPRECISE) && seq != NULL)
{
fprintf( fpOut,"%s\t%i\t%s%d\t%s\t%s\t%d\t%s\t", sv->chr_name, ( sv->inner_end) + 1," vh_dup_", ++dup_cnt, ".", ".", 255, ( sv->filtered == false) ? "PASS" : "LowQual");
fprintf( fpOut,"%s\t%i\t%s%d\t%c\t%s\t%d\t%s\t", sv->chr_name, ( sv->inner_end) + 1," vh_dup_", ++dup_cnt, seq[0], seq, 255, ( sv->filtered == false) ? "PASS" : "LowQual");
}
else if( seq != NULL)
fprintf( fpOut,"%s\t%i\t%s%d\t%c\t%s%s%s\t%d\t%s\t", sv->chr_name, ( sv->inner_end) + 1," vh_dup_", ++dup_cnt, seq[0], "<", "DUP:ISP", ">", 255, ( sv->filtered == false) ? "PASS" : "LowQual");
else
fprintf( fpOut,"%s\t%i\t%s%d\t%s\t%s%s%s\t%d\t%s\t", sv->chr_name, ( sv->inner_end) + 1," vh_dup_", ++dup_cnt, ".", "<", "DUP:ISP", ">", 255, ( sv->filtered == false) ? "PASS" : "LowQual");

if( seq != NULL)
free( seq);

interdup_cnt++;
sv_len = abs( ( sv->inner_end - sv->outer_end + 1));
total_interdup_length += sv_len;
Expand Down
29 changes: 19 additions & 10 deletions vh/vh_divethandler.c
Original file line number Diff line number Diff line change
Expand Up @@ -371,9 +371,9 @@ DivetRow *vh_loadDivetRowFromBamAlternative( alternativeMapping *discordantReadP


//fprintf( divet_file, "%s\t%s\t%d\t%d\t%c\t=\t%d\t%d\t%c\t%c\t%d\t%d\t%d\n", discordantReadPtr->readName, discordantReadPtr->chromosome_name,
// discordantReadPtr->pos1, discordantReadPtr->pos1_End, orientLeft, discordantReadPtr->pos2,
//discordantReadPtr->pos2_End, orientRight, SV, discordantReadPtr->editDistance,
//discordantReadPtr->mQual1, discordantReadPtr->mQual2);
// discordantReadPtr->pos1, discordantReadPtr->pos1_End, orientLeft, discordantReadPtr->pos2,
//discordantReadPtr->pos2_End, orientRight, SV, discordantReadPtr->editDistance,
//discordantReadPtr->mQual1, discordantReadPtr->mQual2);

DivetRow *newRow = createDivetRowNew (libInfo->hash, discordantReadPtr->ten_x_barcode,
discordantReadPtr->readName, discordantReadPtr->chromosome_name, discordantReadPtr->pos1,
Expand Down Expand Up @@ -515,8 +515,8 @@ int read_Divet_bam( discordantMapping **mapping, parameters *params, LibraryInfo
while( discordantReadPtr != NULL)
{
is_satellite = sonic_is_satellite( params->this_sonic, discordantReadPtr->chromosome_name , discordantReadPtr->pos1, discordantReadPtr->pos1_End)
+ sonic_is_satellite( params->this_sonic, discordantReadPtr->chromosome_name, discordantReadPtr->pos2, discordantReadPtr->pos2_End)
+ sonic_is_satellite( params->this_sonic, discordantReadPtr->chromosome_name , discordantReadPtr->pos2, discordantReadPtr->pos2_End);
+ sonic_is_satellite( params->this_sonic, discordantReadPtr->chromosome_name, discordantReadPtr->pos2, discordantReadPtr->pos2_End)
+ sonic_is_satellite( params->this_sonic, discordantReadPtr->chromosome_name , discordantReadPtr->pos2, discordantReadPtr->pos2_End);

if ( is_satellite == 0 && discordantReadPtr->mQual1 > params->mq_threshold && discordantReadPtr->mQual2 > params->mq_threshold
&& !sonic_is_gap( params->this_sonic, params->this_sonic->chromosome_names[chr_index], discordantReadPtr->pos1, discordantReadPtr->pos2)
Expand Down Expand Up @@ -626,9 +626,9 @@ int read_Divet_bam_alternative( alternativeMapping **mapping, parameters *params
/* Since MT is circular, we need to eliminate the read-pairs at both ends of the chromosome */
insLen = abs( discordantReadPtr->pos1 - discordantReadPtr->pos2);
is_satellite = sonic_is_satellite( params->this_sonic, discordantReadPtr->chromosome_name , discordantReadPtr->pos1, discordantReadPtr->pos1_End)
+ sonic_is_satellite( params->this_sonic, discordantReadPtr->chromosome_name, discordantReadPtr->pos2, discordantReadPtr->pos2_End)
+ sonic_is_satellite( params->this_sonic, discordantReadPtr->chromosome_name , discordantReadPtr->pos2, discordantReadPtr->pos2_End)
+ sonic_is_satellite( params->this_sonic, discordantReadPtr->chromosome_name, discordantReadPtr->pos2, discordantReadPtr->pos2_End);
+ sonic_is_satellite( params->this_sonic, discordantReadPtr->chromosome_name, discordantReadPtr->pos2, discordantReadPtr->pos2_End)
+ sonic_is_satellite( params->this_sonic, discordantReadPtr->chromosome_name , discordantReadPtr->pos2, discordantReadPtr->pos2_End)
+ sonic_is_satellite( params->this_sonic, discordantReadPtr->chromosome_name, discordantReadPtr->pos2, discordantReadPtr->pos2_End);

if ( is_satellite == 0 && ( discordantReadPtr->mQual1 > params->mq_threshold) &&
( !sonic_is_gap( params->this_sonic, params->this_sonic->chromosome_names[chr_index], discordantReadPtr->pos1, discordantReadPtr->pos2)) &&
Expand Down Expand Up @@ -826,20 +826,23 @@ int load_Divet_bam( bam_info** in_bams, parameters *params, int chr_index)
g_maxListBrkPointIntr = MAXLISTBRKPOINTINTR;
g_libInfo = NULL;

/*
/*
divetfile_path = (char *) getMem(sizeof(char) * (2+strlen("divv.vh")+strlen(params->outprefix)));
sprintf(divetfile_path, "%s-%s", params->outprefix, "divv.vh");
divet_file = safe_fopen (divetfile_path, "w");
free(divetfile_path);
*/
*/

del_cnt_div = 0, ins_cnt_div = 0, inv_cnt_div = 0, tandup_cnt_div = 0, sr_cnt_div = 0, alt_cnt_div = 0, invdup_cnt_div = 0, interdup_cnt_div = 0;

for( bam_index = 0; bam_index < params->num_bams; bam_index++)
{
for( lib_cnt = 0; lib_cnt < in_bams[bam_index]->num_libraries; lib_cnt++)
{
if( in_bams[bam_index]->libraries[lib_cnt]->is_valid == false)
continue;

newLibInfo = ( struct LibraryInfo *) getMem( sizeof( struct LibraryInfo));
strcpy( newLibInfo->libName, in_bams[bam_index]->libraries[lib_cnt]->libname);
strcpy( newLibInfo->indName, in_bams[bam_index]->sample_name);
Expand Down Expand Up @@ -882,6 +885,9 @@ int load_Divet_bam( bam_info** in_bams, parameters *params, int chr_index)
{
for( lib_cnt = 0; lib_cnt < in_bams[bam_index]->num_libraries; lib_cnt++)
{
if( in_bams[bam_index]->libraries[lib_cnt]->is_valid == false)
continue;

newLibInfo = ( struct LibraryInfo *) getMem( sizeof( struct LibraryInfo));
strcpy( newLibInfo->libName, "Alternative\0");
strcpy( newLibInfo->indName, in_bams[bam_index]->sample_name);
Expand Down Expand Up @@ -926,6 +932,9 @@ int load_Divet_bam( bam_info** in_bams, parameters *params, int chr_index)
{
for( lib_cnt = 0; lib_cnt < in_bams[bam_index]->num_libraries; lib_cnt++)
{
if( in_bams[bam_index]->libraries[lib_cnt]->is_valid == false)
continue;

newLibInfo = ( struct LibraryInfo *) getMem( sizeof( struct LibraryInfo));
strcpy( newLibInfo->libName, "SplitRead\0");
strcpy( newLibInfo->indName, in_bams[bam_index]->sample_name);
Expand Down

0 comments on commit e64cf62

Please sign in to comment.