Skip to content

Commit

Permalink
Fixes issue 23: fatal errors when run on merged vcf files.
Browse files Browse the repository at this point in the history
Changed dot values to 0 unless the value is for GT.
Added ALT to sample info to know how many AO values there should be.
Checked number of AO values against the number of ALT values in expandSampleInfo.
Converted single dot values to multiple zeros for AO values in expandSampleInfo.
Added test 12 specific to this fix.

Files checked in: src/vcfSampleCompare.pl t/run_tests.t
  • Loading branch information
hepcat72 committed Dec 4, 2019
1 parent a65a284 commit 23309a7
Show file tree
Hide file tree
Showing 5 changed files with 1,340 additions and 18 deletions.
64 changes: 46 additions & 18 deletions src/vcfSampleCompare.pl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
use strict;
use CommandLineInterface;

our $VERSION = '2.012';
our $VERSION = '2.013';
setScriptInfo(VERSION => $VERSION,
CREATED => '6/22/2017',
AUTHOR => 'Robert William Leach',
Expand Down Expand Up @@ -914,9 +914,15 @@
#Create easy access to the sample info by creating a hash like:
#sample_info->{$samplename}->{GT} = value
$sample_info->{$sample} =
{map {$_ => $d[$format_key_tosubindex->{$_}]}
{map {$_ => ($d[$format_key_tosubindex->{$_}] eq '.' &&
$_ ne 'GT' ? 0 : $d[$format_key_tosubindex->{$_}])}
keys(%$format_key_tosubindex)};
$orig_sample_order->{$sample} = $sample_index;

#Let's add ALT, because if data was merged from multiple runs,
#different samples will have comma-delimited AO values while others
#will have a single '.' even though there may be multiple ALT values
$sample_info->{$sample}->{ALT} = $cols[4];
}

#Quick error check. There must be either 0 or scalar(@$group_diff_mins)
Expand Down Expand Up @@ -1615,6 +1621,9 @@ sub createStaticMinSampleGroupPair
my $best_remainder2 = [];
my $best_gap = 0;

debug("Variant (AO/RO) keys: [",join(',',@variant_states),"].",
{LEVEL => 3});

foreach my $state (@variant_states)
{
my $tmp_group1_1 = [];#Case1: bottom of group 1 vs top of group 2
Expand Down Expand Up @@ -2661,40 +2670,59 @@ sub expandSampleInfo
{
#Purpose: flatten per-sample FORMAT values so that every comma-delimited
#value gets its own numbered key (e.g. AO => "3,5" becomes AO1 => 3 and
#AO2 => 5), and report which AO key names were produced.
#
#Input:  $_[0] - hash reference accessed as $sample_info->{sample}->{key}.
#Output: a 2-element list: the expanded hash reference (same sample keys,
#        values stored under either the original key or key1..keyN) and a
#        reference to the AO key names that were seen.
#
#NOTE(review): this listing is a rendered diff without +/- markers, so a
#superseded line and its replacement appear back to back (two declarations
#of $ao_keys, two of $val, two return statements, and both array-style and
#hash-style uses of $ao_keys). Presumably the second line of each pair is
#the post-change code - confirm against the repository before relying on
#any single line here.
my $sample_info = $_[0];
my $expanded_sample_info = {};
#AO key names are collected as hash keys in the second declaration, so each
#name is recorded once no matter how many samples contribute it
my $ao_keys = [];
my $ao_keys = {};

foreach my $sample (keys(%$sample_info))
{
#The limit of -1 makes split keep trailing empty fields, so the element
#count reflects the number of comma-separated ALT entries as written
my @real_alts = split(/,/,$sample_info->{$sample}->{ALT},-1);

#Every key stored for the sample is visited, including ALT itself when
#it is present in the hash
foreach my $key (keys(%{$sample_info->{$sample}}))
{
#A lone '.' is replaced with 0 for every key except GT
my $val = $sample_info->{$sample}->{$key};
my $val = ($sample_info->{$sample}->{$key} eq '.' && $key ne 'GT' ?
0 : $sample_info->{$sample}->{$key});
my @alts = split(/,/,$val,-1);

#RO and DP are expected to hold exactly one value; more than one is
#reported through error()
if(scalar(@alts) != 1 && ($key eq 'RO' || $key eq 'DP'))
{error("Multiple values found in $key key [$val].",
{DETAIL => ('This script assumes that the FORMAT ' .
'keys "DP" and "RO" each have a single ' .
'(comma-delimited) value, but found more ' .
'than 1.')})}
#A single AO value other than '.' or '0' cannot be assigned to one of
#several ALT states: report it and substitute one zero per ALT value.
#NOTE(review): the assignment after error() is evidently meant to run,
#so error() presumably returns rather than exits - confirm.
#NOTE(review): with the dot-to-zero conversion applied to $val above,
#$val cannot be '.' when $key is AO, so the '.' comparisons in this
#branch and the next appear redundant with the '0' comparisons.
elsif($key eq 'AO' && scalar(@real_alts) != 1 &&
scalar(@alts) == 1 && $val ne '.' && $val ne '0')
{
error("Single value found in $key key [$val] but multiple ALT ",
"values: [$sample_info->{$sample}->{ALT}]. Cannot ",
"associate it with an ALT state. Setting zeroes.",
{DETAIL => ('Multiple ALT values must map to the same ' .
'number of observation values for key ' .
"$key.")});
@alts = map {0} @real_alts;
}
#A single missing/zero AO value with several ALT values is expanded to
#one zero per ALT value without raising an error
elsif($key eq 'AO' && scalar(@real_alts) != 1 &&
scalar(@alts) == 1 && ($val eq '.' || $val eq '0'))
{@alts = map {0} @real_alts}

if(scalar(@alts) == 1)
{
#Single value: stored under the unnumbered key
$expanded_sample_info->{$sample}->{$key} = $val;
if($key eq 'AO' && scalar(@$ao_keys) == 0)
{push(@$ao_keys,'AO')}
if($key eq 'AO')
{$ao_keys->{AO}++}
}
else
{
#NOTE(review): this RO/DP check repeats the one made before the
#single/multiple split above; in this rendering it looks like the
#pre-change location of that check - confirm.
if($key eq 'RO' || $key eq 'DP')
{
error("Multiple values found in $key key [$val].",
{DETAIL => 'This script assumes that the FORMAT ' .
'keys "DP" and "RO" each have a single (comma-' .
'delimited) value, but found more than 1.'});
}

#Multiple AO values: record the numbered names AO1..AOn
if($key eq 'AO' && scalar(@$ao_keys) == 0)
{push(@$ao_keys,map {"AO$_"} (1..scalar(@alts)))}
if($key eq 'AO')
{foreach my $key (map {"AO$_"} (1..scalar(@alts)))
{$ao_keys->{$key}++}}

#Each value is stored under key1..keyN (1-based); in the second form
#of the assignment a '.' element becomes 0 unless the key is GT
for(my $i = 0;$i <= $#alts;$i++)
{$expanded_sample_info->{$sample}->{$key .($i+1)} = $alts[$i]}
{$expanded_sample_info->{$sample}->{$key .($i+1)} =
($alts[$i] eq '.' && $key ne 'GT' ? 0 : $alts[$i])}
}
}
}

#NOTE(review): sort without a comparator orders the names as strings, so
#with ten or more ALT values 'AO10' is returned ahead of 'AO2' - check
#whether any caller depends on numeric order.
return($expanded_sample_info,$ao_keys);
return($expanded_sample_info,[sort(keys(%$ao_keys))]);
}

sub max
Expand Down
4 changes: 4 additions & 0 deletions t/expected/mixed_dots.vcf.vsc
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#CHROM POS ID REF ALT BEST_PAIR BEST_GT_SCORE BEST_OR_SCORE BEST_DP_SCORE PAIR_ID PAIR_GT_SCORE PAIR_OR_SCORE PAIR_DP_SCORE STATES_USED_GT STATES_USED_OR GROUP1_SAMPLES GROUP1_GTS GROUP1_ORS GROUP2_SAMPLES GROUP2_GTS GROUP2_ORS
1 11291515 . G C,A 1.1 1 0.7 0.3 1.1 1 0.7 0.3 0/1+1/1;0/2 1 147_93B,149_93D 1/1,0/1 2/2,4/10 153_93H 0/2 0/8
1 1493804 . A C,G 1.1 1 0.8 0.25 1.1 1 0.8 0.25 0/0;0/2 2 147_93B,149_93D ./.,0/0 .,0/17 153_93H 0/2 4/5
1 6692944 . T A,C 1.1 1 0.75 0.2 1.1 1 0.75 0.2 0/0;0/2 2 147_93B,149_93D ./.,0/0 .,0/14 153_93H 0/2 3/4
Loading

0 comments on commit 23309a7

Please sign in to comment.