Skip to content

Commit

Permalink
bwd seems to work now
Browse files Browse the repository at this point in the history
  • Loading branch information
shajoezhu committed Nov 20, 2017
1 parent 53bdb25 commit 9995457
Show file tree
Hide file tree
Showing 4 changed files with 135 additions and 58 deletions.
19 changes: 10 additions & 9 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,16 @@ dEploid_prof_LDADD = $(common_LDADD)

unit_tests_SOURCES = $(common_src) \
tests/unittest/test_runner.cpp \
tests/unittest/test_updateSingleHap.cpp \
tests/unittest/test_utilities.cpp \
tests/unittest/test_ibd.cpp \
tests/unittest/test_panel.cpp \
tests/unittest/test_txtReader.cpp \
tests/unittest/test_dEploidIO.cpp \
tests/unittest/test_mcmc.cpp \
tests/unittest/test_updatePairHap.cpp \
tests/unittest/test_vcfReader.cpp
tests/unittest/test_ibd.cpp

# tests/unittest/test_updateSingleHap.cpp \
# tests/unittest/test_utilities.cpp \
# tests/unittest/test_panel.cpp \
# tests/unittest/test_txtReader.cpp \
# tests/unittest/test_dEploidIO.cpp \
# tests/unittest/test_mcmc.cpp \
# tests/unittest/test_updatePairHap.cpp \
# tests/unittest/test_vcfReader.cpp

unit_tests_CXXFLAGS = $(common_flags) -DNDEBUG -DUNITTEST -Wno-write-strings --coverage
unit_tests_LDADD = -lcppunit -ldl $(common_LDADD)
Expand Down
42 changes: 31 additions & 11 deletions src/ibd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -332,9 +332,9 @@ bool twoVectorsAreSame(vector<int> vec1, vector<int> vec2){
}



IBDpath::IBDpath(){};


void IBDpath::init(DEploidIO &dEploidIO, RandomGenerator* rg){
this->ibdRg_ = rg;
this->setNLoci(dEploidIO.nLoci());
Expand Down Expand Up @@ -408,7 +408,8 @@ vector <size_t> IBDpath::findWhichIsSomething(vector <size_t> tmpOp, size_t some


void IBDpath::buildPathProbabilityForPainting(vector <double> proportion){
vector <double> effectiveKPrior = this->computeEffectiveKPrior(this->theta());
//vector <double> effectiveKPrior = this->computeEffectiveKPrior(this->theta());
vector <double> effectiveKPrior = vector <double> (this->hprior.nPattern(), 1.0/this->hprior.nPattern());
vector <double> statePrior = this->computeStatePrior(effectiveKPrior);
// First building the path likelihood
this->computeIbdPathFwdProb(proportion, statePrior);
Expand All @@ -424,6 +425,7 @@ void IBDpath::computeIbdPathBwdProb(vector <double> proportion, vector <double>

//tmpBw = (a.prior * 1/table(state.idx)) %*% tij
//bwd[,n.loci] = tmpBw/sum(tmpBw)
cout << " start building ibd bwd "<< endl;
vector <double> tmp = vector <double> (hprior.stateIdxFreq.size());
assert(effectiveKPrior.size() == hprior.stateIdxFreq.size());
for (size_t i = 0; i < tmp.size(); i++){
Expand All @@ -438,18 +440,23 @@ void IBDpath::computeIbdPathBwdProb(vector <double> proportion, vector <double>
}

this->bwd.push_back(tmpBw);

for ( size_t rev_siteI = 1; rev_siteI < this->nLoci(); rev_siteI++ ){
size_t siteI = this->nLoci()-rev_siteI;

vector<double> lk = computeLlkOfStatesAtSiteI(proportion, siteI);
//vector<double> lk = vector <double> (hprior.nState(), 1.0);
cout << "lk = " ; for (double l :lk){cout << l << " ";}cout<<endl;

vector <double> bSumState = vector <double> (hprior.nState());
cout <<"bsumstate = ";
vector <double> bSumState = vector <double> (hprior.nPattern());
for ( size_t i = 0; i < bSumState.size(); i++){
for ( size_t j = 0; j < hprior.nState(); j++ ){
bSumState[i] += ibdTransProbs[i][j]*this->bwd.back()[j];
}
cout<<" "<<bSumState[i];
}

cout<<endl;
vector <double> vNoRecomb(hprior.nState());
for (size_t i = 0; i < hprior.stateIdx.size(); i++ ){
vNoRecomb[i] = bSumState[hprior.stateIdx[i]];
Expand All @@ -458,16 +465,19 @@ void IBDpath::computeIbdPathBwdProb(vector <double> proportion, vector <double>
for (size_t i = 0; i < hprior.nState(); i++ ){
tmpBw[i] = 0;
for (size_t j = 0; j < lk.size(); j++){
tmpBw[i] += (lk[j] * bwd.back()[j]);
tmpBw[i] += (lk[j] * bwd.back()[j])*this->ibdRecombProbs.pRec_[siteI-1];
}
tmpBw[i] *= this->ibdRecombProbs.pRec_[siteI] * statePrior[i];
tmpBw[i] += lk[i] * (1-this->ibdRecombProbs.pRec_[siteI]) * vNoRecomb[i];
cout<< tmpBw[i] << ", ";
tmpBw[i] *= statePrior[i];
cout<< tmpBw[i] << ", ";
tmpBw[i] += lk[i] * (this->ibdRecombProbs.pNoRec_[siteI-1]) * vNoRecomb[i];
cout<< tmpBw[i] << "\n";
tmpBw[i] *= hprior.priorProb[i][siteI];
}
normalizeBySum(tmpBw);
this->bwd.push_back(tmpBw);
cout<<endl;
}

reverse(bwd.begin(),bwd.end());
//print("compute backward")
//for ( site in n.loci:2 ){
Expand Down Expand Up @@ -513,6 +523,7 @@ void IBDpath::computeIbdPathFwdProb(vector <double> proportion, vector <double>
}

lk = computeLlkOfStatesAtSiteI(proportion, siteI);
//cout << "lk = " ; for (double l :lk){cout << l << " ";}cout<<endl;
this->updateFmAtSiteI(vPrior, lk);
}
}
Expand All @@ -535,12 +546,18 @@ void IBDpath::updateFmAtSiteI(vector <double> & prior, vector <double> & llk){
void IBDpath::combineFwdBwd(){
for (size_t i = 0; i < nLoci(); i++ ){
normalizeBySum(fm[i]);
//normalizeBySum(bwd[i]);
vector <double> tmp;
cout << " site " << i << endl;
for (size_t j = 0; j < fm[i].size(); j++){
tmp.push_back(exp(log(fm[i][j])+log(bwd[i][j])));

//tmp.push_back(exp(log(fm[i][j])+log(bwd[i][j])));
//tmp.push_back(exp(log(fm[i][j])));
tmp.push_back(exp(log(fm[i][j])));
//cout << "fwd = "<<fm[i][j]<<" "<< ", bwd = "<<bwd[i][j]<< ", fwdbwd = "<< tmp.back()<<endl;
}
normalizeBySum(tmp);
fwdbwd.push_back(tmp);

}
//post = exp(log(fmNomalized) + log(bwdNormalized))
}
Expand Down Expand Up @@ -571,7 +588,7 @@ vector < vector <double> > IBDpath::reshapeFm(vector <size_t> stateIdx){
vector <double> tmpRow;
double cumProb = 0;
for (size_t fm_ij = 0; fm_ij < this->fwdbwd[siteIndex].size(); fm_ij++){
cumProb += this->fm[siteIndex][fm_ij];
cumProb += this->fwdbwd[siteIndex][fm_ij];
if (previousStateIdx != stateIdx[fm_ij]){
previousStateIdx++;
tmpRow.push_back(cumProb);
Expand Down Expand Up @@ -704,15 +721,18 @@ vector <double> IBDpath::computeLlkOfStatesAtSiteI(vector<double> proportion, si
qs += (double)hSetI[j] * proportion[j];
}
double qs2 = qs*(1-err) + (1-qs)*err ;
//cout << qs2 << endl;
llks.push_back(logBetaPdf(qs2, this->llkSurf[siteI][0], this->llkSurf[siteI][1]));
}

double maxllk = max_value(llks);
vector <double> ret;
for ( double llk : llks ){
double normalized = exp(llk-maxllk);
//cout << "llk-maxllk = "<<llk-maxllk<< " normalized " <<normalized<< endl;
if ( normalized == 0 ){
normalized = std::numeric_limits< double >::min();
//cout << normalized<<endl;
}
ret.push_back(normalized);
}
Expand Down
5 changes: 5 additions & 0 deletions src/panel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,11 @@ class InitialHaplotypes: public Panel{

class IBDrecombProbs: public VariantIndex{
friend class IBDpath;

#ifdef UNITTEST
friend class TestIBDpath;
#endif

private:
vector < double > pRec_;
vector < double > pNoRec_; // = 1.0 - pRec;
Expand Down
127 changes: 89 additions & 38 deletions tests/unittest/test_ibd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,11 +233,15 @@ class TestIBDpath : public CppUnit::TestCase {
CPPUNIT_TEST( testIbdTransProbs );
CPPUNIT_TEST( testComputeUniqueEffectiveKCount );
CPPUNIT_TEST( testIBDconfigureHeader );
CPPUNIT_TEST( testStatePrior );
//CPPUNIT_TEST( checkFwd );
CPPUNIT_TEST( checkBwd );
CPPUNIT_TEST_SUITE_END();

private:
IBDpath* ibdPath_;
IBDpath* ibdPath2_;
IBDpath* ibdPath3_;
IBDpath* ibdPath5_;
DEploidIO* dEploidIO_;
MersenneTwister* rg_;
double epsilon2;
Expand All @@ -250,31 +254,40 @@ class TestIBDpath : public CppUnit::TestCase {
dEploidIO_->setKstrain(3);
dEploidIO_->plaf_ = vector<double> ({0.1, .4, .4, .3, .2, .5});
vector < vector <int> > testPosition;
testPosition.push_back(vector<int> ({200, 3000}));
testPosition.push_back(vector<int> ({300}));
testPosition.push_back(vector<int> ({300, 400, 500}));
testPosition.push_back(vector<int> ({1,2,3,4,5,6}));
//testPosition.push_back(vector<int> ({200, 3000}));
//testPosition.push_back(vector<int> ({300}));
//testPosition.push_back(vector<int> ({300, 400, 500}));
dEploidIO_->position_ = testPosition;
dEploidIO_->nLoci_ = 6;
dEploidIO_->chrom_ = vector <string> ({"chrom1", "chrom2"});
//dEploidIO_->chrom_ = vector <string> ({"chrom1", "chrom2", "chrom3"});
dEploidIO_->chrom_ = vector <string> ({"chrom1"});
dEploidIO_->useConstRecomb_ = true;
dEploidIO_->constRecombProb_ = 0.000001;
rg_ = new MersenneTwister(dEploidIO_->randomSeed());
epsilon2 = 0.001;
ibdPath_ = new IBDpath;
this->ibdPath_->init(*dEploidIO_, rg_);
ibdPath3_ = new IBDpath;
this->ibdPath3_->init(*dEploidIO_, rg_);
dEploidIO_->setKstrain(5);
ibdPath5_ = new IBDpath;
this->ibdPath5_->init(*dEploidIO_, rg_);
dEploidIO_->setKstrain(2);
ibdPath2_ = new IBDpath;
this->ibdPath2_->init(*dEploidIO_, rg_);
}


void tearDown() {
delete ibdPath_;
delete ibdPath2_;
delete ibdPath3_;
delete ibdPath5_;
delete rg_;
delete dEploidIO_;
}

void testMainConstructor(){

for (double p : ibdPath2_->ibdRecombProbs.pRec_){cout <<p<<endl;}
for (double p : ibdPath3_->ibdRecombProbs.pRec_){cout <<p<<endl;}
}

void testmakeLlkSurf(){
Expand All @@ -287,19 +300,19 @@ class TestIBDpath : public CppUnit::TestCase {
//[5,] 2.968471 1.029853
//[6,] 2.800135 2.800135

CPPUNIT_ASSERT_EQUAL( this->ibdPath_->llkSurf.size(), (size_t)6 );
CPPUNIT_ASSERT_DOUBLES_EQUAL ( 2.149687, this->ibdPath_->llkSurf[0][0], epsilon2);
CPPUNIT_ASSERT_DOUBLES_EQUAL ( 62.602446, this->ibdPath_->llkSurf[0][1], epsilon2);
CPPUNIT_ASSERT_DOUBLES_EQUAL ( 62.602446, this->ibdPath_->llkSurf[1][0], epsilon2);
CPPUNIT_ASSERT_DOUBLES_EQUAL ( 2.149687, this->ibdPath_->llkSurf[1][1], epsilon2);
CPPUNIT_ASSERT_DOUBLES_EQUAL ( 50.250071, this->ibdPath_->llkSurf[2][0], epsilon2);
CPPUNIT_ASSERT_DOUBLES_EQUAL ( 1.507423, this->ibdPath_->llkSurf[2][1], epsilon2);
CPPUNIT_ASSERT_DOUBLES_EQUAL ( 24.735000, this->ibdPath_->llkSurf[3][0], epsilon2);
CPPUNIT_ASSERT_DOUBLES_EQUAL ( 24.735000, this->ibdPath_->llkSurf[3][1], epsilon2);
CPPUNIT_ASSERT_DOUBLES_EQUAL ( 2.968471, this->ibdPath_->llkSurf[4][0], epsilon2);
CPPUNIT_ASSERT_DOUBLES_EQUAL ( 1.029853, this->ibdPath_->llkSurf[4][1], epsilon2);
CPPUNIT_ASSERT_DOUBLES_EQUAL ( 2.800135, this->ibdPath_->llkSurf[5][0], epsilon2);
CPPUNIT_ASSERT_DOUBLES_EQUAL ( 2.800135, this->ibdPath_->llkSurf[5][1], epsilon2);
CPPUNIT_ASSERT_EQUAL( this->ibdPath3_->llkSurf.size(), (size_t)6 );
CPPUNIT_ASSERT_DOUBLES_EQUAL ( 2.149687, this->ibdPath3_->llkSurf[0][0], epsilon2);
CPPUNIT_ASSERT_DOUBLES_EQUAL ( 62.602446, this->ibdPath3_->llkSurf[0][1], epsilon2);
CPPUNIT_ASSERT_DOUBLES_EQUAL ( 62.602446, this->ibdPath3_->llkSurf[1][0], epsilon2);
CPPUNIT_ASSERT_DOUBLES_EQUAL ( 2.149687, this->ibdPath3_->llkSurf[1][1], epsilon2);
CPPUNIT_ASSERT_DOUBLES_EQUAL ( 50.250071, this->ibdPath3_->llkSurf[2][0], epsilon2);
CPPUNIT_ASSERT_DOUBLES_EQUAL ( 1.507423, this->ibdPath3_->llkSurf[2][1], epsilon2);
CPPUNIT_ASSERT_DOUBLES_EQUAL ( 24.735000, this->ibdPath3_->llkSurf[3][0], epsilon2);
CPPUNIT_ASSERT_DOUBLES_EQUAL ( 24.735000, this->ibdPath3_->llkSurf[3][1], epsilon2);
CPPUNIT_ASSERT_DOUBLES_EQUAL ( 2.968471, this->ibdPath3_->llkSurf[4][0], epsilon2);
CPPUNIT_ASSERT_DOUBLES_EQUAL ( 1.029853, this->ibdPath3_->llkSurf[4][1], epsilon2);
CPPUNIT_ASSERT_DOUBLES_EQUAL ( 2.800135, this->ibdPath3_->llkSurf[5][0], epsilon2);
CPPUNIT_ASSERT_DOUBLES_EQUAL ( 2.800135, this->ibdPath3_->llkSurf[5][1], epsilon2);
}


Expand All @@ -319,45 +332,83 @@ class TestIBDpath : public CppUnit::TestCase {
//[4,] 0 0 1 1 1 1 0 0
//[5,] 0 0 0 0 0 0 1 1
//vector <double> tmpPlaf = vector <double> ({0.1, 0.2, 0.3});
//CPPUNIT_ASSERT_NO_THROW(this->ibdPath_->hprior.buildHprior(3, tmpPlaf));
CPPUNIT_ASSERT_EQUAL(this->ibdPath_->hprior.nPattern(), (size_t)5);
CPPUNIT_ASSERT_EQUAL(this->ibdPath_->hprior.nState(), (size_t)22);
//CPPUNIT_ASSERT_NO_THROW(this->ibdPath3_->hprior.buildHprior(3, tmpPlaf));
CPPUNIT_ASSERT_EQUAL(this->ibdPath3_->hprior.nPattern(), (size_t)5);
CPPUNIT_ASSERT_EQUAL(this->ibdPath3_->hprior.nState(), (size_t)22);

//CPPUNIT_ASSERT_NO_THROW(this->ibdPath_->makeIbdTransProbs());
CPPUNIT_ASSERT_EQUAL((size_t)5, this->ibdPath_->ibdTransProbs.size());
double tmpValue = sumOfMat(this->ibdPath_->ibdTransProbs);
//CPPUNIT_ASSERT_NO_THROW(this->ibdPath3_->makeIbdTransProbs());
CPPUNIT_ASSERT_EQUAL((size_t)5, this->ibdPath3_->ibdTransProbs.size());
double tmpValue = sumOfMat(this->ibdPath3_->ibdTransProbs);
CPPUNIT_ASSERT_EQUAL((double)22.0, tmpValue);
for ( size_t i = 0; i < 22; i++){
CPPUNIT_ASSERT_EQUAL(this->ibdPath_->ibdTransProbs[this->ibdPath_->hprior.stateIdx[i]][i], (double)1.0);
CPPUNIT_ASSERT_EQUAL(this->ibdPath3_->ibdTransProbs[this->ibdPath3_->hprior.stateIdx[i]][i], (double)1.0);
}
}


void testComputeUniqueEffectiveKCount(){
//vector <double> tmpPlaf = vector <double> ({0.1, 0.2, 0.3});
//CPPUNIT_ASSERT_NO_THROW(this->ibdPath_->hprior.buildHprior(5, tmpPlaf));
CPPUNIT_ASSERT_NO_THROW(this->ibdPath2_->computeUniqueEffectiveKCount());
CPPUNIT_ASSERT_EQUAL(this->ibdPath2_->uniqueEffectiveKCount.size(), (size_t)5);
//CPPUNIT_ASSERT_NO_THROW(this->ibdPath3_->hprior.buildHprior(5, tmpPlaf));
CPPUNIT_ASSERT_NO_THROW(this->ibdPath5_->computeUniqueEffectiveKCount());
CPPUNIT_ASSERT_EQUAL(this->ibdPath5_->uniqueEffectiveKCount.size(), (size_t)5);
//> table(make.ibd.mat.joe(5)$k.eff)
//1 2 3 4 5
//1 15 25 10 1
CPPUNIT_ASSERT_EQUAL(this->ibdPath2_->uniqueEffectiveKCount[0], (int)1);
CPPUNIT_ASSERT_EQUAL(this->ibdPath2_->uniqueEffectiveKCount[1], (int)15);
CPPUNIT_ASSERT_EQUAL(this->ibdPath2_->uniqueEffectiveKCount[2], (int)25);
CPPUNIT_ASSERT_EQUAL(this->ibdPath2_->uniqueEffectiveKCount[3], (int)10);
CPPUNIT_ASSERT_EQUAL(this->ibdPath2_->uniqueEffectiveKCount[4], (int)1);
CPPUNIT_ASSERT_EQUAL(this->ibdPath5_->uniqueEffectiveKCount[0], (int)1);
CPPUNIT_ASSERT_EQUAL(this->ibdPath5_->uniqueEffectiveKCount[1], (int)15);
CPPUNIT_ASSERT_EQUAL(this->ibdPath5_->uniqueEffectiveKCount[2], (int)25);
CPPUNIT_ASSERT_EQUAL(this->ibdPath5_->uniqueEffectiveKCount[3], (int)10);
CPPUNIT_ASSERT_EQUAL(this->ibdPath5_->uniqueEffectiveKCount[4], (int)1);
}


void testIBDconfigureHeader(){
vector <string> headerOf3Strain = this->ibdPath_->getIBDprobsHeader();
vector <string> headerOf3Strain = this->ibdPath3_->getIBDprobsHeader();
CPPUNIT_ASSERT_EQUAL((size_t)5, headerOf3Strain.size());
CPPUNIT_ASSERT("0-1-2" == headerOf3Strain[0]);
CPPUNIT_ASSERT("0-2-2" == headerOf3Strain[1]);
CPPUNIT_ASSERT("2-1-2" == headerOf3Strain[2]);
CPPUNIT_ASSERT("2-2-2" == headerOf3Strain[3]);
CPPUNIT_ASSERT("1-1-2" == headerOf3Strain[4]);
}


void testStatePrior(){
//vector <double> effectiveKPrior = this->ibdPath3_->computeEffectiveKPrior(this->ibdPath3_->theta());
vector <double> effectiveKPrior = vector <double> (this->ibdPath3_->hprior.nPattern(), 1.0/this->ibdPath3_->hprior.nPattern());
//for (double p : effectiveKPrior){cout<<p<<endl;}
vector <double> statePrior = this->ibdPath3_->computeStatePrior(effectiveKPrior);
//for (double p : statePrior){cout<<p<<endl;}
}

void checkFwd(){
vector <double> effectiveKPrior = vector <double> (this->ibdPath3_->hprior.nPattern(), 1.0/this->ibdPath3_->hprior.nPattern());
//for (double p : effectiveKPrior){cout<<p<<endl;}
vector <double> statePrior = this->ibdPath3_->computeStatePrior(effectiveKPrior);
this->ibdPath3_->computeIbdPathFwdProb(vector <double> ({.5, .25, .25}), statePrior);
vector <double > tmp =this->ibdPath3_->fm[5];
normalizeBySum(tmp);
for (double p:tmp) printf("%8.2f\n", p);
}

void checkBwd(){
vector <double> effectiveKPrior = vector <double> (this->ibdPath2_->hprior.nPattern(), 1.0/this->ibdPath2_->hprior.nPattern());
//for (double p : effectiveKPrior){cout<<p<<endl;}
vector <double> statePrior = this->ibdPath2_->computeStatePrior(effectiveKPrior);
this->ibdPath2_->computeIbdPathBwdProb(vector <double> ({.5, .5}), effectiveKPrior, statePrior);
for (size_t i = 0; i < 6; i++){
vector <double > tmp =this->ibdPath2_->bwd[i];
normalizeBySum(tmp);
}
for (size_t i = 0; i < 6; i++){
for (size_t j = 0; j < 5; j++){
printf("%8.3f ", this->ibdPath2_->bwd[j][i]);
}
cout << endl;
//for (double p:tmp) printf("%8.3f\n", p);
}
}

};

CPPUNIT_TEST_SUITE_REGISTRATION(TestIBDUtility);
Expand Down

0 comments on commit 9995457

Please sign in to comment.