From a26be24c4f8411f097f20a85f54ed1384666870d Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Mon, 8 May 2017 09:17:02 +0100 Subject: [PATCH] compute diagnostics --- src/dEploidIO.hpp | 13 +++++++++ src/export/dEploidIOExport.cpp | 14 +++++++--- src/mcmc.cpp | 48 ++++++++++++++++++++++++++++++++++ src/mcmc.hpp | 2 ++ 4 files changed, 74 insertions(+), 3 deletions(-) diff --git a/src/dEploidIO.hpp b/src/dEploidIO.hpp index 40744939..eeff2e3f 100644 --- a/src/dEploidIO.hpp +++ b/src/dEploidIO.hpp @@ -159,6 +159,19 @@ class DEploidIO{ std::vector argv_; std::vector::iterator argv_i; + // Diagnostics + double maxLLKs_; + void setmaxLLKs ( const double setTo ){ this->maxLLKs_ = setTo; } + double meanThetallks_; + void setmeanThetallks ( const double setTo ){ this->meanThetallks_ = setTo; } + double meanllks_; + void setmeanllks ( const double setTo ){ this->meanllks_ = setTo; } + double stdvllks_; + void setstdvllks ( const double setTo ){ this->stdvllks_ = setTo; } + double dicByTheta_; + void setdicByTheta ( const double setTo ){ this->dicByTheta_ = setTo; } + double dicByVar_; + void setdicByVar ( const double setTo ){ this->dicByVar_ = setTo; } // Output stream string dEploidGitVersion_; diff --git a/src/export/dEploidIOExport.cpp b/src/export/dEploidIOExport.cpp index 0404fab3..53835a71 100644 --- a/src/export/dEploidIOExport.cpp +++ b/src/export/dEploidIOExport.cpp @@ -62,10 +62,10 @@ void DEploidIO::wrapUp(){ // Get End time before writing the log this->getTime(false); - this->writeLog (&std::cout ); + this->writeLog (&std::cout); ofstreamExportTmp.open( strExportLog.c_str(), ios::out | ios::app | ios::binary ); - this->writeLog (&ofstreamExportTmp ); + this->writeLog (&ofstreamExportTmp); ofstreamExportTmp.close(); } @@ -129,8 +129,8 @@ void DEploidIO::writeLog ( ostream * writeTo ){ (*writeTo) << setw(19) << " Update Prop: " << (this->doUpdateProp() ? "YES":"NO") << "\n"; (*writeTo) << setw(19) << " Update Single: " << (this->doUpdateSingle()? "YES":"NO") << "\n"; (*writeTo) << setw(19) << " Update Pair: " << (this->doUpdatePair() ? "YES":"NO") << "\n"; + (*writeTo) << "\n"; } - (*writeTo) << "\n"; (*writeTo) << "Other parameters:"<< "\n"; if ( forbidCopyFromSame_ ){ (*writeTo) << " Update pair haplotypes move forbid copying from the same strain!!! \n"; } (*writeTo) << setw(20) << " Miss copy prob: " << this->missCopyProb_ << "\n"; @@ -145,6 +145,14 @@ void DEploidIO::writeLog ( ostream * writeTo ){ } } (*writeTo) << "\n"; + (*writeTo) << "MCMC diagnostic:"<< "\n"; + (*writeTo) << setw(19) << " Max_llks: " << maxLLKs_ << "\n"; + (*writeTo) << setw(19) << " Mean_theta_llks: " << meanThetallks_ << "\n"; + (*writeTo) << setw(19) << " Mean_llks: " << meanllks_ << "\n"; + (*writeTo) << setw(19) << " Stdv_llks: " << stdvllks_ << "\n"; + (*writeTo) << setw(19) << " DIC_by_Dtheta: " << dicByTheta_ << "\n"; + (*writeTo) << setw(19) << " DIC_by_varD: " << dicByVar_ << "\n"; + (*writeTo) << "\n"; (*writeTo) << "Run time:\n"; (*writeTo) << setw(14) << "Start at: " << startingTime_ ; (*writeTo) << setw(14) << "End at: " << endTime_ ; diff --git a/src/mcmc.cpp b/src/mcmc.cpp index 62f3a734..315155f3 100644 --- a/src/mcmc.cpp +++ b/src/mcmc.cpp @@ -174,6 +174,7 @@ void McmcMachinery::initializeExpectedWsaf(){ assert( this->currentExpectedWsaf_.size() == 0); this->currentExpectedWsaf_ = this->calcExpectedWsaf( this->currentProp_ ); assert( this->currentExpectedWsaf_.size() == this->nLoci_ ); + this->cumExpectedWsaf_ = this->currentExpectedWsaf_; } @@ -318,6 +319,7 @@ void McmcMachinery::runMcmcChain( bool showProgress, bool useIBD ){ this->mcmcSample_->siteOfOneSwitchOne[atSiteI] /= (double)this->maxIteration_; this->mcmcSample_->siteOfOneMissCopyOne[atSiteI] /= (double)this->maxIteration_; } + this->dEploidIO_->writeMcmcRelated(this->mcmcSample_, useIBD); if ( useIBD == true ){ @@ -328,12 +330,53 @@ void McmcMachinery::runMcmcChain( bool showProgress, bool useIBD ){ this->dEploidIO_->initialHap = this->mcmcSample_->hap; this->dEploidIO_->setInitialHapWasGiven(true); } + + this->computeDiagnostics(); + dout << "###########################################"<< endl; dout << "# MCMC RUN finished #"<< endl; dout << "###########################################"<< endl; } +void McmcMachinery::computeDiagnostics(){ + // average cumulate expectedWSAF + for ( size_t i = 0; i < this->cumExpectedWsaf_.size(); i++){ + this->cumExpectedWsaf_[i] /= this->dEploidIO_->nMcmcSample_; + } + vector tmpLLKs1 = calcLLKs (this->dEploidIO_->refCount_, this->dEploidIO_->altCount_, this->cumExpectedWsaf_, 0, this->cumExpectedWsaf_.size(), this->dEploidIO_->scalingFactor()); + this->dEploidIO_->setmeanThetallks( sumOfVec(tmpLLKs1) ); + + vector wsaf_vec; + for ( size_t i = 0; i < nLoci(); i++){ + double wsaf = this->dEploidIO_->altCount_[i] / (this->dEploidIO_->refCount_[i] + this->dEploidIO_->altCount_[i] + 0.00000000000001); + double adjustedWsaf = wsaf*(1-0.01) + (1-wsaf)*0.01; + wsaf_vec.push_back(adjustedWsaf); + //llkOfData.push_back( logBetaPdf(adjustedWsaf, this->llkSurf[i][0], this->llkSurf[i][1])); + } + vector tmpLLKs = calcLLKs (this->dEploidIO_->refCount_, this->dEploidIO_->altCount_, wsaf_vec, 0, wsaf_vec.size(), this->dEploidIO_->scalingFactor()); + this->dEploidIO_->setmaxLLKs( sumOfVec(tmpLLKs) ); + + double sum = std::accumulate(this->mcmcSample_->sumLLKs.begin(), this->mcmcSample_->sumLLKs.end(), 0.0); + double mean = sum / this->mcmcSample_->sumLLKs.size(); + double sq_sum = std::inner_product(this->mcmcSample_->sumLLKs.begin(), this->mcmcSample_->sumLLKs.end(), this->mcmcSample_->sumLLKs.begin(), 0.0); + double varLLKs = sq_sum / this->mcmcSample_->sumLLKs.size() - mean * mean; + double stdev = std::sqrt(varLLKs); + this->dEploidIO_->setmeanllks(mean); + this->dEploidIO_->setstdvllks(stdev); + + double dicByVar = (-2*mean) + 4*varLLKs/2; + this->dEploidIO_->setdicByVar(dicByVar); + //return ( mean(-2*tmpllk) + var(-2*tmpllk)/2 )# D_bar + 1/2 var (D_theta), where D_theta = -2*tmpllk, and D_bar = mean(D_theta) + + double dicWSAFBar = -2 * sumOfVec(tmpLLKs1); + double dicByTheta = (-2*mean) + (-2*mean) - dicWSAFBar; + this->dEploidIO_->setdicByTheta(dicByTheta); + //DIC.WSAF.bar = -2 * sum(thetallk) + //return ( mean(-2*tmpllk) + (mean(-2*tmpllk) - DIC.WSAF.bar) ) # D_bar + pD, where pD = D_bar - D_theta, and D_bar = mean(D_theta) + +} + vector McmcMachinery::averageProportion(vector < vector > &proportion ){ assert(proportion.size()>0); vector ret(this->kStrain()); @@ -645,6 +688,11 @@ void McmcMachinery::recordMcmcMachinery(){ this->mcmcSample_->proportion.push_back(this->currentProp_); this->mcmcSample_->sumLLKs.push_back(sumOfVec(this->currentLLks_)); this->mcmcSample_->moves.push_back(this->eventInt_); + + // Cumulate expectedWSAF for computing the mean expectedWSAF + for ( size_t i = 0; i < this->cumExpectedWsaf_.size(); i++){ + this->cumExpectedWsaf_[i] += this->currentExpectedWsaf_[i]; + } } diff --git a/src/mcmc.hpp b/src/mcmc.hpp index a161942f..146bbbb4 100644 --- a/src/mcmc.hpp +++ b/src/mcmc.hpp @@ -133,6 +133,7 @@ class McmcMachinery { vector currentLLks_; vector < vector > currentHap_; vector < double > currentExpectedWsaf_; + vector < double > cumExpectedWsaf_; /* Methods */ void calcMaxIteration( size_t nSample, size_t McmcMachineryRate, double burnIn ); @@ -163,6 +164,7 @@ class McmcMachinery { void writeLastFwdProb(bool useIBD); void updateReferencePanel(size_t inbreedingPanelSizeSetTo, size_t excludedStrain); void initializeUpdateReferencePanel(size_t inbreedingPanelSizeSetTo); + void computeDiagnostics(); /* IBD */ double theta_;