diff --git a/.ci/checkedList b/.ci/checkedList index 4348eb2a..8c7a90ef 100644 --- a/.ci/checkedList +++ b/.ci/checkedList @@ -22,3 +22,4 @@ tests/unittest/test_runner.cpp tests/unittest/test_utilities.cpp src/export/dEploidIOExportPosteriorProb.cpp src/export/writeMcmcRelated.cpp +src/dEploidIO-workflow.cpp diff --git a/Makefile.am b/Makefile.am index 4649f2ad..d8c6c757 100644 --- a/Makefile.am +++ b/Makefile.am @@ -28,6 +28,7 @@ common_src = src/random/fastfunc.cpp \ src/panel.cpp \ src/utility.cpp \ src/dEploidIO.cpp \ + src/dEploidIO-workflow.cpp \ src/updateHap.cpp \ src/txtReader.cpp \ src/vcfReader.cpp \ diff --git a/src/dEploid.cpp b/src/dEploid.cpp index 548697d0..0a77e6fe 100644 --- a/src/dEploid.cpp +++ b/src/dEploid.cpp @@ -24,7 +24,6 @@ */ #include // std::cout -#include "mcmc.hpp" #include "dEploidIO.hpp" int main(int argc, char *argv[]) { @@ -51,256 +50,11 @@ int main(int argc, char *argv[]) { } else if (dEploidIO.doIbdViterbiPainting()) { dEploidIO.paintIBDviterbi(); } else if (dEploidIO.useLasso()) { // DEploid-Lasso - dEploidIO.dEploidLasso(); - MersenneTwister lassoRg(dEploidIO.randomSeed()); - DEploidIO tmpIO(dEploidIO); - vector < vector > hap; - for (size_t chromi = 0; - chromi < dEploidIO.indexOfChromStarts_.size(); - chromi++ ) { - tmpIO.position_.clear(); - tmpIO.position_.push_back(dEploidIO.position_.at(chromi)); - tmpIO.indexOfChromStarts_.clear(); - tmpIO.indexOfChromStarts_.push_back(0); - string job = string("DEploid-Lasso learning chromosome "); - job.append(dEploidIO.chrom_[chromi]).append(" haplotypes"); - McmcSample * lassoMcmcSample = new McmcSample(); - McmcMachinery lassoMcmcMachinery( - &dEploidIO.lassoPlafs.at(chromi), - &dEploidIO.lassoRefCount.at(chromi), - &dEploidIO.lassoAltCount.at(chromi), - dEploidIO.lassoPanels.at(chromi), - &tmpIO, - job, - "lasso", - lassoMcmcSample, - &lassoRg, - false); - lassoMcmcMachinery.runMcmcChain(true, // show progress - false); // use IBD - for (size_t snpi = 0; - snpi < lassoMcmcSample->hap.size(); snpi++) { - hap.push_back(vector ( - lassoMcmcSample->hap[snpi].begin(), - lassoMcmcSample->hap[snpi].end())); - } - delete lassoMcmcSample; - } - dEploidIO.writeHap(hap, "lasso"); + dEploidIO.workflow_lasso(); } else if (dEploidIO.useBestPractice()) { // best practice - MersenneTwister rg(dEploidIO.randomSeed()); - - // ############################################################# - // ################# DEploid-LASSO to learn K ################## - // ############################################################# - - vector < vector > chooseKhap; - - DEploidIO toLearnK(dEploidIO); - toLearnK.dEploidLassoTrimfirst(); - DEploidIO toLearnKtmp(dEploidIO); - for (size_t chromi = 0; - chromi < toLearnK.indexOfChromStarts_.size(); - chromi++ ) { - bool tryAgain = true; - while (tryAgain) { - toLearnKtmp.position_.clear(); - toLearnKtmp.position_.push_back( - toLearnK.position_.at(chromi)); - toLearnKtmp.indexOfChromStarts_.clear(); - toLearnKtmp.indexOfChromStarts_.push_back(0); - - McmcSample * lassoMcmcSample = new McmcSample(); - string job = string("DEploid-Lasso learning K "); - job.append(toLearnK.chrom_[chromi]).append(" leanrning K"); - McmcMachinery lassoMcmcMachinery( - &toLearnK.lassoPlafs.at(chromi), - &toLearnK.lassoRefCount.at(chromi), - &toLearnK.lassoAltCount.at(chromi), - toLearnK.lassoPanels.at(chromi), - &toLearnKtmp, - job, - job, - lassoMcmcSample, - &rg, - false); - lassoMcmcMachinery.runMcmcChain(true, // show progress - false, // use IBD - true, // notInR - false); // averageP - toLearnKtmp.initialProp = toLearnKtmp.finalProp; - toLearnKtmp.setInitialPropWasGiven(true); - if (toLearnKtmp.acceptRatio() != 0) { - dEploidIO.chooseK.appendProportions(toLearnKtmp.finalProp); - tryAgain = false; - } else { - toLearnKtmp.setInitialPropWasGiven(false); - tryAgain = true; - } - - if (!tryAgain) { - for (size_t snpi = 0; - snpi < lassoMcmcSample->hap.size(); snpi++) { - chooseKhap.push_back(vector ( - lassoMcmcSample->hap[snpi].begin(), - lassoMcmcSample->hap[snpi].end())); - } - } - delete lassoMcmcSample; - } - } - // Gathering haplotype information - toLearnK.writeHap(chooseKhap, "chooseK"); - - // Gathering proportion information - dEploidIO.initialProp.clear(); - vector initialP = dEploidIO.chooseK.chosenP(); - for (auto const& value : initialP) { - if (value > 0.01) { - dEploidIO.initialProp.push_back(value); - } - } - dEploidIO.setKstrain(dEploidIO.initialProp.size()); - dEploidIO.setInitialPropWasGiven(true); - - if (dEploidIO.inferBestPracticeP()) { - if (dEploidIO.initialProp.size() > 1) { - // ############################################################# - // ###################### DEploid-IBD ######################## - // ############################################################# - McmcSample * ibdMcmcSample = new McmcSample(); - DEploidIO tmpIO2(dEploidIO); - tmpIO2.ibdTrimming(); - McmcMachinery ibdMcmcMachinery(&tmpIO2.plaf_, - &tmpIO2.refCount_, - &tmpIO2.altCount_, - tmpIO2.panel, - &tmpIO2, - string("DEploid-IBD learning proportion"), - string("ibd"), - ibdMcmcSample, - &rg, - true); - ibdMcmcMachinery.runMcmcChain(true, // show progress - true); // use IBD - // if (dEploidIO.useIbdOnly()) { - // tmpIO.paintIBD(); - // dEploidIO.finalProp = tmpIO.initialProp; - // } - - tmpIO2.paintIBD(); - - vector initialP; - for (auto const& value : tmpIO2.finalProp) { - if (value > 0.01) { - initialP.push_back(value); - } - } - cout << endl; - dEploidIO.initialProp = initialP; - dEploidIO.finalProp = initialP; - dEploidIO.setKstrain(initialP.size()); - dEploidIO.setInitialPropWasGiven(true); - dEploidIO.setDoUpdateProp(false); - delete ibdMcmcSample; - } else { - dEploidIO.finalProp.clear(); - dEploidIO.finalProp.push_back(1.0); - dEploidIO.setKstrain(1); - } - } - - if (dEploidIO.inferBestPracticeHap()) { - // ################################################################# - // ###################### DEploid-LASSO ############################ - // ################################################################# - // *** Frist split the reference panel etc - dEploidIO.dEploidLasso(); - - DEploidIO dEploidLassoIO(dEploidIO); - dEploidLassoIO.initialProp = dEploidIO.initialProp; - dEploidLassoIO.setDoUpdateProp(false); - dEploidLassoIO.setInitialPropWasGiven(true); - dEploidLassoIO.setKstrain(dEploidIO.kStrain()); - vector < vector > hap; - for (size_t chromi = 0; - chromi < dEploidIO.indexOfChromStarts_.size(); - chromi++ ) { - dEploidLassoIO.position_.clear(); - dEploidLassoIO.position_.push_back( - dEploidIO.position_.at(chromi)); - dEploidLassoIO.indexOfChromStarts_.clear(); - dEploidLassoIO.indexOfChromStarts_.push_back(0); - - McmcSample * lassoMcmcSample = new McmcSample(); - string job = string("DEploid-Lasso learning chromosome "); - job.append(dEploidIO.chrom_[chromi]).append(" haplotypes"); - McmcMachinery lassoMcmcMachinery( - &dEploidIO.lassoPlafs.at(chromi), - &dEploidIO.lassoRefCount.at(chromi), - &dEploidIO.lassoAltCount.at(chromi), - dEploidIO.lassoPanels.at(chromi), - &dEploidLassoIO, - job, - job, - lassoMcmcSample, - &rg, - false); - lassoMcmcMachinery.runMcmcChain(true, // show progress - false); // use IBD - // *** Append haplotypes to the end - for (size_t snpi = 0; - snpi < lassoMcmcSample->hap.size(); snpi++) { - hap.push_back(vector ( - lassoMcmcSample->hap[snpi].begin(), - lassoMcmcSample->hap[snpi].end())); - } - delete lassoMcmcSample; - } - dEploidIO.writeHap(hap, "final"); - dEploidIO.writeVcf(hap, dEploidLassoIO.initialProp, "final"); - } - if (dEploidIO.inferBestPracticeP() & - (dEploidIO.initialProp.size() > 1)) { - dEploidIO.paintIBD(); - } - + dEploidIO.workflow_best(); } else { // classic version, and DEploid-IBD - if (dEploidIO.useIBD()) { // ibd - McmcSample * ibdMcmcSample = new McmcSample(); - MersenneTwister ibdRg(dEploidIO.randomSeed()); - McmcMachinery ibdMcmcMachinery(&dEploidIO.plaf_, - &dEploidIO.refCount_, - &dEploidIO.altCount_, - dEploidIO.panel, - &dEploidIO, - "DEploid-IBD", - "ibd", - ibdMcmcSample, - &ibdRg, - true); - ibdMcmcMachinery.runMcmcChain(true, // show progress - true); // use IBD - delete ibdMcmcSample; - } - - McmcSample * mcmcSample = new McmcSample(); - MersenneTwister rg(dEploidIO.randomSeed()); - McmcMachinery mcmcMachinery(&dEploidIO.plaf_, - &dEploidIO.refCount_, - &dEploidIO.altCount_, - dEploidIO.panel, - &dEploidIO, - "DEploid classic version", - "classic", // brief - mcmcSample, - &rg, - false); // use IBD - mcmcMachinery.runMcmcChain(true, // show progress - false); // use IBD - dEploidIO.paintIBD(); - dEploidIO.writeHap(mcmcSample->hap, "final"); - delete mcmcSample; + dEploidIO.workflow_ibd(); } // Finishing, write log dEploidIO.wrapUp(); diff --git a/src/dEploidIO-workflow.cpp b/src/dEploidIO-workflow.cpp new file mode 100644 index 00000000..8e7e4b95 --- /dev/null +++ b/src/dEploidIO-workflow.cpp @@ -0,0 +1,289 @@ +/* + * dEploid is used for deconvoluting Plasmodium falciparum genome from + * mix-infected patient sample. + * + * Copyright (C) 2016-2017 University of Oxford + * + * Author: Sha (Joe) Zhu + * + * This file is part of dEploid. + * + * dEploid is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + */ + + +#include // std::cout +#include "mcmc.hpp" +#include "dEploidIO.hpp" + +void DEploidIO::workflow_lasso() { + this->dEploidLasso(); + MersenneTwister lassoRg(this->randomSeed()); + DEploidIO tmpIO(*this); + vector < vector > hap; + for (size_t chromi = 0; + chromi < this->indexOfChromStarts_.size(); + chromi++ ) { + tmpIO.position_.clear(); + tmpIO.position_.push_back(this->position_.at(chromi)); + tmpIO.indexOfChromStarts_.clear(); + tmpIO.indexOfChromStarts_.push_back(0); + string job = string("DEploid-Lasso learning chromosome "); + job.append(this->chrom_[chromi]).append(" haplotypes"); + McmcSample * lassoMcmcSample = new McmcSample(); + McmcMachinery lassoMcmcMachinery( + &this->lassoPlafs.at(chromi), + &this->lassoRefCount.at(chromi), + &this->lassoAltCount.at(chromi), + this->lassoPanels.at(chromi), + &tmpIO, + job, + "lasso", + lassoMcmcSample, + &lassoRg, + false); + lassoMcmcMachinery.runMcmcChain(true, // show progress + false); // use IBD + for (size_t snpi = 0; + snpi < lassoMcmcSample->hap.size(); snpi++) { + hap.push_back(vector ( + lassoMcmcSample->hap[snpi].begin(), + lassoMcmcSample->hap[snpi].end())); + } + delete lassoMcmcSample; + } + this->writeHap(hap, "lasso"); +} + + +void DEploidIO::workflow_ibd() { + if (this->useIBD()) { // ibd + McmcSample * ibdMcmcSample = new McmcSample(); + MersenneTwister ibdRg(this->randomSeed()); + McmcMachinery ibdMcmcMachinery(&this->plaf_, + &this->refCount_, + &this->altCount_, + this->panel, + this, + "DEploid-IBD", + "ibd", + ibdMcmcSample, + &ibdRg, + true); + ibdMcmcMachinery.runMcmcChain(true, // show progress + true); // use IBD + delete ibdMcmcSample; + } + + McmcSample * mcmcSample = new McmcSample(); + MersenneTwister rg(this->randomSeed()); + McmcMachinery mcmcMachinery(&this->plaf_, + &this->refCount_, + &this->altCount_, + this->panel, + this, + "DEploid classic version", + "classic", // brief + mcmcSample, + &rg, + false); // use IBD + mcmcMachinery.runMcmcChain(true, // show progress + false); // use IBD + this->paintIBD(); + this->writeHap(mcmcSample->hap, "final"); + delete mcmcSample; + + +} + + +void DEploidIO::workflow_best() { + MersenneTwister rg(this->randomSeed()); + + // ############################################################# + // ################# DEploid-LASSO to learn K ################## + // ############################################################# + + vector < vector > chooseKhap; + + DEploidIO toLearnK(*this); + toLearnK.dEploidLassoTrimfirst(); + DEploidIO toLearnKtmp(*this); + for (size_t chromi = 0; + chromi < toLearnK.indexOfChromStarts_.size(); + chromi++ ) { + bool tryAgain = true; + while (tryAgain) { + toLearnKtmp.position_.clear(); + toLearnKtmp.position_.push_back(toLearnK.position_.at(chromi)); + toLearnKtmp.indexOfChromStarts_.clear(); + toLearnKtmp.indexOfChromStarts_.push_back(0); + + McmcSample * lassoMcmcSample = new McmcSample(); + string job = string("DEploid-Lasso learning K "); + job.append(toLearnK.chrom_[chromi]).append(" leanrning K"); + McmcMachinery lassoMcmcMachinery( + &toLearnK.lassoPlafs.at(chromi), + &toLearnK.lassoRefCount.at(chromi), + &toLearnK.lassoAltCount.at(chromi), + toLearnK.lassoPanels.at(chromi), + &toLearnKtmp, + job, + job, + lassoMcmcSample, + &rg, + false); + lassoMcmcMachinery.runMcmcChain(true, // show progress + false, // use IBD + true, // notInR + false); // averageP + toLearnKtmp.initialProp = toLearnKtmp.finalProp; + toLearnKtmp.setInitialPropWasGiven(true); + if (toLearnKtmp.acceptRatio() != 0) { + this->chooseK.appendProportions(toLearnKtmp.finalProp); + tryAgain = false; + } else { + toLearnKtmp.setInitialPropWasGiven(false); + tryAgain = true; + } + + if (!tryAgain) { + for (size_t snpi = 0; + snpi < lassoMcmcSample->hap.size(); snpi++) { + chooseKhap.push_back(vector ( + lassoMcmcSample->hap[snpi].begin(), + lassoMcmcSample->hap[snpi].end())); + } + } + delete lassoMcmcSample; + } + } + // Gathering haplotype information + toLearnK.writeHap(chooseKhap, "chooseK"); + + // Gathering proportion information + this->initialProp.clear(); + vector initialP = this->chooseK.chosenP(); + for (auto const& value : initialP) { + if (value > 0.01) { + this->initialProp.push_back(value); + } + } + this->setKstrain(this->initialProp.size()); + this->setInitialPropWasGiven(true); + + if (this->inferBestPracticeP()) { + if (this->initialProp.size() > 1) { + // ############################################################# + // ###################### DEploid-IBD ######################## + // ############################################################# + McmcSample * ibdMcmcSample = new McmcSample(); + DEploidIO tmpIO2(*this); + tmpIO2.ibdTrimming(); + McmcMachinery ibdMcmcMachinery(&tmpIO2.plaf_, + &tmpIO2.refCount_, + &tmpIO2.altCount_, + tmpIO2.panel, + &tmpIO2, + string("DEploid-IBD learning proportion"), + string("ibd"), + ibdMcmcSample, + &rg, + true); + ibdMcmcMachinery.runMcmcChain(true, // show progress + true); // use IBD + // if (this->useIbdOnly()) { + // tmpIO.paintIBD(); + // this->finalProp = tmpIO.initialProp; + // } + + tmpIO2.paintIBD(); + + vector initialP; + for (auto const& value : tmpIO2.finalProp) { + if (value > 0.01) { + initialP.push_back(value); + } + } + cout << endl; + this->initialProp = initialP; + this->finalProp = initialP; + this->setKstrain(initialP.size()); + this->setInitialPropWasGiven(true); + this->setDoUpdateProp(false); + delete ibdMcmcSample; + } else { + this->finalProp.clear(); + this->finalProp.push_back(1.0); + this->setKstrain(1); + } + } + + if (this->inferBestPracticeHap()) { + // ################################################################# + // ###################### DEploid-LASSO ############################ + // ################################################################# + // *** Frist split the reference panel etc + this->dEploidLasso(); + + DEploidIO dEploidLassoIO(*this); + dEploidLassoIO.initialProp = this->initialProp; + dEploidLassoIO.setDoUpdateProp(false); + dEploidLassoIO.setInitialPropWasGiven(true); + dEploidLassoIO.setKstrain(this->kStrain()); + vector < vector > hap; + for (size_t chromi = 0; + chromi < this->indexOfChromStarts_.size(); + chromi++ ) { + dEploidLassoIO.position_.clear(); + dEploidLassoIO.position_.push_back( + this->position_.at(chromi)); + dEploidLassoIO.indexOfChromStarts_.clear(); + dEploidLassoIO.indexOfChromStarts_.push_back(0); + + McmcSample * lassoMcmcSample = new McmcSample(); + string job = string("DEploid-Lasso learning chromosome "); + job.append(this->chrom_[chromi]).append(" haplotypes"); + McmcMachinery lassoMcmcMachinery( + &this->lassoPlafs.at(chromi), + &this->lassoRefCount.at(chromi), + &this->lassoAltCount.at(chromi), + this->lassoPanels.at(chromi), + &dEploidLassoIO, + job, + job, + lassoMcmcSample, + &rg, + false); + lassoMcmcMachinery.runMcmcChain(true, // show progress + false); // use IBD + // *** Append haplotypes to the end + for (size_t snpi = 0; + snpi < lassoMcmcSample->hap.size(); snpi++) { + hap.push_back(vector ( + lassoMcmcSample->hap[snpi].begin(), + lassoMcmcSample->hap[snpi].end())); + } + delete lassoMcmcSample; + } + this->writeHap(hap, "final"); + this->writeVcf(hap, dEploidLassoIO.initialProp, "final"); + } + if (this->inferBestPracticeP() & + (this->initialProp.size() > 1)) { + this->paintIBD(); + } +} + diff --git a/src/dEploidIO.hpp b/src/dEploidIO.hpp index 13add331..425a7e67 100644 --- a/src/dEploidIO.hpp +++ b/src/dEploidIO.hpp @@ -139,6 +139,10 @@ class DEploidIO{ size_t lassoMaxNumPanel_; double acceptRatio() const { return this->acceptRatio_;} + void workflow_lasso(); + void workflow_ibd(); + void workflow_best(); + private: void core(); double llkFromInitialHap_; diff --git a/src/mcmc.cpp b/src/mcmc.cpp index 9de3491e..7a1c6886 100644 --- a/src/mcmc.cpp +++ b/src/mcmc.cpp @@ -40,7 +40,7 @@ McmcMachinery::McmcMachinery( vector * plaf, vector * refCount, vector * altCount, Panel *panel_ptr, - DEploidIO* dEploidIO, + DEploidIO *dEploidIO, string mcmcJob, string jobbrief, McmcSample *mcmcSample, diff --git a/src/mcmc.hpp b/src/mcmc.hpp index 3e97482f..c51668c5 100644 --- a/src/mcmc.hpp +++ b/src/mcmc.hpp @@ -85,7 +85,7 @@ class McmcMachinery { vector * refCount, vector * altCount, Panel *panel_ptr, - DEploidIO* dEplioidIO, + DEploidIO *dEplioidIO, string mcmcJob, string jobbrief, McmcSample *mcmcSample,