From 594bc5f1c4166115ea065c94c90ad75fa74553cf Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Sun, 27 Dec 2020 01:09:41 +0000 Subject: [PATCH 1/3] close #142 --- src/dEploidIO.cpp | 48 ++++++++++++++++++++++++++++++----------------- src/dEploidIO.hpp | 4 ++++ src/vcfReader.cpp | 20 ++++++++++++++------ src/vcfReader.hpp | 10 +++++++--- 4 files changed, 56 insertions(+), 26 deletions(-) diff --git a/src/dEploidIO.cpp b/src/dEploidIO.cpp index 99afd8f3..22e98e45 100644 --- a/src/dEploidIO.cpp +++ b/src/dEploidIO.cpp @@ -218,7 +218,8 @@ void DEploidIO::finalize() { } if ( useVcf() ) { // read vcf files, and parse it to refCount and altCount - this->vcfReaderPtr_ = new VcfReader (vcfFileName_, vcfSampleName_); + this->vcfReaderPtr_ = new VcfReader (vcfFileName_, vcfSampleName_, + extractPlafFromVcf_); if ( this->excludeSites() ) { this->vcfReaderPtr_->findAndKeepMarkers (excludedMarkers); } @@ -241,25 +242,32 @@ void DEploidIO::finalize() { this->altCount_ = alt.info_; } - TxtReader plaf; - plaf.readFromFile(plafFileName_.c_str()); - if ( this->excludeSites() ) { - plaf.findAndKeepMarkers( excludedMarkers ); - } - this->plaf_ = plaf.info_; - this->chrom_ = plaf.chrom_; - this->position_ = plaf.position_; - this->indexOfChromStarts_ = plaf.indexOfChromStarts_; - this->nLoci_ = refCount_.size(); - if ( this->nLoci_ != this->plaf_.size() ) { - throw LociNumberUnequal( this->plafFileName_ ); - } - if ( this->nLoci_ != this->altCount_.size() ) { throw LociNumberUnequal( this->altFileName_ ); } + + if (extractPlafFromVcf()){ + this->plaf_ = this->vcfReaderPtr_->plaf; + this->chrom_ = this->vcfReaderPtr_->chrom_; + this->position_ = this->vcfReaderPtr_->position_; + this->indexOfChromStarts_ = this->vcfReaderPtr_->indexOfChromStarts_; + } else { + TxtReader plaf; + plaf.readFromFile(plafFileName_.c_str()); + if ( this->excludeSites() ) { + plaf.findAndKeepMarkers( excludedMarkers ); + } + this->plaf_ = plaf.info_; + this->chrom_ = plaf.chrom_; + this->position_ = plaf.position_; + this->indexOfChromStarts_ = plaf.indexOfChromStarts_; + if ( this->nLoci_ != this->plaf_.size() ) { + throw LociNumberUnequal( this->plafFileName_ ); + } + } + (void)removeFilesWithSameName(); this->readPanel(); @@ -364,9 +372,15 @@ void DEploidIO::parse () { this->readNextStringto ( this->vcfFileName_ ) ; } else if (*argv_i == "-sample") { if (useVcf() == false){ + throw(FlagsOrderIncorrect((*argv_i) , "-vcf") ); } this->setUseVcfSample(true); this->readNextStringto ( this->vcfSampleName_ ) ; + } else if (*argv_i == "-plafFromVcf") { + if (useVcf() == false){ + throw(FlagsOrderIncorrect((*argv_i) , "-vcf") ); + } + this->setExtractPlafFromVcf(true); } else if (*argv_i == "-vcfOut") { this->setDoExportVcf (true); } else if (*argv_i == "-plaf") { @@ -582,7 +596,7 @@ void DEploidIO::checkInput() { throw FileNameMissing ( "Ref count" );} if ( this->altFileName_.size() == 0 && this->useVcf() == false ) { throw FileNameMissing ( "Alt count" );} - if ( this->plafFileName_.size() == 0 ) { + if ( this->plafFileName_.size() == 0 && extractPlafFromVcf() == false ) { throw FileNameMissing ( "PLAF" );} if ( usePanel() && this->panelFileName_.size() == 0 && !this->doIbdPainting() && !this->doComputeLLK() ) { throw FileNameMissing ( "Reference panel" );} @@ -686,7 +700,7 @@ void DEploidIO::printHelp(std::ostream& out) { out << "./dEploid -vcf data/testData/PG0390-C.test.vcf -plaf data/testData/labStrains.test.PLAF.txt -o PG0390-CNopanel -noPanel -k 2 -nSample 250 -rate 8 -burn 0.67 -ibd " <useVcfSample_ = setto; } bool useVcfSample() const {return this->useVcfSample_; } + bool extractPlafFromVcf_; + void setExtractPlafFromVcf(const bool setto) { this->extractPlafFromVcf_ = setto; } + bool extractPlafFromVcf() const {return this->extractPlafFromVcf_; } + bool doExportVcf_; void setDoExportVcf( const bool exportVcf ) { this->doExportVcf_ = exportVcf; } bool doExportVcf() const { return this->doExportVcf_; } diff --git a/src/vcfReader.cpp b/src/vcfReader.cpp index 60ee7036..b73949a9 100644 --- a/src/vcfReader.cpp +++ b/src/vcfReader.cpp @@ -35,10 +35,11 @@ using std::min; /*! Initialize vcf file, search for the end of the vcf header. * Extract the first block of data ( "buffer_length" lines ) into buff */ -VcfReader::VcfReader(string fileName, string sampleName) { +VcfReader::VcfReader(string fileName, string sampleName, bool extractPlaf) { /*! Initialize by read in the vcf header file */ this->init(fileName); this->sampleName_ = sampleName; + this->extractPlaf_ = extractPlaf; this->sampleColumnIndex_ = 0; this->readHeader(); this->readVariants(); @@ -85,6 +86,7 @@ void VcfReader::finalize() { this->refCount.push_back(static_cast(this->variants[i].ref)); this->altCount.push_back(static_cast(this->variants[i].alt)); this->vqslod.push_back(this->variants[i].vqslod); + this->plaf.push_back(this->variants[i].plaf); } if ( this->isCompressed() ) { @@ -190,7 +192,7 @@ void VcfReader::readVariants() { getline(inFile, this->tmpLine_); } while (inFile.good() && this->tmpLine_.size() > 0) { - VariantLine newVariant(this->tmpLine_, this->sampleColumnIndex_); + VariantLine newVariant(this->tmpLine_, this->sampleColumnIndex_, this->extractPlaf_); // check variantLine quality this->variants.push_back(newVariant); if (this->isCompressed()) { @@ -275,8 +277,8 @@ void VcfReader::findLegitSnpsGivenVQSLODHalf(double vqslodThreshold) { } -VariantLine::VariantLine(string tmpLine, size_t sampleColumnIndex) { - this->init(tmpLine, sampleColumnIndex); +VariantLine::VariantLine(string tmpLine, size_t sampleColumnIndex, bool extractPlaf) { + this->init(tmpLine, sampleColumnIndex, extractPlaf); while (fieldEnd_ < this->tmpLine_.size()) { fieldEnd_ = min(this->tmpLine_.find('\t', feildStart_), @@ -305,13 +307,14 @@ VariantLine::VariantLine(string tmpLine, size_t sampleColumnIndex) { } -void VariantLine::init(string tmpLine, size_t sampleColumnIndex) { +void VariantLine::init(string tmpLine, size_t sampleColumnIndex, bool extractPlaf) { this->tmpLine_ = tmpLine; this->feildStart_ = 0; this->fieldEnd_ = 0; this->fieldIndex_ = 0; this->adFieldIndex_ = -1; this->sampleColumnIndex_ = sampleColumnIndex; + this->extractPlaf_ = extractPlaf; } @@ -367,8 +370,13 @@ void VariantLine::extract_field_INFO() { vqslodNotFound = false; vqslod = stod(filterFiledStr.substr(eqIndex+1, filterFiledStr.size())); - break; } + + if ( ("AF" == filterFiledName) & (this->extractPlaf_) ) { + plaf = stod(filterFiledStr.substr(eqIndex+1, + filterFiledStr.size())); + } + feild_start = field_end+1; field_index++; } diff --git a/src/vcfReader.hpp b/src/vcfReader.hpp index 16dc541f..7c5f06b3 100644 --- a/src/vcfReader.hpp +++ b/src/vcfReader.hpp @@ -88,14 +88,14 @@ class VariantLine{ friend class VcfReader; friend class DEploidIO; public: - explicit VariantLine(string tmpLine, size_t sampleColumnIndex); + explicit VariantLine(string tmpLine, size_t sampleColumnIndex, bool extractPlaf = false); ~VariantLine() {} private: string tmpLine_; string tmpStr_; - void init(string tmpLine, size_t sampleColumnIndex); + void init(string tmpLine, size_t sampleColumnIndex, bool extractPlaf); void extract_field_CHROM(); void extract_field_POS(); @@ -126,7 +126,9 @@ class VariantLine{ int ref; int alt; double vqslod; + double plaf; size_t sampleColumnIndex_; + bool extractPlaf_; }; @@ -139,7 +141,7 @@ class VcfReader : public VariantIndex { friend class DEploidIO; public: // Constructors and Destructors - explicit VcfReader(string fileName, string sampleName); + explicit VcfReader(string fileName, string sampleName, bool extractPlaf = false); // parse in exclude sites ~VcfReader() {} @@ -148,6 +150,7 @@ class VcfReader : public VariantIndex { vector refCount; // calling from python, need to be public vector altCount; // calling from python, need to be public vector vqslod; // calling from python, need to be public + vector plaf; void finalize(); // calling from python, need to be public private: @@ -166,6 +169,7 @@ class VcfReader : public VariantIndex { size_t sampleColumnIndex_; string tmpLine_; string tmpStr_; + bool extractPlaf_; // Methods void init(string fileName); From 8bc02c4864f3dd49600dc7a583fedfee949aa392 Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Sun, 27 Dec 2020 15:45:26 +0000 Subject: [PATCH 2/3] initialize extract plaf from vcF --- src/dEploidIO.cpp | 4 ++++ tests/unittest/test_dEploidIO.cpp | 9 +++++++++ 2 files changed, 13 insertions(+) diff --git a/src/dEploidIO.cpp b/src/dEploidIO.cpp index 22e98e45..de7093ad 100644 --- a/src/dEploidIO.cpp +++ b/src/dEploidIO.cpp @@ -142,6 +142,7 @@ void DEploidIO::init() { this->setIBDSigma(20.0); this->setUseVcf(false); this->setUseVcfSample(false); + this->setExtractPlafFromVcf(false); this->vcfSampleName_ = ""; this->vcfReaderPtr_ = NULL; this->setDoExportVcf(false); @@ -384,6 +385,9 @@ void DEploidIO::parse () { } else if (*argv_i == "-vcfOut") { this->setDoExportVcf (true); } else if (*argv_i == "-plaf") { + if ( this->extractPlafFromVcf() ) { + throw ( FlagsConflict((*argv_i) , "-plafFromVcf") ); + } this->readNextStringto ( this->plafFileName_ ) ; } else if (*argv_i == "-panel") { if ( this->usePanel() == false ) { diff --git a/tests/unittest/test_dEploidIO.cpp b/tests/unittest/test_dEploidIO.cpp index 80fffe78..d2c3ed24 100644 --- a/tests/unittest/test_dEploidIO.cpp +++ b/tests/unittest/test_dEploidIO.cpp @@ -617,6 +617,15 @@ class TestIO : public CppUnit::TestCase { "-lasso", "-ibd" }; CPPUNIT_ASSERT_THROW(DEploidIO(11, argv15), FlagsConflict); + + // plaf conflict with plafFromVcf + char *argv16[] = { "./dEploid", + "-vcf", "data/testData/PG0390-C.test.vcf", + "-plafFromVcf", + "-plaf", "data/testData/labStrains.test.PLAF.txt", + "-panel", "data/testData/labStrains.test.panel.txt" + }; + CPPUNIT_ASSERT_THROW(DEploidIO(8, argv16), FlagsConflict); } From 7257393fb2f8549cdece2282b54b760eedc6bd38 Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Sun, 27 Dec 2020 22:41:58 +0000 Subject: [PATCH 3/3] check style --- src/vcfReader.cpp | 9 ++++++--- src/vcfReader.hpp | 6 ++++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/vcfReader.cpp b/src/vcfReader.cpp index b73949a9..c2cd55f4 100644 --- a/src/vcfReader.cpp +++ b/src/vcfReader.cpp @@ -192,7 +192,8 @@ void VcfReader::readVariants() { getline(inFile, this->tmpLine_); } while (inFile.good() && this->tmpLine_.size() > 0) { - VariantLine newVariant(this->tmpLine_, this->sampleColumnIndex_, this->extractPlaf_); + VariantLine newVariant(this->tmpLine_, this->sampleColumnIndex_, + this->extractPlaf_); // check variantLine quality this->variants.push_back(newVariant); if (this->isCompressed()) { @@ -277,7 +278,8 @@ void VcfReader::findLegitSnpsGivenVQSLODHalf(double vqslodThreshold) { } -VariantLine::VariantLine(string tmpLine, size_t sampleColumnIndex, bool extractPlaf) { +VariantLine::VariantLine(string tmpLine, size_t sampleColumnIndex, + bool extractPlaf) { this->init(tmpLine, sampleColumnIndex, extractPlaf); while (fieldEnd_ < this->tmpLine_.size()) { @@ -307,7 +309,8 @@ VariantLine::VariantLine(string tmpLine, size_t sampleColumnIndex, bool extractP } -void VariantLine::init(string tmpLine, size_t sampleColumnIndex, bool extractPlaf) { +void VariantLine::init(string tmpLine, size_t sampleColumnIndex, + bool extractPlaf) { this->tmpLine_ = tmpLine; this->feildStart_ = 0; this->fieldEnd_ = 0; diff --git a/src/vcfReader.hpp b/src/vcfReader.hpp index 7c5f06b3..ad1208a7 100644 --- a/src/vcfReader.hpp +++ b/src/vcfReader.hpp @@ -88,7 +88,8 @@ class VariantLine{ friend class VcfReader; friend class DEploidIO; public: - explicit VariantLine(string tmpLine, size_t sampleColumnIndex, bool extractPlaf = false); + explicit VariantLine(string tmpLine, size_t sampleColumnIndex, + bool extractPlaf = false); ~VariantLine() {} private: @@ -141,7 +142,8 @@ class VcfReader : public VariantIndex { friend class DEploidIO; public: // Constructors and Destructors - explicit VcfReader(string fileName, string sampleName, bool extractPlaf = false); + explicit VcfReader(string fileName, string sampleName, + bool extractPlaf = false); // parse in exclude sites ~VcfReader() {}