Skip to content

Commit

Permalink
Merge pull request #332 from DEploid-dev/issue142
Browse files Browse the repository at this point in the history
close #142
  • Loading branch information
shajoezhu authored Dec 27, 2020
2 parents 0808a77 + 7257393 commit f37d8e7
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 26 deletions.
52 changes: 35 additions & 17 deletions src/dEploidIO.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ void DEploidIO::init() {
this->setIBDSigma(20.0);
this->setUseVcf(false);
this->setUseVcfSample(false);
this->setExtractPlafFromVcf(false);
this->vcfSampleName_ = "";
this->vcfReaderPtr_ = NULL;
this->setDoExportVcf(false);
Expand Down Expand Up @@ -218,7 +219,8 @@ void DEploidIO::finalize() {
}

if ( useVcf() ) { // read vcf files, and parse it to refCount and altCount
this->vcfReaderPtr_ = new VcfReader (vcfFileName_, vcfSampleName_);
this->vcfReaderPtr_ = new VcfReader (vcfFileName_, vcfSampleName_,
extractPlafFromVcf_);
if ( this->excludeSites() ) {
this->vcfReaderPtr_->findAndKeepMarkers (excludedMarkers);
}
Expand All @@ -241,25 +243,32 @@ void DEploidIO::finalize() {
this->altCount_ = alt.info_;
}

TxtReader plaf;
plaf.readFromFile(plafFileName_.c_str());
if ( this->excludeSites() ) {
plaf.findAndKeepMarkers( excludedMarkers );
}
this->plaf_ = plaf.info_;
this->chrom_ = plaf.chrom_;
this->position_ = plaf.position_;
this->indexOfChromStarts_ = plaf.indexOfChromStarts_;

this->nLoci_ = refCount_.size();

if ( this->nLoci_ != this->plaf_.size() ) {
throw LociNumberUnequal( this->plafFileName_ );
}

if ( this->nLoci_ != this->altCount_.size() ) {
throw LociNumberUnequal( this->altFileName_ );
}

if (extractPlafFromVcf()){
this->plaf_ = this->vcfReaderPtr_->plaf;
this->chrom_ = this->vcfReaderPtr_->chrom_;
this->position_ = this->vcfReaderPtr_->position_;
this->indexOfChromStarts_ = this->vcfReaderPtr_->indexOfChromStarts_;
} else {
TxtReader plaf;
plaf.readFromFile(plafFileName_.c_str());
if ( this->excludeSites() ) {
plaf.findAndKeepMarkers( excludedMarkers );
}
this->plaf_ = plaf.info_;
this->chrom_ = plaf.chrom_;
this->position_ = plaf.position_;
this->indexOfChromStarts_ = plaf.indexOfChromStarts_;
if ( this->nLoci_ != this->plaf_.size() ) {
throw LociNumberUnequal( this->plafFileName_ );
}
}

(void)removeFilesWithSameName();

this->readPanel();
Expand Down Expand Up @@ -364,12 +373,21 @@ void DEploidIO::parse () {
this->readNextStringto ( this->vcfFileName_ ) ;
} else if (*argv_i == "-sample") {
if (useVcf() == false){
throw(FlagsOrderIncorrect((*argv_i) , "-vcf") );
}
this->setUseVcfSample(true);
this->readNextStringto ( this->vcfSampleName_ ) ;
} else if (*argv_i == "-plafFromVcf") {
if (useVcf() == false){
throw(FlagsOrderIncorrect((*argv_i) , "-vcf") );
}
this->setExtractPlafFromVcf(true);
} else if (*argv_i == "-vcfOut") {
this->setDoExportVcf (true);
} else if (*argv_i == "-plaf") {
if ( this->extractPlafFromVcf() ) {
throw ( FlagsConflict((*argv_i) , "-plafFromVcf") );
}
this->readNextStringto ( this->plafFileName_ ) ;
} else if (*argv_i == "-panel") {
if ( this->usePanel() == false ) {
Expand Down Expand Up @@ -582,7 +600,7 @@ void DEploidIO::checkInput() {
throw FileNameMissing ( "Ref count" );}
if ( this->altFileName_.size() == 0 && this->useVcf() == false ) {
throw FileNameMissing ( "Alt count" );}
if ( this->plafFileName_.size() == 0 ) {
if ( this->plafFileName_.size() == 0 && extractPlafFromVcf() == false ) {
throw FileNameMissing ( "PLAF" );}
if ( usePanel() && this->panelFileName_.size() == 0 && !this->doIbdPainting() && !this->doComputeLLK() ) {
throw FileNameMissing ( "Reference panel" );}
Expand Down Expand Up @@ -686,7 +704,7 @@ void DEploidIO::printHelp(std::ostream& out) {
out << "./dEploid -vcf data/testData/PG0390-C.test.vcf -plaf data/testData/labStrains.test.PLAF.txt -o PG0390-CNopanel -noPanel -k 2 -nSample 250 -rate 8 -burn 0.67 -ibd " <<endl;
out << "./dEploid -vcf data/testData/PG0390-C.test.vcf -plaf data/testData/labStrains.test.PLAF.txt -o PG0390-CNopanel -initialP 0.2 0.8 -ibdPainting" <<endl;
out << "./dEploid -vcf data/testData/PG0390-C.test.vcf -exclude data/testData/labStrains.test.exclude.txt -plaf data/testData/labStrains.test.PLAF.txt -o PG0390-CLassoExclude -panel data/testData/labStrains.test.panel.txt -initialP 0.2 0.8 -writePanel -lasso" << endl;
out << "./dEploid -vcf data/testData/PG0390-C.test.vcf.gz -sample PG0390-C -exclude data/testData/labStrains.test.exclude.txt.gz -plaf data/testData/labStrains.test.PLAF.txt.gz -o PG0390-C-best -panel data/testData/labStrains.test.panel.txt.gz -best" << endl;
out << "./dEploid -vcf data/testData/PG0390-C.test.vcf.gz -sample PG0390-C -plafFromVcf -exclude data/testData/labStrains.test.exclude.txt.gz -o PG0390-C-best -panel data/testData/labStrains.test.panel.txt.gz -best" << endl;
}


Expand Down
4 changes: 4 additions & 0 deletions src/dEploidIO.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,10 @@ class DEploidIO{
void setUseVcfSample(const bool setto) { this->useVcfSample_ = setto; }
bool useVcfSample() const {return this->useVcfSample_; }

bool extractPlafFromVcf_;
void setExtractPlafFromVcf(const bool setto) { this->extractPlafFromVcf_ = setto; }
bool extractPlafFromVcf() const {return this->extractPlafFromVcf_; }

bool doExportVcf_;
void setDoExportVcf( const bool exportVcf ) { this->doExportVcf_ = exportVcf; }
bool doExportVcf() const { return this->doExportVcf_; }
Expand Down
23 changes: 17 additions & 6 deletions src/vcfReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,11 @@ using std::min;
/*! Initialize vcf file, search for the end of the vcf header.
* Extract the first block of data ( "buffer_length" lines ) into buff
*/
VcfReader::VcfReader(string fileName, string sampleName) {
VcfReader::VcfReader(string fileName, string sampleName, bool extractPlaf) {
/*! Initialize by read in the vcf header file */
this->init(fileName);
this->sampleName_ = sampleName;
this->extractPlaf_ = extractPlaf;
this->sampleColumnIndex_ = 0;
this->readHeader();
this->readVariants();
Expand Down Expand Up @@ -85,6 +86,7 @@ void VcfReader::finalize() {
this->refCount.push_back(static_cast<double>(this->variants[i].ref));
this->altCount.push_back(static_cast<double>(this->variants[i].alt));
this->vqslod.push_back(this->variants[i].vqslod);
this->plaf.push_back(this->variants[i].plaf);
}

if ( this->isCompressed() ) {
Expand Down Expand Up @@ -190,7 +192,8 @@ void VcfReader::readVariants() {
getline(inFile, this->tmpLine_);
}
while (inFile.good() && this->tmpLine_.size() > 0) {
VariantLine newVariant(this->tmpLine_, this->sampleColumnIndex_);
VariantLine newVariant(this->tmpLine_, this->sampleColumnIndex_,
this->extractPlaf_);
// check variantLine quality
this->variants.push_back(newVariant);
if (this->isCompressed()) {
Expand Down Expand Up @@ -275,8 +278,9 @@ void VcfReader::findLegitSnpsGivenVQSLODHalf(double vqslodThreshold) {
}


VariantLine::VariantLine(string tmpLine, size_t sampleColumnIndex) {
this->init(tmpLine, sampleColumnIndex);
VariantLine::VariantLine(string tmpLine, size_t sampleColumnIndex,
bool extractPlaf) {
this->init(tmpLine, sampleColumnIndex, extractPlaf);

while (fieldEnd_ < this->tmpLine_.size()) {
fieldEnd_ = min(this->tmpLine_.find('\t', feildStart_),
Expand Down Expand Up @@ -305,13 +309,15 @@ VariantLine::VariantLine(string tmpLine, size_t sampleColumnIndex) {
}


void VariantLine::init(string tmpLine, size_t sampleColumnIndex) {
void VariantLine::init(string tmpLine, size_t sampleColumnIndex,
bool extractPlaf) {
this->tmpLine_ = tmpLine;
this->feildStart_ = 0;
this->fieldEnd_ = 0;
this->fieldIndex_ = 0;
this->adFieldIndex_ = -1;
this->sampleColumnIndex_ = sampleColumnIndex;
this->extractPlaf_ = extractPlaf;
}


Expand Down Expand Up @@ -367,8 +373,13 @@ void VariantLine::extract_field_INFO() {
vqslodNotFound = false;
vqslod = stod(filterFiledStr.substr(eqIndex+1,
filterFiledStr.size()));
break;
}

if ( ("AF" == filterFiledName) & (this->extractPlaf_) ) {
plaf = stod(filterFiledStr.substr(eqIndex+1,
filterFiledStr.size()));
}

feild_start = field_end+1;
field_index++;
}
Expand Down
12 changes: 9 additions & 3 deletions src/vcfReader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,14 +88,15 @@ class VariantLine{
friend class VcfReader;
friend class DEploidIO;
public:
explicit VariantLine(string tmpLine, size_t sampleColumnIndex);
explicit VariantLine(string tmpLine, size_t sampleColumnIndex,
bool extractPlaf = false);
~VariantLine() {}

private:
string tmpLine_;
string tmpStr_;

void init(string tmpLine, size_t sampleColumnIndex);
void init(string tmpLine, size_t sampleColumnIndex, bool extractPlaf);

void extract_field_CHROM();
void extract_field_POS();
Expand Down Expand Up @@ -126,7 +127,9 @@ class VariantLine{
int ref;
int alt;
double vqslod;
double plaf;
size_t sampleColumnIndex_;
bool extractPlaf_;
};


Expand All @@ -139,7 +142,8 @@ class VcfReader : public VariantIndex {
friend class DEploidIO;
public:
// Constructors and Destructors
explicit VcfReader(string fileName, string sampleName);
explicit VcfReader(string fileName, string sampleName,
bool extractPlaf = false);
// parse in exclude sites
~VcfReader() {}

Expand All @@ -148,6 +152,7 @@ class VcfReader : public VariantIndex {
vector <double> refCount; // calling from python, need to be public
vector <double> altCount; // calling from python, need to be public
vector <double> vqslod; // calling from python, need to be public
vector <double> plaf;
void finalize(); // calling from python, need to be public

private:
Expand All @@ -166,6 +171,7 @@ class VcfReader : public VariantIndex {
size_t sampleColumnIndex_;
string tmpLine_;
string tmpStr_;
bool extractPlaf_;

// Methods
void init(string fileName);
Expand Down
9 changes: 9 additions & 0 deletions tests/unittest/test_dEploidIO.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -617,6 +617,15 @@ class TestIO : public CppUnit::TestCase {
"-lasso", "-ibd"
};
CPPUNIT_ASSERT_THROW(DEploidIO(11, argv15), FlagsConflict);

// plaf conflict with plafFromVcf
char *argv16[] = { "./dEploid",
"-vcf", "data/testData/PG0390-C.test.vcf",
"-plafFromVcf",
"-plaf", "data/testData/labStrains.test.PLAF.txt",
"-panel", "data/testData/labStrains.test.panel.txt"
};
CPPUNIT_ASSERT_THROW(DEploidIO(8, argv16), FlagsConflict);
}


Expand Down

0 comments on commit f37d8e7

Please sign in to comment.