diff --git a/.gitmodules b/.gitmodules index 374337f..c668738 100755 --- a/.gitmodules +++ b/.gitmodules @@ -7,3 +7,6 @@ [submodule "utilities"] path = utilities url = https://github.com/DEploid-dev/DEploid-Utilities +[submodule "src/vcf"] + path = src/vcf + url = https://github.com/DEploid-dev/DEploid-vcf-lib.git diff --git a/Makefile.am b/Makefile.am index 4cea9ed..84d1fc7 100755 --- a/Makefile.am +++ b/Makefile.am @@ -16,7 +16,7 @@ TESTS = unit_tests io_unit_tests check_PROGRAMS = unit_tests dEploid_dbg io_unit_tests current_unit_tests # dEploid_prof PROG = DEPLOID -common_flags = -std=c++17 -Isrc/ -DDEPLOIDVERSION=\"${DEPLOIDVERSION}\" -DLASSOVERSION=\"${LASSOVERSION}\" -DCOMPILEDATE=\"${COMPILEDATE}\" -Wall -Wextra +common_flags = -std=c++17 -Isrc/ -Isrc/vcf/src/ -DDEPLOIDVERSION=\"${DEPLOIDVERSION}\" -DLASSOVERSION=\"${LASSOVERSION}\" -DCOMPILEDATE=\"${COMPILEDATE}\" -Wall -Wextra common_LDADD = -lz @@ -32,17 +32,17 @@ common_src = src/random/fastfunc.cpp \ src/dEploidIO-operation.cpp \ src/dEploidIO-workflow.cpp \ src/updateHap.cpp \ - src/txtReader.cpp \ - src/vcfReader.cpp \ - src/variantIndex.cpp \ - src/gzstream/gzstream.cpp \ + src/vcf/src/txtReader.cpp \ + src/vcf/src/vcfReader.cpp \ + src/vcf/src/variantIndex.cpp \ + src/vcf/src/gzstream/gzstream.cpp \ src/export/dEploidIOExport.cpp \ src/export/dEploidIOExportPosteriorProb.cpp \ src/export/writeMcmcRelated.cpp \ src/lasso/src/lasso.cpp \ src/lasso/src/dEploidLasso.cpp -debug_src = src/debug/mcmcDebug.cpp src/debug/vcfReaderDebug.cpp \ +debug_src = src/debug/mcmcDebug.cpp src/vcf/src/vcfReaderDebug.cpp \ src/lasso/src/lassoDBG.cpp dEploid_SOURCES = src/dEploid.cpp $(common_src) @@ -72,9 +72,7 @@ unit_tests_LDADD = -lcppunit -ldl $(common_LDADD) io_unit_tests_SOURCES = $(common_src) \ tests/unittest/test_runner.cpp \ - tests/unittest/test_dEploidIO.cpp \ - tests/unittest/test_txtReader.cpp \ - tests/unittest/test_vcfReader.cpp + tests/unittest/test_dEploidIO.cpp io_unit_tests_CXXFLAGS = $(common_flags) -DNDEBUG -DUNITTEST -Wno-write-strings --coverage io_unit_tests_LDADD = -lcppunit -ldl $(common_LDADD) diff --git a/src/debug/vcfReaderDebug.cpp b/src/debug/vcfReaderDebug.cpp deleted file mode 100755 index 006295c..0000000 --- a/src/debug/vcfReaderDebug.cpp +++ /dev/null @@ -1,35 +0,0 @@ -/* - * dEploid is used for deconvoluting Plasmodium falciparum genome from - * mix-infected patient sample. - * - * Copyright (C) 2016-2017 University of Oxford - * - * Author: Sha (Joe) Zhu - * - * This file is part of dEploid. - * - * dEploid is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - */ - -#include // std::cout -#include "global.hpp" -#include "vcfReader.hpp" - -using std::endl; - -bool VcfReader::printSampleName() { - dout << "Sample name is " << this->sampleName_ << endl; - return true; -} diff --git a/src/txtReader.cpp b/src/txtReader.cpp deleted file mode 100755 index 2f24dbf..0000000 --- a/src/txtReader.cpp +++ /dev/null @@ -1,235 +0,0 @@ -/* - * dEploid is used for deconvoluting Plasmodium falciparum genome from - * mix-infected patient sample. - * - * Copyright (C) 2016-2017 University of Oxford - * - * Author: Sha (Joe) Zhu - * - * This file is part of dEploid. - * - * dEploid is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - */ - -#include -#include -#include -#include // std::distance -#include "exceptions.hpp" -#include "txtReader.hpp" - -using std::min; - -void TxtReader::readFromFileBase(const char inchar[]) { - this->fileName_ = string(inchar); - this->checkFileCompressed(); - - if (this->isCompressed()) { - this->inFileGz.open(this->fileName_.c_str(), std::ios::in); - } else { - this->inFile.open(this->fileName_.c_str(), std::ios::in); - } - - if (this->isCompressed()) { - if (!inFileGz.good()) { - throw InvalidInputFile(this->fileName_); - } - } else { - if (!inFile.good()) { - throw InvalidInputFile(this->fileName_); - } - } - - tmpChromInex_ = -1; - string tmp_line; - // skip the first line, which is the header - if (this->isCompressed()) { - getline(inFileGz, tmp_line); - } else { - getline(inFile, tmp_line); - } - this->extractHeader(tmp_line); - - if (this->isCompressed()) { - getline(inFileGz, tmp_line); - } else { - getline(inFile, tmp_line); - } - - while (inFile.good() && tmp_line.size() > 0) { - size_t field_start = 0; - size_t field_end = 0; - size_t field_index = 0; - vector contentRow; - while (field_end < tmp_line.size()) { - field_end = min( - min( - min(tmp_line.find(' ', field_start), - tmp_line.find(',', field_start)), - tmp_line.find('\t', field_start)), - tmp_line.find('\n', field_start)); - - string tmp_str = tmp_line.substr(field_start, - field_end - field_start); - if (field_index > 1) { - contentRow.push_back(strtod(tmp_str.c_str(), NULL)); - } else if (field_index == 0) { - this->extractChrom(tmp_str); - } else if (field_index == 1) { - this->extractPOS(tmp_str); - } - - field_start = field_end+1; - field_index++; - } - this->content_.push_back(contentRow); - - if (this->isCompressed()) { - getline(inFileGz, tmp_line); - } else { - getline(inFile, tmp_line); - } - } - - if (this->isCompressed()) { - this->inFileGz.close(); - } else { - this->inFile.close(); - } - - this->position_.push_back(this->tmpPosition_); - - this->nLoci_ = this->content_.size(); - this->nInfoLines_ = this->content_.back().size(); - - if (this->nInfoLines_ == 1) { - this->reshapeContentToInfo(); - } - - this->getIndexOfChromStarts(); - assert(tmpChromInex_ > -1); - assert(chrom_.size() == position_.size()); - assert(this->doneGetIndexOfChromStarts_ == true); - this->checkSortedPositions(this->fileName_); -} - - -void TxtReader::checkFileCompressed() { - FILE *f = NULL; - f = fopen(this->fileName_.c_str(), "rb"); - if (f == NULL) { - throw InvalidInputFile(this->fileName_); - } - - unsigned char magic[2]; - - size_t freadResults = fread(reinterpret_cast(magic), 1, 2, f); - dout << "Check if text file is compressed " << freadResults << std::endl; - this->setIsCompressed((static_cast(magic[0]) == 0x1f) && - (static_cast(magic[1]) == 0x8b)); - fclose(f); -} - - -void TxtReader::extractHeader(const string &line) { - this->header_.clear(); - size_t field_start = 0; - size_t field_end = 0; - size_t field_index = 0; - while (field_end < line.size()) { - field_end = min( - min( - min(line.find(' ', field_start), - line.find(',', field_start)), - line.find('\t', field_start)), - line.find('\n', field_start)); - - string tmp_str = line.substr(field_start, - field_end - field_start); - if (field_index > 1) { - this->header_.push_back(tmp_str); - } - - field_start = field_end+1; - field_index++; - } -} - - -void TxtReader::extractChrom(const string & tmp_str) { - if (tmpChromInex_ >= 0) { - if (tmp_str != this->chrom_.back()) { - tmpChromInex_++; - // save current positions - this->position_.push_back(this->tmpPosition_); - - // start new chrom - this->tmpPosition_.clear(); - this->chrom_.push_back(tmp_str); - } - } else { - tmpChromInex_++; - assert(this->chrom_.size() == 0); - this->chrom_.push_back(tmp_str); - assert(this->tmpPosition_.size() == 0); - assert(this->position_.size() == 0); - } -} - - -void TxtReader::extractPOS(const string & tmp_str) { - if (tmp_str.find("e") != std::string::npos) { - throw BadScientificNotation(tmp_str, this->fileName_); - } - - if (tmp_str.find("E") != std::string::npos) { - throw BadScientificNotation(tmp_str, this->fileName_); - } - - int ret; - try { - ret = stoi(tmp_str.c_str(), NULL); - } catch ( const std::exception &e) { - throw BadConversion(tmp_str, this->fileName_); - } - this->tmpPosition_.push_back(ret); -} - - -void TxtReader::reshapeContentToInfo() { - assert(this->info_.size() == 0); - for (size_t i = 0; i < this->content_.size(); i++) { - this->info_.push_back(this->content_[i][0]); - } -} - - -void TxtReader::removeMarkers() { - assert(this->keptContent_.size() == (size_t)0); - for (auto const &value : this->indexOfContentToBeKept) { - this->keptContent_.push_back(this->content_[value]); - } - - this->content_.clear(); - assert(this->content_.size() == (size_t)0); - this->content_ = this->keptContent_; - this->keptContent_.clear(); - - if (this->nInfoLines_ == 1) { - this->info_.clear(); - this->reshapeContentToInfo(); - } - this->nLoci_ = this->content_.size(); -} diff --git a/src/txtReader.hpp b/src/txtReader.hpp deleted file mode 100755 index 372ff5a..0000000 --- a/src/txtReader.hpp +++ /dev/null @@ -1,95 +0,0 @@ -/* - * dEploid is used for deconvoluting Plasmodium falciparum genome from - * mix-infected patient sample. - * - * Copyright (C) 2016-2017 University of Oxford - * - * Author: Sha (Joe) Zhu - * - * This file is part of dEploid. - * - * dEploid is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - */ - - -#ifndef TXTREADER -#define TXTREADER - -#include -#include -#include "variantIndex.hpp" -#include "exceptions.hpp" -#include "gzstream/gzstream.h" - -class TxtReader : public VariantIndex { - #ifdef UNITTEST - friend class TestPanel; - friend class TestTxtReader; - friend class TestInitialHaplotypes; - #endif - friend class McmcMachinery; - friend class UpdateSingleHap; - friend class UpdatePairHap; - friend class UpdateHap; - friend class Panel; - friend class DEploidIO; - private: - // Members - string fileName_; - ifstream inFile; - igzstream inFileGz; - bool isCompressed_; - bool isCompressed() const { return this->isCompressed_; } - void setIsCompressed(const bool compressed) { - this->isCompressed_ = compressed; } - void checkFileCompressed(); - // content is a matrix of n.loci by n.strains, i.e. content length is n.loci - vector < vector < double > > keptContent_; - // info_ only refers to the first column of the content - vector info_; - - vector header_; - size_t nInfoLines_; - - int tmpChromInex_; - vector < int > tmpPosition_; - - // Methods - void extractChrom(const string & tmp_str); - void extractPOS(const string & tmp_str); - void extractHeader(const string &line); - void reshapeContentToInfo(); - - public: // move the following to private - vector < vector < double > > content_; - TxtReader() {} - virtual void readFromFile(const char inchar[]) { - this->readFromFileBase(inchar); } - void readFromFileBase(const char inchar[]); - virtual ~TxtReader() {} - void removeMarkers(); -}; - - - - -class ExcludeMarker : public TxtReader { - public: - ExcludeMarker():TxtReader() {} - ~ExcludeMarker() {} -}; - - -#endif diff --git a/src/variantIndex.cpp b/src/variantIndex.cpp deleted file mode 100755 index 613c9dc..0000000 --- a/src/variantIndex.cpp +++ /dev/null @@ -1,234 +0,0 @@ -/* - * dEploid is used for deconvoluting Plasmodium falciparum genome from - * mix-infected patient sample. - * - * Copyright (C) 2016-2017 University of Oxford - * - * Author: Sha (Joe) Zhu - * - * This file is part of dEploid. - * - * dEploid is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - */ - -#include // find -#include -#include "exceptions.hpp" -#include "txtReader.hpp" -#include "variantIndex.hpp" - -using std::min; -using std::endl; - - -VariantIndex::VariantIndex() { - this->init(); -} - -void VariantIndex::findWhoToBeKept(ExcludeMarker* excludedMarkers) { - dout << " Starts findWhoToBeKept " << endl; - assert(this->indexOfContentToBeKept.size() == 0); - assert(this->indexOfPosToBeKept.size() == 0); - - for (size_t chromI = 0; chromI < this->chrom_.size(); chromI++) { - dout << " Going through chrom "<< chrom_[chromI]; - vector < size_t > tmpindexOfPosToBeKept; - - // detemine if something needs to be removed from the current chrom. - vector::iterator chromIt = find(excludedMarkers->chrom_.begin(), - excludedMarkers->chrom_.end(), - this->chrom_[chromI]); - - size_t hapIndex = indexOfChromStarts_[chromI]; - size_t chromIndexInExclude = std::distance( - excludedMarkers->chrom_.begin(), - chromIt); - for (size_t posI = 0; posI < this->position_[chromI].size(); posI++) { - if (chromIt == excludedMarkers->chrom_.end()) { - indexOfContentToBeKept.push_back(hapIndex); - tmpindexOfPosToBeKept.push_back(posI); - } else if ( - std::find(excludedMarkers->position_[chromIndexInExclude].begin(), - excludedMarkers->position_[chromIndexInExclude].end(), - this->position_[chromI][posI]) == - excludedMarkers->position_[chromIndexInExclude].end()) { - indexOfContentToBeKept.push_back(hapIndex); - tmpindexOfPosToBeKept.push_back(posI); - } - hapIndex++; - } - indexOfPosToBeKept.push_back(tmpindexOfPosToBeKept); - - dout << " keeping " << tmpindexOfPosToBeKept.size() << endl; - } - assert(indexOfPosToBeKept.size() == this->chrom_.size()); - - dout << indexOfContentToBeKept.size() << " sites need to be Kept " << endl; -} - - -void VariantIndex::findAndKeepMarkers(ExcludeMarker* excludedMarkers) { - this->setDoneGetIndexOfChromStarts(false); - dout << " findAndKeepMarkers called" <findWhoToBeKept(excludedMarkers); - this->removePositions(); - this->getIndexOfChromStarts(); - this->removeMarkers(); -} - - -void VariantIndex::findWhoToBeKeptGivenIndex( - const vector & givenIndex) { - dout << " Starts findWhoToBeKeptGivenIndex " << endl; - assert(this->indexOfContentToBeKept.size() == 0); - indexOfContentToBeKept = vector (givenIndex.begin(), - givenIndex.end()); - // assert(this->indexOfPosToBeKept.size() == 0); - vector oldChrom = vector (chrom_.begin(), chrom_.end()); - this->chrom_.clear(); - - vector < vector < int > > oldposition = this->position_; - this->position_.clear(); - - for (size_t chromI = 0; chromI < oldChrom.size(); chromI++) { - dout << " Going through chrom "<< oldChrom[chromI] << endl; - size_t hapIndex = indexOfChromStarts_[chromI]; - vector newTrimmedPos; - for (size_t posI = 0; posI < oldposition[chromI].size(); posI++) { - if (std::find(givenIndex.begin(), givenIndex.end(), hapIndex) - != givenIndex.end()) { - if (newTrimmedPos.size() == 0) { - this->chrom_.push_back(oldChrom[chromI]); - } - newTrimmedPos.push_back(oldposition[chromI][posI]); - } - - hapIndex++; - } - this->position_.push_back(newTrimmedPos); - } - - // assert(indexOfPosToBeKept.size() == this->chrom_.size()); - - dout << indexOfContentToBeKept.size() << " sites need to be Kept " << endl; -} - - -void VariantIndex::findWhoToBeKeptGivenIndexHalf( - const vector & givenIndex) { - dout << " Starts findWhoToBeKeptGivenIndexHalf " << endl; - assert(this->indexOfContentToBeKept.size() == 0); - indexOfContentToBeKept = vector (givenIndex.begin(), - givenIndex.end()); - // assert(this->indexOfPosToBeKept.size() == 0); - vector oldChrom = vector (chrom_.begin(), chrom_.end()); - this->chrom_.clear(); - - vector < vector < int > > oldposition = this->position_; - this->position_.clear(); - - for (size_t chromI = 0; chromI < oldChrom.size(); chromI++) { - // if (chromI%2 == 0) { - if (chromI > 10) { - dout << " Going through chrom "<< oldChrom[chromI] << " "; - size_t hapIndex = indexOfChromStarts_[chromI]; - vector newTrimmedPos; - for (size_t posI = 0; posI < oldposition[chromI].size(); posI++) { - if (std::find(givenIndex.begin(), givenIndex.end(), hapIndex) - != givenIndex.end()) { - if (newTrimmedPos.size() == 0) { - this->chrom_.push_back(oldChrom[chromI]); - } - newTrimmedPos.push_back(oldposition[chromI][posI]); - } - hapIndex++; - } - // std::cout << newTrimmedPos.size () << endl; - this->position_.push_back(newTrimmedPos); - } - } - - // assert(indexOfPosToBeKept.size() == this->chrom_.size()); - - dout << indexOfContentToBeKept.size() << " sites need to be Kept, with " - << this->chrom_.size() << endl; -} - - -void VariantIndex::removePositions() { - assert(this->keptPosition_.size() == (size_t)0); - for (size_t chromI = 0; chromI < this->chrom_.size(); chromI++) { - vector tmpKeptPosition_; - for (size_t i = 0; i < this->indexOfPosToBeKept[chromI].size(); i++) { - tmpKeptPosition_.push_back( - this->position_[chromI][this->indexOfPosToBeKept[chromI][i]]); - } - this->keptPosition_.push_back(tmpKeptPosition_); - } - this->position_.clear(); - this->position_ = this->keptPosition_; - this->keptPosition_.clear(); -} - - -void VariantIndex::getIndexOfChromStarts() { - assert(this->doneGetIndexOfChromStarts_ == false); - this->indexOfChromStarts_.clear(); - assert(indexOfChromStarts_.size() == 0); - this->indexOfChromStarts_.push_back((size_t)0); - for (size_t tmpChrom = 0; - indexOfChromStarts_.size() < this->chrom_.size(); tmpChrom++ ) { - indexOfChromStarts_.push_back( - indexOfChromStarts_.back()+this->position_[tmpChrom].size()); - } - assert(indexOfChromStarts_.size() == this->chrom_.size()); - this->setDoneGetIndexOfChromStarts(true); -} - - -void VariantIndex::getIndexOfChromStartsHalf() { - assert(this->doneGetIndexOfChromStarts_ == false); - this->indexOfChromStarts_.clear(); - assert(indexOfChromStarts_.size() == 0); - this->indexOfChromStarts_.push_back((size_t)0); - for (size_t tmpChrom = 0; - indexOfChromStarts_.size() < this->chrom_.size(); tmpChrom++ ) { - indexOfChromStarts_.push_back( - indexOfChromStarts_.back()+this->position_[tmpChrom].size()); - } - assert(indexOfChromStarts_.size() == this->chrom_.size()); - this->setDoneGetIndexOfChromStarts(true); -} - - -void VariantIndex::init() { - this->setDoneGetIndexOfChromStarts(false); -} - - -void VariantIndex::removeMarkers() { throw VirtualFunctionShouldNotBeCalled();} - - -void VariantIndex::checkSortedPositions(string fileName) { - for (size_t chromI = 0; chromI < this->chrom_.size(); chromI++) { - int previousPosition_ = 0; - for (auto const &value : this->position_[chromI]) { - if (value < previousPosition_) { - throw PositionUnsorted(fileName); - } - previousPosition_ = value; - } - } -} diff --git a/src/variantIndex.hpp b/src/variantIndex.hpp deleted file mode 100755 index ebfc9cd..0000000 --- a/src/variantIndex.hpp +++ /dev/null @@ -1,94 +0,0 @@ -/* - * dEploid is used for deconvoluting Plasmodium falciparum genome from - * mix-infected patient sample. - * - * Copyright (C) 2016-2017 University of Oxford - * - * Author: Sha (Joe) Zhu - * - * This file is part of dEploid. - * - * dEploid is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - */ - -#ifndef DEPLOID_SRC_VARIANTINDEX_HPP_ -#define DEPLOID_SRC_VARIANTINDEX_HPP_ - - -#include -#include -#include -#include "global.hpp" - - -using std::vector; -using std::string; - -class ExcludeMarker; - -class VariantIndex { - #ifdef UNITTEST - friend class TestPanel; - friend class TestTxtReader; - friend class TestInitialHaplotypes; - #endif - friend class DEploidIO; - friend class TxtReader; - friend class ExcludeMarker; - friend class Panel; - friend class IBDrecombProbs; - friend class VcfReader; - friend class Rvcf; - - private: - // Members - size_t nLoci_; - bool doneGetIndexOfChromStarts_; - vector chrom_; - vector < size_t > indexOfChromStarts_; - vector < vector < int> > position_; - vector < vector < int> > keptPosition_; - /* Index of content/info will be kept */ - vector < size_t > indexOfContentToBeKept; - /* Index of positions entry to be kept, - * this will have the same size as this->chrom_, */ - vector < vector < size_t > > indexOfPosToBeKept; - - // Getter and Setter - bool doneGetIndexOfChromStarts() const { - return doneGetIndexOfChromStarts_; } - void setDoneGetIndexOfChromStarts(const bool setTo) { - this->doneGetIndexOfChromStarts_ = setTo; } - - // Methods - void init(); - void getIndexOfChromStarts(); - void getIndexOfChromStartsHalf(); - void removePositions(); - void checkSortedPositions(string fileName); - void findAndKeepMarkers(ExcludeMarker* excludedMarkers); - virtual void removeMarkers(); - // For removing markers and positions - void findWhoToBeKept(ExcludeMarker* excludedMarkers); - void findWhoToBeKeptGivenIndex(const vector & givenIndex); - void findWhoToBeKeptGivenIndexHalf(const vector & givenIndex); - - public: - VariantIndex(); - virtual ~VariantIndex() {} -}; - - -#endif // DEPLOID_SRC_VARIANTINDEX_HPP_ diff --git a/src/vcf b/src/vcf new file mode 160000 index 0000000..46093c7 --- /dev/null +++ b/src/vcf @@ -0,0 +1 @@ +Subproject commit 46093c7ebb3bfad4b3e930ecef7a65f55f13e3af diff --git a/src/vcfReader.cpp b/src/vcfReader.cpp deleted file mode 100755 index c43fbbd..0000000 --- a/src/vcfReader.cpp +++ /dev/null @@ -1,481 +0,0 @@ -/* - * dEploid is used for deconvoluting Plasmodium falciparum genome from - * mix-infected patient sample. - * - * Copyright (C) 2016-2017 University of Oxford - * - * Author: Sha (Joe) Zhu - * - * This file is part of dEploid. - * - * dEploid is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - */ - -#include // std::min -#include // assert -#include // std::cout -#include "vcfReader.hpp" -#include "global.hpp" - -// using namespace std; -using std::min; - -/*! Initialize vcf file, search for the end of the vcf header. - * Extract the first block of data ( "buffer_length" lines ) into buff - */ -VcfReader::VcfReader(string fileName, string sampleName, bool extractPlaf) { - /*! Initialize by read in the vcf header file */ - this->init(fileName); - this->sampleName_ = sampleName; - this->extractPlaf_ = extractPlaf; - this->sampleColumnIndex_ = 0; - this->readHeader(); - this->readVariants(); - this->getChromList(); - this->getIndexOfChromStarts(); - assert(this->doneGetIndexOfChromStarts_ == true); - this->checkSortedPositions(fileName); -} - - -void VcfReader::checkFileCompressed() { - FILE *f = NULL; - f = fopen(this->fileName_.c_str(), "rb"); - if (f == NULL) { - throw InvalidInputFile(this->fileName_); - } - - unsigned char magic[2]; - - size_t freadResults = fread(reinterpret_cast(magic), 1, 2, f); - dout << "Check if vcf is compressed " << freadResults << std::endl; - this->setIsCompressed((static_cast(magic[0]) == 0x1f) && - (static_cast(magic[1]) == 0x8b)); - fclose(f); -} - -void VcfReader::init(string fileName) { - /*! Initialize other VcfReader class members - */ - this->fileName_ = fileName; - - this->checkFileCompressed(); - - if ( this->isCompressed() ) { - this->inFileGz.open(this->fileName_.c_str(), std::ios::in); - } else { - this->inFile.open(this->fileName_.c_str(), std::ios::in); - } -} - - -void VcfReader::finalize() { - for (size_t i = 0; i < this->variants.size(); i++) { - this->refCount.push_back(static_cast(this->variants[i].ref)); - this->altCount.push_back(static_cast(this->variants[i].alt)); - this->vqslod.push_back(this->variants[i].vqslod); - this->plaf.push_back(this->variants[i].plaf); - } - - if ( this->isCompressed() ) { - this->inFileGz.close(); - } else { - this->inFile.close(); - } -} - - -void VcfReader::readHeader() { - if (this->isCompressed()) { - if (!inFileGz.good()) { - throw InvalidInputFile(this->fileName_); - } - } else { - if (!inFile.good()) { - throw InvalidInputFile(this->fileName_); - } - } - - if (this->isCompressed()) { - getline(inFileGz, this->tmpLine_); - } else { - getline(inFile, this->tmpLine_); - } - - while (this->tmpLine_.size() > 0) { - if (this->tmpLine_[0] == '#') { - if (this->tmpLine_[1] == '#') { - this->headerLines.push_back(this->tmpLine_); - if (this->isCompressed()) { - getline(inFileGz, this->tmpLine_); - } else { - getline(inFile, this->tmpLine_); - } - } else { - this->checkFeilds(); - break; // end of the header - } - } else { - this->checkFeilds(); - } - } - - dout << " There are " << this->headerLines.size() - << " lines in the header." <tmpLine_.size()) { - field_end = min(this->tmpLine_.find('\t', feild_start), - this->tmpLine_.find('\n', feild_start)); - this->tmpStr_ = this->tmpLine_.substr(feild_start, - field_end-feild_start); - string correctFieldValue; - switch ( field_index ) { - case 0: correctFieldValue = "#CHROM"; break; - case 1: correctFieldValue = "POS"; break; - case 2: correctFieldValue = "ID"; break; - case 3: correctFieldValue = "REF"; break; - case 4: correctFieldValue = "ALT"; break; - case 5: correctFieldValue = "QUAL"; break; - case 6: correctFieldValue = "FILTER"; break; - case 7: correctFieldValue = "INFO"; break; - case 8: correctFieldValue = "FORMAT"; break; - } - - if (this->tmpStr_ != correctFieldValue && field_index < 9) { - throw VcfInvalidHeaderFieldNames(correctFieldValue, this->tmpStr_); - } - - if ((field_index == 9) & (this->sampleName_ == "")) { - this->sampleName_ = this->tmpStr_; - } - - if (this->tmpStr_ == this->sampleName_) { - sampleColumnIndex_ = field_index; - break; - } - - feild_start = field_end+1; - field_index++; - } // End of while loop: field_end < line.size() - - if (sampleColumnIndex_ == 0) { - throw InvalidSampleInVcf(this->sampleName_, this->fileName_); - } - assert(sampleColumnIndex_ != 0); - assert(printSampleName()); -} - - -void VcfReader::readVariants() { - if (this->isCompressed()) { - getline(inFileGz, this->tmpLine_); - } else { - getline(inFile, this->tmpLine_); - } - while (inFile.good() && this->tmpLine_.size() > 0) { - VariantLine newVariant(this->tmpLine_, this->sampleColumnIndex_, - this->extractPlaf_); - // check variantLine quality - this->variants.push_back(newVariant); - if (this->isCompressed()) { - getline(inFileGz, this->tmpLine_); - } else { - getline(inFile, this->tmpLine_); - } - } -} - - -void VcfReader::getChromList() { - this->chrom_.clear(); - this->position_.clear(); - - assert(this->chrom_.size() == (size_t)0); - assert(this->position_.size() == (size_t)0); - - string previousChrom(""); - vector positionOfChrom_; - - for (size_t i = 0; i < this->variants.size() ; i++) { - if (previousChrom != this->variants[i].chromStr && - previousChrom.size() > (size_t)0) { - this->chrom_.push_back(previousChrom); - this->position_.push_back(positionOfChrom_); - positionOfChrom_.clear(); - } - positionOfChrom_.push_back( - stoi(this->variants[i].posStr.c_str(), NULL)); - previousChrom = this->variants[i].chromStr; - } - - this->chrom_.push_back(previousChrom); - this->position_.push_back(positionOfChrom_); - assert(this->position_.size() == this->chrom_.size()); -} - - -void VcfReader::removeMarkers() { - assert(this->keptVariants.size() == (size_t)0); - for (auto const &value : this->indexOfContentToBeKept) { - this->keptVariants.push_back(this->variants[value]); - } - this->variants.clear(); - this->variants = this->keptVariants; - this->keptVariants.clear(); - this->nLoci_ = this->variants.size(); - dout << " Vcf number of loci kept = " << this->nLoci_ << std::endl; -} - - -void VcfReader::findLegitSnpsGivenVQSLOD(double vqslodThreshold) { - this->legitVqslodAt.clear(); - assert(legitVqslodAt.size() == 0); - for (size_t i = 0; i < this->vqslod.size(); i++) { - if (this->vqslod[i] > vqslodThreshold) { - this->legitVqslodAt.push_back(i); - } - } -} - - -void VcfReader::findLegitSnpsGivenVQSLODHalf(double vqslodThreshold) { - this->legitVqslodAt.clear(); - assert(legitVqslodAt.size() == 0); - - for (size_t chromI = 0; - chromI < this->indexOfChromStarts_.size(); chromI++) { - size_t start = this->indexOfChromStarts_[chromI]; - size_t length = this->position_[chromI].size(); - // if (chromI%2 == 0) { - if (chromI > 10) { - for ( size_t ii = start ; ii < (start+length); ii++ ) { - if (this->vqslod[ii] > vqslodThreshold) { - // std::cout << ii <legitVqslodAt.push_back(ii); - } - } - } - } -} - - -VariantLine::VariantLine(string tmpLine, size_t sampleColumnIndex, - bool extractPlaf) { - this->init(tmpLine, sampleColumnIndex, extractPlaf); - - while (fieldEnd_ < this->tmpLine_.size()) { - fieldEnd_ = min(this->tmpLine_.find('\t', feildStart_), - this->tmpLine_.find('\n', feildStart_)); - this->tmpStr_ = this->tmpLine_.substr(feildStart_, - fieldEnd_-feildStart_); - switch (fieldIndex_) { - case 0: this->extract_field_CHROM(); break; - case 1: this->extract_field_POS(); break; - case 2: this->extract_field_ID(); break; - case 3: this->extract_field_REF() ; break; - case 4: this->extract_field_ALT() ; break; - case 5: this->extract_field_QUAL() ; break; - case 6: this->extract_field_FILTER(); break; - case 7: this->extract_field_INFO(); break; - case 8: this->extract_field_FORMAT(); break; - } - - if (fieldIndex_ == this->sampleColumnIndex_) { - this->extract_field_VARIANT(); - break; - } - feildStart_ = fieldEnd_+1; - fieldIndex_++; - } -} - - -void VariantLine::init(string tmpLine, size_t sampleColumnIndex, - bool extractPlaf) { - this->tmpLine_ = tmpLine; - this->feildStart_ = 0; - this->fieldEnd_ = 0; - this->fieldIndex_ = 0; - this->adFieldIndex_ = -1; - this->sampleColumnIndex_ = sampleColumnIndex; - this->extractPlaf_ = extractPlaf; -} - - -void VariantLine::extract_field_CHROM() { - this->chromStr = this->tmpStr_; -} - - -void VariantLine::extract_field_POS() { - this->posStr = this->tmpStr_; -} - - -void VariantLine::extract_field_ID() { - this->idStr = this->tmpStr_; -} - - -void VariantLine::extract_field_REF() { - this->refStr = this->tmpStr_; -} - - -void VariantLine::extract_field_ALT() { - this->altStr = this->tmpStr_; -} - - -void VariantLine::extract_field_QUAL() { // Check for PASS - this->qualStr = this->tmpStr_; -} - - -void VariantLine::extract_field_FILTER() { - this->filterStr = this->tmpStr_; -} - -void VariantLine::extract_field_INFO() { - this->infoStr = this->tmpStr_; - bool vqslodNotFound = true; - size_t feild_start = 0; - size_t field_end = 0; - int field_index = 0; - - while (field_end < this->tmpStr_.size()) { - field_end = min(this->tmpStr_.find(';', feild_start), - this->tmpStr_.find('\t', feild_start)); - string filterFiledStr = this->tmpStr_.substr(feild_start, - field_end-feild_start); - size_t eqIndex = filterFiledStr.find('=', 0); - string filterFiledName = filterFiledStr.substr(0, eqIndex); - if ( "VQSLOD" == filterFiledName ) { - vqslodNotFound = false; - vqslod = stod(filterFiledStr.substr(eqIndex+1, - filterFiledStr.size())); - } - - if ( ("AF" == filterFiledName) & (this->extractPlaf_) ) { - plaf = stod(filterFiledStr.substr(eqIndex+1, - filterFiledStr.size())); - } - - feild_start = field_end+1; - field_index++; - } - - if (vqslodNotFound) { - throw VcfVQSLODNotFound(this->tmpStr_); - } - - assert(vqslodNotFound == false); -} - - -void VariantLine::extract_field_FORMAT() { - this->formatStr = this->tmpStr_; - size_t feild_start = 0; - size_t field_end = 0; - size_t field_index = 0; - - while (field_end < this->formatStr.size()) { - field_end = min(this->formatStr.find(':', feild_start), - this->formatStr.find('\n', feild_start)); - if ( "AD" == this->formatStr.substr(feild_start, - field_end-feild_start)) { - adFieldIndex_ = field_index; - break; - } - feild_start = field_end+1; - field_index++; - } - if (adFieldIndex_ == -1) { - throw VcfCoverageFieldNotFound(this->tmpStr_); - } - assert(adFieldIndex_ > -1); -} - - -int maybe_dot_to_integer(const string& s) { - if (s == ".") - return 0; - else - return stoi(s); -} - -int n_fields(const string& s, char delim = ',') { - int count = 0; - for (auto ch : s) - if (ch == delim) - count++; - return count+1; -} - -void VariantLine::extract_field_VARIANT() { - size_t feild_start = 0; - size_t field_end = 0; - int field_index = 0; - - while (field_end < this->tmpStr_.size()) { - field_end = min(this->tmpStr_.find(':', feild_start), - this->tmpStr_.find('\n', feild_start)); - if (field_index == adFieldIndex_) { - string adStr = this->tmpStr_.substr(feild_start, - field_end-feild_start); - try { - int n = n_fields(adStr); - if (n != 2) - throw std::runtime_error( - "there should be exactly 2 AD entries, but found " + - std::to_string(n) + - ".\n Wrong number of ALT alleles!."); - - size_t commaIndex = adStr.find(',', 0); - auto ref_AD = adStr.substr(0, commaIndex); - auto alt_AD = adStr.substr(commaIndex+1, adStr.size()); - - ref = maybe_dot_to_integer(ref_AD); - alt = maybe_dot_to_integer(alt_AD); - break; - } - catch (std::exception& e) { - std::cerr << "Error parsing vcf AD field: '" - << adStr << "': " << e.what() << "\n"; - exit(1); - } - } - feild_start = field_end+1; - field_index++; - } -} - -/* -void VcfReader::findLegitSnpsGivenVQSLODandWsfGt0(double vqslodThreshold) { - assert(legitVqslodAt.size() == 0); - for (size_t i = 0; i < this->vqslod.size(); i++) { - if (this->vqslod[i] > vqslodThreshold & this->altCount[i] > 0) { - this->legitVqslodAt.push_back(i); - } - } -} -*/ diff --git a/src/vcfReader.hpp b/src/vcfReader.hpp deleted file mode 100755 index ad1208a..0000000 --- a/src/vcfReader.hpp +++ /dev/null @@ -1,191 +0,0 @@ -/* - * dEploid is used for deconvoluting Plasmodium falciparum genome from - * mix-infected patient sample. - * - * Copyright (C) 2016-2017 University of Oxford - * - * Author: Sha (Joe) Zhu - * - * This file is part of dEploid. - * - * dEploid is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - */ - -#include /* strtol, strtod */ -#include /* string */ -#include /* vector */ -#include -#include "exceptions.hpp" -#include "variantIndex.hpp" -#include "gzstream/gzstream.h" - -#ifndef DEPLOID_SRC_VCFREADER_HPP_ -#define DEPLOID_SRC_VCFREADER_HPP_ - -// using namespace std; -// using std::endl; - -struct InvalidVcf : public InvalidInput{ - explicit InvalidVcf(string str):InvalidInput(str) { - } - virtual ~InvalidVcf() throw() {} - // virtual const char* what () const noexcept { - // return throwMsg.c_str(); - // } -}; - - -struct VcfInvalidHeaderFieldNames : public InvalidVcf{ - explicit VcfInvalidHeaderFieldNames(string str1, string str2): - InvalidVcf(str1) { - this->reason = " VCF field header expects: "; - throwMsg = this->reason + this->src + ", " + str2 + " was found!"; - } - ~VcfInvalidHeaderFieldNames() throw() {} -}; - - -struct VcfInvalidVariantEntry : public InvalidVcf{ - explicit VcfInvalidVariantEntry(string str):InvalidVcf(str) {} - virtual ~VcfInvalidVariantEntry() throw() {} - // virtual const char* what () const noexcept { - // return throwMsg.c_str(); - // } -}; - - -struct VcfCoverageFieldNotFound : public VcfInvalidVariantEntry{ - explicit VcfCoverageFieldNotFound(string str):VcfInvalidVariantEntry(str) { - this->reason = "Coverage field AD was not found in the FORMAT, found: "; - throwMsg = this->reason + this->src; - } - ~VcfCoverageFieldNotFound() throw() {} -}; - - -struct VcfVQSLODNotFound : public VcfInvalidVariantEntry{ - explicit VcfVQSLODNotFound(string str):VcfInvalidVariantEntry(str) { - this->reason = "VQSLOD was note found, check: "; - throwMsg = this->reason + this->src; - } - ~VcfVQSLODNotFound() throw() {} -}; - - -class VariantLine{ - friend class VcfReader; - friend class DEploidIO; - public: - explicit VariantLine(string tmpLine, size_t sampleColumnIndex, - bool extractPlaf = false); - ~VariantLine() {} - - private: - string tmpLine_; - string tmpStr_; - - void init(string tmpLine, size_t sampleColumnIndex, bool extractPlaf); - - void extract_field_CHROM(); - void extract_field_POS(); - void extract_field_ID(); - void extract_field_REF(); - void extract_field_ALT(); - void extract_field_QUAL(); - void extract_field_FILTER(); - void extract_field_INFO(); - void extract_field_FORMAT(); - void extract_field_VARIANT(); - - size_t feildStart_; - size_t fieldEnd_; - size_t fieldIndex_; - - string chromStr; - string posStr; - string idStr; - string refStr; - string altStr; - string qualStr; - string filterStr; - string infoStr; - string formatStr; - int adFieldIndex_; - - int ref; - int alt; - double vqslod; - double plaf; - size_t sampleColumnIndex_; - bool extractPlaf_; -}; - - - -/*! \brief VCF file reader @ingroup group_data */ -class VcfReader : public VariantIndex { -#ifdef UNITTEST - friend class TestVCF; -#endif - friend class DEploidIO; - public: - // Constructors and Destructors - explicit VcfReader(string fileName, string sampleName, - bool extractPlaf = false); - // parse in exclude sites - ~VcfReader() {} - - // Members and Methods - vector headerLines; // calling from python, need to be public - vector refCount; // calling from python, need to be public - vector altCount; // calling from python, need to be public - vector vqslod; // calling from python, need to be public - vector plaf; - void finalize(); // calling from python, need to be public - - private: - vector variants; - vector keptVariants; - vector legitVqslodAt; - string fileName_; - ifstream inFile; - igzstream inFileGz; - bool isCompressed_; - bool isCompressed() const { return this->isCompressed_; } - void setIsCompressed(const bool compressed) { - this->isCompressed_ = compressed; } - void checkFileCompressed(); - string sampleName_; - size_t sampleColumnIndex_; - string tmpLine_; - string tmpStr_; - bool extractPlaf_; - - // Methods - void init(string fileName); - void readVariants(); - void readHeader(); - void checkFeilds(); - void findLegitSnpsGivenVQSLOD(double vqslodThreshold); - void findLegitSnpsGivenVQSLODHalf(double vqslodThreshold); - - void getChromList(); - void removeMarkers(); - - // Debug tools - bool printSampleName(); -}; - -#endif // DEPLOID_SRC_VCFREADER_HPP_ diff --git a/tests/unittest/test_txtReader.cpp b/tests/unittest/test_txtReader.cpp deleted file mode 100755 index 47b1b83..0000000 --- a/tests/unittest/test_txtReader.cpp +++ /dev/null @@ -1,317 +0,0 @@ -#include -#include -#include "src/txtReader.hpp" - -class TestTxtReader : public CppUnit::TestCase { - CPPUNIT_TEST_SUITE( TestTxtReader ); - CPPUNIT_TEST( testMainConstructor ); - CPPUNIT_TEST( checkSizeBefore ); - CPPUNIT_TEST( checkInfo ); - CPPUNIT_TEST(compressed_checkInfo); - CPPUNIT_TEST( checkRemoveMarkers ); - CPPUNIT_TEST( checkSizeAfter ); - CPPUNIT_TEST( checkSortedPositions ); - CPPUNIT_TEST( checkBadConversion ); - CPPUNIT_TEST( checkBadScientificNotation ); - CPPUNIT_TEST_SUITE_END(); - - private: - TxtReader * txtReader_; - TxtReader * afterExclude_; - ExcludeMarker* excludedMarkers_; - - public: - void setUp() { - this->txtReader_ = new TxtReader(); - this->afterExclude_ = new TxtReader(); - this->excludedMarkers_ = new ExcludeMarker(); - this->txtReader_->readFromFile("data/testData/txtReaderForTesting.txt" ); - this->afterExclude_->readFromFile("data/testData/txtReaderForTestingAfterExclude.txt"); - this->excludedMarkers_->readFromFile("data/testData/txtReaderForTestingToBeExclude.txt" ); - } - - void tearDown() { - delete txtReader_; - delete afterExclude_; - delete excludedMarkers_; - } - - void testMainConstructor(){ } - - void checkBadConversion(){ - TxtReader tmp; - CPPUNIT_ASSERT_THROW ( tmp.readFromFile("data/testData/bad.plaf.txt"), BadConversion ); - } - - void checkBadScientificNotation(){ - TxtReader tmp2; - CPPUNIT_ASSERT_THROW ( tmp2.readFromFile("data/testData/bad.plaf_scientific.txt"), BadScientificNotation ); - - TxtReader tmp3; - CPPUNIT_ASSERT_THROW ( tmp3.readFromFile("data/testData/bad.plaf_scientificE.txt"), BadScientificNotation ); - } - - void checkSortedPositions() { - TxtReader tmp; - CPPUNIT_ASSERT_THROW ( tmp.readFromFile("data/testData/bad.plaf_badpos.txt"), PositionUnsorted ); - } - - void checkSizeBefore(){ - CPPUNIT_ASSERT_EQUAL ( (size_t)100, this->txtReader_->info_.size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)100, this->txtReader_->nLoci_ ); - CPPUNIT_ASSERT_EQUAL ( (size_t)6, this->txtReader_->chrom_.size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)6, this->txtReader_->position_.size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)12, this->txtReader_->position_[0].size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)16, this->txtReader_->position_[1].size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)17, this->txtReader_->position_[2].size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)24, this->txtReader_->position_[3].size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)20, this->txtReader_->position_[4].size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)11, this->txtReader_->position_[5].size() ); - - CPPUNIT_ASSERT_EQUAL ( (size_t)0, this->excludedMarkers_->info_.size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)3, this->excludedMarkers_->chrom_.size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)3, this->excludedMarkers_->position_.size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)4, this->excludedMarkers_->position_[0].size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)1, this->excludedMarkers_->position_[1].size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)2, this->excludedMarkers_->position_[2].size() ); - - CPPUNIT_ASSERT_EQUAL ( (size_t)93, this->afterExclude_->info_.size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)93, this->afterExclude_->nLoci_ ); - CPPUNIT_ASSERT_EQUAL ( (size_t)6, this->afterExclude_->chrom_.size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)6, this->afterExclude_->position_.size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)12, this->afterExclude_->position_[0].size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)12, this->afterExclude_->position_[1].size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)17, this->afterExclude_->position_[2].size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)23, this->afterExclude_->position_[3].size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)20, this->afterExclude_->position_[4].size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)9, this->afterExclude_->position_[5].size() ); - } - - void checkInfo(){ - //Pf3D7_01_v3 93157 0 - CPPUNIT_ASSERT_EQUAL ( (int)93157, this->txtReader_->position_[0][0] ); - CPPUNIT_ASSERT_EQUAL ( (double)0, this->txtReader_->info_[0] ); - CPPUNIT_ASSERT_EQUAL ( (double)0, this->txtReader_->content_[0][0] ); - - //Pf3D7_01_v3 95518 46 - CPPUNIT_ASSERT_EQUAL ( (int)95518, this->txtReader_->position_[0][4] ); - CPPUNIT_ASSERT_EQUAL ( (double)46, this->txtReader_->info_[6-2] ); - CPPUNIT_ASSERT_EQUAL ( (double)46, this->txtReader_->content_[6-2][0] ); - - //Pf3D7_02_v3 100608 35 - CPPUNIT_ASSERT_EQUAL ( (int)100608, this->txtReader_->position_[1][0] ); - CPPUNIT_ASSERT_EQUAL ( (double)35, this->txtReader_->info_[14-2] ); // it is at line 14 in the file, minus 1 for the header, minus another 1 for 0-indexing - CPPUNIT_ASSERT_EQUAL ( (double)35, this->txtReader_->content_[14-2][0] ); - - //Pf3D7_02_v3 107823 0 - CPPUNIT_ASSERT_EQUAL ( (int)107823, this->txtReader_->position_[1][4] ); - CPPUNIT_ASSERT_EQUAL ( (double)0, this->txtReader_->info_[18-2] ); - CPPUNIT_ASSERT_EQUAL ( (double)0, this->txtReader_->content_[18-2][0] ); - - //Pf3D7_02_v3 111040 20 - CPPUNIT_ASSERT_EQUAL ( (int)111040, this->txtReader_->position_[1][5] ); - CPPUNIT_ASSERT_EQUAL ( (double)20, this->txtReader_->info_[19-2] ); - CPPUNIT_ASSERT_EQUAL ( (double)20, this->txtReader_->content_[19-2][0] ); - - //Pf3D7_02_v3 114473 49 - CPPUNIT_ASSERT_EQUAL ( (int)114473, this->txtReader_->position_[1][9] ); - CPPUNIT_ASSERT_EQUAL ( (double)49, this->txtReader_->info_[23-2] ); - CPPUNIT_ASSERT_EQUAL ( (double)49, this->txtReader_->content_[23-2][0] ); - - //Pf3D7_04_v3 144877 47 - CPPUNIT_ASSERT_EQUAL ( (int)144877, this->txtReader_->position_[3][0] ); - CPPUNIT_ASSERT_EQUAL ( (double)47, this->txtReader_->info_[47-2] ); - CPPUNIT_ASSERT_EQUAL ( (double)47, this->txtReader_->content_[47-2][0] ); - - //Pf3D7_06_v3 180170 63 - CPPUNIT_ASSERT_EQUAL ( (int)180170, this->txtReader_->position_[5][0] ); - CPPUNIT_ASSERT_EQUAL ( (double)63, this->txtReader_->info_[91-2] ); - CPPUNIT_ASSERT_EQUAL ( (double)63, this->txtReader_->content_[92-2][0] ); - - //Pf3D7_06_v3 180183 60 - CPPUNIT_ASSERT_EQUAL ( (int)180183, this->txtReader_->position_[5][4] ); - CPPUNIT_ASSERT_EQUAL ( (double)60, this->txtReader_->info_[95-2] ); - CPPUNIT_ASSERT_EQUAL ( (double)60, this->txtReader_->content_[95-2][0] ); - } - - - void compressed_checkInfo(){ - TxtReader gztxtReader; - gztxtReader.readFromFile("data/testData/txtReaderForTesting.txt.gz" ); - //Pf3D7_01_v3 93157 0 - CPPUNIT_ASSERT_EQUAL ( (int)93157, gztxtReader.position_[0][0] ); - CPPUNIT_ASSERT_EQUAL ( (double)0, gztxtReader.info_[0] ); - CPPUNIT_ASSERT_EQUAL ( (double)0, gztxtReader.content_[0][0] ); - - //Pf3D7_01_v3 95518 46 - CPPUNIT_ASSERT_EQUAL ( (int)95518, gztxtReader.position_[0][4] ); - CPPUNIT_ASSERT_EQUAL ( (double)46, gztxtReader.info_[6-2] ); - CPPUNIT_ASSERT_EQUAL ( (double)46, gztxtReader.content_[6-2][0] ); - - //Pf3D7_02_v3 100608 35 - CPPUNIT_ASSERT_EQUAL ( (int)100608, gztxtReader.position_[1][0] ); - CPPUNIT_ASSERT_EQUAL ( (double)35, gztxtReader.info_[14-2] ); // it is at line 14 in the file, minus 1 for the header, minus another 1 for 0-indexing - CPPUNIT_ASSERT_EQUAL ( (double)35, gztxtReader.content_[14-2][0] ); - - //Pf3D7_02_v3 107823 0 - CPPUNIT_ASSERT_EQUAL ( (int)107823, gztxtReader.position_[1][4] ); - CPPUNIT_ASSERT_EQUAL ( (double)0, gztxtReader.info_[18-2] ); - CPPUNIT_ASSERT_EQUAL ( (double)0, gztxtReader.content_[18-2][0] ); - - //Pf3D7_02_v3 111040 20 - CPPUNIT_ASSERT_EQUAL ( (int)111040, gztxtReader.position_[1][5] ); - CPPUNIT_ASSERT_EQUAL ( (double)20, gztxtReader.info_[19-2] ); - CPPUNIT_ASSERT_EQUAL ( (double)20, gztxtReader.content_[19-2][0] ); - - //Pf3D7_02_v3 114473 49 - CPPUNIT_ASSERT_EQUAL ( (int)114473, gztxtReader.position_[1][9] ); - CPPUNIT_ASSERT_EQUAL ( (double)49, gztxtReader.info_[23-2] ); - CPPUNIT_ASSERT_EQUAL ( (double)49, gztxtReader.content_[23-2][0] ); - - //Pf3D7_04_v3 144877 47 - CPPUNIT_ASSERT_EQUAL ( (int)144877, gztxtReader.position_[3][0] ); - CPPUNIT_ASSERT_EQUAL ( (double)47, gztxtReader.info_[47-2] ); - CPPUNIT_ASSERT_EQUAL ( (double)47, gztxtReader.content_[47-2][0] ); - - //Pf3D7_06_v3 180170 63 - CPPUNIT_ASSERT_EQUAL ( (int)180170, gztxtReader.position_[5][0] ); - CPPUNIT_ASSERT_EQUAL ( (double)63, gztxtReader.info_[91-2] ); - CPPUNIT_ASSERT_EQUAL ( (double)63, gztxtReader.content_[92-2][0] ); - - //Pf3D7_06_v3 180183 60 - CPPUNIT_ASSERT_EQUAL ( (int)180183, gztxtReader.position_[5][4] ); - CPPUNIT_ASSERT_EQUAL ( (double)60, gztxtReader.info_[95-2] ); - CPPUNIT_ASSERT_EQUAL ( (double)60, gztxtReader.content_[95-2][0] ); - } - - - void checkRemoveMarkers(){ - CPPUNIT_ASSERT_NO_THROW ( this->txtReader_->findAndKeepMarkers (excludedMarkers_) ); - CPPUNIT_ASSERT_NO_THROW ( this->afterExclude_->findAndKeepMarkers (excludedMarkers_) ); - } - - void checkSizeAfter(){ - this->txtReader_->findAndKeepMarkers (excludedMarkers_); - CPPUNIT_ASSERT_EQUAL ( (size_t)93, this->txtReader_->info_.size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)93, this->txtReader_->nLoci_ ); - CPPUNIT_ASSERT_EQUAL ( (size_t)6, this->txtReader_->chrom_.size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)6, this->txtReader_->position_.size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)12, this->txtReader_->position_[0].size() ); - //Pf3D7_02_v3 100608 - //Pf3D7_02_v3 107823 - //Pf3D7_02_v3 111040 - //Pf3D7_02_v3 114473 - CPPUNIT_ASSERT_EQUAL ( (size_t)(16-4), this->txtReader_->position_[1].size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)17, this->txtReader_->position_[2].size() ); - //Pf3D7_04_v3 144877 - CPPUNIT_ASSERT_EQUAL ( (size_t)(24-1), this->txtReader_->position_[3].size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)20, this->txtReader_->position_[4].size() ); - //Pf3D7_06_v3 180170 - //Pf3D7_06_v3 180183 - CPPUNIT_ASSERT_EQUAL ( (size_t)(11-2), this->txtReader_->position_[5].size() ); - - CPPUNIT_ASSERT_EQUAL ( (size_t)0, this->excludedMarkers_->info_.size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)3, this->excludedMarkers_->chrom_.size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)3, this->excludedMarkers_->position_.size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)4, this->excludedMarkers_->position_[0].size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)1, this->excludedMarkers_->position_[1].size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)2, this->excludedMarkers_->position_[2].size() ); - - //Pf3D7_01_v3 93157 0 - CPPUNIT_ASSERT_EQUAL ( (int)93157, this->txtReader_->position_[0][0] ); - CPPUNIT_ASSERT_EQUAL ( (double)0, this->txtReader_->info_[0] ); - CPPUNIT_ASSERT_EQUAL ( (double)0, this->txtReader_->content_[0][0] ); - - //Pf3D7_01_v3 95518 46 - CPPUNIT_ASSERT_EQUAL ( (int)95518, this->txtReader_->position_[0][4] ); - CPPUNIT_ASSERT_EQUAL ( (double)46, this->txtReader_->info_[6-2] ); - CPPUNIT_ASSERT_EQUAL ( (double)46, this->txtReader_->content_[6-2][0] ); - - //Pf3D7_02_v3 101269 33 - CPPUNIT_ASSERT_EQUAL ( (int)101269, this->txtReader_->position_[1][0] ); - CPPUNIT_ASSERT_EQUAL ( (double)33, this->txtReader_->info_[14-2] ); - CPPUNIT_ASSERT_EQUAL ( (double)33, this->txtReader_->content_[14-2][0] ); - - //Pf3D7_02_v3 112038 28 - CPPUNIT_ASSERT_EQUAL ( (int)112038, this->txtReader_->position_[1][4] ); - CPPUNIT_ASSERT_EQUAL ( (double)28, this->txtReader_->info_[18-2] ); - CPPUNIT_ASSERT_EQUAL ( (double)28, this->txtReader_->content_[18-2][0] ); - - //Pf3D7_02_v3 113396 0 - CPPUNIT_ASSERT_EQUAL ( (int)113396, this->txtReader_->position_[1][5] ); - CPPUNIT_ASSERT_EQUAL ( (double)0, this->txtReader_->info_[19-2] ); - CPPUNIT_ASSERT_EQUAL ( (double)0, this->txtReader_->content_[19-2][0] ); - - //Pf3D7_02_v3 121693 0 - CPPUNIT_ASSERT_EQUAL ( (int)121693, this->txtReader_->position_[1][9] ); - CPPUNIT_ASSERT_EQUAL ( (double)0, this->txtReader_->info_[23-2] ); - CPPUNIT_ASSERT_EQUAL ( (double)0, this->txtReader_->content_[23-2][0] ); - - //Pf3D7_04_v3 149598 0 - CPPUNIT_ASSERT_EQUAL ( (int)149598, this->txtReader_->position_[3][4] ); - CPPUNIT_ASSERT_EQUAL ( (double)0, this->txtReader_->info_[47-2] ); - CPPUNIT_ASSERT_EQUAL ( (double)0, this->txtReader_->content_[47-2][0] ); - - //Pf3D7_06_v3 180217 0 - CPPUNIT_ASSERT_EQUAL ( (int)180217, this->txtReader_->position_[5][5] ); - CPPUNIT_ASSERT_EQUAL ( (double)0, this->txtReader_->info_[91-2] ); - CPPUNIT_ASSERT_EQUAL ( (double)0, this->txtReader_->content_[91-2][0] ); - - //Pf3D7_06_v3 180270 0 - CPPUNIT_ASSERT_EQUAL ( (int)180270, this->txtReader_->position_[5][8] ); - CPPUNIT_ASSERT_EQUAL ( (double)0, this->txtReader_->info_[94-2] ); - CPPUNIT_ASSERT_EQUAL ( (double)0, this->txtReader_->content_[94-2][0] ); - - this->afterExclude_->findAndKeepMarkers (excludedMarkers_); - CPPUNIT_ASSERT_EQUAL ( (size_t)93, this->afterExclude_->info_.size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)93, this->afterExclude_->nLoci_ ); - CPPUNIT_ASSERT_EQUAL ( (size_t)6, this->afterExclude_->chrom_.size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)6, this->afterExclude_->position_.size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)12, this->afterExclude_->position_[0].size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)(16-4), this->afterExclude_->position_[1].size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)17, this->afterExclude_->position_[2].size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)(24-1), this->afterExclude_->position_[3].size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)20, this->afterExclude_->position_[4].size() ); - CPPUNIT_ASSERT_EQUAL ( (size_t)(11-2), this->afterExclude_->position_[5].size() ); - - CPPUNIT_ASSERT_EQUAL (this->txtReader_->info_.size(), this->afterExclude_->info_.size() ); - CPPUNIT_ASSERT_EQUAL (this->txtReader_->content_.size(), this->afterExclude_->content_.size() ); - CPPUNIT_ASSERT_EQUAL (this->txtReader_->keptContent_.size(), this->afterExclude_->keptContent_.size() ); - CPPUNIT_ASSERT_EQUAL (this->txtReader_->keptContent_.size(), (size_t)0 ); - CPPUNIT_ASSERT_EQUAL (this->txtReader_->nInfoLines_, this->afterExclude_->nInfoLines_ ); - CPPUNIT_ASSERT_EQUAL (this->txtReader_->nInfoLines_, (size_t)1); - - for ( size_t i = 0; i < 93; i++) { - CPPUNIT_ASSERT_EQUAL (this->txtReader_->info_[i], this->afterExclude_->info_[i] ); - CPPUNIT_ASSERT_EQUAL (this->txtReader_->content_[i][0], this->afterExclude_->content_[i][0] ); - } - - - for ( size_t i = 0; i < 93; i++) { - CPPUNIT_ASSERT_EQUAL (this->txtReader_->info_[i], this->afterExclude_->info_[i] ); - CPPUNIT_ASSERT_EQUAL (this->txtReader_->content_[i][0], this->afterExclude_->content_[i][0] ); - } - - CPPUNIT_ASSERT_EQUAL (this->txtReader_->chrom_.size(), this->afterExclude_->chrom_.size() ); - CPPUNIT_ASSERT_EQUAL (this->txtReader_->chrom_.size(), (size_t)6 ); - CPPUNIT_ASSERT_EQUAL (this->txtReader_->indexOfChromStarts_.size(), this->afterExclude_->indexOfChromStarts_.size() ); - CPPUNIT_ASSERT_EQUAL (this->txtReader_->indexOfChromStarts_.size(), (size_t)6 ); - CPPUNIT_ASSERT_EQUAL (this->txtReader_->position_.size(), this->afterExclude_->position_.size() ); - CPPUNIT_ASSERT_EQUAL (this->txtReader_->position_.size(), (size_t)6 ); - CPPUNIT_ASSERT_EQUAL (this->txtReader_->keptPosition_.size(), this->afterExclude_->keptPosition_.size() ); - CPPUNIT_ASSERT_EQUAL (this->txtReader_->keptPosition_.size(), (size_t)0 ); - - for ( size_t i = 0; i < 6; i++) { - CPPUNIT_ASSERT_EQUAL (this->txtReader_->chrom_[i], this->afterExclude_->chrom_[i] ); - CPPUNIT_ASSERT_EQUAL (this->txtReader_->indexOfChromStarts_[i], this->afterExclude_->indexOfChromStarts_[i] ); - CPPUNIT_ASSERT_EQUAL (this->txtReader_->position_[i].size(), this->afterExclude_->position_[i].size() ); - for ( size_t j = 0; j < this->txtReader_->position_[i].size(); j++) { - CPPUNIT_ASSERT_EQUAL (this->txtReader_->position_[i][j], this->afterExclude_->position_[i][j] ); - - } - } - CPPUNIT_ASSERT_EQUAL (this->txtReader_->nLoci_, this->afterExclude_->nLoci_ ); - } -}; - -CPPUNIT_TEST_SUITE_REGISTRATION( TestTxtReader ); diff --git a/tests/unittest/test_vcfReader.cpp b/tests/unittest/test_vcfReader.cpp deleted file mode 100755 index 87d198d..0000000 --- a/tests/unittest/test_vcfReader.cpp +++ /dev/null @@ -1,69 +0,0 @@ -/* - * dEploid is used for deconvoluting Plasmodium falciparum genome from - * mix-infected patient sample. - * - * Copyright (C) 2016-2017 University of Oxford - * - * Author: Sha (Joe) Zhu - * - * This file is part of dEploid. - * - * dEploid is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - */ - -#include -#include -#include "src/vcfReader.hpp" - -class TestVCF : public CppUnit::TestCase { - CPPUNIT_TEST_SUITE(TestVCF); - CPPUNIT_TEST(testMainConstructor); - CPPUNIT_TEST(testInvalidSampleInVcf); - CPPUNIT_TEST_SUITE_END(); - - private: - VcfReader* vcf_; - VcfReader* vcfGz_; - double eps; - - public: - void setUp() { - this->vcf_ = new VcfReader("data/testData/PG0390-C.test.vcf", - "PG0390-C"); - this->vcfGz_ = new VcfReader("data/testData/PG0390-C.test.vcf.gz", - "PG0390-C"); - this->eps = 0.00000000001; - } - - void tearDown() { - delete this->vcf_; - delete this->vcfGz_; - } - - void testMainConstructor() { - CPPUNIT_ASSERT_NO_THROW(this->vcf_->finalize()); - CPPUNIT_ASSERT_DOUBLES_EQUAL(8.08, this->vcf_->vqslod[0], this->eps); - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.617, this->vcf_->vqslod[1], this->eps); - CPPUNIT_ASSERT_EQUAL(this->vcf_->vqslod.size(), - this->vcf_->refCount.size()); - } - - void testInvalidSampleInVcf() { - CPPUNIT_ASSERT_THROW(VcfReader("data/testData/PG0390-C.test.vcf.gz", - "PG0370-C"), InvalidSampleInVcf); - } -}; - -CPPUNIT_TEST_SUITE_REGISTRATION(TestVCF);