diff --git a/.gitmodules b/.gitmodules
index 374337f..c668738 100755
--- a/.gitmodules
+++ b/.gitmodules
@@ -7,3 +7,6 @@
[submodule "utilities"]
path = utilities
url = https://github.com/DEploid-dev/DEploid-Utilities
+[submodule "src/vcf"]
+ path = src/vcf
+ url = https://github.com/DEploid-dev/DEploid-vcf-lib.git
diff --git a/Makefile.am b/Makefile.am
index 4cea9ed..84d1fc7 100755
--- a/Makefile.am
+++ b/Makefile.am
@@ -16,7 +16,7 @@ TESTS = unit_tests io_unit_tests
check_PROGRAMS = unit_tests dEploid_dbg io_unit_tests current_unit_tests # dEploid_prof
PROG = DEPLOID
-common_flags = -std=c++17 -Isrc/ -DDEPLOIDVERSION=\"${DEPLOIDVERSION}\" -DLASSOVERSION=\"${LASSOVERSION}\" -DCOMPILEDATE=\"${COMPILEDATE}\" -Wall -Wextra
+common_flags = -std=c++17 -Isrc/ -Isrc/vcf/src/ -DDEPLOIDVERSION=\"${DEPLOIDVERSION}\" -DLASSOVERSION=\"${LASSOVERSION}\" -DCOMPILEDATE=\"${COMPILEDATE}\" -Wall -Wextra
common_LDADD = -lz
@@ -32,17 +32,17 @@ common_src = src/random/fastfunc.cpp \
src/dEploidIO-operation.cpp \
src/dEploidIO-workflow.cpp \
src/updateHap.cpp \
- src/txtReader.cpp \
- src/vcfReader.cpp \
- src/variantIndex.cpp \
- src/gzstream/gzstream.cpp \
+ src/vcf/src/txtReader.cpp \
+ src/vcf/src/vcfReader.cpp \
+ src/vcf/src/variantIndex.cpp \
+ src/vcf/src/gzstream/gzstream.cpp \
src/export/dEploidIOExport.cpp \
src/export/dEploidIOExportPosteriorProb.cpp \
src/export/writeMcmcRelated.cpp \
src/lasso/src/lasso.cpp \
src/lasso/src/dEploidLasso.cpp
-debug_src = src/debug/mcmcDebug.cpp src/debug/vcfReaderDebug.cpp \
+debug_src = src/debug/mcmcDebug.cpp src/vcf/src/vcfReaderDebug.cpp \
src/lasso/src/lassoDBG.cpp
dEploid_SOURCES = src/dEploid.cpp $(common_src)
@@ -72,9 +72,7 @@ unit_tests_LDADD = -lcppunit -ldl $(common_LDADD)
io_unit_tests_SOURCES = $(common_src) \
tests/unittest/test_runner.cpp \
- tests/unittest/test_dEploidIO.cpp \
- tests/unittest/test_txtReader.cpp \
- tests/unittest/test_vcfReader.cpp
+ tests/unittest/test_dEploidIO.cpp
io_unit_tests_CXXFLAGS = $(common_flags) -DNDEBUG -DUNITTEST -Wno-write-strings --coverage
io_unit_tests_LDADD = -lcppunit -ldl $(common_LDADD)
diff --git a/src/debug/vcfReaderDebug.cpp b/src/debug/vcfReaderDebug.cpp
deleted file mode 100755
index 006295c..0000000
--- a/src/debug/vcfReaderDebug.cpp
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * dEploid is used for deconvoluting Plasmodium falciparum genome from
- * mix-infected patient sample.
- *
- * Copyright (C) 2016-2017 University of Oxford
- *
- * Author: Sha (Joe) Zhu
- *
- * This file is part of dEploid.
- *
- * dEploid is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see .
- *
- */
-
-#include // std::cout
-#include "global.hpp"
-#include "vcfReader.hpp"
-
-using std::endl;
-
-bool VcfReader::printSampleName() {
- dout << "Sample name is " << this->sampleName_ << endl;
- return true;
-}
diff --git a/src/txtReader.cpp b/src/txtReader.cpp
deleted file mode 100755
index 2f24dbf..0000000
--- a/src/txtReader.cpp
+++ /dev/null
@@ -1,235 +0,0 @@
-/*
- * dEploid is used for deconvoluting Plasmodium falciparum genome from
- * mix-infected patient sample.
- *
- * Copyright (C) 2016-2017 University of Oxford
- *
- * Author: Sha (Joe) Zhu
- *
- * This file is part of dEploid.
- *
- * dEploid is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see .
- *
- */
-
-#include
-#include
-#include
-#include // std::distance
-#include "exceptions.hpp"
-#include "txtReader.hpp"
-
-using std::min;
-
-void TxtReader::readFromFileBase(const char inchar[]) {
- this->fileName_ = string(inchar);
- this->checkFileCompressed();
-
- if (this->isCompressed()) {
- this->inFileGz.open(this->fileName_.c_str(), std::ios::in);
- } else {
- this->inFile.open(this->fileName_.c_str(), std::ios::in);
- }
-
- if (this->isCompressed()) {
- if (!inFileGz.good()) {
- throw InvalidInputFile(this->fileName_);
- }
- } else {
- if (!inFile.good()) {
- throw InvalidInputFile(this->fileName_);
- }
- }
-
- tmpChromInex_ = -1;
- string tmp_line;
- // skip the first line, which is the header
- if (this->isCompressed()) {
- getline(inFileGz, tmp_line);
- } else {
- getline(inFile, tmp_line);
- }
- this->extractHeader(tmp_line);
-
- if (this->isCompressed()) {
- getline(inFileGz, tmp_line);
- } else {
- getline(inFile, tmp_line);
- }
-
- while (inFile.good() && tmp_line.size() > 0) {
- size_t field_start = 0;
- size_t field_end = 0;
- size_t field_index = 0;
- vector contentRow;
- while (field_end < tmp_line.size()) {
- field_end = min(
- min(
- min(tmp_line.find(' ', field_start),
- tmp_line.find(',', field_start)),
- tmp_line.find('\t', field_start)),
- tmp_line.find('\n', field_start));
-
- string tmp_str = tmp_line.substr(field_start,
- field_end - field_start);
- if (field_index > 1) {
- contentRow.push_back(strtod(tmp_str.c_str(), NULL));
- } else if (field_index == 0) {
- this->extractChrom(tmp_str);
- } else if (field_index == 1) {
- this->extractPOS(tmp_str);
- }
-
- field_start = field_end+1;
- field_index++;
- }
- this->content_.push_back(contentRow);
-
- if (this->isCompressed()) {
- getline(inFileGz, tmp_line);
- } else {
- getline(inFile, tmp_line);
- }
- }
-
- if (this->isCompressed()) {
- this->inFileGz.close();
- } else {
- this->inFile.close();
- }
-
- this->position_.push_back(this->tmpPosition_);
-
- this->nLoci_ = this->content_.size();
- this->nInfoLines_ = this->content_.back().size();
-
- if (this->nInfoLines_ == 1) {
- this->reshapeContentToInfo();
- }
-
- this->getIndexOfChromStarts();
- assert(tmpChromInex_ > -1);
- assert(chrom_.size() == position_.size());
- assert(this->doneGetIndexOfChromStarts_ == true);
- this->checkSortedPositions(this->fileName_);
-}
-
-
-void TxtReader::checkFileCompressed() {
- FILE *f = NULL;
- f = fopen(this->fileName_.c_str(), "rb");
- if (f == NULL) {
- throw InvalidInputFile(this->fileName_);
- }
-
- unsigned char magic[2];
-
- size_t freadResults = fread(reinterpret_cast(magic), 1, 2, f);
- dout << "Check if text file is compressed " << freadResults << std::endl;
- this->setIsCompressed((static_cast(magic[0]) == 0x1f) &&
- (static_cast(magic[1]) == 0x8b));
- fclose(f);
-}
-
-
-void TxtReader::extractHeader(const string &line) {
- this->header_.clear();
- size_t field_start = 0;
- size_t field_end = 0;
- size_t field_index = 0;
- while (field_end < line.size()) {
- field_end = min(
- min(
- min(line.find(' ', field_start),
- line.find(',', field_start)),
- line.find('\t', field_start)),
- line.find('\n', field_start));
-
- string tmp_str = line.substr(field_start,
- field_end - field_start);
- if (field_index > 1) {
- this->header_.push_back(tmp_str);
- }
-
- field_start = field_end+1;
- field_index++;
- }
-}
-
-
-void TxtReader::extractChrom(const string & tmp_str) {
- if (tmpChromInex_ >= 0) {
- if (tmp_str != this->chrom_.back()) {
- tmpChromInex_++;
- // save current positions
- this->position_.push_back(this->tmpPosition_);
-
- // start new chrom
- this->tmpPosition_.clear();
- this->chrom_.push_back(tmp_str);
- }
- } else {
- tmpChromInex_++;
- assert(this->chrom_.size() == 0);
- this->chrom_.push_back(tmp_str);
- assert(this->tmpPosition_.size() == 0);
- assert(this->position_.size() == 0);
- }
-}
-
-
-void TxtReader::extractPOS(const string & tmp_str) {
- if (tmp_str.find("e") != std::string::npos) {
- throw BadScientificNotation(tmp_str, this->fileName_);
- }
-
- if (tmp_str.find("E") != std::string::npos) {
- throw BadScientificNotation(tmp_str, this->fileName_);
- }
-
- int ret;
- try {
- ret = stoi(tmp_str.c_str(), NULL);
- } catch ( const std::exception &e) {
- throw BadConversion(tmp_str, this->fileName_);
- }
- this->tmpPosition_.push_back(ret);
-}
-
-
-void TxtReader::reshapeContentToInfo() {
- assert(this->info_.size() == 0);
- for (size_t i = 0; i < this->content_.size(); i++) {
- this->info_.push_back(this->content_[i][0]);
- }
-}
-
-
-void TxtReader::removeMarkers() {
- assert(this->keptContent_.size() == (size_t)0);
- for (auto const &value : this->indexOfContentToBeKept) {
- this->keptContent_.push_back(this->content_[value]);
- }
-
- this->content_.clear();
- assert(this->content_.size() == (size_t)0);
- this->content_ = this->keptContent_;
- this->keptContent_.clear();
-
- if (this->nInfoLines_ == 1) {
- this->info_.clear();
- this->reshapeContentToInfo();
- }
- this->nLoci_ = this->content_.size();
-}
diff --git a/src/txtReader.hpp b/src/txtReader.hpp
deleted file mode 100755
index 372ff5a..0000000
--- a/src/txtReader.hpp
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * dEploid is used for deconvoluting Plasmodium falciparum genome from
- * mix-infected patient sample.
- *
- * Copyright (C) 2016-2017 University of Oxford
- *
- * Author: Sha (Joe) Zhu
- *
- * This file is part of dEploid.
- *
- * dEploid is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see .
- *
- */
-
-
-#ifndef TXTREADER
-#define TXTREADER
-
-#include
-#include
-#include "variantIndex.hpp"
-#include "exceptions.hpp"
-#include "gzstream/gzstream.h"
-
-class TxtReader : public VariantIndex {
- #ifdef UNITTEST
- friend class TestPanel;
- friend class TestTxtReader;
- friend class TestInitialHaplotypes;
- #endif
- friend class McmcMachinery;
- friend class UpdateSingleHap;
- friend class UpdatePairHap;
- friend class UpdateHap;
- friend class Panel;
- friend class DEploidIO;
- private:
- // Members
- string fileName_;
- ifstream inFile;
- igzstream inFileGz;
- bool isCompressed_;
- bool isCompressed() const { return this->isCompressed_; }
- void setIsCompressed(const bool compressed) {
- this->isCompressed_ = compressed; }
- void checkFileCompressed();
- // content is a matrix of n.loci by n.strains, i.e. content length is n.loci
- vector < vector < double > > keptContent_;
- // info_ only refers to the first column of the content
- vector info_;
-
- vector header_;
- size_t nInfoLines_;
-
- int tmpChromInex_;
- vector < int > tmpPosition_;
-
- // Methods
- void extractChrom(const string & tmp_str);
- void extractPOS(const string & tmp_str);
- void extractHeader(const string &line);
- void reshapeContentToInfo();
-
- public: // move the following to private
- vector < vector < double > > content_;
- TxtReader() {}
- virtual void readFromFile(const char inchar[]) {
- this->readFromFileBase(inchar); }
- void readFromFileBase(const char inchar[]);
- virtual ~TxtReader() {}
- void removeMarkers();
-};
-
-
-
-
-class ExcludeMarker : public TxtReader {
- public:
- ExcludeMarker():TxtReader() {}
- ~ExcludeMarker() {}
-};
-
-
-#endif
diff --git a/src/variantIndex.cpp b/src/variantIndex.cpp
deleted file mode 100755
index 613c9dc..0000000
--- a/src/variantIndex.cpp
+++ /dev/null
@@ -1,234 +0,0 @@
-/*
- * dEploid is used for deconvoluting Plasmodium falciparum genome from
- * mix-infected patient sample.
- *
- * Copyright (C) 2016-2017 University of Oxford
- *
- * Author: Sha (Joe) Zhu
- *
- * This file is part of dEploid.
- *
- * dEploid is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see .
- *
- */
-
-#include // find
-#include
-#include "exceptions.hpp"
-#include "txtReader.hpp"
-#include "variantIndex.hpp"
-
-using std::min;
-using std::endl;
-
-
-VariantIndex::VariantIndex() {
- this->init();
-}
-
-void VariantIndex::findWhoToBeKept(ExcludeMarker* excludedMarkers) {
- dout << " Starts findWhoToBeKept " << endl;
- assert(this->indexOfContentToBeKept.size() == 0);
- assert(this->indexOfPosToBeKept.size() == 0);
-
- for (size_t chromI = 0; chromI < this->chrom_.size(); chromI++) {
- dout << " Going through chrom "<< chrom_[chromI];
- vector < size_t > tmpindexOfPosToBeKept;
-
- // detemine if something needs to be removed from the current chrom.
- vector::iterator chromIt = find(excludedMarkers->chrom_.begin(),
- excludedMarkers->chrom_.end(),
- this->chrom_[chromI]);
-
- size_t hapIndex = indexOfChromStarts_[chromI];
- size_t chromIndexInExclude = std::distance(
- excludedMarkers->chrom_.begin(),
- chromIt);
- for (size_t posI = 0; posI < this->position_[chromI].size(); posI++) {
- if (chromIt == excludedMarkers->chrom_.end()) {
- indexOfContentToBeKept.push_back(hapIndex);
- tmpindexOfPosToBeKept.push_back(posI);
- } else if (
- std::find(excludedMarkers->position_[chromIndexInExclude].begin(),
- excludedMarkers->position_[chromIndexInExclude].end(),
- this->position_[chromI][posI]) ==
- excludedMarkers->position_[chromIndexInExclude].end()) {
- indexOfContentToBeKept.push_back(hapIndex);
- tmpindexOfPosToBeKept.push_back(posI);
- }
- hapIndex++;
- }
- indexOfPosToBeKept.push_back(tmpindexOfPosToBeKept);
-
- dout << " keeping " << tmpindexOfPosToBeKept.size() << endl;
- }
- assert(indexOfPosToBeKept.size() == this->chrom_.size());
-
- dout << indexOfContentToBeKept.size() << " sites need to be Kept " << endl;
-}
-
-
-void VariantIndex::findAndKeepMarkers(ExcludeMarker* excludedMarkers) {
- this->setDoneGetIndexOfChromStarts(false);
- dout << " findAndKeepMarkers called" <findWhoToBeKept(excludedMarkers);
- this->removePositions();
- this->getIndexOfChromStarts();
- this->removeMarkers();
-}
-
-
-void VariantIndex::findWhoToBeKeptGivenIndex(
- const vector & givenIndex) {
- dout << " Starts findWhoToBeKeptGivenIndex " << endl;
- assert(this->indexOfContentToBeKept.size() == 0);
- indexOfContentToBeKept = vector (givenIndex.begin(),
- givenIndex.end());
- // assert(this->indexOfPosToBeKept.size() == 0);
- vector oldChrom = vector (chrom_.begin(), chrom_.end());
- this->chrom_.clear();
-
- vector < vector < int > > oldposition = this->position_;
- this->position_.clear();
-
- for (size_t chromI = 0; chromI < oldChrom.size(); chromI++) {
- dout << " Going through chrom "<< oldChrom[chromI] << endl;
- size_t hapIndex = indexOfChromStarts_[chromI];
- vector newTrimmedPos;
- for (size_t posI = 0; posI < oldposition[chromI].size(); posI++) {
- if (std::find(givenIndex.begin(), givenIndex.end(), hapIndex)
- != givenIndex.end()) {
- if (newTrimmedPos.size() == 0) {
- this->chrom_.push_back(oldChrom[chromI]);
- }
- newTrimmedPos.push_back(oldposition[chromI][posI]);
- }
-
- hapIndex++;
- }
- this->position_.push_back(newTrimmedPos);
- }
-
- // assert(indexOfPosToBeKept.size() == this->chrom_.size());
-
- dout << indexOfContentToBeKept.size() << " sites need to be Kept " << endl;
-}
-
-
-void VariantIndex::findWhoToBeKeptGivenIndexHalf(
- const vector & givenIndex) {
- dout << " Starts findWhoToBeKeptGivenIndexHalf " << endl;
- assert(this->indexOfContentToBeKept.size() == 0);
- indexOfContentToBeKept = vector (givenIndex.begin(),
- givenIndex.end());
- // assert(this->indexOfPosToBeKept.size() == 0);
- vector oldChrom = vector (chrom_.begin(), chrom_.end());
- this->chrom_.clear();
-
- vector < vector < int > > oldposition = this->position_;
- this->position_.clear();
-
- for (size_t chromI = 0; chromI < oldChrom.size(); chromI++) {
- // if (chromI%2 == 0) {
- if (chromI > 10) {
- dout << " Going through chrom "<< oldChrom[chromI] << " ";
- size_t hapIndex = indexOfChromStarts_[chromI];
- vector newTrimmedPos;
- for (size_t posI = 0; posI < oldposition[chromI].size(); posI++) {
- if (std::find(givenIndex.begin(), givenIndex.end(), hapIndex)
- != givenIndex.end()) {
- if (newTrimmedPos.size() == 0) {
- this->chrom_.push_back(oldChrom[chromI]);
- }
- newTrimmedPos.push_back(oldposition[chromI][posI]);
- }
- hapIndex++;
- }
- // std::cout << newTrimmedPos.size () << endl;
- this->position_.push_back(newTrimmedPos);
- }
- }
-
- // assert(indexOfPosToBeKept.size() == this->chrom_.size());
-
- dout << indexOfContentToBeKept.size() << " sites need to be Kept, with "
- << this->chrom_.size() << endl;
-}
-
-
-void VariantIndex::removePositions() {
- assert(this->keptPosition_.size() == (size_t)0);
- for (size_t chromI = 0; chromI < this->chrom_.size(); chromI++) {
- vector tmpKeptPosition_;
- for (size_t i = 0; i < this->indexOfPosToBeKept[chromI].size(); i++) {
- tmpKeptPosition_.push_back(
- this->position_[chromI][this->indexOfPosToBeKept[chromI][i]]);
- }
- this->keptPosition_.push_back(tmpKeptPosition_);
- }
- this->position_.clear();
- this->position_ = this->keptPosition_;
- this->keptPosition_.clear();
-}
-
-
-void VariantIndex::getIndexOfChromStarts() {
- assert(this->doneGetIndexOfChromStarts_ == false);
- this->indexOfChromStarts_.clear();
- assert(indexOfChromStarts_.size() == 0);
- this->indexOfChromStarts_.push_back((size_t)0);
- for (size_t tmpChrom = 0;
- indexOfChromStarts_.size() < this->chrom_.size(); tmpChrom++ ) {
- indexOfChromStarts_.push_back(
- indexOfChromStarts_.back()+this->position_[tmpChrom].size());
- }
- assert(indexOfChromStarts_.size() == this->chrom_.size());
- this->setDoneGetIndexOfChromStarts(true);
-}
-
-
-void VariantIndex::getIndexOfChromStartsHalf() {
- assert(this->doneGetIndexOfChromStarts_ == false);
- this->indexOfChromStarts_.clear();
- assert(indexOfChromStarts_.size() == 0);
- this->indexOfChromStarts_.push_back((size_t)0);
- for (size_t tmpChrom = 0;
- indexOfChromStarts_.size() < this->chrom_.size(); tmpChrom++ ) {
- indexOfChromStarts_.push_back(
- indexOfChromStarts_.back()+this->position_[tmpChrom].size());
- }
- assert(indexOfChromStarts_.size() == this->chrom_.size());
- this->setDoneGetIndexOfChromStarts(true);
-}
-
-
-void VariantIndex::init() {
- this->setDoneGetIndexOfChromStarts(false);
-}
-
-
-void VariantIndex::removeMarkers() { throw VirtualFunctionShouldNotBeCalled();}
-
-
-void VariantIndex::checkSortedPositions(string fileName) {
- for (size_t chromI = 0; chromI < this->chrom_.size(); chromI++) {
- int previousPosition_ = 0;
- for (auto const &value : this->position_[chromI]) {
- if (value < previousPosition_) {
- throw PositionUnsorted(fileName);
- }
- previousPosition_ = value;
- }
- }
-}
diff --git a/src/variantIndex.hpp b/src/variantIndex.hpp
deleted file mode 100755
index ebfc9cd..0000000
--- a/src/variantIndex.hpp
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * dEploid is used for deconvoluting Plasmodium falciparum genome from
- * mix-infected patient sample.
- *
- * Copyright (C) 2016-2017 University of Oxford
- *
- * Author: Sha (Joe) Zhu
- *
- * This file is part of dEploid.
- *
- * dEploid is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see .
- *
- */
-
-#ifndef DEPLOID_SRC_VARIANTINDEX_HPP_
-#define DEPLOID_SRC_VARIANTINDEX_HPP_
-
-
-#include
-#include
-#include
-#include "global.hpp"
-
-
-using std::vector;
-using std::string;
-
-class ExcludeMarker;
-
-class VariantIndex {
- #ifdef UNITTEST
- friend class TestPanel;
- friend class TestTxtReader;
- friend class TestInitialHaplotypes;
- #endif
- friend class DEploidIO;
- friend class TxtReader;
- friend class ExcludeMarker;
- friend class Panel;
- friend class IBDrecombProbs;
- friend class VcfReader;
- friend class Rvcf;
-
- private:
- // Members
- size_t nLoci_;
- bool doneGetIndexOfChromStarts_;
- vector chrom_;
- vector < size_t > indexOfChromStarts_;
- vector < vector < int> > position_;
- vector < vector < int> > keptPosition_;
- /* Index of content/info will be kept */
- vector < size_t > indexOfContentToBeKept;
- /* Index of positions entry to be kept,
- * this will have the same size as this->chrom_, */
- vector < vector < size_t > > indexOfPosToBeKept;
-
- // Getter and Setter
- bool doneGetIndexOfChromStarts() const {
- return doneGetIndexOfChromStarts_; }
- void setDoneGetIndexOfChromStarts(const bool setTo) {
- this->doneGetIndexOfChromStarts_ = setTo; }
-
- // Methods
- void init();
- void getIndexOfChromStarts();
- void getIndexOfChromStartsHalf();
- void removePositions();
- void checkSortedPositions(string fileName);
- void findAndKeepMarkers(ExcludeMarker* excludedMarkers);
- virtual void removeMarkers();
- // For removing markers and positions
- void findWhoToBeKept(ExcludeMarker* excludedMarkers);
- void findWhoToBeKeptGivenIndex(const vector & givenIndex);
- void findWhoToBeKeptGivenIndexHalf(const vector & givenIndex);
-
- public:
- VariantIndex();
- virtual ~VariantIndex() {}
-};
-
-
-#endif // DEPLOID_SRC_VARIANTINDEX_HPP_
diff --git a/src/vcf b/src/vcf
new file mode 160000
index 0000000..46093c7
--- /dev/null
+++ b/src/vcf
@@ -0,0 +1 @@
+Subproject commit 46093c7ebb3bfad4b3e930ecef7a65f55f13e3af
diff --git a/src/vcfReader.cpp b/src/vcfReader.cpp
deleted file mode 100755
index c43fbbd..0000000
--- a/src/vcfReader.cpp
+++ /dev/null
@@ -1,481 +0,0 @@
-/*
- * dEploid is used for deconvoluting Plasmodium falciparum genome from
- * mix-infected patient sample.
- *
- * Copyright (C) 2016-2017 University of Oxford
- *
- * Author: Sha (Joe) Zhu
- *
- * This file is part of dEploid.
- *
- * dEploid is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see .
- *
- */
-
-#include // std::min
-#include // assert
-#include // std::cout
-#include "vcfReader.hpp"
-#include "global.hpp"
-
-// using namespace std;
-using std::min;
-
-/*! Initialize vcf file, search for the end of the vcf header.
- * Extract the first block of data ( "buffer_length" lines ) into buff
- */
-VcfReader::VcfReader(string fileName, string sampleName, bool extractPlaf) {
- /*! Initialize by read in the vcf header file */
- this->init(fileName);
- this->sampleName_ = sampleName;
- this->extractPlaf_ = extractPlaf;
- this->sampleColumnIndex_ = 0;
- this->readHeader();
- this->readVariants();
- this->getChromList();
- this->getIndexOfChromStarts();
- assert(this->doneGetIndexOfChromStarts_ == true);
- this->checkSortedPositions(fileName);
-}
-
-
-void VcfReader::checkFileCompressed() {
- FILE *f = NULL;
- f = fopen(this->fileName_.c_str(), "rb");
- if (f == NULL) {
- throw InvalidInputFile(this->fileName_);
- }
-
- unsigned char magic[2];
-
- size_t freadResults = fread(reinterpret_cast(magic), 1, 2, f);
- dout << "Check if vcf is compressed " << freadResults << std::endl;
- this->setIsCompressed((static_cast(magic[0]) == 0x1f) &&
- (static_cast(magic[1]) == 0x8b));
- fclose(f);
-}
-
-void VcfReader::init(string fileName) {
- /*! Initialize other VcfReader class members
- */
- this->fileName_ = fileName;
-
- this->checkFileCompressed();
-
- if ( this->isCompressed() ) {
- this->inFileGz.open(this->fileName_.c_str(), std::ios::in);
- } else {
- this->inFile.open(this->fileName_.c_str(), std::ios::in);
- }
-}
-
-
-void VcfReader::finalize() {
- for (size_t i = 0; i < this->variants.size(); i++) {
- this->refCount.push_back(static_cast(this->variants[i].ref));
- this->altCount.push_back(static_cast(this->variants[i].alt));
- this->vqslod.push_back(this->variants[i].vqslod);
- this->plaf.push_back(this->variants[i].plaf);
- }
-
- if ( this->isCompressed() ) {
- this->inFileGz.close();
- } else {
- this->inFile.close();
- }
-}
-
-
-void VcfReader::readHeader() {
- if (this->isCompressed()) {
- if (!inFileGz.good()) {
- throw InvalidInputFile(this->fileName_);
- }
- } else {
- if (!inFile.good()) {
- throw InvalidInputFile(this->fileName_);
- }
- }
-
- if (this->isCompressed()) {
- getline(inFileGz, this->tmpLine_);
- } else {
- getline(inFile, this->tmpLine_);
- }
-
- while (this->tmpLine_.size() > 0) {
- if (this->tmpLine_[0] == '#') {
- if (this->tmpLine_[1] == '#') {
- this->headerLines.push_back(this->tmpLine_);
- if (this->isCompressed()) {
- getline(inFileGz, this->tmpLine_);
- } else {
- getline(inFile, this->tmpLine_);
- }
- } else {
- this->checkFeilds();
- break; // end of the header
- }
- } else {
- this->checkFeilds();
- }
- }
-
- dout << " There are " << this->headerLines.size()
- << " lines in the header." <tmpLine_.size()) {
- field_end = min(this->tmpLine_.find('\t', feild_start),
- this->tmpLine_.find('\n', feild_start));
- this->tmpStr_ = this->tmpLine_.substr(feild_start,
- field_end-feild_start);
- string correctFieldValue;
- switch ( field_index ) {
- case 0: correctFieldValue = "#CHROM"; break;
- case 1: correctFieldValue = "POS"; break;
- case 2: correctFieldValue = "ID"; break;
- case 3: correctFieldValue = "REF"; break;
- case 4: correctFieldValue = "ALT"; break;
- case 5: correctFieldValue = "QUAL"; break;
- case 6: correctFieldValue = "FILTER"; break;
- case 7: correctFieldValue = "INFO"; break;
- case 8: correctFieldValue = "FORMAT"; break;
- }
-
- if (this->tmpStr_ != correctFieldValue && field_index < 9) {
- throw VcfInvalidHeaderFieldNames(correctFieldValue, this->tmpStr_);
- }
-
- if ((field_index == 9) & (this->sampleName_ == "")) {
- this->sampleName_ = this->tmpStr_;
- }
-
- if (this->tmpStr_ == this->sampleName_) {
- sampleColumnIndex_ = field_index;
- break;
- }
-
- feild_start = field_end+1;
- field_index++;
- } // End of while loop: field_end < line.size()
-
- if (sampleColumnIndex_ == 0) {
- throw InvalidSampleInVcf(this->sampleName_, this->fileName_);
- }
- assert(sampleColumnIndex_ != 0);
- assert(printSampleName());
-}
-
-
-void VcfReader::readVariants() {
- if (this->isCompressed()) {
- getline(inFileGz, this->tmpLine_);
- } else {
- getline(inFile, this->tmpLine_);
- }
- while (inFile.good() && this->tmpLine_.size() > 0) {
- VariantLine newVariant(this->tmpLine_, this->sampleColumnIndex_,
- this->extractPlaf_);
- // check variantLine quality
- this->variants.push_back(newVariant);
- if (this->isCompressed()) {
- getline(inFileGz, this->tmpLine_);
- } else {
- getline(inFile, this->tmpLine_);
- }
- }
-}
-
-
-void VcfReader::getChromList() {
- this->chrom_.clear();
- this->position_.clear();
-
- assert(this->chrom_.size() == (size_t)0);
- assert(this->position_.size() == (size_t)0);
-
- string previousChrom("");
- vector positionOfChrom_;
-
- for (size_t i = 0; i < this->variants.size() ; i++) {
- if (previousChrom != this->variants[i].chromStr &&
- previousChrom.size() > (size_t)0) {
- this->chrom_.push_back(previousChrom);
- this->position_.push_back(positionOfChrom_);
- positionOfChrom_.clear();
- }
- positionOfChrom_.push_back(
- stoi(this->variants[i].posStr.c_str(), NULL));
- previousChrom = this->variants[i].chromStr;
- }
-
- this->chrom_.push_back(previousChrom);
- this->position_.push_back(positionOfChrom_);
- assert(this->position_.size() == this->chrom_.size());
-}
-
-
-void VcfReader::removeMarkers() {
- assert(this->keptVariants.size() == (size_t)0);
- for (auto const &value : this->indexOfContentToBeKept) {
- this->keptVariants.push_back(this->variants[value]);
- }
- this->variants.clear();
- this->variants = this->keptVariants;
- this->keptVariants.clear();
- this->nLoci_ = this->variants.size();
- dout << " Vcf number of loci kept = " << this->nLoci_ << std::endl;
-}
-
-
-void VcfReader::findLegitSnpsGivenVQSLOD(double vqslodThreshold) {
- this->legitVqslodAt.clear();
- assert(legitVqslodAt.size() == 0);
- for (size_t i = 0; i < this->vqslod.size(); i++) {
- if (this->vqslod[i] > vqslodThreshold) {
- this->legitVqslodAt.push_back(i);
- }
- }
-}
-
-
-void VcfReader::findLegitSnpsGivenVQSLODHalf(double vqslodThreshold) {
- this->legitVqslodAt.clear();
- assert(legitVqslodAt.size() == 0);
-
- for (size_t chromI = 0;
- chromI < this->indexOfChromStarts_.size(); chromI++) {
- size_t start = this->indexOfChromStarts_[chromI];
- size_t length = this->position_[chromI].size();
- // if (chromI%2 == 0) {
- if (chromI > 10) {
- for ( size_t ii = start ; ii < (start+length); ii++ ) {
- if (this->vqslod[ii] > vqslodThreshold) {
- // std::cout << ii <legitVqslodAt.push_back(ii);
- }
- }
- }
- }
-}
-
-
-VariantLine::VariantLine(string tmpLine, size_t sampleColumnIndex,
- bool extractPlaf) {
- this->init(tmpLine, sampleColumnIndex, extractPlaf);
-
- while (fieldEnd_ < this->tmpLine_.size()) {
- fieldEnd_ = min(this->tmpLine_.find('\t', feildStart_),
- this->tmpLine_.find('\n', feildStart_));
- this->tmpStr_ = this->tmpLine_.substr(feildStart_,
- fieldEnd_-feildStart_);
- switch (fieldIndex_) {
- case 0: this->extract_field_CHROM(); break;
- case 1: this->extract_field_POS(); break;
- case 2: this->extract_field_ID(); break;
- case 3: this->extract_field_REF() ; break;
- case 4: this->extract_field_ALT() ; break;
- case 5: this->extract_field_QUAL() ; break;
- case 6: this->extract_field_FILTER(); break;
- case 7: this->extract_field_INFO(); break;
- case 8: this->extract_field_FORMAT(); break;
- }
-
- if (fieldIndex_ == this->sampleColumnIndex_) {
- this->extract_field_VARIANT();
- break;
- }
- feildStart_ = fieldEnd_+1;
- fieldIndex_++;
- }
-}
-
-
-void VariantLine::init(string tmpLine, size_t sampleColumnIndex,
- bool extractPlaf) {
- this->tmpLine_ = tmpLine;
- this->feildStart_ = 0;
- this->fieldEnd_ = 0;
- this->fieldIndex_ = 0;
- this->adFieldIndex_ = -1;
- this->sampleColumnIndex_ = sampleColumnIndex;
- this->extractPlaf_ = extractPlaf;
-}
-
-
-void VariantLine::extract_field_CHROM() {
- this->chromStr = this->tmpStr_;
-}
-
-
-void VariantLine::extract_field_POS() {
- this->posStr = this->tmpStr_;
-}
-
-
-void VariantLine::extract_field_ID() {
- this->idStr = this->tmpStr_;
-}
-
-
-void VariantLine::extract_field_REF() {
- this->refStr = this->tmpStr_;
-}
-
-
-void VariantLine::extract_field_ALT() {
- this->altStr = this->tmpStr_;
-}
-
-
-void VariantLine::extract_field_QUAL() { // Check for PASS
- this->qualStr = this->tmpStr_;
-}
-
-
-void VariantLine::extract_field_FILTER() {
- this->filterStr = this->tmpStr_;
-}
-
-void VariantLine::extract_field_INFO() {
- this->infoStr = this->tmpStr_;
- bool vqslodNotFound = true;
- size_t feild_start = 0;
- size_t field_end = 0;
- int field_index = 0;
-
- while (field_end < this->tmpStr_.size()) {
- field_end = min(this->tmpStr_.find(';', feild_start),
- this->tmpStr_.find('\t', feild_start));
- string filterFiledStr = this->tmpStr_.substr(feild_start,
- field_end-feild_start);
- size_t eqIndex = filterFiledStr.find('=', 0);
- string filterFiledName = filterFiledStr.substr(0, eqIndex);
- if ( "VQSLOD" == filterFiledName ) {
- vqslodNotFound = false;
- vqslod = stod(filterFiledStr.substr(eqIndex+1,
- filterFiledStr.size()));
- }
-
- if ( ("AF" == filterFiledName) & (this->extractPlaf_) ) {
- plaf = stod(filterFiledStr.substr(eqIndex+1,
- filterFiledStr.size()));
- }
-
- feild_start = field_end+1;
- field_index++;
- }
-
- if (vqslodNotFound) {
- throw VcfVQSLODNotFound(this->tmpStr_);
- }
-
- assert(vqslodNotFound == false);
-}
-
-
-void VariantLine::extract_field_FORMAT() {
- this->formatStr = this->tmpStr_;
- size_t feild_start = 0;
- size_t field_end = 0;
- size_t field_index = 0;
-
- while (field_end < this->formatStr.size()) {
- field_end = min(this->formatStr.find(':', feild_start),
- this->formatStr.find('\n', feild_start));
- if ( "AD" == this->formatStr.substr(feild_start,
- field_end-feild_start)) {
- adFieldIndex_ = field_index;
- break;
- }
- feild_start = field_end+1;
- field_index++;
- }
- if (adFieldIndex_ == -1) {
- throw VcfCoverageFieldNotFound(this->tmpStr_);
- }
- assert(adFieldIndex_ > -1);
-}
-
-
-int maybe_dot_to_integer(const string& s) {
- if (s == ".")
- return 0;
- else
- return stoi(s);
-}
-
-int n_fields(const string& s, char delim = ',') {
- int count = 0;
- for (auto ch : s)
- if (ch == delim)
- count++;
- return count+1;
-}
-
-void VariantLine::extract_field_VARIANT() {
- size_t feild_start = 0;
- size_t field_end = 0;
- int field_index = 0;
-
- while (field_end < this->tmpStr_.size()) {
- field_end = min(this->tmpStr_.find(':', feild_start),
- this->tmpStr_.find('\n', feild_start));
- if (field_index == adFieldIndex_) {
- string adStr = this->tmpStr_.substr(feild_start,
- field_end-feild_start);
- try {
- int n = n_fields(adStr);
- if (n != 2)
- throw std::runtime_error(
- "there should be exactly 2 AD entries, but found " +
- std::to_string(n) +
- ".\n Wrong number of ALT alleles!.");
-
- size_t commaIndex = adStr.find(',', 0);
- auto ref_AD = adStr.substr(0, commaIndex);
- auto alt_AD = adStr.substr(commaIndex+1, adStr.size());
-
- ref = maybe_dot_to_integer(ref_AD);
- alt = maybe_dot_to_integer(alt_AD);
- break;
- }
- catch (std::exception& e) {
- std::cerr << "Error parsing vcf AD field: '"
- << adStr << "': " << e.what() << "\n";
- exit(1);
- }
- }
- feild_start = field_end+1;
- field_index++;
- }
-}
-
-/*
-void VcfReader::findLegitSnpsGivenVQSLODandWsfGt0(double vqslodThreshold) {
- assert(legitVqslodAt.size() == 0);
- for (size_t i = 0; i < this->vqslod.size(); i++) {
- if (this->vqslod[i] > vqslodThreshold & this->altCount[i] > 0) {
- this->legitVqslodAt.push_back(i);
- }
- }
-}
-*/
diff --git a/src/vcfReader.hpp b/src/vcfReader.hpp
deleted file mode 100755
index ad1208a..0000000
--- a/src/vcfReader.hpp
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
- * dEploid is used for deconvoluting Plasmodium falciparum genome from
- * mix-infected patient sample.
- *
- * Copyright (C) 2016-2017 University of Oxford
- *
- * Author: Sha (Joe) Zhu
- *
- * This file is part of dEploid.
- *
- * dEploid is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see .
- *
- */
-
-#include /* strtol, strtod */
-#include /* string */
-#include /* vector */
-#include
-#include "exceptions.hpp"
-#include "variantIndex.hpp"
-#include "gzstream/gzstream.h"
-
-#ifndef DEPLOID_SRC_VCFREADER_HPP_
-#define DEPLOID_SRC_VCFREADER_HPP_
-
-// using namespace std;
-// using std::endl;
-
-struct InvalidVcf : public InvalidInput{
- explicit InvalidVcf(string str):InvalidInput(str) {
- }
- virtual ~InvalidVcf() throw() {}
- // virtual const char* what () const noexcept {
- // return throwMsg.c_str();
- // }
-};
-
-
-struct VcfInvalidHeaderFieldNames : public InvalidVcf{
- explicit VcfInvalidHeaderFieldNames(string str1, string str2):
- InvalidVcf(str1) {
- this->reason = " VCF field header expects: ";
- throwMsg = this->reason + this->src + ", " + str2 + " was found!";
- }
- ~VcfInvalidHeaderFieldNames() throw() {}
-};
-
-
-struct VcfInvalidVariantEntry : public InvalidVcf{
- explicit VcfInvalidVariantEntry(string str):InvalidVcf(str) {}
- virtual ~VcfInvalidVariantEntry() throw() {}
- // virtual const char* what () const noexcept {
- // return throwMsg.c_str();
- // }
-};
-
-
-struct VcfCoverageFieldNotFound : public VcfInvalidVariantEntry{
- explicit VcfCoverageFieldNotFound(string str):VcfInvalidVariantEntry(str) {
- this->reason = "Coverage field AD was not found in the FORMAT, found: ";
- throwMsg = this->reason + this->src;
- }
- ~VcfCoverageFieldNotFound() throw() {}
-};
-
-
-struct VcfVQSLODNotFound : public VcfInvalidVariantEntry{
- explicit VcfVQSLODNotFound(string str):VcfInvalidVariantEntry(str) {
- this->reason = "VQSLOD was note found, check: ";
- throwMsg = this->reason + this->src;
- }
- ~VcfVQSLODNotFound() throw() {}
-};
-
-
-class VariantLine{
- friend class VcfReader;
- friend class DEploidIO;
- public:
- explicit VariantLine(string tmpLine, size_t sampleColumnIndex,
- bool extractPlaf = false);
- ~VariantLine() {}
-
- private:
- string tmpLine_;
- string tmpStr_;
-
- void init(string tmpLine, size_t sampleColumnIndex, bool extractPlaf);
-
- void extract_field_CHROM();
- void extract_field_POS();
- void extract_field_ID();
- void extract_field_REF();
- void extract_field_ALT();
- void extract_field_QUAL();
- void extract_field_FILTER();
- void extract_field_INFO();
- void extract_field_FORMAT();
- void extract_field_VARIANT();
-
- size_t feildStart_;
- size_t fieldEnd_;
- size_t fieldIndex_;
-
- string chromStr;
- string posStr;
- string idStr;
- string refStr;
- string altStr;
- string qualStr;
- string filterStr;
- string infoStr;
- string formatStr;
- int adFieldIndex_;
-
- int ref;
- int alt;
- double vqslod;
- double plaf;
- size_t sampleColumnIndex_;
- bool extractPlaf_;
-};
-
-
-
-/*! \brief VCF file reader @ingroup group_data */
-class VcfReader : public VariantIndex {
-#ifdef UNITTEST
- friend class TestVCF;
-#endif
- friend class DEploidIO;
- public:
- // Constructors and Destructors
- explicit VcfReader(string fileName, string sampleName,
- bool extractPlaf = false);
- // parse in exclude sites
- ~VcfReader() {}
-
- // Members and Methods
- vector headerLines; // calling from python, need to be public
- vector refCount; // calling from python, need to be public
- vector altCount; // calling from python, need to be public
- vector vqslod; // calling from python, need to be public
- vector plaf;
- void finalize(); // calling from python, need to be public
-
- private:
- vector variants;
- vector keptVariants;
- vector legitVqslodAt;
- string fileName_;
- ifstream inFile;
- igzstream inFileGz;
- bool isCompressed_;
- bool isCompressed() const { return this->isCompressed_; }
- void setIsCompressed(const bool compressed) {
- this->isCompressed_ = compressed; }
- void checkFileCompressed();
- string sampleName_;
- size_t sampleColumnIndex_;
- string tmpLine_;
- string tmpStr_;
- bool extractPlaf_;
-
- // Methods
- void init(string fileName);
- void readVariants();
- void readHeader();
- void checkFeilds();
- void findLegitSnpsGivenVQSLOD(double vqslodThreshold);
- void findLegitSnpsGivenVQSLODHalf(double vqslodThreshold);
-
- void getChromList();
- void removeMarkers();
-
- // Debug tools
- bool printSampleName();
-};
-
-#endif // DEPLOID_SRC_VCFREADER_HPP_
diff --git a/tests/unittest/test_txtReader.cpp b/tests/unittest/test_txtReader.cpp
deleted file mode 100755
index 47b1b83..0000000
--- a/tests/unittest/test_txtReader.cpp
+++ /dev/null
@@ -1,317 +0,0 @@
-#include
-#include
-#include "src/txtReader.hpp"
-
-class TestTxtReader : public CppUnit::TestCase {
- CPPUNIT_TEST_SUITE( TestTxtReader );
- CPPUNIT_TEST( testMainConstructor );
- CPPUNIT_TEST( checkSizeBefore );
- CPPUNIT_TEST( checkInfo );
- CPPUNIT_TEST(compressed_checkInfo);
- CPPUNIT_TEST( checkRemoveMarkers );
- CPPUNIT_TEST( checkSizeAfter );
- CPPUNIT_TEST( checkSortedPositions );
- CPPUNIT_TEST( checkBadConversion );
- CPPUNIT_TEST( checkBadScientificNotation );
- CPPUNIT_TEST_SUITE_END();
-
- private:
- TxtReader * txtReader_;
- TxtReader * afterExclude_;
- ExcludeMarker* excludedMarkers_;
-
- public:
- void setUp() {
- this->txtReader_ = new TxtReader();
- this->afterExclude_ = new TxtReader();
- this->excludedMarkers_ = new ExcludeMarker();
- this->txtReader_->readFromFile("data/testData/txtReaderForTesting.txt" );
- this->afterExclude_->readFromFile("data/testData/txtReaderForTestingAfterExclude.txt");
- this->excludedMarkers_->readFromFile("data/testData/txtReaderForTestingToBeExclude.txt" );
- }
-
- void tearDown() {
- delete txtReader_;
- delete afterExclude_;
- delete excludedMarkers_;
- }
-
- void testMainConstructor(){ }
-
- void checkBadConversion(){
- TxtReader tmp;
- CPPUNIT_ASSERT_THROW ( tmp.readFromFile("data/testData/bad.plaf.txt"), BadConversion );
- }
-
- void checkBadScientificNotation(){
- TxtReader tmp2;
- CPPUNIT_ASSERT_THROW ( tmp2.readFromFile("data/testData/bad.plaf_scientific.txt"), BadScientificNotation );
-
- TxtReader tmp3;
- CPPUNIT_ASSERT_THROW ( tmp3.readFromFile("data/testData/bad.plaf_scientificE.txt"), BadScientificNotation );
- }
-
- void checkSortedPositions() {
- TxtReader tmp;
- CPPUNIT_ASSERT_THROW ( tmp.readFromFile("data/testData/bad.plaf_badpos.txt"), PositionUnsorted );
- }
-
- void checkSizeBefore(){
- CPPUNIT_ASSERT_EQUAL ( (size_t)100, this->txtReader_->info_.size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)100, this->txtReader_->nLoci_ );
- CPPUNIT_ASSERT_EQUAL ( (size_t)6, this->txtReader_->chrom_.size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)6, this->txtReader_->position_.size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)12, this->txtReader_->position_[0].size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)16, this->txtReader_->position_[1].size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)17, this->txtReader_->position_[2].size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)24, this->txtReader_->position_[3].size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)20, this->txtReader_->position_[4].size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)11, this->txtReader_->position_[5].size() );
-
- CPPUNIT_ASSERT_EQUAL ( (size_t)0, this->excludedMarkers_->info_.size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)3, this->excludedMarkers_->chrom_.size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)3, this->excludedMarkers_->position_.size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)4, this->excludedMarkers_->position_[0].size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)1, this->excludedMarkers_->position_[1].size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)2, this->excludedMarkers_->position_[2].size() );
-
- CPPUNIT_ASSERT_EQUAL ( (size_t)93, this->afterExclude_->info_.size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)93, this->afterExclude_->nLoci_ );
- CPPUNIT_ASSERT_EQUAL ( (size_t)6, this->afterExclude_->chrom_.size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)6, this->afterExclude_->position_.size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)12, this->afterExclude_->position_[0].size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)12, this->afterExclude_->position_[1].size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)17, this->afterExclude_->position_[2].size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)23, this->afterExclude_->position_[3].size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)20, this->afterExclude_->position_[4].size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)9, this->afterExclude_->position_[5].size() );
- }
-
- void checkInfo(){
- //Pf3D7_01_v3 93157 0
- CPPUNIT_ASSERT_EQUAL ( (int)93157, this->txtReader_->position_[0][0] );
- CPPUNIT_ASSERT_EQUAL ( (double)0, this->txtReader_->info_[0] );
- CPPUNIT_ASSERT_EQUAL ( (double)0, this->txtReader_->content_[0][0] );
-
- //Pf3D7_01_v3 95518 46
- CPPUNIT_ASSERT_EQUAL ( (int)95518, this->txtReader_->position_[0][4] );
- CPPUNIT_ASSERT_EQUAL ( (double)46, this->txtReader_->info_[6-2] );
- CPPUNIT_ASSERT_EQUAL ( (double)46, this->txtReader_->content_[6-2][0] );
-
- //Pf3D7_02_v3 100608 35
- CPPUNIT_ASSERT_EQUAL ( (int)100608, this->txtReader_->position_[1][0] );
- CPPUNIT_ASSERT_EQUAL ( (double)35, this->txtReader_->info_[14-2] ); // it is at line 14 in the file, minus 1 for the header, minus another 1 for 0-indexing
- CPPUNIT_ASSERT_EQUAL ( (double)35, this->txtReader_->content_[14-2][0] );
-
- //Pf3D7_02_v3 107823 0
- CPPUNIT_ASSERT_EQUAL ( (int)107823, this->txtReader_->position_[1][4] );
- CPPUNIT_ASSERT_EQUAL ( (double)0, this->txtReader_->info_[18-2] );
- CPPUNIT_ASSERT_EQUAL ( (double)0, this->txtReader_->content_[18-2][0] );
-
- //Pf3D7_02_v3 111040 20
- CPPUNIT_ASSERT_EQUAL ( (int)111040, this->txtReader_->position_[1][5] );
- CPPUNIT_ASSERT_EQUAL ( (double)20, this->txtReader_->info_[19-2] );
- CPPUNIT_ASSERT_EQUAL ( (double)20, this->txtReader_->content_[19-2][0] );
-
- //Pf3D7_02_v3 114473 49
- CPPUNIT_ASSERT_EQUAL ( (int)114473, this->txtReader_->position_[1][9] );
- CPPUNIT_ASSERT_EQUAL ( (double)49, this->txtReader_->info_[23-2] );
- CPPUNIT_ASSERT_EQUAL ( (double)49, this->txtReader_->content_[23-2][0] );
-
- //Pf3D7_04_v3 144877 47
- CPPUNIT_ASSERT_EQUAL ( (int)144877, this->txtReader_->position_[3][0] );
- CPPUNIT_ASSERT_EQUAL ( (double)47, this->txtReader_->info_[47-2] );
- CPPUNIT_ASSERT_EQUAL ( (double)47, this->txtReader_->content_[47-2][0] );
-
- //Pf3D7_06_v3 180170 63
- CPPUNIT_ASSERT_EQUAL ( (int)180170, this->txtReader_->position_[5][0] );
- CPPUNIT_ASSERT_EQUAL ( (double)63, this->txtReader_->info_[91-2] );
- CPPUNIT_ASSERT_EQUAL ( (double)63, this->txtReader_->content_[92-2][0] );
-
- //Pf3D7_06_v3 180183 60
- CPPUNIT_ASSERT_EQUAL ( (int)180183, this->txtReader_->position_[5][4] );
- CPPUNIT_ASSERT_EQUAL ( (double)60, this->txtReader_->info_[95-2] );
- CPPUNIT_ASSERT_EQUAL ( (double)60, this->txtReader_->content_[95-2][0] );
- }
-
-
- void compressed_checkInfo(){
- TxtReader gztxtReader;
- gztxtReader.readFromFile("data/testData/txtReaderForTesting.txt.gz" );
- //Pf3D7_01_v3 93157 0
- CPPUNIT_ASSERT_EQUAL ( (int)93157, gztxtReader.position_[0][0] );
- CPPUNIT_ASSERT_EQUAL ( (double)0, gztxtReader.info_[0] );
- CPPUNIT_ASSERT_EQUAL ( (double)0, gztxtReader.content_[0][0] );
-
- //Pf3D7_01_v3 95518 46
- CPPUNIT_ASSERT_EQUAL ( (int)95518, gztxtReader.position_[0][4] );
- CPPUNIT_ASSERT_EQUAL ( (double)46, gztxtReader.info_[6-2] );
- CPPUNIT_ASSERT_EQUAL ( (double)46, gztxtReader.content_[6-2][0] );
-
- //Pf3D7_02_v3 100608 35
- CPPUNIT_ASSERT_EQUAL ( (int)100608, gztxtReader.position_[1][0] );
- CPPUNIT_ASSERT_EQUAL ( (double)35, gztxtReader.info_[14-2] ); // it is at line 14 in the file, minus 1 for the header, minus another 1 for 0-indexing
- CPPUNIT_ASSERT_EQUAL ( (double)35, gztxtReader.content_[14-2][0] );
-
- //Pf3D7_02_v3 107823 0
- CPPUNIT_ASSERT_EQUAL ( (int)107823, gztxtReader.position_[1][4] );
- CPPUNIT_ASSERT_EQUAL ( (double)0, gztxtReader.info_[18-2] );
- CPPUNIT_ASSERT_EQUAL ( (double)0, gztxtReader.content_[18-2][0] );
-
- //Pf3D7_02_v3 111040 20
- CPPUNIT_ASSERT_EQUAL ( (int)111040, gztxtReader.position_[1][5] );
- CPPUNIT_ASSERT_EQUAL ( (double)20, gztxtReader.info_[19-2] );
- CPPUNIT_ASSERT_EQUAL ( (double)20, gztxtReader.content_[19-2][0] );
-
- //Pf3D7_02_v3 114473 49
- CPPUNIT_ASSERT_EQUAL ( (int)114473, gztxtReader.position_[1][9] );
- CPPUNIT_ASSERT_EQUAL ( (double)49, gztxtReader.info_[23-2] );
- CPPUNIT_ASSERT_EQUAL ( (double)49, gztxtReader.content_[23-2][0] );
-
- //Pf3D7_04_v3 144877 47
- CPPUNIT_ASSERT_EQUAL ( (int)144877, gztxtReader.position_[3][0] );
- CPPUNIT_ASSERT_EQUAL ( (double)47, gztxtReader.info_[47-2] );
- CPPUNIT_ASSERT_EQUAL ( (double)47, gztxtReader.content_[47-2][0] );
-
- //Pf3D7_06_v3 180170 63
- CPPUNIT_ASSERT_EQUAL ( (int)180170, gztxtReader.position_[5][0] );
- CPPUNIT_ASSERT_EQUAL ( (double)63, gztxtReader.info_[91-2] );
- CPPUNIT_ASSERT_EQUAL ( (double)63, gztxtReader.content_[92-2][0] );
-
- //Pf3D7_06_v3 180183 60
- CPPUNIT_ASSERT_EQUAL ( (int)180183, gztxtReader.position_[5][4] );
- CPPUNIT_ASSERT_EQUAL ( (double)60, gztxtReader.info_[95-2] );
- CPPUNIT_ASSERT_EQUAL ( (double)60, gztxtReader.content_[95-2][0] );
- }
-
-
- void checkRemoveMarkers(){
- CPPUNIT_ASSERT_NO_THROW ( this->txtReader_->findAndKeepMarkers (excludedMarkers_) );
- CPPUNIT_ASSERT_NO_THROW ( this->afterExclude_->findAndKeepMarkers (excludedMarkers_) );
- }
-
- void checkSizeAfter(){
- this->txtReader_->findAndKeepMarkers (excludedMarkers_);
- CPPUNIT_ASSERT_EQUAL ( (size_t)93, this->txtReader_->info_.size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)93, this->txtReader_->nLoci_ );
- CPPUNIT_ASSERT_EQUAL ( (size_t)6, this->txtReader_->chrom_.size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)6, this->txtReader_->position_.size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)12, this->txtReader_->position_[0].size() );
- //Pf3D7_02_v3 100608
- //Pf3D7_02_v3 107823
- //Pf3D7_02_v3 111040
- //Pf3D7_02_v3 114473
- CPPUNIT_ASSERT_EQUAL ( (size_t)(16-4), this->txtReader_->position_[1].size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)17, this->txtReader_->position_[2].size() );
- //Pf3D7_04_v3 144877
- CPPUNIT_ASSERT_EQUAL ( (size_t)(24-1), this->txtReader_->position_[3].size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)20, this->txtReader_->position_[4].size() );
- //Pf3D7_06_v3 180170
- //Pf3D7_06_v3 180183
- CPPUNIT_ASSERT_EQUAL ( (size_t)(11-2), this->txtReader_->position_[5].size() );
-
- CPPUNIT_ASSERT_EQUAL ( (size_t)0, this->excludedMarkers_->info_.size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)3, this->excludedMarkers_->chrom_.size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)3, this->excludedMarkers_->position_.size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)4, this->excludedMarkers_->position_[0].size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)1, this->excludedMarkers_->position_[1].size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)2, this->excludedMarkers_->position_[2].size() );
-
- //Pf3D7_01_v3 93157 0
- CPPUNIT_ASSERT_EQUAL ( (int)93157, this->txtReader_->position_[0][0] );
- CPPUNIT_ASSERT_EQUAL ( (double)0, this->txtReader_->info_[0] );
- CPPUNIT_ASSERT_EQUAL ( (double)0, this->txtReader_->content_[0][0] );
-
- //Pf3D7_01_v3 95518 46
- CPPUNIT_ASSERT_EQUAL ( (int)95518, this->txtReader_->position_[0][4] );
- CPPUNIT_ASSERT_EQUAL ( (double)46, this->txtReader_->info_[6-2] );
- CPPUNIT_ASSERT_EQUAL ( (double)46, this->txtReader_->content_[6-2][0] );
-
- //Pf3D7_02_v3 101269 33
- CPPUNIT_ASSERT_EQUAL ( (int)101269, this->txtReader_->position_[1][0] );
- CPPUNIT_ASSERT_EQUAL ( (double)33, this->txtReader_->info_[14-2] );
- CPPUNIT_ASSERT_EQUAL ( (double)33, this->txtReader_->content_[14-2][0] );
-
- //Pf3D7_02_v3 112038 28
- CPPUNIT_ASSERT_EQUAL ( (int)112038, this->txtReader_->position_[1][4] );
- CPPUNIT_ASSERT_EQUAL ( (double)28, this->txtReader_->info_[18-2] );
- CPPUNIT_ASSERT_EQUAL ( (double)28, this->txtReader_->content_[18-2][0] );
-
- //Pf3D7_02_v3 113396 0
- CPPUNIT_ASSERT_EQUAL ( (int)113396, this->txtReader_->position_[1][5] );
- CPPUNIT_ASSERT_EQUAL ( (double)0, this->txtReader_->info_[19-2] );
- CPPUNIT_ASSERT_EQUAL ( (double)0, this->txtReader_->content_[19-2][0] );
-
- //Pf3D7_02_v3 121693 0
- CPPUNIT_ASSERT_EQUAL ( (int)121693, this->txtReader_->position_[1][9] );
- CPPUNIT_ASSERT_EQUAL ( (double)0, this->txtReader_->info_[23-2] );
- CPPUNIT_ASSERT_EQUAL ( (double)0, this->txtReader_->content_[23-2][0] );
-
- //Pf3D7_04_v3 149598 0
- CPPUNIT_ASSERT_EQUAL ( (int)149598, this->txtReader_->position_[3][4] );
- CPPUNIT_ASSERT_EQUAL ( (double)0, this->txtReader_->info_[47-2] );
- CPPUNIT_ASSERT_EQUAL ( (double)0, this->txtReader_->content_[47-2][0] );
-
- //Pf3D7_06_v3 180217 0
- CPPUNIT_ASSERT_EQUAL ( (int)180217, this->txtReader_->position_[5][5] );
- CPPUNIT_ASSERT_EQUAL ( (double)0, this->txtReader_->info_[91-2] );
- CPPUNIT_ASSERT_EQUAL ( (double)0, this->txtReader_->content_[91-2][0] );
-
- //Pf3D7_06_v3 180270 0
- CPPUNIT_ASSERT_EQUAL ( (int)180270, this->txtReader_->position_[5][8] );
- CPPUNIT_ASSERT_EQUAL ( (double)0, this->txtReader_->info_[94-2] );
- CPPUNIT_ASSERT_EQUAL ( (double)0, this->txtReader_->content_[94-2][0] );
-
- this->afterExclude_->findAndKeepMarkers (excludedMarkers_);
- CPPUNIT_ASSERT_EQUAL ( (size_t)93, this->afterExclude_->info_.size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)93, this->afterExclude_->nLoci_ );
- CPPUNIT_ASSERT_EQUAL ( (size_t)6, this->afterExclude_->chrom_.size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)6, this->afterExclude_->position_.size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)12, this->afterExclude_->position_[0].size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)(16-4), this->afterExclude_->position_[1].size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)17, this->afterExclude_->position_[2].size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)(24-1), this->afterExclude_->position_[3].size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)20, this->afterExclude_->position_[4].size() );
- CPPUNIT_ASSERT_EQUAL ( (size_t)(11-2), this->afterExclude_->position_[5].size() );
-
- CPPUNIT_ASSERT_EQUAL (this->txtReader_->info_.size(), this->afterExclude_->info_.size() );
- CPPUNIT_ASSERT_EQUAL (this->txtReader_->content_.size(), this->afterExclude_->content_.size() );
- CPPUNIT_ASSERT_EQUAL (this->txtReader_->keptContent_.size(), this->afterExclude_->keptContent_.size() );
- CPPUNIT_ASSERT_EQUAL (this->txtReader_->keptContent_.size(), (size_t)0 );
- CPPUNIT_ASSERT_EQUAL (this->txtReader_->nInfoLines_, this->afterExclude_->nInfoLines_ );
- CPPUNIT_ASSERT_EQUAL (this->txtReader_->nInfoLines_, (size_t)1);
-
- for ( size_t i = 0; i < 93; i++) {
- CPPUNIT_ASSERT_EQUAL (this->txtReader_->info_[i], this->afterExclude_->info_[i] );
- CPPUNIT_ASSERT_EQUAL (this->txtReader_->content_[i][0], this->afterExclude_->content_[i][0] );
- }
-
-
- for ( size_t i = 0; i < 93; i++) {
- CPPUNIT_ASSERT_EQUAL (this->txtReader_->info_[i], this->afterExclude_->info_[i] );
- CPPUNIT_ASSERT_EQUAL (this->txtReader_->content_[i][0], this->afterExclude_->content_[i][0] );
- }
-
- CPPUNIT_ASSERT_EQUAL (this->txtReader_->chrom_.size(), this->afterExclude_->chrom_.size() );
- CPPUNIT_ASSERT_EQUAL (this->txtReader_->chrom_.size(), (size_t)6 );
- CPPUNIT_ASSERT_EQUAL (this->txtReader_->indexOfChromStarts_.size(), this->afterExclude_->indexOfChromStarts_.size() );
- CPPUNIT_ASSERT_EQUAL (this->txtReader_->indexOfChromStarts_.size(), (size_t)6 );
- CPPUNIT_ASSERT_EQUAL (this->txtReader_->position_.size(), this->afterExclude_->position_.size() );
- CPPUNIT_ASSERT_EQUAL (this->txtReader_->position_.size(), (size_t)6 );
- CPPUNIT_ASSERT_EQUAL (this->txtReader_->keptPosition_.size(), this->afterExclude_->keptPosition_.size() );
- CPPUNIT_ASSERT_EQUAL (this->txtReader_->keptPosition_.size(), (size_t)0 );
-
- for ( size_t i = 0; i < 6; i++) {
- CPPUNIT_ASSERT_EQUAL (this->txtReader_->chrom_[i], this->afterExclude_->chrom_[i] );
- CPPUNIT_ASSERT_EQUAL (this->txtReader_->indexOfChromStarts_[i], this->afterExclude_->indexOfChromStarts_[i] );
- CPPUNIT_ASSERT_EQUAL (this->txtReader_->position_[i].size(), this->afterExclude_->position_[i].size() );
- for ( size_t j = 0; j < this->txtReader_->position_[i].size(); j++) {
- CPPUNIT_ASSERT_EQUAL (this->txtReader_->position_[i][j], this->afterExclude_->position_[i][j] );
-
- }
- }
- CPPUNIT_ASSERT_EQUAL (this->txtReader_->nLoci_, this->afterExclude_->nLoci_ );
- }
-};
-
-CPPUNIT_TEST_SUITE_REGISTRATION( TestTxtReader );
diff --git a/tests/unittest/test_vcfReader.cpp b/tests/unittest/test_vcfReader.cpp
deleted file mode 100755
index 87d198d..0000000
--- a/tests/unittest/test_vcfReader.cpp
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * dEploid is used for deconvoluting Plasmodium falciparum genome from
- * mix-infected patient sample.
- *
- * Copyright (C) 2016-2017 University of Oxford
- *
- * Author: Sha (Joe) Zhu
- *
- * This file is part of dEploid.
- *
- * dEploid is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see .
- *
- */
-
-#include
-#include
-#include "src/vcfReader.hpp"
-
-class TestVCF : public CppUnit::TestCase {
- CPPUNIT_TEST_SUITE(TestVCF);
- CPPUNIT_TEST(testMainConstructor);
- CPPUNIT_TEST(testInvalidSampleInVcf);
- CPPUNIT_TEST_SUITE_END();
-
- private:
- VcfReader* vcf_;
- VcfReader* vcfGz_;
- double eps;
-
- public:
- void setUp() {
- this->vcf_ = new VcfReader("data/testData/PG0390-C.test.vcf",
- "PG0390-C");
- this->vcfGz_ = new VcfReader("data/testData/PG0390-C.test.vcf.gz",
- "PG0390-C");
- this->eps = 0.00000000001;
- }
-
- void tearDown() {
- delete this->vcf_;
- delete this->vcfGz_;
- }
-
- void testMainConstructor() {
- CPPUNIT_ASSERT_NO_THROW(this->vcf_->finalize());
- CPPUNIT_ASSERT_DOUBLES_EQUAL(8.08, this->vcf_->vqslod[0], this->eps);
- CPPUNIT_ASSERT_DOUBLES_EQUAL(0.617, this->vcf_->vqslod[1], this->eps);
- CPPUNIT_ASSERT_EQUAL(this->vcf_->vqslod.size(),
- this->vcf_->refCount.size());
- }
-
- void testInvalidSampleInVcf() {
- CPPUNIT_ASSERT_THROW(VcfReader("data/testData/PG0390-C.test.vcf.gz",
- "PG0370-C"), InvalidSampleInVcf);
- }
-};
-
-CPPUNIT_TEST_SUITE_REGISTRATION(TestVCF);