Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/pachterlab/kallisto
Browse files Browse the repository at this point in the history
  • Loading branch information
pmelsted committed Oct 29, 2015
2 parents 7374cae + bb78504 commit f0678a2
Show file tree
Hide file tree
Showing 15 changed files with 946 additions and 423 deletions.
4 changes: 3 additions & 1 deletion src/EMAlgorithm.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ struct EMAlgorithm {
ecmap_(index.ecmap),
counts_(counts),
target_names_(index.target_names_),
post_bias_(4096,1.0),
alpha_(num_trans_, 1.0/num_trans_), // uniform distribution over targets
rho_(num_trans_, 0.0),
rho_set_(false),
Expand Down Expand Up @@ -72,7 +73,7 @@ struct EMAlgorithm {
int i;
for (i = 0; i < n_iter; ++i) {
if (recomputeEffLen && (i == min_rounds || i == min_rounds + 500)) {
eff_lens_ = update_eff_lens(all_fl_means, tc_, index_, alpha_, eff_lens_);
eff_lens_ = update_eff_lens(all_fl_means, tc_, index_, alpha_, eff_lens_, post_bias_);
weight_map_ = calc_weights (tc_.counts, ecmap_, eff_lens_);
}

Expand Down Expand Up @@ -289,6 +290,7 @@ struct EMAlgorithm {
const std::vector<std::string>& target_names_;
const std::vector<double>& all_fl_means;
std::vector<double> eff_lens_;
std::vector<double> post_bias_;
WeightMap weight_map_;
std::vector<double> alpha_;
std::vector<double> alpha_before_zeroes_;
Expand Down
18 changes: 12 additions & 6 deletions src/H5Writer.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
#include "H5Writer.h"

void H5Writer::init(const std::string& fname, int num_bootstrap, int num_processed, uint compression,
size_t index_version, const std::string& shell_call,
const std::string& start_time)
void H5Writer::init(const std::string& fname, int num_bootstrap, int num_processed,
const std::vector<int>& fld,const std::vector<int>& preBias, const std::vector<double>& postBias,
uint compression, size_t index_version,
const std::string& shell_call, const std::string& start_time)
{
primed_ = true;
num_bootstrap_ = num_bootstrap;
Expand All @@ -19,7 +20,12 @@ void H5Writer::init(const std::string& fname, int num_bootstrap, int num_process
std::vector<int> n_proc {num_processed};
vector_to_h5(n_proc, aux_, "num_processed", false, compression_);


vector_to_h5(fld, aux_, "fld", false, compression_);

vector_to_h5(preBias, aux_, "bias_observed", false, compression_);

vector_to_h5(postBias, aux_, "bias_normalized", false, compression_);

// info about run
std::vector<std::string> kal_version{ KALLISTO_VERSION };
vector_to_h5(kal_version, aux_, "kallisto_version", true, compression_);
Expand Down Expand Up @@ -73,7 +79,7 @@ H5Converter::H5Converter(const std::string& h5_fname, const std::string& out_dir
// <aux info>
// read target ids
read_dataset(aux_, "ids", targ_ids_);
std::cerr << "[h5dump] number of targets: " << targ_ids_.size() <<
std::cerr << "[h5dump] number of targets: " << targ_ids_.size() <<
std::endl;

n_targs_ = targ_ids_.size();
Expand All @@ -94,7 +100,7 @@ H5Converter::H5Converter(const std::string& h5_fname, const std::string& out_dir
read_dataset(aux_, "num_processed", n_proc_vec);
n_proc_ = n_proc_vec[0];


std::cerr << "[h5dump] number of bootstraps: " << n_bs_ << std::endl;
// </aux info>
if (n_bs_ > 0) {
Expand Down
6 changes: 3 additions & 3 deletions src/H5Writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ class H5Writer {
H5Writer() : primed_(false) {}
~H5Writer();

void init(const std::string& fname, int num_bootstrap, int num_processed, uint compression,
size_t index_version, const std::string& shell_call,
const std::string& start_time);
void init(const std::string& fname, int num_bootstrap, int num_processed,
const std::vector<int>& fld, const std::vector<int>& preBias, const std::vector<double>& postBias, uint compression, size_t index_version,
const std::string& shell_call, const std::string& start_time);

void write_main(const EMAlgorithm& em,
const std::vector<std::string>& targ_ids,
Expand Down
73 changes: 56 additions & 17 deletions src/MinCollector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,21 +33,11 @@ void MinCollector::init_mean_fl_trunc(double mean, double sd) {
has_mean_fl_trunc = true;
}

int MinCollector::collect(std::vector<std::pair<KmerEntry,int>>& v1,
std::vector<std::pair<KmerEntry,int>>& v2, bool nonpaired) {

/*if (v1.empty()) {
return -1;
} else if (!nonpaired && v2.empty()) {
return -1;
}
*/

int MinCollector::intersectKmers(std::vector<std::pair<KmerEntry,int>>& v1,
std::vector<std::pair<KmerEntry,int>>& v2, bool nonpaired, std::vector<int> &u) const {
std::vector<int> u1 = intersectECs(v1);
std::vector<int> u2 = intersectECs(v2);

std::vector<int> u;

if (u1.empty() && u2.empty()) {
return -1;
}
Expand All @@ -72,13 +62,55 @@ int MinCollector::collect(std::vector<std::pair<KmerEntry,int>>& v1,
if (u.empty()) {
return -1;
}
return increaseCount(u);
return 1;
}

int MinCollector::collect(std::vector<std::pair<KmerEntry,int>>& v1,
std::vector<std::pair<KmerEntry,int>>& v2, bool nonpaired) {
std::vector<int> u;
int r = intersectKmers(v1, v2, nonpaired, u);
if (r != -1) {
return increaseCount(u);
} else {
return -1;
}
}

int MinCollector::findEC(const std::vector<int>& u) const {
if (u.empty()) {
return -1;
}
if (u.size() == 1) {
return u[0];
}
auto search = index.ecmapinv.find(u);
if (search != index.ecmapinv.end()) {
return search ->second;
} else {
return -1;
}
}

int MinCollector::increaseCount(const std::vector<int>& u) {
int ec = findEC(u);

if (u.empty()) {
return -1;
} else {
if (ec != -1) {
++counts[ec];
return ec;
} else {
auto necs = counts.size();
//index.ecmap.insert({necs,u});
index.ecmap.push_back(u);
index.ecmapinv.insert({u,necs});
counts.push_back(1);
return necs;
}
}

/* -- removable
if (u.size() == 1) {
int ec = u[0];
++counts[ec];
Expand All @@ -98,6 +130,7 @@ int MinCollector::increaseCount(const std::vector<int>& u) {
counts.push_back(1);
return necs;
}
*/
}

int MinCollector::decreaseCount(const int ec) {
Expand Down Expand Up @@ -234,7 +267,9 @@ double MinCollector::get_mean_frag_len() const {
return mean_fl;
}

void MinCollector::get_mean_frag_lens_trunc() const {


void MinCollector::compute_mean_frag_lens_trunc() {

std::vector<int> counts(MAX_FRAG_LEN, 0);
std::vector<double> mass(MAX_FRAG_LEN, 0.0);
Expand All @@ -246,12 +281,12 @@ void MinCollector::get_mean_frag_lens_trunc() const {
mass[i] = static_cast<double>( flens[i] * i) + mass[i-1];
counts[i] = flens[i] + counts[i-1];
if (counts[i] > 0) {
const_cast<double&>(mean_fl_trunc[i]) = mass[i] / static_cast<double>(counts[i]);
mean_fl_trunc[i] = mass[i] / static_cast<double>(counts[i]);
}
// std::cerr << "--- " << i << '\t' << mean_fl_trunc[i] << std::endl;
}

const_cast<bool&>(has_mean_fl_trunc) = true;
has_mean_fl_trunc = true;

std::cerr << "[quant] estimated average fragment length: " <<
mean_fl_trunc[MAX_FRAG_LEN - 1] << std::endl;
Expand Down Expand Up @@ -285,6 +320,10 @@ int hexamerToInt(const char *s, bool revcomp) {
}

bool MinCollector::countBias(const char *s1, const char *s2, const std::vector<std::pair<KmerEntry,int>> v1, const std::vector<std::pair<KmerEntry,int>> v2, bool paired) {
return countBias(s1,s2,v1,v2,paired,bias5);
}

bool MinCollector::countBias(const char *s1, const char *s2, const std::vector<std::pair<KmerEntry,int>> v1, const std::vector<std::pair<KmerEntry,int>> v2, bool paired, std::vector<int>& biasOut) const {

const int pre = 2, post = 4;

Expand Down Expand Up @@ -356,7 +395,7 @@ bool MinCollector::countBias(const char *s1, const char *s2, const std::vector<s
*/

if (hex5 >=0) { // && (!paired || hex3 >= 0)) {
bias5[hex5]++;
biasOut[hex5]++;
//bias3[hex3]++;
} else {
return false;
Expand Down
7 changes: 6 additions & 1 deletion src/MinCollector.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ struct MinCollector {
int decreaseCount(const int ec);

std::vector<int> intersectECs(std::vector<std::pair<KmerEntry,int>>& v) const;
int intersectKmers(std::vector<std::pair<KmerEntry,int>>& v1,
std::vector<std::pair<KmerEntry,int>>& v2, bool nonpaired, std::vector<int> &u) const;
int findEC(const std::vector<int>& u) const;


void write(std::ostream& o) {
for (int id = 0; id < counts.size(); id++) {
Expand All @@ -58,12 +62,13 @@ struct MinCollector {
void loadCounts(ProgramOptions& opt);

bool countBias(const char *s1, const char *s2, const std::vector<std::pair<KmerEntry,int>> v1, const std::vector<std::pair<KmerEntry,int>> v2, bool paired);
bool countBias(const char *s1, const char *s2, const std::vector<std::pair<KmerEntry,int>> v1, const std::vector<std::pair<KmerEntry,int>> v2, bool paired, std::vector<int>& biasOut) const;

// DEPRECATED
double get_mean_frag_len() const;

// compute the conditional mean of each target given the FLD
void get_mean_frag_lens_trunc() const;
void compute_mean_frag_lens_trunc();

// this function should only be used for SE data
void init_mean_fl_trunc(double mean, double sd);
Expand Down
Loading

0 comments on commit f0678a2

Please sign in to comment.