Skip to content

Commit

Permalink
Merge pull request #409 from pachterlab/devel
Browse files Browse the repository at this point in the history
Devel
  • Loading branch information
Yenaled authored Nov 1, 2023
2 parents 5380a77 + 4ff3fdc commit 34e5814
Show file tree
Hide file tree
Showing 11 changed files with 516 additions and 112 deletions.
2 changes: 1 addition & 1 deletion ext/bifrost/src/Common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ static const char alpha[4] = {'A','C','G','T'};

BFG_INLINE bool isDNA(const char c) {

static const size_t DNAbits[4] = {0x0ULL, 0x10008A0010008AULL, 0x0ULL, 0x0ULL};
static const size_t DNAbits[4] = {0x0ULL, static_cast<size_t>(0x10008A0010008AULL), 0x0ULL, 0x0ULL};

return static_cast<bool>((DNAbits[c >> 6] >> (c & 0x3F)) & 0x1ULL);
}
Expand Down
6 changes: 4 additions & 2 deletions ext/bifrost/src/TinyBitmap.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "TinyBitmap.hpp"
#include <random>

TinyBitmap::TinyBitmap() : tiny_bmp(nullptr) {}

Expand Down Expand Up @@ -1133,6 +1134,7 @@ bool TinyBitmap::test(const bool verbose) {
}

t_bmp.clear();
std::mt19937 g(44);

for (size_t j = 0; j < nb_rounds; ++j){

Expand Down Expand Up @@ -1162,7 +1164,7 @@ bool TinyBitmap::test(const bool verbose) {

if (verbose) cout << "TinyBitmap::test(): Removing values in random order from 0 to 65536-49 (round " << j << ")" << endl;

std::random_shuffle(val_added.begin(), val_added.end());
std::shuffle(val_added.begin(), val_added.end(), g);

for (const auto val : val_added){

Expand Down Expand Up @@ -1281,7 +1283,7 @@ bool TinyBitmap::test(const bool verbose) {

if (verbose) cout << "TinyBitmap::test(): Removing values in random order (round " << j << ")" << endl;

std::random_shuffle(val_added.begin(), val_added.end());
std::shuffle(val_added.begin(), val_added.end(), g);

for (const auto val : val_added){

Expand Down
13 changes: 13 additions & 0 deletions func_tests/runtests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,19 @@ cmdexec "$kallisto bus -o $test_dir/bus10xv3 -t 1 -i $test_dir/basic7.idx -x 10X
cmdexec "$bustools sort -o $test_dir/bus10xv3/output.s.bus -t 12 $test_dir/bus10xv3/output.bus"
checkcmdoutput "$bustools text -p $test_dir/bus10xv3/output.s.bus|cut -f1,2,4" 3991a31f0078b30e7f755b2df7a77106

# Test D-list and distinguish

cat $test_dir/simple.fasta|sed 's/^\>t/\>/g' > $test_dir/simple_distinguish.fasta
cmdexec "$kallisto index -t 2 -i $test_dir/basic7_dlist.idx --d-list=$test_dir/polyA.fasta -k 7 $test_dir/simple.fasta"
cmdexec "$kallisto bus -t 1 --num -x bulk -o $test_dir/busdlist -i $test_dir/basic7_dlist.idx $test_dir/small.fastq.gz"
checkcmdoutput "$bustools text -p $test_dir/busdlist/output.bus|wc -l|tr -d ' '" 1dcca23355272056f04fe8bf20edfce0
cmdexec "$kallisto index --distinguish -t 2 -i $test_dir/basic7_dlist.idx --d-list=$test_dir/polyA.fasta -k 7 $test_dir/simple.fasta"
cmdexec "$kallisto bus -t 1 --num -x bulk -o $test_dir/busdlist -i $test_dir/basic7_dlist.idx $test_dir/small.fastq.gz"
checkcmdoutput "$bustools text -p $test_dir/busdlist/output.bus|cut -f3" db2d82c814b606ac9deb38634f7659ae
cmdexec "$kallisto index --distinguish -t 2 -i $test_dir/basic7_dlist.idx --d-list=$test_dir/polyA.fasta -k 7 $test_dir/simple_distinguish.fasta"
cmdexec "$kallisto bus -t 1 --num -x bulk -o $test_dir/busdlist -i $test_dir/basic7_dlist.idx $test_dir/small.fastq.gz"
checkcmdoutput "$bustools text -p $test_dir/busdlist/output.bus|cut -f3" ce82711968bfe6d3b4a13be3e6b8ea00


# Try processing demultiplexed bulk RNA-seq with strand-specificity with EM and kallisto quant-tcc (and compare with quant)

Expand Down
10 changes: 8 additions & 2 deletions src/BlockArray.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -370,8 +370,14 @@ class BlockArray {
}

void print() const {
for (const auto& b : poly) {
std::cout << "[" << b.lb << ", " << b.ub << "): " << b.val << ", ";

if (flag == 0) return;
if (flag == 1) {
std::cout << "[" << mono.lb << ", " << mono.ub << "): " << mono.val;
} else {
for (const auto& b : poly) {
std::cout << "[" << b.lb << ", " << b.ub << "): " << b.val << ", ";
}
}
std::cout << std::endl;
}
Expand Down
43 changes: 43 additions & 0 deletions src/EMAlgorithm.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,49 @@ struct EMAlgorithm {

~EMAlgorithm() {}

static std::vector<double> read_priors(std::string path) {

std::cerr << "[ em] reading priors from file " << path << std::endl;
std::ifstream pfile(path);
std::string line;
std::vector<double> priors;
double p, sum = .0;

while (std::getline(pfile, line)) {

p = std::stod(line);
priors.push_back(p);
sum += p;
}

// If sum is greater than sum + eps, we got raw counts instead of priors,
// so we need to normalize them.
// We add a pseudocount to all raw counts, because if we set a zero prior,
// there is no returning from that
if (sum >= 1. + 1e-3) {

sum += priors.size();
for (size_t i = 0; i < priors.size(); ++i) {

priors[i] = (priors[i] + 1.) / sum;
}
}

return std::move(priors);
}

void set_priors(std::vector<double>& priors) {

if (alpha_.size() == priors.size()) {

alpha_.assign(priors.begin(), priors.end());
} else {

std::cerr << "[ em] number of priors does not match number of transcripts." << std::endl;
std::cerr << " defaulting to uniform priors." << std::endl;
}
}

void run(size_t n_iter = 10000, size_t min_rounds=50, bool verbose = true, bool recomputeEffLen = true) {
std::vector<double> next_alpha(alpha_.size(), 0.0);

Expand Down
Loading

0 comments on commit 34e5814

Please sign in to comment.