Skip to content

Commit

Permalink
Merge pull request #106 from pachterlab/devel
Browse files Browse the repository at this point in the history
Update to v0.42.5
  • Loading branch information
pmelsted committed Apr 4, 2016
2 parents f0678a2 + f48933d commit e5957cf
Show file tree
Hide file tree
Showing 13 changed files with 578 additions and 45 deletions.
69 changes: 69 additions & 0 deletions gulpfile.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
var gulp = require('gulp');
var exec = require('child_process').exec;

// Shell command lines used by the gulp tasks in this file. All paths are
// relative to the directory gulp is started from.

// CMake configure step and make step, both executed inside build/.
var cMakeCommand = 'cd build; cmake ..;';
var buildCommand = 'cd build; make;';

// Build the kallisto index for the bundled test transcripts.
var indexCommand = [
  'build/src/kallisto',
  'index -i test/transcripts.kidx',
  'test/transcripts.fasta.gz'
].join(' ');

// Quantify the paired-end test reads (10 bootstraps, 2 threads).
var pairedEndCommand = [
  'build/src/kallisto',
  'quant -i test/transcripts.kidx',
  '-b 10',
  '-t 2',
  '-o test/paired_end',
  'test/reads_1.fastq.gz test/reads_2.fastq.gz'
].join(' ');

// Quantify the first read file in single-end mode; -l and -s are passed
// explicitly alongside --single.
var singleEndCommand = [
  'build/src/kallisto',
  'quant -i test/transcripts.kidx',
  '-b 10',
  '-t 2',
  '-l 200 -s 3',
  '-o test/single_end',
  '--single',
  'test/reads_1.fastq.gz'
].join(' ');

// Echo the build command once when the gulpfile is loaded.
console.log('build command: ' + buildCommand);

// Re-run the 'build' task whenever a C++ source or header file directly
// under src/ changes. One watcher is registered per glob.
gulp.task('watch', function() {
  var watchedGlobs = ['src/*.cpp', 'src/*.h', 'src/*.hpp'];
  watchedGlobs.forEach(function(glob) {
    gulp.watch(glob, ['build']);
  });
});

// Compile kallisto by running buildCommand through a shell.
//
// exec() is asynchronous, so the task accepts gulp's completion callback and
// invokes it only after the child process has exited. Without it gulp treats
// the task as finished immediately and dependent tasks (pairedEnd, singleEnd)
// may start before the build is done. A failed build is reported to gulp
// via done(error), so dependents do not run against a stale or missing binary.
//
// NOTE(review): 'watch' is a dependency of this task while 'watch' itself
// triggers 'build' on file changes — confirm this cycle is intended.
gulp.task('build', ['watch'], function(done) {
  exec(buildCommand, function(error, standardOutput, standardError) {
    if (error) {
      console.error('There was an error: ' + error);
    }
    console.log(standardOutput);
    console.log(standardError);
    // error is null on success, which gulp interprets as normal completion.
    done(error);
  });
});

// Run the paired-end quantification test command after 'build'.
//
// The task takes gulp's completion callback and calls it from the exec()
// callback, so gulp knows when the run has actually finished and whether it
// failed. The underlying error is included in the message, as in 'build'.
gulp.task('pairedEnd', ['build'], function(done) {
  exec(pairedEndCommand, function(error, standardOut, standardError) {
    if (error) {
      console.error('There was a pairedEnd error: ' + error);
    }
    console.log(standardOut);
    console.log(standardError);
    // error is null on success, which gulp interprets as normal completion.
    done(error);
  });
});

// Run the single-end quantification test command after 'build'.
//
// The task takes gulp's completion callback and calls it from the exec()
// callback, so gulp knows when the run has actually finished and whether it
// failed. The underlying error is included in the message, as in 'build'.
gulp.task('singleEnd', ['build'], function(done) {
  exec(singleEndCommand, function(error, standardOut, standardError) {
    if (error) {
      console.error('There was a singleEnd error: ' + error);
    }
    console.log(standardOut);
    console.log(standardError);
    // error is null on success, which gulp interprets as normal completion.
    done(error);
  });
});

// gulp.task('default', ['install', 'watch'], function() {});
// gulp.task('compile', ['build', 'watch'], function() {});
// gulp.task('pairedEnd', ['compile'], function() {});
// gulp.task('singleEnd', ['compile'], function() {});
// Default task: an alias that runs both quantification tests; it has no body of its own.
gulp.task('default', ['pairedEnd', 'singleEnd']);
4 changes: 2 additions & 2 deletions src/EMAlgorithm.h
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ struct EMAlgorithm {
}

//std::cout << chcount << std::endl;
if (chcount == 0) {
if (chcount == 0 && i > min_rounds) {

stopEM=true;
}
Expand Down Expand Up @@ -275,7 +275,7 @@ struct EMAlgorithm {
}
}

std::cout << sum_big << " " << count_big << " " << n << std::endl;
//std::cout << sum_big << " " << count_big << " " << n << std::endl;

std::copy(em_start.alpha_before_zeroes_.begin(), em_start.alpha_before_zeroes_.end(),
alpha_.begin());
Expand Down
31 changes: 31 additions & 0 deletions src/MinCollector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,37 @@ void MinCollector::loadCounts(ProgramOptions& opt) {

}

// Writes the collector's state as two tab-separated text files:
//   <pseudoprefix>.ec   one line per entry i of index.ecmap:
//                       "i<TAB>" followed by the comma-separated elements of
//                       index.ecmap[i]
//   <pseudoprefix>.tsv  one line per entry i of counts: "i<TAB>counts[i]"
// Both files are opened with std::ios::out. No check is made that either
// file could actually be opened.
void MinCollector::write(const std::string& pseudoprefix) const {
  const std::string ecfilename = pseudoprefix + ".ec";
  const std::string countsfilename = pseudoprefix + ".tsv";

  // output equivalence classes in the form "EC TXLIST"
  std::ofstream ecof(ecfilename.c_str(), std::ios::out);
  // size_t index: size() is unsigned, so an int index would be a
  // signed/unsigned comparison.
  for (size_t i = 0; i < index.ecmap.size(); i++) {
    ecof << i << "\t";
    // output the rest of the class, comma-separated without a trailing comma
    const auto &v = index.ecmap[i];
    bool first = true;
    for (auto x : v) {
      if (!first) {
        ecof << ",";
      }
      first = false;
      ecof << x;
    }
    ecof << "\n";
  }
  ecof.close();

  std::ofstream countsof(countsfilename.c_str(), std::ios::out);
  for (size_t i = 0; i < counts.size(); i++) {
    countsof << i << "\t" << counts[i] << "\n";
  }
  countsof.close();
}

double MinCollector::get_mean_frag_len() const {
if (has_mean_fl) {
return mean_fl;
Expand Down
4 changes: 4 additions & 0 deletions src/MinCollector.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,17 @@ struct MinCollector {
int findEC(const std::vector<int>& u) const;


// deprecated
// Writes one line per entry of counts to o, in the form "id<TAB>count".
void write(std::ostream& o) {
// size_t index: counts.size() is unsigned, so an int index would be a
// signed/unsigned comparison.
for (size_t id = 0; id < counts.size(); id++) {
o << id << "\t" << counts[id] << "\n";
}
}
void write(const std::string& index_out) const;

void loadCounts(ProgramOptions& opt);


bool countBias(const char *s1, const char *s2, const std::vector<std::pair<KmerEntry,int>> v1, const std::vector<std::pair<KmerEntry,int>> v2, bool paired);
bool countBias(const char *s1, const char *s2, const std::vector<std::pair<KmerEntry,int>> v1, const std::vector<std::pair<KmerEntry,int>> v2, bool paired, std::vector<int>& biasOut) const;

Expand Down
61 changes: 61 additions & 0 deletions src/PlaintextWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,3 +111,64 @@ void plaintext_aux(

of.close();
}


// Writes batch results as two tab-separated text files derived from prefix.
//
//   prefix + ".ec"   one line per entry i of index.ecmap:
//                    "i<TAB>" followed by the comma-separated elements of
//                    index.ecmap[i]
//   prefix + ".tsv"  a header line containing "<TAB>id" for every entry of
//                    ids, then (if counts is non-empty) an N x M matrix where
//                    M = counts.size() and N is the longest counts[j].
//                    Row i is "i" followed by "<TAB>counts[j][i]" for each j;
//                    vectors shorter than i+1 contribute "0".
//
// Parameters:
//   prefix  path prefix for both output files
//   index   source of the equivalence classes (only index.ecmap is read)
//   ids     column labels for the header line
//   counts  per-sample count vectors; only read here
// No check is made that either file could actually be opened.
void writeBatchMatrix(
  const std::string &prefix,
  const KmerIndex &index,
  const std::vector<std::string> &ids,
  std::vector<std::vector<int>> &counts) {

  const std::string ecfilename = prefix + ".ec";
  const std::string countsfilename = prefix + ".tsv";

  // output equivalence classes in the form "EC TXLIST"
  std::ofstream ecof(ecfilename.c_str(), std::ios::out);
  // size_t indices throughout: size() is unsigned, so int indices would be
  // signed/unsigned comparisons.
  for (size_t i = 0; i < index.ecmap.size(); i++) {
    ecof << i << "\t";
    // output the rest of the class, comma-separated without a trailing comma
    const auto &v = index.ecmap[i];
    bool first = true;
    for (auto x : v) {
      if (!first) {
        ecof << ",";
      }
      first = false;
      ecof << x;
    }
    ecof << "\n";
  }
  ecof.close();

  std::ofstream countsof(countsfilename.c_str(), std::ios::out);
  // header: each id is preceded by a tab, leaving the first column empty
  for (size_t j = 0; j < ids.size(); j++) {
    countsof << "\t" << ids[j];
  }
  countsof << "\n";

  if (!counts.empty()) {
    // write out the NxM matrix, N is # of ecs, M is number of samples
    const size_t M = counts.size();
    size_t N = 0;
    for (size_t j = 0; j < M; j++) {
      if (N < counts[j].size()) {
        N = counts[j].size();
      }
    }

    for (size_t i = 0; i < N; i++) {
      countsof << i;
      for (size_t j = 0; j < M; j++) {
        // pad samples that have fewer than i+1 entries with zero
        if (counts[j].size() <= i) {
          countsof << "\t0";
        } else {
          countsof << "\t" << counts[j][i];
        }
      }
      countsof << "\n";
    }
  }
  countsof.close();
}
8 changes: 8 additions & 0 deletions src/PlaintextWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
#include <string>
#include <vector>

#include "KmerIndex.h"

void plaintext_writer(
const std::string& out_name,
const std::vector<std::string>& targ_ids,
Expand All @@ -30,4 +32,10 @@ void plaintext_aux(
const std::string& start_time,
const std::string& call);

void writeBatchMatrix(
const std::string &prefix,
const KmerIndex &index,
const std::vector<std::string> &ids,
std::vector<std::vector<int>> &counts);

#endif
84 changes: 50 additions & 34 deletions src/ProcessReads.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ bool isSubset(const std::vector<int>& x, const std::vector<int>& y) {
int ProcessReads(KmerIndex& index, const ProgramOptions& opt, MinCollector& tc) {

int limit = 1048576;
char *buf = new char[limit];
std::vector<std::pair<const char*, int>> seqs;
seqs.reserve(limit/50);

Expand Down Expand Up @@ -208,10 +207,29 @@ ReadProcessor::ReadProcessor(const KmerIndex& index, const ProgramOptions& opt,
clear();
}

// Move constructor.
// Scalar and reference members are copied from o, the container members are
// moved, and the raw buffer pointer is taken over. Afterwards o.buffer is
// null and o.bufsize is 0, so o's destructor has nothing to delete.
ReadProcessor::ReadProcessor(ReadProcessor && o)
  : paired(o.paired)
  , tc(o.tc)
  , index(o.index)
  , mp(o.mp)
  , buffer(o.buffer)
  , bufsize(o.bufsize)
  , numreads(o.numreads)
  , seqs(std::move(o.seqs))
  , names(std::move(o.names))
  , quals(std::move(o.quals))
  , newEcs(std::move(o.newEcs))
  , flens(std::move(o.flens))
  , bias5(std::move(o.bias5))
  , counts(std::move(o.counts)) {
  // detach the buffer from the moved-from object
  o.bufsize = 0;
  o.buffer = nullptr;
}

ReadProcessor::~ReadProcessor() {
if (buffer) {
/*delete[] buffer;
buffer = nullptr;*/
if (buffer != nullptr) {
delete[] buffer;
buffer = nullptr;
}
}

Expand Down Expand Up @@ -307,17 +325,12 @@ void ReadProcessor::processBuffer() {
// collect the target information
int ec = -1;
int r = tc.intersectKmers(v1, v2, !paired,u);
if (u.empty()) {
continue;
} else {
ec = tc.findEC(u);
}

/* -- possibly modify the pseudoalignment -- */

// If we have paired end reads where one end maps, check if some transcsripts
// If we have paired end reads where one end maps or single end reads, check if some transcripts
// are not compatible with the mean fragment length
if (paired && !u.empty() && (v1.empty() || v2.empty()) && tc.has_mean_fl) {
if (!u.empty() && (!paired || v1.empty() || v2.empty()) && tc.has_mean_fl) {
vtmp.clear();
// inspect the positions
int fl = (int) tc.get_mean_frag_len();
Expand Down Expand Up @@ -365,33 +378,36 @@ void ReadProcessor::processBuffer() {
}
}

// count the pseudoalignment
if (ec == -1 || ec >= counts.size()) {
// something we haven't seen before
newEcs.push_back(u);
} else {
// add to count vector
++counts[ec];
}

// find the ec
if (!u.empty()) {
ec = tc.findEC(u);

// count the pseudoalignment
if (ec == -1 || ec >= counts.size()) {
// something we haven't seen before
newEcs.push_back(u);
} else {
// add to count vector
++counts[ec];
}

/* -- collect extra information -- */
// collect bias info
if (findBias && !u.empty() && biasgoal > 0) {
// collect sequence specific bias info
if (tc.countBias(s1, (paired) ? s2 : nullptr, v1, v2, paired, bias5)) {
biasgoal--;
/* -- collect extra information -- */
// collect bias info
if (findBias && !u.empty() && biasgoal > 0) {
// collect sequence specific bias info
if (tc.countBias(s1, (paired) ? s2 : nullptr, v1, v2, paired, bias5)) {
biasgoal--;
}
}
}

// collect fragment length info
if (findFragmentLength && flengoal > 0 && paired && 0 <= ec && ec < index.num_trans && !v1.empty() && !v2.empty()) {
// try to map the reads
int tl = index.mapPair(s1, l1, s2, l2, ec);
if (0 < tl && tl < flens.size()) {
flens[tl]++;
flengoal--;
// collect fragment length info
if (findFragmentLength && flengoal > 0 && paired && 0 <= ec && ec < index.num_trans && !v1.empty() && !v2.empty()) {
// try to map the reads
int tl = index.mapPair(s1, l1, s2, l2, ec);
if (0 < tl && tl < flens.size()) {
flens[tl]++;
flengoal--;
}
}
}

Expand Down
1 change: 1 addition & 0 deletions src/ProcessReads.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ class MasterProcessor {
class ReadProcessor {
public:
ReadProcessor(const KmerIndex& index, const ProgramOptions& opt, const MinCollector& tc, MasterProcessor& mp);
ReadProcessor(ReadProcessor && o);
~ReadProcessor();
char *buffer;
size_t bufsize;
Expand Down
2 changes: 1 addition & 1 deletion src/PseudoBam.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ void outputPseudoBam(const KmerIndex &index, const std::vector<int> &u,
//o << seq1->name.s << "" << seq1->seq.s << "\t" << seq1->qual.s << "\n";
//o << seq2->name.s << "\t141\t*\t0\t0\t*\t*\t0\t0\t" << seq2->seq.s << "\t" << seq2->qual.s << "\n";
} else {
printf("%s\t4\t*\t0\t0\t*\t*\t0\t0\t%s\t%s\n", n1,s2,q1);
printf("%s\t4\t*\t0\t0\t*\t*\t0\t0\t%s\t%s\n", n1,s1,q1);
}
} else {
if (paired) {
Expand Down
7 changes: 6 additions & 1 deletion src/common.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#ifndef KALLISTO_COMMON_H
#define KALLISTO_COMMON_H

#define KALLISTO_VERSION "0.42.4"
#define KALLISTO_VERSION "0.42.5"

#include <string>
#include <vector>
Expand All @@ -20,6 +20,10 @@ struct ProgramOptions {
int min_range;
int bootstrap;
std::vector<std::string> transfasta;
bool batch_mode;
std::string batch_file_name;
std::vector<std::vector<std::string>> batch_files;
std::vector<std::string> batch_ids;
std::vector<std::string> files;
bool plaintext;
bool write_index;
Expand All @@ -42,6 +46,7 @@ ProgramOptions() :
sd(0.0),
min_range(1),
bootstrap(0),
batch_mode(false),
plaintext(false),
write_index(false),
single_end(false),
Expand Down
Loading

0 comments on commit e5957cf

Please sign in to comment.