Skip to content

Commit

Permalink
v1.1: mrcanavar-auto added for automated mapping & calling. Assumes t…
Browse files Browse the repository at this point in the history
…hat some files are available in the working directory. Use it with caution.
  • Loading branch information
calkan committed Nov 27, 2018
1 parent 93e9b7d commit 60fe874
Show file tree
Hide file tree
Showing 5 changed files with 201 additions and 9 deletions.
4 changes: 4 additions & 0 deletions Changelog
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
Noveber 27, 2018:
version 1.1:
* mrcanavar-auto added for automated mapping & calling. Assumes that some files are available in the working directory. Use it with caution.

November 20, 2018:
version 1.0:
* SAM(.gz) files in READ mode can now be passed as comma-separated list.
Expand Down
6 changes: 5 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,12 @@ $(EXECUTABLE): $(OBJECTS)
.c.o:
$(CC) $(CFLAGS) $< -o $@

auto: utils.c mrcanavar-auto.c
$(CC) $(CFLAGS) utils.c
$(CC) mrcanavar-auto.c utils.o -o mrcanavar-auto $(LDFLAGS)

clean:
rm -f $(EXECUTABLE) *.o *~ \#*
rm -f $(EXECUTABLE) mrcanavar-auto *.o *~ \#*

install:
cp mrcanavar $(BIN)
29 changes: 23 additions & 6 deletions callcnv.c
Original file line number Diff line number Diff line change
Expand Up @@ -796,7 +796,7 @@ void read_gene(char *glist, char *out_prefix){
char rest_of_list[1000];
float copy_n[10000];
char chr[100];

char *token;
char *ret = 0;

out_fname = (char *) getMem(sizeof (char) * (strlen(glist) + strlen(".out.bed") + 2));
Expand All @@ -808,9 +808,26 @@ void read_gene(char *glist, char *out_prefix){
g_out= my_fopen(out_fname, "w", 0);

fprintf(stdout, "Reading gene list: %s ....\n",glist);
fprintf(stdout, "Writing to: %s (This may take a while)\n",out_fname);

fprintf(g_out, "#%s\t%s\t%s\t%s\t%s\t%s\n\n", "CHROM", "START", "END", "BP_LENGTH" ,"MEDIAN", "AVERAGE");
fprintf(stdout, "Writing to: %s \n",out_fname);
i = fscanf(geneList, "%s\t%d\t%d", chr, &start, &end);
ret = fgets(rest_of_list, 1000, geneList);
rewind(geneList);

i=1;
token = strtok(rest_of_list, "\t");
while (token != NULL){
i++;
token = strtok(NULL, "\t");
}

fprintf(g_out, "#%s\t%s\t%s\t%s", "CHROM", "START", "END", "BP_LENGTH");

for (j=0; j<i; j++)
fprintf(g_out, "\t");

fprintf(g_out, "%s\t%s\n", "MEDIAN", "AVERAGE");



for (i = 0; i < num_chrom; i++){

Expand Down Expand Up @@ -872,9 +889,9 @@ void read_gene(char *glist, char *out_prefix){
qsort(copy_n, cur, sizeof (float), q_compare);
//Print the output of gene list
if (total != 0)
fprintf(g_out, "%s\t%d\t%d\t%d\t%s\t%f\t%f\n", chr, start, end, count, rest_of_list, copy_n[cur/2], (total/cur) );
fprintf(g_out, "%s\t%d\t%d\t%d%s\t%f\t%f\n", chr, start, end, count, rest_of_list, copy_n[cur/2], (total/cur) );
else
fprintf(g_out, "%s\t%d\t%d\t%d\t%s\t%f\t%f\n", chr, start, end, count, rest_of_list, 0.0, 0.0);
fprintf(g_out, "%s\t%d\t%d\t%d%s\t%f\t%f\n", chr, start, end, count, rest_of_list, 0.0, 0.0);

cur=0;
count=0;
Expand Down
4 changes: 2 additions & 2 deletions globals.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
#define MAX_STR 256
#define GC_BIN 1000

#define VERSION "1.0"
#define LAST_UPDATE "November 20, 2018"
#define VERSION "1.1"
#define LAST_UPDATE "November7 26, 2018"


enum MODETYPE {NONE, PREP, READSAM, CALL, CONC};
Expand Down
167 changes: 167 additions & 0 deletions mrcanavar-auto.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
/* this is an autopilot for mrsfast mapping & mrcanavar calling. Intended for automated use in cloud. It works locally, but assumes a lot of things. */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <getopt.h>
#include "utils.h"

char *ref_genome;
char *input_files;
char *cnvr_conf;
char *gene_list;
int num_threads;
int skip_map;

void print_help(void){
fprintf(stderr, "Usage:\n\n");
fprintf(stderr, "mrcanavar-auto\n\t--input [list]: comma-separated FASTQ(.gz) files.\n\t--ref [ref.fa]: repeat masked reference genome. mrsFAST index (ref.fa.index) should be in the same directory.\n");
fprintf(stderr, "\t--conf [ref.cnvr]: mrCaNaVaR config file.\n\t--gene [genes.bed]: List of genes.\n\t--threads [int]: number of threads for mrsFAST.\n");
fprintf(stderr, "\t--skip-mapping: Skip mrsFAST mapping. Use this only if you have the mapping output and rerunning mrCaNaVaR for some reason.\n\n");
}

int parse_command_line( int argc, char** argv)
{

int o;
int index;

ref_genome = NULL; input_files = NULL; cnvr_conf = NULL; gene_list = NULL;

static struct option long_options[] =
{
{"input" , required_argument, 0, 'i'},
{"ref" , required_argument, 0, 'f'},
{"conf" , required_argument, 0, 'c'},
{"gene" , required_argument, 0, 'g'},
{"threads" , required_argument, 0, 't'},
{"skip-mapping" , no_argument, &skip_map, 1},
{0 , 0, 0, 0 }
};

if( argc == 1)
{
print_help();
return -1;
}

while( ( o = getopt_long( argc, argv, "i:f:c:g:t", long_options, &index)) != -1)
{
switch( o)
{
case 'i':
set_str(&input_files, optarg);
break;
case 'f':
set_str(&ref_genome, optarg);
break;
case 'g':
set_str(&gene_list, optarg);
break;
case 'c':
set_str(&cnvr_conf, optarg);
break;
case 't':
num_threads = atoi(optarg);
break;
}

}

if (input_files == NULL){
fprintf (stderr, "Input files (FASTQ) are missing.\n");
return -1;
}
if (ref_genome == NULL){
fprintf (stderr, "Reference genome (repeatmasked FASTA) is missing.\n");
return -1;
}
if (gene_list == NULL){
fprintf (stderr, "Gene list (BED) is missing.\n");
return -1;
}
if (cnvr_conf == NULL){
fprintf (stderr, "Config file (CNVR) is missing.\n");
return -1;
}

return 0;
}

int main (int argc, char **argv){
int ret;
int i;

int number_of_fastq = 0;
char **fastq_files;
char *safe_fastq_list = NULL;
char *token;

char cmd_line[1048576];
char tmp_line[1048576];

skip_map = 0;
num_threads = 4;

ret = parse_command_line(argc, argv);

if (ret == -1)
return ret;

set_str(&safe_fastq_list, input_files);
token = strtok(safe_fastq_list, ",");
while (token != NULL){
number_of_fastq++;
token = strtok(NULL, ",");
}

fastq_files = (char **) getMem(sizeof(char *) * number_of_fastq);
memset(fastq_files, 0, sizeof(char *) * number_of_fastq);

set_str(&safe_fastq_list, input_files);
token = strtok(safe_fastq_list, ",");
while(token != NULL){
set_str(&(fastq_files[i++]), token);
token = strtok (NULL, ",");
}

/* map */
for (i=0; i<number_of_fastq; i++){

if (endswith (fastq_files[i], ".gz"))
sprintf(cmd_line, "mrsfast --search %s --seq %s --seqcomp --outcomp -t %d --mem 6 --crop 36 -e 2", ref_genome, fastq_files[i], num_threads);
else
sprintf(cmd_line, "mrsfast --search %s --seq %s --outcomp -t %d --mem 6 --crop 36 -e 2", ref_genome, fastq_files[i], num_threads);

printf("[MAPPING] %s\n", cmd_line);

if (!skip_map)
system(cmd_line);
}

/* read */

if (number_of_fastq == 1){
sprintf(cmd_line, "mrcanavar --read --gz -conf %s -samlist %s.sam.gz -depth %s.depth", cnvr_conf, fastq_files[0], fastq_files[0]);
}
else{
tmp_line[0] = 0;
sprintf(tmp_line, "%s", fastq_files[0]);
for (i=1; i<number_of_fastq; i++){
strcat(tmp_line, ".sam.gz,");
strcat(tmp_line, fastq_files[i]);
}
strcat(tmp_line, ".sam.gz");

sprintf(cmd_line, "mrcanavar --read --gz -conf %s -samlist %s -depth %s.depth", cnvr_conf, tmp_line, fastq_files[0]);

}

printf("[READ SAM FILES] %s\n", cmd_line);
system(cmd_line);
/* call */

sprintf(cmd_line, "mrcanavar --call -conf %s -depth %s.depth -gene %s -o %s-out", cnvr_conf, fastq_files[0], gene_list, fastq_files[0]);
printf("[CALL COPY NUMBERS] %s\n", cmd_line);
system(cmd_line);
}

0 comments on commit 60fe874

Please sign in to comment.