Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding support for Bayesian bootstrap #201

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
19 changes: 16 additions & 3 deletions src/cl.c
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ int Read_Command_Line(option *io, int argc, char **argv)
{"precision", required_argument,NULL,82},
{"l_min", required_argument,NULL,83},
{"print_mat_and_exit", no_argument,NULL,84},
{"bayesian_bootstrap", no_argument,NULL,85},
{0,0,0,0}
};

Expand Down Expand Up @@ -213,7 +214,16 @@ int Read_Command_Line(option *io, int argc, char **argv)
{
io->do_tbe = YES;
io->do_boot = NO;
io->do_alrt = NO;
io->do_alrt = NO;
io->do_bayesboot = NO;
break;
}
case 85 :
{
io->do_tbe = NO;
io->do_boot = NO;
io->do_bayesboot = YES;
io->do_alrt = NO;
break;
}
case 79:
Expand Down Expand Up @@ -1179,12 +1189,14 @@ int Read_Command_Line(option *io, int argc, char **argv)
io->do_alrt = NO;
io->do_tbe = NO;
io->do_boot = NO;
io->do_bayesboot = NO;
io->ratio_test = 0;
}
else
{
io->do_alrt = YES;
io->do_tbe = NO;
io->do_bayesboot = NO;
io->do_boot = NO;
io->ratio_test = -(int)atoi(optarg);
}
Expand Down Expand Up @@ -1644,10 +1656,11 @@ int Read_Command_Line(option *io, int argc, char **argv)
{
io->do_alrt = NO;
io->do_boot = NO;
io->do_bayesboot = NO;
}
else
{
if(io->do_alrt == NO && io->n_boot_replicates > 0) io->do_boot = YES;
if(io->do_alrt == NO && io->n_boot_replicates > 0 && io->do_bayesboot == NO) io->do_boot = YES;
}

#ifndef PHYML
Expand Down Expand Up @@ -1732,7 +1745,7 @@ int Read_Command_Line(option *io, int argc, char **argv)
io->fp_out_trees = Openfile(io->out_trees_file,1);
}

if((io->print_boot_trees) && (io->do_boot == YES || io->do_tbe == YES) && (io->fp_in_align != NULL))
if((io->print_boot_trees) && (io->do_boot == YES || io->do_bayesboot == YES || io->do_tbe == YES) && (io->fp_in_align != NULL))
{
strcpy(io->out_boot_tree_file,io->in_align_file);
strcat(io->out_boot_tree_file,"_phyml_boot_trees");
Expand Down
2 changes: 1 addition & 1 deletion src/geo.c
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ int GEO_Simulate_Estimate(int argc, char **argv)
Get_Bip(tree->a_nodes[0],tree->a_nodes[0]->v[0],tree);
Match_Tip_Numbers(tree,ori_tree);

rf = (phydbl)Compare_Bip(ori_tree,tree,NO)/(tree->n_otu-3);
rf = (phydbl)Compare_Bip(ori_tree,tree,NO,-1)/(tree->n_otu-3);
PhyML_Printf("\n. rf: %f",rf);

phydbl scale;
Expand Down
4 changes: 4 additions & 0 deletions src/help.c
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,10 @@ void Usage(void)
PhyML_Printf("\t\tComputes TBE instead of FBP (standard) bootstrap support\n");
PhyML_Printf("\t\tHas no effect with -b <= 0\n");

PhyML_Printf("%s\n\t--bayesian_bootstrap%s\n",BOLD,FLAT);
PhyML_Printf("\t\tComputes Bayesian Bootstrap instead of FBP (standard) bootstrap support\n");
PhyML_Printf("\t\tHas no effect with -b <= 0\n");

#endif

PhyML_Printf("%s\n\t-m (or --model) %smodel%s\n",BOLD,LINE,FLAT);
Expand Down
37 changes: 33 additions & 4 deletions src/interface.c
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,7 @@ void Launch_Interface_Data_Type(option *io)
}
#endif

if((io->do_boot || io->do_tbe) && (io->n_data_sets > 1))
if((io->do_boot || io->do_tbe || io->do_bayesboot) && (io->n_data_sets > 1))
{
PhyML_Printf("\n. Bootstrap option is not allowed with multiple data sets !\n");
PhyML_Printf("\n. Type any key to exit.");
Expand Down Expand Up @@ -1658,11 +1658,12 @@ void Launch_Interface_Branch_Support(option *io)
PhyML_Printf("\n");


strcpy(s,(io->do_boot || io->do_tbe)?("yes"):("no"));
if(io->n_boot_replicates > 0) sprintf(s+strlen(s)," (%d replicate%s%s)",
strcpy(s,(io->do_boot || io->do_tbe || io->do_bayesboot)?("yes"):("no"));
if(io->n_boot_replicates > 0) sprintf(s+strlen(s)," (%d replicate%s%s%s)",
io->n_boot_replicates,
(io->n_boot_replicates>1)?("s"):(""),
(io->do_tbe)?(", TBE"):(""));
(io->do_tbe)?(", TBE"):(""),
(io->do_bayesboot)?(", BAYES_BOOT"):(""));

/* PhyML_Printf(" [+] " */
PhyML_Printf(" [B] "
Expand Down Expand Up @@ -1799,12 +1800,39 @@ void Launch_Interface_Branch_Support(option *io)
{
io->do_tbe = YES;
io->do_boot = NO;
io->do_bayesboot = NO;
break;
}
case 'N' : case 'n' :
{
io->do_tbe = NO;
io->do_boot = YES;
io->do_bayesboot = NO;
break;
}
}

PhyML_Printf("\n. Compute Bayesian Bootstrap instead of FBP/TBE ? (%s) > ",
(io->do_bayesboot)?("Y/n"):("y/N"));

if(!scanf("%c",&answer)) Exit("\n");
if(answer == '\n') answer = (io->do_bayesboot)?('Y'):('N');
else getchar();

switch(answer)
{
case 'Y' : case 'y' :
{
io->do_tbe = NO;
io->do_boot = NO;
io->do_bayesboot = YES;
break;
}
case 'N' : case 'n' :
{
io->do_tbe = NO;
io->do_boot = YES;
io->do_bayesboot = NO;
break;
}
}
Expand All @@ -1815,6 +1843,7 @@ void Launch_Interface_Branch_Support(option *io)
case 'A' :
{
io->do_boot = NO;
io->do_bayesboot = NO;
io->do_tbe = NO;
io->do_alrt = YES;

Expand Down
9 changes: 7 additions & 2 deletions src/io.c
Original file line number Diff line number Diff line change
Expand Up @@ -867,7 +867,7 @@ void R_wtree(t_node *pere, t_node *fils, t_edge *b, int *available, char **s_tre

if(tree->io && tree->io->print_support_val == YES)
{
if(tree->io->do_boot == YES)
if(tree->io->do_boot == YES || tree->io->do_bayesboot == YES)
{
sprintf(*s_tree+(int)strlen(*s_tree),"%.0f",fils->b[p]->support_val);
}
Expand Down Expand Up @@ -2974,6 +2974,11 @@ void Print_Settings(option *io)

PhyML_Printf("\n . Nb of bootstrapped data sets:\t\t\t %d", io->n_boot_replicates);

if (io->do_bayesboot)
PhyML_Printf("\n . Compute Bayesian Bootstrap:\t\t\t yes");
if (io->do_tbe)
PhyML_Printf("\n . Compute TBE:\t\t\t yes");

if (io->n_boot_replicates > 0)
PhyML_Printf("\n . Compute approximate likelihood ratio test:\t no");
else
Expand Down Expand Up @@ -6231,7 +6236,7 @@ void Collect_Edge_Support_Values(t_tree *tree)

for(i=0;i<2*tree->n_otu-3;++i)
{
if(tree->io->do_boot == YES)
if(tree->io->do_boot == YES || tree->io->do_bayesboot == YES)
{
tree->a_edges[i]->support_val = (phydbl)tree->a_edges[i]->bip_score;
}
Expand Down
2 changes: 1 addition & 1 deletion src/m4.c
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ int M4_main(int argc, char **argv)


/* Launch bootstrap analysis */
if(io->do_boot || io->do_tbe)
if(io->do_boot || io->do_tbe || io->do_bayesboot)
{
if(!io->quiet) PhyML_Printf("\n. Launch bootstrap analysis on the most likely tree...\n");

Expand Down
2 changes: 1 addition & 1 deletion src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ int main(int argc, char **argv)


/* Launch bootstrap analysis */
if(io->do_boot || io->do_tbe)
if(io->do_boot || io->do_tbe || io->do_bayesboot)
{
if(!io->quiet) PhyML_Printf("\n\n. Launch bootstrap analysis on the most likely tree...");

Expand Down
32 changes: 23 additions & 9 deletions src/mpi_boot.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ void Bootstrap_MPI(t_tree *tree)
int *site_num, n_site;
int replicate, j, k;
int position, init_len;
phydbl sum_weight;
phydbl* bayesboot_weights;
calign *boot_data;
t_tree *boot_tree;
t_mod *boot_mod;
Expand Down Expand Up @@ -74,7 +76,7 @@ void Bootstrap_MPI(t_tree *tree)
}
if (Global_myRank == 0)
{
PhyML_Printf("\n\n. Non parametric bootstrap analysis \n\n");
PhyML_Printf("\n\n. Non parametric bootstrap analysis %s\n\n",(tree->io->do_bayesboot ? "(Bayesian Bootstrap)" : "(Frequentist Bootstrap)"));
PhyML_Printf("\n [");
}

Expand Down Expand Up @@ -107,11 +109,22 @@ void Bootstrap_MPI(t_tree *tree)
for(j=0;j<boot_data->crunch_len;j++) boot_data->wght[j] = 0;

init_len = 0;
for(j=0;j<boot_data->init_len;j++)
if(tree->io->do_bayesboot)
{
position = Rand_Int(0,(int)(tree->data->init_len-1.0));
boot_data->wght[site_num[position]] += 1;
init_len++;
bayesboot_weights = Dirichlet(n_site);
for(j=0;j<n_site;j++)
{
boot_data->wght[site_num[j]] += bayesboot_weights[j];
init_len++;
}
free(bayesboot_weights);
}else{
for(j=0;j<boot_data->init_len;j++)
{
position = Rand_Int(0,(int)(tree->data->init_len-1.0));
boot_data->wght[site_num[position]] += 1;
init_len++;
}
}
if (init_len != tree->data->init_len)
{
Expand All @@ -120,11 +133,11 @@ void Bootstrap_MPI(t_tree *tree)
Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
}

init_len = 0;
sum_weight = 0;
for(j=0;j<boot_data->crunch_len;j++)
init_len += boot_data->wght[j];
sum_weight += boot_data->wght[j];

if (init_len != tree->data->init_len)
if((int)(roundf(sum_weight)) != tree->data->init_len)
{
printf("\n== thread: %d init: %d %d. Pb when copying sequences\n",Global_myRank,init_len,tree->data->init_len);
Generic_Exit(__FILE__,__LINE__,__FUNCTION__);
Expand Down Expand Up @@ -226,7 +239,8 @@ void Bootstrap_MPI(t_tree *tree)
boot_tree->a_nodes[0]->v[0],
boot_tree);

if(tree->io->do_boot) Compare_Bip(tree,boot_tree,NO);
if(tree->io->do_boot) Compare_Bip(tree,boot_tree,NO,-1);
else if(tree->io->do_bayesboot) Compare_Bip(tree,boot_tree, NO, 0.1/n_site);
else if(tree->io->do_tbe) Compare_Bip_Distance(tree, boot_tree);
else assert(FALSE);

Expand Down
33 changes: 33 additions & 0 deletions src/stats.c
Original file line number Diff line number Diff line change
Expand Up @@ -5226,3 +5226,36 @@ phydbl Reflected(phydbl x, phydbl down, phydbl up)

return(ref);
}

/*////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////*/

// Dirichlet generates n_sites weights from a Dirichlet distribution D(n_sites,1,...,1).
// The weights are normalized such that their sum is = n_sites (and not 1)/
// This implements the interval method described in the paragraph "When each alpha is 1"
// of https://en.wikipedia.org/wiki/Dirichlet_distribution
phydbl* Dirichlet(int n_sites)
{
phydbl* weights;
phydbl* intervals;
int i;

if(n_sites <= 2) Generic_Exit(__FILE__,__LINE__,__FUNCTION__);

weights = (phydbl *)mCalloc(n_sites,sizeof(phydbl));
intervals = (phydbl *)mCalloc(n_sites+1,sizeof(phydbl));
intervals[0] = 0.0;
intervals[1] = 1.0;

for(i = 2; i < n_sites+1; i++){
intervals[i] = Uni();
}

Qksort(intervals,NULL,0,n_sites);

for(i = 1; i < n_sites+1; i++){
weights[i-1] = n_sites * (intervals[i] - intervals[i-1]);
}
free(intervals);
return(weights);
}
2 changes: 1 addition & 1 deletion src/stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ phydbl Rnorm_Trunc_Algo4(phydbl alpha, phydbl beta, int *err);
phydbl Manhattan_Distance(t_geo_coord *x, t_geo_coord *y);
phydbl Haversine_Distance(t_geo_coord *x, t_geo_coord *y);
int *Reverse(int *v, int len);

phydbl* Dirichlet(int n_sites);



Expand Down
Loading
Loading