#!/usr/bin/perl -w
#bootstrap.pl

use strict;
use Class::Struct;

use vars qw ($opt_A $opt_B $opt_C $opt_b $opt_D $opt_e $opt_o $opt_s $opt_S $opt_t $opt_T $opt_v);  # required if strict used
use Getopt::Std;
use constant GNUPLOT => '/usr/bin/gnuplot';
getopts ('ABCb:Deos:S:t:T:v');


# Print a helpful message unless both positional arguments (the alignment
# directory and the alignment name) are present. A single argument used to
# slip through and left $msaname undefined for everything that follows.
if (@ARGV < 2) {
    print "usage:  bootstrap.pl [options] msadir msaname\n\n";
    print "options:\n";
    print "-A       :  option for second test: branch opt only a la dnaml [default is CG_FULLMIN]\n";
    print "-B       :  option for second test: branch opt only using cg [default is CG_FULLMIN]\n";
    print "-C       :  option for third test: CG_FULLMIN [default is CG_FULLMIN with all parameters optimization]\n";
    print "-b <num> :  number of boostraps [ default 100]\n";
    print "-D       :  use dnaml-erate-dev [ default dnaml-erate]\n";
    print "-e       :  use dnaml-erate    [ default dnaml]\n";
    print "-o       :  run all three cases: dnaml plus two dnaml-erate runs (combine with -e or -D)\n";
    print "-s <num> :  NB=fraction of the whole alignment to sample (default 1.0)\n";
    print "-S <num> :  NS=number of seqs per trials (default 0 = all sequences)\n";
    print "-t <num> :  NTB=number of block trials (default 1)\n";
    print "-T <num> :  NTS=number of sequence trials (default 1)\n";
    print "-v       :  verbose\n";
    exit;
}
my $msadir  = shift;   # directory holding the alignment
my $msaname = shift;   # file name of the alignment inside $msadir

# Seed the random number generator; rand() is used below when sampling
# sequences and alignment blocks.
srand;

# Tree record (Class::Struct): a taxon count plus per-node arrays.
# NOTE(review): ld/rd presumably hold the left/right branch lengths of each
# node -- confirm against nh2tree().
struct Tree => {
ntaxa  => '$',
parent => '@',
left   => '@',
right  => '@',
ld     => '@',
rd     => '@',
taxaparent    => '@', 
};

# Base directory of the erate installation, read from the ERATEDIR
# environment variable (it is not checked for being set).
my $eratedir =  $ENV{'ERATEDIR'};
my $msastats = "$eratedir/scripts/msastats.pl";

# Work on a temporary copy of the whole alignment in the current directory.
print "\nMSADIR: $msadir\n";
print "MSANAME: $msaname\n\n";
system("cp $msadir/$msaname $msaname\n");

# PHYLIP version 3.66
#
# $which_phylip stays undefined when -o is given; boostrap() then runs all
# three cases instead of a single program.
my $phylipdir = "$eratedir/src/phylip3.66-erate/src/";
my $which_phylip;
if (!$opt_o) {
    if    ($opt_e) { $which_phylip = "dnaml-erate";     }
    elsif ($opt_D) { $which_phylip = "dnaml-erate-dev"; }
    else           { $which_phylip = "dnaml";           }
}

# Other PHYLIP programs used here.
my $treedist = $phylipdir."treedist";
my $seqboot  = $phylipdir."seqboot";
my $consense = $phylipdir."consense";

# Options.
my $nboot = 100;
if ($opt_b) { $nboot = $opt_b; }
my $verbose = 0;
if ($opt_v) { $verbose = 1; }
my $seeplots = 0;   # when true, gnuplot_histo() opens each plot with ggv
my $NC  = 1;   if ($opt_o) { $NC  = 3; } # number of cases (1 or 3)
my $NTB = 1;   if ($opt_t) { $NTB = $opt_t; } # number of block trials (-t)
my $NTS = 1;   if ($opt_T) { $NTS = $opt_T; } # number of sequence trials (-T)
my $NB  = 1.0; if ($opt_s) { $NB  = $opt_s; } # fraction of the length of the alignment included in a pMSA (partial msa)
my $NS  = 0;   if ($opt_S) { $NS  = $opt_S; } # number of sequences included in a pMSA (partial msa)
# With the whole alignment (NB = 1) or all sequences (NS = 0) a single
# trial is forced, whatever -t / -T say.
if ($NB == 1.0) { $NTB = 1; }
if ($NS == 0)   { $NTS = 1; }
my $NT = $NTB * $NTS;

# The whole setup: one output file name per case, filled in by create_files().
my @averagesfile;
my @consensefile;
my @consensenhfile;
my @sconsensefile;
my @sconsensenhfile;
my @sconsenseavefile;
create_files($NC, $NB, $NS, $NTB, $NTS, $nboot, $msaname, \@averagesfile, \@consensefile, \@consensenhfile, \@sconsensefile, \@sconsensenhfile, \@sconsenseavefile);
run_boostrap($NC, $NB, $NS, $NTB, $NTS, $nboot, $msaname, \@averagesfile, \@consensefile, \@consensenhfile, \@sconsensefile, \@sconsensenhfile, \@sconsenseavefile);
wrapup($NC, $NT, \@averagesfile, \@consensenhfile);
system("rm $msaname\n"); # remove the tmp copy of the whole alignment


#########################################################
# routines
#########################################################
# abl_from_nhtree($phylip, $treenh)
#
# Returns the average branch length of one tree given in nh format.
#   $phylip : name of the phylip program that produced the tree
#   $treenh : the tree as a single nh string (may be empty)
# Returns nothing (bare return) when the tree is not parsed: either dnaml
# produced no tree, or nh2tree() flagged the tree as unparsable.
sub abl_from_nhtree {
    my ($phylip, $treenh) = @_;

    # If phylip cannot calculate times it writes \s+nan, and treenh
    # ends up empty.
    my $no_tree = !$treenh;
    print "phylip3.66 did not generate any tree\n" if $no_tree;

    # A missing dnaml tree is simply not parsed.
    return if $phylip =~ /^dnaml$/ && $no_tree;

    # The phylip tree has the first sequence as right from root node;
    # the convention here is first sequence left from root node, so
    # the nh string is reversed in place before parsing.
    reverse_tree(\$treenh);

    # Build the tree structure from the nh string.
    my $unparsable = 0;
    my $phyliptree = Tree->new();
    my %sqhash     = sq_hash_table($treenh);
    nh2tree(\%sqhash, $treenh, \$phyliptree, \$unparsable);

    # Something went wrong in nh2tree(): do not use this result.
    return if $unparsable;

    # Extract the average branch length.
    my $abl = tree_abl($phyliptree);
    return $abl;
}

# accumulate_averages($val, $meanval_ref, $meansquareval_ref)
#
# Adds one observation to a pair of running sums: $val goes into the scalar
# behind $meanval_ref and its square into the one behind $meansquareval_ref.
# calculate_averages() later turns the two sums into a mean and a std.
sub accumulate_averages {
    my ($val, $meanval_ref, $meansquareval_ref) = @_;

    my $square = $val*$val;

    ${$meanval_ref}       += $val;
    ${$meansquareval_ref} += $square;
}

# averages_of_averages($NT, $averagesfile, $averages_ave_file)
#
# Reads $averagesfile, where every matching line carries 19 pairs of
# whitespace-separated tokens, in this order:
#   alen sqlg id mut indl fo time abl alpha beta lambda mu ip like ttr
#   apb frs frc bootstrap
# Only the first token of each pair is used (the second one is skipped).
# For each of the 19 quantities the mean and standard deviation over all
# matching lines are computed and handed, as 19 (mean, std) pairs in the
# same order, to write_to_average_averagesfile() together with
# $averages_ave_file.
#
#   $NT : number of lines expected; dies if a different number is found.
#
# Dies as well when $averagesfile cannot be opened.
sub averages_of_averages {
    my ($NT, $averagesfile, $averages_ave_file) = @_;

    my $nfields = 19;

    # running sums (then means) and sums of squares (then stds), per field
    my @tave = (0.0) x $nfields;
    my @tstd = (0.0) x $nfields;

    # 19 times "captured token, skipped token", separated by whitespace
    my $pairs   = join('\s+', ('(\S+)\s+\S+') x $nfields);
    my $line_re = qr/^$pairs\s*/;

    my $nt = 0;

    open(my $fh, '<', $averagesfile)
        or die "averages_of_averages(): cannot open $averagesfile: $!\n";
    while (my $line = <$fh>) {
        my @val = ($line =~ $line_re) or next;

        for my $i (0 .. $nfields - 1) {
            accumulate_averages($val[$i], \$tave[$i], \$tstd[$i]);
        }
        $nt ++;
    }
    close($fh);

    if ($nt != $NT) { print "not all cases finished NT=$NT found $nt\n"; die; }

    for my $i (0 .. $nfields - 1) {
        calculate_averages(\$tave[$i], \$tstd[$i], $nt);
    }

    # flatten into (mean, std) pairs, keeping the field order
    write_to_average_averagesfile($averages_ave_file,
                                  map { ($tave[$_], $tstd[$_]) } 0 .. $nfields - 1);
}

# analyze_seqboot($which_phylip, $nboot, $msabootfile, $averages_file,
#                 $consense_file, $consensenh_file, $phylipmode)
#
# Runs one phylip program on the bootstrapped alignments in $msabootfile,
# builds the consensus of the resulting trees, and appends the collected
# averages to $averages_file via write_to_averagesfile(). The intermediate
# tree file is removed at the end.
#
# The tag used in temporary file names is the program name, extended with
# ".C" for mode 2 under -o, or with ".A"/".B"/".C" according to -A/-B/-C
# when -e or -D is in effect.
sub analyze_seqboot {

    my ($which_phylip, $nboot, $msabootfile, $averages_file, $consense_file, $consensenh_file, $phylipmode) = @_;

    # results filled in by run_phylip() and consense()
    my ($ave_time, $ave_abl, $std_abl);
    my ($ave_alpha, $std_alpha, $ave_beta, $std_beta);
    my ($ave_lambda, $std_lambda, $ave_mu, $std_mu);
    my ($ave_ip, $std_ip, $ave_like, $std_like, $ave_ttr, $std_ttr);
    my ($ave_apb, $std_apb, $ave_frs, $std_frs, $ave_frc, $std_frc);
    my ($ave_bootstrap, $std_bootstrap);
    my $std_time = 0.0; # cannot calculate this one

    # work out the optional suffix of the tag
    my $suffix;
    if ($opt_o) {
        # dnaml-erate times + 4 parameters optimization
        $suffix = ".C" if $phylipmode == 2;
    }
    elsif ($opt_e || $opt_D) {
        $suffix = $opt_A ? ".A"    # dnaml-erate times optimization only a la dnaml
                : $opt_B ? ".B"    # dnaml-erate times optimization only a la cg
                : $opt_C ? ".C"    # dnaml-erate times + 4 parameters optimization
                :          undef;
    }
    my $tag = $which_phylip;
    $tag .= $suffix if defined $suffix;

    my $treebootfile = "$msaname.$tag.boot$nboot.treenh";

    # output slots of run_phylip(), in the order it expects them
    my @phylip_results = (
        \$ave_time,
        \$ave_abl,    \$std_abl,
        \$ave_alpha,  \$std_alpha,
        \$ave_beta,   \$std_beta,
        \$ave_lambda, \$std_lambda,
        \$ave_mu,     \$std_mu,
        \$ave_ip,     \$std_ip,
        \$ave_like,   \$std_like,
        \$ave_ttr,    \$std_ttr,
        \$ave_apb,    \$std_apb,
        \$ave_frs,    \$std_frs,
        \$ave_frc,    \$std_frc,
    );
    run_phylip($which_phylip, $nboot, $msabootfile, $treebootfile, $phylipmode,
               @phylip_results, $tag);

    consense($nboot, $treebootfile, $consense_file, $consensenh_file, \$ave_bootstrap, \$std_bootstrap, $tag);

    # (average, std) pairs, in the order write_to_averagesfile() expects
    my @pairs = (
        $ave_time,      $std_time,
        $ave_abl,       $std_abl,
        $ave_alpha,     $std_alpha,
        $ave_beta,      $std_beta,
        $ave_lambda,    $std_lambda,
        $ave_mu,        $std_mu,
        $ave_ip,        $std_ip,
        $ave_like,      $std_like,
        $ave_ttr,       $std_ttr,
        $ave_apb,       $std_apb,
        $ave_frs,       $std_frs,
        $ave_frc,       $std_frc,
        $ave_bootstrap, $std_bootstrap,
    );
    write_to_averagesfile($averages_file, @pairs);

    system("rm $treebootfile\n");
}

# boostrap($msaname, $nboot, $averages_file_ref, $consense_file_ref,
#          $consensenh_file_ref)
#
# Bootstraps one alignment: seqboot() writes the resampled alignments, then
# analyze_seqboot() is run once (a single program was selected) or three
# times (no single program: dnaml, then the -e/-D program in modes 1 and 2).
# Case c uses element c of each of the three file-name arrays.
sub boostrap {
    my ($msaname, $nboot, $averages_file_ref, $consense_file_ref, $consensenh_file_ref) = @_;

    my $msabootfile  = "$msaname.boot$nboot.msa";

    # get rid of phylip leftovers from a previous run
    system("rm outfile\n");
    system("rm outtree\n");

    # program used for the second and third cases
    my $otag;
    if    ($opt_e) { $otag = "dnaml-erate";     }
    elsif ($opt_D) { $otag = "dnaml-erate-dev"; }

    seqboot($nboot, $msaname, $msabootfile, $NC, $averages_file_ref);

    # each run is [program, case]; the case number is both the index into
    # the file arrays and the phylip mode
    my @runs;
    if ($which_phylip) {
        @runs = ([$which_phylip, 0]);
    }
    else {
        @runs = (["dnaml", 0], [$otag, 1], [$otag, 2]);
    }

    foreach my $run (@runs) {
        my ($program, $case) = @{$run};
        analyze_seqboot($program, $nboot, $msabootfile, $averages_file_ref->[$case], $consense_file_ref->[$case], $consensenh_file_ref->[$case], $case);
    }

    system("rm $msabootfile\n");
}

# calculate_averages($meanval_ref, $stdval_ref, $number)
#
# Converts running sums into statistics, in place. On entry the two
# references hold the sum of the values and the sum of their squares; on
# exit they hold the mean and the sample standard deviation (n-1 in the
# denominator).
#   $number > 1  : mean and std are computed
#   $number == 1 : mean is the single value, std is 0
#   otherwise    : both are set to 0
# A variance that is negative by less than 0.00001 is treated as 0.
sub calculate_averages {
    my ($meanval_ref, $stdval_ref, $number) = @_;

    my $mean = 0.0;
    my $std  = 0.0;

    if ($number == 1) {
        $mean = $$meanval_ref / $number;
    }
    elsif ($number > 1) {
        $mean = $$meanval_ref / $number;

        my $variance = ($$stdval_ref - $mean*$mean*$number) / ($number-1);
        if ($variance < 0. && $variance > -0.00001) { $variance = 0.0; }
        $std = sqrt($variance);
    }

    $$meanval_ref = $mean;
    $$stdval_ref  = $std;

}

# consense($nboot, $treefile, $consensefile, $consensetree,
#          $ave_bootstrap_ref, $std_bootstrap_ref, $tag)
#
# Runs the phylip CONSENSE program on $treefile and
#   - appends its report  (outfile) to $consensefile,
#   - appends its tree    (outtree) to $consensetree,
#   - stores the average bootstrap value and its std, as computed by
#     extract_ave_bootstrap(), through the two references.
# $nboot and $tag are only used to name the temporary files, all of which
# are removed before returning.
#
# Dies if the CONSENSE input file cannot be written.
sub consense {
    my ($nboot, $treefile, $consensefile, $consensetree, $ave_bootstrap_ref, $std_bootstrap_ref, $tag) = @_;

    my $ave_bootstrap = 0;
    my $std_bootstrap = 0;
    my $consenseinputfile = "$msaname.$tag.boot$nboot.consense.input";
    my $consensescreenoutfile = "$msaname.$tag.boot$nboot.consense.screenout";
    my $single_consensefile = "$msaname.$tag.boot$nboot.consense.file";
    my $single_consensetree = "$msaname.$tag.boot$nboot.consense.tree";

    # make consense inputfile: the name of the tree file, then "Y"
    # NOTE(review): "Y" presumably accepts the default menu settings -- confirm.
    open(my $in, '>', $consenseinputfile)
        or die "consense(): cannot write $consenseinputfile: $!\n";
    print $in "$treefile\n";
    print $in "Y\n";
    close($in)
        or die "consense(): cannot close $consenseinputfile: $!\n";
    if ($verbose) {system ("more $consenseinputfile\n"); }

    # run CONSENSE
    #
    my $cmd = "$consense < $consenseinputfile > $consensescreenoutfile";
    system("$cmd\n");
    system("mv outfile $single_consensefile\n");
    system("mv outtree $single_consensetree\n");

    # extract average boostrap
    extract_ave_bootstrap($single_consensefile, \$ave_bootstrap, \$std_bootstrap);

    # Append this run to the accumulating files. Only the new file is given
    # to cat: listing the target as an input as well makes cat read the very
    # file it is appending to.
    system("cat $single_consensefile >> $consensefile\n");
    system("cat $single_consensetree >> $consensetree\n");
    system("rm $single_consensefile\n");
    system("rm $single_consensetree\n");
    system("rm $consenseinputfile\n");
    system("rm $consensescreenoutfile\n");

    # boostrap calculated as a fraction
    $$ave_bootstrap_ref = $ave_bootstrap;
    $$std_bootstrap_ref = $std_bootstrap;
}

# consense_of_consenses($NC, $NTB, $consensenh_pfile_ref, $consensenhfile_ref,
#                       $sconsense_file_ref, $sconsensenh_file_ref)
#
# For each of the $NC cases, runs consense() on the tree file in
# $consensenh_pfile_ref->[c], appending to the files named in
# $sconsense_file_ref->[c] and $sconsensenh_file_ref->[c]. $NTB and an empty
# tag are passed on to consense() for naming its temporary files. The
# bootstrap averages it returns are discarded, and $consensenhfile_ref is
# not used.
sub consense_of_consenses {
    my ($NC, $NTB, $consensenh_pfile_ref, $consensenhfile_ref,
        $sconsense_file_ref, $sconsensenh_file_ref) = @_;

    foreach my $c (0 .. $NC - 1) {

        # required by consense(), not used afterwards
        my ($ave_bootstrap, $std_bootstrap) = (0.0, 0.0);

        consense($NTB,
                 $consensenh_pfile_ref->[$c],
                 "$sconsense_file_ref->[$c]",
                 "$sconsensenh_file_ref->[$c]",
                 \$ave_bootstrap, \$std_bootstrap,
                 "");
    }
}

# create_files($NC, $NB, $NS, $NTB, $NTS, $nboot, $msaname,
#              $averages_file_ref, $consense_file_ref, $consensenh_file_ref,
#              $sconsense_file_ref, $sconsensenh_file_ref,
#              $sconsenseave_file_ref)
#
# Fills the six arrays with the output file names of every case and removes
# any existing file of that name for the first $NC cases.
#
# With $NC == 1 there is a single case named after the selected program
# (plus ".A"/".B"/".C" for -A/-B/-C under -e or -D). Otherwise three cases
# are named: "dnaml", the -e/-D program, and that program with ".C".
sub create_files {
    my ($NC, $NB, $NS, $NTB, $NTS, $nboot, $msaname,
        $averages_file_ref, $consense_file_ref, $consensenh_file_ref,
        $sconsense_file_ref, $sconsensenh_file_ref, $sconsenseave_file_ref) = @_;

    my $ff = "NB$NB.TB$NTB.NS$NS.TS$NTS";

    # one "<msaname>.<tag>" stem per case
    my @stem;
    if ($NC == 1) {
        my $tag = $which_phylip;
        if ($opt_e || $opt_D) {
            if    ($opt_A) { $tag .= ".A"; } # dnaml-erate times optimization only a la dnaml
            elsif ($opt_B) { $tag .= ".B"; } # dnaml-erate times optimization only a la cg
            elsif ($opt_C) { $tag .= ".C"; } # dnaml-erate times + 4 parameters optimization
        }
        @stem = ("$msaname.$tag");
    }
    else {
        my $otag;
        if    ($opt_e) { $otag = "dnaml-erate";     }
        elsif ($opt_D) { $otag = "dnaml-erate-dev"; }

        @stem = ("$msaname.dnaml", "$msaname.$otag", "$msaname.$otag.C");
    }

    foreach my $c (0 .. $#stem) {
        my $base = "$stem[$c].$ff.boot$nboot";

        $averages_file_ref->[$c]   = "$base.averages";
        $consense_file_ref->[$c]   = "$base.consense";
        $consensenh_file_ref->[$c] = "$base.consense.nh";

        # the consensus-of-consensus files hang off the consense file name
        $sconsense_file_ref->[$c]    = "$consense_file_ref->[$c].consense";
        $sconsensenh_file_ref->[$c]  = "$consense_file_ref->[$c].consense.nh";
        $sconsenseave_file_ref->[$c] = "$consense_file_ref->[$c].consense.averages";
    }

    # we are going to append to these files, make sure they are empty
    foreach my $nc (0 .. $NC - 1) {
        foreach my $list ($averages_file_ref, $consense_file_ref, $consensenh_file_ref,
                          $sconsense_file_ref, $sconsensenh_file_ref, $sconsenseave_file_ref) {
            system("rm $list->[$nc]\n");
        }
    }
}

# create_partial_msa_seqs($nts, $NS, $msaname, $pmsaname_ref)
#
# Creates the partial alignment "<msaname>.TS<nts>.NS<NS>": a copy of
# $msaname reduced by msa_sample_seqs() to a sample of $NS full-length
# sequences. The msastats script is run on it, with its output going to
# "<partial name>_stats". The name of the partial alignment is returned
# through $pmsaname_ref.
sub create_partial_msa_seqs {
    my ($nts, $NS, $msaname, $pmsaname_ref) = @_;

    my $pmsaname  = "$msaname.TS$nts.NS$NS";
    my $statsfile = $pmsaname . "_stats";

    # start from a full copy, then sample NS sequences in place
    system("cp $msaname $pmsaname\n");
    msa_sample_seqs($NS, $pmsaname);

    # run stats
    system("$msastats $pmsaname>$statsfile\n");

    ${$pmsaname_ref} = $pmsaname;
}

# create_partial_msa_blocks($ntb, $NB, $msaname, $pmsaname_ref)
#
# Creates the partial alignment "<msaname>.TB<ntb>.NB<NB>": a copy of
# $msaname reduced by msa_sample_blocks() to a block covering the fraction
# $NB of the columns, for all sequences. The msastats script is run on it,
# with its output going to "<partial name>_stats". The name of the partial
# alignment is returned through $pmsaname_ref.
#
# Dies when $NB is larger than 1.
sub create_partial_msa_blocks {
    my ($ntb, $NB, $msaname, $pmsaname_ref) = @_;

    if ($NB > 1.0) {
        print "bad fraction $NB\n";
        die;
    }

    my $pmsaname  = "$msaname.TB$ntb.NB$NB";
    my $statsfile = $pmsaname . "_stats";

    # start from a full copy, then cut out one block in place
    system("cp $msaname $pmsaname\n");
    msa_sample_blocks($pmsaname, $NB);

    # run stats
    system("$msastats $pmsaname>$statsfile\n");

    ${$pmsaname_ref} = $pmsaname;
}

 # decimal(\$val)
 #
 # Rewrites a number given in exponent notation with a zero-padded negative
 # exponent (such as 1.5e-05) as a plain decimal string (0.000015). Any
 # other value is left as it is. The result is stored back through the
 # reference and is also returned.
 #
 # Dies when the mantissa cannot be brought into the form 0.xxx (negative
 # mantissas, for instance) or when the rewritten value differs from the
 # original by more than 0.000001.
 sub decimal {
    my ($val_ref) =@_;

    my $val = $$val_ref;
    my $newval;
    my $root;
    my $power;
    my $tol = 0.000001;

    if ($val =~ /^(\S+)e-[0]+(\d+)$/) {
	$root = $1;
	$power = $2;

	# shift the mantissa below 1; each shift uses up one leading zero
	while ($root >= 1) { $power --; $root /= 10.; }

	if ($root =~ /^0\.(\S+)/) { $newval = "0."; $root = $1; }
	else { print "decimal(): something went wrong val=$val root=$root power=$power\n"; die; }

	# leading zeros after the decimal point, then the mantissa digits
	my $n = 0;
	while ($n++<$power) { $newval .= "0"; }

	$newval .= $root;
    }
    else {
	$newval = $val;
    }

    # check
    if (abs($val-$newval) > $tol){ 
	printf "decimal(): bad value newval %f val %f diff %f tol %f\n", $newval, $val, abs($val-$newval), $tol; 
	die; 
    }
    
    # hand the result back to the caller; assigning to the local copy alone
    # would leave the caller's variable untouched
    $$val_ref = $newval;
    return $newval;
}

# extract_ave_abl($phylip, $nsample, $phyliptreefile, $ave_abl_ref, $std_abl_ref)
#
# Reads the trees in $phyliptreefile, where a tree may span several lines
# and ends on a line finishing with ");". For every tree the average branch
# length is obtained from abl_from_nhtree(); the mean and std over all trees
# are left in the two references.
#
# Blank lines are skipped; a line containing whitespace stops the reading.
# Dies unless exactly $nsample trees were found.
sub extract_ave_abl {
    my ($phylip, $nsample, $phyliptreefile, $ave_abl_ref, $std_abl_ref) = @_;

    system("more $phyliptreefile\n") if $verbose;

    my $ntrees = 0;
    my $treenh = "";     # the tree collected so far

    open(FILE, "$phyliptreefile");
    while (<FILE>) {
        next if /^\s+$/;

        # last line of a tree: finish it and take its abl
        if (/^(\S+\);)$/) {
            $ntrees ++;
            $treenh .= "$1";

            my $abl = abl_from_nhtree($phylip, $treenh);
            accumulate_averages($abl, $ave_abl_ref, $std_abl_ref);
            print "\n$treenh\nabl $abl ave $$ave_abl_ref std $$std_abl_ref\n";

            $treenh = "";
            next;
        }

        # a line in the middle of a tree
        if (/^(\S+)$/) {
            $treenh .= "$1";
            next;
        }

        # anything else is not part of a tree: stop here
        $treenh = "";
        last;
    }
    close (FILE);

    if ($ntrees != $nsample) {
        print "extract_ave_abl(): there should be $nsample alignments not $ntrees\n";
        die;
    }

    calculate_averages($ave_abl_ref, $std_abl_ref, $ntrees);
}

# extract_ave_bootstrap($consensefile, $ave_bootstrap_ref, $std_bootstrap_ref)
#
# Parses a CONSENSE report. Every line of dots and stars followed by a count
# ending in ".00" contributes one value: the count divided by the number
# read from the "How many times out of" line (when that number is positive).
# Reading stops at the "Sets NOT included in consensus tree" line. The mean
# and std of the values are left in the two references.
#
# Dies if a value is larger than 1, or unless exactly one
# "Sets included in the consensus tree" line was seen.
sub extract_ave_bootstrap {
    my ($consensefile, $ave_bootstrap_ref, $std_bootstrap_ref) = @_;

    my $nvalues  = 0;   # number of values collected
    my $nreports = 0;   # number of "Sets included" headers seen
    my $nboot;          # denominator taken from the report

    open(FILE, "$consensefile");
    LINE:
    while (<FILE>) {
        if (/^Sets included in the consensus tree/) {
            $nreports ++;
            next LINE;
        }

        if (/How many times out of\s+(\S+)/) {
            $nboot = $1;
            next LINE;
        }

        if (/^[\.\*]+\s+(\d+\.00)/ || /^[\.\*\s]+\s+(\d+\.00)/) {
            my $bootstrap = $1;
            $nvalues ++;

            # turn the count into a fraction
            $bootstrap /= $nboot if $nboot > 0;
            print "boot $bootstrap\n";

            if ($bootstrap > 1.0) {
                print "extract_ave_bootstrap(): error bootstrap = $bootstrap\n";
                die;
            }
            accumulate_averages($bootstrap, $ave_bootstrap_ref, $std_bootstrap_ref);
            next LINE;
        }

        last LINE if /^Sets NOT included in consensus tree/;
    }
    close (FILE);

    if ($nreports != 1) {
        print "extract_ave_bootstrap(): there should be 1 alignments not $nreports\n";
        die;
    }
    calculate_averages($ave_bootstrap_ref, $std_bootstrap_ref, $nvalues);
}

# extract_ave_param($phylip, $nsample, $phylipoutputfile, <20 scalar refs>)
#
# Collects parameter statistics from a phylip output file. The 20 references
# receive, in this order, the (average, std) pairs of
#   alpha, beta, lambda, mu, ip, like, ttr, apb, frs, frc
#
# The file is parsed by parse_mphylip_dnaml() when $phylip is "dnaml" and by
# parse_mphylip_dnaml_erate() when it is "dnaml-erate" or "dnaml-erate-dev".
# Dies for any other program name. The parser works on local values that
# start at 0.0; the caller's variables are written only after it returns.
sub extract_ave_param {
    my ($phylip, $nsample, $phylipoutputfile) = @_;
    my @result_refs = @_[3 .. 22];

    # pick the parser that matches the program
    my $parser;
    if ($phylip =~ /^dnaml$/) {
        $parser = \&parse_mphylip_dnaml;
    }
    elsif ($phylip =~ /^dnaml-erate$/  || $phylip =~ /^dnaml-erate-dev$/) {
        $parser = \&parse_mphylip_dnaml_erate;
    }
    else {
        print "extract_ave_param(): which phylyp is this? $phylip\n"; die;
    }

    # local slots for the 10 (average, std) pairs
    my @stat = (0.0) x 20;
    $parser->($nsample, $phylipoutputfile, \(@stat));

    # copy the results out to the caller, keeping the order
    foreach my $i (0 .. $#stat) {
        ${ $result_refs[$i] } = $stat[$i];
    }
}

# extract_ave_time($timefile)
#
# Returns the value that follows "real" at the start of a line in $timefile;
# when there are several such lines the last one is used. Dies if no such
# line is found or if the value is negative.
sub extract_ave_time {

    my ($timefile) = @_;

    system("more $timefile\n") if $verbose;

    my $ave_time = -1;   # stays negative when no "real" line shows up

    open(FILE, $timefile);
    while (my $line = <FILE>) {
        $ave_time = $1 if $line =~ /^real\s+(\S+)/;
    }
    close(FILE);

    print "ave_time $ave_time\n" if $verbose;

    if ($ave_time < 0) {
        print "did not get the right runtime $ave_time secs\n";
        die;
    }

    return $ave_time;
}

# fill_histo_array($val, $len, $N, $k, $his_ref)
#
# Adds to a histogram with $k bins per unit over [0, $N): $val is added to
# the bin i for which i/$k <= $len < (i+1)/$k. A $len of $N or more goes to
# the last bin ($N*$k), which is incremented by 1 (not by $val). A negative
# $len matches no bin and changes nothing.
sub fill_histo_array {
    my ($val, $len, $N, $k, $his_ref) = @_;
    my $dim = $N * $k;

    # overflow bin
    if ($len >= $N) {
        $his_ref->[$dim] += 1;
        return;
    }

    # look for the bin that contains $len
    my $bin = 0;
    while ($bin <= $dim) {
        if ($bin/$k <= $len && $len < ($bin+1)/$k) {
            $his_ref->[$bin] += $val;
            last;
        }
        $bin ++;
    }
}

# gnuplot_histo($filehisto, $title, $xlabel, $key)
#
# Pipes a script to gnuplot that draws columns 1:2 of $filehisto with boxes
# into the postscript file "$filehisto.ps". $title and $key make up the plot
# title, $key is also the legend entry, $xlabel labels the x axis and the
# y axis is labelled "# CASES". When $seeplots is set the result is opened
# with ggv.
sub gnuplot_histo {

    my ($filehisto, $title, $xlabel, $key) = @_;

    my $outplot = "$filehisto.ps";

    open(GP,'|'.GNUPLOT) || die "Gnuplot: $!";

    # terminal and line styles, sent verbatim
    print GP <<'END_OF_STYLES';
set terminal postscript color solid 14
set style line 1 lt 1 lw 4
set style line 2 lt 2 lw 4
set style line 3  lt 3 lw 4
set style line 4 lt 4 lw 4
set style line 5 lt 5 lw 4
set style line 6 lt 6 lw 4
set style line 7 lt 7 lw 4
set style line 8 lt 8 lw 4
set style line 9 lt 9 lw 4
set style line 11 lt 1 lw 2 pt 1 ps 1.2
set style line 22 lt 2 lw 2 pt 2 ps 1.2
set style line 33 lt 3 lw 2 pt 3 ps 1.2
set style line 44 lt 4 lw 2 pt 4 ps 1.2
set style line 55 lt 5 lw 2 pt 5 ps 1.2
set style line 66 lt 6 lw 2 pt 6 ps 1.2
set style line 77 lt 7 lw 2 pt 7 ps 1.2
set style line 88 lt 8 lw 2 pt 8 ps 1.2
set style line 99 lt 9 lw 2 pt 9 ps 1.2
END_OF_STYLES

    # output file, labels and the plot itself
    print GP "set output '$outplot'\n",
             "set xlabel '$xlabel'\n",
             "set title \"$title\\n\\n$key\"\n",
             "set ylabel '# CASES'\n",
             "plot '$filehisto' using 1:2  with boxes title '$key' ls 1\n";

    close (GP);

    system ("ggv -landscape -magstep -2 $outplot&\n") if $seeplots;

}

# init_histo_array($N, $k, $his_ref)
#
# Sets the bins 0 .. $N*$k of the histogram array to zero.
sub init_histo_array {
    my ($N, $k, $his_ref) = @_;

    my $last_bin = $N * $k;
    my $bin      = 0;
    $his_ref->[$bin++] = 0 while $bin <= $last_bin;
}

# msa_sample_seqs($NS, $msaname)
#
# Rewrites the alignment file $msaname so that it holds at most $NS of its
# sequences, picked at random without replacement. With $NS == 0, or when
# the alignment has no more than $NS sequences, all of them are kept in
# their original order. The characters ( ) : ; , [ ] in the names are
# replaced by "x", and remove_common_gaps() is run on the rewritten file.
sub msa_sample_seqs {

    my ($NS, $msaname) = @_;

    my (@msa, @name, $nseq, $alen);
    parse_msa ($msaname, \$nseq, \$alen, \@msa, \@name);

    my $new_nseq = ($NS == 0 || $nseq <= $NS) ? $nseq : $NS;

    print "MSA nseq $nseq alen $alen\n";

    my (@new_msa, @new_name);
    if ($new_nseq == $nseq) {
        # keep everything
        foreach my $n (0 .. $new_nseq - 1) {
            $new_msa[$n]  = $msa[$n];
            $new_name[$n] = $name[$n];
        }
    }
    else {
        # indices of the sequences that have not been picked yet
        my @pool = (0 .. $nseq - 1);

        foreach my $n (0 .. $new_nseq - 1) {
            # sample without replacement: the pick leaves the pool
            my $slot   = int(rand()*@pool);
            my ($pick) = splice(@pool, $slot, 1);

            $new_name[$n] = $name[$pick];
            $new_msa[$n]  = $msa[$pick];

            print "SAMPLE $pick\n";
        }
    }

    # dnaml does not accept these characters in names
    foreach my $n (0 .. $new_nseq - 1) {
        $new_name[$n] =~ tr/():;,[]/x/;
    }

    write_msa_to_file($msaname, $new_nseq, \@new_msa, \@new_name);
    remove_common_gaps($msaname);
}

# msa_sample_blocks($msaname, $fraqsize)
#
# Rewrites the alignment file $msaname so that it holds one contiguous block
# of int($fraqsize * alen) columns, where alen is the alignment length
# reported by parse_msa(). The start of the block is chosen at random and is
# the same for all sequences. When the block is as long as the alignment the
# sequences are kept whole.
#
# The block is cut with substr() instead of a regular expression followed by
# $1: after a failed match $1 still holds an older capture, and a {n}
# quantifier has an upper limit that long alignments can exceed.
#
# Dies when the block length is out of range, or when a sequence is too
# short to provide the block.
sub msa_sample_blocks {

    my ($msaname, $fraqsize) = @_;

    my (@msa, @name, $nseq, $alen);
    parse_msa ($msaname, \$nseq, \$alen, \@msa, \@name);

    my $new_alen = int($fraqsize * $alen);
    my $new_nseq = $nseq;

    if ($new_alen < 0 || $new_alen > $alen) {
        print "msa_sample_blocks(): bad fraction $fraqsize\n"; die;
    }

    my @new_name;
    for (my $n = 0; $n < $new_nseq; $n ++) {
        $new_name[$n] = $name[$n];
    }

    my @new_msa;
    if ($new_alen == $alen) {
        for (my $n = 0; $n < $new_nseq; $n ++) {
            $new_msa[$n]  = $msa[$n];
        }
    }
    else {
        # number of possible start positions, and the one picked
        my $npos  = $alen - $new_alen + 1;
        my $start = int(rand()*$npos);

        for (my $n = 0; $n < $new_nseq; $n ++) {
            my $seq   = defined $msa[$n] ? $msa[$n] : "";
            my $block = $start <= length($seq) ? substr($seq, $start, $new_alen) : "";

            if (length($block) != $new_alen) { print "msa_sample_blocks(): bad len\n"; die; }

            $new_msa[$n] = $block;
        }
    }

    write_msa_to_file($msaname, $new_nseq, \@new_msa, \@new_name);
}

# multiple_msa_stats($msabootfile, $NC, $averages_file_ref)
#
# Runs the msastats script on $msabootfile, reads the resulting averages
# back with parse_msastats() and writes them, through
# write_msastats_to_averagesfile(), to the first $NC files of
# $averages_file_ref. The temporary stats file "$msabootfile.stats" is
# removed afterwards.
#
# Dies, after showing the alignment and the stats, when the average
# alignment length comes back as zero.
sub multiple_msa_stats {
    my ($msabootfile, $NC, $averages_file_ref) = @_;

    my $statsfile = "$msabootfile.stats";

    # (average, std) pairs filled in by parse_msastats()
    my ($ave_alen, $std_alen, $ave_sqlg, $std_sqlg) = (0.0, 0.0, 0.0, 0.0);
    my ($ave_id,   $std_id,   $ave_mut,  $std_mut)  = (0.0, 0.0, 0.0, 0.0);
    my ($ave_indl, $std_indl, $ave_fo,   $std_fo)   = (0.0, 0.0, 0.0, 0.0);

    system("$msastats $msabootfile> $statsfile\n");

    parse_msastats($statsfile,
                   \$ave_alen, \$std_alen, \$ave_sqlg, \$std_sqlg,
                   \$ave_id,   \$std_id,   \$ave_mut,  \$std_mut,
                   \$ave_indl, \$std_indl, \$ave_fo,   \$std_fo);

    if ($ave_alen == 0) {
        print "\nAlignment of length zero?\n";
        system("more $msabootfile\n");
        print "STATS\n";
        system("more $statsfile\n");
        die;
    }

    # the same statistics go to the averages file of every case
    my @stats = ($ave_alen, $std_alen, $ave_sqlg, $std_sqlg,
                 $ave_id,   $std_id,   $ave_mut,  $std_mut,
                 $ave_indl, $std_indl, $ave_fo,   $std_fo);
    foreach my $c (0 .. $NC - 1) {
        write_msastats_to_averagesfile($averages_file_ref->[$c], @stats);
    }

    system("rm $statsfile\n");
}

# In Phylip format the names of the
# sequences have to be at most of length 10.
# Here we truncate names if necessary, and
# try to make all names different at that length.
#
# name_normalize($nseq, $name_ref)
#   $nseq     : number of names to process
#   $name_ref : reference to the array of names, modified in place
#
# Every name is first truncated or padded with spaces to exactly 10
# characters. Names that are identical at that point are then numbered: the
# first one gets ".1" in place of its last two characters, and each later
# copy has its spaces removed and gets ".2", ".3", ... in place of its last
# characters (as many as the suffix is long) before being truncated or
# padded again.
#
# NOTE(review): a suffixed name can still coincide with an unrelated name
# that already ended in the same suffix; that case is not detected.
sub name_normalize {
    my ($nseq, $name_ref) = @_;

    my $maxlen = 10;

    if ($verbose) {
        for (my $n = 0; $n < $nseq; $n ++) {
            print "OLD: $name_ref->[$n]\n";
        }
    }

    # Make names of length maxlen (10) either
    # by truncating it, or by adding spaces
    for (my $n = 0; $n < $nseq; $n ++) {

        # truncate longer names
        if ($name_ref->[$n] =~ /^(.{$maxlen})/) {
            $name_ref->[$n] = $1;
        }

        # add spaces for shorter names
        while (length($name_ref->[$n]) < $maxlen) {
            $name_ref->[$n] .= " ";
        }
    }

    # After truncation, some names might be identical.
    # Change identical names by adding a counter.
    for (my $n = 0; $n < $nseq; $n ++) {

        # Compare against the name as it was before any suffix was added.
        # Comparing against the renamed first copy would miss the third and
        # later copies, which would then be numbered from .1 again.
        my $base = $name_ref->[$n];
        my $c = 1;

        for (my $m = $n+1; $m < $nseq; $m ++) {
            next if $name_ref->[$m] ne $base;

            # change the first name
            if ($c == 1) {
                # remove last two characters and add the counter tag
                $name_ref->[$n] =~ s/.{2}$//;
                $name_ref->[$n] .= ".$c";
            }

            # first remove spaces
            $name_ref->[$m] =~ s/ //g;

            # make room for the counter tag, whatever its width, and add it
            $c ++;
            my $suffix = ".$c";
            my $cut    = length($suffix);
            $name_ref->[$m] =~ s/.{$cut}$//;
            $name_ref->[$m] .= $suffix;

            # truncate or add spaces again
            if ($name_ref->[$m] =~ /^(.{$maxlen})/){
                $name_ref->[$m] = $1;
            }
            while (length($name_ref->[$m]) < $maxlen) {
                $name_ref->[$m] .= " ";
            }
        }
    }
    if ($verbose) {
        for (my $n = 0; $n < $nseq; $n ++) {
            print "NEW: $name_ref->[$n]\n";
        }
    }

}

# Parse a tree given in nh (Newick) notation into a Tree structure.
#
# Arguments:
#   $hash_ref         - hash that maps every leaf name of the tree to its
#                       taxon index (0 .. N-1)
#   $nhtree           - the tree in nh notation; has to end in ");"
#   $tree_ref         - reference to the Tree object to fill; on return it
#                       holds the tree after tree_renumber_nodes()
#   $doesnotparse_ref - reference to a flag that receives 1 when the root
#                       has an unexpected number of children, 0 otherwise
#
# The string is consumed from the left, one token at a time, with a stack
# of pending nodes.  While parsing, leaves are numbered 0 .. N-1 and
# internal nodes N .. 2N-2; they are converted to the conventions of the
# Tree structure when they are stored (see the comment in the body).
#
# Dies (after printing a message) on any token that cannot be parsed and
# when the final leaf/node counts are not the expected ones.
sub nh2tree {
    my ($hash_ref, $nhtree, $tree_ref, $doesnotparse_ref) = @_;

    my $tree         = $$tree_ref;
    my $doesnotparse = 0;

    my $N;                # number of ',' in the nh notation plus one
    my $nodestart;        # number given to the first internal node that is found
    my $dim;              # number of internal nodes 
    my $ncd = 0;          # counter for number of nodes+leaves in the tree 
    my $nsq = 0;          # counter for number of leaves in the tree 
    my $nnd = 0;          # counter for number of internal nodes
 
    my @stack;            # pending nodes and leaves, in parsing numbering
    my @blen;             # branch length above each node/leaf, indexed in parsing numbering
    my $name;
    my $time;
    my $node;
    my $lnode;
    my $rnode;
    my $pnode;
    my $otaxon;

    # number of leaves of the tree 
    $N = ( $nhtree =~ tr/,/,/ ) + 1;
    $tree->{"Tree::ntaxa"} = $N;

    # if the number of parenthesis+1 = number of leaves  -->  tree is of the form (a,b)   --> start with node N   (root)
    # if the number of parenthesis+2 = number of leaves  -->  tree is of the form (a,b,c) --> start with node N+1 (next to root)
    # NOTE(review): for any other shape $nodestart stays undefined.
    if    ($N == ( $nhtree =~ tr/\(/\(/ )+1) { $nodestart = $N; }
    elsif ($N == ( $nhtree =~ tr/\(/\(/ )+2) { $nodestart = $N+1; }

    if ($N == 1) { $dim = $N;     }
    else         { $dim = $N - 1; }

    my $string = $nhtree;

    # Counter is:
    #  0...N-1  leaves
    #  N...2N-2 nodes (N is the root)
    #
    # in the structure I follow easels' conventions
    #
    #  0,1,...,N-1     leaves (in taxaparent)
    #  0,-1,...,-(N-1) leaves 
    #  0,...,N-2       nodes (0 is the root)
    
    while($string) {
        if (0) {
            print "STRING=$string\n";
            print "stack=@stack\n\n";
        }

        if ($string =~ /^\((.+)/) {
            # '(' opens a new internal node
            $string = $1;
            # push node on stack 
            push(@stack, $nodestart+$nnd);
            $nnd ++;
            $ncd ++;
        }
        elsif ($string =~ /^([^\(\)\,\:]+)\:(.+)/) {
            # 'name:' is a leaf; its index comes from the names hash
            $name = $1;
            $string = $2;
            # push leaf on stack 
            push(@stack, $hash_ref->{$name});
 
            $nsq ++;
            $ncd ++;
        }
        elsif ($string =~ /^([\d\.e\-]+)(.+)/) {
            # a branch length; it belongs to the element on top of the stack
            $time   = $1;
            decimal(\$time);
            $string = $2;
            # add branch length for node and put back on the stack 
            $node = pop(@stack);
            $blen[$node] = $time;
            push(@stack, $node);
        }
        elsif ($string =~ /^\)\;$/) {
            # end case;
            # have to deal differently whether the tree is of the general form (a,b) or (a,b,c)
           
            my $len = $#stack+1;
            if ($len == 2) { # tree with one leaf only
                if ($N != 1) { print "this tree should have one taxon only\n"; die; }
                $lnode = pop(@stack);
                $pnode = pop(@stack);
                ${$tree->left}[$pnode-$N]  = ($lnode < $N)? $lnode : $lnode-$N;    # set left  node of root
                ${$tree->ld}[$pnode-$N] = $blen[$lnode];                           # set left node branch length
                if ($lnode >= $N) { ${$tree->parent}[$lnode-$N] = $pnode-$N;     } # if a internal node, add the parent
                if ($lnode <  $N) { ${$tree->taxaparent}[$lnode] = $pnode-$N; } # if a leaf, fill array taxaparent
            }

            elsif ($len == 3) { # tree of the form (a,b)
                $rnode = pop(@stack);
                $lnode = pop(@stack);
                $pnode = pop(@stack);
                ${$tree->left}[$pnode-$N]  = ($lnode < $N)? -$lnode : $lnode-$N;   # set left  node of root
                ${$tree->right}[$pnode-$N] = ($rnode < $N)? -$rnode : $rnode-$N;   # set right node of root
                ${$tree->ld}[$pnode-$N] = $blen[$lnode];                           # set left  node branch length
                ${$tree->rd}[$pnode-$N] = $blen[$rnode];                           # set right node branch length
                if ($lnode >= $N) { ${$tree->parent}[$lnode-$N] = $pnode-$N;     } # if left  is a internal node, add the parent
                if ($rnode >= $N) { ${$tree->parent}[$rnode-$N] = $pnode-$N;     } # if right is a internal node, add the parent
                if ($lnode <  $N) { ${$tree->taxaparent}[$lnode] = $pnode-$N; } # if left  is a leaf, fill array taxaparent
                if ($rnode <  $N) { ${$tree->taxaparent}[$rnode] = $pnode-$N; } # if right is a leaf, fill array taxaparent
            }

            elsif ($len == 4) { # tree of the form (a:ta,b:tb,c:tc) --> ((a:ta,b:tb):tc/2,c:tc/2)
                $rnode  = pop(@stack);
                $lnode  = pop(@stack);
                $otaxon = pop(@stack); if ($otaxon != 0) { print "bad zero taxon $otaxon\n"; die; }
                $pnode  = pop(@stack);
                ${$tree->left}[$pnode-$N]  = ($lnode < $N)? -$lnode : $lnode-$N;   # set left  node of root
                ${$tree->right}[$pnode-$N] = ($rnode < $N)? -$rnode : $rnode-$N;   # set right node of root
                ${$tree->ld}[$pnode-$N] = $blen[$lnode];                           # set left  node branch length
                ${$tree->rd}[$pnode-$N] = $blen[$rnode];                           # set right node branch length
                if ($lnode >= $N) { ${$tree->parent}[$lnode-$N] = $pnode-$N;     } # if left  is a internal node, add the parent
                if ($rnode >= $N) { ${$tree->parent}[$rnode-$N] = $pnode-$N;     } # if right is a internal node, add the parent
                if ($lnode <  $N) { ${$tree->taxaparent}[$lnode] = $pnode-$N; } # if left  is a leaf, fill array taxaparent
                if ($rnode <  $N) { ${$tree->taxaparent}[$rnode] = $pnode-$N; } # if right is a leaf, fill array taxaparent

                # now add the last node, the root
                $nnd ++;
                $ncd ++;
                $lnode = $otaxon; # the convention is to put the 0 taxon to the left of the root
                $rnode = $pnode;
                # report the node that is actually tested ($pnode), not the
                # last node that received a branch length ($node)
                $pnode = $pnode-1; if ($pnode != $N) { print "bad root node=$pnode\n"; die; }
                ${$tree->left}[$pnode-$N]  = ($lnode < $N)? -$lnode : $lnode-$N;   # set left  node of root
                ${$tree->right}[$pnode-$N] = ($rnode < $N)? -$rnode : $rnode-$N;   # set right node of root
                # the branch of the 0 taxon is split in two halves around the new root
                ${$tree->ld}[$pnode-$N] = $blen[$lnode]/2.0;                       # set left  node branch length
                ${$tree->rd}[$pnode-$N] = $blen[$lnode]/2.0;                       # set right node branch length
                if ($lnode >= $N) { ${$tree->parent}[$lnode-$N] = $pnode-$N;     } # if left  is a internal node, add the parent
                if ($rnode >= $N) { ${$tree->parent}[$rnode-$N] = $pnode-$N;     } # if right is a internal node, add the parent
                if ($lnode <  $N) { ${$tree->taxaparent}[$lnode] = $pnode-$N; } # if left  is a leaf, fill array taxaparent
                if ($rnode <  $N) { ${$tree->taxaparent}[$rnode] = $pnode-$N; } # if right is a leaf, fill array taxaparent
                
            }
            
            # NOTE(review): $pnode is not set in this branch, so the root
            # check right below still runs on whatever value it had before.
            else { print "nh2tree not parse right\n"; $doesnotparse = 1;}
            
            # set parent for root to itself
            if ($pnode != $N) { print "bad root node=$pnode\n"; die; }
            ${$tree->parent}[$pnode-$N] = $pnode-$N;
            
            undef($string);

        }
        elsif ($string =~ /^\)(.+)/) {
            # ')' closes an internal node: the stack holds parent, left, right
            #create a node 
            $string = $1;
            $rnode = pop(@stack);
            $lnode = pop(@stack);
            $pnode = pop(@stack);
            
            if ($pnode < $N) { print "bad tree pnode=$pnode\n"; die; }
            
            # if a internal node, add the parent
            if ($lnode >= $N) { ${$tree->parent}[$lnode-$N] = $pnode-$N; }
            if ($rnode >= $N) { ${$tree->parent}[$rnode-$N] = $pnode-$N; }
            
            # if a leaf, fill array taxaparent
            if ($lnode < $N) { ${$tree->taxaparent}[$lnode] = $pnode-$N; }
            if ($rnode < $N) { ${$tree->taxaparent}[$rnode] = $pnode-$N; }

            # identify left and right nodes
            if ($lnode <  $N) { ${$tree->left}[$pnode-$N]  = -$lnode;   }
            if ($rnode <  $N) { ${$tree->right}[$pnode-$N] = -$rnode;   }
            if ($lnode >= $N) { ${$tree->left}[$pnode-$N]  = $lnode-$N; }
            if ($rnode >= $N) { ${$tree->right}[$pnode-$N] = $rnode-$N; }
            
            # branch lengths of left and right nodes.
            ${$tree->ld}[$pnode-$N] = $blen[$lnode];
            ${$tree->rd}[$pnode-$N] = $blen[$rnode]; 
            
            # put the parent node back in the stack 
            push(@stack, $pnode);
        }
        elsif ($string =~ /^\,(.+)/) {
            # separators carry no information
            $string = $1;
        }
        elsif ($string =~ /^\:(.+)/) {
            $string = $1;
        }
        else {
            print "bad tree parsing sring=$string\n"; die;
        }
        
    }

    # sanity checks on what has been read
    if ($nsq != $N)     { print "tree reading failed. N $N nsq $nsq\n"; die; }
    if ($nnd != $dim)   { print "tree reading failed. dim $dim nintnodes $nnd\n"; die; }
    if ($ncd != $N+$dim){ print "tree reading failed. nnodes ", $N+$dim, " found $ncd\n"; die; }

    # reorder the nodes to be able to compare with
    # the original tree
    tree_renumber_nodes(\$tree);

    if ($verbose) {     
        printf "\nnhtree %s\n", $nhtree;
        print_tree($tree); 
    }

    $$tree_ref         = $tree;
    $$doesnotparse_ref = $doesnotparse;
}

# Read a PHYLIP-style interleaved alignment.
#
# Arguments:
#   $msafile  - name of the alignment file
#   $nseq_ref - receives the number of sequences given in the header
#   $alen_ref - receives the alignment length given in the header
#   $msa_ref  - reference to an array; entry s receives the whole aligned
#               sequence s, without spaces or end-of-lines
#   $name_ref - reference to an array; entry s receives the name of
#               sequence s as found in the first block
#
# Expected layout: a header line "nseq alen", a first block whose lines
# start with the sequence name, and any number of further blocks (sequence
# only), separated by blank lines.
#
# Dies if the file cannot be opened, if no header is found, if a block
# does not have nseq lines, or if a sequence is not alen long.
sub parse_msa {
    my ($msafile, $nseq_ref, $alen_ref, $msa_ref, $name_ref) = @_;

    my $nseq;
    my $alen;
    my $n = 0;            # index of the next sequence in the current block

    my $isfirst = 0;      # true while reading the block that carries the names

    # three-argument open on a lexical handle: the file name is never taken
    # as a mode, and a missing file is reported here and not as a "bad
    # alignment" further down
    open(my $msa_fh, '<', $msafile)
        or die "parse_msa(): cannot open $msafile: $!\n";
    while(<$msa_fh>) {
        if (/\s*(\d+)\s+(\d+)/) {
            $nseq = $1;
            $alen = $2;
            
            #initialize
            $isfirst = 1;
            for (my $s = 0; $s < $nseq; $s ++) { $msa_ref->[$s] = ""; }
        }
        elsif ($isfirst && /^\S+/) {

            # "name sequence" separated by white space...
            if (/^\s*(\S+)\s+(.+)/) {
                $name_ref->[$n] = $1; $msa_ref->[$n++] .= $2;  
            }
            else {
                # ...or a 10-character name glued to the sequence
                if (/^(.{10})(.+)$/) { $name_ref->[$n] = $1; $msa_ref->[$n++] .= $2; }
            }
        }
        elsif (/^\s+$/) {
            # a blank line closes a block
            if ($n != $nseq) { print "bad alignment\n"; die; }
            $n = 0;
            $isfirst = 0;
        }
        elsif (/\s*\S+/) {
            # a line of a later block: sequence only
            $msa_ref->[$n++] .= $_;
        }
    }
    close ($msa_fh);

    if (!defined($nseq) || !defined($alen)) {
        die "parse_msa(): no 'nseq alen' header found in $msafile\n";
    }
    
    if ($verbose) { print "MSA: nseq $nseq alen $alen\n"; }

    #Remove spaces and end-of-lines from the alignment if any
    for (my $s = 0; $s < $nseq; $s ++) { 
        $msa_ref->[$s] =~ s/ //g;
        $msa_ref->[$s] =~ s/\n//g;
    }
    #Check the alignment is complete
    for (my $s = 0; $s < $nseq; $s ++) { if (length($msa_ref->[$s]) != $alen) { print "bad alignment\n"; die;} }
    
    $$nseq_ref = $nseq;
    $$alen_ref = $alen;
}

# Extract the alignment statistics from a stats file.
#
# Arguments:
#   $statsfile - name of the file to read
#   the remaining twelve arguments are references to scalars that receive,
#   as (average, standard deviation) pairs:
#     alignment length   ("Len alignment:"; the deviation is set to 0.0)
#     sequence length    ("seqs geometric mean:")
#     pairwise identity  ("pairwise ID:")
#     pairwise mutations ("pairwise MUT:")
#     pairwise indels    ("pairwise INDEL:")
#     indel frequency    ("indel freq:")
#
# A scalar whose line is not present in the file is left untouched.  If
# the file cannot be opened a warning is issued and all the scalars are
# left untouched, so that the caller can still detect the failure from
# its own initial values.
sub parse_msastats {
    my ($statsfile, 
        $ave_alen_ref, $std_alen_ref, $ave_sqlg_ref, $std_sqlg_ref, 
        $ave_id_ref, $std_id_ref, $ave_mut_ref, $std_mut_ref, 
        $ave_indl_ref, $std_indl_ref, $ave_fo_ref, $std_fo_ref) = @_;

    # lines of the form "<label>: <average> +/- <deviation>"
    my @pair_line = (
        [ qr/seqs geometric mean:\s+(\S+)\s\+\/-\s(\S+)/, $ave_sqlg_ref, $std_sqlg_ref ],
        [ qr/pairwise ID:\s+(\S+)\s\+\/-\s(\S+)/,         $ave_id_ref,   $std_id_ref   ],
        [ qr/pairwise MUT:\s+(\S+)\s\+\/-\s(\S+)/,        $ave_mut_ref,  $std_mut_ref  ],
        [ qr/pairwise INDEL:\s+(\S+)\s\+\/-\s(\S+)/,      $ave_indl_ref, $std_indl_ref ],
        [ qr/indel freq:\s+(\S+)\s\+\/-\s(\S+)/,          $ave_fo_ref,   $std_fo_ref   ],
    );

    # three-argument open on a lexical handle: the file name is never
    # taken as a mode and no global handle is shared with other functions
    my $stats_fh;
    if (!open($stats_fh, '<', $statsfile)) {
        warn "parse_msastats(): cannot open $statsfile: $!\n";
        return;
    }

    while (my $line = <$stats_fh>) {
        if ($line =~ /Len alignment:\s+(\S+)/) {
            $$ave_alen_ref = $1;
            $$std_alen_ref = 0.0;
            next;
        }

        # at most one of the patterns applies to a given line
        foreach my $entry (@pair_line) {
            my ($pattern, $ave_ref, $std_ref) = @{$entry};
            if ($line =~ $pattern) {
                $$ave_ref = $1;
                $$std_ref = $2;
                last;
            }
        }
    }
    close($stats_fh);
}

# Parse the output of a dnaml run on multiple data sets and average the
# parameters over the data sets.
#
# Arguments:
#   $nsample       - number of data sets expected in the output
#   $phylipoutfile - name of the dnaml output file
#   the remaining arguments are (average, standard deviation) pairs of
#   references for alpha, beta, lambda, mu, ip, like, ttr, apb, frs, frc.
#   Each pair is passed to accumulate_averages() once per data set and to
#   calculate_averages() at the end.
#
# The base frequencies and the transition/transversion ratio are read
# from the file; a data set is complete when its "Ln Likelihood" line is
# found.  alpha, beta and frs are derived from the frequencies and the
# ratio; frc is set equal to frs.  The values accumulated for lambda, mu
# and ip are always 0 and the one for apb is always 1.
#
# Dies if the file cannot be opened, if a likelihood is empty or 0, or if
# the number of data sets found is not $nsample.
sub parse_mphylip_dnaml {
    my ($nsample, $phylipoutfile,
        $ave_alpha_ref, $std_alpha_ref, 
        $ave_beta_ref, $std_beta_ref, 
        $ave_lambda_ref, $std_lambda_ref, 
        $ave_mu_ref, $std_mu_ref, 
        $ave_ip_ref, $std_ip_ref, 
        $ave_like_ref, $std_like_ref, 
        $ave_ttr_ref, $std_ttr_ref, 
        $ave_apb_ref, $std_apb_ref, 
        $ave_frs_ref, $std_frs_ref, 
        $ave_frc_ref, $std_frc_ref) = @_;
    
    # values read from the file; they are kept from one line to the next
    my $frqa;
    my $frqc;
    my $frqg;
    my $frqt;
    my $ttr;
    my $like;

    # constants accumulated for the parameters that are not parsed here
    my $ins = 0.0;
    my $del = 0.0;
    my $ip  = 0.0;
    my $apb = 1.0;

    my $nali = 0;
    open(my $out_fh, '<', $phylipoutfile)
        or die "parse_mphylip_dnaml(): cannot open $phylipoutfile: $!\n";
    while(<$out_fh>) {
        if (/^\s+A\s+(\S+)/) {
            $frqa = $1;
        }
        elsif (/^\s+C\s+(\S+)/) {
            $frqc = $1;
        }
        elsif (/^\s+G\s+(\S+)/) {
            $frqg = $1;
        }
        elsif (/^\s+T\(U\)\s+(\S+)/) {
            $frqt = $1;
        }
        elsif (/^Transition\/transversion ratio\s+=\s+(\S+)/) {
            $ttr = $1;
        }
        elsif (/^Ln Likelihood\s+=\s+(\S+)/) {
            $nali ++;
            $like = $1;

            # add this run to the averages
            if (!$like) {
                print "dnaml did not finish properly\n"; die; 
            }
            
            # normalize frequencies
            my $sum = $frqa +  $frqc +  $frqg +  $frqt;
            if ($sum > 0) {
                $frqa /= $sum;
                $frqc /= $sum;
                $frqg /= $sum;
                $frqt /= $sum;
            }
            
            # calculate alp bet from the frequencies and the ratio
            my $frqr  = $frqa + $frqg;     # purines
            my $frqy  = $frqc + $frqt;     # pyrimidines
            my $frqgr = $frqg / $frqr;
            my $frqty = $frqt / $frqy;
            
            my $aa  = $ttr * $frqr * $frqy - $frqa * $frqg - $frqc * $frqt;
            my $bb  = $frqa * $frqgr + $frqc * $frqty;
            my $alp = $aa / ($aa + $bb);
            my $bet = 1.0 - $alp;
            
            my $frs = 2.0 * $alp * $bb + 
                $bet * (1.0 - $frqa*$frqa - $frqc*$frqc - $frqg*$frqg - $frqt*$frqt);

            my $frc = $frs;

            accumulate_averages($alp, $ave_alpha_ref,  $std_alpha_ref);
            accumulate_averages($bet, $ave_beta_ref,   $std_beta_ref);
            accumulate_averages($ins, $ave_lambda_ref, $std_lambda_ref);
            accumulate_averages($del, $ave_mu_ref,     $std_mu_ref);
            accumulate_averages($ip,  $ave_ip_ref,     $std_ip_ref);
            
            accumulate_averages($like, $ave_like_ref, $std_like_ref);
            accumulate_averages($ttr,  $ave_ttr_ref,  $std_ttr_ref);
            accumulate_averages($apb,  $ave_apb_ref,  $std_apb_ref);   
            accumulate_averages($frs,  $ave_frs_ref,  $std_frs_ref);   
            accumulate_averages($frc,  $ave_frc_ref,  $std_frc_ref);   
        }
    }
    close($out_fh);
    
    # report the number that is actually compared ($nsample)
    if ($nali != $nsample) {
        print "parse_mphylip_dnaml(): there should be $nsample alingments not $nali\n"; die; 
    }

    calculate_averages($ave_alpha_ref,  $std_alpha_ref,  $nali);
    calculate_averages($ave_beta_ref,   $std_beta_ref,   $nali);
    calculate_averages($ave_lambda_ref, $std_lambda_ref, $nali);
    calculate_averages($ave_mu_ref,     $std_mu_ref,     $nali);
    calculate_averages($ave_ip_ref,     $std_ip_ref,     $nali);

    calculate_averages($ave_like_ref, $std_like_ref, $nali);
    calculate_averages($ave_ttr_ref,  $std_ttr_ref,  $nali);
    calculate_averages($ave_apb_ref,  $std_apb_ref,  $nali);
    calculate_averages($ave_frs_ref,  $std_frs_ref,  $nali);
    calculate_averages($ave_frc_ref,  $std_frc_ref,  $nali);
   
}

# Parse the output of a dnaml-erate run on multiple data sets and average
# the parameters over the data sets.
#
# Arguments:
#   $nsample       - number of data sets expected in the output
#   $phylipoutfile - name of the dnaml-erate output file
#   the remaining arguments are (average, standard deviation) pairs of
#   references for alpha, beta, lambda, mu, ip, like, ttr, apb, frs, frc.
#   Each pair is passed to accumulate_averages() once per data set and to
#   calculate_averages() at the end.
#
# All the parameters but apb are read from the file; apb is alpha + beta.
# A data set is complete when its "Ln Likelihood" line is found; the
# values used for it are the last ones read up to that line.
#
# Dies if the file cannot be opened, if alpha, beta, the insertion rate,
# the deletion rate or the likelihood is empty or 0, or if the number of
# data sets found is not $nsample.
sub parse_mphylip_dnaml_erate {
    my ($nsample, $phylipoutfile,
        $ave_alpha_ref, $std_alpha_ref, 
        $ave_beta_ref, $std_beta_ref, 
        $ave_lambda_ref, $std_lambda_ref, 
        $ave_mu_ref, $std_mu_ref, 
        $ave_ip_ref, $std_ip_ref, 
        $ave_like_ref, $std_like_ref, 
        $ave_ttr_ref, $std_ttr_ref, 
        $ave_apb_ref, $std_apb_ref, 
        $ave_frs_ref, $std_frs_ref, 
        $ave_frc_ref, $std_frc_ref) = @_;

    # -1 stands for "not read yet"
    my $frqa  = -1;
    my $frqc  = -1;
    my $frqg  = -1;
    my $frqt  = -1;
    my $frqo  = -1;

    my $alp  = -1;
    my $bet  = -1;
    my $ins  = -1;
    my $del  = -1;
    my $ip   = -1;

    my $ttr  = -1;
    my $apb  = -1;

    my $frc  = -1;
    my $frs  = -1;

    my $like = -1;

    my $nali = 0;
    open(my $out_fh, '<', $phylipoutfile)
        or die "parse_mphylip_dnaml_erate(): cannot open $phylipoutfile: $!\n";
    while(<$out_fh>) {
        if (/^alpha\s+=\s+(\S+)/) {
            $alp = $1;
        }
        elsif (/^beta\s+=\s+(\S+)/) {
            $bet = $1;
        }
        elsif (/^Indel prior\s+=\s+(\S+)/) {
            $ip = $1;
        }
        elsif (/^Insertions rate\s+=\s+(\S+)/) {
            $ins = $1;
        }
        elsif (/^Deletions rate\s+=\s+(\S+)/) {
            $del = $1;
        }
        elsif (/^\s+A\s+(\S+)/) {
            $frqa = $1;
        }
        elsif (/^\s+C\s+(\S+)/) {
            $frqc = $1;
        }
        elsif (/^\s+G\s+(\S+)/) {
            $frqg = $1;
        }
        elsif (/^\s+T\(U\)\s+(\S+)/) {
            $frqt = $1;
        }
        elsif (/^\s+\-\s+(\S+)/) {
            $frqo = $1;
        }
        elsif (/^Transition\/transversion ratio\s+=\s+(\S+)/) {
            $ttr = $1;
        }
        elsif (/^Average rate of subtitutions\s+=\s+(\S+)/) {
            $frs = $1;
        }
        elsif (/^Average rate of changes\s+=\s+(\S+)/) {
            $frc = $1;
        }
        elsif (/^Ln Likelihood\s+=\s+(\S+)/) {
            $like = $1;
            $nali ++;

            # add this run to the averages
            #
            # NOTE(review): the initial value -1 is true, so this test
            # does not catch a parameter that was never read; it only
            # fires for an empty or "0" value.  Confirm the order of the
            # lines in the dnaml-erate output before making it stricter.
            if (!$alp || !$bet || !$ins || !$del) {
                print "parse_mphylip_dnaml_erate(): cannot parse all the parameters from output\n"; die; 
            }
            if (!$like) {
                print "parse_mphylip_dnaml_erate(): cannot parse likelihood from output\n"; die; 
            }
            
            # normalize frequencies
            my $sum = $frqa +  $frqc +  $frqg +  $frqt;
            if ($sum > 0) {
                $frqa /= $sum;
                $frqc /= $sum;
                $frqg /= $sum;
                $frqt /= $sum;
            }
            
            $apb = $alp + $bet;

            accumulate_averages($alp, $ave_alpha_ref,  $std_alpha_ref);
            accumulate_averages($bet, $ave_beta_ref,   $std_beta_ref);
            accumulate_averages($ins, $ave_lambda_ref, $std_lambda_ref);
            accumulate_averages($del, $ave_mu_ref,     $std_mu_ref);
            accumulate_averages($ip,  $ave_ip_ref,     $std_ip_ref);
            
            accumulate_averages($like, $ave_like_ref, $std_like_ref);
            accumulate_averages($ttr,  $ave_ttr_ref,  $std_ttr_ref);
            accumulate_averages($apb,  $ave_apb_ref,  $std_apb_ref);   
            accumulate_averages($frs,  $ave_frs_ref,  $std_frs_ref);   
            accumulate_averages($frc,  $ave_frc_ref,  $std_frc_ref);   
            
        }
    }
    close($out_fh);

    # report the number that is actually compared ($nsample)
    if ($nali != $nsample) {
        print "parse_mphylip_dnaml_erate(): there should be $nsample alingments not $nali\n"; die; 
    }

    calculate_averages($ave_alpha_ref,  $std_alpha_ref,  $nali);
    calculate_averages($ave_beta_ref,   $std_beta_ref,   $nali);
    calculate_averages($ave_lambda_ref, $std_lambda_ref, $nali);
    calculate_averages($ave_mu_ref,     $std_mu_ref,     $nali);
    calculate_averages($ave_ip_ref,     $std_ip_ref,     $nali);

    calculate_averages($ave_like_ref, $std_like_ref, $nali);
    calculate_averages($ave_ttr_ref,  $std_ttr_ref,  $nali);
    calculate_averages($ave_apb_ref,  $std_apb_ref,  $nali);
    calculate_averages($ave_frs_ref,  $std_frs_ref,  $nali);
    calculate_averages($ave_frc_ref,  $std_frc_ref,  $nali);
    
}

# Print a Tree structure to standard output: one line per internal node
# (parent, left child with its branch length, right child with its branch
# length) followed by one line per leaf (its parent node).
#
# Arguments:
#   $tree - the Tree object to print
sub print_tree {
    my ($tree) = @_;

    my $ntaxa = $tree->{"Tree::ntaxa"};

    # a tree of one taxon has one node; otherwise there are ntaxa-1 nodes
    my $node_count = $ntaxa;
    if ($ntaxa > 1) { $node_count = $ntaxa - 1; }

    my $parent     = $tree->parent;
    my $left       = $tree->left;
    my $right      = $tree->right;
    my $ld         = $tree->ld;
    my $rd         = $tree->rd;
    my $taxaparent = $tree->taxaparent;

    printf "\nTREE STRUCTURE\n";
    printf "ntaxa:\t%d\n", $ntaxa;

    foreach my $node (0 .. $node_count - 1) {
        printf "node\t%d\tparent\t%d\tleft\t%d\t%f\tright\t%d\t%f\n",
            $node, $parent->[$node],
            $left->[$node],  $ld->[$node],
            $right->[$node], $rd->[$node];
    }
    printf "\n";

    foreach my $leaf (0 .. $ntaxa - 1) {
        printf "leaf\t%d\tparent\t%d\n", $leaf, $taxaparent->[$leaf];
    }

    printf "\n";

}

# Remove from an alignment file the columns in which every sequence has a
# gap ('-').  The file is read with parse_msa() and rewritten in place
# with write_msa_to_file().
#
# Arguments:
#   $msafile - name of the alignment file; it is overwritten
sub remove_common_gaps {
    my ($msafile) = @_;

    my $nseq;
    my $alen;
    my @msa;
    my @name;

    parse_msa ($msafile, \$nseq, \$alen, \@msa, \@name);

    my @new_msa = ("") x $nseq;

    # parse_msa() has checked that all the sequences have the same length
    my $ncol = defined($msa[0]) ? length($msa[0]) : 0;

    # Scan the alignment column by column.  The characters are taken by
    # position, so nothing depends on the capture of a previous match and
    # every column is visited exactly once.
    for my $col (0 .. $ncol - 1) {
        my @column = map { substr($msa[$_], $col, 1) } 0 .. $nseq - 1;

        # keep the column if at least one sequence has something that is not a gap
        if (grep { $_ ne "-" } @column) {
            for my $s (0 .. $nseq - 1) {
                $new_msa[$s] .= $column[$s];
            }
        }
    }

    write_msa_to_file($msafile, $nseq, \@new_msa, \@name);
}

# The phylip tree has the first sequence to the right of the root node;
# my convention is to have the first sequence to the left of the root node.
# This function reverses the order of the tree in nh format.
#
# Arguments:
#   $nhtree_ref - reference to the tree in nh format; it is replaced by
#                 the reversed tree
#
# The tree is consumed from its end, one token at a time, and the
# reversed tree is built from left to right: leaves ("name:length") are
# copied as they are, every ')' becomes a '(' and every '(' becomes a ')'.
# The branch length that follows a ')' is kept in a stack until the
# matching '(' is reached, where it is written after the new ')'.
#
# Dies if the tree is empty, if a token cannot be recognized, or if there
# are branch lengths left in the stack at the end.
sub reverse_tree {
    my ($nhtree_ref) = @_;

    my $nhtree = $$nhtree_ref;
    my $revnhtree = "";

    # branch lengths of the internal nodes that are still open
    my @time;
    my $time;

    if (!$nhtree) { print "phylip did not generate any tree\n"; die; }

    while($nhtree) {
	if (0) {
	    print "nh   =$nhtree\n";
	    print "revnh=$revnhtree\n";
	}

	# the final ");" becomes the opening '(' of the reversed tree
	if ($nhtree =~ /^(.+)\)\;$/) {
	    $nhtree = $1;
	    $revnhtree = "(";
	}
	# a leaf "name:length" that follows a ','
	elsif ($nhtree =~ /^(.+\,)([^\(\)\:\,]+)\:([\d\.]+)$/) {
	    $nhtree = $1;
	    $revnhtree .= "$2\:$3";
	}
	# a leaf "name:length" that follows a '('
	elsif ($nhtree =~ /^(.*\()([^\(\)\:\,]+)\:([\d\.]+)$/) {
	    $nhtree = $1;
	    $revnhtree .= "$2\:$3";
	}
	# "):length" closes an internal node: save the length, open the node
	elsif ($nhtree =~ /^(.+)\)\:([\d\.]+)$/) {
	    $nhtree = $1;
	    push(@time, $2);
	    $revnhtree .= "\(";
	}
	elsif ($nhtree =~ /^(.+)\,$/) {
	    $nhtree = $1;
	    $revnhtree .= "\,";
	}
	elsif ($nhtree =~ /^(.+)\:$/) {
	    $nhtree = $1;
	    $revnhtree .= "\:";
	}
	# a ')' that is not followed by a branch length
	elsif ($nhtree =~ /^(.+)\)$/) {
	    $nhtree = $1;
	    $revnhtree .= "\(";
	}
	# a '(' that is not the first character: close the node and write
	# the branch length that was saved for it
	elsif ($nhtree =~ /^(.+)\($/) {
	    $nhtree = $1;
	    $time = pop(@time);
	    $revnhtree .= "\)\:$time";
	}
	# the first '(' becomes the final ");" of the reversed tree
	elsif ($nhtree =~ /^\($/) {
	    $nhtree = "";
	    $revnhtree .= "\)\;";
	}
	else {
	    print "bad tree reversing string=$nhtree\n"; die;
	}
    }

    # every saved branch length has to be used
    if (@time) { print "bad tree reversing sring=$nhtree\n"; die; }

    $$nhtree_ref = $revnhtree;

    if ($verbose) { print "reversed tree\n$$nhtree_ref\n"; }
}

# Run the whole bootstrap experiment: NTS trials that subsample sequences
# and, for each of them, NTB trials that subsample alignment blocks.
#
# Arguments:
#   $NC, $NB, $NS, $NTB, $NTS - passed on to the helper functions; $NTS
#                           and $NTB are the number of sequence trials and
#                           of block trials, $NC the number of entries
#                           used in each of the file arrays
#   $nboot                - number of bootstraps, passed on to boostrap()
#   $msaname              - name of the alignment file
#   $averages_file_ref, $consense_file_ref, $consensenh_file_ref
#                         - references to arrays of file names to which
#                           the results of every trial are appended
#   $sconsense_file_ref, $sconsensenh_file_ref, $sconsenseave_file_ref
#                         - references to arrays of file names passed on
#                           to consense_of_consenses() and
#                           averages_of_averages()
#
# The partial alignments and the per-trial files are removed once they
# have been used.
sub run_boostrap {
    my ($NC, $NB, $NS, $NTB, $NTS, $nboot, $msaname, 
        $averages_file_ref, $consense_file_ref, $consensenh_file_ref, 
        $sconsense_file_ref, $sconsensenh_file_ref, $sconsenseave_file_ref) = @_; 

    my $pmsaname1;            # alignment of one sequence trial
    my $pmsaname2;            # alignment of one block trial
    my @averages_pfile;       # files of one sequence trial
    my @averages_ppfile;      # files of one block trial
    my @consense_pfile;
    my @consense_ppfile;
    my @consensenh_pfile;
    my @consensenh_ppfile;

    my $nt;                   # index of the trial over both loops

    for (my $nts = 0; $nts < $NTS; $nts ++) {

        create_partial_msa_seqs($nts, $NS, $msaname, \$pmsaname1);
        create_files($NC, $NB, $NS, $NTB, $nts, $nboot, $pmsaname1, \@averages_pfile, \@consense_pfile, \@consensenh_pfile);

        for (my $ntb = 0; $ntb < $NTB; $ntb ++) {
            $nt = $nts*$NTB + $ntb;

            create_partial_msa_blocks($ntb, $NB, $pmsaname1, \$pmsaname2);
            create_files($NC, $NB, $NS, $ntb, $nts, $nboot, $pmsaname2, \@averages_ppfile, \@consense_ppfile, \@consensenh_ppfile);

            print "TRIAL $nt $pmsaname2\n\n";
            system("more $pmsaname2\_stats\n");

            boostrap($pmsaname2, $nboot, \@averages_ppfile, \@consense_ppfile, \@consensenh_ppfile);
            system("rm $pmsaname2\n");
            system("rm $pmsaname2\_stats\n");

            # Append the results of this trial to the global files and to
            # the files of the current sequence trial.  Only the new file
            # is given to cat: the destination must not be one of its
            # inputs (GNU cat refuses it with "input file is output file",
            # other implementations may keep on copying the file into
            # itself).
            for (my $c = 0; $c < $NC; $c ++) {
                system("cat $averages_ppfile[$c]   >> $averages_file_ref->[$c]\n");
                system("cat $consense_ppfile[$c]   >> $consense_file_ref->[$c]\n");
                system("cat $consensenh_ppfile[$c] >> $consensenh_file_ref->[$c]\n");
            }
            for (my $c = 0; $c < $NC; $c ++) {
                system("cat $averages_ppfile[$c]   >> $averages_pfile[$c] \n");
                system("cat $consense_ppfile[$c]   >> $consense_pfile[$c] \n");
                system("cat $consensenh_ppfile[$c] >> $consensenh_pfile[$c] \n");
            }
            # collect partial stats
            wrapup($NC, $nt+1, $averages_file_ref, $consensenh_file_ref);

            for (my $c = 0; $c < $NC; $c ++) {
                system("rm $averages_ppfile[$c]\n");
                system("rm $consense_ppfile[$c]\n");
                system("rm $consensenh_ppfile[$c]\n");
            }
        }
        
        # consense of consenses
        consense_of_consenses($NC, $NTB, \@consensenh_pfile, $consensenh_file_ref, 
                              $sconsense_file_ref, $sconsensenh_file_ref);


        for (my $c = 0; $c < $NC; $c ++) {
            averages_of_averages($NTB, $averages_pfile[$c], $sconsenseave_file_ref->[$c]);
        }
        
        system("rm $pmsaname1\n");
        system("rm $pmsaname1\_stats\n");
        for (my $c = 0; $c < $NC; $c ++) {
            system("rm $averages_pfile[$c]\n");
            system("rm $consense_pfile[$c]\n");
            system("rm $consensenh_pfile[$c]\n");
        }
    }
}

# Run one of the PHYLIP programs on a file of multiple data sets and
# collect the running time, the average branch length and the averages of
# the parameters.
#
# Arguments:
#   $phylip      - name of the program; it is run from $phylipdir
#   $nboot       - number of data sets in $msafile
#   $msafile     - name of the file with the alignments
#   $treefile    - name given to the "outtree" file of the program
#   $phylipmode  - selects the menu options when the script runs with -o
#   $ave_time_ref, $ave_abl_ref, $std_abl_ref and the (average, standard
#   deviation) pairs for alpha, beta, lambda, mu, ip, like, ttr, apb, frs
#   and frc - references to the scalars that receive the results
#   $tag         - tag used in the names of the temporary files
#
# The program is driven through a file with the answers to its menu.  Its
# "outfile" and "outtree" are moved to their final names, and all the
# temporary files are removed at the end.
#
# Dies if the file with the menu answers cannot be written.
sub run_phylip {
    my ($phylip, $nboot, $msafile, $treefile, $phylipmode, 
        $ave_time_ref, $ave_abl_ref, $std_abl_ref, 
        $ave_alpha_ref, $std_alpha_ref, $ave_beta_ref, $std_beta_ref, 
        $ave_lambda_ref, $std_lambda_ref, $ave_mu_ref, $std_mu_ref, 
        $ave_ip_ref, $std_ip_ref, 
        $ave_like_ref, $std_like_ref, $ave_ttr_ref, $std_ttr_ref, 
        $ave_apb_ref, $std_apb_ref,  $ave_frs_ref, $std_frs_ref,  $ave_frc_ref, $std_frc_ref, $tag) = @_;

    my $ave_time = 0;
    my $ave_abl  = 0;
    my $std_abl  = 0;

    # The parameters, in the order in which extract_ave_param() takes
    # them.  The values are collected in local scalars that start at 0 and
    # are copied to the caller only when everything has been done.
    my @param_ref = ($ave_alpha_ref,  $std_alpha_ref,  $ave_beta_ref, $std_beta_ref,
                     $ave_lambda_ref, $std_lambda_ref, $ave_mu_ref,   $std_mu_ref,
                     $ave_ip_ref,     $std_ip_ref,
                     $ave_like_ref,   $std_like_ref,   $ave_ttr_ref,  $std_ttr_ref,
                     $ave_apb_ref,    $std_apb_ref,    $ave_frs_ref,  $std_frs_ref,
                     $ave_frc_ref,    $std_frc_ref);
    my @param = (0) x scalar(@param_ref);

    my $phylipinputfile     = "$msafile.$tag.phylipinput";
    my $phylipscreenoutfile = "$msafile.$tag.phylipscreenout";
    my $phylipoutputfile    = "$msafile.$tag.phylipoutput";
    my $phyliptimefile      = "$msafile.$tag.phyliptimefile";

    my $oddran = int(rand()*100)*2 + 1;   # an odd random number, as seed
    my $njumble = 1;

    # make phylip inputfile
    #
    # three-argument open on a lexical handle, and checked: without the
    # answers the program would be started with an empty or stale input
    open(my $in_fh, '>', $phylipinputfile)
        or die "run_phylip(): cannot write $phylipinputfile: $!\n";
    print $in_fh "$msafile\n";

    # analyze multiple data sets 
    print $in_fh "M\nD\n$nboot\n$oddran\n$njumble\n";
    
    if (!$opt_o) {
        if ($opt_e || $opt_D) {
            if    ($opt_A) { print $in_fh "6\n"; }
            elsif ($opt_B) { print $in_fh "6\nB\n"; }
            elsif ($opt_C) { print $in_fh "7\n"; }   # dnaml-erate option to optimize 2 param
        }
    }
    else {
        if    ($phylipmode == 0) { }
        elsif ($phylipmode == 1) { } # dnaml-erate option to optimize 2 param
        elsif ($phylipmode == 2) { print $in_fh "7\n"; } # dnaml-erate option to optimize 4 param
    }
    print $in_fh "Y\n";
    close ($in_fh)
        or die "run_phylip(): cannot write $phylipinputfile: $!\n";
    if ($verbose) {system ("more $phylipinputfile\n"); }

    # run PHYLIP
    # 
    my $cmd = "$phylipdir$phylip < $phylipinputfile > $phylipscreenoutfile";
    system("(time -p $cmd) 2> $phyliptimefile\n"); 
    system("mv outfile $phylipoutputfile\n");
    system("mv outtree $treefile\n");

    # extract average time
    $ave_time = extract_ave_time($phyliptimefile)/$nboot;

    # extract average parameter
    extract_ave_param($phylip, $nboot, $phylipoutputfile, map { \$_ } @param);

    # extract average abl
    extract_ave_abl($phylip, $nboot, $treefile, \$ave_abl, \$std_abl);

    print "ave_time $ave_time\n";

    system("rm $phylipinputfile\n");
    system("rm $phylipscreenoutfile\n");
    system("rm $phylipoutputfile\n");
    system("rm $phyliptimefile\n");

    # hand the results over to the caller
    $$ave_time_ref = $ave_time;
    $$ave_abl_ref  = $ave_abl;
    $$std_abl_ref  = $std_abl;
    for my $i (0 .. $#param) {
        ${ $param_ref[$i] } = $param[$i];
    }
    
}

# Run the seqboot program to generate the bootstrapped alignments, and
# collect the statistics of the alignments it produces.
#
# Arguments:
#   $nboot             - number of bootstrapped alignments to generate
#   $msaname           - name of the alignment file to resample
#   $msabootfile       - name given to the "outfile" of seqboot
#   $NC                - passed on to multiple_msa_stats()
#   $averages_file_ref - passed on to multiple_msa_stats()
#
# The program (file-level $seqboot) is driven through a file with the
# answers to its menu; that file and the screen output are removed at the
# end.
#
# Dies if the file with the menu answers cannot be written.
sub seqboot {
    my ($nboot, $msaname, $msabootfile, $NC, $averages_file_ref) = @_;

    my $seqbootinputfile = "$msaname.boot$nboot.seqboot.input";
    my $seqbootscreenout = "$msaname.boot$nboot.seqboot.screenont";

    my $oddran = int(rand()*100)*2 + 1;   # an odd random number, as seed

    # make seqboot inputfile
    #
    # three-argument open on a lexical handle, and checked: without the
    # answers the program would be started with an empty or stale input
    open(my $in_fh, '>', $seqbootinputfile)
        or die "seqboot(): cannot write $seqbootinputfile: $!\n";
    print $in_fh "$msaname\n";
    print $in_fh "R\n"; 
    print $in_fh "$nboot\n"; 
    print $in_fh "Y\n";
    print $in_fh "$oddran\n";
    close ($in_fh)
        or die "seqboot(): cannot write $seqbootinputfile: $!\n";
    if ($verbose) {system ("more $seqbootinputfile\n"); }

    # run SEQBOOT
    # 
    my $cmd = "$seqboot < $seqbootinputfile > $seqbootscreenout";
    system("$cmd\n"); 
    system("mv outfile $msabootfile\n");

    # get stats from the bootstrap alignments
    multiple_msa_stats($msabootfile, $NC, $averages_file_ref);

    if ($verbose) {
        system ("more $seqbootscreenout\n"); 
        system ("more $msabootfile\n"); 
    }

    system("rm $seqbootinputfile\n");
    system("rm $seqbootscreenout\n");
}

# Build the hash that gives an index to every taxon of a tree in nh
# format.  The taxa are numbered 0, 1, ... in the order in which they
# appear in the tree.
#
# Arguments:
#   $nhtree - the tree in nh format
#
# Returns the hash (name => index) as a list.
# Dies if one of the comma-separated pieces of the tree has no "name:".
sub sq_hash_table {
    my ($nhtree) = @_;

    my %index_of;
    my $next_index = 0;

    # there is one taxon in each of the pieces between commas
    foreach my $piece (split(/\,/, $nhtree)) {
        my ($taxon) = $piece =~ /([^\(\)\,\:]+)\:/;
        unless (defined($taxon)) { print " you should've found a taxon!\n"; die; }

        $index_of{$taxon} = $next_index;
        $next_index ++;
    }

    if ($verbose) {
        print "\nnames hash\n";
        foreach my $taxon (keys(%index_of)) {
            
            print "$taxon $index_of{$taxon}\n";
        }
    }
    return %index_of;
}

# Average branch length of a tree: the total branch length given by
# tree_tbl() divided by the number of branches, which is taken as two per
# internal node.
#
# Arguments:
#   $tree - the Tree object
#
# Returns the average branch length.
sub tree_abl {
    my ($tree) = @_;

    my $ntaxa = $tree->{"Tree::ntaxa"};

    # a tree of one taxon has one node; otherwise there are ntaxa-1 nodes
    my $internal = $ntaxa;
    if ($ntaxa > 1) { $internal = $ntaxa - 1; }

    # it's a binary tree: two branches per node
    my $average = tree_tbl($tree) / (2 * $internal);

    if ($verbose) {
        print_tree($tree);
        printf "\nTBL %f\n", tree_tbl($tree);
    }

    return $average;
}

# /* Function:  esl_tree_RenumberNodes()
#  * Synopsis:  Assure nodes are numbered in preorder.
#  * Incept:    SRE, Fri Oct 27 09:33:26 2006 [Janelia]
#  *
#  * Purpose:   Given a tree <T> whose internal nodes might be numbered in
#  *            any order, with the sole requirement that node 0 is the
#  *            root; renumber the internal nodes (if necessary) to be in Easel's
#  *            convention of preorder traversal. No other aspect of <T> is
#  *            altered (including its allocation size).
#  *
#  * Returns:   <eslOK> on success.
#  *
#  * Throws:    <eslEMEM> on allocation failure.
#  *
#  * Xref:      STL11/77
#  */
# Renumber the internal nodes of a tree in preorder (root, left subtree,
# right subtree).  Node 0 has to be the root.
#
# Arguments:
#   $tree_ref - reference to the Tree object.  If the nodes are already in
#               preorder nothing is touched; otherwise it is replaced by a
#               new Tree object with the nodes renumbered.
sub tree_renumber_nodes {
    my ($tree_ref) = @_;

    my $old   = $$tree_ref;
    my $ntaxa = $old->ntaxa;

    # Pass 1. Preorder traversal following the children links;
    #         builds $new_index[old] -> new.
    #
    # The stack keeps the nodes shifted by ntaxa, so that the root (node 0)
    # is not taken as the end of the loop.
    my @new_index;
    my $next_index  = 0;
    my $out_of_order = 0;
    my @pending = ($ntaxa);

    while (my $shifted = pop(@pending)) {
        my $node = $shifted - $ntaxa;

        if ($node != $next_index) { $out_of_order = 1; }
        $new_index[$node] = $next_index;
        $next_index ++;

        # only internal nodes (> 0) are followed; the right child goes in
        # first so that the left child comes out first
        my $rchild = $old->right->[$node];
        my $lchild = $old->left->[$node];
        if ($rchild > 0) { push(@pending, $ntaxa + $rchild); }
        if ($lchild > 0) { push(@pending, $ntaxa + $lchild); }
    }

    # nothing to do if the nodes are already in preorder
    return unless $out_of_order;

    # Pass 2. Fill a new tree with the nodes at their new positions
    #         (the order of this traversal does not matter).
    #
    my $renumbered = Tree->new();
    $renumbered->{"Tree::ntaxa"} = $ntaxa;

    foreach my $node (0 .. $ntaxa - 2) {
        my $to = $new_index[$node];

        $renumbered->parent->[$to] = $new_index[ $old->parent->[$node] ];

        foreach my $side ('left', 'right') {
            my $child = $old->$side->[$node];

            if ($child > 0) {
                # internal nodes are renumbered...
                $renumbered->$side->[$to] = $new_index[$child];
            }
            else {
                # ...taxon indices stay, and the taxon gets its new parent
                $renumbered->$side->[$to]         = $child;
                $renumbered->taxaparent->[-$child] = $to;
            }
        }

        $renumbered->ld->[$to] = $old->ld->[$node];
        $renumbered->rd->[$to] = $old->rd->[$node];
    }

    $$tree_ref = $renumbered;
}

# Total branch length of a tree: the sum of the left and right branch
# lengths of all the internal nodes.
#
# Arguments:
#   $tree - the Tree object
#
# Returns the total branch length.
sub tree_tbl {
    my ($tree) = @_;

    my $ntaxa = $tree->{"Tree::ntaxa"};

    # a tree of one taxon has one node; otherwise there are ntaxa-1 nodes
    my $internal = $ntaxa;
    if ($ntaxa > 1) { $internal = $ntaxa - 1; }

    my $left_len  = $tree->ld;
    my $right_len = $tree->rd;

    #calculate the tbl
    my $total = 0.0;
    foreach my $node (0 .. $internal - 1) {
        $total += $left_len->[$node];
        $total += $right_len->[$node];
    }

    return $total;
}

# Calculate the averages over the trials done so far: for each of the
# first $NC averages files, averages_of_averages() is called with the
# file as input and the file name plus ".ave" as output.
#
# Arguments:
#   $NC                 - number of averages files to process
#   $NT                 - number of trials, passed on to averages_of_averages()
#   $averagesfile_ref   - reference to the array of averages file names
#   $consensenhfile_ref - not used
sub wrapup {
    my ($NC, $NT, $averagesfile_ref, $consensenhfile_ref) = @_;

    # calculate ave/std of averages over the nt trials
    foreach my $c (0 .. $NC - 1) {
        my $averages_file = $averagesfile_ref->[$c];
        averages_of_averages($NT, $averages_file, "$averages_file.ave");
    }
}

# Write a histogram to a file, one "x<TAB>count" line per bin.
#
# Arguments:
#   $N         - upper end of the range of the histogram
#   $k         - number of bins per unit; bin i stands for x = i/k
#   $histo_ref - reference to the array of counts, bins 0 .. N*k
#   $hfile     - name of the output file; it is overwritten
#   $NT        - not used (it was the title of the gnuplot figure, which
#                is not produced any more)
#
# Dies if the file cannot be written.
sub write_histogram {

    my ($N, $k, $histo_ref, $hfile, $NT) = @_;
    
    my $dim = $N * $k;

    # three-argument open on a lexical handle, and checked, so that a
    # failure is not taken for an empty histogram
    open(my $his_fh, '>', $hfile)
        or die "write_histogram(): cannot write $hfile: $!\n";

    # both ends of the range are included
    for (my $i=0; $i<=$dim; $i++) { 
        my $len = $i/$k;
        print $his_fh "$len\t$histo_ref->[$i]\n";
    }
    
    close ($his_fh)
        or die "write_histogram(): cannot write $hfile: $!\n";

}

# write_msa_to_file()
#
# Write an alignment to $msafile in an interleaved layout (presumably the
# PHYLIP input format -- confirm against the consumer):
#
#    " <nseq> <alen>"
#    "<name><TAB><first 50 columns>"      one line per sequence
#    ""                                   blank line between blocks
#    "<next 50 columns>"                  one line per sequence
#    ...
#
# Args:    $msafile  - output file name (overwritten)
#          $nseq     - number of sequences to write
#          $msa_ref  - ref to the array of aligned sequences; all must have
#                      the same length as the first one
#          $name_ref - ref to the array of sequence names; handed to
#                      name_normalize(), which may rewrite them
# Dies:    after printing "write_msa_to_file(): bad alignment" when the
#          sequence lengths differ, or when $msafile cannot be opened/closed.
#
# Differences from the earlier implementation:
#   - blocks are cut with substr() instead of s/^(\S{50})// followed by $1,
#     which printed a stale $1 whenever the match failed (alignments shorter
#     than 50 columns) and looped forever on sequences containing whitespace;
#   - the caller's sequences are no longer consumed while writing;
#   - open/close failures are fatal instead of being ignored.
#
sub write_msa_to_file {
    my ($msafile, $nseq, $msa_ref, $name_ref) = @_;

    my $block = 50;

    my $alen = length($msa_ref->[0]);
    for (my $n = 1; $n < $nseq; $n++) {
        if ($alen != length($msa_ref->[$n])) {
            print "write_msa_to_file(): bad alignment\n"; die;
        }
    }

    open(my $msa, '>', $msafile)
        or die "write_msa_to_file(): cannot open $msafile for writing: $!\n";
    print $msa " $nseq $alen\n";

    # Make sure that the names of the sequences are all different
    # and with max len 10
    name_normalize($nseq, $name_ref);

    # First block: every line carries the sequence name.
    for (my $n = 0; $n < $nseq; $n++) {
        print $msa "$name_ref->[$n]\t" . substr($msa_ref->[$n], 0, $block) . "\n";
    }

    # Remaining blocks (the last one may be shorter than $block columns),
    # each preceded by a blank line.
    for (my $pos = $block; $pos < $alen; $pos += $block) {
        print $msa "\n";
        for (my $n = 0; $n < $nseq; $n++) {
            print $msa substr($msa_ref->[$n], $pos, $block) . "\n";
        }
    }

    close($msa)
        or die "write_msa_to_file(): error closing $msafile: $!\n";

    # List form: the file name reaches more(1) verbatim, as it did open().
    if ($verbose) { system("more", $msafile); }
}

# write_msastats_to_averagesfile()
#
# Report the alignment statistics on STDOUT (one labelled line per pair)
# and append them to $averages_file as twelve tab-separated fields.
# The appended text ends with a TAB and no newline, so whatever is appended
# to the file next continues on the same row.
#
# Args:    $averages_file - file to append to
#          remaining args - ave_*/std_* value pairs, in this order:
#                           alen, sqlg, id, mut, indl, fo
# Dies:    if $averages_file cannot be opened or closed; previously a failed
#          open was ignored and the row was silently dropped.
#
sub write_msastats_to_averagesfile {

    my ($averages_file,
        $ave_alen, $std_alen,
        $ave_sqlg, $std_sqlg,
        $ave_id,   $std_id,
        $ave_mut,  $std_mut,
        $ave_indl, $std_indl,
        $ave_fo,   $std_fo) = @_;

    open(my $fh, '>>', $averages_file)
        or die "write_msastats_to_averagesfile(): cannot open $averages_file for appending: $!\n";

    print "\nave_alen $ave_alen $std_alen\n";
    print "ave_sqlg $ave_sqlg $std_sqlg\n";
    print "ave_id $ave_id $std_id\n";
    print "ave_MUT $ave_mut $std_mut\n";
    print "ave_INDL $ave_indl $std_indl\n";
    print "ave_indlfreq $ave_fo $std_fo\n";

    # Trailing TAB, no newline: the row is completed by a later append.
    my $fields = join("\t",
                      $ave_alen, $std_alen,
                      $ave_sqlg, $std_sqlg,
                      $ave_id,   $std_id,
                      $ave_mut,  $std_mut,
                      $ave_indl, $std_indl,
                      $ave_fo,   $std_fo);
    print $fh $fields . "\t";

    close($fh)
        or die "write_msastats_to_averagesfile(): error closing $averages_file: $!\n";
}

# write_to_averagesfile()
#
# Report the run statistics on STDOUT (one labelled line per pair followed
# by the full tab-separated row) and append that same row, newline
# terminated, to $averages_file.
#
# Args:    $averages_file - file to append to
#          remaining args - ave_*/std_* value pairs, in this order:
#                           time, abl, alpha, beta, lambda, mu, ip, like,
#                           ttr, apb, frs, frc, bootstrap
# Dies:    if $averages_file cannot be opened or closed; previously a failed
#          open was ignored and the row was silently dropped.
#
# The frc pair used to be printed under the label "ave_frs"; it is now
# labelled "ave_frc".
#
sub write_to_averagesfile {
    my ($averages_file,
        $ave_time,   $std_time,   $ave_abl,  $std_abl,
        $ave_alpha,  $std_alpha,  $ave_beta, $std_beta,
        $ave_lambda, $std_lambda, $ave_mu,   $std_mu,
        $ave_ip,     $std_ip,
        $ave_like,   $std_like,   $ave_ttr,  $std_ttr,
        $ave_apb,    $std_apb,    $ave_frs,  $std_frs,
        $ave_frc,    $std_frc,    $ave_bootstrap, $std_bootstrap) = @_;

    open(my $fh, '>>', $averages_file)
        or die "write_to_averagesfile(): cannot open $averages_file for appending: $!\n";

    print "\nave_time $ave_time $std_time\n";
    print "ave_abl $ave_abl $std_abl \n";
    print "ave_alpha $ave_alpha $std_alpha\n";
    print "ave_beta $ave_beta $std_beta\n";
    print "ave_lambda $ave_lambda $std_lambda\n";
    print "ave_mu $ave_mu $std_mu \n";
    print "ave_ip $ave_ip $std_ip \n";
    print "ave_like $ave_like $std_like \n";
    print "ave_ttr $ave_ttr $std_ttr \n";
    print "ave_apb $ave_apb $std_apb \n";
    print "ave_frs $ave_frs $std_frs \n";
    print "ave_frc $ave_frc $std_frc \n";
    print "ave_boot $ave_bootstrap $std_bootstrap\n";

    # Build the row once so STDOUT and the file cannot drift apart.
    my $row = join("\t",
                   $ave_time,   $std_time,   $ave_abl,  $std_abl,
                   $ave_alpha,  $std_alpha,  $ave_beta, $std_beta,
                   $ave_lambda, $std_lambda, $ave_mu,   $std_mu,
                   $ave_ip,     $std_ip,
                   $ave_like,   $std_like,   $ave_ttr,  $std_ttr,
                   $ave_apb,    $std_apb,    $ave_frs,  $std_frs,
                   $ave_frc,    $std_frc,    $ave_bootstrap, $std_bootstrap) . "\n";
    print $row;
    print $fh $row;

    close($fh)
        or die "write_to_averagesfile(): error closing $averages_file: $!\n";
}

# write_to_average_averagesfile()
#
# Report the combined alignment and run statistics on STDOUT (one labelled
# line per pair followed by the full tab-separated row) and append that
# same row, newline terminated, to $averages_file.
#
# Args:    $averages_file - file to append to
#          remaining args - ave_*/std_* value pairs, in this order:
#                           alen, sqlg, id, mut, indl, fo, time, abl, alpha,
#                           beta, lambda, mu, ip, like, ttr, apb, frs, frc,
#                           bootstrap
# Dies:    if $averages_file cannot be opened or closed; previously a failed
#          open was ignored and the row was silently dropped.
#
sub write_to_average_averagesfile {
    my ($averages_file,
        $ave_alen,   $std_alen,
        $ave_sqlg,   $std_sqlg,
        $ave_id,     $std_id,
        $ave_mut,    $std_mut,
        $ave_indl,   $std_indl,
        $ave_fo,     $std_fo,
        $ave_time,   $std_time,   $ave_abl,  $std_abl,
        $ave_alpha,  $std_alpha,  $ave_beta, $std_beta,
        $ave_lambda, $std_lambda, $ave_mu,   $std_mu,
        $ave_ip,     $std_ip,
        $ave_like,   $std_like,   $ave_ttr,  $std_ttr, $ave_apb, $std_apb,
        $ave_frs,    $std_frs,    $ave_frc,  $std_frc,
        $ave_bootstrap, $std_bootstrap) = @_;

    open(my $fh, '>>', $averages_file)
        or die "write_to_average_averagesfile(): cannot open $averages_file for appending: $!\n";

    print "\nave_alen $ave_alen $std_alen\n";
    print "ave_sqlg $ave_sqlg $std_sqlg\n";
    print "ave_id $ave_id $std_id\n";
    print "ave_MUT $ave_mut $std_mut\n";
    print "ave_INDL $ave_indl $std_indl\n";
    print "ave_indlfreq $ave_fo $std_fo\n";
    print "ave_time $ave_time $std_time\n";
    print "ave_abl $ave_abl $std_abl \n";
    print "ave_alpha $ave_alpha $std_alpha\n";
    print "ave_beta $ave_beta $std_beta\n";
    print "ave_lambda $ave_lambda $std_lambda\n";
    print "ave_mu $ave_mu $std_mu \n";
    print "ave_ip $ave_ip $std_ip \n";
    print "ave_like $ave_like $std_like \n";
    print "ave_ttr $ave_ttr $std_ttr \n";
    print "ave_apb $ave_apb $std_apb \n";
    print "ave_frs $ave_frs $std_frs \n";
    print "ave_frc $ave_frc $std_frc \n";
    print "ave_boot $ave_bootstrap $std_bootstrap\n";

    # Build the row once so STDOUT and the file cannot drift apart.
    my $row = join("\t",
                   $ave_alen,   $std_alen,
                   $ave_sqlg,   $std_sqlg,
                   $ave_id,     $std_id,
                   $ave_mut,    $std_mut,
                   $ave_indl,   $std_indl,
                   $ave_fo,     $std_fo,
                   $ave_time,   $std_time,   $ave_abl,  $std_abl,
                   $ave_alpha,  $std_alpha,  $ave_beta, $std_beta,
                   $ave_lambda, $std_lambda, $ave_mu,   $std_mu,
                   $ave_ip,     $std_ip,
                   $ave_like,   $std_like,   $ave_ttr,  $std_ttr,
                   $ave_apb,    $std_apb,
                   $ave_frs,    $std_frs,    $ave_frc,  $std_frc,
                   $ave_bootstrap, $std_bootstrap) . "\n";
    print $row;
    print $fh $row;

    close($fh)
        or die "write_to_average_averagesfile(): error closing $averages_file: $!\n";
}

