#!/usr/bin/perl -w
#concordance.pl

use strict;
use Class::Struct;

use vars qw ($opt_A $opt_b $opt_B $opt_C $opt_D $opt_e $opt_o $opt_s $opt_S $opt_T $opt_v);  # required if strict used
use Getopt::Std;
use constant GNUPLOT => '/usr/bin/gnuplot';
getopts ('AbBCDeosS:T:v');


# Print a helpful message if the user provides no input file.
if (!@ARGV) {
        print "usage:  concordance.pl [options] msadir msaname\n\n";
        print "options:\n";
        print "-A       :  option for second test: branch opt only a la dnaml [default is CG_FULLMIN]\n";
        print "-b       :  make a bootstrap of the alignment before cutting in two halves\n";
	print "-B       :  option for second test: branch opt only using cg [default is CG_FULLMIN]\n";
	print "-C       :  option for third test: CG_FULLMIN [default is CG_FULLMIN with all parameters optimization]\n";
	print "-D       :  use dnaml-erate-dev [ default dnaml-erate]\n";
        print "-e       :  use dnaml-erate    [ default dnaml]\n";
        print "-s       :  shuffle the alignment before cutting in two halves\n";
	print "-S <num> :  NS=number of seqs per trials (default 0 = all sequences)\n";
        print "-T <num> :  NTS=number of sequence trials (default 1)\n";
        print "-v       :  verbose\n";
 	exit;
}
my $msadir  = shift;
my $msaname = shift;

srand;

struct Tree => {
ntaxa  => '$',
parent => '@',
left   => '@',
right  => '@',
ld     => '@',
rd     => '@',
taxaparent    => '@', 
};

# $ERATEDIR (shell env variable)
my $eratedir =  $ENV{'ERATEDIR'};
my $msastats = "$eratedir/scripts/msastats.pl";

# tmp copy of the whole alignment
print "\nMSADIR: $msadir\n";
print "MSANAME: $msaname\n\n";
system("cp $msadir/$msaname $msaname\n");

# PHYLIP version 3.66
#
my $phylipdir = "$eratedir/src/phylip3.66-erate/src/";
my $which_phylip;
if (!$opt_o) {
    if    ($opt_e) { $which_phylip = "dnaml-erate";     }
    elsif ($opt_D) { $which_phylip = "dnaml-erate-dev"; }
    else           { $which_phylip = "dnaml";           }
}

#other phylip programs used here
my $treedist = $phylipdir."treedist";
my $seqboot  = $phylipdir."seqboot";
my $consense = $phylipdir."consense";

#options
my $verbose = 0;
if ($opt_v) { $verbose = 1; }
my $seeplots = 0;
my $NC  = 1;           if ($opt_o) { $NC  = 3; } # number of cases (1 or 3)
my $NS  = 0;           if ($opt_S) { $NS  = $opt_S; } # number of sequences included in a pMSA (partial msa)
my $NTS = 1;           if ($opt_T) { $NTS = $opt_T; } # number of groups of NS sequences sampled from the MSA
my $withbootstrap = 0; if ($opt_b) { $withbootstrap = 1; }
my $withshuffle   = 0; if ($opt_s) { $withshuffle = 1; }

# the whole setup
my @averagesfile;
create_files($NC, $NS, $NTS, $msaname, \@averagesfile);
run_concordance($NC, $NS, $NTS, $msaname, \@averagesfile, $withbootstrap, $withshuffle);
system("rm $msaname\n"); # remove the tmp copy of the whole alignment


#########################################################
# routines
#########################################################
sub abl_from_nhtree {
    my ($phylip, $treenh) = @_;

    my $abl = 0.0;
    my $phyliptree;
    my $doesnotparse = 0;

    # if phylip cannot calculate times, it writes \s+nan, and treenh will 
    # end up empty
    if (!$treenh) { 
	print "phylip3.66 did not generate any tree\n"; }
    
    if ($phylip =~ /^dnaml$/ && !$treenh) {
	$doesnotparse = 1;
    }
    else {
    # the phylip tree has the first sequence as right from root node
    # my convention is first sequence left from root node.
    # this function reverses the order of the tree in  nh format
	reverse_tree(\$treenh);

	#phylip tree structure
	$phyliptree = Tree->new();
	my %sqhash = sqhash_table($treenh);
	nh2tree(\%sqhash, $treenh, \$phyliptree, \$doesnotparse);
    }
    
    
    # there was something wrong here, do not parse this result
    if ($doesnotparse) {
	return;
    }

    # extract the average branch length
    $abl = tree_abl($phyliptree);    

    return $abl;
}

sub accumulate_averages {
    my ($val, $meanval_ref, $meansquareval_ref) = @_;

    $$meanval_ref       += $val;
    $$meansquareval_ref += $val*$val;
}

sub analyze_both_alignments {

    my ($NC, $pmsaname_stats, $which_phylip, $msafile1, $msafile2, $averages_file, $phylipmode) = @_;

    my $ave_alen;
    my $std_alen;
    my $ave_sqlg;
    my $std_sqlg; 
    my $ave_id;
    my $std_id;
    my $ave_mut;
    my $std_mut;
    my $ave_indl;
    my $std_indl;
    my $ave_fo;
    my $std_fo;

    my $ave_time;
    my $std_time;
    my $ave_abl;
    my $std_abl;
    my $ave_alpha;
    my $std_alpha;
    my $ave_beta;
    my $std_beta;
    my $ave_lambda;
    my $std_lambda;
    my $ave_mu;
    my $std_mu;
    my $ave_ip;
    my $std_ip;
    my $ave_like;
    my $std_like;
    my $ave_ttr;
    my $std_ttr;
    my $ave_apb;
    my $std_apb;
    my $ave_frs;
    my $std_frs;
    my $ave_frc;
    my $std_frc;

    my $tim  = 0.0;
    my $abl  = 0.0;
    my $alp  = 0.0;
    my $bet  = 0.0;
    my $ins  = 0.0;
    my $del  = 0.0;
    my $ip   = 0.0;
    my $like = 0.0;
    my $ttr  = 0.0;
    my $apb  = 0.0;
    my $frs  = 0.0;
    my $frc  = 0.0;

    my $tim_s  = 0.0;
    my $abl_s  = 0.0;
    my $alp_s  = 0.0;
    my $bet_s  = 0.0;
    my $ins_s  = 0.0;
    my $del_s  = 0.0;
    my $ip_s   = 0.0;
    my $like_s = 0.0;
    my $ttr_s  = 0.0;
    my $apb_s  = 0.0;
    my $frs_s  = 0.0;
    my $frc_s  = 0.0;

   my $tp;
    my $d1;
    my $d2;

    my $tag = $which_phylip;
    if ($opt_o) {
	if ($phylipmode == 2) { $tag .= ".C"; } # dnaml-erate times + 4 parameters optimization
    }
    elsif ($opt_e || $opt_D) {
	if    ($opt_A) { $tag .= ".A"; } # dnaml-erate times optimization only a la dnaml
	elsif ($opt_B) { $tag .= ".B"; } # dnaml-erate times optimization only a la cg
	elsif ($opt_C) { $tag .= ".C"; } # dnaml-erate times + 4 parameters optimization
    }
    
    my $treefile1 = "$msafile1.$tag.treenh";
    my $treefile2 = "$msafile2.$tag.treenh";
    my $phylipoutputfile1 = "$msafile1.$tag.phylipoutput";
    my $phylipoutputfile2 = "$msafile2.$tag.phylipoutput";

    run_phylip($which_phylip, $msafile1, $treefile1, $phylipoutputfile1, $phylipmode,	       
	       \$tim,  \$tim_s, \$abl, \$abl_s, 
	       \$alp,  \$alp_s, \$bet, \$bet_s, 
	       \$ins,  \$ins_s, \$del, \$del_s, 
	       \$ip,   \$ip_s, 
	       \$like, \$like_s,\$ttr, \$ttr_s, 
	       \$apb,  \$apb_s, \$frs, \$frs_s, \$frc, \$frc_s, $tag);
    accumulate_averages($tim,  \$ave_time,   \$std_time);
    accumulate_averages($abl,  \$ave_abl,    \$std_abl);
    accumulate_averages($alp,  \$ave_alpha,  \$std_alpha);
    accumulate_averages($bet,  \$ave_beta,   \$std_beta);
    accumulate_averages($ins,  \$ave_lambda, \$std_lambda);
    accumulate_averages($del,  \$ave_mu,     \$std_mu);
    accumulate_averages($ip,   \$ave_ip,     \$std_ip);
    accumulate_averages($like, \$ave_like,   \$std_like);
    accumulate_averages($ttr,  \$ave_ttr,    \$std_ttr);
    accumulate_averages($apb,  \$ave_apb,    \$std_apb);   
    accumulate_averages($frs,  \$ave_frs,    \$std_frs);   
    accumulate_averages($frc,  \$ave_frc,    \$std_frc);   

     run_phylip($which_phylip, $msafile2, $treefile2, $phylipoutputfile2, $phylipmode,	       
	       \$tim,  \$tim_s, \$abl, \$abl_s, 
	       \$alp,  \$alp_s, \$bet, \$bet_s, 
	       \$ins,  \$ins_s, \$del, \$del_s, 
	       \$ip,   \$ip_s, 
	       \$like, \$like_s,\$ttr, \$ttr_s, 
	       \$apb,  \$apb_s, \$frs, \$frs_s, \$frc, \$frc_s, $tag);
    accumulate_averages($tim,  \$ave_time,   \$std_time);
    accumulate_averages($abl,  \$ave_abl,    \$std_abl);
    accumulate_averages($alp,  \$ave_alpha,  \$std_alpha);
    accumulate_averages($bet,  \$ave_beta,   \$std_beta);
    accumulate_averages($ins,  \$ave_lambda, \$std_lambda);
    accumulate_averages($del,  \$ave_mu,     \$std_mu);
    accumulate_averages($ip,   \$ave_ip,     \$std_ip);
    accumulate_averages($like, \$ave_like,   \$std_like);
    accumulate_averages($ttr,  \$ave_ttr,    \$std_ttr);
    accumulate_averages($apb,  \$ave_apb,    \$std_apb);   
    accumulate_averages($frs,  \$ave_frs,    \$std_frs);   
    accumulate_averages($frc,  \$ave_frc,    \$std_frc);   

    my $nali = 2;
    calculate_averages(\$ave_time,   \$std_time,   $nali);
    calculate_averages(\$ave_alpha,  \$std_alpha,  $nali);
    calculate_averages(\$ave_beta,   \$std_beta,   $nali);
    calculate_averages(\$ave_lambda, \$std_lambda, $nali);
    calculate_averages(\$ave_mu,     \$std_mu,     $nali);
    calculate_averages(\$ave_ip,     \$std_ip,     $nali);
    calculate_averages(\$ave_like,   \$std_like,   $nali);
    calculate_averages(\$ave_ttr,    \$std_ttr,    $nali);
    calculate_averages(\$ave_apb,    \$std_apb,    $nali);
    calculate_averages(\$ave_frs,    \$std_frs,    $nali);
    calculate_averages(\$ave_frc,    \$std_frc,    $nali);

    compare_trees($phylipoutputfile1, $phylipoutputfile2, $treefile1, $treefile2, \$tp, \$d1, \$d2, $phylipmode);

    parse_msastats_file($NC, $pmsaname_stats, 
			\$ave_alen, \$std_alen,
			\$ave_sqlg, \$std_sqlg, 
			\$ave_id,   \$std_id,
			\$ave_mut,  \$std_mut,
			\$ave_indl, \$std_indl,
			\$ave_fo,   \$std_fo);

    write_to_averagesfile($averages_file, 
			  $ave_alen, $std_alen,
			  $ave_sqlg, $std_sqlg, 
			  $ave_id,   $std_id,
			  $ave_mut,  $std_mut,
			  $ave_indl, $std_indl,
			  $ave_fo,   $std_fo,
			  $ave_time, $std_time, 
			  $ave_abl, $std_abl, 
			  $ave_alpha, $std_alpha, 
			  $ave_beta, $std_beta, 
			  $ave_lambda, $std_lambda, 
			  $ave_mu, $std_mu, 
			  $ave_ip, $std_ip, 
			  $ave_like, $std_like, 
			  $ave_ttr, $std_ttr, 
			  $ave_apb, $std_apb, 
			  $ave_frs, $std_frs, 
			  $ave_frc, $std_frc, 
			  $tp, $d1, $d2);

    system("rm $treefile1\n");
    system("rm $treefile2\n");
    system("rm $phylipoutputfile1\n");
    system("rm $phylipoutputfile2\n");
}

sub calculate_averages {
    my ($meanval_ref, $stdval_ref, $number) = @_;

    my $mean = $$meanval_ref;
    my $std  = $$stdval_ref;

    if ($number > 1) {
	$mean /= $number;
	
	$std -= $mean*$mean*$number;
	$std /= ($number-1);
	if ($std < 0. && $std> -0.00001) { $std = 0.0; }
	$std  = sqrt($std);
    }
    elsif ($number == 1) {
	$mean /= $number;
	$std   = 0.0;
    }
    else {
	$mean = 0.0;
	$std  = 0.0;
    }

    $$meanval_ref = $mean;
    $$stdval_ref  = $std;
}

sub compare_trees {
    my ($phylipoutputfile1, $phylipoutputfile2, 
	$phyliptreefile1, $phyliptreefile2, 
	$tp_ref, $d1_ref, $d2_ref,
	$phylipmode) = @_;

    my $phyliptreenh1;
    my $phyliptreenh2;
    my $phyliptree1;
    my $phyliptree2;
    my $phyliptree;
    my $issametree = 0;
    my $doesnotparse = 0;

    my $tp;
    my $d1;
    my $d2;

    #extract phylip tree in nh format from outtree
    extract_nhtree_from_phylip_outtree($phyliptreefile1, \$phyliptreenh1);
    extract_nhtree_from_phylip_outtree($phyliptreefile2, \$phyliptreenh2);

    if ($phylipmode == 0 && (!$phyliptreenh1 ||  !$phyliptreenh2)) {
	$doesnotparse = 1;
    }
    else {
    # the phylip tree has the first sequence as right from root node
    # my convention is first sequence left from root node.
    # this function reverses the order of the tree in  nh format
	reverse_tree(\$phyliptreenh1);
	print "\nREV1\n$phyliptreenh1\n";

	reverse_tree(\$phyliptreenh2);
	print "\nREV2\n$phyliptreenh2\n";

	#phylip tree structure
	$phyliptree1 = Tree->new();
	$phyliptree2 = Tree->new();

	# create a hash to make correspond each taxa name to a number.
	# use the same hash to build the tree structures for both trees
	my %sqhash = sqhash_table($phyliptreenh1);

	nh2tree(\%sqhash, $phyliptreenh1, \$phyliptree1, \$doesnotparse);
	print "\nNH1\n";
	print_tree($phyliptree1);

	nh2tree(\%sqhash, $phyliptreenh2, \$phyliptree2, \$doesnotparse);
	print "\nNH2\n";
	print_tree($phyliptree2);
    }
    
    
    # there was something wrong here, do not parse this result
    if ($doesnotparse) {
	return;
    }


    # Before we compare the trees, we need to rescale the
    # branches of the trees produced with dnaml-erate.
    #
    # By using rose to add indels, we have increased the
    # degree of divergence of each branch by a unknown amount.
    #
    # plain dnaml does not care, since it replaces indels
    # with all possible nucleotides.
    #
    # dnaml-erate tries to model that extra divergence,
    # so the tree reflects that, and for alignments with indels
    # in general it produces trees with a larger abl than
    # the one we started with.
    #
    # I correct for this by rescaling the branch lengths
    # so that they have the target abl (average branch lenght)
    #
    # This effect affects dnaml-erate a lot more than
    # it does dnaml
    #
    # of course, this will ONLY affect the distance
    # that uses branch lenghts, 
    # and it is not just based on tree topology.
    #    
    # The above trick still does not allow us to compare BSD (branch score distances)
    # for different experiments with different target abl.
    #
    # In order to be able to compare across the bord, both the given tree, and
    # the found tree are rescaled to having an abl= 1.0 (an arbitrary number, but
    # the same for all cases.

    my $scale = 1.0;
    tree_rescale(\$phyliptree1, \$phyliptreenh1, $scale);
    tree_rescale(\$phyliptree2, \$phyliptreenh2, $scale);

   ###############################
    #
    # do the actual tree comparison 
    #
    ###############################
    $tp = compare_trees_really($phyliptree1, $phyliptree2); 
    
    # compare using tree distance: Branch Score Distance
    $d1 = compare_trees_TREEDIST($phyliptreenh1, $phyliptreenh2, 1); # use the scaled target tree

    # compare using tree distance: Symmetric Difference 
    $d2 = compare_trees_TREEDIST($phyliptreenh1, $phyliptreenh2,2); 
    
    $$tp_ref = $tp;
    $$d1_ref = $d1;
    $$d2_ref = $d2;

}

# copy of easel funcion
#
# Function:  esl_tree_Compare()
# Incept:    SRE, Fri Sep 22 14:05:09 2006 [Janelia]
#
# Purpose:   Given two trees <T1> and <T2> for the same
#            set of <N> taxa (represented in the trees by the same
#            indices, <0..N-1>), compare the topologies of the
#            two trees.
#
#            For comparing unrooted topologies, be sure that <T1> and
#            <T2> both obey the unrooted tree convention that the
#            "root" is placed on the branch to taxon 0. (That is,
#            <T->left[0] = 0>.)
#
sub compare_trees_really {
    my ($tree1, $tree2) = @_;
    
    my $issametree = 1;
    my @Mg;
    my $child;
    my $a;
    my $b;

    # We're going to use the tree mapping function M(g) [Goodman79]:
    # M[g] for node g in T1 is the index of the lowest node in T2
    # that contains the same children taxa as the subtree
    # under g in T1.
       
    # We use the SDI algorithm [ZmasekEddy01] to construct M(g),
    # by postorder traversal of T1
    #
    for (my $g = $tree1->{"Tree::ntaxa"}-2; $g >= 0; $g--)
    {
      $child = ${$tree1->left}[$g];
      if ($child <= 0) { $a = ${$tree2->taxaparent}[-$child]; }
      else             { $a = ${$tree2->parent}[$Mg[$child]]; }

      $child = ${$tree1->right}[$g];
      if ($child <= 0) { $b = ${$tree2->taxaparent}[-$child]; }
      else             { $b = ${$tree2->parent}[$Mg[$child]];   }

      if ($a != $b) {  # a shortcut in SDI: special case for exact tree comparison 
	  return 0; 
      }
      $Mg[$g] = $a;
    }

    if ($verbose) { print "tree comparison: $issametree\n"; }

    return $issametree;
}

sub compare_trees_TREEDIST {

    my ($tree1, $tree2, $method) = @_;

    my $treefile              = "treefile";
    my $treedistinputfile     = "treedistinput";
    my $treedistoutputfile    = "treedistoutput";
    my $treedistscreenoutfile = "treedistscreenout";

    my $distance = 0;

    # put both trees in one file
    open(FILE, ">$treefile");
    print FILE "$tree1\n";
    print FILE "$tree2\n";
    close (FILE);
    if ($verbose) { system ("more $treefile\n"); }

    # make treedist inputfile
    open(IN, ">$treedistinputfile");
    print IN "$treefile\n";
    if ($method == 1) { print IN "Y\n"; }
    if ($method == 2) { print IN "D\n"; print IN "Y\n"; }
    close (IN);

    if ($verbose) {system ("more $treedistinputfile\n"); }

    # run TREEDIST
    # 
    system("$treedist < $treedistinputfile > $treedistscreenoutfile\n"); 
    system("mv outfile $treedistoutputfile\n");

    if ($verbose) { 
	print "the output:\n";
	system ("more $treedistoutputfile\n"); 
	print "end output\n\n";
    }

    # parse the output
    open(OUT, "$treedistoutputfile");
    while(<OUT>) {
	if (/^Trees\s+1\s+and\s+2:\s+(\S+)\s*$/) {
	    $distance = $1;
	    decimal(\$distance);
	}
    }
    close (OUT);

    system("rm $treefile\n");
    system("rm $treedistinputfile\n");
    system("rm $treedistscreenoutfile\n");
    system("rm $treedistoutputfile\n");
    
    return $distance;
}

sub concordance {
    my ($NC, $pmsaname_stats, $pmsaname1, $pmsaname2, $averages_file_ref) = @_;
 
    system("rm outfile\n");
    system("rm outtree\n");
    
    my $otag;
    if    ($opt_e) { $otag = "dnaml-erate";     }
    elsif ($opt_D) { $otag = "dnaml-erate-dev"; }

    if ($which_phylip) {
	analyze_both_alignments($NC, $pmsaname_stats, $which_phylip, $pmsaname1, $pmsaname2, $averages_file_ref->[0], 0); 
    }
    else {
	analyze_both_alignments($NC, $pmsaname_stats, "dnaml", $pmsaname1, $pmsaname2, $averages_file_ref->[0], 0);
	analyze_both_alignments($NC, $pmsaname_stats, $otag,   $pmsaname1, $pmsaname2, $averages_file_ref->[1], 1);
	analyze_both_alignments($NC, $pmsaname_stats, $otag,   $pmsaname1, $pmsaname2, $averages_file_ref->[2], 2);
    }

}

sub create_files {
    my ($NC, $NS, $NTS, $msaname, 
	$averages_file_ref) = @_; 
    
    my $ff = "NS$NS.TS$NTS";

    if ($NC == 1) {
	my $tag = $which_phylip;
	if ($opt_e || $opt_D) {
	    if    ($opt_A) { $tag .= ".A"; } # dnaml-erate times optimization only a la dnaml
	    elsif ($opt_B) { $tag .= ".B"; } # dnaml-erate times optimization only a la cg
	    elsif ($opt_C) { $tag .= ".C"; } # dnaml-erate times + 4 parameters optimization
	}
	
	$averages_file_ref->[0]   = "$msaname.$tag.$ff.averages";
    }
    else {
	my $otag;
	if    ($opt_e) { $otag = "dnaml-erate";     }
	elsif ($opt_D) { $otag = "dnaml-erate-dev"; }

	$averages_file_ref->[0] = "$msaname.dnaml.$ff.averages";
	$averages_file_ref->[1] = "$msaname.$otag.$ff.averages";
	$averages_file_ref->[2] = "$msaname.$otag.C.$ff.averages";
    }
    
    # we are going to append to this files, make sure they are empty
    for (my $nc = 0; $nc < $NC; $nc ++) {
	system("rm $averages_file_ref->[$nc]\n");
    }
}

sub create_partial_msa_seqs {
    my ($nts, $NS, $msaname, $pmsaname_ref) = @_;

    my $ff = "NS$NS";
    my $pmsaname = "$msaname.TS$nts.$ff";

    # sample NS sequences full length
    system("cp $msaname $pmsaname\n");
    msa_sample_seqs($NS, $pmsaname);

    $$pmsaname_ref = $pmsaname;
}

sub create_half_msa {
    my ($msaname, $msaname_stats_ref,  $pmsaname1_ref, $pmsaname1_stats_ref, $pmsaname2_ref, $pmsaname2_stats_ref) = @_;

    
    my $pmsaname1 = "$msaname.h1";
    my $pmsaname2 = "$msaname.h2";
    my $pmsaname1_statsfile = "$pmsaname1\_stats";
    my $pmsaname2_statsfile = "$pmsaname2\_stats";
    my $msaname_statsfile   = "$msaname\_stats";

    # divide the alignment in two halves
    msa_divide($msaname, $pmsaname1, $pmsaname2);
        
    #run stats
    system("$msastats $msaname   > $msaname_statsfile\n");
    system("$msastats $pmsaname1 > $pmsaname1_statsfile\n");
    system("$msastats $pmsaname2 > $pmsaname2_statsfile\n");

    $$pmsaname1_ref = $pmsaname1;
    $$pmsaname2_ref = $pmsaname2;

    $$pmsaname1_stats_ref = $pmsaname1_statsfile;
    $$pmsaname2_stats_ref = $pmsaname2_statsfile;
    $$msaname_stats_ref   = $msaname_statsfile;
}

sub decimal {
    my ($val_ref) =@_;

    my $val = $$val_ref;
    my $newval;
    my $root;
    my $power;
    my $tol = 0.000001;

    if ($val =~ /^(\S+)e-[0]+(\d+)$/) {
	$root = $1;
	$power = $2;
	
	while ($root >= 1) { $power --; $root /= 10.; }

	if ($root =~ /^0\.(\S+)/) { $newval = "0."; $root = $1; }
	else { print "decimal(): something went wrong val=$val newval=$newval root=$root power=$power\n"; die; }

	my $n = 0;
	while ($n++<$power) { $newval .= "0"; }

	$newval .= $root;
    }
    elsif ($val =~ /^(\S+)e\+[0]+$/) {
	$newval = $1;
    }
    elsif ($val =~ /^(\S+)e\+[0]+(\d+)$/) {
	$root = $1;
	$power = $2;

	$newval = $root;
	while ($power > 0) { $power --; $newval *= 10.; }

    }
    else {
	$newval = $val;
    }

    # check
    if (abs($val-$newval) > $tol){ 
	printf "decimal(): bad value newval %f val %f diff %f tol %f\n", $newval, $val, abs($val-$newval), $tol; 
	die; 
    }

    $$val_ref = $newval;
}

sub extract_ave_abl {
     my ($phylip, $nsample, $phyliptreefile, $ave_abl_ref, $std_abl_ref) = @_;

     my $abl;
     my $nali = 0;

     if ($verbose) { system("more $phyliptreefile\n"); }

     my $treenh = "";
     open(FILE, "$phyliptreefile");
     while(<FILE>) {
	 if (/^\s+$/) { next; }

	 elsif (/^(\S+\);)$/) { 
	     $nali ++;
	     $treenh .= "$1"; 

	     # calculate abl for this tree
	     $abl = abl_from_nhtree($phylip, $treenh);
	     accumulate_averages($abl, $ave_abl_ref, $std_abl_ref);
	     if ($verbose) { print "\n$treenh\nabl $abl ave $$ave_abl_ref std $$std_abl_ref\n"; }
	     $treenh = "";
	 }

	 elsif (/^(\S+)$/) { $treenh .= "$1"; }

	 else  { $treenh = ""; last; }
     }
     close (FILE);

    if ($nali != $nsample) {
	print "extract_ave_abl(): there should be $nsample alignments not $nali\n"; die; 
    }

    calculate_averages($ave_abl_ref, $std_abl_ref, $nali); 
}

sub extract_ave_param {
    my ($phylip, $nsample, $phylipoutputfile, 
	$ave_alpha_ref, $std_alpha_ref, $ave_beta_ref, $std_beta_ref, 
	$ave_lambda_ref, $std_lambda_ref, $ave_mu_ref, $std_mu_ref, 
	$ave_ip_ref, $std_ip_ref, 
	$ave_like_ref, $std_like_ref, $ave_ttr_ref, $std_ttr_ref, 
	$ave_apb_ref, $std_apb_ref, $ave_frs_ref, $std_frs_ref, 
	$ave_frc_ref, $std_frc_ref) = @_;

    my $ave_alpha  = 0.0;
    my $std_alpha  = 0.0;
    my $ave_beta   = 0.0;
    my $std_beta   = 0.0;
    my $ave_lambda = 0.0;
    my $std_lambda = 0.0;
    my $ave_mu     = 0.0;
    my $std_mu     = 0.0;
    my $ave_ip     = 0.0;
    my $std_ip     = 0.0;
    my $ave_like   = 0.0;
    my $std_like   = 0.0;
    my $ave_ttr    = 0.0;
    my $std_ttr    = 0.0;
    my $ave_apb    = 0.0;
    my $std_apb    = 0.0;
    my $ave_frs    = 0.0;
    my $std_frs    = 0.0;
    my $ave_frc    = 0.0;
    my $std_frc    = 0.0;

    my $alp  = 0.0;
    my $bet  = 0.0;
    my $ins  = 0.0;
    my $del  = 0.0;
    my $ip   = 0.0;
    my $like = 0.0;
    my $ttr  = 0.0;
    my $apb  = 0.0;
    my $frs  = 0.0;
    my $frc  = 0.0;

    my $alp_s  = 0.0;
    my $bet_s  = 0.0;
    my $ins_s  = 0.0;
    my $del_s  = 0.0;
    my $ip_s   = 0.0;
    my $like_s = 0.0;
    my $ttr_s  = 0.0;
    my $apb_s  = 0.0;
    my $frs_s  = 0.0;
    my $frc_s  = 0.0;

    if    ($phylip =~ /^dnaml$/) {
	parse_mphylip_dnaml($nsample, $phylipoutputfile, 
			    \$alp,  \$alp_s, 
			    \$bet,  \$bet_s, 
			    \$ins,  \$ins_s, 
			    \$del,  \$del_s, 
			    \$ip,   \$ip_s, 
			    \$like, \$like_s, 
			    \$ttr,  \$ttr_s, 
			    \$apb,  \$apb_s, 
			    \$frs,  \$frs_s, 
			    \$frc,  \$frc_s);
    }
    elsif ($phylip =~ /^dnaml-erate$/  || $phylip =~ /^dnaml-erate-dev$/) {
	parse_mphylip_dnaml_erate($nsample, $phylipoutputfile, 
				  \$alp,  \$alp_s, 
				  \$bet,  \$bet_s, 
				  \$ins,  \$ins_s, 
				  \$del,  \$del_s, 
				  \$ip,   \$ip_s, 
				  \$like, \$like_s, 
				  \$ttr,  \$ttr_s, 
				  \$apb,  \$apb_s, 
				  \$frs,  \$frs_s, 
				  \$frc,  \$frc_s);
    }
    else {
	print "extract_ave_param(): which phylyp is this? $phylip\n"; die;
    }

    accumulate_averages($alp, \$ave_alpha,  \$std_alpha);
    accumulate_averages($bet, \$ave_beta,   \$std_beta);
    accumulate_averages($ins, \$ave_lambda, \$std_lambda);
    accumulate_averages($del, \$ave_mu,     \$std_mu);
    accumulate_averages($ip,  \$ave_ip,     \$std_ip);
    
    accumulate_averages($like, \$ave_like, \$std_like);
    accumulate_averages($ttr,  \$ave_ttr,  \$std_ttr);
    accumulate_averages($apb,  \$ave_apb,  \$std_apb);   
    accumulate_averages($frs,  \$ave_frs,  \$std_frs);   
    accumulate_averages($frc,  \$ave_frc,  \$std_frc);   
    
    $$ave_alpha_ref  = $ave_alpha;
    $$std_alpha_ref  = $std_alpha;
    $$ave_beta_ref   = $ave_beta;
    $$std_beta_ref   = $std_beta;
    $$ave_lambda_ref = $ave_lambda;
    $$std_lambda_ref = $std_lambda;
    $$ave_mu_ref     = $ave_mu;
    $$std_mu_ref     = $std_mu;
    $$ave_ip_ref     = $ave_ip;
    $$std_ip_ref     = $std_ip;
    $$ave_like_ref   = $ave_like;
    $$std_like_ref   = $std_like;
    $$ave_ttr_ref    = $ave_ttr;
    $$std_ttr_ref    = $std_ttr;
    $$ave_apb_ref    = $ave_apb;
    $$std_apb_ref    = $std_apb;
    $$ave_frs_ref    = $ave_frs;
    $$std_frs_ref    = $std_frs;
    $$ave_frc_ref    = $ave_frc;
    $$std_frc_ref    = $std_frc;
}

sub extract_ave_time {

    my ($timefile, $ave_time_ref, $std_time_ref) = @_;

    my $time = -1;

    if ($verbose) { system("more $timefile\n"); }

    open(FILE, $timefile);
    while(<FILE>) {
	if (/^real\s+(\S+)/) {
	    $time = $1;
	}
    }
    close(FILE);
    if ($verbose) { print "time $time\n"; }

   if ($time < 0) { print "did not get the right runtime $time secs\n"; die; }

    accumulate_averages($time, $ave_time_ref, $std_time_ref);
}

sub extract_nhtree_from_phylip_outtree {
    my ($phylipouttreefile, $treenh_ref) = @_;

    my $treenh = "";

    open(FILE, "$phylipouttreefile");
    while(<FILE>) {
	if (/^\s+$/)      { next; }
	elsif (/^(\S+)$/) { $treenh .= "$1"; }
	else              { $treenh = ""; last; }
    }
    close (FILE);

    # if phylip cannot calculate times, it writes \s+nan, and treenh will 
    # end up empty
    if (!$treenh) { 
	print OUT "phylip3.66 did not generate any tree\n"; 
	print "phylip3.66 did not generate any tree\n"; 
    }

    if ($verbose) {     
	printf "\nnhtree %s\n", $treenh;
    }

    $$treenh_ref = $treenh;
}

sub msa_sample_seqs {

    my ($NS, $msaname) = @_;

    my @msa;
    my @name;
    my $nseq;
    my $alen;

    my @new_msa;
    my @new_name;
    my $new_nseq;
    my $new_alen;

    parse_msa ($msaname, \$nseq, \$alen, \@msa, \@name);
    $new_alen = $alen;

    if ($NS == 0) { $new_nseq = $nseq; }
    else          { $new_nseq = ($nseq > $NS)? $NS : $nseq; }

    print "MSA nseq $nseq alen $alen\n";

    my @seq;
    for (my $n = 0; $n < $nseq; $n ++) {
	$seq[$n] = $n;
    }

    if ($new_nseq == $nseq) {
	for (my $n = 0; $n < $new_nseq; $n ++) {
	    $new_msa[$n]  = $msa[$n];
	    $new_name[$n] = $name[$n];
	}
    }
    else {
	for (my $n = 0; $n < $new_nseq; $n ++) {
	    my $sq = int(rand()*$nseq);

	    $new_name[$n] = $name[$seq[$sq]];
	    $new_msa[$n]  = $msa[$seq[$sq]];

	    print "SAMPLE $seq[$sq]\n";
	    # sample without replacement
	    for (my $s = 0; $s < $nseq; $s ++) {
		if ($seq[$s] == $seq[$sq]) { 
		    for (my $x = $s; $x < $nseq-1; $x ++) { 
			$seq[$x] =  $seq[$x+1];
		    }
		    last;
		}
	    }
	    $nseq --;
	    	    
	}
    }

    for (my $n = 0; $n < $new_nseq; $n ++) {
	# dnaml does not accept these character as names
	$new_name[$n] =~ s/\(/x/g; 
	$new_name[$n] =~ s/\)/x/g; 
	$new_name[$n] =~ s/\:/x/g; 
	$new_name[$n] =~ s/\;/x/g; 
	$new_name[$n] =~ s/\,/x/g; 
	$new_name[$n] =~ s/\[/x/g; 
	$new_name[$n] =~ s/\]/x/g; 
    }
    
    write_msa_to_file($msaname, $new_nseq, \@new_msa, \@new_name);
    remove_common_gaps($msaname);   
}

sub msa_divide {

    my ($msaname, $pmsaname1, $pmsaname2) = @_;

    my @msa;
    my @name;
    my $nseq;
    my $alen;

    my @msa1;
    my @name1;
    my $alen1;
    
    my @msa2;
    my @name2;
    my $alen2;

    system("cp $msaname $pmsaname1\n");
    system("cp $msaname $pmsaname2\n");

    parse_msa($msaname, \$nseq, \$alen, \@msa, \@name);

    $alen1 = int($alen/2.);
    $alen2 = $alen - $alen1;
    
    for (my $n = 0; $n < $nseq; $n ++) {
	$name1[$n] = $name[$n];
	$name2[$n] = $name[$n];

	$msa[$n] =~ s/^(\S{$alen1})(\S+)$//; 
	$msa1[$n] = $1;
	$msa2[$n] = $2;
	
	if (length($msa1[$n]) != $alen1) { print "msa_divide(): bad len1\nn"; die; }
	if (length($msa2[$n]) != $alen2) { print "msa_divide(): bad len2\nn"; die; }
    }
    
    write_msa_to_file($pmsaname1, $nseq, \@msa1, \@name1);
    write_msa_to_file($pmsaname2, $nseq, \@msa2, \@name2);
}

sub msa_row_2_col {
    my ($nseq, $alen, $row_ref, $col_ref) = @_;
    
    my @row;
    for (my $n = 0; $n < $nseq; $n ++) {
	$row[$n] = $row_ref->[$n];
    }
    
    for (my $i = 0; $i < $alen; $i ++) {
	$col_ref->[$i] = "";
	for (my $n = 0; $n < $nseq; $n ++) {
	    $row[$n] =~ s/^(\S)//; $col_ref->[$i] .= $1;
	}	
    }
    
    for (my $i = 0; $i < $alen; $i ++) {
	if (length($col_ref->[$i]) != $nseq) { 
	    print "bad conversion from rows to cols\n"; die;
	}
    }
}

sub msa_col_2_row {
    my ($nseq, $alen, $col_ref, $row_ref) = @_;
    
    my @col;
    for (my $i = 0; $i < $alen; $i ++) {
	$col[$i] = $col_ref->[$i];
    }
    
    for (my $n = 0; $n < $nseq; $n ++) {
	$row_ref->[$n] = "";
	
	for (my $i = 0; $i < $alen; $i ++) {
	    $col[$i] =~ s/^(\S)//; $row_ref->[$n] .= $1;
	}
    }
    
    for (my $n = 0; $n < $nseq; $n ++) {
	if (length($row_ref->[$n]) != $alen) { 
	    print "bad conversion from cols to rows\n"; die;
	}	
    }
}

# In Phylip format the names of the
# sequences have to be at most of length 10.
# Here we truncate names if necesary, and
# make sure that all names are different at that length
sub name_normalize {
    my ($nseq, $name_ref) = @_;

    my $maxlen = 10;
    
    if ($verbose) {
	for (my $n = 0; $n < $nseq; $n ++) {
	    print "OLD: $name_ref->[$n]\n";
	}
    }
    
    # Make names of length maxlen (10) either 
    # by truncating it, or by adding spaces
    for (my $n = 0; $n < $nseq; $n ++) {
	
	# truncate longer names
	if ($name_ref->[$n] =~ /^(.{$maxlen})/) {
	    $name_ref->[$n] = $1;
	}
	
	# add spaces for shorter names
	while (length($name_ref->[$n]) < $maxlen) {
	    $name_ref->[$n] .= " ";
	}
    }
    
    # After truncation, some names might be identical.
    # Change identical names by adding a counter.
    for (my $n = 0; $n < $nseq; $n ++) {
	my $c = 1;
	for (my $m = $n+1; $m < $nseq; $m ++) {
	    if ($name_ref->[$n] eq $name_ref->[$m]) {

		# change the first name
		if ($c == 1) {
		    # remove last two charecters and add the counter tag
		    $name_ref->[$n] =~ s/.{2}$//;
		    $name_ref->[$n] .= ".$c";
		}
		
		# first remove spaces at the end
		$name_ref->[$m] =~ s/ //g;
		
		# remove last two charecters and add the counter tag
		$c ++;
		$name_ref->[$m] =~ s/.{2}$//;
		$name_ref->[$m] .= ".$c";  
		
		# truncate or add spaces again
		if ($name_ref->[$m] =~ /^(.{$maxlen})/){
		    $name_ref->[$m] = $1;
		}		
		while (length($name_ref->[$m]) < $maxlen) {
		    $name_ref->[$m] .= " ";
		}
	    }
	}
    }  
    if ($verbose) {
	for (my $n = 0; $n < $nseq; $n ++) {
	    print "NEW: $name_ref->[$n]\n";
	}
    }
  
}

sub nh2tree {
    my ($hash_ref, $nhtree, $tree_ref, $doesnotparse_ref) = @_;

    my $tree         = $$tree_ref;
    my $doesnotparse = 0;

    my $N;                # number of ',' in the nh notation plus one
    my $nodestart;        # 
    my $dim;              # number of internal nodes 
    my $ncd = 0;          # counter for number of nodes+leaves in the tree 
    my $nsq = 0;          # counter for number of leaves in the tree 
    my $nnd = 0;          # counter for number of internal nodes
 
    my @stack;
    my @blen;
    my $name;
    my $time;
    my $node;
    my $lnode;
    my $rnode;
    my $pnode;
    my $otaxon;

    # number of leaves of the tree 
    $N = ( $nhtree =~ tr/,/,/ ) + 1;
    $tree->{"Tree::ntaxa"} = $N;

    # if the number of parenthesis+1 = number of leaves  -->  tree is of the form (a,b)   --> start with node N   (root)
    # if the number of parenthesis+2 = number of leaves  -->  tree is of the form (a,b,c) --> start with node N+1 (next to root)
    if    ($N == ( $nhtree =~ tr/\(/\(/ )+1) { $nodestart = $N; }
    elsif ($N == ( $nhtree =~ tr/\(/\(/ )+2) { $nodestart = $N+1; }

    if ($N == 1) { $dim = $N;     }
    else         { $dim = $N - 1; }

    my $string = $nhtree;

    # Counter is:
    #  0...N-1  leaves
    #  N...2N-2 nodes (N is the root)
    #
    # in the structure I follow easels' conventions
    #
    #  0,1,...,N-1     leaves (in taxaparent)
    #  0,-1,...,-(N-1) leaves 
    #  0,...,N-2       nodes (0 is the root)
    
    while($string) {
	if (0) {
	    print "STRING=$string\n";
	    print "stack=@stack\n\n";
	}

	if ($string =~ /^\((.+)/) {
	    $string = $1;
	    # push node on stack 
	    push(@stack, $nodestart+$nnd);
	    $nnd ++;
	    $ncd ++;	
	}
	elsif ($string =~ /^([^\(\)\,\:]+)\:(.+)/) {
	    $name = $1;
	    $string = $2;
	    # push leaf on stack 
	    push(@stack, $hash_ref->{$name});
 
	    $nsq ++;
            $ncd ++;
  	}
	elsif ($string =~ /^([\d\.e\-]+)(.+)/) {
	    $time   = $1;
	    decimal(\$time);
	    $string = $2;
	    # add branch length for node and put back on the stack 
	    $node = pop(@stack);
	    $blen[$node] = $time;
	    push(@stack, $node);
 	}
	elsif ($string =~ /^\)\;$/) {
	    # end case;
	    # have to deal differently whether the tree is of the general form (a,b) or (a,b,c)
	   
	    my $len = $#stack+1;
	    if ($len == 2) { # tree with one leaf only
		if ($N != 1) { print "this tree should have one taxon only\n"; die; }
		$lnode = pop(@stack);
		$pnode = pop(@stack);
		${$tree->left}[$pnode-$N]  = ($lnode < $N)? $lnode : $lnode-$N;    # set left  node of root
		${$tree->ld}[$pnode-$N] = $blen[$lnode];                           # set left node branch length
		if ($lnode >= $N) { ${$tree->parent}[$lnode-$N] = $pnode-$N;     } # if a internal node, add the parent
		if ($lnode <  $N) { ${$tree->taxaparent}[$lnode] = $pnode-$N; } # if a leaf, fill array taxaparent
	    }

	    elsif ($len == 3) { # tree of the form (a,b)
		$rnode = pop(@stack);
		$lnode = pop(@stack);
		$pnode = pop(@stack);
		${$tree->left}[$pnode-$N]  = ($lnode < $N)? -$lnode : $lnode-$N;   # set left  node of root
		${$tree->right}[$pnode-$N] = ($rnode < $N)? -$rnode : $rnode-$N;   # set right node of root
		${$tree->ld}[$pnode-$N] = $blen[$lnode];                           # set left  node branch length
		${$tree->rd}[$pnode-$N] = $blen[$rnode];                           # set right node branch length
		if ($lnode >= $N) { ${$tree->parent}[$lnode-$N] = $pnode-$N;     } # if left  is a internal node, add the parent
		if ($rnode >= $N) { ${$tree->parent}[$rnode-$N] = $pnode-$N;     } # if rigth is a internal node, add the parent
		if ($lnode <  $N) { ${$tree->taxaparent}[$lnode] = $pnode-$N; } # if left  is a leaf, fill array taxaparent
		if ($rnode <  $N) { ${$tree->taxaparent}[$rnode] = $pnode-$N; } # if rigth is a leaf, fill array taxaparent
	    }

	    elsif ($len == 4) { # tree of the form (a:ta,b:tb,c:tc) --> ((a:ta,b:tb):tc/2,c:tc/2)
		$rnode  = pop(@stack);
		$lnode  = pop(@stack);
		$otaxon = pop(@stack); if ($otaxon != 0) { print "bad zero taxon $otaxon\n"; die; }
		$pnode  = pop(@stack);
		${$tree->left}[$pnode-$N]  = ($lnode < $N)? -$lnode : $lnode-$N;   # set left  node of root
		${$tree->right}[$pnode-$N] = ($rnode < $N)? -$rnode : $rnode-$N;   # set right node of root
		${$tree->ld}[$pnode-$N] = $blen[$lnode];                           # set left  node branch length
		${$tree->rd}[$pnode-$N] = $blen[$rnode];                           # set right node branch length
		if ($lnode >= $N) { ${$tree->parent}[$lnode-$N] = $pnode-$N;     } # if left  is a internal node, add the parent
		if ($rnode >= $N) { ${$tree->parent}[$rnode-$N] = $pnode-$N;     } # if rigth is a internal node, add the parent
		if ($lnode <  $N) { ${$tree->taxaparent}[$lnode] = $pnode-$N; } # if left  is a leaf, fill array taxaparent
		if ($rnode <  $N) { ${$tree->taxaparent}[$rnode] = $pnode-$N; } # if rigth is a leaf, fill array taxaparent

		# now add the last node, the root
		$nnd ++;
		$ncd ++;	
		$lnode = $otaxon; # the convenction is to put the 0 taxon to the left of the root
		$rnode = $pnode;
		$pnode = $pnode-1; if ($pnode != $N) { print "bad root node=$node\n"; die; }
		${$tree->left}[$pnode-$N]  = ($lnode < $N)? -$lnode : $lnode-$N;   # set left  node of root
		${$tree->right}[$pnode-$N] = ($rnode < $N)? -$rnode : $rnode-$N;   # set right node of root
		${$tree->ld}[$pnode-$N] = $blen[$lnode]/2.0;                       # set left  node branch length
		${$tree->rd}[$pnode-$N] = $blen[$lnode]/2.0;                       # set right node branch length
		if ($lnode >= $N) { ${$tree->parent}[$lnode-$N] = $pnode-$N;     } # if left  is a internal node, add the parent
		if ($rnode >= $N) { ${$tree->parent}[$rnode-$N] = $pnode-$N;     } # if rigth is a internal node, add the parent
		if ($lnode <  $N) { ${$tree->taxaparent}[$lnode] = $pnode-$N; } # if left  is a leaf, fill array taxaparent
		if ($rnode <  $N) { ${$tree->taxaparent}[$rnode] = $pnode-$N; } # if rigth is a leaf, fill array taxaparent
		
	    }
	    
	    else { print "nh2tree not parse right\n"; $doesnotparse = 1;}
	    
	    # set parent for root to itself
	    if ($pnode != $N) { print "bad root node=$node\n"; die; }
	    ${$tree->parent}[$pnode-$N] = $pnode-$N;
	    
 	    undef($string);

	}
	elsif ($string =~ /^\)(.+)/) {
            #create a node 
	    $string = $1;
	    $rnode = pop(@stack);
	    $lnode = pop(@stack);
	    $pnode = pop(@stack);
	    
	    if ($pnode < $N) { print "bad tree pnode=$pnode\n"; die; }
	    
	    # if a internal node, add the parent
	    if ($lnode >= $N) { ${$tree->parent}[$lnode-$N] = $pnode-$N; }
	    if ($rnode >= $N) { ${$tree->parent}[$rnode-$N] = $pnode-$N; }
	    
	    # if a leaf, fill array taxaparent
	    if ($lnode < $N) { ${$tree->taxaparent}[$lnode] = $pnode-$N; }
	    if ($rnode < $N) { ${$tree->taxaparent}[$rnode] = $pnode-$N; }

            # identify left and right nodes
	    if ($lnode <  $N) { ${$tree->left}[$pnode-$N]  = -$lnode;   }
	    if ($rnode <  $N) { ${$tree->right}[$pnode-$N] = -$rnode;   }
	    if ($lnode >= $N) { ${$tree->left}[$pnode-$N]  = $lnode-$N; }
	    if ($rnode >= $N) { ${$tree->right}[$pnode-$N] = $rnode-$N; }
	    
	    # branch lengths of left and right nodes.
	    ${$tree->ld}[$pnode-$N] = $blen[$lnode];
	    ${$tree->rd}[$pnode-$N] = $blen[$rnode]; 
	    
	    # put node back parent node in the stack 
	    push(@stack, $pnode);
 	}
	elsif ($string =~ /^\,(.+)/) {
	    $string = $1;
	}
	elsif ($string =~ /^\:(.+)/) {
	    $string = $1;
	}
	else {
	    print "bad tree parsing sring=$string\n"; die;
	}
	
    }

    if ($nsq != $N)     { print "tree reading failed. N $N nsq $nsq\n"; die; }
    if ($nnd != $dim)   { print "tree reading failed. dim $dim nintnodes $nnd\n"; die; }
    if ($ncd != $N+$dim){ print "tree reading failed. nnodes ", $N+$dim, " found $ncd\n"; die; }

    # reorder the nodes to be able to compare with
    # the original tree
    tree_renumber_nodes(\$tree);

    if ($verbose) {     
	printf "\nnhtree %s\n", $nhtree;
	print_tree($tree); 
    }

    $$tree_ref         = $tree;
    $$doesnotparse_ref = $doesnotparse;
}

sub parse_mphylip_dnaml {
    my ($nsample, $phylipoutfile,  			    
	$ave_alpha_ref, $std_alpha_ref, 
	$ave_beta_ref, $std_beta_ref, 
	$ave_lambda_ref, $std_lambda_ref, 
	$ave_mu_ref, $std_mu_ref, 
	$ave_ip_ref, $std_ip_ref, 
	$ave_like_ref, $std_like_ref, 
	$ave_ttr_ref, $std_ttr_ref, 
	$ave_apb_ref, $std_apb_ref, 
	$ave_frs_ref, $std_frs_ref, 
	$ave_frc_ref, $std_frc_ref) = @_;
    
    my $frqa;
    my $frqc;
    my $frqg;
    my $frqt;
    my $frqo = 0.0;

    my $alp;
    my $bet;
    my $ins = 0.0;
    my $del = 0.0;
    my $ip  = 0.0;
    
    my $like;
    my $ttr;
    my $apb  = 1.0;
    
    my $sum;
    
    my $frc;
    my $frs;

    my $frqr;
    my $frqy;
    my $frqar;
    my $frqcy;
    my $frqgr;
    my $frqty;

    my $aa;
    my $bb;

    my $nali = 0;
    open(FILE, $phylipoutfile);
    while(<FILE>) {
	if (/^\s+A\s+(\S+)/) {
	    $frqa = $1;
	}
	elsif (/^\s+C\s+(\S+)/) {
	    $frqc = $1;
	}
	elsif (/^\s+G\s+(\S+)/) {
	    $frqg = $1;
	}
	elsif (/^\s+T\(U\)\s+(\S+)/) {
	    $frqt = $1;
	}
	elsif (/^Transition\/transversion ratio\s+=\s+(\S+)/) {
	    $ttr = $1;
	}
	elsif (/^Ln Likelihood\s+=\s+(\S+)/) {
	    $nali ++;
	    $like = $1;

	    # add this run to the averages
	    if (!$like) {
		print "dnaml did not finish properly\n"; die; 
	    }
	    
	    # normalize frequencies
	    $sum = $frqa +  $frqc +  $frqg +  $frqt;
	    if ($sum > 0) {
		$frqa /= $sum;
		$frqc /= $sum;
		$frqg /= $sum;
		$frqt /= $sum;
	    }
	    
	    # calculate alp bet ins del
	    $frqr  = $frqa + $frqg;
	    $frqy  = $frqc + $frqt;
	    $frqar = $frqa / $frqr;
	    $frqcy = $frqc / $frqy;
	    $frqgr = $frqg / $frqr;
	    $frqty = $frqt / $frqy;
	    
	    $aa = $ttr * $frqr * $frqy - $frqa * $frqg - $frqc * $frqt;
	    $bb = $frqa * $frqgr + $frqc * $frqty;
	    $alp = $aa / ($aa + $bb);
	    $bet = 1.0 - $alp;
	    
	    $frs = 2.0 * $alp * $bb + 
		$bet * (1.0 - $frqa*$frqa - $frqc*$frqc - $frqg*$frqg - $frqt*$frqt);

	    $frc = $frs;

	    accumulate_averages($alp, $ave_alpha_ref,  $std_alpha_ref);
	    accumulate_averages($bet, $ave_beta_ref,   $std_beta_ref);
	    accumulate_averages($ins, $ave_lambda_ref, $std_lambda_ref);
	    accumulate_averages($del, $ave_mu_ref,     $std_mu_ref);
	    accumulate_averages($ip,  $ave_ip_ref,     $std_ip_ref);
	    
	    accumulate_averages($like, $ave_like_ref, $std_like_ref);
	    accumulate_averages($ttr,  $ave_ttr_ref,  $std_ttr_ref);
	    accumulate_averages($apb,  $ave_apb_ref,  $std_apb_ref);   
	    accumulate_averages($frs,  $ave_frs_ref,  $std_frs_ref);   
	    accumulate_averages($frc,  $ave_frc_ref,  $std_frc_ref);   
	}
    }
    close(FILE);
    
    if ($nali != $nsample) {
	print "parse_mphylip_dnaml(): there should be $nsample alingments not $nali\n"; die; 
    }

    calculate_averages($ave_alpha_ref,  $std_alpha_ref,  $nali);
    calculate_averages($ave_beta_ref,   $std_beta_ref,   $nali);
    calculate_averages($ave_lambda_ref, $std_lambda_ref, $nali);
    calculate_averages($ave_mu_ref,     $std_mu_ref,     $nali);
    calculate_averages($ave_ip_ref,     $std_ip_ref,     $nali);

    calculate_averages($ave_like_ref, $std_like_ref, $nali);
    calculate_averages($ave_ttr_ref,  $std_ttr_ref,  $nali);
    calculate_averages($ave_apb_ref,  $std_apb_ref,  $nali);
    calculate_averages($ave_frs_ref,  $std_frs_ref,  $nali);
    calculate_averages($ave_frc_ref,  $std_frc_ref,  $nali);
   
}

sub parse_mphylip_dnaml_erate {
    my ($nsample, $phylipoutfile,  			    
	$ave_alpha_ref, $std_alpha_ref, 
	$ave_beta_ref, $std_beta_ref, 
	$ave_lambda_ref, $std_lambda_ref, 
	$ave_mu_ref, $std_mu_ref, 
	$ave_ip_ref, $std_ip_ref, 
	$ave_like_ref, $std_like_ref, 
	$ave_ttr_ref, $std_ttr_ref, 
	$ave_apb_ref, $std_apb_ref, 
	$ave_frs_ref, $std_frs_ref, 
	$ave_frc_ref, $std_frc_ref) = @_;

    my $frqa  = -1;
    my $frqc  = -1;
    my $frqg  = -1;
    my $frqt  = -1;
    my $frqo  = -1;

    my $alp  = -1;
    my $bet  = -1;
    my $ins  = -1;
    my $del  = -1;
    my $ip   = -1;

    my $ttr  = -1;
    my $apb  = -1;

    my $frc  = -1;
    my $frs  = -1;

    my $like = -1;

    my $nali = 0;
    open(FILE, $phylipoutfile);
    while(<FILE>) {
	if (/^alpha\s+=\s+(\S+)/) {
	    $alp = $1;
	}
	elsif (/^beta\s+=\s+(\S+)/) {
	    $bet = $1;
	}
	elsif (/^Indel prior\s+=\s+(\S+)/) {
	    $ip = $1;
	}
	elsif (/^Insertions rate\s+=\s+(\S+)/) {
	    $ins = $1;
	}
	elsif (/^Deletions rate\s+=\s+(\S+)/) {
	    $del = $1;
	}
	elsif (/^\s+A\s+(\S+)/) {
	    $frqa = $1;
	}
	elsif (/^\s+C\s+(\S+)/) {
	    $frqc = $1;
	}
	elsif (/^\s+G\s+(\S+)/) {
	    $frqg = $1;
	}
	elsif (/^\s+T\(U\)\s+(\S+)/) {
	    $frqt = $1;
	}
	elsif (/^\s+\-\s+(\S+)/) {
	    $frqo = $1;
	}
	elsif (/^Transition\/transversion ratio\s+=\s+(\S+)/) {
	    $ttr = $1;
	}
	elsif (/^Average rate of subtitutions\s+=\s+(\S+)/) {
	    $frs = $1;
	}
	elsif (/^Average rate of changes\s+=\s+(\S+)/) {
	    $frc = $1;
	}
	elsif (/^Ln Likelihood\s+=\s+(\S+)/) {
	    $like = $1;
	    $nali ++;

	    # add this run to the averages
	    if (!$alp || !$bet || !$ins || !$del) {
		print "parse_mphylip_dnaml_erate(): cannot parse all the parameters from output\n"; die; 
	    }
	    if (!$like) {
		print "parse_mphylip_dnaml_erate(): cannot parse likelihood from output\n"; die; 
	    }
	    
	    # normalize frequencies
	    my $sum = $frqa +  $frqc +  $frqg +  $frqt;
	    if ($sum > 0) {
		$frqa /= $sum;
		$frqc /= $sum;
		$frqg /= $sum;
		$frqt /= $sum;
	    }
	    
	    $apb = $alp + $bet;

	    accumulate_averages($alp, $ave_alpha_ref,  $std_alpha_ref);
	    accumulate_averages($bet, $ave_beta_ref,   $std_beta_ref);
	    accumulate_averages($ins, $ave_lambda_ref, $std_lambda_ref);
	    accumulate_averages($del, $ave_mu_ref,     $std_mu_ref);
	    accumulate_averages($ip,  $ave_ip_ref,     $std_ip_ref);
	    
	    accumulate_averages($like, $ave_like_ref, $std_like_ref);
	    accumulate_averages($ttr,  $ave_ttr_ref,  $std_ttr_ref);
	    accumulate_averages($apb,  $ave_apb_ref,  $std_apb_ref);   
	    accumulate_averages($frs,  $ave_frs_ref,  $std_frs_ref);   
	    accumulate_averages($frc,  $ave_frc_ref,  $std_frc_ref);   
	    
	}
    }
    close(FILE);

    if ($nali != $nsample) {
	print "parse_mphylip_dnaml_erate(): there should be $nsample alingments not $nali\n"; die; 
    }

    calculate_averages($ave_alpha_ref,  $std_alpha_ref,  $nali);
    calculate_averages($ave_beta_ref,   $std_beta_ref,   $nali);
    calculate_averages($ave_lambda_ref, $std_lambda_ref, $nali);
    calculate_averages($ave_mu_ref,     $std_mu_ref,     $nali);
    calculate_averages($ave_ip_ref,     $std_ip_ref,     $nali);

    calculate_averages($ave_like_ref, $std_like_ref, $nali);
    calculate_averages($ave_ttr_ref,  $std_ttr_ref,  $nali);
    calculate_averages($ave_apb_ref,  $std_apb_ref,  $nali);
    calculate_averages($ave_frs_ref,  $std_frs_ref,  $nali);
    calculate_averages($ave_frc_ref,  $std_frc_ref,  $nali);
    
}

sub parse_msa {
    my ($msafile, $nseq_ref, $alen_ref, $msa_ref, $name_ref) = @_;

    my $nseq;
    my $alen;
    my $n = 0;

    my $isfirst = 0;

    open(MSA, "$msafile");
    while(<MSA>) {
	if (/\s*(\d+)\s+(\d+)/) {
	    $nseq = $1;
	    $alen = $2;
	    
	    #initialize
	    $isfirst = 1;
	    for (my $s = 0; $s < $nseq; $s ++) { $msa_ref->[$s] = ""; }
	}
	elsif ($isfirst && /^\S+/) {

		if (/^\s*(\S+)\s+(.+)/) {
		    $name_ref->[$n] = $1; $msa_ref->[$n++] .= $2;  
		}
		else {
		    if (/^(.{10})(.+)$/) { $name_ref->[$n] = $1; $msa_ref->[$n++] .= $2; }
		}
	}
	elsif (/^\s+$/) {
	    if ($n != $nseq) { print "bad alignment n=$n nseq=$nseq\n"; die; }
	    $n = 0;
	    $isfirst = 0;
	}
	elsif (/\s*\S+/) {
	    $msa_ref->[$n++] .= $_;
	}
    }
    close (MSA);
    
    if ($verbose) { print "MSA: nseq $nseq alen $alen\n"; }

    #Remove spaces and end-of-lines from alignemt if nay
    for (my $s = 0; $s < $nseq; $s ++) { 
	$msa_ref->[$s] =~ s/ //g;
	$msa_ref->[$s] =~ s/\n//g;
    }
    #Check the alignment is complete
    for (my $s = 0; $s < $nseq; $s ++) { if (length($msa_ref->[$s]) != $alen) { print "bad alignment  $msafile\n$alen ", length($msa_ref->[$s]),"\n"; die;} }
    
    $$nseq_ref = $nseq;
    $$alen_ref = $alen;
}

sub parse_msastats {
    my ($statsfile, 
	$ave_alen_ref, $std_alen_ref, $ave_sqlg_ref, $std_sqlg_ref, 
	$ave_id_ref, $std_id_ref, $ave_mut_ref, $std_mut_ref, 
	$ave_indl_ref, $std_indl_ref, $ave_fo_ref, $std_fo_ref) = @_;

    open(FILE, "$statsfile");
    while(<FILE>) {
	if (/Len alignment:\s+(\S+)/) {
	    $$ave_alen_ref = $1;
	    $$std_alen_ref = 0.0;
	}
	elsif (/seqs geometric mean:\s+(\S+)\s\+\/-\s(\S+)/) {
	    $$ave_sqlg_ref = $1;
	    $$std_sqlg_ref = $2;
	}
	elsif (/pairwise ID:\s+(\S+)\s\+\/-\s(\S+)/) {
	    $$ave_id_ref = $1;
	    $$std_id_ref = $2;
	}
	elsif (/pairwise MUT:\s+(\S+)\s\+\/-\s(\S+)/) {
	    $$ave_mut_ref = $1;
	    $$std_mut_ref = $2;
	}
	elsif (/pairwise INDEL:\s+(\S+)\s\+\/-\s(\S+)/) {
	    $$ave_indl_ref = $1;
	    $$std_indl_ref = $2;
	}
	elsif (/indel freq:\s+(\S+)\s\+\/-\s(\S+)/) {
	    $$ave_fo_ref = $1;
	    $$std_fo_ref = $2;
	}
	
    }
    close(FILE);
}

sub parse_msastats_file {
    my ($NC, $msastatsfile, 
	$ave_alen_ref, $std_alen_ref,
	$ave_sqlg_ref, $std_sqlg_ref, 
	$ave_id_ref,   $std_id_ref,
	$ave_mut_ref,  $std_mut_ref,
	$ave_indl_ref, $std_indl_ref,
	$ave_fo_ref,   $std_fo_ref) = @_;

    my $ave_alen   = 0.0;
    my $std_alen   = 0.0;
    my $ave_sqlg   = 0.0;
    my $std_sqlg   = 0.0;

    my $ave_id   = 0.0;
    my $std_id   = 0.0;
    my $ave_mut  = 0.0;
    my $std_mut  = 0.0;
    my $ave_indl = 0.0;
    my $std_indl = 0.0;
    my $ave_fo   = 0.0;
    my $std_fo   = 0.0;

    parse_msastats($msastatsfile, 
		   \$ave_alen, \$std_alen, \$ave_sqlg, \$std_sqlg, 
		   \$ave_id, \$std_id, \$ave_mut, \$std_mut, 
		   \$ave_indl, \$std_indl, \$ave_fo, \$std_fo);

    if ($ave_alen == 0) {
	print "\nAlignment of length zero?\n";
	print "STATS\n";
	system("more $msastatsfile\n");
	die;
    }

    $$ave_alen_ref = $ave_alen;
    $$std_alen_ref = $std_alen;

    $$ave_sqlg_ref = $ave_sqlg;
    $$std_sqlg_ref = $std_sqlg;

    $$ave_id_ref   = $ave_id;
    $$std_id_ref   = $std_id;

    $$ave_mut_ref  = $ave_mut;
    $$std_mut_ref  = $std_mut;

    $$ave_indl_ref = $ave_indl;
    $$std_indl_ref = $std_indl;

    $$ave_fo_ref   = $ave_fo;
    $$std_fo_ref   = $std_fo;

}

sub print_tree {
    my ($tree) = @_;

    my $ntaxa = $tree->{"Tree::ntaxa"};
    my $nnode = ($ntaxa > 1)? $ntaxa-1 : $ntaxa;

    printf "\nTREE STRUCTURE\n";
    printf "ntaxa:\t%d\n", $tree->{"Tree::ntaxa"};
    for (my $n = 0; $n < $nnode; $n ++) {
	printf "node\t%d\tparent\t%d\tleft\t%d\t%f\tright\t%d\t%f\n", 
	$n, ${$tree->parent}[$n], ${$tree->left}[$n], ${$tree->ld}[$n], ${$tree->right}[$n], ${$tree->rd}[$n];
    }
    printf "\n";      
    for (my $t = 0; $t < $ntaxa; $t ++) {
	printf "leaf\t%d\tparent\t%d\n", $t, ${$tree->taxaparent}[$t];
    }

    printf "\n";

}

sub remove_common_gaps {
    my ($msafile) = @_;

    my $nseq;
    my $alen;
    my @msa;
    my @new_msa;
    my @name;
    my @char;
    my $isallgaps;

    parse_msa ($msafile, \$nseq, \$alen, \@msa, \@name);
    
    for (my $s = 0; $s < $nseq; $s ++) { 
	$new_msa[$s] = "";
    }
    
    while ($msa[0]) {
	$isallgaps = 1;
	for (my $s = 0; $s < $nseq; $s ++) { 
	    $msa[$s] =~ s/^(\S)//; $char[$s] = $1;
	    if ($char[$s] =~ /^\-$/) { }
	    else { $isallgaps = 0; }
	}

	if ($isallgaps == 0) {
	    for (my $s = 0; $s < $nseq; $s ++) { 
		$new_msa[$s] .= "$char[$s]";
	    }
	}
	
    }

    write_msa_to_file($msafile, $nseq, \@new_msa, \@name);
}

# the phylip tree has the first sequence as right from root node
# my convention is first sequence left from root node.
# this function reverse the order of the tree in  nh format
sub reverse_tree {
    my ($nhtree_ref) = @_;

    my $nhtree = $$nhtree_ref;
    my $revnhtree = "";

    my @time;
    my $time;

    if (!$nhtree) { print "phylip did not generate any tree\n"; die; }

    while($nhtree) {
	if (0) {
	    print "nh   =$nhtree\n";
	    print "revnh=$revnhtree\n";
	}

	if ($nhtree =~ /^(.+)\)\;$/) {
	    $nhtree = $1;
	    $revnhtree = "(";
	}
	elsif ($nhtree =~ /^(.+\,)([^\(\)\:\,]+)\:([\d\.]+)$/) {
	    $nhtree = $1;
	    $revnhtree .= "$2\:$3";
	}
	elsif ($nhtree =~ /^(.*\()([^\(\)\:\,]+)\:([\d\.]+)$/) {
	    $nhtree = $1;
	    $revnhtree .= "$2\:$3";
	}
	elsif ($nhtree =~ /^(.+)\)\:([\d\.]+)$/) {
	    $nhtree = $1;
	    push(@time, $2);
	    $revnhtree .= "\(";
	}
	elsif ($nhtree =~ /^(.+)\,$/) {
	    $nhtree = $1;
	    $revnhtree .= "\,";
	}
	elsif ($nhtree =~ /^(.+)\:$/) {
	    $nhtree = $1;
	    $revnhtree .= "\:";
	}
	elsif ($nhtree =~ /^(.+)\)$/) {
	    $nhtree = $1;
	    $revnhtree .= "\(";
	}
	elsif ($nhtree =~ /^(.+)\($/) {
	    $nhtree = $1;
	    $time = pop(@time);
	    $revnhtree .= "\)\:$time";
	}
	elsif ($nhtree =~ /^\($/) {
	    $nhtree = "";
	    $revnhtree .= "\)\;";
	}
	else {
	    print "bad tree reversing string=$nhtree\n"; die;
	}
    }

    if (@time) { print "bad tree reversing sring=$nhtree\n"; die; }

    if ($verbose) { 
	print "given tree\n$$nhtree_ref\n"; 
	print "reversed tree\n$revnhtree\n"; 
    }

    $$nhtree_ref = $revnhtree;
}

sub run_concordance {
    my ($NC, $NS, $NTS, $msaname, 
	$averages_file_ref, $withbootstrap, $withshuffle) = @_; 
    
    my $pmsafile;

    my $umsafile;
    my $umsafile1;
    my $umsafile2;

    my $umsafile_stats;
    my $umsafile1_stats;
    my $umsafile2_stats;
    
    my $nt;
    
    for (my $nts = 0; $nts < $NTS; $nts ++) {
	
	create_partial_msa_seqs($nts, $NS, $msaname, \$pmsafile);
	
	if ($withbootstrap) {
	    $umsafile = "$pmsafile.boot";
	    seqboot(1, $pmsafile, $umsafile);
	}
	elsif ($withshuffle) {
	    $umsafile = "$pmsafile.shuffle";
	    shuffle_msa($pmsafile, $umsafile);
	}
	else { $umsafile = $pmsafile; }

	create_half_msa($umsafile, \$umsafile_stats, \$umsafile1, \$umsafile1_stats, \$umsafile2, \$umsafile2_stats);
	
	print "TRIAL $nts $umsafile\n\n";
	if ($verbose) {
	    system("more $umsafile_stats\n");
	    system("more $umsafile1_stats\n");
	    system("more $umsafile2_stats\n");
	}
	
	concordance($NC, $umsafile_stats, $umsafile1, $umsafile2, $averages_file_ref);

	system("rm $pmsafile\n");
	system("rm $umsafile\n");
	system("rm $umsafile1\n");
	system("rm $umsafile2\n");
	system("rm $umsafile_stats\n");
	system("rm $umsafile1_stats\n");
	system("rm $umsafile2_stats\n");
    }
    
}

sub run_phylip {
    my ($phylip, $msafile, $phyliptreefile, $phylipoutfile, $phylipmode, 
	$ave_time_ref, $std_time_ref, $ave_abl_ref, $std_abl_ref, 
	$ave_alpha_ref, $std_alpha_ref, $ave_beta_ref, $std_beta_ref, 
	$ave_lambda_ref, $std_lambda_ref, $ave_mu_ref, $std_mu_ref, 
	$ave_ip_ref, $std_ip_ref, 
	$ave_like_ref, $std_like_ref, $ave_ttr_ref, $std_ttr_ref, 
	$ave_apb_ref, $std_apb_ref,  $ave_frs_ref, $std_frs_ref,  $ave_frc_ref, $std_frc_ref, $tag) = @_;

    my $ave_time   = 0;
    my $std_time   = 0;
    my $ave_abl    = 0;
    my $std_abl    = 0;
    my $ave_alpha  = 0;
    my $std_alpha  = 0;
    my $ave_beta   = 0;
    my $std_beta   = 0;
    my $ave_lambda = 0;
    my $std_lambda = 0;
    my $ave_mu     = 0;
    my $std_mu     = 0;
    my $ave_ip     = 0;
    my $std_ip     = 0;
    my $ave_like   = 0;
    my $std_like   = 0;
    my $ave_ttr    = 0;
    my $std_ttr    = 0;
    my $ave_apb    = 0;
    my $std_apb    = 0;
    my $ave_frs    = 0;
    my $std_frs    = 0;
    my $ave_frc    = 0;
    my $std_frc    = 0;

    my $nboot = 1;

    my $phylipinputfile     = "$msafile.$tag.phylipinput";
    my $phylipscreenoutfile = "$msafile.$tag.phylipscreenout";
    my $phyliptimefile      = "$msafile.$tag.phyliptimefile";

    # make phylip inputfile
    open(IN, ">$phylipinputfile");
    print IN "$msafile\n";

    if (!$opt_o) {
	if ($opt_e || $opt_D) {
	    if    ($opt_A) { print IN "6\n"; }
	    elsif ($opt_B) { print IN "6\nB\n"; }	
	    elsif ($opt_C) { print IN "7\n"; }   # dnaml-erate option to optimize 2 param
	}
    }
    else {
	if    ($phylipmode == 0) { }
	elsif ($phylipmode == 1) { } # dnaml-erate option to optimize 2 param
	elsif ($phylipmode == 2) { print IN "7\n"; } # dnaml-erate option to optimize 4 param
    }
    print IN "Y\n";
    close (IN);
    if ($verbose) {system ("more $phylipinputfile\n"); }

    # run PHYLIP
    # 
    my $cmd = "$phylipdir$phylip < $phylipinputfile > $phylipscreenoutfile";
    system("echo $phylipdir$phylip\n"); 
    system("(time -p $cmd) 2> $phyliptimefile\n"); 
    system("mv outfile $phylipoutfile\n");
    system("mv outtree $phyliptreefile\n");

    if ($verbose) {
	print "\nPHYLIP $phylip\n";
	system("more $phylipscreenoutfile\n");
	system("more $phylipoutfile\n");
    }

    # extract average time
    extract_ave_time($phyliptimefile, \$ave_time, \$std_time);

    # extract average parameter
    extract_ave_param($phylip, $nboot, $phylipoutfile, 
		      \$ave_alpha, \$std_alpha, \$ave_beta, \$std_beta, 
		      \$ave_lambda, \$std_lambda, \$ave_mu, \$std_mu, \$ave_ip, \$std_ip,
		      \$ave_like, \$std_like, \$ave_ttr, \$std_ttr, 
		      \$ave_apb, \$std_apb, \$ave_frs, \$std_frs, \$ave_frc, \$std_frc);

    # extract average abl
    extract_ave_abl($phylip, $nboot, $phyliptreefile, \$ave_abl, \$std_abl);

    system("rm $phylipinputfile\n");
    system("rm $phylipscreenoutfile\n");
    system("rm $phyliptimefile\n");
    
    $$ave_time_ref   = $ave_time;
    $$std_time_ref   = $std_time;
    $$ave_abl_ref    = $ave_abl;
    $$std_abl_ref    = $std_abl;
    $$ave_alpha_ref  = $ave_alpha;
    $$std_alpha_ref  = $std_alpha;
    $$ave_beta_ref   = $ave_beta;
    $$std_beta_ref   = $std_beta;
    $$ave_lambda_ref = $ave_lambda;
    $$std_lambda_ref = $std_lambda;
    $$ave_mu_ref     = $ave_mu;
    $$std_mu_ref     = $std_mu;
    $$ave_ip_ref     = $ave_ip;
    $$std_ip_ref     = $std_ip;
    $$ave_like_ref   = $ave_like;
    $$std_like_ref   = $std_like;
    $$ave_ttr_ref    = $ave_ttr;
    $$std_ttr_ref    = $std_ttr;
    $$ave_apb_ref    = $ave_apb;
    $$std_apb_ref    = $std_apb;
    $$ave_frs_ref    = $ave_frs;
    $$std_frs_ref    = $std_frs;
    $$ave_frc_ref    = $ave_frc;
    $$std_frc_ref    = $std_frc;
    
}

sub seqboot {
    my ($nboot, $msaname, $msabootfile) = @_;

    my $seqbootinputfile = "$msaname.boot$nboot.seqboot.input";
    my $seqbootscreenout = "$msaname.boot$nboot.seqboot.screenont";

    my $oddran = int(rand()*100)*2 + 1;

    # make seqboot inputfile
    open(IN, ">$seqbootinputfile");
    print IN "$msaname\n";
    print IN "R\n"; 
    print IN "$nboot\n"; 
    print IN "Y\n";
    print IN "$oddran\n";
    close (IN);
    if ($verbose) {system ("more $seqbootinputfile\n"); }

    # run SEQBOOT
    # 
    my $cmd = "$seqboot < $seqbootinputfile > $seqbootscreenout";
    system("$cmd\n"); 
    system("mv outfile $msabootfile\n");

    if ($verbose) {
	system ("more $seqbootscreenout\n"); 
	system ("more $msabootfile\n"); 
    }

    system("rm $seqbootinputfile\n");
    system("rm $seqbootscreenout\n");
}

sub shuffle_msa {
    my ($msafile, $shuffled_msafile) = @_;

    my @msa;
    my @name;
    my $nseq;
    my $alen;

    my @msa_col;
    my @shuffle_msa_col;
    my @shuffle_msa;

    parse_msa ($msafile, \$nseq, \$alen, \@msa, \@name);

    msa_row_2_col($nseq, $alen, \@msa, \@msa_col);

    my $max_count = 500;
    my $count = 0;
    while ($count < $max_count) {
	for (my $i = 0; $i < $alen; $i++) {
	    my $s = int(rand()*($alen-1));
	    
	    if ($s >= $alen || $s < 0) { print "bad shuffling\n"; die; }
	    
	    $shuffle_msa_col[$s] = $msa_col[$i];
	    $shuffle_msa_col[$i] = $msa_col[$s];
	}
	$count++;
    }

    msa_col_2_row($nseq, $alen, \@shuffle_msa_col, \@shuffle_msa);
    
    write_msa_to_file($shuffled_msafile, $nseq, \@shuffle_msa, \@name);    
}

sub sqhash_table {
    my ($nhtree) = @_;

    my %sqhash;
    my $name;
    my $idx = 0;

    my @nhtree = split(/\,/,$nhtree);
    for (my $n = 0; $n <= $#nhtree; $n ++) {
	if ($nhtree[$n] =~ /([^\(\)\,\:]+)\:/) { 
	    $name = $1;
	    $sqhash{$name} = $idx++;
	}
	else { print " you should've found a taxon!\n"; die; }
    }

    if ($verbose) {
	print "\nnames hash\n";
	foreach my $key (keys(%sqhash)) {
	    
	    print "$key $sqhash{$key}\n";
	}
    }
    
    return %sqhash;
}

sub tree_abl {
    my ($tree) = @_;
    
    my $ntaxa = $tree->{"Tree::ntaxa"};
    my $nnode = ($ntaxa > 1)? $ntaxa-1 : $ntaxa;
    my $nbranch = 2*$nnode; # it's a binary tree

    my $abl = tree_tbl($tree)/$nbranch;

    if ($verbose) {
	print_tree($tree);
	printf "\nTBL %f\n", tree_tbl($tree);
    }
          
    return $abl;
}

# /* Function:  esl_tree_RenumberNodes()
#  * Synopsis:  Assure nodes are numbered in preorder.
#  * Incept:    SRE, Fri Oct 27 09:33:26 2006 [Janelia]
#  *
#  * Purpose:   Given a tree <T> whose internal nodes might be numbered in
#  *            any order, with the sole requirement that node 0 is the
#  *            root; renumber the internal nodes (if necessary) to be in Easel's
#  *            convention of preorder traversal. No other aspect of <T> is
#  *            altered (including its allocation size).
#  *
#  * Returns:   <eslOK> on success.
#  *
#  * Throws:    <eslEMEM> on allocation failure.
#  *
#  * Xref:      STL11/77
#  */
sub tree_renumber_nodes {
    my ($tree_ref) = @_;
    
    my $tree  = $$tree_ref;
    my $tree2 = Tree->new(); 
    my @map;
    my @vs;
    my $v;
    my $new;
    my $needs_rearranging = 0;
    my $ntaxa = $tree->ntaxa;

    # Pass 1. Preorder traverse of T by its children links;
    #         construct map[old] -> new.
    #
    
    push(@vs, $ntaxa);
    $new = 0;
    while ($v = pop(@vs))
    {
	$v -= $ntaxa;
	if ($v != $new) { $needs_rearranging = 1; }
	$map[$v] = $new++;
	
	if (${$tree->right}[$v] > 0) { push(@vs, $ntaxa+${$tree->right}[$v]); } 
	if (${$tree->left}[$v]  > 0) { push(@vs, $ntaxa+${$tree->left}[$v]);  } 
    }
    if ($needs_rearranging == 0) { return; }
    
    # Pass 2. Construct the guts of correctly numbered new T2.
    #         (traversal order doesn't matter here)
    #
    $tree2->{"Tree::ntaxa"} = $ntaxa;
    
    for ($v = 0; $v < $ntaxa-1; $v++)
    {
	${$tree2->parent}[$map[$v]] = $map[${$tree->parent}[$v]];
	if (${$tree->left}[$v]  > 0) { ${$tree2->left}[$map[$v]]  = $map[${$tree->left}[$v]]; } # internal nodes renumbered... 
	else                         { ${$tree2->left}[$map[$v]]  = ${$tree->left}[$v];       } # ...taxon indices unchanged 
	if (${$tree->right}[$v] > 0) { ${$tree2->right}[$map[$v]] = $map[${$tree->right}[$v]];}
	else                         { ${$tree2->right}[$map[$v]] = ${$tree->right}[$v];      }
	${$tree2->ld}[$map[$v]] = ${$tree->ld}[$v];
	${$tree2->rd}[$map[$v]] = ${$tree->rd}[$v];
  
      if (${$tree->left}[$v]  <= 0) { ${$tree2->taxaparent}[-${$tree->left}[$v]]  = $map[$v]; }
      if (${$tree->right}[$v] <= 0) { ${$tree2->taxaparent}[-${$tree->right}[$v]] = $map[$v]; }

    }

    $$tree_ref = $tree2;
}

sub tree_rescale {
    my ($tree_ref, $treenh_ref, $scale) = @_;

    my $tree = $$tree_ref;
    my $treenh;

    tree_target_abl(\$tree, $scale, $verbose);
    
    # now rewrite the NH version of the tree
    tree_WriteNewick($tree, \$treenh);

    $$tree_ref   = $tree;
    $$treenh_ref = $treenh;
}

sub tree_scale_branch {
    my ($tree_ref, $scale) = @_;
    
    my $tree = $$tree_ref;
    my $ntaxa = $tree->{"Tree::ntaxa"};
    my $nnode = ($ntaxa > 1)? $ntaxa-1 : $ntaxa;
    my $nbranch = 2*$nnode; # it's a binary tree
    
    # do the scaling
    for (my $n = 0; $n < $nnode; $n ++) {
	${$tree->ld}[$n] *= $scale;
	${$tree->rd}[$n] *= $scale;
    }
    
    $$tree_ref = $tree;
}

sub tree_target_abl {
    my ($tree_ref, $abl, $verbose) = @_;
    
    my $tol = 0.00001;
    my $tbl;
    my $scale = 1.0;
    my $tree = $$tree_ref;
    
    my $ntaxa = $tree->{"Tree::ntaxa"};
    my $nnode = ($ntaxa > 1)? $ntaxa-1 : $ntaxa;
    my $nbranch = 2*$nnode; # it's a binary tree
    
    $tbl = tree_tbl($tree);
    
    # scaling factor
    if ($tbl > 0) { $scale *= $abl * $nbranch / $tbl; }
    
    # do the scaling
    tree_scale_branch(\$tree, $scale);

    if ($verbose) { 
	printf "original abl %f new abl %f scale %f\n", $tbl/$nbranch, $abl, $scale; 
    }

    #calculate the new tbl
    $tbl = tree_tbl($tree);
    if (abs($tbl - $abl * $nbranch) > $tol) { 
	printf "tree_target_abl(): bad rescaling %f %f\n", $tbl, $abl*$nbranch; die; 
    }

    $$tree_ref = $tree;
}

sub tree_tbl {
    my ($tree) = @_;
    
    my $tbl = 0.0;
    
    my $ntaxa = $tree->{"Tree::ntaxa"};
    my $nnode = ($ntaxa > 1)? $ntaxa-1 : $ntaxa;
    my $nbranch = 2*$nnode; # it's a binary tree
    
    #calculate the tbl
    for (my $n = 0; $n < $nnode; $n ++) {
	$tbl += ${$tree->ld}[$n];
	$tbl += ${$tree->rd}[$n];
    }
    
    return $tbl;
}

# Function:  esl_tree_WriteNewick()
# Incept:    SRE, Fri Oct  6 14:35:51 2006 [Janelia]
#
# Purpose:   Writes tree <T> to stream <fp> in Newick format.
#  
#            Certain options are set in <T> to control output style.
#            If <T->show_unrooted> is <TRUE>, <T> is printed as an
#            unrooted tree starting with a trifurcation, a la PHYLIP
#            format (default=<FALSE>). If <T->show_node_labels> is
#            <TRUE>, then labels are shown for internal nodes, if any
#            are available (default=<TRUE>). If
#            <T->show_branchlengths> is <TRUE>, then branch lengths
#            are shown, as opposed to just printing a labeled
#            topology (default=<TRUE>). If
#            <T->show_root_branchlength> is also <TRUE>, then a 0.0
#            branchlength is shown to the root node, a la Hein's
#            TreeAlign Newick format (default=<FALSE>). If
#            <T->show_quoted_labels> is <TRUE>, then all labels are
#            shown in Newick's quoted format, as opposed to only
#            using quoted labels where necessary (default=<FALSE>).
#
# Returns:   <eslOK> on success.
#
# Throws:    <eslEMEM> on allocation error.
#            <eslEINCONCEIVABLE> on internal error.
#
# Xref:      STL11/74
#
sub tree_WriteNewick {
    
    my ($tree, $treenh_ref, $sqhash_ref) = @_;
    
    my $treenh = "";
    my @vs;
    my @cs;
    my $v;
    my $c;

    # Initialization.
    # Push a trifurcation (swallowing the right internal node) if unrooted;
    # else push the first bifurcation.
    # 
    # When we push a trifurcation, the branch lengths will come out fine
    # on output, if the tree followed the correct convention of having
    # a T->rd[0] = 0.0.
    #
    $treenh .= "(";
    
    if (${$tree->right}[0] > 0)
    {
	$v = ${$tree->right}[0];
	push(@cs, "x");
	push(@vs, ${$tree->right}[$v]);
	push(@cs, ",");
	push(@cs, "x");
	push(@vs, ${$tree->left}[$v]);
    }
    else 
    {
	push(@cs, "x");
	push(@vs, ${$tree->right}[0]);
    }
    push(@cs, ",");
    push(@cs, "x");
    push(@vs, ${$tree->left}[0]);
    
    
    # Main iteration. Pop off stacks 'til they're empty.
    #
    while ($c = pop(@cs))
    {
	if ($c eq ",") { $treenh .= ","; next; } # comma doesn't have a v stacked with it 
	
	$v = pop(@vs);
	
	if ($c eq "x") {	# a subtree, which could be a node or a taxon: 
	    if ($v > 0)		# internal node 1..N-2
	    {
		$treenh .= "("; 
		push(@cs, ")");
		push(@vs, $v);
		push(@cs, "x");
		push(@vs, ${$tree->right}[$v]);
		push(@cs, ",");
		push(@cs, "x");
		push(@vs, ${$tree->left}[$v]);
	    }
	    else			# taxon -(N-1)..0 
	    { 	    # -v below to convert taxon code to 0..N-1 
		newick_write_taxonlabel(\$treenh, $tree, -$v, $sqhash_ref);
		newick_write_branchlength(\$treenh, $tree,  $v);
	    }
	}
	elsif ($c eq "\)") { # closing an internal node. v > 0 is a node code.
	    $treenh .= ")"; 
	    newick_write_branchlength(\$treenh, $tree, $v);
	}
	else {
	    print "bad state code $c\n"; die;
	}            
    }
    
    # Termination
    #
    $treenh .= ");"; 

    $$treenh_ref = $treenh;
}

# newick_write_branchlength()
#    Writes the branch length #to# <v>.
#    If <v> is negative, it's a leaf; if <v> is positive, it's an internal node.
#    You can't pass the root node 0 to this. 0 always means taxon 0.
#    There is no branch to the root node.
#
sub newick_write_branchlength {
    my ($treenh_ref, $tree, $v) = @_;
    
    my $branchlength;
    
    if ($v <= 0)			# leaf 
    {
	if    (${$tree->left} [${$tree->taxaparent}[-$v]] == $v) { 
	    $branchlength = decimal(\${$tree->ld}[${$tree->taxaparent}[-$v]]); }
	elsif (${$tree->right}[${$tree->taxaparent}[-$v]] == $v) { 
	    $branchlength = decimal(\${$tree->rd}[${$tree->taxaparent}[-$v]]); }
	else                                                     { 
	    print "Can't find branch length\n"; die; }
    }
    else				# internal node 
    {
	if    (${$tree->left }[${$tree->parent}[$v]] == $v) { $branchlength = decimal(\${$tree->ld}[${$tree->parent}[$v]]); }
	elsif (${$tree->right}[${$tree->parent}[$v]] == $v) { $branchlength = decimal(\${$tree->rd}[${$tree->parent}[$v]]); }
	else                                                { print "Can't find branch length\n"; die; }
    }
    
    $$treenh_ref .= ":$branchlength";
}

# newick_write_taxonlabel():
#    Print the label for taxon <v> to stream <fp>.
#    Tries to print label as an unquoted label, then
#    as a quoted label, (then fails).
#    If label isn't available, does nothing.
#    If label contains invalid characters, throws <eslECORRUPT>.
#
sub newick_write_taxonlabel {
    my ($treenh_ref, $tree, $v, $sqhash_ref) = @_;

    $$treenh_ref .= "ta$v";
    $sqhash_ref->{"ta$v"} = $v;    
}

sub write_msa_to_file {
    my ($msafile, $nseq, $msa_ref, $name_ref) = @_;

    my $block = 50;

    my $alen = length($msa_ref->[0]);
    for (my $n = 1; $n < $nseq; $n ++) {
	if ($alen != length($msa_ref->[$n])) {
	    print "write_msa_to_file(): bad alignment\n"; die; 
	}
    }

    open(MSA, ">$msafile");
    print MSA " $nseq $alen\n";
        
    # Make sure that the names of the sequences are all different
    # and with max len 10
    name_normalize($nseq, $name_ref);

    for (my $n = 0; $n < $nseq; $n ++) {
	print MSA "$name_ref->[$n]\t";

	$msa_ref->[$n] =~ s/^(\S{$block})//; print MSA "$1\n";
    }
    
    while (length($msa_ref->[0]) >= $block) {
	print MSA "\n";
	for (my $n = 0; $n < $nseq; $n ++) {
	    $msa_ref->[$n] =~ s/^(\S{$block})//; print MSA "$1\n";
	}
    }
    if (length($msa_ref->[0]) > 0) {
	print MSA "\n";
	for (my $n = 0; $n < $nseq; $n ++) {
	    print MSA "$msa_ref->[$n]\n";
	}
    }
    
    close (MSA);
    
    if ($verbose) { system("more $msafile\n"); }
}

sub write_to_averagesfile {
    my ($averages_file,
	$ave_alen, $std_alen,
	$ave_sqlg, $std_sqlg, 
	$ave_id,   $std_id,
	$ave_mut,  $std_mut,
	$ave_indl, $std_indl,
	$ave_fo,   $std_fo,
	$ave_time, $std_time, $ave_abl, $std_abl, 
	$ave_alpha, $std_alpha, $ave_beta, $std_beta, 
	$ave_lambda, $std_lambda, $ave_mu, $std_mu, 
	$ave_ip, $std_ip, 
	$ave_like, $std_like, $ave_ttr, $std_ttr, 
	$ave_apb, $std_apb, $ave_frs, $std_frs, 
	$ave_frc, $std_frc, $tp, $d1, $d2) = @_;

    print "\nave_alen $ave_alen $std_alen\n";
    print "ave_sqlg $ave_sqlg $std_sqlg\n";
    print "ave_id $ave_id $std_id\n";
    print "ave_MUT $ave_mut $std_mut\n";
    print "ave_INDL $ave_indl $std_indl\n";
    print "ave_indlfreq $ave_fo $std_fo\n";
    print "nave_time $ave_time $std_time\n";
    print "ave_abl $ave_abl $std_abl \n";
    print "ave_alpha $ave_alpha $std_alpha\n";
    print "ave_beta $ave_beta $std_beta\n";
    print "ave_lambda $ave_lambda $std_lambda\n";
    print "ave_mu $ave_mu $std_mu \n";
    print "ave_ip $ave_ip $std_ip \n";
    print "ave_like $ave_like $std_like \n";
    print "ave_ttr $ave_ttr $std_ttr \n";
    print "ave_apb $ave_apb $std_apb \n";
    print "ave_frs $ave_frs $std_frs \n";
    print "ave_frs $ave_frc $std_frc \n";
    print "TP $tp\n";
    print "D1 $d1\n";
    print "D2 $d2\n";

    my $cmd =   "$ave_alen\t$std_alen";
    $cmd   .= "\t$ave_sqlg\t$std_sqlg";
    $cmd   .= "\t$ave_id\t$std_id";
    $cmd   .= "\t$std_id\t$ave_mut";
    $cmd   .= "\t$ave_indl\t$std_indl";
    $cmd   .= "\t$ave_fo\t$std_fo";
    $cmd   .= "\t$ave_time\t$std_time";
    $cmd   .= "\t$ave_abl\t$std_abl";
    $cmd   .= "\t$ave_alpha\t$std_alpha";
    $cmd   .= "\t$ave_beta\t$std_beta";
    $cmd   .= "\t$ave_lambda\t$std_lambda";
    $cmd   .= "\t$ave_mu\t$std_mu";
    $cmd   .= "\t$ave_ip\t$std_ip";
    $cmd   .= "\t$ave_like\t$std_like";
    $cmd   .= "\t$ave_ttr\t$std_ttr";
    $cmd   .= "\t$ave_apb\t$std_apb";
    $cmd   .= "\t$ave_frs\t$std_frs";
    $cmd   .= "\t$ave_frc\t$std_frc";
    $cmd   .= "\t$tp\t$d1\t$d2";

    open(FILE, ">>$averages_file");
    print      "$cmd\n";
    print FILE "$cmd\n";
    close(FILE);
}
