/* erate-tree.c
 *
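 * Simulate a random tree with a given number of taxa, rescale it to a
 * target average branch length, and write it to a file in Newick format.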
 *
 * ER, Fri Feb 29 09:31:10 EST 2008
 */

#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include <easel.h>
#include <esl_getopts.h>
#include <esl_tree.h>
#include <esl_random.h>

/* Command-line options accepted by erate-tree.
 * Columns follow the header comment below; the all-zero row terminates the table.
 */
static ESL_OPTIONS options[] = {
  /* name       type          default  env_var  range   toggles  req    incompat  help                             docgroup */
  { "-h",       eslARG_NONE,  FALSE,   NULL,    NULL,   NULL,    NULL,  NULL,     "show help and usage",           0 },
  { "-v",       eslARG_NONE,  FALSE,   NULL,    NULL,   NULL,    NULL,  NULL,     "be verbose?",                   0 },
  { "--abl",    eslARG_REAL,  "0.1",   NULL,    "x>0",  NULL,    NULL,  NULL,     "tree's average branch length",  0 },
  { "--ntaxa",  eslARG_INT,   "8",     NULL,    "n>0",  NULL,    NULL,  NULL,     "ntaxa",                         0 },
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
};

/* Usage banner printed by main() for -h and when the number of
 * command-line arguments is wrong. The single positional argument is the
 * output file that receives the tree.
 */
static char usage[] = "\
Usage: ./erate-tree [-options] <outputfile>\n\
 where <outputfile> is the file the Newick formatted tree is written to.\n\
 Available options are:\n\
  -h       :       show help and usage\n\
  -v       :       be verbose\n\
  --abl <x>:       set the tree's average branch length to <x>\n\
  --ntaxa <n>:     set the number of taxa to <n>\n\
";

/* Local helpers (file scope only).
 * esl_tree_AverageBranchLength(): mean of the T->ld[]/T->rd[] branch lengths.
 * esl_tree_Rescale():             multiply all branch lengths in *ret_T so
 *                                 that their mean becomes target_abl.
 */
static double esl_tree_AverageBranchLength(ESL_TREE *T);
static int    esl_tree_Rescale(double target_abl, ESL_TREE **ret_T);

/* Function:  esl_tree_AverageBranchLength()
 *
 * Purpose:   Compute the mean branch length of tree <T>.
 *            The left and right branch lengths T->ld[i] and T->rd[i] are
 *            summed over i = 0..nnode-1, where nnode is T->N-1 when
 *            T->N > 1 and T->N otherwise, and the sum is divided by the
 *            2*nnode branches (two per node).
 *
 * Returns:   the average branch length.
 */
double
esl_tree_AverageBranchLength(ESL_TREE *T)
{
  int    ninternal = (T->N > 1) ? T->N - 1 : T->N;
  double total     = 0.0;
  int    i;

  /* accumulate left then right branch of every node */
  for (i = 0; i < ninternal; i++)
    {
      total += T->ld[i];
      total += T->rd[i];
    }

  /* binary tree: two child branches per node */
  return total / (2 * ninternal);
}

/* Function:  esl_tree_Rescale()
 *
 * Purpose:   Rescale the branch lengths of tree <*ret_T> in place so that
 *            its average branch length becomes <target_abl>. Every
 *            T->ld[] and T->rd[] entry is multiplied by
 *            target_abl / (current average). If the current average is
 *            not positive, no scaling factor can be derived and the
 *            branches are left unchanged.
 *
 * Args:      target_abl - desired average branch length
 *            ret_T      - pointer to the tree to rescale; the tree is
 *                         modified in place and the pointer is unchanged
 *
 * Returns:   <eslOK> on success.
 *            <eslFAIL> (after printing a message to stdout) if the
 *            average branch length after scaling differs from
 *            <target_abl> by more than 1e-7.
 */
int
esl_tree_Rescale(double target_abl, ESL_TREE **ret_T)
{
  ESL_TREE *T;
  double abl;
  int    nnode;
  double scale = 1.0;
  double tol = 0.0000001;
  int    n;
  
  T = *ret_T;

  nnode = (T->N > 1)? T->N-1 : T->N;
  
  abl = esl_tree_AverageBranchLength(T);

  /* scaling factor; left at 1.0 when the current average is not positive */
  if (abl > 0.0) scale *= target_abl / abl; 
    
  /* do the scaling of branches */
  for (n = 0; n < nnode; n ++) {
    T->ld[n] *= scale;
    T->rd[n] *= scale;
  }
  
  /* paranoia: verify the result.
   * fabs() is required here; abs() takes an int, so the difference would
   * be truncated toward zero and the check could never detect an error
   * smaller than 1.0.
   */
  abl = esl_tree_AverageBranchLength(T);
  if (fabs(abl - target_abl) > tol) {
    printf("esl_tree_Rescale(): bad rescaling abl=%f target_abl=%f \n", abl, target_abl); 
    return eslFAIL;
  }    
    
  *ret_T = T;
  
  return eslOK;
}

/* Function:  main()
 *
 * Purpose:   Command-line driver. Parses the options, simulates a random
 *            tree with --ntaxa taxa, rescales it to an average branch
 *            length of --abl, validates it, and writes it in Newick
 *            format to the output file given as the only positional
 *            argument. With -v the tree is also written to stdout.
 *
 * Returns:   0 on success (also after printing help for -h). All errors
 *            are reported through esl_fatal().
 */
int
main(int argc, char **argv)
{
  char           *msg = "erate-tree failed.";
  char           *outfile;
  FILE           *fp;
  ESL_GETOPTS    *go;
  ESL_RANDOMNESS *r = NULL;
  ESL_TREE       *T = NULL;
  double          abl;
  int             ntaxa;
  int             be_verbose;

  /* Process command line
   */
  go = esl_getopts_Create(options);
  if (go == NULL) esl_fatal("%s", msg);
  if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK) esl_fatal("Failed to parse command line: %s\n", go->errbuf);
  if (esl_opt_VerifyConfig(go)               != eslOK) esl_fatal("Failed to parse command line: %s\n", go->errbuf);

  if (esl_opt_GetBoolean(go, "-h") == TRUE) {
    puts(usage); 
    puts("\n  where options are:");
    esl_opt_DisplayHelp(stdout, go, 0, 2, 80); /* 0=all docgroups; 2=indentation; 80=width */
    esl_getopts_Destroy(go);
    return 0;
  }
  if (esl_opt_ArgNumber(go) != 1) esl_fatal("Incorrect number of command line arguments.\n%s\n", usage);
 
  be_verbose = esl_opt_GetBoolean(go, "-v");
  abl        = esl_opt_GetReal(go, "--abl");
  ntaxa      = esl_opt_GetInteger(go, "--ntaxa");

  /* outfile is obtained from <go>; <go> is therefore kept alive until the
   * last use of outfile instead of being destroyed here.
   */
  outfile  = esl_opt_GetArg(go, 1);

  /* Open the output file. */
  if ((fp = fopen(outfile, "w")) == NULL) 
    esl_fatal("failed to open %s for output", outfile);

  r = esl_randomness_CreateTimeseeded();
  if (r == NULL) esl_fatal("%s", msg);
 
  /* msg is passed as an argument, never as the format string itself */
  if (esl_tree_Simulate(r, ntaxa, &T) != eslOK) esl_fatal("%s", msg);
  if (esl_tree_Rescale(abl, &T)       != eslOK) esl_fatal("%s", msg);
  if (esl_tree_Validate(T, NULL)      != eslOK) esl_fatal("%s", msg);

  if (be_verbose) esl_tree_WriteNewick(stdout, T);
  if (esl_tree_WriteNewick(fp, T) != eslOK) esl_fatal("%s", msg);

  /* Release everything that was created above. */
  esl_tree_Destroy(T);
  esl_randomness_Destroy(r);
  /* fclose() flushes buffered output, so a failure here means the tree may not be fully written */
  if (fclose(fp) != 0) esl_fatal("failed to close %s", outfile);
  esl_getopts_Destroy(go);
  return 0;
}
