/* erate-generate.c
 *
 * generate sequences/alignments according to
 * gap-augmented model.
 *
 * ER, Fri Feb 29 09:31:10 EST 2008
 */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include <easel.h>
#include <esl_alphabet.h>
#include <esl_getopts.h>
#include <esl_msa.h>
#include <esl_sqio.h>
#include <esl_tree.h>
#include <esl_wuss.h>
#include <esl_random.h>

#include "erate_evolve.h"


/* Command-line option table handed to esl_getopts_Create() in main().
 * Every row follows the column order spelled out in the header comment on
 * the first line of the table; the all-zero row terminates the table.
 *
 * --fixlen and --meanlen each name the other, plus --bernoulli, in their
 * "incompat" column. Both also carry a default of "1000" with range n>0, so
 * the integer read back for either of them is never 0: a value of 0 cannot
 * be used to detect "option not given on the command line".
 */
static ESL_OPTIONS options[] = {
 /* name           type             default  env_var range    toggles req   incompat       help                                                  docgroup */
  { "-h",          eslARG_NONE,     FALSE,   NULL,   NULL,      NULL,   NULL, NULL,                    "show help and usage",                                 0 },
  { "-v",          eslARG_NONE,     FALSE,   NULL,   NULL,      NULL,   NULL, NULL,                    "be verbose?",                                         0 },
  { "--node",      eslARG_NONE,     FALSE,   NULL,   NULL,      NULL,   NULL, NULL,                    "include internal nodes in the alignment",             0 },
  { "--phylip",    eslARG_NONE,     FALSE,   NULL,   NULL,      NULL,   NULL, NULL,                    "write Phylip format (default Stockholm format)",      0 },
  { "--ttr",       eslARG_REAL,      "2.0",  NULL,   "x>0",     NULL,   NULL, NULL,                    "F84 transitions to transversion ratio",               0 },
  { "--ins",       eslARG_REAL,      "0.0",  NULL,   "x>=0",    NULL,   NULL, NULL,                    "insertion rate value",                                0 },
  { "--del",       eslARG_REAL,      "0.0",  NULL,   "x>=0",    NULL,   NULL, NULL,                    "deletion rate value",                                 0 },
  { "--bernoulli", eslARG_REAL,      "0.99", NULL,   "0<x<1",   NULL,   NULL, NULL,                    "probability parameter ancestral length distribution", 0 },
  { "--fixlen",    eslARG_INT,       "1000", NULL,   "n>0",     NULL,   NULL, "--bernoulli,--meanlen", "exact length of ancestral sequences",                 0 },
  { "--meanlen",   eslARG_INT,       "1000", NULL,   "n>0",     NULL,   NULL, "--bernoulli,--fixlen",  "expected length of ancestral sequences",              0 },
  {  0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
};

/* Usage banner printed for -h and when the number of command-line arguments
 * is wrong. Built from adjacent string literals, one per output line, so the
 * text reads the same in the source as on the terminal.
 */
static char usage[] =
  "Usage: ./erate-generate [-options] <tree> <outputfile>\n"
  " where <tree> is a Newick formatted tree.\n"
  " Available options are:\n"
  "  -h       :       show help and usage\n"
  "  -v       :       be verbose\n"
  "  --node   :       include internal nodes in the alignment\n"
  "  --phylip :       write alignment in Phylip format (default Stockholm format)\n"
  "  --ttr <x>:       F84 transitions to transversion ratio value to <x>\n"
  "  --ins <x>:       insertion rate value to <x>\n"
  "  --del <x>:       deletion rate value to <x>\n"
  "  --bernoulli <x>: probability parameter ancestral length distribution to <x>\n"
  "  --fixlen <n>:    exact length of ancestral sequences to <n>\n"
  "  --meanlen <n>:   expected length of ancestral sequences to <n>\n";

/* Samples one alignment from <erate> along the tree <T> and returns it in
 * <*ret_msa>; <incnode> keeps internal-node sequences, <be_verbose> echoes the
 * full alignment to stdout. Defined after main().
 */
static void msa_sample(ERATE *erate, ESL_TREE *T, ESL_MSA **ret_msa, int incnode, int be_verbose);

/* main()
 *
 * Usage: erate-generate [-options] <tree> <outputfile>
 *
 * Parses the command line, reads a Newick tree from <tree>, builds the
 * gap-augmented evolutionary model from the rate and length options, samples
 * an alignment along the tree with msa_sample() (resampling for as long as
 * the result has zero columns) and writes it to <outputfile> in Stockholm
 * format, or in Phylip format with --phylip. With -v the model and the
 * written alignment are echoed to stdout as well.
 *
 * Ancestral length model, in order of precedence:
 *   --bernoulli <x> given : probability parameter x is used as is;
 *   --meanlen <n>   given : probability parameter n/(n+1);
 *   otherwise             : fixed length --fixlen <n> (default 1000), with
 *                           probability parameter n/(n+1) and isfixlen set.
 *
 * Returns 0 on success; every failure is reported through esl_fatal().
 */
int
main(int argc, char **argv)
{
  char          errbuf[eslERRBUFSIZE];
  ESL_GETOPTS  *go       = NULL;
  char         *treefile = NULL;
  char         *outfile  = NULL;
  FILE         *treefp   = NULL;
  FILE         *fp       = NULL;
  ERATE        *erate    = NULL;
  ESL_TREE     *T        = NULL;
  ESL_MSA      *msa      = NULL;  /* alignment that gets written out */
  double       *frq      = NULL;  /* background residue frequencies  */
  double        ttr;
  double        ins;
  double        del;
  double        bernoulli;
  int           meanlen;
  int           fixlen;
  int           isfixlen = FALSE;
  int           incnode;
  int           phylip;
  int           dim      = 4;     /* alphabet size (nucleotides) */
  int           be_verbose;
  int           x;

  errbuf[0] = '\0';

  /* Process command line
   */
  go = esl_getopts_Create(options);
  if (go == NULL) esl_fatal("Failed to create the command line parser\n");
  if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK) esl_fatal("Failed to parse command line: %s\n", go->errbuf);
  if (esl_opt_VerifyConfig(go)               != eslOK) esl_fatal("Failed to parse command line: %s\n", go->errbuf);

  if (esl_opt_GetBoolean(go, "-h") == TRUE) {
    puts(usage);
    puts("\n  where options are:");
    esl_opt_DisplayHelp(stdout, go, 0, 2, 80); /* 0=all docgroups; 2=indentation; 80=width */
    esl_getopts_Destroy(go);
    return 0;
  }
  if (esl_opt_ArgNumber(go) != 2) esl_fatal("Incorrect number of command line arguments.\n%s\n", usage);

  be_verbose = esl_opt_GetBoolean(go, "-v");
  incnode    = esl_opt_GetBoolean(go, "--node");
  phylip     = esl_opt_GetBoolean(go, "--phylip");
  ttr        = esl_opt_GetReal(go, "--ttr");
  ins        = esl_opt_GetReal(go, "--ins");
  del        = esl_opt_GetReal(go, "--del");
  bernoulli  = esl_opt_GetReal(go, "--bernoulli");
  fixlen     = esl_opt_GetInteger(go, "--fixlen");
  meanlen    = esl_opt_GetInteger(go, "--meanlen");

  /* Choose the ancestral length model. --fixlen and --meanlen both have a
   * non-zero default, so their values cannot tell us whether the user asked
   * for them; ask getopts which option was actually changed instead. The
   * option table declares the three length options mutually incompatible, so
   * at most one of them is user-set once esl_opt_VerifyConfig() has passed.
   * With none of them set we keep the fixed-length default.
   *
   * NOTE(review): some Easel versions may report an option explicitly set to
   * its default string (e.g. "--meanlen 1000") as still being at default;
   * such a run falls through to the fixed-length branch. Confirm if relevant.
   */
  if (!esl_opt_IsDefault(go, "--bernoulli")) {
    /* user-supplied probability parameter: already stored in bernoulli */
  }
  else if (!esl_opt_IsDefault(go, "--meanlen")) {
    bernoulli = (double)meanlen / ((double)meanlen + 1.0);
  }
  else {
    isfixlen  = TRUE;
    bernoulli = (double)fixlen / ((double)fixlen + 1.0);
  }

  /* These point at the program arguments, so they are still used below after
   * the getopts object has been released (as the code always did). */
  treefile = esl_opt_GetArg(go, 1);
  outfile  = esl_opt_GetArg(go, 2);

  esl_getopts_Destroy(go);

  /* Uniform background frequencies over the alphabet */
  frq = malloc(sizeof *frq * (size_t)dim);
  if (frq == NULL) esl_fatal("failed to allocate residue frequencies");
  for (x = 0; x < dim; x++) frq[x] = 1.0 / (double)dim;

  /* Open tree input file and read in Newick tree. */
  if ((treefp = fopen(treefile, "r")) == NULL)
    esl_fatal("couldn't open tree file %s", treefile);
  if (esl_tree_ReadNewick(treefp, errbuf, &T) != eslOK)
    esl_fatal("Failed to read tree: %s", errbuf);
  if (esl_tree_Validate(T, NULL) != eslOK)
    esl_fatal("input tree does not validate.");
  fclose(treefp);

  /* Open the output file. */
  if ((fp = fopen(outfile, "w")) == NULL)
    esl_fatal("failed to open %s for output", outfile);

  /* Create the evolutionary model with gaps.
   * NOTE(review): presumably Erate_Create() returns NULL on failure and
   * explains why in errbuf -- confirm against erate_evolve. */
  erate = Erate_Create(dim, ttr, ins, del, bernoulli, frq, isfixlen, errbuf);
  if (erate == NULL) esl_fatal("failed to create the evolutionary model: %s", errbuf);
  if (be_verbose) Erate_Dump(stdout, erate);

  /* This is it, generate the alignment */
  msa_sample(erate, T, &msa, incnode, be_verbose);

  /* redo if the alignment ends up having length zero */
  while (msa->alen == 0) {
    esl_msa_Destroy(msa);
    msa_sample(erate, T, &msa, incnode, be_verbose);
  }

  if (phylip) {
    Write_Phylip(fp, msa);
    if (be_verbose)
      Write_Phylip(stdout, msa);
  }
  else {
    if (esl_msa_Write(fp, msa, eslMSAFILE_STOCKHOLM) != eslOK)
      esl_fatal("failed to write alignment to %s", outfile);
    if (be_verbose)
      esl_msa_Write(stdout, msa, eslMSAFILE_STOCKHOLM);
  }

  free(frq);
  Erate_Destroy(erate);
  esl_tree_Destroy(T);
  esl_msa_Destroy(msa);

  /* fclose() flushes buffered output, so this is where a full disk shows up */
  if (fclose(fp) != 0)
    esl_fatal("failed to finish writing %s", outfile);

  return 0;
}

/* msa_sample()
 *
 * Samples one alignment from the model <erate> along the tree <T> and hands
 * it back in <*ret_msa>; the caller releases it with esl_msa_Destroy().
 *
 * erate      - evolutionary model, passed on to Erate_Generate_Alignment()
 * T          - tree, passed on to Erate_Generate_Alignment()
 * ret_msa    - RETURN: the selected sequences after esl_msa_MinimGaps()
 * incnode    - TRUE keeps every sequence; FALSE drops internal-node sequences
 * be_verbose - TRUE prints the full generated alignment to stdout
 *
 * Any failure is reported through esl_fatal().
 */
static void
msa_sample(ERATE *erate, ESL_TREE *T, ESL_MSA **ret_msa, int incnode, int be_verbose)
{
  ESL_MSA *msafull = NULL; /* alignment of leaves and internal node sequences */
  ESL_MSA *msa     = NULL; /* the sequences selected for output               */
  int     *useme   = NULL; /* useme[i] is TRUE if sequence i is kept          */
  int      i;

  /* Generate the alignment */
  if (Erate_Generate_Alignment(erate, T, &msafull, be_verbose) != eslOK)
    esl_fatal("failed to generate the alignment");
  if (be_verbose)
    esl_msa_Write(stdout, msafull, eslMSAFILE_STOCKHOLM);

  /* Mark the sequences to keep */
  useme = malloc(sizeof *useme * (size_t)msafull->nseq);
  if (useme == NULL && msafull->nseq > 0)
    esl_fatal("failed to allocate the sequence selection mask");

  for (i = 0; i < msafull->nseq; i++) {
    /* strcmp() only guarantees the sign of its result, so the earlier
     * comparison against TRUE (1) selected different sequences on different
     * C libraries. Test for the internal-node name prefix explicitly.
     *
     * NOTE(review): assumes Erate_Generate_Alignment() names internal nodes
     * with a leading "v" (e.g. "v3") -- confirm against erate_evolve. A leaf
     * whose taxon label starts with "v" is dropped too.
     */
    int is_internal = (strncmp(msafull->sqname[i], "v", 1) == 0);

    useme[i] = (incnode || !is_internal) ? TRUE : FALSE;
  }

  /* Extract the selected sequences and remove the columns left all-gap */
  if (esl_msa_SequenceSubset(msafull, useme, &msa) != eslOK)
    esl_fatal("failed to generate leaf alignment");
  if (esl_msa_MinimGaps(msa, NULL, "-", FALSE) != eslOK)
    esl_fatal("failed to generate leaf alignment");

  *ret_msa = msa;

  free(useme);
  esl_msa_Destroy(msafull);
}
