/*
 * $Id: evolve.c,v 1.33 2000/04/11 15:53:46 dirk Exp $ 
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_STDIO_H
#include <stdio.h>
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#ifdef HAVE_MATH_H
#include <math.h>
#endif

#include "global.h"
#include "lslist.h"
#include "mxerror.h"
#include "mxparser.h"
#include "evolve.h"

/* NOTE(review): DEBUG is defined only after all headers above have been
   included, so it cannot influence anything those headers define
   conditionally.  Presumably it was meant to switch on MXDEBUG output --
   confirm against mxerror.h. */
#define DEBUG

/* Debug levels; in this file they are passed as the first argument of
   MXDEBUG.  NOT_NOW is not referenced in the part of the file reviewed
   here. */
#define DEBUG_TREE_BUILDING 5
#define DEBUG_INSERTION 6
#define DEBUG_DELETION 6
#define DEBUG_CHAR_CREATION 7
#define DEBUG_CHAR_MUTATION 8
#define DEBUG_CHAR_MUTATION_INTERNAL 9
#define DEBUG_INSERT_INTERNAL 5
#define NOT_NOW 999
#define DEBUG_EVOL 5

/* NOTE(review): neither constant is referenced in the part of the file
   reviewed here; the insert length is drawn from the node's
   InsertionFunction instead -- confirm whether they are still needed. */
#define MAX_INSERTLEN 10
#define INS_FREQUENCY 1

/* presumably the number of entries in MatArray -- TODO confirm where it
   is used */
#define MAT_ARRAY_SIZE 16
/* array of matrices if no DNA-Model was given */
double *** MatArray;
/* scratch matrix; starts out unallocated (NULL) */
double **  tempMatrix = NULL;
/* set to TRUE by get_random_char() after it has warned once about a
   non-integer distance, so that the warning is not repeated */
int float_warning = FALSE;

/* Forward declarations of helpers used only inside this file.
   NOTE(review): they are not declared static, so despite the intent they
   still have external linkage.  No definition of delete() or insert()
   appears in the part of the file reviewed here. */
editop_u* insert_chars(node* n);
editop_u* insert_chars_er(node* n, int sim_coding);
editop_u* delete_chars(node* n);
editop_u* delete_chars_er(node* n, int sim_coding);
void evol(node *old, node *new);
void evol_er(node *old, node *new, int sim_coding);
void delete(char* sequence);
void insert(char* sequence);
double get_random_double(double hi);
char get_random_char2(char old_value, double dist);
int build_matrix(double t);
void mult_matrix(double **m1, double **m2);
void copy_matrix(double **m1, double **m2);
double return_max(double **matrix);
void normalize_matrix(double **matrix);

/* --------------------------------------------------------------
   FillNodes

   Walk the subtree below `father` depth first.  Every child (the
   children are chained through ->childs / ->brother) is first
   filled from its father by evol() and then visited recursively,
   so a node is always complete before its own children are
   derived from it.

   father: node whose children are to be filled; its own data must
           already be in place.
   --------------------------------------------------------------  */
void
FillNodes(node *father)
{
  node *cur = father->childs;

  while (cur != NULL) {

    MXDEBUG(5 MXDELIM
            "calling evol for father(no:%d,name:%s "
            "and child(no:%d,name:%s)" MXDELIM
            father->number MXDELIM father->Name MXDELIM
            cur->number MXDELIM cur->Name);

    /* derive this child from its father */
    evol(father, cur);

    MXDEBUG(DEBUG_EVOL MXDELIM
            "[%d]old_seq:%s" MXDELIM
            father->number MXDELIM father->sequence);

    MXDEBUG(DEBUG_EVOL MXDELIM
            "[%d]new_seq:%s" MXDELIM
            cur->number MXDELIM cur->sequence);

    /* descend before moving on to the next sibling */
    FillNodes(cur);

    cur = cur->brother;
  }
}

/* -------------------------------------------------------------- 
   FillNodes_er

   Same depth-first walk as FillNodes(), but every child is filled
   by evol_er() instead of evol().

   father:     node whose children (->childs, chained through
               ->brother) are to be filled.
   sim_coding: handed through unchanged to evol_er() and to the
               recursive calls.
   --------------------------------------------------------------  */
void
FillNodes_er(node *father, int sim_coding){

  node  *child;

  /* fill the children of this node with the correct info */
  for(child=father->childs; child!=NULL; child=child->brother) {

    MXDEBUG(5 MXDELIM
            "calling evol for father(no:%d,name:%s "
            "and child(no:%d,name:%s)" MXDELIM
            father->number MXDELIM father->Name MXDELIM
            child->number MXDELIM child->Name);

    /* derive this child from its father */
    evol_er(father,child, sim_coding);

    MXDEBUG(DEBUG_EVOL MXDELIM
            "[%d]old_seq:%s" MXDELIM
            father->number MXDELIM father->sequence);
    
    MXDEBUG(DEBUG_EVOL MXDELIM
            "[%d]new_seq:%s" MXDELIM
            child->number MXDELIM child->sequence);
    
    /* the child is complete now, so its own children can be derived */
    FillNodes_er(child, sim_coding);
  }
  
  return;
}
/* -------------------------------------------------------------- 
   FillTree
   create all the nodes in the tree with the correct 
   interdistances
   --------------------------------------------------------------  */
void 
FillTree(){

  /* copy the Rootsequence to the root of the Tree*/
  TheTree->value.t->sequence=strdup(TheSequence->value.s);

  /* compute length of Rootsequence */
  TheTree->value.t->SequenceLen=strlen(TheTree->value.t->sequence);

  /* for the root node we need Insertion and DeletionFunctions and
     appropriate thresholds as well as mutation*/
  TheTree->value.t->InsertThreshold=TheInsertThreshold->value.d;
  TheTree->value.t->DeleteThreshold=TheDeleteThreshold->value.d;

  TheTree->value.t->MutationProbability=TheMutationProbability->value.dv;
  /* have alloced mem in root */
  TheTree->value.t->MutProbAlloc=TRUE;
  TheTree->value.t->InsertionFunction=TheInsertionFunction->value.dv;
  TheTree->value.t->DeletionFunction=TheDeletionFunction->value.dv;

  /*  TheTree->value.t->Name=strdup("Root");
  TheTree->value.t->number=0; */
    
  /*  call evol for every node 
      walks recursively thru the tree and fills it */
  FillNodes(TheTree->value.t);

  return;
}

/* -------------------------------------------------------------- 
   FillTree_er
   uses FillNodes_er() instead of FillNodes()
   --------------------------------------------------------------  */
void 
FillTree_er(int sim_coding){

  /* copy the Rootsequence to the root of the Tree*/
  TheTree->value.t->sequence=strdup(TheSequence->value.s);

  /* compute length of Rootsequence */
  TheTree->value.t->SequenceLen=strlen(TheTree->value.t->sequence);

  /* for the root node we need Insertion and DeletionFunctions and
     appropriate thresholds as well as mutation*/
  TheTree->value.t->InsertThreshold=TheInsertThreshold->value.d;
  TheTree->value.t->DeleteThreshold=TheDeleteThreshold->value.d;

  TheTree->value.t->MutationProbability=TheMutationProbability->value.dv;
  /* have alloced mem in root */
  TheTree->value.t->MutProbAlloc=TRUE;
  TheTree->value.t->InsertionFunction=TheInsertionFunction->value.dv;
  TheTree->value.t->DeletionFunction=TheDeletionFunction->value.dv;

  /*  TheTree->value.t->Name=strdup("Root");
  TheTree->value.t->number=0; */
    
  /*  call evol for every node 
      walks recursively thru the tree and fills it */
  FillNodes_er(TheTree->value.t, sim_coding);

  return;
}

/* -------------------------------------------------------------- 
   evol

   Fill the node `new` from its father `old`:

     1. `new` gets its own copy of the father's sequence and takes
        over the father's length, thresholds, insertion/deletion
        functions and mutation probability vector (the vector is
        shared, not copied, hence MutProbAlloc = FALSE).
     2. Every site is passed through get_random_char() with
        new->distance scaled by the site's MutationProbability;
        each actual change is appended to new->edits as an E_OP.
     3. ROUND(new->distance) * old->SequenceLen times, one
        insertion and one deletion are attempted.

   new->distance, new->number and old's fields are only read here
   and must have been set before the call.  The program is
   terminated through mxError(DO_EXIT, ...) if the sequence cannot
   be copied.
   --------------------------------------------------------------  */
void 
evol(node *old, node *new){
  
  int i;
  editop_u *eop;
  char newvalue;
  
  MXDEBUG(DEBUG_TREE_BUILDING MXDELIM
          "Create new node no. %d from node no. %d" MXDELIM
          new->number MXDELIM old->number); 
 
  /* the list of edit operations of the new node starts out empty */
  new->edits = NULL;
  
  /* copy sequence from old to new */
  new->sequence = strdup (old->sequence); 
  if (new->sequence == NULL) {
    mxError(DO_EXIT,"couldn't allocate memory");
  }

  /* set the sequence length for the new node */
  new->SequenceLen=old->SequenceLen;

  /* Per-site mutation rates and per-taxon insertion/deletion rates
     and thresholds are supported by the data structure, but for now
     every node simply inherits the settings of its father. */
  new->InsertThreshold=old->InsertThreshold;
  new->DeleteThreshold=old->DeleteThreshold;
  new->DeletionFunction=old->DeletionFunction;
  new->InsertionFunction=old->InsertionFunction;
  new->MutationProbability=old->MutationProbability;
  /* the vector belongs to somebody else, we must not free it */
  new->MutProbAlloc = FALSE;

  /* substitutions along the sequence */
  for(i=0; i< new->SequenceLen; ++i) {
    
    /* get a replacement character for the current one, taking into
       account the site specific mutation rate */
    newvalue = get_random_char(new->sequence[i],
                               new->distance * new->MutationProbability[i]);
    
    /* if the character changed, keep a protocol of the operation */
    if (newvalue != new->sequence[i] ) {
      
      eop = lsAppend( lsListp (new->edits) , sizeof(editop_u));

      /* if the list is empty, the list starts right here */
      if (new->edits==NULL)
        new->edits=eop;

      eop->editop.type = E_OP;
      eop->editop.position = i;
      eop->editop.new_value = newvalue;
      
      /* now we update the "real" sequence stored in this node */
      new->sequence[i] = newvalue;
    }
  }

  /* Insertions and deletions: the number of attempts is derived from
     the distance.  The helpers append their edit operation to
     new->edits themselves, so the returned pointer is not needed. */
  for (i=0; i< ROUND(new->distance)*old->SequenceLen ; i++) {
    /* INSERTIONS */
    (void) insert_chars(new);   
    /* DELETIONS */
    (void) delete_chars(new);  
  }  

  MXDEBUG(4 MXDELIM "new node %d with sequence %s len: %d created" MXDELIM
          new->number MXDELIM new->sequence MXDELIM new->SequenceLen);

  return ;
} /* evol() */


/* --------------------------------------------------------------
   evol_er   (ER, Mon Oct  9 17:00:41 EDT 2006)

   Modification of evol().  Differences:

     - the number of insertion/deletion attempts is
           ROUND(new->distance * old->SequenceLen)
       instead of
           ROUND(new->distance) * old->SequenceLen
     - insertions and deletions are done by insert_chars_er() and
       delete_chars_er(), which receive sim_coding.

   old:        father node, only read.
   new:        node to fill; new->distance and new->number must have
               been set before the call.
   sim_coding: handed through unchanged to the *_er helpers.

   The program is terminated through mxError(DO_EXIT, ...) if the
   sequence cannot be copied.
   -------------------------------------------------------------- */
void 
evol_er(node *old, node *new, int sim_coding){
  
  int i;
  editop_u *eop;
  char newvalue;
  
  MXDEBUG(DEBUG_TREE_BUILDING MXDELIM
          "Create new node no. %d from node no. %d" MXDELIM
          new->number MXDELIM old->number); 
 
  /* the list of edit operations of the new node starts out empty */
  new->edits = NULL;
  
  /* copy sequence from old to new */
  new->sequence = strdup (old->sequence); 
  if (new->sequence == NULL) {
    mxError(DO_EXIT,"couldn't allocate memory");
  }

  /* set the sequence length for the new node */
  new->SequenceLen=old->SequenceLen;

  /* Per-site mutation rates and per-taxon insertion/deletion rates
     and thresholds are supported by the data structure, but for now
     every node simply inherits the settings of its father. */
  new->InsertThreshold=old->InsertThreshold;
  new->DeleteThreshold=old->DeleteThreshold;
  new->DeletionFunction=old->DeletionFunction;
  new->InsertionFunction=old->InsertionFunction;
  new->MutationProbability=old->MutationProbability;
  /* the vector belongs to somebody else, we must not free it */
  new->MutProbAlloc = FALSE;

  /* substitutions along the sequence */
  for(i=0; i< new->SequenceLen; ++i) {
    
    /* get a replacement character for the current one, taking into
       account the site specific mutation rate */
    newvalue = get_random_char(new->sequence[i],
                               new->distance * new->MutationProbability[i]);
    
    /* if the character changed, keep a protocol of the operation */
    if (newvalue != new->sequence[i] ) {
      
      eop = lsAppend( lsListp (new->edits) , sizeof(editop_u));

      /* if the list is empty, the list starts right here */
      if (new->edits==NULL)
        new->edits=eop;

      eop->editop.type = E_OP;
      eop->editop.position = i;
      eop->editop.new_value = newvalue;
      
      /* now we update the "real" sequence stored in this node */
      new->sequence[i] = newvalue;
    }
  }

  /* Insertions and deletions: the number of attempts is derived from
     the distance.  The helpers append their edit operation to
     new->edits themselves, so the returned pointer is not needed. */
  for (i=0; i< ROUND(new->distance*old->SequenceLen); i++) {
    /* INSERTIONS */
    (void) insert_chars_er(new, sim_coding);   
    /* DELETIONS */
    (void) delete_chars_er(new, sim_coding);  
  }  
  
  MXDEBUG(4 MXDELIM "new node %d with sequence %s len: %d created" MXDELIM
          new->number MXDELIM new->sequence MXDELIM new->SequenceLen);

  return ;
} /* evol_er() */

/* -------------------------------------------------------------- 
   delete_chars

   Possibly delete a run of characters from the sequence of node n.
   All changes are made to the node itself: n->sequence,
   n->SequenceLen and n->MutationProbability are shortened and a
   D_OP is appended to n->edits.

   A deletion happens only if
     - a random number in [0,1.0) is below n->DeleteThreshold, and
     - the randomly chosen position has MutationProbability >= 1.0
       (or the sequence is empty).
   The length is drawn from n->DeletionFunction, cut at the end of
   the sequence and cut again at the first site whose
   MutationProbability is below 1.0.

   NOTE(review): for an empty sequence this still records a D_OP of
   length 0; kept as is so that the stream of random numbers and
   the edit list stay unchanged.

   Returns the new edit operation, or NULL if nothing was deleted.
   Terminates the program through mxError(DO_EXIT, ...) when out of
   memory.
   -------------------------------------------------------------- */
editop_u* 
delete_chars(node* n) {
  
  int DeletePos, DeleteLen, i, oldlen;
  double do_delete, *old_vector, RoundFactor;
  double dellen;
  editop_u* eop;

  /* chance strikes here */
  do_delete = get_random_double(1.0);

  /* only do deletion if value is smaller than threshold */
  if (!(do_delete < n->DeleteThreshold))
    return NULL;

  /* set the delete position */
  DeletePos=get_random_int(n->SequenceLen);

  /* the check for an empty sequence comes first so that the vector
     is not indexed in that case */
  if (!(n->SequenceLen==0 || n->MutationProbability[DeletePos] >= 1.0))
    return NULL;

  /* we need to know the cumulated probability for deletion */
  RoundFactor = n->DeletionFunction[TOTAL];

  /* get another random number for the deletion length */
  dellen = get_random_double(RoundFactor);

  /* the length is the first index whose DeletionFunction entry
     reaches the random number */
  for (DeleteLen=1; 
       n->DeletionFunction[DeleteLen]<dellen; 
       ++DeleteLen)
    /* nothing */ ;

  /* we have to make sure we don't delete over the end of the
     sequence */
  if (DeletePos+DeleteLen > n->SequenceLen) {
    DeleteLen=n->SequenceLen-DeletePos;
  }

  /* we have to make sure that we don't delete into motifs: stop at
     the first site whose MutationProbability is below 1.0 */
  for(i=0;
      i<DeleteLen && n->MutationProbability[DeletePos+i]>=1.0;
      i++)
    /* nothing */ ;
  DeleteLen = i;

  /* save the new SequenceLen */
  oldlen = n->SequenceLen;
  n->SequenceLen-=DeleteLen;

  MXDEBUG(DEBUG_DELETION MXDELIM
          "Deletion of length %d at position %d, len=%d" MXDELIM
          DeleteLen MXDELIM DeletePos MXDELIM (n->SequenceLen));

  /* close the gap in place: move the tail (including the terminating
     '\0') down over the deleted characters.  The regions overlap,
     hence memmove().  This needs no temporary copy of the sequence,
     so there is no second allocation that could fail. */
  memmove(n->sequence+DeletePos,
          n->sequence+DeletePos+DeleteLen,
          strlen(n->sequence+DeletePos+DeleteLen)+1);

  /* now give back the memory that is no longer needed */
  n->sequence=realloc(n->sequence,(n->SequenceLen)+1);
  if (n->sequence ==NULL ) 
    mxError(DO_EXIT,"couldn't allocate memory");

  /* 
     build the shortened MutationProbability vector
  */

  /* first we rescue the old data */
  old_vector=n->MutationProbability;

  /* now allocate space for the new shortened vector; always at
     least one element */
  n->MutationProbability=(double *)malloc(MAX(1,n->SequenceLen)*
                                          sizeof(double));

  if ( n->MutationProbability == NULL )  
    mxError(DO_EXIT,"couldn't allocate memory for MutationProbability"); 

  /* copy the part before and after the deletion */
  memcpy(n->MutationProbability,
         old_vector,
         DeletePos * sizeof(double));

  memcpy(&(n->MutationProbability[DeletePos]),
         &(old_vector[DeletePos+DeleteLen]),
         (oldlen-(DeletePos+DeleteLen)) * sizeof(double));

  /* free old mem if we allocated it for this node */
  if(n->MutProbAlloc == TRUE)
    free(old_vector);
  /* we definitely allocated the mem this time */
  n->MutProbAlloc = TRUE;

  /* get the space for eop 
     and append it to the existing list of edits for this node */
  eop = lsAppend(lsListp(n->edits), sizeof(editop_u));

  /* if the list is empty, the list starts right here */
  if (n->edits==NULL)
    n->edits=eop;

  /* fill in the delete operation and hand it back */
  eop->delop.type=D_OP;
  eop->delop.position = DeletePos;
  eop->delop.length = DeleteLen;

  return eop;
}
  
/* -------------------------------------------------------------- 
   delete_chars_er

   Possibly delete a run of characters from the sequence of node n.
   All changes are made to the node itself: n->sequence,
   n->SequenceLen and n->MutationProbability are shortened and a
   D_OP is appended to n->edits.

   ER: compared with delete_chars() this adds the option of
   simulating indels in coding regions: if sim_coding is non-zero,
   a deletion is only started at a position that is a multiple
   of 3.
   NOTE(review): the deletion length is not restricted to a
   multiple of 3 here -- confirm whether that is intended.

   A deletion happens only if
     - a random number in [0,1.0) is below n->DeleteThreshold,
     - the position passes the sim_coding test above, and
     - the randomly chosen position has MutationProbability >= 1.0
       (or the sequence is empty).
   The length is drawn from n->DeletionFunction, cut at the end of
   the sequence and cut again at the first site whose
   MutationProbability is below 1.0.

   Returns the new edit operation, or NULL if nothing was deleted.
   Terminates the program through mxError(DO_EXIT, ...) when out of
   memory.
   -------------------------------------------------------------- */
editop_u* 
delete_chars_er(node* n, int sim_coding) {
  
  int DeletePos, DeleteLen, i, oldlen;
  double do_delete, *old_vector, RoundFactor;
  double dellen;
  editop_u* eop;

  /* chance strikes here */
  do_delete = get_random_double(1.0);

  /* only do deletion if value is smaller than threshold */
  if (!(do_delete < n->DeleteThreshold))
    return NULL;

  /* set the delete position */
  DeletePos=get_random_int(n->SequenceLen);

  /* ER: if simulating indels in coding regions, only start at a
     position that is a multiple of 3 (integer test; gives the same
     answer as the former floating point comparison) */
  if (sim_coding && DeletePos % 3 != 0)
    return NULL;

  /* the check for an empty sequence comes first so that the vector
     is not indexed in that case */
  if (!(n->SequenceLen==0 || n->MutationProbability[DeletePos] >= 1.0))
    return NULL;

  /* we need to know the cumulated probability for deletion */
  RoundFactor = n->DeletionFunction[TOTAL];

  /* get another random number for the deletion length */
  dellen = get_random_double(RoundFactor);

  /* the length is the first index whose DeletionFunction entry
     reaches the random number */
  for (DeleteLen=1; 
       n->DeletionFunction[DeleteLen]<dellen; 
       ++DeleteLen)
    /* nothing */ ;

  /* we have to make sure we don't delete over the end of the
     sequence */
  if (DeletePos+DeleteLen > n->SequenceLen) {
    DeleteLen=n->SequenceLen-DeletePos;
  }

  /* we have to make sure that we don't delete into motifs: stop at
     the first site whose MutationProbability is below 1.0 */
  for(i=0;
      i<DeleteLen && n->MutationProbability[DeletePos+i]>=1.0;
      i++)
    /* nothing */ ;
  DeleteLen = i;

  /* save the new SequenceLen */
  oldlen = n->SequenceLen;
  n->SequenceLen-=DeleteLen;

  MXDEBUG(DEBUG_DELETION MXDELIM
          "Deletion of length %d at position %d, len=%d" MXDELIM
          DeleteLen MXDELIM DeletePos MXDELIM (n->SequenceLen));

  /* close the gap in place: move the tail (including the terminating
     '\0') down over the deleted characters.  The regions overlap,
     hence memmove().  This needs no temporary copy of the sequence,
     so there is no second allocation that could fail. */
  memmove(n->sequence+DeletePos,
          n->sequence+DeletePos+DeleteLen,
          strlen(n->sequence+DeletePos+DeleteLen)+1);

  /* now give back the memory that is no longer needed */
  n->sequence=realloc(n->sequence,(n->SequenceLen)+1);
  if (n->sequence ==NULL ) 
    mxError(DO_EXIT,"couldn't allocate memory");

  /* 
     build the shortened MutationProbability vector
  */

  /* first we rescue the old data */
  old_vector=n->MutationProbability;

  /* now allocate space for the new shortened vector; always at
     least one element */
  n->MutationProbability=(double *)malloc(MAX(1,n->SequenceLen)*
                                          sizeof(double));

  if ( n->MutationProbability == NULL )  
    mxError(DO_EXIT,"couldn't allocate memory for MutationProbability"); 

  /* copy the part before and after the deletion */
  memcpy(n->MutationProbability,
         old_vector,
         DeletePos * sizeof(double));

  memcpy(&(n->MutationProbability[DeletePos]),
         &(old_vector[DeletePos+DeleteLen]),
         (oldlen-(DeletePos+DeleteLen)) * sizeof(double));

  /* free old mem if we allocated it for this node */
  if(n->MutProbAlloc == TRUE)
    free(old_vector);
  /* we definitely allocated the mem this time */
  n->MutProbAlloc = TRUE;

  /* get the space for eop 
     and append it to the existing list of edits for this node */
  eop = lsAppend(lsListp(n->edits), sizeof(editop_u));

  /* if the list is empty, the list starts right here */
  if (n->edits==NULL)
    n->edits=eop;

  /* fill in the delete operation and hand it back */
  eop->delop.type=D_OP;
  eop->delop.position = DeletePos;
  eop->delop.length = DeleteLen;

  return eop;
}
  
/* -------------------------------------------------------------- 
   insert_chars

   Possibly insert a run of random characters into the sequence of
   node n.  All changes are made to the node itself: n->sequence,
   n->SequenceLen and n->MutationProbability grow and an I_OP
   holding a copy of the inserted characters is appended to
   n->edits.

   An insertion happens only if
     - a random number in [0,1.0) is below n->InsertThreshold, and
     - the randomly chosen position has MutationProbability >= 1.0
       (or the sequence is empty).
   The length is drawn from n->InsertionFunction; the inserted
   sites get a MutationProbability of 1.0.

   Unlike insert_chars_er() this function places no restriction on
   the reading frame.  Left-over debugging code that printed to
   stdout and rejected every position not divisible by 3 has been
   removed; delete_chars() never had such a restriction.

   Returns the new edit operation, or NULL if nothing was inserted.
   Terminates the program through mxError(DO_EXIT, ...) when out of
   memory.
   -------------------------------------------------------------- */
editop_u* 
insert_chars(node *n) {

  double do_insert, RoundFactor, *old_vector, inslen;
  int InsertPos, InsertLen,i, old_seqlen;
  editop_u *eop;

  /* save the old length, we are gonna need it */
  old_seqlen=n->SequenceLen;

  /* chance strikes here */
  do_insert = get_random_double(1.0);

  /* only do insertion if value is smaller than threshold */  
  if (!(do_insert < n->InsertThreshold))
    return NULL;

  /* set the insert position */
  InsertPos = get_random_int(n->SequenceLen);

  /* only go on if the site is right; the check for an empty sequence
     comes first so that the vector is not indexed in that case */
  if (!(n->SequenceLen==0 || n->MutationProbability[InsertPos] >=1.0))
    return NULL;

  /* we need to know the cumulated probability for insertion */
  RoundFactor = n->InsertionFunction[TOTAL];

  /* get another random number for the insert length */
  inslen = get_random_double(RoundFactor);      

  /* the length is the first index whose InsertionFunction entry
     reaches the random number */
  for (InsertLen=1; 
       n->InsertionFunction[InsertLen]<inslen; 
       ++InsertLen)
    /* nothing */ ;

  MXDEBUG(DEBUG_INSERT_INTERNAL MXDELIM
          "InsertLen:%d, do_insert=%f" MXDELIM
          InsertLen MXDELIM do_insert);

  /* save the new SequenceLen */
  n->SequenceLen=(n->SequenceLen)+InsertLen;   

  MXDEBUG(DEBUG_INSERTION MXDELIM
          "Insertion (do_insert=%f) of length %d at position %d: " MXDELIM
          do_insert MXDELIM InsertLen MXDELIM InsertPos);

  /* make room for the longer sequence */
  n->sequence=realloc(n->sequence,(n->SequenceLen)+1);
  if(n->sequence == NULL) 
    mxError(DO_EXIT,"couldn't allocate memory");

  /* open the gap in place: move the tail (including the terminating
     '\0') up by InsertLen.  The regions overlap, hence memmove().
     This needs no temporary copy of the sequence, so there is no
     second allocation that could fail. */
  memmove(n->sequence+InsertPos+InsertLen,
          n->sequence+InsertPos,
          strlen(n->sequence+InsertPos)+1);

  /* fill the gap with randomly chosen characters */
  for(i=InsertPos; i< (InsertPos+InsertLen) ; ++i)  
    n->sequence[i]=get_random_char(ZUFALL, 0);  

  /* build the longer MutationProbability vector */
  old_vector=n->MutationProbability;

  n->MutationProbability=malloc(n->SequenceLen*sizeof(double));
  if (n->MutationProbability ==NULL ) 
    mxError(DO_EXIT,"couldn't allocate memory");

  /* copy up to the insert */
  for(i=0; i<InsertPos; i++) {
    n->MutationProbability[i]=old_vector[i];
  }
  /* fill the insertion */
  for(i=InsertPos; i<(InsertPos+InsertLen); i++){
    n->MutationProbability[i]=1.0;
  }
  /* now copy the rest of the vector, here we need the old length */
  for( i=InsertPos; i<old_seqlen; i++){
    n->MutationProbability[i+InsertLen]=old_vector[i];
  }

  /* free old mem if we allocated it for this node */
  if(n->MutProbAlloc == TRUE)
    free(old_vector);
  /* we definitely allocated the mem this time */
  n->MutProbAlloc = TRUE;

  /* get the space for eop and append to list */
  eop = lsAppend( lsListp (n->edits) , sizeof(editop_u));

  /* if the list is empty, the list starts right here */
  if (n->edits==NULL)
    n->edits=eop;

  /* write the edit operation info; the edit operation keeps its own
     '\0'-terminated copy of the inserted characters */
  eop->insop.type=I_OP;
  eop->insop.position = InsertPos;
  eop->insop.insert_sequence=malloc(InsertLen+1);
  if (eop->insop.insert_sequence == NULL)
    mxError(DO_EXIT,"couldn't allocate memory");
  memcpy(eop->insop.insert_sequence,(n->sequence)+InsertPos,
         InsertLen);
  eop->insop.insert_sequence[InsertLen]='\0';

  /* return pointer to edit operation */
  return eop;
}

/* -------------------------------------------------------------- 
   insert_chars_er

   Possibly insert a run of random characters into the sequence of
   node n.  All changes are made to the node itself: n->sequence,
   n->SequenceLen and n->MutationProbability grow and an I_OP
   holding a copy of the inserted characters is appended to
   n->edits.

   ER: compared with insert_chars() this adds the option of
   simulating indels in coding regions: if sim_coding is non-zero,
   an insertion is only made at a position that is a multiple
   of 3.
   NOTE(review): the insert length is not restricted to a multiple
   of 3 here -- confirm whether that is intended.

   An insertion happens only if
     - a random number in [0,1.0) is below n->InsertThreshold,
     - the position passes the sim_coding test above, and
     - the randomly chosen position has MutationProbability >= 1.0
       (or the sequence is empty).
   The length is drawn from n->InsertionFunction; the inserted
   sites get a MutationProbability of 1.0.

   Returns the new edit operation, or NULL if nothing was inserted.
   Terminates the program through mxError(DO_EXIT, ...) when out of
   memory.
   -------------------------------------------------------------- */
editop_u* 
insert_chars_er(node *n, int sim_coding) {

  double do_insert, RoundFactor, *old_vector, inslen;
  int InsertPos, InsertLen,i, old_seqlen;
  editop_u *eop;

  /* save the old length, we are gonna need it */
  old_seqlen=n->SequenceLen;

  /* chance strikes here */
  do_insert = get_random_double(1.0);

  /* only do insertion if value is smaller than threshold */  
  if (!(do_insert < n->InsertThreshold))
    return NULL;

  /* set the insert position */
  InsertPos = get_random_int(n->SequenceLen);

  /* ER: if simulating indels in coding regions, only insert at a
     position that is a multiple of 3 (integer test; gives the same
     answer as the former floating point comparison) */
  if (sim_coding && InsertPos % 3 != 0)
    return NULL;

  /* only go on if the site is right; the check for an empty sequence
     comes first so that the vector is not indexed in that case */
  if (!(n->SequenceLen==0 || n->MutationProbability[InsertPos] >=1.0))
    return NULL;

  /* we need to know the cumulated probability for insertion */
  RoundFactor = n->InsertionFunction[TOTAL];

  /* get another random number for the insert length */
  inslen = get_random_double(RoundFactor);      

  /* the length is the first index whose InsertionFunction entry
     reaches the random number */
  for (InsertLen=1; 
       n->InsertionFunction[InsertLen]<inslen; 
       ++InsertLen)
    /* nothing */ ;

  MXDEBUG(DEBUG_INSERT_INTERNAL MXDELIM
          "InsertLen:%d, do_insert=%f" MXDELIM
          InsertLen MXDELIM do_insert);

  /* save the new SequenceLen */
  n->SequenceLen=(n->SequenceLen)+InsertLen;   

  MXDEBUG(DEBUG_INSERTION MXDELIM
          "Insertion (do_insert=%f) of length %d at position %d: " MXDELIM
          do_insert MXDELIM InsertLen MXDELIM InsertPos);

  /* make room for the longer sequence */
  n->sequence=realloc(n->sequence,(n->SequenceLen)+1);
  if(n->sequence == NULL) 
    mxError(DO_EXIT,"couldn't allocate memory");

  /* open the gap in place: move the tail (including the terminating
     '\0') up by InsertLen.  The regions overlap, hence memmove().
     This needs no temporary copy of the sequence, so there is no
     second allocation that could fail. */
  memmove(n->sequence+InsertPos+InsertLen,
          n->sequence+InsertPos,
          strlen(n->sequence+InsertPos)+1);

  /* fill the gap with randomly chosen characters */
  for(i=InsertPos; i< (InsertPos+InsertLen) ; ++i)  
    n->sequence[i]=get_random_char(ZUFALL, 0);  

  /* build the longer MutationProbability vector */
  old_vector=n->MutationProbability;

  n->MutationProbability=malloc(n->SequenceLen*sizeof(double));
  if (n->MutationProbability ==NULL ) 
    mxError(DO_EXIT,"couldn't allocate memory");

  /* copy up to the insert */
  for(i=0; i<InsertPos; i++) {
    n->MutationProbability[i]=old_vector[i];
  }
  /* fill the insertion */
  for(i=InsertPos; i<(InsertPos+InsertLen); i++){
    n->MutationProbability[i]=1.0;
  }
  /* now copy the rest of the vector, here we need the old length */
  for( i=InsertPos; i<old_seqlen; i++){
    n->MutationProbability[i+InsertLen]=old_vector[i];
  }

  /* free old mem if we allocated it for this node */
  if(n->MutProbAlloc == TRUE)
    free(old_vector);
  /* we definitely allocated the mem this time */
  n->MutProbAlloc = TRUE;

  /* get the space for eop and append to list */
  eop = lsAppend( lsListp (n->edits) , sizeof(editop_u));

  /* if the list is empty, the list starts right here */
  if (n->edits==NULL)
    n->edits=eop;

  /* write the edit operation info; the edit operation keeps its own
     '\0'-terminated copy of the inserted characters */
  eop->insop.type=I_OP;
  eop->insop.position = InsertPos;
  eop->insop.insert_sequence=malloc(InsertLen+1);
  if (eop->insop.insert_sequence == NULL)
    mxError(DO_EXIT,"couldn't allocate memory");
  memcpy(eop->insop.insert_sequence,(n->sequence)+InsertPos,
         InsertLen);
  eop->insop.insert_sequence[InsertLen]='\0';

  /* return pointer to edit operation */
  return eop;
}


/* -------------------------------------------------------------- 
   get_random_char

   Create a random character from the alphabet, or mutate one.

   old_value: ZUFALL to create a brand new character (dist is not
              looked at in this case); any other value is the
              character to be mutated.
   dist:      distance over which old_value is mutated.

   Creation: a random number below TheFrequencies[TOTAL] is drawn
   and TheAlphabet is walked until the frequency entry of the
   current character exceeds it.  The frequency vector is indexed
   by character code.

   Mutation: if a DNA model was given the work is handed to
   get_random_char2().  Otherwise the column of TheMatrix that
   belongs to the current character is sampled in the same way,
   repeated dist times.  A non-integer dist is rounded, with a
   warning printed once.  A rounded dist of 0 returns old_value
   unchanged.

   NOTE(review): neither walk checks for the end of TheAlphabet;
   both rely on the tables being cumulative up to [TOTAL].

   Returns the created or mutated character.
   -------------------------------------------------------------- */
char 
get_random_char(char old_value, double dist) {
  
  char value, real_old_value;
  int column, i, int_dist; 
  int k=0;
  double random_number;

  /* save the original value for debug purposes */
  value=real_old_value=old_value;

  /* generate a new character */
  if ( old_value == ZUFALL ){

    random_number=get_random_double(TheFrequencies->value.dv[TOTAL]);
    
    /* walk thru vector until the summed up probabilities 
       match the random variable */
    for(k=0; 
        TheFrequencies->value.dv[(int)TheAlphabet->value.s[k]]<=random_number;
        ++k )
      /* nothing */ ;
    
    /* get the char value of the alphabet element */
    value = TheAlphabet->value.s[k];
 
    /* report the frequency entry that actually ended the walk, i.e.
       the one of the chosen character (the vector is indexed by
       character code, not by k) */
    MXDEBUG(DEBUG_CHAR_CREATION MXDELIM
            "Created: %c (r=%3.3f<%3.3f<=%3.3f(TOTAL), k=%d)" MXDELIM
            value MXDELIM random_number MXDELIM
            TheFrequencies->value.dv[(int)value]
            MXDELIM TheFrequencies->value.dv[TOTAL] MXDELIM k);

    return value;
  }

  /* mutate a character */

  /* if a DNA Model was given, the calculation is different */
  if (TheDNAmodel->value.s != NULL) {
    return get_random_char2(old_value,dist);
  }

  /* repeat this process dist times */
  int_dist = (int) dist;
  if (dist - int_dist != 0.0) {
    if (float_warning == FALSE) {
      mxError(DONT_EXIT,"Non-integer distances for Non-DNA-Matrices not implemented yet\nUsing rounded value...\n");
      float_warning = TRUE;  /*otherwise the warning is printed too often */
    }
    int_dist = ROUND(dist);
  }

  if (int_dist == 0) return old_value;

  for(i=0; i<int_dist; i++){  /* we simply use PAM1 at this stage dist-times */
       
    /* the column is selected by the current character */
    column = (int) old_value;
    
    /* get a random number below the TOTAL for this column */
    random_number= get_random_double(TheMatrix->value.dm[TOTAL][column]);
    
    /* walk thru matrix until the summed up probabilities 
       match the random variable */
    for(k=0; 
        TheMatrix->value.dm[(int)TheAlphabet->value.s[k]][column]<=random_number; 
        ++k)   
      /* nothing */ ;
    MXDEBUG(DEBUG_CHAR_MUTATION_INTERNAL MXDELIM
            "k=%d\n,TheAlphabet->value.s[k]=%c\ncolumn=%d\n"
            "random_number=%f\n"
            "TheMatrix->value.dm[TheAlphabet->value.s[k]][column]=%f\n"
            MXDELIM k MXDELIM TheAlphabet->value.s[k] MXDELIM column
            MXDELIM random_number MXDELIM
            TheMatrix->value.dm[(int)TheAlphabet->value.s[k]][column]);

    /* get the char value of matrix element */
    value = TheAlphabet->value.s[k];

    MXDEBUG(DEBUG_CHAR_MUTATION_INTERNAL MXDELIM
            "mutate from %c to %c (r=%5f ,Matrix=%5f,k=%d)" MXDELIM
            real_old_value MXDELIM value MXDELIM random_number MXDELIM
            TheMatrix->value.dm[(int)TheAlphabet->value.s[k]][column] MXDELIM k);

    /* the next round starts from the result of this one */
    old_value= value;      
  }  

  /* after the completion of the loop we can now say what
     mutation really took place */
  if ((real_old_value != value) && (dist >0)) {
    MXDEBUG(DEBUG_CHAR_MUTATION MXDELIM
            "mutate from %c to %c" MXDELIM
            real_old_value MXDELIM value);
  }

  /* return the character */
  return value;
}

/* -------------------------------------------------------------- 
   get_random_char2

   Picks the character that old_value is replaced by for the
   distance dist.

   build_matrix(dist) is called first (presumably it refreshes
   TheMatrix for this distance -- confirm in build_matrix). Then a
   random number below the TOTAL entry of old_value's column is
   drawn and the column is walked in alphabet order until an entry
   larger than that number is found.

   old_value: the current character, also used as column index
   dist:      the distance; values <= 0 mean "no mutation"

   returns:   old_value if dist <= 0, otherwise the selected
              character of TheAlphabet
   --------------------------------------------------------------  */
char get_random_char2(char old_value, double dist){

  int k,column;
  char value;
  double random_number;

  /* if dist is zero (or negative), no mutation takes place */
  if (dist <= 0.0) return old_value;

  /* build a new matrix with the dist value for the time t */
  build_matrix(dist);
  
  /* get the correct column */
  column = (int) old_value;
      
  /* get a random number in [0..TOTAL) for this column */
  random_number= get_random_double(TheMatrix->value.dm[TOTAL][column]);
      
  /* walk thru matrix until the summed up probabilities 
     match the random variable.
     NOTE(review): the walk has no upper bound on k; it relies on
     some entry of the column being larger than random_number --
     confirm that the column entries are cumulative up to TOTAL */
  for(k=0; 
      TheMatrix->value.dm[(int)TheAlphabet->value.s[k]][column]<=random_number; 
      ++k)   
    /* nothing */ ;
  MXDEBUG(DEBUG_CHAR_MUTATION_INTERNAL MXDELIM
	  "k=%d\n,TheAlphabet->value.s[k]=%c\ncolumn=%d\n"
	  "random_number=%f\n"
	  "TheMatrix->value.dm[TheAlphabet->value.s[k]][column]=%f\n"
	  MXDELIM k MXDELIM TheAlphabet->value.s[k] MXDELIM column
	  MXDELIM random_number MXDELIM
	  TheMatrix->value.dm[(int)TheAlphabet->value.s[k]][column]);
  
  /* get the char value of matrix element */
  value = TheAlphabet->value.s[k];

  /* old_value is never modified in this function, so it still
     holds the character we started from */
  MXDEBUG(DEBUG_CHAR_MUTATION_INTERNAL MXDELIM
	  "mutate from %c to %c (r=%5f ,Matrix=%5f,k=%d)" MXDELIM
	  old_value MXDELIM value MXDELIM random_number MXDELIM
	  TheMatrix->value.dm[(int)TheAlphabet->value.s[k]][column] MXDELIM k);
  
  /* report the mutation only if the character really changed;
     dist > 0 is already guaranteed by the early return above */
  if (old_value != value) {
    MXDEBUG(DEBUG_CHAR_MUTATION MXDELIM
	    "mutate from %c to %c" MXDELIM
	    old_value MXDELIM value);
  }
  
  /* return the character */
  return value;
}


/* -------------------------------------------------------------- 
   get_random_int

   Draws one value from lrand48() and reduces it modulo hi, so
   for hi > 0 the result is a random number in [0..hi).
   For hi == 0 the function returns 0 instead of dividing by zero.
   --------------------------------------------------------------  */
int
get_random_int(int hi) {

  /* one value is always taken from the generator, even if hi is 0 */
  int drawn = lrand48();

  /* we really don't want a modulo by zero here */
  return (hi == 0) ? 0 : (drawn % hi);
}

/* -------------------------------------------------------------- 
   get_random_double

   Scales one value from drand48() by hi, i.e. returns a random
   number in [0..hi) for a positive hi.
   --------------------------------------------------------------  */
double
get_random_double(double hi) {

  double unit_sample = drand48();

  return unit_sample * hi;
}


/* -------------------------------------------------------------
   mult_matrix

   Multiplies matrix m1 with matrix m2. Both are treated as
   square matrices with TheAlphabet->dim[0] rows and columns.

   The product is written to the file-level buffer tempMatrix,
   which is allocated on the first call and reused afterwards.
   m1 and m2 themselves are only read here.
   NOTE(review): the result is NOT stored in m1 by this function;
   callers presumably copy tempMatrix back themselves -- verify.
   ------------------------------------------------------------- */
void 
mult_matrix(double **m1, double **m2){

  const int size = TheAlphabet->dim[0];
  int row, col, idx;

  /* create the shared result buffer the first time we get here */
  if (tempMatrix == NULL) {
    tempMatrix = (double **)malloc(size * sizeof(double *));
    if (tempMatrix == NULL) {
      mxError(DO_EXIT, "failed to calloc");
    }
    for (row = 0; row < size; row++) {
      tempMatrix[row] = (double *)malloc(size * sizeof(double));
      if (tempMatrix[row] == NULL) {
	mxError(DO_EXIT, "failed to calloc");
      }
    }
  }

  /* plain row-times-column product, one result cell at a time */
  for (row = 0; row < size; row++) {
    const double *left = m1[row];
    double *out = tempMatrix[row];

    for (col = 0; col < size; col++) {
      double acc = 0.0;

      for (idx = 0; idx < size; idx++) {
	acc += left[idx] * m2[idx][col];
      }
      out[col] = acc;
    }
  }
}


/* --------------------------------------------------
   normalize_matrix

   Divides every entry of 'matrix' by the sum of its
   first column. The matrix is modified in place and
   is treated as square with TheAlphabet->dim[0] rows
   and columns.

   Only column 0 is summed up; the other columns are
   assumed to have the same sum.
   -------------------------------------------------- */
void normalize_matrix(double **matrix){

  const int size = TheAlphabet->dim[0];
  double column_total = 0.0;
  int row, col;

  /* sum of the first column is the common divisor */
  for (row = 0; row < size; row++) {
    column_total += matrix[row][0];
  }

  /* scale all entries by that divisor */
  for (row = 0; row < size; row++) {
    double *cells = matrix[row];

    for (col = 0; col < size; col++) {
      cells[col] = cells[col] / column_total;
    }
  }
}


/* --------------------------------------------------
   copy_matrix

   auxiliary function, copies the entries of m2 to m1.
   Both matrices are treated as square with
   TheAlphabet->dim[0] rows and columns; m1 has to be
   allocated by the caller.
   -------------------------------------------------- */
void 
copy_matrix(double **m1, double **m2){

  const int size = TheAlphabet->dim[0];
  int row, col;

  for (row = 0; row < size; row++) {
    double *dst = m1[row];
    const double *src = m2[row];

    /* element by element, in ascending column order */
    for (col = 0; col < size; col++) {
      dst[col] = src[col];
    }
  }
}

/* --------------------------------------------------
   return_max

   auxiliary function, returns the largest entry of
   'matrix' (square, TheAlphabet->dim[0] rows and
   columns).

   The search starts at 0.0, so the result is never
   negative: a matrix without positive entries gives
   0.0.
   -------------------------------------------------- */
double 
return_max(double **matrix){

  const int size = TheAlphabet->dim[0];
  double largest = 0.0;
  int row, col;

  for (row = 0; row < size; row++) {
    const double *cells = matrix[row];

    for (col = 0; col < size; col++) {
      if (cells[col] > largest) {
	largest = cells[col];
      }
    }
  }

  return largest;
}
