// TreeHelper.java
// Copyright (C) 1999-2000 Washington University School of Medicine
// and Howard Hughes Medical Institute
// All rights reserved


// Contains static methods mainly for reading and writing Trees.


package forester.tree;

import java.io.*;
import java.net.*;
import java.util.*;
 
/**

Collection of static helper methods, mainly for reading, writing and
validating Trees in New Hampshire (NH) / New Hampshire X (NHX) format,
for tidying species names, and for creating simple test Trees.

@author Christian M. Zmasek

@version 1.05 -- last modified: 07/17/00


*/
public class TreeHelper {


    /** Characters which terminate a SWISS-PROT species abbreviation. */
    private static final String SPECIES_DELIMITERS = "/_-\\;.";

    /** Maximal number of characters kept for a species abbreviation. */
    private static final int MAX_SPECIES_LENGTH = 5;



    /**

    Reads a Tree in NH or NHX format from a textfile f.
    All lines of the file are concatenated (line breaks are dropped)
    and handed to the Tree constructor.
    If boolean check is true it will validate the correctness
    of the NH/NHX format first (slow).

    @param f     textfile containing one Tree description
    @param check true to validate the format with checkNHformat( String )

    @return the Tree constructed from the contents of f

    @throws IOException if f does not exist or is not a file
    @throws Exception   if reading fails, if the validation reports an
                        error, or if the Tree constructor fails

    */
    public static Tree readNHtree( File f, boolean check ) throws Exception {

        if ( !f.exists() ) {
            throw new IOException( f.getAbsolutePath() + " does not exist." );
        }
        else if ( !f.isFile() ) {
            throw new IOException( f.getAbsolutePath() + " is not a file."  );
        }

        String nh_string;

        try {
            nh_string = readAllLines( new FileReader( f ) );
        }
        catch ( IOException e ) {
            throw new Exception( "readNHtree( File, boolean ): " + e );
        }

        return createTree( nh_string, check, "readNHtree( File, boolean ): " );

    }



    /**

    Reads a Tree in NH or NHX format from a URL url.
    All lines of the resource are concatenated (line breaks are dropped)
    and handed to the Tree constructor.
    If boolean check is true it will validate the correctness
    of the NH/NHX format first (slow).

    @param url   location of a text resource containing one Tree description
    @param check true to validate the format with checkNHformat( String )

    @return the Tree constructed from the contents of url

    @throws Exception if opening or reading the URL fails, if the validation
                      reports an error, or if the Tree constructor fails

    */
    public static Tree readNHtree( URL url, boolean check ) throws Exception {

        String nh_string;

        try {
            nh_string = readAllLines( new InputStreamReader( url.openStream() ) );
        }
        catch ( Exception e ) {
            // Deliberately broad: a null url is reported the same way
            // as an I/O problem.
            throw new Exception( "Tree: readNHtree( URL, boolean ): " + e );
        }

        return createTree( nh_string, check, "readNHtree( URL, boolean ): " );

    }



    /**

    Writes a Tree t to a textfile f.
    Set boolean nhx to true to write Tree in New Hampshire X (NHX) format.
    Set boolean nhx to false to write Tree in New Hampshire (NH) format.
    Both overwrite1 and overwrite2 need to be true to allow for overwriting.

    @param t          Tree to be written, must not be empty
    @param f          destination file
    @param nhx        true for NHX format, false for NH format
    @param overwrite1 first half of the permission to overwrite f
    @param overwrite2 second half of the permission to overwrite f

    @throws Exception if t is empty, if f exists and must not be overwritten,
                      if f exists and is not a file, or if writing fails

    */
    static public void writeNHtree(
    Tree t, File f, boolean nhx, boolean overwrite1, boolean overwrite2 )
    throws Exception {

        if ( t.isEmpty() ) {
            throw new Exception( "writeNHtree( Tree, File, boolean, boolean,"
            + " boolean ): Tree must not be empty." );
        }

        if ( f.exists() && !( overwrite1 && overwrite2 ) ) {
            throw new Exception ( f.getAbsolutePath() + " does already exist and is not allowed to be overwritten." );
        }
        if ( f.exists() && !f.isFile() ) {
            throw new Exception( f.getAbsolutePath() + " is not a file. Cannot be overwritten." );
        }

        String s;

        if ( nhx ) {
            s = t.toNewHampshireX();
        }
        else {
            s = t.toNewHampshire( false );
        }

        try {
            PrintWriter out = new PrintWriter( new FileWriter( f ), true );
            try {
                out.println( s );
            }
            finally {
                out.close();
            }
            // A PrintWriter never throws on write errors, it only records
            // them. Without this check a failed write would go unnoticed.
            if ( out.checkError() ) {
                throw new IOException( "Could not write to " + f.getAbsolutePath() + "." );
            }
        }
        catch ( Exception e ) {
            throw new Exception ( "writeNHtree( Tree, File, boolean, boolean, boolean ): " + e );
        }
    }



    /**

    Reads in multiple Trees from a File multipletreefile,
    containing Tree descriptions in New Hampshire (NH) or
    New Hampshire X (NHX) format separated by semicolons followed
    by a newline. Returns a Tree array.
    <p>
    Lines are accumulated until a line containing a semicolon is
    encountered; the accumulated text then becomes one Tree. Text
    after the last such line is ignored. The file is read once.

    @param multipletreefile Textfile containing Tree descriptions
                            in NH or NHX format separated by
                            semicolons followed by a newline

    @return array of Trees, of length zero if no line contains a semicolon

    @throws IOException if multipletreefile does not exist, is not a file,
                        or cannot be read
    @throws Exception   if the Tree constructor fails

    */
    public static Tree[] readMultipleNHTrees( File multipletreefile )
    throws Exception {

        if ( !multipletreefile.exists() ) {
            throw new IOException( multipletreefile.getAbsolutePath()
            + " does not exist." );
        }

        else if ( !multipletreefile.isFile() ) {
            throw new IOException( multipletreefile.getAbsolutePath()
            + " is not a file."  );
        }

        Tree[] trees = new Tree[ 16 ];
        int number_of_trees = 0;
        StringBuffer nh_string = new StringBuffer();

        BufferedReader in = new BufferedReader( new FileReader( multipletreefile ) );
        try {
            String incoming;
            while ( ( incoming = in.readLine() ) != null ) {
                nh_string.append( incoming );
                if ( incoming.indexOf( ';' ) != -1 ) {
                    if ( number_of_trees == trees.length ) {
                        Tree[] larger = new Tree[ 2 * trees.length ];
                        System.arraycopy( trees, 0, larger, 0, number_of_trees );
                        trees = larger;
                    }
                    trees[ number_of_trees++ ] = new Tree( nh_string.toString() );
                    nh_string.setLength( 0 );
                }
            }
        }
        finally {
            closeQuietly( in );
        }

        Tree[] result = new Tree[ number_of_trees ];
        System.arraycopy( trees, 0, result, 0, number_of_trees );
        return result;

    }



    /**

    For each node of Tree tree: Extracts the species name
    (as SWISS-PROT abbreviation) from the sequence name
    (if SWISS-PROT names are used) and writes it to
    the species name field if this is empty.
    It extracts no more than five letters after the first
    "_" (which must be present) and before potential
    "/", "_", "-", "\", ";", ".". The sequence name is trimmed
    first and the result is changed to upper case.
    <p>
    Does nothing if tree is null or empty.
    <p>
    NOTE(review): if the iterator cannot be created this method prints
    the stack trace and terminates the JVM via System.exit( -1 ).
    This is kept for compatibility with existing callers.

    @param tree Tree whose nodes are to be processed

    */
    public static void extractSpeciesNameFromSPseqName( Tree tree ) {

        if ( tree == null || tree.isEmpty() ) {
            return;
        }

        PreorderTreeIterator it = null;

        try {
            it = new PreorderTreeIterator( tree );
        }
        catch ( Exception e )  {
            e.printStackTrace();
            System.err.println( "Could not create iterator. Terminating." );
            System.exit( -1 );
        }

        while ( !it.isDone() ) {

            if ( it.currentNode().getSpecies().trim().length() < 1 ) {
                String seqname = it.currentNode().getSeqName().trim();
                int underscore = seqname.indexOf( '_' );
                if ( underscore >= 0 ) {
                    it.currentNode().setSpecies(
                    toSpeciesCode( seqname.substring( underscore + 1 ) ) );
                }
            }
            it.next();
        }

    }



    /**

    For each external node of Tree tree: Cleans up SWISS-PROT
    species names: It removes everything (including the delimiter itself)
    after a potential "/", "_", "-", "\", ";", ".". It removes everything
    which comes after the fifth letter and it changes everything
    to upper case. Species names which are empty (after trimming)
    are left untouched.
    <p>
    Does nothing if tree is null or empty.

    @param tree Tree whose external nodes are to be processed

    */
    public static void cleanSpeciesNamesInExtNodes( Tree tree ) {

        if ( tree == null || tree.isEmpty() ) {
            return;
        }

        Node node = tree.getExtNode0();

        while ( node != null ) {
            String species = node.getSpecies().trim();
            if ( species.length() > 0 ) {
                node.setSpecies( toSpeciesCode( species ) );
            }
            node = node.getNextExtNode();
        }

    }



    /**

    Checks a String s potentially representing a Tree in
    NH or NHX format.
    <p>
    Before checking, white space and C-style comments are removed,
    everything in front of the first "(" is dropped (unless there is
    no "(" at all, i.e. the tree is just one node) and one trailing
    ";" is removed.

    @param s String to be checked

    @return empty String if no error detected, error message for faulty string

    */
    public static String checkNHformat( String s ) {

        StringBuffer sb = removeCstyleComments(
        removeWhiteSpace( new StringBuffer( s ) ) );

        String nh = sb.toString();

        // Remove anything before first "(", unless tree is just one node:
        int first_open = nh.indexOf( '(' );
        if ( first_open > 0 ) {
            nh = nh.substring( first_open );
        }

        // If ';' at end, remove it:
        if ( nh.endsWith( ";" ) ) {
            nh = nh.substring( 0, nh.length() - 1 );
        }

        int openparantheses = countAndCheckParantheses( nh );
        if ( openparantheses <= -1 ) {
            return "Openparantheses != closeparantheses.";
        }
        if ( !checkCommas( nh ) ) {
            return "Commas not properly set.";
        }
        String error = checkForUnnessaryParentheses( nh, openparantheses );
        if ( error.length() >= 1 ) {
            return error;
        }
        return "";
    }



    /**

    Checks whether StringBuffer sb is empty.

    @param sb StringBuffer to be checked

    @return true if empty, false for not empty StringBuffer

    */
    public static boolean isEmpty( StringBuffer sb )  {
        return sb.length() < 1;
    }



    /**

    Removes all blanks, tabs, line feeds and carriage returns
    from StringBuffer sb. The argument itself is not modified.

    @param sb StringBuffer to be processed

    @return new StringBuffer with white space removed

    */
    public static StringBuffer removeWhiteSpace( StringBuffer sb ) {
        int length = sb.length();
        StringBuffer cleaned = new StringBuffer( length );
        for ( int i = 0; i < length; i++ ) {
            char c = sb.charAt( i );
            if ( c != ' ' && c != '\t' && c != '\n' && c != '\r' ) {
                cleaned.append( c );
            }
        }
        return cleaned;
    }



    /**

    Removes C-style comments from StringBuffer sb.
    The argument itself is not modified. A comment which is
    not terminated extends to the end of the text. The search
    for the terminator starts behind the opening two characters,
    so that these cannot be mistaken for the end of the comment.

    @param sb StringBuffer to be processed

    @return new StringBuffer with C-style comments removed

    */
    public static StringBuffer removeCstyleComments( StringBuffer sb ) {
        String text = sb.toString();
        int length = text.length();
        StringBuffer cleaned = new StringBuffer( length );
        int i = 0;
        while ( i < length ) {
            if ( i + 1 < length
            && text.charAt( i ) == '/' && text.charAt( i + 1 ) == '*' ) {
                int end = text.indexOf( "*/", i + 2 );
                if ( end < 0 ) {
                    // Unterminated comment: drop the rest.
                    break;
                }
                i = end + 2;
            }
            else {
                cleaned.append( text.charAt( i ) );
                i++;
            }
        }
        return cleaned;
    }



    /**

    Checks whether the parentheses in String nh_string, potentially
    representing a Tree in NH or NHX format, are balanced: the number
    of "(" must equal the number of ")" and no ")" may appear before
    the "(" it belongs to.

    @param nh_string NH or NHX

    @return total number of  open parantheses if no error detected,
    -1 for faulty string

    */
    public static int countAndCheckParantheses( String nh_string ) {
        int openparantheses = 0;
        int depth = 0;
        for ( int i = 0; i < nh_string.length(); i++ ) {
            char c = nh_string.charAt( i );
            if ( c == '(' ) {
                openparantheses++;
                depth++;
            }
            else if ( c == ')' ) {
                depth--;
                if ( depth < 0 ) {
                    // Closing parenthesis without a preceding open one.
                    return -1;
                }
            }
        }
        if ( depth != 0 ) {
            return -1;
        }
        return openparantheses;
    }



    /**

    Checks the commas of a String nh_string potentially representing a Tree in
    NH or NHX format. Checks for "()", "(" not preceded by a "("
    or ",", ",,", "(,", and ",)".

    @param nh_string NH or NHX

    @return true if no error detected, false for faulty string

    */
    public static boolean checkCommas( String nh_string ) {
        for ( int i = 0; i + 1 < nh_string.length(); i++ ) {
            char current = nh_string.charAt( i );
            char next    = nh_string.charAt( i + 1 );
            if ( next == '(' && current != ',' && current != '(' ) {
                return false;
            }
            if ( next == ')' && ( current == '(' || current == ',' ) ) {
                return false;
            }
            if ( next == ',' && ( current == ',' || current == '(' ) ) {
                return false;
            }
        }
        return true;
    }



    /**

    Checks for unnessary parentheses in a String nh_string potentially
    representing a Tree in NH or NHX format. Such as: (X), (((X,Y)),Z)
    or ((X,Y)Z).
    <p>
    The string is scanned once. Parentheses which are never closed are
    skipped rather than causing an exception; use
    countAndCheckParantheses( String ) to detect those.

    @param nh_string NH or NHX
    @param openparantheses total number of open parantheses (int);
                           if smaller than 1 nothing is checked

    @return empty String if no error detected, error message for faulty string

    */
    public static String checkForUnnessaryParentheses( String nh_string, int openparantheses ) {

        if ( openparantheses < 1 ) {
            return "";
        }

        int length = nh_string.length();

        for ( int i = 0; i < length; i++ ) {

            if ( nh_string.charAt( i ) != '(' ) {
                continue;
            }

            // Check for unnessary parentheses: (X):
            for ( int p = i + 1; p < length; p++ ) {
                char c = nh_string.charAt( p );
                if ( c == ',' || c == '(' ) {
                    break;
                }
                if ( c == ')' ) {
                    return "Error in NH/X: Unnecessary parentheses at positions " + i + " and " + p + ".";
                }
            }

            // Check for unnessary parentheses: ((X,Y)) or ((X,Y)Z):
            if ( i + 1 >= length || nh_string.charAt( i + 1 ) != '(' ) {
                continue;
            }

            // Position behind the ")" closing the inner "(" at i + 1.
            int inner_end = positionAfterClosing( nh_string, i + 2, 1 );
            if ( inner_end < 0 ) {
                continue;
            }

            // Get distance to next ), in case of e.g. ((X,Y)Z):
            // (  (  X  ,  Y  )  Z  )
            // i i+1          A     B  distance=2
            // A "," or "(" on the way means the inner pair has siblings.
            int distance = 1;
            boolean need_to_check = true;
            for ( int k = inner_end;
            k < length && nh_string.charAt( k ) != ')'; k++ ) {
                char c = nh_string.charAt( k );
                if ( c == ',' || c == '(' ) {
                    need_to_check = false;
                    break;
                }
                distance++;
            }
            if ( !need_to_check ) {
                continue;
            }

            // Position behind the ")" closing the outer "(" at i.
            int outer_end = positionAfterClosing( nh_string, i + 2, 2 );
            if ( outer_end < 0 ) {
                continue;
            }

            if ( inner_end == ( outer_end - distance ) ) {
                return "Error in NH/X: Unnecessary parentheses at " + i + " and " + ( inner_end + distance - 1 ) + ".";
            }

        }

        return "";

    }



    /**

    Sets the species names of the external Nodes of Tree t to a random
    non-negative integer number between (and including) the absolute
    values of min and max. If |min| is larger than |max| the two
    bounds are swapped. Does nothing if t is null or empty.

    @param min minimal value for random numbers (absolute value is used)
    @param max maximum value for random numbers (absolute value is used)
    @param t whose external species names are to be randomized

    */
    public static void randomizeSpecies( int min, int max, Tree t ) {
        if ( t == null || t.isEmpty() ) {
            return;
        }
        // long arithmetic: Math.abs( int ) is negative for Integer.MIN_VALUE
        // and the size of the range may exceed Integer.MAX_VALUE.
        long a = Math.abs( ( long ) min );
        long b = Math.abs( ( long ) max );
        long lowest  = Math.min( a, b );
        long highest = Math.max( a, b );
        long span = highest - lowest + 1;

        Random r = new Random();
        Node n = t.getExtNode0();
        while ( n != null ) {
            // nextDouble() is in [0,1), hence offset is in [0,span-1].
            long offset = ( long ) ( r.nextDouble() * span );
            n.setSpecies( ( lowest + offset ) + "" );
            n = n.getNextExtNode();
        }
    }



    /**

    Sets the species names of the external Nodes of Tree t to
    ascending integers, starting with 1.
    Does nothing if t is null or empty.

    @param t Tree whose external species names are to be set

    */
    public static void numberSpeciesInOrder( Tree t ) {
        if ( t == null || t.isEmpty() ) {
            return;
        }
        int number = 1;
        for ( Node n = t.getExtNode0(); n != null; n = n.getNextExtNode() ) {
            n.setSpecies( number  + "" );
            number++;
        }

    }


    /**

    Sets the species names of the external Nodes of Tree t to
    descending integers, ending with 1. The first number is the
    value returned by getSumExtNodes() of the root.
    Does nothing if t is null or empty.

    @param t Tree whose external species names are to be set

    */
    public static void numberSpeciesInDescOrder( Tree t ) {
        if ( t == null || t.isEmpty() ) {
            return;
        }
        int number = t.getRoot().getSumExtNodes();
        for ( Node n = t.getExtNode0(); n != null; n = n.getNextExtNode() ) {
            n.setSpecies( number  + "" );
            number--;
        }

    }



    /**

    Sets the species name of the external Nodes of Tree t to
    1, 1+i, 2, 2+i, 3, 3+i, ....
    Examples: i=2: 1, 3, 2, 4
              i=4: 1, 5, 2, 6, 3, 7, 4, 8
              i=8: 1, 9, 2, 10, 3, 11, 4, 12, ...
    Does nothing if t is null or empty.

    @param t Tree whose external species names are to be set
    @param i offset added for every second external node

    */
    public static void intervalNumberSpecies( Tree t, int i ) {
        if ( t == null || t.isEmpty() ) {
            return;
        }

        int base = 1;
        boolean first_of_pair = true;
        for ( Node n = t.getExtNode0(); n != null; n = n.getNextExtNode() ) {
            if ( first_of_pair ) {
                n.setSpecies( base  + "" );
            }
            else {
                n.setSpecies( ( base + i )  + "" );
                base++;
            }
            first_of_pair = !first_of_pair;
        }

    }




    /**

    Creates a completely balanced Tree with 2^i external nodes.
    Starts with the rooted two-node Tree "(:S=,:S=)" and fuses the
    Tree with a copy of itself i - 1 times. For i smaller than 2 the
    loop is not entered and the two-node Tree is returned.
    <p>
    NOTE(review): on any exception this method prints a message and
    terminates the JVM via System.exit( -1 ). This is kept for
    compatibility with existing callers.

    @param i exponent determining the number of external nodes

    @return a newly created balanced Tree

    */
    public static Tree createBalancedTree( int i ) {

        Tree t1 = null,
             t2 = null;

        try {

            t1 = new Tree( "(:S=,:S=)" );
            t1.setRooted( true );

            for ( int j = 1; j < i; ++j ) {
                t2 = t1.copyTree();
                t1 = t1.fuseTrees( t1.getRoot().getID(), t2 );
            }
        }

        catch ( Exception e ) {
            System.err.println( "Unexpected exception during \"createBalancedTree\":" );
            System.err.println( e.toString() );
            System.exit( -1 );
        }

        return t1;
    }



    /**

    Creates a completely unbalanced Tree with i external nodes.
    Starts with the rooted one-node Tree ":S=" and calls
    addNodeAndConnect( "", "" ) i - 1 times, then resets the root
    and recalculates the cached Tree statistics.
    <p>
    NOTE(review): on any exception this method prints a message and
    terminates the JVM via System.exit( -1 ). This is kept for
    compatibility with existing callers.

    @param i number of external nodes

    @return a newly created unbalanced Tree

    */
    public static Tree createUnbalancedTree( int i ) {

        Tree t1 = null;

        try {

            t1 = new Tree( ":S=" );
            t1.setRooted( true );

            for ( int j = 1; j < i; ++j ) {
                t1.addNodeAndConnect( "", "" );
            }
            t1.setRoot( t1.getExtNode0().getRoot() );
            t1.calculateMostBranchesPerExtNode();
            t1.calculateLongestDistance();
            t1.calculateNumberOfBranches();
        }

        catch ( Exception e ) {
            System.err.println( "Unexpected exception during \"createUnbalancedTree\":" );
            System.err.println( e.toString() );
            System.exit( -1 );
        }

        return t1;
    }



    // Reads all lines from source and returns them concatenated without
    // line terminators; source is always closed.
    private static String readAllLines( Reader source ) throws IOException {
        BufferedReader in = new BufferedReader( source );
        try {
            StringBuffer text = new StringBuffer();
            String line;
            while ( ( line = in.readLine() ) != null ) {
                text.append( line );
            }
            return text.toString();
        }
        finally {
            closeQuietly( in );
        }
    }



    // Closes a Reader used for input only, ignoring failures of close().
    private static void closeQuietly( Reader in ) {
        try {
            in.close();
        }
        catch ( IOException ignored ) {
            // Nothing was written through this Reader, so a failing
            // close() cannot lose data and must not hide an earlier error.
        }
    }



    // Optionally validates nh_string, then constructs a Tree from it;
    // message_prefix starts the message of a validation failure.
    private static Tree createTree( String nh_string,
                                    boolean check,
                                    String message_prefix )
    throws Exception {
        if ( check ) {
            String error = checkNHformat( nh_string );
            if ( error.length() >= 1 ) {
                throw new Exception( message_prefix + error );
            }
        }
        return new Tree( nh_string );
    }



    // Returns the part of raw in front of the first delimiter, cut to at
    // most five characters and changed to upper case.
    private static String toSpeciesCode( String raw ) {
        int end = 0;
        while ( end < raw.length()
        && end < MAX_SPECIES_LENGTH
        && SPECIES_DELIMITERS.indexOf( raw.charAt( end ) ) < 0 ) {
            end++;
        }
        // Fixed locale: the default locale may map "i" to a dotted
        // capital I (e.g. Turkish), which is not wanted here.
        return raw.substring( 0, end ).toUpperCase( Locale.ENGLISH );
    }



    // Starting at position start with nesting level depth (not 0), returns
    // the position behind the ")" which brings the level down to 0, or -1
    // if the end of s is reached first.
    private static int positionAfterClosing( String s, int start, int depth ) {
        int position = start;
        while ( depth != 0 ) {
            if ( position >= s.length() ) {
                return -1;
            }
            char c = s.charAt( position );
            if ( c == '(' ) {
                depth++;
            }
            else if ( c == ')' ) {
                depth--;
            }
            position++;
        }
        return position;
    }


}
