/*
 *
 * Copyright (c) 2007
 * Adrian Michel
 * http://www.tradery.com
 *
 * Permission to use, copy, modify, distribute and sell this software
 * and its documentation for any purpose is hereby granted without fee,
 * provided that the above copyright notice appear in all copies and
 * that both that copyright notice and this permission notice appear
 * in supporting documentation.  Adrian Michel makes no
 * representations about the suitability of this software for any
 * purpose.  It is provided "as is" without express or implied warranty.
*/


package com.tradery.contentmodel;

import com.tradery.collections.IntegerPair;
import com.tradery.collections.NaryTree;
import com.tradery.collections.SetAsHashtable;
import com.tradery.collections.IntegerSet;
import com.tradery.collections.IntegerVector;


import java.util.Hashtable;
import java.util.Dictionary;
import java.util.Enumeration;
import java.util.Stack;
import java.lang.Integer;
import java.util.Vector;
import java.lang.Math;

import java.io.IOException;
import java.io.OutputStream;

import com.tradery.contract.Contract;

/***************************************************
 * the new content model validation class
 * creates the syntax tree and generates the corresponding DFA 
 * using one of two algorithms: 
 * - one which also determines the content model expression ambiguousness, but doesn't
 *       handle ranges (interprets [0, n] as "*" and [ m, n ] as "+"
 *   this algorithm is based on the functions nullable, firstpos, lastpos and followpos
 * - another one that handles ranges, but is less efficient than the first one
 * 
 * In the syntax tree all non-leaf nodes are operators (AND, OR, *, +, ? or range) 
 * and all the leaf nodes are operands (symbols in the alphabet)
 * 
 * All content models (operator ALL: & ) are supported with the limitation that only optional first level elements
 * are accepted. For example: a&b&c, a?&b&c? are accepted. a&(b,c) , a?&(b,c)& not accepted
 *
 * 
 * TODO: create iterators for the class tree to traverse the tree in post/preorder 
 ***************************************************/
public class SyntaxTree extends NaryTree
{
  // the name of the model
  // can be used to identify the model in the cache 
  private final String name;

  // maps positions (integers) to the respective nodes
  // used in the reverse lookup - positions to symbols
  // note that 0 contains null or the empty element if there is one
  private Alphabet alphabet = new Alphabet();

  // used by the Aho, Sethi, Ullman algorithm
  private Integer terminalPos;

  // the state machine. It can be one of StateMachine (without ranges)
  // or StateMachine1 (with ranges)
  private AbstractStateMachine sm;

  // when set, this flag indicates that the model has been compiled
  private boolean compiled = false;

  // when set, indicates that the functions followpos have been calculated
  private boolean functionsCalculated = false;

  // Internal flag indicating that the binary option for subtrees starting at AND nodes
  // Subtrees starting at OR nodes do not need to be made binary (I changed the 
  // algorithm for calculating the functions so it takes n-ary subtrees at OR nodes)
  private static final boolean binary = false;

  // this will indicate the state.
  // since the initial state is 0, in the StateMachine, we start here with 1
  private int crtPosition = 1;

  /**
   * associate an unique integer with each range object
   * this is the current integer, and we start at 0
   */
  private int crtRangePosition = 0;

  /**
   * set for a syntax tree that has ranges
   * used to chose the state machine and state machine generation algorithm
   */
  private boolean hasRanges = false;

  /**
   * Indicates that the content model contains an All operator (&). In this case
   * the content model cannot be nested.
   */
  private boolean isAllContentModel = false;

  // this maps pairs of ints indicating transitions, to range nodes, to which they correspond
  // contains ranges that start and end on nodes represented by the pair of ints
  // used in generating the state machine with ranges (StataMachine2)
  private StatesPairsToRange statesPairToRange = new StatesPairsToRange();

  private Vector rangeNodes = new Vector();

  // this maps pairs end/start range states to the range they correspond to
  // it is used when generating the state machine with ranges (StateMachine2)
  // if two different ranges have the same pair of states, then the content
  // model is ambiguous
  // TODO: see if this extension to the notion of ambiguousness makes sense.
  private class StatesPairsToRange extends Hashtable
  {
    boolean add( Integer from, Integer to, OperatorRangeNode node )
    {
      if( Contract.REQUIRE )
        Contract.require( node != null );

      IntegerPair ip = new IntegerPair( from, to );

      // if it contains this pair already as a key,
      // the content model is ambiguous
      return put( ip, node ) == null;
    }
    OperatorRangeNode get( int from, int to )
    {
      return(OperatorRangeNode)get( new IntegerPair( from, to ) );
    }
  }

  /********************************************************
   * the alphabet is a list of all symbols in the language 
   * recognized by the regular expression it is implemented as 
   * a vector where all the symbols are at the indexes 
   * indicated by their position (as defined by the algorithm)
   * 
   * Note: It is assumed that the empty symbol (if there is at least one)
   * is always at index 0 , and non-empty symbol start at 1.
   ********************************************************/
  private class Alphabet extends Vector
  {
    private int size = 0;
    private SetAsHashtable uniqueSymbols = new SetAsHashtable();

    // sets index 0 to null, to signal that initially, there was no empty element
    Alphabet()
    {
      addElement( null );
    }

    void addSymbol( AbstractSymbolNode symbolNode )
    {
      int index = symbolNode.getPosition();
      if( index >= size() )
        setSize( index + 10 );
      setElementAt( symbolNode, index );
      size = Math.max( size, index + 1 );
      if( symbolNode.getValue().length() > 0 )
        uniqueSymbols.put( symbolNode.getValue() );
    }

    SymbolNode getSymbol( int pos )
    {
      return(SymbolNode)elementAt( pos );
    }

    SetAsHashtable getUniqueSymbols()
    {
      return uniqueSymbols;
    }

    int getSize()
    {
      return size;
    }
    // indicates whether the empty symbol is in the alphabet
    boolean hasEmptySymbol()
    {
      return elementAt( 0 ) != null;
    }

    public void dump( OutputStream os )
    throws IOException
    {
      String str = new String();
      str += "************ Alphabet ****************\n";
      str += "size: " + ( new Integer( size ) ).toString() + "\n";
      for( int n = 0; n < size; n++ )
      {
        AbstractSymbolNode node = (AbstractSymbolNode) elementAt( n );
        String str1 = null;
        if( node != null )
          str1 = node.getValue();
        str += str1 + " " + ( new Integer( n ) ).toString() + "\n";
      }

      os.write( str.getBytes() );
    }
  }

  /**************************************************
   * Base       class for all model     tree nodes
   **************************************************/
  protected static abstract class ModelNode extends NaryTree.Node
  {
    private boolean nullable;
    // these sets are calculated by each node
    // Note: for some of the nodes, they are references to objects in other nodes,
    // when it is known that they are not distinct.
    // Care should be taken not to change these inadvertently, by silently changing
    // the referenced object.
    private IntegerSet firstpos = null;
    private IntegerSet lastpos = null;
    private IntegerSet internalpos = new IntegerSet();

    // used by clone only
    private ModelNode( ModelNode node )
    {
      super( node );
    }  
    protected ModelNode()
    {
    }
    protected ModelNode( String name )
    {
      super( name );
    }

    public String toString()
    {
      return super.toString();
    }

    final boolean nullable()
    {
      return nullable;
    }
    final IntegerSet firstpos()
    {
      return firstpos;
    }
    final IntegerSet lastpos()
    {
      return lastpos;
    }
    final IntegerSet internalpos()
    {
      return internalpos;
    }

    final protected void setFirstPos( IntegerSet fp )
    {
      firstpos = fp;
    }
    final protected void setLastPos( IntegerSet lp )
    {
      lastpos = lp;
    }
    final protected void setNullable( boolean _nullable )
    {
      nullable = _nullable;
    }

    //          abstract void dumpExpression();
    void postCalculate() throws AmbiguousContentModelException { calculateInternalPos();}

    // calculates firstpos, lastpos and nullable for a node
    // default version doesn't do anything - the operator nodes will override this method
    abstract void calculate();
    // calculates followpos 
    void calculateFollowpos()
    {
    }

    abstract void calculateInternalPos();
    boolean isPosInternal( int pos )
    {
      return internalpos.contains( pos );
    }        

    public void dump( OutputStream os ) 
    throws IOException
    {
      //                        super.dump();  
      os.write( (" Nullable: " + ( new Boolean( nullable() ) ).toString() + ", ").getBytes() );
      if( firstpos() != null )
      {
        os.write( ( "Firstpos: " ).getBytes() );
        firstpos().dump( os );
        os.write( ( ", " ).getBytes() );
      }
      if( lastpos() != null )
      {
        os.write( ( "Lastpos: " ).getBytes() );
        lastpos().dump( os );
        //                              ModelWriter.print( "," );
      }
    }

    // checks that a particular node can have any more children
    abstract boolean canHaveMoreChildren();
    // checks that a particular node is valid
    abstract boolean check();
  }

  /**************************************************
   * Operator   node abstract class
   *  generic operator node class
   *  base class for the concrete operators classes
   ***************************************************/
  protected static abstract class OperatorNode extends ModelNode
  {

    OperatorNode( OperatorNode node )
    {
      super( node );
    }
    OperatorNode( String name )
    {
      super( name );
    }

    public String toString()
    {
      return super.toString();
    }

    boolean isAmbiguous()
    {
      ModelNode node1 = (ModelNode)getFirstChild();
      ModelNode node2 = (ModelNode) getLastChild();

      return isAmbiguous( node1, node2 );
    }

    protected abstract boolean isAmbiguous( ModelNode node1, ModelNode node2 );

    // this actually only does something for AND
    // OR knows how to deal with n-ary, and other operators have one and only one child
    OperatorNode makeBinary( OperatorNode node )
    {
      return node;
    }

    void calculateInternalPos()
    {
      for( ModelNode node = (ModelNode) getFirstChild(); node != null; node = (ModelNode)node.getNextSibling() )
        internalpos().reunion( node.internalpos() );
    }
  }

  static private abstract class UnaryOperatorNode extends OperatorNode
  {
    UnaryOperatorNode( String name )
    {
      super( name );
    }
    UnaryOperatorNode( UnaryOperatorNode node )
    {
      super( node );
    }
    // an unary operator cannot have more than 1 child
    boolean canHaveMoreChildren()
    {
      return getChildrenCount() == 0;
    }
    // an unary operator is valid if it has exactly one child
    boolean check()
    {
      return getChildrenCount() == 1;
    }
    protected void calculateFirstPos( ModelNode node )
    {
      setFirstPos( node.firstpos() );
    }
    protected void calculateLastPos( ModelNode node )
    {
      setLastPos( node.lastpos() );
    }
    final void calculate()
    {
      ModelNode node = (ModelNode)getFirstChild();

      calculateNullable( node );
      calculateFirstPos( node );
      calculateLastPos( node );
    }
    protected abstract void calculateNullable(ModelNode node );
  }

  static private abstract class NaryOperatorNode extends OperatorNode
  {
    final void calculate()
    {
      ModelNode node1 = (ModelNode)getFirstChild();
      ModelNode node2 = (ModelNode) getLastChild();

      calculateNullable( node1, node2 );
      calculateFirstPos( node1, node2 );
      calculateLastPos( node1, node2 );
    }
    protected abstract void calculateFirstPos( ModelNode node1, ModelNode node2 );
    protected abstract void calculateLastPos(ModelNode node1, ModelNode node2 );
    protected abstract void calculateNullable(ModelNode node1, ModelNode node2 );

    NaryOperatorNode( String name )
    {
      super( name );
    }
    NaryOperatorNode( NaryOperatorNode node )
    {
      super( node );
    }
    // an nary operator can have more children
    boolean canHaveMoreChildren()
    {
      return true;
    }
    // an nary operator is valid if it has at least 2 children
    boolean check()
    {
      return getChildrenCount() >= 2;
    }
  }

  /***********************************************
   * Abstract base class for symbols and empty symbols
   **********************************************/
  private static abstract class AbstractSymbolNode extends ModelNode
  {
    private int position = 0;
    protected String value = null;

    private AbstractSymbolNode( AbstractSymbolNode node )
    {
      super( node ); value = node.getValue(); position = node.getPosition();
    }
    protected AbstractSymbolNode( String _value, int _position )
    {
      super( _value ); value = _value; position = _position;
    }
    String getValue()
    {
      return value;
    }
    public String toString()
    {
      return "\"" + value + "\"";
    }
    boolean hasValue( String _value )
    {
      return value.equals( _value );
    }
    int getPosition()
    {
      return position;
    }
    // a symbol node cannot have any more children
    boolean canHaveMoreChildren()
    {
      return false;
    }
    // a symbol node is valid if it doesn't have any children (it is a leaf in the syntax tree)
    boolean check()
    {
      return getChildrenCount() == 0;
    }

    boolean isOptional()
    {
      ModelNode node = (ModelNode)super.getParent();

      if( node != null )
      {
        /** @todo use polymorphism here and not instanceof. For the moment use this to validate the ideas */
        if( node instanceof OperatorQMarkNode )
          return true;
        else
          return false;
      }
      else
        return false;
    }

  }


  /**************************************************
   * symbol node class
   ***************************************************/
  private static class SymbolNode extends AbstractSymbolNode
  {
    private IntegerSet followpos = new IntegerSet();
    private IntegerSet startRanges = new IntegerSet();
    private IntegerSet endRanges = new IntegerSet();                            

    IntegerSet startRanges()
    {
      return startRanges;
    }
    IntegerSet endRanges()
    {
      return endRanges;
    }

    private SymbolNode( SymbolNode node )
    {
      super( node );
    }
    SymbolNode( String _value, int _pos )
    {
      super( _value, _pos );
    }
    public String toString()
    {
      return super.toString();
    }
    public Object clone()
    {
      return new SymbolNode( this );
    }
    protected IntegerSet followpos()
    {
      return followpos;
    }
    boolean hasValue( String _value )
    {
      return value.equals( _value );
    }

    void calculate()
    {
      setFirstPos( new IntegerSet( getPosition() ) );
      setLastPos( new IntegerSet( getPosition() ) );
      setNullable( false );
    }

    public void dump( OutputStream os ) 
    throws IOException
    {
      os.write( ( "\"" + value + "\" " + ( new Integer( getPosition() ) ).toString() + " " ).getBytes() );
      os.write( ( " Followpos: " ).getBytes() );
      followpos.dump( os );
      super.dump( os ); 
    }

    void calculateInternalPos()
    {
      internalpos().add( getPosition() );
    }
    void addEndRange( OperatorRangeNode node )
    {
      endRanges.add( node.getPos() );
    }
    void addStartRange( OperatorRangeNode node )
    {
      startRanges.add( node.getPos() );
    }


    //          void dumpExpression() { ModelWriter.print( value ); }
    boolean isAmbiguous()
    {
      return false;
    }
  }

  /***********************************************
   * Empty symbol node class
   * Objects of this class are created to model the empty input
   * for example (a|<empty>) is the same as a?
   **********************************************/
  private class EmptySymbolNode extends AbstractSymbolNode 
  {
    private final static String empty = "";
    private EmptySymbolNode()
    {
      super( empty, 0 );
    }
    EmptySymbolNode( EmptySymbolNode node )
    {
      super( node );
    }
    public Object clone()
    {
      return new EmptySymbolNode( this );
    }
    public void dump( OutputStream os )
      throws IOException
    {
      os.write( ( "<E> " + (new Integer( getPosition() ) ).toString() ).getBytes() );
      super.dump( os );
    }
    void dumpExpression()
    {
      ModelWriter.print( toString() );
    }
    boolean isAmbiguous()
    {
      return false;
    }
    public String toString()
    {
      return "<E>";
    }

    void calculate() 
    {
      setNullable( true );
      setFirstPos( new IntegerSet() );
      setLastPos( new IntegerSet() );
    }
    void calculateInternalPos()
    {
    }
  }

  /***********************************************
   * Operator Star node
   **********************************************/
  static private class OperatorStarNode extends UnaryOperatorNode
  {
    Alphabet alphabet;

    OperatorStarNode( String name, Alphabet _alphabet )
    {
      super( name ); alphabet = _alphabet;
    }
    OperatorStarNode( OperatorStarNode node )
    {
      super( node );
    }
    public void dump( OutputStream os )
      throws IOException
    {
      os.write( ( "*" ).getBytes() );
      super.dump( os );
    }
    public String toString()
    {
      ModelNode node = (ModelNode)getFirstChild();
      String string = new String( node.toString() );
      string += "*";
      return string;
    }     
    void dumpExpression()
    {
      ModelWriter.print( toString() );
    }
    //          public String toString() { return "*"; }
    public Object clone()
    {
      return new OperatorStarNode( this );
    }

    // this operator only has a child, so node1 == node2
    protected void calculateNullable( ModelNode node )
    {
      setNullable( true );
    }
    void calculateFollowpos()
    {
      for( Enumeration e = lastpos().elements(); e.hasMoreElements(); )
      {
        SymbolNode node = ( SymbolNode )alphabet.elementAt( ( ( Integer)e.nextElement() ).intValue() );

        node.followpos().reunion( firstpos() ); 
      }                              
    }
    protected boolean isAmbiguous( ModelNode node1, ModelNode node2 )
    {
      return false;
    }
  }

  /***********************************************
   * Operator Plus node
   **********************************************/
  static private class OperatorPlusNode extends UnaryOperatorNode
  {
    Alphabet alphabet;
    OperatorPlusNode( String name, Alphabet _alphabet )
    {
      super( name ); alphabet = _alphabet;
    }
    OperatorPlusNode( OperatorPlusNode node )
    {
      super( node );
    }
    public void dump( OutputStream os )
      throws IOException
    {
      os.write( ( "+" ).getBytes() ); 
      super.dump( os );
    }
    public String toString()
    {
      String string = new String();
      ModelNode node = (ModelNode)getFirstChild();
      string = node.toString();
      string += "+";
      return string;
    }

    void dumpExpression()
    {
      ModelWriter.print( toString() );
    }
    //          public String toString() { return "+"; }
    public Object clone()
    {
      return new OperatorPlusNode( this );
    }

    // this operator only has a child, so node1 == node2
    protected void calculateNullable( ModelNode node )
    {
      setNullable( node.nullable() );
    }
    void calculateFollowpos()
    {
      for( Enumeration e = lastpos().elements(); e.hasMoreElements(); )
      {
        SymbolNode node = ( SymbolNode )alphabet.elementAt( ( ( Integer)e.nextElement() ).intValue() );

        node.followpos().reunion( firstpos() ); 
      }                              
    }
    protected boolean isAmbiguous( ModelNode node1, ModelNode node2 )
    {
      return false;
    }
  }


  /**************************************************
   * Operator   Range node class
   * INFINITY is used to model an unlimited max value
   ***************************************************/
  static private class OperatorRangeNode extends UnaryOperatorNode
  {
    private final Limits limits;
    private final int pos;
    Alphabet alphabet;
    StatesPairsToRange statesPairToRange;

    Limits getLimits()
    {
      return limits;
    }
    int getPos()
    {
      return pos;
    }
    OperatorRangeNode( OperatorRangeNode node )
    {
      super( node ); pos = node.getPos(); limits = node.getLimits();
    }
    OperatorRangeNode( String name, int _pos, Limits _limits, Alphabet _alphabet, StatesPairsToRange _statesPairToRange )
    throws BadLimitsException
    {