/*
*
* Copyright (c) 2007
* Adrian Michel
* http://www.tradery.com
*
* Permission to use, copy, modify, distribute and sell this software
* and its documentation for any purpose is hereby granted without fee,
* provided that the above copyright notice appear in all copies and
* that both that copyright notice and this permission notice appear
* in supporting documentation. Adrian Michel makes no
* representations about the suitability of this software for any
* purpose. It is provided "as is" without express or implied warranty.
*/
package com.tradery.contentmodel;
import com.tradery.collections.IntegerPair;
import com.tradery.collections.NaryTree;
import com.tradery.collections.SetAsHashtable;
import com.tradery.collections.IntegerSet;
import com.tradery.collections.IntegerVector;
import java.util.Hashtable;
import java.util.Dictionary;
import java.util.Enumeration;
import java.util.Stack;
import java.lang.Integer;
import java.util.Vector;
import java.lang.Math;
import java.io.IOException;
import java.io.OutputStream;
import com.tradery.contract.Contract;
/***************************************************
* the new content model validation class
* creates the syntax tree and generates the corresponding DFA
* using one of two algorithms:
* - one which also determines the content model expression ambiguousness, but doesn't
* handle ranges (interprets [0, n] as "*" and [ m, n ] as "+"
* this algorithm is based on the functions nullable, firstpos, lastpos and followpos
* - another one that handles ranges, but is less efficient than the first one
*
* In the syntax tree all non-leaf nodes are operators (AND, OR, *, +, ? or range)
* and all the leaf nodes are operands (symbols in the alphabet)
*
* All content models (operator ALL: & ) are supported with the limitation that only optional first level elements
* are accepted. For example: a&b&c, a?&b&c? are accepted. a&(b,c) , a?&(b,c)& not accepted
*
*
* TODO: create iterators for the class tree to traverse the tree in post/preorder
***************************************************/
public class SyntaxTree extends NaryTree
{
// the name of the model
// can be used to identify the model in the cache
private final String name;
// maps positions (integers) to the respective nodes
// used in the reverse lookup - positions to symbols
// note that 0 contains null or the empty element if there is one
private Alphabet alphabet = new Alphabet();
// used by the Aho, Sethi, Ullman algorithm
private Integer terminalPos;
// the state machine. It can be one of StateMachine (without ranges)
// or StateMachine1 (with ranges)
private AbstractStateMachine sm;
// when set, this flag indicates that the model has been compiled
private boolean compiled = false;
// when set, indicates that the functions followpos have been calculated
private boolean functionsCalculated = false;
// Internal flag indicating that the binary option for subtrees starting at AND nodes
// Subtrees starting at OR nodes do not need to be made binary (I changed the
// algorithm for calculating the functions so it takes n-ary subtrees at OR nodes)
private static final boolean binary = false;
// this will indicate the state.
// since the initial state is 0, in the StateMachine, we start here with 1
private int crtPosition = 1;
/**
* associate an unique integer with each range object
* this is the current integer, and we start at 0
*/
private int crtRangePosition = 0;
/**
* set for a syntax tree that has ranges
* used to chose the state machine and state machine generation algorithm
*/
private boolean hasRanges = false;
/**
* Indicates that the content model contains an All operator (&). In this case
* the content model cannot be nested.
*/
private boolean isAllContentModel = false;
// this maps pairs of ints indicating transitions, to range nodes, to which they correspond
// contains ranges that start and end on nodes represented by the pair of ints
// used in generating the state machine with ranges (StataMachine2)
private StatesPairsToRange statesPairToRange = new StatesPairsToRange();
private Vector rangeNodes = new Vector();
// this maps pairs end/start range states to the range they correspond to
// it is used when generating the state machine with ranges (StateMachine2)
// if two different ranges have the same pair of states, then the content
// model is ambiguous
// TODO: see if this extension to the notion of ambiguousness makes sense.
private class StatesPairsToRange extends Hashtable
{
boolean add( Integer from, Integer to, OperatorRangeNode node )
{
if( Contract.REQUIRE )
Contract.require( node != null );
IntegerPair ip = new IntegerPair( from, to );
// if it contains this pair already as a key,
// the content model is ambiguous
return put( ip, node ) == null;
}
OperatorRangeNode get( int from, int to )
{
return(OperatorRangeNode)get( new IntegerPair( from, to ) );
}
}
/********************************************************
* the alphabet is a list of all symbols in the language
* recognized by the regular expression it is implemented as
* a vector where all the symbols are at the indexes
* indicated by their position (as defined by the algorithm)
*
* Note: It is assumed that the empty symbol (if there is at least one)
* is always at index 0 , and non-empty symbol start at 1.
********************************************************/
private class Alphabet extends Vector
{
private int size = 0;
private SetAsHashtable uniqueSymbols = new SetAsHashtable();
// sets index 0 to null, to signal that initially, there was no empty element
Alphabet()
{
addElement( null );
}
void addSymbol( AbstractSymbolNode symbolNode )
{
int index = symbolNode.getPosition();
if( index >= size() )
setSize( index + 10 );
setElementAt( symbolNode, index );
size = Math.max( size, index + 1 );
if( symbolNode.getValue().length() > 0 )
uniqueSymbols.put( symbolNode.getValue() );
}
SymbolNode getSymbol( int pos )
{
return(SymbolNode)elementAt( pos );
}
SetAsHashtable getUniqueSymbols()
{
return uniqueSymbols;
}
int getSize()
{
return size;
}
// indicates whether the empty symbol is in the alphabet
boolean hasEmptySymbol()
{
return elementAt( 0 ) != null;
}
public void dump( OutputStream os )
throws IOException
{
String str = new String();
str += "************ Alphabet ****************\n";
str += "size: " + ( new Integer( size ) ).toString() + "\n";
for( int n = 0; n < size; n++ )
{
AbstractSymbolNode node = (AbstractSymbolNode) elementAt( n );
String str1 = null;
if( node != null )
str1 = node.getValue();
str += str1 + " " + ( new Integer( n ) ).toString() + "\n";
}
os.write( str.getBytes() );
}
}
/**************************************************
* Base class for all model tree nodes
**************************************************/
protected static abstract class ModelNode extends NaryTree.Node
{
private boolean nullable;
// these sets are calculated by each node
// Note: for some of the nodes, they are references to objects in other nodes,
// when it is known that they are not distinct.
// Care should be taken not to change these inadvertently, by silently changing
// the referenced object.
private IntegerSet firstpos = null;
private IntegerSet lastpos = null;
private IntegerSet internalpos = new IntegerSet();
// used by clone only
private ModelNode( ModelNode node )
{
super( node );
}
protected ModelNode()
{
}
protected ModelNode( String name )
{
super( name );
}
public String toString()
{
return super.toString();
}
final boolean nullable()
{
return nullable;
}
final IntegerSet firstpos()
{
return firstpos;
}
final IntegerSet lastpos()
{
return lastpos;
}
final IntegerSet internalpos()
{
return internalpos;
}
final protected void setFirstPos( IntegerSet fp )
{
firstpos = fp;
}
final protected void setLastPos( IntegerSet lp )
{
lastpos = lp;
}
final protected void setNullable( boolean _nullable )
{
nullable = _nullable;
}
// abstract void dumpExpression();
void postCalculate() throws AmbiguousContentModelException { calculateInternalPos();}
// calculates firstpos, lastpos and nullable for a node
// default version doesn't do anything - the operator nodes will override this method
abstract void calculate();
// calculates followpos
void calculateFollowpos()
{
}
abstract void calculateInternalPos();
boolean isPosInternal( int pos )
{
return internalpos.contains( pos );
}
public void dump( OutputStream os )
throws IOException
{
// super.dump();
os.write( (" Nullable: " + ( new Boolean( nullable() ) ).toString() + ", ").getBytes() );
if( firstpos() != null )
{
os.write( ( "Firstpos: " ).getBytes() );
firstpos().dump( os );
os.write( ( ", " ).getBytes() );
}
if( lastpos() != null )
{
os.write( ( "Lastpos: " ).getBytes() );
lastpos().dump( os );
// ModelWriter.print( "," );
}
}
// checks that a particular node can have any more children
abstract boolean canHaveMoreChildren();
// checks that a particular node is valid
abstract boolean check();
}
/**************************************************
* Operator node abstract class
* generic operator node class
* base class for the concrete operators classes
***************************************************/
protected static abstract class OperatorNode extends ModelNode
{
OperatorNode( OperatorNode node )
{
super( node );
}
OperatorNode( String name )
{
super( name );
}
public String toString()
{
return super.toString();
}
boolean isAmbiguous()
{
ModelNode node1 = (ModelNode)getFirstChild();
ModelNode node2 = (ModelNode) getLastChild();
return isAmbiguous( node1, node2 );
}
protected abstract boolean isAmbiguous( ModelNode node1, ModelNode node2 );
// this actually only does something for AND
// OR knows how to deal with n-ary, and other operators have one and only one child
OperatorNode makeBinary( OperatorNode node )
{
return node;
}
void calculateInternalPos()
{
for( ModelNode node = (ModelNode) getFirstChild(); node != null; node = (ModelNode)node.getNextSibling() )
internalpos().reunion( node.internalpos() );
}
}
static private abstract class UnaryOperatorNode extends OperatorNode
{
UnaryOperatorNode( String name )
{
super( name );
}
UnaryOperatorNode( UnaryOperatorNode node )
{
super( node );
}
// an unary operator cannot have more than 1 child
boolean canHaveMoreChildren()
{
return getChildrenCount() == 0;
}
// an unary operator is valid if it has exactly one child
boolean check()
{
return getChildrenCount() == 1;
}
protected void calculateFirstPos( ModelNode node )
{
setFirstPos( node.firstpos() );
}
protected void calculateLastPos( ModelNode node )
{
setLastPos( node.lastpos() );
}
final void calculate()
{
ModelNode node = (ModelNode)getFirstChild();
calculateNullable( node );
calculateFirstPos( node );
calculateLastPos( node );
}
protected abstract void calculateNullable(ModelNode node );
}
static private abstract class NaryOperatorNode extends OperatorNode
{
final void calculate()
{
ModelNode node1 = (ModelNode)getFirstChild();
ModelNode node2 = (ModelNode) getLastChild();
calculateNullable( node1, node2 );
calculateFirstPos( node1, node2 );
calculateLastPos( node1, node2 );
}
protected abstract void calculateFirstPos( ModelNode node1, ModelNode node2 );
protected abstract void calculateLastPos(ModelNode node1, ModelNode node2 );
protected abstract void calculateNullable(ModelNode node1, ModelNode node2 );
NaryOperatorNode( String name )
{
super( name );
}
NaryOperatorNode( NaryOperatorNode node )
{
super( node );
}
// an nary operator can have more children
boolean canHaveMoreChildren()
{
return true;
}
// an nary operator is valid if it has at least 2 children
boolean check()
{
return getChildrenCount() >= 2;
}
}
/***********************************************
* Abstract base class for symbols and empty symbols
**********************************************/
private static abstract class AbstractSymbolNode extends ModelNode
{
private int position = 0;
protected String value = null;
private AbstractSymbolNode( AbstractSymbolNode node )
{
super( node ); value = node.getValue(); position = node.getPosition();
}
protected AbstractSymbolNode( String _value, int _position )
{
super( _value ); value = _value; position = _position;
}
String getValue()
{
return value;
}
public String toString()
{
return "\"" + value + "\"";
}
boolean hasValue( String _value )
{
return value.equals( _value );
}
int getPosition()
{
return position;
}
// a symbol node cannot have any more children
boolean canHaveMoreChildren()
{
return false;
}
// a symbol node is valid if it doesn't have any children (it is a leaf in the syntax tree)
boolean check()
{
return getChildrenCount() == 0;
}
boolean isOptional()
{
ModelNode node = (ModelNode)super.getParent();
if( node != null )
{
/** @todo use polymorphism here and not instanceof. For the moment use this to validate the ideas */
if( node instanceof OperatorQMarkNode )
return true;
else
return false;
}
else
return false;
}
}
/**************************************************
* symbol node class
***************************************************/
private static class SymbolNode extends AbstractSymbolNode
{
private IntegerSet followpos = new IntegerSet();
private IntegerSet startRanges = new IntegerSet();
private IntegerSet endRanges = new IntegerSet();
IntegerSet startRanges()
{
return startRanges;
}
IntegerSet endRanges()
{
return endRanges;
}
private SymbolNode( SymbolNode node )
{
super( node );
}
SymbolNode( String _value, int _pos )
{
super( _value, _pos );
}
public String toString()
{
return super.toString();
}
public Object clone()
{
return new SymbolNode( this );
}
protected IntegerSet followpos()
{
return followpos;
}
boolean hasValue( String _value )
{
return value.equals( _value );
}
void calculate()
{
setFirstPos( new IntegerSet( getPosition() ) );
setLastPos( new IntegerSet( getPosition() ) );
setNullable( false );
}
public void dump( OutputStream os )
throws IOException
{
os.write( ( "\"" + value + "\" " + ( new Integer( getPosition() ) ).toString() + " " ).getBytes() );
os.write( ( " Followpos: " ).getBytes() );
followpos.dump( os );
super.dump( os );
}
void calculateInternalPos()
{
internalpos().add( getPosition() );
}
void addEndRange( OperatorRangeNode node )
{
endRanges.add( node.getPos() );
}
void addStartRange( OperatorRangeNode node )
{
startRanges.add( node.getPos() );
}
// void dumpExpression() { ModelWriter.print( value ); }
boolean isAmbiguous()
{
return false;
}
}
/***********************************************
* Empty symbol node class
* Objects of this class are created to model the empty input
* for example (a|<empty>) is the same as a?
**********************************************/
private class EmptySymbolNode extends AbstractSymbolNode
{
private final static String empty = "";
private EmptySymbolNode()
{
super( empty, 0 );
}
EmptySymbolNode( EmptySymbolNode node )
{
super( node );
}
public Object clone()
{
return new EmptySymbolNode( this );
}
public void dump( OutputStream os )
throws IOException
{
os.write( ( "<E> " + (new Integer( getPosition() ) ).toString() ).getBytes() );
super.dump( os );
}
void dumpExpression()
{
ModelWriter.print( toString() );
}
boolean isAmbiguous()
{
return false;
}
public String toString()
{
return "<E>";
}
void calculate()
{
setNullable( true );
setFirstPos( new IntegerSet() );
setLastPos( new IntegerSet() );
}
void calculateInternalPos()
{
}
}
/***********************************************
* Operator Star node
**********************************************/
static private class OperatorStarNode extends UnaryOperatorNode
{
Alphabet alphabet;
OperatorStarNode( String name, Alphabet _alphabet )
{
super( name ); alphabet = _alphabet;
}
OperatorStarNode( OperatorStarNode node )
{
super( node );
}
public void dump( OutputStream os )
throws IOException
{
os.write( ( "*" ).getBytes() );
super.dump( os );
}
public String toString()
{
ModelNode node = (ModelNode)getFirstChild();
String string = new String( node.toString() );
string += "*";
return string;
}
void dumpExpression()
{
ModelWriter.print( toString() );
}
// public String toString() { return "*"; }
public Object clone()
{
return new OperatorStarNode( this );
}
// this operator only has a child, so node1 == node2
protected void calculateNullable( ModelNode node )
{
setNullable( true );
}
void calculateFollowpos()
{
for( Enumeration e = lastpos().elements(); e.hasMoreElements(); )
{
SymbolNode node = ( SymbolNode )alphabet.elementAt( ( ( Integer)e.nextElement() ).intValue() );
node.followpos().reunion( firstpos() );
}
}
protected boolean isAmbiguous( ModelNode node1, ModelNode node2 )
{
return false;
}
}
/***********************************************
* Operator Plus node
**********************************************/
static private class OperatorPlusNode extends UnaryOperatorNode
{
Alphabet alphabet;
OperatorPlusNode( String name, Alphabet _alphabet )
{
super( name ); alphabet = _alphabet;
}
OperatorPlusNode( OperatorPlusNode node )
{
super( node );
}
public void dump( OutputStream os )
throws IOException
{
os.write( ( "+" ).getBytes() );
super.dump( os );
}
public String toString()
{
String string = new String();
ModelNode node = (ModelNode)getFirstChild();
string = node.toString();
string += "+";
return string;
}
void dumpExpression()
{
ModelWriter.print( toString() );
}
// public String toString() { return "+"; }
public Object clone()
{
return new OperatorPlusNode( this );
}
// this operator only has a child, so node1 == node2
protected void calculateNullable( ModelNode node )
{
setNullable( node.nullable() );
}
void calculateFollowpos()
{
for( Enumeration e = lastpos().elements(); e.hasMoreElements(); )
{
SymbolNode node = ( SymbolNode )alphabet.elementAt( ( ( Integer)e.nextElement() ).intValue() );
node.followpos().reunion( firstpos() );
}
}
protected boolean isAmbiguous( ModelNode node1, ModelNode node2 )
{
return false;
}
}
/**************************************************
* Operator Range node class
* INFINITY is used to model an unlimited max value
***************************************************/
static private class OperatorRangeNode extends UnaryOperatorNode
{
private final Limits limits;
private final int pos;
Alphabet alphabet;
StatesPairsToRange statesPairToRange;
Limits getLimits()
{
return limits;
}
int getPos()
{
return pos;
}
OperatorRangeNode( OperatorRangeNode node )
{
super( node ); pos = node.getPos(); limits = node.getLimits();
}
OperatorRangeNode( String name, int _pos, Limits _limits, Alphabet _alphabet, StatesPairsToRange _statesPairToRange )
throws BadLimitsException
{