/* * 01/07/2003 - 15:19:32 * * Automaton_Pattern.java - * Copyright (C) 2003 Buero fuer Softwarearchitektur GbR * ralf.meyer@karneim.com * http://jrexx.sf.net * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ package com.karneim.util.collection.regex; import com.karneim.util.collection.set.*; import com.karneim.util.collection.automaton.*; import java.lang.ref.SoftReference; import java.text.*; import java.util.*; public class Automaton_Pattern extends com.karneim.util.collection.set.AutomatonSet_String { protected static class PProperties extends SProperties {} protected interface IPState extends ISState { } protected class PState extends AutomatonSet_String.SState implements IPState { public PState(boolean isFinal) { super(isFinal); } protected Transition addTransition(IProperties properties,ISet_char charSet,State toState) { return super.addTransition(properties,charSet,toState); } protected boolean removeTransition(Transition trans) { return super.removeTransition(trans); } protected void setFinal(boolean isFinal) { super.setFinal(isFinal); } protected IState getEClosure() { return super.getEClosure(); } } protected class LinkedSet_PState extends AutomatonSet_String.LinkedSet_SState implements IPState { protected LinkedSet_PState() { super(); } protected LinkedSet_PState(PState state) { super(state); } } private Map preDefinedAutomatons = null; protected String regEx = null; protected Automaton_Pattern(ISet_char fullSet) { super(fullSet); } protected Automaton_Pattern() { super(); this.regEx= ""; } protected Automaton_Pattern(String regEx) { super(); this.regEx = ""; this.addAll(regEx); } protected Automaton.State getStartState() { return super.getStartState(); } protected State createState() { return new PState(false); } protected SState createState(boolean isFinal) { return new PState(isFinal); } protected LinkedSet_State newLinkedSet_State() { return new LinkedSet_PState(); } protected LinkedSet_State newLinkedSet_State(State state) { return new LinkedSet_PState((PState)state); } protected void setStartState(SState state) { super.setStartState(state); } protected SState addState(boolean isFinal) { return super.addState(isFinal); } protected boolean removeState(PState removeState) { return super.removeState(removeState); } protected void clear() { super.clear(); this.regEx = ""; } protected LinkedSet_State getStates() { return super.getStates(); } protected void minimize() { super.minimize(); } protected void removeUselessStates() { super.removeUselessStates(); } protected void addAll(SState state) { super.addAll(state); } protected SState complement(SState state) { return super.complement(state); } protected SState concat(SState state_A,SState state_B) { return super.concat(state_A,state_B); } protected SState repeat(SState state,int minTimes,int maxTimes) { //performance leak if ((state instanceof PState)==false) throw new IllegalArgumentException("(state instanceof PState)==false"); return super.repeat(state,minTimes,maxTimes); } protected SState union(SState state_A,SState state_B) { return super.union(state_A,state_B); } protected SState intersect(SState state_A,SState state_B) { return super.intersect(state_A,state_B); } /* protected PState minus(PState state_A,PState state_B) { return (PState)super.minus(state_A,state_B); } */ protected void complement() { super.complement(); if (this.regEx==null) return; if (this.regEx=="") this.regEx = ".*"; else this.regEx = "!("+this.regEx+")"; } protected void addAll(String regEx) { if (this.regEx==null) return; if (this.regEx=="") this.regEx = regEx; else { this.regEx = new StringBuffer(this.regEx.length()+regEx.length()+5) .append('(').append(this.regEx).append(')') .append('|') .append('(').append(regEx).append(')') .toString(); } this.addAll(this.parseRegEx(regEx)); this.removeUselessStates(); } protected void retainAll(String regEx) { if (this.regEx==null) return; if (this.regEx=="" || regEx=="") this.regEx = ""; else { this.regEx = new StringBuffer(this.regEx.length()+regEx.length()+5) .append('(').append(this.regEx).append(')') .append('&') .append('(').append(regEx).append(')') .toString(); } this.retainAll(this.parseRegEx(regEx)); this.removeUselessStates(); } protected void removeAll(String regEx) { if (this.regEx==null) return; if (this.regEx=="") this.regEx = ""; else { this.regEx = new StringBuffer(this.regEx.length()+regEx.length()+6) .append('(').append(this.regEx).append(')') .append("&!") .append('(').append(regEx).append(')') .toString(); } this.removeAll(this.parseRegEx(regEx)); this.removeUselessStates(); } protected boolean isDeterministic() { return super.isDeterministic(); } protected boolean isDeterministic(State startState) { return super.isDeterministic(startState); } protected void addAll(AutomatonSet_String automaton) { super.addAll(automaton); Automaton_Pattern pAutomaton = (Automaton_Pattern)automaton; if (this.regEx==null || pAutomaton.regEx==null) return; if (this.regEx=="") this.regEx = pAutomaton.regEx; else { this.regEx = new StringBuffer(this.regEx.length()+pAutomaton.regEx.length()+5) .append('(').append(this.regEx).append(')') .append('|') .append('(').append(pAutomaton.regEx).append(')') .toString(); } } protected void retainAll(AutomatonSet_String automaton) { super.retainAll(automaton); Automaton_Pattern pAutomaton = (Automaton_Pattern)automaton; if (this.regEx==null || pAutomaton.regEx==null) return; if (this.regEx=="" || pAutomaton.regEx=="") this.regEx = ""; else { this.regEx = new StringBuffer(this.regEx.length()+pAutomaton.regEx.length()+5) .append('(').append(this.regEx).append(')') .append('&') .append('(').append(pAutomaton.regEx).append(')') .toString(); } } protected void removeAll(AutomatonSet_String automaton) { super.removeAll(automaton); Automaton_Pattern pAutomaton = (Automaton_Pattern)automaton; if (this.regEx==null || pAutomaton.regEx==null) return; if (this.regEx=="") this.regEx = ""; else if (pAutomaton.regEx!="") { this.regEx = new StringBuffer(this.regEx.length()+pAutomaton.regEx.length()+6) .append('(').append(this.regEx).append(')') .append("&!") .append('(').append(pAutomaton.regEx).append(')') .toString(); } } protected Object clone() { final Automaton_Pattern clone = (Automaton_Pattern)super.clone(); clone.scanner = clone.newScanner(); return clone; } ////////////////////////////////////////////////////////////////////////// ///// P A R S I N G ////////////////////////////////////////////////////////////////////////// private static final int ERROR = -2, // the possible parser SHIFT = -3, // actions used in ACTIONTABLE REDUCE = -4, // value -1 is reserved ACCEPT = -5, // for unknown constant value RE = 0, // NonTerminal Symbols TERM = 1, // IMPORTANT: the value represents the ELEMENT = 2, // rowNr in ACTIONTABLE notOp = 3, // Terminal Symbols andOp = 4, // . orOp = 5, // . groupBegin = 6, // . groupEnd = 7, // . repetition = 8, // . label = 9, // . regExp = 10, // IMPORTANT: the value represents the EOF = 11; // rowNr in ACTIONTABLE private static final int[][][] ACTIONTABLE = { // state RE TERM ELEMENT notOp andOp orOp groupBegin groupEnd repetition label regExp EOF /* 0 */ {{SHIFT,2},{SHIFT,7},{SHIFT,5},{SHIFT,11},{ERROR, 0},{ERROR, 0},{SHIFT,14},{ERROR, 0},{ERROR, 0},{ERROR, 0},{SHIFT,16},{ERROR, 0}}, /* 1 */ {{ERROR,0},{ERROR,0},{ERROR,0},{REDUCE,3},{REDUCE,3},{REDUCE,3},{REDUCE,3},{REDUCE,3},{REDUCE,3},{REDUCE,3},{REDUCE,3},{REDUCE,3}}, /* 2 */ {{ERROR,0},{ERROR,0},{ERROR,0},{ERROR, 0},{ERROR, 0},{ERROR, 0},{ERROR, 0},{ERROR, 0},{ERROR, 0},{ERROR, 0},{ERROR, 0},{ACCEPT,0}}, /* 3 */ {{ERROR,0},{ERROR,0},{ERROR,0},{ERROR, 0},{ERROR, 0},{ERROR, 0},{ERROR, 0},{REDUCE,1},{ERROR, 0},{ERROR, 0},{ERROR, 0},{REDUCE,1}}, /* 4 */ {{ERROR,0},{ERROR,0},{ERROR,0},{ERROR, 0},{ERROR, 0},{ERROR, 0},{ERROR, 0},{SHIFT,13},{ERROR, 0},{ERROR, 0},{ERROR, 0},{ERROR, 0}}, /* 5 */ {{ERROR,0},{SHIFT,8},{SHIFT,5},{SHIFT,11},{SHIFT,10},{REDUCE,6},{SHIFT,14},{REDUCE,6},{SHIFT, 1},{SHIFT,12},{SHIFT,16},{REDUCE,6}}, /* 6 */ {{ERROR,0},{ERROR,0},{ERROR,0},{ERROR, 0},{ERROR, 0},{REDUCE,9},{ERROR, 0},{REDUCE,9},{SHIFT, 1},{SHIFT,12},{ERROR, 0},{REDUCE,9}}, /* 7 */ {{ERROR,0},{ERROR,0},{ERROR,0},{ERROR, 0},{ERROR, 0},{SHIFT,15},{ERROR, 0},{REDUCE,0},{ERROR, 0},{ERROR, 0},{ERROR, 0},{REDUCE,0}}, /* 8 */ {{ERROR,0},{ERROR,0},{ERROR,0},{ERROR, 0},{ERROR, 0},{REDUCE,7},{ERROR, 0},{REDUCE,7},{ERROR, 0},{ERROR, 0},{ERROR, 0},{REDUCE,7}}, /* 9 */ {{ERROR,0},{ERROR,0},{ERROR,0},{ERROR, 0},{ERROR, 0},{REDUCE,8},{ERROR, 0},{REDUCE,8},{ERROR, 0},{ERROR, 0},{ERROR, 0},{REDUCE,8}}, /* 10 */ {{ERROR,0},{SHIFT,9},{SHIFT,5},{SHIFT,11},{ERROR, 0},{ERROR, 0},{SHIFT,14},{ERROR, 0},{ERROR, 0},{ERROR, 0},{SHIFT,16},{ERROR, 0}}, /* 11 */ {{ERROR,0},{ERROR,0},{SHIFT,6},{ERROR, 0},{ERROR, 0},{ERROR, 0},{SHIFT,14},{ERROR, 0},{ERROR, 0},{ERROR, 0},{SHIFT,16},{ERROR, 0}}, /* 12 */ {{ERROR,0},{ERROR,0},{ERROR,0},{REDUCE,4},{REDUCE,4},{REDUCE,4},{REDUCE,4},{REDUCE,4},{REDUCE,4},{REDUCE,4},{REDUCE,4},{REDUCE,4}}, /* 13 */ {{ERROR,0},{ERROR,0},{ERROR,0},{REDUCE,2},{REDUCE,2},{REDUCE,2},{REDUCE,2},{REDUCE,2},{REDUCE,2},{REDUCE,2},{REDUCE,2},{REDUCE,2}}, /* 14 */ {{SHIFT,4},{SHIFT,7},{SHIFT,5},{SHIFT,11},{ERROR, 0},{ERROR, 0},{SHIFT,14},{ERROR, 0},{ERROR, 0},{ERROR, 0},{SHIFT,16},{ERROR, 0}}, /* 15 */ {{SHIFT,3},{SHIFT,7},{SHIFT,5},{SHIFT,11},{ERROR, 0},{ERROR, 0},{SHIFT,14},{ERROR, 0},{ERROR, 0},{ERROR, 0},{SHIFT,16},{ERROR, 0}}, /* 16 */ {{ERROR,0},{ERROR,0},{ERROR,0},{REDUCE,5},{REDUCE,5},{REDUCE,5},{REDUCE,5},{REDUCE,5},{REDUCE,5},{REDUCE,5},{REDUCE,5},{REDUCE,5}} }; // the number after a SHIFT action is the next state to go to (see case SHIFT) // the number after a REDUCE action is the number of a rule (see case REDUCE) private static final Integer[] INTEGERS = new Integer[ACTIONTABLE.length]; static { for (int i=0; i "); action = Automaton_Pattern.ACTIONTABLE[stateNr][tokenSymbol][0]; PState finalState,aState; switch (action) { case Automaton_Pattern.SHIFT : //System.out.println("SHIFT "+ACTIONTABLE[stateNr][tokenSymbol][1]); stateStack.push( Automaton_Pattern.INTEGERS[stateNr] ); symbolStack.push( token ); stateNr = Automaton_Pattern.ACTIONTABLE[stateNr][tokenSymbol][1]; ++extdTokenListIndex; token = extdTokenList[extdTokenListIndex]; tokenSymbol = -1; break; case Automaton_Pattern.REDUCE : //System.out.println("REDUCE "+ACTIONTABLE[stateNr][tokenSymbol][1]); final int ruleNr = Automaton_Pattern.ACTIONTABLE[stateNr][tokenSymbol][1]; Object node=null; int nodeSymbol=-1; switch(ruleNr) { case 0: // RE ::= TERM { node = symbolStack.pop(); nodeSymbol = Automaton_Pattern.RE; break; } case 1: // RE ::= TERM orOp RE { PState re = (PState)symbolStack.pop(); /*Terminal_OrOp =*/ symbolStack.pop(); PState term = (PState)symbolStack.pop(); node = this.union(term,re); nodeSymbol = Automaton_Pattern.RE; break; } case 2: // ELEMENT ::= groupBegin RE groupEnd { Terminal_GroupEnd end = (Terminal_GroupEnd)symbolStack.pop(); node = symbolStack.pop(); Terminal_GroupBegin begin =(Terminal_GroupBegin)symbolStack.pop(); if (begin.name==null && end.name!=null || begin.name!=null && begin.name.equals(end.name)==false) throw new IllegalArgumentException("endtag exspected for "+begin+" but found: "+end); nodeSymbol = Automaton_Pattern.ELEMENT; break; } case 3: // ELEMENT ::= ELEMENT repetition { Terminal_Repetition repetition = (Terminal_Repetition)symbolStack.pop(); PState element = (PState)symbolStack.pop(); node = repetition.to==Terminal_Repetition.UNLIMITED ? this.repeat(element,repetition.from,0) : this.repeat(element,repetition.from,repetition.to); nodeSymbol = Automaton_Pattern.ELEMENT; break; } case 4: // ELEMENT ::= ELEMENT label { String label = (String)symbolStack.pop(); String labelDot = null; PState element = (PState)symbolStack.pop(); node = element; nodeSymbol = Automaton_Pattern.ELEMENT; break; } case 5: // ELEMENT ::= regExp { node = symbolStack.pop(); if (node instanceof Terminal_RegExp) { // or instanceOf Terminal_RuntimeValue Automaton_Pattern preDefAutomaton; if (this.preDefinedAutomatons==null) preDefAutomaton = null; else { preDefAutomaton = (Automaton_Pattern)this.preDefinedAutomatons.get(((Terminal_RegExp)node).name); } if (preDefAutomaton==null) throw new IllegalArgumentException(((Terminal_RegExp)node).name+" is not defined"); final Automaton.State startState = preDefAutomaton.getStartState(); if (startState==null) { node = this.addState(false); } else { java.util.Map map = this.cloneState(startState); node = (Automaton_Pattern.PState)map.get(startState); } } nodeSymbol = Automaton_Pattern.ELEMENT; break; } case 6: // TERM ::= ELEMENT { node = symbolStack.pop(); nodeSymbol = Automaton_Pattern.TERM; break; } case 7: // TERM ::= ELEMENT TERM { PState term = (PState)symbolStack.pop(); PState element = (PState)symbolStack.pop(); node = this.concat(element,term); nodeSymbol = Automaton_Pattern.TERM; break; } case 8: // TERM ::= ELEMENT andOp TERM { PState term = (PState)symbolStack.pop(); /*Terminal_AndOp = */ symbolStack.pop(); PState element = (PState)symbolStack.pop(); node = this.intersect(element,term); nodeSymbol = Automaton_Pattern.TERM; break; } case 9: // TERM ::= notOp ELEMENT { PState element = (PState)symbolStack.pop(); /*Terminal_NotOp = */ symbolStack.pop(); node = this.complement(element); nodeSymbol = Automaton_Pattern.TERM; break; } default : String message = "\nProgramming error in RE-Parser:" +"\nACTIONTABLE contains wrong ruleNr "+ruleNr +"\nor case "+ruleNr+" statement missing"; throw new RuntimeException(message); } // end switch(rule) for (int i=stateStack.size()-symbolStack.size(); i>1; i--) stateStack.pop(); stateNr = ((Integer)stateStack.peek()).intValue(); symbolStack.push( node ); stateNr = Automaton_Pattern.ACTIONTABLE[stateNr][nodeSymbol][1]; break; } // end switch(action) } while (action!=Automaton_Pattern.ACCEPT && action!=Automaton_Pattern.ERROR); if (action==Automaton_Pattern.ERROR) { System.out.print("parsed:"); for (int i=0; i' : return null; default : { status.setIndex(index+1); final PState startState = (PState)Automaton_Pattern.this.addState(false); startState.addTransition( null, new CharSet(source[index]), Automaton_Pattern.this.addState(true) ); return startState; } } } public int maxLength() {return 2;} } final class TerminalFormat_LITERALSET implements TerminalFormat { private static final int START = 0; private static final int FIRSTCHAR = 1; private static final int NORMAL = 2; private static final int ESCAPED = 3; public TerminalFormat_LITERALSET() { // this.automaton = automaton; //startState = automaton.addState(false); //automaton.addTransition(new CharSet('.'),automaton.addState(true)); }; public Object parseObject(char[] source, ParsePosition status) { int index = status.getIndex(); final int sourceLength = source.length; ISet_char charSet = new CharSet(); StringBuffer chars = new StringBuffer(); boolean complement = false; boolean intervall = false; int state = START; while (indexch) return null; for (char c=++from; c<=ch; c++) charSet.add(c); intervall = false; } else { if (ch=='-') { if (chars.length()==0) return null; intervall = true; } else chars.append(ch); } // STATE = NORMAL; (not necessary because state is NORMAL) } break; case ESCAPED : switch(ch) { default : if (intervall) { char from = (char)(((int)chars.charAt(chars.length()-1))+1); for (char c=from; c<=ch; c++) charSet.add(c); intervall = false; } else chars.append(ch); state = NORMAL; } break; default : String message = "unknown state " + state; throw new RuntimeException(message); } index++; } return null; } public int maxLength() {return PScanner.UNLIMITED_MAX_LENGTH;} } final class TerminalFormat_GroupBegin implements TerminalFormat { public TerminalFormat_GroupBegin() {} public Object parseObject(char[] source, ParsePosition status) { final int sourceLength = source.length; int index = status.getIndex(); if (index>=sourceLength) { String message = ""; throw new ArrayIndexOutOfBoundsException(message); } if (source[index]!='<') return null; index++; final int startIndex = index; while (index=sourceLength) { String message = ""; throw new ArrayIndexOutOfBoundsException(message); } if (source[index]!='<') return null; index++; if (source[index]!='/') return null; index++; final int startIndex = index; while (index