/* * 01/07/2003 - 15:19:32 * * Pattern.java - * Copyright (C) 2003 Buero fuer Softwarearchitektur GbR * ralf.meyer@karneim.com * http://jrexx.sf.net * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ package com.karneim.util.collection.regex; import com.karneim.util.collection.set.ISet_char; import com.karneim.util.collection.set.CharSet; import java.io.*; import java.util.*; import com.karneim.util.collection.automaton.*; import java.lang.ref.SoftReference; /** * Regular expression based on a minimized deterministic automaton (DFA) and designed as an immutable set of strings. *
Use this class to create a regular expression and match strings against it. *
for example: *
to check whether a given string is a number try
*
new Pattern("[0-9]+").contains(s) * * @author Ralf Meyer * @version 1.1 */ public class Pattern implements Cloneable { protected static final HashMap AUTOMATON_MAP = new HashMap(); protected static final Automaton_Pattern get(String regEx,boolean cache) { if (cache) { synchronized(AUTOMATON_MAP) { final SoftReference reference = (SoftReference)AUTOMATON_MAP.get(regEx); if (reference!=null) { Automaton_Pattern automaton = (Automaton_Pattern)reference.get(); if (automaton!=null) return automaton; } final Automaton_Pattern automaton = new Automaton_Pattern(regEx); final Automaton.LinkedSet_State states = automaton.getStates(); for (Automaton.Wrapper_State w = states.elements; w!=null; w=w.next) { for (Automaton.State.Transition trans=w.state.eTransitions; trans!=null; trans=trans.next) { trans.properties = null; } for (Automaton.State.Transition trans=w.state.transitions; trans!=null; trans=trans.next) { trans.properties = null; } } automaton.minimize(); AUTOMATON_MAP.put(regEx,new SoftReference(automaton)); return automaton; } } else { SoftReference reference = null; synchronized(AUTOMATON_MAP) { reference = (SoftReference)AUTOMATON_MAP.get(regEx); } if (reference!=null) { Automaton_Pattern automaton = (Automaton_Pattern)reference.get(); if (automaton!=null) return automaton; } final Automaton_Pattern automaton = new Automaton_Pattern(regEx); final Automaton.LinkedSet_State states = automaton.getStates(); for (Automaton.Wrapper_State w = states.elements; w!=null; w=w.next) { for (Automaton.State.Transition trans=w.state.eTransitions; trans!=null; trans=trans.next) { trans.properties = null; } for (Automaton.State.Transition trans=w.state.transitions; trans!=null; trans=trans.next) { trans.properties = null; } } automaton.minimize(); return automaton; } } protected Automaton_Pattern automaton; /* protected Pattern() { this(new Automaton_Pattern()); } */ protected Pattern(Automaton_Pattern automaton) { this.automaton = automaton; } protected Pattern(ISet_char fullSet) { this.automaton = new Automaton_Pattern(fullSet); } /** * creates a minimized deterministic automaton (DFA) from the given regEx pattern. */ public Pattern(String regEx) { this(Pattern.get(regEx,true)); } public boolean contains(String s) { return this.contains(s,0,s.length()); } public boolean contains(String s,int offset) { return this.contains(s,offset,s.length()-offset); } public boolean contains(String s,int offset,int length) { Automaton.State state = ((Automaton_Pattern)this.automaton).getStartState(); //int _offset = offset; //int _length = length; //long start = System.currentTimeMillis(); //try { if (state==null) return false; Automaton.LinkedSet_State states = this.automaton.newLinkedSet_State(state); Automaton.LinkedSet_State newStates = this.automaton.newLinkedSet_State(); loop: for (;length>0; ++offset, --length) { for (Automaton.State.Transition trans=state.transitions; trans!=null; trans=trans.next) { if (trans.charSet.contains(s.charAt(offset))) { state = trans.toState; continue loop; } } return false; } return ((Automaton_Pattern.PState)state).isFinal(); //} finally { // long end = System.currentTimeMillis(); // System.out.println("Pattern.contains: "+(end-start)); // if (length>0) { // System.out.println(this.automaton); // s = s.substring(_offset,_offset+_length); // offset = offset-_offset; // if (offset<=100) System.out.println(" can start with: "+s.substring(0,offset)+"\""); // else System.out.println(" can start with: \""+s.substring(0,100)+"...\""+s.length()); // if (s.length()-offset<=100) System.out.println(" stopped for : "+s.substring(offset)+"\""); // else System.out.println(" stopped for : "+s.substring(offset,offset+100)+"...\""+(s.length()-offset)); // System.out.println("currentState: "+state); // } //} } public boolean contains(char[] chars) { return this.contains(chars,0,chars.length); } public boolean contains(char[] chars,int offset) { return this.contains(chars,offset,chars.length-offset); } public boolean contains(char[] chars,int offset,int length) { Automaton.State state = ((Automaton_Pattern)this.automaton).getStartState(); if (state==null) return false; loop: for (;length>0; ++offset, --length) { for (Automaton.State.Transition trans=state.transitions; trans!=null; trans=trans.next) { if (trans.charSet.contains(chars[offset])) { state = trans.toState; continue loop; } } return false; } return ((Automaton_Pattern.PState)state).isFinal; } public boolean contains(java.io.Reader in) throws java.io.IOException { Automaton.State state = ((Automaton_Pattern)this.automaton).getStartState(); if (state==null) return false; loop: for (int ch=in.read(); ch!=-1; ch=in.read()) { for (Automaton.State.Transition trans=state.transitions; trans!=null; trans=trans.next) { if (trans.charSet.contains((char)ch)) { state = trans.toState; continue loop; } } return false; } return ((Automaton_Pattern.PState)state).isFinal; } public String getRegEx() { return this.automaton.regEx; } public String toString() { return this.getRegEx(); } /* private void writeObject(java.io.OutputStream out) throws IOException { out.writeObject(this.toAutomatonData()); } private void readObject(java.io.InputStream in) throws IOException,ClassNotFoundException { SAutomatonData a = (SAutomatonData)in.readObject(); this.init(a); } public SAutomatonData toAutomatonData() { return new PAutomaton(this.automaton).toData(); } public void toAutomatonData(OutputStream automatonDataStream) throws IOException { new OutputStream(automatonDataStream).writeObject(this); } protected void init(SAutomatonData a) { this.automaton = (Automaton_Pattern)new PAutomaton(a).getAutomaton(); } */ public Object clone() { try { Pattern clone = (Pattern)super.clone(); clone.automaton = (Automaton_Pattern)clone.automaton.clone(); return clone; } catch(CloneNotSupportedException e) { throw new Error("should never happen"); } } /* public Iterator iterator() { return new Iterator() { HashMap fromStates = null; HashMap toStates = new HashMap(); Iterator it_states = null; ISet_char.Iterator it_charSet = null; { Object startState = Pattern.this.automaton.getStartState(); if (startState!=null) { LinkedList lists = new LinkedList(); lists.add(new LinkedList()); toStates.put(startState,lists); } } public boolean hasNext() { return false; } public Object next() { it_charSet.next(); //if (this.it_states==null) it_states = fromStates.keySet().iterator(); if (it_states.hasNext()==false) { this.fromStates = toStates; this.toStates = new HashMap(); this.it_states } IStatePro.ITransition[] trans = state.getTransitions(); Iterator it_lists = lists.iterator(); while (it_lists.hasNext()) { LinkedList elements = (LinkedList)it_lists.next(); for (int i=0; i