http://nagoya.apache.org/bugzilla/show_bug.cgi?id=1799 *** shadow/1799 Thu May 17 12:07:32 2001 --- shadow/1799.tmp.20552 Thu May 17 12:07:32 2001 *************** *** 0 **** --- 1,262 ---- + +============================================================================+ + | possible bug in ORO's Util.substitute | + +----------------------------------------------------------------------------+ + | Bug #: 1799 Product: ORO | + | Status: NEW Version: 2.0.2 | + | Resolution: Platform: PC | + | Severity: Normal OS/Version: Linux | + | Priority: Component: Main | + +----------------------------------------------------------------------------+ + | Assigned To: [EMAIL PROTECTED] | + | Reported By: [EMAIL PROTECTED] | + +----------------------------------------------------------------------------+ + | URL: | + +============================================================================+ + | DESCRIPTION | + No return from Util.substitute: + + I am a bit hesitant to report this as a bug (since it could be infinite + backtracking in my regex) but I notice that I never get out of a call + to Util.substitute in some cases, and Daniel F. Savarese suggested that + the problem might be in ORO. + + Try the included test program out, it has three sets of test input, two + of which do not return any time soon, another works as it is supposed to. 
+ + __________________________________________________________________________ + + import org.apache.oro.text.regex.*; + import org.apache.oro.text.perl.*; + + /** + * Reproduction program for the report above: feeds one of three canned + * mail bodies through Util.substitute with a very large regex. According to + * the author's own note, the first two inputs never come back and the third + * completes normally. + */ + public class RegexTest + { + /** + * Picks a message by the 1-based index in args[0], rewrites it with + * PerlSubstituteTool and prints the result. Exits with status 1 when the + * argument is missing, is not an integer, or lies outside 1..messages.length. + */ + public static void main(String[] args) + { + String input = ""; + int val = -1; + + //the first two kill us then the third one is ok + String[] messages = + { + "The original message was received at Mon, 14 May 2001 12:04:34 + -0400 \n from umc97 [127.0.0.1] \n ----- The following addresses had \n + permanent fatal errors ----- \n <[EMAIL PROTECTED]> \n <[EMAIL PROTECTED]> \n <[EMAIL PROTECTED]> \n <[EMAIL PROTECTED]> \n + ----- Transcript of session follows ----- \n 550 <[EMAIL PROTECTED]>... Host unknown (Name + server: b.c: host not found) \n 550 <[EMAIL PROTECTED]>... Host unknown (Name server: c.d: + host not found) \n 550 <[EMAIL PROTECTED]>... Host unknown (Name server: d.e: host not + found) \n 550 <[EMAIL PROTECTED]>... Host unknown (Name server: d.f: host not found) \n + MESSAGE/DELIVERY-STATUS download \n From: Daniel Shriver \n To: + [EMAIL PROTECTED] \n Subject: should trigger vacation response \n Date: + 5/14/01 12:04 PM \n TEXT/X-VCARD \n dshriver.vcf \n Save Address", + "From: Daniel Shriver <[EMAIL PROTECTED]> \n To: + [EMAIL PROTECTED] \n Sent: \n Subject: test message to mess up email + regex \n \n text \n [EMAIL PROTECTED] \n text \n [EMAIL PROTECTED] \n + text \n bob@[210.198.13.13] \n text \n x@y \n \n \"bob at\" [EMAIL PROTECTED] \n + \"joe is a shmuck and a big O'l one\" <[EMAIL PROTECTED]>", + "From: Daniel Shriver <[EMAIL PROTECTED]> \n To: + [EMAIL PROTECTED] \n Sent: \n Subject: test message to mess up email + regex, ok one \n \n > text \n > [EMAIL PROTECTED] \n > text \n > + [EMAIL PROTECTED] \n > text \n > bob@[210.198.13.13] \n > text \n > x@y" + }; + + System.out.println("We have ["+messages.length+"] messages to choose + from"); + + String warning = "Usage: java RegexTest #\n (where # is the number of + the message you want to parse as an integer, 
and is a value between 1 and + "+messages.length+")"; + + if (args.length < 1) + { + System.out.println(warning); + System.exit(1); + } + try + { + val = Integer.parseInt(args[0]); + } + catch (NumberFormatException e) + { + System.out.println("You did not enter an integer + number!\n"+warning); + /* stop here: val is still -1 and must not be used as an index */ + System.exit(1); + } + /* reject 0 and negatives as well as too-large values: messages[val-1] would otherwise be indexed out of bounds */ + if (val < 1 || val > messages.length ) + { + System.out.println("You did not enter a number in the valid range (1 + - "+messages.length+")!\n"+warning); + /* same exit status as the missing-argument case above */ + System.exit(1); + } + else + { + input = messages[val-1]; + //System.out.println("DEBUG MSG: The original message + is:\n"+input+"\n\n"); + } + + String regex = "([\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?: + (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]* + (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]* + )*\\)[\\040\\t]* )*)((?: [^ + (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^ + (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\"[^\\x80-\\xff\\n\\015\"] * + (?: [^\\x80-\\xff][^\\x80-\\xff\\n\\015\"]* )* \")[\\040\\t]* (?: + \\([^\\x80-\\xff\\n\\015()]* (?: (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]* + (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]* + )*\\)[\\040\\t]* )*(?: \\.[\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?: + (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]* + (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]* + )*\\)[\\040\\t]* )*(?: [^ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^ + (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\"[^\\x80-\\xff\\n\\015\"] * + (?: [^\\x80-\\xff][^\\x80-\\xff\\n\\015\"]* )* \")[\\040\\t]* (?: + \\([^\\x80-\\xff\\n\\015()]* (?: (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]* + (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]* + )*\\)[\\040\\t]* )*)* \\@[\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?: + (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]* + (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]* + )*\\)[\\040\\t]* )*(?:[^ 
(\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^ + (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\\[(?: + [^\\x80-\\xff\\n\\015\\[\\]]|[^\\x80-\\xff])* \\])[\\040\\t]* (?: + \\([^\\x80-\\xff\\n\\015()]* (?: (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]* + (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]* + )*\\)[\\040\\t]* )*(?:\\.[\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?: + (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]* + (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]* + )*\\)[\\040\\t]* )*(?:[^ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^ + (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\\[(?: + [^\\x80-\\xff\\n\\015\\[\\]]|[^\\x80-\\xff])* \\])[\\040\\t]* (?: + \\([^\\x80-\\xff\\n\\015()]* (?: (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]* + (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]* + )*\\)[\\040\\t]* )*)*|(?: [^ + (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^ + (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\"[^\\x80-\\xff\\n\\015\"] * + (?: [^\\x80-\\xff][^\\x80-\\xff\\n\\015\"]* )* + \")[^()<>\\@,;:\".\\[\\]\\x80-\\xff\\000-\\010\\012-\\037]* (?: + (?:\\([^\\x80-\\xff\\n\\015()]* (?: + (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]* + (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]* + )*\\)|\"[^\\x80-\\xff\\n\\015\"] * (?: [^\\x80-\\xff][^\\x80-\\xff\\n\\015\"]* + )* \")[^()<>\\@,;:\".\\[\\]\\x80-\\xff\\000-\\010\\012-\\037]* )*< [\\040\\t]* + (?: \\([^\\x80-\\xff\\n\\015()]* (?: + (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]* + (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]* + )*\\)[\\040\\t]* )*(?:\\@[\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?: + (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]* + (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]* + )*\\)[\\040\\t]* )*(?:[^ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^ + 
(\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\\[(?: + [^\\x80-\\xff\\n\\015\\[\\]]|[^\\x80-\\xff])* \\])[\\040\\t]* (?: + \\([^\\x80-\\xff\\n\\015()]* (?: (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]* + (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]* + )*\\)[\\040\\t]* )*(?:\\.[\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?: + (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]* + (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]* + )*\\)[\\040\\t]* )*(?:[^ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^ + (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\\[(?: + [^\\x80-\\xff\\n\\015\\[\\]]|[^\\x80-\\xff])* \\])[\\040\\t]* (?: + \\([^\\x80-\\xff\\n\\015()]* (?: (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]* + (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]* + )*\\)[\\040\\t]* )*)*(?: , [\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?: + (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]* + (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]* + )*\\)[\\040\\t]* )*\\@[\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?: + (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]* + (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]* + )*\\)[\\040\\t]* )*(?:[^ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^ + (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\\[(?: + [^\\x80-\\xff\\n\\015\\[\\]]|[^\\x80-\\xff])* \\])[\\040\\t]* (?: + \\([^\\x80-\\xff\\n\\015()]* (?: (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]* + (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]* + )*\\)[\\040\\t]* )*(?:\\.[\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?: + (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]* + (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]* + )*\\)[\\040\\t]* )*(?:[^ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^ + (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\\[(?: + 
[^\\x80-\\xff\\n\\015\\[\\]]|[^\\x80-\\xff])* \\])[\\040\\t]* (?: + \\([^\\x80-\\xff\\n\\015()]* (?: (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]* + (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]* + )*\\)[\\040\\t]* )*)*)* :[\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?: + (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]* + (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]* + )*\\)[\\040\\t]* )*)?(?: [^ + (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^ + (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\"[^\\x80-\\xff\\n\\015\"] * + (?: [^\\x80-\\xff][^\\x80-\\xff\\n\\015\"]* )* \")[\\040\\t]* (?: + \\([^\\x80-\\xff\\n\\015()]* (?: (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]* + (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]* + )*\\)[\\040\\t]* )*(?: \\.[\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?: + (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]* + (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]* + )*\\)[\\040\\t]* )*(?: [^ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^ + (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\"[^\\x80-\\xff\\n\\015\"] * + (?: [^\\x80-\\xff][^\\x80-\\xff\\n\\015\"]* )* \")[\\040\\t]* (?: + \\([^\\x80-\\xff\\n\\015()]* (?: (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]* + (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]* + )*\\)[\\040\\t]* )*)* \\@[\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?: + (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]* + (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]* + )*\\)[\\040\\t]* )*(?:[^ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^ + (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\\[(?: + [^\\x80-\\xff\\n\\015\\[\\]]|[^\\x80-\\xff])* \\])[\\040\\t]* (?: + \\([^\\x80-\\xff\\n\\015()]* (?: (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]* + (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]* + )*\\)[\\040\\t]* 
)*(?:\\.[\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?: + (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]* + (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]* + )*\\)[\\040\\t]* )*(?:[^ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^ + (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\\[(?: + [^\\x80-\\xff\\n\\015\\[\\]]|[^\\x80-\\xff])* \\])[\\040\\t]* (?: + \\([^\\x80-\\xff\\n\\015()]* (?: (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]* + (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]* + )*\\)[\\040\\t]* )*)*>)"; + /* NOTE(review): 41 is presumably a combination of Perl5Compiler option-mask constants - confirm against the ORO API */ + int regex_flags = 41; + String substitution = "$1<A + HREF='compose?op=compose&from_link=true&recipientsTO=$2'>$2</A>"; + /* NOTE(review): -1 is handed to Util.substitute as its last argument; presumably Util.SUBSTITUTE_ALL - confirm */ + int interpolations = -1; + + System.out.println("DEBUG MSG: just about to call PerlSubstituteTool"); + + String output = PerlSubstituteTool(regex, regex_flags, substitution, + interpolations, input); + + System.out.println("Your message is now:\n"+output); + } + + /** + * Compiles reg_ex with the given flags and applies substitution to input + * through Util.substitute, printing DEBUG MSG lines before and after the call. + * + * @param reg_ex pattern source; when null or empty the input is returned untouched + * @param reg_ex_Flags option mask handed to Perl5Compiler.compile + * @param substitution replacement template handed to Perl5Substitution + * @param interpolations passed as the last argument of Util.substitute + * @param input text to rewrite + * @return the rewritten text, or the original input when there is no regex or + * the pattern fails to compile (the MalformedPatternException is only + * printed, not rethrown) + */ + public static String PerlSubstituteTool(String reg_ex, int reg_ex_Flags, + String substitution, int interpolations, String input) + { + //fail safe -- if a regex doesn't work it might be set to + //empty string... 
test for that and if we don't have a regex skip + //alternately a flag could have an alpha character in it (causing + //a number format exception (before this is called) but that is + //kludgy + if(null == reg_ex || reg_ex.equals("")) + { + //do nothing -- we'll just send back the input + } + else + { + try + { + String temp = null; + Perl5Compiler compiler = new Perl5Compiler(); + Perl5Substitution sub = new Perl5Substitution(substitution); + Pattern pat = compiler.compile(reg_ex, reg_ex_Flags); + System.out.println("DEBUG MSG: just before Util.substitute"); + temp = Util.substitute( new Perl5Matcher(), + pat, + sub, + input, + interpolations ); + System.out.println("DEBUG MSG: just after Util.substitute"); + input = temp; + } + catch (MalformedPatternException mpe) + { + System.out.println("DEBUG MSG: in catch + MalformedPatternException"); + System.out.println("DEBUG MSG: Exception is "+mpe.getMessage()); + } + } + return input; + } + + }
