Author: vgritsenko Date: Wed Mar 7 16:28:21 2007 New Revision: 515865 URL: http://svn.apache.org/viewvc?view=rev&rev=515865 Log: Fix bug #38331: RE compiler creates an incorrect program if a pattern results in a large program with offsets exceeding the capacity of a short
Modified: jakarta/regexp/trunk/docs/changes.html jakarta/regexp/trunk/docs/jakarta-regexp.jar jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java jakarta/regexp/trunk/src/java/org/apache/regexp/REProgram.java jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java jakarta/regexp/trunk/xdocs/changes.xml Modified: jakarta/regexp/trunk/docs/changes.html URL: http://svn.apache.org/viewvc/jakarta/regexp/trunk/docs/changes.html?view=diff&rev=515865&r1=515864&r2=515865 ============================================================================== --- jakarta/regexp/trunk/docs/changes.html (original) +++ jakarta/regexp/trunk/docs/changes.html Wed Mar 7 16:28:21 2007 @@ -92,6 +92,10 @@ <h3>Version 1.5-dev</h3> <ul> <li>Fixed Bug + <a href="http://issues.apache.org/bugzilla/show_bug.cgi?id=38331">38331</a>: + RE compiler creates incorrect program if pattern results in large program + with offsets exceeding capacity of the short (VG)</li> +<li>Fixed Bug <a href="http://issues.apache.org/bugzilla/show_bug.cgi?id=37275">37275</a>: RE incorrectly processes nested {n,m} closures (ex: (A{1}){0,2}) (VG)</li> <li>Added accessor for REProgram.prefix (VG)</li> Modified: jakarta/regexp/trunk/docs/jakarta-regexp.jar URL: http://svn.apache.org/viewvc/jakarta/regexp/trunk/docs/jakarta-regexp.jar?view=diff&rev=515865&r1=515864&r2=515865 ============================================================================== Binary files - no diff available. 
Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java URL: http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java?view=diff&rev=515865&r1=515864&r2=515865 ============================================================================== --- jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java (original) +++ jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java Wed Mar 7 16:28:21 2007 @@ -812,7 +812,7 @@ for (int node = firstNode; node < lastNode; ) { opcode = instruction[node + offsetOpcode]; - next = node + (short)instruction[node + offsetNext]; + next = node + (short) instruction[node + offsetNext]; opdata = instruction[node + offsetOpdata]; switch (opcode) @@ -1292,7 +1292,7 @@ } // Try all available branches - short nextBranch; + int nextBranch; do { // Try matching the branch against the string @@ -1302,7 +1302,7 @@ } // Go to next branch (if any) - nextBranch = (short)instruction[node + offsetNext]; + nextBranch = (short) instruction[node + offsetNext]; node += nextBranch; } while (nextBranch != 0 && (instruction[node + offsetOpcode] == OP_BRANCH)); Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java URL: http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java?view=diff&rev=515865&r1=515864&r2=515865 ============================================================================== --- jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java (original) +++ jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java Wed Mar 7 16:28:21 2007 @@ -148,7 +148,7 @@ // Move everything from insertAt to the end down nodeSize elements System.arraycopy(instruction, insertAt, instruction, insertAt + RE.nodeSize, lenInstruction - insertAt); instruction[insertAt + RE.offsetOpcode] = opcode; - instruction[insertAt + RE.offsetOpdata] = (char)opdata; + instruction[insertAt + RE.offsetOpdata] = (char) opdata; instruction[insertAt + RE.offsetNext] = 0; lenInstruction += 
RE.nodeSize; } @@ -169,20 +169,29 @@ // if the node we are supposed to point to is in the chain then // point to the end of the program instead. // Michael McCallum <[EMAIL PROTECTED]> - // FIXME: // This is a _hack_ to stop infinite programs. + // FIXME: This is a _hack_ to stop infinite programs. // I believe that the implementation of the reluctant matches is wrong but // have not worked out a better way yet. if ( node == pointTo ) { - pointTo = lenInstruction; + pointTo = lenInstruction; } node += next; next = instruction[node + RE.offsetNext]; } + // if we have reached the end of the program then dont set the pointTo. // im not sure if this will break any thing but passes all the tests. if ( node < lenInstruction ) { + // Some patterns result in very large programs which exceed + // capacity of the short used for specifying signed offset of the + // next instruction. Example: a{1638} + int offset = pointTo - node; + if (offset != (short) offset) { + throw new RESyntaxException("Exceeded short jump range."); + } + // Point the last node in the chain to pointTo. 
- instruction[node + RE.offsetNext] = (char)(short)(pointTo - node); + instruction[node + RE.offsetNext] = (char) (short) offset; } } Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java URL: http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java?view=diff&rev=515865&r1=515864&r2=515865 ============================================================================== --- jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java (original) +++ jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java Wed Mar 7 16:28:21 2007 @@ -150,9 +150,9 @@ for (int i = 0; i < lenInstruction; ) { // Get opcode, opdata and next fields of current program node - char opcode = instruction[i + RE.offsetOpcode]; - char opdata = instruction[i + RE.offsetOpdata]; - short next = (short)instruction[i + RE.offsetNext]; + char opcode = instruction[i + RE.offsetOpcode]; + char opdata = instruction[i + RE.offsetOpdata]; + int next = (short) instruction[i + RE.offsetNext]; // Display the current program node p.print(i + ". 
" + nodeToString(i) + ", next = "); Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/REProgram.java URL: http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/REProgram.java?view=diff&rev=515865&r1=515864&r2=515865 ============================================================================== --- jakarta/regexp/trunk/src/java/org/apache/regexp/REProgram.java (original) +++ jakarta/regexp/trunk/src/java/org/apache/regexp/REProgram.java Wed Mar 7 16:28:21 2007 @@ -117,7 +117,7 @@ if (lenInstruction >= RE.nodeSize && instruction[0 + RE.offsetOpcode] == RE.OP_BRANCH) { // to the end node - char next = instruction[0 + RE.offsetNext]; + int next = (short) instruction[0 + RE.offsetNext]; if (instruction[next + RE.offsetOpcode] == RE.OP_END && lenInstruction >= (RE.nodeSize * 2)) { final char nextOp = instruction[RE.nodeSize + RE.offsetOpcode]; Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java URL: http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java?view=diff&rev=515865&r1=515864&r2=515865 ============================================================================== --- jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java (original) +++ jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java Wed Mar 7 16:28:21 2007 @@ -425,6 +425,14 @@ if (r.match("a\u2029b")) { fail("\"a\\u2029b\" matches \"^a.*b$\""); } + + // Bug 38331: Large program + try { + new RE("a{8192}"); + fail("a{8192} should fail to compile."); + } catch (RESyntaxException e) { + // expected + } } private void testPrecompiledRE() @@ -631,7 +639,7 @@ final String matchAgainst = br.readLine(); final boolean badPattern = "ERR".equals(matchAgainst); boolean shouldMatch = false; - int expectedParenCount = 0; + int expectedParenCount; String[] expectedParens = null; if (!badPattern) { Modified: jakarta/regexp/trunk/xdocs/changes.xml URL: 
http://svn.apache.org/viewvc/jakarta/regexp/trunk/xdocs/changes.xml?view=diff&rev=515865&r1=515864&r2=515865 ============================================================================== --- jakarta/regexp/trunk/xdocs/changes.xml (original) +++ jakarta/regexp/trunk/xdocs/changes.xml Wed Mar 7 16:28:21 2007 @@ -35,6 +35,10 @@ <h3>Version 1.5-dev</h3> <ul> <li>Fixed Bug + <a href="http://issues.apache.org/bugzilla/show_bug.cgi?id=38331">38331</a>: + RE compiler creates incorrect program if pattern results in large program + with offsets exceeding capacity of the short (VG)</li> +<li>Fixed Bug <a href="http://issues.apache.org/bugzilla/show_bug.cgi?id=37275">37275</a>: RE incorrectly processes nested {n,m} closures (ex: (A{1}){0,2}) (VG)</li> <li>Added accessor for REProgram.prefix (VG)</li> --------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]