Hi Ito, On Mon, 2006-01-30 at 22:19 +0900, Ito Kazumitsu wrote: > ChangeLog > 2006-01-30 Ito Kazumitsu <[EMAIL PROTECTED]> > > Fixes bug #26002 > * gnu/regexp/RE.java(initialize): Parse /\p{prop}/. > (NamedProperty): New inner class. > (getNamedProperty): New method. > (getRETokenNamedProperty): New method. > * gnu/regexp/RESyntax.java(RE_NAMED_PROPERTY): New syntax flag. > * gnu/regexp/RETokenNamedProperty.java: New file.
Christian told me he needed a little extension to this for the "combined" properties "L", "M", "Z", "S", "N", "P" and "C" to get Eclipse 3.2M4 working. They are explained here: http://www.regular-expressions.info/unicode.html This is my quick and dirty implementation of it. What do you think? I'll test and write a real ChangeLog for it later. Cheers, Mark
Index: gnu/regexp/RE.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/RE.java,v
retrieving revision 1.14
diff -u -r1.14 RE.java
--- gnu/regexp/RE.java 31 Jan 2006 14:39:08 -0000 1.14
+++ gnu/regexp/RE.java 31 Jan 2006 23:41:28 -0000
@@ -1210,7 +1210,11 @@
       return new RETokenNamedProperty(subIndex, np.name, insens, np.negate);
     }
     catch (REException e) {
-      throw new REException(e.getMessage(), REException.REG_ESCAPE, index);
+      REException ree;
+      ree = new REException(e.getMessage(), REException.REG_ESCAPE, index);
+      // Keep the original exception as the cause so its stack trace survives.
+      ree.initCause(e);
+      throw ree;
     }
   }
 
Index: gnu/regexp/RETokenNamedProperty.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/RETokenNamedProperty.java,v
retrieving revision 1.1
diff -u -r1.1 RETokenNamedProperty.java
--- gnu/regexp/RETokenNamedProperty.java 31 Jan 2006 14:39:08 -0000 1.1
+++ gnu/regexp/RETokenNamedProperty.java 31 Jan 2006 23:41:28 -0000
@@ -44,6 +44,44 @@
   boolean negate;
   Handler handler;
 
+  // Grouped properties
+  static final byte[] LETTER = new byte[] { Character.LOWERCASE_LETTER,
+                                            Character.UPPERCASE_LETTER,
+                                            Character.TITLECASE_LETTER,
+                                            Character.MODIFIER_LETTER,
+                                            Character.OTHER_LETTER };
+
+  static final byte[] MARK = new byte[] { Character.NON_SPACING_MARK,
+                                          Character.COMBINING_SPACING_MARK,
+                                          Character.ENCLOSING_MARK };
+
+  static final byte[] SEPARATOR = new byte[] { Character.SPACE_SEPARATOR,
+                                               Character.LINE_SEPARATOR,
+                                               Character.PARAGRAPH_SEPARATOR };
+
+  static final byte[] SYMBOL = new byte[] { Character.MATH_SYMBOL,
+                                            Character.CURRENCY_SYMBOL,
+                                            Character.MODIFIER_SYMBOL,
+                                            Character.OTHER_SYMBOL };
+
+  static final byte[] NUMBER = new byte[] { Character.DECIMAL_DIGIT_NUMBER,
+                                            Character.LETTER_NUMBER,
+                                            Character.OTHER_NUMBER };
+
+  static final byte[] PUNCTUATION = new byte[] { Character.DASH_PUNCTUATION,
+                                                 Character.START_PUNCTUATION,
+                                                 Character.END_PUNCTUATION,
+                                                 Character.CONNECTOR_PUNCTUATION,
+                                                 Character.OTHER_PUNCTUATION,
+                                                 Character.INITIAL_QUOTE_PUNCTUATION,
+                                                 Character.FINAL_QUOTE_PUNCTUATION};
+
+  static final byte[] OTHER = new byte[] { Character.CONTROL,
+                                           Character.FORMAT,
+                                           Character.PRIVATE_USE,
+                                           Character.SURROGATE,
+                                           Character.UNASSIGNED };
+
   RETokenNamedProperty(int subIndex, String name, boolean insens, boolean negate) throws REException {
     super(subIndex);
     this.name = name;
@@ -108,6 +146,23 @@
       if (name.startsWith("Is")) {
           name = name.substring(2);
       }
+
+      // "grouped properties"
+      if (name.equals("L"))
+          return new UnicodeCategoriesHandler(LETTER);
+      if (name.equals("M"))
+          return new UnicodeCategoriesHandler(MARK);
+      if (name.equals("Z"))
+          return new UnicodeCategoriesHandler(SEPARATOR);
+      if (name.equals("S"))
+          return new UnicodeCategoriesHandler(SYMBOL);
+      if (name.equals("N"))
+          return new UnicodeCategoriesHandler(NUMBER);
+      if (name.equals("P"))
+          return new UnicodeCategoriesHandler(PUNCTUATION);
+      if (name.equals("C"))
+          return new UnicodeCategoriesHandler(OTHER);
+
       if (name.equals("Mc"))
           return new UnicodeCategoryHandler(Character.COMBINING_SPACING_MARK);
       if (name.equals("Pc"))
@@ -199,5 +254,22 @@
           return Character.getType(c) == category;
       }
   }
-
+
+  /**
+   * Handler for a grouped property: includes a character when
+   * Character.getType(c) equals any one of the given categories.
+   */
+  private static class UnicodeCategoriesHandler extends Handler {
+      public UnicodeCategoriesHandler(byte[] categories) {
+          this.categories = categories;
+      }
+      private byte[] categories;
+      public boolean includes(char c) {
+          int category = Character.getType(c);
+          for (int i = 0; i < categories.length; i++)
+              if (category == categories[i])
+                  return true;
+          return false;
+      }
+  }
 }
signature.asc
Description: This is a digitally signed message part