Hi Ito,
On Mon, 2006-01-30 at 22:19 +0900, Ito Kazumitsu wrote:
> ChangeLog
> 2006-01-30 Ito Kazumitsu <[EMAIL PROTECTED]>
>
> Fixes bug #26002
> * gnu/regexp/RE.java(initialize): Parse /\p{prop}/.
> (NamedProperty): New inner class.
> (getNamedProperty): New method.
> (getRETokenNamedProperty): New Method.
> * gnu/regexp/RESyntax.java(RE_NAMED_PROPERTY): New syntax flag.
> * gnu/regexp/RETokenNamedProperty.java: New file.

Christian told me he needed a small extension to this for the "combined" properties "L", "M", "Z", "S", "N", "P" and "C" to get Eclipse 3.2M4 working. They are explained here: http://www.regular-expressions.info/unicode.html

This is my quick and dirty implementation of it. What do you think? I'll test it and write a real ChangeLog entry for it later.

Cheers,

Mark

Index: gnu/regexp/RE.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/RE.java,v
retrieving revision 1.14
diff -u -r1.14 RE.java
--- gnu/regexp/RE.java 31 Jan 2006 14:39:08 -0000 1.14
+++ gnu/regexp/RE.java 31 Jan 2006 23:41:28 -0000
@@ -1210,7 +1210,10 @@
return new RETokenNamedProperty(subIndex, np.name, insens, np.negate);
}
catch (REException e) {
- throw new REException(e.getMessage(), REException.REG_ESCAPE, index);
+ REException ree;
+ ree = new REException(e.getMessage(), REException.REG_ESCAPE, index);
+ ree.initCause(e);
+ throw ree;
}
}
Index: gnu/regexp/RETokenNamedProperty.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/RETokenNamedProperty.java,v
retrieving revision 1.1
diff -u -r1.1 RETokenNamedProperty.java
--- gnu/regexp/RETokenNamedProperty.java 31 Jan 2006 14:39:08 -0000 1.1
+++ gnu/regexp/RETokenNamedProperty.java 31 Jan 2006 23:41:28 -0000
@@ -44,6 +44,44 @@
boolean negate;
Handler handler;
+ // Grouped properties: each array holds the Character category constants that together form one single-letter property (e.g. "L" selects LETTER).
+ static final byte[] LETTER = new byte[] { Character.LOWERCASE_LETTER,
+ Character.UPPERCASE_LETTER,
+ Character.TITLECASE_LETTER,
+ Character.MODIFIER_LETTER,
+ Character.OTHER_LETTER };
+
+ static final byte[] MARK = new byte[] { Character.NON_SPACING_MARK,
+ Character.COMBINING_SPACING_MARK,
+ Character.ENCLOSING_MARK };
+
+ static final byte[] SEPARATOR = new byte[] { Character.SPACE_SEPARATOR,
+ Character.LINE_SEPARATOR,
+ Character.PARAGRAPH_SEPARATOR };
+
+ static final byte[] SYMBOL = new byte[] { Character.MATH_SYMBOL,
+ Character.CURRENCY_SYMBOL,
+ Character.MODIFIER_SYMBOL,
+ Character.OTHER_SYMBOL };
+
+ static final byte[] NUMBER = new byte[] { Character.DECIMAL_DIGIT_NUMBER,
+ Character.LETTER_NUMBER,
+ Character.OTHER_NUMBER };
+
+ static final byte[] PUNCTUATION = new byte[] { Character.DASH_PUNCTUATION,
+ Character.START_PUNCTUATION,
+ Character.END_PUNCTUATION,
+ Character.CONNECTOR_PUNCTUATION,
+ Character.OTHER_PUNCTUATION,
+ Character.INITIAL_QUOTE_PUNCTUATION,
+ Character.FINAL_QUOTE_PUNCTUATION};
+
+ static final byte[] OTHER = new byte[] { Character.CONTROL,
+ Character.FORMAT,
+ Character.PRIVATE_USE,
+ Character.SURROGATE,
+ Character.UNASSIGNED };
+
RETokenNamedProperty(int subIndex, String name, boolean insens, boolean negate) throws REException {
super(subIndex);
this.name = name;
@@ -108,6 +146,23 @@
if (name.startsWith("Is")) {
name = name.substring(2);
}
+
+ // "grouped properties": one letter stands for every category of its group (Z = separators, S = symbols)
+ if (name.equals("L"))
+ return new UnicodeCategoriesHandler(LETTER);
+ if (name.equals("M"))
+ return new UnicodeCategoriesHandler(MARK);
+ if (name.equals("Z"))
+ return new UnicodeCategoriesHandler(SEPARATOR);
+ if (name.equals("S"))
+ return new UnicodeCategoriesHandler(SYMBOL);
+ if (name.equals("N"))
+ return new UnicodeCategoriesHandler(NUMBER);
+ if (name.equals("P"))
+ return new UnicodeCategoriesHandler(PUNCTUATION);
+ if (name.equals("C"))
+ return new UnicodeCategoriesHandler(OTHER);
+
if (name.equals("Mc"))
return new UnicodeCategoryHandler(Character.COMBINING_SPACING_MARK);
if (name.equals("Pc"))
@@ -199,5 +252,18 @@
return Character.getType(c) == category;
}
}
-
+
+ private static class UnicodeCategoriesHandler extends Handler { // Handler that accepts a character whose category is any one of several
+ public UnicodeCategoriesHandler(byte[] categories) {
+ this.categories = categories; // the array is stored as-is, not copied
+ }
+ private byte[] categories; // category constants compared against Character.getType(c)
+ public boolean includes(char c) { // true iff c's Character.getType() value appears in categories
+ int category = Character.getType(c);
+ for (int i = 0; i < categories.length; i++)
+ if (category == categories[i])
+ return true;
+ return false;
+ }
+ }
}
signature.asc
Description: This is a digitally signed message part
