Hi Ito,

On Mon, 2006-01-30 at 22:19 +0900, Ito Kazumitsu wrote:
> ChangeLog
> 2006-01-30  Ito Kazumitsu  <[EMAIL PROTECTED]>
> 
>       Fixes bug #26002
>       * gnu/regexp/RE.java(initialize): Parse /\p{prop}/.
>       (NamedProperty): New inner class.
>       (getNamedProperty): New method.
>       (getRETokenNamedProperty): New method.
>       * gnu/regexp/RESyntax.java(RE_NAMED_PROPERTY): New syntax flag.
>       * gnu/regexp/RETokenNamedProperty.java: New file.

Christian told me he needed a little extension to this for the
"combined" properties "L", "M", "Z", "S", "N", "P" and "C" to get
Eclipse 3.2M4 working. They are explained here:
http://www.regular-expressions.info/unicode.html

This is my quick and dirty implementation of it. What do you think?
I'll test and write a real ChangeLog for it later.

Cheers,

Mark
Index: gnu/regexp/RE.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/RE.java,v
retrieving revision 1.14
diff -u -r1.14 RE.java
--- gnu/regexp/RE.java	31 Jan 2006 14:39:08 -0000	1.14
+++ gnu/regexp/RE.java	31 Jan 2006 23:41:28 -0000
@@ -1210,7 +1210,10 @@
 	return new RETokenNamedProperty(subIndex, np.name, insens, np.negate);
     }
     catch (REException e) {
-	throw new REException(e.getMessage(), REException.REG_ESCAPE, index);
+	REException ree;
+	ree = new REException(e.getMessage(), REException.REG_ESCAPE, index);
+	ree.initCause(e);
+	throw ree;
     }
   }
 
Index: gnu/regexp/RETokenNamedProperty.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/RETokenNamedProperty.java,v
retrieving revision 1.1
diff -u -r1.1 RETokenNamedProperty.java
--- gnu/regexp/RETokenNamedProperty.java	31 Jan 2006 14:39:08 -0000	1.1
+++ gnu/regexp/RETokenNamedProperty.java	31 Jan 2006 23:41:28 -0000
@@ -44,6 +44,44 @@
   boolean negate;
   Handler handler;
 
+  // Grouped properties
+  static final byte[] LETTER = new byte[] { Character.LOWERCASE_LETTER,
+                                          Character.UPPERCASE_LETTER,
+                                          Character.TITLECASE_LETTER,
+                                          Character.MODIFIER_LETTER,
+                                          Character.OTHER_LETTER };
+
+  static final byte[] MARK = new byte[] { Character.NON_SPACING_MARK,
+                                          Character.COMBINING_SPACING_MARK,
+                                          Character.ENCLOSING_MARK };
+
+  static final byte[] SEPARATOR = new byte[] { Character.SPACE_SEPARATOR,
+	                                  Character.LINE_SEPARATOR,
+					  Character.PARAGRAPH_SEPARATOR };
+
+  static final byte[] SYMBOL = new byte[] { Character.MATH_SYMBOL,
+	                                  Character.CURRENCY_SYMBOL,
+                                          Character.MODIFIER_SYMBOL,
+                                          Character.OTHER_SYMBOL };
+
+  static final byte[] NUMBER = new byte[] { Character.DECIMAL_DIGIT_NUMBER,
+	                                  Character.LETTER_NUMBER,
+                                          Character.OTHER_NUMBER };
+
+  static final byte[] PUNCTUATION = new byte[] { Character.DASH_PUNCTUATION,
+	                                  Character.START_PUNCTUATION,
+                                          Character.END_PUNCTUATION,
+                                          Character.CONNECTOR_PUNCTUATION,
+                                          Character.OTHER_PUNCTUATION,
+                                          Character.INITIAL_QUOTE_PUNCTUATION,
+                                          Character.FINAL_QUOTE_PUNCTUATION};
+
+  static final byte[] OTHER = new byte[] { Character.CONTROL,
+	                                  Character.FORMAT,
+                                          Character.PRIVATE_USE,
+                                          Character.SURROGATE,
+                                          Character.UNASSIGNED };
+
   RETokenNamedProperty(int subIndex, String name, boolean insens, boolean negate) throws REException {
     super(subIndex);
     this.name = name;
@@ -108,6 +146,24 @@
       if (name.startsWith("Is")) {
           name = name.substring(2);
       }
+
+      // "Grouped properties": a one-letter name matches any category in
+      // the corresponding array (note: "Z" is separators, "S" is symbols).
+      if (name.equals("L"))
+	  return new UnicodeCategoriesHandler(LETTER);
+      if (name.equals("M"))
+	  return new UnicodeCategoriesHandler(MARK);
+      if (name.equals("Z"))
+	  return new UnicodeCategoriesHandler(SEPARATOR);
+      if (name.equals("S"))
+	  return new UnicodeCategoriesHandler(SYMBOL);
+      if (name.equals("N"))
+	  return new UnicodeCategoriesHandler(NUMBER);
+      if (name.equals("P"))
+	  return new UnicodeCategoriesHandler(PUNCTUATION);
+      if (name.equals("C"))
+	  return new UnicodeCategoriesHandler(OTHER);
+
       if (name.equals("Mc"))
           return new UnicodeCategoryHandler(Character.COMBINING_SPACING_MARK);
       if (name.equals("Pc"))
@@ -199,5 +255,18 @@
           return Character.getType(c) == category;
       }
   }
- 
+
+  private static class UnicodeCategoriesHandler extends Handler {
+      public UnicodeCategoriesHandler(byte[] categories) {
+          this.categories = categories;
+      }
+      private byte[] categories;
+      public boolean includes(char c) {
+	  int category = Character.getType(c);
+          for (int i = 0; i < categories.length; i++)
+              if (category == categories[i])
+	          return true;
+	  return false;
+      }
+  }
 }

Attachment: signature.asc
Description: This is a digitally signed message part

Reply via email to