On Wed, 2006-02-01 at 01:33 +0100, Mark Wielaard wrote:
> Christian told me he needed a little extension to this for the
> "combined" properties "L", "M", "Z", "S", "N", "P" and "C" to get
> eclipse 3.2M4 working. They are explained here:
> http://www.regular-expressions.info/unicode.html
> 
> This is my quick and dirty implementation of it. What do you think?
> I'll test and write a real ChangeLog for it later.

I wrote some Mauve tests for this, which exposed some mistakes in my
earlier code. Here is a fixed version that passes all the new Mauve tests:

2006-02-01  Mark Wielaard  <[EMAIL PROTECTED]>

    * gnu/regexp/RE.java (getRETokenNamedProperty): Chain exception.
    * gnu/regexp/RETokenNamedProperty.java (LETTER, MARK, SEPARATOR,
    SYMBOL, NUMBER, PUNCTUATION, OTHER): New static final byte[] fields.
    (getHandler): Check for grouped properties L, M, Z, S, N, P or C.
    (UnicodeCategoriesHandler): New private static class.

Cheers,

Mark
Index: gnu/regexp/RE.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/RE.java,v
retrieving revision 1.14
diff -u -r1.14 RE.java
--- gnu/regexp/RE.java	31 Jan 2006 14:39:08 -0000	1.14
+++ gnu/regexp/RE.java	1 Feb 2006 22:21:45 -0000
@@ -1210,7 +1210,10 @@
 	return new RETokenNamedProperty(subIndex, np.name, insens, np.negate);
     }
     catch (REException e) {
-	throw new REException(e.getMessage(), REException.REG_ESCAPE, index);
+	REException ree;
+	ree = new REException(e.getMessage(), REException.REG_ESCAPE, index);
+	ree.initCause(e);
+	throw ree;
     }
   }
 
Index: gnu/regexp/RETokenNamedProperty.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/RETokenNamedProperty.java,v
retrieving revision 1.1
diff -u -r1.1 RETokenNamedProperty.java
--- gnu/regexp/RETokenNamedProperty.java	31 Jan 2006 14:39:08 -0000	1.1
+++ gnu/regexp/RETokenNamedProperty.java	1 Feb 2006 22:21:45 -0000
@@ -44,6 +44,51 @@
   boolean negate;
   Handler handler;
 
+  // Grouped properties
+  static final byte[] LETTER = new byte[]
+  { Character.LOWERCASE_LETTER,
+    Character.UPPERCASE_LETTER,
+    Character.TITLECASE_LETTER,
+    Character.MODIFIER_LETTER,
+    Character.OTHER_LETTER };
+  
+  static final byte[] MARK = new byte[]
+  { Character.NON_SPACING_MARK,
+    Character.COMBINING_SPACING_MARK,
+    Character.ENCLOSING_MARK };
+  
+  static final byte[] SEPARATOR = new byte[]
+  { Character.SPACE_SEPARATOR,
+    Character.LINE_SEPARATOR,
+    Character.PARAGRAPH_SEPARATOR };
+  
+  static final byte[] SYMBOL = new byte[]
+  { Character.MATH_SYMBOL,
+    Character.CURRENCY_SYMBOL,
+    Character.MODIFIER_SYMBOL,
+    Character.OTHER_SYMBOL };
+  
+  static final byte[] NUMBER = new byte[]
+  { Character.DECIMAL_DIGIT_NUMBER,
+    Character.LETTER_NUMBER,
+    Character.OTHER_NUMBER };
+  
+  static final byte[] PUNCTUATION = new byte[]
+  { Character.DASH_PUNCTUATION,
+    Character.START_PUNCTUATION,
+    Character.END_PUNCTUATION,
+    Character.CONNECTOR_PUNCTUATION,
+    Character.OTHER_PUNCTUATION,
+    Character.INITIAL_QUOTE_PUNCTUATION,
+    Character.FINAL_QUOTE_PUNCTUATION};
+  
+  static final byte[] OTHER = new byte[]
+  { Character.CONTROL,
+    Character.FORMAT,
+    Character.PRIVATE_USE,
+    Character.SURROGATE,
+    Character.UNASSIGNED };
+
   RETokenNamedProperty(int subIndex, String name, boolean insens, boolean negate) throws REException {
     super(subIndex);
     this.name = name;
@@ -108,6 +153,23 @@
       if (name.startsWith("Is")) {
           name = name.substring(2);
       }
+
+      // "grouped properties"
+      if (name.equals("L"))
+	  return new UnicodeCategoriesHandler(LETTER);
+      if (name.equals("M"))
+	  return new UnicodeCategoriesHandler(MARK);
+      if (name.equals("Z"))
+	  return new UnicodeCategoriesHandler(SEPARATOR);
+      if (name.equals("S"))
+	  return new UnicodeCategoriesHandler(SYMBOL);
+      if (name.equals("N"))
+	  return new UnicodeCategoriesHandler(NUMBER);
+      if (name.equals("P"))
+	  return new UnicodeCategoriesHandler(PUNCTUATION);
+      if (name.equals("C"))
+	  return new UnicodeCategoriesHandler(OTHER);
+
       if (name.equals("Mc"))
           return new UnicodeCategoryHandler(Character.COMBINING_SPACING_MARK);
       if (name.equals("Pc"))
@@ -199,5 +261,18 @@
           return Character.getType(c) == category;
       }
   }
- 
+
+  private static class UnicodeCategoriesHandler extends Handler {
+      public UnicodeCategoriesHandler(byte[] categories) {
+          this.categories = categories;
+      }
+      private byte[] categories;
+      public boolean includes(char c) {
+	  int category = Character.getType(c);
+          for (int i = 0; i < categories.length; i++)
+              if (category == categories[i])
+	          return true;
+	  return false;
+      }
+  }
 }

Attachment: signature.asc
Description: This is a digitally signed message part

Reply via email to