Re: review request for 6798511/6860431: Include functionality of Surrogate in Character

Ulf Zibis Sun, 21 Mar 2010 08:46:43 -0700

Am 21.03.2010 08:56, schrieb Martin Buchholz:

On Sat, Mar 20, 2010 at 14:52, Ulf Zibis<ulf.zi...@gmx.de>  wrote:

I now believe we should provide
Character.highSurrogate and Character.lowSurrogate
as you have been advocating.


If Sherman agrees, let's put a proper patch for this together.

- I too would move the charCount logic from String(int[], int, int) to class Character, at least as a package-private helper method. There is already another charCount method in the neighbourhood.
- Additionally, maybe logic to handle invalid surrogate code points would be interesting.

I've attached the newest version of my patch, which you can compare with your current state, ignoring some style differences etc.


-Ulf

# HG changeset patch
# Parent 481a23384ae41a4fac1343622b524b5d18a67257
adding Character.charCount(int[] codePoints, int offset, int count, boolean 
validateNonSurrogates),
and use it from String(int[],int,int).
depends on patch "String_checkBounds".

diff --git a/src/share/classes/java/lang/AbstractStringBuilder.java 
b/src/share/classes/java/lang/AbstractStringBuilder.java
--- a/src/share/classes/java/lang/AbstractStringBuilder.java
+++ b/src/share/classes/java/lang/AbstractStringBuilder.java
@@ -802,23 +802,17 @@
      * {...@code codePoint} isn't a valid Unicode code point
      */
     public AbstractStringBuilder appendCodePoint(int codePoint) {
-        if (!Character.isValidCodePoint(codePoint)) {
+        int count = this.count;
+        if (Character.isBMPCodePoint(codePoint)) {
+            ensureCapacity(count + 1);
+            value[count] = (char)codePoint;
+            this.count = count + 1;
+        } else if (Character.isValidCodePoint(codePoint)) {
+            ensureCapacity(count + 2);
+            Character.toSurrogates(codePoint, value, count);
+            this.count = count + 2;
+        } else
             throw new IllegalArgumentException();
-        }
-        int n = 1;
-        if (codePoint >= Character.MIN_SUPPLEMENTARY_CODE_POINT) {
-            n++;
-        }
-        int newCount = count + n;
-        if (newCount > value.length) {
-            expandCapacity(newCount);
-        }
-        if (n == 1) {
-            value[count++] = (char) codePoint;
-        } else {
-            Character.toSurrogates(codePoint, value, count);
-            count += n;
-        }
         return this;
     }
 
diff --git a/src/share/classes/java/lang/Character.java 
b/src/share/classes/java/lang/Character.java
--- a/src/share/classes/java/lang/Character.java
+++ b/src/share/classes/java/lang/Character.java
@@ -28,7 +28,7 @@
 import java.util.Map;
 import java.util.HashMap;
 import java.util.Locale;
-//import static java.lang.AbstractStringBuilder.checkBoundsOnCount;
+import static java.lang.AbstractStringBuilder.checkBoundsOnCount;
 //import static java.lang.AbstractStringBuilder.checkBoundsOnEnd;
 //import static java.lang.AbstractStringBuilder.checkIndexBound;
 
@@ -627,8 +627,8 @@
         /**
          * Constructs a new <code>Subset</code> instance.
          *
+         * @param  name  The name of this subset
          * @exception NullPointerException if name is <code>null</code>
-         * @param  name  The name of this subset
          */
         protected Subset(String name) {
             if (name == null) {
@@ -2574,13 +2574,14 @@
     }
 
     private static class CharacterCache {
-        private CharacterCache(){}
-
-        static final Character cache[] = new Character[127 + 1];
+//        private CharacterCache(){} // superfluous, as class is private
+
+        static final char SIZE = 0x80;
+        static final Character cache[] = new Character[SIZE];
 
         static {
-            for(int i = 0; i < cache.length; i++)
-                cache[i] = new Character((char)i);
+            for(char c = SIZE; c > 0;) // backwards saves comparison against 
non-zero limit
+                cache[--c] = new Character(c); // char saves i2c byte code
         }
     }
 
@@ -2611,9 +2612,8 @@
      * @since  1.5
      */
     public static Character valueOf(char c) {
-        if(c <= 127) { // must cache
-            return CharacterCache.cache[(int)c];
-        }
+        if(c < CharacterCache.SIZE) // must cache
+            return CharacterCache.cache[c];
         return new Character(c);
     }
 
@@ -2693,13 +2693,45 @@
      * @since  1.5
      */
     public static boolean isValidCodePoint(int codePoint) {
-        return codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT;
+        return (codePoint >>> 16) < (MAX_CODE_POINT + 1 >>> 16);
+        // Optimized form of:
+        // codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
+    }
+
+    /**
+     * Tells whether the specified character (Unicode code point) is in the
+     * Basic Multilingual Plane, and can be represented using a single {code@ 
char}.
+     * The method call is equivalent to the expression:
+     * <blockquote><pre>
+     * isValidCodePoint(codePoint) && !isSupplementaryCodePoint(codePoint)
+     * </pre></blockquote>
+     *
+     * <p><b>Note:</b> Does not check, if code point is outside illegal 
surrogate
+     * range, i.e. U+D800 ... U+DFFF, and, in case, falsely returns {...@code 
true}.
+     * To avoid this, check this by {...@link #isSurrogate(char)} method 
before.
+     *
+     * @param  codePoint the Unicode code point to be tested
+     * @return {...@code true} if the specified code point value is between
+     *         {...@link #MIN_VALUE} and {...@link #MAX_VALUE} inclusive;
+     *         {...@code false} otherwise.
+     * @since  1.7
+     */
+    public static boolean isBMPCodePoint(int codePoint) {
+        return (codePoint >>> 16) == 0;
+        // Optimized form of:
+        // codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
+        // Use logical shift (>>>) to permit additional JIT compiler
+        // optimization if followed by method isValidCodePoint()
     }
 
     /**
      * Determines whether the specified character (Unicode code point)
      * is in the <a href="#supplementary">supplementary character</a> range.
      *
+     * <p><b>Note:</b> In combination with {...@link #isBMPCodePoint(int)} this
+     * method should be in 2nd place to permit additional JIT compiler
+     * optimization.
+     *
      * @param  codePoint the character (Unicode code point) to be tested
      * @return {...@code true} if the specified code point is between
      *         {...@link #MIN_SUPPLEMENTARY_CODE_POINT} and
@@ -2708,8 +2740,10 @@
      * @since  1.5
      */
     public static boolean isSupplementaryCodePoint(int codePoint) {
-        return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
-            && codePoint <= MAX_CODE_POINT;
+        return !isBMPCodePoint(codePoint) && isValidCodePoint(codePoint);
+    }
+    public static boolean isSuppCPalaMartin(int codePoint) {
+        return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT && codePoint < 
MAX_CODE_POINT + 1;
     }
 
     /**
@@ -2733,7 +2767,7 @@
      * @since  1.5
      */
     public static boolean isHighSurrogate(char ch) {
-        return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE;
+        return (ch -= MIN_HIGH_SURROGATE) >= 0 && ch < MAX_HIGH_SURROGATE + 1 
- MIN_HIGH_SURROGATE;
     }
 
     /**
@@ -2756,7 +2790,7 @@
      * @since  1.5
      */
     public static boolean isLowSurrogate(char ch) {
-        return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE;
+        return (ch -= MIN_LOW_SURROGATE) >= 0 && ch < MAX_LOW_SURROGATE + 1 - 
MIN_LOW_SURROGATE;
     }
 
     /**
@@ -2780,7 +2814,7 @@
      * @since  1.7
      */
     public static boolean isSurrogate(char ch) {
-        return ch >= MIN_SURROGATE && ch <= MAX_SURROGATE;
+        return (ch -= MIN_SURROGATE) >= 0 && ch < MAX_SURROGATE + 1 - 
MIN_SURROGATE;
     }
 
     /**
@@ -2811,13 +2845,13 @@
      * specified character is equal to or greater than 0x10000, then
      * the method returns 2. Otherwise, the method returns 1.
      *
-     * <p>This method doesn't validate the specified character to be a
-     * valid Unicode code point. The caller must validate the
-     * character value using {...@link #isValidCodePoint(int) isValidCodePoint}
-     * if necessary.
+     * <p><b>Note:</b> This method doesn't validate the specified character
+     * to be a valid Unicode code point. The caller must validate the
+     * character value using {...@link #isValidCodePoint(int)} if necessary.
      *
-     * @param   codePoint the character (Unicode code point) to be tested.
-     * @return  2 if the character is a valid supplementary character; 1 
otherwise.
+     * @param   codePoint The character (Unicode code point) to be tested.
+     * @return  2 if the character is a valid supplementary character, 1 
otherwise.
+     * @see     #isBMPCodePoint(int)
      * @see     #isSupplementaryCodePoint(int)
      * @since   1.5
      */
@@ -2826,10 +2860,119 @@
     }
 
     /**
+     * Determines the number of {...@code char} values needed to represent
+     * the specified character (Unicode code point). If the specified
+     * character is equal to or greater than U+10000, then this method
+     * returns 2. Otherwise, this method returns 1.
+     *
+     * @param  codePoint The character (Unicode code point) to be tested.
+     * @param  validateNonSurrogates Determines if the character should be
+     *         validated using {...@link #isSurrogate(char)}.
+     * @return 2 if the character is a supplementary character, 1 otherwise, or
+     *         0 if {...@code codePoint} is outside the valid Unicode range 
and,
+     *         if {...@code validateNonSurrogates} is {...@code true}, within 
the
+     *         reserved surrogate range.
+     * @see    #isValidCodePoint(int)
+     * @see    #isBMPCodePoint(int)
+     * @see    #isSupplementaryCodePoint(int)
+     * @see    #isSurrogate(char)
+     * @since  1.7
+     */
+    public static int charCount(int codePoint, boolean validateNonSurrogates) {
+        if (isBMPCodePoint(codePoint)) {
+            if (!validateNonSurrogates || !isSurrogate((char)codePoint))
+                return 1;
+        } else if (isValidCodePoint(codePoint))
+            return 2;
+        return 0;
+    }
+
+    /**
+     * Determines the number of {...@code char} values needed to
+     * represent the specified characters (Unicode code point).
+     *
+     * <p><b>Note:</b> This method doesn't validate the specified characters 
to be
+     * valid Unicode code points if {...@code validateNonSurrogates} is 
{...@code false}.
+     *
+     * @param  codePoints The characters (Unicode code points) to be tested.
+     * @param  offset The initial offset.
+     * @param  count The length.
+     * @param  validateNonSurrogates Determines if the characters should be
+     *         validated using {...@link #isSurrogate(char)}.
+     * @return The number of {...@code char} values needed to represent
+     *         the specified characters.
+     * @throws IllegalArgumentException
+     *         If {...@code codePoint} is outside the valid Unicode range and, 
if
+     *         {...@code validateNonSurrogates} is {...@code true}, within the
+     *         reserved surrogate range.
+     * @throws StringIndexOutOfBoundsException
+     *         If the {...@code offset} and {...@code count} arguments index
+     *         characters outside the bounds of the {...@code codePoints} 
array.
+     * @see    #isValidCodePoint(int)
+     * @see    #isBMPCodePoint(int)
+     * @see    #isSupplementaryCodePoint(int)
+     * @see    #isSurrogate(char)
+     * @since  1.7
+     */
+    public static int charCount(int[] codePoints, int offset, int count, 
boolean validateNonSurrogates) {
+        checkBoundsOnCount(codePoints.length, offset, count);
+        int n = 0;
+        for (int i = offset; i < offset + count; i++) {
+            int m = charCount(codePoints[i], validateNonSurrogates);
+            if (m == 0)
+                throw new IllegalArgumentException("At position "+i);
+            n += m;
+        }
+        return n;
+    }
+    public static int charCount(int[] codePoints, int offset, int count) {
+        checkBoundsOnCount(codePoints.length, offset, count);
+        int n = 0;
+        for (int i = offset; i < offset + count; i++) {
+            int c = codePoints[i];
+            if (Character.isBMPCodePoint(c))
+                n += 1;
+            else if (Character.isValidCodePoint(c))
+                n += 2;
+            else
+                throw new IllegalArgumentException("At position "+i);
+        }
+        return n;
+    }
+    public static int charCount2(int[] codePoints, int offset, int count) {
+        checkBoundsOnCount(codePoints.length, offset, count);
+        int n = 0;
+        for (int i = offset; i < offset + count; i++) {
+            int c = codePoints[i];
+            if (Character.isBMPCodePoint(c))
+                n += 1;
+            else if (Character.isSupplementaryCodePoint(c))
+                n += 2;
+            else
+                throw new IllegalArgumentException("At position "+i);
+        }
+        return n;
+    }
+    public static int charCountAlaMartin(int[] codePoints, int offset, int 
count) {
+        checkBoundsOnCount(codePoints.length, offset, count);
+        int n = 0;
+        for (int i = offset; i < offset + count; i++) {
+            int c = codePoints[i];
+            if (Character.isBMPCodePoint(c))
+                n += 1;
+            else if (Character.isSuppCPalaMartin(c))
+                n += 2;
+            else
+                throw new IllegalArgumentException("At position "+i);
+        }
+        return n;
+    }
+
+    /**
      * Converts the specified surrogate pair to its supplementary code
      * point value. This method does not validate the specified
      * surrogate pair. The caller must validate it using {...@link
-     * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
+     * #isSurrogatePair(char, char)} if necessary.
      *
      * @param  high the high-surrogate code unit
      * @param  low the low-surrogate code unit
@@ -3076,7 +3219,7 @@
      * @exception IllegalArgumentException if the specified
      * <code>codePoint</code> is not a valid Unicode code point.
      * @exception NullPointerException if the specified <code>dst</code> is 
null.
-     * @exception IndexOutOfBoundsException if <code>dstIndex</code>
+     * @exception ArrayIndexOutOfBoundsException if <code>dstIndex</code>
      * is negative or not less than <code>dst.length</code>, or if
      * <code>dst</code> at <code>dstIndex</code> doesn't have enough
      * array element(s) to store the resulting <code>char</code>
@@ -3088,15 +3231,13 @@
      * @since  1.5
      */
     public static int toChars(int codePoint, char[] dst, int dstIndex) {
-        if (codePoint < 0 || codePoint > MAX_CODE_POINT) {
+        if (isBMPCodePoint(codePoint)) {
+            dst[dstIndex] = (char)codePoint;
+            return 1;
+        } else if (isValidCodePoint(codePoint))
+            return toSurrogates(codePoint, dst, dstIndex);
+        else
             throw new IllegalArgumentException();
-        }
-        if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) {
-            dst[dstIndex] = (char) codePoint;
-            return 1;
-        }
-        toSurrogates(codePoint, dst, dstIndex);
-        return 2;
     }
 
     /**
@@ -3116,22 +3257,22 @@
      * @since  1.5
      */
     public static char[] toChars(int codePoint) {
-        if (codePoint < 0 || codePoint > MAX_CODE_POINT) {
+        if (isBMPCodePoint(codePoint))
+            return new char[] { (char)codePoint };
+        else if (isValidCodePoint(codePoint)) {
+            char[] result = new char[2];
+            toSurrogates(codePoint, result, 0);
+            return result;
+        } else
             throw new IllegalArgumentException();
-        }
-        if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) {
-                return new char[] { (char) codePoint };
-        }
-        char[] result = new char[2];
-        toSurrogates(codePoint, result, 0);
-        return result;
     }
 
-    static void toSurrogates(int codePoint, char[] dst, int index) {
+    static int toSurrogates(int codePoint, char[] dst, int index) {
         // We write elements "backwards" to guarantee all-or-nothing
         dst[index+1] = (char)((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
         dst[index] = (char)((codePoint >>> 10)
             + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
+        return 2;
     }
 
     /**
@@ -4716,8 +4857,11 @@
      * @since   1.5
      */
     public static boolean isISOControl(int codePoint) {
-        return (codePoint >= 0x0000 && codePoint <= 0x001F) ||
-            (codePoint >= 0x007F && codePoint <= 0x009F);
+        return codePoint <= 0x9F &&
+                (codePoint >= 0x7F || (codePoint >>> 5 == 0));
+        // Optimized form of:
+        // codePoint >= 0x00 && codePoint <= 0x1F ||
+        //         codePoint >= 0x7F && codePoint <= 0x9F
     }
 
     /**
@@ -5035,9 +5179,8 @@
      * @since 1.4
      */
     static char[] toUpperCaseCharArray(int codePoint) {
-        // As of Unicode 4.0, 1:M uppercasings only happen in the BMP.
-        assert isValidCodePoint(codePoint) &&
-               !isSupplementaryCodePoint(codePoint);
+        // As of Unicode 5.1.0, 1:M uppercasings only happen in the BMP.
+        assert isBMPCodePoint(codePoint);
         return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
     }
 
diff --git a/src/share/classes/java/lang/String.java 
b/src/share/classes/java/lang/String.java
--- a/src/share/classes/java/lang/String.java
+++ b/src/share/classes/java/lang/String.java
@@ -147,9 +147,9 @@
      * unnecessary since Strings are immutable.
      */
     public String() {
+        this.value = new char[0];
         this.offset = 0;
         this.count = 0;
-        this.value = new char[0];
     }
 
     /**
@@ -163,23 +163,23 @@
      *         A {...@code String}
      */
     public String(String original) {
-        int size = original.count;
         char[] originalValue = original.value;
+        int originalLength = original.count;
         char[] v;
-        if (originalValue.length > size) {
+        if (originalValue.length > originalLength) {
             // The array representing the String is bigger than the new
             // String itself.  Perhaps this constructor is being called
             // in order to trim the baggage, so make a copy of the array.
             int off = original.offset;
-            v = Arrays.copyOfRange(originalValue, off, off+size);
+            v = Arrays.copyOfRange(originalValue, off, off+originalLength);
         } else {
             // The array representing the String is the same
             // size as the String, so no point in making a copy.
             v = originalValue;
         }
+        this.value = v;
         this.offset = 0;
-        this.count = size;
-        this.value = v;
+        this.count = originalLength;
     }
 
     /**
@@ -193,9 +193,9 @@
      */
     public String(char value[]) {
         int size = value.length;
+        this.value = Arrays.copyOf(value, size);
         this.offset = 0;
         this.count = size;
-        this.value = Arrays.copyOf(value, size);
     }
 
     /**
@@ -221,9 +221,44 @@
      */
     public String(char value[], int offset, int count) {
         checkBoundsOnCount(value.length, offset, count);
+        this.value = Arrays.copyOfRange(value, offset, offset+count);
         this.offset = 0;
         this.count = count;
-        this.value = Arrays.copyOfRange(value, offset, offset+count);
+    }
+
+    /**
+     * Allocates a new {...@code String} that contains characters from a 
subarray
+     * of the <a href="Character.html#unicode">Unicode code point</a> array
+     * argument.  The {...@code offset} argument is the index of the first code
+     * point of the subarray and the {...@code count} argument specifies the
+     * length of the subarray.  The contents of the subarray are converted to
+     * {...@code char}s; subsequent modification of the {...@code int} array 
does not
+     * affect the newly created string.
+     *
+     * <p><b>Note:</b> Does not validate if the specified characters are within
+     * the reserved surrogate range. See {...@link 
Character#isSurrogate(char)}.
+     *
+     * @param  codePoints
+     *         Array that is the source of Unicode code points.
+     *
+     * @param  offset
+     *         The initial offset.
+     *
+     * @param  count
+     *         The length.
+     *
+     * @throws IllegalArgumentException
+     *         If any character outside the valid Unicode range is found in
+     *         {...@code codePoints} subarray.
+     *
+     * @throws IndexOutOfBoundsException
+     *         If the {...@code offset} and {...@code count} arguments index
+     *         characters outside the bounds of the {...@code codePoints} 
array.
+     *
+     * @since  1.5
+     */
+    public String(int[] codePoints, int offset, int count) {
+        this(codePoints, offset, count, false);
     }
 
     /**
@@ -236,55 +271,115 @@
      * affect the newly created string.
      *
      * @param  codePoints
-     *         Array that is the source of Unicode code points
+     *         Array that is the source of Unicode code points.
      *
      * @param  offset
-     *         The initial offset
+     *         The initial offset.
      *
      * @param  count
-     *         The length
+     *         The length.
      *
-     * @throws  IllegalArgumentException
-     *          If any invalid Unicode code point is found in {...@code
-     *          codePoints}
+     * @param  validateNonSurrogates Determines if the characters should be
+     *         validated using {...@link Character#isSurrogate(char)}.
      *
-     * @throws  IndexOutOfBoundsException
-     *          If the {...@code offset} and {...@code count} arguments index
-     *          characters outside the bounds of the {...@code codePoints} 
array
+     * @throws IllegalArgumentException
+     *         If any character is outside the valid Unicode range and, if
+     *         {...@code validateNonSurrogates} is {...@code true}, within the
+     *         reserved surrogate range is found in {...@code codePoints} 
subarray.
      *
-     * @since  1.5
+     * @throws IndexOutOfBoundsException
+     *         If the {...@code offset} and {...@code count} arguments index
+     *         characters outside the bounds of the {...@code codePoints} 
array.
+     *
+     * @see    Character#isValidCodePoint(int)
+     * @see    Character#isSurrogate(char)
+     * @since  1.7
      */
-    public String(int[] codePoints, int offset, int count) {
-        checkBoundsOnCount(codePoints.length, offset, count);
+    public String(int[] codePoints, int offset, int count, boolean 
validateNonSurrogates) {
+        // Pass 1: Compute precise size for the char[]
+        int n = Character.charCount(codePoints, offset, count, 
validateNonSurrogates);
+
+        // Pass 2: Allocate and fill in char[]
+        char[] v = new char[n];
+        // fill backwards for VM performance reasons, reduces register 
pressure, faster compare against 0
+        for (int i = offset + count; n > 0; ) {
+            int c = codePoints[--i];
+            if (Character.isBMPCodePoint(c))
+                v[--n] = (char)c;
+            else
+                Character.toSurrogates(c, v, n-=2);
+        }
+
+        this.value  = v;
+        this.offset = 0;
+        this.count  = v.length;
+    }
+    public String(int[] codePoints, int offset, int count,
+            boolean inlinedExceptions, boolean fastCharCount) {
+        if (inlinedExceptions) {
+            if (offset < 0)
+                throw new StringIndexOutOfBoundsException(offset);
+            if (count < 0)
+                throw new StringIndexOutOfBoundsException(count);
+            // Note: offset or count might be near -1>>>1.
+            if (offset > codePoints.length - count)
+                throw new StringIndexOutOfBoundsException(offset + count);
+        } else
+            checkBoundsOnCount(codePoints.length, offset, count);
 
         // Pass 1: Compute precise size of char[]
         int n = 0;
-        for (int i = offset; i < offset + count; i++) {
-            int c = codePoints[i];
-            if (c >= Character.MIN_CODE_POINT &&
-                c <  Character.MIN_SUPPLEMENTARY_CODE_POINT)
-                n += 1;
-            else if (Character.isSupplementaryCodePoint(c))
-                n += 2;
-            else
-                throw new IllegalArgumentException(Integer.toString(c));
-        }
+        if (fastCharCount)
+            for (int i = offset; i < offset + count; i++) {
+                int c = codePoints[i];
+                char plane = (char)(c >>> 16);
+                if (plane == 0)
+                    n += 1;
+//                else if (plane < (Character.MAX_SUPPLEMENTARY_CODE_POINT >>> 
16))
+                else if (plane < (Character.MAX_CODE_POINT >>> 16))
+                    n += 2;
+                else throw new IllegalArgumentException(Integer.toString(c));
+            }
+        else
+            for (int i = offset; i < offset + count; i++) {
+                int c = codePoints[i];
+                if (Character.isBMPCodePoint(c))
+//                if (c >= Character.MIN_CODE_POINT &&
+//                    c <  Character.MIN_SUPPLEMENTARY_CODE_POINT)
+                    n += 1;
+//                else if (Character.isSupplementaryCodePoint(c))
+                else if (Character.isSuppCPalaMartin(c))
+                    n += 2;
+                else
+                    throw new IllegalArgumentException(Integer.toString(c));
+            }
 
         // Pass 2: Allocate and fill in char[]
         char[] v = new char[n];
-        for (int i = offset, j = 0; i < offset + count; i++) {
-            int c = codePoints[i];
-            if (c < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
-                v[j++] = (char) c;
-            } else {
-                Character.toSurrogates(c, v, j);
-                j += 2;
+        if (fastCharCount)
+            for (int i = offset + count; n > 0; ) {
+                int c = codePoints[--i];
+//                if (Character.isBMPCodePoint(c))
+                if (c < Character.MIN_SUPPLEMENTARY_CODE_POINT)
+                    v[--n] = (char)c;
+                else
+                    Character.toSurrogates(c, v, n -= 2);
             }
-        }
+        else
+            for (int i = offset, j = 0; i < offset + count; i++) {
+                int c = codePoints[i];
+//                if (Character.isBMPCodePoint(c)) {
+                if (c < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
+                    v[j++] = (char) c;
+                } else {
+                    Character.toSurrogates(c, v, j);
+                    j += 2;
+                }
+            }
 
         this.value  = v;
+        this.offset = 0;
         this.count  = v.length;
-        this.offset = 0;
     }
 
     /**
@@ -341,9 +436,9 @@
                 value[i] = (char) (hibyte | (ascii[i + offset] & 0xff));
             }
         }
+        this.value = value;
         this.offset = 0;
         this.count = count;
-        this.value = value;
     }
 
     /**
@@ -420,9 +515,9 @@
             throw new NullPointerException("charsetName");
         checkBoundsOnCount(bytes.length, offset, length);
         char[] v = StringCoding.decode(charsetName, bytes, offset, length);
+        this.value = v;
         this.offset = 0;
         this.count = v.length;
-        this.value = v;
     }
 
     /**
@@ -460,9 +555,9 @@
             throw new NullPointerException("charset");
         checkBoundsOnCount(bytes.length, offset, length);
         char[] v = StringCoding.decode(charset, bytes, offset, length);
+        this.value = v;
         this.offset = 0;
         this.count = v.length;
-        this.value = v;
     }
 
     /**
@@ -546,9 +641,9 @@
     public String(byte bytes[], int offset, int length) {
         checkBoundsOnCount(bytes.length, offset, length);
         char[] v  = StringCoding.decode(bytes, offset, length);
+        this.value = v;
         this.offset = 0;
         this.count = v.length;
-        this.value = v;
     }
 
     /**
@@ -583,8 +678,8 @@
     public String(StringBuffer buffer) {
         String result = buffer.toString();
         this.value = result.value;
+        this.offset = result.offset;
         this.count = result.count;
-        this.offset = result.offset;
     }
 
     /**
@@ -605,8 +700,8 @@
     public String(StringBuilder builder) {
         String result = builder.toString();
         this.value = result.value;
+        this.offset = result.offset;
         this.count = result.count;
-        this.offset = result.offset;
     }
 
 
@@ -981,10 +1076,10 @@
             if (n == anotherString.count) {
                 char v1[] = value;
                 char v2[] = anotherString.value;
-                int i = offset;
-                int j = anotherString.offset;
+                int o1 = offset;
+                int o2 = anotherString.offset;
                 while (n-- != 0) {
-                    if (v1[i++] != v2[j++])
+                    if (v1[o1++] != v2[o2++])
                         return false;
                 }
                 return true;

Re: review request for 6798511/6860431: Include functionality of Surrogate in Character

Reply via email to