[cp-patches] Streamline String.toUpper/LowerCase

Ian Rogers Sat, 20 Oct 2007 06:18:31 -0700

Hi,

The attached patch separates out the special case of converting a string to upper and lower case for the Turkish locale. As Locale's language should be interned, the "tr".equals test is replaced with "tr" ==.


Thanks,
Ian

Index: java/lang/String.java
===================================================================
RCS file: /sources/classpath/classpath/java/lang/String.java,v
retrieving revision 1.86
diff -u -r1.86 String.java
--- java/lang/String.java       2 Jan 2007 00:51:01 -0000       1.86
+++ java/lang/String.java       20 Oct 2007 11:44:55 -0000
@@ -1431,27 +1431,18 @@
   }
 
   /**
-   * Lowercases this String according to a particular locale. This uses
-   * Unicode's special case mappings, as applied to the given Locale, so the
-   * resulting string may be a different length.
-   *
-   * @param loc locale to use
-   * @return new lowercased String, or this if no characters were lowercased
-   * @throws NullPointerException if loc is null
-   * @see #toUpperCase(Locale)
-   * @since 1.1
+   * Convert string to lower case for a Turkish locale that requires special
+   * handling of '\u0049'
    */
-  public String toLowerCase(Locale loc)
+  private String toLowerCaseTurkish()
   {
     // First, see if the current string is already lower case.
-    boolean turkish = "tr".equals(loc.getLanguage());
     int i = count;
     int x = offset - 1;
     while (--i >= 0)
       {
         char ch = value[++x];
-        if ((turkish && ch == '\u0049')
-            || ch != Character.toLowerCase(ch))
+        if ((ch == '\u0049') || ch != Character.toLowerCase(ch))
           break;
       }
     if (i < 0)
@@ -1464,8 +1455,14 @@
       {
         char ch = value[x];
         // Hardcoded special case.
-        newStr[x++] = (turkish && ch == '\u0049') ? '\u0131'
-          : Character.toLowerCase(ch);
+        if (ch != '\u0049')
+          {
+            newStr[x++] = Character.toLowerCase(ch);
+          }
+        else
+          {
+            newStr[x++] = '\u0131';
+          }
       }
     while (--i >= 0);
     // Package constructor avoids an array copy.
@@ -1473,6 +1470,54 @@
   }
 
   /**
+   * Lowercases this String according to a particular locale. This uses
+   * Unicode's special case mappings, as applied to the given Locale, so the
+   * resulting string may be a different length.
+   *
+   * @param loc locale to use
+   * @return new lowercased String, or this if no characters were lowercased
+   * @throws NullPointerException if loc is null
+   * @see #toUpperCase(Locale)
+   * @since 1.1
+   */
+  public String toLowerCase(Locale loc)
+  {
+    // First, see if the current string is already lower case.
+
+    // Is loc turkish? String equality test is ok as Locale.language is interned
+    if ("tr" == loc.getLanguage())
+      {
+        return toLowerCaseTurkish();
+      }
+    else
+      {
+        int i = count;
+        int x = offset - 1;
+        while (--i >= 0)
+          {
+            char ch = value[++x];
+            if (ch != Character.toLowerCase(ch))
+              break;
+          }
+        if (i < 0)
+          return this;
+
+        // Now we perform the conversion. Fortunately, there are no
+        // multi-character lowercase expansions in Unicode 3.0.0.
+        char[] newStr = (char[]) value.clone();
+        do
+          {
+            char ch = value[x];
+            // Hardcoded special case.
+            newStr[x++] = Character.toLowerCase(ch);
+          }
+        while (--i >= 0);
+        // Package constructor avoids an array copy.
+        return new String(newStr, offset, count, true);
+     }
+  }
+
+  /**
    * Lowercases this String. This uses Unicode's special case mappings, as
    * applied to the platform's default Locale, so the resulting string may
    * be a different length.
@@ -1487,21 +1532,12 @@
   }
 
   /**
-   * Uppercases this String according to a particular locale. This uses
-   * Unicode's special case mappings, as applied to the given Locale, so the
-   * resulting string may be a different length.
-   *
-   * @param loc locale to use
-   * @return new uppercased String, or this if no characters were uppercased
-   * @throws NullPointerException if loc is null
-   * @see #toLowerCase(Locale)
-   * @since 1.1
+   * Uppercase this string for a Turkish locale
    */
-  public String toUpperCase(Locale loc)
+  private String toUpperCaseTurkish()
   {
     // First, see how many characters we have to grow by, as well as if the
     // current string is already upper case.
-    boolean turkish = "tr".equals(loc.getLanguage());
     int expand = 0;
     boolean unchanged = true;
     int i = count;
@@ -1511,7 +1547,7 @@
         char ch = value[--x];
         expand += upperCaseExpansion(ch);
         unchanged = (unchanged && expand == 0
-                     && ! (turkish && ch == '\u0069')
+                     && ch != '\u0069'
                      && ch == Character.toUpperCase(ch));
       }
     if (unchanged)
@@ -1526,8 +1562,14 @@
           {
             char ch = value[x];
             // Hardcoded special case.
-            newStr[x++] = (turkish && ch == '\u0069') ? '\u0130'
-              : Character.toUpperCase(ch);
+            if (ch != '\u0069')
+              {
+                newStr[x++] = Character.toUpperCase(ch);
+              }
+            else
+              {
+                newStr[x++] = '\u0130';
+              }
           }
         // Package constructor avoids an array copy.
         return new String(newStr, offset, count, true);
@@ -1540,7 +1582,7 @@
       {
         char ch = value[x++];
         // Hardcoded special case.
-        if (turkish && ch == '\u0069')
+        if (ch == '\u0069')
           {
             newStr[j++] = '\u0130';
             continue;
@@ -1560,6 +1602,77 @@
   }
 
   /**
+   * Uppercases this String according to a particular locale. This uses
+   * Unicode's special case mappings, as applied to the given Locale, so the
+   * resulting string may be a different length.
+   *
+   * @param loc locale to use
+   * @return new uppercased String, or this if no characters were uppercased
+   * @throws NullPointerException if loc is null
+   * @see #toLowerCase(Locale)
+   * @since 1.1
+   */
+  public String toUpperCase(Locale loc)
+  {
+    // First, see how many characters we have to grow by, as well as if the
+    // current string is already upper case.
+
+    // Is loc turkish? String equality test is ok as Locale.language is interned
+    if ("tr" == loc.getLanguage())
+      {
+        return toUpperCaseTurkish();
+      }
+    else
+      {
+        int expand = 0;
+        boolean unchanged = true;
+        int i = count;
+        int x = i + offset;
+        while (--i >= 0)
+          {
+            char ch = value[--x];
+            expand += upperCaseExpansion(ch);
+            unchanged = (unchanged && expand == 0
+                         && ch == Character.toUpperCase(ch));
+          }
+        if (unchanged)
+          return this;
+
+        // Now we perform the conversion.
+        i = count;
+        if (expand == 0)
+          {
+            char[] newStr = (char[]) value.clone();
+            while (--i >= 0)
+              {
+                char ch = value[x];
+                newStr[x++] = Character.toUpperCase(ch);
+              }
+            // Package constructor avoids an array copy.
+            return new String(newStr, offset, count, true);
+          }
+
+        // Expansion is necessary.
+        char[] newStr = new char[count + expand];
+        int j = 0;
+        while (--i >= 0)
+          {
+            char ch = value[x++];
+            expand = upperCaseExpansion(ch);
+            if (expand > 0)
+              {
+                int index = upperCaseIndex(ch);
+                while (expand-- >= 0)
+                  newStr[j++] = upperExpand[index++];
+              }
+            else
+              newStr[j++] = Character.toUpperCase(ch);
+          }
+        // Package constructor avoids an array copy.
+        return new String(newStr, 0, newStr.length, true);
+      }
+  }
+  /**
    * Uppercases this String. This uses Unicode's special case mappings, as
    * applied to the platform's default Locale, so the resulting string may
    * be a different length.

[cp-patches] Streamline String.toUpper/LowerCase

Reply via email to