Author: bayard Date: Mon Jun 26 17:28:43 2006 New Revision: 417319 URL: http://svn.apache.org/viewvc?rev=417319&view=rev Log: Adding a test and a fix for LANG-100. This is a bug in which the randomly created String can sometimes be illegal Unicode, because the code does not take into account that some characters are only valid in combination with others (surrogate pairs). High and low surrogates are now dealt with, but I'm skipping private-use high surrogates because I can't find out what to do with them. I need to go and plod very slowly through the spec. This site was very useful: http://www.alanwood.net/unicode/private_use_high_surrogates.html
Modified: jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/RandomStringUtils.java jakarta/commons/proper/lang/trunk/src/test/org/apache/commons/lang/RandomStringUtilsTest.java Modified: jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/RandomStringUtils.java URL: http://svn.apache.org/viewvc/jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/RandomStringUtils.java?rev=417319&r1=417318&r2=417319&view=diff ============================================================================== --- jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/RandomStringUtils.java (original) +++ jakarta/commons/proper/lang/trunk/src/java/org/apache/commons/lang/RandomStringUtils.java Mon Jun 26 17:28:43 2006 @@ -18,6 +18,14 @@ import java.util.Random; /** * <p>Operations for random <code>String</code>s.</p> + * <p>Currently <em>private high surrogate</em> characters are ignored. + * These are unicode characters that fall between the values 56192 (db80) + * and 56319 (dbff) as we don't know how to handle them. + * High and low surrogates are correctly dealt with - that is if a + * high surrogate is randomly chosen, 55296 (d800) to 56191 (db7f) + * then it is followed by a low surrogate. If a low surrogate is chosen, + * 56320 (dc00) to 57343 (dfff) then it is placed after a randomly + * chosen high surrogate. 
</p> * * @author GenerationJava Core library * @author <a href="mailto:[EMAIL PROTECTED]">Henri Yandell</a> @@ -243,8 +251,32 @@ } if ((letters && Character.isLetter(ch)) || (numbers && Character.isDigit(ch)) - || (!letters && !numbers)) { - buffer[count] = ch; + || (!letters && !numbers)) + { + if(ch >= 56320 && ch <= 57343) { + if(count == 0) { + count++; + } else { + // low surrogate, insert high surrogate after putting it in + buffer[count] = ch; + count--; + buffer[count] = (char) (55296 + random.nextInt(128)); + } + } else if(ch >= 55296 && ch <= 56191) { + if(count == 0) { + count++; + } else { + // high surrogate, insert low surrogate before putting it in + buffer[count] = (char) (56320 + random.nextInt(128)); + count--; + buffer[count] = ch; + } + } else if(ch >= 56192 && ch <= 56319) { + // private high surrogate, no effing clue, so skip it + count++; + } else { + buffer[count] = ch; + } } else { count++; } Modified: jakarta/commons/proper/lang/trunk/src/test/org/apache/commons/lang/RandomStringUtilsTest.java URL: http://svn.apache.org/viewvc/jakarta/commons/proper/lang/trunk/src/test/org/apache/commons/lang/RandomStringUtilsTest.java?rev=417319&r1=417318&r2=417319&view=diff ============================================================================== --- jakarta/commons/proper/lang/trunk/src/test/org/apache/commons/lang/RandomStringUtilsTest.java (original) +++ jakarta/commons/proper/lang/trunk/src/test/org/apache/commons/lang/RandomStringUtilsTest.java Mon Jun 26 17:28:43 2006 @@ -315,7 +315,33 @@ } return sumSq; } - + + /** + * Checks if the string got by [EMAIL PROTECTED] RandomStringUtils#random(int)} + * can be converted to UTF-8 and back without loss. 
+ * + * @author [EMAIL PROTECTED] + * @throws Exception + */ + public void testLang100() throws Exception { + int size = 5000; + String encoding = "UTF-8"; + String orig = RandomStringUtils.random(size); + byte[] bytes = orig.getBytes(encoding); + String copy = new String(bytes, encoding); + + // for a verbose compare: + for (int i=0; i < orig.length() && i < copy.length(); i++) { + char o = orig.charAt(i); + char c = copy.charAt(i); + assertEquals("differs at " + i + "(" + Integer.toHexString((new Character(o)).hashCode()) + "," + + Integer.toHexString((new Character(c)).hashCode()) + ")", o, c); + } + // compare length also + assertEquals(orig.length(), copy.length()); + // just to be complete + assertEquals(orig, copy); + } public static void main(String args[]) { TestRunner.run(suite()); --------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]