jenkins-bot has submitted this change and it was merged.

Change subject: jquery.byteLength: Improve documentation and tests
......................................................................


jquery.byteLength: Improve documentation and tests

Change-Id: I6793487b7cd9f58b23554bc29c853bd3f02da49c
---
M resources/jquery/jquery.byteLength.js
M tests/qunit/suites/resources/jquery/jquery.byteLength.test.js
2 files changed, 25 insertions(+), 11 deletions(-)

Approvals:
  Krinkle: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/resources/jquery/jquery.byteLength.js b/resources/jquery/jquery.byteLength.js
index 3d5b720..398937e 100644
--- a/resources/jquery/jquery.byteLength.js
+++ b/resources/jquery/jquery.byteLength.js
@@ -4,6 +4,8 @@
  * Calculate the byte length of a string (accounting for UTF-8).
  *
  * @author Jan Paul Posma, 2011
+ * @author Timo Tijhof, 2012
+ * @author David Chan, 2013
  */
 jQuery.byteLength = function ( str ) {
 
@@ -12,8 +14,18 @@
        // Note, surrogate (\uD800-\uDFFF) characters are counted as 2 bytes, since there's two of them
        // and the actual character takes 4 bytes in UTF-8 (2*2=4). Might not work perfectly in
        // edge cases such as illegal sequences, but that should never happen.
+
+       // https://en.wikipedia.org/wiki/UTF-8#Description
+       // The mapping from UTF-16 code units to UTF-8 bytes is as follows:
+       // > Range 0000-007F: codepoints that become 1 byte of UTF-8
+       // > Range 0080-07FF: codepoints that become 2 bytes of UTF-8
+       // > Range 0800-D7FF: codepoints that become 3 bytes of UTF-8
+       // > Range D800-DFFF: Surrogates (each pair becomes 4 bytes of UTF-8)
+       // > Range E000-FFFF: codepoints that become 3 bytes of UTF-8 (continued)
+
        return str
                .replace( /[\u0080-\u07FF\uD800-\uDFFF]/g, '**' )
                .replace( /[\u0800-\uD7FF\uE000-\uFFFF]/g, '***' )
                .length;
+
 };
diff --git a/tests/qunit/suites/resources/jquery/jquery.byteLength.test.js b/tests/qunit/suites/resources/jquery/jquery.byteLength.test.js
index e4e579b..e6aa3aa 100644
--- a/tests/qunit/suites/resources/jquery/jquery.byteLength.test.js
+++ b/tests/qunit/suites/resources/jquery/jquery.byteLength.test.js
@@ -16,20 +16,22 @@
 
        } );
 
-       QUnit.test( 'Special text', 5, function ( assert ) {
-               // http://en.wikipedia.org/wiki/UTF-8
+       QUnit.test( 'Special text', 4, function ( assert ) {
+               // https://en.wikipedia.org/wiki/UTF-8
                var u0024 = '$',
+                       // Cent symbol
                        u00A2 = '\u00A2',
+                       // Euro symbol
                        u20AC = '\u20AC',
-                       u024B62 = '\u024B62',
-                       // The normal one doesn't display properly, try the below which is the same
-                       // according to http://www.fileformat.info/info/unicode/char/24B62/index.htm
-                       u024B62alt = '\uD852\uDF62';
+                       // Character \U00024B62 (Han script) can't be represented in javascript as a single
+                       // code point, instead it is composed as a surrogate pair of two separate code units.
+                       // http://codepoints.net/U+24B62
+                       // http://www.fileformat.info/info/unicode/char/24B62/index.htm
+                       u024B62 = '\uD852\uDF62';
 
-               assert.strictEqual( $.byteLength( u0024 ), 1, 'U+0024: 1 byte. $ (dollar sign)' );
-               assert.strictEqual( $.byteLength( u00A2 ), 2, 'U+00A2: 2 bytes. \u00A2 (cent sign)' );
-               assert.strictEqual( $.byteLength( u20AC ), 3, 'U+20AC: 3 bytes. \u20AC (euro sign)' );
-               assert.strictEqual( $.byteLength( u024B62 ), 4, 'U+024B62: 4 bytes. \uD852\uDF62 (a Han character)' );
-               assert.strictEqual( $.byteLength( u024B62alt ), 4, 'U+024B62: 4 bytes. \uD852\uDF62 (a Han character) - alternative method' );
+               assert.strictEqual( $.byteLength( u0024 ), 1, 'U+0024' );
+               assert.strictEqual( $.byteLength( u00A2 ), 2, 'U+00A2' );
+               assert.strictEqual( $.byteLength( u20AC ), 3, 'U+20AC' );
+               assert.strictEqual( $.byteLength( u024B62 ), 4, 'U+024B62 (surrogate pair: \\uD852\\uDF62)' );
        } );
 }( jQuery ) );

-- 
To view, visit https://gerrit.wikimedia.org/r/83057
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I6793487b7cd9f58b23554bc29c853bd3f02da49c
Gerrit-PatchSet: 2
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Divec <da...@sheetmusic.org.uk>
Gerrit-Reviewer: Jack Phoenix <j...@countervandalism.net>
Gerrit-Reviewer: Krinkle <krinklem...@gmail.com>
Gerrit-Reviewer: jenkins-bot

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to