Diff
Modified: trunk/LayoutTests/ChangeLog (266456 => 266457)
--- trunk/LayoutTests/ChangeLog 2020-09-02 14:42:54 UTC (rev 266456)
+++ trunk/LayoutTests/ChangeLog 2020-09-02 14:45:32 UTC (rev 266457)
@@ -1,3 +1,12 @@
+2020-09-02 Alex Christensen <achristen...@webkit.org>
+
+ Align UTF-16 decoder with Chrome, Firefox, and specification
+ https://bugs.webkit.org/show_bug.cgi?id=216058
+
+ Reviewed by Youenn Fablet.
+
+ * imported/blink/fast/encoding/utf-16-odd-byte-expected.txt:
+
2020-09-02 Diego Pino Garcia <dp...@igalia.com>
[GLIB] Unreviewed test gardening. Update baselines and expectations after r266452.
Modified: trunk/LayoutTests/fast/parser/test-unicode-characters-in-attribute-name-expected.txt (266456 => 266457)
--- trunk/LayoutTests/fast/parser/test-unicode-characters-in-attribute-name-expected.txt 2020-09-02 14:42:54 UTC (rev 266456)
+++ trunk/LayoutTests/fast/parser/test-unicode-characters-in-attribute-name-expected.txt 2020-09-02 14:45:32 UTC (rev 266457)
@@ -3,4 +3,4 @@
0. id : content
1. title : http://www.315.com.cn/newwebsite/images0821/315_ca_03.gif
2. la ng : zh-cn
-
+�
Modified: trunk/LayoutTests/imported/blink/fast/encoding/utf-16-odd-byte-expected.txt (266456 => 266457)
--- trunk/LayoutTests/imported/blink/fast/encoding/utf-16-odd-byte-expected.txt 2020-09-02 14:42:54 UTC (rev 266456)
+++ trunk/LayoutTests/imported/blink/fast/encoding/utf-16-odd-byte-expected.txt 2020-09-02 14:45:32 UTC (rev 266457)
@@ -1 +1 @@
-This document has an odd number of bytes - the UTF-16 decoder should emit a replacement character here:
+This document has an odd number of bytes - the UTF-16 decoder should emit a replacement character here: �
Modified: trunk/LayoutTests/imported/w3c/ChangeLog (266456 => 266457)
--- trunk/LayoutTests/imported/w3c/ChangeLog 2020-09-02 14:42:54 UTC (rev 266456)
+++ trunk/LayoutTests/imported/w3c/ChangeLog 2020-09-02 14:45:32 UTC (rev 266457)
@@ -1,3 +1,17 @@
+2020-09-02 Alex Christensen <achristen...@webkit.org>
+
+ Align UTF-16 decoder with Chrome, Firefox, and specification
+ https://bugs.webkit.org/show_bug.cgi?id=216058
+
+ Reviewed by Youenn Fablet.
+
+ * web-platform-tests/encoding/streams/decode-non-utf8.any-expected.txt:
+ * web-platform-tests/encoding/streams/decode-non-utf8.any.worker-expected.txt:
+ * web-platform-tests/encoding/textdecoder-fatal-streaming.any-expected.txt:
+ * web-platform-tests/encoding/textdecoder-fatal-streaming.any.worker-expected.txt:
+ * web-platform-tests/encoding/textdecoder-utf16-surrogates.any-expected.txt:
+ * web-platform-tests/encoding/textdecoder-utf16-surrogates.any.worker-expected.txt:
+
2020-09-01 Alex Christensen <achristen...@webkit.org>
Align ISO-2022-JP and Shift_JIS encodings with Chrome, Firefox, and the specification
Modified: trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/streams/decode-non-utf8.any-expected.txt (266456 => 266457)
--- trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/streams/decode-non-utf8.any-expected.txt 2020-09-02 14:42:54 UTC (rev 266456)
+++ trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/streams/decode-non-utf8.any-expected.txt 2020-09-02 14:45:32 UTC (rev 266457)
@@ -1,10 +1,10 @@
PASS TextDecoderStream should be able to decode UTF-16BE
-FAIL TextDecoderStream should be able to decode invalid sequences in UTF-16BE assert_equals: output should be replacement character expected "\ufffd" but got "�"
-FAIL TextDecoderStream should be able to reject invalid sequences in UTF-16BE assert_unreached: Should have rejected: readable should be errored Reached unreachable code
+PASS TextDecoderStream should be able to decode invalid sequences in UTF-16BE
+PASS TextDecoderStream should be able to reject invalid sequences in UTF-16BE
PASS TextDecoderStream should be able to decode UTF-16LE
-FAIL TextDecoderStream should be able to decode invalid sequences in UTF-16LE assert_equals: output should be replacement character expected "\ufffd" but got "�"
-FAIL TextDecoderStream should be able to reject invalid sequences in UTF-16LE assert_unreached: Should have rejected: readable should be errored Reached unreachable code
+PASS TextDecoderStream should be able to decode invalid sequences in UTF-16LE
+PASS TextDecoderStream should be able to reject invalid sequences in UTF-16LE
PASS TextDecoderStream should be able to decode Shift_JIS
FAIL TextDecoderStream should be able to decode invalid sequences in Shift_JIS assert_equals: output should be replacement character expected "\ufffd" but got "\x1a"
FAIL TextDecoderStream should be able to reject invalid sequences in Shift_JIS assert_unreached: Should have rejected: readable should be errored Reached unreachable code
Modified: trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/streams/decode-non-utf8.any.worker-expected.txt (266456 => 266457)
--- trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/streams/decode-non-utf8.any.worker-expected.txt 2020-09-02 14:42:54 UTC (rev 266456)
+++ trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/streams/decode-non-utf8.any.worker-expected.txt 2020-09-02 14:45:32 UTC (rev 266457)
@@ -1,10 +1,10 @@
PASS TextDecoderStream should be able to decode UTF-16BE
-FAIL TextDecoderStream should be able to decode invalid sequences in UTF-16BE assert_equals: output should be replacement character expected "\ufffd" but got "�"
-FAIL TextDecoderStream should be able to reject invalid sequences in UTF-16BE assert_unreached: Should have rejected: readable should be errored Reached unreachable code
+PASS TextDecoderStream should be able to decode invalid sequences in UTF-16BE
+PASS TextDecoderStream should be able to reject invalid sequences in UTF-16BE
PASS TextDecoderStream should be able to decode UTF-16LE
-FAIL TextDecoderStream should be able to decode invalid sequences in UTF-16LE assert_equals: output should be replacement character expected "\ufffd" but got "�"
-FAIL TextDecoderStream should be able to reject invalid sequences in UTF-16LE assert_unreached: Should have rejected: readable should be errored Reached unreachable code
+PASS TextDecoderStream should be able to decode invalid sequences in UTF-16LE
+PASS TextDecoderStream should be able to reject invalid sequences in UTF-16LE
PASS TextDecoderStream should be able to decode Shift_JIS
FAIL TextDecoderStream should be able to decode invalid sequences in Shift_JIS assert_equals: output should be replacement character expected "\ufffd" but got "\x1a"
FAIL TextDecoderStream should be able to reject invalid sequences in Shift_JIS assert_unreached: Should have rejected: readable should be errored Reached unreachable code
Modified: trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/textdecoder-fatal-streaming.any-expected.txt (266456 => 266457)
--- trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/textdecoder-fatal-streaming.any-expected.txt 2020-09-02 14:42:54 UTC (rev 266456)
+++ trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/textdecoder-fatal-streaming.any-expected.txt 2020-09-02 14:45:32 UTC (rev 266457)
@@ -1,4 +1,4 @@
-FAIL Fatal flag, non-streaming cases assert_equals: Unterminated UTF-8 sequence should emit replacement character if fatal flag is unset expected "\ufffd" but got ""
+PASS Fatal flag, non-streaming cases
FAIL Fatal flag, streaming cases assert_equals: expected "\0" but got ""
Modified: trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/textdecoder-fatal-streaming.any.worker-expected.txt (266456 => 266457)
--- trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/textdecoder-fatal-streaming.any.worker-expected.txt 2020-09-02 14:42:54 UTC (rev 266456)
+++ trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/textdecoder-fatal-streaming.any.worker-expected.txt 2020-09-02 14:45:32 UTC (rev 266457)
@@ -1,4 +1,4 @@
-FAIL Fatal flag, non-streaming cases assert_equals: Unterminated UTF-8 sequence should emit replacement character if fatal flag is unset expected "\ufffd" but got ""
+PASS Fatal flag, non-streaming cases
FAIL Fatal flag, streaming cases assert_equals: expected "\0" but got ""
Modified: trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/textdecoder-utf16-surrogates.any-expected.txt (266456 => 266457)
--- trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/textdecoder-utf16-surrogates.any-expected.txt 2020-09-02 14:42:54 UTC (rev 266456)
+++ trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/textdecoder-utf16-surrogates.any-expected.txt 2020-09-02 14:45:32 UTC (rev 266457)
@@ -1,22 +1,12 @@
-FAIL utf-16le - lone surrogate lead assert_equals: expected "\ufffd" but got "�"
-FAIL utf-16le - lone surrogate lead (fatal flag set) assert_throws_js: function "function () {
- new TextDecoder(t.encoding, {fatal: true}).decode(new Uint8Array(t.input))
- }" did not throw
-FAIL utf-16le - lone surrogate trail assert_equals: expected "\ufffd" but got "�"
-FAIL utf-16le - lone surrogate trail (fatal flag set) assert_throws_js: function "function () {
- new TextDecoder(t.encoding, {fatal: true}).decode(new Uint8Array(t.input))
- }" did not throw
-FAIL utf-16le - unmatched surrogate lead assert_equals: expected "\ufffd\0" but got "�\0"
-FAIL utf-16le - unmatched surrogate lead (fatal flag set) assert_throws_js: function "function () {
- new TextDecoder(t.encoding, {fatal: true}).decode(new Uint8Array(t.input))
- }" did not throw
-FAIL utf-16le - unmatched surrogate trail assert_equals: expected "\ufffd\0" but got "�\0"
-FAIL utf-16le - unmatched surrogate trail (fatal flag set) assert_throws_js: function "function () {
- new TextDecoder(t.encoding, {fatal: true}).decode(new Uint8Array(t.input))
- }" did not throw
-FAIL utf-16le - swapped surrogate pair assert_equals: expected "\ufffd\ufffd" but got "��"
-FAIL utf-16le - swapped surrogate pair (fatal flag set) assert_throws_js: function "function () {
- new TextDecoder(t.encoding, {fatal: true}).decode(new Uint8Array(t.input))
- }" did not throw
+PASS utf-16le - lone surrogate lead
+PASS utf-16le - lone surrogate lead (fatal flag set)
+PASS utf-16le - lone surrogate trail
+PASS utf-16le - lone surrogate trail (fatal flag set)
+PASS utf-16le - unmatched surrogate lead
+PASS utf-16le - unmatched surrogate lead (fatal flag set)
+PASS utf-16le - unmatched surrogate trail
+PASS utf-16le - unmatched surrogate trail (fatal flag set)
+PASS utf-16le - swapped surrogate pair
+PASS utf-16le - swapped surrogate pair (fatal flag set)
Modified: trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/textdecoder-utf16-surrogates.any.worker-expected.txt (266456 => 266457)
--- trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/textdecoder-utf16-surrogates.any.worker-expected.txt 2020-09-02 14:42:54 UTC (rev 266456)
+++ trunk/LayoutTests/imported/w3c/web-platform-tests/encoding/textdecoder-utf16-surrogates.any.worker-expected.txt 2020-09-02 14:45:32 UTC (rev 266457)
@@ -1,22 +1,12 @@
-FAIL utf-16le - lone surrogate lead assert_equals: expected "\ufffd" but got "�"
-FAIL utf-16le - lone surrogate lead (fatal flag set) assert_throws_js: function "function () {
- new TextDecoder(t.encoding, {fatal: true}).decode(new Uint8Array(t.input))
- }" did not throw
-FAIL utf-16le - lone surrogate trail assert_equals: expected "\ufffd" but got "�"
-FAIL utf-16le - lone surrogate trail (fatal flag set) assert_throws_js: function "function () {
- new TextDecoder(t.encoding, {fatal: true}).decode(new Uint8Array(t.input))
- }" did not throw
-FAIL utf-16le - unmatched surrogate lead assert_equals: expected "\ufffd\0" but got "�\0"
-FAIL utf-16le - unmatched surrogate lead (fatal flag set) assert_throws_js: function "function () {
- new TextDecoder(t.encoding, {fatal: true}).decode(new Uint8Array(t.input))
- }" did not throw
-FAIL utf-16le - unmatched surrogate trail assert_equals: expected "\ufffd\0" but got "�\0"
-FAIL utf-16le - unmatched surrogate trail (fatal flag set) assert_throws_js: function "function () {
- new TextDecoder(t.encoding, {fatal: true}).decode(new Uint8Array(t.input))
- }" did not throw
-FAIL utf-16le - swapped surrogate pair assert_equals: expected "\ufffd\ufffd" but got "��"
-FAIL utf-16le - swapped surrogate pair (fatal flag set) assert_throws_js: function "function () {
- new TextDecoder(t.encoding, {fatal: true}).decode(new Uint8Array(t.input))
- }" did not throw
+PASS utf-16le - lone surrogate lead
+PASS utf-16le - lone surrogate lead (fatal flag set)
+PASS utf-16le - lone surrogate trail
+PASS utf-16le - lone surrogate trail (fatal flag set)
+PASS utf-16le - unmatched surrogate lead
+PASS utf-16le - unmatched surrogate lead (fatal flag set)
+PASS utf-16le - unmatched surrogate trail
+PASS utf-16le - unmatched surrogate trail (fatal flag set)
+PASS utf-16le - swapped surrogate pair
+PASS utf-16le - swapped surrogate pair (fatal flag set)
Modified: trunk/Source/WebCore/ChangeLog (266456 => 266457)
--- trunk/Source/WebCore/ChangeLog 2020-09-02 14:42:54 UTC (rev 266456)
+++ trunk/Source/WebCore/ChangeLog 2020-09-02 14:45:32 UTC (rev 266457)
@@ -1,3 +1,17 @@
+2020-09-02 Alex Christensen <achristen...@webkit.org>
+
+ Align UTF-16 decoder with Chrome, Firefox, and specification
+ https://bugs.webkit.org/show_bug.cgi?id=216058
+
+ Reviewed by Youenn Fablet.
+
+ Covered by web platform tests.
+
+ * platform/text/TextCodecUTF16.cpp:
+ (WebCore::TextCodecUTF16::decode):
+ * platform/text/TextCodecUTF16.h:
+ (): Deleted.
+
2020-09-02 Youenn Fablet <you...@apple.com>
Safari is not able to hear audio when using WebRTC in multiple tabs
Modified: trunk/Source/WebCore/platform/text/TextCodecUTF16.cpp (266456 => 266457)
--- trunk/Source/WebCore/platform/text/TextCodecUTF16.cpp 2020-09-02 14:42:54 UTC (rev 266456)
+++ trunk/Source/WebCore/platform/text/TextCodecUTF16.cpp 2020-09-02 14:45:32 UTC (rev 266457)
@@ -27,6 +27,7 @@
#include "TextCodecUTF16.h"
#include <wtf/text/CString.h>
+#include <wtf/text/StringBuilder.h>
#include <wtf/text/WTFString.h>
namespace WebCore {
@@ -61,54 +62,82 @@
});
}
-String TextCodecUTF16::decode(const char* bytes, size_t length, bool, bool, bool&)
+// https://encoding.spec.whatwg.org/#shared-utf-16-decoder
+String TextCodecUTF16::decode(const char* bytes, size_t length, bool flush, bool, bool& sawError)
{
- if (!length)
- return String();
+ const auto* p = reinterpret_cast<const uint8_t*>(bytes);
+ const auto* const end = p + length;
+ const auto* const endMinusOneOrNull = end ? end - 1 : nullptr;
- // FIXME: This should generate an error if there is an unpaired surrogate.
+ StringBuilder result;
+ result.reserveCapacity(length / 2);
- const unsigned char* p = reinterpret_cast<const unsigned char*>(bytes);
- size_t numBytes = length + m_haveBufferedByte;
- size_t numCodeUnits = numBytes / 2;
- RELEASE_ASSERT(numCodeUnits <= std::numeric_limits<unsigned>::max());
+ Function<void(UChar)> processBytesShared;
+ processBytesShared = [&] (UChar codeUnit) {
+ if (m_leadSurrogate) {
+ auto leadSurrogate = *std::exchange(m_leadSurrogate, WTF::nullopt);
+ if (codeUnit >= 0xDC00 && codeUnit <= 0xDFFF) {
+ result.appendCharacter(0x10000 + ((leadSurrogate - 0xD800) << 10) + codeUnit - 0xDC00);
+ return;
+ }
+ sawError = true;
+ result.append(replacementCharacter);
+ processBytesShared(codeUnit);
+ return;
+ }
+ if (codeUnit >= 0xD800 && codeUnit <= 0xDBFF) {
+ m_leadSurrogate = codeUnit;
+ return;
+ }
+ if (codeUnit >= 0xDC00 && codeUnit <=0xDFFF) {
+ sawError = true;
+ result.append(replacementCharacter);
+ return;
+ }
+ result.append(codeUnit);
+ };
+ auto processBytesLE = [&] (uint8_t first, uint8_t second) {
+ processBytesShared(first | (second << 8));
+ };
+ auto processBytesBE = [&] (uint8_t first, uint8_t second) {
+ processBytesShared((first << 8) | second);
+ };
- UChar* q;
- auto result = String::createUninitialized(numCodeUnits, q);
-
- if (m_haveBufferedByte) {
- UChar c;
+ if (m_leadByte && p < end) {
+ auto leadByte = *std::exchange(m_leadByte, WTF::nullopt);
if (m_littleEndian)
- c = m_bufferedByte | (p[0] << 8);
+ processBytesLE(leadByte, p[0]);
else
- c = (m_bufferedByte << 8) | p[0];
- *q++ = c;
- m_haveBufferedByte = false;
- p += 1;
- numCodeUnits -= 1;
+ processBytesBE(leadByte, p[0]);
+ p++;
}
if (m_littleEndian) {
- for (size_t i = 0; i < numCodeUnits; ++i) {
- UChar c = p[0] | (p[1] << 8);
+ while (p < endMinusOneOrNull) {
+ processBytesLE(p[0], p[1]);
p += 2;
- *q++ = c;
}
} else {
- for (size_t i = 0; i < numCodeUnits; ++i) {
- UChar c = (p[0] << 8) | p[1];
+ while (p < endMinusOneOrNull) {
+ processBytesBE(p[0], p[1]);
p += 2;
- *q++ = c;
}
}
- if (numBytes & 1) {
- ASSERT(!m_haveBufferedByte);
- m_haveBufferedByte = true;
- m_bufferedByte = p[0];
+ if (p && p == endMinusOneOrNull) {
+ ASSERT(!m_leadByte);
+ m_leadByte = p[0];
+ } else
+ ASSERT(!p || p == end);
+
+ if (flush && (m_leadByte || m_leadSurrogate)) {
+ m_leadByte = WTF::nullopt;
+ m_leadSurrogate = WTF::nullopt;
+ sawError = true;
+ result.append(replacementCharacter);
}
- return result;
+ return result.toString();
}
Vector<uint8_t> TextCodecUTF16::encode(StringView string, UnencodableHandling)
Modified: trunk/Source/WebCore/platform/text/TextCodecUTF16.h (266456 => 266457)
--- trunk/Source/WebCore/platform/text/TextCodecUTF16.h 2020-09-02 14:42:54 UTC (rev 266456)
+++ trunk/Source/WebCore/platform/text/TextCodecUTF16.h 2020-09-02 14:45:32 UTC (rev 266457)
@@ -41,8 +41,8 @@
Vector<uint8_t> encode(StringView, UnencodableHandling) final;
bool m_littleEndian;
- bool m_haveBufferedByte { false };
- unsigned char m_bufferedByte;
+ Optional<uint8_t> m_leadByte;
+ Optional<UChar> m_leadSurrogate;
};
} // namespace WebCore