Modified: trunk/Source/_javascript_Core/ChangeLog (278858 => 278859)
--- trunk/Source/_javascript_Core/ChangeLog 2021-06-15 03:34:40 UTC (rev 278858)
+++ trunk/Source/_javascript_Core/ChangeLog 2021-06-15 04:45:09 UTC (rev 278859)
@@ -1,3 +1,28 @@
+2021-06-14 Yusuke Suzuki <ysuz...@apple.com>
+
+ [JSC] Workaround ICU uloc_addLikelySubtags / uloc_minimizeSubtags bugs
+ https://bugs.webkit.org/show_bug.cgi?id=226996
+ rdar://79250513
+
+ Reviewed by Ross Kirsling.
+
+ ICU has bugs that uloc_addLikelySubtags / uloc_minimizeSubtags cannot handle very long locale ID that exceeds ULOC_FULLNAME_CAPACITY,
+ while these functions can take arbitrary sized buffer for output. This can be achieved simply by (1) attaching many unicode extensions,
+ or (2) having many variants.
+
+ In this patch, we add a workaround: if uloc_addLikelySubtags / uloc_minimizeSubtags failed, we perform them without having locale ID
+ keywords part. After performing the operations, we append these keywords back.
+
+ This is workaround, and still this workaround is not complete since we could have many variants. In that case, uloc_addLikelySubtags / uloc_minimizeSubtags
+ still fails, and in that case, for now, we give up performing uloc_addLikelySubtags / uloc_minimizeSubtags. Fixing this needs to be
+ done in ICU side: https://unicode-org.atlassian.net/browse/ICU-21639
+
+ * runtime/IntlLocale.cpp:
+ (JSC::IntlLocale::keywordValue const):
+ (JSC::IntlLocale::maximal):
+ (JSC::IntlLocale::minimal):
+ (JSC::IntlLocale::baseName):
+
2021-06-14 Mark Lam <mark....@apple.com>
Add ldp and stp support for FP registers, plus some bug fixes.
Modified: trunk/Source/_javascript_Core/runtime/IntlLocale.cpp (278858 => 278859)
--- trunk/Source/_javascript_Core/runtime/IntlLocale.cpp 2021-06-15 03:34:40 UTC (rev 278858)
+++ trunk/Source/_javascript_Core/runtime/IntlLocale.cpp 2021-06-15 04:45:09 UTC (rev 278859)
@@ -201,7 +201,7 @@
if (needsToGrowToProduceCString(status)) {
buffer.grow(bufferLength + 1);
status = U_ZERO_ERROR;
- uloc_getKeywordValue(m_localeID.data(), key.characters(), buffer.data(), bufferLength + 1, &status);
+ uloc_getKeywordValue(m_localeID.data(), key.characters(), buffer.data(), buffer.size(), &status);
}
ASSERT(U_SUCCESS(status));
if (isBoolean)
@@ -322,6 +322,11 @@
const String& IntlLocale::maximal()
{
if (m_maximal.isNull()) {
+ // ICU has a serious bug that it fails to perform uloc_addLikelySubtags when the input localeID is longer than ULOC_FULLNAME_CAPACITY,
+ // and that can be achieved if we add many unicode extensions. While ICU needs to be fixed, we work-around this bug for now: We pass
+ // non-keyword part of ICU locale ID and later, concatenate keyword part to the output.
+ // Note that ICU locale ID consists of Language, Script, Country (unicode language tag's region.
+ // FIXME: ICU tracking bug https://unicode-org.atlassian.net/browse/ICU-21639.
UErrorCode status = U_ZERO_ERROR;
Vector<char, 32> buffer(32);
auto bufferLength = uloc_addLikelySubtags(m_localeID.data(), buffer.data(), buffer.size(), &status);
@@ -328,11 +333,38 @@
if (needsToGrowToProduceCString(status)) {
buffer.grow(bufferLength + 1);
status = U_ZERO_ERROR;
- uloc_addLikelySubtags(m_localeID.data(), buffer.data(), bufferLength + 1, &status);
+ uloc_addLikelySubtags(m_localeID.data(), buffer.data(), buffer.size(), &status);
}
- ASSERT(U_SUCCESS(status));
- m_maximal = languageTagForLocaleID(buffer.data());
+ if (U_SUCCESS(status))
+ m_maximal = languageTagForLocaleID(buffer.data());
+ else {
+ status = U_ZERO_ERROR;
+ Vector<char, 32> baseNameID;
+ auto bufferLength = uloc_getBaseName(m_localeID.data(), baseNameID.data(), baseNameID.size(), &status);
+ if (needsToGrowToProduceCString(status)) {
+ baseNameID.grow(bufferLength + 1);
+ status = U_ZERO_ERROR;
+ uloc_getBaseName(m_localeID.data(), baseNameID.data(), baseNameID.size(), &status);
+ }
+ ASSERT(U_SUCCESS(status));
+
+ Vector<char, 32> maximal;
+ status = callBufferProducingFunction(uloc_addLikelySubtags, baseNameID.data(), maximal);
+ // We fail if,
+ // 1. uloc_addLikelySubtags still fails.
+ // 2. New maximal locale ID includes newly-added keywords.
+ if (!U_SUCCESS(status) || maximal.find(ULOC_KEYWORD_SEPARATOR) != notFound) {
+ m_maximal = toString();
+ return m_maximal;
+ }
+
+ auto endOfLanguageScriptRegionVariant = WTF::find(m_localeID.data(), m_localeID.length(), ULOC_KEYWORD_SEPARATOR);
+ if (endOfLanguageScriptRegionVariant != notFound)
+ maximal.appendRange(m_localeID.data() + endOfLanguageScriptRegionVariant, m_localeID.data() + m_localeID.length());
+ maximal.append('\0');
+ m_maximal = languageTagForLocaleID(maximal.data());
+ }
}
return m_maximal;
}
@@ -341,6 +373,11 @@
const String& IntlLocale::minimal()
{
if (m_minimal.isNull()) {
+ // ICU has a serious bug that it fails to perform uloc_minimizeSubtags when the input localeID is longer than ULOC_FULLNAME_CAPACITY,
+ // and that can be achieved if we add many unicode extensions. While ICU needs to be fixed, we work-around this bug for now: We pass
+ // non-keyword part of ICU locale ID and later, concatenate keyword part to the output.
+ // Note that ICU locale ID consists of Language, Script, Country (unicode language tag's region.
+ // FIXME: ICU tracking bug https://unicode-org.atlassian.net/browse/ICU-21639.
UErrorCode status = U_ZERO_ERROR;
Vector<char, 32> buffer(32);
auto bufferLength = uloc_minimizeSubtags(m_localeID.data(), buffer.data(), buffer.size(), &status);
@@ -347,11 +384,38 @@
if (needsToGrowToProduceCString(status)) {
buffer.grow(bufferLength + 1);
status = U_ZERO_ERROR;
- uloc_minimizeSubtags(m_localeID.data(), buffer.data(), bufferLength + 1, &status);
+ uloc_minimizeSubtags(m_localeID.data(), buffer.data(), buffer.size(), &status);
}
- ASSERT(U_SUCCESS(status));
- m_minimal = languageTagForLocaleID(buffer.data());
+ if (U_SUCCESS(status))
+ m_minimal = languageTagForLocaleID(buffer.data());
+ else {
+ status = U_ZERO_ERROR;
+ Vector<char, 32> baseNameID;
+ auto bufferLength = uloc_getBaseName(m_localeID.data(), baseNameID.data(), baseNameID.size(), &status);
+ if (needsToGrowToProduceCString(status)) {
+ baseNameID.grow(bufferLength + 1);
+ status = U_ZERO_ERROR;
+ uloc_getBaseName(m_localeID.data(), baseNameID.data(), baseNameID.size(), &status);
+ }
+ ASSERT(U_SUCCESS(status));
+
+ Vector<char, 32> minimal;
+ auto status = callBufferProducingFunction(uloc_minimizeSubtags, baseNameID.data(), minimal);
+ // We fail if,
+ // 1. uloc_minimizeSubtags still fails.
+ // 2. New minimal locale ID includes newly-added keywords.
+ if (!U_SUCCESS(status) || minimal.find(ULOC_KEYWORD_SEPARATOR) != notFound) {
+ m_minimal = toString();
+ return m_minimal;
+ }
+
+ auto endOfLanguageScriptRegionVariant = WTF::find(m_localeID.data(), m_localeID.length(), ULOC_KEYWORD_SEPARATOR);
+ if (endOfLanguageScriptRegionVariant != notFound)
+ minimal.appendRange(m_localeID.data() + endOfLanguageScriptRegionVariant, m_localeID.data() + m_localeID.length());
+ minimal.append('\0');
+ m_minimal = languageTagForLocaleID(minimal.data());
+ }
}
return m_minimal;
}
@@ -374,7 +438,7 @@
if (needsToGrowToProduceCString(status)) {
buffer.grow(bufferLength + 1);
status = U_ZERO_ERROR;
- uloc_getBaseName(m_localeID.data(), buffer.data(), bufferLength + 1, &status);
+ uloc_getBaseName(m_localeID.data(), buffer.data(), buffer.size(), &status);
}
ASSERT(U_SUCCESS(status));