Diff
Modified: trunk/LayoutTests/ChangeLog (176472 => 176473)
--- trunk/LayoutTests/ChangeLog 2014-11-21 21:34:30 UTC (rev 176472)
+++ trunk/LayoutTests/ChangeLog 2014-11-21 22:33:17 UTC (rev 176473)
@@ -1,3 +1,12 @@
+2014-11-21 Glenn Adams <gl...@skynav.com> and Myles C. Maxfield <mmaxfi...@apple.com>
+
+ CSS3: line-break property support
+ https://bugs.webkit.org/show_bug.cgi?id=89235
+
+ Reviewed by Eric Seidel and Dave Hyatt.
+
+ * platform/mac/TestExpectations: Mark css3/line-break tests as passing.
+
2014-11-21 Zalan Bujtas <za...@apple.com>
REGRESSION(r175259) Simple line layout text measuring behavior changed.
Modified: trunk/LayoutTests/platform/mac/TestExpectations (176472 => 176473)
--- trunk/LayoutTests/platform/mac/TestExpectations 2014-11-21 21:34:30 UTC (rev 176472)
+++ trunk/LayoutTests/platform/mac/TestExpectations 2014-11-21 22:33:17 UTC (rev 176473)
@@ -1429,3 +1429,8 @@
webkit.org/b/138075 [ Yosemite ] fast/text/international/hindi-spacing.html [ Pass Failure ]
webkit.org/b/82980 http/tests/navigation/back-twice-without-commit.html [ Timeout ]
+
+# Verified passing, so override generic skip
+webkit.org/b/89235 css3/line-break [ Pass ]
+webkit.org/b/138115 css3/line-break/line-break-auto-hyphens.html [ ImageOnlyFailure ]
+webkit.org/b/138115 css3/line-break/line-break-auto-sound-marks.html [ ImageOnlyFailure ]
Modified: trunk/Source/WebCore/ChangeLog (176472 => 176473)
--- trunk/Source/WebCore/ChangeLog 2014-11-21 21:34:30 UTC (rev 176472)
+++ trunk/Source/WebCore/ChangeLog 2014-11-21 22:33:17 UTC (rev 176473)
@@ -1,3 +1,150 @@
+2014-11-21 Glenn Adams <gl...@skynav.com> and Myles C. Maxfield <mmaxfi...@apple.com>
+
+ Add support to -webkit-line-break property for CSS3 Text line-break property values and semantics.
+ https://bugs.webkit.org/show_bug.cgi?id=89235
+
+ Reviewed by Eric Seidel and Dave Hyatt.
+
+ This patch adds semantic support for the CSS3 line-break property (qua -webkit-line-break),
+ and enables testing on (apple) mac ports. Follow-on patches will enable these tests on
+ other ports as they are incrementally verified.
+
+ See also wiki documentation at:
+ [1] http://trac.webkit.org/wiki/LineBreaking
+ [2] http://trac.webkit.org/wiki/LineBreakingCSS3Mapping
+
+ Tests: css3/line-break/line-break-auto-centered.html
+ css3/line-break/line-break-auto-half-kana.html
+ css3/line-break/line-break-auto-hyphens.html
+ css3/line-break/line-break-auto-inseparables.html
+ css3/line-break/line-break-auto-iteration-marks.html
+ css3/line-break/line-break-auto-postfixes.html
+ css3/line-break/line-break-auto-prefixes.html
+ css3/line-break/line-break-auto-sound-marks.html
+ css3/line-break/line-break-loose-centered.html
+ css3/line-break/line-break-loose-half-kana.html
+ css3/line-break/line-break-loose-hyphens.html
+ css3/line-break/line-break-loose-inseparables.html
+ css3/line-break/line-break-loose-iteration-marks.html
+ css3/line-break/line-break-loose-postfixes.html
+ css3/line-break/line-break-loose-prefixes.html
+ css3/line-break/line-break-loose-sound-marks.html
+ css3/line-break/line-break-normal-centered.html
+ css3/line-break/line-break-normal-half-kana.html
+ css3/line-break/line-break-normal-hyphens.html
+ css3/line-break/line-break-normal-inseparables.html
+ css3/line-break/line-break-normal-iteration-marks.html
+ css3/line-break/line-break-normal-postfixes.html
+ css3/line-break/line-break-normal-prefixes.html
+ css3/line-break/line-break-normal-sound-marks.html
+ css3/line-break/line-break-strict-centered.html
+ css3/line-break/line-break-strict-half-kana.html
+ css3/line-break/line-break-strict-hyphens.html
+ css3/line-break/line-break-strict-inseparables.html
+ css3/line-break/line-break-strict-iteration-marks.html
+ css3/line-break/line-break-strict-postfixes.html
+ css3/line-break/line-break-strict-prefixes.html
+ css3/line-break/line-break-strict-sound-marks.html
+
+ These tests were previously added in http://trac.webkit.org/changeset/143378, but skipped
+ in generic TestExpectations. In this patch, they are marked as Pass for the (apple) mac ports.
+
+ * platform/text/LineBreakIteratorPoolICU.h:
+ (WebCore::LineBreakIteratorPool::makeLocaleWithBreakKeyword):
+ Add static function to construct ICU locale argument (also used as pool key) with additional
+ break keyword.
+ (LineBreakIteratorPool):
+ (WebCore::LineBreakIteratorPool::take):
+ (WebCore::LineBreakIteratorPool::put):
+ Remove direct dependency on the ICU library (and types), moving that dependency into
+ new {open,close}LineBreakIterator() functions (defined in TextBreakIteratorICU.cpp).
+ Update to take line break mode into account.
+ Create (and cache) different break iterators depending on line break mode (in addition to locale),
+ which entails expanding pool entry key format to optionally append "@break=" +
+ "loose"|"normal"|"strict" keyword to locale string.
+
+ * platform/text/TextBreakIterator.h:
+ (WebCore::LazyLineBreakIterator::LazyLineBreakIterator):
+ (WebCore::LazyLineBreakIterator::isLooseCJKMode):
+ (WebCore::LazyLineBreakIterator::get):
+ (WebCore::LazyLineBreakIterator::reset):
+ (LazyLineBreakIterator):
+ Define LineBreakIteratorMode enumeration for use in TextBreakIterator et al.
+ Add state member to indicate line break mode.
+
+ * platform/text/TextBreakIteratorICU.cpp:
+ (WebCore::acquireLineBreakIterator):
+ Use new line break mode when making iterator from pool.
+ Handle change of return type of LineBreakIteratorPool::take() to non-ICU type,
+ i.e., TextBreakIterator* instead of ICU's UBreakIterator*.
+ (WebCore::releaseLineBreakIterator):
+ Handle change of parameter type of LineBreakIteratorPool::put() to non-ICU type,
+ i.e., TextBreakIterator* instead of ICU's UBreakIterator*.
+ (WebCore):
+ (WebCore::isCJKLocale):
+ New function for determining if CJK rules apply.
+ (WebCore::openLineBreakIterator):
+ New function for abstracting opening of ICU style line break iterator. This is now
+ used in LineBreakIteratorPoolICU.h rather than having direct ICU API dependency there.
+ This function also takes into account the line break mode.
+
+ Note that this function only calls ubrk_openRules() when the author has opted-in via
+ using the -webkit-line-break CSS property. Eventually, we would like to be able to
+ customize the rules that ICU's line breaking algorithm uses (especially for CJK text);
+ however, ubrk_openRules() currently parses its input string to create a DFA and is
+ therefore very slow. In fact, it's so slow that increasing our cache size in
+ LineBreakIteratorPool doesn't actually help enough. Also note that the default value
+ for the line-break CSS property is 'auto'.
+ (WebCore::closeLineBreakIterator):
+ New function for abstracting closing of ICU style line break iterator. This is now
+ used in LineBreakIteratorPoolICU.h rather than having direct ICU API dependency there.
+ (WebCore::mapLineIteratorModeToRules):
+ New function that assembles the ICU break rule string for a given line break mode and CJK flag.
+
+ * rendering/RenderBlockLineLayout.cpp:
+ (WebCore::RenderBlock::LineBreaker::nextSegmentBreak):
+ Pass line break iterator mode flag when resetting LazyLineBreakIterator.
+ Add looseMode local variable to prevent need for computing under isBreakable().
+
+ * rendering/RenderText.cpp:
+ (WebCore::mapLineBreakToIteratorMode):
+ Add implementation for mapLineBreakToIteratorMode(), used by both RenderText::computePreferredLogicalWidths
+ and RenderBlock::LineBreaker::nextLineBreak.
+ (WebCore):
+ (WebCore::RenderText::computePreferredLogicalWidths):
+ Ensure (lazy line) breakIterator is initialized for line break mode.
+ Ensure isBreakable() is passed loose mode flag to match behavior in RenderBlock::LineBreaker::nextLineBreak.
+
+ * rendering/RenderText.h:
+ (WebCore):
+ Add declaration for mapLineBreakToIteratorMode(), used by both RenderText::computePreferredLogicalWidths
+ and RenderBlock::LineBreaker::nextLineBreak.
+
+ * rendering/break_lines.cpp:
+ (WebCore):
+ Introduce (local) enum NBSPBehavior for expanding template on nextBreakablePosition.
+ (WebCore::isBreakableSpace):
+ Add externally specified loose mode parameter to prevent need to invoke line break iterator
+ accessor method on each invocation. Use new loose mode flavors of NBP functions.
+ (WebCore::needsLineBreakIterator):
+ Use enum NBSP behavior template parameter rather than boolean.
+ (WebCore::nextBreakablePositionNonLoosely):
+ Extend name to distinguish from loose flavor of this function.
+ (WebCore::nextBreakablePositionLoosely):
+ Add loose flavor of NBP invoked only when loose mode applies, in which case ASCII shortcut
+ table cannot be used.
+ (WebCore::nextBreakablePosition):
+ (WebCore::nextBreakablePositionIgnoringNBSP):
+ Use (renamed) non-loose flavor of NBP.
+ (WebCore::nextBreakablePositionLoose):
+ (WebCore::nextBreakablePositionIgnoringNBSPLoose):
+ Introduce loose flavor of NBP template expansions.
+
+ * rendering/break_lines.h:
+ (WebCore):
+ (WebCore::isBreakable):
+ Add externally specified loose mode parameter to prevent need to invoke line break iterator
+ accessor method on each invocation.
+
2014-11-21 Anders Carlsson <ander...@apple.com>
More build fixes.
Modified: trunk/Source/WebCore/WebCore.exp.in (176472 => 176473)
--- trunk/Source/WebCore/WebCore.exp.in 2014-11-21 21:34:30 UTC (rev 176472)
+++ trunk/Source/WebCore/WebCore.exp.in 2014-11-21 22:33:17 UTC (rev 176473)
@@ -2593,6 +2593,7 @@
__ZN7WebCore11MathMLNames4initEv
__ZN7WebCore11MemoryCache18pruneDeadResourcesEv
__ZN7WebCore11MemoryCache18pruneLiveResourcesEb
+__ZN7WebCore11isCJKLocaleERKN3WTF12AtomicStringE
__ZN7WebCore11isEndOfLineERKNS_15VisiblePositionE
__ZN7WebCore11prefetchDNSERKN3WTF6StringE
__ZN7WebCore12AudioSession11setCategoryENS0_12CategoryTypeE
@@ -2705,7 +2706,7 @@
__ZN7WebCore24FloatingPointEnvironment21enableDenormalSupportEv
__ZN7WebCore24FloatingPointEnvironment25saveMainThreadEnvironmentEv
__ZN7WebCore24FloatingPointEnvironment6sharedEv
-__ZN7WebCore24acquireLineBreakIteratorEN3WTF10StringViewERKNS0_12AtomicStringEPKtj
+__ZN7WebCore24acquireLineBreakIteratorEN3WTF10StringViewERKNS0_12AtomicStringEPKtjNS_21LineBreakIteratorModeEb
__ZN7WebCore24charactersAroundPositionERKNS_15VisiblePositionERiS3_S3_
__ZN7WebCore24createTemporaryDirectoryEP8NSString
__ZN7WebCore24distanceBetweenPositionsERKNS_15VisiblePositionES2_
Modified: trunk/Source/WebCore/platform/text/LineBreakIteratorPoolICU.h (176472 => 176473)
--- trunk/Source/WebCore/platform/text/LineBreakIteratorPoolICU.h 2014-11-21 21:34:30 UTC (rev 176472)
+++ trunk/Source/WebCore/platform/text/LineBreakIteratorPoolICU.h 2014-11-21 22:33:17 UTC (rev 176473)
@@ -26,6 +26,7 @@
#ifndef LineBreakIteratorPoolICU_h
#define LineBreakIteratorPoolICU_h
+#include "TextBreakIterator.h"
#include "TextBreakIteratorInternalICU.h"
#include <unicode/ubrk.h>
#include <wtf/Assertions.h>
@@ -34,6 +35,7 @@
#include <wtf/ThreadSpecific.h>
#include <wtf/text/AtomicString.h>
#include <wtf/text/CString.h>
+#include <wtf/text/StringBuilder.h>
namespace WebCore {
@@ -48,11 +50,39 @@
static PassOwnPtr<LineBreakIteratorPool> create() { return adoptPtr(new LineBreakIteratorPool); }
- UBreakIterator* take(const AtomicString& locale)
+ static String makeLocaleWithBreakKeyword(const AtomicString& locale, LineBreakIteratorMode mode)
{
- UBreakIterator* iterator = 0;
+ StringBuilder localeWithKeyword;
+ localeWithKeyword.append(locale);
+ localeWithKeyword.appendLiteral("@break=");
+ switch (mode) {
+ case LineBreakIteratorModeUAX14:
+ ASSERT_NOT_REACHED();
+ break;
+ case LineBreakIteratorModeUAX14Loose:
+ localeWithKeyword.appendLiteral("loose");
+ break;
+ case LineBreakIteratorModeUAX14Normal:
+ localeWithKeyword.appendLiteral("normal");
+ break;
+ case LineBreakIteratorModeUAX14Strict:
+ localeWithKeyword.appendLiteral("strict");
+ break;
+ }
+ return localeWithKeyword.toString();
+ }
+
+ TextBreakIterator* take(const AtomicString& locale, LineBreakIteratorMode mode, bool isCJK)
+ {
+ AtomicString localeWithOptionalBreakKeyword;
+ if (mode == LineBreakIteratorModeUAX14)
+ localeWithOptionalBreakKeyword = locale;
+ else
+ localeWithOptionalBreakKeyword = makeLocaleWithBreakKeyword(locale, mode);
+
+ TextBreakIterator* iterator = 0;
for (size_t i = 0; i < m_pool.size(); ++i) {
- if (m_pool[i].first == locale) {
+ if (m_pool[i].first == localeWithOptionalBreakKeyword) {
iterator = m_pool[i].second;
m_pool.remove(i);
break;
@@ -60,33 +90,22 @@
}
if (!iterator) {
- UErrorCode openStatus = U_ZERO_ERROR;
- bool localeIsEmpty = locale.isEmpty();
- iterator = ubrk_open(UBRK_LINE, localeIsEmpty ? currentTextBreakLocaleID() : locale.string().utf8().data(), 0, 0, &openStatus);
- // locale comes from a web page and it can be invalid, leading ICU
- // to fail, in which case we fall back to the default locale.
- if (!localeIsEmpty && U_FAILURE(openStatus)) {
- openStatus = U_ZERO_ERROR;
- iterator = ubrk_open(UBRK_LINE, currentTextBreakLocaleID(), 0, 0, &openStatus);
- }
-
- if (U_FAILURE(openStatus)) {
- LOG_ERROR("ubrk_open failed with status %d", openStatus);
+ iterator = openLineBreakIterator(localeWithOptionalBreakKeyword, mode, isCJK);
+ if (!iterator)
return 0;
- }
}
ASSERT(!m_vendedIterators.contains(iterator));
- m_vendedIterators.set(iterator, locale);
+ m_vendedIterators.set(iterator, localeWithOptionalBreakKeyword);
return iterator;
}
- void put(UBreakIterator* iterator)
+ void put(TextBreakIterator* iterator)
{
ASSERT_ARG(iterator, m_vendedIterators.contains(iterator));
if (m_pool.size() == capacity) {
- ubrk_close(m_pool[0].second);
+ closeLineBreakIterator(m_pool[0].second);
m_pool.remove(0);
}
@@ -98,10 +117,10 @@
static const size_t capacity = 4;
- typedef std::pair<AtomicString, UBreakIterator*> Entry;
+ typedef std::pair<AtomicString, TextBreakIterator*> Entry;
typedef Vector<Entry, capacity> Pool;
Pool m_pool;
- HashMap<UBreakIterator*, AtomicString> m_vendedIterators;
+ HashMap<TextBreakIterator*, AtomicString> m_vendedIterators;
friend WTF::ThreadSpecific<LineBreakIteratorPool>::operator LineBreakIteratorPool*();
};
Modified: trunk/Source/WebCore/platform/text/TextBreakIterator.cpp (176472 => 176473)
--- trunk/Source/WebCore/platform/text/TextBreakIterator.cpp 2014-11-21 21:34:30 UTC (rev 176472)
+++ trunk/Source/WebCore/platform/text/TextBreakIterator.cpp 2014-11-21 22:33:17 UTC (rev 176473)
@@ -256,9 +256,9 @@
return setTextForIterator(*staticCursorMovementIterator, string);
}
-TextBreakIterator* acquireLineBreakIterator(StringView string, const AtomicString& locale, const UChar* priorContext, unsigned priorContextLength)
+TextBreakIterator* acquireLineBreakIterator(StringView string, const AtomicString& locale, const UChar* priorContext, unsigned priorContextLength, LineBreakIteratorMode mode, bool isCJK)
{
- TextBreakIterator* iterator = reinterpret_cast<TextBreakIterator*>(LineBreakIteratorPool::sharedPool().take(locale));
+ TextBreakIterator* iterator = LineBreakIteratorPool::sharedPool().take(locale, mode, isCJK);
if (!iterator)
return nullptr;
@@ -269,9 +269,475 @@
{
ASSERT_ARG(iterator, iterator);
- LineBreakIteratorPool::sharedPool().put(reinterpret_cast<UBreakIterator*>(iterator));
+ LineBreakIteratorPool::sharedPool().put(iterator);
}
+static const char* uax14Prologue =
+ "!!chain;"
+ "!!LBCMNoChain;"
+ "!!lookAheadHardBreak;";
+
+static const char* uax14AssignmentsBefore =
+ // Defines $CJ (Conditional Japanese Starter), which the later class assignments subtract from
+ // or add to $ID / $NS. ICU versions prior to 49 don't support
+ // :LineBreak=Conditional_Japanese_Starter:, so those get an explicit enumeration instead.
+ // ICU's version numbering went from 4.8 directly to 49, so support is identified by the major
+ // number alone; also requiring MINOR_NUM >= 9 would wrongly reject 49.1, 50.1, and so on.
+ "$CJ = ["
+#if U_ICU_VERSION_MAJOR_NUM >= 49
+ ":LineBreak=Conditional_Japanese_Starter:"
+#else
+ "\\u3041\\u3043\\u3045\\u3047\\u3049\\u3063\\u3083\\u3085\\u3087\\u308E\\u3095\\u3096\\u30A1\\u30A3\\u30A5\\u30A7"
+ "\\u30A9\\u30C3\\u30E3\\u30E5\\u30E7\\u30EE\\u30F5\\u30F6\\u30FC"
+ "\\u31F0\\u31F1\\u31F2\\u31F3\\u31F4\\u31F5\\u31F6\\u31F7\\u31F8\\u31F9\\u31FA\\u31FB\\u31FC\\u31FD\\u31FE\\u31FF"
+ "\\uFF67\\uFF68\\uFF69\\uFF6A\\uFF6B\\uFF6C\\uFF6D\\uFF6E\\uFF6F\\uFF70"
+#endif
+ "];";
+
+static const char* uax14AssignmentsCustomLooseCJK =
+ "$BA_SUB = [\\u2010\\u2013];"
+ "$EX_SUB = [\\u0021\\u003F\\uFF01\\uFF1F];"
+ "$ID_SUB = '';"
+ "$IN_SUB = [\\u2025\\u2026];"
+ "$IS_SUB = [\\u003A\\u003B];"
+ "$NS_SUB = [\\u203C\\u2047\\u2048\\u2049\\u3005\\u301C\\u303B\\u309D\\u309E\\u30A0\\u30FB\\u30FD\\u30FE\\uFF1A\\uFF1B\\uFF65];"
+ "$PO_SUB = [\\u0025\\u00A2\\u00B0\\u2030\\u2032\\u2033\\u2103\\uFF05\\uFFE0];"
+ "$PR_SUB = [\\u0024\\u00A3\\u00A5\\u20AC\\u2116\\uFF04\\uFFE1\\uFFE5];"
+ "$ID_ADD = [$CJ $BA_SUB $EX_SUB $IN_SUB $IS_SUB $NS_SUB $PO_SUB $PR_SUB];"
+ "$NS_ADD = '';";
+
+static const char* uax14AssignmentsCustomLooseNonCJK =
+ "$BA_SUB = '';"
+ "$EX_SUB = '';"
+ "$ID_SUB = '';"
+ "$IN_SUB = [\\u2025\\u2026];"
+ "$IS_SUB = '';"
+ "$NS_SUB = [\\u3005\\u303B\\u309D\\u309E\\u30FD\\u30FE];"
+ "$PO_SUB = '';"
+ "$PR_SUB = '';"
+ "$ID_ADD = [$CJ $IN_SUB $NS_SUB];"
+ "$NS_ADD = '';";
+
+static const char* uax14AssignmentsCustomNormalCJK =
+ "$BA_SUB = [\\u2010\\u2013];"
+ "$EX_SUB = '';"
+ "$IN_SUB = '';"
+ "$ID_SUB = '';"
+ "$IS_SUB = '';"
+ "$NS_SUB = [\\u301C\\u30A0];"
+ "$PO_SUB = '';"
+ "$PR_SUB = '';"
+ "$ID_ADD = [$CJ $BA_SUB $NS_SUB];"
+ "$NS_ADD = '';";
+
+static const char* uax14AssignmentsCustomNormalNonCJK =
+ "$BA_SUB = '';"
+ "$EX_SUB = '';"
+ "$ID_SUB = '';"
+ "$IN_SUB = '';"
+ "$IS_SUB = '';"
+ "$NS_SUB = '';"
+ "$PO_SUB = '';"
+ "$PR_SUB = '';"
+ "$ID_ADD = [$CJ];"
+ "$NS_ADD = '';";
+
+static const char* uax14AssignmentsCustomStrictCJK =
+ "$BA_SUB = '';"
+ "$EX_SUB = '';"
+ "$ID_SUB = '';"
+ "$IN_SUB = '';"
+ "$IS_SUB = '';"
+ "$NS_SUB = '';"
+ "$PO_SUB = '';"
+ "$PR_SUB = '';"
+ "$ID_ADD = '';"
+ "$NS_ADD = [$CJ];";
+
+#define uax14AssignmentsCustomStrictNonCJK uax14AssignmentsCustomStrictCJK
+#define uax14AssignmentsCustomDefaultCJK uax14AssignmentsCustomNormalCJK
+#define uax14AssignmentsCustomDefaultNonCJK uax14AssignmentsCustomStrictNonCJK
+
+static const char* uax14AssignmentsAfter =
+ // Character-class assignments shared by every mode. Classes written as "[...] - $XX_SUB" or
+ // "... $XX_ADD" are adjusted by the mode-specific $*_SUB / $*_ADD sets, which must therefore be
+ // defined earlier in the assembled rule string. The "$XXcm" variables are each class followed
+ // by any number of combining marks.
+ "$AI = [:LineBreak = Ambiguous:];"
+ "$AL = [:LineBreak = Alphabetic:];"
+ "$BA = [[:LineBreak = Break_After:] - $BA_SUB];"
+ "$BB = [:LineBreak = Break_Before:];"
+ "$BK = [:LineBreak = Mandatory_Break:];"
+ "$B2 = [:LineBreak = Break_Both:];"
+ "$CB = [:LineBreak = Contingent_Break:];"
+ "$CL = [:LineBreak = Close_Punctuation:];"
+ "$CM = [:LineBreak = Combining_Mark:];"
+ "$CP = [:LineBreak = Close_Parenthesis:];"
+ "$CR = [:LineBreak = Carriage_Return:];"
+ "$EX = [[:LineBreak = Exclamation:] - $EX_SUB];"
+ "$GL = [:LineBreak = Glue:];"
+ // Use the Hebrew_Letter line-break class where ICU provides it, otherwise approximate it.
+ // ICU's version numbering went from 4.8 directly to 49, so the major number alone is tested;
+ // also requiring MINOR_NUM >= 9 would wrongly reject 49.1, 50.1, and so on.
+#if U_ICU_VERSION_MAJOR_NUM >= 49
+ "$HL = [:LineBreak = Hebrew_Letter:];"
+#else
+ "$HL = [[:Hebrew:] & [:Letter:]];"
+#endif
+ "$HY = [:LineBreak = Hyphen:];"
+ "$H2 = [:LineBreak = H2:];"
+ "$H3 = [:LineBreak = H3:];"
+ "$ID = [[[[:LineBreak = Ideographic:] - $CJ] $ID_ADD] - $ID_SUB];"
+ "$IN = [[:LineBreak = Inseparable:] - $IN_SUB];"
+ "$IS = [[:LineBreak = Infix_Numeric:] - $IS_SUB];"
+ "$JL = [:LineBreak = JL:];"
+ "$JV = [:LineBreak = JV:];"
+ "$JT = [:LineBreak = JT:];"
+ "$LF = [:LineBreak = Line_Feed:];"
+ "$NL = [:LineBreak = Next_Line:];"
+ "$NS = [[[[:LineBreak = Nonstarter:] - $CJ] $NS_ADD] - $NS_SUB];"
+ "$NU = [:LineBreak = Numeric:];"
+ "$OP = [:LineBreak = Open_Punctuation:];"
+ "$PO = [[:LineBreak = Postfix_Numeric:] - $PO_SUB];"
+ "$PR = [[:LineBreak = Prefix_Numeric:] - $PR_SUB];"
+ "$QU = [:LineBreak = Quotation:];"
+ "$SA = [:LineBreak = Complex_Context:];"
+ "$SG = [:LineBreak = Surrogate:];"
+ "$SP = [:LineBreak = Space:];"
+ "$SY = [:LineBreak = Break_Symbols:];"
+ "$WJ = [:LineBreak = Word_Joiner:];"
+ "$XX = [:LineBreak = Unknown:];"
+ "$ZW = [:LineBreak = ZWSpace:];"
+ "$dictionary = [:LineBreak = Complex_Context:];"
+ "$ALPlus = [$AL $AI $SA $SG $XX];"
+ "$ALcm = $ALPlus $CM*;"
+ "$BAcm = $BA $CM*;"
+ "$BBcm = $BB $CM*;"
+ "$B2cm = $B2 $CM*;"
+ "$CLcm = $CL $CM*;"
+ "$CPcm = $CP $CM*;"
+ "$EXcm = $EX $CM*;"
+ "$GLcm = $GL $CM*;"
+ "$HLcm = $HL $CM*;"
+ "$HYcm = $HY $CM*;"
+ "$H2cm = $H2 $CM*;"
+ "$H3cm = $H3 $CM*;"
+ "$IDcm = $ID $CM*;"
+ "$INcm = $IN $CM*;"
+ "$IScm = $IS $CM*;"
+ "$JLcm = $JL $CM*;"
+ "$JVcm = $JV $CM*;"
+ "$JTcm = $JT $CM*;"
+ "$NScm = $NS $CM*;"
+ "$NUcm = $NU $CM*;"
+ "$OPcm = $OP $CM*;"
+ "$POcm = $PO $CM*;"
+ "$PRcm = $PR $CM*;"
+ "$QUcm = $QU $CM*;"
+ "$SYcm = $SY $CM*;"
+ "$WJcm = $WJ $CM*;";
+
+static const char* uax14Forward =
+ // Forward rule section of the custom line-break rule set. It relies on the character-class
+ // variables ($CM, $GL, $GLcm, ...) having been defined earlier in the assembled rule string.
+ "!!forward;"
+ "$CAN_CM = [^$SP $BK $CR $LF $NL $ZW $CM];"
+ "$CANT_CM = [$SP $BK $CR $LF $NL $ZW $CM];"
+ "$AL_FOLLOW_NOCM = [$BK $CR $LF $NL $ZW $SP];"
+ "$AL_FOLLOW_CM = [$CL $CP $EX $HL $IS $SY $WJ $GL $OP $QU $BA $HY $NS $IN $NU $ALPlus];"
+ "$AL_FOLLOW = [$AL_FOLLOW_NOCM $AL_FOLLOW_CM];"
+ "$LB4Breaks = [$BK $CR $LF $NL];"
+ "$LB4NonBreaks = [^$BK $CR $LF $NL];"
+ "$LB8Breaks = [$LB4Breaks $ZW];"
+ "$LB8NonBreaks = [[$LB4NonBreaks] - [$ZW]];"
+ "$LB18NonBreaks = [$LB8NonBreaks - [$SP]];"
+ "$LB18Breaks = [$LB8Breaks $SP];"
+ "$LB20NonBreaks = [$LB18NonBreaks - $CB];"
+ "$ALPlus $CM+;"
+ "$BA $CM+;"
+ "$BB $CM+;"
+ "$B2 $CM+;"
+ "$CL $CM+;"
+ "$CP $CM+;"
+ "$EX $CM+;"
+ "$GL $CM+;"
+ "$HL $CM+;"
+ "$HY $CM+;"
+ "$H2 $CM+;"
+ "$H3 $CM+;"
+ "$ID $CM+;"
+ "$IN $CM+;"
+ "$IS $CM+;"
+ "$JL $CM+;"
+ "$JV $CM+;"
+ "$JT $CM+;"
+ "$NS $CM+;"
+ "$NU $CM+;"
+ "$OP $CM+;"
+ "$PO $CM+;"
+ "$PR $CM+;"
+ "$QU $CM+;"
+ "$SY $CM+;"
+ "$WJ $CM+;"
+ "$CR $LF {100};"
+ "$LB4NonBreaks? $LB4Breaks {100};"
+ "$CAN_CM $CM* $LB4Breaks {100};"
+ "$CM+ $LB4Breaks {100};"
+ "$LB4NonBreaks [$SP $ZW];"
+ "$CAN_CM $CM* [$SP $ZW];"
+ "$CM+ [$SP $ZW];"
+ "$CAN_CM $CM+;"
+ "$CM+;"
+ "$CAN_CM $CM* $WJcm;"
+ "$LB8NonBreaks $WJcm;"
+ "$CM+ $WJcm;"
+ "$WJcm $CANT_CM;"
+ "$WJcm $CAN_CM $CM*;"
+ "$GLcm $CAN_CM $CM*;"
+ "$GLcm $CANT_CM;"
+ "[[$LB8NonBreaks] - [$SP $BA $HY]] $CM* $GLcm;"
+ // The '$' sigil is required to reference the $GLcm variable, matching the sibling
+ // "$CM+ $WJcm;" / "$CM+ $CL;" rules; without it the rule does not refer to the glue class.
+ "$CM+ $GLcm;"
+ "$LB8NonBreaks $CL;"
+ "$CAN_CM $CM* $CL;"
+ "$CM+ $CL;"
+ "$LB8NonBreaks $CP;"
+ "$CAN_CM $CM* $CP;"
+ "$CM+ $CP;"
+ "$LB8NonBreaks $EX;"
+ "$CAN_CM $CM* $EX;"
+ "$CM+ $EX;"
+ "$LB8NonBreaks $IS;"
+ "$CAN_CM $CM* $IS;"
+ "$CM+ $IS;"
+ "$LB8NonBreaks $SY;"
+ "$CAN_CM $CM* $SY;"
+ "$CM+ $SY;"
+ "$OPcm $SP* $CAN_CM $CM*;"
+ "$OPcm $SP* $CANT_CM;"
+ "$OPcm $SP+ $CM+ $AL_FOLLOW?;"
+ "$QUcm $SP* $OPcm;"
+ "($CLcm | $CPcm) $SP* $NScm;"
+ "$B2cm $SP* $B2cm;"
+ "$LB18NonBreaks $CM* $QUcm;"
+ "$CM+ $QUcm;"
+ "$QUcm .?;"
+ "$QUcm $LB18NonBreaks $CM*;"
+ "$LB20NonBreaks $CM* ($BAcm | $HYcm | $NScm); "
+ "$BBcm [^$CB];"
+ "$BBcm $LB20NonBreaks $CM*;"
+ "$HLcm ($HYcm | $BAcm) [^$CB]?;"
+ "($ALcm | $HLcm) $INcm;"
+ "$CM+ $INcm;"
+ "$IDcm $INcm;"
+ "$INcm $INcm;"
+ "$NUcm $INcm;"
+ "$IDcm $POcm;"
+ "$ALcm $NUcm;"
+ "$HLcm $NUcm;"
+ "$CM+ $NUcm;"
+ "$NUcm $ALcm;"
+ "$NUcm $HLcm;"
+ "$PRcm $IDcm;"
+ "$PRcm ($ALcm | $HLcm);"
+ "$POcm ($ALcm | $HLcm);"
+ "($PRcm | $POcm)? ($OPcm | $HYcm)? $NUcm ($NUcm | $SYcm | $IScm)* ($CLcm | $CPcm)? ($PRcm | $POcm)?;"
+ "$JLcm ($JLcm | $JVcm | $H2cm | $H3cm);"
+ "($JVcm | $H2cm) ($JVcm | $JTcm);"
+ "($JTcm | $H3cm) $JTcm;"
+ "($JLcm | $JVcm | $JTcm | $H2cm | $H3cm) $INcm;"
+ "($JLcm | $JVcm | $JTcm | $H2cm | $H3cm) $POcm;"
+ "$PRcm ($JLcm | $JVcm | $JTcm | $H2cm | $H3cm);"
+ "($ALcm | $HLcm) ($ALcm | $HLcm);"
+ "$CM+ ($ALcm | $HLcm);"
+ "$IScm ($ALcm | $HLcm);"
+ "($ALcm | $HLcm | $NUcm) $OPcm;"
+ "$CM+ $OPcm;"
+ "$CPcm ($ALcm | $HLcm | $NUcm);";
+
+static const char* uax14Reverse =
+ "!!reverse;"
+ "$CM+ $ALPlus;"
+ "$CM+ $BA;"
+ "$CM+ $BB;"
+ "$CM+ $B2;"
+ "$CM+ $CL;"
+ "$CM+ $CP;"
+ "$CM+ $EX;"
+ "$CM+ $GL;"
+ "$CM+ $HL;"
+ "$CM+ $HY;"
+ "$CM+ $H2;"
+ "$CM+ $H3;"
+ "$CM+ $ID;"
+ "$CM+ $IN;"
+ "$CM+ $IS;"
+ "$CM+ $JL;"
+ "$CM+ $JV;"
+ "$CM+ $JT;"
+ "$CM+ $NS;"
+ "$CM+ $NU;"
+ "$CM+ $OP;"
+ "$CM+ $PO;"
+ "$CM+ $PR;"
+ "$CM+ $QU;"
+ "$CM+ $SY;"
+ "$CM+ $WJ;"
+ "$CM+;"
+ "$AL_FOLLOW $CM+ / ([$BK $CR $LF $NL $ZW {eof}] | $SP+ $CM+ $SP | $SP+ $CM* ([^$OP $CM $SP] | [$AL {eof}]));"
+ "[$PR] / $CM+ [$BK $CR $LF $NL $ZW $SP {eof}];"
+ "$LB4Breaks [$LB4NonBreaks-$CM];"
+ "$LB4Breaks $CM+ $CAN_CM;"
+ "$LF $CR;"
+ "[$SP $ZW] [$LB4NonBreaks-$CM];"
+ "[$SP $ZW] $CM+ $CAN_CM;"
+ "$CM+ $CAN_CM;"
+ "$CM* $WJ $CM* $CAN_CM;"
+ "$CM* $WJ [$LB8NonBreaks-$CM];"
+ "$CANT_CM $CM* $WJ;"
+ "$CM* $CAN_CM $CM* $WJ;"
+ "$CM* $GL $CM* [$LB8NonBreaks-[$CM $SP $BA $HY]];"
+ "$CANT_CM $CM* $GL;"
+ "$CM* $CAN_CM $CM* $GL;"
+ "$CL $CM+ $CAN_CM;"
+ "$CP $CM+ $CAN_CM;"
+ "$EX $CM+ $CAN_CM;"
+ "$IS $CM+ $CAN_CM;"
+ "$SY $CM+ $CAN_CM;"
+ "$CL [$LB8NonBreaks-$CM];"
+ "$CP [$LB8NonBreaks-$CM];"
+ "$EX [$LB8NonBreaks-$CM];"
+ "$IS [$LB8NonBreaks-$CM];"
+ "$SY [$LB8NonBreaks-$CM];"
+ "[$CL $CP $EX $IS $SY] $CM+ $SP+ $CM* $OP; "
+ "$CM* $CAN_CM $SP* $CM* $OP;"
+ "$CANT_CM $SP* $CM* $OP;"
+ "$AL_FOLLOW? $CM+ $SP $SP* $CM* $OP;"
+ "$AL_FOLLOW_NOCM $CM+ $SP+ $CM* $OP;"
+ "$CM* $AL_FOLLOW_CM $CM+ $SP+ $CM* $OP;"
+ "$SY $CM $SP+ $OP;"
+ "$CM* $OP $SP* $CM* $QU;"
+ "$CM* $NS $SP* $CM* ($CL | $CP);"
+ "$CM* $B2 $SP* $CM* $B2;"
+ "$CM* $QU $CM* $CAN_CM;"
+ "$CM* $QU $LB18NonBreaks;"
+ "$CM* $CAN_CM $CM* $QU;"
+ "$CANT_CM $CM* $QU;"
+ "$CM* ($BA | $HY | $NS) $CM* [$LB20NonBreaks-$CM];"
+ "$CM* [$LB20NonBreaks-$CM] $CM* $BB;"
+ "[^$CB] $CM* $BB;"
+ "[^$CB] $CM* ($HY | $BA) $CM* $HL;"
+ "$CM* $IN $CM* ($ALPlus | $HL);"
+ "$CM* $IN $CM* $ID;"
+ "$CM* $IN $CM* $IN;"
+ "$CM* $IN $CM* $NU;"
+ "$CM* $PO $CM* $ID;"
+ "$CM* $NU $CM* ($ALPlus | $HL);"
+ "$CM* ($ALPlus | $HL) $CM* $NU;"
+ "$CM* $ID $CM* $PR;"
+ "$CM* ($ALPlus | $HL) $CM* $PR;"
+ "$CM* ($ALPlus | $HL) $CM* $PO;"
+ "($CM* ($PR | $PO))? ($CM* ($CL | $CP))? ($CM* ($NU | $IS | $SY))* $CM* $NU ($CM* ($OP | $HY))? ($CM* ($PR | $PO))?;"
+ "$CM* ($H3 | $H2 | $JV | $JL) $CM* $JL;"
+ "$CM* ($JT | $JV) $CM* ($H2 | $JV);"
+ "$CM* $JT $CM* ($H3 | $JT);"
+ "$CM* $IN $CM* ($H3 | $H2 | $JT | $JV | $JL);"
+ "$CM* $PO $CM* ($H3 | $H2 | $JT | $JV | $JL);"
+ "$CM* ($H3 | $H2 | $JT | $JV | $JL) $CM* $PR;"
+ "$CM* ($ALPlus | $HL) $CM* ($ALPlus | $HL);"
+ "$CM* ($ALPlus | $HL) $CM* $IS;"
+ "$CM* $OP $CM* ($ALPlus | $HL | $NU);"
+ "$CM* ($ALPlus | $HL | $NU) $CM* $CP;";
+
+static const char* uax14SafeForward =
+ "!!safe_forward;"
+ "[$CM $OP $QU $CL $CP $B2 $PR $HY $BA $SP $dictionary]+ [^$CM $OP $QU $CL $CP $B2 $PR $HY $BA $dictionary];"
+ "$dictionary $dictionary;";
+
+static const char* uax14SafeReverse =
+ "!!safe_reverse;"
+ "$CM+ [^$CM $BK $CR $LF $NL $ZW $SP];"
+ "$CM+ $SP / .;"
+ "$SP+ $CM* $OP;"
+ "$SP+ $CM* $QU;"
+ "$SP+ $CM* ($CL | $CP);"
+ "$SP+ $CM* $B2;"
+ "$CM* ($HY | $BA) $CM* $HL;"
+ "($CM* ($IS | $SY))+ $CM* $NU;"
+ "($CL | $CP) $CM* ($NU | $IS | $SY);"
+ "$dictionary $dictionary;";
+
+// Assembles the complete ICU break-rule source for |mode|. The mode (and whether the content is
+// CJK) only selects which table of $*_SUB / $*_ADD assignments is spliced in; every other
+// section is shared by all modes.
+static String mapLineIteratorModeToRules(LineBreakIteratorMode mode, bool isCJK)
+{
+    // Choose the mode-specific customization table.
+    const char* customAssignments = nullptr;
+    switch (mode) {
+    case LineBreakIteratorModeUAX14:
+        customAssignments = isCJK ? uax14AssignmentsCustomDefaultCJK : uax14AssignmentsCustomDefaultNonCJK;
+        break;
+    case LineBreakIteratorModeUAX14Loose:
+        customAssignments = isCJK ? uax14AssignmentsCustomLooseCJK : uax14AssignmentsCustomLooseNonCJK;
+        break;
+    case LineBreakIteratorModeUAX14Normal:
+        customAssignments = isCJK ? uax14AssignmentsCustomNormalCJK : uax14AssignmentsCustomNormalNonCJK;
+        break;
+    case LineBreakIteratorModeUAX14Strict:
+        customAssignments = isCJK ? uax14AssignmentsCustomStrictCJK : uax14AssignmentsCustomStrictNonCJK;
+        break;
+    }
+
+    // Section order matters: the custom table must precede the class assignments that use it.
+    const char* const sections[] = {
+        uax14Prologue,
+        uax14AssignmentsBefore,
+        customAssignments,
+        uax14AssignmentsAfter,
+        uax14Forward,
+        uax14Reverse,
+        uax14SafeForward,
+        uax14SafeReverse,
+    };
+
+    StringBuilder assembledRules;
+    for (const char* section : sections) {
+        // Only the custom table can be null (unrecognized mode value); it is simply omitted then.
+        if (section)
+            assembledRules.append(section);
+    }
+    return assembledRules.toString();
+}
+
+// Returns true when |locale| has a two-letter primary language subtag of 'zh', 'ja' or 'ko'
+// (compared case-insensitively), either standing alone or followed by '-', '_' or '@'.
+// The 3-letter codes 'chi'/'zho', 'jpn' and 'kor' are deliberately not recognized, since BCP47
+// requires the shortest language tag to be used.
+bool isCJKLocale(const AtomicString& locale)
+{
+    const size_t length = locale.length();
+    if (length < 2)
+        return false;
+
+    // Anything after the first two characters must begin with a subtag/keyword separator
+    // (a zero code unit in that position is treated the same as the end of the string).
+    if (length > 2) {
+        const UChar separator = locale[2];
+        if (separator && separator != '-' && separator != '_' && separator != '@')
+            return false;
+    }
+
+    const UChar second = locale[1];
+    switch (locale[0]) {
+    case 'z':
+    case 'Z':
+        return second == 'h' || second == 'H';
+    case 'j':
+    case 'J':
+        return second == 'a' || second == 'A';
+    case 'k':
+    case 'K':
+        return second == 'o' || second == 'O';
+    default:
+        return false;
+    }
+}
+
+// Opens an ICU line break iterator and returns it as an opaque TextBreakIterator*, or nullptr
+// when no iterator could be opened.
+//   locale - locale string; in LineBreakIteratorModeUAX14 an empty value selects
+//            currentTextBreakLocaleID(). In the other modes it is not passed to ICU.
+//   mode   - LineBreakIteratorModeUAX14 uses ICU's built-in rules via ubrk_open(); every other
+//            mode compiles the custom rule string from mapLineIteratorModeToRules().
+//   isCJK  - forwarded to mapLineIteratorModeToRules() to select the CJK or non-CJK rule table.
+TextBreakIterator* openLineBreakIterator(const AtomicString& locale, LineBreakIteratorMode mode, bool isCJK)
+{
+    UBreakIterator* ubrkIter = nullptr;
+    UErrorCode openStatus = U_ZERO_ERROR;
+    bool localeIsEmpty = locale.isEmpty();
+    if (mode == LineBreakIteratorModeUAX14)
+        ubrkIter = ubrk_open(UBRK_LINE, localeIsEmpty ? currentTextBreakLocaleID() : locale.string().utf8().data(), 0, 0, &openStatus);
+    else {
+        // Zero-initialized so the diagnostic below never reads indeterminate values.
+        UParseError parseStatus = UParseError();
+        auto rules = mapLineIteratorModeToRules(mode, isCJK);
+        ubrkIter = ubrk_openRules(StringView(rules).upconvertedCharacters(), rules.length(), 0, 0, &parseStatus, &openStatus);
+        // Report where rule compilation failed before falling back below; otherwise a broken
+        // rule set would silently degrade to the default iterator.
+        if (U_FAILURE(openStatus))
+            LOG_ERROR("ubrk_openRules failed with status %d (rule line %d, offset %d)", openStatus, parseStatus.line, parseStatus.offset);
+    }
+    // locale comes from a web page and it can be invalid, leading ICU
+    // to fail, in which case we fall back to the default locale. The same
+    // fallback is taken when the custom rules could not be compiled.
+    if (!localeIsEmpty && U_FAILURE(openStatus)) {
+        openStatus = U_ZERO_ERROR;
+        ubrkIter = ubrk_open(UBRK_LINE, currentTextBreakLocaleID(), 0, 0, &openStatus);
+    }
+
+    if (U_FAILURE(openStatus)) {
+        LOG_ERROR("ubrk_open failed with status %d", openStatus);
+        return nullptr;
+    }
+
+    return reinterpret_cast<TextBreakIterator*>(ubrkIter);
+}
+
+// Closes the ICU iterator behind |iterator| and nulls the caller's pointer so the closed
+// iterator cannot be reused by accident. |iterator| must not be null.
+void closeLineBreakIterator(TextBreakIterator*& iterator)
+{
+    ASSERT(iterator);
+    // TextBreakIterator* is an opaque alias for ICU's UBreakIterator* (see openLineBreakIterator).
+    ubrk_close(reinterpret_cast<UBreakIterator*>(iterator));
+    iterator = nullptr;
+}
+
static TextBreakIterator* nonSharedCharacterBreakIterator;
static inline bool compareAndSwapNonSharedCharacterBreakIterator(TextBreakIterator* expected, TextBreakIterator* newValue)
Modified: trunk/Source/WebCore/platform/text/TextBreakIterator.h (176472 => 176473)
--- trunk/Source/WebCore/platform/text/TextBreakIterator.h 2014-11-21 21:34:30 UTC (rev 176472)
+++ trunk/Source/WebCore/platform/text/TextBreakIterator.h 2014-11-21 22:33:17 UTC (rev 176473)
@@ -31,6 +31,13 @@
// Note: The returned iterator is good only until you get another iterator, with the exception of acquireLineBreakIterator.
+enum LineBreakIteratorMode { // Selects which line-break rules a line break iterator is opened with.
+ LineBreakIteratorModeUAX14, // Default: the iterator is opened with ubrk_open() for the plain locale.
+ LineBreakIteratorModeUAX14Loose, // Custom rules; iterator pool key is the locale plus "@break=loose".
+ LineBreakIteratorModeUAX14Normal, // Custom rules; iterator pool key is the locale plus "@break=normal".
+ LineBreakIteratorModeUAX14Strict, // Custom rules; iterator pool key is the locale plus "@break=strict".
+};
+
// This is similar to character break iterator in most cases, but is subject to
// platform UI conventions. One notable example where this can be different
// from character break iterator is Thai prepend characters, see bug 24342.
@@ -40,8 +47,10 @@
TextBreakIterator* wordBreakIterator(StringView);
TextBreakIterator* sentenceBreakIterator(StringView);
-WEBCORE_EXPORT TextBreakIterator* acquireLineBreakIterator(StringView, const AtomicString& locale, const UChar* priorContext, unsigned priorContextLength);
+WEBCORE_EXPORT TextBreakIterator* acquireLineBreakIterator(StringView, const AtomicString& locale, const UChar* priorContext, unsigned priorContextLength, LineBreakIteratorMode, bool isCJK);
WEBCORE_EXPORT void releaseLineBreakIterator(TextBreakIterator*);
+TextBreakIterator* openLineBreakIterator(const AtomicString& locale, LineBreakIteratorMode, bool isCJK);
+void closeLineBreakIterator(TextBreakIterator*&);
int textBreakFirst(TextBreakIterator*);
int textBreakLast(TextBreakIterator*);
@@ -55,24 +64,30 @@
const int TextBreakDone = -1;
+bool isCJKLocale(const AtomicString&);
+
class LazyLineBreakIterator {
public:
LazyLineBreakIterator()
- : m_iterator(0)
- , m_cachedPriorContext(0)
+ : m_iterator(nullptr)
+ , m_cachedPriorContext(nullptr)
+ , m_mode(LineBreakIteratorModeUAX14)
, m_cachedPriorContextLength(0)
+ , m_isCJK(false)
{
resetPriorContext();
}
- LazyLineBreakIterator(String string, const AtomicString& locale = AtomicString())
+ LazyLineBreakIterator(String string, const AtomicString& locale = AtomicString(), LineBreakIteratorMode mode = LineBreakIteratorModeUAX14)
: m_string(string)
, m_locale(locale)
- , m_iterator(0)
- , m_cachedPriorContext(0)
+ , m_iterator(nullptr)
+ , m_cachedPriorContext(nullptr)
+ , m_mode(mode)
, m_cachedPriorContextLength(0)
{
resetPriorContext();
+ m_isCJK = isCJKLocale(locale);
}
~LazyLineBreakIterator()
@@ -82,35 +97,41 @@
}
String string() const { return m_string; }
+ bool isLooseCJKMode() const { return m_isCJK && m_mode == LineBreakIteratorModeUAX14Loose; }
UChar lastCharacter() const
{
COMPILE_ASSERT(WTF_ARRAY_LENGTH(m_priorContext) == 2, TextBreakIterator_unexpected_prior_context_length);
return m_priorContext[1];
}
+
UChar secondToLastCharacter() const
{
COMPILE_ASSERT(WTF_ARRAY_LENGTH(m_priorContext) == 2, TextBreakIterator_unexpected_prior_context_length);
return m_priorContext[0];
}
+
void setPriorContext(UChar last, UChar secondToLast)
{
COMPILE_ASSERT(WTF_ARRAY_LENGTH(m_priorContext) == 2, TextBreakIterator_unexpected_prior_context_length);
m_priorContext[0] = secondToLast;
m_priorContext[1] = last;
}
+
void updatePriorContext(UChar last)
{
COMPILE_ASSERT(WTF_ARRAY_LENGTH(m_priorContext) == 2, TextBreakIterator_unexpected_prior_context_length);
m_priorContext[0] = m_priorContext[1];
m_priorContext[1] = last;
}
+
void resetPriorContext()
{
COMPILE_ASSERT(WTF_ARRAY_LENGTH(m_priorContext) == 2, TextBreakIterator_unexpected_prior_context_length);
m_priorContext[0] = 0;
m_priorContext[1] = 0;
}
+
unsigned priorContextLength() const
{
unsigned priorContextLength = 0;
@@ -122,6 +143,7 @@
}
return priorContextLength;
}
+
// Obtain text break iterator, possibly previously cached, where this iterator is (or has been)
// initialized to use the previously stored string as the primary breaking context and using
// previously stored prior context if non-empty.
@@ -130,23 +152,26 @@
ASSERT(priorContextLength <= priorContextCapacity);
const UChar* priorContext = priorContextLength ? &m_priorContext[priorContextCapacity - priorContextLength] : 0;
if (!m_iterator) {
- m_iterator = acquireLineBreakIterator(m_string, m_locale, priorContext, priorContextLength);
+ m_iterator = acquireLineBreakIterator(m_string, m_locale, priorContext, priorContextLength, m_mode, m_isCJK);
m_cachedPriorContext = priorContext;
m_cachedPriorContextLength = priorContextLength;
} else if (priorContext != m_cachedPriorContext || priorContextLength != m_cachedPriorContextLength) {
- this->resetStringAndReleaseIterator(m_string, m_locale);
+ resetStringAndReleaseIterator(m_string, m_locale, m_mode);
return this->get(priorContextLength);
}
return m_iterator;
}
- void resetStringAndReleaseIterator(String string, const AtomicString& locale)
+
+ void resetStringAndReleaseIterator(String string, const AtomicString& locale, LineBreakIteratorMode mode)
{
if (m_iterator)
releaseLineBreakIterator(m_iterator);
m_string = string;
m_locale = locale;
- m_iterator = 0;
- m_cachedPriorContext = 0;
+ m_iterator = nullptr;
+ m_cachedPriorContext = nullptr;
+ m_mode = mode;
+ m_isCJK = isCJKLocale(locale);
m_cachedPriorContextLength = 0;
}
@@ -155,9 +180,11 @@
String m_string;
AtomicString m_locale;
TextBreakIterator* m_iterator;
- UChar m_priorContext[priorContextCapacity];
const UChar* m_cachedPriorContext;
+ LineBreakIteratorMode m_mode;
unsigned m_cachedPriorContextLength;
+ UChar m_priorContext[priorContextCapacity];
+ bool m_isCJK;
};
// Iterates over "extended grapheme clusters", as defined in UAX #29.
Modified: trunk/Source/WebCore/rendering/RenderText.cpp (176472 => 176473)
--- trunk/Source/WebCore/rendering/RenderText.cpp 2014-11-21 21:34:30 UTC (rev 176472)
+++ trunk/Source/WebCore/rendering/RenderText.cpp 2014-11-21 22:33:17 UTC (rev 176473)
@@ -597,6 +597,22 @@
return m_maxWidth;
}
+LineBreakIteratorMode mapLineBreakToIteratorMode(LineBreak lineBreak)
+{
+ switch (lineBreak) {
+ case LineBreakAuto:
+ case LineBreakAfterWhiteSpace:
+ return LineBreakIteratorModeUAX14;
+ case LineBreakLoose:
+ return LineBreakIteratorModeUAX14Loose;
+ case LineBreakNormal:
+ return LineBreakIteratorModeUAX14Normal;
+ case LineBreakStrict:
+ return LineBreakIteratorModeUAX14Strict;
+ }
+ return LineBreakIteratorModeUAX14;
+}
+
void RenderText::computePreferredLogicalWidths(float leadWidth)
{
HashSet<const SimpleFontData*> fallbackFonts;
@@ -673,7 +689,7 @@
const Font& font = style.font(); // FIXME: This ignores first-line.
float wordSpacing = font.wordSpacing();
int len = textLength();
- LazyLineBreakIterator breakIterator(m_text, style.locale());
+ LazyLineBreakIterator breakIterator(m_text, style.locale(), mapLineBreakToIteratorMode(style.lineBreak()));
bool needsWordSpacing = false;
bool ignoringSpaces = false;
bool isSpace = false;
@@ -708,6 +724,7 @@
bool breakNBSP = style.autoWrap() && style.nbspMode() == SPACE;
bool breakAll = (style.wordBreak() == BreakAllWordBreak || style.wordBreak() == BreakWordBreak) && style.autoWrap();
+ bool isLooseCJKMode = breakIterator.isLooseCJKMode();
for (int i = 0; i < len; i++) {
UChar c = uncheckedCharacterAt(i);
@@ -755,7 +772,7 @@
continue;
}
- bool hasBreak = breakAll || isBreakable(breakIterator, i, nextBreakable, breakNBSP);
+ bool hasBreak = breakAll || isBreakable(breakIterator, i, nextBreakable, breakNBSP, isLooseCJKMode);
bool betweenWords = true;
int j = i;
while (c != '\n' && !isSpaceAccordingToStyle(c, style) && c != '\t' && (c != softHyphen || style.hyphens() == HyphensNone)) {
@@ -763,7 +780,7 @@
if (j == len)
break;
c = uncheckedCharacterAt(j);
- if (isBreakable(breakIterator, j, nextBreakable, breakNBSP) && characterAt(j - 1) != softHyphen)
+ if (isBreakable(breakIterator, j, nextBreakable, breakNBSP, isLooseCJKMode) && characterAt(j - 1) != softHyphen)
break;
if (breakAll) {
betweenWords = false;
Modified: trunk/Source/WebCore/rendering/RenderText.h (176472 => 176473)
--- trunk/Source/WebCore/rendering/RenderText.h 2014-11-21 21:34:30 UTC (rev 176472)
+++ trunk/Source/WebCore/rendering/RenderText.h 2014-11-21 22:33:17 UTC (rev 176473)
@@ -27,6 +27,7 @@
#include "RenderTextLineBoxes.h"
#include "SimpleLineLayout.h"
#include "Text.h"
+#include "TextBreakIterator.h"
#include <wtf/Forward.h>
namespace WebCore {
@@ -274,6 +275,7 @@
void applyTextTransform(const RenderStyle&, String&, UChar);
void makeCapitalized(String*, UChar previous);
+LineBreakIteratorMode mapLineBreakToIteratorMode(LineBreak);
inline RenderText* Text::renderer() const
{
Modified: trunk/Source/WebCore/rendering/SimpleLineLayout.cpp (176472 => 176473)
--- trunk/Source/WebCore/rendering/SimpleLineLayout.cpp 2014-11-21 21:34:30 UTC (rev 176472)
+++ trunk/Source/WebCore/rendering/SimpleLineLayout.cpp 2014-11-21 22:33:17 UTC (rev 176473)
@@ -161,6 +161,8 @@
return false;
if (style.borderFit() == BorderFitLines)
return false;
+ if (style.lineBreak() != LineBreakAuto)
+ return false;
const RenderText& textRenderer = downcast<RenderText>(*flow.firstChild());
if (flow.containsFloats()) {
// We can't use the code path if any lines would need to be shifted below floats. This is because we don't keep per-line y coordinates.
Modified: trunk/Source/WebCore/rendering/SimpleLineLayoutFlowContents.cpp (176472 => 176473)
--- trunk/Source/WebCore/rendering/SimpleLineLayoutFlowContents.cpp 2014-11-21 21:34:30 UTC (rev 176472)
+++ trunk/Source/WebCore/rendering/SimpleLineLayoutFlowContents.cpp 2014-11-21 22:33:17 UTC (rev 176473)
@@ -53,7 +53,7 @@
unsigned FlowContents::findNextBreakablePosition(unsigned position) const
{
String string = m_lineBreakIterator.string();
- unsigned breakablePosition = nextBreakablePosition<LChar, false>(m_lineBreakIterator, string.characters8(), string.length(), position);
+ unsigned breakablePosition = nextBreakablePositionNonLoosely<LChar, NBSPBehavior::IgnoreNBSP>(m_lineBreakIterator, string.characters8(), string.length(), position);
if (appendNextRendererContentIfNeeded(breakablePosition))
return findNextBreakablePosition(position);
ASSERT(breakablePosition >= position);
@@ -157,7 +157,7 @@
return false;
++m_lastRendererIndex;
- m_lineBreakIterator.resetStringAndReleaseIterator(string + String(nextRenderer->text()), m_flow.style().locale());
+ m_lineBreakIterator.resetStringAndReleaseIterator(string + String(nextRenderer->text()), m_flow.style().locale(), LineBreakIteratorModeUAX14);
return true;
}
Modified: trunk/Source/WebCore/rendering/break_lines.h (176472 => 176473)
--- trunk/Source/WebCore/rendering/break_lines.h 2014-11-21 21:34:30 UTC (rev 176472)
+++ trunk/Source/WebCore/rendering/break_lines.h 2014-11-21 22:33:17 UTC (rev 176473)
@@ -35,10 +35,12 @@
extern const unsigned char asciiLineBreakTable[][asciiLineBreakTableColumnCount];
-int nextBreakablePositionIgnoringNBSP(LazyLineBreakIterator&, int pos);
-int nextBreakablePosition(LazyLineBreakIterator&, int pos);
+enum class NBSPBehavior {
+ IgnoreNBSP,
+ TreatNBSPAsBreak,
+};
-template<bool treatNoBreakSpaceAsBreak>
+template<NBSPBehavior nbspBehavior>
static inline bool isBreakableSpace(UChar ch)
{
switch (ch) {
@@ -47,7 +49,7 @@
case '\t':
return true;
case noBreakSpace:
- return treatNoBreakSpaceAsBreak;
+ return nbspBehavior == NBSPBehavior::TreatNBSPAsBreak;
default:
return false;
}
@@ -71,16 +73,17 @@
return false;
}
-template<bool treatNoBreakSpaceAsBreak>
+template<NBSPBehavior nbspBehavior>
inline bool needsLineBreakIterator(UChar ch)
{
- if (treatNoBreakSpaceAsBreak)
+ if (nbspBehavior == NBSPBehavior::TreatNBSPAsBreak)
return ch > asciiLineBreakTableLastChar;
return ch > asciiLineBreakTableLastChar && ch != noBreakSpace;
}
-template<typename CharacterType, bool treatNoBreakSpaceAsBreak>
-inline int nextBreakablePosition(LazyLineBreakIterator& lazyBreakIterator, const CharacterType* str, unsigned length, int pos)
+// When in non-loose mode, we can use the ASCII shortcut table.
+template<typename CharacterType, NBSPBehavior nbspBehavior>
+inline int nextBreakablePositionNonLoosely(LazyLineBreakIterator& lazyBreakIterator, const CharacterType* str, unsigned length, int pos)
{
int len = static_cast<int>(length);
int nextBreak = -1;
@@ -91,10 +94,12 @@
for (int i = pos; i < len; i++) {
CharacterType ch = str[i];
- if (isBreakableSpace<treatNoBreakSpaceAsBreak>(ch) || shouldBreakAfter(lastLastCh, lastCh, ch))
+ // Non-loose mode, so use ASCII shortcut (shouldBreakAfter) if not breakable space.
+ if (isBreakableSpace<nbspBehavior>(ch) || shouldBreakAfter(lastLastCh, lastCh, ch))
return i;
- if (needsLineBreakIterator<treatNoBreakSpaceAsBreak>(ch) || needsLineBreakIterator<treatNoBreakSpaceAsBreak>(lastCh)) {
+ // Non-loose mode, so conditionally use break iterator.
+ if (needsLineBreakIterator<nbspBehavior>(ch) || needsLineBreakIterator<nbspBehavior>(lastCh)) {
if (nextBreak < i) {
// Don't break if positioned at start of primary context and there is no prior context.
if (i || priorContextLength) {
@@ -106,7 +111,7 @@
}
}
}
- if (i == nextBreak && !isBreakableSpace<treatNoBreakSpaceAsBreak>(lastCh))
+ if (i == nextBreak && !isBreakableSpace<nbspBehavior>(lastCh))
return i;
}
@@ -117,26 +122,89 @@
return len;
}
+// When in loose mode, we can't use the ASCII shortcut table since loose mode allows "$100" to break after '$' in content marked as CJK.
+// N.B. It should be possible to combine the following with the non-loose version above by adding a LooseBehavior template parameter;
+// however, when doing this, a 10% performance regression appeared on chromium-win (https://bugs.webkit.org/show_bug.cgi?id=89235#c112).
+template<typename CharacterType, NBSPBehavior nbspBehavior>
+static inline int nextBreakablePositionLoosely(LazyLineBreakIterator& lazyBreakIterator, const CharacterType* str, unsigned length, int pos)
+{
+ int len = static_cast<int>(length);
+ int nextBreak = -1;
+
+ CharacterType lastCh = pos > 0 ? str[pos - 1] : static_cast<CharacterType>(lazyBreakIterator.lastCharacter());
+ unsigned priorContextLength = lazyBreakIterator.priorContextLength();
+ for (int i = pos; i < len; i++) {
+ CharacterType ch = str[i];
+
+ // Always loose mode, so don't use ASCII shortcut (shouldBreakAfter).
+ if (isBreakableSpace<nbspBehavior>(ch))
+ return i;
+
+ // Always use line break iterator in loose mode.
+ if (nextBreak < i) {
+ // Don't break if positioned at start of primary context and there is no prior context.
+ if (i || priorContextLength) {
+ TextBreakIterator* breakIterator = lazyBreakIterator.get(priorContextLength);
+ if (breakIterator) {
+ nextBreak = textBreakFollowing(breakIterator, i - 1 + priorContextLength);
+ if (nextBreak >= 0)
+ nextBreak -= priorContextLength;
+ }
+ }
+ }
+ if (i == nextBreak && !isBreakableSpace<nbspBehavior>(lastCh))
+ return i;
+
+ lastCh = ch;
+ }
+
+ return len;
+}
+
+inline int nextBreakablePosition(LazyLineBreakIterator& lazyBreakIterator, int pos)
+{
+ String string = lazyBreakIterator.string();
+ if (string.is8Bit())
+ return nextBreakablePositionNonLoosely<LChar, NBSPBehavior::TreatNBSPAsBreak>(lazyBreakIterator, string.characters8(), string.length(), pos);
+ return nextBreakablePositionNonLoosely<UChar, NBSPBehavior::TreatNBSPAsBreak>(lazyBreakIterator, string.characters16(), string.length(), pos);
+}
+
inline int nextBreakablePositionIgnoringNBSP(LazyLineBreakIterator& lazyBreakIterator, int pos)
{
String string = lazyBreakIterator.string();
if (string.is8Bit())
- return nextBreakablePosition<LChar, false>(lazyBreakIterator, string.characters8(), string.length(), pos);
- return nextBreakablePosition<UChar, false>(lazyBreakIterator, string.characters16(), string.length(), pos);
+ return nextBreakablePositionNonLoosely<LChar, NBSPBehavior::IgnoreNBSP>(lazyBreakIterator, string.characters8(), string.length(), pos);
+ return nextBreakablePositionNonLoosely<UChar, NBSPBehavior::IgnoreNBSP>(lazyBreakIterator, string.characters16(), string.length(), pos);
}
-inline int nextBreakablePosition(LazyLineBreakIterator& lazyBreakIterator, int pos)
+inline int nextBreakablePositionLoose(LazyLineBreakIterator& lazyBreakIterator, int pos)
{
String string = lazyBreakIterator.string();
if (string.is8Bit())
- return nextBreakablePosition<LChar, true>(lazyBreakIterator, string.characters8(), string.length(), pos);
- return nextBreakablePosition<UChar, true>(lazyBreakIterator, string.characters16(), string.length(), pos);
+ return nextBreakablePositionLoosely<LChar, NBSPBehavior::TreatNBSPAsBreak>(lazyBreakIterator, string.characters8(), string.length(), pos);
+ return nextBreakablePositionLoosely<UChar, NBSPBehavior::TreatNBSPAsBreak>(lazyBreakIterator, string.characters16(), string.length(), pos);
}
-inline bool isBreakable(LazyLineBreakIterator& lazyBreakIterator, int pos, int& nextBreakable, bool breakNBSP)
+inline int nextBreakablePositionIgnoringNBSPLoose(LazyLineBreakIterator& lazyBreakIterator, int pos)
{
- if (pos > nextBreakable) {
+ String string = lazyBreakIterator.string();
+ if (string.is8Bit())
+ return nextBreakablePositionLoosely<LChar, NBSPBehavior::IgnoreNBSP>(lazyBreakIterator, string.characters8(), string.length(), pos);
+ return nextBreakablePositionLoosely<UChar, NBSPBehavior::IgnoreNBSP>(lazyBreakIterator, string.characters16(), string.length(), pos);
+}
+
+inline bool isBreakable(LazyLineBreakIterator& lazyBreakIterator, int pos, int& nextBreakable, bool breakNBSP, bool isLooseMode)
+{
+ if (pos <= nextBreakable)
+ return pos == nextBreakable;
+
+ if (isLooseMode) {
if (breakNBSP)
+ nextBreakable = nextBreakablePositionLoose(lazyBreakIterator, pos);
+ else
+ nextBreakable = nextBreakablePositionIgnoringNBSPLoose(lazyBreakIterator, pos);
+ } else {
+ if (breakNBSP)
nextBreakable = nextBreakablePosition(lazyBreakIterator, pos);
else
nextBreakable = nextBreakablePositionIgnoringNBSP(lazyBreakIterator, pos);
Modified: trunk/Source/WebCore/rendering/line/BreakingContextInlineHeaders.h (176472 => 176473)
--- trunk/Source/WebCore/rendering/line/BreakingContextInlineHeaders.h 2014-11-21 21:34:30 UTC (rev 176472)
+++ trunk/Source/WebCore/rendering/line/BreakingContextInlineHeaders.h 2014-11-21 22:33:17 UTC (rev 176473)
@@ -624,6 +624,7 @@
bool midWordBreak = false;
bool breakAll = m_currentStyle->wordBreak() == BreakAllWordBreak && m_autoWrap;
float hyphenWidth = 0;
+ bool isLooseCJKMode = false;
if (isSVGText) {
breakWords = false;
@@ -635,7 +636,8 @@
m_renderTextInfo.m_text = &renderText;
m_renderTextInfo.m_font = &font;
m_renderTextInfo.m_layout = font.createLayout(&renderText, m_width.currentWidth(), m_collapseWhiteSpace);
- m_renderTextInfo.m_lineBreakIterator.resetStringAndReleaseIterator(renderText.text(), style.locale());
+ m_renderTextInfo.m_lineBreakIterator.resetStringAndReleaseIterator(renderText.text(), style.locale(), mapLineBreakToIteratorMode(m_blockStyle.lineBreak()));
+ isLooseCJKMode = m_renderTextInfo.m_lineBreakIterator.isLooseCJKMode();
} else if (m_renderTextInfo.m_layout && m_renderTextInfo.m_font != &font) {
m_renderTextInfo.m_font = &font;
m_renderTextInfo.m_layout = font.createLayout(&renderText, m_width.currentWidth(), m_collapseWhiteSpace);
@@ -676,7 +678,7 @@
}
int nextBreakablePosition = m_current.nextBreakablePosition();
- bool betweenWords = c == '\n' || (m_currWS != PRE && !m_atStart && isBreakable(m_renderTextInfo.m_lineBreakIterator, m_current.offset(), nextBreakablePosition, breakNBSP)
+ bool betweenWords = c == '\n' || (m_currWS != PRE && !m_atStart && isBreakable(m_renderTextInfo.m_lineBreakIterator, m_current.offset(), nextBreakablePosition, breakNBSP, isLooseCJKMode)
&& (style.hyphens() != HyphensNone || (m_current.previousInSameNode() != softHyphen)));
m_current.setNextBreakablePosition(nextBreakablePosition);