Diff
Modified: trunk/Source/WTF/ChangeLog (213019 => 213020)
--- trunk/Source/WTF/ChangeLog 2017-02-27 00:32:10 UTC (rev 213019)
+++ trunk/Source/WTF/ChangeLog 2017-02-27 00:36:02 UTC (rev 213020)
@@ -1,3 +1,58 @@
+2017-02-26 Myles C. Maxfield <mmaxfi...@apple.com>
+
+ Stop compiling our own cursorMovementIterator()
+ https://bugs.webkit.org/show_bug.cgi?id=168211
+
+ Reviewed by David Hyatt.
+
+ This patch creates a unified Text Breaking API, which can be backed by either ICU
+ or CoreFoundation (for ports which can use it). Because there are only two backing
+ implementations, the API uses a Variant, which is simpler than implementing this with
+ inheritance and virtual functions. There is also a cache which
+ allows you to reuse iterators without reconstructing them. This cache is better
+ than the previous method of having a single static iterator because the cache
+ lets you use two iterators simultaneously.
+
+ In the future, I will hook up all iterators to use this shared class. However, for
+ this patch, I've only hooked up the caret position iterator (backed by CoreFoundation
+ on Cocoa ports and UBRK_CHARACTER on other ports).
+
+ * WTF.xcodeproj/project.pbxproj:
+ * wtf/spi/cf/CFStringSPI.h: Added.
+ * wtf/text/TextBreakIterator.cpp:
+ (WTF::initializeIteratorWithRules): Deleted.
+ (WTF::cursorMovementIterator): Deleted.
+ * wtf/text/TextBreakIterator.h:
+ (WTF::TextBreakIterator::preceding):
+ (WTF::TextBreakIterator::following):
+ (WTF::TextBreakIterator::isBoundary):
+ (WTF::TextBreakIterator::setText):
+ (WTF::TextBreakIterator::mode):
+ (WTF::TextBreakIterator::locale):
+ (WTF::TextBreakIteratorCache::singleton):
+ (WTF::TextBreakIteratorCache::take):
+ (WTF::TextBreakIteratorCache::put):
+ (WTF::TextBreakIteratorCache::TextBreakIteratorCache):
+ * wtf/text/cf/TextBreakIteratorCF.h: Added.
+ (WTF::TextBreakIteratorCF::TextBreakIteratorCF):
+ (WTF::TextBreakIteratorCF::setText):
+ (WTF::TextBreakIteratorCF::preceding):
+ (WTF::TextBreakIteratorCF::following):
+ (WTF::TextBreakIteratorCF::isBoundary):
+ * wtf/text/icu/TextBreakIteratorICU.h: Added.
+ (WTF::TextBreakIteratorICU::set8BitText):
+ (WTF::TextBreakIteratorICU::TextBreakIteratorICU):
+ (WTF::TextBreakIteratorICU::operator=):
+ (WTF::TextBreakIteratorICU::~TextBreakIteratorICU):
+ (WTF::TextBreakIteratorICU::setText):
+ (WTF::TextBreakIteratorICU::preceding):
+ (WTF::TextBreakIteratorICU::following):
+ (WTF::TextBreakIteratorICU::isBoundary):
+ * wtf/text/icu/UTextProviderLatin1.h:
+ * wtf/text/mac/TextBreakIteratorInternalICUMac.mm:
+ (WTF::mapModeToBackingIterator):
+ (WTF::TextBreakIterator::TextBreakIterator):
+
2017-02-24 Joseph Pecoraro <pecor...@apple.com>
[Resource Timing] Gather timing information with reliable responseEnd time
Modified: trunk/Source/WTF/WTF.xcodeproj/project.pbxproj (213019 => 213020)
--- trunk/Source/WTF/WTF.xcodeproj/project.pbxproj 2017-02-27 00:32:10 UTC (rev 213019)
+++ trunk/Source/WTF/WTF.xcodeproj/project.pbxproj 2017-02-27 00:36:02 UTC (rev 213020)
@@ -117,6 +117,9 @@
1C181C931D307AB800F5FA16 /* UTextProviderUTF16.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C181C8D1D307AB800F5FA16 /* UTextProviderUTF16.cpp */; };
1C181C941D307AB800F5FA16 /* UTextProviderUTF16.h in Headers */ = {isa = PBXBuildFile; fileRef = 1C181C8E1D307AB800F5FA16 /* UTextProviderUTF16.h */; };
1C181C961D30800A00F5FA16 /* TextBreakIteratorInternalICUMac.mm in Sources */ = {isa = PBXBuildFile; fileRef = 1C181C951D30800A00F5FA16 /* TextBreakIteratorInternalICUMac.mm */; };
+ 1CCDB14B1E566626006C73C0 /* TextBreakIteratorCF.h in Headers */ = {isa = PBXBuildFile; fileRef = 1CCDB1491E566626006C73C0 /* TextBreakIteratorCF.h */; };
+ 1CCDB14F1E566898006C73C0 /* TextBreakIteratorICU.h in Headers */ = {isa = PBXBuildFile; fileRef = 1CCDB14D1E566898006C73C0 /* TextBreakIteratorICU.h */; };
+ 1CCDB1531E566BC5006C73C0 /* CFStringSPI.h in Headers */ = {isa = PBXBuildFile; fileRef = 1CCDB1511E566BC5006C73C0 /* CFStringSPI.h */; };
1FA47C8A152502DA00568D1B /* WebCoreThread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1FA47C88152502DA00568D1B /* WebCoreThread.cpp */; };
1FA47C8B152502DA00568D1B /* WebCoreThread.h in Headers */ = {isa = PBXBuildFile; fileRef = 1FA47C89152502DA00568D1B /* WebCoreThread.h */; };
26147B0A15DDCCDC00DDB907 /* IntegerToStringConversion.h in Headers */ = {isa = PBXBuildFile; fileRef = 26147B0815DDCCDC00DDB907 /* IntegerToStringConversion.h */; };
@@ -493,6 +496,9 @@
1C181C8D1D307AB800F5FA16 /* UTextProviderUTF16.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = UTextProviderUTF16.cpp; sourceTree = "<group>"; };
1C181C8E1D307AB800F5FA16 /* UTextProviderUTF16.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = UTextProviderUTF16.h; sourceTree = "<group>"; };
1C181C951D30800A00F5FA16 /* TextBreakIteratorInternalICUMac.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; name = TextBreakIteratorInternalICUMac.mm; path = mac/TextBreakIteratorInternalICUMac.mm; sourceTree = "<group>"; };
+ 1CCDB1491E566626006C73C0 /* TextBreakIteratorCF.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TextBreakIteratorCF.h; path = cf/TextBreakIteratorCF.h; sourceTree = "<group>"; };
+ 1CCDB14D1E566898006C73C0 /* TextBreakIteratorICU.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = TextBreakIteratorICU.h; sourceTree = "<group>"; };
+ 1CCDB1511E566BC5006C73C0 /* CFStringSPI.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CFStringSPI.h; path = cf/CFStringSPI.h; sourceTree = "<group>"; };
1FA47C88152502DA00568D1B /* WebCoreThread.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = WebCoreThread.cpp; sourceTree = "<group>"; };
1FA47C89152502DA00568D1B /* WebCoreThread.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = WebCoreThread.h; sourceTree = "<group>"; };
26147B0815DDCCDC00DDB907 /* IntegerToStringConversion.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = IntegerToStringConversion.h; sourceTree = "<group>"; };
@@ -797,6 +803,7 @@
1C181C8C1D307AB800F5FA16 /* UTextProviderLatin1.h */,
1C181C8D1D307AB800F5FA16 /* UTextProviderUTF16.cpp */,
1C181C8E1D307AB800F5FA16 /* UTextProviderUTF16.h */,
+ 1CCDB14D1E566898006C73C0 /* TextBreakIteratorICU.h */,
);
path = icu;
sourceTree = "<group>";
@@ -879,6 +886,7 @@
A5BA15F8182435A600A82E69 /* StringCF.cpp */,
A5BA15F9182435A600A82E69 /* StringImplCF.cpp */,
93934BD418A1F16900D0D6A1 /* StringViewCF.cpp */,
+ 1CCDB1491E566626006C73C0 /* TextBreakIteratorCF.h */,
);
name = cf;
sourceTree = "<group>";
@@ -1300,6 +1308,7 @@
isa = PBXGroup;
children = (
DCEE21FA1CEA7538000C2396 /* CFBundleSPI.h */,
+ 1CCDB1511E566BC5006C73C0 /* CFStringSPI.h */,
);
name = cf;
sourceTree = "<group>";
@@ -1382,7 +1391,9 @@
DE5A09FC1BA36992003D4424 /* CommonCryptoSPI.h in Headers */,
0F8F2B91172E00FC007DBDA5 /* CompilationThread.h in Headers */,
A8A47398151A825B004123FF /* Compiler.h in Headers */,
+ 1CCDB1531E566BC5006C73C0 /* CFStringSPI.h in Headers */,
0FDB698E1B7C643A000C1078 /* Condition.h in Headers */,
+ 1CCDB14F1E566898006C73C0 /* TextBreakIteratorICU.h in Headers */,
A8A4748C151A8264004123FF /* config.h in Headers */,
0F8F2B9C172F2596007DBDA5 /* ConversionMode.h in Headers */,
515F794F1CFC9F4A00CCED93 /* CrossThreadCopier.h in Headers */,
@@ -1473,6 +1484,7 @@
0F9495841C571CC900413A48 /* OrderMaker.h in Headers */,
14E785E81DFB330100209BD1 /* OrdinalNumber.h in Headers */,
A8A473F6151A825B004123FF /* OSAllocator.h in Headers */,
+ 1CCDB14B1E566626006C73C0 /* TextBreakIteratorCF.h in Headers */,
7CBBA07419BB7FDC00BBF025 /* OSObjectPtr.h in Headers */,
A8A473FA151A825B004123FF /* OSRandomSource.h in Headers */,
A8A473FE151A825B004123FF /* PackedIntVector.h in Headers */,
Added: trunk/Source/WTF/wtf/spi/cf/CFStringSPI.h (0 => 213020)
--- trunk/Source/WTF/wtf/spi/cf/CFStringSPI.h (rev 0)
+++ trunk/Source/WTF/wtf/spi/cf/CFStringSPI.h 2017-02-27 00:36:02 UTC (rev 213020)
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2017 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#include <CoreFoundation/CoreFoundation.h>
+
+#if USE(APPLE_INTERNAL_SDK)
+
+#import <CoreFoundation/CFPriv.h>
+
+#else
+
+extern "C" {
+
+typedef CF_ENUM(CFIndex, CFStringCharacterClusterType)
+{
+ kCFStringComposedCharacterCluster = 2,
+ kCFStringCursorMovementCluster = 3,
+ kCFStringBackwardDeletionCluster = 4
+};
+
+}
+
+#endif
+
+extern "C" {
+
+CFRange CFStringGetRangeOfCharacterClusterAtIndex(CFStringRef, CFIndex charIndex, CFStringCharacterClusterType);
+
+}
+
Property changes on: trunk/Source/WTF/wtf/spi/cf/CFStringSPI.h
___________________________________________________________________
Added: svn:eol-style
+native
\ No newline at end of property
Added: svn:keywords
+Author Date Id Rev URL
\ No newline at end of property
Added: trunk/Source/WTF/wtf/text/NullTextBreakIterator.h (0 => 213020)
--- trunk/Source/WTF/wtf/text/NullTextBreakIterator.h (rev 0)
+++ trunk/Source/WTF/wtf/text/NullTextBreakIterator.h 2017-02-27 00:36:02 UTC (rev 213020)
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2017 Apple Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#pragma once
+
+namespace WTF {
+
+class NullTextBreakIterator {
+public:
+ NullTextBreakIterator() = default;
+ NullTextBreakIterator(const NullTextBreakIterator&) = delete;
+ NullTextBreakIterator(NullTextBreakIterator&&) = default;
+ NullTextBreakIterator& operator=(const NullTextBreakIterator&) = delete;
+ NullTextBreakIterator& operator=(NullTextBreakIterator&&) = default;
+
+ std::optional<unsigned> preceding(unsigned) const
+ {
+ ASSERT_NOT_REACHED();
+ return { };
+ }
+
+ std::optional<unsigned> following(unsigned) const
+ {
+ ASSERT_NOT_REACHED();
+ return { };
+ }
+
+ bool isBoundary(unsigned) const
+ {
+ ASSERT_NOT_REACHED();
+ return false;
+ }
+
+ void setText(StringView)
+ {
+ ASSERT_NOT_REACHED();
+ }
+};
+
+}
+
Property changes on: trunk/Source/WTF/wtf/text/NullTextBreakIterator.h
___________________________________________________________________
Added: svn:eol-style
+native
\ No newline at end of property
Added: svn:keywords
+Author Date Id Rev URL
\ No newline at end of property
Modified: trunk/Source/WTF/wtf/text/TextBreakIterator.cpp (213019 => 213020)
--- trunk/Source/WTF/wtf/text/TextBreakIterator.cpp 2017-02-27 00:32:10 UTC (rev 213019)
+++ trunk/Source/WTF/wtf/text/TextBreakIterator.cpp 2017-02-27 00:36:02 UTC (rev 213020)
@@ -36,32 +36,42 @@
namespace WTF {
-// Iterator initialization
+#if !PLATFORM(MAC) && !PLATFORM(IOS)
-static UBreakIterator* initializeIterator(UBreakIteratorType type, const char* locale = currentTextBreakLocaleID())
+static Variant<TextBreakIteratorICU, TextBreakIteratorPlatform> mapModeToBackingIterator(StringView string, TextBreakIterator::Mode mode, const AtomicString& locale)
{
- UErrorCode openStatus = U_ZERO_ERROR;
- UBreakIterator* iterator = ubrk_open(type, locale, 0, 0, &openStatus);
- ASSERT_WITH_MESSAGE(U_SUCCESS(openStatus), "ICU could not open a break iterator: %s (%d)", u_errorName(openStatus), openStatus);
- return iterator;
+ switch (mode) {
+ case TextBreakIterator::Mode::Line:
+ return TextBreakIteratorICU(string, TextBreakIteratorICU::Mode::Line, locale.string().utf8().data());
+ case TextBreakIterator::Mode::Cursor:
+ return TextBreakIteratorICU(string, TextBreakIteratorICU::Mode::Character, locale.string().utf8().data());
+ case TextBreakIterator::Mode::Delete:
+ return TextBreakIteratorICU(string, TextBreakIteratorICU::Mode::Character, locale.string().utf8().data());
+ default:
+ ASSERT_NOT_REACHED();
+ return TextBreakIteratorICU(string, TextBreakIteratorICU::Mode::Character, locale.string().utf8().data());
+ }
}
-#if !PLATFORM(IOS)
+TextBreakIterator::TextBreakIterator(StringView string, Mode mode, const AtomicString& locale)
+ : m_backing(mapModeToBackingIterator(string, mode, locale))
+ , m_mode(mode)
+ , m_locale(locale)
+{
+}
-static UBreakIterator* initializeIteratorWithRules(const char* breakRules)
+#endif
+
+// Iterator initialization
+
+static UBreakIterator* initializeIterator(UBreakIteratorType type, const char* locale = currentTextBreakLocaleID())
{
- UParseError parseStatus;
UErrorCode openStatus = U_ZERO_ERROR;
- unsigned length = strlen(breakRules);
- auto upconvertedCharacters = StringView(reinterpret_cast<const LChar*>(breakRules), length).upconvertedCharacters();
- UBreakIterator* iterator = ubrk_openRules(upconvertedCharacters, length, 0, 0, &parseStatus, &openStatus);
+ UBreakIterator* iterator = ubrk_open(type, locale, 0, 0, &openStatus);
ASSERT_WITH_MESSAGE(U_SUCCESS(openStatus), "ICU could not open a break iterator: %s (%d)", u_errorName(openStatus), openStatus);
return iterator;
}
-#endif
-
-
// Iterator text setting
static UBreakIterator* setTextForIterator(UBreakIterator& iterator, StringView string)
@@ -164,145 +174,6 @@
return setTextForIterator(*staticSentenceBreakIterator, string);
}
-UBreakIterator* cursorMovementIterator(StringView string)
-{
-#if !PLATFORM(IOS)
- // This rule set is based on character-break iterator rules of ICU 57
- // <http://source.icu-project.org/repos/icu/icu/tags/release-57-1/source/data/brkitr/>.
- // The major differences from the original ones are listed below:
- // * Replaced '[\p{Grapheme_Cluster_Break = SpacingMark}]' with '[\p{General_Category = Spacing Mark} - $Extend]' for ICU 3.8 or earlier;
- // * Removed rules that prevent a cursor from moving after prepend characters (Bug 24342);
- // * Added rules that prevent a cursor from moving after virama signs of Indic languages except Tamil (Bug 15790), and;
- // * Added rules that prevent a cursor from moving before Japanese half-width katakara voiced marks.
- // * Added rules for regional indicator symbols.
- static const char* kRules =
- "$CR = [\\p{Grapheme_Cluster_Break = CR}];"
- "$LF = [\\p{Grapheme_Cluster_Break = LF}];"
- "$Control = [\\p{Grapheme_Cluster_Break = Control}];"
- "$VoiceMarks = [\\uFF9E\\uFF9F];" // Japanese half-width katakana voiced marks
- "$Extend = [\\p{Grapheme_Cluster_Break = Extend} $VoiceMarks - [\\u0E30 \\u0E32 \\u0E45 \\u0EB0 \\u0EB2]];"
- "$SpacingMark = [[\\p{General_Category = Spacing Mark}] - $Extend];"
- "$L = [\\p{Grapheme_Cluster_Break = L}];"
- "$V = [\\p{Grapheme_Cluster_Break = V}];"
- "$T = [\\p{Grapheme_Cluster_Break = T}];"
- "$LV = [\\p{Grapheme_Cluster_Break = LV}];"
- "$LVT = [\\p{Grapheme_Cluster_Break = LVT}];"
- "$Hin0 = [\\u0905-\\u0939];" // Devanagari Letter A,...,Ha
- "$HinV = \\u094D;" // Devanagari Sign Virama
- "$Hin1 = [\\u0915-\\u0939];" // Devanagari Letter Ka,...,Ha
- "$Ben0 = [\\u0985-\\u09B9];" // Bengali Letter A,...,Ha
- "$BenV = \\u09CD;" // Bengali Sign Virama
- "$Ben1 = [\\u0995-\\u09B9];" // Bengali Letter Ka,...,Ha
- "$Pan0 = [\\u0A05-\\u0A39];" // Gurmukhi Letter A,...,Ha
- "$PanV = \\u0A4D;" // Gurmukhi Sign Virama
- "$Pan1 = [\\u0A15-\\u0A39];" // Gurmukhi Letter Ka,...,Ha
- "$Guj0 = [\\u0A85-\\u0AB9];" // Gujarati Letter A,...,Ha
- "$GujV = \\u0ACD;" // Gujarati Sign Virama
- "$Guj1 = [\\u0A95-\\u0AB9];" // Gujarati Letter Ka,...,Ha
- "$Ori0 = [\\u0B05-\\u0B39];" // Oriya Letter A,...,Ha
- "$OriV = \\u0B4D;" // Oriya Sign Virama
- "$Ori1 = [\\u0B15-\\u0B39];" // Oriya Letter Ka,...,Ha
- "$Tel0 = [\\u0C05-\\u0C39];" // Telugu Letter A,...,Ha
- "$TelV = \\u0C4D;" // Telugu Sign Virama
- "$Tel1 = [\\u0C14-\\u0C39];" // Telugu Letter Ka,...,Ha
- "$Kan0 = [\\u0C85-\\u0CB9];" // Kannada Letter A,...,Ha
- "$KanV = \\u0CCD;" // Kannada Sign Virama
- "$Kan1 = [\\u0C95-\\u0CB9];" // Kannada Letter A,...,Ha
- "$Mal0 = [\\u0D05-\\u0D39];" // Malayalam Letter A,...,Ha
- "$MalV = \\u0D4D;" // Malayalam Sign Virama
- "$Mal1 = [\\u0D15-\\u0D39];" // Malayalam Letter A,...,Ha
- "$RI = [\\U0001F1E6-\\U0001F1FF];" // Emoji regional indicators
- "$ZWJ = \\u200D;" // Zero width joiner
- "$EmojiVar = [\\uFE0F];" // Emoji-style variation selector
-#if ADDITIONAL_EMOJI_SUPPORT
- "$EmojiForSeqs = [\\u2640 \\u2642 \\u26F9 \\u2764 \\U0001F308 \\U0001F3C3-\\U0001F3C4 \\U0001F3CA-\\U0001F3CC \\U0001F3F3 \\U0001F441 \\U0001F466-\\U0001F469 \\U0001F46E-\\U0001F46F \\U0001F471 \\U0001F473 \\U0001F477 \\U0001F481-\\U0001F482 \\U0001F486-\\U0001F487 \\U0001F48B \\U0001F575 \\U0001F5E8 \\U0001F645-\\U0001F647 \\U0001F64B \\U0001F64D-\\U0001F64E \\U0001F6A3 \\U0001F6B4-\\U0001F6B6 \\u2695-\\u2696 \\u2708 \\U0001F33E \\U0001F373 \\U0001F393 \\U0001F3A4 \\U0001F3A8 \\U0001F3EB \\U0001F3ED \\U0001F4BB-\\U0001F4BC \\U0001F527 \\U0001F52C \\U0001F680 \\U0001F692 \\U0001F926 \\U0001F937-\\U0001F939 \\U0001F93C-\\U0001F93E];" // Emoji that participate in ZWJ sequences
- "$EmojiForMods = [\\u261D \\u26F9 \\u270A-\\u270D \\U0001F385 \\U0001F3C3-\\U0001F3C4 \\U0001F3CA \\U0001F3CB \\U0001F442-\\U0001F443 \\U0001F446-\\U0001F450 \\U0001F466-\\U0001F478 \\U0001F47C \\U0001F481-\\U0001F483 \\U0001F485-\\U0001F487 \\U0001F4AA \\U0001F575 \\U0001F590 \\U0001F595 \\U0001F596 \\U0001F645-\\U0001F647 \\U0001F64B-\\U0001F64F \\U0001F6A3 \\U0001F6B4-\\U0001F6B6 \\U0001F6C0 \\U0001F918 \\U0001F3C2 \\U0001F3C7 \\U0001F3CC \\U0001F574 \\U0001F57A \\U0001F6CC \\U0001F919-\\U0001F91E \\U0001F926 \\U0001F930 \\U0001F933-\\U0001F939 \\U0001F93C-\\U0001F93E] ;" // Emoji that take Fitzpatrick modifiers
-#else
- "$EmojiForSeqs = [\\u2764 \\U0001F466-\\U0001F469 \\U0001F48B];" // Emoji that participate in ZWJ sequences
- "$EmojiForMods = [\\u261D \\u270A-\\u270C \\U0001F385 \\U0001F3C3-\\U0001F3C4 \\U0001F3C7 \\U0001F3CA \\U0001F442-\\U0001F443 \\U0001F446-\\U0001F450 \\U0001F466-\\U0001F469 \\U0001F46E-\\U0001F478 \\U0001F47C \\U0001F481-\\U0001F483 \\U0001F485-\\U0001F487 \\U0001F4AA \\U0001F596 \\U0001F645-\\U0001F647 \\U0001F64B-\\U0001F64F \\U0001F6A3 \\U0001F6B4-\\U0001F6B6 \\U0001F6C0] ;" // Emoji that take Fitzpatrick modifiers
-#endif
- "$EmojiMods = [\\U0001F3FB-\\U0001F3FF];" // Fitzpatrick modifiers
- "!!chain;"
-#if ADDITIONAL_EMOJI_SUPPORT
- "!!RINoChain;"
-#endif
- "!!forward;"
- "$CR $LF;"
- "$L ($L | $V | $LV | $LVT);"
- "($LV | $V) ($V | $T);"
- "($LVT | $T) $T;"
-#if ADDITIONAL_EMOJI_SUPPORT
- "$RI $RI $Extend* / $RI;"
- "$RI $RI $Extend*;"
- "[^$Control $CR $LF] $Extend;"
- "[^$Control $CR $LF] $SpacingMark;"
-#else
- "[^$Control $CR $LF] $Extend;"
- "[^$Control $CR $LF] $SpacingMark;"
- "$RI $RI / $RI;"
- "$RI $RI;"
-#endif
- "$Hin0 $HinV $Hin1;" // Devanagari Virama (forward)
- "$Ben0 $BenV $Ben1;" // Bengali Virama (forward)
- "$Pan0 $PanV $Pan1;" // Gurmukhi Virama (forward)
- "$Guj0 $GujV $Guj1;" // Gujarati Virama (forward)
- "$Ori0 $OriV $Ori1;" // Oriya Virama (forward)
- "$Tel0 $TelV $Tel1;" // Telugu Virama (forward)
- "$Kan0 $KanV $Kan1;" // Kannada Virama (forward)
- "$Mal0 $MalV $Mal1;" // Malayalam Virama (forward)
- "$ZWJ $EmojiForSeqs;" // Don't break in emoji ZWJ sequences
- "$EmojiForMods $EmojiVar? $EmojiMods;" // Don't break between relevant emoji (possibly with variation selector) and Fitzpatrick modifier
- "!!reverse;"
- "$LF $CR;"
- "($L | $V | $LV | $LVT) $L;"
- "($V | $T) ($LV | $V);"
- "$T ($LVT | $T);"
-#if ADDITIONAL_EMOJI_SUPPORT
- "$Extend* $RI $RI / $Extend* $RI $RI;"
- "$Extend* $RI $RI;"
- "$Extend [^$Control $CR $LF];"
- "$SpacingMark [^$Control $CR $LF];"
-#else
- "$Extend [^$Control $CR $LF];"
- "$SpacingMark [^$Control $CR $LF];"
- "$RI $RI / $RI $RI;"
- "$RI $RI;"
-#endif
- "$Hin1 $HinV $Hin0;" // Devanagari Virama (backward)
- "$Ben1 $BenV $Ben0;" // Bengali Virama (backward)
- "$Pan1 $PanV $Pan0;" // Gurmukhi Virama (backward)
- "$Guj1 $GujV $Guj0;" // Gujarati Virama (backward)
- "$Ori1 $OriV $Ori0;" // Gujarati Virama (backward)
- "$Tel1 $TelV $Tel0;" // Telugu Virama (backward)
- "$Kan1 $KanV $Kan0;" // Kannada Virama (backward)
- "$Mal1 $MalV $Mal0;" // Malayalam Virama (backward)
- "$EmojiForSeqs $ZWJ;" // Don't break in emoji ZWJ sequences
- "$EmojiMods $EmojiVar? $EmojiForMods;" // Don't break between relevant emoji (possibly with variation selector) and Fitzpatrick modifier
-#if ADDITIONAL_EMOJI_SUPPORT
- "!!safe_reverse;"
- "$RI $RI+;"
- "[$EmojiVar $EmojiMods]+ $EmojiForMods;"
- "!!safe_forward;"
- "$RI $RI+;"
- "$EmojiForMods [$EmojiVar $EmojiMods]+;";
-#else
- "[$EmojiVar $EmojiMods]+ $EmojiForMods;"
- "$EmojiForMods [$EmojiVar $EmojiMods]+;"
- "!!safe_reverse;"
- "!!safe_forward;";
-#endif
- static UBreakIterator* staticCursorMovementIterator = initializeIteratorWithRules(kRules);
-#else // PLATFORM(IOS)
- // Use the special Thai character break iterator for all locales
- static UBreakIterator* staticCursorMovementIterator = initializeIterator(UBRK_CHARACTER, "th");
-#endif // !PLATFORM(IOS)
-
- if (!staticCursorMovementIterator)
- return nullptr;
-
- return setTextForIterator(*staticCursorMovementIterator, string);
-}
-
UBreakIterator* acquireLineBreakIterator(StringView string, const AtomicString& locale, const UChar* priorContext, unsigned priorContextLength, LineBreakIteratorMode mode)
{
UBreakIterator* iterator = LineBreakIteratorPool::sharedPool().take(locale, mode);
Modified: trunk/Source/WTF/wtf/text/TextBreakIterator.h (213019 => 213020)
--- trunk/Source/WTF/wtf/text/TextBreakIterator.h 2017-02-27 00:32:10 UTC (rev 213019)
+++ trunk/Source/WTF/wtf/text/TextBreakIterator.h 2017-02-27 00:36:02 UTC (rev 213020)
@@ -21,20 +21,139 @@
#pragma once
-#include <wtf/text/StringView.h>
+#include "StringView.h"
+#include <wtf/NeverDestroyed.h>
+#include <wtf/Variant.h>
+#include <wtf/text/icu/TextBreakIteratorICU.h>
+#if PLATFORM(MAC) || PLATFORM(IOS)
+#include <wtf/text/cf/TextBreakIteratorCF.h>
+#else
+#include <wtf/text/NullTextBreakIterator.h>
+#endif
+
namespace WTF {
+#if PLATFORM(MAC) || PLATFORM(IOS)
+typedef TextBreakIteratorCF TextBreakIteratorPlatform;
+#else
+typedef NullTextBreakIterator TextBreakIteratorPlatform;
+#endif
+
+class TextBreakIteratorCache;
+
+class TextBreakIterator {
+public:
+ enum class Mode {
+ Line,
+ Cursor,
+ Delete
+ };
+
+ TextBreakIterator() = delete;
+ TextBreakIterator(const TextBreakIterator&) = delete;
+ TextBreakIterator(TextBreakIterator&&) = default;
+ TextBreakIterator& operator=(const TextBreakIterator&) = delete;
+ TextBreakIterator& operator=(TextBreakIterator&&) = default;
+
+ std::optional<unsigned> preceding(unsigned location) const
+ {
+ return switchOn(m_backing, [&](const auto& iterator) {
+ return iterator.preceding(location);
+ });
+ }
+
+ std::optional<unsigned> following(unsigned location) const
+ {
+ return switchOn(m_backing, [&](const auto& iterator) {
+ return iterator.following(location);
+ });
+ }
+
+ bool isBoundary(unsigned location) const
+ {
+ return switchOn(m_backing, [&](const auto& iterator) {
+ return iterator.isBoundary(location);
+ });
+ }
+
+private:
+ friend class TextBreakIteratorCache;
+
+ // Use TextBreakIteratorCache instead of constructing one of these directly.
+ WTF_EXPORT TextBreakIterator(StringView, Mode, const AtomicString& locale);
+
+ void setText(StringView string)
+ {
+ return switchOn(m_backing, [&](auto& iterator) {
+ return iterator.setText(string);
+ });
+ }
+
+ Mode mode() const
+ {
+ return m_mode;
+ }
+
+ const AtomicString& locale() const
+ {
+ return m_locale;
+ }
+
+ Variant<TextBreakIteratorICU, TextBreakIteratorPlatform> m_backing;
+ Mode m_mode;
+ AtomicString m_locale;
+};
+
+class TextBreakIteratorCache {
+public:
+ static TextBreakIteratorCache& singleton()
+ {
+ static NeverDestroyed<TextBreakIteratorCache> cache;
+ return cache.get();
+ }
+
+ TextBreakIteratorCache(const TextBreakIteratorCache&) = delete;
+ TextBreakIteratorCache(TextBreakIteratorCache&&) = delete;
+ TextBreakIteratorCache& operator=(const TextBreakIteratorCache&) = delete;
+ TextBreakIteratorCache& operator=(TextBreakIteratorCache&&) = delete;
+
+ TextBreakIterator take(StringView string, TextBreakIterator::Mode mode, const AtomicString& locale)
+ {
+ auto iter = std::find_if(m_unused.begin(), m_unused.end(), [&](TextBreakIterator& candidate) {
+ return candidate.mode() == mode && candidate.locale() == locale;
+ });
+ if (iter == m_unused.end())
+ return TextBreakIterator(string, mode, locale);
+ auto result = WTFMove(*iter);
+ m_unused.remove(iter - m_unused.begin());
+ result.setText(string);
+ return result;
+
+ }
+
+ void put(TextBreakIterator&& iterator)
+ {
+ m_unused.append(WTFMove(iterator));
+ if (m_unused.size() > capacity)
+ m_unused.remove(0);
+ }
+
+private:
+ friend class NeverDestroyed<TextBreakIteratorCache>;
+
+ TextBreakIteratorCache()
+ {
+ }
+
+ static constexpr int capacity = 2;
+ Vector<TextBreakIterator, capacity> m_unused;
+};
+
// Note: The returned iterator is good only until you get another iterator, with the exception of acquireLineBreakIterator.
enum class LineBreakIteratorMode { Default, Loose, Normal, Strict };
-// This is similar to character break iterator in most cases, but is subject to
-// platform UI conventions. One notable example where this can be different
-// from character break iterator is Thai prepend characters, see bug 24342.
-// Use this for insertion point and selection manipulations.
-WTF_EXPORT_PRIVATE UBreakIterator* cursorMovementIterator(StringView);
-
WTF_EXPORT_PRIVATE UBreakIterator* wordBreakIterator(StringView);
WTF_EXPORT_PRIVATE UBreakIterator* sentenceBreakIterator(StringView);
@@ -188,4 +307,6 @@
using WTF::LazyLineBreakIterator;
using WTF::LineBreakIteratorMode;
using WTF::NonSharedCharacterBreakIterator;
+using WTF::TextBreakIterator;
+using WTF::TextBreakIteratorCache;
using WTF::isWordTextBreak;
Added: trunk/Source/WTF/wtf/text/cf/TextBreakIteratorCF.h (0 => 213020)
--- trunk/Source/WTF/wtf/text/cf/TextBreakIteratorCF.h (rev 0)
+++ trunk/Source/WTF/wtf/text/cf/TextBreakIteratorCF.h 2017-02-27 00:36:02 UTC (rev 213020)
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2017 Apple Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#pragma once
+
+#include <wtf/Optional.h>
+#include <wtf/spi/cf/CFStringSPI.h>
+
+namespace WTF {
+
+class TextBreakIteratorCF {
+public:
+ enum class Mode {
+ Cursor,
+ Delete
+ };
+
+ TextBreakIteratorCF(StringView string, Mode mode)
+ : m_string(string.createCFStringWithoutCopying())
+ {
+ switch (mode) {
+ case Mode::Cursor:
+ m_type = kCFStringComposedCharacterCluster;
+ break;
+ case Mode::Delete:
+ m_type = kCFStringBackwardDeletionCluster;
+ break;
+ }
+ }
+
+ TextBreakIteratorCF() = delete;
+ TextBreakIteratorCF(const TextBreakIteratorCF&) = delete;
+ TextBreakIteratorCF(TextBreakIteratorCF&&) = default;
+ TextBreakIteratorCF& operator=(const TextBreakIteratorCF&) = delete;
+ TextBreakIteratorCF& operator=(TextBreakIteratorCF&&) = default;
+
+ void setText(StringView string)
+ {
+ m_string = string.createCFStringWithoutCopying();
+ }
+
+ std::optional<unsigned> preceding(unsigned location) const
+ {
+ if (!location)
+ return { };
+ auto length = static_cast<unsigned long>(CFStringGetLength(m_string.get()));
+ if (location > length)
+ return length;
+ auto range = CFStringGetRangeOfCharacterClusterAtIndex(m_string.get(), location - 1, m_type);
+ return range.location;
+ }
+
+ std::optional<unsigned> following(unsigned location) const
+ {
+ if (location >= static_cast<unsigned long>(CFStringGetLength(m_string.get())))
+ return { };
+ auto range = CFStringGetRangeOfCharacterClusterAtIndex(m_string.get(), location, m_type);
+ return range.location + range.length;
+ }
+
+ bool isBoundary(unsigned location) const
+ {
+ if (location == static_cast<unsigned long>(CFStringGetLength(m_string.get())))
+ return true;
+ auto range = CFStringGetRangeOfCharacterClusterAtIndex(m_string.get(), location, m_type);
+ return static_cast<unsigned long>(range.location) == location;
+ }
+
+private:
+ RetainPtr<CFStringRef> m_string;
+ CFStringCharacterClusterType m_type;
+};
+
+}
Property changes on: trunk/Source/WTF/wtf/text/cf/TextBreakIteratorCF.h
___________________________________________________________________
Added: svn:eol-style
+native
\ No newline at end of property
Added: svn:keywords
+Author Date Id Rev URL
\ No newline at end of property
Added: trunk/Source/WTF/wtf/text/icu/TextBreakIteratorICU.h (0 => 213020)
--- trunk/Source/WTF/wtf/text/icu/TextBreakIteratorICU.h (rev 0)
+++ trunk/Source/WTF/wtf/text/icu/TextBreakIteratorICU.h 2017-02-27 00:36:02 UTC (rev 213020)
@@ -0,0 +1,267 @@
+/*
+ * Copyright (C) 2017 Apple Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#pragma once
+
+#include <unicode/ubrk.h>
+#include <wtf/Optional.h>
+#include <wtf/text/icu/UTextProviderLatin1.h>
+
+#define USE_ICU_CURSOR_ITERATOR (PLATFORM(MAC) && __MAC_OS_X_VERSION_MIN_REQUIRED < 101200)
+
+namespace WTF {
+
+#if USE_ICU_CURSOR_ITERATOR
+// Returns the ICU break-rule source text for cursor movement as one concatenated string
+// literal: variable definitions first, then the !!forward, !!reverse, !!safe_reverse and
+// !!safe_forward rule sections.
+static String cursorRules()
+{
+ return ASCIILiteral(
+ // This rule set is based on character-break iterator rules of ICU 57
+ // <http://source.icu-project.org/repos/icu/icu/tags/release-57-1/source/data/brkitr/>.
+ // The major differences from the original ones are listed below:
+ // * Replaced '[\p{Grapheme_Cluster_Break = SpacingMark}]' with '[\p{General_Category = Spacing Mark} - $Extend]' for ICU 3.8 or earlier;
+ // * Removed rules that prevent a cursor from moving after prepend characters (Bug 24342);
+ // * Added rules that prevent a cursor from moving after virama signs of Indic languages except Tamil (Bug 15790), and;
+ // * Added rules that prevent a cursor from moving before Japanese half-width katakana voiced marks.
+ // * Added rules for regional indicator symbols.
+ "$CR = [\\p{Grapheme_Cluster_Break = CR}];"
+ "$LF = [\\p{Grapheme_Cluster_Break = LF}];"
+ "$Control = [\\p{Grapheme_Cluster_Break = Control}];"
+ "$VoiceMarks = [\\uFF9E\\uFF9F];" // Japanese half-width katakana voiced marks
+ "$Extend = [\\p{Grapheme_Cluster_Break = Extend} $VoiceMarks - [\\u0E30 \\u0E32 \\u0E45 \\u0EB0 \\u0EB2]];"
+ "$SpacingMark = [[\\p{General_Category = Spacing Mark}] - $Extend];"
+ "$L = [\\p{Grapheme_Cluster_Break = L}];"
+ "$V = [\\p{Grapheme_Cluster_Break = V}];"
+ "$T = [\\p{Grapheme_Cluster_Break = T}];"
+ "$LV = [\\p{Grapheme_Cluster_Break = LV}];"
+ "$LVT = [\\p{Grapheme_Cluster_Break = LVT}];"
+ "$Hin0 = [\\u0905-\\u0939];" // Devanagari Letter A,...,Ha
+ "$HinV = \\u094D;" // Devanagari Sign Virama
+ "$Hin1 = [\\u0915-\\u0939];" // Devanagari Letter Ka,...,Ha
+ "$Ben0 = [\\u0985-\\u09B9];" // Bengali Letter A,...,Ha
+ "$BenV = \\u09CD;" // Bengali Sign Virama
+ "$Ben1 = [\\u0995-\\u09B9];" // Bengali Letter Ka,...,Ha
+ "$Pan0 = [\\u0A05-\\u0A39];" // Gurmukhi Letter A,...,Ha
+ "$PanV = \\u0A4D;" // Gurmukhi Sign Virama
+ "$Pan1 = [\\u0A15-\\u0A39];" // Gurmukhi Letter Ka,...,Ha
+ "$Guj0 = [\\u0A85-\\u0AB9];" // Gujarati Letter A,...,Ha
+ "$GujV = \\u0ACD;" // Gujarati Sign Virama
+ "$Guj1 = [\\u0A95-\\u0AB9];" // Gujarati Letter Ka,...,Ha
+ "$Ori0 = [\\u0B05-\\u0B39];" // Oriya Letter A,...,Ha
+ "$OriV = \\u0B4D;" // Oriya Sign Virama
+ "$Ori1 = [\\u0B15-\\u0B39];" // Oriya Letter Ka,...,Ha
+ "$Tel0 = [\\u0C05-\\u0C39];" // Telugu Letter A,...,Ha
+ "$TelV = \\u0C4D;" // Telugu Sign Virama
+ "$Tel1 = [\\u0C14-\\u0C39];" // Telugu Letter Ka,...,Ha
+ // NOTE(review): the range above starts at U+0C14 (Telugu Letter Au) whereas every other
+ // script's "1" set starts at its Ka (U+xx15) -- confirm whether U+0C15 was intended.
+ "$Kan0 = [\\u0C85-\\u0CB9];" // Kannada Letter A,...,Ha
+ "$KanV = \\u0CCD;" // Kannada Sign Virama
+ "$Kan1 = [\\u0C95-\\u0CB9];" // Kannada Letter Ka,...,Ha
+ "$Mal0 = [\\u0D05-\\u0D39];" // Malayalam Letter A,...,Ha
+ "$MalV = \\u0D4D;" // Malayalam Sign Virama
+ "$Mal1 = [\\u0D15-\\u0D39];" // Malayalam Letter Ka,...,Ha
+ "$RI = [\\U0001F1E6-\\U0001F1FF];" // Emoji regional indicators
+ "$ZWJ = \\u200D;" // Zero width joiner
+ "$EmojiVar = [\\uFE0F];" // Emoji-style variation selector
+ "$EmojiForSeqs = [\\u2640 \\u2642 \\u26F9 \\u2764 \\U0001F308 \\U0001F3C3-\\U0001F3C4 \\U0001F3CA-\\U0001F3CC \\U0001F3F3 \\U0001F441 \\U0001F466-\\U0001F469 \\U0001F46E-\\U0001F46F \\U0001F471 \\U0001F473 \\U0001F477 \\U0001F481-\\U0001F482 \\U0001F486-\\U0001F487 \\U0001F48B \\U0001F575 \\U0001F5E8 \\U0001F645-\\U0001F647 \\U0001F64B \\U0001F64D-\\U0001F64E \\U0001F6A3 \\U0001F6B4-\\U0001F6B6 \\u2695-\\u2696 \\u2708 \\U0001F33E \\U0001F373 \\U0001F393 \\U0001F3A4 \\U0001F3A8 \\U0001F3EB \\U0001F3ED \\U0001F4BB-\\U0001F4BC \\U0001F527 \\U0001F52C \\U0001F680 \\U0001F692 \\U0001F926 \\U0001F937-\\U0001F939 \\U0001F93C-\\U0001F93E];" // Emoji that participate in ZWJ sequences
+ "$EmojiForMods = [\\u261D \\u26F9 \\u270A-\\u270D \\U0001F385 \\U0001F3C3-\\U0001F3C4 \\U0001F3CA \\U0001F3CB \\U0001F442-\\U0001F443 \\U0001F446-\\U0001F450 \\U0001F466-\\U0001F478 \\U0001F47C \\U0001F481-\\U0001F483 \\U0001F485-\\U0001F487 \\U0001F4AA \\U0001F575 \\U0001F590 \\U0001F595 \\U0001F596 \\U0001F645-\\U0001F647 \\U0001F64B-\\U0001F64F \\U0001F6A3 \\U0001F6B4-\\U0001F6B6 \\U0001F6C0 \\U0001F918 \\U0001F3C2 \\U0001F3C7 \\U0001F3CC \\U0001F574 \\U0001F57A \\U0001F6CC \\U0001F919-\\U0001F91E \\U0001F926 \\U0001F930 \\U0001F933-\\U0001F939 \\U0001F93C-\\U0001F93E] ;" // Emoji that take Fitzpatrick modifiers
+ "$EmojiMods = [\\U0001F3FB-\\U0001F3FF];" // Fitzpatrick modifiers
+ "!!chain;"
+ "!!RINoChain;"
+ "!!forward;"
+ "$CR $LF;"
+ "$L ($L | $V | $LV | $LVT);"
+ "($LV | $V) ($V | $T);"
+ "($LVT | $T) $T;"
+ "$RI $RI $Extend* / $RI;"
+ "$RI $RI $Extend*;"
+ "[^$Control $CR $LF] $Extend;"
+ "[^$Control $CR $LF] $SpacingMark;"
+ "$Hin0 $HinV $Hin1;" // Devanagari Virama (forward)
+ "$Ben0 $BenV $Ben1;" // Bengali Virama (forward)
+ "$Pan0 $PanV $Pan1;" // Gurmukhi Virama (forward)
+ "$Guj0 $GujV $Guj1;" // Gujarati Virama (forward)
+ "$Ori0 $OriV $Ori1;" // Oriya Virama (forward)
+ "$Tel0 $TelV $Tel1;" // Telugu Virama (forward)
+ "$Kan0 $KanV $Kan1;" // Kannada Virama (forward)
+ "$Mal0 $MalV $Mal1;" // Malayalam Virama (forward)
+ "$ZWJ $EmojiForSeqs;" // Don't break in emoji ZWJ sequences
+ "$EmojiForMods $EmojiVar? $EmojiMods;" // Don't break between relevant emoji (possibly with variation selector) and Fitzpatrick modifier
+ "!!reverse;"
+ "$LF $CR;"
+ "($L | $V | $LV | $LVT) $L;"
+ "($V | $T) ($LV | $V);"
+ "$T ($LVT | $T);"
+ "$Extend* $RI $RI / $Extend* $RI $RI;"
+ "$Extend* $RI $RI;"
+ "$Extend [^$Control $CR $LF];"
+ "$SpacingMark [^$Control $CR $LF];"
+ "$Hin1 $HinV $Hin0;" // Devanagari Virama (backward)
+ "$Ben1 $BenV $Ben0;" // Bengali Virama (backward)
+ "$Pan1 $PanV $Pan0;" // Gurmukhi Virama (backward)
+ "$Guj1 $GujV $Guj0;" // Gujarati Virama (backward)
+ "$Ori1 $OriV $Ori0;" // Oriya Virama (backward)
+ "$Tel1 $TelV $Tel0;" // Telugu Virama (backward)
+ "$Kan1 $KanV $Kan0;" // Kannada Virama (backward)
+ "$Mal1 $MalV $Mal0;" // Malayalam Virama (backward)
+ "$EmojiForSeqs $ZWJ;" // Don't break in emoji ZWJ sequences
+ "$EmojiMods $EmojiVar? $EmojiForMods;" // Don't break between relevant emoji (possibly with variation selector) and Fitzpatrick modifier
+ "!!safe_reverse;"
+ "$RI $RI+;"
+ "[$EmojiVar $EmojiMods]+ $EmojiForMods;"
+ "!!safe_forward;"
+ "$RI $RI+;"
+ "$EmojiForMods [$EmojiVar $EmojiMods]+;"
+ );
+}
+#endif
+
+// Wraps a raw ICU UBreakIterator handle and exposes preceding / following / isBoundary
+// queries on it. The type is move-only; the handle is closed by the destructor (or by move
+// assignment before it is replaced), and a moved-from object holds nullptr.
+class TextBreakIteratorICU {
+public:
+ enum class Mode {
+ Line,
+ Character,
+#if USE_ICU_CURSOR_ITERATOR
+ Cursor,
+#endif
+ };
+
+ // Points the iterator at |length| Latin-1 characters starting at |buffer| by opening a
+ // UText over them with openLatin1UTextProvider() and handing it to ubrk_setUText().
+ void set8BitText(const LChar* buffer, unsigned length)
+ {
+ UTextWithBuffer textLocal;
+ textLocal.text = UTEXT_INITIALIZER;
+ textLocal.text.extraSize = sizeof(textLocal.buffer);
+ textLocal.text.pExtra = textLocal.buffer;
+
+ UErrorCode status = U_ZERO_ERROR;
+ UText* text = openLatin1UTextProvider(&textLocal, buffer, length, &status);
+ ASSERT(U_SUCCESS(status));
+ ASSERT(text);
+
+ ubrk_setUText(m_iterator, text, &status);
+ ASSERT(U_SUCCESS(status));
+
+ // Per the ICU ubrk_setUText() documentation the iterator keeps its own clone of the
+ // UText, so the local one is released here.
+ utext_close(text);
+ }
+
+ // Opens an ICU break iterator for |mode| and |locale| over |string|.
+ // 16-bit text is passed straight to ubrk_open() / ubrk_openRules(); 8-bit text is attached
+ // afterwards through set8BitText().
+ TextBreakIteratorICU(StringView string, Mode mode, const char *locale)
+ {
+ UBreakIteratorType type;
+ switch (mode) {
+ case Mode::Line:
+ type = UBRK_LINE;
+ break;
+ case Mode::Character:
+ type = UBRK_CHARACTER;
+ break;
+#if USE_ICU_CURSOR_ITERATOR
+ case Mode::Cursor:
+ type = UBRK_CHARACTER;
+ break;
+#endif
+ default:
+ ASSERT_NOT_REACHED();
+ type = UBRK_CHARACTER;
+ break;
+ }
+
+ bool requiresSet8BitText = string.is8Bit();
+
+ // For 8-bit strings the iterator is opened with no text; the text is set further down.
+ const UChar *text = requiresSet8BitText ? nullptr : string.characters16();
+ int32_t textLength = requiresSet8BitText ? 0 : string.length();
+
+ // FIXME: Handle weak / normal / strict line breaking.
+ UErrorCode status = U_ZERO_ERROR;
+#if USE_ICU_CURSOR_ITERATOR
+ if (mode == Mode::Cursor) {
+ // The rule text and its 16-bit form are built once and kept for the process lifetime.
+ static NeverDestroyed<String> cursorRules = WTF::cursorRules();
+ static NeverDestroyed<StringView::UpconvertedCharacters> upconvertedRules = StringView(cursorRules).upconvertedCharacters();
+ UParseError parseError;
+ m_iterator = ubrk_openRules(upconvertedRules.get(), cursorRules.get().length(), text, textLength, &parseError, &status);
+ } else
+#endif
+ m_iterator = ubrk_open(type, locale, text, textLength, &status);
+ ASSERT(U_SUCCESS(status));
+
+ if (requiresSet8BitText)
+ set8BitText(string.characters8(), string.length());
+ }
+
+ TextBreakIteratorICU() = delete;
+ TextBreakIteratorICU(const TextBreakIteratorICU&) = delete;
+
+ // Takes over |other|'s iterator handle and leaves |other| holding nullptr.
+ TextBreakIteratorICU(TextBreakIteratorICU&& other) noexcept
+ : m_iterator(other.m_iterator)
+ {
+ other.m_iterator = nullptr;
+ }
+
+ TextBreakIteratorICU& operator=(const TextBreakIteratorICU&) = delete;
+
+ // Closes the currently held iterator (if any) and takes over |other|'s handle.
+ TextBreakIteratorICU& operator=(TextBreakIteratorICU&& other) noexcept
+ {
+ // Self-move must be a no-op; otherwise the handle would be closed and then dropped.
+ if (this == &other)
+ return *this;
+ if (m_iterator)
+ ubrk_close(m_iterator);
+ m_iterator = other.m_iterator;
+ other.m_iterator = nullptr;
+ return *this;
+ }
+
+ ~TextBreakIteratorICU()
+ {
+ if (m_iterator)
+ ubrk_close(m_iterator);
+ }
+
+ // Replaces the text the iterator examines, choosing the 8-bit or 16-bit path from |string|.
+ void setText(StringView string)
+ {
+ if (string.is8Bit()) {
+ set8BitText(string.characters8(), string.length());
+ return;
+ }
+ UErrorCode status = U_ZERO_ERROR;
+ ubrk_setText(m_iterator, string.characters16(), string.length(), &status);
+ ASSERT(U_SUCCESS(status));
+ }
+
+ // Returns ubrk_preceding()'s result for |location|, or an empty optional when ICU
+ // reports UBRK_DONE.
+ std::optional<unsigned> preceding(unsigned location) const
+ {
+ auto result = ubrk_preceding(m_iterator, location);
+ if (result == UBRK_DONE)
+ return { };
+ return result;
+ }
+
+ // Returns ubrk_following()'s result for |location|, or an empty optional when ICU
+ // reports UBRK_DONE.
+ std::optional<unsigned> following(unsigned location) const
+ {
+ auto result = ubrk_following(m_iterator, location);
+ if (result == UBRK_DONE)
+ return { };
+ return result;
+ }
+
+ // Returns whether ICU considers |location| a boundary.
+ bool isBoundary(unsigned location) const
+ {
+ return ubrk_isBoundary(m_iterator, location);
+ }
+
+private:
+ // Owned ICU handle; nullptr after this object has been moved from.
+ UBreakIterator* m_iterator { nullptr };
+};
+
+}
Property changes on: trunk/Source/WTF/wtf/text/icu/TextBreakIteratorICU.h
___________________________________________________________________
Added: svn:eol-style
+native
\ No newline at end of property
Added: svn:keywords
+Author Date Id Rev URL
\ No newline at end of property
Modified: trunk/Source/WTF/wtf/text/icu/UTextProviderLatin1.h (213019 => 213020)
--- trunk/Source/WTF/wtf/text/icu/UTextProviderLatin1.h 2017-02-27 00:32:10 UTC (rev 213019)
+++ trunk/Source/WTF/wtf/text/icu/UTextProviderLatin1.h 2017-02-27 00:36:02 UTC (rev 213020)
@@ -38,7 +38,7 @@
UChar buffer[UTextWithBufferInlineCapacity];
};
-UText* openLatin1UTextProvider(UTextWithBuffer* utWithBuffer, const LChar* string, unsigned length, UErrorCode* status);
+WTF_EXPORT UText* openLatin1UTextProvider(UTextWithBuffer* utWithBuffer, const LChar* string, unsigned length, UErrorCode* status);
UText* openLatin1ContextAwareUTextProvider(UTextWithBuffer* utWithBuffer, const LChar* string, unsigned length, const UChar* priorContext, int priorContextLength, UErrorCode* status);
} // namespace WTF
Modified: trunk/Source/WTF/wtf/text/mac/TextBreakIteratorInternalICUMac.mm (213019 => 213020)
--- trunk/Source/WTF/wtf/text/mac/TextBreakIteratorInternalICUMac.mm 2017-02-27 00:32:10 UTC (rev 213019)
+++ trunk/Source/WTF/wtf/text/mac/TextBreakIteratorInternalICUMac.mm 2017-02-27 00:36:02 UTC (rev 213020)
@@ -21,10 +21,34 @@
#include "config.h"
#include "TextBreakIteratorInternalICU.h"
+#include "TextBreakIterator.h"
#include <wtf/RetainPtr.h>
namespace WTF {
+// Chooses and constructs the backing iterator for |mode|: ICU for line breaking, ICU or
+// CoreFoundation for cursor movement depending on USE_ICU_CURSOR_ITERATOR, and
+// CoreFoundation for backward deletion. |locale| is only passed to the ICU-backed iterators.
+static Variant<TextBreakIteratorICU, TextBreakIteratorPlatform> mapModeToBackingIterator(StringView string, TextBreakIterator::Mode mode, const AtomicString& locale)
+{
+ switch (mode) {
+ case TextBreakIterator::Mode::Line:
+ return TextBreakIteratorICU(string, TextBreakIteratorICU::Mode::Line, locale.string().utf8().data());
+ case TextBreakIterator::Mode::Cursor:
+#if USE_ICU_CURSOR_ITERATOR
+ return TextBreakIteratorICU(string, TextBreakIteratorICU::Mode::Cursor, locale.string().utf8().data());
+#else
+ return TextBreakIteratorCF(string, TextBreakIteratorCF::Mode::Cursor);
+#endif
+ case TextBreakIterator::Mode::Delete:
+ return TextBreakIteratorCF(string, TextBreakIteratorCF::Mode::Delete);
+ }
+
+ // Every listed enumerator returns above. A value outside them must not fall off the end of
+ // this non-void function, so fall back to a character iterator, matching the default arm
+ // of the TextBreakIteratorICU constructor's mode switch.
+ ASSERT_NOT_REACHED();
+ return TextBreakIteratorICU(string, TextBreakIteratorICU::Mode::Character, locale.string().utf8().data());
+}
+
+// Builds the backing iterator that mapModeToBackingIterator() selects for |mode|, and records
+// the mode and locale the iterator was created with.
+TextBreakIterator::TextBreakIterator(StringView string, Mode mode, const AtomicString& locale)
+ : m_backing(mapModeToBackingIterator(string, mode, locale))
+ , m_mode(mode)
+ , m_locale(locale)
+{
+}
+
static const int maxLocaleStringLength = 32;
static inline RetainPtr<CFStringRef> textBreakLocalePreference()
Modified: trunk/Source/WebCore/ChangeLog (213019 => 213020)
--- trunk/Source/WebCore/ChangeLog 2017-02-27 00:32:10 UTC (rev 213019)
+++ trunk/Source/WebCore/ChangeLog 2017-02-27 00:36:02 UTC (rev 213020)
@@ -1,3 +1,21 @@
+2017-02-26 Myles C. Maxfield <mmaxfi...@apple.com>
+
+ Stop compiling our own cursorMovementIterator()
+ https://bugs.webkit.org/show_bug.cgi?id=168211
+
+ Reviewed by David Hyatt.
+
+ Covered by existing tests.
+
+ Hook up the caret iterator.
+
+ * platform/graphics/ComplexTextController.cpp:
+ (WebCore::ComplexTextController::offsetForPosition):
+ * rendering/RenderText.cpp:
+ (WebCore::RenderText::previousOffset):
+ (WebCore::RenderText::nextOffset):
+ * rendering/RenderText.h:
+
2017-02-26 Commit Queue <commit-qu...@webkit.org>
Unreviewed, rolling out r212942.
Modified: trunk/Source/WebCore/platform/graphics/ComplexTextController.cpp (213019 => 213020)
--- trunk/Source/WebCore/platform/graphics/ComplexTextController.cpp 2017-02-27 00:32:10 UTC (rev 213019)
+++ trunk/Source/WebCore/platform/graphics/ComplexTextController.cpp 2017-02-27 00:36:02 UTC (rev 213020)
@@ -217,21 +217,20 @@
// could use the glyph's "ligature carets". This is available in CoreText via CTFontGetLigatureCaretPositions().
unsigned hitIndex = hitGlyphStart + (hitGlyphEnd - hitGlyphStart) * (m_run.ltr() ? x / adjustedAdvance : 1 - x / adjustedAdvance);
unsigned stringLength = complexTextRun.stringLength();
- UBreakIterator* cursorPositionIterator = cursorMovementIterator(StringView(complexTextRun.characters(), stringLength));
+ TextBreakIterator cursorPositionIterator = TextBreakIteratorCache::singleton().take(StringView(complexTextRun.characters(), stringLength), TextBreakIterator::Mode::Cursor, nullAtom);
unsigned clusterStart;
- if (ubrk_isBoundary(cursorPositionIterator, hitIndex))
+ if (cursorPositionIterator.isBoundary(hitIndex))
clusterStart = hitIndex;
- else {
- int preceeding = ubrk_preceding(cursorPositionIterator, hitIndex);
- clusterStart = preceeding == UBRK_DONE ? 0 : preceeding;
- }
+ else
+ clusterStart = cursorPositionIterator.preceding(hitIndex).value_or(0);
if (!includePartialGlyphs)
return complexTextRun.stringLocation() + clusterStart;
- int following = ubrk_following(cursorPositionIterator, hitIndex);
- unsigned clusterEnd = following == UBRK_DONE ? stringLength : following;
+ unsigned clusterEnd = cursorPositionIterator.following(hitIndex).value_or(stringLength);
+ TextBreakIteratorCache::singleton().put(WTFMove(cursorPositionIterator));
+
float clusterWidth;
// FIXME: The search stops at the boundaries of complexTextRun. In theory, it should go on into neighboring ComplexTextRuns
// derived from the same CTLine. In practice, we do not expect there to be more than one CTRun in a CTLine, as no
Modified: trunk/Source/WebCore/rendering/RenderText.cpp (213019 => 213020)
--- trunk/Source/WebCore/rendering/RenderText.cpp 2017-02-27 00:32:10 UTC (rev 213019)
+++ trunk/Source/WebCore/rendering/RenderText.cpp 2017-02-27 00:36:02 UTC (rev 213020)
@@ -1505,15 +1505,9 @@
return current - 1;
StringImpl* textImpl = m_text.impl();
- UBreakIterator* iterator = cursorMovementIterator(StringView(textImpl->characters16(), textImpl->length()));
- if (!iterator)
- return current - 1;
-
- long result = ubrk_preceding(iterator, current);
- if (result == UBRK_DONE)
- result = current - 1;
-
-
+ TextBreakIterator iterator = TextBreakIteratorCache::singleton().take(StringView(textImpl->characters16(), textImpl->length()), TextBreakIterator::Mode::Cursor, nullAtom);
+ auto result = iterator.preceding(current).value_or(current - 1);
+ TextBreakIteratorCache::singleton().put(WTFMove(iterator));
return result;
}
@@ -1685,14 +1679,9 @@
return current + 1;
StringImpl* textImpl = m_text.impl();
- UBreakIterator* iterator = cursorMovementIterator(StringView(textImpl->characters16(), textImpl->length()));
- if (!iterator)
- return current + 1;
-
- long result = ubrk_following(iterator, current);
- if (result == UBRK_DONE)
- result = current + 1;
-
+ TextBreakIterator iterator = TextBreakIteratorCache::singleton().take(StringView(textImpl->characters16(), textImpl->length()), TextBreakIterator::Mode::Cursor, nullAtom);
+ auto result = iterator.following(current).value_or(current + 1);
+ TextBreakIteratorCache::singleton().put(WTFMove(iterator));
return result;
}
Modified: trunk/Source/WebCore/rendering/RenderText.h (213019 => 213020)
--- trunk/Source/WebCore/rendering/RenderText.h 2017-02-27 00:32:10 UTC (rev 213019)
+++ trunk/Source/WebCore/rendering/RenderText.h 2017-02-27 00:36:02 UTC (rev 213020)
@@ -138,6 +138,7 @@
bool containsCaretOffset(unsigned) const;
bool hasRenderedText() const;
+ // FIXME: These should return unsigneds.
int previousOffset(int current) const final;
int previousOffsetForBackwardDeletion(int current) const final;
int nextOffset(int current) const final;