Author: serge-sans-paille
Date: 2020-12-03T20:11:11+01:00
New Revision: 9501419e879e56273f504beda3b13bf6bf82ae2b
URL: https://github.com/llvm/llvm-project/commit/9501419e879e56273f504beda3b13bf6bf82ae2b
DIFF: https://github.com/llvm/llvm-project/commit/9501419e879e56273f504beda3b13bf6bf82ae2b.diff

LOG: Speedup some unicode rendering

Use a fast path for column width computation for ascii characters.
Especially relevant for llvm-objdump.

before:
% time ./bin/llvm-objdump -D -j .text /lib/libc.so.6 >/dev/null
./bin/llvm-objdump -D -j .text /lib/libc.so.6 > /dev/null 0.75s user 0.01s system 99% cpu 0.757 total

after:
% time ./bin/llvm-objdump -D -j .text /lib/libc.so.6 >/dev/null
./bin/llvm-objdump -D -j .text /lib/libc.so.6 > /dev/null 0.37s user 0.01s system 99% cpu 0.378 total

Differential Revision: https://reviews.llvm.org/D92180

Added:

Modified:
    llvm/lib/Support/Unicode.cpp
    llvm/unittests/Support/UnicodeTest.cpp

Removed:

################################################################################
diff --git a/llvm/lib/Support/Unicode.cpp b/llvm/lib/Support/Unicode.cpp
index 4d195069682b..bb6e75555b4c 100644
--- a/llvm/lib/Support/Unicode.cpp
+++ b/llvm/lib/Support/Unicode.cpp
@@ -339,11 +339,22 @@ static inline int charWidth(int UCS)
   return 1;
 }
 
+static bool isprintableascii(char c) { return c > 31 && c < 127; }
+
 int columnWidthUTF8(StringRef Text) {
   unsigned ColumnWidth = 0;
   unsigned Length;
   for (size_t i = 0, e = Text.size(); i < e; i += Length) {
     Length = getNumBytesForUTF8(Text[i]);
+
+    // fast path for ASCII characters
+    if (Length == 1) {
+      if (!isprintableascii(Text[i]))
+        return ErrorNonPrintableCharacter;
+      ColumnWidth += 1;
+      continue;
+    }
+
     if (Length <= 0 || i + Length > Text.size())
       return ErrorInvalidUTF8;
     UTF32 buf[1];

diff --git a/llvm/unittests/Support/UnicodeTest.cpp b/llvm/unittests/Support/UnicodeTest.cpp
index 376fbee4ae66..6ce323dc8f38 100644
--- a/llvm/unittests/Support/UnicodeTest.cpp
+++ b/llvm/unittests/Support/UnicodeTest.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/Unicode.h"
+#include "llvm/Support/ConvertUTF.h"
 #include "gtest/gtest.h"
 
 namespace llvm {
@@ -23,6 +24,7 @@ TEST(Unicode, columnWidthUTF8) {
   EXPECT_EQ(6, columnWidthUTF8("abcdef"));
 
   EXPECT_EQ(-1, columnWidthUTF8("\x01"));
+  EXPECT_EQ(-1, columnWidthUTF8("\t"));
   EXPECT_EQ(-1, columnWidthUTF8("aaaaaaaaaa\x01"));
   EXPECT_EQ(-1, columnWidthUTF8("\342\200\213")); // 200B ZERO WIDTH SPACE
 
@@ -84,6 +86,19 @@ TEST(Unicode, isPrintable) {
 
   EXPECT_TRUE(isPrintable(0x20000)); // CJK UNIFIED IDEOGRAPH-20000
   EXPECT_FALSE(isPrintable(0x10FFFF)); // noncharacter
+
+  // test the validity of a fast path in columnWidthUTF8
+  for (unsigned char c = 0; c < 128; ++c) {
+    const UTF8 buf8[2] = {c, 0};
+    const UTF8 *Target8 = &buf8[0];
+    UTF32 buf32[1];
+    UTF32 *Target32 = &buf32[0];
+    auto status = ConvertUTF8toUTF32(&Target8, Target8 + 1, &Target32,
+                                     Target32 + 1, strictConversion);
+    EXPECT_TRUE(status == conversionOK);
+    EXPECT_TRUE((columnWidthUTF8(reinterpret_cast<const char *>(buf8)) == 1) ==
+                (bool)isPrintable(buf32[0]));
+  }
 }
 
 } // namespace

_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits