On Wed, Dec 14, 2011 at 02:02:33PM +0400, Alexander V. Lukyanov wrote:
> It means that it is possible to avoid calling is_cjk_encoding() at all,
> because uc_width uses the encoding only for the CJK check, and uc_width is
> only called by wcwidth in the UTF-8 case (and UTF-8 is not a CJK encoding).
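Here is a patch that does this: it splits the encoding-independent part of
uc_width() out into a new function, uc_width1(), and makes wcwidth() call
uc_width1() directly, so is_cjk_encoding() is no longer called in the UTF-8
case. It is not a big optimization, though.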
--
Alexander.
diff --git a/lib/uniwidth.in.h b/lib/uniwidth.in.h
index e806744..0af2359 100644
--- a/lib/uniwidth.in.h
+++ b/lib/uniwidth.in.h
@@ -44,6 +44,13 @@ extern int
#endif
;
+extern int
+ uc_width1 (ucs4_t uc)
+#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96)
+ __attribute__ ((__pure__))
+#endif
+ ;
+
/* Determine number of column positions required for first N units
(or fewer if S ends before this) in S. */
extern int
diff --git a/lib/uniwidth/width.c b/lib/uniwidth/width.c
index a314e71..00c7789 100644
--- a/lib/uniwidth/width.c
+++ b/lib/uniwidth/width.c
@@ -312,7 +312,7 @@ static const signed char nonspacing_table_ind[240] = {
/* Determine number of column positions required for UC. */
int
-uc_width (ucs4_t uc, const char *encoding)
+uc_width1 (ucs4_t uc)
{
/* Test for non-spacing or control character. */
if ((uc >> 9) < 240)
@@ -359,10 +359,17 @@ uc_width (ucs4_t uc, const char *encoding)
|| (uc >= 0x30000 && uc <= 0x3ffff) /* Tertiary Ideographic Plane */
) )
return 2;
+ return 1;
+}
+
+int
+uc_width (ucs4_t uc, const char *encoding)
+{
+ int w = uc_width1 (uc);
/* In ancient CJK encodings, Cyrillic and most other characters are
double-width as well. */
if (uc >= 0x00A1 && uc < 0xFF61 && uc != 0x20A9
- && is_cjk_encoding (encoding))
+ && w == 1 && is_cjk_encoding (encoding))
return 2;
- return 1;
+ return w;
}
diff --git a/lib/wcwidth.c b/lib/wcwidth.c
index a006ca7..1b5fb36 100644
--- a/lib/wcwidth.c
+++ b/lib/wcwidth.c
@@ -36,7 +36,7 @@ wcwidth (wchar_t wc)
{
/* We assume that in a UTF-8 locale, a wide character is the same as a
Unicode character. */
- return uc_width (wc, encoding);
+ return uc_width1 (wc);
}
else
{