raster pushed a commit to branch master.

http://git.enlightenment.org/core/efl.git/commit/?id=abc146f37f87200142d44d0feab8986dd219e1fd

commit abc146f37f87200142d44d0feab8986dd219e1fd
Author: Boris Faure <bill...@gmail.com>
Date:   Sat Jun 20 11:37:56 2020 +0100

    eina_unicode: have explicit type conversions
    
    Summary:
    Found by running terminology's tests with UBSAN:
    include/eina-1/eina/eina_inline_unicode.x:
    runtime error: implicit conversion from type 'char' of value
    -62 (8-bit, signed) to type 'unsigned char' changed the value to 194
    (8-bit, unsigned)
    
    Reviewers: #reviewers, vtorri
    
    Subscribers: cedric, #reviewers, #committers
    
    Tags: #efl
    
    Differential Revision: https://phab.enlightenment.org/D11972
---
 src/lib/eina/eina_inline_unicode.x |  7 +++---
 src/lib/eina/eina_unicode.c        | 48 ++++++++++++++++++++++++--------------
 2 files changed, 35 insertions(+), 20 deletions(-)

diff --git a/src/lib/eina/eina_inline_unicode.x b/src/lib/eina/eina_inline_unicode.x
index 7b7f7592b4..08e1e15018 100644
--- a/src/lib/eina/eina_inline_unicode.x
+++ b/src/lib/eina/eina_inline_unicode.x
@@ -40,7 +40,7 @@ eina_unicode_utf8_next_get(const char *buf, int *iindex)
    ind = *iindex;
 
    /* if this char is the null terminator, exit */
-   if ((d = buf[ind++]) == 0) return 0;
+   if ((d = (unsigned char)buf[ind++]) == 0) return 0;
 
    if ((d & 0x80) == 0)
      { // 1 byte (7bit) - 0xxxxxxx
@@ -51,7 +51,8 @@ eina_unicode_utf8_next_get(const char *buf, int *iindex)
    if ((d & 0xe0) == 0xc0)
      { // 2 byte (11bit) - 110xxxxx 10xxxxxx
         r  = (d & 0x1f) << 6;
-        if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) ||
+        if (((d = (unsigned char)buf[ind++]) == 0) ||
+            EINA_IS_INVALID_BYTE(d) ||
             !EINA_IS_CONTINUATION_BYTE(d)) goto error;
         r |= (d & 0x3f);
         if (r <= 0x7F) goto error;
@@ -65,7 +66,7 @@ eina_unicode_utf8_next_get(const char *buf, int *iindex)
  * we just use the invalid unicode codepoints 8 lower bits represent
  * the original char */
 error:
-   d = buf[*iindex];
+   d = (unsigned char)buf[*iindex];
    (*iindex)++;
    return ERROR_REPLACEMENT_BASE | d;
 }
diff --git a/src/lib/eina/eina_unicode.c b/src/lib/eina/eina_unicode.c
index 0bb70ffae9..6ede02ccec 100644
--- a/src/lib/eina/eina_unicode.c
+++ b/src/lib/eina/eina_unicode.c
@@ -193,7 +193,7 @@ eina_unicode_escape(const Eina_Unicode *str)
 EAPI Eina_Unicode
 _eina_unicode_utf8_next_get(int ind,
                             unsigned char d,
-                            const char *buf, 
+                            const char *buf,
                            int *iindex)
 {
    Eina_Unicode r;
@@ -201,10 +201,12 @@ _eina_unicode_utf8_next_get(int ind,
    if ((d & 0xf0) == 0xe0)
      { // 3 byte (16bit) - 1110xxxx 10xxxxxx 10xxxxxx
         r  = (d & 0x0f) << 12;
-        if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) ||
+        if (((d = (unsigned char)buf[ind++]) == 0) ||
+            EINA_IS_INVALID_BYTE(d) ||
             !EINA_IS_CONTINUATION_BYTE(d)) goto error;
         r |= (d & 0x3f) << 6;
-        if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) ||
+        if (((d = (unsigned char)buf[ind++]) == 0) ||
+            EINA_IS_INVALID_BYTE(d) ||
             !EINA_IS_CONTINUATION_BYTE(d)) goto error;
         r |= (d & 0x3f);
         if (r <= 0x7FF) goto error;
@@ -214,13 +216,16 @@ _eina_unicode_utf8_next_get(int ind,
    if ((d & 0xf8) == 0xf0)
      { // 4 byte (21bit) - 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
         r  = (d & 0x07) << 18;
-        if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) ||
+        if (((d = (unsigned char)buf[ind++]) == 0) ||
+            EINA_IS_INVALID_BYTE(d) ||
             !EINA_IS_CONTINUATION_BYTE(d)) goto error;
         r |= (d & 0x3f) << 12;
-        if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) ||
+        if (((d = (unsigned char)buf[ind++]) == 0) ||
+            EINA_IS_INVALID_BYTE(d) ||
             !EINA_IS_CONTINUATION_BYTE(d)) goto error;
         r |= (d & 0x3f) << 6;
-        if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) ||
+        if (((d = (unsigned char)buf[ind++]) == 0) ||
+            EINA_IS_INVALID_BYTE(d) ||
             !EINA_IS_CONTINUATION_BYTE(d)) goto error;
         r |= (d & 0x3f);
         if (r <= 0xFFFF) goto error;
@@ -230,16 +235,20 @@ _eina_unicode_utf8_next_get(int ind,
    if ((d & 0xfc) == 0xf8)
      { // 5 byte (26bit) - 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
         r  = (d & 0x03) << 24;
-        if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) ||
+        if (((d = (unsigned char)buf[ind++]) == 0) ||
+            EINA_IS_INVALID_BYTE(d) ||
             !EINA_IS_CONTINUATION_BYTE(d)) goto error;
         r |= (d & 0x3f) << 18;
-        if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) ||
+        if (((d = (unsigned char)buf[ind++]) == 0) ||
+            EINA_IS_INVALID_BYTE(d) ||
             !EINA_IS_CONTINUATION_BYTE(d)) goto error;
         r |= (d & 0x3f) << 12;
-        if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) ||
+        if (((d = (unsigned char)buf[ind++]) == 0) ||
+            EINA_IS_INVALID_BYTE(d) ||
             !EINA_IS_CONTINUATION_BYTE(d)) goto error;
         r |= (d & 0x3f) << 6;
-        if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) ||
+        if (((d = (unsigned char)buf[ind++]) == 0) ||
+            EINA_IS_INVALID_BYTE(d) ||
             !EINA_IS_CONTINUATION_BYTE(d)) goto error;
         r |= (d & 0x3f);
         if (r <= 0x1FFFFF) goto error;
@@ -249,19 +258,24 @@ _eina_unicode_utf8_next_get(int ind,
    if ((d & 0xfe) == 0xfc)
      { // 6 byte (31bit) - 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
         r  = (d & 0x01) << 30;
-        if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) ||
+        if (((d = (unsigned char)buf[ind++]) == 0)
+            || EINA_IS_INVALID_BYTE(d) ||
             !EINA_IS_CONTINUATION_BYTE(d)) goto error;
         r |= (d & 0x3f) << 24;
-        if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) ||
+        if (((d = (unsigned char) buf[ind++]) == 0) ||
+            EINA_IS_INVALID_BYTE(d) ||
             !EINA_IS_CONTINUATION_BYTE(d)) goto error;
         r |= (d & 0x3f) << 18;
-        if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) ||
+        if (((d = (unsigned char)buf[ind++]) == 0) ||
+            EINA_IS_INVALID_BYTE(d) ||
             !EINA_IS_CONTINUATION_BYTE(d)) goto error;
         r |= (d & 0x3f) << 12;
-        if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) ||
+        if (((d = (unsigned char)buf[ind++]) == 0) ||
+            EINA_IS_INVALID_BYTE(d) ||
             !EINA_IS_CONTINUATION_BYTE(d)) goto error;
         r |= (d & 0x3f) << 6;
-        if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) ||
+        if (((d = (unsigned char)buf[ind++]) == 0) ||
+            EINA_IS_INVALID_BYTE(d) ||
             !EINA_IS_CONTINUATION_BYTE(d)) goto error;
         r |= (d & 0x3f);
         if (r <= 0x3FFFFFF) goto error;
@@ -273,7 +287,7 @@ _eina_unicode_utf8_next_get(int ind,
  * we just use the invalid unicode codepoints 8 lower bits represent
  * the original char */
 error:
-   d = buf[*iindex];
+   d = (unsigned char)buf[*iindex];
    (*iindex)++;
    return ERROR_REPLACEMENT_BASE | d;
 }
@@ -298,7 +312,7 @@ eina_unicode_utf8_get_prev(const char *buf, int *iindex)
    /* Next advance iindex to previous codepoint */
    ind = *iindex;
    ind--;
-   while ((ind > 0) && ((buf[ind] & 0xc0) == 0x80))
+   while ((ind > 0) && (((unsigned char)buf[ind] & 0xc0) == 0x80))
      ind--;
 
    *iindex = ind;

-- 


Reply via email to