Hi, Valgrind memory checker detects the following access to uninitialized memory:
==15698== Conditional jump or move depends on uninitialised value(s) ==15698== at 0x811CC12: utfc_ptr2len (mbyte.c:1709) ==15698== by 0x805CB9F: str_foldcase (charset.c:493) ==15698== by 0x819AEDE: check_keyword_id (syntax.c:3192) ==15698== by 0x8198DD9: syn_current_attr (syntax.c:1907) ==15698== by 0x8198B19: get_syntax_attr (syntax.c:1771) ==15698== by 0x8165729: win_line (screen.c:3895) ==15698== by 0x81614E5: win_update (screen.c:1765) ==15698== by 0x815F6F0: update_screen (screen.c:522) ==15698== by 0x80D4CD6: vgetorpeek (getchar.c:2672) ==15698== by 0x80D396F: vgetc (getchar.c:1710) ==15698== by 0x80D3A15: safe_vgetc (getchar.c:1757) ==15698== by 0x8063FB0: edit (edit.c:711) ==15698== by 0x812DDB0: invoke_edit (normal.c:8813) ==15698== by 0x812DD56: nv_edit (normal.c:8786) ==15698== by 0x81216DE: normal_cmd (normal.c:1152) ==15698== by 0x80E4E2E: main_loop (main.c:1177) ==15698== by 0x80E497E: main (main.c:936) (...then follow many other errors...) I can reproduce it 100% of the time but the way to reproduce it is too complicated to attempt to explain it here. I have not found a simpler test case. Attached patch should hopefully suffice to clarify the problem anyway. The code in charset.c is: 447 #ifdef FEAT_MBYTE 448 if (enc_utf8 || (has_mbyte && MB_BYTE2LEN(STR_CHAR(i)) > 1)) 449 { 450 if (enc_utf8) 451 { 452 int c, lc; 453 454 c = utf_ptr2char(STR_PTR(i)); 455 lc = utf_tolower(c); 456 if (c != lc) 457 { 458 int ol = utf_char2len(c); 459 int nl = utf_char2len(lc); 460 461 /* If the byte length changes need to shift the following 462 * characters forward or backward. */ 463 if (ol != nl) 464 { 465 if (nl > ol) 466 { 467 if (buf == NULL ? ga_grow(&ga, nl - ol + 1) == FAIL 468 : len + nl - ol >= buflen) 469 { 470 /* out of memory, keep old char */ 471 lc = c; 472 nl = ol; 473 } 474 } 475 if (ol != nl) 476 { 477 if (buf == NULL) 478 { 479 STRMOVE(GA_PTR(i) + nl, GA_PTR(i) + ol); 480 ga.ga_len += nl - ol; 481 } 482 else 483 { 484 STRMOVE(buf + i + nl, buf + i + ol); 485 len += nl - ol; 486 } 487 } 488 } !489 (void)utf_char2bytes(lc, STR_PTR(i)); 490 } 491 } 492 /* skip to next multi-byte char */ !493 i += (*mb_ptr2len)(STR_PTR(i)); 494 } 495 else 496 #endif Bug happens when string STR_PTR(i) contains a truncated utf-8 sequence. In that case, utf_ptr2char(STR_PTR(i)) at line 454 returns only the first byte of the truncated utf-8 sequence. If this first byte is >= 0x80, then utf_char2len(c) at line 458 sets 'ol' to something > 1 (which is inconsistent with the fact that line 454 only returned the first byte). Then call to utf_char2bytes(lc, STR_PTR(i)) at line 489 can write multiple bytes, hence overwriting NUL end of string and then function keep accessing several bytes beyond end of string (which is uninitialized). Attached patch fixes it. It uses utf_ptr2len(STR_PTR(i)) rather than utf_char2len(c) to compute the length of the utf-8 sequence, so invalid utf-8 sequence can be detected and skipped (converting case for invalid utf-8 sequence does not make sense anyway). I'm using Vim-7.2a BETA (huge) with patches 1-5 on Linux x86, utf-8 locale. -- Dominique --~--~---------~--~----~------------~-------~--~----~ You received this message from the "vim_dev" maillist. For more information, visit http://www.vim.org/maillist.php -~----------~----~----~----~------~----~------~--~---
Index: charset.c =================================================================== RCS file: /cvsroot/vim/vim7/src/charset.c,v retrieving revision 1.29 diff -c -r1.29 charset.c *** charset.c 24 Jun 2008 21:25:31 -0000 1.29 --- charset.c 28 Jun 2008 07:50:55 -0000 *************** *** 449,461 **** { if (enc_utf8) { ! int c, lc; ! ! c = utf_ptr2char(STR_PTR(i)); ! lc = utf_tolower(c); ! if (c != lc) { - int ol = utf_char2len(c); int nl = utf_char2len(lc); /* If the byte length changes need to shift the following --- 449,468 ---- { if (enc_utf8) { ! int c = utf_ptr2char(STR_PTR(i)); ! int ol = utf_ptr2len(STR_PTR(i)); ! int lc = utf_tolower(c); ! ! /* Above call to utf_ptr2char(STR_PTR(i)) may return the ! * first byte when STR_PTR(i) points to an invalid/truncated ! * utf-8 sequence. Guard against invalid/truncated utf-8 ! * sequence or else call to utf_char2bytes(lc, STR_PTR(i)) ! * could overwrite NUL and then function would access beyond ! * end of string. Converting case does not make sense for ! * invalid/truncated utf-8 sequence anyway. */ ! if (c != lc ! && (c < 0x80 || ol > 1)) /* valid utf-8 sequence */ { int nl = utf_char2len(lc); /* If the byte length changes need to shift the following