Summary: Certain Unicode code points expand to more than two code points when run through u_strToUpper().
SQLite's src/ext/icu/icu.c contains icuCaseFunc16() which implements custom upper() and lower() functions. It allocates a buffer of twice the input size (plus room for a nul terminator) because some code points take more space when uppercased (or lowercased) than the input. Code points such as U+FB04 (ffl ligature) uppercase to _three_ code points, so the converted text can fill the buffer completely and leave no room for the terminator. ICU then returns the result without nul-terminating it, and because the result is handed to sqlite3_result_text16() with a length of -1 (read until nul), this can lead to a read past the end of the buffer. The following patch catches the U_BUFFER_OVERFLOW_ERROR result, re-allocates to the actual size needed, and passes the explicit output length instead of relying on a terminator. CL patching Chromium is: https://codereview.chromium.org/1704103002/ -scott --- third_party/sqlite/src/ext/icu/icu.c | 31 +++++++++++++++++++++++++------ third_party/sqlite/src/test/icu.test | 7 +++++++ 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/third_party/sqlite/src/ext/icu/icu.c b/third_party/sqlite/src/ext/icu/icu.c index 7e2b800..d384f71 100644 --- a/third_party/sqlite/src/ext/icu/icu.c +++ b/third_party/sqlite/src/ext/icu/icu.c @@ -341,26 +341,45 @@ static void icuCaseFunc16(sqlite3_context *p, int nArg, sqlite3_value **apArg){ if( !zInput ){ return; } - nInput = sqlite3_value_bytes16(apArg[0]); + nOutput = nInput = sqlite3_value_bytes16(apArg[0]); - nOutput = nInput * 2 + 2; zOutput = sqlite3_malloc(nOutput); if( !zOutput ){ return; } if( sqlite3_user_data(p) ){ - u_strToUpper(zOutput, nOutput/2, zInput, nInput/2, zLocale, &status); + nOutput = u_strToUpper( + zOutput, nOutput/2, zInput, nInput/2, zLocale, &status) * 2; }else{ - u_strToLower(zOutput, nOutput/2, zInput, nInput/2, zLocale, &status); + nOutput = u_strToLower( + zOutput, nOutput/2, zInput, nInput/2, zLocale, &status) * 2; } - if( !U_SUCCESS(status) ){ + if ( status == U_BUFFER_OVERFLOW_ERROR ) { + UChar* newOutput = sqlite3_realloc(zOutput, nOutput); + if( !newOutput ){ + sqlite3_free(zOutput); + return; + } + zOutput = newOutput; + status = U_ZERO_ERROR; + if( sqlite3_user_data(p) ){ + nOutput = u_strToUpper( + zOutput, nOutput/2, zInput, nInput/2, zLocale, &status) * 2; + }else{ 
+ nOutput = u_strToLower( + zOutput, nOutput/2, zInput, nInput/2, zLocale, &status) * 2; + } + } + + if( U_FAILURE(status) ){ icuFunctionError(p, "u_strToLower()/u_strToUpper", status); + sqlite3_free(zOutput); return; } - sqlite3_result_text16(p, zOutput, -1, xFree); + sqlite3_result_text16(p, zOutput, nOutput, xFree); } /* diff --git a/third_party/sqlite/src/test/icu.test b/third_party/sqlite/src/test/icu.test index 73cb9b9..22948aa 100644 --- a/third_party/sqlite/src/test/icu.test +++ b/third_party/sqlite/src/test/icu.test @@ -56,6 +56,10 @@ set ::ograve "\xF2" # set ::szlig "\xDF" +# U+FB03 (ffi ligature) and U+FB04 (ffl ligature). They're uppercased +# to 'FFI' and 'FFL'. +set ::ffi_ffl "\ufb03\ufb04" + # Tests of the upper()/lower() functions. # test_expr icu-2.1 {i1='HellO WorlD'} {upper(i1)} {HELLO WORLD} @@ -72,6 +76,9 @@ test_expr icu-2.6 {i1=$::OGRAVE} {upper(i1)} $::OGRAVE test_expr icu-2.7 {i1=$::szlig} {upper(i1)} "SS" test_expr icu-2.8 {i1='SS'} {lower(i1)} "ss" +test_expr icu-2.9 {i1=$::ffi_ffl} {upper(i1)} "FFIFFL" +test_expr icu-2.10 {i1=$::ffi_ffl} {lower(i1)} $::ffi_ffl + # In turkish (locale="tr_TR"), the lower case version of I # is "small dotless i" (code point 0x131 (decimal 305)). # -- 2.7.0