On Mon, May 02, 2016 at 08:47:03PM +1200, Michael Cree wrote: > On Mon, May 02, 2016 at 12:01:18AM -0400, James McCoy wrote: > > Thanks for the analysis and working on a patch. I'll note that > > _Alignas, if it had worked, would likely have been too new a compiler > > feature to use. > > > > It looks like Debian's ia64 porterbox is still running, so I can try > > reproducing the problem there (using prctl to force SIGBUS) and then > > fiddling around with the code. > > Here goes further analysis. Running utf-test under gdb I set a break > point at line 807 of utf-test.c and after a number of iterations of > the enclosing loop I get: > > Breakpoint 1, test_utf_conversions (pool=0x20001018028) > at > /extra/mjc/debian/subversion/subversion-1.9.4/subversion/tests/libsvn_subr/utf-test.c:807 > 807 SVN_ERR_ASSERT(0 == strcmp(result->data, tc->result)); > (gdb) print tc->source > $20 = 0x120003117 "\330\064\335\036" > (gdb) print result->data > $21 = 0x20001018218 "\360\220\204\236" > (gdb) print tc->result > $22 = 0x12000373c "\360\235\204\236" > (gdb) cont > Continuing. > svn_tests: E235000: In file > '/extra/mjc/debian/subversion/subversion-1.9.4/subversion/tests/libsvn_subr/utf-test.c' > line 807: assertion failed (0 == strcmp(result->data, tc->result)) > FAIL: lt-utf-test 9: test svn_utf__utf{16,32}_to_utf8 > [Inferior 1 (process 24827) exited with code 01] > > > > On this iteration I believe it has just read line 769 of utf-test.c, > i.e., this line: > > { UTF_16_BE, "\xD8\x34" "\xDD\x1E" "\0\0", "\xf0\x9d\x84\x9e" }, /* > U+01D11E */ > > into variable tc, and it passes tc->source (which is at address > 0x120003117, i.e., misaligned for apr_uint16_t type) to > svn_utf__utf16_to_utf8() and the result is incorrect and results in > the failed assert.
Thanks (again) for the very thorough analysis. Sorry it's taken so long for me to circle back to this. If I understand correctly, the easiest fix would be to memcpy() tc->source to an appropriately sized apr_uint16_t/apr_int32_t array. That could then be used for the svn_utf__utf{16,32}_to_utf8() calls. If so, could you try the attached patch? Cheers, -- James GPG Key: 4096R/91BF BF4D 6956 BD5D F7B7 2D23 DFE6 91AE 331B A3DB
Index: trunk/subversion/tests/libsvn_subr/utf-test.c =================================================================== --- trunk/subversion/tests/libsvn_subr/utf-test.c (revision 1825748) +++ trunk/subversion/tests/libsvn_subr/utf-test.c (working copy) @@ -752,8 +752,10 @@ { svn_boolean_t sixteenbit; svn_boolean_t bigendian; + apr_size_t sourcelen; const char *source; const char *result; + svn_boolean_t counted; } tests[] = { #define UTF_32_LE FALSE, FALSE @@ -762,33 +764,37 @@ #define UTF_16_BE TRUE, TRUE /* Normal character conversion */ - { UTF_32_LE, "t\0\0\0" "e\0\0\0" "s\0\0\0" "t\0\0\0" "\0\0\0\0", "test" }, - { UTF_32_BE, "\0\0\0t" "\0\0\0e" "\0\0\0s" "\0\0\0t" "\0\0\0\0", "test" }, - { UTF_16_LE, "t\0" "e\0" "s\0" "t\0" "\0\0", "test" }, - { UTF_16_BE, "\0t" "\0e" "\0s" "\0t" "\0\0", "test" }, + { UTF_32_LE, 4, "t\0\0\0" "e\0\0\0" "s\0\0\0" "t\0\0\0" "\0\0\0\0", "test", FALSE }, + { UTF_32_BE, 4, "\0\0\0t" "\0\0\0e" "\0\0\0s" "\0\0\0t" "\0\0\0\0", "test", FALSE }, + { UTF_16_LE, 4, "t\0" "e\0" "s\0" "t\0" "\0\0", "test", FALSE }, + { UTF_16_BE, 4, "\0t" "\0e" "\0s" "\0t" "\0\0", "test", FALSE }, /* Valid surrogate pairs */ - { UTF_16_LE, "\x00\xD8" "\x00\xDC" "\0\0", "\xf0\x90\x80\x80" }, /* U+010000 */ - { UTF_16_LE, "\x34\xD8" "\x1E\xDD" "\0\0", "\xf0\x9d\x84\x9e" }, /* U+01D11E */ - { UTF_16_LE, "\xFF\xDB" "\xFD\xDF" "\0\0", "\xf4\x8f\xbf\xbd" }, /* U+10FFFD */ + { UTF_16_LE, 2, "\x00\xD8" "\x00\xDC" "\0\0", "\xf0\x90\x80\x80", FALSE }, /* U+010000 */ + { UTF_16_LE, 2, "\x34\xD8" "\x1E\xDD" "\0\0", "\xf0\x9d\x84\x9e", FALSE }, /* U+01D11E */ + { UTF_16_LE, 2, "\xFF\xDB" "\xFD\xDF" "\0\0", "\xf4\x8f\xbf\xbd", FALSE }, /* U+10FFFD */ - { UTF_16_BE, "\xD8\x00" "\xDC\x00" "\0\0", "\xf0\x90\x80\x80" }, /* U+010000 */ - { UTF_16_BE, "\xD8\x34" "\xDD\x1E" "\0\0", "\xf0\x9d\x84\x9e" }, /* U+01D11E */ - { UTF_16_BE, "\xDB\xFF" "\xDF\xFD" "\0\0", "\xf4\x8f\xbf\xbd" }, /* U+10FFFD */ + { UTF_16_BE, 2, "\xD8\x00" "\xDC\x00" "\0\0", "\xf0\x90\x80\x80", FALSE }, /* U+010000 */ + { UTF_16_BE, 2, "\xD8\x34" "\xDD\x1E" "\0\0", "\xf0\x9d\x84\x9e", FALSE }, /* U+01D11E */ + { UTF_16_BE, 2, "\xDB\xFF" "\xDF\xFD" "\0\0", "\xf4\x8f\xbf\xbd", FALSE }, /* U+10FFFD */ /* Swapped, single and trailing surrogate pairs */ - { UTF_16_LE, "*\0" "\x00\xDC" "\x00\xD8" "*\0\0\0", "*\xed\xb0\x80" "\xed\xa0\x80*" }, - { UTF_16_LE, "*\0" "\x1E\xDD" "*\0\0\0", "*\xed\xb4\x9e*" }, - { UTF_16_LE, "*\0" "\xFF\xDB" "*\0\0\0", "*\xed\xaf\xbf*" }, - { UTF_16_LE, "\x1E\xDD" "\0\0", "\xed\xb4\x9e" }, - { UTF_16_LE, "\xFF\xDB" "\0\0", "\xed\xaf\xbf" }, + { UTF_16_LE, 4, "*\0" "\x00\xDC" "\x00\xD8" "*\0\0\0", "*\xed\xb0\x80" "\xed\xa0\x80*", FALSE }, + { UTF_16_LE, 3, "*\0" "\x1E\xDD" "*\0\0\0", "*\xed\xb4\x9e*", FALSE }, + { UTF_16_LE, 3, "*\0" "\xFF\xDB" "*\0\0\0", "*\xed\xaf\xbf*", FALSE }, + { UTF_16_LE, 1, "\x1E\xDD" "\0\0", "\xed\xb4\x9e", FALSE }, + { UTF_16_LE, 1, "\xFF\xDB" "\0\0", "\xed\xaf\xbf", FALSE }, - { UTF_16_BE, "\0*" "\xDC\x00" "\xD8\x00" "\0*\0\0", "*\xed\xb0\x80" "\xed\xa0\x80*" }, - { UTF_16_BE, "\0*" "\xDD\x1E" "\0*\0\0", "*\xed\xb4\x9e*" }, - { UTF_16_BE, "\0*" "\xDB\xFF" "\0*\0\0", "*\xed\xaf\xbf*" }, - { UTF_16_BE, "\xDD\x1E" "\0\0", "\xed\xb4\x9e" }, - { UTF_16_BE, "\xDB\xFF" "\0\0", "\xed\xaf\xbf" }, + { UTF_16_BE, 4, "\0*" "\xDC\x00" "\xD8\x00" "\0*\0\0", "*\xed\xb0\x80" "\xed\xa0\x80*", FALSE }, + { UTF_16_BE, 3, "\0*" "\xDD\x1E" "\0*\0\0", "*\xed\xb4\x9e*", FALSE }, + { UTF_16_BE, 3, "\0*" "\xDB\xFF" "\0*\0\0", "*\xed\xaf\xbf*", FALSE }, + { UTF_16_BE, 1, "\xDD\x1E" "\0\0", "\xed\xb4\x9e", FALSE }, + { UTF_16_BE, 1, "\xDB\xFF" "\0\0", "\xed\xaf\xbf", FALSE }, + /* Counted strings with NUL characters */ + { UTF_16_LE, 3, "x\0" "\0\0" "y\0" "*\0", "x\0y", TRUE }, + { UTF_32_BE, 3, "\0\0\0x" "\0\0\0\0" "\0\0\0y" "\0\0\0*", "x\0y", TRUE }, + #undef UTF_32_LE #undef UTF_32_BE #undef UTF_16_LE @@ -799,33 +805,35 @@ const struct cvt_test_t *tc; const svn_string_t *result; - int i; +#define SRCLEN 5 + apr_uint16_t source16[SRCLEN]; + apr_int32_t source32[SRCLEN]; - for (i = 1, tc = tests; tc->source; ++tc, ++i) + for (tc = tests; tc->source; ++tc) { if (tc->sixteenbit) - SVN_ERR(svn_utf__utf16_to_utf8(&result, (const void*)tc->source, - SVN_UTF__UNKNOWN_LENGTH, - tc->bigendian, pool, pool)); + { + memset(&source16, 0, SRCLEN * sizeof(*source16)); + memcpy(&source16, tc->source, (tc->sourcelen + 1) * sizeof(*source16)); + SVN_ERR(svn_utf__utf16_to_utf8(&result, source16, + tc->counted ? tc->sourcelen : SVN_UTF__UNKNOWN_LENGTH, + tc->bigendian, pool, pool)); + } else - SVN_ERR(svn_utf__utf32_to_utf8(&result, (const void*)tc->source, - SVN_UTF__UNKNOWN_LENGTH, - tc->bigendian, pool, pool)); - SVN_ERR_ASSERT(0 == strcmp(result->data, tc->result)); + { + memset(&source32, 0, SRCLEN * sizeof(*source32)); + memcpy(&source32, tc->source, (tc->sourcelen + 1) * sizeof(*source32)); + SVN_ERR(svn_utf__utf32_to_utf8(&result, source32, + tc->counted ? tc->sourcelen : SVN_UTF__UNKNOWN_LENGTH, + tc->bigendian, pool, pool)); + } + if (tc->counted) + SVN_ERR_ASSERT(0 == memcmp(result->data, tc->result, tc->sourcelen)); + else + SVN_ERR_ASSERT(0 == strcmp(result->data, tc->result)); } +#undef SRCLEN - /* Test counted strings with NUL characters */ - SVN_ERR(svn_utf__utf16_to_utf8( - &result, (void*)("x\0" "\0\0" "y\0" "*\0"), 3, - FALSE, pool, pool)); - SVN_ERR_ASSERT(0 == memcmp(result->data, "x\0y", 3)); - - SVN_ERR(svn_utf__utf32_to_utf8( - &result, - (void*)("\0\0\0x" "\0\0\0\0" "\0\0\0y" "\0\0\0*"), 3, - TRUE, pool, pool)); - SVN_ERR_ASSERT(0 == memcmp(result->data, "x\0y", 3)); - return SVN_NO_ERROR; }