This test has been failing for some time on Debian's alpha buildd[0].
One of the alpha porters looked into it and gave a thorough diagnosis[1]
of the issue which basically boils down to: using a char* as
apr_uint16_t*/apr_int32_t* is going to result in unaligned access.
In a (much belated) reply[2], I proposed the attached patch which
memcpy()s the string to an array of the appropriate type before passing
it through to svn_utf__utf{16,32}_to_utf8(). I also took the
opportunity to consolidate the handing of counted and non-counted
conversions.
Does this look appropriate? If so, I can commit to trunk and nominate
for 1.10.
[0]: https://buildd.debian.org/status/logs.php?pkg=subversion&arch=alpha
[1]: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=823133#22
[2]: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=823133#27
Cheers,
--
James
GPG Key: 4096R/91BF BF4D 6956 BD5D F7B7 2D23 DFE6 91AE 331B A3DB
Index: trunk/subversion/tests/libsvn_subr/utf-test.c
===================================================================
--- trunk/subversion/tests/libsvn_subr/utf-test.c (revision 1825748)
+++ trunk/subversion/tests/libsvn_subr/utf-test.c (working copy)
@@ -752,8 +752,10 @@
{
svn_boolean_t sixteenbit;
svn_boolean_t bigendian;
+ apr_size_t sourcelen;
const char *source;
const char *result;
+ svn_boolean_t counted;
} tests[] = {
#define UTF_32_LE FALSE, FALSE
@@ -762,33 +764,37 @@
#define UTF_16_BE TRUE, TRUE
/* Normal character conversion */
- { UTF_32_LE, "t\0\0\0" "e\0\0\0" "s\0\0\0" "t\0\0\0" "\0\0\0\0", "test" },
- { UTF_32_BE, "\0\0\0t" "\0\0\0e" "\0\0\0s" "\0\0\0t" "\0\0\0\0", "test" },
- { UTF_16_LE, "t\0" "e\0" "s\0" "t\0" "\0\0", "test" },
- { UTF_16_BE, "\0t" "\0e" "\0s" "\0t" "\0\0", "test" },
+ { UTF_32_LE, 4, "t\0\0\0" "e\0\0\0" "s\0\0\0" "t\0\0\0" "\0\0\0\0", "test", FALSE },
+ { UTF_32_BE, 4, "\0\0\0t" "\0\0\0e" "\0\0\0s" "\0\0\0t" "\0\0\0\0", "test", FALSE },
+ { UTF_16_LE, 4, "t\0" "e\0" "s\0" "t\0" "\0\0", "test", FALSE },
+ { UTF_16_BE, 4, "\0t" "\0e" "\0s" "\0t" "\0\0", "test", FALSE },
/* Valid surrogate pairs */
- { UTF_16_LE, "\x00\xD8" "\x00\xDC" "\0\0", "\xf0\x90\x80\x80" }, /* U+010000 */
- { UTF_16_LE, "\x34\xD8" "\x1E\xDD" "\0\0", "\xf0\x9d\x84\x9e" }, /* U+01D11E */
- { UTF_16_LE, "\xFF\xDB" "\xFD\xDF" "\0\0", "\xf4\x8f\xbf\xbd" }, /* U+10FFFD */
+ { UTF_16_LE, 2, "\x00\xD8" "\x00\xDC" "\0\0", "\xf0\x90\x80\x80", FALSE }, /* U+010000 */
+ { UTF_16_LE, 2, "\x34\xD8" "\x1E\xDD" "\0\0", "\xf0\x9d\x84\x9e", FALSE }, /* U+01D11E */
+ { UTF_16_LE, 2, "\xFF\xDB" "\xFD\xDF" "\0\0", "\xf4\x8f\xbf\xbd", FALSE }, /* U+10FFFD */
- { UTF_16_BE, "\xD8\x00" "\xDC\x00" "\0\0", "\xf0\x90\x80\x80" }, /* U+010000 */
- { UTF_16_BE, "\xD8\x34" "\xDD\x1E" "\0\0", "\xf0\x9d\x84\x9e" }, /* U+01D11E */
- { UTF_16_BE, "\xDB\xFF" "\xDF\xFD" "\0\0", "\xf4\x8f\xbf\xbd" }, /* U+10FFFD */
+ { UTF_16_BE, 2, "\xD8\x00" "\xDC\x00" "\0\0", "\xf0\x90\x80\x80", FALSE }, /* U+010000 */
+ { UTF_16_BE, 2, "\xD8\x34" "\xDD\x1E" "\0\0", "\xf0\x9d\x84\x9e", FALSE }, /* U+01D11E */
+ { UTF_16_BE, 2, "\xDB\xFF" "\xDF\xFD" "\0\0", "\xf4\x8f\xbf\xbd", FALSE }, /* U+10FFFD */
/* Swapped, single and trailing surrogate pairs */
- { UTF_16_LE, "*\0" "\x00\xDC" "\x00\xD8" "*\0\0\0", "*\xed\xb0\x80" "\xed\xa0\x80*" },
- { UTF_16_LE, "*\0" "\x1E\xDD" "*\0\0\0", "*\xed\xb4\x9e*" },
- { UTF_16_LE, "*\0" "\xFF\xDB" "*\0\0\0", "*\xed\xaf\xbf*" },
- { UTF_16_LE, "\x1E\xDD" "\0\0", "\xed\xb4\x9e" },
- { UTF_16_LE, "\xFF\xDB" "\0\0", "\xed\xaf\xbf" },
+ { UTF_16_LE, 4, "*\0" "\x00\xDC" "\x00\xD8" "*\0\0\0", "*\xed\xb0\x80" "\xed\xa0\x80*", FALSE },
+ { UTF_16_LE, 3, "*\0" "\x1E\xDD" "*\0\0\0", "*\xed\xb4\x9e*", FALSE },
+ { UTF_16_LE, 3, "*\0" "\xFF\xDB" "*\0\0\0", "*\xed\xaf\xbf*", FALSE },
+ { UTF_16_LE, 1, "\x1E\xDD" "\0\0", "\xed\xb4\x9e", FALSE },
+ { UTF_16_LE, 1, "\xFF\xDB" "\0\0", "\xed\xaf\xbf", FALSE },
- { UTF_16_BE, "\0*" "\xDC\x00" "\xD8\x00" "\0*\0\0", "*\xed\xb0\x80" "\xed\xa0\x80*" },
- { UTF_16_BE, "\0*" "\xDD\x1E" "\0*\0\0", "*\xed\xb4\x9e*" },
- { UTF_16_BE, "\0*" "\xDB\xFF" "\0*\0\0", "*\xed\xaf\xbf*" },
- { UTF_16_BE, "\xDD\x1E" "\0\0", "\xed\xb4\x9e" },
- { UTF_16_BE, "\xDB\xFF" "\0\0", "\xed\xaf\xbf" },
+ { UTF_16_BE, 4, "\0*" "\xDC\x00" "\xD8\x00" "\0*\0\0", "*\xed\xb0\x80" "\xed\xa0\x80*", FALSE },
+ { UTF_16_BE, 3, "\0*" "\xDD\x1E" "\0*\0\0", "*\xed\xb4\x9e*", FALSE },
+ { UTF_16_BE, 3, "\0*" "\xDB\xFF" "\0*\0\0", "*\xed\xaf\xbf*", FALSE },
+ { UTF_16_BE, 1, "\xDD\x1E" "\0\0", "\xed\xb4\x9e", FALSE },
+ { UTF_16_BE, 1, "\xDB\xFF" "\0\0", "\xed\xaf\xbf", FALSE },
+ /* Counted strings with NUL characters */
+ { UTF_16_LE, 3, "x\0" "\0\0" "y\0" "*\0", "x\0y", TRUE },
+ { UTF_32_BE, 3, "\0\0\0x" "\0\0\0\0" "\0\0\0y" "\0\0\0*", "x\0y", TRUE },
+
#undef UTF_32_LE
#undef UTF_32_BE
#undef UTF_16_LE
@@ -799,33 +805,35 @@
const struct cvt_test_t *tc;
const svn_string_t *result;
- int i;
+#define SRCLEN 5
+ apr_uint16_t source16[SRCLEN];
+ apr_int32_t source32[SRCLEN];
- for (i = 1, tc = tests; tc->source; ++tc, ++i)
+ for (tc = tests; tc->source; ++tc)
{
if (tc->sixteenbit)
- SVN_ERR(svn_utf__utf16_to_utf8(&result, (const void*)tc->source,
- SVN_UTF__UNKNOWN_LENGTH,
- tc->bigendian, pool, pool));
+ {
+ memset(&source16, 0, SRCLEN * sizeof(*source16));
+ memcpy(&source16, tc->source, (tc->sourcelen + 1) * sizeof(*source16));
+ SVN_ERR(svn_utf__utf16_to_utf8(&result, source16,
+ tc->counted ? tc->sourcelen : SVN_UTF__UNKNOWN_LENGTH,
+ tc->bigendian, pool, pool));
+ }
else
- SVN_ERR(svn_utf__utf32_to_utf8(&result, (const void*)tc->source,
- SVN_UTF__UNKNOWN_LENGTH,
- tc->bigendian, pool, pool));
- SVN_ERR_ASSERT(0 == strcmp(result->data, tc->result));
+ {
+ memset(&source32, 0, SRCLEN * sizeof(*source32));
+ memcpy(&source32, tc->source, (tc->sourcelen + 1) * sizeof(*source32));
+ SVN_ERR(svn_utf__utf32_to_utf8(&result, source32,
+ tc->counted ? tc->sourcelen : SVN_UTF__UNKNOWN_LENGTH,
+ tc->bigendian, pool, pool));
+ }
+ if (tc->counted)
+ SVN_ERR_ASSERT(0 == memcmp(result->data, tc->result, tc->sourcelen));
+ else
+ SVN_ERR_ASSERT(0 == strcmp(result->data, tc->result));
}
+#undef SRCLEN
- /* Test counted strings with NUL characters */
- SVN_ERR(svn_utf__utf16_to_utf8(
- &result, (void*)("x\0" "\0\0" "y\0" "*\0"), 3,
- FALSE, pool, pool));
- SVN_ERR_ASSERT(0 == memcmp(result->data, "x\0y", 3));
-
- SVN_ERR(svn_utf__utf32_to_utf8(
- &result,
- (void*)("\0\0\0x" "\0\0\0\0" "\0\0\0y" "\0\0\0*"), 3,
- TRUE, pool, pool));
- SVN_ERR_ASSERT(0 == memcmp(result->data, "x\0y", 3));
-
return SVN_NO_ERROR;
}