On Mon, May 02, 2016 at 08:47:03PM +1200, Michael Cree wrote:
> On Mon, May 02, 2016 at 12:01:18AM -0400, James McCoy wrote:
> > Thanks for the analysis and working on a patch.  I'll note that
> > _Alignas, if it had worked, would likely have been too new a compiler
> > feature to use.
> > 
> > It looks like Debian's ia64 porterbox is still running, so I can try
> > reproducing the problem there (using prctl to force SIGBUS) and then
> > fiddling around with the code.
> 
> Here goes further analysis.  Running utf-test under gdb I set a break
> point at line 807 of utf-test.c and after a number of iterations of
> the enclosing loop I get:
> 
> Breakpoint 1, test_utf_conversions (pool=0x20001018028)
>     at 
> /extra/mjc/debian/subversion/subversion-1.9.4/subversion/tests/libsvn_subr/utf-test.c:807
> 807         SVN_ERR_ASSERT(0 == strcmp(result->data, tc->result));
> (gdb) print tc->source
> $20 = 0x120003117 "\330\064\335\036"
> (gdb) print result->data
> $21 = 0x20001018218 "\360\220\204\236"
> (gdb) print tc->result
> $22 = 0x12000373c "\360\235\204\236"
> (gdb) cont
> Continuing.
> svn_tests: E235000: In file 
> '/extra/mjc/debian/subversion/subversion-1.9.4/subversion/tests/libsvn_subr/utf-test.c'
>  line 807: assertion failed (0 == strcmp(result->data, tc->result))
> FAIL:  lt-utf-test 9: test svn_utf__utf{16,32}_to_utf8
> [Inferior 1 (process 24827) exited with code 01]
> 
> 
> 
> On this iteration I believe it has just read line 769 of utf-test.c,
> i.e., this line:
> 
>     { UTF_16_BE, "\xD8\x34" "\xDD\x1E" "\0\0", "\xf0\x9d\x84\x9e" }, /* 
> U+01D11E */
> 
> into variable tc, and it passes tc->source (which is at address
> 0x120003117, i.e., misaligned for apr_uint16_t type) to
> svn_utf__utf16_to_utf8() and the result is incorrect and results in
> the failed assert.

Thanks (again) for the very thorough analysis.  Sorry it's taken so long
for me to circle back to this.

If I understand correctly, the easiest fix would be to memcpy()
tc->source to an appropriately sized apr_uint16_t/apr_int32_t array.
That could then be used for the svn_utf__utf{16,32}_to_utf8() calls.

If so, could you try the attached patch?

Cheers,
-- 
James
GPG Key: 4096R/91BF BF4D 6956 BD5D F7B7  2D23 DFE6 91AE 331B A3DB
Index: trunk/subversion/tests/libsvn_subr/utf-test.c
===================================================================
--- trunk/subversion/tests/libsvn_subr/utf-test.c	(revision 1825748)
+++ trunk/subversion/tests/libsvn_subr/utf-test.c	(working copy)
@@ -752,8 +752,10 @@
   {
     svn_boolean_t sixteenbit;
     svn_boolean_t bigendian;
+    apr_size_t sourcelen;
     const char *source;
     const char *result;
+    svn_boolean_t counted;
   } tests[] = {
 
 #define UTF_32_LE FALSE, FALSE
@@ -762,33 +764,37 @@
 #define UTF_16_BE TRUE, TRUE
 
     /* Normal character conversion */
-    { UTF_32_LE, "t\0\0\0" "e\0\0\0" "s\0\0\0" "t\0\0\0" "\0\0\0\0", "test" },
-    { UTF_32_BE, "\0\0\0t" "\0\0\0e" "\0\0\0s" "\0\0\0t" "\0\0\0\0", "test" },
-    { UTF_16_LE, "t\0" "e\0" "s\0" "t\0" "\0\0", "test" },
-    { UTF_16_BE, "\0t" "\0e" "\0s" "\0t" "\0\0", "test" },
+    { UTF_32_LE, 4, "t\0\0\0" "e\0\0\0" "s\0\0\0" "t\0\0\0" "\0\0\0\0", "test", FALSE },
+    { UTF_32_BE, 4, "\0\0\0t" "\0\0\0e" "\0\0\0s" "\0\0\0t" "\0\0\0\0", "test", FALSE },
+    { UTF_16_LE, 4, "t\0" "e\0" "s\0" "t\0" "\0\0", "test", FALSE },
+    { UTF_16_BE, 4, "\0t" "\0e" "\0s" "\0t" "\0\0", "test", FALSE },
 
     /* Valid surrogate pairs */
-    { UTF_16_LE, "\x00\xD8" "\x00\xDC" "\0\0", "\xf0\x90\x80\x80" }, /* U+010000 */
-    { UTF_16_LE, "\x34\xD8" "\x1E\xDD" "\0\0", "\xf0\x9d\x84\x9e" }, /* U+01D11E */
-    { UTF_16_LE, "\xFF\xDB" "\xFD\xDF" "\0\0", "\xf4\x8f\xbf\xbd" }, /* U+10FFFD */
+    { UTF_16_LE, 2, "\x00\xD8" "\x00\xDC" "\0\0", "\xf0\x90\x80\x80", FALSE }, /* U+010000 */
+    { UTF_16_LE, 2, "\x34\xD8" "\x1E\xDD" "\0\0", "\xf0\x9d\x84\x9e", FALSE }, /* U+01D11E */
+    { UTF_16_LE, 2, "\xFF\xDB" "\xFD\xDF" "\0\0", "\xf4\x8f\xbf\xbd", FALSE }, /* U+10FFFD */
 
-    { UTF_16_BE, "\xD8\x00" "\xDC\x00" "\0\0", "\xf0\x90\x80\x80" }, /* U+010000 */
-    { UTF_16_BE, "\xD8\x34" "\xDD\x1E" "\0\0", "\xf0\x9d\x84\x9e" }, /* U+01D11E */
-    { UTF_16_BE, "\xDB\xFF" "\xDF\xFD" "\0\0", "\xf4\x8f\xbf\xbd" }, /* U+10FFFD */
+    { UTF_16_BE, 2, "\xD8\x00" "\xDC\x00" "\0\0", "\xf0\x90\x80\x80", FALSE }, /* U+010000 */
+    { UTF_16_BE, 2, "\xD8\x34" "\xDD\x1E" "\0\0", "\xf0\x9d\x84\x9e", FALSE }, /* U+01D11E */
+    { UTF_16_BE, 2, "\xDB\xFF" "\xDF\xFD" "\0\0", "\xf4\x8f\xbf\xbd", FALSE }, /* U+10FFFD */
 
     /* Swapped, single and trailing surrogate pairs */
-    { UTF_16_LE, "*\0" "\x00\xDC" "\x00\xD8" "*\0\0\0", "*\xed\xb0\x80" "\xed\xa0\x80*" },
-    { UTF_16_LE, "*\0" "\x1E\xDD" "*\0\0\0", "*\xed\xb4\x9e*" },
-    { UTF_16_LE, "*\0" "\xFF\xDB" "*\0\0\0", "*\xed\xaf\xbf*" },
-    { UTF_16_LE, "\x1E\xDD" "\0\0", "\xed\xb4\x9e" },
-    { UTF_16_LE, "\xFF\xDB" "\0\0", "\xed\xaf\xbf" },
+    { UTF_16_LE, 4, "*\0" "\x00\xDC" "\x00\xD8" "*\0\0\0", "*\xed\xb0\x80" "\xed\xa0\x80*", FALSE },
+    { UTF_16_LE, 3, "*\0" "\x1E\xDD" "*\0\0\0", "*\xed\xb4\x9e*", FALSE },
+    { UTF_16_LE, 3, "*\0" "\xFF\xDB" "*\0\0\0", "*\xed\xaf\xbf*", FALSE },
+    { UTF_16_LE, 1, "\x1E\xDD" "\0\0", "\xed\xb4\x9e", FALSE },
+    { UTF_16_LE, 1, "\xFF\xDB" "\0\0", "\xed\xaf\xbf", FALSE },
 
-    { UTF_16_BE, "\0*" "\xDC\x00" "\xD8\x00" "\0*\0\0", "*\xed\xb0\x80" "\xed\xa0\x80*" },
-    { UTF_16_BE, "\0*" "\xDD\x1E" "\0*\0\0", "*\xed\xb4\x9e*" },
-    { UTF_16_BE, "\0*" "\xDB\xFF" "\0*\0\0", "*\xed\xaf\xbf*" },
-    { UTF_16_BE, "\xDD\x1E" "\0\0", "\xed\xb4\x9e" },
-    { UTF_16_BE, "\xDB\xFF" "\0\0", "\xed\xaf\xbf" },
+    { UTF_16_BE, 4, "\0*" "\xDC\x00" "\xD8\x00" "\0*\0\0", "*\xed\xb0\x80" "\xed\xa0\x80*", FALSE },
+    { UTF_16_BE, 3, "\0*" "\xDD\x1E" "\0*\0\0", "*\xed\xb4\x9e*", FALSE },
+    { UTF_16_BE, 3, "\0*" "\xDB\xFF" "\0*\0\0", "*\xed\xaf\xbf*", FALSE },
+    { UTF_16_BE, 1, "\xDD\x1E" "\0\0", "\xed\xb4\x9e", FALSE },
+    { UTF_16_BE, 1, "\xDB\xFF" "\0\0", "\xed\xaf\xbf", FALSE },
 
+    /* Counted strings with NUL characters */
+    { UTF_16_LE, 3, "x\0" "\0\0" "y\0" "*\0", "x\0y", TRUE },
+    { UTF_32_BE, 3, "\0\0\0x" "\0\0\0\0" "\0\0\0y" "\0\0\0*", "x\0y", TRUE },
+
 #undef UTF_32_LE
 #undef UTF_32_BE
 #undef UTF_16_LE
@@ -799,33 +805,35 @@
 
   const struct cvt_test_t *tc;
   const svn_string_t *result;
-  int i;
+#define SRCLEN 5
+  apr_uint16_t source16[SRCLEN];
+  apr_int32_t source32[SRCLEN];
 
-  for (i = 1, tc = tests; tc->source; ++tc, ++i)
+  for (tc = tests; tc->source; ++tc)
     {
       if (tc->sixteenbit)
-        SVN_ERR(svn_utf__utf16_to_utf8(&result, (const void*)tc->source,
-                                       SVN_UTF__UNKNOWN_LENGTH,
-                                       tc->bigendian, pool, pool));
+        {
+          memset(&source16, 0, SRCLEN * sizeof(*source16));
+          memcpy(&source16, tc->source, (tc->sourcelen + 1) * sizeof(*source16));
+          SVN_ERR(svn_utf__utf16_to_utf8(&result, source16,
+                                         tc->counted ? tc->sourcelen : SVN_UTF__UNKNOWN_LENGTH,
+                                         tc->bigendian, pool, pool));
+        }
       else
-        SVN_ERR(svn_utf__utf32_to_utf8(&result, (const void*)tc->source,
-                                       SVN_UTF__UNKNOWN_LENGTH,
-                                       tc->bigendian, pool, pool));
-      SVN_ERR_ASSERT(0 == strcmp(result->data, tc->result));
+        {
+          memset(&source32, 0, SRCLEN * sizeof(*source32));
+          memcpy(&source32, tc->source, (tc->sourcelen + 1) * sizeof(*source32));
+          SVN_ERR(svn_utf__utf32_to_utf8(&result, source32,
+                                         tc->counted ? tc->sourcelen : SVN_UTF__UNKNOWN_LENGTH,
+                                         tc->bigendian, pool, pool));
+        }
+      if (tc->counted)
+        SVN_ERR_ASSERT(0 == memcmp(result->data, tc->result, tc->sourcelen));
+      else
+        SVN_ERR_ASSERT(0 == strcmp(result->data, tc->result));
     }
+#undef SRCLEN
 
-  /* Test counted strings with NUL characters */
-  SVN_ERR(svn_utf__utf16_to_utf8(
-              &result, (void*)("x\0" "\0\0" "y\0" "*\0"), 3,
-              FALSE, pool, pool));
-  SVN_ERR_ASSERT(0 == memcmp(result->data, "x\0y", 3));
-
-  SVN_ERR(svn_utf__utf32_to_utf8(
-              &result,
-              (void*)("\0\0\0x" "\0\0\0\0" "\0\0\0y" "\0\0\0*"), 3,
-              TRUE, pool, pool));
-  SVN_ERR_ASSERT(0 == memcmp(result->data, "x\0y", 3));
-
   return SVN_NO_ERROR;
 }
 

Reply via email to