I have enhanced the collation test, 22.locale.collate.cpp with a bunch of cases that deal with embedded strings, inside the input strings as well as at the edges. More defects became apparent, and they have been fixed.

I have re-worked the src/collate.cpp patch and I am attaching it here. All collation tests (old and new) pass. If there are no objections, I will check it in later in the week.

Thanks,
Liviu

On 10/13/12 11:16, Liviu Nicoara wrote:
The initial patch does not pass the following test case. Have re-worked
the patch and attached it to the incident, and I am also attaching it
here. It passes all collate tests.



Index: tests/localization/22.locale.collate.cpp
===================================================================
--- tests/localization/22.locale.collate.cpp    (revision 1397847)
+++ tests/localization/22.locale.collate.cpp    (working copy)
@@ -1029,7 +1029,105 @@ check_hash_eff (const char* charTname)
 
 template <class charT>
 void
-check_NUL_locale (const char* charTname, const char* locname)
+check_NUL_transform (const char* charTname, const char* locname,
+                     const charT* src, size_t src_len,
+                     const char*  pat, size_t pat_len)
+{
+    std::locale loc (locname);
+
+    typedef typename std::collate<charT> Collate;
+    typedef typename Collate::string_type String;
+
+    const Collate &col = std::use_facet<Collate> (loc);
+
+    String s = col.transform (src, src + src_len);    
+
+    for (size_t i = 0, j = 0; i < pat_len; ++i) { 
+
+        if (j >= s.size ()) {
+            rw_assert (false, __FILE__, __LINE__,
+                       "collate<%s>::transform (%{*.*Ac}) ->  %{*.*Ac}"
+                       ", incomplete transformation (locale \"%s\")", 
+                       charTname, sizeof (charT), src_len, src, 
+                       sizeof (charT), s.size (), s.c_str (), locname);
+            break;
+        }
+
+        switch (pat [i]) {
+        case 0:
+            rw_assert (j < s.size () && 0 == s [j], __FILE__, __LINE__,
+                       "collate<%s>::transform (\"%{*.*Ac}\") ->  \"%{*.*Ac}\""
+                       ", expected \\0, got %d in position %lu (locale 
\"%s\")", 
+                       charTname, sizeof (charT), src_len, src, sizeof 
(charT), 
+                       s.size (), s.c_str (), s [j], j, locname);
+            ++j;
+            break;
+
+        case '*':
+            rw_assert (j < s.size () && s [j], __FILE__, __LINE__,
+                       "collate<%s>::transform (\"%{*.*Ac}\") ->  \"%{*.*Ac}\""
+                       ", got NUL in position %lu (locale \"%s\")", charTname,
+                       sizeof (charT), src_len, src, sizeof (charT), s.size 
(), 
+                       s.c_str (), j, locname);
+            for (; j < s.size () && s [j]; ++j) ;
+            break;
+
+        default:
+            break;
+        }
+    }
+}
+
+static void
+check_NUL_transform_narrow (const char* charTname, const char* locname)
+{
+#define T(src, pat)                                         \
+    check_NUL_transform (charTname, locname,                \
+                         src, sizeof src / sizeof *src - 1, \
+                         pat, sizeof pat / sizeof *pat - 1)
+
+    T ("\0",        "\0"       );
+    T ("\0\0",      "\0\0"     );
+    T ("\0\0\0",    "\0\0\0"   );
+    T ("a\0",       "*\0"      );
+    T ("\0a\0",     "\0*\0"    );
+    T ("\0a\0",     "\0*\0"    );
+    T ("a\0\0",     "*\0\0"    );
+    T ("a\0\0\0",   "*\0\0\0"  );
+    T ("a\0b",      "*\0*"     );
+    T ("a\0b\0",    "*\0*\0"   );
+    T ("a\0b\0\0",  "*\0*\0\0" );
+}
+
+static void
+check_NUL_transform_wide (const char* charTname, const char* locname)
+{
+    T (L"\0",       "\0"       );
+    T (L"\0\0",     "\0\0"     );
+    T (L"\0\0\0",   "\0\0\0"   );
+    T (L"a\0",      "*\0"      );
+    T (L"\0a\0",    "\0*\0"    );
+    T (L"\0a\0",    "\0*\0"    );
+    T (L"a\0\0",    "*\0\0"    );
+    T (L"a\0\0\0",  "*\0\0\0"  );
+    T (L"a\0b",     "*\0*"     );
+    T (L"a\0b\0",   "*\0*\0"   );
+    T (L"a\0b\0\0", "*\0*\0\0" );
+
+#undef T
+}
+
+template <class charT>
+void
+check_NUL_transform (const char* charTname, const char* locname)
+{
+    check_NUL_transform_narrow (charTname, locname);
+    check_NUL_transform_wide   (charTname, locname);
+}
+
+template <class charT>
+void
+check_NUL_collate (const char* charTname, const char* locname)
 {
     std::locale loc (locname);
 
@@ -1090,6 +1188,14 @@ check_NUL_locale (const char* charTname,
 
 template <class charT>
 void
+check_NUL (const char* charTname, const char* locname)
+{
+    check_NUL_transform< charT > (charTname, locname);
+    check_NUL_collate< charT >   (charTname, locname);
+}
+
+template <class charT>
+void
 check_NUL (const char* charTname)
 {
     // Verify that the collate facet correctly handles character
@@ -1103,7 +1209,7 @@ check_NUL (const char* charTname)
     for (const char* locname = rw_locales (LC_COLLATE); 
          *locname; locname += std::strlen (locname) + 1, ++i) {
         try {
-            check_NUL_locale<charT> (charTname, locname);
+            check_NUL<charT> (charTname, locname);
         }
         catch (...) {
         }
@@ -1135,7 +1241,6 @@ run_test (int /*argc*/, char* /*argv*/ [
     return 0;
 }
 
-
 int
 main (int argc, char* argv [])
 {

Index: src/collate.cpp
===================================================================
--- src/collate.cpp     (revision 1397825)
+++ src/collate.cpp     (working copy)
@@ -488,6 +488,8 @@ __rw_strnxfrm (const char *src, size_t n
 
     while (nchars) {
 
+        if (src [0]) {
+
         // using a C-style cast instead of static_cast to avoid
         // a gcc 2.95.2 bug causing an error on some platforms:
         //   static_cast from `void *' to `const char *'
@@ -515,7 +517,6 @@ __rw_strnxfrm (const char *src, size_t n
             nchars        = 0;
         }
         else {
-
             // terminating NUL found in the source buffer
             nchars -= (last - src) + 1;
             psrc    = _RWSTD_CONST_CAST (char*, src);
@@ -531,7 +532,7 @@ __rw_strnxfrm (const char *src, size_t n
         // it's buggy (such as MSVC's) and tries to write to
         // the buffer even if it's 0
         char just_in_case_buf [8];
-#endif
+#endif // _RWSTD_OS_SUNOS
 
         const size_t dst_size = strxfrm (just_in_case_buf, psrc, 0);
 
@@ -547,7 +548,7 @@ __rw_strnxfrm (const char *src, size_t n
 
         _TRY {
             // resize the result string to fit itself plus the result
-            // of the transformation including the terminatin NUL
+                // of the transformation including the terminating NUL
             // appended by strxfrm()
             res.resize (res_size + dst_size + 1);
         }
@@ -557,31 +558,32 @@ __rw_strnxfrm (const char *src, size_t n
             _RETHROW;
         }
 
-        // transfor the source string up to the terminating NUL
-        size_t xfrm_size =
             strxfrm (&res [0] + res_size, psrc, dst_size + 1);
+            res.resize (res.size () - !last);
+        }
+        else {
+
+            // count and append the consecutive NULs embedded in the 
+            // input string
 
-#if defined _MSC_VER && _MSC_VER < 1400
-        // compute the correct value that should have been returned from
-        // strxfrm() after the transformation has completed (MSVC strxfrm()
-        // returns a bogus result; see PR #29935)
-        xfrm_size = strlen (&res [0] + res_size);
-#endif   // MSVC < 8.0
-
-        // increment the size of the result string by the number
-        // of transformed characters excluding the terminating NUL
-        // if strxfrm() transforms the empty string into the empty
-        // string, keep the terminating NUL, otherwise drop it
-        res_size += xfrm_size + (last && !*psrc && !xfrm_size);
+            size_t i = 0;
+            for (; i < nchars && 0 == src [i]; ++i) ;
 
         _TRY {
-            res.resize (res_size);
+                // resize the result string to fit itself plus the result
+                // of the transformation including the terminating NUL
+                // appended by strxfrm()
+                res.resize (res.size () + i);
         }
         _CATCH (...) {
             if (pbuf != buf)
                 delete[] pbuf;
             _RETHROW;
         }
+
+            nchars -= i;
+            src += i;
+        }
     }
 
     if (pbuf != buf)
@@ -702,6 +704,8 @@ __rw_wcsnxfrm (const wchar_t *src, size_
 
     while (nchars) {
 
+        if (src [0]) {
+
         typedef _STD::char_traits<wchar_t> Traits;
 
         const wchar_t* const last = Traits::find (src, nchars, L'\0');
@@ -771,31 +775,33 @@ __rw_wcsnxfrm (const wchar_t *src, size_
             _RETHROW;
         }
 
-        // transfor the source string up to the terminating NUL
-        size_t xfrm_size =
+            // transform the source string up to the terminating NUL
             _RWSTD_WCSXFRM (&res [0] + res_size, psrc, dst_size + 1);
+            res.resize (res.size () - !last);
+        }
+        else {
+
+            // count and append the consecutive NULs embedded in the 
+            // input string
 
-#  if defined _MSC_VER && _MSC_VER < 1400
-        // compute the correct value that should have been returned from
-        // strxfrm() after the transformation has completed (MSVC strxfrm()
-        // returns a bogus result; see PR #29935)
-        xfrm_size = Traits::length (&res [0] + res_size);
-#  endif   // MSVC < 8.0
-
-        // increment the size of the result string by the number
-        // of transformed characters excluding the terminating NUL
-        // if strxfrm() transforms the empty string into the empty
-        // string, keep the terminating NUL, otherwise drop it
-        res_size += xfrm_size + (last && !*psrc && !xfrm_size);
+            size_t i = 0;
+            for (; i < nchars && 0 == src [i]; ++i) ;
 
         _TRY {
-            res.resize (res_size);
+                // resize the result string to fit itself plus the result
+                // of the transformation including the terminating NUL
+                // appended by strxfrm()
+                res.resize (res.size () + i);
         }
         _CATCH (...) {
             if (pbuf != buf)
                 delete[] pbuf;
             _RETHROW;
         }
+
+            nchars -= i;
+            src += i;
+        }
     }
 
     if (pbuf != buf)

Reply via email to