On 10/18/12 11:02, Martin Sebor wrote:
On 10/16/2012 10:38 AM, Liviu Nicoara wrote:
I have enhanced the collation test, 22.locale.collate.cpp with a bunch
of cases that deal with embedded strings, inside the input strings as
well as at the edges. More defects became apparent, and they have been
fixed.

I have re-worked the src/collate.cpp patch and I am attaching it here.
All collation tests (old and new) pass. If there are no objections, I
will check it in later in the week.

There are tabs in the patch (we need spaces). They also make
the patch harder to review than it otherwise would be. I would
also suggest keeping the MSVC < 8.0 block and cleaning things
up separately. That will also make the patch easier to review.
(I'm so swamped that this matters to me.)

I looked at both (latest) patches and they do not have tabs in them. They were 
sent in my last post on the 16th, the one you replied to. The diff without -w 
is quite a bit verbose because I moved a whole block in both collate.cpp 
functions, inside an if, so there are whitespace differences. svn diff -w gave 
me a quite decent view of the changes without having to looks at the actual 
patched source file.

I am re-attaching the patches, this time created without -w so they would be a 
bit verbose. No tabs this time either.


I haven't looked at the test patch yet. If you could clean up
the tab issue and reduce the amount of unnecessary whitespace
changes I promise to review it this weekend.

The test enhancements are pretty plain, just a bunch of added stuff.
Thanks,
Liviu

Index: tests/localization/22.locale.collate.cpp
===================================================================
--- tests/localization/22.locale.collate.cpp    (revision 1399886)
+++ tests/localization/22.locale.collate.cpp    (working copy)
@@ -1029,7 +1029,105 @@ check_hash_eff (const char* charTname)
 
 template <class charT>
 void
-check_NUL_locale (const char* charTname, const char* locname)
+check_NUL_transform (const char* charTname, const char* locname,
+                     const charT* src, size_t src_len,
+                     const char*  pat, size_t pat_len)
+{
+    std::locale loc (locname);
+
+    typedef typename std::collate<charT> Collate;
+    typedef typename Collate::string_type String;
+
+    const Collate &col = std::use_facet<Collate> (loc);
+
+    String s = col.transform (src, src + src_len);    
+
+    for (size_t i = 0, j = 0; i < pat_len; ++i) { 
+
+        if (j >= s.size ()) {
+            rw_assert (false, __FILE__, __LINE__,
+                       "collate<%s>::transform (%{*.*Ac}) ->  %{*.*Ac}"
+                       ", incomplete transformation (locale \"%s\")", 
+                       charTname, sizeof (charT), src_len, src, 
+                       sizeof (charT), s.size (), s.c_str (), locname);
+            break;
+        }
+
+        switch (pat [i]) {
+        case 0:
+            rw_assert (j < s.size () && 0 == s [j], __FILE__, __LINE__,
+                       "collate<%s>::transform (\"%{*.*Ac}\") ->  \"%{*.*Ac}\""
+                       ", expected \\0, got %d in position %lu (locale 
\"%s\")", 
+                       charTname, sizeof (charT), src_len, src, sizeof 
(charT), 
+                       s.size (), s.c_str (), s [j], j, locname);
+            ++j;
+            break;
+
+        case '*':
+            rw_assert (j < s.size () && s [j], __FILE__, __LINE__,
+                       "collate<%s>::transform (\"%{*.*Ac}\") ->  \"%{*.*Ac}\""
+                       ", got NUL in position %lu (locale \"%s\")", charTname,
+                       sizeof (charT), src_len, src, sizeof (charT), s.size 
(), 
+                       s.c_str (), j, locname);
+            for (; j < s.size () && s [j]; ++j) ;
+            break;
+
+        default:
+            break;
+        }
+    }
+}
+
+static void
+check_NUL_transform_narrow (const char* charTname, const char* locname)
+{
+#define T(src, pat)                                         \
+    check_NUL_transform (charTname, locname,                \
+                         src, sizeof src / sizeof *src - 1, \
+                         pat, sizeof pat / sizeof *pat - 1)
+
+    T ("\0",        "\0"       );
+    T ("\0\0",      "\0\0"     );
+    T ("\0\0\0",    "\0\0\0"   );
+    T ("a\0",       "*\0"      );
+    T ("\0a\0",     "\0*\0"    );
+    T ("\0a\0",     "\0*\0"    );
+    T ("a\0\0",     "*\0\0"    );
+    T ("a\0\0\0",   "*\0\0\0"  );
+    T ("a\0b",      "*\0*"     );
+    T ("a\0b\0",    "*\0*\0"   );
+    T ("a\0b\0\0",  "*\0*\0\0" );
+}
+
+static void
+check_NUL_transform_wide (const char* charTname, const char* locname)
+{
+    T (L"\0",       "\0"       );
+    T (L"\0\0",     "\0\0"     );
+    T (L"\0\0\0",   "\0\0\0"   );
+    T (L"a\0",      "*\0"      );
+    T (L"\0a\0",    "\0*\0"    );
+    T (L"\0a\0",    "\0*\0"    );
+    T (L"a\0\0",    "*\0\0"    );
+    T (L"a\0\0\0",  "*\0\0\0"  );
+    T (L"a\0b",     "*\0*"     );
+    T (L"a\0b\0",   "*\0*\0"   );
+    T (L"a\0b\0\0", "*\0*\0\0" );
+
+#undef T
+}
+
+template <class charT>
+void
+check_NUL_transform (const char* charTname, const char* locname)
+{
+    check_NUL_transform_narrow (charTname, locname);
+    check_NUL_transform_wide   (charTname, locname);
+}
+
+template <class charT>
+void
+check_NUL_collate (const char* charTname, const char* locname)
 {
     std::locale loc (locname);
 
@@ -1090,6 +1188,14 @@ check_NUL_locale (const char* charTname,
 
 template <class charT>
 void
+check_NUL (const char* charTname, const char* locname)
+{
+    check_NUL_transform< charT > (charTname, locname);
+    check_NUL_collate< charT >   (charTname, locname);
+}
+
+template <class charT>
+void
 check_NUL (const char* charTname)
 {
     // Verify that the collate facet correctly handles character
@@ -1103,7 +1209,7 @@ check_NUL (const char* charTname)
     for (const char* locname = rw_locales (LC_COLLATE); 
          *locname; locname += std::strlen (locname) + 1, ++i) {
         try {
-            check_NUL_locale<charT> (charTname, locname);
+            check_NUL<charT> (charTname, locname);
         }
         catch (...) {
         }
@@ -1135,7 +1241,6 @@ run_test (int /*argc*/, char* /*argv*/ [
     return 0;
 }
 
-
 int
 main (int argc, char* argv [])
 {
Index: src/collate.cpp
===================================================================
--- src/collate.cpp     (revision 1399886)
+++ src/collate.cpp     (working copy)
@@ -488,99 +488,100 @@ __rw_strnxfrm (const char *src, size_t n
 
     while (nchars) {
 
-        // using a C-style cast instead of static_cast to avoid
-        // a gcc 2.95.2 bug causing an error on some platforms:
-        //   static_cast from `void *' to `const char *'
-        const char* const last = (const char*)memchr (src, '\0', nchars);
-
-        if (0 == last) {
-
-            // no NUL found in the initial portion of the source string
-            // that fits into the local temporary buffer; copy as many
-            // characters as fit into the buffer
+        if (src [0]) {
 
-            if (bufsize <= nchars) {
-                if (pbuf != buf)
-                    delete[] pbuf;
-                pbuf = new char [nchars + 1];
+            // using a C-style cast instead of static_cast to avoid
+            // a gcc 2.95.2 bug causing an error on some platforms:
+            //   static_cast from `void *' to `const char *'
+            const char* const last = (const char*)memchr (src, '\0', nchars);
+
+            if (0 == last) {
+
+                // no NUL found in the initial portion of the source string
+                // that fits into the local temporary buffer; copy as many
+                // characters as fit into the buffer
+
+                if (bufsize <= nchars) {
+                    if (pbuf != buf)
+                        delete[] pbuf;
+                    pbuf = new char [nchars + 1];
+                }
+
+                psrc = pbuf;
+                memcpy (psrc, src, nchars);
+
+                // append a terminating NUL and decrement the number
+                // of characters that remain to be processed
+                psrc [nchars] = '\0';
+                src          += nchars;
+                nchars        = 0;
+            }
+            else {
+                // terminating NUL found in the source buffer
+                nchars -= (last - src) + 1;
+                psrc    = _RWSTD_CONST_CAST (char*, src);
+                src    += (last - src) + 1;
             }
-
-            psrc = pbuf;
-            memcpy (psrc, src, nchars);
-
-            // append a terminating NUL and decrement the number
-            // of characters that remain to be processed
-            psrc [nchars] = '\0';
-            src          += nchars;
-            nchars        = 0;
-        }
-        else {
-
-            // terminating NUL found in the source buffer
-            nchars -= (last - src) + 1;
-            psrc    = _RWSTD_CONST_CAST (char*, src);
-            src    += (last - src) + 1;
-        }
 
 #ifdef _RWSTD_OS_SUNOS
-        // Solaris 10u5 on AMD64 overwrites memory past the end of
-        // just_in_case_buf[8], to avoid this, pass a null pointer
-        char* const just_in_case_buf = 0;
+            // Solaris 10u5 on AMD64 overwrites memory past the end of
+            // just_in_case_buf[8], to avoid this, pass a null pointer
+            char* const just_in_case_buf = 0;
 #else
-        // provide a destination buffer to strxfrm() in case
-        // it's buggy (such as MSVC's) and tries to write to
-        // the buffer even if it's 0
-        char just_in_case_buf [8];
-#endif
+            // provide a destination buffer to strxfrm() in case
+            // it's buggy (such as MSVC's) and tries to write to
+            // the buffer even if it's 0
+            char just_in_case_buf [8];
+#endif // _RWSTD_OS_SUNOS
 
-        const size_t dst_size = strxfrm (just_in_case_buf, psrc, 0);
+            const size_t dst_size = strxfrm (just_in_case_buf, psrc, 0);
 
-        // check for strxfrm() errors
-        if (0 == (dst_size << 1)) {
-            if (pbuf != buf)
-                delete[] pbuf;
+            // check for strxfrm() errors
+            if (0 == (dst_size << 1)) {
+                if (pbuf != buf)
+                    delete[] pbuf;
 
-            return _STD::string ();
-        }
+                return _STD::string ();
+            }
 
-        size_t res_size = res.size ();
+            size_t res_size = res.size ();
 
-        _TRY {
-            // resize the result string to fit itself plus the result
-            // of the transformation including the terminatin NUL
-            // appended by strxfrm()
-            res.resize (res_size + dst_size + 1);
-        }
-        _CATCH (...) {
-            if (pbuf != buf)
-                delete[] pbuf;
-            _RETHROW;
-        }
+            _TRY {
+                // resize the result string to fit itself plus the result
+                // of the transformation including the terminating NUL
+                // appended by strxfrm()
+                res.resize (res_size + dst_size + 1);
+            }
+            _CATCH (...) {
+                if (pbuf != buf)
+                    delete[] pbuf;
+                _RETHROW;
+            }
 
-        // transfor the source string up to the terminating NUL
-        size_t xfrm_size =
             strxfrm (&res [0] + res_size, psrc, dst_size + 1);
+            res.resize (res.size () - !last);
+        }
+        else {
 
-#if defined _MSC_VER && _MSC_VER < 1400
-        // compute the correct value that should have been returned from
-        // strxfrm() after the transformation has completed (MSVC strxfrm()
-        // returns a bogus result; see PR #29935)
-        xfrm_size = strlen (&res [0] + res_size);
-#endif   // MSVC < 8.0
-
-        // increment the size of the result string by the number
-        // of transformed characters excluding the terminating NUL
-        // if strxfrm() transforms the empty string into the empty
-        // string, keep the terminating NUL, otherwise drop it
-        res_size += xfrm_size + (last && !*psrc && !xfrm_size);
+            // count and append the consecutive NULs embedded in the 
+            // input string
 
-        _TRY {
-            res.resize (res_size);
-        }
-        _CATCH (...) {
-            if (pbuf != buf)
-                delete[] pbuf;
-            _RETHROW;
+            size_t i = 0;
+            for (; i < nchars && 0 == src [i]; ++i) ;
+
+            _TRY {
+                // resize the result string to fit itself plus the 
+                // embedded NULs
+                res.resize (res.size () + i);
+            }
+            _CATCH (...) {
+                if (pbuf != buf)
+                    delete[] pbuf;
+                _RETHROW;
+            }
+
+            nchars -= i;
+            src += i;
         }
     }
 
@@ -702,99 +703,102 @@ __rw_wcsnxfrm (const wchar_t *src, size_
 
     while (nchars) {
 
-        typedef _STD::char_traits<wchar_t> Traits;
+        if (src [0]) {
 
-        const wchar_t* const last = Traits::find (src, nchars, L'\0');
+            typedef _STD::char_traits<wchar_t> Traits;
 
-        if (0 == last) {
+            const wchar_t* const last = Traits::find (src, nchars, L'\0');
 
-            // no NUL found in the initial portion of the source string
-            // that fits into the local temporary buffer; copy as many
-            // characters as fit into the buffer
+            if (0 == last) {
 
-            if (bufsize <= nchars) {
-                if (pbuf != buf)
-                    delete[] pbuf;
-                pbuf = new wchar_t [nchars + 1];
-            }
+                // no NUL found in the initial portion of the source string
+                // that fits into the local temporary buffer; copy as many
+                // characters as fit into the buffer
 
-            psrc = pbuf;
-            memcpy (psrc, src, nchars * sizeof *psrc);
+                if (bufsize <= nchars) {
+                    if (pbuf != buf)
+                        delete[] pbuf;
+                    pbuf = new wchar_t [nchars + 1];
+                }
 
-            // append a terminating NUL and decrement the number
-            // of characters that remain to be processed
-            psrc [nchars] = 0;
-            src          += nchars;
-            nchars        = 0;
-        }
-        else {
+                psrc = pbuf;
+                memcpy (psrc, src, nchars * sizeof *psrc);
 
-            // terminating NUL found in the source buffer
-            nchars -= (last - src) + 1;
-            psrc    = _RWSTD_CONST_CAST (wchar_t*, src);
-            src    += (last - src) + 1;
-        }
+                // append a terminating NUL and decrement the number
+                // of characters that remain to be processed
+                psrc [nchars] = 0;
+                src          += nchars;
+                nchars        = 0;
+            }
+            else {
+
+                // terminating NUL found in the source buffer
+                nchars -= (last - src) + 1;
+                psrc    = _RWSTD_CONST_CAST (wchar_t*, src);
+                src    += (last - src) + 1;
+            }
 
 #ifdef _RWSTD_OS_SUNOS
-        // just in case Solaris wcsxfrm() has the same bug
-        // as its strxfrm() (see above)
-        wchar_t* const just_in_case_buf = 0;
+            // just in case Solaris wcsxfrm() has the same bug
+            // as its strxfrm() (see above)
+            wchar_t* const just_in_case_buf = 0;
 #else
-        // provide a destination buffer to strxfrm() in case
-        // it's buggy (such as MSVC's) and tries to write to
-        // the buffer even if it's 0
-        wchar_t just_in_case_buf [8];
+            // provide a destination buffer to strxfrm() in case
+            // it's buggy (such as MSVC's) and tries to write to
+            // the buffer even if it's 0
+            wchar_t just_in_case_buf [8];
 #endif
 
-        const size_t dst_size =
-            _RWSTD_WCSXFRM (just_in_case_buf, psrc, 0);
+            const size_t dst_size =
+                _RWSTD_WCSXFRM (just_in_case_buf, psrc, 0);
 
-        // check for wcsxfrm() errors
-        if (_RWSTD_SIZE_MAX == dst_size) {
-            if (pbuf != buf)
-                delete[] pbuf;
+            // check for wcsxfrm() errors
+            if (_RWSTD_SIZE_MAX == dst_size) {
+                if (pbuf != buf)
+                    delete[] pbuf;
 
-            return _STD::wstring ();
-        }
+                return _STD::wstring ();
+            }
 
-        size_t res_size = res.size ();
+            size_t res_size = res.size ();
 
-        _TRY {
-            // resize the result string to fit itself plus the result
-            // of the transformation including the terminatin NUL
-            // appended by strxfrm()
-            res.resize (res_size + dst_size + 1);
-        }
-        _CATCH (...) {
-            if (pbuf != buf)
-                delete[] pbuf;
-            _RETHROW;
-        }
+            _TRY {
+                // resize the result string to fit itself plus the result
+                // of the transformation including the terminatin NUL
+                // appended by strxfrm()
+                res.resize (res_size + dst_size + 1);
+            }
+            _CATCH (...) {
+                if (pbuf != buf)
+                    delete[] pbuf;
+                _RETHROW;
+            }
 
-        // transfor the source string up to the terminating NUL
-        size_t xfrm_size =
+            // transform the source string up to the terminating NUL
             _RWSTD_WCSXFRM (&res [0] + res_size, psrc, dst_size + 1);
+            res.resize (res.size () - !last);
+        }
+        else {
 
-#  if defined _MSC_VER && _MSC_VER < 1400
-        // compute the correct value that should have been returned from
-        // strxfrm() after the transformation has completed (MSVC strxfrm()
-        // returns a bogus result; see PR #29935)
-        xfrm_size = Traits::length (&res [0] + res_size);
-#  endif   // MSVC < 8.0
-
-        // increment the size of the result string by the number
-        // of transformed characters excluding the terminating NUL
-        // if strxfrm() transforms the empty string into the empty
-        // string, keep the terminating NUL, otherwise drop it
-        res_size += xfrm_size + (last && !*psrc && !xfrm_size);
+            // count and append the consecutive NULs embedded in the 
+            // input string
 
-        _TRY {
-            res.resize (res_size);
-        }
-        _CATCH (...) {
-            if (pbuf != buf)
-                delete[] pbuf;
-            _RETHROW;
+            size_t i = 0;
+            for (; i < nchars && 0 == src [i]; ++i) ;
+
+            _TRY {
+                // resize the result string to fit itself plus the 
+                // embedded NULs
+                res.resize (res.size () + i);
+            }
+            _CATCH (...) {
+                if (pbuf != buf)
+                    delete[] pbuf;
+                _RETHROW;
+            }
+
+            nchars -= i;
+            src += i;
         }
     }
 

Reply via email to