Author: kotkov
Date: Thu Aug 21 15:47:45 2025
New Revision: 1927953

Log:
On Windows, use the WC_NO_BEST_FIT_CHARS [1] flag when converting individual
command-line arguments to the ANSI code page.

If a best-fit mapping occurs, return an error instead of silently accepting
the results of the mapping.  This avoids unintended input changes (e.g., a
fullwidth hyphen-minus being converted to a regular hyphen).

This is an interim fix. A long-term solution is to convert arguments to UTF-8
and work with UTF-8 directly, but until then failing fast should be better
than changing the user input.

* subversion/libsvn_subr/cmdline.c
  (svn_cmdline__win32_get_cstring_argv): Specify WC_NO_BEST_FIT_CHARS during
   conversion and fail if best-fit mapping occurs.

* subversion/tests/cmdline/basic_tests.py
  (argv_with_best_fit_chars): Update test expectations, and expect an error.
   Check only the error prefix, since the exact message and its OS portion
   may vary due to localization.

[1]: https://learn.microsoft.com/en-us/windows/win32/api/stringapiset/nf-stringapiset-widechartomultibyte

Modified:
   subversion/trunk/subversion/libsvn_subr/cmdline.c
   subversion/trunk/subversion/tests/cmdline/basic_tests.py

Modified: subversion/trunk/subversion/libsvn_subr/cmdline.c
==============================================================================
--- subversion/trunk/subversion/libsvn_subr/cmdline.c   Thu Aug 21 14:46:09 2025        (r1927952)
+++ subversion/trunk/subversion/libsvn_subr/cmdline.c   Thu Aug 21 15:47:45 2025        (r1927953)
@@ -1409,23 +1409,36 @@ svn_cmdline__win32_get_cstring_argv(cons
       const wchar_t *arg = argv[i];
       char *cstring_arg;
       int rv;
+      BOOL used_default_char;
 
       /* Passing -1 for the string length guarantees that the returned length
          will account for a terminating null character. */
-      rv = WideCharToMultiByte(CP_ACP, 0, arg, -1, NULL, 0, NULL, NULL);
+      rv = WideCharToMultiByte(CP_ACP, WC_NO_BEST_FIT_CHARS, arg, -1,
+                               NULL, 0, NULL, &used_default_char);
       if (rv <= 0)
         {
           return svn_error_wrap_apr(apr_get_os_error(),
                                     _("Conversion from UTF-16 failed"));
         }
+      else if (used_default_char)
+        {
+          return 
svn_error_wrap_apr(APR_FROM_OS_ERROR(ERROR_NO_UNICODE_TRANSLATION),
+                                    _("Conversion from UTF-16 failed"));
+        }
 
       cstring_arg = apr_palloc(result_pool, rv);
-      rv = WideCharToMultiByte(CP_ACP, 0, arg, -1, cstring_arg, rv, NULL, 
NULL);
+      rv = WideCharToMultiByte(CP_ACP, WC_NO_BEST_FIT_CHARS, arg, -1,
+                               cstring_arg, rv, NULL, &used_default_char);
       if (rv <= 0)
         {
           return svn_error_wrap_apr(apr_get_os_error(),
                                     _("Conversion from UTF-16 failed"));
         }
+      else if (used_default_char)
+        {
+          return 
svn_error_wrap_apr(APR_FROM_OS_ERROR(ERROR_NO_UNICODE_TRANSLATION),
+                                    _("Conversion from UTF-16 failed"));
+        }
 
       APR_ARRAY_PUSH(cstring_argv, const char *) = cstring_arg;
     }

Modified: subversion/trunk/subversion/tests/cmdline/basic_tests.py
==============================================================================
--- subversion/trunk/subversion/tests/cmdline/basic_tests.py    Thu Aug 21 14:46:09 2025        (r1927952)
+++ subversion/trunk/subversion/tests/cmdline/basic_tests.py    Thu Aug 21 15:47:45 2025        (r1927953)
@@ -3356,19 +3356,20 @@ def argv_with_best_fit_chars(sbox):
       yield chr(c), mbcs
 
   count = 0
-  expected_stderr = svntest.verify.RegexListOutput(
-    [r'^"foo.+bar": unknown command\.\n$', '\n'], match_all=True)
+  # E721113: Conversion from UTF-16 failed: No mapping for the Unicode
+  # character exists in the target multi-byte code page.
+  expected_stderr = 'svn: E721113: '
   for wc, mbcs in iter_bestfit_chars():
     count += 1
     logger.info('Code page %r - U+%04x -> 0x%s', codepage, ord(wc), mbcs.hex())
     if mbcs == b'"':
-      svntest.actions.run_and_verify_svn2(None, expected_stderr, 0, 'help',
+      svntest.actions.run_and_verify_svn2(None, expected_stderr, 1, 'help',
                                           'foo{0} {0}bar'.format(wc))
     elif mbcs == b'\\':
-      svntest.actions.run_and_verify_svn2(None, expected_stderr, 0, 'help',
+      svntest.actions.run_and_verify_svn2(None, expected_stderr, 1, 'help',
                                           'foo{0}" {0}"bar'.format(wc))
     elif mbcs == b' ':
-      svntest.actions.run_and_verify_svn2(None, expected_stderr, 0, 'help',
+      svntest.actions.run_and_verify_svn2(None, expected_stderr, 1, 'help',
                                           'foo{0}bar'.format(wc))
   if count == 0:
     raise svntest.Skip('No best fit characters in code page %r' % codepage)

Reply via email to