On Sat, Jun 7, 2025 at 7:22 AM Branko Čibej <[email protected]> wrote:
> On 7. 6. 25 03:11, Jun Omae wrote:
>
> On 2025/06/07 1:22, Timofei Zhakov wrote:
>
> Yay! It makes sense. Thank you so much for pointing me out.
>
> Is it worthed to change the locale to utf8 for all tests?
>
> However, if not, I think I could temporarily modify it, and set it back to
> C after the test body. The patch:
>
> [[[
> Index: subversion/tests/cmdline/basic_tests.py
> ===================================================================
> --- subversion/tests/cmdline/basic_tests.py (revision 1926036)
> +++ subversion/tests/cmdline/basic_tests.py (working copy)
> @@ -3349,37 +3349,42 @@
> def unicode_arguments_test(sbox: svntest.sandbox.Sandbox):
> """test unicode arguments"""
>
> - UNICODE_TEST_STRING = '\U0001f449\U0001f448'
> - sbox.build(read_only=False, empty=True)
> + oldlocale = os.environ["LC_ALL"]
> + os.environ["LC_ALL"] = "C.utf8"
>
> - unicode_item = sbox.ospath(UNICODE_TEST_STRING)
> - test_item = sbox.ospath("test")
> + try:
> + UNICODE_TEST_STRING = '\U0001f449\U0001f448'
> + sbox.build(read_only=False, empty=True)
>
> - svntest.actions.run_and_verify_svn2(None, [], 0, "mkdir", unicode_item)
> - svntest.actions.run_and_verify_svn2(None, [], 0, "mkdir", test_item)
> - svntest.actions.run_and_verify_svn2(None, [], 0, "propset",
> - "name", UNICODE_TEST_STRING,
> unicode_item)
> - svntest.actions.run_and_verify_svn2(None, [], 0, "ci", sbox.wc_dir,
> - "-m", UNICODE_TEST_STRING,
> - "--with-revprop",
> - "revprop=" + UNICODE_TEST_STRING)
> + unicode_item = sbox.ospath(UNICODE_TEST_STRING)
> + test_item = sbox.ospath("test")
>
> - expected_disk = wc.State("", {
> - UNICODE_TEST_STRING: Item(props={ "name": UNICODE_TEST_STRING }),
> - "test" : Item(),
> - })
> + svntest.actions.run_and_verify_svn2(None, [], 0, "mkdir",
> unicode_item)
> + svntest.actions.run_and_verify_svn2(None, [], 0, "mkdir", test_item)
> + svntest.actions.run_and_verify_svn2(None, [], 0, "propset",
> + "name", UNICODE_TEST_STRING,
> unicode_item)
> + svntest.actions.run_and_verify_svn2(None, [], 0, "ci", sbox.wc_dir,
> + "-m", UNICODE_TEST_STRING,
> + "--with-revprop",
> + "revprop=" + UNICODE_TEST_STRING)
>
> - svntest.actions.verify_disk(sbox.wc_dir, expected_disk,
> check_props=True)
> - os.chdir(sbox.wc_dir)
> - svntest.actions.run_and_verify_log_xml(
> - expected_revprops=[{
> - "svn:author": "jrandom",
> - "svn:date": "",
> - "svn:log": UNICODE_TEST_STRING,
> - "revprop": UNICODE_TEST_STRING
> - }],
> - args=["-r1", "--with-all-revprops"])
> + expected_disk = wc.State("", {
> + UNICODE_TEST_STRING: Item(props={ "name": UNICODE_TEST_STRING }),
> + "test" : Item(),
> + })
>
> + svntest.actions.verify_disk(sbox.wc_dir, expected_disk,
> check_props=True)
> + os.chdir(sbox.wc_dir)
> + svntest.actions.run_and_verify_log_xml(
> + expected_revprops=[{
> + "svn:author": "jrandom",
> + "svn:date": "",
> + "svn:log": UNICODE_TEST_STRING,
> + "revprop": UNICODE_TEST_STRING
> + }],
> + args=["-r1", "--with-all-revprops"])
> + finally:
> + os.environ["LC_ALL"] = oldlocale
>
> ########################################################################
> # Run the tests
> ]]]
>
> --
> Timofei Zhakov
>
> Sounds good. However, I think we could add a decorator to set UTF-8 locale
> and restore original locale like the following patch.
>
> Also, C.UTF-8 (or en_US.UTF-8) locale is not available in all POSIX
> environments. We should check the locale available before the using. The
> unit test will be skipped if the locale unavailable in the proposed patch.
>
> [[[
> diff --git a/subversion/tests/cmdline/basic_tests.py
> b/subversion/tests/cmdline/basic_tests.py
> index 213c07d82..419f24ca3 100755
> --- a/subversion/tests/cmdline/basic_tests.py
> +++ b/subversion/tests/cmdline/basic_tests.py
> @@ -40,6 +40,7 @@ XFail = svntest.testcase.XFail_deco
> Issues = svntest.testcase.Issues_deco
> Issue = svntest.testcase.Issue_deco
> Wimp = svntest.testcase.Wimp_deco
> +Utf8LocaleTest = svntest.testcase.Utf8LocaleTest_deco
> Item = wc.StateItem
>
> # Generic UUID-matching regular expression
> @@ -3346,6 +3347,7 @@ def argv_with_best_fit_chars(sbox):
> if count == 0:
> raise svntest.Skip('No best fit characters in code page %r' % codepage)
>
> +@Utf8LocaleTest
> def unicode_arguments_test(sbox: svntest.sandbox.Sandbox):
> """test unicode arguments"""
>
> diff --git a/subversion/tests/cmdline/svntest/testcase.py
> b/subversion/tests/cmdline/svntest/testcase.py
> index 85deec4c6..aff866944 100644
> --- a/subversion/tests/cmdline/svntest/testcase.py
> +++ b/subversion/tests/cmdline/svntest/testcase.py
> @@ -349,3 +349,48 @@ def SkipDumpLoadCrossCheck_deco(cond_func = lambda:
> True):
>
> # Create a singular alias, for linguistic correctness
> Issue_deco = Issues_deco
> +
> +
> +def _detect_utf8_locale():
> + import locale
> +
> + orig = None
> + try:
> + for name in ('C.UTF-8', 'en_US.UTF-8'):
> + try:
> + orig = locale.setlocale(locale.LC_ALL, name)
> + except locale.Error:
> + continue
> + else:
> + return name
> + finally:
> + if orig is not None:
> + locale.setlocale(locale.LC_ALL, orig)
> +
> + return None # utf-8 locale unavailable
> +
> +
> +_utf8_locale = _detect_utf8_locale() if os.name != 'nt' else False
> +
> +
> +def Utf8LocaleTest_deco(f):
> +
> + import functools
> +
> + @functools.wraps(f)
> + def wrapper(sbox: svntest.sandbox.Sandbox):
> + if _utf8_locale is None:
> + raise svntest.Skip
> + if _utf8_locale is not False:
> + orig = os.environ.get('LC_ALL')
> + os.environ['LC_ALL'] = _utf8_locale
> + try:
> + return f(sbox)
> + finally:
> + if _utf8_locale is not False:
> + if orig is None:
> + del os.environ['LC_ALL']
> + else:
> + os.environ['LC_ALL'] = orig
> +
> + return wrapper
> ]]]
>
> --
> Jun Omae <[email protected]> <[email protected]> (大前 潤)
>
>
> +1 for this solution with the decorator, I'd only prefer to call it
> @RequireUtf8 – but that's a minor point.
>
>
+1 from me as well.
> Setting Python's global default encoding is a non-starter; not only
> because Subversion runs on systems that don't support UTF-8, but also
> because it would make the tests stop exercising our encoding conversion
> functions.
>
Yeah. I agree.
--
Timofei Zhakov