Closed by commit rHG04a3ae7aba14: chg: force-set LC_CTYPE on server start to actual value from the environment (authored by spectral). This revision was automatically updated to reflect the committed changes. This revision was not accepted when it landed; it landed in state "Needs Review".
REPOSITORY rHG Mercurial CHANGES SINCE LAST UPDATE https://phab.mercurial-scm.org/D8039?vs=19876&id=19933 CHANGES SINCE LAST ACTION https://phab.mercurial-scm.org/D8039/new/ REVISION DETAIL https://phab.mercurial-scm.org/D8039 AFFECTED FILES contrib/chg/chg.c mercurial/chgserver.py tests/test-chg.t CHANGE DETAILS diff --git a/tests/test-chg.t b/tests/test-chg.t --- a/tests/test-chg.t +++ b/tests/test-chg.t @@ -332,8 +332,8 @@ YYYY/MM/DD HH:MM:SS (PID)> log -R cached YYYY/MM/DD HH:MM:SS (PID)> loaded repo into cache: $TESTTMP/cached (in ...s) -Test that chg works even when python "coerces" the locale (py3.7+, which is done -by default if none of LC_ALL, LC_CTYPE, or LANG are set in the environment) +Test that chg works (sets to the user's actual LC_CTYPE) even when python +"coerces" the locale (py3.7+) $ cat > $TESTTMP/debugenv.py <<EOF > from mercurial import encoding @@ -347,9 +347,22 @@ > if v is not None: > ui.write(b'%s=%s\n' % (k, encoding.environ[k])) > EOF +(hg keeps python's modified LC_CTYPE, chg doesn't) + $ (unset LC_ALL; unset LANG; LC_CTYPE= "$CHGHG" \ + > --config extensions.debugenv=$TESTTMP/debugenv.py debugenv) + LC_CTYPE=C.UTF-8 (py37 !) + LC_CTYPE= (no-py37 !) + $ (unset LC_ALL; unset LANG; LC_CTYPE= chg \ + > --config extensions.debugenv=$TESTTMP/debugenv.py debugenv) + LC_CTYPE= + $ (unset LC_ALL; unset LANG; LC_CTYPE=unsupported_value chg \ + > --config extensions.debugenv=$TESTTMP/debugenv.py debugenv) + LC_CTYPE=unsupported_value + $ (unset LC_ALL; unset LANG; LC_CTYPE= chg \ + > --config extensions.debugenv=$TESTTMP/debugenv.py debugenv) + LC_CTYPE= $ LANG= LC_ALL= LC_CTYPE= chg \ > --config extensions.debugenv=$TESTTMP/debugenv.py debugenv LC_ALL= - LC_CTYPE=C.UTF-8 (py37 !) - LC_CTYPE= (no-py37 !) + LC_CTYPE= LANG= diff --git a/mercurial/chgserver.py b/mercurial/chgserver.py --- a/mercurial/chgserver.py +++ b/mercurial/chgserver.py @@ -550,40 +550,6 @@ raise ValueError(b'unexpected value in setenv request') self.ui.log(b'chgserver', b'setenv: %r\n', sorted(newenv.keys())) - # Python3 has some logic to "coerce" the C locale to a UTF-8 capable - # one, and it sets LC_CTYPE in the environment to C.UTF-8 if none of - # 'LC_CTYPE', 'LC_ALL' or 'LANG' are set (to any value). This can be - # disabled with PYTHONCOERCECLOCALE=0 in the environment. - # - # When fromui is called via _inithashstate, python has already set - # this, so that's in the environment right when we start up the hg - # process. Then chg will call us and tell us to set the environment to - # the one it has; this might NOT have LC_CTYPE, so we'll need to - # carry-forward the LC_CTYPE that was coerced in these situations. - # - # If this is not handled, we will fail config+env validation and fail - # to start chg. If this is just ignored instead of carried forward, we - # may have different behavior between chg and non-chg. - if pycompat.ispy3: - # Rename for wordwrapping purposes - oldenv = encoding.environ - if not any( - e.get(b'PYTHONCOERCECLOCALE') == b'0' for e in [oldenv, newenv] - ): - keys = [b'LC_CTYPE', b'LC_ALL', b'LANG'] - old_keys = [k for k, v in oldenv.items() if k in keys and v] - new_keys = [k for k, v in newenv.items() if k in keys and v] - # If the user's environment (from chg) doesn't have ANY of the - # keys that python looks for, and the environment (from - # initialization) has ONLY LC_CTYPE and it's set to C.UTF-8, - # carry it forward. - if ( - not new_keys - and old_keys == [b'LC_CTYPE'] - and oldenv[b'LC_CTYPE'] == b'C.UTF-8' - ): - newenv[b'LC_CTYPE'] = oldenv[b'LC_CTYPE'] - encoding.environ.clear() encoding.environ.update(newenv) @@ -730,6 +696,16 @@ # environ cleaner. if b'CHGINTERNALMARK' in encoding.environ: del encoding.environ[b'CHGINTERNALMARK'] + # Python3.7+ "coerces" the LC_CTYPE environment variable to a UTF-8 one if + # it thinks the current value is "C". This breaks the hash computation and + # causes chg to restart loop. + if b'CHGORIG_LC_CTYPE' in encoding.environ: + encoding.environ[b'LC_CTYPE'] = encoding.environ[b'CHGORIG_LC_CTYPE'] + del encoding.environ[b'CHGORIG_LC_CTYPE'] + elif b'CHG_CLEAR_LC_CTYPE' in encoding.environ: + if b'LC_CTYPE' in encoding.environ: + del encoding.environ[b'LC_CTYPE'] + del encoding.environ[b'CHG_CLEAR_LC_CTYPE'] if repo: # one chgserver can serve multiple repos. drop repo information diff --git a/contrib/chg/chg.c b/contrib/chg/chg.c --- a/contrib/chg/chg.c +++ b/contrib/chg/chg.c @@ -226,6 +226,16 @@ } argv[argsize - 1] = NULL; + const char *lc_ctype_env = getenv("LC_CTYPE"); + if (lc_ctype_env == NULL) { + if (putenv("CHG_CLEAR_LC_CTYPE=") != 0) + abortmsgerrno("failed to putenv CHG_CLEAR_LC_CTYPE"); + } else { + if (setenv("CHGORIG_LC_CTYPE", lc_ctype_env, 1) != 0) { + abortmsgerrno("failed to setenv CHGORIG_LC_CTYYPE"); + } + } + if (putenv("CHGINTERNALMARK=") != 0) abortmsgerrno("failed to putenv"); if (execvp(hgcmd, (char **)argv) < 0) To: spectral, #hg-reviewers Cc: quark, yuja, mjpieters, mercurial-devel _______________________________________________ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel