Author: futatuki Date: Sat Feb 22 17:39:12 2020 New Revision: 1874393 URL: http://svn.apache.org/viewvc?rev=1874393&view=rev Log: Fix handling for non-ascii characters on internal path and stderr
* tools/hook-scripts/validate-files.py (docstring): Add note to handle non-ascii path and/or stderr output. (Commands.svnlook_changed): + Decode changed paths as 'utf-8' explicitly on Python 3 and leave them as ('utf-8' encoded) bytes in Python 2.7 + Decode stderr out message from svnlook as sys.stderr.encoding (Commands.user_command): Decode stderr out message from user commands as sys.stderr.encoding Modified: subversion/trunk/tools/hook-scripts/validate-files.py Modified: subversion/trunk/tools/hook-scripts/validate-files.py URL: http://svn.apache.org/viewvc/subversion/trunk/tools/hook-scripts/validate-files.py?rev=1874393&r1=1874392&r2=1874393&view=diff ============================================================================== --- subversion/trunk/tools/hook-scripts/validate-files.py (original) +++ subversion/trunk/tools/hook-scripts/validate-files.py Sat Feb 22 17:39:12 2020 @@ -19,7 +19,13 @@ """Subversion pre-commit hook script that runs user configured commands to validate files in the commit and reject the commit if the commands exit with a non-zero exit code. The script expects a validate-files.conf -file placed in the conf dir under the repo the commit is for.""" +file placed in the conf dir under the repo the commit is for. 
+ +Note: As changed file paths $FILE are always represented as Unicode (Py3) + or UTF-8 (Py2) strings, you might need to set an appropriate locale and + PYTHONIOENCODING environment variable for this script and + commands to handle non-ASCII paths and command output, especially + if you want to use the svnlook cat command to inspect file contents.""" import sys import os @@ -82,18 +88,26 @@ class Commands: line = p.stdout.readline() if not line: break - line = line.decode().strip() + line = line.strip() text_mod = line[0:1] # Only if the contents of the file changed (by addition or update) # directories always end in / in the svnlook changed output - if line[-1] != "/" and (text_mod == "A" or text_mod == "U"): - changed.append(line[4:]) + if line[-1:] != b"/" and (text_mod == b"A" or text_mod == b"U"): + changed_path = line[4:] + if not isinstance(changed_path, str): + # svnlook always uses UTF-8 for internal path + changed_path = changed_path.decode('utf-8') + changed.append(changed_path) # wait on the command to finish so we can get the # returncode/stderr output data = p.communicate() if p.returncode != 0: - sys.stderr.write(data[1].decode()) + err_mesg = data[1] + if sys.stderr.encoding: + err_mesg =err_mesg.decode(sys.stderr.encoding, + 'backslashreplace') + sys.stderr.write(err_mesg) sys.exit(2) return changed @@ -111,7 +125,11 @@ class Commands: cmd_env['FILE'] = fn p = subprocess.Popen(cmd, shell=True, env=cmd_env, stderr=subprocess.PIPE) data = p.communicate() - return (p.returncode, data[1].decode()) + err_mesg = data[1] + if sys.stderr.encoding: + err_mesg = err_mesg.decode(sys.stderr.encoding, + 'backslashreplace') + return (p.returncode, err_mesg) def main(repo, txn): exitcode = 0