Nir Soffer has uploaded a new change for review. Change subject: startup: Change system default encoding to utf8 ......................................................................
startup: Change system default encoding to utf8 In Python 2, the system default encoding is 'ascii'. This cause mixing of unicode and non-ascii strings (e.g. utf8 encoded) to fail with UnicodeDecodeError or UnicodeEncodeError. The trigger for this failures is starting using the built-in json library, that return all values as unicode strings, even if the value is ascii. For example: >>> u'ascii' + '\xd7\x90' Traceback (most recent call last): File "<stdin>", line 1, in <module> UnicodeDecodeError: 'ascii' codec can't decode byte 0xd7 in position 0: ordinal not in range(128) Python try to decode the second value implicitly, and fail since this is a utf8 encoded string. To avoid such issue, the entire application must be changed to use only strings or only unicode internally, and never mix these types. In the distant future, when we run on Python 3, this will be true. For now, the only way to prevent these issues systematically is to change the system default encoding to 'utf8'. Changing the default encoding is done with sys.setdefaultencoding(), but this function exists only during startup, and can be called only from sitecustomize module. We keep now sitecustomize.py module, and change PYTHONPATH so it will be loaded on startup. Change-Id: Icc3f072a499ba4034bdbedd09eb60d7a3a32f9c8 Bug-Url: https://bugzilla.redhat.com/1281940 Signed-off-by: Nir Soffer <nsof...@redhat.com> --- M debian/vdsm.install M init/daemonAdapter M tests/Makefile.am A tests/unicode_test.py M vdsm.spec.in A vdsm/sitecustomize.py 6 files changed, 82 insertions(+), 0 deletions(-) git pull ssh://gerrit.ovirt.org:29418/vdsm refs/changes/61/48661/1 diff --git a/debian/vdsm.install b/debian/vdsm.install index 007913b..786469b 100644 --- a/debian/vdsm.install +++ b/debian/vdsm.install @@ -129,6 +129,7 @@ ./usr/share/vdsm/storage/threadLocal.py ./usr/share/vdsm/storage/threadPool.py ./usr/share/vdsm/storage/volume.py +./usr/share/vdsm/sitecustomize.py ./usr/share/vdsm/supervdsmServer ./usr/share/vdsm/v2v.py ./usr/share/vdsm/vdsm diff --git a/init/daemonAdapter b/init/daemonAdapter index a112af0..c67ff57 100755 --- a/init/daemonAdapter +++ b/init/daemonAdapter @@ -69,12 +69,20 @@ os.nice(config.getint('vars', 'vdsm_nice')) env = os.environ.copy() + + # Python path is needed to get sitecustomize.py imported from + # target directory. + pythonpath = os.path.dirname(self._args.target[0]) + if 'PYTHONPATH' in env: + pythonpath += ':' + env['PYTHONPATH'] + env.update({ 'LIBVIRT_LOG_FILTERS': config.get( 'vars', 'libvirt_env_variable_log_filters'), 'LIBVIRT_LOG_OUTPUTS': config.get( 'vars', 'libvirt_env_variable_log_outputs'), 'LC_ALL': 'C', + 'PYTHONPATH': pythonpath, }) cmd = [self._args.target[0]] + self._args.targetOptions diff --git a/tests/Makefile.am b/tests/Makefile.am index eab7dc7..3baaf84 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -112,6 +112,7 @@ testlibTests.py \ toolTests.py \ transportWrapperTests.py \ + unicode_test.py \ utilsTests.py \ vdscliTests.py \ vdsClientTests.py \ diff --git a/tests/unicode_test.py b/tests/unicode_test.py new file mode 100644 index 0000000..e6f6f35 --- /dev/null +++ b/tests/unicode_test.py @@ -0,0 +1,67 @@ +# +# Copyright 2015 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# +# Refer to the README and COPYING files for full details of the license +# + +from testlib import VdsmTestCase +from testlib import permutations, expandPermutations + +ENCODE = [ + # value, encoded (utf8) + (u'\u05d0', '\xd7\x90'), + ('\xd7\x90', '\xd7\x90'), + (u'ascii', 'ascii'), + ('ascii', 'ascii'), +] + +DECODE = [ + # value (utf8), decoded + ('\xd7\x90', u'\u05d0'), + (u'\u05d0', u'\u05d0'), + ('ascii', u'ascii'), + (u'ascii', u'ascii'), +] + + +@expandPermutations +class TestUnicode(VdsmTestCase): + + @permutations(ENCODE) + def test_encode(self, value, encoded): + self.assertEquals(value.encode("utf8"), encoded) + + @permutations(ENCODE) + def test_str(self, value, encoded): + self.assertEqual(str(value), encoded) + + @permutations(DECODE) + def test_decode(self, value, decoded): + self.assertEquals(value.decode("utf8"), decoded) + + @permutations(DECODE) + def test_unicode(self, value, decoded): + self.assertEquals(unicode(value), decoded) + + def test_mix_add(self): + self.assertEquals(u'\u05d0' + '\xd7\x91', u'\u05d0\u05d1') + + def test_mix_format_str(self): + self.assertEquals(u'\u05d0%s' % '\xd7\x91', u'\u05d0\u05d1') + + def test_mix_format_unicode(self): + self.assertEquals('\xd7\x90%s' % u'\u05d1', u'\u05d0\u05d1') diff --git a/vdsm.spec.in b/vdsm.spec.in index 6b2c1d1..eace810 100644 --- a/vdsm.spec.in +++ b/vdsm.spec.in @@ -830,6 +830,7 @@ %{_datadir}/%{vdsm_name}/mkimage.py* %{_datadir}/%{vdsm_name}/numaUtils.py* %{_datadir}/%{vdsm_name}/protocoldetector.py* +%{_datadir}/%{vdsm_name}/sitecustomize.py* %{_datadir}/%{vdsm_name}/supervdsmServer %{_datadir}/%{vdsm_name}/v2v.py* %{_datadir}/%{vdsm_name}/vdsm diff --git a/vdsm/sitecustomize.py b/vdsm/sitecustomize.py new file mode 100644 index 0000000..5810e26 --- /dev/null +++ b/vdsm/sitecustomize.py @@ -0,0 +1,4 @@ +import sys + +# Allow mixing of unicode objecdts and strings encoding in utf8 +sys.setdefaultencoding('utf8') -- To view, visit https://gerrit.ovirt.org/48661 To unsubscribe, visit https://gerrit.ovirt.org/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Icc3f072a499ba4034bdbedd09eb60d7a3a32f9c8 Gerrit-PatchSet: 1 Gerrit-Project: vdsm Gerrit-Branch: master Gerrit-Owner: Nir Soffer <nsof...@redhat.com> _______________________________________________ vdsm-patches mailing list vdsm-patches@lists.fedorahosted.org https://lists.fedorahosted.org/mailman/listinfo/vdsm-patches