Author: Brian Kearns <[email protected]>
Branch:
Changeset: r61997:ad521204bedf
Date: 2013-03-04 01:43 -0800
http://bitbucket.org/pypy/pypy/changeset/ad521204bedf/
Log: Merged in krono/pypy (pull request #130)
Small dotviewer fixes.
diff --git a/dotviewer/msgstruct.py b/dotviewer/msgstruct.py
--- a/dotviewer/msgstruct.py
+++ b/dotviewer/msgstruct.py
@@ -1,5 +1,6 @@
import sys, os
from struct import pack, unpack, calcsize
+from strunicode import tryencode
MAGIC = -0x3b83728b
@@ -24,15 +25,10 @@
long_max = 2147483647
-def _encodeme(x):
- if type(x) is unicode:
- x = x.encode('utf-8')
- return x
-
def message(tp, *values):
#print >> sys.stderr, tp, values
typecodes = ['']
- values = map(_encodeme, values)
+ values = map(tryencode, values)
for v in values:
if type(v) is str:
typecodes.append('%ds' % len(v))
diff --git a/dotviewer/strunicode.py b/dotviewer/strunicode.py
--- a/dotviewer/strunicode.py
+++ b/dotviewer/strunicode.py
@@ -1,9 +1,15 @@
RAW_ENCODING = "utf-8"
-
+ENCODING_ERROR_HANDLING = "replace"
def forceunicode(name):
- return name if isinstance(name, unicode) else name.decode(RAW_ENCODING)
+ """ returns `name` as unicode, even if it wasn't before """
+ return name if isinstance(name, unicode) else name.decode(RAW_ENCODING, ENCODING_ERROR_HANDLING)
def forcestr(name):
- return name if isinstance(name, str) else name.encode(RAW_ENCODING)
+ """ returns `name` as (possibly `RAW_ENCODING` encoded) string, even if it wasn't before """
+ return name if isinstance(name, str) else name.encode(RAW_ENCODING, ENCODING_ERROR_HANDLING)
+
+def tryencode(name):
+ """ returns `name` as encoded string if it was unicode before """
+ return name.encode(RAW_ENCODING, ENCODING_ERROR_HANDLING) if isinstance(name, unicode) else name
diff --git a/dotviewer/test/test_unicode_util.py b/dotviewer/test/test_unicode_util.py
--- a/dotviewer/test/test_unicode_util.py
+++ b/dotviewer/test/test_unicode_util.py
@@ -3,7 +3,7 @@
#
import py
import codecs
-from dotviewer.strunicode import RAW_ENCODING, forcestr, forceunicode
+from dotviewer.strunicode import RAW_ENCODING, forcestr, forceunicode, tryencode
SOURCE1 = u"""digraph G{
λ -> b
@@ -18,7 +18,7 @@
def test_idempotent(self):
x = u"a"
assert forceunicode(forcestr(x)) == x
-
+
x = u"λ"
assert forceunicode(forcestr(x)) == x
@@ -40,7 +40,7 @@
x_u = forceunicode(x_e)
assert forceunicode(x_u) == x_u
- def test_file(self):
+ def test_file(self):
udir = py.path.local.make_numbered_dir(prefix='usession-dot-', keep=3)
full_filename = str(udir.join(FILENAME))
f = codecs.open(full_filename, 'wb', RAW_ENCODING)
@@ -55,3 +55,30 @@
f3.close()
result = (c == SOURCE1)
assert result
+
+ def test_only_unicode_encode(self):
+
+ sut = [1, u"a", "miau", u"λ"]
+ expected = [int, str, str , str ]
+
+ results = map(tryencode, sut)
+
+
+ for result, expected_type in zip(results, expected):
+ assert isinstance(result, expected_type)
+
+ def test_forceunicode_should_not_fail(self):
+
+ garbage = "\xef\xff\xbb\xbf\xce\xbb\xff\xff" # garbage with a lambda
+
+ result = forceunicode(garbage)
+ assert True, "should not raise"
+
+ def test_forcestr_should_not_fail(self):
+
+ garbage = u"\xef\xff\xbb\xbf\xce\xbb\xff\xff" # garbage
+
+ result = forcestr(garbage)
+ assert True, "should not raise"
+
+
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit