# HG changeset patch
# User Yuya Nishihara <y...@tcha.org>
# Date 1488980906 -32400
#      Wed Mar 08 22:48:26 2017 +0900
# Node ID aae1bd4b8a03ffe6cb47c23da55b2f70ff5607ef
# Parent  f39291c08d86082579b891e844cbd56752b44a59
pycompat: add bytestr wrapper which mostly acts as a Python 2 str

This allows us to handle bytes in mostly the same manner as Python 2 str,
so we can get rid of ugly s[i:i + 1] hacks:

  s = bytestr(s)
  while i < len(s):
      c = s[i]
      ...

This is a simpler version of the previous RFC patch, which tried to preserve
the bytestr type if possible. The new version simply drops the bytestr wrapping
so we aren't likely to pass a bytestr to a function that expects Python 3
bytes.

diff --git a/mercurial/pycompat.py b/mercurial/pycompat.py
--- a/mercurial/pycompat.py
+++ b/mercurial/pycompat.py
@@ -76,6 +76,67 @@ if ispy3:
 
     bytechr = struct.Struct('>B').pack
 
+    class bytestr(bytes):
+        """A bytes which mostly acts as a Python 2 str
+
+        >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
+        (b'', b'foo', b'ascii', b'1')
+        >>> s = bytestr(b'foo')
+        >>> assert s is bytestr(s)
+
+        There's no implicit conversion from non-ascii str as its encoding is
+        unknown:
+
+        >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
+        Traceback (most recent call last):
+          ...
+        UnicodeEncodeError: ...
+
+        Comparison between bytestr and bytes should work:
+
+        >>> assert bytestr(b'foo') == b'foo'
+        >>> assert b'foo' == bytestr(b'foo')
+        >>> assert b'f' in bytestr(b'foo')
+        >>> assert bytestr(b'f') in b'foo'
+
+        Sliced elements should be bytes, not integer:
+
+        >>> s[1], s[:2]
+        (b'o', b'fo')
+        >>> list(s), list(reversed(s))
+        ([b'f', b'o', b'o'], [b'o', b'o', b'f'])
+
+        As bytestr type isn't propagated across operations, you need to cast
+        bytes to bytestr explicitly:
+
+        >>> s = bytestr(b'foo').upper()
+        >>> t = bytestr(s)
+        >>> s[0], t[0]
+        (70, b'F')
+
+        Be careful to not pass a bytestr object to a function which expects
+        bytearray-like behavior.
+
+        >>> t = bytes(t)  # cast to bytes
+        >>> assert type(t) is bytes
+        """
+
+        def __new__(cls, s=b''):
+            if isinstance(s, bytestr):
+                return s
+            if not isinstance(s, (bytes, bytearray)):
+                s = str(s).encode(u'ascii')
+            return bytes.__new__(cls, s)
+
+        def __getitem__(self, key):
+            s = bytes.__getitem__(self, key)
+            if not isinstance(s, bytes):
+                s = bytechr(s)
+            return s
+
+        def __iter__(self):
+            return iterbytestr(bytes.__iter__(self))
+
     def iterbytestr(s):
         """Iterate bytes as if it were a str object of Python 2"""
         return map(bytechr, s)
@@ -146,6 +207,7 @@ else:
     import cStringIO
 
     bytechr = chr
+    bytestr = str
     iterbytestr = iter
 
     def sysstr(s):
diff --git a/tests/test-doctest.py b/tests/test-doctest.py
--- a/tests/test-doctest.py
+++ b/tests/test-doctest.py
@@ -34,6 +34,7 @@ testmod('mercurial.minirst')
 testmod('mercurial.patch')
 testmod('mercurial.pathutil')
 testmod('mercurial.parser')
+testmod('mercurial.pycompat', py3=True)
 testmod('mercurial.revsetlang')
 testmod('mercurial.smartset')
 testmod('mercurial.store')
_______________________________________________
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel

Reply via email to