Re: [Python-3000] PEP 3137 plan of attack

Alexandre Vassalotti Tue, 09 Oct 2007 20:27:50 -0700

On 10/8/07, Alexandre Vassalotti <[EMAIL PROTECTED]> wrote:
> On 10/8/07, Guido van Rossum <[EMAIL PROTECTED]> wrote:
> > - change indexing and iteration over PyString to return ints, not
> > 1-char PyStrings
>
> I will try to do this one.


This took a bit longer than I expected. Changing the PyString iterator
to return ints was easy, but I ran into some issues with the codec
registry.

I won't have the time this week to work on my patch any further.
Meanwhile, if someone would like to improve it, feel free to do so (the
patch is attached to this email). Otherwise, I will continue to work
on it next weekend.

Cheers,
-- Alexandre

Index: Objects/stringobject.c
===================================================================
--- Objects/stringobject.c	(revision 58376)
+++ Objects/stringobject.c	(working copy)
@@ -985,23 +985,13 @@
 static PyObject *
 string_item(PyStringObject *a, register Py_ssize_t i)
 {
-	char pchar;
-	PyObject *v;
+	if (i < 0)
+		i += Py_Size(a);
 	if (i < 0 || i >= Py_Size(a)) {
 		PyErr_SetString(PyExc_IndexError, "string index out of range");
 		return NULL;
 	}
-	pchar = a->ob_sval[i];
-	v = (PyObject *)characters[pchar & UCHAR_MAX];
-	if (v == NULL)
-		v = PyString_FromStringAndSize(&pchar, 1);
-	else {
-#ifdef COUNT_ALLOCS
-		one_strings++;
-#endif
-		Py_INCREF(v);
-	}
-	return v;
+	return PyInt_FromLong((unsigned char)a->ob_sval[i]);
 }
 
 static PyObject*
@@ -4913,8 +4903,8 @@
 	assert(PyString_Check(seq));
 
 	if (it->it_index < PyString_GET_SIZE(seq)) {
-		item = PyString_FromStringAndSize(
-			PyString_AS_STRING(seq)+it->it_index, 1);
+		item = PyInt_FromLong(
+			(unsigned char)seq->ob_sval[it->it_index]);
 		if (item != NULL)
 			++it->it_index;
 		return item;
Index: Lib/modulefinder.py
===================================================================
--- Lib/modulefinder.py	(revision 58376)
+++ Lib/modulefinder.py	(working copy)
@@ -367,7 +367,7 @@
         consts = co.co_consts
         LOAD_LOAD_AND_IMPORT = LOAD_CONST + LOAD_CONST + IMPORT_NAME
         while code:
-            c = code[0]
+            c = chr(code[0])
             if c in STORE_OPS:
                 oparg, = unpack('<H', code[1:3])
                 yield "store", (names[oparg],)
Index: Lib/encodings/__init__.py
===================================================================
--- Lib/encodings/__init__.py	(revision 58376)
+++ Lib/encodings/__init__.py	(working copy)
@@ -54,7 +54,10 @@
     """
     chars = []
     punct = False
-    for c in encoding:
+    # 'encoding' is a bytes (PyString) object, thus its iterator returns
+    # integers. So, convert 'encoding' to Unicode before iterating over it. 
+    # XXX Should _PyCodec_Lookup() pass a Unicode object, instead?
+    for c in str(encoding):
         if c.isalnum() or c == '.':
             if punct and chars:
                 chars.append('_')
Index: Lib/test/test_bytes.py
===================================================================
--- Lib/test/test_bytes.py	(revision 58376)
+++ Lib/test/test_bytes.py	(working copy)
@@ -345,7 +345,7 @@
         sample = str8("Hello world\n\x80\x81\xfe\xff")
         buf = memoryview(sample)
         b = bytes(buf)
-        self.assertEqual(b, bytes(map(ord, sample)))
+        self.assertEqual(b, bytes(sample))
 
     def test_to_str(self):
         sample = "Hello world\n\x80\x81\xfe\xff"
Index: Lib/dis.py
===================================================================
--- Lib/dis.py	(revision 58376)
+++ Lib/dis.py	(working copy)
@@ -117,8 +117,7 @@
     extended_arg = 0
     free = None
     while i < n:
-        c = code[i]
-        op = ord(c)
+        op = code[i]
         if i in linestarts:
             if i > 0:
                 print()
@@ -134,7 +133,7 @@
         print(opname[op].ljust(20), end=' ')
         i = i+1
         if op >= HAVE_ARGUMENT:
-            oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg
+            oparg = code[i] + code[i+1]*256 + extended_arg
             extended_arg = 0
             i = i+2
             if op == EXTENDED_ARG:
@@ -162,8 +161,7 @@
     n = len(code)
     i = 0
     while i < n:
-        c = code[i]
-        op = ord(c)
+        op = code[i]
         if i == lasti: print('-->', end=' ')
         else: print('   ', end=' ')
         if i in labels: print('>>', end=' ')
@@ -172,7 +170,7 @@
         print(opname[op].ljust(15), end=' ')
         i = i+1
         if op >= HAVE_ARGUMENT:
-            oparg = ord(code[i]) + ord(code[i+1])*256
+            oparg = code[i] + code[i+1]*256
             i = i+2
             print(repr(oparg).rjust(5), end=' ')
             if op in hasconst:
@@ -208,11 +206,10 @@
     n = len(code)
     i = 0
     while i < n:
-        c = code[i]
-        op = ord(c)
+        op = code[i]
         i = i+1
         if op >= HAVE_ARGUMENT:
-            oparg = ord(code[i]) + ord(code[i+1])*256
+            oparg = code[i] + code[i+1]*256
             i = i+2
             label = -1
             if op in hasjrel:
@@ -230,8 +227,8 @@
     Generate pairs (offset, lineno) as described in Python/compile.c.
 
     """
-    byte_increments = [ord(c) for c in code.co_lnotab[0::2]]
-    line_increments = [ord(c) for c in code.co_lnotab[1::2]]
+    byte_increments = list(code.co_lnotab[0::2])
+    line_increments = list(code.co_lnotab[1::2])
 
     lastlineno = None
     lineno = code.co_firstlineno

_______________________________________________
Python-3000 mailing list
[email protected]
http://mail.python.org/mailman/listinfo/python-3000
Unsubscribe: 
http://mail.python.org/mailman/options/python-3000/archive%40mail-archive.com

Re: [Python-3000] PEP 3137 plan of attack

Reply via email to