On 2/21/07, Guido van Rossum <[EMAIL PROTECTED]> wrote:
Patch anyone?
See attachment. It's preliminary -- it currently just calls the global name
'bytes' (and doesn't even use the 'right' AST concretion mechanism), which
means you can override what the bytes literal creates by assigning to
'bytes' (although I'm sure there are people out there who would love to keep
it that way ;-P). It should probably get its own bytecode (no pun intended).
On 2/21/07, Josiah Carlson <[EMAIL PROTECTED]> wrote:
>
> "Jim Jewett" <[EMAIL PROTECTED]> wrote:
> >
> > On 2/21/07, Guido van Rossum <[EMAIL PROTECTED]> wrote:
> > > If the spelling of a bytes string with an ASCII character value is all
> > > you are complaining about, you should have said so right away.
> >
> > That is my main objection.
> >
> > A literal form does clear it up, though I'm not sure "b" is the right
> > prefix. (I keep wanting to read "binary" or "boolean", rather than
> > "ASCII")
> >
> > To be honest, it would probably be enough if there were an ascii
> > builtin, or if the example uses of the bytes constructor showed
> >
> > bytes(text) # no encoding
> >
> > just copying the low-order byte, and raising exceptions if any
> > high-order bytes were non-zero.
>
> That's more or less changing the signature of bytes to be bytes(<text>,
> codec='ascii'), but it breaks when faced with hex or octal escapes
> greater than 127. Making it codec='latin-1' is marginally better, but
> having a default, regardless of the default, is begging for trouble
> (especially when dealing with unicode).
>
> - Josiah
>
> _______________________________________________
> Python-3000 mailing list
> [email protected]
> http://mail.python.org/mailman/listinfo/python-3000
> Unsubscribe:
http://mail.python.org/mailman/options/python-3000/guido%40python.org
>
--
--Guido van Rossum (home page: http://www.python.org/~guido/)
_______________________________________________
Python-3000 mailing list
[email protected]
http://mail.python.org/mailman/listinfo/python-3000
Unsubscribe:
http://mail.python.org/mailman/options/python-3000/thomas%40python.org
--
Thomas Wouters <[EMAIL PROTECTED]>
Hi! I'm a .signature virus! copy me into your .signature file to help me
spread!
Index: Python/ast.c
===================================================================
--- Python/ast.c (revision 53867)
+++ Python/ast.c (working copy)
@@ -33,8 +33,9 @@
static expr_ty ast_for_call(struct compiling *, const node *, expr_ty);
static PyObject *parsenumber(const char *);
-static PyObject *parsestr(const char *s, const char *encoding);
-static PyObject *parsestrplus(struct compiling *, const node *n);
+static PyObject *parsestr(const node *n, const char *encoding, int *bytesmode);
+static PyObject *parsestrplus(struct compiling *, const node *n,
+ int *bytesmode);
#ifndef LINENO
#define LINENO(n) ((n)->n_lineno)
@@ -1383,6 +1384,7 @@
| '{' [dictsetmaker] '}' | NAME | NUMBER | STRING+
*/
node *ch = CHILD(n, 0);
+ int bytesmode = 0;
switch (TYPE(ch)) {
case NAME:
@@ -1390,12 +1392,15 @@
changed. */
return Name(NEW_IDENTIFIER(ch), Load, LINENO(n), n->n_col_offset, c->c_arena);
case STRING: {
- PyObject *str = parsestrplus(c, n);
+ PyObject *str = parsestrplus(c, n, &bytesmode);
if (!str)
return NULL;
PyArena_AddPyObject(c->c_arena, str);
- return Str(str, LINENO(n), n->n_col_offset, c->c_arena);
+ if (bytesmode)
+ return Bytes(str, LINENO(n), n->n_col_offset, c->c_arena);
+ else
+ return Str(str, LINENO(n), n->n_col_offset, c->c_arena);
}
case NUMBER: {
PyObject *pynum = parsenumber(STR(ch));
@@ -3254,9 +3259,10 @@
* parsestr parses it, and returns the decoded Python string object.
*/
static PyObject *
-parsestr(const char *s, const char *encoding)
+parsestr(const node *n, const char *encoding, int *bytesmode)
{
size_t len;
+ const char *s = STR(n);
int quote = Py_CHARMASK(*s);
int rawmode = 0;
int need_encoding;
@@ -3267,6 +3273,10 @@
quote = *++s;
unicode = 1;
}
+ if (quote == 'b' || quote == 'B') {
+ quote = *++s;
+ *bytesmode = 1;
+ }
if (quote == 'r' || quote == 'R') {
quote = *++s;
rawmode = 1;
@@ -3276,6 +3286,10 @@
PyErr_BadInternalCall();
return NULL;
}
+ if (unicode && *bytesmode) {
+ ast_error(n, "string cannot be both bytes and unicode");
+ return NULL;
+ }
s++;
len = strlen(s);
if (len > INT_MAX) {
@@ -3300,7 +3314,18 @@
return decode_unicode(s, len, rawmode, encoding);
}
#endif
- need_encoding = (encoding != NULL &&
+ if (*bytesmode) {
+ /* Disallow non-ascii characters (but not escapes) */
+ const char *c;
+ for (c = s; *c; c++) {
+ if (Py_CHARMASK(*c) >= 0x80) {
+ ast_error(n, "bytes can only contain ASCII "
+ "literal characters.");
+ return NULL;
+ }
+ }
+ }
+ need_encoding = (!*bytesmode && encoding != NULL &&
strcmp(encoding, "utf-8") != 0 &&
strcmp(encoding, "iso-8859-1") != 0);
if (rawmode || strchr(s, '\\') == NULL) {
@@ -3332,18 +3357,25 @@
* pasting the intermediate results together.
*/
static PyObject *
-parsestrplus(struct compiling *c, const node *n)
+parsestrplus(struct compiling *c, const node *n, int *bytesmode)
{
PyObject *v;
int i;
REQ(CHILD(n, 0), STRING);
- if ((v = parsestr(STR(CHILD(n, 0)), c->c_encoding)) != NULL) {
+ v = parsestr(CHILD(n, 0), c->c_encoding, bytesmode);
+ if (v != NULL) {
/* String literal concatenation */
for (i = 1; i < NCH(n); i++) {
PyObject *s;
- s = parsestr(STR(CHILD(n, i)), c->c_encoding);
+ int subbm = 0;
+ s = parsestr(CHILD(n, i), c->c_encoding, &subbm);
if (s == NULL)
goto onError;
+ if (*bytesmode != subbm) {
+ ast_error(n, "cannot mix bytes and nonbytes"
+ "literals");
+ goto onError;
+ }
if (PyString_Check(v) && PyString_Check(s)) {
PyString_ConcatAndDel(&v, s);
if (v == NULL)
Index: Python/compile.c
===================================================================
--- Python/compile.c (revision 53867)
+++ Python/compile.c (working copy)
@@ -164,6 +164,7 @@
static int compiler_augassign(struct compiler *, stmt_ty);
static int compiler_visit_slice(struct compiler *, slice_ty,
expr_context_ty);
+static int compiler_visit_bytes(struct compiler *, expr_ty);
static int compiler_push_fblock(struct compiler *, enum fblocktype,
basicblock *);
@@ -3077,6 +3078,9 @@
case Str_kind:
ADDOP_O(c, LOAD_CONST, e->v.Str.s, consts);
break;
+ case Bytes_kind:
+ return compiler_visit_bytes(c, e);
+ break;
case Ellipsis_kind:
ADDOP_O(c, LOAD_CONST, Py_Ellipsis, consts);
break;
@@ -3426,6 +3430,22 @@
return compiler_handle_subscr(c, kindname, ctx);
}
+static int
+compiler_visit_bytes(struct compiler *c, expr_ty e)
+{
+ static identifier bytes_str;
+
+ assert(e->kind == Bytes_kind);
+ if (!bytes_str) {
+ bytes_str = PyString_InternFromString("bytes");
+ if (bytes_str == NULL)
+ return 0;
+ }
+ ADDOP_NAME(c, LOAD_GLOBAL, bytes_str, names);
+ ADDOP_O(c, LOAD_CONST, e->v.Bytes.s, consts);
+ ADDOP_I(c, CALL_FUNCTION, 1);
+ return 1;
+}
/* End of the compiler section, beginning of the assembler section */
Index: Parser/tokenizer.c
===================================================================
--- Parser/tokenizer.c (revision 53867)
+++ Parser/tokenizer.c (working copy)
@@ -1244,6 +1244,14 @@
if (c == '"' || c == '\'')
goto letter_quote;
break;
+ case 'b':
+ case 'B':
+ c = tok_nextc(tok);
+ if (c == 'r' || c == 'R')
+ c = tok_nextc(tok);
+ if (c == '"' || c == '\'')
+ goto letter_quote;
+ break;
}
while (isalnum(c) || c == '_') {
c = tok_nextc(tok);
Index: Parser/Python.asdl
===================================================================
--- Parser/Python.asdl (revision 53867)
+++ Parser/Python.asdl (working copy)
@@ -60,6 +60,7 @@
expr? starargs, expr? kwargs)
| Num(object n) -- a number as a PyObject.
| Str(string s) -- need to specify raw, unicode, etc?
+ | Bytes(string s)
| Ellipsis
-- other literals? bools?
_______________________________________________
Python-3000 mailing list
[email protected]
http://mail.python.org/mailman/listinfo/python-3000
Unsubscribe:
http://mail.python.org/mailman/options/python-3000/archive%40mail-archive.com