Author: Amaury Forgeot d'Arc <amaur...@gmail.com> Branch: Changeset: r57526:a79482e85abd Date: 2012-09-24 23:45 +0200 http://bitbucket.org/pypy/pypy/changeset/a79482e85abd/
Log: merge heads diff --git a/lib-python/2.7/test/test_csv.py b/lib-python/2.7/test/test_csv.py --- a/lib-python/2.7/test/test_csv.py +++ b/lib-python/2.7/test/test_csv.py @@ -20,7 +20,8 @@ """ def _test_arg_valid(self, ctor, arg): self.assertRaises(TypeError, ctor) - self.assertRaises(TypeError, ctor, None) + # PyPy gets an AttributeError instead of a TypeError + self.assertRaises((TypeError, AttributeError), ctor, None) self.assertRaises(TypeError, ctor, arg, bad_attr = 0) self.assertRaises(TypeError, ctor, arg, delimiter = 0) self.assertRaises(TypeError, ctor, arg, delimiter = 'XX') @@ -59,7 +60,8 @@ self.assertRaises((TypeError, AttributeError), setattr, obj.dialect, 'delimiter', ':') self.assertRaises(AttributeError, delattr, obj.dialect, 'quoting') - self.assertRaises(AttributeError, setattr, obj.dialect, + # PyPy gets a TypeError instead of an AttributeError + self.assertRaises((AttributeError, TypeError), setattr, obj.dialect, 'quoting', None) def test_reader_attrs(self): @@ -133,7 +135,8 @@ os.unlink(name) def test_write_arg_valid(self): - self.assertRaises(csv.Error, self._write_test, None, '') + # PyPy gets a TypeError instead of a csv.Error for "not a sequence" + self.assertRaises((csv.Error, TypeError), self._write_test, None, '') self._write_test((), '') self._write_test([None], '""') self.assertRaises(csv.Error, self._write_test, diff --git a/lib-python/conftest.py b/lib-python/conftest.py --- a/lib-python/conftest.py +++ b/lib-python/conftest.py @@ -183,7 +183,7 @@ RegrTest('test_cpickle.py', core=True), RegrTest('test_cprofile.py'), RegrTest('test_crypt.py', usemodules='crypt', skip=skip_win32), - RegrTest('test_csv.py'), + RegrTest('test_csv.py', usemodules='_csv'), RegrTest('test_curses.py', skip="unsupported extension module"), RegrTest('test_datetime.py'), diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py --- a/pypy/config/pypyoption.py +++ b/pypy/config/pypyoption.py @@ -34,7 +34,7 @@ "thread", "itertools", "pyexpat", "_ssl", "cpyext", "array", "_bisect", "binascii", "_multiprocessing", '_warnings', "_collections", "_multibytecodec", "micronumpy", "_ffi", - "_continuation", "_cffi_backend"] + "_continuation", "_cffi_backend", "_csv"] )) translation_modules = default_modules.copy() diff --git a/pypy/module/_cffi_backend/__init__.py b/pypy/module/_cffi_backend/__init__.py --- a/pypy/module/_cffi_backend/__init__.py +++ b/pypy/module/_cffi_backend/__init__.py @@ -1,11 +1,13 @@ from pypy.interpreter.mixedmodule import MixedModule +from pypy.rlib import rdynload + class Module(MixedModule): appleveldefs = { } interpleveldefs = { - '__version__': 'space.wrap("0.3")', + '__version__': 'space.wrap("0.4")', 'nonstandard_integer_types': 'misc.nonstandard_integer_types', @@ -42,3 +44,12 @@ 'FFI_DEFAULT_ABI': 'ctypefunc._get_abi(space, "FFI_DEFAULT_ABI")', 'FFI_CDECL': 'ctypefunc._get_abi(space,"FFI_DEFAULT_ABI")',#win32 name } + +for _name in ["RTLD_LAZY", "RTLD_NOW", "RTLD_GLOBAL", "RTLD_LOCAL", + "RTLD_NODELETE", "RTLD_NOLOAD", "RTLD_DEEPBIND"]: + if getattr(rdynload.cConfig, _name) is not None: + Module.interpleveldefs[_name] = 'space.wrap(%d)' % ( + getattr(rdynload.cConfig, _name),) + +for _name in ["RTLD_LAZY", "RTLD_NOW", "RTLD_GLOBAL", "RTLD_LOCAL"]: + Module.interpleveldefs.setdefault(_name, 'space.wrap(0)') diff --git a/pypy/module/_cffi_backend/libraryobj.py b/pypy/module/_cffi_backend/libraryobj.py --- a/pypy/module/_cffi_backend/libraryobj.py +++ b/pypy/module/_cffi_backend/libraryobj.py @@ -5,7 +5,6 @@ from pypy.interpreter.typedef import TypeDef from pypy.rpython.lltypesystem import lltype, rffi from pypy.rlib.rdynload import DLLHANDLE, dlopen, dlsym, dlclose, DLOpenError -from pypy.rlib.rdynload import RTLD_GLOBAL from pypy.module._cffi_backend.cdataobj import W_CData from pypy.module._cffi_backend.ctypeobj import W_CType @@ -15,17 +14,13 @@ _immutable_ = True handle = rffi.cast(DLLHANDLE, 0) - def __init__(self, space, filename, is_global): + def __init__(self, space, filename, flags): self.space = space - if is_global and RTLD_GLOBAL is not None: - mode = RTLD_GLOBAL - else: - mode = -1 # default value, corresponds to RTLD_LOCAL with rffi.scoped_str2charp(filename) as ll_libname: if filename is None: filename = "<None>" try: - self.handle = dlopen(ll_libname, mode) + self.handle = dlopen(ll_libname, flags) except DLOpenError, e: raise operationerrfmt(space.w_OSError, "cannot load library %s: %s", @@ -100,7 +95,7 @@ W_Library.acceptable_as_base_class = False -@unwrap_spec(filename="str_or_None", is_global=int) -def load_library(space, filename, is_global=0): - lib = W_Library(space, filename, is_global) +@unwrap_spec(filename="str_or_None", flags=int) +def load_library(space, filename, flags=0): + lib = W_Library(space, filename, flags) return space.wrap(lib) diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py --- a/pypy/module/_cffi_backend/test/_backend_test_c.py +++ b/pypy/module/_cffi_backend/test/_backend_test_c.py @@ -42,19 +42,34 @@ return sizeof(BPtr) -def find_and_load_library(name, is_global=0): +def find_and_load_library(name, flags=RTLD_NOW): import ctypes.util if name is None: path = None else: path = ctypes.util.find_library(name) - return load_library(path, is_global) + return load_library(path, flags) def test_load_library(): x = find_and_load_library('c') assert repr(x).startswith("<clibrary '") - x = find_and_load_library('c', 1) + x = find_and_load_library('c', RTLD_NOW | RTLD_GLOBAL) assert repr(x).startswith("<clibrary '") + x = find_and_load_library('c', RTLD_LAZY) + assert repr(x).startswith("<clibrary '") + +def test_all_rtld_symbols(): + import sys + FFI_DEFAULT_ABI # these symbols must be defined + FFI_CDECL + RTLD_LAZY + RTLD_NOW + RTLD_GLOBAL + RTLD_LOCAL + if sys.platform.startswith("linux"): + RTLD_NODELETE + RTLD_NOLOAD + RTLD_DEEPBIND def test_nonstandard_integer_types(): d = nonstandard_integer_types() diff --git a/pypy/module/_cffi_backend/test/test_c.py b/pypy/module/_cffi_backend/test/test_c.py --- a/pypy/module/_cffi_backend/test/test_c.py +++ b/pypy/module/_cffi_backend/test/test_c.py @@ -22,7 +22,6 @@ from pypy.tool.udir import udir from pypy.conftest import gettestobjspace, option from pypy.interpreter import gateway -from pypy.module._cffi_backend.test import _backend_test_c from pypy.module._cffi_backend import Module from pypy.translator.platform import host from pypy.translator.tool.cbuild import ExternalCompilationInfo @@ -87,20 +86,24 @@ all_names = ', '.join(Module.interpleveldefs.keys()) +backend_test_c = py.path.local(__file__).join('..', '_backend_test_c.py') + lst = [] -for name, value in _backend_test_c.__dict__.items(): - if name.startswith('test_'): - lst.append(value) -lst.sort(key=lambda func: func.func_code.co_firstlineno) +with backend_test_c.open('r') as f: + for line in f: + if line.startswith('def test_'): + line = line[4:] + line = line[:line.index('():')] + lst.append(line) tmpdir = udir.join('test_c').ensure(dir=1) tmpname = tmpdir.join('_test_c.py') with tmpname.open('w') as f: for func in lst: - print >> f, 'def %s(self):' % (func.__name__,) + print >> f, 'def %s(self):' % (func,) print >> f, ' import _all_test_c' - print >> f, ' _all_test_c.%s()' % (func.__name__,) + print >> f, ' _all_test_c.%s()' % (func,) tmpname2 = tmpdir.join('_all_test_c.py') with tmpname2.open('w') as f: @@ -110,7 +113,7 @@ print >> f, ' class test:' print >> f, ' raises = staticmethod(raises)' print >> f, ' skip = staticmethod(skip)' - print >> f, py.path.local(__file__).join('..', '_backend_test_c.py').read() + print >> f, backend_test_c.read() mod = tmpname.pyimport() diff --git a/pypy/module/_csv/__init__.py b/pypy/module/_csv/__init__.py new file mode 100644 --- /dev/null +++ b/pypy/module/_csv/__init__.py @@ -0,0 +1,87 @@ +from pypy.interpreter.mixedmodule import MixedModule + + +class Module(MixedModule): + """CSV parsing and writing. + +This module provides classes that assist in the reading and writing +of Comma Separated Value (CSV) files, and implements the interface +described by PEP 305. Although many CSV files are simple to parse, +the format is not formally defined by a stable specification and +is subtle enough that parsing lines of a CSV file with something +like line.split(\",\") is bound to fail. The module supports three +basic APIs: reading, writing, and registration of dialects. + + +DIALECT REGISTRATION: + +Readers and writers support a dialect argument, which is a convenient +handle on a group of settings. When the dialect argument is a string, +it identifies one of the dialects previously registered with the module. +If it is a class or instance, the attributes of the argument are used as +the settings for the reader or writer: + + class excel: + delimiter = ',' + quotechar = '\"' + escapechar = None + doublequote = True + skipinitialspace = False + lineterminator = '\\r\\n' + quoting = QUOTE_MINIMAL + +SETTINGS: + + * quotechar - specifies a one-character string to use as the + quoting character. It defaults to '\"'. + * delimiter - specifies a one-character string to use as the + field separator. It defaults to ','. + * skipinitialspace - specifies how to interpret whitespace which + immediately follows a delimiter. It defaults to False, which + means that whitespace immediately following a delimiter is part + of the following field. + * lineterminator - specifies the character sequence which should + terminate rows. + * quoting - controls when quotes should be generated by the writer. + It can take on any of the following module constants: + + csv.QUOTE_MINIMAL means only when required, for example, when a + field contains either the quotechar or the delimiter + csv.QUOTE_ALL means that quotes are always placed around fields. + csv.QUOTE_NONNUMERIC means that quotes are always placed around + fields which do not parse as integers or floating point + numbers. + csv.QUOTE_NONE means that quotes are never placed around fields. + * escapechar - specifies a one-character string used to escape + the delimiter when quoting is set to QUOTE_NONE. + * doublequote - controls the handling of quotes inside fields. When + True, two consecutive quotes are interpreted as one during read, + and when writing, each quote character embedded in the data is + written as two quotes. +""" + + appleveldefs = { + 'register_dialect': 'app_csv.register_dialect', + 'unregister_dialect': 'app_csv.unregister_dialect', + 'get_dialect': 'app_csv.get_dialect', + 'list_dialects': 'app_csv.list_dialects', + '_dialects': 'app_csv._dialects', + + 'Error': 'app_csv.Error', + } + + interpleveldefs = { + '__version__': 'space.wrap("1.0")', + + 'QUOTE_MINIMAL': 'space.wrap(interp_csv.QUOTE_MINIMAL)', + 'QUOTE_ALL': 'space.wrap(interp_csv.QUOTE_ALL)', + 'QUOTE_NONNUMERIC': 'space.wrap(interp_csv.QUOTE_NONNUMERIC)', + 'QUOTE_NONE': 'space.wrap(interp_csv.QUOTE_NONE)', + + 'Dialect': 'interp_csv.W_Dialect', + + 'reader': 'interp_reader.csv_reader', + 'field_size_limit': 'interp_reader.csv_field_size_limit', + + 'writer': 'interp_writer.csv_writer', + } diff --git a/pypy/module/_csv/app_csv.py b/pypy/module/_csv/app_csv.py new file mode 100644 --- /dev/null +++ b/pypy/module/_csv/app_csv.py @@ -0,0 +1,33 @@ +import _csv + +class Error(Exception): + pass + + +_dialects = {} + +def register_dialect(name, dialect=None, **kwargs): + """Create a mapping from a string name to a dialect class.""" + if not isinstance(name, basestring): + raise TypeError("dialect name must be a string or unicode") + + dialect = _csv.Dialect(dialect, **kwargs) + _dialects[name] = dialect + +def unregister_dialect(name): + """Delete the name/dialect mapping associated with a string name.""" + try: + del _dialects[name] + except KeyError: + raise Error("unknown dialect") + +def get_dialect(name): + """Return the dialect instance associated with name.""" + try: + return _dialects[name] + except KeyError: + raise Error("unknown dialect") + +def list_dialects(): + """Return a list of all know dialect names.""" + return list(_dialects) diff --git a/pypy/module/_csv/interp_csv.py b/pypy/module/_csv/interp_csv.py new file mode 100644 --- /dev/null +++ b/pypy/module/_csv/interp_csv.py @@ -0,0 +1,175 @@ +from pypy.interpreter.baseobjspace import Wrappable +from pypy.interpreter.error import OperationError, operationerrfmt +from pypy.interpreter.typedef import TypeDef, interp_attrproperty +from pypy.interpreter.typedef import GetSetProperty +from pypy.interpreter.gateway import interp2app, unwrap_spec, NoneNotWrapped + + +QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE = range(4) + + +class W_Dialect(Wrappable): + _immutable_fields_ = [ + "dialect", + "delimiter", + "doublequote", + "escapechar", + "lineterminator", + "quotechar", + "quoting", + "skipinitialspace", + "strict", + ] + +def _fetch(space, w_dialect, name): + return space.findattr(w_dialect, space.wrap(name)) + +def _get_bool(space, w_src, default): + if w_src is None: + return default + return space.is_true(w_src) + +def _get_int(space, w_src, default): + if w_src is None: + return default + return space.int_w(w_src) + +def _get_str(space, w_src, default): + if w_src is None: + return default + return space.str_w(w_src) + +def _get_char(space, w_src, default, name): + if w_src is None: + return default + if space.is_w(w_src, space.w_None): + return '\0' + src = space.str_w(w_src) + if len(src) == 1: + return src[0] + if len(src) == 0: + return '\0' + raise operationerrfmt(space.w_TypeError, + '"%s" must be a 1-character string', name) + +def _build_dialect(space, w_dialect, w_delimiter, w_doublequote, + w_escapechar, w_lineterminator, w_quotechar, w_quoting, + w_skipinitialspace, w_strict): + if w_dialect is not None: + if space.isinstance_w(w_dialect, space.w_basestring): + w_module = space.getbuiltinmodule('_csv') + w_dialect = space.call_method(w_module, 'get_dialect', w_dialect) + + dialect = space.interpclass_w(w_dialect) + if (isinstance(dialect, W_Dialect) and + w_delimiter is None and + w_doublequote is None and + w_escapechar is None and + w_lineterminator is None and + w_quotechar is None and + w_quoting is None and + w_skipinitialspace is None and + w_strict is None): + return dialect + + if w_delimiter is None: + w_delimiter = _fetch(space, w_dialect, 'delimiter') + if w_doublequote is None: + w_doublequote = _fetch(space, w_dialect, 'doublequote') + if w_escapechar is None: + w_escapechar = _fetch(space, w_dialect, 'escapechar') + if w_lineterminator is None: + w_lineterminator = _fetch(space, w_dialect, 'lineterminator') + if w_quotechar is None: + w_quotechar = _fetch(space, w_dialect, 'quotechar') + if w_quoting is None: + w_quoting = _fetch(space, w_dialect, 'quoting') + if w_skipinitialspace is None: + w_skipinitialspace = _fetch(space, w_dialect, 'skipinitialspace') + if w_strict is None: + w_strict = _fetch(space, w_dialect, 'strict') + + dialect = W_Dialect() + dialect.delimiter = _get_char(space, w_delimiter, ',', 'delimiter') + dialect.doublequote = _get_bool(space, w_doublequote, True) + dialect.escapechar = _get_char(space, w_escapechar, '\0', 'escapechar') + dialect.lineterminator = _get_str(space, w_lineterminator, '\r\n') + dialect.quotechar = _get_char(space, w_quotechar, '"', 'quotechar') + tmp_quoting = _get_int(space, w_quoting, QUOTE_MINIMAL) + dialect.skipinitialspace = _get_bool(space, w_skipinitialspace, False) + dialect.strict = _get_bool(space, w_strict, False) + + # validate options + if not (0 <= tmp_quoting < 4): + raise OperationError(space.w_TypeError, + space.wrap('bad "quoting" value')) + + if dialect.delimiter == '\0': + raise OperationError(space.w_TypeError, + space.wrap('delimiter must be set')) + + if space.is_w(w_quotechar, space.w_None) and w_quoting is None: + tmp_quoting = QUOTE_NONE + if tmp_quoting != QUOTE_NONE and dialect.quotechar == '\0': + raise OperationError(space.w_TypeError, + space.wrap('quotechar must be set if quoting enabled')) + dialect.quoting = tmp_quoting + return dialect + +def W_Dialect___new__(space, w_subtype, w_dialect = NoneNotWrapped, + w_delimiter = NoneNotWrapped, + w_doublequote = NoneNotWrapped, + w_escapechar = NoneNotWrapped, + w_lineterminator = NoneNotWrapped, + w_quotechar = NoneNotWrapped, + w_quoting = NoneNotWrapped, + w_skipinitialspace = NoneNotWrapped, + w_strict = NoneNotWrapped, + ): + dialect = _build_dialect(space, w_dialect, w_delimiter, w_doublequote, + w_escapechar, w_lineterminator, w_quotechar, + w_quoting, w_skipinitialspace, w_strict) + if space.is_w(w_subtype, space.gettypeobject(W_Dialect.typedef)): + return space.wrap(dialect) + else: + subdialect = space.allocate_instance(W_Dialect, w_subtype) + subdialect.delimiter = dialect.delimiter + subdialect.doublequote = dialect.doublequote + subdialect.escapechar = dialect.escapechar + subdialect.lineterminator = dialect.lineterminator + subdialect.quotechar = dialect.quotechar + subdialect.quoting = dialect.quoting + subdialect.skipinitialspace = dialect.skipinitialspace + subdialect.strict = dialect.strict + return space.wrap(subdialect) + + +def _get_escapechar(space, dialect): + if dialect.escapechar == '\0': + return space.w_None + return space.wrap(dialect.escapechar) + +def _get_quotechar(space, dialect): + if dialect.quotechar == '\0': + return space.w_None + return space.wrap(dialect.quotechar) + + +W_Dialect.typedef = TypeDef( + 'Dialect', + __module__ = '_csv', + __new__ = interp2app(W_Dialect___new__), + + delimiter = interp_attrproperty('delimiter', W_Dialect), + doublequote = interp_attrproperty('doublequote', W_Dialect), + escapechar = GetSetProperty(_get_escapechar, cls=W_Dialect), + lineterminator = interp_attrproperty('lineterminator', W_Dialect), + quotechar = GetSetProperty(_get_quotechar, cls=W_Dialect), + quoting = interp_attrproperty('quoting', W_Dialect), + skipinitialspace = interp_attrproperty('skipinitialspace', W_Dialect), + strict = interp_attrproperty('strict', W_Dialect), + + __doc__ = """CSV dialect + +The Dialect type records CSV parsing and generation options. +""") diff --git a/pypy/module/_csv/interp_reader.py b/pypy/module/_csv/interp_reader.py new file mode 100644 --- /dev/null +++ b/pypy/module/_csv/interp_reader.py @@ -0,0 +1,263 @@ +from pypy.rlib.rstring import StringBuilder +from pypy.interpreter.baseobjspace import Wrappable +from pypy.interpreter.error import OperationError +from pypy.interpreter.gateway import NoneNotWrapped, unwrap_spec +from pypy.interpreter.typedef import TypeDef, interp2app +from pypy.interpreter.typedef import interp_attrproperty_w, interp_attrproperty +from pypy.module._csv.interp_csv import _build_dialect +from pypy.module._csv.interp_csv import (QUOTE_MINIMAL, QUOTE_ALL, + QUOTE_NONNUMERIC, QUOTE_NONE) + +(START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD, + IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD, + EAT_CRNL) = range(8) + + +class W_Reader(Wrappable): + + def __init__(self, space, dialect, w_iter): + self.space = space + self.dialect = dialect + self.w_iter = w_iter + self.line_num = 0 + + def iter_w(self): + return self.space.wrap(self) + + def error(self, msg): + space = self.space + msg = 'line %d: %s' % (self.line_num, msg) + w_module = space.getbuiltinmodule('_csv') + w_error = space.getattr(w_module, space.wrap('Error')) + raise OperationError(w_error, space.wrap(msg)) + error._dont_inline_ = True + + def add_char(self, field_builder, c): + assert field_builder is not None + if field_builder.getlength() >= field_limit.limit: + raise self.error("field larger than field limit") + field_builder.append(c) + + def save_field(self, field_builder): + field = field_builder.build() + if self.numeric_field: + from pypy.objspace.std.strutil import ParseStringError + from pypy.objspace.std.strutil import string_to_float + self.numeric_field = False + try: + ff = string_to_float(field) + except ParseStringError, e: + raise OperationError(self.space.w_ValueError, + self.space.wrap(e.msg)) + w_obj = self.space.wrap(ff) + else: + w_obj = self.space.wrap(field) + self.fields_w.append(w_obj) + + def next_w(self): + space = self.space + dialect = self.dialect + self.fields_w = [] + self.numeric_field = False + field_builder = None # valid iff state not in [START_RECORD, EAT_CRNL] + state = START_RECORD + # + while True: + try: + w_line = space.next(self.w_iter) + except OperationError, e: + if e.match(space, space.w_StopIteration): + if field_builder is not None: + raise self.error("newline inside string") + raise + self.line_num += 1 + line = space.str_w(w_line) + for c in line: + if c == '\0': + raise self.error("line contains NULL byte") + + if state == START_RECORD: + if c == '\n' or c == '\r': + state = EAT_CRNL + continue + # normal character - handle as START_FIELD + state = START_FIELD + # fall-through to the next case + + if state == START_FIELD: + field_builder = StringBuilder(64) + # expecting field + if c == '\n' or c == '\r': + # save empty field + self.save_field(field_builder) + state = EAT_CRNL + elif (c == dialect.quotechar and + dialect.quoting != QUOTE_NONE): + # start quoted field + state = IN_QUOTED_FIELD + elif c == dialect.escapechar: + # possible escaped character + state = ESCAPED_CHAR + elif c == ' ' and dialect.skipinitialspace: + # ignore space at start of field + pass + elif c == dialect.delimiter: + # save empty field + self.save_field(field_builder) + else: + # begin new unquoted field + if dialect.quoting == QUOTE_NONNUMERIC: + self.numeric_field = True + self.add_char(field_builder, c) + state = IN_FIELD + + elif state == ESCAPED_CHAR: + self.add_char(field_builder, c) + state = IN_FIELD + + elif state == IN_FIELD: + # in unquoted field + if c == '\n' or c == '\r': + # end of line + self.save_field(field_builder) + state = EAT_CRNL + elif c == dialect.escapechar: + # possible escaped character + state = ESCAPED_CHAR + elif c == dialect.delimiter: + # save field - wait for new field + self.save_field(field_builder) + state = START_FIELD + else: + # normal character - save in field + self.add_char(field_builder, c) + + elif state == IN_QUOTED_FIELD: + # in quoted field + if c == dialect.escapechar: + # Possible escape character + state = ESCAPE_IN_QUOTED_FIELD + elif (c == dialect.quotechar and + dialect.quoting != QUOTE_NONE): + if dialect.doublequote: + # doublequote; " represented by "" + state = QUOTE_IN_QUOTED_FIELD + else: + # end of quote part of field + state = IN_FIELD + else: + # normal character - save in field + self.add_char(field_builder, c) + + elif state == ESCAPE_IN_QUOTED_FIELD: + self.add_char(field_builder, c) + state = IN_QUOTED_FIELD + + elif state == QUOTE_IN_QUOTED_FIELD: + # doublequote - seen a quote in an quoted field + if (dialect.quoting != QUOTE_NONE and + c == dialect.quotechar): + # save "" as " + self.add_char(field_builder, c) + state = IN_QUOTED_FIELD + elif c == dialect.delimiter: + # save field - wait for new field + self.save_field(field_builder) + state = START_FIELD + elif c == '\n' or c == '\r': + # end of line + self.save_field(field_builder) + state = EAT_CRNL + elif not dialect.strict: + self.add_char(field_builder, c) + state = IN_FIELD + else: + # illegal + raise self.error("'%s' expected after '%s'" % ( + dialect.delimiter, dialect.quotechar)) + + elif state == EAT_CRNL: + if not (c == '\n' or c == '\r'): + raise self.error("new-line character seen in unquoted " + "field - do you need to open the file " + "in universal-newline mode?") + + if state == IN_FIELD or state == QUOTE_IN_QUOTED_FIELD: + self.save_field(field_builder) + break + elif state == ESCAPED_CHAR: + self.add_char(field_builder, '\n') + state = IN_FIELD + elif state == IN_QUOTED_FIELD: + pass + elif state == ESCAPE_IN_QUOTED_FIELD: + self.add_char(field_builder, '\n') + state = IN_QUOTED_FIELD + elif state == START_FIELD: + # save empty field + field_builder = StringBuilder(1) + self.save_field(field_builder) + break + else: + break + # + w_result = space.newlist(self.fields_w) + self.fields_w = None + return w_result + + +def csv_reader(space, w_iterator, w_dialect=NoneNotWrapped, + w_delimiter = NoneNotWrapped, + w_doublequote = NoneNotWrapped, + w_escapechar = NoneNotWrapped, + w_lineterminator = NoneNotWrapped, + w_quotechar = NoneNotWrapped, + w_quoting = NoneNotWrapped, + w_skipinitialspace = NoneNotWrapped, + w_strict = NoneNotWrapped, + ): + """ + csv_reader = reader(iterable [, dialect='excel'] + [optional keyword args]) + for row in csv_reader: + process(row) + + The "iterable" argument can be any object that returns a line + of input for each iteration, such as a file object or a list. The + optional \"dialect\" parameter is discussed below. The function + also accepts optional keyword arguments which override settings + provided by the dialect. + + The returned object is an iterator. Each iteration returns a row + of the CSV file (which can span multiple input lines)""" + w_iter = space.iter(w_iterator) + dialect = _build_dialect(space, w_dialect, w_delimiter, w_doublequote, + w_escapechar, w_lineterminator, w_quotechar, + w_quoting, w_skipinitialspace, w_strict) + return W_Reader(space, dialect, w_iter) + +W_Reader.typedef = TypeDef( + 'reader', + __module__ = '_csv', + dialect = interp_attrproperty_w('dialect', W_Reader), + line_num = interp_attrproperty('line_num', W_Reader), + __iter__ = interp2app(W_Reader.iter_w), + next = interp2app(W_Reader.next_w), + __doc__ = """CSV reader + +Reader objects are responsible for reading and parsing tabular data +in CSV format.""") +W_Reader.typedef.acceptable_as_base_class = False + +# ____________________________________________________________ + +class FieldLimit: + limit = 128 * 1024 # max parsed field size +field_limit = FieldLimit() + +@unwrap_spec(new_limit=int) +def csv_field_size_limit(space, new_limit=-1): + old_limit = field_limit.limit + if new_limit >= 0: + field_limit.limit = new_limit + return space.wrap(old_limit) diff --git a/pypy/module/_csv/interp_writer.py b/pypy/module/_csv/interp_writer.py new file mode 100644 --- /dev/null +++ b/pypy/module/_csv/interp_writer.py @@ -0,0 +1,172 @@ +from pypy.rlib.rstring import StringBuilder +from pypy.interpreter.baseobjspace import Wrappable +from pypy.interpreter.error import OperationError +from pypy.interpreter.gateway import NoneNotWrapped +from pypy.interpreter.typedef import TypeDef, interp2app +from pypy.interpreter.typedef import interp_attrproperty_w +from pypy.module._csv.interp_csv import _build_dialect +from pypy.module._csv.interp_csv import (QUOTE_MINIMAL, QUOTE_ALL, + QUOTE_NONNUMERIC, QUOTE_NONE) + + +class W_Writer(Wrappable): + + def __init__(self, space, dialect, w_fileobj): + self.space = space + self.dialect = dialect + self.w_filewrite = space.getattr(w_fileobj, space.wrap('write')) + # precompute this + special = dialect.delimiter + dialect.lineterminator + if dialect.escapechar != '\0': special += dialect.escapechar + if dialect.quotechar != '\0': special += dialect.quotechar + self.special_characters = special + + def error(self, msg): + space = self.space + w_module = space.getbuiltinmodule('_csv') + w_error = space.getattr(w_module, space.wrap('Error')) + raise OperationError(w_error, space.wrap(msg)) + error._dont_inline_ = True + + def writerow(self, w_fields): + """Construct and write a CSV record from a sequence of fields. + Non-string elements will be converted to string.""" + space = self.space + fields_w = space.listview(w_fields) + dialect = self.dialect + rec = StringBuilder(80) + # + for field_index in range(len(fields_w)): + w_field = fields_w[field_index] + if space.is_w(w_field, space.w_None): + field = "" + elif space.isinstance_w(w_field, space.w_float): + field = space.str_w(space.repr(w_field)) + else: + field = space.str_w(space.str(w_field)) + # + if dialect.quoting == QUOTE_NONNUMERIC: + try: + space.float_w(w_field) # is it an int/long/float? + quoted = False + except OperationError, e: + if e.async(space): + raise + quoted = True + elif dialect.quoting == QUOTE_ALL: + quoted = True + elif dialect.quoting == QUOTE_MINIMAL: + # Find out if we really quoting + special_characters = self.special_characters + for c in field: + if c in special_characters: + if c != dialect.quotechar or dialect.doublequote: + quoted = True + break + else: + quoted = False + else: + quoted = False + + # If field is empty check if it needs to be quoted + if len(field) == 0 and len(fields_w) == 1: + if dialect.quoting == QUOTE_NONE: + raise self.error("single empty field record " + "must be quoted") + quoted = True + + # If this is not the first field we need a field separator + if field_index > 0: + rec.append(dialect.delimiter) + + # Handle preceding quote + if quoted: + rec.append(dialect.quotechar) + + # Copy field data + special_characters = self.special_characters + for c in field: + if c in special_characters: + if dialect.quoting == QUOTE_NONE: + want_escape = True + else: + want_escape = False + if c == dialect.quotechar: + if dialect.doublequote: + rec.append(dialect.quotechar) + else: + want_escape = True + if want_escape: + if dialect.escapechar == '\0': + raise self.error("need to escape, " + "but no escapechar set") + rec.append(dialect.escapechar) + else: + assert quoted + # Copy field character into record buffer + rec.append(c) + + # Handle final quote + if quoted: + rec.append(dialect.quotechar) + + # Add line terminator + rec.append(dialect.lineterminator) + + line = rec.build() + return space.call_function(self.w_filewrite, space.wrap(line)) + + def writerows(self, w_seqseq): + """Construct and write a series of sequences to a csv file. + Non-string elements will be converted to string.""" + space = self.space + w_iter = space.iter(w_seqseq) + while True: + try: + w_seq = space.next(w_iter) + except OperationError, e: + if e.match(space, space.w_StopIteration): + break + raise + self.writerow(w_seq) + + +def csv_writer(space, w_fileobj, w_dialect=NoneNotWrapped, + w_delimiter = NoneNotWrapped, + w_doublequote = NoneNotWrapped, + w_escapechar = NoneNotWrapped, + w_lineterminator = NoneNotWrapped, + w_quotechar = NoneNotWrapped, + w_quoting = NoneNotWrapped, + w_skipinitialspace = NoneNotWrapped, + w_strict = NoneNotWrapped, + ): + """ + csv_writer = csv.writer(fileobj [, dialect='excel'] + [optional keyword args]) + for row in sequence: + csv_writer.writerow(row) + + [or] + + csv_writer = csv.writer(fileobj [, dialect='excel'] + [optional keyword args]) + csv_writer.writerows(rows) + + The \"fileobj\" argument can be any object that supports the file API.""" + dialect = _build_dialect(space, w_dialect, w_delimiter, w_doublequote, + w_escapechar, w_lineterminator, w_quotechar, + w_quoting, w_skipinitialspace, w_strict) + return W_Writer(space, dialect, w_fileobj) + +W_Writer.typedef = TypeDef( + 'writer', + __module__ = '_csv', + dialect = interp_attrproperty_w('dialect', W_Writer), + writerow = interp2app(W_Writer.writerow), + writerows = interp2app(W_Writer.writerows), + __doc__ = """CSV writer + +Writer objects are responsible for generating tabular data +in CSV format from sequence input.""") +W_Writer.typedef.acceptable_as_base_class = False diff --git a/pypy/module/_csv/test/test_dialect.py b/pypy/module/_csv/test/test_dialect.py new file mode 100644 --- /dev/null +++ b/pypy/module/_csv/test/test_dialect.py @@ -0,0 +1,107 @@ +from pypy.conftest import gettestobjspace + + +class AppTestDialect(object): + def setup_class(cls): + cls.space = gettestobjspace(usemodules=['_csv']) + + def test_register_dialect(self): + import _csv + + attrs = [('delimiter', ','), + ('doublequote', True), + ('escapechar', None), + ('lineterminator', '\r\n'), + ('quotechar', '"'), + ('quoting', _csv.QUOTE_MINIMAL), + ('skipinitialspace', False), + ('strict', False), + ] + + for changeattr, newvalue in [('delimiter', ':'), + ('doublequote', False), + ('escapechar', '/'), + ('lineterminator', '---\n'), + ('quotechar', '%'), + ('quoting', _csv.QUOTE_NONNUMERIC), + ('skipinitialspace', True), + ('strict', True)]: + kwargs = {changeattr: newvalue} + _csv.register_dialect('foo1', **kwargs) + d = _csv.get_dialect('foo1') + assert d.__class__.__name__ == 'Dialect' + for attr, default in attrs: + if attr == changeattr: + expected = newvalue + else: + expected = default + assert getattr(d, attr) == expected + + def test_register_dialect_base_1(self): + import _csv + _csv.register_dialect('foo1', escapechar='!') + _csv.register_dialect('foo2', 'foo1', strict=True) + d1 = _csv.get_dialect('foo1') + assert d1.escapechar == '!' + assert d1.strict == False + d2 = _csv.get_dialect('foo2') + assert d2.escapechar == '!' + assert d2.strict == True + + def test_register_dialect_base_2(self): + import _csv + class Foo1: + escapechar = '?' + _csv.register_dialect('foo2', Foo1, strict=True) + d2 = _csv.get_dialect('foo2') + assert d2.escapechar == '?' + assert d2.strict == True + + def test_typeerror(self): + import _csv + attempts = [("delimiter", '', 123), + ("escapechar", Ellipsis, 'foo', 0), + ("lineterminator", -132), + ("quotechar", '', 25), + ("quoting", 4, '', '\x00'), + ] + for attempt in attempts: + name = attempt[0] + for value in attempt[1:]: + kwargs = {name: value} + raises(TypeError, _csv.register_dialect, 'foo1', **kwargs) + + def test_bool_arg(self): + # boolean arguments take *any* object and use its truth-value + import _csv + _csv.register_dialect('foo1', doublequote=[]) + assert _csv.get_dialect('foo1').doublequote == False + _csv.register_dialect('foo1', skipinitialspace=2) + assert _csv.get_dialect('foo1').skipinitialspace == True + _csv.register_dialect('foo1', strict=_csv) # :-/ + assert _csv.get_dialect('foo1').strict == True + + def test_line_terminator(self): + # lineterminator can be the empty string + import _csv + _csv.register_dialect('foo1', lineterminator='') + assert _csv.get_dialect('foo1').lineterminator == '' + + def test_unregister_dialect(self): + import _csv + _csv.register_dialect('foo1') + _csv.unregister_dialect('foo1') + raises(_csv.Error, _csv.get_dialect, 'foo1') + raises(_csv.Error, _csv.unregister_dialect, 'foo1') + + def test_list_dialects(self): + import _csv + lst = _csv.list_dialects() + assert type(lst) is list + assert 'neverseen' not in lst + _csv.register_dialect('neverseen') + lst = _csv.list_dialects() + assert 'neverseen' in lst + _csv.unregister_dialect('neverseen') + lst = _csv.list_dialects() + assert 'neverseen' not in lst diff --git a/pypy/module/_csv/test/test_reader.py b/pypy/module/_csv/test/test_reader.py new file mode 100644 --- /dev/null +++ b/pypy/module/_csv/test/test_reader.py @@ -0,0 +1,101 @@ +from pypy.conftest import gettestobjspace + + +class AppTestReader(object): + def setup_class(cls): + cls.space = gettestobjspace(usemodules=['_csv']) + + w__read_test = cls.space.appexec([], r"""(): + import _csv + def _read_test(input, expect, **kwargs): + reader = _csv.reader(input, **kwargs) + if expect == 'Error': + raises(_csv.Error, list, reader) + return + result = list(reader) + assert result == expect, 'result: %r\nexpect: %r' % ( + result, expect) + return _read_test + """) + if type(w__read_test) is type(lambda:0): + w__read_test = staticmethod(w__read_test) + cls.w__read_test = w__read_test + + def test_simple_reader(self): + self._read_test(['foo:bar\n'], [['foo', 'bar']], delimiter=':') + + def test_read_oddinputs(self): + self._read_test([], []) + self._read_test([''], [[]]) + self._read_test(['"ab"c'], 'Error', strict = 1) + # cannot handle null bytes for the moment + self._read_test(['ab\0c'], 'Error', strict = 1) + self._read_test(['"ab"c'], [['abc']], doublequote = 0) + + def test_read_eol(self): + self._read_test(['a,b'], [['a','b']]) + self._read_test(['a,b\n'], [['a','b']]) + self._read_test(['a,b\r\n'], [['a','b']]) + self._read_test(['a,b\r'], [['a','b']]) + self._read_test(['a,b\rc,d'], 'Error') + self._read_test(['a,b\nc,d'], 'Error') + self._read_test(['a,b\r\nc,d'], 'Error') + + def test_read_escape(self): + self._read_test(['a,\\b,c'], [['a', 'b', 'c']], escapechar='\\') + self._read_test(['a,b\\,c'], [['a', 'b,c']], escapechar='\\') + self._read_test(['a,"b\\,c"'], [['a', 'b,c']], escapechar='\\') + self._read_test(['a,"b,\\c"'], [['a', 'b,c']], escapechar='\\') + self._read_test(['a,"b,c\\""'], [['a', 'b,c"']], escapechar='\\') + self._read_test(['a,"b,c"\\'], [['a', 'b,c\\']], escapechar='\\') + + def test_read_quoting(self): + import _csv as csv + self._read_test(['1,",3,",5'], [['1', ',3,', '5']]) + self._read_test(['1,",3,",5'], [['1', '"', '3', '"', '5']], + quotechar=None, escapechar='\\') + self._read_test(['1,",3,",5'], [['1', '"', '3', '"', '5']], + quoting=csv.QUOTE_NONE, escapechar='\\') + # will this fail where locale uses comma for decimals? + self._read_test([',3,"5",7.3, 9'], [['', 3, '5', 7.3, 9]], + quoting=csv.QUOTE_NONNUMERIC) + self._read_test(['"a\nb", 7'], [['a\nb', ' 7']]) + raises(ValueError, self._read_test, + ['abc,3'], [[]], + quoting=csv.QUOTE_NONNUMERIC) + + def test_read_bigfield(self): + # This exercises the buffer realloc functionality and field size + # limits. + import _csv as csv + limit = csv.field_size_limit() + try: + size = 150 + bigstring = 'X' * size + bigline = '%s,%s' % (bigstring, bigstring) + self._read_test([bigline], [[bigstring, bigstring]]) + csv.field_size_limit(size) + self._read_test([bigline], [[bigstring, bigstring]]) + assert csv.field_size_limit() == size + csv.field_size_limit(size-1) + self._read_test([bigline], 'Error') + raises(TypeError, csv.field_size_limit, None) + raises(TypeError, csv.field_size_limit, 1, None) + finally: + csv.field_size_limit(limit) + + def test_read_linenum(self): + import _csv as csv + r = csv.reader(['line,1', 'line,2', 'line,3']) + assert r.line_num == 0 + r.next() + assert r.line_num == 1 + r.next() + assert r.line_num == 2 + r.next() + assert r.line_num == 3 + raises(StopIteration, r.next) + assert r.line_num == 3 + + def test_dubious_quote(self): + self._read_test(['12,12,1",'], [['12', '12', '1"', '']]) diff --git a/pypy/module/_csv/test/test_writer.py b/pypy/module/_csv/test/test_writer.py new file mode 100644 --- /dev/null +++ b/pypy/module/_csv/test/test_writer.py @@ -0,0 +1,90 @@ +from pypy.conftest import gettestobjspace + + +class AppTestWriter(object): + def setup_class(cls): + cls.space = gettestobjspace(usemodules=['_csv']) + + w__write_test = cls.space.appexec([], r"""(): + import _csv + + class DummyFile(object): + def __init__(self): + self._parts = [] + self.write = self._parts.append + def getvalue(self): + return ''.join(self._parts) + + def _write_test(fields, expect, **kwargs): + fileobj = DummyFile() + writer = _csv.writer(fileobj, **kwargs) + if len(fields) > 0 and type(fields[0]) is list: + writer.writerows(fields) + else: + writer.writerow(fields) + result = fileobj.getvalue() + expect += writer.dialect.lineterminator + assert result == expect, 'result: %r\nexpect: %r' % ( + result, expect) + return _write_test + """) + if type(w__write_test) is type(lambda:0): + w__write_test = staticmethod(w__write_test) + cls.w__write_test = w__write_test + + def test_write_arg_valid(self): + import _csv as csv + raises(TypeError, self._write_test, None, '') # xxx different API! + self._write_test((), '') + self._write_test([None], '""') + raises(csv.Error, self._write_test, + [None], None, quoting = csv.QUOTE_NONE) + # Check that exceptions are passed up the chain + class BadList: + def __len__(self): + return 10; + def __getitem__(self, i): + if i > 2: + raise IOError + raises(IOError, self._write_test, BadList(), '') + class BadItem: + def __str__(self): + raise IOError + raises(IOError, self._write_test, [BadItem()], '') + + def test_write_quoting(self): + import _csv as csv + self._write_test(['a',1,'p,q'], 'a,1,"p,q"') + raises(csv.Error, self._write_test, + ['a',1,'p,q'], 'a,1,p,q', + quoting = csv.QUOTE_NONE) + self._write_test(['a',1,'p,q'], 'a,1,"p,q"', + quoting = csv.QUOTE_MINIMAL) + self._write_test(['a',1,'p,q'], '"a",1,"p,q"', + quoting = csv.QUOTE_NONNUMERIC) + self._write_test(['a',1,'p,q'], '"a","1","p,q"', + quoting = csv.QUOTE_ALL) + self._write_test(['a\nb',1], '"a\nb","1"', + quoting = csv.QUOTE_ALL) + + def test_write_escape(self): + import _csv as csv + self._write_test(['a',1,'p,q'], 'a,1,"p,q"', + escapechar='\\') + raises(csv.Error, self._write_test, + ['a',1,'p,"q"'], 'a,1,"p,\\"q\\""', + escapechar=None, doublequote=False) + self._write_test(['a',1,'p,"q"'], 'a,1,"p,\\"q\\""', + escapechar='\\', doublequote = False) + self._write_test(['"'], '""""', + escapechar='\\', quoting = csv.QUOTE_MINIMAL) + self._write_test(['"'], '\\"', + escapechar='\\', quoting = csv.QUOTE_MINIMAL, + doublequote = False) + self._write_test(['"'], '\\"', + escapechar='\\', quoting = csv.QUOTE_NONE) + self._write_test(['a',1,'p,q'], 'a,1,p\\,q', + escapechar='\\', quoting = csv.QUOTE_NONE) + + def test_writerows(self): + self._write_test([['a'],['b','c']], 'a\r\nb,c') diff --git a/pypy/module/_csv/test/test_ztranslation.py b/pypy/module/_csv/test/test_ztranslation.py new file mode 100644 --- /dev/null +++ b/pypy/module/_csv/test/test_ztranslation.py @@ -0,0 +1,4 @@ +from pypy.objspace.fake.checkmodule import checkmodule + +def test_checkmodule(): + checkmodule('_csv') diff --git a/pypy/objspace/std/listobject.py b/pypy/objspace/std/listobject.py --- a/pypy/objspace/std/listobject.py +++ b/pypy/objspace/std/listobject.py @@ -697,11 +697,11 @@ return ListStrategy.contains(self, w_list, w_obj) def _safe_contains(self, w_list, obj): - l = self.unerase(w_list.lstorage) - for i in l: - if i == obj: - return True - return False + l = self.unerase(w_list.lstorage) + for i in l: + if i == obj: + return True + return False def length(self, w_list): return len(self.unerase(w_list.lstorage)) @@ -732,7 +732,6 @@ items = self.unerase(w_list.lstorage)[:] return self.erase(items) - def getslice(self, w_list, start, stop, step, length): if step == 1 and 0 <= start <= stop: l = self.unerase(w_list.lstorage) @@ -754,7 +753,6 @@ return W_ListObject.from_storage_and_strategy(self.space, storage, self) def append(self, w_list, w_item): - if self.is_correct_type(w_item): self.unerase(w_list.lstorage).append(self.unwrap(w_item)) return diff --git a/pypy/rlib/rdynload.py b/pypy/rlib/rdynload.py --- a/pypy/rlib/rdynload.py +++ b/pypy/rlib/rdynload.py @@ -44,6 +44,10 @@ RTLD_LOCAL = rffi_platform.DefinedConstantInteger('RTLD_LOCAL') RTLD_GLOBAL = rffi_platform.DefinedConstantInteger('RTLD_GLOBAL') RTLD_NOW = rffi_platform.DefinedConstantInteger('RTLD_NOW') + RTLD_LAZY = rffi_platform.DefinedConstantInteger('RTLD_LAZY') + RTLD_NODELETE = rffi_platform.DefinedConstantInteger('RTLD_NODELETE') + RTLD_NOLOAD = rffi_platform.DefinedConstantInteger('RTLD_NOLOAD') + RTLD_DEEPBIND = rffi_platform.DefinedConstantInteger('RTLD_DEEPBIND') class cConfig: pass @@ -72,6 +76,7 @@ RTLD_LOCAL = cConfig.RTLD_LOCAL RTLD_GLOBAL = cConfig.RTLD_GLOBAL RTLD_NOW = cConfig.RTLD_NOW + RTLD_LAZY = cConfig.RTLD_LAZY def dlerror(): # XXX this would never work on top of ll2ctypes, because @@ -90,7 +95,8 @@ mode = RTLD_LOCAL else: mode = 0 - mode |= RTLD_NOW + if (mode & (RTLD_LAZY | RTLD_NOW)) == 0: + mode |= RTLD_NOW res = c_dlopen(name, rffi.cast(rffi.INT, mode)) if not res: err = dlerror() _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit