Author: Carl Friedrich Bolz-Tereick <[email protected]>
Branch: py3.6-wordcode
Changeset: r94588:f0d5731c5251
Date: 2018-05-14 23:25 +0200
http://bitbucket.org/pypy/pypy/changeset/f0d5731c5251/
Log: fix code.dump()
diff --git a/pypy/interpreter/test/test_pycode.py
b/pypy/interpreter/test/test_pycode.py
--- a/pypy/interpreter/test/test_pycode.py
+++ b/pypy/interpreter/test/test_pycode.py
@@ -16,4 +16,4 @@
print '>>>\n' + output + '\n<<<'
assert ' 1 (7)' in output
assert ' 4 (None)' in output
- assert ' 19 RETURN_VALUE ' in output
+ assert ' 16 RETURN_VALUE' in output
diff --git a/pypy/tool/dis3.py b/pypy/tool/dis3.py
--- a/pypy/tool/dis3.py
+++ b/pypy/tool/dis3.py
@@ -1,20 +1,40 @@
-"""Disassembler of Python byte code into mnemonics.
-Python 3 dis.py partly backported to Python 2"""
+"""Disassembler of Python byte code into mnemonics."""
+
+from __future__ import print_function
import sys
import types
+import collections
+import io
from opcode3 import *
from opcode3 import __all__ as _opcodes_all
-__all__ = ["dis", "disassemble", "distb", "disco",
- "findlinestarts", "findlabels"] + _opcodes_all
+__all__ = ["code_info", "dis", "disassemble", "distb", "disco",
+ "findlinestarts", "findlabels", "show_code",
+ "get_instructions", "Instruction", "Bytecode"] + _opcodes_all
del _opcodes_all
-_have_code = (types.MethodType, types.FunctionType, types.CodeType, type)
+_have_code = (types.MethodType, types.FunctionType, types.CodeType,
+ classmethod, staticmethod, type)
+
+FORMAT_VALUE = opmap['FORMAT_VALUE']
+
+def _try_compile(source, name):
+ """Attempts to compile the given source, first as an expression and
+ then as a statement if the first approach fails.
+
+ Utility function to accept strings in functions that otherwise
+ expect code objects
+ """
+ try:
+ c = compile(source, name, 'eval')
+ except SyntaxError:
+ c = compile(source, name, 'exec')
+ return c
def dis(x=None):
- """Disassemble classes, methods, functions, or code.
+ """Disassemble classes, methods, functions, generators, or code.
With no argument, disassemble the last traceback.
@@ -22,30 +42,31 @@
if x is None:
distb()
return
- if isinstance(x, types.InstanceType):
- x = x.__class__
- if hasattr(x, 'im_func'):
- x = x.im_func
- if hasattr(x, 'func_code'):
- x = x.func_code
- if hasattr(x, 'co_code'): # PyCode needs co_code before __dict__
+ if hasattr(x, '__func__'): # Method
+ x = x.__func__
+ if hasattr(x, '__code__'): # Function
+ x = x.__code__
+ if hasattr(x, 'gi_code'): # Generator
+ x = x.gi_code
+ if hasattr(x, 'co_code'): # Code object
disassemble(x)
- elif hasattr(x, '__dict__'):
- items = x.__dict__.items()
- items.sort()
+ elif hasattr(x, '__dict__'): # Class or module
+ items = sorted(x.__dict__.items())
for name, x1 in items:
if isinstance(x1, _have_code):
- print "Disassembly of %s:" % name
+ print("Disassembly of %s:" % name)
try:
dis(x1)
except TypeError as msg:
- print "Sorry:", msg
- print
- elif isinstance(x, str):
- disassemble_string(x)
+ print("Sorry:", msg)
+ print()
+ elif isinstance(x, (bytes, bytearray)): # Raw bytecode
+ _disassemble_bytes(x)
+ elif isinstance(x, str): # Source code
+ _disassemble_str(x)
else:
- raise TypeError("don't know how to disassemble %s objects" % \
- type(x).__name__)
+ raise TypeError("don't know how to disassemble %s objects" %
+ type(x).__name__)
def distb(tb=None):
"""Disassemble a traceback (default: last traceback)."""
@@ -57,104 +78,290 @@
while tb.tb_next: tb = tb.tb_next
disassemble(tb.tb_frame.f_code, tb.tb_lasti)
+# The inspect module interrogates this dictionary to build its
+# list of CO_* constants. It is also used by pretty_flags to
+# turn the co_flags field into a human readable list.
+COMPILER_FLAG_NAMES = {
+ 1: "OPTIMIZED",
+ 2: "NEWLOCALS",
+ 4: "VARARGS",
+ 8: "VARKEYWORDS",
+ 16: "NESTED",
+ 32: "GENERATOR",
+ 64: "NOFREE",
+ 128: "COROUTINE",
+ 256: "ITERABLE_COROUTINE",
+ 512: "ASYNC_GENERATOR",
+}
+
+def pretty_flags(flags):
+ """Return pretty representation of code flags."""
+ names = []
+ for i in range(32):
+ flag = 1<<i
+ if flags & flag:
+ names.append(COMPILER_FLAG_NAMES.get(flag, hex(flag)))
+ flags ^= flag
+ if not flags:
+ break
+ else:
+ names.append(hex(flags))
+ return ", ".join(names)
+
+def _get_code_object(x):
+ """Helper to handle methods, functions, generators, strings and raw code
objects"""
+ if hasattr(x, '__func__'): # Method
+ x = x.__func__
+ if hasattr(x, '__code__'): # Function
+ x = x.__code__
+ if hasattr(x, 'gi_code'): # Generator
+ x = x.gi_code
+ if isinstance(x, str): # Source code
+ x = _try_compile(x, "<disassembly>")
+ if hasattr(x, 'co_code'): # Code object
+ return x
+ raise TypeError("don't know how to disassemble %s objects" %
+ type(x).__name__)
+
+def code_info(x):
+ """Formatted details of methods, functions, or code."""
+ return _format_code_info(_get_code_object(x))
+
+def _format_code_info(co):
+ lines = []
+ lines.append("Name: %s" % co.co_name)
+ lines.append("Filename: %s" % co.co_filename)
+ lines.append("Argument count: %s" % co.co_argcount)
+ lines.append("Kw-only arguments: %s" % co.co_kwonlyargcount)
+ lines.append("Number of locals: %s" % co.co_nlocals)
+ lines.append("Stack size: %s" % co.co_stacksize)
+ lines.append("Flags: %s" % pretty_flags(co.co_flags))
+ if co.co_consts:
+ lines.append("Constants:")
+ for i_c in enumerate(co.co_consts):
+ lines.append("%4d: %r" % i_c)
+ if co.co_names:
+ lines.append("Names:")
+ for i_n in enumerate(co.co_names):
+ lines.append("%4d: %s" % i_n)
+ if co.co_varnames:
+ lines.append("Variable names:")
+ for i_n in enumerate(co.co_varnames):
+ lines.append("%4d: %s" % i_n)
+ if co.co_freevars:
+ lines.append("Free variables:")
+ for i_n in enumerate(co.co_freevars):
+ lines.append("%4d: %s" % i_n)
+ if co.co_cellvars:
+ lines.append("Cell variables:")
+ for i_n in enumerate(co.co_cellvars):
+ lines.append("%4d: %s" % i_n)
+ return "\n".join(lines)
+
+def show_code(co, file=None):
+ """Print details of methods, functions, or code to *file*.
+
+ If *file* is not provided, the output is printed on stdout.
+ """
+ print(code_info(co))
+
+_Instruction = collections.namedtuple("_Instruction",
+ "opname opcode arg argval argrepr offset starts_line is_jump_target")
+
+class Instruction(_Instruction):
+ """Details for a bytecode operation
+
+ Defined fields:
+ opname - human readable name for operation
+ opcode - numeric code for operation
+ arg - numeric argument to operation (if any), otherwise None
+ argval - resolved arg value (if known), otherwise same as arg
+ argrepr - human readable description of operation argument
+ offset - start index of operation within bytecode sequence
+ starts_line - line started by this opcode (if any), otherwise None
+ is_jump_target - True if other code jumps to here, otherwise False
+ """
+
+ def _disassemble(self, lineno_width=3, mark_as_current=False):
+ """Format instruction details for inclusion in disassembly output
+
+ *lineno_width* sets the width of the line number field (0 omits it)
+ *mark_as_current* inserts a '-->' marker arrow as part of the line
+ """
+ fields = []
+ # Column: Source code line number
+ if lineno_width:
+ if self.starts_line is not None:
+ lineno_fmt = "%%%dd" % lineno_width
+ fields.append(lineno_fmt % self.starts_line)
+ else:
+ fields.append(' ' * lineno_width)
+ # Column: Current instruction indicator
+ if mark_as_current:
+ fields.append('-->')
+ else:
+ fields.append(' ')
+ # Column: Jump target marker
+ if self.is_jump_target:
+ fields.append('>>')
+ else:
+ fields.append(' ')
+ # Column: Instruction offset from start of code sequence
+ fields.append(repr(self.offset).rjust(4))
+ # Column: Opcode name
+ fields.append(self.opname.ljust(20))
+ # Column: Opcode argument
+ if self.arg is not None:
+ fields.append(repr(self.arg).rjust(5))
+ # Column: Opcode argument details
+ if self.argrepr:
+ fields.append('(' + self.argrepr + ')')
+ return ' '.join(fields).rstrip()
+
+
+def get_instructions(x, first_line=None):
+ """Iterator for the opcodes in methods, functions or code
+
+ Generates a series of Instruction named tuples giving the details of
+ each operations in the supplied code.
+
+ If *first_line* is not None, it indicates the line number that should
+ be reported for the first source line in the disassembled code.
+ Otherwise, the source line information (if any) is taken directly from
+ the disassembled code object.
+ """
+ co = _get_code_object(x)
+ cell_names = co.co_cellvars + co.co_freevars
+ linestarts = dict(findlinestarts(co))
+ if first_line is not None:
+ line_offset = first_line - co.co_firstlineno
+ else:
+ line_offset = 0
+ return _get_instructions_bytes(co.co_code, co.co_varnames, co.co_names,
+ co.co_consts, cell_names, linestarts,
+ line_offset)
+
+def _get_const_info(const_index, const_list):
+ """Helper to get optional details about const references
+
+ Returns the dereferenced constant and its repr if the constant
+ list is defined.
+ Otherwise returns the constant index and its repr().
+ """
+ argval = const_index
+ if const_list is not None:
+ argval = const_list[const_index]
+ return argval, repr(argval)
+
+def _get_name_info(name_index, name_list):
+ """Helper to get optional details about named references
+
+ Returns the dereferenced name as both value and repr if the name
+ list is defined.
+ Otherwise returns the name index and its repr().
+ """
+ argval = name_index
+ if name_list is not None:
+ argval = name_list[name_index]
+ argrepr = argval
+ else:
+ argrepr = repr(argval)
+ return argval, argrepr
+
+
+def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
+ cells=None, linestarts=None, line_offset=0):
+ """Iterate over the instructions in a bytecode string.
+
+ Generates a sequence of Instruction namedtuples giving the details of each
+ opcode. Additional information about the code's runtime environment
+ (e.g. variable names, constants) can be specified using optional
+ arguments.
+
+ """
+ labels = findlabels(code)
+ starts_line = None
+ for offset, op, arg in _unpack_opargs(code):
+ if linestarts is not None:
+ starts_line = linestarts.get(offset, None)
+ if starts_line is not None:
+ starts_line += line_offset
+ is_jump_target = offset in labels
+ argval = None
+ argrepr = ''
+ if arg is not None:
+ # Set argval to the dereferenced value of the argument when
+ # available, and argrepr to the string representation of argval.
+ # _disassemble_bytes needs the string repr of the
+ # raw name index for LOAD_GLOBAL, LOAD_CONST, etc.
+ argval = arg
+ if op in hasconst:
+ argval, argrepr = _get_const_info(arg, constants)
+ elif op in hasname:
+ argval, argrepr = _get_name_info(arg, names)
+ elif op in hasjrel:
+ argval = offset + 2 + arg
+ argrepr = "to " + repr(argval)
+ elif op in haslocal:
+ argval, argrepr = _get_name_info(arg, varnames)
+ elif op in hascompare:
+ argval = cmp_op[arg]
+ argrepr = argval
+ elif op in hasfree:
+ argval, argrepr = _get_name_info(arg, cells)
+ elif op == FORMAT_VALUE:
+ argval = ((None, str, repr, ascii)[arg & 0x3], bool(arg & 0x4))
+ argrepr = ('', 'str', 'repr', 'ascii')[arg & 0x3]
+ if argval[1]:
+ if argrepr:
+ argrepr += ', '
+ argrepr += 'with format'
+ yield Instruction(opname[op], op,
+ arg, argval, argrepr,
+ offset, starts_line, is_jump_target)
+
def disassemble(co, lasti=-1):
"""Disassemble a code object."""
- XXX
- code = co.co_code
- labels = findlabels(code)
+ cell_names = co.co_cellvars + co.co_freevars
linestarts = dict(findlinestarts(co))
- n = len(code)
- i = 0
- extended_arg = 0
- free = None
- while i < n:
- c = code[i]
- op = ord(c)
- if i in linestarts:
- if i > 0:
- print
- print "%3d" % linestarts[i],
- else:
- print ' ',
+ _disassemble_bytes(co.co_code, lasti, co.co_varnames, co.co_names,
+ co.co_consts, cell_names, linestarts)
- if i == lasti: print '-->',
- else: print ' ',
- if i in labels: print '>>',
- else: print ' ',
- print repr(i).rjust(4),
- print opname[op].ljust(20),
- i = i+1
- if op >= HAVE_ARGUMENT:
- oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg
- extended_arg = 0
- i = i+2
- if op == EXTENDED_ARG:
- extended_arg = oparg*65536L
- print repr(oparg).rjust(5),
- if op in hasconst:
- print '(' + repr(co.co_consts[oparg]) + ')',
- elif op in hasname:
- print '(' + co.co_names[oparg] + ')',
- elif op in hasjrel:
- print '(to ' + repr(i + oparg) + ')',
- elif op in haslocal:
- print '(' + co.co_varnames[oparg] + ')',
- elif op in hascompare:
- print '(' + cmp_op[oparg] + ')',
- elif op in hasfree:
- if free is None:
- free = co.co_cellvars + co.co_freevars
- print '(' + free[oparg] + ')',
- elif op in hasnargs:
- print '(%d positional, %d keyword pair)' % \
- (ord(code[i-2]), ord(code[i-1])),
- print
+def _disassemble_bytes(code, lasti=-1, varnames=None, names=None,
+ constants=None, cells=None, linestarts=None,
+ line_offset=0):
+ # Omit the line number column entirely if we have no line number info
+ show_lineno = linestarts is not None
+ # TODO?: Adjust width upwards if max(linestarts.values()) >= 1000?
+ lineno_width = 3 if show_lineno else 0
+ for instr in _get_instructions_bytes(code, varnames, names,
+ constants, cells, linestarts,
+ line_offset=line_offset):
+ new_source_line = (show_lineno and
+ instr.starts_line is not None and
+ instr.offset > 0)
+ if new_source_line:
+ print()
+ is_current_instr = instr.offset == lasti
+ print(instr._disassemble(lineno_width, is_current_instr))
-def disassemble_string(code, lasti=-1, varnames=None, names=None,
- constants=None):
- labels = findlabels(code)
- n = len(code)
- i = 0
- while i < n:
- c = code[i]
- op = ord(c)
- if i == lasti: print '-->',
- else: print ' ',
- if i in labels: print '>>',
- else: print ' ',
- print repr(i).rjust(4),
- print opname[op].ljust(15),
- i = i+1
- if op >= HAVE_ARGUMENT:
- oparg = ord(code[i]) + ord(code[i+1])*256
- i = i+2
- print repr(oparg).rjust(5),
- if op in hasconst:
- if constants:
- print '(' + repr(constants[oparg]) + ')',
- else:
- print '(%d)'%oparg,
- elif op in hasname:
- if names is not None:
- print '(' + names[oparg] + ')',
- else:
- print '(%d)'%oparg,
- elif op in hasjrel:
- print '(to ' + repr(i + oparg) + ')',
- elif op in haslocal:
- if varnames:
- print '(' + varnames[oparg] + ')',
- else:
- print '(%d)' % oparg,
- elif op in hascompare:
- print '(' + cmp_op[oparg] + ')',
- elif op in hasnargs:
- print '(%d positional, %d keyword pair)' % \
- (ord(code[i-2]), ord(code[i-1])),
- print
+def _disassemble_str(source):
+ """Compile the source string, then disassemble the code object."""
+ disassemble(_try_compile(source, '<dis>'))
disco = disassemble # XXX For backwards compatibility
+def _unpack_opargs(code):
+ extended_arg = 0
+ for i in range(0, len(code), 2):
+ op = ord(code[i])
+ if op >= HAVE_ARGUMENT:
+ arg = ord(code[i+1]) | extended_arg
+ extended_arg = (arg << 8) if op == EXTENDED_ARG else 0
+ else:
+ arg = None
+ yield (i, op, arg)
+
def findlabels(code):
"""Detect all offsets in a byte code which are jump targets.
@@ -162,23 +369,16 @@
"""
labels = []
- n = len(code)
- i = 0
- while i < n:
- c = code[i]
- op = ord(c)
- i = i+1
- if op >= HAVE_ARGUMENT:
- oparg = ord(code[i]) + ord(code[i+1])*256
- i = i+2
- label = -1
+ for offset, op, arg in _unpack_opargs(code):
+ if arg is not None:
if op in hasjrel:
- label = i+oparg
+ label = offset + 2 + arg
elif op in hasjabs:
- label = oparg
- if label >= 0:
- if label not in labels:
- labels.append(label)
+ label = arg
+ else:
+ continue
+ if label not in labels:
+ labels.append(label)
return labels
def findlinestarts(code):
@@ -187,8 +387,8 @@
Generate pairs (offset, lineno) as described in Python/compile.c.
"""
- byte_increments = [ord(c) for c in code.co_lnotab[0::2]]
- line_increments = [ord(c) for c in code.co_lnotab[1::2]]
+ byte_increments = code.co_lnotab[0::2]
+ line_increments = code.co_lnotab[1::2]
lastlineno = None
lineno = code.co_firstlineno
@@ -206,27 +406,77 @@
if lineno != lastlineno:
yield (addr, lineno)
+class Bytecode:
+ """The bytecode operations of a piece of code
+
+ Instantiate this with a function, method, string of code, or a code object
+ (as returned by compile()).
+
+ Iterating over this yields the bytecode operations as Instruction
instances.
+ """
+ def __init__(self, x, first_line=None, current_offset=None):
+ self.codeobj = co = _get_code_object(x)
+ if first_line is None:
+ self.first_line = co.co_firstlineno
+ self._line_offset = 0
+ else:
+ self.first_line = first_line
+ self._line_offset = first_line - co.co_firstlineno
+ self._cell_names = co.co_cellvars + co.co_freevars
+ self._linestarts = dict(findlinestarts(co))
+ self._original_object = x
+ self.current_offset = current_offset
+
+ def __iter__(self):
+ co = self.codeobj
+ return _get_instructions_bytes(co.co_code, co.co_varnames, co.co_names,
+ co.co_consts, self._cell_names,
+ self._linestarts,
+ line_offset=self._line_offset)
+
+ def __repr__(self):
+ return "{}({!r})".format(self.__class__.__name__,
+ self._original_object)
+
+ @classmethod
+ def from_traceback(cls, tb):
+ """ Construct a Bytecode from the given traceback """
+ while tb.tb_next:
+ tb = tb.tb_next
+ return cls(tb.tb_frame.f_code, current_offset=tb.tb_lasti)
+
+ def info(self):
+ """Return formatted information about the code object."""
+ return _format_code_info(self.codeobj)
+
+ def dis(self):
+ """Return a formatted view of the bytecode operations."""
+ co = self.codeobj
+ if self.current_offset is not None:
+ offset = self.current_offset
+ else:
+ offset = -1
+ with io.StringIO() as output:
+ _disassemble_bytes(co.co_code, varnames=co.co_varnames,
+ names=co.co_names, constants=co.co_consts,
+ cells=self._cell_names,
+ linestarts=self._linestarts,
+ line_offset=self._line_offset,
+ file=output,
+ lasti=offset)
+ return output.getvalue()
+
+
def _test():
"""Simple test program to disassemble a file."""
- if sys.argv[1:]:
- if sys.argv[2:]:
- sys.stderr.write("usage: python dis.py [-|file]\n")
- sys.exit(2)
- fn = sys.argv[1]
- if not fn or fn == "-":
- fn = None
- else:
- fn = None
- if fn is None:
- f = sys.stdin
- else:
- f = open(fn)
- source = f.read()
- if fn is not None:
- f.close()
- else:
- fn = "<stdin>"
- code = compile(source, fn, "exec")
+ import argparse
+
+ parser = argparse.ArgumentParser()
+ parser.add_argument('infile', type=argparse.FileType(), nargs='?',
default='-')
+ args = parser.parse_args()
+ with args.infile as infile:
+ source = infile.read()
+ code = compile(source, args.infile.name, "exec")
dis(code)
if __name__ == "__main__":
diff --git a/pypy/tool/opcode3.py b/pypy/tool/opcode3.py
--- a/pypy/tool/opcode3.py
+++ b/pypy/tool/opcode3.py
@@ -2,10 +2,8 @@
"""
opcode module - potentially shared between dis and other modules which
operate on bytecodes (e.g. peephole optimizers).
-"Backported" from Python 3 to Python 2 land - an excact copy of
lib-python/3/opcode.py
"""
-
__all__ = ["cmp_op", "hasconst", "hasname", "hasjrel", "hasjabs",
"haslocal", "hascompare", "hasfree", "opname", "opmap",
"HAVE_ARGUMENT", "EXTENDED_ARG", "hasnargs"]
@@ -33,10 +31,12 @@
haslocal = []
hascompare = []
hasfree = []
-hasnargs = [] # unused
+hasnargs = []
opmap = {}
-opname = ['<%r>' % (op,) for op in range(256)]
+opname = [''] * 256
+for op in range(256): opname[op] = '<%r>' % (op,)
+del op
def def_op(name, op):
opname[op] = name
@@ -174,9 +174,11 @@
name_op('STORE_ANNOTATION', 127) # Index in name list
def_op('RAISE_VARARGS', 130) # Number of raise arguments (1, 2, or 3)
-def_op('CALL_FUNCTION', 131) # #args
-def_op('MAKE_FUNCTION', 132) # Flags
+def_op('CALL_FUNCTION', 131) # #args + (#kwargs << 8)
+hasnargs.append(131)
+def_op('MAKE_FUNCTION', 132) # Number of args with default values
def_op('BUILD_SLICE', 133) # Number of items
+def_op('MAKE_CLOSURE', 134)
def_op('LOAD_CLOSURE', 135)
hasfree.append(135)
def_op('LOAD_DEREF', 136)
@@ -186,8 +188,12 @@
def_op('DELETE_DEREF', 138)
hasfree.append(138)
-def_op('CALL_FUNCTION_KW', 141) # #args + #kwargs
-def_op('CALL_FUNCTION_EX', 142) # Flags
+def_op('CALL_FUNCTION_VAR', 140) # #args + (#kwargs << 8)
+hasnargs.append(140)
+def_op('CALL_FUNCTION_KW', 141) # #args + (#kwargs << 8)
+hasnargs.append(141)
+def_op('CALL_FUNCTION_VAR_KW', 142) # #args + (#kwargs << 8)
+hasnargs.append(142)
jrel_op('SETUP_WITH', 143)
@@ -198,6 +204,8 @@
def_op('LOAD_CLASSDEREF', 148)
hasfree.append(148)
+jrel_op('SETUP_ASYNC_WITH', 154)
+
def_op('EXTENDED_ARG', 144)
EXTENDED_ARG = 144
@@ -207,12 +215,8 @@
def_op('BUILD_TUPLE_UNPACK', 152)
def_op('BUILD_SET_UNPACK', 153)
-jrel_op('SETUP_ASYNC_WITH', 154)
-
-def_op('FORMAT_VALUE', 155)
-def_op('BUILD_CONST_KEY_MAP', 156)
-def_op('BUILD_STRING', 157)
-def_op('BUILD_TUPLE_UNPACK_WITH_CALL', 158)
+def_op('FORMAT_VALUE', 155) # in CPython 3.6, but available in PyPy from 3.5
+def_op('BUILD_STRING', 157) # in CPython 3.6, but available in PyPy from 3.5
# pypy modification, experimental bytecode
def_op('LOOKUP_METHOD', 201) # Index in name list
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit