This is an automated email from the ASF dual-hosted git repository. jrmccluskey pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push: new fad7f96dc85 Add support for CPython 3.11 opcodes (#25795) fad7f96dc85 is described below commit fad7f96dc85047eb1a01536f817565d6f959ee35 Author: Jack McCluskey <34928439+jrmcclus...@users.noreply.github.com> AuthorDate: Fri Mar 17 16:14:44 2023 -0400 Add support for CPython 3.11 opcodes (#25795) * Add support for CPython 3.11 opcodes * Remove unused utility function * Uncomment test * Drop camel case form * Add addition documentation comments --- sdks/python/apache_beam/typehints/opcodes.py | 36 +++++- .../apache_beam/typehints/trivial_inference.py | 129 +++++++++++++++++++-- 2 files changed, 151 insertions(+), 14 deletions(-) diff --git a/sdks/python/apache_beam/typehints/opcodes.py b/sdks/python/apache_beam/typehints/opcodes.py index 1ac3538c691..5a35b56b932 100644 --- a/sdks/python/apache_beam/typehints/opcodes.py +++ b/sdks/python/apache_beam/typehints/opcodes.py @@ -178,7 +178,9 @@ binary_lshift = inplace_lshift = binary_rshift = inplace_rshift = pop_top binary_and = inplace_and = symmetric_binary_op binary_xor = inplace_xor = symmetric_binary_op -binary_or = inpalce_or = symmetric_binary_op +binary_or = inplace_or = symmetric_binary_op + +binary_op = symmetric_binary_op def store_subscr(unused_state, unused_args): @@ -403,6 +405,8 @@ import_from = push_value(Any) def load_global(state, arg): + if (sys.version_info.major, sys.version_info.minor) >= (3, 11): + arg = arg >> 1 state.stack.append(state.get_global(arg)) @@ -428,10 +432,18 @@ def gen_start(state, arg): def load_closure(state, arg): + # The arg is no longer offset by len(covar_names) as of 3.11 + # See https://docs.python.org/3/library/dis.html#opcode-LOAD_CLOSURE + if (sys.version_info.major, sys.version_info.minor) >= (3, 11): + arg -= len(state.co.co_varnames) state.stack.append(state.get_closure(arg)) def load_deref(state, arg): + # The arg is no longer offset by len(covar_names) as of 3.11 + # See https://docs.python.org/3/library/dis.html#opcode-LOAD_DEREF + if (sys.version_info.major, sys.version_info.minor) >= (3, 11): + arg -= len(state.co.co_varnames) state.stack.append(state.closure_type(arg)) @@ -440,9 +452,19 @@ def make_function(state, arg): """ # TODO(luke-zhu): Handle default argument types globals = state.f.__globals__ # Inherits globals from the current frame - func_name = state.stack[-1].value - func_code = state.stack[-2].value - pop_count = 2 + tos = state.stack[-1].value + # In Python 3.11 lambdas no longer have fully qualified names on the stack, + # so we check for this case (AKA the code is top of stack.) + if isinstance(tos, types.CodeType): + func_name = None + func_code = tos + pop_count = 1 + is_lambda = True + else: + func_name = tos + func_code = state.stack[-2].value + pop_count = 2 + is_lambda = False closure = None # arg contains flags, with corresponding stack values if positive. # https://docs.python.org/3.6/library/dis.html#opcode-MAKE_FUNCTION @@ -450,8 +472,12 @@ def make_function(state, arg): if arg & 0x08: # Convert types in Tuple constraint to a tuple of CPython cells. # https://stackoverflow.com/a/44670295 + if is_lambda: + closureTuplePos = -2 + else: + closureTuplePos = -3 closure = tuple((lambda _: lambda: _)(t).__closure__[0] - for t in state.stack[-3].tuple_types) + for t in state.stack[closureTuplePos].tuple_types) func = types.FunctionType(func_code, globals, name=func_name, closure=closure) diff --git a/sdks/python/apache_beam/typehints/trivial_inference.py b/sdks/python/apache_beam/typehints/trivial_inference.py index f69a87192e3..f4b350e8f05 100644 --- a/sdks/python/apache_beam/typehints/trivial_inference.py +++ b/sdks/python/apache_beam/typehints/trivial_inference.py @@ -126,11 +126,12 @@ class Const(object): class FrameState(object): """Stores the state of the frame at a particular point of execution. """ - def __init__(self, f, local_vars=None, stack=()): + def __init__(self, f, local_vars=None, stack=(), kw_names=None): self.f = f self.co = f.__code__ self.vars = list(local_vars) self.stack = list(stack) + self.kw_names = kw_names def __eq__(self, other): return isinstance(other, FrameState) and self.__dict__ == other.__dict__ @@ -139,7 +140,7 @@ class FrameState(object): return hash(tuple(sorted(self.__dict__.items()))) def copy(self): - return FrameState(self.f, self.vars, self.stack) + return FrameState(self.f, self.vars, self.stack, self.kw_names) def const_type(self, i): return Const(self.co.co_consts[i]) @@ -352,7 +353,10 @@ def infer_return_type_func(f, input_types, debug=False, depth=0): if debug: print() print(f, id(f), input_types) - dis.dis(f) + if (sys.version_info.major, sys.version_info.minor) >= (3, 11): + dis.dis(f, show_caches=True) + else: + dis.dis(f) from . import opcodes simple_ops = dict((k.upper(), v) for k, v in opcodes.__dict__.items()) @@ -374,7 +378,12 @@ def infer_return_type_func(f, input_types, debug=False, depth=0): # In Python 3, use dis library functions to disassemble bytecode and handle # EXTENDED_ARGs. ofs_table = {} # offset -> instruction - for instruction in dis.get_instructions(f): + if (sys.version_info.major, sys.version_info.minor) >= (3, 11): + dis_ints = dis.get_instructions(f, show_caches=True) + else: + dis_ints = dis.get_instructions(f) + + for instruction in dis_ints: ofs_table[instruction.offset] = instruction # Python 3.6+: 1 byte opcode + 1 byte arg (2 bytes, arg may be ignored). @@ -384,7 +393,7 @@ def infer_return_type_func(f, input_types, debug=False, depth=0): # Python 3.10: bpo-27129 changes jump offsets to use instruction offsets, # not byte offsets. The offsets were halved (16 bits fro instructions vs 8 # bits for bytes), so we have to double the value of arg. - if (sys.version_info.major, sys.version_info.minor) == (3, 10): + if (sys.version_info.major, sys.version_info.minor) >= (3, 10): jump_multiplier = 2 else: jump_multiplier = 1 @@ -400,6 +409,7 @@ def infer_return_type_func(f, input_types, debug=False, depth=0): print(dis.opname[op].ljust(20), end=' ') pc += inst_size + arg = None if op >= dis.HAVE_ARGUMENT: arg = instruction.arg pc += opt_arg_size @@ -408,9 +418,14 @@ def infer_return_type_func(f, input_types, debug=False, depth=0): if op in dis.hasconst: print('(' + repr(co.co_consts[arg]) + ')', end=' ') elif op in dis.hasname: - print('(' + co.co_names[arg] + ')', end=' ') + if (sys.version_info.major, sys.version_info.minor) >= (3, 11): + # Pre-emptively bit-shift so the print doesn't go out of index + print_arg = arg >> 1 + else: + print_arg = arg + print('(' + co.co_names[print_arg] + ')', end=' ') elif op in dis.hasjrel: - print('(to ' + repr(pc + arg) + ')', end=' ') + print('(to ' + repr(pc + (arg * jump_multiplier)) + ')', end=' ') elif op in dis.haslocal: print('(' + co.co_varnames[arg] + ')', end=' ') elif op in dis.hascompare: @@ -418,7 +433,12 @@ def infer_return_type_func(f, input_types, debug=False, depth=0): elif op in dis.hasfree: if free is None: free = co.co_cellvars + co.co_freevars - print('(' + free[arg] + ')', end=' ') + # From 3.11 on the arg is no longer offset by len(co_varnames) + # so we adjust it back + print_arg = arg + if (sys.version_info.major, sys.version_info.minor) >= (3, 11): + print_arg = arg - len(co.co_varnames) + print('(' + free[print_arg] + ')', end=' ') # Actually emulate the op. if state is None and states[start] is None: @@ -498,6 +518,40 @@ def infer_return_type_func(f, input_types, debug=False, depth=0): else: return_type = typehints.Any state.stack[-pop_count:] = [return_type] + elif opname == 'CALL': + pop_count = 1 + arg + # Keyword Args case + if state.kw_names is not None: + if isinstance(state.stack[-pop_count], Const): + from apache_beam.pvalue import Row + if state.stack[-pop_count].value == Row: + fields = state.kw_names + return_type = row_type.RowTypeConstraint.from_fields( + list( + zip(fields, + Const.unwrap_all(state.stack[-pop_count + 1:])))) + else: + return_type = Any + state.kw_names = None + else: + # Handle lambdas always having an arg of 0 for CALL + # See https://github.com/python/cpython/issues/102403 for context. + if pop_count == 1: + while pop_count <= len(state.stack): + if isinstance(state.stack[-pop_count], Const): + break + pop_count += 1 + if depth <= 0 or pop_count > len(state.stack): + return_type = Any + elif isinstance(state.stack[-pop_count], Const): + return_type = infer_return_type( + state.stack[-pop_count].value, + state.stack[1 - pop_count:], + debug=debug, + depth=depth - 1) + else: + return_type = Any + state.stack[-pop_count:] = [return_type] elif opname in simple_ops: if debug: print("Executing simple op " + opname) @@ -511,6 +565,10 @@ def infer_return_type_func(f, input_types, debug=False, depth=0): jmp = pc + arg * jump_multiplier jmp_state = state state = None + elif opname in ('JUMP_BACKWARD', 'JUMP_BACKWARD_NO_INTERRUPT'): + jmp = pc - (arg * jump_multiplier) + jmp_state = state + state = None elif opname == 'JUMP_ABSOLUTE': jmp = arg * jump_multiplier jmp_state = state @@ -519,8 +577,30 @@ def infer_return_type_func(f, input_types, debug=False, depth=0): state.stack.pop() jmp = arg * jump_multiplier jmp_state = state.copy() + elif opname in ('POP_JUMP_FORWARD_IF_TRUE', 'POP_JUMP_FORWARD_IF_FALSE'): + state.stack.pop() + jmp = pc + arg * jump_multiplier + jmp_state = state.copy() + elif opname in ('POP_JUMP_BACKWARD_IF_TRUE', 'POP_JUMP_BACKWARD_IF_FALSE'): + state.stack.pop() + jmp = pc - (arg * jump_multiplier) + jmp_state = state.copy() + elif opname in ('POP_JUMP_FORWARD_IF_NONE', 'POP_JUMP_FORWARD_IF_NOT_NONE'): + state.stack.pop() + jmp = pc + arg * jump_multiplier + jmp_state = state.copy() + elif opname in ('POP_JUMP_BACKWARD_IF_NONE', + 'POP_JUMP_BACKWARD_IF_NOT_NONE'): + state.stack.pop() + jmp = pc - (arg * jump_multiplier) + jmp_state = state.copy() elif opname in ('JUMP_IF_TRUE_OR_POP', 'JUMP_IF_FALSE_OR_POP'): - jmp = arg * jump_multiplier + # The arg was changed to be a relative delta instead of an absolute + # in 3.11 + if (sys.version_info.major, sys.version_info.minor) >= (3, 11): + jmp = pc + arg * jump_multiplier + else: + jmp = arg * jump_multiplier jmp_state = state.copy() state.stack.pop() elif opname == 'FOR_ITER': @@ -528,6 +608,37 @@ def infer_return_type_func(f, input_types, debug=False, depth=0): jmp_state = state.copy() jmp_state.stack.pop() state.stack.append(element_type(state.stack[-1])) + elif opname == 'COPY_FREE_VARS': + # Helps with calling closures, but since we aren't executing + # them we can treat this as a no-op + pass + elif opname == 'KW_NAMES': + tup = co.co_consts[arg] + state.kw_names = tup + elif opname == 'RESUME': + # RESUME is a no-op + pass + elif opname == 'PUSH_NULL': + # We're treating this as a no-op to avoid having to check + # for extra None values on the stack when we extract return + # values + pass + elif opname == 'PRECALL': + # PRECALL is a no-op. + pass + elif opname == 'MAKE_CELL': + # TODO: see if we need to implement cells like this + pass + elif opname == 'RETURN_GENERATOR': + # TODO: see what this behavior is supposed to be beyond + # putting something on the stack to be popped off + state.stack.append(None) + pass + elif opname == 'CACHE': + # No-op introduced in 3.11. Without handling this some + # instructions have functionally > 2 byte size. + pass + else: raise TypeInferenceError('unable to handle %s' % opname)