This is an automated email from the ASF dual-hosted git repository.

jrmccluskey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new fad7f96dc85 Add support for CPython 3.11 opcodes (#25795)
fad7f96dc85 is described below

commit fad7f96dc85047eb1a01536f817565d6f959ee35
Author: Jack McCluskey <34928439+jrmcclus...@users.noreply.github.com>
AuthorDate: Fri Mar 17 16:14:44 2023 -0400

    Add support for CPython 3.11 opcodes (#25795)
    
    * Add support for CPython 3.11 opcodes
    
    * Remove unused utility function
    
    * Uncomment test
    
    * Drop camel case form
    
    * Add additional documentation comments
---
 sdks/python/apache_beam/typehints/opcodes.py       |  36 +++++-
 .../apache_beam/typehints/trivial_inference.py     | 129 +++++++++++++++++++--
 2 files changed, 151 insertions(+), 14 deletions(-)

diff --git a/sdks/python/apache_beam/typehints/opcodes.py b/sdks/python/apache_beam/typehints/opcodes.py
index 1ac3538c691..5a35b56b932 100644
--- a/sdks/python/apache_beam/typehints/opcodes.py
+++ b/sdks/python/apache_beam/typehints/opcodes.py
@@ -178,7 +178,9 @@ binary_lshift = inplace_lshift = binary_rshift = inplace_rshift = pop_top
 
 binary_and = inplace_and = symmetric_binary_op
 binary_xor = inplace_xor = symmetric_binary_op
-binary_or = inpalce_or = symmetric_binary_op
+binary_or = inplace_or = symmetric_binary_op
+
+binary_op = symmetric_binary_op
 
 
 def store_subscr(unused_state, unused_args):
@@ -403,6 +405,8 @@ import_from = push_value(Any)
 
 
 def load_global(state, arg):
+  if (sys.version_info.major, sys.version_info.minor) >= (3, 11):
+    arg = arg >> 1
   state.stack.append(state.get_global(arg))
 
 
@@ -428,10 +432,18 @@ def gen_start(state, arg):
 
 
 def load_closure(state, arg):
+  # The arg is no longer offset by len(co_varnames) as of 3.11
+  # See https://docs.python.org/3/library/dis.html#opcode-LOAD_CLOSURE
+  if (sys.version_info.major, sys.version_info.minor) >= (3, 11):
+    arg -= len(state.co.co_varnames)
   state.stack.append(state.get_closure(arg))
 
 
 def load_deref(state, arg):
+  # The arg is no longer offset by len(co_varnames) as of 3.11
+  # See https://docs.python.org/3/library/dis.html#opcode-LOAD_DEREF
+  if (sys.version_info.major, sys.version_info.minor) >= (3, 11):
+    arg -= len(state.co.co_varnames)
   state.stack.append(state.closure_type(arg))
 
 
@@ -440,9 +452,19 @@ def make_function(state, arg):
   """
   # TODO(luke-zhu): Handle default argument types
   globals = state.f.__globals__  # Inherits globals from the current frame
-  func_name = state.stack[-1].value
-  func_code = state.stack[-2].value
-  pop_count = 2
+  tos = state.stack[-1].value
+  # In Python 3.11 lambdas no longer have fully qualified names on the stack,
+  # so we check for this case (AKA the code is top of stack.)
+  if isinstance(tos, types.CodeType):
+    func_name = None
+    func_code = tos
+    pop_count = 1
+    is_lambda = True
+  else:
+    func_name = tos
+    func_code = state.stack[-2].value
+    pop_count = 2
+    is_lambda = False
   closure = None
   # arg contains flags, with corresponding stack values if positive.
   # https://docs.python.org/3.6/library/dis.html#opcode-MAKE_FUNCTION
@@ -450,8 +472,12 @@ def make_function(state, arg):
   if arg & 0x08:
     # Convert types in Tuple constraint to a tuple of CPython cells.
     # https://stackoverflow.com/a/44670295
+    if is_lambda:
+      closureTuplePos = -2
+    else:
+      closureTuplePos = -3
     closure = tuple((lambda _: lambda: _)(t).__closure__[0]
-                    for t in state.stack[-3].tuple_types)
+                    for t in state.stack[closureTuplePos].tuple_types)
 
   func = types.FunctionType(func_code, globals, name=func_name, closure=closure)
 
diff --git a/sdks/python/apache_beam/typehints/trivial_inference.py b/sdks/python/apache_beam/typehints/trivial_inference.py
index f69a87192e3..f4b350e8f05 100644
--- a/sdks/python/apache_beam/typehints/trivial_inference.py
+++ b/sdks/python/apache_beam/typehints/trivial_inference.py
@@ -126,11 +126,12 @@ class Const(object):
 class FrameState(object):
   """Stores the state of the frame at a particular point of execution.
   """
-  def __init__(self, f, local_vars=None, stack=()):
+  def __init__(self, f, local_vars=None, stack=(), kw_names=None):
     self.f = f
     self.co = f.__code__
     self.vars = list(local_vars)
     self.stack = list(stack)
+    self.kw_names = kw_names
 
   def __eq__(self, other):
     return isinstance(other, FrameState) and self.__dict__ == other.__dict__
@@ -139,7 +140,7 @@ class FrameState(object):
     return hash(tuple(sorted(self.__dict__.items())))
 
   def copy(self):
-    return FrameState(self.f, self.vars, self.stack)
+    return FrameState(self.f, self.vars, self.stack, self.kw_names)
 
   def const_type(self, i):
     return Const(self.co.co_consts[i])
@@ -352,7 +353,10 @@ def infer_return_type_func(f, input_types, debug=False, depth=0):
   if debug:
     print()
     print(f, id(f), input_types)
-    dis.dis(f)
+    if (sys.version_info.major, sys.version_info.minor) >= (3, 11):
+      dis.dis(f, show_caches=True)
+    else:
+      dis.dis(f)
   from . import opcodes
   simple_ops = dict((k.upper(), v) for k, v in opcodes.__dict__.items())
 
@@ -374,7 +378,12 @@ def infer_return_type_func(f, input_types, debug=False, depth=0):
   # In Python 3, use dis library functions to disassemble bytecode and handle
   # EXTENDED_ARGs.
   ofs_table = {}  # offset -> instruction
-  for instruction in dis.get_instructions(f):
+  if (sys.version_info.major, sys.version_info.minor) >= (3, 11):
+    dis_ints = dis.get_instructions(f, show_caches=True)
+  else:
+    dis_ints = dis.get_instructions(f)
+
+  for instruction in dis_ints:
     ofs_table[instruction.offset] = instruction
 
   # Python 3.6+: 1 byte opcode + 1 byte arg (2 bytes, arg may be ignored).
@@ -384,7 +393,7 @@ def infer_return_type_func(f, input_types, debug=False, depth=0):
   # Python 3.10: bpo-27129 changes jump offsets to use instruction offsets,
   # not byte offsets. The offsets were halved (16 bits fro instructions vs 8
   # bits for bytes), so we have to double the value of arg.
-  if (sys.version_info.major, sys.version_info.minor) == (3, 10):
+  if (sys.version_info.major, sys.version_info.minor) >= (3, 10):
     jump_multiplier = 2
   else:
     jump_multiplier = 1
@@ -400,6 +409,7 @@ def infer_return_type_func(f, input_types, debug=False, depth=0):
       print(dis.opname[op].ljust(20), end=' ')
 
     pc += inst_size
+    arg = None
     if op >= dis.HAVE_ARGUMENT:
       arg = instruction.arg
       pc += opt_arg_size
@@ -408,9 +418,14 @@ def infer_return_type_func(f, input_types, debug=False, depth=0):
         if op in dis.hasconst:
           print('(' + repr(co.co_consts[arg]) + ')', end=' ')
         elif op in dis.hasname:
-          print('(' + co.co_names[arg] + ')', end=' ')
+          if (sys.version_info.major, sys.version_info.minor) >= (3, 11):
+            # Pre-emptively bit-shift so the print doesn't go out of index
+            print_arg = arg >> 1
+          else:
+            print_arg = arg
+          print('(' + co.co_names[print_arg] + ')', end=' ')
         elif op in dis.hasjrel:
-          print('(to ' + repr(pc + arg) + ')', end=' ')
+          print('(to ' + repr(pc + (arg * jump_multiplier)) + ')', end=' ')
         elif op in dis.haslocal:
           print('(' + co.co_varnames[arg] + ')', end=' ')
         elif op in dis.hascompare:
@@ -418,7 +433,12 @@ def infer_return_type_func(f, input_types, debug=False, depth=0):
         elif op in dis.hasfree:
           if free is None:
             free = co.co_cellvars + co.co_freevars
-          print('(' + free[arg] + ')', end=' ')
+          # From 3.11 on the arg is no longer offset by len(co_varnames)
+          # so we adjust it back
+          print_arg = arg
+          if (sys.version_info.major, sys.version_info.minor) >= (3, 11):
+            print_arg = arg - len(co.co_varnames)
+          print('(' + free[print_arg] + ')', end=' ')
 
     # Actually emulate the op.
     if state is None and states[start] is None:
@@ -498,6 +518,40 @@ def infer_return_type_func(f, input_types, debug=False, depth=0):
       else:
         return_type = typehints.Any
       state.stack[-pop_count:] = [return_type]
+    elif opname == 'CALL':
+      pop_count = 1 + arg
+      # Keyword Args case
+      if state.kw_names is not None:
+        if isinstance(state.stack[-pop_count], Const):
+          from apache_beam.pvalue import Row
+          if state.stack[-pop_count].value == Row:
+            fields = state.kw_names
+            return_type = row_type.RowTypeConstraint.from_fields(
+                list(
+                    zip(fields,
+                        Const.unwrap_all(state.stack[-pop_count + 1:]))))
+          else:
+            return_type = Any
+        state.kw_names = None
+      else:
+        # Handle lambdas always having an arg of 0 for CALL
+        # See https://github.com/python/cpython/issues/102403 for context.
+        if pop_count == 1:
+          while pop_count <= len(state.stack):
+            if isinstance(state.stack[-pop_count], Const):
+              break
+            pop_count += 1
+        if depth <= 0 or pop_count > len(state.stack):
+          return_type = Any
+        elif isinstance(state.stack[-pop_count], Const):
+          return_type = infer_return_type(
+              state.stack[-pop_count].value,
+              state.stack[1 - pop_count:],
+              debug=debug,
+              depth=depth - 1)
+        else:
+          return_type = Any
+      state.stack[-pop_count:] = [return_type]
     elif opname in simple_ops:
       if debug:
         print("Executing simple op " + opname)
@@ -511,6 +565,10 @@ def infer_return_type_func(f, input_types, debug=False, depth=0):
       jmp = pc + arg * jump_multiplier
       jmp_state = state
       state = None
+    elif opname in ('JUMP_BACKWARD', 'JUMP_BACKWARD_NO_INTERRUPT'):
+      jmp = pc - (arg * jump_multiplier)
+      jmp_state = state
+      state = None
     elif opname == 'JUMP_ABSOLUTE':
       jmp = arg * jump_multiplier
       jmp_state = state
@@ -519,8 +577,30 @@ def infer_return_type_func(f, input_types, debug=False, depth=0):
       state.stack.pop()
       jmp = arg * jump_multiplier
       jmp_state = state.copy()
+    elif opname in ('POP_JUMP_FORWARD_IF_TRUE', 'POP_JUMP_FORWARD_IF_FALSE'):
+      state.stack.pop()
+      jmp = pc + arg * jump_multiplier
+      jmp_state = state.copy()
+    elif opname in ('POP_JUMP_BACKWARD_IF_TRUE', 'POP_JUMP_BACKWARD_IF_FALSE'):
+      state.stack.pop()
+      jmp = pc - (arg * jump_multiplier)
+      jmp_state = state.copy()
+    elif opname in ('POP_JUMP_FORWARD_IF_NONE', 'POP_JUMP_FORWARD_IF_NOT_NONE'):
+      state.stack.pop()
+      jmp = pc + arg * jump_multiplier
+      jmp_state = state.copy()
+    elif opname in ('POP_JUMP_BACKWARD_IF_NONE',
+                    'POP_JUMP_BACKWARD_IF_NOT_NONE'):
+      state.stack.pop()
+      jmp = pc - (arg * jump_multiplier)
+      jmp_state = state.copy()
     elif opname in ('JUMP_IF_TRUE_OR_POP', 'JUMP_IF_FALSE_OR_POP'):
-      jmp = arg * jump_multiplier
+      # The arg was changed to be a relative delta instead of an absolute
+      # in 3.11
+      if (sys.version_info.major, sys.version_info.minor) >= (3, 11):
+        jmp = pc + arg * jump_multiplier
+      else:
+        jmp = arg * jump_multiplier
       jmp_state = state.copy()
       state.stack.pop()
     elif opname == 'FOR_ITER':
@@ -528,6 +608,37 @@ def infer_return_type_func(f, input_types, debug=False, depth=0):
       jmp_state = state.copy()
       jmp_state.stack.pop()
       state.stack.append(element_type(state.stack[-1]))
+    elif opname == 'COPY_FREE_VARS':
+      # Helps with calling closures, but since we aren't executing
+      # them we can treat this as a no-op
+      pass
+    elif opname == 'KW_NAMES':
+      tup = co.co_consts[arg]
+      state.kw_names = tup
+    elif opname == 'RESUME':
+      # RESUME is a no-op
+      pass
+    elif opname == 'PUSH_NULL':
+      # We're treating this as a no-op to avoid having to check
+      # for extra None values on the stack when we extract return
+      # values
+      pass
+    elif opname == 'PRECALL':
+      # PRECALL is a no-op.
+      pass
+    elif opname == 'MAKE_CELL':
+      # TODO: see if we need to implement cells like this
+      pass
+    elif opname == 'RETURN_GENERATOR':
+      # TODO: see what this behavior is supposed to be beyond
+      # putting something on the stack to be popped off
+      state.stack.append(None)
+      pass
+    elif opname == 'CACHE':
+      # No-op introduced in 3.11. Without handling this some
+      # instructions have functionally > 2 byte size.
+      pass
+
     else:
       raise TypeInferenceError('unable to handle %s' % opname)
 

Reply via email to