https://github.com/kastiglione updated 
https://github.com/llvm/llvm-project/pull/113734

>From 57223942e91c47d0a61b148a65247cd9cbb16496 Mon Sep 17 00:00:00 2001
From: Dave Lee <[email protected]>
Date: Fri, 25 Oct 2024 12:56:00 -0700
Subject: [PATCH 1/3] [lldb] Proof of concept data formatter compiler for
 Python

---
 .../formatter-bytecode/optional_summary.py    |  14 ++
 .../formatter-bytecode/python_to_assembly.py  | 145 ++++++++++++++++++
 2 files changed, 159 insertions(+)
 create mode 100644 lldb/examples/formatter-bytecode/optional_summary.py
 create mode 100755 lldb/examples/formatter-bytecode/python_to_assembly.py

diff --git a/lldb/examples/formatter-bytecode/optional_summary.py b/lldb/examples/formatter-bytecode/optional_summary.py
new file mode 100644
index 0000000000000..68e672d86613d
--- /dev/null
+++ b/lldb/examples/formatter-bytecode/optional_summary.py
@@ -0,0 +1,14 @@
+# Example input for python_to_assembly.py: a summary provider for an
+# Optional-like type. It is documented with `#` comments rather than a
+# docstring because the proof-of-concept compiler visits every expression in
+# the function body and would emit a docstring as a string constant.
+def OptionalSummaryProvider(valobj, _):
+    # Sentinel handed to GetValueAsUnsigned as its fallback; seeing it come
+    # back is treated as "the flag could not be read".
+    failure = 2
+    storage = valobj.GetChildMemberWithName("Storage")
+    hasVal = storage.GetChildMemberWithName("hasVal").GetValueAsUnsigned(failure)
+    if hasVal == failure:
+        return "<could not read Optional>"
+
+    if hasVal == 0:
+        return "None"
+
+    # View the stored value as the type given by the first template argument
+    # before asking for its summary.
+    underlying_type = storage.GetType().GetTemplateArgumentType(0)
+    value = storage.GetChildMemberWithName("value")
+    value = value.Cast(underlying_type)
+    return value.GetSummary()
diff --git a/lldb/examples/formatter-bytecode/python_to_assembly.py b/lldb/examples/formatter-bytecode/python_to_assembly.py
new file mode 100755
index 0000000000000..6e2adbe093fda
--- /dev/null
+++ b/lldb/examples/formatter-bytecode/python_to_assembly.py
@@ -0,0 +1,145 @@
+#!/usr/bin/python3
+
+import ast
+import io
+import sys
+from typing import Any
+
+# Maps the method names that formatter code may call to the selector that is
+# emitted in front of the `call` operation (see `Compiler.visit_Call`).
+BUILTINS = {
+    "Cast": "@cast",
+    "GetChildMemberWithName": "@get_child_with_name",
+    # The summary selector is spelled `@summary`, not `@get_summary`.
+    "GetSummary": "@summary",
+    "GetTemplateArgumentType": "@get_template_argument_type",
+    "GetType": "@get_type",
+    "GetValueAsUnsigned": "@get_value_as_unsigned",
+}
+
+# Maps Python comparison operator node types to the operation text emitted by
+# `Compiler.visit_Compare`.
+# NOTE(review): confirm the `=<` and `=>` spellings against the formatter
+# bytecode specification.
+COMPS = {
+    ast.Eq: "=",
+    ast.NotEq: "!=",
+    ast.Lt: "<",
+    ast.LtE: "=<",
+    ast.Gt: ">",
+    ast.GtE: "=>",
+}
+
+class Compiler(ast.NodeVisitor):
+    """
+    Compile the AST of a Python data formatter into formatter bytecode
+    assembly text.
+
+    Each visited node appends zero or more lines of assembly to an internal
+    buffer; read the accumulated text from the `output` property.
+    """
+
+    # Track the stack index of local variables.
+    #
+    # This is essentially an ordered dictionary, where the key is an index on
+    # the stack, and the value is the name of the variable whose value is at
+    # that index. A name that has been rebound leaves "" at its old index.
+    #
+    # Ex: `locals[0]` is the name of the first value pushed on the stack, etc.
+    locals: list[str]
+
+    # Receives every line written by `_output` unless `final=True` is passed.
+    buffer: io.StringIO
+    # Receives lines written with `final=True`; `output` appends it after
+    # `buffer`.
+    final_buffer: io.StringIO
+
+    def __init__(self) -> None:
+        self.locals = []
+        self.buffer = io.StringIO()
+        self.final_buffer = io.StringIO()
+
+    def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
+        """Compile one function, using its positional arguments as locals."""
+        # Initialize `locals` with the (positional) arguments.
+        self.locals = [arg.arg for arg in node.args.args]
+        self.generic_visit(node)
+        self.locals.clear()
+
+    def visit_Compare(self, node: ast.Compare) -> None:
+        """Emit both operands followed by the comparison operation."""
+        self.visit(node.left)
+        # XXX: Does not handle multiple comparisons, ex: `0 < x < 10`
+        self.visit(node.comparators[0])
+        self._output(COMPS[type(node.ops[0])])
+
+    def visit_If(self, node: ast.If) -> None:
+        """
+        Emit the test, then the body as a `{ ... }` block closed by `if` or
+        `ifelse`.
+        """
+        self.visit(node.test)
+
+        # Does the body `return`? Only direct children of the body are
+        # checked.
+        has_return = any(isinstance(x, ast.Return) for x in node.body)
+
+        self._output("{")
+        self._visit_each(node.body)
+        if not node.orelse and not has_return:
+            # No else, and no early exit: a simple `if`
+            self._output("} if")
+            return
+
+        self._output("}")
+        if node.orelse:
+            # Handle else.
+            self._output("{")
+            self._visit_each(node.orelse)
+            self._output("} ifelse")
+        elif has_return:
+            # Convert early exit into an `ifelse`. The closing line goes to
+            # the final buffer, so the code that follows this `if` ends up
+            # inside the else block.
+            self._output("{")
+            self._output("} ifelse", final=True)
+
+    def visit_Constant(self, node: ast.Constant) -> None:
+        """Emit a literal: strings double-quoted, booleans as 0 or 1."""
+        if isinstance(node.value, str):
+            self._output(f'"{node.value}"')
+        elif isinstance(node.value, bool):
+            self._output(int(node.value))
+        else:
+            self._output(node.value)
+
+    def visit_Call(self, node: ast.Call) -> None:
+        """
+        Emit a method call as receiver, arguments, then `<selector> call`.
+
+        Calls that are not of the form `receiver.method(...)` emit nothing.
+        """
+        if isinstance(node.func, ast.Attribute):
+            # The receiver is the left hand side of the dot.
+            receiver = node.func.value
+            method = node.func.attr
+            if selector := BUILTINS.get(method):
+                # Visit the method's receiver to have its value on the stack.
+                self.visit(receiver)
+                # Visit the args to position them on the stack.
+                self._visit_each(node.args)
+                self._output(f"{selector} call")
+            else:
+                # TODO: fail
+                print(f"error: unsupported method {node.func.attr}", file=sys.stderr)
+
+    def visit_Assign(self, node: ast.Assign) -> None:
+        """
+        Emit the right hand side and bind the assigned name(s) to the stack
+        slots it produces.
+
+        Raises NotImplementedError when the first target is neither a name
+        nor a tuple of names.
+        """
+        # Visit RHS first, putting values on the stack.
+        self.visit(node.value)
+        # Determine the name(s). Either a single Name, or a Tuple of Names.
+        target = node.targets[0]
+        if isinstance(target, ast.Name):
+            names = [target.id]
+        elif isinstance(target, ast.Tuple):
+            # These tuple elements are Name nodes.
+            names = [x.id for x in target.elts]
+        else:
+            # Previously `names` was left unbound here and the method failed
+            # further down with an UnboundLocalError.
+            raise NotImplementedError(
+                f"unsupported assignment target: {type(target).__name__}"
+            )
+
+        # Forget any previous bindings of these names.
+        # Their values are orphaned on the stack.
+        self.locals[:] = ["" if local in names else local for local in self.locals]
+
+        self.locals.extend(names)
+
+    def visit_Name(self, node: ast.Name) -> None:
+        """Emit a `pick` of the stack slot currently bound to the name."""
+        idx = self.locals.index(node.id)
+        self._output(f"{idx} pick # {node.id}")
+
+    def _visit_each(self, nodes: list[ast.AST]) -> None:
+        """Visit the given nodes in order."""
+        for child in nodes:
+            self.visit(child)
+
+    def _output(self, x: Any, final: bool = False) -> None:
+        """Write `x` as one line to the final buffer or the main buffer."""
+        dest = self.final_buffer if final else self.buffer
+        print(x, file=dest)
+
+    @property
+    def output(self) -> str:
+        """The assembly emitted so far: main buffer, then final buffer."""
+        # Read this instance's buffers. The module-level `compiler` variable
+        # only exists when the file is run as a script.
+        return self.buffer.getvalue() + self.final_buffer.getvalue()
+
+
+if __name__ == "__main__":
+    # The only argument is the path of the Python formatter source to compile.
+    if len(sys.argv) < 2:
+        sys.exit(f"usage: {sys.argv[0]} <formatter.py>")
+    with open(sys.argv[1]) as f:
+        root = ast.parse(f.read())
+    compiler = Compiler()
+    compiler.visit(root)
+    print(compiler.output)

>From 282cb22fef121671e5ee7b18fb007cac4f64ceaa Mon Sep 17 00:00:00 2001
From: Dave Lee <[email protected]>
Date: Fri, 3 Jan 2025 14:20:48 -0800
Subject: [PATCH 2/3] Support the return operation

---
 .../formatter-bytecode/python_to_assembly.py  | 32 +++++++------------
 1 file changed, 11 insertions(+), 21 deletions(-)

diff --git a/lldb/examples/formatter-bytecode/python_to_assembly.py b/lldb/examples/formatter-bytecode/python_to_assembly.py
index 6e2adbe093fda..98c03832227cc 100755
--- a/lldb/examples/formatter-bytecode/python_to_assembly.py
+++ b/lldb/examples/formatter-bytecode/python_to_assembly.py
@@ -34,12 +34,10 @@ class Compiler(ast.NodeVisitor):
     locals: list[str]
 
     buffer: io.StringIO
-    final_buffer: io.StringIO
 
     def __init__(self) -> None:
         self.locals = []
         self.buffer = io.StringIO()
-        self.final_buffer = io.StringIO()
 
     def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
         # Initialize `locals` with the (positional) arguments.
@@ -56,26 +54,19 @@ def visit_Compare(self, node: ast.Compare) -> None:
     def visit_If(self, node: ast.If) -> None:
         self.visit(node.test)
 
-        # Does the body `return`?
-        has_return = any(isinstance(x, ast.Return) for x in node.body)
-
         self._output("{")
         self._visit_each(node.body)
-        if not node.orelse and not has_return:
-            # No else, and no early exit: a simple `if`
-            self._output("} if")
-            return
-
-        self._output("}")
         if node.orelse:
-            # Handle else.
-            self._output("{")
+            self._output("} {")
             self._visit_each(node.orelse)
             self._output("} ifelse")
-        elif has_return:
-            # Convert early exit into an `ifelse`.
-            self._output("{")
-            self._output("} ifelse", final=True)
+        else:
+            self._output("} if")
+
+    def visit_Return(self, node: ast.Return) -> None:
+        # Emit the returned expression, if there is one, then the `return`
+        # operation itself.
+        if node.value:
+            self.visit(node.value)
+        self._output("return")
 
     def visit_Constant(self, node: ast.Constant) -> None:
         if isinstance(node.value, str):
@@ -128,13 +119,12 @@ def _visit_each(self, nodes: list[ast.AST]) -> None:
         for child in nodes:
             self.visit(child)
 
-    def _output(self, x: Any, final: bool = False) -> None:
-        dest = self.final_buffer if final else self.buffer
-        print(x, file=dest)
+    def _output(self, x: Any) -> None:
+        print(x, file=self.buffer)
 
     @property
     def output(self) -> str:
-        return compiler.buffer.getvalue() + compiler.final_buffer.getvalue()
+        # Read this instance's buffer. The module-level `compiler` variable
+        # only exists when the file is run as a script.
+        return self.buffer.getvalue()
 
 
 if __name__ == "__main__":

>From 11d4cb99157cd0c9dea5ada737ae370d229a76ab Mon Sep 17 00:00:00 2001
From: Dave Lee <[email protected]>
Date: Tue, 6 Jan 2026 10:49:21 -0800
Subject: [PATCH 3/3] Add Python bytecode translator

---
 .../bytecode_to_bytecode.py                   | 141 ++++++++++++++++++
 1 file changed, 141 insertions(+)
 create mode 100755 lldb/examples/formatter-bytecode/bytecode_to_bytecode.py

diff --git a/lldb/examples/formatter-bytecode/bytecode_to_bytecode.py b/lldb/examples/formatter-bytecode/bytecode_to_bytecode.py
new file mode 100755
index 0000000000000..89227b094957c
--- /dev/null
+++ b/lldb/examples/formatter-bytecode/bytecode_to_bytecode.py
@@ -0,0 +1,141 @@
+#!/usr/bin/python3
+
+import dis
+import sys
+from types import CodeType
+from typing import Iterable, Iterator, cast
+
+
+# Maps the method name carried by a `LOAD_METHOD` instruction to the selector
+# that `_translate_list` emits in front of `call`.
+# TODO: strlen, fmt
+_SELECTORS = {
+    "Cast": "@cast",
+    "GetChildAtIndex": "@get_child_at_index",
+    "GetChildIndex": "@get_child_index",
+    "GetChildMemberWithName": "@get_child_with_name",
+    "GetNumChildren": "@get_num_children",
+    "GetSummary": "@summary",
+    "GetTemplateArgumentType": "@get_template_argument_type",
+    "GetType": "@get_type",
+    "GetValue": "@get_value",
+    "GetValueAsAddress": "@get_value_as_address",
+    "GetValueAsSigned": "@get_value_as_signed",
+    "GetValueAsUnsigned": "@get_value_as_unsigned",
+}
+
+
+def _main(source_file):
+    """
+    Compile the Python source in `source_file` and print the translated
+    operations of each function body, one per line.
+    """
+    with open(source_file) as source:
+        text = source.read()
+    module_bytecode = dis.Bytecode(text)
+    for code in _function_bodies(module_bytecode):
+        for operation in _translate(dis.get_instructions(code)):
+            print(operation)
+
+
+def _function_bodies(bytecode: dis.Bytecode) -> Iterable[CodeType]:
+    """
+    Yield each constant of the given Bytecode's code object that is itself a
+    code object (has a `co_code` attribute), i.e. the function bodies.
+    """
+    constants = bytecode.codeobj.co_consts
+    yield from (item for item in constants if hasattr(item, "co_code"))
+
+
+def _translate(instructions: Iterator[dis.Instruction]) -> list[str]:
+    """
+    Translate Python instructions into a list of LLDB data formatter
+    bytecode operations.
+    """
+    operations: list[str] = []
+    # `_translate_list` consumes its input in place, so give it a list.
+    pending = list(instructions)
+    _translate_list(pending, operations)
+    return operations
+
+
+def _translate_list(instructions: list[dis.Instruction], result: list[str]):
+    """
+    Convert sequences of Python bytecode to sequences of LLDB data formatter
+    bytecode.
+
+    This function performs coarse-grained translations - sequences of input
+    to sequences of output. For translations of individual instructions, see
+    `_translate_instruction`.
+
+    `instructions` is consumed from the front while translating, and the
+    translated operations are appended to `result`.
+    """
+    while instructions:
+        inst = instructions.pop(0)
+        op = inst.opname
+        if op == "LOAD_METHOD" and inst.argval in _SELECTORS:
+            # Method call sequences begin with a LOAD_METHOD instruction, then
+            # load the arguments on to the stack, and end with the CALL_METHOD
+            # instruction.
+            _translate_method_call(inst, instructions, result)
+        elif op == "POP_JUMP_IF_FALSE":
+            # Convert to an `{ ... } if` sequence.
+            result.append("{")
+            # `argval` holds the resolved bytecode offset of the jump target,
+            # which is what `Instruction.offset` is compared against. The raw
+            # `arg` is not a byte offset on every Python version.
+            idx = _index_of_offset(instructions, inst.argval)
+            # Split the conditional block prefix from the remaining
+            # instructions.
+            block = instructions[:idx]
+            del instructions[:idx]
+            _translate_list(block, result)
+            result.append("} if")
+        else:
+            # Includes LOAD_METHOD of an unsupported method, which is passed
+            # through as its opcode name instead of being silently dropped.
+            result.append(_translate_instruction(inst))
+
+
+def _translate_method_call(
+    load: dis.Instruction, instructions: list[dis.Instruction], result: list[str]
+):
+    """
+    Translate one method call whose LOAD_METHOD (`load`) was already popped.
+
+    Consumes `instructions` up to and including the CALL_METHOD that matches
+    `load`. A LOAD_METHOD met on the way starts a nested call, which is
+    translated recursively so that its CALL_METHOD is not mistaken for the
+    outer one. Methods without a selector are passed through as opcode names.
+    """
+    selector = _SELECTORS.get(load.argval)
+    if selector is None:
+        result.append(_translate_instruction(load))
+    while instructions:
+        # TODO: Can LOAD_METHOD, ..., CALL_METHOD sequences contain flow
+        # control? If so this needs to gather instructions and call
+        # `_translate_list`, instead of handling each instruction
+        # individually.
+        x = instructions.pop(0)
+        if x.opname == "CALL_METHOD":
+            if selector is None:
+                result.append(_translate_instruction(x))
+            else:
+                result.append(f"{selector} call")
+            return
+        if x.opname == "LOAD_METHOD":
+            # Begin a nested method call.
+            _translate_method_call(x, instructions, result)
+        else:
+            result.append(_translate_instruction(x))
+
+
+def _translate_instruction(inst: dis.Instruction) -> str:
+    """
+    Translate one Python bytecode instruction into one LLDB data formatter
+    bytecode operation.
+
+    Only one-to-one translations happen here; instruction sequences are
+    handled by `_translate_list`. An instruction with no translation is
+    returned as its opcode name.
+    """
+    name = inst.opname
+    if name == "COMPARE_OP" and inst.argval == "==":
+        return "="
+    if name == "LOAD_CONST":
+        constant = inst.argval
+        if isinstance(constant, str):
+            # TODO: Handle strings with inner double quotes ("). Alternatively,
+            # use `repr()` and allow the bytecode assembly to use single
+            # quotes.
+            return f'"{constant}"'
+        if isinstance(constant, bool):
+            # Booleans are written as 0 or 1.
+            return f"{int(constant)}"
+        return inst.argrepr
+    if name == "LOAD_FAST":
+        return f"{inst.arg} pick # {inst.argval}"
+    if name == "RETURN_VALUE":
+        return "return"
+    if name in ("STORE_FAST", "STORE_NAME"):
+        # This is fake. There is no `put` operation (yet?).
+        return f"{inst.arg} put # {inst.argval}"
+    return name
+
+
+def _index_of_offset(instructions: list[dis.Instruction], offset) -> int:
+    """
+    Return the position in `instructions` of the first instruction whose
+    `offset` equals the given offset.
+
+    Raises ValueError when no instruction has that offset.
+    """
+    position = next(
+        (
+            index
+            for index, candidate in enumerate(instructions)
+            if candidate.offset == offset
+        ),
+        None,
+    )
+    if position is None:
+        raise ValueError(f"invalid offset: {offset}")
+    return position
+
+
+# Script entry point: the first command line argument is the path of the
+# Python source file to translate.
+if __name__ == "__main__":
+    _main(sys.argv[1])

_______________________________________________
lldb-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to