https://github.com/kastiglione updated https://github.com/llvm/llvm-project/pull/113734
>From 57223942e91c47d0a61b148a65247cd9cbb16496 Mon Sep 17 00:00:00 2001 From: Dave Lee <[email protected]> Date: Fri, 25 Oct 2024 12:56:00 -0700 Subject: [PATCH 1/3] [lldb] Proof of concept data formatter compiler for Python --- .../formatter-bytecode/optional_summary.py | 14 ++ .../formatter-bytecode/python_to_assembly.py | 145 ++++++++++++++++++ 2 files changed, 159 insertions(+) create mode 100644 lldb/examples/formatter-bytecode/optional_summary.py create mode 100755 lldb/examples/formatter-bytecode/python_to_assembly.py diff --git a/lldb/examples/formatter-bytecode/optional_summary.py b/lldb/examples/formatter-bytecode/optional_summary.py new file mode 100644 index 0000000000000..68e672d86613d --- /dev/null +++ b/lldb/examples/formatter-bytecode/optional_summary.py @@ -0,0 +1,14 @@ +def OptionalSummaryProvider(valobj, _): + failure = 2 + storage = valobj.GetChildMemberWithName("Storage") + hasVal = storage.GetChildMemberWithName("hasVal").GetValueAsUnsigned(failure) + if hasVal == failure: + return "<could not read Optional>" + + if hasVal == 0: + return "None" + + underlying_type = storage.GetType().GetTemplateArgumentType(0) + value = storage.GetChildMemberWithName("value") + value = value.Cast(underlying_type) + return value.GetSummary() diff --git a/lldb/examples/formatter-bytecode/python_to_assembly.py b/lldb/examples/formatter-bytecode/python_to_assembly.py new file mode 100755 index 0000000000000..6e2adbe093fda --- /dev/null +++ b/lldb/examples/formatter-bytecode/python_to_assembly.py @@ -0,0 +1,145 @@ +#!/usr/bin/python3 + +import ast +import io +import sys +from typing import Any + +BUILTINS = { + "Cast": "@cast", + "GetChildMemberWithName": "@get_child_with_name", + "GetSummary": "@get_summary", + "GetTemplateArgumentType": "@get_template_argument_type", + "GetType": "@get_type", + "GetValueAsUnsigned": "@get_value_as_unsigned", +} + +COMPS = { + ast.Eq: "=", + ast.NotEq: "!=", + ast.Lt: "<", + ast.LtE: "=<", + ast.Gt: ">", + ast.GtE: "=>", +} + 
+class Compiler(ast.NodeVisitor): + # Track the stack index of local variables. + # + # This is essentially an ordered dictionary, where the key is an index on + # the stack, and the value is the name of the variable whose value is at + # that index. + # + # Ex: `locals[0]` is the name of the first value pushed on the stack, etc. + locals: list[str] + + buffer: io.StringIO + final_buffer: io.StringIO + + def __init__(self) -> None: + self.locals = [] + self.buffer = io.StringIO() + self.final_buffer = io.StringIO() + + def visit_FunctionDef(self, node: ast.FunctionDef) -> None: + # Initialize `locals` with the (positional) arguments. + self.locals = [arg.arg for arg in node.args.args] + self.generic_visit(node) + self.locals.clear() + + def visit_Compare(self, node: ast.Compare) -> None: + self.visit(node.left) + # XXX: Does not handle multiple comparisons, ex: `0 < x < 10` + self.visit(node.comparators[0]) + self._output(COMPS[type(node.ops[0])]) + + def visit_If(self, node: ast.If) -> None: + self.visit(node.test) + + # Does the body `return`? + has_return = any(isinstance(x, ast.Return) for x in node.body) + + self._output("{") + self._visit_each(node.body) + if not node.orelse and not has_return: + # No else, and no early exit: a simple `if` + self._output("} if") + return + + self._output("}") + if node.orelse: + # Handle else. + self._output("{") + self._visit_each(node.orelse) + self._output("} ifelse") + elif has_return: + # Convert early exit into an `ifelse`. + self._output("{") + self._output("} ifelse", final=True) + + def visit_Constant(self, node: ast.Constant) -> None: + if isinstance(node.value, str): + self._output(f'"{node.value}"') + elif isinstance(node.value, bool): + self._output(int(node.value)) + else: + self._output(node.value) + + def visit_Call(self, node: ast.Call) -> None: + if isinstance(node.func, ast.Attribute): + # The receiver is the left-hand side of the dot. 
+ receiver = node.func.value + method = node.func.attr + if selector := BUILTINS.get(method): + # Visit the method's receiver to have its value on the stack. + self.visit(receiver) + # Visit the args to position them on the stack. + self._visit_each(node.args) + self._output(f"{selector} call") + else: + # TODO: fail + print(f"error: unsupported method {node.func.attr}", file=sys.stderr) + + def visit_Assign(self, node: ast.Assign) -> None: + # Visit RHS first, putting values on the stack. + self.visit(node.value) + # Determine the name(s). Either a single Name, or a Tuple of Names. + target = node.targets[0] + if isinstance(target, ast.Name): + names = [target.id] + elif isinstance(target, ast.Tuple): + # These tuple elements are Name nodes. + names = [x.id for x in target.elts] + + # Forget any previous bindings of these names. + # Their values are orphaned on the stack. + for local in self.locals: + if local in names: + old_idx = self.locals.index(local) + self.locals[old_idx] = "" + + self.locals.extend(names) + + def visit_Name(self, node: ast.Name) -> None: + idx = self.locals.index(node.id) + self._output(f"{idx} pick # {node.id}") + + def _visit_each(self, nodes: list[ast.AST]) -> None: + for child in nodes: + self.visit(child) + + def _output(self, x: Any, final: bool = False) -> None: + dest = self.final_buffer if final else self.buffer + print(x, file=dest) + + @property + def output(self) -> str: + return compiler.buffer.getvalue() + compiler.final_buffer.getvalue() + + +if __name__ == "__main__": + with open(sys.argv[1]) as f: + root = ast.parse(f.read()) + compiler = Compiler() + compiler.visit(root) + print(compiler.output) >From 282cb22fef121671e5ee7b18fb007cac4f64ceaa Mon Sep 17 00:00:00 2001 From: Dave Lee <[email protected]> Date: Fri, 3 Jan 2025 14:20:48 -0800 Subject: [PATCH 2/3] Support the return operation --- .../formatter-bytecode/python_to_assembly.py | 32 +++++++------------ 1 file changed, 11 insertions(+), 21 deletions(-) diff --git 
a/lldb/examples/formatter-bytecode/python_to_assembly.py b/lldb/examples/formatter-bytecode/python_to_assembly.py index 6e2adbe093fda..98c03832227cc 100755 --- a/lldb/examples/formatter-bytecode/python_to_assembly.py +++ b/lldb/examples/formatter-bytecode/python_to_assembly.py @@ -34,12 +34,10 @@ class Compiler(ast.NodeVisitor): locals: list[str] buffer: io.StringIO - final_buffer: io.StringIO def __init__(self) -> None: self.locals = [] self.buffer = io.StringIO() - self.final_buffer = io.StringIO() def visit_FunctionDef(self, node: ast.FunctionDef) -> None: # Initialize `locals` with the (positional) arguments. @@ -56,26 +54,19 @@ def visit_Compare(self, node: ast.Compare) -> None: def visit_If(self, node: ast.If) -> None: self.visit(node.test) - # Does the body `return`? - has_return = any(isinstance(x, ast.Return) for x in node.body) - self._output("{") self._visit_each(node.body) - if not node.orelse and not has_return: - # No else, and no early exit: a simple `if` - self._output("} if") - return - - self._output("}") if node.orelse: - # Handle else. - self._output("{") + self._output("} {") self._visit_each(node.orelse) self._output("} ifelse") - elif has_return: - # Convert early exit into an `ifelse`. 
- self._output("{") - self._output("} ifelse", final=True) + else: + self._output("} if") + + def visit_Return(self, node: ast.Return) -> None: + if node.value: + self.visit(node.value) + self._output("return") def visit_Constant(self, node: ast.Constant) -> None: if isinstance(node.value, str): @@ -128,13 +119,12 @@ def _visit_each(self, nodes: list[ast.AST]) -> None: for child in nodes: self.visit(child) - def _output(self, x: Any, final: bool = False) -> None: - dest = self.final_buffer if final else self.buffer - print(x, file=dest) + def _output(self, x: Any) -> None: + print(x, file=self.buffer) @property def output(self) -> str: - return compiler.buffer.getvalue() + compiler.final_buffer.getvalue() + return compiler.buffer.getvalue() if __name__ == "__main__": >From 11d4cb99157cd0c9dea5ada737ae370d229a76ab Mon Sep 17 00:00:00 2001 From: Dave Lee <[email protected]> Date: Tue, 6 Jan 2026 10:49:21 -0800 Subject: [PATCH 3/3] Add Python bytecode translator --- .../bytecode_to_bytecode.py | 141 ++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100755 lldb/examples/formatter-bytecode/bytecode_to_bytecode.py diff --git a/lldb/examples/formatter-bytecode/bytecode_to_bytecode.py b/lldb/examples/formatter-bytecode/bytecode_to_bytecode.py new file mode 100755 index 0000000000000..89227b094957c --- /dev/null +++ b/lldb/examples/formatter-bytecode/bytecode_to_bytecode.py @@ -0,0 +1,141 @@ +#!/usr/bin/python3 + +import dis +import sys +from types import CodeType +from typing import Iterable, Iterator, cast + + +# TODO: strlen, fmt +_SELECTORS = { + "Cast": "@cast", + "GetChildAtIndex": "@get_child_at_index", + "GetChildIndex": "@get_child_index", + "GetChildMemberWithName": "@get_child_with_name", + "GetNumChildren": "@get_num_children", + "GetSummary": "@summary", + "GetTemplateArgumentType": "@get_template_argument_type", + "GetType": "@get_type", + "GetValue": "@get_value", + "GetValueAsAddress": "@get_value_as_address", + "GetValueAsSigned": 
"@get_value_as_signed", + "GetValueAsUnsigned": "@get_value_as_unsigned", +} + + +def _main(source_file): + with open(source_file) as f: + source_code = f.read() + bytecode = dis.Bytecode(source_code) + for func_body in _function_bodies(bytecode): + instructions = dis.get_instructions(func_body) + for op in _translate(instructions): + print(op) + + +def _function_bodies(bytecode: dis.Bytecode) -> Iterable[CodeType]: + """ + Iterate the function bodies (code object children) of the given Bytecode. + """ + for const in bytecode.codeobj.co_consts: + if hasattr(const, "co_code"): + yield const + + +def _translate(instructions: Iterator[dis.Instruction]) -> list[str]: + """ + Convert Python instructions to LLDB data formatter bytecode operations. + """ + result = [] + _translate_list(list(instructions), result) + return result + + +def _translate_list(instructions: list[dis.Instruction], result: list[str]): + """ + Convert sequences of Python bytecode to sequences of LLDB data formatter + bytecode. + + This function performs course grained translations - sequences of input to + sequences of output. For translations of individual instructions, see + `_translate_instruction`. + """ + while instructions: + inst = instructions.pop(0) + op = inst.opname + if op == "LOAD_METHOD": + # Method call sequences begin with a LOAD_METHOD instruction, then + # load the arguments on to the stack, and end with the CALL_METHOD + # instruction. + if selector := _SELECTORS.get(inst.argval): + while instructions: + if instructions[0] == "LOAD_METHOD": + # Begin a nested method call. + _translate_list(instructions, result) + else: + # TODO: Can LOAD_METHOD, ..., CALL_METHOD sequences + # contain flow control? If so this needs to gather + # instructions and call `_translate_list`, instead of + # handling each instruction individually. 
+ x = instructions.pop(0) + if x.opname != "CALL_METHOD": + result.append(_translate_instruction(x)) + else: + result.append(f"{selector} call") + break + elif op == "POP_JUMP_IF_FALSE": + # Convert to an `{ ... } if` sequence. + result.append("{") + offset = cast(int, inst.arg) + idx = _index_of_offset(instructions, offset) + # Split the conditional block prefix from the remaining instructions. + block = instructions[:idx] + del instructions[:idx] + _translate_list(block, result) + result.append("} if") + else: + result.append(_translate_instruction(inst)) + + +def _translate_instruction(inst: dis.Instruction) -> str: + """ + Convert a single Python bytecode instruction to an LLDB data formatter + bytecode operation. + + This function performs one-to-one translations. For translations of + sequences of instructions, see `_translate_list`. + """ + op = inst.opname + if op == "COMPARE_OP": + if inst.argval == "==": + return "=" + elif op == "LOAD_CONST": + if isinstance(inst.argval, str): + # TODO: Handle strings with inner double quotes ("). Alternatively, + # use `repr()` and allow the bytecode assembly to use single quotes. + return f'"{inst.argval}"' + elif isinstance(inst.argval, bool): + num = int(inst.argval) + return f"{num}" + else: + return inst.argrepr + elif op == "LOAD_FAST": + return f"{inst.arg} pick # {inst.argval}" + elif op == "RETURN_VALUE": + return "return" + elif op in ("STORE_FAST", "STORE_NAME"): + # This is fake. There is no `put` operation (yet?). 
+ return f"{inst.arg} put # {inst.argval}" + return op + + +def _index_of_offset(instructions: list[dis.Instruction], offset) -> int: + """Find the index of the instruction having the given offset.""" + for i, inst in enumerate(instructions): + if inst.offset == offset: + return i + raise ValueError(f"invalid offset: {offset}") + + +if __name__ == "__main__": + _main(sys.argv[1]) _______________________________________________ lldb-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits
