expand-macro.py: helper script exploding macros

Alex Bennée Fri, 15 May 2026 09:31:50 -0700

QEMU makes heavy use of C macros, which can be confusing to humans and
seem almost impossible for AI agents to follow. In the past I've
dealt with this by compiling with V=1, manually copying and pasting
the gcc command line, and appending -E to run the pre-processor step.


With the modern build system we now have a compile_commands.json so we
can automate the process with a script.

There is some trickiness involved in following the line markers so we
know where in the source file we are. To handle this we implement a
PreprocessorState object to track where in the include chain we are.
This allows us to show the including location when we dump the
expanded macro.

Signed-off-by: Alex Bennée <[email protected]>

---
v3
  - added missing SPDX identifier
---
 scripts/expand-macro.py | 274 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 274 insertions(+)
 create mode 100755 scripts/expand-macro.py

diff --git a/scripts/expand-macro.py b/scripts/expand-macro.py
new file mode 100755
index 00000000000..c0e728dfcc1
--- /dev/null
+++ b/scripts/expand-macro.py
@@ -0,0 +1,274 @@
+#!/usr/bin/env python3
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+#
+# Automate the expansion of QEMU macros based on compile_commands.json.
+#
+# This script runs the C preprocessor over a file to expand macros
+# in a specified line range, using the compilation flags defined in
+# compile_commands.json.
+#
+# Copyright (c) Linaro 2026
+#
+import os
+import sys
+import json
+import shlex
+import subprocess
+import argparse
+import re
+
+
+def find_compile_command(target_file, compile_commands):
+    """
+    Search compile_commands to find the rule to build target_file
+    """
+    target_abs = os.path.abspath(target_file)
+    for entry in compile_commands:
+        dir_path = entry.get('directory', '.')
+        file_abs = os.path.abspath(os.path.join(dir_path, entry['file']))
+        if file_abs == target_abs:
+            return entry
+    return None
+
+
+def process_command(command_entry):
+    """
+    Strip out output related options and return a command line that will
+    run the pre-processor only.
+    """
+    command = command_entry.get('command')
+    if not command:
+        args = command_entry.get('arguments', [])
+    else:
+        args = shlex.split(command)
+
+    if not args:
+        return None
+
+    out = []
+    it = iter(args)
+    for arg in it:
+        # the -M* options all deal with generating deps
+        if arg in ('-o', '-MF', '-MQ', '-MT', '-MD', '-MP'):
+            next(it, None)  # Skip the option's argument
+            continue
+        if arg == '-c':
+            continue
+        out.append(arg)
+
+    # Enable pre-processor output, don't strip comments, trace includes
+    out.extend(['-E', '-CC', '-H'])
+    return out
+
+
+def normalize_path(raw_path, working_dir):
+    """Normalize and make paths absolute."""
+    if not os.path.isabs(raw_path):
+        return os.path.abspath(os.path.join(working_dir, raw_path))
+    return os.path.normpath(raw_path)
+
+
+class PreprocessorState:
+    """Tracks the state of the preprocessor as we parse its output."""
+    def __init__(self):
+        self.stack = []
+        self.current_path = None
+        self.current_line = 0
+        self.current_instance_id = 0
+        self.next_instance_id = 1
+        self.sections = {}
+
+    def update_on_marker(self, new_line, flags, path):
+        """Update the file stack and instance tracking based on markers."""
+        # entering new file
+        if "1" in flags:
+            if self.current_path is not None:
+                self.stack.append((self.current_path, self.current_line,
+                                   self.current_instance_id))
+            self.current_path = path
+            self.current_line = new_line
+            self.current_instance_id = self.next_instance_id
+            self.next_instance_id += 1
+            return
+
+        # leaving file
+        if "2" in flags:
+            if self.stack:
+                _, _, popped_instance_id = self.stack.pop()
+                self.current_path = path
+                self.current_line = new_line
+                self.current_instance_id = popped_instance_id
+            else:
+                self.current_path = path
+                self.current_line = new_line
+                self.current_instance_id = self.next_instance_id
+                self.next_instance_id += 1
+            return
+
+        # return to previous file without explicit flag 2
+        if self.current_path != path:
+            if self.stack and self.stack[-1][0] == path:
+                _, _, popped_instance_id = self.stack.pop()
+                self.current_path = path
+                self.current_line = new_line
+                self.current_instance_id = popped_instance_id
+            else:
+                self.current_path = path
+                self.current_line = new_line
+                self.current_instance_id = self.next_instance_id
+                self.next_instance_id += 1
+            return
+            
+        self.current_line = new_line
+
+    def get_context_string(self, target_abs, working_dir):
+        """Generate a descriptive string showing the inclusion context."""
+        if self.stack:
+            ctx_path, ctx_line, _ = self.stack[-1]
+            try:
+                rel_ctx = os.path.relpath(ctx_path, working_dir)
+            except ValueError:
+                rel_ctx = ctx_path
+            return f"{rel_ctx}:{ctx_line}"
+
+        try:
+            rel_ctx = os.path.relpath(target_abs, working_dir)
+        except ValueError:
+            rel_ctx = target_abs
+        return f"{rel_ctx} (main file)"
+
+    def add_line(self, line, line_range, target_abs, working_dir):
+        """Add a line to the sections if it is within the requested range."""
+        start_line, end_line = line_range
+        if self.current_path == target_abs:
+            if start_line <= self.current_line <= end_line:
+                if self.current_instance_id not in self.sections:
+                    ctx_str = self.get_context_string(target_abs, working_dir)
+                    self.sections[self.current_instance_id] = {
+                        "context": ctx_str,
+                        "lines": []
+                    }
+                self.sections[self.current_instance_id]["lines"].append(line)
+        self.current_line += 1
+
+
+def format_output_sections(sections, target_file, start_line, end_line):
+    """Format the accumulated sections into the final output string."""
+    output_sections = []
+    for _instance_id, data in sections.items():
+        if not data["lines"]:
+            continue
+        header = f"/* Expansion from {data['context']} */"
+        body = "\n".join(data["lines"])
+        output_sections.append(f"{header}\n{body}")
+
+    if not output_sections:
+        return (f"/* Error: No lines found for {target_file} "
+                f"in range {start_line}-{end_line} */")
+
+    return "\n/* end of expansion */\n".join(output_sections)
+
+
+def extract_range(stdout, target_file, start_line, end_line, working_dir):
+    """
+    Parse the output of the pre-processor while tracking where we
+    are in the source code from the markers so we can extract the
+    range asked for.
+    """
+    state = PreprocessorState()
+    target_abs = os.path.abspath(target_file)
+    line_range = (start_line, end_line)
+
+    # The format is undocumented but see:
+    #
+    #  gcc/c-family/c-ppoutput.c:print_line_1
+    #
+    # where 1 = entering file, 2 = leaving file
+    # and the 3 or 3 4 depends on linemap_location_in_system_header_p
+    line_marker_re = re.compile(r'^# (\d+) "(.*?)"(.*)')
+
+    for line in stdout.splitlines():
+        match = line_marker_re.match(line)
+        if match:
+            new_line = int(match.group(1))
+            raw_path = match.group(2)
+            flags = match.group(3).split()
+
+            path = normalize_path(raw_path, working_dir)
+            state.update_on_marker(new_line, flags, path)
+            continue
+
+        state.add_line(line, line_range, target_abs, working_dir)
+
+    return format_output_sections(state.sections, target_file,
+                                  start_line, end_line)
+
+
+def main():
+    """Main entry point for the script."""
+    desc = 'Expand macros in a section of a file using compile_commands.json'
+    parser = argparse.ArgumentParser(description=desc)
+    parser.add_argument('file', help='Source file to expand macros in')
+    parser.add_argument('--range', help='Line range (e.g. 100-120)')
+
+    ctx_help = ('Context file (.c) to get compilation flags from '
+                '(useful for headers)')
+    parser.add_argument('--context', help=ctx_help)
+    parser.add_argument('--compile-commands', default='compile_commands.json',
+                        help='Path to compile_commands.json')
+    parser.add_argument('--show-command', action='store_true',
+                        help='Print the modified compile command and exit')
+
+    args = parser.parse_args()
+
+    if not os.path.exists(args.compile_commands):
+        print(f"Error: {args.compile_commands} not found.", file=sys.stderr)
+        sys.exit(1)
+
+    with open(args.compile_commands, encoding="utf-8") as f:
+        compile_commands = json.load(f)
+
+    query_file = args.context if args.context else args.file
+    entry = find_compile_command(query_file, compile_commands)
+
+    if not entry:
+        print(f"Error: Could not find compile command for {query_file}",
+              file=sys.stderr)
+        sys.exit(1)
+
+    cmdline = process_command(entry)
+    if not cmdline:
+        print(f"Error: Failed to process command for {query_file}",
+              file=sys.stderr)
+        sys.exit(1)
+
+    if args.show_command:
+        print(shlex.join(cmdline))
+        sys.exit(0)
+
+    working_dir = entry.get('directory', '.')
+    result = subprocess.run(cmdline, stdout=subprocess.PIPE,
+                            stderr=subprocess.PIPE, cwd=working_dir,
+                            universal_newlines=True, check=False)
+
+    if result.returncode != 0:
+        print(f"Preprocessor failed:\n{result.stderr}", file=sys.stderr)
+        sys.exit(result.returncode)
+
+    content = result.stdout
+    if args.range:
+        try:
+            start, end = map(int, args.range.split('-'))
+            content = extract_range(content, args.file, start, end,
+                                    working_dir)
+        except ValueError:
+            print(f"Error: Invalid range format {args.range}. Use start-end.",
+                  file=sys.stderr)
+            sys.exit(1)
+
+    print(content)
+
+
+# Only run when executed as a script, not when imported as a module.
+if __name__ == "__main__":
+    main()
-- 
2.47.3

[RFC PATCH v3 03/11] scripts/expand-macro.py: helper script exploding macros

Reply via email to