Alex Bennée <[email protected]> writes: > Teach agents how to fetch and deal with archives of mail threads. > > Just YOLO'ing the entire mail thread into an LLM context is quite > expensive especially if reviewing a thread against your current tree > state. This skill allows the agent to extract just the comments and > tags saving tokens. > > Signed-off-by: Alex Bennée <[email protected]> > > --- > v2 > - rename to qemu-mail-thread > - add instructions on fetching threads via b4 > - refactor the metadata handling in the script > - mention in AGENTS skill list > --- > .agents/skills/qemu-mail-thread/SKILL.md | 34 +++++++ > .../scripts/qemu_mail_parser.py | 98 +++++++++++++++++++ > AGENTS.md | 1 + > 3 files changed, 133 insertions(+) > create mode 100644 .agents/skills/qemu-mail-thread/SKILL.md > create mode 100644 > .agents/skills/qemu-mail-thread/scripts/qemu_mail_parser.py > > diff --git a/.agents/skills/qemu-mail-thread/SKILL.md > b/.agents/skills/qemu-mail-thread/SKILL.md > new file mode 100644 > index 00000000000..58e7c833a27 > --- /dev/null > +++ b/.agents/skills/qemu-mail-thread/SKILL.md
Amazingly, this tested fine for me (probably because the skill was already in the context), but I missed the front matter at the top of SKILL.md: --- name: qemu-mail-thread description: Fetch and extract reviewer comments from QEMU mailing list threads, handling mbox files or raw text dumps. license: GPL-2.0-or-later --- > @@ -0,0 +1,34 @@ > +# QEMU Mail Thread > + > +This skill helps you fetch and extract reviewer comments from QEMU mailing > list threads. It can handle standard `mbox` files (e.g., from `b4 mbox`) or > raw text dumps from the user. > + > +## How to fetch a mail thread > + > +If you have a Message-ID (e.g., from a patch series), use `b4` to fetch the > entire thread: > + > +```bash > +b4 mbox <message-id> > +``` > + > +This will typically save an `.mbx` file in your current directory. > + > +## How to parse comments > + > +Use the included Python script to extract feedback, filtering out quoted > text and diffs. > + > +```bash > +python .agents/skills/qemu-mail-thread/scripts/qemu_mail_parser.py > <path_to_mail_thread_file> > +``` > + > +The script automatically detects whether the input is a standard mbox or a > raw text dump. > + > +## Expected Output > +The script generates `parsed_comments.txt` in the current working directory: > +``` > +--- REPLY FROM Reviewer Name <[email protected]> --- > +Subject: Re: [PATCH 01/10] ... > +Comment text here... > +============================================================ > +``` > + > +Use this structured text to efficiently analyze the feedback and identify > outstanding suggestions. 
> diff --git a/.agents/skills/qemu-mail-thread/scripts/qemu_mail_parser.py > b/.agents/skills/qemu-mail-thread/scripts/qemu_mail_parser.py > new file mode 100644 > index 00000000000..fdaac57ac15 > --- /dev/null > +++ b/.agents/skills/qemu-mail-thread/scripts/qemu_mail_parser.py > @@ -0,0 +1,98 @@ > +# SPDX-License-Identifier: GPL-2.0-or-later > +import sys > +import os > +import mailbox > + > + > +def is_metadata_line(line): > + """Check if a line is metadata (quotes, diff, etc.)""" > + return (line.startswith(">") or > + line.startswith("---") or > + line.startswith("diff ")) > + > + > +def parse_raw_text(text, output_f): > + # Split by the separator used in lore.kernel.org / b4 dumps > + messages = text.split("----------------------------------------") > + for msg in messages: > + if not msg.strip(): continue > + > + lines = msg.strip().split('\n') > + author = "" > + subject = "" > + body_start = 0 > + for i, line in enumerate(lines): > + if line.startswith("From: "): author = line[6:] > + if line.startswith("Subject: "): subject = line[9:] > + if not line.strip() and body_start == 0: > + body_start = i + 1 > + break > + > + is_reply = subject and ("Re: " in subject or > subject.startswith("Re:")) > + > + if is_reply and author != "" and not author.startswith("qemu-devel"): > + output_f.write(f"--- REPLY FROM {author} ---\nSubject: > {subject}\n") > + > + for line in lines[body_start:]: > + if not is_metadata_line(line): > + output_f.write(line + "\n") > + output_f.write("="*60 + "\n\n") > + > + > +def parse_mbox(mbox_path, output_f): > + mbox = mailbox.mbox(mbox_path) > + for message in mbox: > + subject = message['subject'] > + if subject and 'Re: ' in subject: > + author = message['from'] > + output_f.write(f"--- REPLY FROM {author} ---\nSubject: > {subject}\n") > + > + payload = message.get_payload() > + body = "" > + if isinstance(payload, list): > + # Handle multipart > + for part in payload: > + if part.get_content_type() == 'text/plain': > + body = 
part.get_payload(decode=True).decode('utf-8', > errors='ignore') > + break > + else: > + body = message.get_payload(decode=True).decode('utf-8', > errors='ignore') > + > + # Simple heuristic to extract comments > + for line in body.split('\n'): > + if line.strip() and not is_metadata_line(line.strip()): > + output_f.write(line + "\n") > + output_f.write("="*60 + "\n\n") > + > + > +def main(): > + if len(sys.argv) < 2: > + print("Usage: python qemu_mail_parser.py <mail_thread_file>") > + sys.exit(1) > + > + input_file = sys.argv[1] > + output_file = "parsed_comments.txt" > + > + if not os.path.exists(input_file): > + print(f"Error: File not found - {input_file}") > + sys.exit(1) > + > + with open(output_file, "w", encoding="utf-8") as out_f: > + # Detect if it's an mbox or raw text > + with open(input_file, 'rb') as f: > + header = f.read(15) > + is_mbox = header.startswith(b'From mboxrd@z ') > + > + if is_mbox: > + print(f"Parsing {input_file} as mbox...") > + parse_mbox(input_file, out_f) > + else: > + print(f"Parsing {input_file} as raw text dump...") > + with open(input_file, "r", encoding="utf-8", errors='ignore') as > f: > + text = f.read() > + parse_raw_text(text, out_f) > + > + print(f"Done. Extracted comments saved to {output_file}") > + > +if __name__ == "__main__": > + main() > diff --git a/AGENTS.md b/AGENTS.md > index fbbc3b65ed0..d99d3078378 100644 > --- a/AGENTS.md > +++ b/AGENTS.md > @@ -28,6 +28,7 @@ You should use the following specialized skills for common > tasks: > - `qemu-build`: For configuring and building QEMU (including debug and > sanitizer builds). > - `qemu-testing`: For finding, listing, and running individual tests (Unit, > QTest, Functional, TCG). > - `qemu-code-reviewer`: For pulling and applying patch series from mailing > lists. > +- `qemu-mail-thread`: For analyzing and parsing mailing list threads. > > ## Source Code Layout (see `docs/devel/codebase.rst`) > - **`accel/`**: Hardware accelerators (KVM, TCG, HVF, Xen, etc.) 
and > architecture-agnostic acceleration code. -- Alex Bennée Virtualisation Tech Lead @ Linaro
