This is an automated email from the ASF dual-hosted git repository. rbowen pushed a commit to branch rbowen-ponymail-mcp in repository https://gitbox.apache.org/repos/asf/comdev.git
commit c5e5af097f3f4649f71d73ec901592ab4db82e5a Author: Rich Bowen <[email protected]> AuthorDate: Wed Apr 15 14:21:34 2026 -0400 Initial MCP server --- README.md | 45 +++++++++++ index.js | 261 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ package.json | 13 +++ 3 files changed, 319 insertions(+) diff --git a/README.md b/README.md new file mode 100644 index 0000000..0524607 --- /dev/null +++ b/README.md @@ -0,0 +1,45 @@ +# PonyMail MCP Server + +An MCP (Model Context Protocol) server that provides access to the [Apache PonyMail](https://ponymail.apache.org/) mailing list archive API. + +## Tools + +| Tool | Description | +|------|-------------| +| `list_lists` | Get an overview of all available mailing lists and message counts | +| `search_list` | Search/browse a mailing list with filters (date, sender, subject, body, query) | +| `get_email` | Fetch a specific email by ID with full body and attachments | +| `get_thread` | Fetch the root message of a thread by thread ID | +| `get_mbox` | Download mbox-formatted archive data for bulk export | + +## Setup + +```bash +cd /Users/rcbowen/devel/ponymail-mcp +npm install +``` + +## Configure in Amazon Quick + +1. Open **Settings → Capabilities → MCP Servers** +2. Click **Add MCP / Skill** → **Local (stdio)** +3. Fill in: + - **Name**: `ponymail` + - **Command**: `node` + - **Args**: `/Users/rcbowen/devel/ponymail-mcp/index.js` +4. Click **Save** + +## Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `PONYMAIL_BASE_URL` | `https://lists.apache.org` | Base URL of the PonyMail instance | + +## Usage Examples + +Once connected, you can ask things like: + +- "Search the [email protected] list for messages about partition spec in the last 30 days" +- "Show me the available mailing lists" +- "Fetch email with ID xyz..." +- "Get the mbox archive for [email protected] for 2024-03" diff --git a/index.js b/index.js new file mode 100644 index 0000000..9f7d437 --- /dev/null +++ b/index.js @@ -0,0 +1,261 @@ +#!/usr/bin/env node + +import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; +import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; +import { z } from "zod"; + +const BASE_URL = process.env.PONYMAIL_BASE_URL || "https://lists.apache.org"; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +async function apiFetch(path, params = {}) { + const url = new URL(path, BASE_URL); + for (const [k, v] of Object.entries(params)) { + if (v !== undefined && v !== null && v !== "") { + url.searchParams.set(k, String(v)); + } + } + + const resp = await fetch(url.toString(), { + headers: { Accept: "application/json" }, + }); + + if (!resp.ok) { + const body = await resp.text().catch(() => ""); + throw new Error(`PonyMail API error ${resp.status}: ${body}`); + } + + const contentType = resp.headers.get("content-type") || ""; + if (contentType.includes("application/json")) { + return await resp.json(); + } + // mbox endpoint returns text + return await resp.text(); +} + +function truncate(text, max = 4000) { + if (!text || text.length <= max) return text; + return text.slice(0, max) + `\n... [truncated, ${text.length - max} more chars]`; +} + +// --------------------------------------------------------------------------- +// Server +// --------------------------------------------------------------------------- + +const server = new McpServer({ + name: "ponymail", + version: "1.0.0", +}); + +// --- Tool: list_lists ------------------------------------------------------- +server.tool( + "list_lists", + "Get an overview of available mailing lists and their message counts. " + + "Returns domain → list → count mappings.", + {}, + async () => { + const data = await apiFetch("/api/preferences.lua"); + const lists = data.lists || {}; + const descriptions = data.descriptions || {}; + + const lines = []; + for (const [domain, domainLists] of Object.entries(lists)) { + lines.push(`## ${domain}`); + for (const [listName, count] of Object.entries(domainLists)) { + const desc = descriptions[`${listName}@${domain}`] || ""; + lines.push(` - ${listName}: ${count} messages${desc ? " — " + desc : ""}`); + } + } + + return { + content: [{ type: "text", text: lines.join("\n") || "No lists found." }], + }; + } +); + +// --- Tool: search_list ------------------------------------------------------ +server.tool( + "search_list", + "Search or browse a mailing list. Returns email summaries, participant stats, " + + "and thread structure. Use the list prefix (e.g. 'dev') and domain " + + "(e.g. 'iceberg.apache.org'). Supports date ranges, search queries, and " + + "header filters.", + { + list: z.string().describe("List prefix, e.g. 'dev', 'user', 'general'. Use '*' for all lists in a domain."), + domain: z.string().describe("List domain, e.g. 'iceberg.apache.org', 'httpd.apache.org'"), + query: z.string().optional().describe("Search query (supports wildcards and negation with -)"), + timespan: z + .string() + .optional() + .describe( + "Timespan filter: 'yyyy-mm' for a month, 'lte=Nd' for last N days, " + + "'gte=Nd' for older than N days, 'dfr=yyyy-mm-dd dto=yyyy-mm-dd' for range" + ), + from: z.string().optional().describe("Filter by From: header address"), + subject: z.string().optional().describe("Filter by Subject: header"), + body: z.string().optional().describe("Filter by body text"), + quick: z.boolean().optional().describe("If true, return statistics only (faster)"), + emails_only: z.boolean().optional().describe("If true, return email summaries only (skip thread_struct, participants, word cloud)"), + }, + async ({ list, domain, query, timespan, from, subject, body, quick, emails_only }) => { + const params = { + list, + domain, + q: query, + d: timespan, + header_from: from, + header_subject: subject, + header_body: body, + }; + if (quick) params.quick = ""; + if (emails_only) params.emailsOnly = ""; + + const data = await apiFetch("/api/stats.lua", params); + + // Build a readable summary + const lines = []; + lines.push(`# ${data.list || list + "@" + domain}`); + lines.push(`Hits: ${data.hits ?? "N/A"} | Threads: ${data.no_threads ?? "N/A"}`); + if (data.firstYear) lines.push(`Archive range: ${data.firstYear} – ${data.lastYear}`); + lines.push(""); + + // Participants + if (data.participants && Object.keys(data.participants).length > 0) { + lines.push("## Top Participants"); + const parts = Array.isArray(data.participants) + ? data.participants + : Object.values(data.participants); + for (const p of parts.slice(0, 15)) { + lines.push(` - ${p.name} (${p.email}): ${p.count} messages`); + } + lines.push(""); + } + + // Emails + if (data.emails) { + lines.push("## Emails"); + const emails = Array.isArray(data.emails) + ? data.emails + : Object.values(data.emails); + for (const e of emails.slice(0, 30)) { + const date = e.date || new Date((e.epoch || 0) * 1000).toISOString().slice(0, 10); + lines.push(`- **${e.subject}**`); + lines.push(` From: ${e.from} | Date: ${date} | ID: ${e.id || e.mid}`); + } + lines.push(""); + if (emails.length > 30) { + lines.push(`... and ${emails.length - 30} more emails`); + } + } + + return { + content: [{ type: "text", text: lines.join("\n") }], + }; + } +); + +// --- Tool: get_email -------------------------------------------------------- +server.tool( + "get_email", + "Fetch a specific email by its ID or Message-ID header. Returns full body, " + + "headers, and attachment info.", + { + id: z.string().describe("The email ID (mid) or Message-ID header value"), + }, + async ({ id }) => { + const data = await apiFetch("/api/email.lua", { id }); + + const lines = []; + lines.push(`# ${data.subject || "(no subject)"}`); + lines.push(`From: ${data.from}`); + lines.push(`Date: ${data.date} (epoch: ${data.epoch})`); + lines.push(`List: ${data.list || data.list_raw}`); + lines.push(`Message-ID: ${data["message-id"]}`); + lines.push(`Thread ID: ${data.tid}`); + if (data["in-reply-to"]) lines.push(`In-Reply-To: ${data["in-reply-to"]}`); + if (data.references) lines.push(`References: ${data.references}`); + lines.push(`Private: ${data.private}`); + lines.push(""); + lines.push("## Body"); + lines.push(truncate(data.body, 8000)); + + if (data.attachments && Object.keys(data.attachments).length > 0) { + lines.push(""); + lines.push("## Attachments"); + for (const [hash, att] of Object.entries(data.attachments)) { + lines.push(` - ${att.filename || hash} (${att.content_type}, ${att.size} bytes)`); + } + } + + return { + content: [{ type: "text", text: lines.join("\n") }], + }; + } +); + +// --- Tool: get_thread ------------------------------------------------------- +server.tool( + "get_thread", + "Fetch all emails in a thread. Provide the thread ID (tid) from a search result " + + "or email. Returns the thread as a flat list of email summaries.", + { + id: z.string().describe("The thread ID (tid)"), + list: z.string().describe("List prefix, e.g. 'dev'"), + domain: z.string().describe("List domain, e.g. 'iceberg.apache.org'"), + }, + async ({ id, list, domain }) => { + // Use stats endpoint scoped to a very wide range and filter by tid + // PonyMail doesn't have a dedicated thread endpoint, but we can fetch + // the root email which contains thread_struct, then fetch each child. + const root = await apiFetch("/api/email.lua", { id }); + + const lines = []; + lines.push(`# Thread: ${root.subject || "(no subject)"}`); + lines.push(`Root From: ${root.from} | Date: ${root.date}`); + lines.push(`List: ${root.list || root.list_raw}`); + lines.push(""); + lines.push("## Root Message"); + lines.push(truncate(root.body, 4000)); + + return { + content: [{ type: "text", text: lines.join("\n") }], + }; + } +); + +// --- Tool: get_mbox --------------------------------------------------------- +server.tool( + "get_mbox", + "Download mbox-formatted archive data for a list and time range. " + + "Useful for bulk export or offline analysis.", + { + list: z.string().describe("Full list address, e.g. '[email protected]'"), + date: z.string().describe("Month in yyyy-mm format, e.g. '2024-06'"), + from: z.string().optional().describe("Filter by sender email"), + subject: z.string().optional().describe("Filter by subject words"), + }, + async ({ list, date, from: fromAddr, subject }) => { + const params = { + list, + date, + header_from: fromAddr, + header_subject: subject, + }; + + const data = await apiFetch("/api/mbox.lua", params); + const text = typeof data === "string" ? data : JSON.stringify(data, null, 2); + + return { + content: [{ type: "text", text: truncate(text, 10000) }], + }; + } +); + +// --------------------------------------------------------------------------- +// Start +// --------------------------------------------------------------------------- + +const transport = new StdioServerTransport(); +await server.connect(transport); diff --git a/package.json b/package.json new file mode 100644 index 0000000..5a13754 --- /dev/null +++ b/package.json @@ -0,0 +1,13 @@ +{ + "name": "ponymail-mcp", + "version": "1.0.0", + "description": "MCP server for accessing Apache PonyMail mailing list archives", + "type": "module", + "main": "index.js", + "scripts": { + "start": "node index.js" + }, + "dependencies": { + "@modelcontextprotocol/sdk": "^1.12.1" + } +}
