takaebato commented on code in PR #497:
URL: https://github.com/apache/echarts-doc/pull/497#discussion_r3035177501


##########
build/build-llms.js:
##########
@@ -0,0 +1,350 @@
+/**
+ * Converts built part JSONs (HTML desc) to Markdown using turndown,
+ * and generates llms.txt + individual .md files.
+ *
+ * Mechanically converts documents/*-parts/*.json to llms-documents/ (.md files).
+ * Root files (e.g. option.md) are placed at llms-documents/, while part files
+ * (e.g. option.title.md) are placed at llms-documents/*-parts/.
+ * Type information is extracted from documents/*.json (full schema) via traverse.
+ *
+ * Prerequisites: JSON must be built first (node build.js --env dev)
+ * Usage: node build/build-llms.js --env dev
+ */
+const fs = require('fs');
+const fse = require('fs-extra');
+const path = require('path');
+const globby = require('globby');
+const TurndownService = require('turndown');
+const {gfm} = require('turndown-plugin-gfm');
+const {traverse} = require('../tool/schemaHelper');
+const {readConfigEnvFile} = require('./helper');
+
+// --- Constants ---
+
+const LANGUAGES = ['en', 'zh'];
+const OUTPUT_DIR_NAME = 'llms-documents';
+const MAX_HEADING_DEPTH = 6;
+
+const SECTION_LABELS = {
+    en: {'option-parts': 'Option', 'option-gl-parts': 'Option GL', 'api-parts': 'API', 'tutorial-parts': 'Tutorial'},
+    zh: {'option-parts': '配置项 (Option)', 'option-gl-parts': 'Option GL', 'api-parts': 'API', 'tutorial-parts': '教程 (Tutorial)'}
+};
+
+const LLMS_TXT_HEADER = [
+    '# Apache ECharts Documentation',
+    '',
+    '> Apache ECharts is a free, powerful charting and visualization library offering easy ways to add intuitive, interactive, and highly customizable charts to your commercial products.',
+    ''
+].join('\n');
+
+// --- Config ---
+
+const argv = require('yargs').argv;
+const envType = (argv.dev != null || argv.debug != null || argv.env === 'dev') ? 'dev' : argv.env;
+if (!envType) throw new Error('--env MUST be specified');
+const config = readConfigEnvFile(envType);
+
+// --- Turndown ---
+
+const td = new TurndownService({headingStyle: 'atx', codeBlockStyle: 'fenced'});
+td.use(gfm);
+td.addRule('iframe', {filter: 'iframe', replacement: () => ''});
+
+function htmlToMd(html) {
+    return html ? td.turndown(html).replace(/\n{3,}/g, '\n\n').trim() : '';
+}
+
+// --- Extract type info from full schema JSON ---
+
+/**
+ * Extract type and default value info from a full schema JSON by traversing
+ * the nested schema tree.
+ *
+ * @param {string} schemaJsonPath - path to schema JSON (e.g. "documents/option.json")
+ * @param {string} docName - e.g. "option", "api"
+ * @returns {Object<string, {type: string|null, default: string|null}>}
+ *   e.g. { "option.title.show": {type: "boolean", default: "true"} }
+ */
+function buildTypeMap(schemaJsonPath, docName) {
+    if (!fs.existsSync(schemaJsonPath)) return {};
+    const schema = JSON.parse(fs.readFileSync(schemaJsonPath, 'utf-8'));
+    const typeMap = {};
+    traverse(schema, docName, (schemaPath, node) => {
+        if (node.type || node.default != null) {
+            typeMap[schemaPath] = {
+                type: node.type ? (Array.isArray(node.type) ? node.type.join('|') : node.type) : null,
+                default: node.default != null ? String(node.default) : null
+            };
+        }
+    });
+    return typeMap;
+}
+
+// --- Resolve links in HTML ---
+// Best-effort rewriting of <a href="#path"> and <a href="api.html#path"> in HTML
+// so that turndown produces markdown links pointing to the correct .md files.
+// Some source links have non-standard formats (e.g. missing "#", no dot separator)
+// that cannot be resolved; these are left as-is or linked to the root file.
+
+/**
+ * Split linkPath into a part key (first segment) and fragment (rest), matching
+ * the key against partKeys with case-insensitive and singular/plural fallback.
+ *
+ * @param {string} linkPath - e.g. "title.show", "echarts.init"
+ * @param {Set<string>} partKeys - e.g. Set{'title','series-bar','geo',...}
+ * @returns {{key: string, frag: string|null}|null}
+ *   e.g. "title.show"                    -> {key: "title", frag: "show"}
+ *        "angleAxis.axisLabel.interval"  -> {key: "angleAxis", frag: "axisLabel.interval"}
+ *        "geo"                           -> {key: "geo", frag: null}
+ *        "unknown"                       -> null
+ */
+function tryResolvePartKey(linkPath, partKeys) {
+    const [seg, ...rest] = linkPath.split('.');
+    const frag = rest.length > 0 ? rest.join('.') : null;
+
+    if (partKeys.has(seg)) return {key: seg, frag};
+
+    // Fallback: case-insensitive and singular/plural matching
+    const segL = seg.toLowerCase();
+    for (const k of partKeys) {
+        if (k.toLowerCase() === segL) return {key: k, frag};
+    }
+    for (const k of partKeys) {
+        const kl = k.toLowerCase();
+        if (kl === segL + 's' || kl + 's' === segL) return {key: k, frag};
+    }
+    return null;
+}
+
+/**
+ * Resolve a link path to an href pointing to the correct .md file.
+ * If partKeys contains a match, link to the individual part file;
+ * otherwise fall back to the root file.
+ *
+ * @param {string} linkPath - e.g. "title.show", "visualMap"
+ * @param {Set<string>} partKeys - keys of individual part files
+ * @param {string} pathPrefix - path prefix for part files
+ *   same-doc: "option"           -> "option.title.md"
+ *   cross-doc: "../api-parts/api" -> "../api-parts/api.echarts.md"
+ * @param {string|null} rootPath - path prefix for root file fallback
+ *   same-doc: "../option"  -> "../option.md#visualMap"
+ *   cross-doc: "../api"     -> "../api.md#events"
+ * @returns {string|null} resolved href attribute string, or null
+ */
+function resolveLink(linkPath, partKeys, pathPrefix, rootPath) {
+    const resolved = tryResolvePartKey(linkPath, partKeys);
+    if (!resolved) {
+        if (rootPath) return `href="${rootPath}.md#${linkPath}"`;
+        return null;
+    }
+    return `href="${pathPrefix}.${resolved.key}.md${resolved.frag ? '#' + resolved.frag : ''}"`;
+}
+
+/**
+ * Rewrite internal links in HTML so that turndown produces correct .md links.
+ * Handles two patterns:
+ *   1. Same-doc:  href="#title.show"          -> href="option.title.md#show"
+ *   2. Cross-doc: href="api.html#echarts.init" -> href="../api-parts/api.echarts.md#init"
+ * Unresolvable links are left as-is or fall back to the root file.
+ *
+ * @param {string} html - HTML string containing <a href="..."> links
+ * @param {Object<string, Set<string>>} partKeysByDoc - part keys for all docs
+ * @param {string} docName - current doc name (e.g. "option")
+ * @returns {string} HTML with rewritten href attributes
+ */
+function tryResolveHtmlLinks(html, partKeysByDoc, docName) {
+    const partKeys = partKeysByDoc[docName];
+
+    // Same-doc links: href="#title.show" -> href="option.title.md#show"
+    const resolved = html.replace(/href="#([^"]+)"/g, (match, linkPath) =>
+        (partKeys && resolveLink(linkPath, partKeys, docName, `../${docName}`)) || match
+    );
+
+    // Cross-doc links: href="api.html#echarts.init" -> href="../api-parts/api.echarts.md#init"
+    return resolved.replace(
+        /href="(option-gl|option|api|tutorial)\.html#([^"]+)"/g,
+        (match, targetDoc, fragment) => {
+            const keys = partKeysByDoc[targetDoc];
+            if (!keys) return match;
+            return resolveLink(fragment, keys, `../${targetDoc}-parts/${targetDoc}`, `../${targetDoc}`) || match;
+        }
+    );
+}
+
+// --- Convert part JSON to Markdown ---
+
+function formatPropertyEntry(key, entry, typeInfo, linkResolver) {
+    const heading = '#'.repeat(Math.min(key.split('.').length + 1, MAX_HEADING_DEPTH)) + ' ' + key;
+    const meta = [
+        typeInfo && typeInfo.type && `- **Type**: \`${typeInfo.type}\``,
+        typeInfo && typeInfo.default != null && `- **Default**: \`${typeInfo.default}\``
+    ].filter(Boolean);
+    const body = entry.desc ? htmlToMd(linkResolver(entry.desc)) : '';
+    return [heading, ...meta, ...(body ? ['', body] : []), ''];
+}
+
+function jsonToMd(data, typeMap, baseName, linkResolver) {
+    const lines = Object.entries(data).flatMap(([key, entry]) => {
+        const fullKey = baseName ? `${baseName}.${key}` : key;
+        return formatPropertyEntry(key, entry, typeMap[fullKey], linkResolver);
+    });
+    return lines.join('\n').replace(/\n{3,}/g, '\n\n').trimEnd() + '\n';
+}
+
+// --- Collect part JSON files ---
+
+/**
+ * Collect part JSON files for each *-parts/ directory, excluding outline files.
+ *
+ * @param {string[]} partsDirs - paths to *-parts/ directories
+ * @returns {Object<string, string[]>} dir path -> JSON file paths
+ */
+function collectPartJsonFiles(partsDirs) {
+    const jsonFilesByDir = {};
+    for (const dir of partsDirs) {
+        jsonFilesByDir[dir] = globby.sync(path.join(dir, '*.json'))
+            .filter(filePath => !path.basename(filePath).includes('-outline'));
+    }
+    return jsonFilesByDir;
+}
+
+// --- Collect file keys for link resolution across docs ---
+
+/**
+ * Build a map of doc name -> Set of part keys for all *-parts/ directories.
+ * Part keys are file names with the doc name stripped (e.g. "option.title" -> "title").
+ * Root files (e.g. "option.json") are excluded since they are not individual part files.
+ *
+ * @param {string[]} partsDirs - paths to *-parts/ directories
+ * @param {Object<string, string[]>} jsonFilesByDir - pre-collected JSON file paths
+ * @returns {Object<string, Set<string>>} partKeysByDoc - e.g. { option: Set{'title','geo',...}, api: Set{'echarts',...} }
+ */
+function buildPartKeysByDoc(partsDirs, jsonFilesByDir) {
+    const partKeysByDoc = {};
+    for (const dir of partsDirs) {
+        const docName = path.basename(dir).replace(/-parts$/, '');
+        partKeysByDoc[docName] = new Set(
+            jsonFilesByDir[dir].map(filePath => path.basename(filePath, '.json'))
+                .filter(k => k !== docName)
+                .map(k => k.startsWith(docName + '.') ? k.slice(docName.length + 1) : k)
+        );
+    }
+    return partKeysByDoc;
+}
+
+// --- Process a single *-parts/ directory ---
+
+/**
+ * Convert part JSON files in a single *-parts/ directory to Markdown.
+ * Each JSON file becomes a .md file with resolved links and type info.
+ * Root files (e.g. option.json) are output to the parent directory.
+ *
+ * @param {string} partsDir - path to a *-parts/ directory (e.g. "documents/option-parts")
+ * @param {string} outDir - output base directory (e.g. "llms-documents")
+ * @param {Object} typeMap - property path -> {type, default} map
+ * @param {Object<string, Set<string>>} partKeysByDoc - part keys for all docs
+ * @param {string[]} jsonFiles - pre-collected JSON file paths for this directory
+ * @returns {{name: string, path: string, section: string}[]} output file descriptors
+ */
+function processPartsDir(partsDir, outDir, typeMap, partKeysByDoc, jsonFiles) {
+    const dirName = path.basename(partsDir);
+    const docName = dirName.replace(/-parts$/, '');
+
+    // Create a link resolver that rewrites HTML hrefs before turndown
+    const linkResolver = (html) => tryResolveHtmlLinks(html, partKeysByDoc, docName);
+
+    return jsonFiles.map(filePath => {
+        const baseName = path.basename(filePath, '.json');
+        const data = JSON.parse(fs.readFileSync(filePath, 'utf-8'));
+        const content = `# ${baseName}\n\n` + jsonToMd(data, typeMap, baseName, linkResolver);
+        const isRoot = baseName === docName;
+        const fileName = isRoot ? `${baseName}.md` : `${dirName}/${baseName}.md`;
+        const fullPath = path.resolve(outDir, fileName);

Review Comment:
   Fixed.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to