This is an automated email from the ASF dual-hosted git repository.
kingsword09 pushed a commit to branch website-download-image
in repository https://gitbox.apache.org/repos/asf/opendal.git
The following commit(s) were added to refs/heads/website-download-image by this push:
new 0b5e3a98b build(website): speed up download images
0b5e3a98b is described below
commit 0b5e3a98b5497ba59fea115f808e00b247cbebab
Author: Kingsword <[email protected]>
AuthorDate: Fri Oct 10 22:55:21 2025 +0800
build(website): speed up download images
---
bin/ofs/Cargo.lock | 4 +-
website/plugins/image-ssr-plugin.js | 286 ++++++++++++++++++++++--------------
2 files changed, 174 insertions(+), 116 deletions(-)
diff --git a/bin/ofs/Cargo.lock b/bin/ofs/Cargo.lock
index 43146b5d6..ac86329d3 100644
--- a/bin/ofs/Cargo.lock
+++ b/bin/ofs/Cargo.lock
@@ -1,6 +1,6 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
-version = 3
+version = 4
[[package]]
name = "addr2line"
@@ -1296,13 +1296,13 @@ dependencies = [
"backon",
"base64",
"bytes",
- "chrono",
"crc32c",
"dotenvy",
"futures",
"getrandom 0.2.16",
"http",
"http-body",
+ "jiff",
"log",
"md-5",
"percent-encoding",
diff --git a/website/plugins/image-ssr-plugin.js b/website/plugins/image-ssr-plugin.js
index adaf0ce83..e175d6768 100644
--- a/website/plugins/image-ssr-plugin.js
+++ b/website/plugins/image-ssr-plugin.js
@@ -19,12 +19,23 @@
const path = require("path");
const fs = require("fs/promises");
+const { pipeline } = require("stream/promises");
const { createHash } = require("crypto");
const cheerio = require("cheerio");
+const os = require("os");
module.exports = function (_context) {
const processedImages = new Map();
+ const IMAGE_URL_PATTERNS = [
+ /"(https?:\/\/[^"]+\.(png|jpg|jpeg|gif|svg|webp))"/g,
+ /"(https?:\/\/img\.shields\.io\/[^"]+)"/g,
+ /"(https?:\/\/github\.com\/[^"]+\/actions\/workflow[^"]+)"/g,
+ /'(https?:\/\/[^']+\.(png|jpg|jpeg|gif|svg|webp))'/g,
+ /'(https?:\/\/img\.shields\.io\/[^']+)'/g,
+ /'(https?:\/\/github\.com\/[^']+\/actions\/workflow[^']+)'/g,
+ ];
+
function getImageFilename(imageUrl) {
const hash = createHash("md5").update(imageUrl).digest("hex");
let ext = ".jpg";
@@ -55,7 +66,7 @@ module.exports = function (_context) {
);
}
- async function downloadImage(imageUrl, buildDir) {
+ async function downloadImage(imageUrl, buildDir, retries = 3) {
if (processedImages.has(imageUrl)) {
return processedImages.get(imageUrl);
}
@@ -71,31 +82,41 @@ module.exports = function (_context) {
if (!(await existsAsync(buildOutputPath))) {
console.log(`Downloading image: ${imageUrl}`);
- const controller = new AbortController();
- const timeoutId = setTimeout(() => controller.abort(), 20000);
-
- try {
- const response = await fetch(imageUrl, {
- signal: controller.signal,
- headers: {
- "User-Agent":
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
- Accept: "image/webp,image/apng,image/*,*/*;q=0.8",
- },
- redirect: "follow",
- });
-
- clearTimeout(timeoutId);
+ let lastError;
+ for (let attempt = 1; attempt <= retries; attempt++) {
+ try {
+ const response = await fetch(imageUrl, {
+ signal: AbortSignal.timeout(30000),
+ headers: {
+ "User-Agent":
+              "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
+ Accept: "image/webp,image/apng,image/*,*/*;q=0.8",
+ },
+ redirect: "follow",
+ });
+
+ if (!response.ok) {
+ throw new Error(`HTTP error! Status: ${response.status}`);
+ }
- if (!response.ok) {
- throw new Error(`HTTP error! Status: ${response.status}`);
+ const fd = await fs.open(buildOutputPath, "w");
+ await pipeline(response.body, fd.createWriteStream());
+ lastError = null;
+ break;
+ } catch (fetchError) {
+ lastError = fetchError;
+
+ // Clean up potentially corrupted file
+ try {
+ await fs.unlink(buildOutputPath);
+ } catch {
+ // Ignore if file doesn't exist
+ }
}
+ }
- const buffer = await response.arrayBuffer();
- await fs.writeFile(buildOutputPath, Buffer.from(buffer));
- } catch (fetchError) {
- clearTimeout(timeoutId);
- throw fetchError;
+ if (lastError) {
+ throw lastError;
}
}
@@ -108,142 +129,179 @@ module.exports = function (_context) {
}
}
- async function processJSFiles(outDir) {
- console.log("Processing JS files for external images...");
+ async function pLimit(concurrency, tasks) {
+ const results = [];
+ const executing = [];
+
+ for (const task of tasks) {
+ const p = Promise.resolve().then(() => task());
+ results.push(p);
+
+ if (concurrency <= tasks.length) {
+ const e = p.then(() => executing.splice(executing.indexOf(e), 1));
+ executing.push(e);
+ if (executing.length >= concurrency) {
+ await Promise.race(executing);
+ }
+ }
+ }
+
+ return Promise.all(results);
+ }
+ async function processFiles(dir) {
+ const htmlFiles = [];
const jsFiles = [];
- async function findJSFiles(dir) {
+ async function findFiles(dir) {
const entries = await fs.readdir(dir, { withFileTypes: true });
for (const entry of entries) {
const fullPath = path.join(dir, entry.name);
if (entry.isDirectory()) {
- await findJSFiles(fullPath);
+ await findFiles(fullPath);
} else if (entry.name.endsWith(".js")) {
jsFiles.push(fullPath);
+ } else if (entry.name.endsWith(".html")) {
+ htmlFiles.push(fullPath);
}
}
}
- await findJSFiles(outDir);
+ await findFiles(dir);
- for (const jsFile of jsFiles) {
- const content = await fs.readFile(jsFile, "utf8");
- let modified = false;
- let newContent = content;
+ console.log(
+      `Collecting images from ${htmlFiles.length} HTML and ${jsFiles.length} JS files...`
+ );
+ const allImageUrls = new Set();
-    // Look for shield.io and other image URLs with a more comprehensive regex
- const urlPatterns = [
- /"(https?:\/\/[^"]+\.(png|jpg|jpeg|gif|svg|webp))"/g,
- /"(https?:\/\/img\.shields\.io\/[^"]+)"/g,
- /"(https?:\/\/github\.com\/[^"]+\/actions\/workflow[^"]+)"/g,
- /'(https?:\/\/[^']+\.(png|jpg|jpeg|gif|svg|webp))'/g,
- /'(https?:\/\/img\.shields\.io\/[^']+)'/g,
- /'(https?:\/\/github\.com\/[^']+\/actions\/workflow[^']+)'/g,
- ];
+ await Promise.all([
+ collectImagesFromHtml(htmlFiles, allImageUrls),
+ collectImagesFromJs(jsFiles, allImageUrls),
+ ]);
- const allReplacements = [];
+ console.log(`Downloading ${allImageUrls.size} unique images...`);
+ const downloadTasks = Array.from(allImageUrls).map(
+ (url) => () => downloadImage(url, dir)
+ );
- for (const pattern of urlPatterns) {
- const matches = Array.from(newContent.matchAll(pattern));
+ const cpuCount = os.cpus().length;
+ const concurrency = Math.min(Math.max(cpuCount * 5, 10), 30);
+ console.log(
+ `Using ${concurrency} concurrent downloads (CPU cores: ${cpuCount})`
+ );
- for (const match of matches) {
- const imageUrl = match[1];
- if (!imageUrl) continue;
+ await pLimit(concurrency, downloadTasks);
- try {
- const localUrl = await downloadImage(imageUrl, outDir);
- if (localUrl !== imageUrl) {
- allReplacements.push({
- original: match[0],
- replacement: match[0].replace(imageUrl, localUrl),
- });
- modified = true;
- }
- } catch (error) {
- console.error(`Error processing URL in JS file: ${error.message}`);
- }
- }
- }
+ console.log("Updating files with local image URLs...");
+ await Promise.all([processHtmlFiles(htmlFiles), processJSFiles(jsFiles)]);
+ }
-      // Apply replacements from longest to shortest to avoid partial replacements
- allReplacements.sort((a, b) => b.original.length - a.original.length);
+ async function traverseHtmlFiles(htmlFiles, handler) {
+ for (const htmlFile of htmlFiles) {
+ const html = await fs.readFile(htmlFile, "utf8");
+ const $ = cheerio.load(html);
- for (const { original, replacement } of allReplacements) {
- newContent = newContent.replace(original, replacement);
- }
+ const result = handler(htmlFile, $);
- if (modified) {
- await fs.writeFile(jsFile, newContent);
+ if (result?.shouldWrite) {
+ await fs.writeFile(htmlFile, $.html());
}
}
}
- return {
- name: "docusaurus-ssr-image-plugin",
+ async function traverseJsFiles(jsFiles, handler) {
+ for (const jsFile of jsFiles) {
+ const content = await fs.readFile(jsFile, "utf8");
- async postBuild({ outDir }) {
- console.log("Processing HTML files for external images...");
+ const result = handler(content);
- const htmlFiles = [];
+ if (result?.newContent && result.newContent !== content) {
+ await fs.writeFile(jsFile, result.newContent);
+ }
+ }
+ }
- async function findHtmlFiles(dir) {
- const entries = await fs.readdir(dir, { withFileTypes: true });
+ async function collectImagesFromHtml(htmlFiles, imageUrls) {
+ await traverseHtmlFiles(htmlFiles, (_, $) => {
+ $("img").each((_, el) => {
+ const src = $(el).attr("src");
+ if (src && src.startsWith("http")) {
+ imageUrls.add(src);
+ }
+ });
+ });
+ }
- for (const entry of entries) {
- const fullPath = path.join(dir, entry.name);
- if (entry.isDirectory()) {
- await findHtmlFiles(fullPath);
- } else if (entry.name.endsWith(".html")) {
- htmlFiles.push(fullPath);
- }
+ async function collectImagesFromJs(jsFiles, imageUrls) {
+ await traverseJsFiles(jsFiles, (content) => {
+ for (const pattern of IMAGE_URL_PATTERNS) {
+ const matches = Array.from(content.matchAll(pattern));
+ for (const match of matches) {
+ if (match[1]) imageUrls.add(match[1]);
}
}
+ });
+ }
- await findHtmlFiles(outDir);
+ async function processHtmlFiles(htmlFiles) {
+ await traverseHtmlFiles(htmlFiles, (_, $) => {
+ let modified = false;
- for (const htmlFile of htmlFiles) {
- const html = await fs.readFile(htmlFile, "utf8");
- let $ = cheerio.load(html);
- let modified = false;
+ $("img").each((_, img) => {
+ const element = $(img);
+ const imageUrl = element.attr("src");
- const externalImages = $("img").filter((_, el) => {
- const src = $(el).attr("src");
- return src && src.startsWith("http");
- });
+ if (imageUrl && imageUrl.startsWith("http")) {
+ const localUrl = processedImages.get(imageUrl);
+ if (localUrl && localUrl !== imageUrl) {
+ element.attr("src", localUrl);
+ modified = true;
+ }
+ }
+ });
- if (externalImages.length === 0) continue;
+ return { shouldWrite: modified };
+ });
+ }
- const downloadPromises = [];
+ async function processJSFiles(jsFiles) {
+ await traverseJsFiles(jsFiles, (content) => {
+ let newContent = content;
+ const allReplacements = [];
- externalImages.each((_, img) => {
- const element = $(img);
- const imageUrl = element.attr("src");
+ for (const pattern of IMAGE_URL_PATTERNS) {
+ const matches = Array.from(newContent.matchAll(pattern));
- if (!imageUrl || !imageUrl.startsWith("http")) return;
+ for (const match of matches) {
+ const imageUrl = match[1];
+ if (!imageUrl) continue;
- downloadPromises.push(
- downloadImage(imageUrl, outDir)
- .then((localUrl) => {
- if (localUrl !== imageUrl) {
- element.attr("src", localUrl);
- modified = true;
- }
- })
- .catch(() => {})
- );
- });
+ const localUrl = processedImages.get(imageUrl);
+ if (localUrl && localUrl !== imageUrl) {
+ allReplacements.push({
+ original: match[0],
+ replacement: match[0].replace(imageUrl, localUrl),
+ });
+ }
+ }
+ }
- await Promise.all(downloadPromises);
+ allReplacements.sort((a, b) => b.original.length - a.original.length);
- if (modified) {
- await fs.writeFile(htmlFile, $.html());
- }
+ for (const { original, replacement } of allReplacements) {
+ newContent = newContent.replace(original, replacement);
}
- // Process JS files to update image references in bundled JavaScript
- await processJSFiles(outDir);
+ return { newContent };
+ });
+ }
+
+ return {
+ name: "docusaurus-ssr-image-plugin",
+
+ async postBuild({ outDir }) {
+ await processFiles(outDir);
console.log(`Processed ${processedImages.size} external images`);
},
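
For reference, below is a minimal standalone sketch (not part of the commit) of
the pattern the plugin now uses: bounded-concurrency task execution plus a
retrying, streaming download with a timeout and partial-file cleanup. The
helper names limitConcurrency and fetchWithRetry are illustrative only.

const fs = require("fs/promises");
const { pipeline } = require("stream/promises");

// Run async task factories with at most `concurrency` in flight.
async function limitConcurrency(concurrency, tasks) {
  const results = [];
  const executing = new Set();
  for (const task of tasks) {
    const p = Promise.resolve().then(() => task());
    results.push(p);
    // Track the in-flight slot; task errors surface later via Promise.all.
    const e = p.catch(() => {}).finally(() => executing.delete(e));
    executing.add(e);
    if (executing.size >= concurrency) {
      await Promise.race(executing); // wait for a slot to free up
    }
  }
  return Promise.all(results);
}

// Fetch a URL to disk, retrying on failure and removing partial files.
async function fetchWithRetry(url, dest, retries = 3) {
  let lastError;
  for (let attempt = 1; attempt <= retries; attempt++) {
    try {
      const response = await fetch(url, { signal: AbortSignal.timeout(30000) });
      if (!response.ok) throw new Error(`HTTP error! Status: ${response.status}`);
      const fd = await fs.open(dest, "w");
      await pipeline(response.body, fd.createWriteStream()); // stream, don't buffer
      return dest;
    } catch (err) {
      lastError = err;
      await fs.unlink(dest).catch(() => {}); // drop a possibly corrupt file
    }
  }
  throw lastError;
}

// Usage (Node 18+): download a list of URLs with at most 10 in flight.
// await limitConcurrency(10, urls.map((u, i) => () => fetchWithRetry(u, `img-${i}.bin`)));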