This is an automated email from the ASF dual-hosted git repository.

kingsword09 pushed a commit to branch website-download-image
in repository https://gitbox.apache.org/repos/asf/opendal.git


The following commit(s) were added to refs/heads/website-download-image by this push:
     new 0b5e3a98b build(website): speed up download images
0b5e3a98b is described below

commit 0b5e3a98b5497ba59fea115f808e00b247cbebab
Author: Kingsword <[email protected]>
AuthorDate: Fri Oct 10 22:55:21 2025 +0800

    build(website): speed up download images
---
 bin/ofs/Cargo.lock                  |   4 +-
 website/plugins/image-ssr-plugin.js | 286 ++++++++++++++++++++++--------------
 2 files changed, 174 insertions(+), 116 deletions(-)

diff --git a/bin/ofs/Cargo.lock b/bin/ofs/Cargo.lock
index 43146b5d6..ac86329d3 100644
--- a/bin/ofs/Cargo.lock
+++ b/bin/ofs/Cargo.lock
@@ -1,6 +1,6 @@
 # This file is automatically @generated by Cargo.
 # It is not intended for manual editing.
-version = 3
+version = 4
 
 [[package]]
 name = "addr2line"
@@ -1296,13 +1296,13 @@ dependencies = [
  "backon",
  "base64",
  "bytes",
- "chrono",
  "crc32c",
  "dotenvy",
  "futures",
  "getrandom 0.2.16",
  "http",
  "http-body",
+ "jiff",
  "log",
  "md-5",
  "percent-encoding",
diff --git a/website/plugins/image-ssr-plugin.js b/website/plugins/image-ssr-plugin.js
index adaf0ce83..e175d6768 100644
--- a/website/plugins/image-ssr-plugin.js
+++ b/website/plugins/image-ssr-plugin.js
@@ -19,12 +19,23 @@
 
 const path = require("path");
 const fs = require("fs/promises");
+const { pipeline } = require("stream/promises");
 const { createHash } = require("crypto");
 const cheerio = require("cheerio");
+const os = require("os");
 
 module.exports = function (_context) {
   const processedImages = new Map();
 
+  const IMAGE_URL_PATTERNS = [
+    /"(https?:\/\/[^"]+\.(png|jpg|jpeg|gif|svg|webp))"/g,
+    /"(https?:\/\/img\.shields\.io\/[^"]+)"/g,
+    /"(https?:\/\/github\.com\/[^"]+\/actions\/workflow[^"]+)"/g,
+    /'(https?:\/\/[^']+\.(png|jpg|jpeg|gif|svg|webp))'/g,
+    /'(https?:\/\/img\.shields\.io\/[^']+)'/g,
+    /'(https?:\/\/github\.com\/[^']+\/actions\/workflow[^']+)'/g,
+  ];
+
   function getImageFilename(imageUrl) {
     const hash = createHash("md5").update(imageUrl).digest("hex");
     let ext = ".jpg";
@@ -55,7 +66,7 @@ module.exports = function (_context) {
     );
   }
 
-  async function downloadImage(imageUrl, buildDir) {
+  async function downloadImage(imageUrl, buildDir, retries = 3) {
     if (processedImages.has(imageUrl)) {
       return processedImages.get(imageUrl);
     }
@@ -71,31 +82,41 @@ module.exports = function (_context) {
       if (!(await existsAsync(buildOutputPath))) {
         console.log(`Downloading image: ${imageUrl}`);
 
-        const controller = new AbortController();
-        const timeoutId = setTimeout(() => controller.abort(), 20000);
-
-        try {
-          const response = await fetch(imageUrl, {
-            signal: controller.signal,
-            headers: {
-              "User-Agent":
-                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
-              Accept: "image/webp,image/apng,image/*,*/*;q=0.8",
-            },
-            redirect: "follow",
-          });
-
-          clearTimeout(timeoutId);
+        let lastError;
+        for (let attempt = 1; attempt <= retries; attempt++) {
+          try {
+            const response = await fetch(imageUrl, {
+              signal: AbortSignal.timeout(30000),
+              headers: {
+                "User-Agent":
+                  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
+                Accept: "image/webp,image/apng,image/*,*/*;q=0.8",
+              },
+              redirect: "follow",
+            });
+
+            if (!response.ok) {
+              throw new Error(`HTTP error! Status: ${response.status}`);
+            }
 
-          if (!response.ok) {
-            throw new Error(`HTTP error! Status: ${response.status}`);
+            const fd = await fs.open(buildOutputPath, "w");
+            await pipeline(response.body, fd.createWriteStream());
+            lastError = null;
+            break;
+          } catch (fetchError) {
+            lastError = fetchError;
+
+            // Clean up potentially corrupted file
+            try {
+              await fs.unlink(buildOutputPath);
+            } catch {
+              // Ignore if file doesn't exist
+            }
           }
+        }
 
-          const buffer = await response.arrayBuffer();
-          await fs.writeFile(buildOutputPath, Buffer.from(buffer));
-        } catch (fetchError) {
-          clearTimeout(timeoutId);
-          throw fetchError;
+        if (lastError) {
+          throw lastError;
         }
       }
 
@@ -108,142 +129,179 @@ module.exports = function (_context) {
     }
   }
 
-  async function processJSFiles(outDir) {
-    console.log("Processing JS files for external images...");
+  async function pLimit(concurrency, tasks) {
+    const results = [];
+    const executing = [];
+
+    for (const task of tasks) {
+      const p = Promise.resolve().then(() => task());
+      results.push(p);
+
+      if (concurrency <= tasks.length) {
+        const e = p.then(() => executing.splice(executing.indexOf(e), 1));
+        executing.push(e);
+        if (executing.length >= concurrency) {
+          await Promise.race(executing);
+        }
+      }
+    }
+
+    return Promise.all(results);
+  }
 
+  async function processFiles(dir) {
+    const htmlFiles = [];
     const jsFiles = [];
 
-    async function findJSFiles(dir) {
+    async function findFiles(dir) {
       const entries = await fs.readdir(dir, { withFileTypes: true });
 
       for (const entry of entries) {
         const fullPath = path.join(dir, entry.name);
         if (entry.isDirectory()) {
-          await findJSFiles(fullPath);
+          await findFiles(fullPath);
         } else if (entry.name.endsWith(".js")) {
           jsFiles.push(fullPath);
+        } else if (entry.name.endsWith(".html")) {
+          htmlFiles.push(fullPath);
         }
       }
     }
 
-    await findJSFiles(outDir);
+    await findFiles(dir);
 
-    for (const jsFile of jsFiles) {
-      const content = await fs.readFile(jsFile, "utf8");
-      let modified = false;
-      let newContent = content;
+    console.log(
+      `Collecting images from ${htmlFiles.length} HTML and ${jsFiles.length} JS files...`
+    );
+    const allImageUrls = new Set();
 
-      // Look for shield.io and other image URLs with a more comprehensive regex
-      const urlPatterns = [
-        /"(https?:\/\/[^"]+\.(png|jpg|jpeg|gif|svg|webp))"/g,
-        /"(https?:\/\/img\.shields\.io\/[^"]+)"/g,
-        /"(https?:\/\/github\.com\/[^"]+\/actions\/workflow[^"]+)"/g,
-        /'(https?:\/\/[^']+\.(png|jpg|jpeg|gif|svg|webp))'/g,
-        /'(https?:\/\/img\.shields\.io\/[^']+)'/g,
-        /'(https?:\/\/github\.com\/[^']+\/actions\/workflow[^']+)'/g,
-      ];
+    await Promise.all([
+      collectImagesFromHtml(htmlFiles, allImageUrls),
+      collectImagesFromJs(jsFiles, allImageUrls),
+    ]);
 
-      const allReplacements = [];
+    console.log(`Downloading ${allImageUrls.size} unique images...`);
+    const downloadTasks = Array.from(allImageUrls).map(
+      (url) => () => downloadImage(url, dir)
+    );
 
-      for (const pattern of urlPatterns) {
-        const matches = Array.from(newContent.matchAll(pattern));
+    const cpuCount = os.cpus().length;
+    const concurrency = Math.min(Math.max(cpuCount * 5, 10), 30);
+    console.log(
+      `Using ${concurrency} concurrent downloads (CPU cores: ${cpuCount})`
+    );
 
-        for (const match of matches) {
-          const imageUrl = match[1];
-          if (!imageUrl) continue;
+    await pLimit(concurrency, downloadTasks);
 
-          try {
-            const localUrl = await downloadImage(imageUrl, outDir);
-            if (localUrl !== imageUrl) {
-              allReplacements.push({
-                original: match[0],
-                replacement: match[0].replace(imageUrl, localUrl),
-              });
-              modified = true;
-            }
-          } catch (error) {
-            console.error(`Error processing URL in JS file: ${error.message}`);
-          }
-        }
-      }
+    console.log("Updating files with local image URLs...");
+    await Promise.all([processHtmlFiles(htmlFiles), processJSFiles(jsFiles)]);
+  }
 
-      // Apply replacements from longest to shortest to avoid partial replacements
-      allReplacements.sort((a, b) => b.original.length - a.original.length);
+  async function traverseHtmlFiles(htmlFiles, handler) {
+    for (const htmlFile of htmlFiles) {
+      const html = await fs.readFile(htmlFile, "utf8");
+      const $ = cheerio.load(html);
 
-      for (const { original, replacement } of allReplacements) {
-        newContent = newContent.replace(original, replacement);
-      }
+      const result = handler(htmlFile, $);
 
-      if (modified) {
-        await fs.writeFile(jsFile, newContent);
+      if (result?.shouldWrite) {
+        await fs.writeFile(htmlFile, $.html());
       }
     }
   }
 
-  return {
-    name: "docusaurus-ssr-image-plugin",
+  async function traverseJsFiles(jsFiles, handler) {
+    for (const jsFile of jsFiles) {
+      const content = await fs.readFile(jsFile, "utf8");
 
-    async postBuild({ outDir }) {
-      console.log("Processing HTML files for external images...");
+      const result = handler(content);
 
-      const htmlFiles = [];
+      if (result?.newContent && result.newContent !== content) {
+        await fs.writeFile(jsFile, result.newContent);
+      }
+    }
+  }
 
-      async function findHtmlFiles(dir) {
-        const entries = await fs.readdir(dir, { withFileTypes: true });
+  async function collectImagesFromHtml(htmlFiles, imageUrls) {
+    await traverseHtmlFiles(htmlFiles, (_, $) => {
+      $("img").each((_, el) => {
+        const src = $(el).attr("src");
+        if (src && src.startsWith("http")) {
+          imageUrls.add(src);
+        }
+      });
+    });
+  }
 
-        for (const entry of entries) {
-          const fullPath = path.join(dir, entry.name);
-          if (entry.isDirectory()) {
-            await findHtmlFiles(fullPath);
-          } else if (entry.name.endsWith(".html")) {
-            htmlFiles.push(fullPath);
-          }
+  async function collectImagesFromJs(jsFiles, imageUrls) {
+    await traverseJsFiles(jsFiles, (content) => {
+      for (const pattern of IMAGE_URL_PATTERNS) {
+        const matches = Array.from(content.matchAll(pattern));
+        for (const match of matches) {
+          if (match[1]) imageUrls.add(match[1]);
         }
       }
+    });
+  }
 
-      await findHtmlFiles(outDir);
+  async function processHtmlFiles(htmlFiles) {
+    await traverseHtmlFiles(htmlFiles, (_, $) => {
+      let modified = false;
 
-      for (const htmlFile of htmlFiles) {
-        const html = await fs.readFile(htmlFile, "utf8");
-        let $ = cheerio.load(html);
-        let modified = false;
+      $("img").each((_, img) => {
+        const element = $(img);
+        const imageUrl = element.attr("src");
 
-        const externalImages = $("img").filter((_, el) => {
-          const src = $(el).attr("src");
-          return src && src.startsWith("http");
-        });
+        if (imageUrl && imageUrl.startsWith("http")) {
+          const localUrl = processedImages.get(imageUrl);
+          if (localUrl && localUrl !== imageUrl) {
+            element.attr("src", localUrl);
+            modified = true;
+          }
+        }
+      });
 
-        if (externalImages.length === 0) continue;
+      return { shouldWrite: modified };
+    });
+  }
 
-        const downloadPromises = [];
+  async function processJSFiles(jsFiles) {
+    await traverseJsFiles(jsFiles, (content) => {
+      let newContent = content;
+      const allReplacements = [];
 
-        externalImages.each((_, img) => {
-          const element = $(img);
-          const imageUrl = element.attr("src");
+      for (const pattern of IMAGE_URL_PATTERNS) {
+        const matches = Array.from(newContent.matchAll(pattern));
 
-          if (!imageUrl || !imageUrl.startsWith("http")) return;
+        for (const match of matches) {
+          const imageUrl = match[1];
+          if (!imageUrl) continue;
 
-          downloadPromises.push(
-            downloadImage(imageUrl, outDir)
-              .then((localUrl) => {
-                if (localUrl !== imageUrl) {
-                  element.attr("src", localUrl);
-                  modified = true;
-                }
-              })
-              .catch(() => {})
-          );
-        });
+          const localUrl = processedImages.get(imageUrl);
+          if (localUrl && localUrl !== imageUrl) {
+            allReplacements.push({
+              original: match[0],
+              replacement: match[0].replace(imageUrl, localUrl),
+            });
+          }
+        }
+      }
 
-        await Promise.all(downloadPromises);
+      allReplacements.sort((a, b) => b.original.length - a.original.length);
 
-        if (modified) {
-          await fs.writeFile(htmlFile, $.html());
-        }
+      for (const { original, replacement } of allReplacements) {
+        newContent = newContent.replace(original, replacement);
       }
 
-      // Process JS files to update image references in bundled JavaScript
-      await processJSFiles(outDir);
+      return { newContent };
+    });
+  }
+
+  return {
+    name: "docusaurus-ssr-image-plugin",
+
+    async postBuild({ outDir }) {
+      await processFiles(outDir);
 
       console.log(`Processed ${processedImages.size} external images`);
     },

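For context, the pLimit helper added above is the usual promise-pool pattern: tasks start eagerly until the pool is full, then each new task waits for a running one to settle before starting. A minimal standalone sketch of how it behaves (the sleep helper and the fake task list are illustrative, not part of this commit):

    const { setTimeout: sleep } = require("timers/promises");

    // Same shape as the plugin's helper: run zero-argument async tasks
    // with at most `concurrency` of them in flight at once.
    async function pLimit(concurrency, tasks) {
      const results = [];
      const executing = [];
      for (const task of tasks) {
        const p = Promise.resolve().then(() => task());
        results.push(p);
        if (concurrency <= tasks.length) {
          // Drop the task from the pool once it settles.
          const e = p.then(() => executing.splice(executing.indexOf(e), 1));
          executing.push(e);
          if (executing.length >= concurrency) {
            await Promise.race(executing); // wait for a free slot
          }
        }
      }
      return Promise.all(results);
    }

    // Nine fake "downloads", at most three running at any moment.
    const tasks = Array.from({ length: 9 }, (_, i) => async () => {
      await sleep(100);
      return i;
    });
    pLimit(3, tasks).then((ids) => console.log(ids)); // 0 through 8, in order

In the plugin itself the pool size is derived from the host: Math.min(Math.max(cpuCount * 5, 10), 30), i.e. five downloads per CPU core, clamped to the range 10..30.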
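For anyone trying the branch locally: the file is a regular Docusaurus plugin, so, assuming it stays at website/plugins/image-ssr-plugin.js, registering it in website/docusaurus.config.js would look roughly like this sketch (the rest of the site config is elided):

    // website/docusaurus.config.js (sketch; the path assumes the
    // plugin location used in this commit)
    module.exports = {
      // ...existing site config...
      plugins: [require.resolve("./plugins/image-ssr-plugin.js")],
    };

Its postBuild hook then runs after docusaurus build: it scans the emitted HTML and JS bundles for external image URLs, downloads each unique image once into the build output, and rewrites the references to point at the local copies.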