This is an automated email from the ASF dual-hosted git repository.
Yilialinn pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/apisix-website.git
The following commit(s) were added to refs/heads/master by this push:
new 9e78bab24bb fix(seo): publish only the latest version of each
sub-project's docs (de-bloat sitemap) (#2055)
9e78bab24bb is described below
commit 9e78bab24bbf462736f4f3068a2f9d91f022af6e
Author: Ming Wen <[email protected]>
AuthorDate: Tue Jun 23 10:54:23 2026 +0800
fix(seo): publish only the latest version of each sub-project's docs
(de-bloat sitemap) (#2055)
---
scripts/special-process-v2md.sh | 40 ++++++++++++++++++++++++++++++----------
scripts/sync-docs.js | 12 +++++++++++-
scripts/update-sitemap-loc.js | 8 ++++++--
3 files changed, 47 insertions(+), 13 deletions(-)
diff --git a/scripts/special-process-v2md.sh b/scripts/special-process-v2md.sh
index dfc0e7b5a99..87e53447f0b 100644
--- a/scripts/special-process-v2md.sh
+++ b/scripts/special-process-v2md.sh
@@ -1,19 +1,39 @@
#!/usr/bin/env bash
-# remove <!--\s*markdown-link-check-disable\s*--> and <!--\s*markdown-link-check-enable\s*-->
-# in /apisix-ingress-controller/references/v2.mdx
-# after synced docs
+# Remove the `markdown-link-check-disable` / `markdown-link-check-enable`
+# comment pairs from every apisix-ingress-controller `references/v2.mdx` that
+# exists after the docs sync — the current/master copy plus whatever versioned
+# copies were built.
+#
+# The built version set is not fixed (see SUBPROJECT_VERSIONS_TO_KEEP in
+# sync-docs.js), so glob for the files that actually exist instead of
+# hardcoding version numbers, which previously broke the build whenever the
+# referenced versions were no longer published.
+set -e
-BASEDIR=$(dirname $0)/..
+BASEDIR=$(dirname "$0")/..
if [[ "$OSTYPE" == "linux-gnu"* ]]; then
- sed -i '/<!--\s*markdown-link-check-disable\s*-->/I,+1d; /<!--\s*markdown-link-check-enable\s*-->/I,+1d;' $BASEDIR/doc/docs/apisix-ingress-controller/references/v2.mdx
- sed -i '/<!--\s*markdown-link-check-disable\s*-->/I,+1d; /<!--\s*markdown-link-check-enable\s*-->/I,+1d;' $BASEDIR/doc/docs-apisix-ingress-controller_versioned_docs/version-1.7.0/references/v2.mdx
- sed -i '/<!--\s*markdown-link-check-disable\s*-->/I,+1d; /<!--\s*markdown-link-check-enable\s*-->/I,+1d;' $BASEDIR/doc/docs-apisix-ingress-controller_versioned_docs/version-1.8.0/references/v2.mdx
+ sed_inplace() { sed -i "$@"; }
elif [[ "$OSTYPE" == "darwin"* ]]; then
- sed -i '' '/<!--\s*markdown-link-check-disable\s*-->/I,+1d; /<!--\s*markdown-link-check-enable\s*-->/I,+1d;' $BASEDIR/doc/docs/apisix-ingress-controller/references/v2.mdx
- sed -i '' '/<!--\s*markdown-link-check-disable\s*-->/I,+1d; /<!--\s*markdown-link-check-enable\s*-->/I,+1d;' $BASEDIR/doc/docs-apisix-ingress-controller_versioned_docs/version-1.7.0/references/v2.mdx
- sed -i '' '/<!--\s*markdown-link-check-disable\s*-->/I,+1d; /<!--\s*markdown-link-check-enable\s*-->/I,+1d;' $BASEDIR/doc/docs-apisix-ingress-controller_versioned_docs/version-1.8.0/references/v2.mdx
+ sed_inplace() { sed -i '' "$@"; }
else
echo "Unsupported OS: $OSTYPE"
exit 1
fi
+
+shopt -s nullglob
+files=(
+ "$BASEDIR"/doc/docs/apisix-ingress-controller/references/v2.mdx
+ "$BASEDIR"/doc/docs-apisix-ingress-controller_versioned_docs/version-*/references/v2.mdx
+)
+shopt -u nullglob
+
+processed=0
+for f in "${files[@]}"; do
+ [ -f "$f" ] || continue
+ sed_inplace '/<!--\s*markdown-link-check-disable\s*-->/I,+1d; /<!--\s*markdown-link-check-enable\s*-->/I,+1d;' "$f"
+ echo "special-process-v2md: processed $f"
+ processed=$((processed + 1))
+done
+
+echo "special-process-v2md: processed ${processed} file(s)"
diff --git a/scripts/sync-docs.js b/scripts/sync-docs.js
index b0863aa16b0..9974e876a7b 100644
--- a/scripts/sync-docs.js
+++ b/scripts/sync-docs.js
@@ -18,6 +18,14 @@ const websitePath = '../doc';
const gitMap = {};
const projectReleases = {};
+// SEO: only the newest N released versions of each non-apisix sub-project
+// (ingress-controller, helm-chart, docker, *-plugin-runner) are built and
+// published. Publishing every historical release bloated the sitemap with
+// hundreds of thin/duplicate pages (e.g. ingress 0.4.0–2.0.0, docker
+// apisix-2.10.x) and orphaned 403 landing dirs. apisix itself is curated
+// separately in config/apisix-versions.js. Increase this for a wider window.
+const SUBPROJECT_VERSIONS_TO_KEEP = 1;
+
const tasks = new Listr([
{
title: 'Start documents sync',
@@ -92,7 +100,9 @@ const tasks = new Listr([
.map((release) => (isIngressController
? release.replace('remotes/origin/v', '')
: release.replace('remotes/origin/release/', '')))
- .sort((a, b) => semver.compare(semver.coerce(a).version, semver.coerce(b).version));
+ .sort((a, b) => semver.compare(semver.coerce(a).version, semver.coerce(b).version))
+ // SEO: keep only the newest N released versions (see constant above).
+ .slice(-SUBPROJECT_VERSIONS_TO_KEEP);
}
},
}));
diff --git a/scripts/update-sitemap-loc.js b/scripts/update-sitemap-loc.js
index 92034f86b43..b4c772238bc 100644
--- a/scripts/update-sitemap-loc.js
+++ b/scripts/update-sitemap-loc.js
@@ -29,8 +29,12 @@ const sitemapXMLs = [
* pages, also blocked by robots.txt.
*/
const excludePatterns = [
- // Versioned docs: /docs/<project>/<version>/ where version is digits.digits
- /\/docs\/[\w-]+\/\d+\.\d+\//,
+ // Versioned docs: /docs/<project>/<version>/ — only the unversioned (latest)
+ // path should be indexed. Matches 2-part (apisix 3.14), 3-part semver
+ // (ingress 2.0.0), and prefixed (docker apisix-2.10.0) version segments.
+ /\/docs\/[\w-]+\/(?:[\w-]+-)?\d+\.\d+(?:\.\d+)?\//,
+ // Doc tag aggregation pages (low-value, mirrors the /blog/tags/ rule below)
+ /\/docs\/[\w./-]+\/tags\//,
// Development "next" docs
/\/docs\/[\w-]+\/next\//,
// Search pages (blocked by robots.txt)