This is an automated email from the ASF dual-hosted git repository. moonming pushed a commit to branch feat/seo-prune-doc-versions in repository https://gitbox.apache.org/repos/asf/apisix-website.git
commit 942f017eaa022855681c775c11822d3e19d8e257 Author: Ming Wen <[email protected]> AuthorDate: Mon Jun 22 11:37:59 2026 +0800 fix(seo): publish only the latest version of each sub-project's docs The docs sync pulled *every* release branch/tag for the non-apisix sub-projects (ingress-controller, helm-chart, docker, plugin runners), so the sitemap carried ~800 sub-project URLs including ancient versions (ingress 0.4.0-2.0.0, docker apisix-2.10.x) and their thin /tags/ pages. apisix itself is already curated via config/apisix-versions.js; the sub-projects were not. - sync-docs.js: keep only the newest released version of each sub-project (SUBPROJECT_VERSIONS_TO_KEEP=1; bump for a wider window). The latest is served unversioned at /docs/<project>/ and indexed; 'next' stays robots-disallowed. Old versions remain in each project's source repo. - update-sitemap-loc.js: the version-exclusion regex only matched 2-part versions (apisix 3.14); it missed 3-part semver (ingress 2.0.0) and prefixed (docker apisix-2.10.0), which is why sub-project versioned docs leaked into the sitemap. Broaden it to cover all three forms. --- scripts/sync-docs.js | 12 +++++++++++- scripts/update-sitemap-loc.js | 6 ++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/scripts/sync-docs.js b/scripts/sync-docs.js index b0863aa16b0..9974e876a7b 100644 --- a/scripts/sync-docs.js +++ b/scripts/sync-docs.js @@ -18,6 +18,14 @@ const websitePath = '../doc'; const gitMap = {}; const projectReleases = {}; +// SEO: only the newest N released versions of each non-apisix sub-project +// (ingress-controller, helm-chart, docker, *-plugin-runner) are built and +// published. Publishing every historical release bloated the sitemap with +// hundreds of thin/duplicate pages (e.g. ingress 0.4.0–2.0.0, docker +// apisix-2.10.x) and orphaned 403 landing dirs. apisix itself is curated +// separately in config/apisix-versions.js. Increase this for a wider window. +const SUBPROJECT_VERSIONS_TO_KEEP = 1; + const tasks = new Listr([ { title: 'Start documents sync', @@ -92,7 +100,9 @@ const tasks = new Listr([ .map((release) => (isIngressController ? release.replace('remotes/origin/v', '') : release.replace('remotes/origin/release/', ''))) - .sort((a, b) => semver.compare(semver.coerce(a).version, semver.coerce(b).version)); + .sort((a, b) => semver.compare(semver.coerce(a).version, semver.coerce(b).version)) + // SEO: keep only the newest N released versions (see constant above). + .slice(-SUBPROJECT_VERSIONS_TO_KEEP); } }, })); diff --git a/scripts/update-sitemap-loc.js b/scripts/update-sitemap-loc.js index 92034f86b43..6650ff27c6a 100644 --- a/scripts/update-sitemap-loc.js +++ b/scripts/update-sitemap-loc.js @@ -29,8 +29,10 @@ const sitemapXMLs = [ * pages, also blocked by robots.txt. */ const excludePatterns = [ - // Versioned docs: /docs/<project>/<version>/ where version is digits.digits - /\/docs\/[\w-]+\/\d+\.\d+\//, + // Versioned docs: /docs/<project>/<version>/ — only the unversioned (latest) + // path should be indexed. Matches 2-part (apisix 3.14), 3-part semver + // (ingress 2.0.0), and prefixed (docker apisix-2.10.0) version segments. + /\/docs\/[\w-]+\/(?:[\w-]+-)?\d+\.\d+(?:\.\d+)?\//, // Development "next" docs /\/docs\/[\w-]+\/next\//, // Search pages (blocked by robots.txt)
