This is an automated email from the ASF dual-hosted git repository.

kassiez pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris-website.git


The following commit(s) were added to refs/heads/master by this push:
     new dcbe27f1f1f [Feat]:add a new script to check deadlink (#2346)
dcbe27f1f1f is described below

commit dcbe27f1f1f4ddf6832e1eb26bdb88587444798c
Author: yangon <2689991...@qq.com>
AuthorDate: Thu May 8 16:44:31 2025 +0800

    [Feat]:add a new script to check deadlink (#2346)
---
 check_move_global.py | 149 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 149 insertions(+)

diff --git a/check_move_global.py b/check_move_global.py
new file mode 100644
index 00000000000..57be298346c
--- /dev/null
+++ b/check_move_global.py
@@ -0,0 +1,149 @@
+import argparse
+import subprocess
+import re
+import os
+import sys
+from typing import AnyStr, List
+from urllib.parse import urlparse
+
+# (from_path, to_path) pairs for files renamed by the inspected commit.
+# Rebound by extract_file_changes(); read by process_md_file().
+move_pairs = []
+# Paths of files deleted by the inspected commit.
+# Rebound by extract_file_changes(); read by process_md_file().
+deletes = []
+# Set to True by process_md_file() as soon as any link problem is reported or
+# rewritten; the script entry point turns it into a non-zero exit status.
+change_detected = False
+# Directories that are walked for Markdown files and that
+# find_nearest_file() falls back to for its global search.
+search_dirs = ["docs", "i18n", "versioned_docs"]
+
+def is_same_file(path1, path2):
+    """Return True when both paths are identical after lexical normalisation.
+
+    Only ``os.path.normpath`` is applied, so redundant separators and
+    ``.`` / ``..`` segments are collapsed, but the file system is never
+    consulted and symbolic links are not resolved.
+    """
+    normalized_first = os.path.normpath(path1)
+    normalized_second = os.path.normpath(path2)
+    return normalized_first == normalized_second
+
+def remove_suffix(text: str, suffix: str):
+    """Return ``text`` without a trailing ``suffix``.
+
+    Args:
+        text: The string to trim.
+        suffix: The ending to remove; removed at most once.
+
+    Returns:
+        ``text`` with ``suffix`` cut off when it ends with it, otherwise
+        ``text`` unchanged. An empty ``suffix`` always leaves ``text``
+        unchanged.
+    """
+    # Every string "ends with" the empty string, and text[:-0] is text[:0],
+    # i.e. "". Without this guard an empty suffix would erase the input.
+    if suffix and text.endswith(suffix):
+        return text[: -len(suffix)]
+    return text
+
+def find_nearest_file(file_base, start_dir):
+    """
+    Find a Markdown file called ``file_base`` (".md" or ".mdx").
+
+    The lookup first climbs from ``start_dir`` towards the file-system root,
+    checking each directory on the way; if nothing is found it falls back to
+    a recursive search of every directory listed in ``search_dirs``.
+
+    Args:
+        file_base: File name without extension.
+        start_dir: Directory where the upward search starts.
+
+    Returns:
+        The path of the first match, or None when there is no match or
+        ``file_base`` is empty.
+    """
+    # An empty base name (e.g. from a link ending in "/") would otherwise
+    # look for files literally named ".md" / ".mdx".
+    if not file_base:
+        return None
+
+    extensions = (".md", ".mdx")
+    cur_dir = start_dir
+    # Climb at most 10 levels so the upward search can never get stuck.
+    for _ in range(10):
+        for ext in extensions:
+            candidate = os.path.join(cur_dir, file_base + ext)
+            if os.path.exists(candidate):
+                return candidate
+        parent = os.path.dirname(cur_dir)
+        if parent == cur_dir:
+            # dirname() no longer changes the path: the top has been reached.
+            break
+        cur_dir = parent
+
+    # Global search. os.walk() yields entries in file-system order, so both
+    # the directory list and the file list are sorted to make the chosen
+    # match reproducible across machines.
+    wanted = {file_base + ext for ext in extensions}
+    for base_dir in search_dirs:
+        for root, dirs, files in os.walk(base_dir):
+            dirs.sort()  # in-place: controls the order os.walk descends in
+            for file in sorted(files):
+                if file in wanted:
+                    return os.path.join(root, file)
+    return None
+
+def process_md_file(file_path):
+    """Check the relative Markdown links of one file and repair them in place.
+
+    For every ``[text](target)`` link whose target has no URL scheme and is
+    not an absolute path:
+
+    * if the target was renamed by the inspected commit (``move_pairs``),
+      the link is rewritten to the new location;
+    * if the target was deleted by the commit (``deletes``), a warning is
+      printed;
+    * if the target does not exist on disk, the nearest file with the same
+      base name is looked up with find_nearest_file() and the link is
+      rewritten to it, or reported as unfixable.
+
+    A trailing ``#anchor`` / ``?query`` is ignored while resolving the file
+    and kept when the link is rewritten. The file is only written back when
+    its content changed.
+
+    Args:
+        file_path: Path of the ``.md`` / ``.mdx`` file to process.
+
+    Side effects:
+        Prints one line per finding, may rewrite ``file_path`` and sets the
+        module-level ``change_detected`` flag whenever something is reported.
+    """
+    global change_detected
+
+    link_pattern = re.compile(r"\[.*?\]\((.*?)\)")
+    with open(file_path, "r", encoding="utf-8") as f:
+        content = f.read()
+
+    base_dir = os.path.dirname(file_path)
+    markdown_exts = [".md", ".mdx", ""]
+    new_content = content
+
+    # dict.fromkeys() drops repeated links but keeps first-seen order, so
+    # every distinct link is checked and reported exactly once.
+    for link in dict.fromkeys(link_pattern.findall(content)):
+        if urlparse(link).scheme or os.path.isabs(link):
+            continue
+
+        # Split "path#anchor" / "path?query" into the file part and the tail;
+        # only the file part can be checked against the file system.
+        tail_match = re.search(r"[#?]", link)
+        if tail_match:
+            target = link[: tail_match.start()]
+            tail = link[tail_match.start():]
+        else:
+            target, tail = link, ""
+        if not target:
+            # Pure in-page anchor such as "(#section)": no file involved.
+            continue
+
+        raw_path = os.path.normpath(os.path.join(base_dir, target))
+        if raw_path.endswith((".md", ".mdx")):
+            candidates = [raw_path]
+        elif os.path.isfile(raw_path):
+            # Existing non-Markdown target (image, download, ...): appending
+            # ".md" to it would produce a false dead-link report.
+            continue
+        else:
+            # An extension-less link may point at either Markdown flavour.
+            candidates = [raw_path + ".md", raw_path + ".mdx"]
+
+        # Handle renamed files.
+        rewritten = False
+        for from_path, to_path in move_pairs:
+            from_base, from_ext = os.path.splitext(from_path)
+            to_base, to_ext = os.path.splitext(to_path)
+            # Skip renames that keep the extension-less path (for example
+            # .md -> .mdx); presumably extension-less links stay valid then.
+            if (from_ext in markdown_exts or to_ext in markdown_exts) and (from_base == to_base):
+                continue
+
+            if any(is_same_file(candidate, from_path) for candidate in candidates):
+                relative_to_path = os.path.relpath(to_path, base_dir)
+                relative_to_path = remove_suffix(relative_to_path, ".md")
+                relative_to_path = remove_suffix(relative_to_path, ".mdx")
+                new_link = relative_to_path + tail
+                print(f"🔄 {file_path}: Updated moved link {link} -> {new_link}")
+                new_content = new_content.replace(f"({link})", f"({new_link})")
+                change_detected = True
+                rewritten = True
+                break
+        if rewritten:
+            # Already pointed at the new location; running the dead-link
+            # step as well would report the same link a second time.
+            continue
+
+        # Handle deleted files.
+        for deleted_path in deletes:
+            if any(is_same_file(candidate, deleted_path) for candidate in candidates):
+                print(f"⚠️ {file_path}: Link to deleted file {link}")
+                change_detected = True
+
+        # Handle dead links.
+        if any(os.path.exists(candidate) for candidate in candidates):
+            continue
+
+        # The link is broken: look for a file with the same base name.
+        file_base = os.path.basename(target)
+        file_base = remove_suffix(file_base, ".md")
+        file_base = remove_suffix(file_base, ".mdx")
+
+        # An empty base name (link ending in "/") cannot be searched for.
+        found_path = find_nearest_file(file_base, base_dir) if file_base else None
+        if found_path:
+            relative_to_path = os.path.relpath(found_path, base_dir)
+            relative_to_path = remove_suffix(relative_to_path, ".md")
+            relative_to_path = remove_suffix(relative_to_path, ".mdx")
+            new_link = relative_to_path + tail
+            print(f"🛠️ {file_path}: Fixed broken link {link} -> {new_link}")
+            new_content = new_content.replace(f"({link})", f"({new_link})")
+        else:
+            print(f"❌ {file_path}: Could not fix broken link {link}")
+        change_detected = True
+
+    if new_content != content:
+        with open(file_path, "w", encoding="utf-8") as f:
+            f.write(new_content)
+
+def extract_file_changes(git_show_output: List[AnyStr]):
+    """Parse ``git show`` output and record the renamed and deleted files.
+
+    The result is stored in the module-level ``move_pairs`` (list of
+    ``(from_path, to_path)`` tuples) and ``deletes`` (list of paths).
+
+    Args:
+        git_show_output: The lines of ``git show <commit>``, either all
+            ``bytes`` or all ``str``, each still carrying its line ending.
+    """
+    print("Parsing commit lines...")
+    if git_show_output and isinstance(git_show_output[0], str):
+        content = "".join(git_show_output)
+    else:
+        # A diff may contain bytes that are not valid UTF-8; replacing them
+        # keeps the header lines parseable instead of aborting the run.
+        content = b"".join(git_show_output).decode("utf-8", errors="replace")
+
+    # Both patterns are anchored at line starts and deliberately do NOT use
+    # re.DOTALL, so "." stops at the end of the line and a match can never
+    # run on into the following diff content.
+    move_pattern = r"^rename from (.+)\nrename to (.+)$"
+    move_matches = re.findall(move_pattern, content, re.MULTILINE)
+    print(f"Moved files detected: {len(move_matches)}")
+
+    # With re.DOTALL a trailing "index .+" swallowed the rest of the output,
+    # so only the first deleted file was ever found. The back-reference
+    # makes sure the a/ and b/ paths of the header are the same file.
+    delete_pattern = r"^diff --git a/(.+) b/\1\ndeleted file mode \d+$"
+    delete_matches = re.findall(delete_pattern, content, re.MULTILINE)
+    print(f"Deleted files detected: {len(delete_matches)}")
+
+    global move_pairs
+    global deletes
+    move_pairs = move_matches
+    deletes = delete_matches
+
+def travel(root_path: str):
+    """Walk ``root_path`` recursively and check every Markdown file in it.
+
+    Each file whose name ends in ``.md`` or ``.mdx`` is handed to
+    process_md_file(); all other files are ignored.
+    """
+    markdown_suffixes = (".md", ".mdx")
+    for current_dir, _subdirs, filenames in os.walk(root_path):
+        for filename in filenames:
+            if not filename.endswith(markdown_suffixes):
+                continue
+            process_md_file(os.path.join(current_dir, filename))
+
+if __name__ == "__main__":
+    # Script entry point: read the changes of one commit, then scan all
+    # documentation directories for links affected by them.
+    # Exit status: 0 = no issues, 1 = issues found and/or fixed,
+    # 2 = "git show" failed (or invalid command-line usage, via argparse).
+    parser = argparse.ArgumentParser(description="Fix moved/deleted/broken md links for a commit")
+    parser.add_argument("commit_id", type=str, help="Git commit id to check")
+    args = parser.parse_args()
+
+    # Pass an argument list and no shell: the commit id comes from the
+    # command line and must not be interpreted as shell syntax.
+    # stderr is captured separately so git's error messages are never
+    # parsed as if they were diff output.
+    result = subprocess.run(
+        ["git", "show", args.commit_id],
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+    )
+    if result.returncode != 0:
+        # An unknown commit would otherwise be treated as "no files changed".
+        print(result.stderr.decode("utf-8", errors="replace"), file=sys.stderr)
+        sys.exit(2)
+    extract_file_changes(result.stdout.splitlines(keepends=True))
+
+    for search_dir in search_dirs:
+        travel(search_dir)
+
+    if change_detected:
+        print("❗ Link issues detected and/or fixed.")
+        sys.exit(1)
+    else:
+        print("✅ No issues detected.")


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to