This is an automated email from the ASF dual-hosted git repository.
kassiez pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris-website.git
The following commit(s) were added to refs/heads/master by this push:
new 9aacaa8c917 [script] Deadlink check script (#1552)
9aacaa8c917 is described below
commit 9aacaa8c9170b7802f2dd9cec6653337be7d3763
Author: zclllyybb <[email protected]>
AuthorDate: Fri Dec 20 16:01:04 2024 +0800
[script] Deadlink check script (#1552)
## Versions
- [ ] dev
- [ ] 3.0
- [ ] 2.1
- [ ] 2.0
## Languages
- [ ] Chinese
- [ ] English
## Docs Checklist
- [ ] Checked by AI
- [ ] Test Cases Built
---
check_all_deadlink.py | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 86 insertions(+)
diff --git a/check_all_deadlink.py b/check_all_deadlink.py
new file mode 100644
index 00000000000..8ebdde92d91
--- /dev/null
+++ b/check_all_deadlink.py
@@ -0,0 +1,86 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import re
+import os
+from urllib.parse import urlparse
+
+
+def process_md_file(file_path):
+    """Report relative Markdown links in ``file_path`` whose target is missing.
+
+    The file is scanned line by line. Lines inside fenced code blocks
+    (delimited by lines that start with three backticks) are ignored. For
+    every inline link ``[text](target)`` on a remaining line:
+
+    * empty targets and pure ``#anchor`` targets are skipped, because they
+      refer to the current document;
+    * targets with a URL scheme and absolute paths are skipped;
+    * any other target is resolved relative to the directory of
+      ``file_path``, its ``#fragment`` is dropped, and ``.md`` is appended
+      when the path ends in neither ``.md`` nor ``.mdx``.
+
+    A link is reported when the resolved path does not exist and, for a
+    ``.md`` path, the same path with an ``.mdx`` suffix does not exist
+    either.
+
+    Args:
+        file_path: Path of the Markdown file to check (read as UTF-8).
+
+    Returns:
+        None. One ``Error: ...`` line is printed to stdout per dead link.
+    """
+    link_pattern = re.compile(r"\[.*?\]\((.*?)\)")
+    code_block_pattern = re.compile(r"^```.*$")
+
+    with open(file_path, "r", encoding="utf-8") as f:
+        lines = f.read().splitlines()
+
+    base_dir = os.path.dirname(file_path)
+    in_code_block = False
+
+    for line_number, line in enumerate(lines, start=1):
+        # A fence line toggles the code-block state and is never scanned
+        # for links itself.
+        if code_block_pattern.match(line):
+            in_code_block = not in_code_block
+            continue
+
+        if in_code_block:
+            continue
+
+        for link in link_pattern.findall(line):
+            # "[text]()" and "[text](#anchor)" point at the current document,
+            # so there is no file to look up. Testing for the empty string
+            # first also avoids the IndexError that link[0] raised on "".
+            if not link or link.startswith("#"):
+                continue
+
+            # Only relative file paths are checked; skip URLs and absolute
+            # paths.
+            if urlparse(link).scheme or os.path.isabs(link):
+                continue
+
+            full_path = os.path.normpath(os.path.join(base_dir, link))
+
+            # Drop the "#section" fragment; only the file part is checked.
+            full_path = full_path.split("#", 1)[0]
+
+            if not full_path.endswith((".md", ".mdx")):
+                full_path += ".md"
+
+            # A ".md" link is also satisfied by an ".mdx" file of that name.
+            candidates = [full_path]
+            if full_path.endswith(".md"):
+                candidates.append(full_path[:-3] + ".mdx")
+
+            if not any(os.path.exists(path) for path in candidates):
+                print(
+                    f"Error: File not found for link '{link}' "
+                    f"in file '{file_path}:{line_number}'"
+                )
+
+
+def travel(root_path: str):
+    """Check every Markdown file under ``root_path`` for dead links.
+
+    Walks the directory tree with ``os.walk`` and hands each file whose name
+    ends in ``.md`` or ``.mdx`` to ``process_md_file``.
+    """
+    markdown_suffixes = (".md", ".mdx")
+    for current_dir, _subdirs, filenames in os.walk(root_path):
+        for name in filenames:
+            if not name.endswith(markdown_suffixes):
+                continue
+            process_md_file(os.path.join(current_dir, name))
+
+
+if __name__ == "__main__":
+    # Documentation trees to scan, in the order they are checked. The paths
+    # are relative, so they resolve against the current working directory.
+    for docs_root in ("docs", "i18n", "versioned_docs"):
+        travel(docs_root)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]