janhoy commented on a change in pull request #596:
URL: https://github.com/apache/solr/pull/596#discussion_r800984488



##########
File path: dev-tools/scripts/refguide/gen-refguide-redirects.py
##########
@@ -0,0 +1,134 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Simple script that converts old refguide page names as of 8.11.1 to the new 
Antora URLs from 9.0
+See input files in folder gen-refguide-redirects/
+
+The old-guide.txt is the plain .adoc names from an 'ls | grep adoc' in old 
ref-guide src folder
+The new-guide.txt is the output from this command from the new repo in the 
'modules' folder:
+    find . | grep adoc | sed 's/\/pages//g' | sed 's/^.\///g'
+The mappings.csv comes from the explicit page renamings sourced from 
spreadsheet
+  
https://docs.google.com/spreadsheets/d/1mwxSpn5Ky7-P4DLFrJGel2h7Il4muTlHmAA-AuRY1rs/edit#gid=982988701
+"""
+
+import os
+import sys
+from pprint import pprint
+sys.path.append(os.path.dirname(__file__))
+import argparse
+
+
+def read_config():
+    parser = argparse.ArgumentParser(description='Convert old refguide page 
names to new')
+    parser.add_argument('--old', required=True, help='Old pagenames file, one 
.adoc filename per line')
+    parser.add_argument('--new', required=True, help='New pagenames file, one 
.adoc filename per line')
+    parser.add_argument('--mapping', required=True, help='Semicolon separated 
from-to file names (adoc)')
+    parser.add_argument('--htaccess', action='store_true', default=False, 
help='Output as htaccess rules')
+    newconf = parser.parse_args()
+    return newconf
+
+
+def out(text):
+    global conf
+    if not conf.htaccess:
+        print(text)
+
+
+def lines_from_file(filename):
+    with open(filename, 'r') as fp:
+        lines = []
+        for line in fp.readlines():
+            if line.startswith("#") or len(line.strip()) == 0:
+                continue
+            lines.append(line.replace(".adoc", ".html").strip())
+        return lines
+
+
+def main():
+    global conf
+    conf = read_config()
+
+    new = {}
+    name_map = {}
+
+    out("Reading config")
+    old = lines_from_file(conf.old)
+    for line in lines_from_file(conf.new):
+        (path, file) = line.split("/")
+        new[file] = line
+    for line in lines_from_file(conf.mapping):
+        (frm, to) = line.split(";")
+        name_map[frm] = to
+
+    # Files in src/old-pages as of 2022-02-04
+    old_pages = ["configuration-apis.html", "configuration-guide.html", 
"controlling-results.html", "deployment-guide.html", "enhancing-queries.html", 
"field-types.html", "fields-and-schema-design.html", "getting-started.html", 
"indexing-data-operations.html", "installation-deployment.html", 
"monitoring-solr.html", "query-guide.html", "scaling-solr.html", 
"schema-indexing-guide.html", "solr-concepts.html", "solr-schema.html", 
"solrcloud-clusters.html", "user-managed-clusters.html"]
+
+    result = {}
+    old_guide = []
+    failed = {}
+    regex_new = {}
+    out("Converting...")
+    for frm in old:
+        if frm in new:
+            (subpath, name) = new[frm].split("/")
+            if subpath not in regex_new:
+                regex_new[subpath] = []
+            regex_new[subpath].append(name.split(".html")[0])
+        elif frm in name_map:
+            new_name = name_map[frm]
+            if new_name in new:
+                result[frm] = new[new_name]
+            elif new_name.startswith("/guide/"):
+                result[frm] = new_name[7:]
+            elif new_name == "_8_11":
+                old_guide.append(frm.split(".html")[0])
+            else:
+                failed[frm] = "Mapped value %s not in new guide" % new_name
+        elif frm in old_pages:
+            failed[frm] = "Not yet mapped (in src/old-pages)"
+        else:
+            failed[frm] = "404"
+
+    if conf.htaccess:
+        print("# Existing pages moved to sub path")
+        for key in regex_new:
+            print("RedirectMatch ^/guide/(%s)\.html /guide/%s/$1.html" % 
("|".join(regex_new[key]), key))

Review comment:
       Multiple layers of .htaccess are hard to get right. The Antora folders will 
all reside in svn in the folder pointed to by 
`__root/docs.solr.apache.org/guide`, so I don't even know if httpd will execute 
.htaccess from there. If httpd finds a `.htaccess` file on a deep level, it 
will use that one instead of the topmost one, and if you try to inherit, then 
all the rules from the top level stop working.
   
   So I think we have to put all rules in our main `.htaccess` file, including the 
`latest` -> `9_0` one. In the end, we'll simply have to try and see what 
happens.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org
For additional commands, e-mail: issues-h...@solr.apache.org

Reply via email to