Changed in v3:
- Makefile.in: added OPT_URLS_HTML_DEPS and a comment

Changed in v2:
- added convenience targets to Makefile for regenerating the .opt.urls
  files, and for running unit tests for the generation code
- parse gdc and gfortran documentation, and create LangUrlSuffix_{lang}
directives for language-specific URLs.
- add documentation to sourcebuild.texi

gcc/ChangeLog:
        * Makefile.in (OPT_URLS_HTML_DEPS): New.
        (regenerate-opt-urls): New target.
        (regenerate-opt-urls-unit-test): New target.
        * doc/options.texi (Option properties): Add UrlSuffix and
        description of regenerate-opt-urls.py.  Add LangUrlSuffix_*.
        * doc/sourcebuild.texi (Anatomy of a Target Back End): Add
        reference to regenerate-opt-urls.py's TARGET_SPECIFIC_PAGES.
        * regenerate-opt-urls.py: New file.

Signed-off-by: David Malcolm <dmalc...@redhat.com>
---
 gcc/Makefile.in            |  16 ++
 gcc/doc/options.texi       |  26 +++
 gcc/doc/sourcebuild.texi   |   4 +
 gcc/regenerate-opt-urls.py | 408 +++++++++++++++++++++++++++++++++++++
 4 files changed, 454 insertions(+)
 create mode 100755 gcc/regenerate-opt-urls.py

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index f284c1387e27..d85953495ce8 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -3611,6 +3611,22 @@ $(build_htmldir)/gccinstall/index.html: $(TEXI_GCCINSTALL_FILES)
        DESTDIR=$(@D) \
        $(SHELL) $(srcdir)/doc/install.texi2html
 
+# Regenerate the .opt.urls files from the generated html, and from the .opt
+# files.  Doing so requires all languages that have their own HTML manuals
+# to be enabled.
+.PHONY: regenerate-opt-urls
+OPT_URLS_HTML_DEPS = $(build_htmldir)/gcc/Option-Index.html \
+	$(build_htmldir)/gdc/Option-Index.html \
+	$(build_htmldir)/gfortran/Option-Index.html
+
+regenerate-opt-urls: $(srcdir)/regenerate-opt-urls.py $(OPT_URLS_HTML_DEPS)
+	$(srcdir)/regenerate-opt-urls.py $(build_htmldir) $(shell dirname $(srcdir))
+
+# Run the unit tests for regenerate-opt-urls.py (they read the generated html)
+.PHONY: regenerate-opt-urls-unit-test
+regenerate-opt-urls-unit-test: $(srcdir)/regenerate-opt-urls.py $(OPT_URLS_HTML_DEPS)
+	$(srcdir)/regenerate-opt-urls.py $(build_htmldir) $(shell dirname $(srcdir)) --unit-test
+
 MANFILES = doc/gcov.1 doc/cpp.1 doc/gcc.1 doc/gfdl.7 doc/gpl.7 \
            doc/fsf-funding.7 doc/gcov-tool.1 doc/gcov-dump.1 \
           $(if $(filter yes,@enable_lto@),doc/lto-dump.1)
diff --git a/gcc/doc/options.texi b/gcc/doc/options.texi
index 715f0a1479c7..37d7ecc1477d 100644
--- a/gcc/doc/options.texi
+++ b/gcc/doc/options.texi
@@ -597,4 +597,30 @@ This warning option corresponds to @code{cpplib.h} warning reason code
 @var{CPP_W_Enum}.  This should only be used for warning options of the
 C-family front-ends.
 
+@item UrlSuffix(@var{url_suffix})
+Adjacent to each human-written @code{.opt} file in the source tree is
+a corresponding file with a @code{.opt.urls} extension.  These files
+contain @code{UrlSuffix} directives giving the ending part of the URL
+for the documentation of the option, such as:
+
+@smallexample
+Wabi-tag
+UrlSuffix(gcc/C_002b_002b-Dialect-Options.html#index-Wabi-tag)
+@end smallexample
+
+These URL suffixes are relative to @code{DOCUMENTATION_ROOT_URL}.
+
+These files are generated from the @code{.opt} files and the generated
+HTML documentation by @code{regenerate-opt-urls.py}, and should be
+regenerated when adding new options, via manually invoking
+@code{make regenerate-opt-urls}.
+
+@item LangUrlSuffix_@var{lang}(@var{url_suffix})
+In addition to @code{UrlSuffix} directives, @code{regenerate-opt-urls.py}
+can generate language-specific URLs, such as:
+
+@smallexample
+LangUrlSuffix_D(gdc/Code-Generation.html#index-MMD)
+@end smallexample
+
 @end table
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index 26a7e9c35070..9a394b3e2c77 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -813,6 +813,10 @@ options supported by this target (@pxref{Run-time Target, , Run-time
 Target Specification}).  This means both entries in the summary table
 of options and details of the individual options.
 @item
+An entry in @file{gcc/regenerate-opt-urls.py}'s @code{TARGET_SPECIFIC_PAGES}
+dictionary mapping from target-specific HTML documentation pages
+to the target-specific source directory.
+@item
 Documentation in @file{gcc/doc/extend.texi} for any target-specific
 attributes supported (@pxref{Target Attributes, , Defining
 target-specific uses of @code{__attribute__}}), including where the
diff --git a/gcc/regenerate-opt-urls.py b/gcc/regenerate-opt-urls.py
new file mode 100755
index 000000000000..b123fc57c7b9
--- /dev/null
+++ b/gcc/regenerate-opt-urls.py
@@ -0,0 +1,408 @@
+#!/usr/bin/env python3
+
+# Copyright (C) 2023 Free Software Foundation, Inc.
+#
+# Script to regenerate FOO.opt.urls files for each FOO.opt in the
+# source tree.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 3, or (at your option) any later
+# version.
+#
+# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# <http://www.gnu.org/licenses/>.
+
+DESCRIPTION = """
+Parses the generated HTML (from "make html") to locate anchors
+for options, then parses the .opt files within the source tree,
+and generates a .opt.urls in the source tree for each .opt file,
+giving URLs for each option, where it can.
+
+Usage (from build/gcc subdirectory):
+  ../../src/gcc/regenerate-opt-urls.py HTML/gcc-14.0.0/ ../../src
+
+To run unit tests:
+  ../../src/gcc/regenerate-opt-urls.py HTML/gcc-14.0.0/ ../../src --unit-test
+"""
+
+import argparse
+import json
+import os
+from pathlib import Path
+from pprint import pprint
+import sys
+import re
+import unittest
+
+def canonicalize_option_name(option_name):
+    """
+    Return option_name with one trailing '=' removed, if it has one, so
+    that e.g. '-march=' and '-march' share a single key.
+    """
+    has_trailing_equals = option_name.endswith('=')
+    return option_name[:-1] if has_trailing_equals else option_name
+
+
+def canonicalize_url_suffix(url_suffix):
+    """
+    Return a canonical form of url_suffix, for use in deduplication.
+
+    Many options have anchors for both their positive and negative
+    forms; for example -Wcpp has both:
+      'gcc/Warning-Options.html#index-Wcpp'
+      'gcc/Warning-Options.html#index-Wno-cpp'
+    so "no-" is dropped after "index-W" and "index-f", as are a trailing
+    "_003d" and then a trailing "-<digits>".
+
+    Note that the result might not correspond to an actual anchor in
+    the HTML.
+    """
+    # Applied in this order; each is a (regex, replacement) pair.
+    rewrites = (('index-Wno-', 'index-W'),
+                ('index-fno-', 'index-f'),
+                ('_003d$', ''),
+                ('-([0-9]+)$', ''))
+    for pattern, replacement in rewrites:
+        url_suffix = re.sub(pattern, replacement, url_suffix)
+    return url_suffix
+
+
+class Index:
+    """
+    Index of the URL suffixes found for each option, per language,
+    populated from the links in generated Option-Index.html pages.
+    """
+    def __init__(self):
+        # Map from language (or None) to map from option name to set of
+        # URL suffixes
+        self.entries = {}
+
+    def add_entry(self, matched_text, url_suffix, language, verbose=False):
+        """
+        Record url_suffix for the option matched_text under language.
+
+        Nothing is recorded if url_suffix contains 'Attributes.html', or
+        if a suffix with the same canonical form (see
+        canonicalize_url_suffix) is already recorded for that option.
+        """
+        if 'Attributes.html' in url_suffix:
+            return
+        matched_text = canonicalize_option_name(matched_text)
+        per_lang_entries = self.entries.setdefault(language, {})
+        known_suffixes = per_lang_entries.setdefault(matched_text, set())
+        # Partition by canonicalized url_suffixes; keep only the
+        # first url_suffix seen in each such partition.
+        c_new = canonicalize_url_suffix(url_suffix)
+        if any(canonicalize_url_suffix(entry) == c_new
+               for entry in known_suffixes):
+            return
+        known_suffixes.add(url_suffix)
+
+    def get_languages(self):
+        """
+        Return the languages that have entries (None being the key used
+        for entries that were added with language=None).
+        """
+        return self.entries.keys()
+
+    def get_url_suffixes(self, text, language=None):
+        """
+        Return the set of URL suffixes recorded for option text under
+        language, or None if there are none.  A trailing '=' in text is
+        ignored.
+        """
+        text = canonicalize_option_name(text)
+        per_lang_entries = self.entries.get(language)
+        if not per_lang_entries:
+            return None
+        return per_lang_entries.get(text)
+
+    def parse_option_index(self, input_filename, language, verbose=False):
+        """
+        Add entries under language for each line of the file at
+        input_filename (a pathlib.Path).  The name of the file's parent
+        directory is used as the leading component of each URL suffix.
+        """
+        with open(input_filename) as f:
+            dirname = input_filename.parent.name
+            for line in f:
+                self.parse_html_line_option_index(dirname, line, language,
+                                                  verbose)
+
+    def parse_html_line_option_index(self, dirname, line, language,
+                                     verbose=False):
+        """
+        Add an entry for the first <a href="..."><code>...</code></a>
+        link found in line, if any.
+
+        The option name is '-' plus the link text, with a leading
+        "-Wno-" or "-fno-" rewritten to "-W" or "-f"; the URL suffix is
+        dirname + '/' + the link target.
+        """
+        if verbose:
+            print(repr(line))
+
+        # Update for this in the GCC website's bin/preprocess
+        # process_html_file:
+        #   | sed -e 's/_002d/-/g' -e 's/_002a/*/g' \
+        line = line.replace('_002d', '-')
+        line = line.replace('_002a', '*')
+
+        # e.g. <a href="Optimize-Options.html#index-fmodulo_002dsched">
+        #        <code>fmodulo-sched</code></a>   (all on one line)
+        m = re.search(r'<a href="([\S]+)"><code>([\S]+)</code></a>', line)
+        if not m:
+            return
+        if verbose:
+            print(m.groups())
+        url_suffix, index_text = m.groups()
+        option = '-' + index_text
+
+        # Strip off "no-" prefixes from options
+        for negated, positive in (('-Wno-', '-W'), ('-fno-', '-f')):
+            if option.startswith(negated):
+                option = positive + option[len(negated):]
+
+        url_suffix = dirname + '/' + url_suffix
+        self.add_entry(option, url_suffix, language, verbose)
+
+
+class TestParsingIndex(unittest.TestCase):
+    """
+    Unit tests for Index.  test_parse_option_index reads
+    gcc/Option-Index.html below the module-level INPUT_HTML_PATH.
+    """
+    def test_parse_line(self):
+        # "_002d" in the anchor is turned into "-".
+        index = Index()
+        index.parse_html_line_option_index(
+            'gcc',
+            ('<a href="Optimize-Options.html#index-fmodulo_002dsched">'
+             '<code>fmodulo-sched</code></a>'),
+            None)
+        self.assertEqual(index.get_url_suffixes('-fmodulo-sched'),
+                         {'gcc/Optimize-Options.html#index-fmodulo-sched'})
+
+    def test_negated_flag(self):
+        # An index entry for "-fno-foo" is filed under "-ffoo", keeping
+        # the URL of the negated form.
+        index = Index()
+        index.parse_html_line_option_index(
+            'gcc',
+            ('<tr><td></td><td valign="top">'
+             '<a href="Static-Analyzer-Options.html#index-fno_002danalyzer">'
+             '<code>fno-analyzer</code></a>:</td><td>&nbsp;</td>'
+             '<td valign="top"><a href="Static-Analyzer-Options.html">'
+             'Static Analyzer Options</a></td></tr>\n'),
+            None)
+        self.assertIsNone(index.get_url_suffixes('-fno-analyzer'))
+        self.assertEqual(
+            index.get_url_suffixes('-fanalyzer'),
+            {'gcc/Static-Analyzer-Options.html#index-fno-analyzer'})
+
+    def test_negated_warning(self):
+        # Likewise, "-Wno-foo" is filed under "-Wfoo".
+        index = Index()
+        index.parse_html_line_option_index(
+            'gcc',
+            ('<tr><td></td><td valign="top">'
+             '<a href="Warning-Options.html#index-Wno_002dalloca">'
+             '<code>Wno-alloca</code></a>:</td><td>&nbsp;</td>'
+             '<td valign="top"><a href="Warning-Options.html">'
+             'Warning Options</a></td></tr>\n'),
+            None)
+        self.assertIsNone(index.get_url_suffixes('-Wno-alloca'))
+        self.assertEqual(index.get_url_suffixes('-Walloca'),
+                         {'gcc/Warning-Options.html#index-Wno-alloca'})
+
+    def test_parse_option_index(self):
+        # Parse the real generated index and spot-check some options.
+        index = Index()
+        index.parse_option_index(INPUT_HTML_PATH / 'gcc/Option-Index.html',
+                                 language=None)
+        self.assertEqual(index.get_url_suffixes('-fmodulo-sched'),
+                         {'gcc/Optimize-Options.html#index-fmodulo-sched'})
+        self.assertEqual(index.get_url_suffixes('-O'),
+                         {'gcc/Optimize-Options.html#index-O'})
+        self.assertEqual(index.get_url_suffixes('-O0'),
+                         {'gcc/Optimize-Options.html#index-O0'})
+        self.assertEqual(
+            index.get_url_suffixes('-Wframe-larger-than='),
+            {'gcc/Warning-Options.html#index-Wframe-larger-than_003d'})
+
+        # Check an option with duplicates: '-march'
+        # The url_suffixes will be of the form
+        #  'gcc/HPPA-Options.html#index-march-5',
+        #  'gcc/LoongArch-Options.html#index-march-7',
+        # etc, where the trailing number is, unfortunately, likely to
+        # change from release to release.
+        # Replace them with 'NN' for the purpose of this test:
+        em_arch_url_suffixes = [re.sub('(-[0-9]+)', '-NN', s)
+                                for s in index.get_url_suffixes('-march')]
+        self.assertIn('gcc/ARM-Options.html#index-march-NN',
+                      em_arch_url_suffixes)
+        self.assertIn('gcc/x86-Options.html#index-march-NN',
+                      em_arch_url_suffixes)
+
+        self.assertEqual(index.get_url_suffixes('-Wcpp'),
+                         {'gcc/Warning-Options.html#index-Wcpp'})
+
+        # A trailing '=' makes no difference to the lookup.
+        self.assertIsNotNone(index.get_url_suffixes('-march'))
+        self.assertIsNotNone(index.get_url_suffixes('-march='))
+
+class OptFile:
+    """
+    The records read from one .opt file, plus the file's path relative
+    to the source tree.
+    """
+    def __init__(self, opt_path, rel_path):
+        """
+        Parse a .opt file.  Similar to opt-gather.awk.
+
+        opt_path is the file to read.  rel_path is stored as
+        self.rel_path and must start with 'gcc'.
+
+        self.records is set to a list of records, each of which is the
+        list of consecutive lines (as read, line endings included) lying
+        between separator lines.  A separator is a line that, after any
+        leading spaces and tabs, is empty or starts with ';'.
+        """
+        self.rel_path = rel_path
+        assert rel_path.startswith('gcc')
+        self.records = []
+        with open(opt_path) as f:
+            # The record currently being extended, or None when the next
+            # non-separator line has to start a new record.
+            current_record = None
+            for line in f:
+                if re.match(r'[ \t]*(;|$)', line):
+                    current_record = None
+                    continue
+                if current_record is None:
+                    current_record = [line]
+                    self.records.append(current_record)
+                else:
+                    current_record.append(line)
+
+# Mapping from target-specific page to subdirectory containing .opt files
+# documented on that page.
+#
+# Both sides are used as string prefixes (of a URL suffix, and of a .opt
+# file's path relative to the source tree, respectively), so every
+# subdirectory ends with '/' to avoid matching a longer directory name.
+
+TARGET_SPECIFIC_PAGES = {
+    'gcc/AArch64-Options.html' : 'gcc/config/aarch64/',
+    'gcc/AMD-GCN-Options.html' : 'gcc/config/gcn/',
+    'gcc/ARC-Options.html' : 'gcc/config/arc/',
+    'gcc/ARM-Options.html' : 'gcc/config/arm/',
+    'gcc/AVR-Options.html' : 'gcc/config/avr/',
+    'gcc/Adapteva-Epiphany-Options.html' : 'gcc/config/epiphany/',
+    'gcc/Blackfin-Options.html' : 'gcc/config/bfin/',
+    'gcc/C-SKY-Options.html' : 'gcc/config/csky/',
+    'gcc/C6X-Options.html' : 'gcc/config/c6x/',
+    'gcc/CRIS-Options.html' : 'gcc/config/cris/',
+    'gcc/DEC-Alpha-Options.html' : 'gcc/config/alpha/',
+    'gcc/FR30-Options.html' : 'gcc/config/fr30/',
+    'gcc/FRV-Options.html' : 'gcc/config/frv/',
+    'gcc/FT32-Options.html' : 'gcc/config/ft32/',
+    'gcc/H8_002f300-Options.html' : 'gcc/config/h8300/',
+    'gcc/HPPA-Options.html' : 'gcc/config/pa/',
+    'gcc/IA-64-Options.html' : 'gcc/config/ia64/',
+    'gcc/LoongArch-Options.html' : 'gcc/config/loongarch/',
+    'gcc/M32C-Options.html' : 'gcc/config/m32c/',
+    'gcc/M32R_002fD-Options.html' : 'gcc/config/m32r/',
+    'gcc/M680x0-Options.html' : 'gcc/config/m68k/',
+    'gcc/MCore-Options.html' : 'gcc/config/mcore/',
+    'gcc/MIPS-Options.html' : 'gcc/config/mips/',
+    'gcc/MMIX-Options.html' : 'gcc/config/mmix/',
+    'gcc/MN10300-Options.html' : 'gcc/config/mn10300/',
+    'gcc/MSP430-Options.html' : 'gcc/config/msp430/',
+    'gcc/MicroBlaze-Options.html' : 'gcc/config/microblaze/',
+    'gcc/Moxie-Options.html' : 'gcc/config/moxie/',
+    'gcc/NDS32-Options.html' : 'gcc/config/nds32/',
+    'gcc/Nios-II-Options.html' : 'gcc/config/nios2/',
+    'gcc/Nvidia-PTX-Options.html' : 'gcc/config/nvptx/',
+    'gcc/OpenRISC-Options.html' : 'gcc/config/or1k/',
+    'gcc/PDP-11-Options.html' : 'gcc/config/pdp11/',
+    'gcc/PRU-Options.html' : 'gcc/config/pru/',
+    'gcc/RISC-V-Options.html' : 'gcc/config/riscv/',
+    'gcc/RL78-Options.html' : 'gcc/config/rl78/',
+    'gcc/RS_002f6000-and-PowerPC-Options.html' : 'gcc/config/rs6000/',
+    'gcc/RX-Options.html' : 'gcc/config/rx/',
+    'gcc/SH-Options.html' : 'gcc/config/sh/',
+    'gcc/SPARC-Options.html' : 'gcc/config/sparc/',
+    'gcc/S_002f390-and-zSeries-Options.html' : 'gcc/config/s390/',
+    'gcc/V850-Options.html' : 'gcc/config/v850/',
+    'gcc/VAX-Options.html' : 'gcc/config/vax/',
+    'gcc/Visium-Options.html' : 'gcc/config/visium/',
+    'gcc/Xstormy16-Options.html' : 'gcc/config/stormy16/',
+    'gcc/Xtensa-Options.html' : 'gcc/config/xtensa/',
+    'gcc/eBPF-Options.html' : 'gcc/config/bpf/',
+    'gcc/x86-Options.html' : 'gcc/config/i386/',
+}
+
+def target_specific(url_suffix):
+    """
+    If url_suffix starts with one of the pages in TARGET_SPECIFIC_PAGES,
+    return the subdirectory given for the first such page; otherwise
+    return None.
+    """
+    matching_subdirs = (subdir
+                        for page_prefix, subdir
+                        in TARGET_SPECIFIC_PAGES.items()
+                        if url_suffix.startswith(page_prefix))
+    return next(matching_subdirs, None)
+
+def filter_urlsuffixes_for_optfile(optfile, url_suffixes):
+    """
+    Filter out target-specific options for the wrong target.
+
+    Return the set of those url_suffixes that either are not on a
+    target-specific page (see target_specific), or are on one whose
+    subdirectory is a prefix of optfile.rel_path.
+    """
+    def applies_to_optfile(url_suffix):
+        subdir = target_specific(url_suffix)
+        return not subdir or optfile.rel_path.startswith(subdir)
+
+    return {url_suffix
+            for url_suffix in url_suffixes
+            if applies_to_optfile(url_suffix)}
+
+
+class TestFiltering(unittest.TestCase):
+    """Unit tests for target_specific and filter_urlsuffixes_for_optfile."""
+    def test_target_specific(self):
+        # A page that is not in TARGET_SPECIFIC_PAGES gives None.
+        self.assertIsNone(
+            target_specific('gcc/Preprocessor-Options.html#index-A'))
+        self.assertEqual(
+            target_specific('gcc/MMIX-Options.html#index-mknuthdiv'),
+            'gcc/config/mmix/')
+
+    def test_filter(self):
+        # The same option documented for two targets: each target's .opt
+        # file keeps only the URL on its own target's page.
+        # '/dev/null' gives an OptFile with no records.
+        s = {'gcc/MIPS-Options.html#index-munaligned-access-1',
+             'gcc/ARM-Options.html#index-munaligned-access'}
+        arm_optfile = OptFile('/dev/null', 'gcc/config/arm/arm.opt')
+        mips_optfile = OptFile('/dev/null', 'gcc/config/mips/mips.opt')
+        self.assertEqual(
+            filter_urlsuffixes_for_optfile(arm_optfile, s),
+            {'gcc/ARM-Options.html#index-munaligned-access'})
+        self.assertEqual(
+            filter_urlsuffixes_for_optfile(mips_optfile, s),
+            {'gcc/MIPS-Options.html#index-munaligned-access-1'})
+
+
+def write_url_file(index, optfile, dstfile):
+    """
+    Write the contents of a .opt.urls file for optfile to dstfile.
+
+    index is the Index to look options up in, optfile is the parsed
+    OptFile, and dstfile is a writable text file object.
+
+    After a header comment, each record of optfile for which index has
+    any URL suffix (in any language) produces, followed by a blank line:
+    - a "; skipping ..." comment for each language whose suffixes are all
+      filtered out as being for another target, or for which more than
+      one suffix remains (the candidates are then listed, sorted);
+    - if at least one language has exactly one suffix left, the option
+      name on a line, then a line of space-separated UrlSuffix(...) and
+      LangUrlSuffix_<lang>(...) directives.
+    Records with no URL suffix at all produce no output.
+    """
+    dstfile.write('; Autogenerated by regenerate-opt-urls.py from %s'
+                  ' and generated HTML\n\n'
+                  % optfile.rel_path)
+    for record in optfile.records:
+        # The first line of a record is the option name, without its
+        # leading '-'.
+        opt = '-' + record[0].strip()
+        url_suffixes_per_lang = {
+            lang: index.get_url_suffixes(opt, language=lang)
+            for lang in index.get_languages()}
+        if not any(url_suffixes_per_lang.values()):
+            continue
+        directives = []
+        for lang, url_suffixes in url_suffixes_per_lang.items():
+            if not url_suffixes:
+                continue
+            # Description of the directive, for use in "skipping" comments
+            if lang:
+                directive = 'LangUrlSuffix_%s for %r' % (lang, opt[1:])
+            else:
+                directive = 'UrlSuffix for %r' % opt[1:]
+            url_suffixes = filter_urlsuffixes_for_optfile(optfile,
+                                                          url_suffixes)
+            if not url_suffixes:
+                dstfile.write('; skipping %s due to finding no URLs\n'
+                              % directive)
+            elif len(url_suffixes) > 1:
+                dstfile.write('; skipping %s due to multiple URLs:\n'
+                              % directive)
+                for u in sorted(url_suffixes):
+                    dstfile.write(';   duplicate: %r\n' % u)
+            elif lang:
+                directives.append('LangUrlSuffix_%s(%s)'
+                                  % (lang, next(iter(url_suffixes))))
+            else:
+                directives.append('UrlSuffix(%s)'
+                                  % next(iter(url_suffixes)))
+        if directives:
+            dstfile.write('%s\n' % opt[1:])
+            dstfile.write(' '.join(directives) + '\n')
+        dstfile.write('\n')
+
+
+def main(args):
+    """
+    Regenerate the .opt.urls files.
+
+    Build an Index from the Option-Index.html files in the gcc, gdc and
+    gfortran subdirectories of args.base_html_dir, then, for every
+    FOO.opt found anywhere below args.src_gcc_dir, (over)write
+    FOO.opt.urls in the same directory.
+
+    Note that each .opt file's path relative to args.src_gcc_dir has to
+    start with 'gcc' (OptFile asserts this).
+    """
+    index = Index()
+    # (subdirectory of the HTML output, language) for each manual; the
+    # language is None for the manual that yields plain UrlSuffix
+    # directives.
+    manuals = (('gcc', None),
+               ('gdc', 'D'),
+               ('gfortran', 'Fortran'))
+    for html_subdir, language in manuals:
+        index.parse_option_index(
+            args.base_html_dir / html_subdir / 'Option-Index.html',
+            language=language)
+    if 0:
+        pprint(index.entries)
+    for root, dirs, files in os.walk(args.src_gcc_dir):
+        for f in files:
+            if not f.endswith('.opt'):
+                continue
+            opt_path = os.path.join(root, f)
+            rel_path = os.path.relpath(opt_path, args.src_gcc_dir)
+            optfile = OptFile(opt_path, rel_path)
+            if 0:
+                pprint(optfile.rel_path)
+                pprint(optfile.records)
+            urlfile = opt_path + '.urls'
+            with open(urlfile, 'w') as dstfile:
+                write_url_file(index, optfile, dstfile)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description=DESCRIPTION,
+        formatter_class=argparse.RawDescriptionHelpFormatter)
+    parser.add_argument('base_html_dir', type=Path,
+                        help='directory of generated HTML, containing the'
+                        ' gcc, gdc and gfortran subdirectories')
+    parser.add_argument('src_gcc_dir', type=Path,
+                        help='top of the source tree (the directory that'
+                        ' contains gcc/)')
+    parser.add_argument('--unit-test', action='store_true',
+                        help='run the unit tests instead of regenerating'
+                        ' the .opt.urls files')
+    args = parser.parse_args()
+
+    if args.unit_test:
+        # Module-level name read by TestParsingIndex.
+        INPUT_HTML_PATH = args.base_html_dir
+        # Give unittest its own argv, rather than the arguments parsed
+        # above.
+        unittest.main(argv=[sys.argv[0], '-v'])
+    else:
+        main(args)
-- 
2.26.3

Reply via email to