Hi, On Tue, 7 Oct 2025 at 16:12, Nazir Bilal Yavuz <[email protected]> wrote: > > The Meson build did not include tab and non-breaking space checks for > the docs. The attached patch adds these checks and includes a few > related improvements.
I have updated v6 to use a stamp file, as Andres suggested [1], to ensure a dependency between the syntax check and the postgres-full.xml file in the meson build. [1] https://postgr.es/m/tcjetkmnm4vtuyxakqvkqokvow6csjokdwwtplc5nl4zbpyjoo%40jjfhsuqa6fno -- Regards, Nazir Bilal Yavuz Microsoft
From 2549adca5facd392d68545c1a61b10e1cf8d4bde Mon Sep 17 00:00:00 2001 From: Nazir Bilal Yavuz <[email protected]> Date: Tue, 7 Oct 2025 15:18:03 +0300 Subject: [PATCH v6] Improve docs syntax checking Move the checks out of the Makefile into a perl script that can be called from both the Makefile and meson.build. The set of files checked is simplified, so it is just all the sgml and xsl files found in docs/src/sgml directory tree. Along the way make some adjustments to .cirrus.tasks.yml to support this better in CI. Also ensure that the checks are run while generating the postgres-full.xml. Author: Nazir Bilal Yavuz <[email protected]> Co-Author: Andrew Dunstan <[email protected]> --- doc/src/sgml/Makefile | 17 +--- doc/src/sgml/docbook-test.sgml | 9 ++ doc/src/sgml/meson.build | 39 +++++++- doc/src/sgml/sgml_syntax_check.pl | 160 ++++++++++++++++++++++++++++++ .cirrus.tasks.yml | 4 + 5 files changed, 214 insertions(+), 15 deletions(-) create mode 100644 doc/src/sgml/docbook-test.sgml create mode 100755 doc/src/sgml/sgml_syntax_check.pl diff --git a/doc/src/sgml/Makefile b/doc/src/sgml/Makefile index b53b2694a6b..9b6f65ad215 100644 --- a/doc/src/sgml/Makefile +++ b/doc/src/sgml/Makefile @@ -69,6 +69,7 @@ ALL_IMAGES := $(wildcard $(srcdir)/images/*.svg) # files into one big file). This helps tools that don't understand # vpath builds (such as dbtoepub). postgres-full.xml: postgres.sgml $(ALL_SGML) + $(PERL) $(srcdir)/sgml_syntax_check.pl --srcdir $(srcdir) $(XMLLINT) $(XMLINCLUDE) --output $@ --noent --valid $< @@ -200,8 +201,8 @@ MAKEINFO = makeinfo ## # Quick syntax check without style processing -check: postgres.sgml $(ALL_SGML) check-tabs check-nbsp - $(XMLLINT) $(XMLINCLUDE) --noout --valid $< +check: postgres.sgml $(ALL_SGML) + $(PERL) $(srcdir)/sgml_syntax_check.pl --xmllint "$(XMLLINT)" --xmllint_input $< --srcdir $(srcdir) ## @@ -261,18 +262,6 @@ clean-man: endif # sqlmansectnum != 7 -# tabs are harmless, but it is best to avoid them in SGML files -check-tabs: - @( ! grep ' ' $(wildcard $(srcdir)/*.sgml $(srcdir)/func/*.sgml $(srcdir)/ref/*.sgml $(srcdir)/*.xsl) ) || \ - (echo "Tabs appear in SGML/XML files" 1>&2; exit 1) - -# Non-breaking spaces are harmless, but it is best to avoid them in SGML files. -# Use perl command because non-GNU grep or sed could not have hex escape sequence. -check-nbsp: - @ ( $(PERL) -ne '/\xC2\xA0/ and print("$$ARGV:$$_"),$$n++; END {exit($$n>0)}' \ - $(wildcard $(srcdir)/*.sgml $(srcdir)/func/*.sgml $(srcdir)/ref/*.sgml $(srcdir)/*.xsl $(srcdir)/images/*.xsl) ) || \ - (echo "Non-breaking spaces appear in SGML/XML files" 1>&2; exit 1) - ## ## Clean ## diff --git a/doc/src/sgml/docbook-test.sgml b/doc/src/sgml/docbook-test.sgml new file mode 100644 index 00000000000..242a52676e0 --- /dev/null +++ b/doc/src/sgml/docbook-test.sgml @@ -0,0 +1,9 @@ +<!-- doc/src/sgml/docbook-test.sgml --> + +<!-- This file is used to check if the DocBook can be found --> + +<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" + "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd"> +<book> + <title>DocBook Test</title> +</book> diff --git a/doc/src/sgml/meson.build b/doc/src/sgml/meson.build index 6ae192eac68..e5b7e58f6ad 100644 --- a/doc/src/sgml/meson.build +++ b/doc/src/sgml/meson.build @@ -96,6 +96,26 @@ xmllint = xmltools_wrapper + [ '--tool', xmllint_bin, '--', ] +sgml_syntax_check = files( + 'sgml_syntax_check.pl' +) + +# Create a stamp file for sgml syntax check and make postgres_full_xml +# depend on it. So, this check will run before the generation of +# postgres_full_xml. +sgml_check_stamp = custom_target('sgml-check-stamp', + output: 'sgml-check-stamp', + depfile: 'sgml-check-stamp.d', + command: [ + perl, sgml_syntax_check, + '--srcdir', meson.current_source_dir(), + '--builddir', meson.current_build_dir(), + '--dep_file', '@DEPFILE@', + '--stamp_file', '@OUTPUT@', + ], + build_by_default: false, +) + # Run validation only once, common to all subsequent targets. While # we're at it, also resolve all entities (that is, copy all included # files into one big file). This helps tools that don't understand @@ -106,7 +126,7 @@ postgres_full_xml = custom_target('postgres-full.xml', depfile: 'postgres-full.xml.d', command: [xmllint, '--nonet', '--noent', '--valid', '--path', '@OUTDIR@', '-o', '@OUTPUT@', '@INPUT@'], - depends: doc_generated, + depends: [doc_generated, sgml_check_stamp], build_by_default: false, ) docs += postgres_full_xml @@ -306,3 +326,20 @@ endif if alldocs.length() != 0 alias_target('alldocs', alldocs) endif + +test( + 'sgml_syntax_check', + perl, + protocol: 'exitcode', + suite: 'doc', + args: [ + sgml_syntax_check, + '--srcdir', meson.current_source_dir(), + '--builddir', meson.current_build_dir(), + '--xmllint', '@0@ --nonet'.format(xmllint_bin.full_path()), + '--xmllint_input', 'postgres.sgml' + ], + depends: doc_generated +) + +testprep_targets += doc_generated diff --git a/doc/src/sgml/sgml_syntax_check.pl b/doc/src/sgml/sgml_syntax_check.pl new file mode 100755 index 00000000000..8748a42cc99 --- /dev/null +++ b/doc/src/sgml/sgml_syntax_check.pl @@ -0,0 +1,160 @@ +#!/usr/bin/perl +# +# Perl script that checks SGML/XSL syntax and formatting issues. +# +# doc/src/sgml/sgml_syntax_check.pl +# +# Copyright (c) 2025, PostgreSQL Global Development Group + +use strict; +use warnings FATAL => 'all'; +use Getopt::Long; + +use File::Find; + +my $srcdir; +my $builddir; +my $xmllint; +my $xmllint_input; +my $dep_file; +my $stamp_file; + +GetOptions( + 'srcdir:s' => \$srcdir, + 'builddir:s' => \$builddir, + 'xmllint:s' => \$xmllint, + 'xmllint_input:s' => \$xmllint_input, + 'dep_file:s' => \$dep_file, + 'stamp_file:s' => \$stamp_file) or die "$0: wrong arguments"; + +die "$0: --srcdir must be specified\n" unless defined $srcdir; + +my $xmlinclude = "--path . --path $srcdir"; +$xmlinclude .= " --path $builddir" if defined $builddir; + +# Find files to process - all the sgml and xsl files (including in subdirectories) +my @files_to_process; +my @dirs_to_search = ($srcdir); +push @dirs_to_search, $builddir if defined $builddir; +find( + sub { + return unless -f $_; + return if $_ !~ /\.(sgml|xsl)$/; + push @files_to_process, $File::Find::name; + }, + @dirs_to_search,); + +# tabs and non-breaking spaces are harmless, but it is best to avoid them in SGML files +sub check_tabs_and_nbsp +{ + my $errors = 0; + for my $f (@files_to_process) + { + open my $fh, "<:encoding(UTF-8)", $f or die "Can't open $f: $!"; + my $line_no = 0; + while (<$fh>) + { + $line_no++; + if (/\t/) + { + print STDERR "Tab found in $f:$line_no\n"; + $errors++; + } + if (/\xC2\xA0/) + { + print STDERR "$f:$line_no: contains non-breaking space\n"; + $errors++; + } + } + close($fh); + } + + if ($errors) + { + remove_stamp_file(); + die "Tabs and/or non-breaking spaces appear in SGML/XML files\n"; + } +} + +sub test_docbook +{ + my $cmd = "$xmllint $xmlinclude --noout --valid docbook-test.sgml"; + if (system($cmd) != 0) + { + remove_stamp_file(); + + print STDERR "DocBook DTD file can not be found\n"; + # Exit code 77 is used for skipping the test instead of failing it + exit(77); + } +} + +sub run_xmllint +{ + # Check DocBook DTD file before running xmllint + test_docbook(); + + my $cmd = "$xmllint $xmlinclude --noout --valid $xmllint_input"; + my $res = system($cmd); + if ($res != 0) + { + remove_stamp_file(); + die "xmllint validation failed\n"; + } +} + +sub write_dep_file() +{ + if ($dep_file) + { + open my $fh, '>', $dep_file or die "can't open $dep_file: $!"; + + foreach my $file (@files_to_process) + { + print $fh "$dep_file : $file\n"; + } + + close $fh; + } +} + +sub create_stamp_file +{ + # All checks are passed, we can create a stamp file now + if ($stamp_file) + { + # Avoid touching existing stamp file to prevent unnecessary rebuilds + if (!(-f $stamp_file)) + { + open my $fh, '>', $stamp_file + or die "can't open $stamp_file: $!"; + close $fh; + } + + } +} + +sub remove_stamp_file +{ + if ($stamp_file) + { + unlink $stamp_file; + } +} + +# Create a dependency file for meson build +write_dep_file(); + +### Do the checks +check_tabs_and_nbsp(); + +# Run xmllint check only if its arguments are passed. Because generation of +# postgres-full.xml already does xmllint check, we don't want to do this check +# twice. +if ($xmllint && $xmllint_input) +{ + run_xmllint(); +} +### + +create_stamp_file(); diff --git a/.cirrus.tasks.yml b/.cirrus.tasks.yml index eca9d62fc22..701a508e396 100644 --- a/.cirrus.tasks.yml +++ b/.cirrus.tasks.yml @@ -627,6 +627,9 @@ task: TEST_JOBS: 8 IMAGE: ghcr.io/cirruslabs/macos-runner:sonoma + # Ensure DocBook 4.5 DTD is available for syntax checks + XML_CATALOG_FILES: /opt/local/share/xml/docbook/4.5/catalog.xml + CIRRUS_WORKING_DIR: ${HOME}/pgsql/ CCACHE_DIR: ${HOME}/ccache MACPORTS_CACHE: ${HOME}/macports-cache @@ -641,6 +644,7 @@ task: MACOS_PACKAGE_LIST: >- ccache + docbook-xml-4.5 icu kerberos5 lz4 -- 2.51.0
