From 08eaa8e499a7866bd5d1167faafda7e1c013a115 Mon Sep 17 00:00:00 2001
From: John Morris <john.morris@crunchydata.com>
Date: Thu, 1 Feb 2024 15:54:41 -0800
Subject: [PATCH] Rebased doxygen doc

---
 doc/doxygen/Doxyfile.in    |  71 ++++++++++++++++
 doc/doxygen/doxy_filter.l  | 168 +++++++++++++++++++++++++++++++++++++
 doc/doxygen/meson.build    |  79 +++++++++++++++++
 doc/src/sgml/docguide.sgml |   4 +
 meson.build                |   1 +
 meson_options.txt          |   2 +
 6 files changed, 325 insertions(+)
 create mode 100644 doc/doxygen/Doxyfile.in
 create mode 100644 doc/doxygen/doxy_filter.l
 create mode 100644 doc/doxygen/meson.build

diff --git a/doc/doxygen/Doxyfile.in b/doc/doxygen/Doxyfile.in
new file mode 100644
index 0000000000..76cf4cdd15
--- /dev/null
+++ b/doc/doxygen/Doxyfile.in
@@ -0,0 +1,71 @@
+################################################################################
+#
+# This file holds Doxygen settings used for creating PostgreSQL Documentation.
+# It only shows non-default values.
+# For a list of config values and what they mean, see the
+# complete Doxyfile shipped with Doxygen.
+#
+# Values surrounded by "@" are filled in by Meson config.
+#
+# Creating DOT graphs is very slow, so graphs are disabled by default.
+# If you want to enable them, configure the build with
+#    meson configure -Ddoxygen_graphs=true
+#
+#################################################################################
+
+# Postgres project info
+PROJECT_NAME           = PostgreSQL
+PROJECT_NUMBER         = @PROJECT_NUMBER@
+PROJECT_BRIEF          = "The world's most advanced open source database"
+PROJECT_LOGO           =
+
+# Where the output files go.
+OUTPUT_DIRECTORY       = @OUTPUT_DIRECTORY@
+CREATE_SUBDIRS         = YES
+STRIP_FROM_PATH        = @STRIP_FROM_PATH@
+
+# Output file format.
+GENERATE_LATEX         = NO
+GENERATE_HTML          = YES
+GENERATE_TREEVIEW      = YES
+FULL_SIDEBAR           = YES
+
+# What the output contains.
+JAVADOC_AUTOBRIEF      = YES
+JAVADOC_BANNER         = YES
+OPTIMIZE_OUTPUT_FOR_C  = YES
+DISTRIBUTE_GROUP_DOC   = YES
+INLINE_SIMPLE_STRUCTS  = YES
+TYPEDEF_HIDES_STRUCT   = YES
+SHOW_INCLUDE_FILES     = NO
+SOURCE_BROWSER         = YES
+STRIP_CODE_COMMENTS    = NO
+
+# Performance
+LOOKUP_CACHE_SIZE      = 3
+NUM_PROC_THREADS       = 0
+
+# Which files to process.
+INPUT                  = @INPUT@
+RECURSIVE              = YES
+EXCLUDE_PATTERNS       = ppport.h c.h postgres.h */test/*
+EXCLUDE_SYMBOLS        = __attribute__
+INPUT_FILTER           = @INPUT_FILTER@
+FILTER_PATTERNS        = *.c *.h
+CLANG_ASSISTED_PARSING = YES
+
+# Graphs.
+HAVE_DOT               = @GRAPHS@
+CLASS_GRAPH            = @GRAPHS@
+COLLABORATION_GRAPH    = @GRAPHS@
+GROUP_GRAPHS           = @GRAPHS@
+DOT_UML_DETAILS        = YES
+INCLUDE_GRAPH          = @GRAPHS@
+INCLUDED_BY_GRAPH      = @GRAPHS@
+CALL_GRAPH             = @GRAPHS@
+CALLER_GRAPH           = @GRAPHS@
+GRAPHICAL_HIERARCHY    = @GRAPHS@
+DIRECTORY_GRAPH        = @GRAPHS@
+DOT_IMAGE_FORMAT       = svg
+INTERACTIVE_SVG        = YES
+DOT_MULTI_TARGETS      = YES
diff --git a/doc/doxygen/doxy_filter.l b/doc/doxygen/doxy_filter.l
new file mode 100644
index 0000000000..3eb7042526
--- /dev/null
+++ b/doc/doxygen/doxy_filter.l
@@ -0,0 +1,168 @@
+ /*******************************************************************************************
+ A Doxygen filter which annotates comments in a C file.
+
+ The goal is to take an existing, non-doxygen comment style and turn it into doxygen comments.
+ It is packaged as a filter. The commented code never gets changed, but doxygen converts
+ the comments "on the fly".
+
+ The idea is to identify all comments in the code while keeping track of a small amount of context from
+ around the comment.  The context allows us to determine:
+    - Is the comment at the beginning of the file?
+    - Is the comment at the global level in the file?
+    - Is the comment inside a struct/union/enum?
+    - Does the comment trail other statements? Or is it in front of other statements. (What about inside?)
+
+ Basically, the filter does the following:
+     - A comment at the start of a file gets the @file tag added.
+       The tag is necessary for doxygen to recognize C code.
+       By convention, this first comment describes the overall purpose of the file.
+    -  Doxygen comments are passed on through.
+       Doxygen comments include "/// ..." and "/x*** ... /" style comments.
+    -  Regular comments at the global level and inside structs/enums/unions are converted into doxygen comments.
+       The assumption is these comments describe fields and entities like procedures, variables and macros.
+    -  Trailing comments are associated with the previous item.
+           "int Foo; // Comment about Foo"  becomes a doxygen comment describing Foo.
+
+ The filter doesn't handle all existing doxygen comments. For example, it doesn't recognize the grouping tags
+     "//{"  and "//}", nor does it passively echo other doxygen constructs.  Also, it has a very limited
+     understanding of C syntax. For now, it is enough to get by.
+
+ TODO? respect conditional compilation, add a File comment if none exists, be robust in not altering existing doxygen comments.
+ ******************************************************************************************************************************/
+%option noyywrap nounput noinput
+%{
+#include <stdbool.h>
+%}
+    // Scanner state describing the context around a comment. Updated by the rule actions below.
+    int lines = 0; // Newlines seen by the newline rule. Newlines inside comments and #defines are not counted.
+    int level = 0; // Level of nesting within braces and parentheses. Zero means global level.
+    bool trailing = false;  // Is there a statement before the comment on this line? If so, it is a trailing comment.
+    bool complexData = false; // Are we inside a global complex data type (struct,union,enum)?
+    char *fileName;  // The name of the file we are filtering; set by main() from its argument.
+
+    // Forward reference. Process comments when they are discovered.
+    static void scanComment(char *comment);
+
+%%
+
+  /* Complex data types */             /* Make note when we are inside one. Finishes with a global semicolon. */
+struct|union|enum                      ECHO; if (level == 0) complexData = true;
+";"                                    ECHO; if (level == 0) complexData = false; trailing = true;
+
+  /* Other keywords or identifiers */  /* Eat them whole so a longer name beginning with struct is not taken for the keyword. */
+[a-zA-Z_$]+[a-zA-Z_$0-9]*             ECHO;
+
+  /* Quoted string literals. */        /* Pass them through. */
+\"([^\\\"]|\\.)*\"                     ECHO;
+
+  /* Character literals */             /* Pass them through. Any single-character backslash escape is accepted. */
+'(\\.|[^\\'])'                         ECHO;
+
+   /* Single line comment. */          /* Scan and convert to doxygen as needed. */
+"//".*                                 scanComment(yytext);
+
+   /* Multi line comment. */           /* Scan and convert to doxygen as needed. */
+[/][*][^*]*[*]+([^*/][^*]*[*]+)*[/]    scanComment(yytext);
+
+  /* Multi line #define ... */         /* Skip over #defines since they may contain unbalanced braces. */
+^#define(.*\\\n)*(.*)                  ECHO;
+
+  /* New line. */                      /* Any subsequent comment is leading, and we are no longer at start of file. */
+"\n"                                   ECHO; lines++; trailing = false;
+
+ /* Track nesting depth. */            /* Are we at the global level or not? Are we in arg list or not? */
+                                       /*   We assume well formed code, so a parenthesis will never match a brace. */
+"{"|"("                                ECHO; level++;
+"}"|")"                                ECHO; level--; if (level == 0) complexData = false; trailing = true;
+
+ /* Anything else */                   /* Pass it through */
+.                                      ECHO;
+
+%%
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+
+/*
+ * Custom banner character to be removed from comments.
+ * We'll hardcode it to suit PostgreSQL, but it should be set through a command line arg.
+ */
+char customBanner = '-';
+
+/*************************************************************************************************
+A simple program which reads a file, updates the comments, and writes to stdout.
+This is intended to be used as a doxygen filter, converting existing comments to doxygen comments.
+Takes exactly one argument, the file to filter. Returns 1 on a usage or open error, else the result of yylex().
+**********************************************************************************************/
+int main(int argc, char**argv) {
+    // Verify we have a single argument. Report usage under the name we were actually invoked as.
+    if (argc != 2) {
+        fprintf(stderr, "Usage: %s <filename>\n", argc > 0 ? argv[0] : "doxy_filter");
+        return 1;
+    }
+
+    // Reopen stdin on the file so the scanner reads it. Keep the name; scanComment() uses it for the @file tag.
+    fileName = argv[1];
+    if (freopen(fileName, "r", stdin) == NULL) {
+        fprintf(stderr, "Unable to open file %s: %s\n", fileName, strerror(errno));
+        return 1;
+    }
+
+    // Now, parse the file, sending output to stdout.
+    return yylex();
+}
+
+// Write a NUL-terminated string to stdout. The string is only read, hence const.
+static void putstr(const char *str) {
+    fputs(str, stdout);
+}
+
+
+/*******************************************************************************************************
+Strip banners from a comment in place, where a banner is a run of two or more 'c' characters.
+*********************************************************************************************************/
+static void removeCustomBanner(char* str, char c) {
+    size_t kept = 0;        // Count of characters retained so far; also the next write position.
+    bool inBanner = false;  // True while the previous character was dropped as part of a banner.
+    // A 'c' belongs to a banner if it continues one or if another 'c' follows it; a lone 'c' is kept.
+    for (size_t i = 0; str[i] != '\0'; i++) {
+        inBanner = (str[i] == c) && (inBanner || str[i + 1] == c);
+        if (inBanner)
+            continue;
+        str[kept++] = str[i];
+    }
+    str[kept] = '\0';
+}
+
+/********************************************************************************************************
+Emit one comment, upgrading it to a doxygen comment when the scanner context calls for it.
+
+"comment" is the full text of the comment, including its opening // or slash-star. It is edited in
+place (banners are stripped). Reads the scanner globals level, complexData, trailing and lines.
+*********************************************************************************************************/
+static void scanComment(char *comment) {
+    const char opener = comment[0];   // Always '/'.
+    const char marker = comment[1];   // '/' for a line comment, '*' for a block comment.
+    const bool documents = (level == 0) || complexData;
+
+    // The two-character comment opener is echoed unchanged.
+    putchar(opener);
+    putchar(marker);
+
+    // Only comments at global level or inside a struct/union/enum are converted.
+    if (documents) {
+        // Repeat the marker to make this a doxygen comment; an existing one just gains a harmless extra char.
+        putchar(marker);
+
+        // A comment that follows a statement on the same line is attached to that statement.
+        if (trailing)
+            putstr("< ");
+
+        // No newline scanned yet means we are still at the start of the file: tag the comment with @file.
+        if (lines == 0)
+            printf(" @file %s ", fileName);
+    }
+
+    // Drop custom banners, then emit everything after the two-character opener.
+    removeCustomBanner(comment, customBanner);
+    putstr(comment + 2);
+}
diff --git a/doc/doxygen/meson.build b/doc/doxygen/meson.build
new file mode 100644
index 0000000000..85dab357e3
--- /dev/null
+++ b/doc/doxygen/meson.build
@@ -0,0 +1,79 @@
+
+# Find doxygen command
+doxygen_exe = find_program('doxygen', required: false)
+
+# Do we want graphs? Make sure we have dot.
+graphs = get_option('doxygen_graphs')
+dot = find_program('dot', required: false)
+
+# Pick a message about graphs to display when generating documents.
+if (graphs and not dot.found())
+    graph_txt = 'Install dot if you wish to create Doxygen graphs.'
+    graphs = false
+elseif (graphs)
+    graph_txt = 'If doxygen takes too long, try disabling graphs. "meson configure -Ddoxygen_graphs=false"'
+else
+    graph_txt = 'Doxygen graphs are disabled. Enable with "meson configure -Ddoxygen_graphs=true"'
+endif
+
+
+# Display a message about graphs when generating documents.
+graph_message = custom_target('graph_message',
+                                    output: 'graph_message',
+                                    command: [python, '-c', 'print(\'' +  graph_txt  + '\')' ],
+                                    build_by_default: false)
+
+# Build the doxygen 'C' filter: generate doxy_filter.c from doxy_filter.l via flex_cmd (defined outside this file), then compile it.
+doxygen_filter_c = custom_target('doxy_filter_c',
+                               build_by_default: false,
+                               input: 'doxy_filter.l',
+                               output: 'doxy_filter.c',
+                               command: flex_cmd)
+doxygen_filter_exe = executable('doxy_filter',
+                              doxygen_filter_c,
+                              build_by_default: false)
+
+# Source code directories
+doxygen_source_dirs = [
+                       join_paths(meson.source_root(), 'src'),
+                       join_paths(meson.source_root(), 'contrib'),
+                      ]
+
+# Configure the Doxyfile: each value set here replaces the matching @NAME@ placeholder in Doxyfile.in.
+doxygen_config = configuration_data()
+doxygen_config.set('INPUT', ' '.join(doxygen_source_dirs))
+doxygen_config.set('OUTPUT_DIRECTORY', meson.current_build_dir())
+doxygen_config.set('PROJECT_NUMBER', pg_version)
+doxygen_config.set('INCLUDE_PATH', ' '.join(postgres_inc_d))  # NOTE(review): Doxyfile.in has no @INCLUDE_PATH@ placeholder, so this looks unused - confirm.
+doxygen_config.set('INPUT_FILTER', doxygen_filter_exe.full_path())
+doxygen_config.set('STRIP_FROM_PATH', meson.source_root())
+if (graphs)
+    doxygen_config.set('GRAPHS', 'YES')
+else
+    doxygen_config.set('GRAPHS', 'NO')
+endif
+
+doxyfile = configure_file(input : 'Doxyfile.in',
+                          output : 'Doxyfile',
+                          configuration : doxygen_config)
+
+# Create a target to generate doxygen html,
+if (doxygen_exe.found())
+    doxygen_html = custom_target('html',
+                                 input : doxyfile,
+                                 output: 'html',
+                                 command : [doxygen_exe, doxyfile],
+                                 depends: doxygen_filter_exe,
+                                 build_by_default: false)
+
+# or, a dummy target if doxygen is not installed.
+else
+    doxygen_html = custom_target('html',
+                                 output: 'html',
+                                 command : [python, '-c', 'print(\'Please install doxygen to generate documentation\')' ],
+                                 build_by_default: false)
+endif
+
+
+# use "ninja doxygen" to build doxygen
+alias_target('doxygen', [graph_message, doxygen_html])
diff --git a/doc/src/sgml/docguide.sgml b/doc/src/sgml/docguide.sgml
index db4bcce56e..3b812b31ec 100644
--- a/doc/src/sgml/docguide.sgml
+++ b/doc/src/sgml/docguide.sgml
@@ -33,6 +33,10 @@
   Additionally, a number of plain-text <filename>README</filename> files can
   be found throughout the <productname>PostgreSQL</productname> source tree,
   documenting various implementation issues.
+  Doxygen output describing <productname>PostgreSQL</productname> functions
+  and data types can be found at <ulink url="https://doxygen.postgresql.org"></ulink>.
+  If desired, Doxygen output can be generated locally with the
+  <command>ninja doxygen</command> command.
  </para>
 
  <para>
diff --git a/meson.build b/meson.build
index 8ed51b6aae..8ad652a511 100644
--- a/meson.build
+++ b/meson.build
@@ -2902,6 +2902,7 @@ subdir('src/interfaces/libpq/test')
 subdir('src/interfaces/ecpg/test')
 
 subdir('doc/src/sgml')
+subdir('doc/doxygen')
 
 generated_sources_ac += {'': ['GNUmakefile']}
 
diff --git a/meson_options.txt b/meson_options.txt
index 249ecc5ffd..81cbb5bb15 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -27,6 +27,8 @@ option('krb_srvnam', type: 'string', value: 'postgres',
 option('system_tzdata', type: 'string', value: '',
   description: 'Use system time zone data in specified directory')
 
+option('doxygen_graphs', type: 'boolean', value: false,
+   description: 'Include graphs in "ninja doxygen" output. Generating graphs can be very slow.')
 
 # Defaults
 
-- 
2.33.0

