Many years ago we in effect moved maintenance of the typedefs list for pgindent into the buildfarm client. The reason was that there were a number of typedefs that were platform dependent, so we wanted to have coverage across a number of platforms to get a comprehensive list.

Lately, this has caused some dissatisfaction, with people wanting the logic for this moved back into core code, among other reasons so we're not reliant on one person - me - for changes. I share this dissatisfaction. Indeed, IIRC the use of the buildfarm was originally intended as something of a stopgap. Still, we do need multi-platform support.

Attached is an attempt to thread this needle. The core is a new perl module that imports the current buildfarm client logic. The intention is that once we have this, the buildfarm client will switch to using the module (if found) rather than its own built-in logic. There is precedent for this sort of arrangement (AdjustUpgrade.pm). Accompanying the new module is a standalone perl script that uses the new module, and replaces the current shell script (thus making it more portable).

One thing this is intended to provide for is getting typedefs for non-core code such as third party extensions, which isn't entirely difficult (<https://adpgtech.blogspot.com/2015/05/running-pgindent-on-non-core-code-or.html>) but it's not as easy as it should be either.

Comments welcome.


cheers


andrew

--
Andrew Dunstan
EDB: https://www.enterprisedb.com
diff --git a/src/tools/find_typedef b/src/tools/find_typedef
deleted file mode 100755
index 24e9b76651..0000000000
--- a/src/tools/find_typedef
+++ /dev/null
@@ -1,53 +0,0 @@
-#!/bin/sh
-
-# src/tools/find_typedef
-
-# This script attempts to find all typedef's in the postgres binaries
-# by using 'objdump' or local equivalent to print typedef debugging symbols.
-# We need this because pgindent needs a list of typedef names.
-#
-# For this program to work, you must have compiled all code with
-# debugging symbols.
-#
-# We intentionally examine all files in the targeted directories so as to
-# find both .o files and executables.  Therefore, ignore error messages about
-# unsuitable files being fed to objdump.
-#
-# This is known to work on Linux and on some BSDen, including macOS.
-#
-# Caution: on the platforms we use, this only prints typedefs that are used
-# to declare at least one variable or struct field.  If you have say
-# "typedef struct foo { ... } foo;", and then the structure is only ever
-# referenced as "struct foo", "foo" will not be reported as a typedef,
-# causing pgindent to indent the typedef definition oddly.  This is not a
-# huge problem, since by definition there's just the one misindented line.
-#
-# We get typedefs by reading "STABS":
-#    http://www.informatik.uni-frankfurt.de/doc/texi/stabs_toc.html
-
-
-if [ "$#" -eq 0 -o ! -d "$1" ]
-then	echo "Usage:  $0 postgres_binary_directory [...]" 1>&2
-	exit 1
-fi
-
-for DIR
-do	# if objdump -W is recognized, only one line of error should appear
-	if [ `objdump -W 2>&1 | wc -l` -eq 1 ]
-	then	# Linux
-		objdump -W "$DIR"/* |
-		egrep -A3 '\(DW_TAG_typedef\)' |
-		awk ' $2 == "DW_AT_name" {print $NF}'
-	elif [ `readelf -w 2>&1 | wc -l` -gt 1 ]
-	then	# FreeBSD, similar output to Linux
-		readelf -w "$DIR"/* |
-		egrep -A3 '\(DW_TAG_typedef\)' |
-		awk ' $1 == "DW_AT_name" {print $NF}'
-	fi
-done |
-grep -v ' ' | # some typedefs have spaces, remove them
-sort |
-uniq |
-# these are used both for typedefs and variable names
-# so do not include them
-egrep -v '^(date|interval|timestamp|ANY)$'
diff --git a/src/tools/find_typedefs/PostgreSQL/FindTypedefs.pm b/src/tools/find_typedefs/PostgreSQL/FindTypedefs.pm
new file mode 100644
index 0000000000..4f8167c824
--- /dev/null
+++ b/src/tools/find_typedefs/PostgreSQL/FindTypedefs.pm
@@ -0,0 +1,238 @@
+############################################################################
+#
+# PostgreSQL/FindTypedefs.pm
+#
+# Module providing a function to find typedefs
+#
+# Copyright (c) 2021-2024, PostgreSQL Global Development Group
+#
+############################################################################
+
+package PostgreSQL::FindTypedefs;
+
+use strict;
+use warnings FATAL => 'all';
+
+use Exporter qw(import);
+our @EXPORT = qw(typedefs);
+
+use Config;
+use File::Find;
+use Scalar::Util qw(reftype);
+
+# Returns a reference to a sorted array of typedef names: those found in the
+# debug info of the binaries that also occur as a word in some source file.
+# Arguments are given as a hash. Recognized names are:
+#    binloc - where to find binary files. Required.
+#    srcdir - where to find source files. Required.
+#    msvc - boolean for whether we are using MSVC. Optional, default false.
+#    hostopt - --host= setting if we are cross-compiling. Optional, default "".
+#
+# If binloc is given as an arrayref instead of as a scalar, it is taken
+# as a list of binary files to be examined rather than as a path to be
+# explored using File::Find / glob().
+#
+# If binloc is a scalar, then on macOS it's the path to the root of the
+# build directory, where we will look at the .o files. Everywhere else it
+# needs to be the root of an installation, with bin and lib subdirectories,
+# where we will examine built executables and library files.
+#
+sub typedefs
+{
+	my %args = @_;
+
+	my $binloc = $args{binloc} || die "no binloc specified";
+	my $srcdir = $args{srcdir} || die "no srcdir specified";
+	my $using_msvc = $args{msvc} || 0;
+	my $hostopt = $args{hostopt} || "";
+
+	# work around the fact that ucrt/binutils objdump is far slower
+	# than the one in msys/binutils
+	local $ENV{PATH} = $ENV{PATH};
+	$ENV{PATH} = "/usr/bin:$ENV{PATH}" if $Config{osname} eq 'msys';
+
+	my $hostobjdump = $hostopt ? "$hostopt-objdump" : "";
+	my $objdump = 'objdump';
+	my $sep = $using_msvc ? ';' : ':';
+
+	# if we have a hostobjdump, use it only if it is found earlier in the path
+	foreach my $p (split(/$sep/, $ENV{PATH}))
+	{
+		last unless $hostobjdump;
+		last if (-e "$p/objdump" || -e "$p/objdump.exe");
+		if (-e "$p/$hostobjdump" || -e "$p/$hostobjdump.exe")
+		{
+			$objdump = $hostobjdump;
+			last;
+		}
+	}
+	my @err = `$objdump -W 2>&1`;    # a single line of output means -W is recognized
+	my @readelferr = `readelf -w 2>&1`;    # more than 10 lines is taken to mean readelf is usable
+	my $using_osx = (`uname` eq "Darwin\n");
+	my @testfiles;
+	my %syms;
+	my @dumpout;
+	my @flds;
+	# work out which files to examine
+	if ((reftype($binloc) || "") eq 'ARRAY')
+	{
+		@testfiles = @$binloc;
+	}
+	elsif ($using_osx)
+	{
+		# On OS X, we need to examine the .o files
+		# exclude ecpg/test, which pgindent does too
+		my $obj_wanted = sub {
+			/^.*\.o\z/s
+			  && !($File::Find::name =~ m!/ecpg/test/!s)
+			  && push(@testfiles, $File::Find::name);
+		};
+
+		File::Find::find($obj_wanted, $binloc);
+	}
+	else
+	{
+		# Elsewhere, look at the installed executables and shared libraries
+		@testfiles = (
+			glob("$binloc/bin/*"),
+			glob("$binloc/lib/*"),
+			glob("$binloc/lib/postgresql/*")
+		);
+	}
+	# collect typedef names from each file's debug info as keys of %syms
+	foreach my $bin (@testfiles)
+	{
+		next if $bin =~ m!bin/(ipcclean|pltcl_)!;
+		next unless -f $bin;
+		next if -l $bin;                        # ignore symlinks to plain files
+		next if $bin =~ m!/postmaster.exe$!;    # sometimes a copy not a link
+
+		if ($using_osx)
+		{
+			@dumpout = `dwarfdump $bin 2>/dev/null`;
+			@dumpout = _dump_filter(\@dumpout, 'TAG_typedef', 2);
+			foreach (@dumpout)
+			{
+				## no critic (RegularExpressions::ProhibitCaptureWithoutTest)
+				@flds = split;
+				if (@flds == 3)
+				{
+					# old format
+					next unless ($flds[0] eq "AT_name(");
+					next unless ($flds[1] =~ m/^"(.*)"$/);
+					$syms{$1} = 1;
+				}
+				elsif (@flds == 2)
+				{
+					# new format
+					next unless ($flds[0] eq "DW_AT_name");
+					next unless ($flds[1] =~ m/^\("(.*)"\)$/);
+					$syms{$1} = 1;
+				}
+			}
+		}
+		elsif (@err == 1)    # Linux and sometimes windows
+		{
+			my $cmd = "$objdump -Wi $bin 2>/dev/null";
+			@dumpout = `$cmd`;
+			@dumpout = _dump_filter(\@dumpout, 'DW_TAG_typedef', 3);
+			foreach (@dumpout)
+			{
+				@flds = split;
+				next unless (1 < @flds);
+				next
+				  if (($flds[0] ne 'DW_AT_name' && $flds[1] ne 'DW_AT_name')
+					|| $flds[-1] =~ /^DW_FORM_str/);
+				$syms{ $flds[-1] } = 1;
+			}
+		}
+		elsif (@readelferr > 10)
+		{
+
+			# FreeBSD, similar output to Linux
+			my $cmd = "readelf -w $bin 2>/dev/null";
+			@dumpout = ` $cmd`;
+			@dumpout = _dump_filter(\@dumpout, 'DW_TAG_typedef', 3);
+
+			foreach (@dumpout)
+			{
+				@flds = split;
+				next unless (1 < @flds);
+				next if ($flds[0] ne 'DW_AT_name');
+				$syms{ $flds[-1] } = 1;
+			}
+		}
+		else    # neither probe succeeded: fall back to STABS debugging symbols
+		{
+			@dumpout = `$objdump --stabs $bin 2>/dev/null`;
+			foreach (@dumpout)
+			{
+				@flds = split;
+				next if (@flds < 7);
+				next if ($flds[1] ne 'LSYM' || $flds[6] !~ /([^:]+):t/);
+				## no critic (RegularExpressions::ProhibitCaptureWithoutTest)
+				$syms{$1} = 1;
+			}
+		}
+	}
+	my @badsyms = grep { /\s/ } keys %syms;    # names containing whitespace
+	push(@badsyms, 'date', 'interval', 'timestamp', 'ANY');    # also used as variable names
+	delete @syms{@badsyms};
+
+	my @goodsyms = sort keys %syms;
+	my $foundsyms = [];
+
+	my %foundwords;
+	# File::Find callback: record every word used in the .c, .h, .l and .y files
+	my $setfound = sub {
+
+		# $_ is the name of the file being examined
+		# its directory is our current cwd
+
+		return unless (-f $_ && /^.*\.[chly]\z/);
+
+		open(my $srcfile, '<', $_) || die "opening $_: $!";
+		local $/ = undef;
+		my $src = <$srcfile>;
+		close($srcfile);
+
+		# strip C comments
+		# Use a simple pattern rather than the recipe in perlfaq6.
+		# We don't need to keep the quoted string values anyway, and
+		# on some platforms the complex regex causes perl to barf and crash.
+		$src =~ s{/\*.*?\*/}{}gs;
+
+		foreach my $word (split(/\W+/, $src))
+		{
+			$foundwords{$word} = 1;
+		}
+	};
+
+	File::Find::find($setfound, $srcdir);
+	# keep only the symbols that occur as a word somewhere in the source tree
+	foreach my $sym (@goodsyms)
+	{
+		push(@$foundsyms, $sym) if exists $foundwords{$sym};
+	}
+
+	return $foundsyms;
+}
+
+# private routine, poor man's egrep -A: the $context lines after each $tag line
+sub _dump_filter
+{
+	my ($lines, $tag, $context) = @_;
+	my @output;
+	my $pending = 0;    # context lines still owed to the most recent match
+	foreach my $line (@$lines)
+	{
+		push(@output, $line) if $pending > 0;
+		$pending-- if $pending > 0;
+		# a match inside a window restarts it instead of being swallowed
+		$pending = $context if index($line, $tag) > -1;
+	}
+	return @output;
+}
+
+1;
+
diff --git a/src/tools/find_typedefs/find_typedef b/src/tools/find_typedefs/find_typedef
new file mode 100755
index 0000000000..8f187d1b47
--- /dev/null
+++ b/src/tools/find_typedefs/find_typedef
@@ -0,0 +1,90 @@
+#!/usr/bin/perl
+
+# src/tools/find_typedefs/find_typedef
+
+# wrapper around PostgreSQL::FindTypedefs::typedefs()
+
+# This script attempts to find all typedef's in the postgres binaries
+# by using 'objdump' or local equivalent to print typedef debugging symbols.
+# We need this because pgindent needs a list of typedef names.
+#
+# For this program to work, you must have compiled all code with
+# debugging symbols.
+
+#
+# We intentionally examine all files in the targeted directories so as to
+# find both .o files and executables.  Therefore, ignore error messages about
+# unsuitable files being fed to objdump.
+#
+# This is known to work on Linux and on some BSDen, including macOS, as well
+# as on Windows.
+#
+# Caution: on the platforms we use, this only prints typedefs that are used
+# to declare at least one variable or struct field.  If you have say
+# "typedef struct foo { ... } foo;", and then the structure is only ever
+# referenced as "struct foo", "foo" will not be reported as a typedef,
+# causing pgindent to indent the typedef definition oddly.  This is not a
+# huge problem, since by definition there's just the one misindented line.
+#
+
+use strict;
+use warnings FATAL => 'all';
+
+use Config;
+use Getopt::Long;
+use FindBin qw($Bin);
+use lib $Bin;
+use PostgreSQL::FindTypedefs;
+
+# option values; srcdir defaults to the top of the tree this script lives in
+
+my $srcdir = "$Bin/../../..";
+my $bindir;
+my @files;
+my $hostprefix = "";
+my $msvc;
+my $help;
+# GetOptions has already complained about anything it rejects, so just stop
+GetOptions(
+	"srcdir=s" => \$srcdir,
+	"bindir=s" => \$bindir,
+	"file=s" => \@files,
+	"hostprefix=s" => \$hostprefix,
+	"msvc" => \$msvc,
+	"help" => \$help,
+   ) || die "invalid command line, try --help for usage\n";
+
+if ($help)
+{
+	do_help();
+	exit 0;
+}
+# exactly one of --bindir and --file must be supplied
+die "cannot specify both file and bindir options" if ($bindir && @files);
+die "need either bindir or file options" unless ($bindir || @files);
+# binloc is either the directory to scan or a reference to the list of files
+my %process_opts = ( srcdir => $srcdir );
+$process_opts{binloc} = $bindir ? $bindir : \@files;
+$process_opts{msvc} = $msvc if $msvc;
+$process_opts{hostopt} = $hostprefix if $hostprefix;
+
+print join("\n", @{typedefs(%process_opts)}), "\n";
+
+exit;
+
+####################
+# Print a usage summary of the command-line options.
+sub do_help
+{
+	print qq!
+Usage: $0 [options]
+   --srcdir=DIR [default source tree where find_typedef is found]
+   --bindir=DIR
+   --file=FILE
+   --hostprefix=CROSS_COMPILER_NAME_PREFIX
+   --msvc
+
+The --file option can be used multiple times. Exactly one of --file and
+--bindir must be used.
+!;
+}

Reply via email to