

package FindTypedefs;

use strict;
use warnings;

use File::Find;

## no critic (ProhibitAutomaticExportation)
use Exporter qw(import);
our (@EXPORT, @EXPORT_OK, %EXPORT_TAGS);

@EXPORT = qw(find_typedefs print_typedefs);
%EXPORT_TAGS = ();
@EXPORT_OK = ();

# private routine
# more or less equivalent of egrep -A
sub _dump_filter
{
	my ($lines, $tag, $context) = @_;
	my @output;
	while (@$lines)
	{
		my $line = shift @$lines;
		if (index($line, $tag) > -1)
		{
			push(@output, splice(@$lines, 0, $context));
		}
	}
	return @output;
}

# The main exported routine
#
# The first argument is the top of the source tree.
#
# The second argument is a directory. For everything except OSX it should be
# a directory where postgres is installed (e.g. $installdir for the buildfarm).
# It should have bin and lib subdirectories. On OSX it should instead be the
# top of the build tree, as we need to examine the individual object files.
#
# Note that this assumes there is not a platform-specific subdirectory of
# lib like meson likes to use. (The buildfarm avoids this by specifying
# --libdir=lib to meson setup.)
#
# The third argument, if present, should be the name of the objdump program.
# if building with --host= or the meson equivalent (--cross-file= ?). For
# plain native builds this should normally be unnecessary, and we will just
# call objdump if appropriate.

sub find_typedefs
{
	my $sourcedir = shift;
	my $bindir = shift;
	my $hostobjdump = shift || "";
	
	# work around the fact that ucrt/binutils objdump is far slower
	# than the one in msys/binutils
	local $ENV{PATH} = $ENV{PATH};
	$ENV{PATH} = "/usr/bin:$ENV{PATH}" if `uname -a` eq 'Msys';

	my $objdump = 'objdump';
	my $sep = ($ENV{PATH} =~ /;/) ? ';' : ':';

	# if we have a hostobjdump, find out which of it and objdump is in the path
	foreach my $p (split(/$sep/, $ENV{PATH}))
	{
		last unless $hostobjdump;
		last if (-e "$p/objdump" || -e "$p/objdump.exe");
		if (-e "$p/$hostobjdump" || -e "$p/$hostobjdump.exe")
		{
			$objdump = $hostobjdump;
			last;
		}
	}
	my @err = `$objdump -W 2>&1`;
	my @readelferr = `readelf -w 2>&1`;
	my $using_osx = (`uname` eq "Darwin\n");
	my @testfiles;
	my %syms;
	my @dumpout;
	my @flds;

	if ($using_osx)
	{
		# On OS X, we need to examine the .o files
		# exclude ecpg/test, which pgindent does too
		my $obj_wanted = sub {
			/^.*\.o\z/s
			  && !($File::Find::name =~ m!/ecpg/test/!s)
			  && push(@testfiles, $File::Find::name);
		};

		File::Find::find($obj_wanted, $bindir);
	}
	else
	{
		# Elsewhere, look at the installed executables and shared libraries
		@testfiles = (
			glob("$bindir/bin/*"),
			glob("$bindir/lib/*"),
			glob("$bindir/lib/postgresql/*")
		);
	}
	foreach my $bin (@testfiles)
	{
		next if $bin =~ m!bin/(ipcclean|pltcl_)!;
		next unless -f $bin;
		next if -l $bin;                        # ignore symlinks to plain files
		next if $bin =~ m!/postmaster.exe$!;    # sometimes a copy not a link

		if ($using_osx)
		{
			@dumpout = `dwarfdump $bin 2>/dev/null`;
			@dumpout = _dump_filter(\@dumpout, 'TAG_typedef', 2);
			foreach (@dumpout)
			{
				## no critic (RegularExpressions::ProhibitCaptureWithoutTest)
				@flds = split;
				if (@flds == 3)
				{
					# old format
					next unless ($flds[0] eq "AT_name(");
					next unless ($flds[1] =~ m/^"(.*)"$/);
					$syms{$1} = 1;
				}
				elsif (@flds == 2)
				{
					# new format
					next unless ($flds[0] eq "DW_AT_name");
					next unless ($flds[1] =~ m/^\("(.*)"\)$/);
					$syms{$1} = 1;
				}
			}
		}
		elsif (@err == 1)    # Linux and sometimes windows
		{
			my $cmd = "$objdump -Wi $bin 2>/dev/null";
			@dumpout = `$cmd`;
			@dumpout = _dump_filter(\@dumpout, 'DW_TAG_typedef', 3);
			foreach (@dumpout)
			{
				@flds = split;
				next unless (1 < @flds);
				next
				  if (($flds[0] ne 'DW_AT_name' && $flds[1] ne 'DW_AT_name')
					|| $flds[-1] =~ /^DW_FORM_str/);
				$syms{ $flds[-1] } = 1;
			}
		}
		elsif (@readelferr > 10)
		{
			# FreeBSD, similar output to Linux
			my $cmd = "readelf -w $bin 2>/dev/null";
			@dumpout = ` $cmd`;
			@dumpout = _dump_filter(\@dumpout, 'DW_TAG_typedef', 3);

			foreach (@dumpout)
			{
				@flds = split;
				next unless (1 < @flds);
				next if ($flds[0] ne 'DW_AT_name');
				$syms{ $flds[-1] } = 1;
			}
		}
		else
		{
			@dumpout = `$objdump --stabs $bin 2>/dev/null`;
			foreach (@dumpout)
			{
				@flds = split;
				next if (@flds < 7);
				next if ($flds[1] ne 'LSYM' || $flds[6] !~ /([^:]+):t/);
				## no critic (RegularExpressions::ProhibitCaptureWithoutTest)
				$syms{$1} = 1;
			}
		}
	}
	my @badsyms = grep { /\s/ } keys %syms;
	push(@badsyms, 'date', 'interval', 'timestamp', 'ANY');
	delete @syms{@badsyms};

	my @goodsyms = sort keys %syms;
	my @foundsyms;

	my %foundwords;

	my $setfound = sub {

		# $_ is the name of the file being examined
		# its directory is our current cwd

		return unless (-f $_ && /^.*\.[chly]\z/);

		my $src = file_contents($_);

		# strip C comments
		# It's unnecessary to use the recipe in perlfaq6.
		# We don't need to keep the quoted string values anyway, and
		# on some platforms the complex regex causes perl to barf and crash.
		$src =~ s{/\*.*?\*/}{}gs;

		foreach my $word (split(/\W+/, $src))
		{
			$foundwords{$word} = 1;
		}
	};

	File::Find::find($setfound, $sourcedir);

	foreach my $sym (@goodsyms)
	{
		push(@foundsyms, $sym) if exists $foundwords{$sym};
	}

	return @foundsyms;
}

# utility routine to print the typedefs instead of just returning them.
sub print_typedefs
{
	print "$_\n" foreach find_typedefs(@_);
}
  
1;