On Sun, Jul 09, 2023 at 01:29:58PM -0700, Andrew Hewus Fresh wrote: > Here is a patch to replace perl(1)'s use of syscall(2) with a dispatcher > that will call the libc function instead. > > I have to do some work on style before this is ready to commit, but it > should be ready for some testing. > > I don't currently plan on committing syscall_emulator.c because we need > to regenerate it each time as I'm not sure how to tie it into sys/kern's > `make syscalls` to only do it when things change. > > Looking for tests from folks who use syscall from perl as well as style > suggestions (like how do I correctly sort headers?) and maybe even > ways to enable additional syscalls.
Nits in the perl part. > Index: gnu/usr.bin/perl/gen_syscall_emulator.pl > =================================================================== > RCS file: gnu/usr.bin/perl/gen_syscall_emulator.pl > diff -N gnu/usr.bin/perl/gen_syscall_emulator.pl > --- /dev/null 1 Jan 1970 00:00:00 -0000 > +++ gnu/usr.bin/perl/gen_syscall_emulator.pl 9 Jul 2023 19:42:50 -0000 > @@ -0,0 +1,354 @@ > +#!/usr/bin/perl > +# $OpenBSD$ # > +use v5.36; > + > +# Copyright (c) 2023 Andrew Hewus Fresh <afre...@openbsd.org> > +# > +# Permission to use, copy, modify, and distribute this software for any > +# purpose with or without fee is hereby granted, provided that the above > +# copyright notice and this permission notice appear in all copies. > +# > +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES > +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF > +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR > +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES > +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN > +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF > +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. > + > +my $includes = '/usr/include'; > + > +# See also /usr/src/sys/kern/syscalls.master > +my %syscalls = parse_syscalls( > + "$includes/sys/syscall.h", > + "$includes/sys/syscallargs.h", > +); > +delete $syscalls{MAXSYSCALL}; # not an actual function > + > +# The ordered list of all the headers we need > +my @headers = qw< > + sys/syscall.h > + stdarg.h > + errno.h > + > + sys/socket.h > + sys/event.h > + sys/futex.h > + sys/ioctl.h > + sys/ktrace.h > + sys/mman.h > + sys/mount.h > + sys/msg.h > + sys/poll.h > + sys/ptrace.h > + sys/resource.h > + sys/select.h > + sys/sem.h > + sys/shm.h > + sys/stat.h > + sys/sysctl.h > + sys/time.h > + sys/uio.h > + sys/wait.h > + > + dirent.h > + fcntl.h > + sched.h > + signal.h > + stdlib.h > + stdio.h > + syslog.h > + tib.h > + time.h > + unistd.h > +>; > + > +foreach my $header (@headers) { > + my $file = "$includes/$header"; > + open my $fh, '<', $file or die "Unable to open $file: $!"; you could just autodie as you don't ever do anything fancy when open fails. > + my $content = do { local $/; readline $fh }; > + close $fh; > + > + # Look for matching syscalls in this header not sure that comment brings anything, or maybe you want a function, as you tend to get fairly long winded. > + foreach my $name (sort keys %syscalls) { > + my $s = $syscalls{$name}; > + my $func_sig = find_func_sig($content, $name, $s); > + > + if (ref $func_sig) { > + die "Multiple defs for $name <$header> <$s->{header}>" > + if $s->{header}; > + $s->{func} = $func_sig; > + $s->{header} = $header; > + } elsif ($func_sig) { > + $s->{mismatched_sig} = "$func_sig <$header>"; > + } > + } > +} > + > +say "/*\n * Generated from gen_syscall_emulator.pl\n */"; > +say "#include <$_>" for @headers; > +print <<"EOL"; > + > +long > +syscall_emulator(int syscall, ...) > +{ > + long ret = 0; > + va_list args; > + va_start(args, syscall); > + > + switch(syscall) { > +EOL > + > +foreach my $name ( > + sort { $syscalls{$a}{id} <=> $syscalls{$b}{id} } keys %syscalls > + ) { > + my %s = %{ $syscalls{$name} }; I never put spaces in this kind of construct, as I find it fairly readable by itself > + > + # Some syscalls we can't emulate, so we comment those out. > + $s{skip} //= "Indirect syscalls not supported" > + if !$s{argtypes} && ($s{args}[-1] || '') eq '...'; > + $s{skip} //= "Mismatched func: $s{mismatched_sig}" > + if $s{mismatched_sig} and not $s{func}; > + $s{skip} //= "No signature found in headers" > + unless $s{header}; > + > + my $ret = $s{ret} eq 'void' ? '' : 'ret = '; > + $ret .= '(long)' if $s{ret} eq 'void *'; > + > + my (@args, @defines); > + my $argname = ''; > + if ($s{argtypes}) { > + if (@{ $s{argtypes} } > 1) { > + @defines = map { > + my $t = $_->{type}; > + my $n = $_->{name}; > + $n = "_$n" if $n eq $name; # link :-/ > + push @args, $n; > + "$t $n = va_arg(args, $t);" > + } @{ $s{argtypes} }; I would make it so that the parameter is indented maybe @defines = map { my $t ... } @{$s{argtypes}}; > + } else { > + if (@{ $s{argtypes} }) { > + $argname = " // " . join ', ', > + map { $_->{name} } > + @{ $s{argtypes} }; > + } > + @args = map { "va_arg(args, $_->{type})" } > + @{ $s{argtypes} }; > + } > + } else { > + @args = @{ $s{args} }; > + > + # If we didn't find args in syscallargs.h but have args > + # we don't know how to write our function. > + $s{skip} //= "Not found in sys/syscallargs.h" > + if @args; > + } > + > + #my $header = $s{header} ? " <$s{header}>" : ''; > + > + my $indent = "\t"; > + say "$indent/* $s{skip}" if $s{skip}; > + > + $indent .= ' *' if $s{skip}; > + say "${indent} $s{signature} <sys/syscall.h>" > + if $s{skip} && $s{skip} =~ /Mismatch/; > + > + say "${indent}case $s{define}:"; # // $s{id}"; > + say "${indent}\t{" if @defines; > + say "${indent}\t$_" for @defines; > + #say "${indent}\t// $s{signature}$header"; > + say "${indent}\t$ret$name(" . join(', ', @args) . ");$argname"; > + say "${indent}\t}" if @defines; > + say "${indent}\tbreak;"; > + > + say "\t */" if $s{skip}; > +} > + > +print <<"EOL"; > + default: > + ret = -1; > + errno = ENOSYS; > + } > + va_end(args); > + > + return ret; > +} > +EOL > + > + > +sub parse_syscalls ($syscall, $args) why space before the parenthesis ? I don't don't that. (all functions have that issue) > +{ > + my %s = parse_syscall_h($syscall); > + > + my %a = parse_syscallargs_h($args); > + $s{$_}{argtypes} = $a{$_} for grep { $a{$_} } keys %s; > + > + return %s; > +} If you worry about efficiency, return a ref. > + > +sub parse_syscall_h ($file) I tend to use filename for filename parameters instead of just file to avoid confusion with the fh proper > +{ > + my %s; > + open my $fh, '<', $file or die "Unable to open $file: $!"; > + while ($_ = $fh->getline) { > + if (m{^/\* > + \s+ syscall: \s+ "(?<name>[^"]+)" > + \s+ ret: \s+ "(?<ret> [^"]+)" > + \s+ args: \s+ (?<args>.*?) > + \s* \*/ > + | > + ^\#define \s+ (?<define>SYS_(?<name>\S+)) \s+ (?<id>\d+) > + }x) { > + my $name = $+{name}; > + $s{$name}{$_} = $+{$_} for keys %+; > + $s{$name}{args} = [ $+{args} =~ /"(.*?)"/g ] > + if exists $+{args}; > + } > + } > + close $fh or die "Unable to close $file: $!"; > + > + foreach my $name (keys %s) { > + my %d = %{ $s{$name} }; > + next unless $d{ret}; # the MAXSYSCALL > + > + my $ret = $d{ret}; > + my @args = @{ $d{args} || [] }; > + @args = 'void' unless @args; > + > + if ($args[-1] ne '...') { > + my @a; > + for (@args) { > + push @a, $_; > + last if $_ eq '...'; > + } > + @args = @a; > + } > + > + my $args = join ", ", @args; > + $s{$name}{signature} = "$ret\t$name($args);" =~ s/\s+/ /gr; > + #print " $s{$name}{signature}\n"; > + } > + > + return %s; > +} likewise > + > +sub _parse_syscallarg ($fh) > +{ > + my @a; > + while ($_ = $fh->getline) { > + last if /^\s*\};\s*$/; > + if (/syscallarg\( ( [^)]+ ) \) \s+ (\w+) \s* ;/x) { > + push @a, { type => $1, name => $2 }; > + } > + } > + return \@a; > +} so here you return a ref... not consistent. > + > +sub parse_syscallargs_h ($file) > +{ > + my %a; > + open my $fh, '<', $file or die "Unable to open $file; $!"; > + while ($_ = $fh->getline) { > + if (/^struct sys_(\w+)_args \{/) { > + my $name = $1; > + $a{$name} = _parse_syscallarg($fh); > + } > + } > + close $fh; > + return %a; > +} > + > +sub find_func_sig ($content, $name, $s) > +{ > + my $re = $s->{re} //= qr{^ > + (?<ret> \S+ (?: [^\S\n]+ \S+)? ) [^\S\n]* \n? > + \b \Q$name\E \( (?<args> [^)]* ) \) > + [^;]*;}xms; make sure everything is indented > + > + $content =~ /$re/ || return; > + my $ret = $+{ret}; > + my $args = $+{args}; > + > + for ($ret, $args) { > + s/^\s+//; > + s/\s+$//; > + s/\s+/ /g; > + } > + > + # The actual functions may have this extra annotation > + $args =~ s/\*\s*__restrict/*/g; > + > + my %func_sig = ( ret => $ret, args => [ split /\s*,\s*/, $args ] ); > + > + return "$ret $name($args);" =~ s/\s+/ /gr > + unless sigs_match( $s, \%func_sig ); > + > + return \%func_sig; > +} > + > +# Tests whether two types are equivalent. > +# Sometimes there are two ways to represent the same thing > +# and it seems the functions and the syscalls > +# differ a fair amount. > +{ > +my %m; BEGIN { %m = ( > + caddr_t => 'char *', > + idtype_t => 'int', > + nfds_t => 'u_int', > + __off_t => 'off_t', > + pid_t => 'int', > + __size_t => 'u_int', > + size_t => 'u_int', > + 'unsigned int' => 'u_int', > + 'unsigned long' => 'u_long', > +) } Why is this in a BEGIN block ? This could probably be a state inside types_match. > +sub types_match ($l, $r) > +{ > + $l //= '__undef__'; > + $r //= '__undef__'; > + > + s/^volatile // for $l, $r; > + s/^const // for $l, $r; I would also use \s+ on those you're also somewhat dependent on order \b seems like a better match > + s/\s*\[\d*\]$/ \*/ for $l, $r; > + > + my ($f, $s) = sort { length($a) <=> length($b) } $l, $r; > + if (index($s,$f) == 0) { Missing space > + $s =~ s/^\Q$f\E\s*//; > + if ( $s && $s =~ /^\w+$/ ) { > + #warn "prefix ['$f', '$s']\n"; > + s/\s*\Q$s\E$// for $l, $r; > + } > + } > + > + # my ($p_l, $p_r) = ($l, $r); # ??? > + $l = $m{$l} //= $l; > + $r = $m{$r} //= $r; > + > + #warn " $p_l [$l] $p_r [$r] <'$f' '$s'>\n"; # ??? > + # return and use the original "right" value > + # as it's from the function header and closer to what we need. > + return $l eq $r; > +} > +} > + > + > +# Tests whether two funciton signatures match, ^^^^^^^^ > +# expected to be left from syscall.h, right from the appopriate header. > +sub sigs_match ($l, $r) > +{ > + return unless types_match( $l->{ret}, $l->{ret} ); I tend to explicitly return 0 on boolean functions, as this can leave undef free for further uses. > + > + my @l_args = @{ $l->{args} || [] }; This will do the right thing without the || [] (List without value) > + my @r_args = @{ $r->{args} || [] }; > + > + for (\@l_args, \@r_args) { > + @{$_} = 'void' unless @{$_}; > + } > + > + for my $i ( 0 .. $#l_args ) { > + return unless types_match( $l_args[$i], $r_args[$i] ); I don't put parenthesis inside function calls either > + last if $l_args[$i] eq '...'; > + } > + > + return 1; > +}