On Sun, Jul 09, 2023 at 01:29:58PM -0700, Andrew Hewus Fresh wrote:
> Here is a patch to replace perl(1)'s use of syscall(2) with a dispatcher
> that will call the libc function instead.
> 
> I have to do some work on style before this is ready to commit, but it
> should be ready for some testing.
> 
> I don't currently plan on committing syscall_emulator.c because we need
> to regenerate it each time as I'm not sure how to tie it into sys/kern's
> `make syscalls` to only do it when things change.
> 
> Looking for tests from folks who use syscall from perl as well as style
> suggestions (like how do I correctly sort headers?) and maybe even
> ways to enable additional syscalls.

Nits in the perl part.

> Index: gnu/usr.bin/perl/gen_syscall_emulator.pl
> ===================================================================
> RCS file: gnu/usr.bin/perl/gen_syscall_emulator.pl
> diff -N gnu/usr.bin/perl/gen_syscall_emulator.pl
> --- /dev/null 1 Jan 1970 00:00:00 -0000
> +++ gnu/usr.bin/perl/gen_syscall_emulator.pl  9 Jul 2023 19:42:50 -0000
> @@ -0,0 +1,354 @@
> +#!/usr/bin/perl
> +#    $OpenBSD$       #
> +use v5.36;
> +
> +# Copyright (c) 2023 Andrew Hewus Fresh <afre...@openbsd.org>
> +#
> +# Permission to use, copy, modify, and distribute this software for any
> +# purpose with or without fee is hereby granted, provided that the above
> +# copyright notice and this permission notice appear in all copies.
> +#
> +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> +
> +my $includes = '/usr/include';
> +
> +# See also /usr/src/sys/kern/syscalls.master
> +my %syscalls = parse_syscalls(
> +    "$includes/sys/syscall.h",
> +    "$includes/sys/syscallargs.h",
> +);
> +delete $syscalls{MAXSYSCALL}; # not an actual function
> +
> +# The ordered list of all the headers we need
> +my @headers = qw<
> +     sys/syscall.h
> +     stdarg.h
> +     errno.h
> +
> +     sys/socket.h
> +     sys/event.h
> +     sys/futex.h
> +     sys/ioctl.h
> +     sys/ktrace.h
> +     sys/mman.h
> +     sys/mount.h
> +     sys/msg.h
> +     sys/poll.h
> +     sys/ptrace.h
> +     sys/resource.h
> +     sys/select.h
> +     sys/sem.h
> +     sys/shm.h
> +     sys/stat.h
> +     sys/sysctl.h
> +     sys/time.h
> +     sys/uio.h
> +     sys/wait.h
> +
> +     dirent.h
> +     fcntl.h
> +     sched.h
> +     signal.h
> +     stdlib.h
> +     stdio.h
> +     syslog.h
> +     tib.h
> +     time.h
> +     unistd.h
> +>;
> +
> +foreach my $header (@headers) {
> +     my $file = "$includes/$header";
> +     open my $fh, '<', $file or die "Unable to open $file: $!";
you could just autodie as you don't ever do anything fancy when open fails.
> +     my $content = do { local $/; readline $fh };
> +     close $fh;
> +
> +     # Look for matching syscalls in this header
not sure that comment brings anything, or maybe you want a function, as
you tend to get fairly long winded.
> +     foreach my $name (sort keys %syscalls) {
> +             my $s = $syscalls{$name};
> +             my $func_sig = find_func_sig($content, $name, $s);
> +
> +             if (ref $func_sig) {
> +                     die "Multiple defs for $name <$header> <$s->{header}>"
> +                         if $s->{header};
> +                     $s->{func} = $func_sig;
> +                     $s->{header} = $header;
> +             } elsif ($func_sig) {
> +                     $s->{mismatched_sig} = "$func_sig <$header>";
> +             }
> +     }
> +}
> +
> +say "/*\n * Generated from gen_syscall_emulator.pl\n */";
> +say "#include <$_>" for @headers;
> +print <<"EOL";
> +
> +long
> +syscall_emulator(int syscall, ...)
> +{
> +     long ret = 0;
> +     va_list args;
> +     va_start(args, syscall);
> +
> +     switch(syscall) {
> +EOL
> +
> +foreach my $name (
> +     sort { $syscalls{$a}{id} <=> $syscalls{$b}{id} } keys %syscalls
> +    ) {
> +     my %s = %{ $syscalls{$name} };
I never put spaces in this kind of construct, as I find it fairly readable
by itself
> +
> +     # Some syscalls we can't emulate, so we comment those out.
> +     $s{skip} //= "Indirect syscalls not supported"
> +         if !$s{argtypes} && ($s{args}[-1] || '') eq '...';
> +     $s{skip} //= "Mismatched func: $s{mismatched_sig}"
> +         if $s{mismatched_sig} and not $s{func};
> +     $s{skip} //= "No signature found in headers"
> +         unless $s{header};
> +
> +     my $ret = $s{ret} eq 'void' ? '' : 'ret = ';
> +     $ret .= '(long)' if $s{ret} eq 'void *';
> +
> +     my (@args, @defines);
> +     my $argname = '';
> +     if ($s{argtypes}) {
> +             if (@{ $s{argtypes} } > 1) {
> +                     @defines = map {
> +                         my $t = $_->{type};
> +                         my $n = $_->{name};
> +                         $n = "_$n" if $n eq $name; # link :-/
> +                         push @args, $n;
> +                         "$t $n = va_arg(args, $t);"
> +                     } @{ $s{argtypes} };
I would make it so that the parameter is indented maybe
@defines = map {
        my $t
        ...
    } @{$s{argtypes}};

> +             } else {
> +                     if (@{ $s{argtypes} }) {
> +                             $argname = " // " . join ', ',
> +                                 map { $_->{name} }
> +                                 @{ $s{argtypes} };
> +                     }
> +                     @args = map { "va_arg(args, $_->{type})" }
> +                         @{ $s{argtypes} };
> +             }
> +     } else {
> +             @args = @{ $s{args} };
> +
> +             # If we didn't find args in syscallargs.h but have args
> +             # we don't know how to write our function.
> +             $s{skip} //= "Not found in sys/syscallargs.h"
> +                 if @args;
> +     }
> +
> +     #my $header = $s{header} ? " <$s{header}>" : '';
> +
> +     my $indent = "\t";
> +     say "$indent/* $s{skip}" if $s{skip};
> +
> +     $indent .= ' *' if $s{skip};
> +     say "${indent}                  $s{signature} <sys/syscall.h>"
> +         if $s{skip} && $s{skip} =~ /Mismatch/;
> +
> +     say "${indent}case $s{define}:"; # // $s{id}";
> +     say "${indent}\t{" if @defines;
> +     say "${indent}\t$_" for @defines;
> +     #say "${indent}\t// $s{signature}$header";
> +     say "${indent}\t$ret$name(" . join(', ', @args) . ");$argname";
> +     say "${indent}\t}" if @defines;
> +     say "${indent}\tbreak;";
> +
> +     say "\t */" if $s{skip};
> +}
> +
> +print <<"EOL";
> +     default:
> +             ret = -1;
> +             errno = ENOSYS;
> +     }
> +     va_end(args);
> +
> +     return ret;
> +}
> +EOL
> +
> +
> +sub parse_syscalls ($syscall, $args)
why space before the parenthesis ? I don't don't that.
(all functions have that issue)
> +{
> +     my %s = parse_syscall_h($syscall);
> +
> +     my %a = parse_syscallargs_h($args);
> +     $s{$_}{argtypes} = $a{$_} for grep { $a{$_} } keys %s;
> +
> +     return %s;
> +}
If you worry about efficiency, return a ref.
> +
> +sub parse_syscall_h ($file)
I tend to use filename for filename parameters instead of just file to 
avoid confusion with the fh proper

> +{
> +     my %s;
> +     open my $fh, '<', $file or die "Unable to open $file: $!";
> +     while ($_ = $fh->getline) {
> +             if (m{^/\*
> +                 \s+ syscall: \s+ "(?<name>[^"]+)"
> +                 \s+  ret: \s+ "(?<ret> [^"]+)"
> +                 \s+ args: \s+  (?<args>.*?)
> +                 \s* \*/
> +               |
> +                 ^\#define \s+ (?<define>SYS_(?<name>\S+)) \s+ (?<id>\d+)
> +             }x) {
> +                     my $name        = $+{name};
> +                     $s{$name}{$_}   = $+{$_} for keys %+;
> +                     $s{$name}{args} = [ $+{args} =~ /"(.*?)"/g ]
> +                         if exists $+{args};
> +             }
> +     }
> +     close $fh or die "Unable to close $file: $!";
> +
> +     foreach my $name (keys %s) {
> +             my %d = %{ $s{$name} };
> +             next unless $d{ret}; # the MAXSYSCALL
> +
> +             my $ret = $d{ret};
> +             my @args = @{ $d{args} || [] };
> +             @args = 'void' unless @args;
> +
> +             if ($args[-1] ne '...') {
> +                     my @a;
> +                     for (@args) {
> +                             push @a, $_;
> +                             last if $_ eq '...';
> +                     }
> +                     @args = @a;
> +             }
> +
> +             my $args = join ", ", @args;
> +             $s{$name}{signature} = "$ret\t$name($args);" =~ s/\s+/ /gr;
> +             #print "    $s{$name}{signature}\n";
> +     }
> +
> +     return %s;
> +}
likewise
> +
> +sub _parse_syscallarg ($fh)
> +{
> +     my @a;
> +     while ($_ = $fh->getline) {
> +             last if /^\s*\};\s*$/;
> +             if (/syscallarg\( ( [^)]+  ) \) \s+ (\w+) \s* ;/x) {
> +                     push @a, { type => $1, name => $2 };
> +             }
> +     }
> +     return \@a;
> +}
so here you return a ref... not consistent.
> +
> +sub parse_syscallargs_h ($file)
> +{
> +     my %a;
> +     open my $fh, '<', $file or die "Unable to open $file; $!";
> +     while ($_ = $fh->getline) {
> +             if (/^struct sys_(\w+)_args \{/) {
> +                     my $name = $1;
> +                     $a{$name} = _parse_syscallarg($fh);
> +             }
> +     }
> +     close $fh;
> +     return %a;
> +}
> +
> +sub find_func_sig ($content, $name, $s)
> +{
> +     my $re = $s->{re} //= qr{^
> +         (?<ret> \S+ (?: [^\S\n]+ \S+)? ) [^\S\n]* \n?
> +         \b \Q$name\E \( (?<args> [^)]* ) \)
> +     [^;]*;}xms;
make sure everything is indented
> +
> +     $content =~ /$re/ || return;
> +     my $ret  = $+{ret};
> +     my $args = $+{args};
> +
> +     for ($ret, $args) {
> +             s/^\s+//;
> +             s/\s+$//;
> +             s/\s+/ /g;
> +     }
> +
> +     # The actual functions may have this extra annotation
> +     $args =~ s/\*\s*__restrict/*/g;
> +
> +     my %func_sig = ( ret => $ret, args => [ split /\s*,\s*/, $args ] );
> +
> +     return "$ret $name($args);" =~ s/\s+/ /gr
> +         unless sigs_match( $s, \%func_sig );
> +
> +     return \%func_sig;
> +}
> +
> +# Tests whether two types are equivalent.
> +# Sometimes there are two ways to represent the same thing
> +# and it seems the functions and the syscalls
> +# differ a fair amount.

> +{
> +my %m; BEGIN { %m = (
> +    caddr_t         => 'char *',
> +    idtype_t        => 'int',
> +    nfds_t          => 'u_int',
> +    __off_t         => 'off_t',
> +    pid_t           => 'int',
> +    __size_t        => 'u_int',
> +    size_t          => 'u_int',
> +    'unsigned int'  => 'u_int',
> +    'unsigned long' => 'u_long',
> +) }

Why is this in a BEGIN block ?
This could probably be a state inside types_match.


> +sub types_match ($l, $r)
> +{
> +     $l //= '__undef__';
> +     $r //= '__undef__';
> +
> +     s/^volatile //     for $l, $r;
> +     s/^const //        for $l, $r;
I would also use \s+ on those
you're also somewhat dependent on order
\b seems like a better match
> +     s/\s*\[\d*\]$/ \*/ for $l, $r;
> +
> +     my ($f, $s) = sort { length($a) <=> length($b) } $l, $r;
> +     if (index($s,$f) == 0) {
Missing space
> +             $s =~ s/^\Q$f\E\s*//;
> +             if ( $s && $s =~ /^\w+$/ ) {
> +                     #warn "prefix ['$f', '$s']\n";
> +                     s/\s*\Q$s\E$// for $l, $r;
> +             }
> +     }
> +
> +     # my ($p_l, $p_r) = ($l, $r);
# ???
> +     $l = $m{$l} //= $l;
> +     $r = $m{$r} //= $r;
> +
> +     #warn "    $p_l [$l] $p_r [$r] <'$f' '$s'>\n";
# ???
> +     # return and use the original "right" value
> +     # as it's from the function header and closer to what we need.
> +     return $l eq $r;
> +}
> +}
> +
> +
> +# Tests whether two funciton signatures match,
                       ^^^^^^^^
> +# expected to be left from syscall.h, right from the appopriate header.
> +sub sigs_match ($l, $r)
> +{
> +     return unless types_match( $l->{ret}, $l->{ret} );
I tend to explicitly return 0 on boolean functions, as this can leave undef
free for further uses.
> +
> +     my @l_args = @{ $l->{args} || [] };
This will do the right thing without the || []
(List without value)
> +     my @r_args = @{ $r->{args} || [] };
> +
> +     for (\@l_args, \@r_args) {
> +             @{$_} = 'void' unless @{$_};
> +     }
> +
> +     for my $i ( 0 .. $#l_args ) {
> +             return unless types_match( $l_args[$i], $r_args[$i] );
I don't put parenthesis inside function calls either
> +             last if $l_args[$i] eq '...';
> +     }
> +
> +     return 1;
> +}

Reply via email to