On 2013/01/08 02:22, Konstantin Belousov wrote:
Below is a forward of the patch for which I failed to obtain a private
review. Perhaps the list will generate more responses.

Our rtld has a performance bottleneck, typically exposed by images with
a lot of run-time relocation processing, and by C++ exception handling.
We block signal delivery while rtld performs relocations, as well as
during dl_iterate_phdr(3) (the latter is used to find the DWARF
unwinding tables).

The signal blocking is needed to allow rtld to resolve symbols for
signal handlers safely, but it also adds the overhead of two syscalls
per rtld entry.
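
For reference, this is roughly the pattern the patch removes: a minimal
sketch of what def_wlock_acquire()/def_lock_release() in rtld_lock.c do
today (see the removed lines in the diff below; the helper names here
are only for illustration):

    #include <signal.h>

    static sigset_t fullsigmask, oldsigmask;

    static void
    rtld_block_signals(void)
    {
            /* Syscall #1 on every rtld write-lock acquire. */
            sigprocmask(SIG_BLOCK, &fullsigmask, &oldsigmask);
    }

    static void
    rtld_unblock_signals(void)
    {
            /* Syscall #2 on lock release: restore the caller's mask. */
            sigprocmask(SIG_SETMASK, &oldsigmask, NULL);
    }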

The proposed approach shaves off those two syscalls, doubling FreeBSD
performance on a (silly) throw/catch C++ microbenchmark.

Date: Mon, 13 Aug 2012 15:26:00 +0300
From: Konstantin Belousov <kostik...@gmail.com>

...

The basic idea is to implement sigprocmask() as a single write to a
usermode address. When the kernel needs to calculate the signal mask
for the current thread, it takes a non-zero value of the word at the
agreed address into consideration. Usermode is also informed about
signals that were put on hold while fast sigblock was active.
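
To make the protocol concrete, here is a minimal sketch of the usermode
side, using the constants and atomics from the patch below; the kernel
treats a non-zero word as "signals blocked" and sets FAST_SIGBLOCK_PEND
when it had to postpone delivery:

    #include <sys/types.h>
    #include <sys/signalvar.h>      /* FAST_SIGBLOCK_* and __sys_fast_sigblock() */
    #include <machine/atomic.h>

    static uint32_t fsigblock;      /* registered once via FAST_SIGBLOCK_SETPTR */

    static void
    block(void)
    {
            /* Enter the critical section: one atomic add, no syscall. */
            atomic_add_rel_32(&fsigblock, FAST_SIGBLOCK_INC);
    }

    static void
    unblock(void)
    {
            uint32_t oldval;

            oldval = atomic_fetchadd_32(&fsigblock, -FAST_SIGBLOCK_INC);
            if (oldval == (FAST_SIGBLOCK_PEND | FAST_SIGBLOCK_INC))
                    /* The kernel postponed a signal; one syscall delivers it now. */
                    __sys_fast_sigblock(FAST_SIGBLOCK_UNBLOCK, NULL);
    }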

As I said, in my measurements of a microbenchmark that did throw/catch
in a loop, I see equal user and system time spent on an unpatched
system, and the same user time with zero system time on a patched
system.

The patch can be improved further; e.g., it would be nice to allow rtld
to fall back to sigprocmask(2) if the kernel does not support fast
sigblock, to prevent a flag day. Also, the mask enforced by fast
sigblock could be made configurable.
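
A hypothetical sketch of such a fallback (not part of the patch): probe
for the new syscall once at startup and keep using sigprocmask(2) when
it is missing, assuming the libc stub fails with ENOSYS on older
kernels:

    static int fast_sigblock_ok;

    static void
    sigblock_init(void)             /* hypothetical helper */
    {
            if (__sys_fast_sigblock(FAST_SIGBLOCK_SETPTR, &fsigblock) == 0)
                    fast_sigblock_ok = 1;           /* use the fast path from now on */
            else
                    sigfillset(&fullsigmask);       /* keep the sigprocmask(2) path */
    }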

Note that libthr already blocks signals by catching them and by not
using rtld services in the first-line handler. I tried to make the
change in the spirit of the libthr interceptors, but the handoff to
libthr appears too complicated to work. In fact, libthr could be
changed to use fast sigblock instead of wrapping sigaction, but that is
out of scope for this proposal.

Please comment.

diff --git a/lib/libc/sys/Symbol.map b/lib/libc/sys/Symbol.map
index 6888ea0..3b75539 100644
--- a/lib/libc/sys/Symbol.map
+++ b/lib/libc/sys/Symbol.map
@@ -546,6 +546,7 @@ FBSDprivate_1.0 {
        __sys_extattr_set_link;
        _extattrctl;
        __sys_extattrctl;
+       __sys_fast_sigblock;
        _fchdir;
        __sys_fchdir;
        _fchflags;
diff --git a/libexec/rtld-elf/rtld_lock.c b/libexec/rtld-elf/rtld_lock.c
index d1563e5..50c52c6 100644
--- a/libexec/rtld-elf/rtld_lock.c
+++ b/libexec/rtld-elf/rtld_lock.c
@@ -43,6 +43,7 @@
   */

  #include <sys/param.h>
+#include <sys/signalvar.h>
  #include <signal.h>
  #include <stdlib.h>
  #include <time.h>
@@ -59,8 +60,8 @@ typedef struct Struct_Lock {
        void *base;
  } Lock;

-static sigset_t fullsigmask, oldsigmask;
  static int thread_flag;
+static uint32_t fsigblock;

  static void *
  def_lock_create()
@@ -111,18 +112,26 @@ def_rlock_acquire(void *lock)
  }

  static void
+sig_fastunblock(void)
+{
+       uint32_t oldval;
+
+       oldval = atomic_fetchadd_32(&fsigblock, -FAST_SIGBLOCK_INC);
+       if (oldval == (FAST_SIGBLOCK_PEND | FAST_SIGBLOCK_INC))
+               __sys_fast_sigblock(FAST_SIGBLOCK_UNBLOCK, NULL);
+}
+
+static void
  def_wlock_acquire(void *lock)
  {
      Lock *l = (Lock *)lock;
-    sigset_t tmp_oldsigmask;

      for ( ; ; ) {
-       sigprocmask(SIG_BLOCK, &fullsigmask, &tmp_oldsigmask);
+       atomic_add_rel_32(&fsigblock, FAST_SIGBLOCK_INC);
        if (atomic_cmpset_acq_int(&l->lock, 0, WAFLAG))
            break;
-       sigprocmask(SIG_SETMASK, &tmp_oldsigmask, NULL);
+       sig_fastunblock();
      }
-    oldsigmask = tmp_oldsigmask;
  }

  static void
@@ -134,7 +143,7 @@ def_lock_release(void *lock)
        atomic_add_rel_int(&l->lock, -RC_INCR);
      else {
        atomic_add_rel_int(&l->lock, -WAFLAG);
-       sigprocmask(SIG_SETMASK, &oldsigmask, NULL);
+       sig_fastunblock();
      }
  }

@@ -286,19 +295,7 @@ lockdflt_init()

      memcpy(&lockinfo, &deflockinfo, sizeof(lockinfo));
      _rtld_thread_init(NULL);
-    /*
-     * Construct a mask to block all signals except traps which might
-     * conceivably be generated within the dynamic linker itself.
-     */
-    sigfillset(&fullsigmask);
-    sigdelset(&fullsigmask, SIGILL);
-    sigdelset(&fullsigmask, SIGTRAP);
-    sigdelset(&fullsigmask, SIGABRT);
-    sigdelset(&fullsigmask, SIGEMT);
-    sigdelset(&fullsigmask, SIGFPE);
-    sigdelset(&fullsigmask, SIGBUS);
-    sigdelset(&fullsigmask, SIGSEGV);
-    sigdelset(&fullsigmask, SIGSYS);
+    __sys_fast_sigblock(FAST_SIGBLOCK_SETPTR, &fsigblock);
  }

  /*
@@ -319,7 +316,10 @@ _rtld_thread_init(struct RtldLockInfo *pli)

        if (pli == NULL)
                pli = &deflockinfo;
-
+       else {
+               fsigblock = 0;
+               __sys_fast_sigblock(FAST_SIGBLOCK_UNSETPTR, NULL);
+       }

        for (i = 0; i < RTLD_LOCK_CNT; i++)
                if ((locks[i] = pli->lock_create()) == NULL)
diff --git a/sys/compat/freebsd32/syscalls.master b/sys/compat/freebsd32/syscalls.master
index 0891e41..f9e8b9e 100644
--- a/sys/compat/freebsd32/syscalls.master
+++ b/sys/compat/freebsd32/syscalls.master
@@ -997,3 +997,4 @@
                                    uint32_t offset1, uint32_t offset2,\
                                    uint32_t len1, uint32_t len2, \
                                    int advice); }
+532    AUE_NULL        NOPROTO { int fast_sigblock(int cmd, uint32_t *ptr); }
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index 90f7311..8a3cd15 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -1031,6 +1031,7 @@ exec_new_vmspace(imgp, sv)
        int error;
        struct proc *p = imgp->proc;
        struct vmspace *vmspace = p->p_vmspace;
+       struct thread *td = curthread;
        vm_object_t obj;
        vm_offset_t sv_minuser, stack_addr;
        vm_map_t map;
@@ -1039,6 +1040,10 @@ exec_new_vmspace(imgp, sv)
        imgp->vmspace_destroyed = 1;
        imgp->sysent = sv;

+       td->td_pflags &= ~TDP_FAST_SIGBLOCK;
+       td->td_sigblock_ptr = NULL;
+       td->td_sigblock_val = 0;
+
        /* May be called with Giant held */
        EVENTHANDLER_INVOKE(process_exec, p, imgp);

diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index 2685a8b..3c8a2f7 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -230,6 +230,7 @@ static int sigproptbl[NSIG] = {
  };

  static void reschedule_signals(struct proc *p, sigset_t block, int flags);
+static sigset_t fastblock_mask;

  static void
  sigqueue_start(void)
@@ -240,6 +241,16 @@ sigqueue_start(void)
        p31b_setcfg(CTL_P1003_1B_REALTIME_SIGNALS, _POSIX_REALTIME_SIGNALS);
        p31b_setcfg(CTL_P1003_1B_RTSIG_MAX, SIGRTMAX - SIGRTMIN + 1);
        p31b_setcfg(CTL_P1003_1B_SIGQUEUE_MAX, max_pending_per_proc);
+       SIGFILLSET(fastblock_mask);
+       SIG_CANTMASK(fastblock_mask);
+       SIGDELSET(fastblock_mask, SIGILL);
+       SIGDELSET(fastblock_mask, SIGTRAP);
+       SIGDELSET(fastblock_mask, SIGABRT);
+       SIGDELSET(fastblock_mask, SIGEMT);
+       SIGDELSET(fastblock_mask, SIGFPE);
+       SIGDELSET(fastblock_mask, SIGBUS);
+       SIGDELSET(fastblock_mask, SIGSEGV);
+       SIGDELSET(fastblock_mask, SIGSYS);
  }

  ksiginfo_t *
@@ -2525,6 +2536,7 @@ issignal(struct thread *td, int stop_allowed)
        struct sigqueue *queue;
        sigset_t sigpending;
        int sig, prop, newsig;
+       uint32_t oldval;

        p = td->td_proc;
        ps = p->p_sigacts;
@@ -2541,6 +2553,32 @@ issignal(struct thread *td, int stop_allowed)
                        SIG_STOPSIGMASK(sigpending);
                if (SIGISEMPTY(sigpending))     /* no signal to send */
                        return (0);
+
+               /*
+                * Do fast sigblock if requested by usermode.  Since
+                * we do know that there was a signal pending at this
+                * point, set the FAST_SIGBLOCK_PEND as indicator for
+                * usermode to perform a dummy call to
+                * FAST_SIGBLOCK_UNBLOCK, which causes immediate
+                * delivery of postponed pending signal.
+                */
+               if ((td->td_pflags & TDP_FAST_SIGBLOCK) != 0) {
+                       if (td->td_sigblock_val != 0)
+                               SIGSETNAND(sigpending, fastblock_mask);
+                       if (SIGISEMPTY(sigpending)) {
+                               oldval = fuword32(td->td_sigblock_ptr);
+                               if (oldval == -1) {
+                                       fetch_fast_sigblock_failed(td, 0);
+                                       return (0);
+                               }
+                               oldval |= FAST_SIGBLOCK_PEND;
+                               if (suword32(td->td_sigblock_ptr, oldval) == -1)
+                                       fetch_fast_sigblock_failed(td, 1);
+                               td->td_sigblock_val = oldval;
+                               return (0);
+                       }
+               }
+
                sig = sig_ffs(&sigpending);

                if (p->p_stops & S_SIG) {
@@ -3456,3 +3494,92 @@ sigacts_shared(struct sigacts *ps)
        mtx_unlock(&ps->ps_mtx);
        return (shared);
  }
+
+int
+sys_fast_sigblock(struct thread *td, struct fast_sigblock_args *uap)
+{
+       struct proc *p;
+       int error;
+       uint32_t oldval;
+
+       error = 0;
+       switch (uap->cmd) {
+       case FAST_SIGBLOCK_SETPTR:
+               if ((td->td_pflags & TDP_FAST_SIGBLOCK) != 0)
+                       error = EBUSY;
+               else if (((uintptr_t)(uap->ptr) & (sizeof(uint32_t) - 1)) != 0)
+                       error = EINVAL;
+               else {
+                       td->td_pflags |= TDP_FAST_SIGBLOCK;
+                       td->td_sigblock_ptr = uap->ptr;
+               }
+               break;
+
+       case FAST_SIGBLOCK_UNBLOCK:
+               if ((td->td_pflags & TDP_FAST_SIGBLOCK) != 0) {
+                       oldval = casuword32(td->td_sigblock_ptr,
+                           FAST_SIGBLOCK_PEND, 0);
+                       if (oldval == (uint32_t)-1)
+                               error = EFAULT;
+                       else if (oldval != FAST_SIGBLOCK_PEND)
+                               error = EBUSY;
+                       else
+                               td->td_sigblock_val = 0;
+               } else
+                       error = EDOOFUS;
+
+               /*
+                * Rely on normal ast mechanism to deliver pending
+                * signals to current thread.  But notify others about
+                * fake unblock.
+                */
+               p = td->td_proc;
+               if (error == 0 && p->p_numthreads != 1) {
+                       PROC_LOCK(p);
+                       reschedule_signals(p, td->td_sigmask, 0);
+                       PROC_UNLOCK(p);
+               }
+               break;
+
+       case FAST_SIGBLOCK_UNSETPTR:
+               if ((td->td_pflags & TDP_FAST_SIGBLOCK) != 0) {
+                       error = copyin(td->td_sigblock_ptr, &oldval,
+                           sizeof(uint32_t));
+                       if (error != 0)
+                               break;
+                       if (oldval != 0 && oldval != FAST_SIGBLOCK_PEND)
+                               error = EBUSY;
+                       else {
+                               td->td_pflags &= ~TDP_FAST_SIGBLOCK;
+                               td->td_sigblock_val = 0;
+                       }
+               } else
+                       error = EDOOFUS;
+               break;
+       }
+       return (error);
+}
+
+void
+fetch_fast_sigblock(struct thread *td)
+{
+
+       if ((td->td_pflags & TDP_FAST_SIGBLOCK) == 0)
+               return;
+       td->td_sigblock_val = fuword32(td->td_sigblock_ptr);
+       if (td->td_sigblock_val == -1)
+               fetch_fast_sigblock_failed(td, 0);
+}
+
+void
+fetch_fast_sigblock_failed(struct thread *td, int write)
+{
+       ksiginfo_t ksi;
+
+       td->td_sigblock_val = 0;
+       ksiginfo_init_trap(&ksi);
+       ksi.ksi_signo = SIGSEGV;
+       ksi.ksi_code = write ? SEGV_ACCERR :  SEGV_MAPERR;
+       ksi.ksi_addr = td->td_sigblock_ptr;
+       trapsignal(td, &ksi);
+}
diff --git a/sys/kern/subr_syscall.c b/sys/kern/subr_syscall.c
index 5aee684..77b250d 100644
--- a/sys/kern/subr_syscall.c
+++ b/sys/kern/subr_syscall.c
@@ -131,6 +131,12 @@ syscallenter(struct thread *td, struct syscall_args *sa)
                            sa->callp, sa->args, 0);
  #endif

+               /*
+                * Fetch fast sigblock value at the time of syscall
+                * entry because sleepqueue primitives might call
+                * cursig().
+                */
+               fetch_fast_sigblock(td);
                AUDIT_SYSCALL_ENTER(sa->code, td);
                error = (sa->callp->sy_call)(td, sa->args);
                AUDIT_SYSCALL_EXIT(error, td);
diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c
index 095bbdf..66e485b 100644
--- a/sys/kern/subr_trap.c
+++ b/sys/kern/subr_trap.c
@@ -237,6 +237,7 @@ ast(struct trapframe *framep)
         */
        if (flags & TDF_NEEDSIGCHK || p->p_pendingcnt > 0 ||
            !SIGISEMPTY(p->p_siglist)) {
+               fetch_fast_sigblock(td);
                PROC_LOCK(p);
                mtx_lock(&p->p_sigacts->ps_mtx);
                while ((sig = cursig(td, SIG_STOP_ALLOWED)) != 0)
diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master
index f62dad7..28a9393 100644
--- a/sys/kern/syscalls.master
+++ b/sys/kern/syscalls.master
@@ -951,5 +951,6 @@
                                    off_t offset, off_t len); }
  531   AUE_NULL        STD     { int posix_fadvise(int fd, off_t offset, \
                                    off_t len, int advice); }
+532    AUE_NULL        STD     { int fast_sigblock(int cmd, uint32_t *ptr); }
  ; Please copy any additions and changes to the following compatability tables:
  ; sys/compat/freebsd32/syscalls.master
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index 06df632..4899ca2 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -272,6 +272,9 @@ struct thread {
        struct osd      td_osd;         /* (k) Object specific data. */
        struct vm_map_entry *td_map_def_user; /* (k) Deferred entries. */
        pid_t           td_dbg_forked;  /* (c) Child pid for debugger. */
+       void            *td_sigblock_ptr; /* (k) uptr for fast sigblock. */
+       uint32_t        td_sigblock_val;  /* (k) fast sigblock value at
+                                            kernel entry. */
  #define       td_endzero td_sigmask

  /* Copied during fork1() or create_thread(). */
@@ -424,6 +427,7 @@ do {						\
  #define       TDP_RESETSPUR   0x04000000 /* Reset spurious page fault history. */
  #define       TDP_NERRNO      0x08000000 /* Last errno is already in td_errno */
  #define       TDP_UIOHELD     0x10000000 /* Current uio has pages held in td_ma */
+#define        TDP_FAST_SIGBLOCK 0x20000000 /* Fast sigblock active */

  /*
   * Reasons that the current thread can not be run yet.
diff --git a/sys/sys/signalvar.h b/sys/sys/signalvar.h
index 71685e7..68cca58 100644
--- a/sys/sys/signalvar.h
+++ b/sys/sys/signalvar.h
@@ -250,6 +250,20 @@ typedef struct sigqueue {
  /* Flags for ksi_flags */
  #define       SQ_INIT 0x01

+/*
+ * Fast_sigblock
+ */
+#define        FAST_SIGBLOCK_SETPTR    1
+#define        FAST_SIGBLOCK_UNBLOCK   2
+#define        FAST_SIGBLOCK_UNSETPTR  3
+
+#define        FAST_SIGBLOCK_PEND      0x1
+#define        FAST_SIGBLOCK_INC       0x10
+
+#ifndef _KERNEL
+int __sys_fast_sigblock(int cmd, void *ptr);
+#endif
+
  #ifdef _KERNEL

  /* Return nonzero if process p has an unmasked pending signal. */
@@ -329,6 +343,8 @@ extern struct mtx   sigio_lock;

  int   cursig(struct thread *td, int stop_allowed);
  void  execsigs(struct proc *p);
+void   fetch_fast_sigblock(struct thread *td);
+void   fetch_fast_sigblock_failed(struct thread *td, int write);
  void  gsignal(int pgid, int sig, ksiginfo_t *ksi);
  void  killproc(struct proc *p, char *why);
  ksiginfo_t * ksiginfo_alloc(int wait);



----- End forwarded message -----

So you want to make the kernel share data with userland. The only thing
I don't like is that it adds overhead to every syscall; rumor has it
that our syscall path is already slower than others', though perhaps
this is not a problem?

A long time ago, I proposed a schedctl() syscall to make the kernel
share data with userland; some old patches are still there:
http://people.freebsd.org/~davidxu/schedctl/

Mine does not have such an overhead, but it has a different one: a
memory page is allocated in the kernel which cannot be swapped out and
cannot be freed until the process exits. The page is doubly mapped
into the kernel and userland, so accessing the shared data from the
kernel has zero overhead.
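
For comparison, a hypothetical sketch of the userland view of that
approach (the schedctl() call and the structure layout below are
illustrative only, not the API from those patches): the kernel hands
out a pointer into the wired, doubly-mapped page, and after the setup
call no further syscalls are needed:

    struct shared_page {            /* illustrative layout only */
            uint32_t        sc_sigblock;
            uint32_t        sc_flags;
    };

    static struct shared_page *sp;

    static void
    setup(void)
    {
            sp = schedctl();        /* one syscall per thread, at setup time */
    }

    static void
    block(void)
    {
            sp->sc_sigblock++;      /* plain store; the kernel reads the same page */
    }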
