Author: jhb
Date: Thu Sep 19 18:53:42 2013
New Revision: 255708
URL: http://svnweb.freebsd.org/changeset/base/255708

Log:
  Extend the support for exempting processes from being killed when swap is
  exhausted.
  - Add a new protect(1) command that can be used to set or revoke protection
    from arbitrary processes.  Similar to ktrace it can apply a change to all
    existing descendants of a process as well as future descendants.
  - Add a new procctl(2) system call that provides a generic interface for
    control operations on processes (as opposed to the debugger-specific
    operations provided by ptrace(2)).  procctl(2) uses a combination of
    idtype_t and an id to identify the set of processes on which to operate
    similar to wait6().
  - Add a PROC_SPROTECT control operation to manage the protection status
    of a set of processes.  MADV_PROTECT still works for backwards
    compatibility.
  - Add a p_flag2 to struct proc (and a corresponding ki_flag2 to kinfo_proc)
    the first bit of which is used to track if P_PROTECT should be inherited
    by new child processes.
  
  Reviewed by:  kib, jilles (earlier version)
  Approved by:  re (delphij)
  MFC after:    1 month

Added:
  head/lib/libc/sys/procctl.2   (contents, props changed)
  head/sys/sys/procctl.h   (contents, props changed)
  head/usr.bin/protect/
  head/usr.bin/protect/Makefile   (contents, props changed)
  head/usr.bin/protect/protect.1   (contents, props changed)
  head/usr.bin/protect/protect.c   (contents, props changed)
Modified:
  head/lib/libc/sys/Makefile.inc
  head/lib/libc/sys/Symbol.map
  head/sys/compat/freebsd32/freebsd32.h
  head/sys/compat/freebsd32/freebsd32_misc.c
  head/sys/compat/freebsd32/syscalls.master
  head/sys/kern/init_main.c
  head/sys/kern/kern_fork.c
  head/sys/kern/kern_proc.c
  head/sys/kern/sys_process.c
  head/sys/kern/syscalls.master
  head/sys/sys/proc.h
  head/sys/sys/syscallsubr.h
  head/sys/sys/user.h
  head/sys/vm/vm_mmap.c
  head/usr.bin/Makefile
  head/usr.bin/kdump/kdump.c
  head/usr.bin/kdump/mksubr
  head/usr.bin/truss/syscall.h
  head/usr.bin/truss/syscalls.c

Modified: head/lib/libc/sys/Makefile.inc
==============================================================================
--- head/lib/libc/sys/Makefile.inc      Thu Sep 19 18:00:05 2013        
(r255707)
+++ head/lib/libc/sys/Makefile.inc      Thu Sep 19 18:53:42 2013        
(r255708)
@@ -197,6 +197,7 @@ MAN+=       abort2.2 \
        posix_fadvise.2 \
        posix_fallocate.2 \
        posix_openpt.2 \
+       procctl.2 \
        profil.2 \
        pselect.2 \
        ptrace.2 \

Modified: head/lib/libc/sys/Symbol.map
==============================================================================
--- head/lib/libc/sys/Symbol.map        Thu Sep 19 18:00:05 2013        
(r255707)
+++ head/lib/libc/sys/Symbol.map        Thu Sep 19 18:53:42 2013        
(r255708)
@@ -395,6 +395,7 @@ FBSD_1.3 {
        ffclock_setestimate;
        pipe2;
        posix_fadvise;
+       procctl;
        wait6;
 };
 
@@ -822,6 +823,8 @@ FBSDprivate_1.0 {
        __sys_poll;
        _preadv;
        __sys_preadv;
+       _procctl;
+       __sys_procctl;
        _profil;
        __sys_profil;
        _pselect;

Added: head/lib/libc/sys/procctl.2
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/lib/libc/sys/procctl.2 Thu Sep 19 18:53:42 2013        (r255708)
@@ -0,0 +1,142 @@
+.\" Copyright (c) 2013 Advanced Computing Technologies LLC
+.\" Written by: John H. Baldwin <j...@freebsd.org>
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd September 19, 2013
+.Dt PROCCTL 2
+.Os
+.Sh NAME
+.Nm procctl
+.Nd control processes
+.Sh LIBRARY
+.Lb libc
+.Sh SYNOPSIS
+.In sys/procctl.h
+.Ft int
+.Fn procctl "idtype_t idtype" "id_t id" "int cmd" "void *arg"
+.Sh DESCRIPTION
+The
+.Fn procctl
+system call provides for control over processes.
+The
+.Fa idtype
+and
+.Fa id
+arguments specify the set of processes to control.
+If multiple processes match the identifier,
+.Nm
+will make a
+.Dq best effort
+to control as many of the selected processes as possible.
+An error is only returned if no selected processes successfully complete
+the request.
+The following identifier types are supported:
+.Bl -tag -width Dv P_PGID
+.It Dv P_PID
+Control the process with the process ID
+.Fa id .
+.It Dv P_PGID
+Control processes belonging to the process group with the ID
+.Fa id .
+.El
+.Pp
+The control request to perform is specified by the
+.Fa cmd
+argument.
+The following commands are supported:
+.Bl -tag -width Dv PROC_SPROTECT
+.It Dv PROC_SPROTECT
+Set process protection state.
+This is used to mark a process as protected from being killed if the system
+exhausts available memory and swap.
+The
+.Fa arg
+parameter must point to an integer containing an operation and zero or more
+optional flags.
+The following operations are supported:
+.Bl -tag -width Dv PPROT_CLEAR
+.It Dv PPROT_SET
+Mark the selected processes as protected.
+.It Dv PPROT_CLEAR
+Clear the protected state of selected processes.
+.El
+.Pp
+The following optional flags are supported:
+.Bl -tag -width Dv PPROT_DESCEND
+.It Dv PPROT_DESCEND
+Apply the requested operation to all descendants of each selected process
+in addition to each selected process.
+.It Dv PPROT_INHERIT
+When used with
+.Dv PPROT_SET ,
+mark all future child processes of each selected process as protected.
+Future child processes will also mark all of their future child processes.
+.El
+.El
+.Sh RETURN VALUES
+A value of 0 is returned on success; on error, -1 is returned and
+.Va errno
+is set to indicate the error.
+.Sh ERRORS
+The
+.Fn procctl
+system call
+will fail if:
+.Bl -tag -width Er
+.It Bq Er EFAULT
+The
+.Fa arg
+points outside the process's allocated address space.
+.It Bq Er EINVAL
+The
+.Fa cmd
+argument specifies an unsupported command.
+.Pp
+The
+.Fa idtype
+argument specifies an unsupported identifier type.
+.It Bq Er EPERM
+The calling process does not have permission to perform the requested
+operation on any of the selected processes.
+.It Bq Er ESRCH
+No processes matched the requested
+.Fa idtype
+and
+.Fa id .
+.It Bq Er EINVAL
+An invalid operation or flag was passed in
+.Fa arg
+for a
+.Dv PROC_SPROTECT
+command.
+.El
+.Sh SEE ALSO
+.Xr ptrace 2
+.Sh HISTORY
+The
+.Fn procctl
+function appeared in
+.Fx 10 .

Modified: head/sys/compat/freebsd32/freebsd32.h
==============================================================================
--- head/sys/compat/freebsd32/freebsd32.h       Thu Sep 19 18:00:05 2013        
(r255707)
+++ head/sys/compat/freebsd32/freebsd32.h       Thu Sep 19 18:53:42 2013        
(r255708)
@@ -342,6 +342,7 @@ struct kinfo_proc32 {
        char    ki_loginclass[LOGINCLASSLEN+1];
        char    ki_sparestrings[50];
        int     ki_spareints[KI_NSPARE_INT];
+       int     ki_flag2;
        int     ki_fibnum;
        u_int   ki_cr_flags;
        int     ki_jid;

Modified: head/sys/compat/freebsd32/freebsd32_misc.c
==============================================================================
--- head/sys/compat/freebsd32/freebsd32_misc.c  Thu Sep 19 18:00:05 2013        
(r255707)
+++ head/sys/compat/freebsd32/freebsd32_misc.c  Thu Sep 19 18:53:42 2013        
(r255708)
@@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/proc.h>
+#include <sys/procctl.h>
 #include <sys/reboot.h>
 #include <sys/resource.h>
 #include <sys/resourcevar.h>
@@ -3000,3 +3001,23 @@ convert_sigevent32(struct sigevent32 *si
        }
        return (0);
 }
+
+int
+freebsd32_procctl(struct thread *td, struct freebsd32_procctl_args *uap)
+{
+       void *data;
+       int error, flags;
+
+       switch (uap->com) {
+       case PROC_SPROTECT:
+               error = copyin(PTRIN(uap->data), &flags, sizeof(flags));
+               if (error)
+                       return (error);
+               data = &flags;
+               break;
+       default:
+               return (EINVAL);
+       }
+       return (kern_procctl(td, uap->idtype, PAIR32TO64(id_t, uap->id),
+           uap->com, data));
+}

Modified: head/sys/compat/freebsd32/syscalls.master
==============================================================================
--- head/sys/compat/freebsd32/syscalls.master   Thu Sep 19 18:00:05 2013        
(r255707)
+++ head/sys/compat/freebsd32/syscalls.master   Thu Sep 19 18:53:42 2013        
(r255708)
@@ -1056,3 +1056,12 @@
 542    AUE_PIPE        NOPROTO { int pipe2(int *fildes, int flags); }
 543    AUE_NULL        NOSTD   { int freebsd32_aio_mlock( \
                                    struct aiocb32 *aiocbp); }
+#ifdef PAD64_REQUIRED
+544    AUE_NULL        STD     { int freebsd32_procctl(int idtype, int pad, \
+                                   uint32_t id1, uint32_t id2, int com, \
+                                   void *data); }
+#else
+544    AUE_NULL        STD     { int freebsd32_procctl(int idtype, \
+                                   uint32_t id1, uint32_t id2, int com, \
+                                   void *data); }
+#endif

Modified: head/sys/kern/init_main.c
==============================================================================
--- head/sys/kern/init_main.c   Thu Sep 19 18:00:05 2013        (r255707)
+++ head/sys/kern/init_main.c   Thu Sep 19 18:53:42 2013        (r255708)
@@ -474,6 +474,7 @@ proc0_init(void *dummy __unused)
 
        p->p_sysent = &null_sysvec;
        p->p_flag = P_SYSTEM | P_INMEM;
+       p->p_flag2 = 0;
        p->p_state = PRS_NORMAL;
        knlist_init_mtx(&p->p_klist, &p->p_mtx);
        STAILQ_INIT(&p->p_ktr);

Modified: head/sys/kern/kern_fork.c
==============================================================================
--- head/sys/kern/kern_fork.c   Thu Sep 19 18:00:05 2013        (r255707)
+++ head/sys/kern/kern_fork.c   Thu Sep 19 18:53:42 2013        (r255708)
@@ -489,6 +489,7 @@ do_fork(struct thread *td, int flags, st
         * Increase reference counts on shared objects.
         */
        p2->p_flag = P_INMEM;
+       p2->p_flag2 = 0;
        p2->p_swtick = ticks;
        if (p1->p_flag & P_PROFIL)
                startprofclock(p2);
@@ -512,6 +513,11 @@ do_fork(struct thread *td, int flags, st
        p2->p_fd = fd;
        p2->p_fdtol = fdtol;
 
+       if (p1->p_flag2 & P2_INHERIT_PROTECTED) {
+               p2->p_flag |= P_PROTECTED;
+               p2->p_flag2 |= P2_INHERIT_PROTECTED;
+       }
+
        /*
         * p_limit is copy-on-write.  Bump its refcount.
         */

Modified: head/sys/kern/kern_proc.c
==============================================================================
--- head/sys/kern/kern_proc.c   Thu Sep 19 18:00:05 2013        (r255707)
+++ head/sys/kern/kern_proc.c   Thu Sep 19 18:53:42 2013        (r255708)
@@ -802,6 +802,7 @@ fill_kinfo_proc_only(struct proc *p, str
        kp->ki_fd = p->p_fd;
        kp->ki_vmspace = p->p_vmspace;
        kp->ki_flag = p->p_flag;
+       kp->ki_flag2 = p->p_flag2;
        cred = p->p_ucred;
        if (cred) {
                kp->ki_uid = cred->cr_uid;
@@ -1161,6 +1162,7 @@ freebsd32_kinfo_proc_out(const struct ki
        bcopy(ki->ki_comm, ki32->ki_comm, COMMLEN + 1);
        bcopy(ki->ki_emul, ki32->ki_emul, KI_EMULNAMELEN + 1);
        bcopy(ki->ki_loginclass, ki32->ki_loginclass, LOGINCLASSLEN + 1);
+       CP(*ki, *ki32, ki_flag2);
        CP(*ki, *ki32, ki_fibnum);
        CP(*ki, *ki32, ki_cr_flags);
        CP(*ki, *ki32, ki_jid);

Modified: head/sys/kern/sys_process.c
==============================================================================
--- head/sys/kern/sys_process.c Thu Sep 19 18:00:05 2013        (r255707)
+++ head/sys/kern/sys_process.c Thu Sep 19 18:53:42 2013        (r255708)
@@ -41,7 +41,9 @@ __FBSDID("$FreeBSD$");
 #include <sys/syscallsubr.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
+#include <sys/priv.h>
 #include <sys/proc.h>
+#include <sys/procctl.h>
 #include <sys/vnode.h>
 #include <sys/ptrace.h>
 #include <sys/rwlock.h>
@@ -1240,3 +1242,196 @@ stopevent(struct proc *p, unsigned int e
                msleep(&p->p_step, &p->p_mtx, PWAIT, "stopevent", 0);
        } while (p->p_step);
 }
+
+static int
+protect_setchild(struct thread *td, struct proc *p, int flags)
+{
+
+       PROC_LOCK_ASSERT(p, MA_OWNED);
+       if (p->p_flag & P_SYSTEM || p_cansee(td, p) != 0)
+               return (0);
+       if (flags & PPROT_SET) {
+               p->p_flag |= P_PROTECTED;
+               if (flags & PPROT_INHERIT)
+                       p->p_flag2 |= P2_INHERIT_PROTECTED;
+       } else {
+               p->p_flag &= ~P_PROTECTED;
+               p->p_flag2 &= ~P2_INHERIT_PROTECTED;
+       }
+       return (1);
+}
+
+static int
+protect_setchildren(struct thread *td, struct proc *top, int flags)
+{
+       struct proc *p;
+       int ret;
+
+       p = top;
+       ret = 0;
+       sx_assert(&proctree_lock, SX_LOCKED);
+       for (;;) {
+               ret |= protect_setchild(td, p, flags);
+               PROC_UNLOCK(p);
+               /*
+                * If this process has children, descend to them next,
+                * otherwise do any siblings, and if done with this level,
+                * follow back up the tree (but not past top).
+                */
+               if (!LIST_EMPTY(&p->p_children))
+                       p = LIST_FIRST(&p->p_children);
+               else for (;;) {
+                       if (p == top) {
+                               PROC_LOCK(p);
+                               return (ret);
+                       }
+                       if (LIST_NEXT(p, p_sibling)) {
+                               p = LIST_NEXT(p, p_sibling);
+                               break;
+                       }
+                       p = p->p_pptr;
+               }
+               PROC_LOCK(p);
+       }
+}
+
+static int
+protect_set(struct thread *td, struct proc *p, int flags)
+{
+       int error, ret;
+
+       switch (PPROT_OP(flags)) {
+       case PPROT_SET:
+       case PPROT_CLEAR:
+               break;
+       default:
+               return (EINVAL);
+       }
+
+       if ((PPROT_FLAGS(flags) & ~(PPROT_DESCEND | PPROT_INHERIT)) != 0)
+               return (EINVAL);
+
+       error = priv_check(td, PRIV_VM_MADV_PROTECT);
+       if (error)
+               return (error);
+
+       if (flags & PPROT_DESCEND)
+               ret = protect_setchildren(td, p, flags);
+       else
+               ret = protect_setchild(td, p, flags);
+       if (ret == 0)
+               return (EPERM);
+       return (0);
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct procctl_args {
+       idtype_t idtype;
+       id_t    id;
+       int     com;
+       void    *data;
+};
+#endif
+/* ARGSUSED */
+int
+sys_procctl(struct thread *td, struct procctl_args *uap)
+{
+       int error, flags;
+       void *data;
+
+       switch (uap->com) {
+       case PROC_SPROTECT:
+               error = copyin(uap->data, &flags, sizeof(flags));
+               if (error)
+                       return (error);
+               data = &flags;
+               break;
+       default:
+               return (EINVAL);
+       }
+
+       return (kern_procctl(td, uap->idtype, uap->id, uap->com, data));
+}
+
+static int
+kern_procctl_single(struct thread *td, struct proc *p, int com, void *data)
+{
+
+       PROC_LOCK_ASSERT(p, MA_OWNED);
+       switch (com) {
+       case PROC_SPROTECT:
+               return (protect_set(td, p, *(int *)data));
+       default:
+               return (EINVAL);
+       }
+}
+
+int
+kern_procctl(struct thread *td, idtype_t idtype, id_t id, int com, void *data)
+{
+       struct pgrp *pg;
+       struct proc *p;
+       int error, first_error, ok;
+
+       sx_slock(&proctree_lock);
+       switch (idtype) {
+       case P_PID:
+               p = pfind(id);
+               if (p == NULL) {
+                       error = ESRCH;
+                       break;
+               }
+               if (p->p_state == PRS_NEW)
+                       error = ESRCH;
+               else
+                       error = p_cansee(td, p);
+               if (error == 0)
+                       error = kern_procctl_single(td, p, com, data);
+               PROC_UNLOCK(p);
+               break;
+       case P_PGID:
+               /*
+                * Attempt to apply the operation to all members of the
+                * group.  Ignore processes in the group that can't be
+                * seen.  Ignore errors so long as at least one process is
+                * able to complete the request successfully.
+                */
+               pg = pgfind(id);
+               if (pg == NULL) {
+                       error = ESRCH;
+                       break;
+               }
+               PGRP_UNLOCK(pg);
+               ok = 0;
+               first_error = 0;
+               LIST_FOREACH(p, &pg->pg_members, p_pglist) {
+                       PROC_LOCK(p);
+                       if (p->p_state == PRS_NEW || p_cansee(td, p) != 0) {
+                               PROC_UNLOCK(p);
+                               continue;
+                       }
+                       error = kern_procctl_single(td, p, com, data);
+                       PROC_UNLOCK(p);
+                       if (error == 0)
+                               ok = 1;
+                       else if (first_error == 0)
+                               first_error = error;
+               }
+               if (ok)
+                       error = 0;
+               else if (first_error != 0)
+                       error = first_error;
+               else
+                       /*
+                        * Was not able to see any processes in the
+                        * process group.
+                        */
+                       error = ESRCH;
+               break;
+       default:
+               error = EINVAL;
+               break;
+       }
+       sx_sunlock(&proctree_lock);
+       return (error);
+}

Modified: head/sys/kern/syscalls.master
==============================================================================
--- head/sys/kern/syscalls.master       Thu Sep 19 18:00:05 2013        
(r255707)
+++ head/sys/kern/syscalls.master       Thu Sep 19 18:53:42 2013        
(r255708)
@@ -978,5 +978,7 @@
                                    int flags); }
 542    AUE_PIPE        STD     { int pipe2(int *fildes, int flags); }
 543    AUE_NULL        NOSTD   { int aio_mlock(struct aiocb *aiocbp); }
+544    AUE_NULL        STD     { int procctl(idtype_t idtype, id_t id, \
+                                   int com, void *data); }
 ; Please copy any additions and changes to the following compatability tables:
 ; sys/compat/freebsd32/syscalls.master

Modified: head/sys/sys/proc.h
==============================================================================
--- head/sys/sys/proc.h Thu Sep 19 18:00:05 2013        (r255707)
+++ head/sys/sys/proc.h Thu Sep 19 18:53:42 2013        (r255708)
@@ -492,11 +492,8 @@ struct proc {
        struct callout  p_limco;        /* (c) Limit callout handle */
        struct sigacts  *p_sigacts;     /* (x) Signal actions, state (CPU). */
 
-       /*
-        * The following don't make too much sense.
-        * See the td_ or ke_ versions of the same flags.
-        */
        int             p_flag;         /* (c) P_* flags. */
+       int             p_flag2;        /* (c) P2_* flags. */
        enum {
                PRS_NEW = 0,            /* In creation */
                PRS_NORMAL,             /* threads can be run. */
@@ -641,6 +638,9 @@ struct proc {
 #define        P_SHOULDSTOP(p) ((p)->p_flag & P_STOPPED)
 #define        P_KILLED(p)     ((p)->p_flag & P_WKILLED)
 
+/* These flags are kept in p_flag2. */
+#define        P2_INHERIT_PROTECTED 0x00000001 /* New children get P_PROTECTED. */
+
 /*
  * These were process status values (p_stat), now they are only used in
  * legacy conversion code.

Added: head/sys/sys/procctl.h
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/sys/sys/procctl.h      Thu Sep 19 18:53:42 2013        (r255708)
@@ -0,0 +1,55 @@
+/*-
+ * Copyright (c) 2013 Advanced Computing Technologies LLC
+ * Written by: John H. Baldwin <j...@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef        _SYS_PROCCTL_H_
+#define        _SYS_PROCCTL_H_
+
+#define        PROC_SPROTECT           1       /* set protected state */
+
+/* Operations for PROC_SPROTECT (passed in integer arg). */
+#define        PPROT_OP(x)     ((x) & 0xf)
+#define        PPROT_SET       1
+#define        PPROT_CLEAR     2
+
+/* Flags for PROC_SPROTECT (ORed in with operation). */
+#define        PPROT_FLAGS(x)  ((x) & ~0xf)
+#define        PPROT_DESCEND   0x10
+#define        PPROT_INHERIT   0x20
+
+#ifndef _KERNEL
+#include <sys/types.h>
+#include <sys/wait.h>
+
+__BEGIN_DECLS
+int    procctl(idtype_t, id_t, int, void *);
+__END_DECLS
+
+#endif
+
+#endif /* !_SYS_PROCCTL_H_ */

Modified: head/sys/sys/syscallsubr.h
==============================================================================
--- head/sys/sys/syscallsubr.h  Thu Sep 19 18:00:05 2013        (r255707)
+++ head/sys/sys/syscallsubr.h  Thu Sep 19 18:53:42 2013        (r255708)
@@ -167,6 +167,8 @@ int kern_posix_fadvise(struct thread *td
            int advice);
 int    kern_posix_fallocate(struct thread *td, int fd, off_t offset,
            off_t len);
+int    kern_procctl(struct thread *td, enum idtype idtype, id_t id, int com,
+           void *data);
 int    kern_preadv(struct thread *td, int fd, struct uio *auio, off_t offset);
 int    kern_pselect(struct thread *td, int nd, fd_set *in, fd_set *ou,
            fd_set *ex, struct timeval *tvp, sigset_t *uset, int abi_nfdbits);

Modified: head/sys/sys/user.h
==============================================================================
--- head/sys/sys/user.h Thu Sep 19 18:00:05 2013        (r255707)
+++ head/sys/sys/user.h Thu Sep 19 18:53:42 2013        (r255708)
@@ -84,7 +84,7 @@
  * it in two places: function fill_kinfo_proc in sys/kern/kern_proc.c and
  * function kvm_proclist in lib/libkvm/kvm_proc.c .
  */
-#define        KI_NSPARE_INT   8
+#define        KI_NSPARE_INT   7
 #define        KI_NSPARE_LONG  12
 #define        KI_NSPARE_PTR   6
 
@@ -187,6 +187,7 @@ struct kinfo_proc {
         */
        char    ki_sparestrings[50];    /* spare string space */
        int     ki_spareints[KI_NSPARE_INT];    /* spare room for growth */
+       int     ki_flag2;               /* P2_* flags */
        int     ki_fibnum;              /* Default FIB number */
        u_int   ki_cr_flags;            /* Credential flags */
        int     ki_jid;                 /* Process jail ID */

Modified: head/sys/vm/vm_mmap.c
==============================================================================
--- head/sys/vm/vm_mmap.c       Thu Sep 19 18:00:05 2013        (r255707)
+++ head/sys/vm/vm_mmap.c       Thu Sep 19 18:53:42 2013        (r255708)
@@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/filedesc.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
+#include <sys/procctl.h>
 #include <sys/racct.h>
 #include <sys/resource.h>
 #include <sys/resourcevar.h>
@@ -68,6 +69,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/mount.h>
 #include <sys/conf.h>
 #include <sys/stat.h>
+#include <sys/syscallsubr.h>
 #include <sys/sysent.h>
 #include <sys/vmmeter.h>
 
@@ -739,23 +741,18 @@ sys_madvise(td, uap)
 {
        vm_offset_t start, end;
        vm_map_t map;
-       struct proc *p;
-       int error;
+       int flags;
 
        /*
         * Check for our special case, advising the swap pager we are
         * "immortal."
         */
        if (uap->behav == MADV_PROTECT) {
-               error = priv_check(td, PRIV_VM_MADV_PROTECT);
-               if (error == 0) {
-                       p = td->td_proc;
-                       PROC_LOCK(p);
-                       p->p_flag |= P_PROTECTED;
-                       PROC_UNLOCK(p);
-               }
-               return (error);
+               flags = PPROT_SET;
+               return (kern_procctl(td, P_PID, td->td_proc->p_pid,
+                   PROC_SPROTECT, &flags));
        }
+
        /*
         * Check for illegal behavior
         */

Modified: head/usr.bin/Makefile
==============================================================================
--- head/usr.bin/Makefile       Thu Sep 19 18:00:05 2013        (r255707)
+++ head/usr.bin/Makefile       Thu Sep 19 18:53:42 2013        (r255708)
@@ -132,6 +132,7 @@ SUBDIR=     alias \
        printenv \
        printf \
        procstat \
+       protect \
        rctl \
        renice \
        rev \

Modified: head/usr.bin/kdump/kdump.c
==============================================================================
--- head/usr.bin/kdump/kdump.c  Thu Sep 19 18:00:05 2013        (r255707)
+++ head/usr.bin/kdump/kdump.c  Thu Sep 19 18:53:42 2013        (r255708)
@@ -1161,6 +1161,18 @@ ktrsyscall(struct ktr_syscall *ktr, u_in
                                ip++;
                                narg--;
                                break;
+                       case SYS_procctl:
+                               putchar('(');
+                               idtypename(*ip, decimal);
+                               c = ',';
+                               ip++;
+                               narg--;
+                               print_number(ip, narg, c);
+                               putchar(',');
+                               procctlcmdname(*ip);
+                               ip++;
+                               narg--;
+                               break;
                        }
                }
                while (narg > 0) {

Modified: head/usr.bin/kdump/mksubr
==============================================================================
--- head/usr.bin/kdump/mksubr   Thu Sep 19 18:00:05 2013        (r255707)
+++ head/usr.bin/kdump/mksubr   Thu Sep 19 18:53:42 2013        (r255708)
@@ -169,6 +169,7 @@ cat <<_EOF_
 #include <netinet/in.h>
 #include <sys/param.h>
 #include <sys/mount.h>
+#include <sys/procctl.h>
 #include <sys/ptrace.h>
 #include <sys/resource.h>
 #include <sys/reboot.h>
@@ -465,6 +466,7 @@ auto_or_type     "mountflagsname"      "
 auto_switch_type "msyncflagsname"      "MS_[A-Z]+[[:space:]]+0x[0-9]+"                "sys/mman.h"
 auto_or_type     "nfssvcname"          "NFSSVC_[A-Z0-9]+[[:space:]]+0x[0-9]+"            "nfs/nfssvc.h"
 auto_switch_type "prioname"            "PRIO_[A-Z]+[[:space:]]+[0-9]"                 "sys/resource.h"
+auto_switch_type "procctlcmdname"      "PROC_[A-Z]+[[:space:]]+[0-9]"                 "sys/procctl.h"
 auto_switch_type "ptraceopname"        "PT_[[:alnum:]_]+[[:space:]]+[0-9]+"           "sys/ptrace.h"
 auto_switch_type "quotactlname"        "Q_[A-Z]+[[:space:]]+0x[0-9]+"                 "ufs/ufs/quota.h"
 auto_or_type     "rebootoptname"       "RB_[A-Z]+[[:space:]]+0x[0-9]+"                "sys/reboot.h"

Added: head/usr.bin/protect/Makefile
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/usr.bin/protect/Makefile       Thu Sep 19 18:53:42 2013        
(r255708)
@@ -0,0 +1,6 @@
+# $FreeBSD$
+
+PROG=   protect
+WARNS?=        6
+
+.include <bsd.prog.mk>

Added: head/usr.bin/protect/protect.1
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/usr.bin/protect/protect.1      Thu Sep 19 18:53:42 2013        
(r255708)
@@ -0,0 +1,89 @@
+.\" Copyright (c) 2013 Advanced Computing Technologies LLC
+.\" Written by: John H. Baldwin <j...@freebsd.org>
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd September 19, 2013
+.Dt PROTECT 1
+.Os
+.Sh NAME
+.Nm protect
+.Nd "protect processes from being killed when swap space is exhausted"
+.Sh SYNOPSIS
+.Nm
+.Op Fl i
+.Ar command
+.Nm
+.Op Fl cdi
+.Fl g Ar pgrp | Fl p Ar pid
+.Sh DESCRIPTION
+The
+.Nm
+command is used to mark processes as protected.
+The kernel does not kill protected processes when swap space is exhausted.
+Note that this protected state is not inherited by child processes by default.
+.Pp
+The options are:
+.Bl -tag -width indent
+.It Fl c
+Remove protection from the specified processes.
+.It Fl d
+Apply the operation to all current children of the specified processes.
+.It Fl i
+Apply the operation to all future children of the specified processes.
+.It Fl g Ar pgrp
+Apply the operation to all processes in the specified process group.
+.It Fl p Ar pid
+Apply the operation to the specified process.
+.It Ar command
+Execute
+.Ar command
+as a protected process.
+.El
+.Pp
+Note that only one of the
+.Fl p
+or
+.Fl g
+flags may be specified when adjusting the state of existing processes.
+.Sh EXIT STATUS
+.Ex -std
+.Sh EXAMPLES
+Mark the Xorg server as protected:
+.Pp
+.Dl "pgrep Xorg | xargs protect -p"
+.Pp
+Protect all ssh sessions and their child processes:
+.Pp
+.Dl "pgrep sshd | xargs protect -dip"
+.Pp
+Remove protection from all current and future processes:
+.Pp
+.Dl "protect -cdi -p 1"
+.Sh SEE ALSO
+.Xr procctl 2
+.Sh BUGS
+If you protect a runaway process that allocates all memory the system will
+deadlock.
+.Pp
+Inheritance of the protected state is not automatic; it must be requested
+explicitly with
+.Fl i .

Added: head/usr.bin/protect/protect.c
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/usr.bin/protect/protect.c      Thu Sep 19 18:53:42 2013        
(r255708)
@@ -0,0 +1,122 @@
+/*-
+ * Copyright (c) 2013 Advanced Computing Technologies LLC
+ * Written by: John H. Baldwin <j...@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/procctl.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <err.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+/*
+ * Write the two-line command synopsis to stderr and terminate the
+ * program with exit status 1.  This function does not return.
+ */
+static void
+usage(void)
+{
+
+       fputs("usage: protect [-i] command\n", stderr);
+       fputs("       protect [-cdi] -g pgrp | -p pid\n", stderr);
+       exit(1);
+}
+
+/*
+ * Convert the argument of a -g or -p option into a numeric id.
+ *
+ * The string is parsed with strtol() using base 0, so decimal, octal
+ * (leading 0) and hexadecimal (leading 0x) forms are accepted.  An empty
+ * string, trailing garbage, or a value that overflows a long is reported
+ * as "invalid process id" and ends the program through usage().
+ *
+ * Only one id may be given per invocation: the second call reports the
+ * error and ends the program through usage().
+ */
+static id_t
+parse_id(char *id)
+{
+       static bool first = true;
+       long value;
+       char *ch;
+
+       if (!first) {
+               warnx("only one -g or -p flag is permitted");
+               usage();
+       }
+       /* Remember that an id was consumed so a second -g/-p is rejected. */
+       first = false;
+
+       /* strtol() only reports overflow through errno, so clear it first. */
+       errno = 0;
+       value = strtol(id, &ch, 0);
+       if (ch == id || *ch != '\0' || errno == ERANGE) {
+               warnx("invalid process id");
+               usage();
+       }
+       return (value);
+}
+
+/*
+ * Entry point.  Two invocation forms are supported:
+ *
+ *   protect [-i] command          apply the operation to this process,
+ *                                 then exec command in its place
+ *   protect [-cdi] -g pgrp | -p pid
+ *                                 apply the operation to existing processes
+ *
+ * Returns 0 on success.  Exits with 1 on a usage error or when procctl()
+ * fails, with 127 when command cannot be found (ENOENT) and with 126 on
+ * any other execvp() failure.
+ */
+int
+main(int argc, char *argv[])
+{
+       idtype_t idtype;
+       id_t id;
+       int ch, flags;
+       bool descend, inherit, idset;
+
+       /* Defaults match the command form: operate on the current process. */
+       idtype = P_PID;
+       id = getpid();
+       flags = PPROT_SET;
+       descend = inherit = idset = false;
+       while ((ch = getopt(argc, argv, "cdig:p:")) != -1)
+               switch (ch) {
+               case 'c':
+                       flags = PPROT_CLEAR;
+                       break;
+               case 'd':
+                       descend = true;
+                       break;
+               case 'i':
+                       inherit = true;
+                       break;
+               case 'g':
+                       idtype = P_PGID;
+                       id = parse_id(optarg);
+                       idset = true;
+                       break;
+               case 'p':
+                       idtype = P_PID;
+                       id = parse_id(optarg);
+                       idset = true;
+                       break;
+               default:
+                       /*
+                        * getopt() returns '?' for an unknown option or a
+                        * missing option argument; stop here instead of
+                        * carrying on with a partially parsed command line.
+                        */
+                       usage();
+               }
+       argc -= optind;
+       argv += optind;
+
+       /*
+        * An explicit target (-g/-p) excludes a command.  Without a target
+        * a command is required and -d is rejected.
+        */
+       if ((idset && argc != 0) || (!idset && (argc == 0 || descend)))
+               usage();
+
+       if (descend)
+               flags |= PPROT_DESCEND;
+       if (inherit)
+               flags |= PPROT_INHERIT;
+       if (procctl(idtype, id, PROC_SPROTECT, &flags) == -1)
+               err(1, "procctl");
+
+       if (argc != 0) {
+               /* Command form: replace this process with the command. */
+               errno = 0;
+               execvp(*argv, argv);
+               /* execvp() only returns on failure. */
+               err(errno == ENOENT ? 127 : 126, "%s", *argv);
+       }
+       return (0);
+}

Modified: head/usr.bin/truss/syscall.h
==============================================================================
--- head/usr.bin/truss/syscall.h        Thu Sep 19 18:00:05 2013        
(r255707)
+++ head/usr.bin/truss/syscall.h        Thu Sep 19 18:53:42 2013        
(r255708)
@@ -40,7 +40,7 @@ enum Argtype { None = 1, Hex, Octal, Int
        Fd_set, Sigaction, Fcntl, Mprot, Mmapflags, Whence, Readlinkres,
        Umtx, Sigset, Sigprocmask, Kevent, Sockdomain, Socktype, Open,
        Fcntlflag, Rusage, BinString, Shutdown, Resource, Rlimit, Timeval2,
-       Pathconf, Rforkflags, ExitStatus, Waitoptions, Idtype };
+       Pathconf, Rforkflags, ExitStatus, Waitoptions, Idtype, Procctl };
 
 #define        ARG_MASK        0xff
 #define        OUT     0x100

Modified: head/usr.bin/truss/syscalls.c
==============================================================================
--- head/usr.bin/truss/syscalls.c       Thu Sep 19 18:00:05 2013        
(r255707)
+++ head/usr.bin/truss/syscalls.c       Thu Sep 19 18:53:42 2013        
(r255708)
@@ -41,6 +41,7 @@ static const char rcsid[] =
 
 #include <sys/types.h>
 #include <sys/mman.h>
+#include <sys/procctl.h>
 #include <sys/ptrace.h>
 #include <sys/socket.h>
 #include <sys/time.h>
@@ -270,6 +271,8 @@ static struct syscall syscalls[] = {
        { .name = "wait6", .ret_type = 1, .nargs = 6,
          .args = { { Idtype, 0 }, { Int, 1 }, { ExitStatus | OUT, 2 },
                    { Waitoptions, 3 }, { Rusage | OUT, 4 }, { Ptr, 5 } } },
+       { .name = "procctl", .ret_type = 1, .nargs = 4,
+         .args = { { Idtype, 0 }, { Int, 1 }, { Procctl, 2 }, { Ptr, 3 } } },
        { .name = 0 },
 };
 
@@ -399,6 +402,10 @@ static struct xlat idtype_arg[] = {
        X(P_CTID) X(P_CPUID) X(P_PSETID) XEND
 };
 
+static struct xlat procctl_arg[] = {
+       X(PROC_SPROTECT) XEND
+};
+
 #undef X
 #undef XEND
 
@@ -1198,6 +1205,9 @@ print_arg(struct syscall_args *sc, unsig
        case Idtype:
                tmp = strdup(xlookup(idtype_arg, args[sc->offset]));
                break;
+       case Procctl:
+               tmp = strdup(xlookup(procctl_arg, args[sc->offset]));
+               break;
        default:
                errx(1, "Invalid argument type %d\n", sc->type & ARG_MASK);
        }
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to