revised v4 version of patch

* replace * with x in -j syntax to avoid shell processing problems
  ie. -j*4 is now -jx4

* -j now requires an option, so that -jx is not potentially confused with 
possible future -x option

* when abort_on_error is specified, have the parser driver wait for outstanding 
work before exiting, so that init doesn't have to inherit the outstanding work 
units

---

commit a841dcb8a6a77649f22cf988c971043615c9b7b4
Author: John Johansen <[email protected]>
Date:   Sat Oct 17 00:28:46 2015 -0700

    parser: add basic support for parallel compiles and loads
    
    This adds a basic support for parallel compiles. It uses a fork()/wait
    model due to the parsers current dependence on global variables and
    structures. It has been setup in a similar manner to how cilk handles
    multithreading to make it easy to port to a managed thread model once
    the parser removes the dependence on global compute structures in the
    backend.
    
    This patch adds two new command line flags
      -j or --jobs
         which follows the make syntax for specifying parallel jobs. It
         currently defaults to -jauto
         -j8     or  --jobs=8   allows for 8 parallel jobs
         -jauto  or  --jobs=auto        sets the jobs to the # of cpus
         -jx4    or  --jobs=x4  sets the jobs to # of cpus * 4
         -jx1 is equivalent to -jauto
    
         Note: unlike make -j must be accompanied by an option
    
    --max-jobs=n
        allows setting hard cap on the number of jobs that can be specified
        by --jobs. It defaults to the number of processors in the system * 8.
        It supports the "auto" and "max" keywords, and using xn for a
        multiple of the available cpus.
    
    additionally the -d flag has been modified to take an optional parameter
    and
      --debug=jobs
    will output debug information for the job control logic.
    
    In light testing on one machine the job control logic provides a nice
    performance boost.  On an x86 test machine with 60 profiles in the
    /etc/apparmor.d/ directory, for the command
      time apparmor_parser -QT /etc/apparmor.d/
    
      old (equiv of -j1):
         real  0m10.968s
         user  0m10.888s
         sys   0m0.088s
    
      ubuntu parallel load using xargs:
         real  0m8.003s
         user  0m21.680s
         sys   0m0.216s
    
      -j:
         real  0m6.547s
         user  0m17.900s
         sys   0m0.132s
    
    Signed-off-by: John Johansen <[email protected]>

diff --git a/parser/apparmor_parser.pod b/parser/apparmor_parser.pod
index 12c784a..4e10469 100644
--- a/parser/apparmor_parser.pod
+++ b/parser/apparmor_parser.pod
@@ -278,6 +278,29 @@ the matching stats flag.
 
 Use --help=dump to see a full list of which dump flags are supported
 
+=item -j n, --jobs=n
+
+Set the number of jobs used to compile the specified policy. Where n can
+be
+
+  #    - a specific number of jobs
+  auto - the # of cpus in the system
+  x#   - # * number of cpus
+
+Eg.
+  -j8     OR --jobs=8                   allows for 8 parallel jobs
+  -jauto  OR --jobs=auto                sets the jobs to the # of cpus
+  -jx4    OR --jobs=x4                  sets the jobs to # of cpus * 4
+  -jx1   is equivalent to   -jauto
+
+The default value is the number of cpus in the system.
+
+=item --max-jobs n
+
+Set a hard cap on the value that can be specified by the --jobs flag.
+It takes the same set of options available to the --jobs option, and
+defaults to 8*cpus
+
 =item -O n, --optimize=n
 
 Set the optimization flags used by policy compilation.  A single optimization
diff --git a/parser/parser_main.c b/parser/parser_main.c
index 9110076..98ceab9 100644
--- a/parser/parser_main.c
+++ b/parser/parser_main.c
@@ -36,8 +36,11 @@
 #include <limits.h>
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <sys/wait.h>
+
 #include <sys/apparmor.h>
 
+
 #include "lib.h"
 #include "features.h"
 #include "parser.h"
@@ -76,6 +79,18 @@ int abort_on_error = 0;                      /* stop 
processing profiles if error */
 int skip_bad_cache_rebuild = 0;
 int mru_skip_cache = 1;
 int debug_cache = 0;
+
+/* for jobs_max and jobs
+ * LONG_MAX : no limit
+ * 0  : auto  = detect system processing cores
+ * n  : use that number of processes/threads to compile policy
+ */
+#define JOBS_AUTO 0
+long jobs_max = -8;                    /* 8 * cpus */
+long jobs = JOBS_AUTO;                 /* default: number of processor cores */
+long njobs = 0;
+bool debug_jobs = false;
+
 struct timespec cache_tstamp, mru_policy_tstamp;
 
 static char *apparmorfs = NULL;
@@ -84,7 +99,7 @@ static char *cacheloc = NULL;
 static aa_features *features = NULL;
 
 /* Make sure to update BOTH the short and long_options */
-static const char *short_options = "adf:h::rRVvI:b:BCD:NSm:M:qQn:XKTWkL:O:po:";
+static const char *short_options = 
"ad::f:h::rRVvI:b:BCD:NSm:M:qQn:XKTWkL:O:po:j:";
 struct option long_options[] = {
        {"add",                 0, 0, 'a'},
        {"binary",              0, 0, 'B'},
@@ -116,7 +131,7 @@ struct option long_options[] = {
        {"purge-cache",         0, 0, 130},     /* no short option */
        {"create-cache-dir",    0, 0, 131},     /* no short option */
        {"cache-loc",           1, 0, 'L'},
-       {"debug",               0, 0, 'd'},
+       {"debug",               2, 0, 'd'},
        {"dump",                1, 0, 'D'},
        {"Dump",                1, 0, 'D'},
        {"optimize",            1, 0, 'O'},
@@ -126,6 +141,8 @@ struct option long_options[] = {
        {"skip-bad-cache-rebuild",      0, 0, 133},     /* no short option */
        {"warn",                1, 0, 134},     /* no short option */
        {"debug-cache",         0, 0, 135},     /* no short option */
+       {"jobs",                1, 0, 'j'},
+       {"max-jobs",            1, 0, 136},     /* no short option */
        {NULL, 0, 0, 0},
 };
 
@@ -170,11 +187,13 @@ static void display_usage(const char *command)
               "-v, --verbose           Show profile names as they load\n"
               "-Q, --skip-kernel-load  Do everything except loading into 
kernel\n"
               "-V, --version           Display version info and exit\n"
-              "-d, --debug             Debug apparmor definitions\n"
+              "-d [n], --debug         Debug apparmor definitions OR [n]\n"
               "-p, --preprocess        Dump preprocessed profile\n"
               "-D [n], --dump          Dump internal info for debugging\n"
               "-O [n], --Optimize      Control dfa optimizations\n"
               "-h [cmd], --help[=cmd]  Display this text or info about cmd\n"
+              "-j [n], --jobs [n]      Set the number of compile threads\n"
+              "--max-jobs [n]          Hard cap on --jobs. Default 8*cpus\n"
               "--abort-on-error        Abort processing of profiles on first 
error\n"
               "--skip-bad-cache-rebuild Do not try rebuilding the cache if it 
is rejected by the kernel\n"
               "--warn n                Enable warnings (see --help=warn)\n"
@@ -268,6 +287,32 @@ static int getopt_long_file(FILE *f, const struct option 
*longopts,
        return 0;
 }
 
+static long process_jobs_arg(const char *arg, const char *val) {
+       char *end;
+       long n;
+
+       if (!val || strcmp(val, "auto") == 0)
+               n = JOBS_AUTO;
+       else if (strcmp(val, "max") == 0)
+               n = LONG_MAX;
+       else {
+               bool multiple = false;
+               if (*val == 'x') {
+                       multiple = true;
+                       val++;
+               }
+               n = strtol(val, &end, 0);
+               if (!(*val && val != end && *end == '\0')) {
+                       PERROR("%s: Invalid option %s=%s%s\n", progname, arg, 
multiple ? "*" : "", val);
+                       exit(1);
+               }
+               if (multiple)
+                       n = n * -1;
+       }
+
+       return n;
+}
+
 /* process a single argment from getopt_long
  * Returns: 1 if an action arg, else 0
  */
@@ -286,8 +331,17 @@ static int process_arg(int c, char *optarg)
                option = OPTION_ADD;
                break;
        case 'd':
-               debug++;
-               skip_read_cache = 1;
+               if (!optarg) {
+                       debug++;
+                       skip_read_cache = 1;
+               } else if (strcmp(optarg, "jobs") == 0 ||
+                          strcmp(optarg, "j") == 0) {
+                       debug_jobs = true;
+               } else {
+                       PERROR("%s: Invalid --debug option '%s'\n",
+                              progname, optarg);
+                       exit(1);
+               }
                break;
        case 'h':
                if (!optarg) {
@@ -470,6 +524,12 @@ static int process_arg(int c, char *optarg)
        case 135:
                debug_cache = 1;
                break;
+       case 'j':
+               jobs = process_jobs_arg("-j", optarg);
+               break;
+       case 136:
+               jobs_max = process_jobs_arg("max-jobs", optarg);
+               break;
        default:
                display_usage(progname);
                exit(1);
@@ -803,6 +863,117 @@ out:
        return retval;
 }
 
+/* Do not call directly, this is a helper for work_sync, which can handle
+ * single worker cases and cases were the work queue is optimized away
+ *
+ * call only if there are work children to wait on
+ */
+#define work_sync_one(RESULT)                                          \
+do {                                                                   \
+       int status;                                                     \
+       wait(&status);                                                  \
+       if (WIFEXITED(status))                                          \
+               RESULT(WEXITSTATUS(status));                            \
+       else                                                            \
+               RESULT(ECHILD);                                         \
+       /* TODO: do we need to handle traced */                         \
+       njobs--;                                                        \
+       if (debug_jobs)                                                 \
+               fprintf(stderr, "    JOBS SYNC ONE: result %d, jobs left 
%ld\n", status, njobs);                                                        \
+} while (0)
+
+#define work_sync(RESULT)                                              \
+do {                                                                   \
+       if (debug_jobs)                                                 \
+               fprintf(stderr, "JOBS SYNC: jobs left %ld\n", njobs);   \
+       while (njobs)                                                   \
+               work_sync_one(RESULT);                                  \
+} while (0)
+
+#define work_spawn(WORK, RESULT)                                       \
+do {                                                                   \
+       if (jobs == 1) {                                                \
+               /* no parallel work so avoid fork() overhead */         \
+               RESULT(WORK);                                           \
+               break;                                                  \
+       }                                                               \
+       if (njobs == jobs) {                                            \
+               /* wait for a child */                                  \
+               if (debug_jobs)                                         \
+                       fprintf(stderr, "    JOBS SPAWN: waiting (jobs %ld == 
max %ld) ...\n", njobs, jobs);                                            \
+               work_sync_one(RESULT);                                  \
+       }                                                               \
+                                                                       \
+       pid_t child = fork();                                           \
+       if (child == 0) {                                               \
+               /* child - exit work unit with returned value */        \
+               exit(WORK);                                             \
+       } else if (child > 0) {                                         \
+               /* parent */                                            \
+               njobs++;                                                \
+               if (debug_jobs)                                         \
+                       fprintf(stderr, "    JOBS SPAWN: created %ld ...\n", 
njobs);                                                                    \
+       } else {                                                        \
+               /* error */                                             \
+               if (debug_jobs)                                         \
+                       fprintf(stderr, "    JOBS SPAWN: failed error: %d) 
...\n", errno);                                                              \
+               RESULT(errno);                                          \
+       }                                                               \
+} while (0)
+
+
+/* sadly C forces us to do this with exit, longjmp or returning error
+ * from work_spawn and work_sync. We could throw a C++ exception; is it
+ * worth doing to avoid the exit here?
+ *
+ * atm not all resources may be cleaned up at exit
+ */
+int last_error = 0;
+void handle_work_result(int retval)
+{
+       if (retval) {
+               last_error = retval;
+               if (abort_on_error) {
+                       /* already in abort mode we don't need subsequent
+                        * syncs to do this too
+                        */
+                       abort_on_error = 0;
+                       work_sync(handle_work_result);
+                       exit(last_error);
+
+               }
+       }
+}
+
+static long compute_jobs(long n, long j)
+{
+       if (j == JOBS_AUTO)
+               j = n;
+       else if (j < 0)
+               j = n * j * -1;
+       return j;
+}
+
+static void setup_parallel_compile(void)
+{
+       /* jobs and jobs_max set by default, config or args */
+       long n = sysconf(_SC_NPROCESSORS_ONLN);
+       if (n == -1)
+               /* unable to determine number of processors, default to 1 */
+               n = 1;
+       jobs = compute_jobs(n, jobs);
+       jobs_max = compute_jobs(n, jobs_max);
+
+       if (jobs > jobs_max) {
+               pwarn("%s: Warning caping number of jobs to %ld * # of cpus == 
'%ld'",
+                     progname, jobs_max, jobs);
+               jobs = jobs_max;
+       }
+       njobs = 0;
+       if (debug_jobs)
+               fprintf(stderr, "jobs: %ld\n", jobs);
+}
+
 struct dir_cb_data {
        aa_kernel_interface *kernel_interface;
        const char *dirname;    /* name of the parent dir */
@@ -820,8 +991,9 @@ static int profile_dir_cb(int dirfd unused, const char 
*name, struct stat *st,
                autofree char *path = NULL;
                if (asprintf(&path, "%s/%s", cb_data->dirname, name) < 0)
                        PERROR(_("Out of memory"));
-               rc = process_profile(option, cb_data->kernel_interface, path,
-                                    cb_data->cachedir);
+               work_spawn(process_profile(option, cb_data->kernel_interface,
+                                          path, cb_data->cachedir),
+                          handle_work_result);
        }
        return rc;
 }
@@ -837,7 +1009,9 @@ static int binary_dir_cb(int dirfd unused, const char 
*name, struct stat *st,
                autofree char *path = NULL;
                if (asprintf(&path, "%s/%s", cb_data->dirname, name) < 0)
                        PERROR(_("Out of memory"));
-               rc = process_binary(option, cb_data->kernel_interface, path);
+               work_spawn(process_binary(option, cb_data->kernel_interface,
+                                          path),
+                           handle_work_result);
        }
        return rc;
 }
@@ -861,7 +1035,7 @@ int main(int argc, char *argv[])
 {
        aa_kernel_interface *kernel_interface = NULL;
        aa_policy_cache *policy_cache = NULL;
-       int retval, last_error;
+       int retval;
        int i;
        int optind;
 
@@ -873,6 +1047,8 @@ int main(int argc, char *argv[])
        process_config_file("/etc/apparmor/parser.conf");
        optind = process_args(argc, argv);
 
+       setup_parallel_compile();
+
        setlocale(LC_MESSAGES, "");
        bindtextdomain(PACKAGE, LOCALEDIR);
        textdomain(PACKAGE);
@@ -939,6 +1115,7 @@ int main(int argc, char *argv[])
                }
        }
 
+
        retval = last_error = 0;
        for (i = optind; i <= argc; i++) {
                struct stat stat_file;
@@ -973,22 +1150,19 @@ int main(int argc, char *argv[])
                                       profilename);
                        }
                } else if (binary_input) {
-                       retval = process_binary(option, kernel_interface,
-                                               profilename);
+                       work_spawn(process_binary(option, kernel_interface,
+                                                 profilename),
+                                  handle_work_result);
                } else {
-                       retval = process_profile(option, kernel_interface,
-                                                profilename, cacheloc);
+                       work_spawn(process_profile(option, kernel_interface,
+                                                  profilename, cacheloc),
+                                  handle_work_result);
                }
 
                if (profilename) free(profilename);
                profilename = NULL;
-
-               if (retval) {
-                       last_error = retval;
-                       if (abort_on_error)
-                               break;
-               }
        }
+       work_sync(handle_work_result);
 
        if (ofile)
                fclose(ofile);



-- 
AppArmor mailing list
[email protected]
Modify settings or unsubscribe at: 
https://lists.ubuntu.com/mailman/listinfo/apparmor

Reply via email to