Implement optional daemon mode for readahead. Bloatcheck without daemon mode: function old new delta readahead_main 127 123 -4 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-4) Total: -4 bytes
Bloatcheck with daemon mode: function old new delta readahead_main 127 2421 +2294 .rodata 158733 159373 +640 qsort_cmp - 54 +54 packed_usage 30263 30312 +49 tree_add_item - 40 +40 move_item_to_array - 38 +38 tree_cmp - 11 +11 ------------------------------------------------------------------------------ (add/remove: 4/0 grow/shrink: 3/0 up/down: 3126/0) Total: 3126 bytes Signed-off-by: Bartosz Golaszewski <bartekg...@gmail.com> --- docs/readahead.txt | 39 +++ miscutils/Config.src | 10 + miscutils/readahead.c | 640 ++++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 674 insertions(+), 15 deletions(-) create mode 100644 docs/readahead.txt diff --git a/docs/readahead.txt b/docs/readahead.txt new file mode 100644 index 0000000..b13170b --- /dev/null +++ b/docs/readahead.txt @@ -0,0 +1,39 @@ +Readahead applet works in two modes. If at least one file is given via the +command-line, it just calls readahead() for all the files. Otherwise it works +as a daemon. + +In daemon mode it reads config options from /etc/readahead/readahead.conf, +readahead()s all the files that have already been saved in +/etc/readahead/readahead.lst.bz2 and potentially starts collecting data about +files being accessed for a configured number of seconds. It then adds all +new entries to readahead.lst.bz2 and increases the number in readahead.stamp +which is then used to determine the number of data acquisition passes +already done. + +If the value in readahead.stamp is equal to or greater than the value of the +COLLECT_PASSES config option, readahead stops collecting data and only +readahead()s the files as soon as possible. + +Config file: + +/etc/readahead/readahead.conf contains simple key = value configuration +options. 
+ +Available options: + +COLLECT_TIME - desired time of data collection in seconds + +RAM_MAX - max memory usage for readahead in bytes (half of + available RAM by default) + +COLLECT_PASSES - number of times readahead should collect data before + switching to passive mode + +If the config file doesn't exist, readahead works using reasonable default +settings. + +It is possible to run readahead either as a regular process during system +boot, or as init in which case it will spawn a second readahead process to +start the file preload as fast as possible and then re-exec as the real +init. The kernel command-line argument readahead_init can be used to specify +an init executable other than /sbin/init. diff --git a/miscutils/Config.src b/miscutils/Config.src index d69abf1..d4cea06 100644 --- a/miscutils/Config.src +++ b/miscutils/Config.src @@ -469,6 +469,17 @@ config READAHEAD As readahead(2) blocks until each file has been read, it is best to run this applet as a background job. +config READAHEAD_DAEMON + bool "daemon mode" + default n + depends on READAHEAD + select BUNZIP2 + select BZIP2 + help + Include the readahead daemon which runs in the background, records + the list of files that are accessed during boot and readahead()s + them in subsequent system start-ups to improve the boot-speed. + config RUNLEVEL bool "runlevel" default y diff --git a/miscutils/readahead.c b/miscutils/readahead.c index e22aaa4..fa8dab7 100644 --- a/miscutils/readahead.c +++ b/miscutils/readahead.c @@ -5,43 +5,653 @@ * Preloads the given files in RAM, to reduce access time. * Does this by calling the readahead(2) system call. * - * Copyright (C) 2006 Michael Opdenacker <mich...@free-electrons.com> + * Copyright (C) 2006 Michael Opdenacker <mich...@free-electrons.com> + * Copyright (C) 2015 Bartosz Golaszewski <bartekg...@gmail.com> * * Licensed under GPLv2 or later, see file LICENSE in this source tree. 
*/ +//usage:#ifndef CONFIG_READAHEAD_DAEMON //usage:#define readahead_trivial_usage //usage: "[FILE]..." //usage:#define readahead_full_usage "\n\n" //usage: "Preload FILEs to RAM" +//usage:#else +//usage:#define readahead_trivial_usage +//usage: "[-f] [FILE]" +//usage:#define readahead_full_usage "\n\n" +//usage: "Preload files to RAM (as a command-line tool or as a daemon)" +//usage: "\n -f don't fork in daemon mode\n\n" +//usage: "For detailed daemon configuration see readahead.txt." +//usage:#endif #include "libbb.h" +static off_t get_filelen(int fd) +{ + off_t len; + + len = xlseek(fd, 0, SEEK_END); + xlseek(fd, 0, SEEK_SET); + + return len; +} + +#ifndef CONFIG_READAHEAD_DAEMON int readahead_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; int readahead_main(int argc UNUSED_PARAM, char **argv) +#else +static int readahead_compat(char **argv) +#endif { - int retval = EXIT_SUCCESS; + int status, fd, retval = EXIT_SUCCESS; + off_t len; + +#ifndef CONFIG_READAHEAD_DAEMON + argv++; + if (!argv[0]) + return retval; +#endif + /* + * The initial version of this applet only readahead() the list of + * files passed as command-line arguments. For backwards compatibility + * in daemon mode this code is called when any additional non-option + * arguments are passed via command-line. All option arguments are + * ignored in this mode. 
+ */ + do { + fd = open_or_warn(*argv, O_RDONLY); + if (fd >= 0) { + len = get_filelen(fd); + status = readahead(fd, 0LL, len); + close(fd); + if (status >= 0) + continue; + } + retval = EXIT_FAILURE; + } while(*++argv); + + return retval; +} + +#ifdef CONFIG_READAHEAD_DAEMON + +#include <search.h> +#include <sys/sysinfo.h> +#include <sys/signalfd.h> +#include <linux/fanotify.h> +#include <sys/fanotify.h> + +#define OPT_f (1 << 0) +#define FAN_POLL_INTERVAL 1000 +#define PARSER_FLAGS (PARSE_NORMAL & ~(PARSE_TRIM | PARSE_COLLAPSE)) + +#define RA_CONFIG "/etc/readahead/readahead.conf" +#define RA_FILE_LIST "/etc/readahead/readahead.lst.bz2" +#define RA_STAMP "/etc/readahead/readahead.stamp" + +#define BZIP2_CMD "/bin/busybox bzip2 -c >" +#define BZCAT_CMD "/bin/busybox bzcat" + +struct globals { + /* General config. */ + unsigned long ram_max; + unsigned long collect_time; + unsigned long collect_passes; + + /* Number of collect passes already completed. */ + unsigned passes_done; + + int do_collect; + + /* Root node of the file tree. */ + void *root; + /* Number of items in the tree. */ + size_t num_items; + /* Items sorted by access time. */ + struct ra_item **items_array; + /* Helper index for moving items between the tree and the array. */ + int item_index; + + int mem_exceeded; + int fan_fd; + int sig_fd; + + struct timeval start_time; + + char *rdlink_buf; +} FIX_ALIASING; + +#define G (*(struct globals*)&bb_common_bufsiz1) + +/* Set some reasonable defaults for config options in INIT_G(). 
*/ +#define INIT_G() do { \ + memset(&G, 0, sizeof(struct globals)); \ + G.collect_time = 180; \ + G.collect_passes = 2; \ + G.do_collect = 0; \ + G.ram_max = get_totalram() / 2; \ + G.fan_fd = -1; \ + G.sig_fd = -1; \ + G.rdlink_buf = xzalloc(PATH_MAX); \ + } while (0) + +struct ra_item { + char *path; + struct timeval access_time; +}; + +static unsigned long get_totalram(void) +{ + struct sysinfo info; + + if (sysinfo(&info) < 0) + return 0; /* unknown; a ram_max of 0 disables the limit check */ + return info.totalram * info.mem_unit; /* totalram is in mem_unit units */ +} + +static int file_is_regular(int fd) +{ + struct stat statbuf; + int status; + + status = fstat(fd, &statbuf); + if (status < 0) + return 0; + + return S_ISREG(statbuf.st_mode); +} + +static int qsort_cmp(const void *p1, const void *p2) +{ + const struct ra_item *i1 = *(const struct ra_item **)p1; + const struct ra_item *i2 = *(const struct ra_item **)p2; + + /* Compare first by time, then by path. */ + if (timercmp(&i1->access_time, &i2->access_time, <)) + return -1; + if (timercmp(&i1->access_time, &i2->access_time, >)) + return 1; + + return strcmp(i1->path, i2->path); +} + +static int tree_cmp(const void *p1, const void *p2) +{ + const struct ra_item *i1 = (const struct ra_item *)p1; + const struct ra_item *i2 = (const struct ra_item *)p2; + + return strcmp(i1->path, i2->path); +} + +static void tree_add_item(const struct ra_item *item) +{ + void *rv = tsearch(item, &G.root, tree_cmp); + + if (rv == NULL) + bb_perror_msg_and_die("tsearch"); /* OOM */ + if (*(const struct ra_item **)rv == item) /* count real insertions only */ + G.num_items++; +} + +static void item_set_event_time(struct ra_item *item) +{ + struct timeval now; + + gettimeofday(&now, NULL); + timersub(&now, &G.start_time, &item->access_time); +} + +static void parse_config(void) +{ + char *tokens[2], *key, *val; + parser_t *parser; + int num_toks; + + parser = config_open2(RA_CONFIG, fopen_for_read); + if (!parser) + /* Don't complain, we can do without a config file. 
*/ + return; + + while ((num_toks = config_read(parser, tokens, + 2, 1, "#=", PARSER_FLAGS))) { + if (num_toks != 2) + continue; - if (!argv[1]) { - bb_show_usage(); + key = tokens[0]; + val = tokens[1]; + trim(key); + trim(val); + + /* + * Too few configuration settings to make it worth playing + * with some advanced parsing. Just use strcmp(). + */ + if (strcmp(key, "COLLECT_TIME") == 0) { + G.collect_time = xstrtoul(val, 10); + } else if (strcmp(key, "RAM_MAX") == 0) { + G.ram_max = xstrtoul(val, 10); + } else if (strcmp(key, "COLLECT_PASSES") == 0) { + G.collect_passes = xstrtoul(val, 10); + } else { + bb_error_msg( + "ignoring unrecognized variable: '%s'", key); + } } - while (*++argv) { - int fd = open_or_warn(*argv, O_RDONLY); - if (fd >= 0) { - off_t len; - int r; + config_close(parser); +} + +static void read_stamp(void) +{ + unsigned stamp; + FILE *fp; + int rv; + + fp = fopen(RA_STAMP, "r"); + if (fp == NULL) { + G.passes_done = 0; + } else { + rv = fscanf(fp, "%u", &stamp); + if (rv != 1) + G.passes_done = 0; + else + G.passes_done = stamp; + fclose(fp); + } +} + +static void write_stamp(void) +{ + FILE *fp; + + fp = fopen(RA_STAMP, "w"); + if (fp == NULL) { + bb_perror_msg("error opening the stamp file"); + return; + } + + fprintf(fp, "%u\n", G.passes_done); + fclose(fp); +} + +#if ENABLE_FEATURE_CLEAN_UP +static void free_node(void *nodep) +{ + struct ra_item *item = (struct ra_item *)nodep; + + if (item) { + free(item->path); + free(item); + } +} + +static void file_tree_destroy(void) +{ + tdestroy(G.root, free_node); +} +#endif /* ENABLE_FEATURE_CLEAN_UP */ + +static void fork_and_exec_init(void) +{ + const char *ra_init; + pid_t pid; - /* fdlength was reported to be unreliable - use seek */ - len = xlseek(fd, 0, SEEK_END); - xlseek(fd, 0, SEEK_SET); - r = readahead(fd, 0, len); + pid = xfork(); + if (pid > 0) { + ra_init = getenv("readahead_init"); + if (!ra_init) + ra_init = "/sbin/init"; + execl(ra_init, ra_init, (char *)NULL); + 
bb_perror_msg_and_die( + "error executing '%s'", ra_init); + } +} + +static void daemonize(void) +{ + int status; + + /* Daemonize, but retain the console. */ + status = daemon(0, 1); + if (status < 0) + bb_perror_msg_and_die("unable to run in background"); +} + +/* + * Do the actual readahead if the list file exists before starting to collect + * data. + * + * The files are expected to be generated by readahead_collect() and are not + * checked for repetitions and ordering. They are checked however for being + * regular files. + * + * In case of open() errors the applet continues silently. + */ +static int readahead_files(void) +{ + enum { TOK_PATH = 0, TOK_SEC, TOK_USEC, _TOK_MAX }; + + int status, retval = EXIT_SUCCESS, fd, num_tok, bzrv; + char popen_cmp[sizeof(BZCAT_CMD RA_FILE_LIST) + 2]; + unsigned long len, ram_taken = 0; + char *token[_TOK_MAX], *path; + struct ra_item *item; + parser_t *parser; + long sec, usec; + FILE *fp; + + snprintf(popen_cmp, sizeof(popen_cmp), + "%s %s", BZCAT_CMD, RA_FILE_LIST); + + if (access(RA_FILE_LIST, F_OK)) + return retval; + + fp = popen(popen_cmp, "r"); + if (!fp) + bb_perror_msg_and_die("popen bzcat"); + + parser = config_from_fp(fp); + if (parser) { + while ((num_tok = config_read(parser, token, + 3, 1, "#:", PARSER_FLAGS))) { + if (str_isblank(token[0])) + continue; + + if (num_tok < 3) + bb_error_msg_and_die( + "%s: invalid format", RA_FILE_LIST); + + path = token[0]; + sec = xstrtol(token[1], 10); + usec = xstrtol(token[2], 10); + + trim(path); + + fd = open(path, O_RDONLY); + if (fd < 0) + continue; + + if (!file_is_regular(fd)) { + close(fd); + continue; + } + + if (G.mem_exceeded) + goto add_file; + + len = get_filelen(fd); + if (G.ram_max && (ram_taken + len) > G.ram_max) { + bb_error_msg("memory treshold exceeded"); + G.mem_exceeded = 1; + close(fd); + goto add_file; + } + ram_taken += len; + + status = readahead(fd, 0LL, len); close(fd); - if (r >= 0) + if (status < 0) { + bb_perror_msg("readahead(\"%s\"):", 
path); + retval = EXIT_FAILURE; continue; + } + +add_file: + /* + * If the file could be readahead() properly and + * we're in collecting mode - add it to the tree. + */ + if (G.do_collect) { + item = xzalloc(sizeof(struct ra_item)); + item->path = xstrdup(path); + item->access_time.tv_sec = sec; + item->access_time.tv_usec = usec; + tree_add_item(item); + } } - retval = EXIT_FAILURE; + + config_free(parser); + bzrv = pclose(fp); + if (bzrv != EXIT_SUCCESS) + bb_error_msg_and_die("error reading file list"); } return retval; } + +static int setup_fanotify(void) +{ + int fd, status, init_flags, event_flags, mark_flags, mark_mask; + + init_flags = FAN_CLOEXEC | FAN_NONBLOCK | FAN_CLASS_CONTENT; + event_flags = O_RDONLY | O_LARGEFILE; + mark_flags = FAN_MARK_ADD | FAN_MARK_MOUNT; + mark_mask = FAN_OPEN; + + fd = fanotify_init(init_flags, event_flags); + if (fd < 0) + bb_perror_msg_and_die("fanotify_init"); + + status = fanotify_mark(fd, mark_flags, mark_mask, 0, "/"); + if (status < 0) + bb_perror_msg_and_die("fanotify_mark"); + + return fd; +} + +static int setup_signalfd(void) +{ + sigset_t sigmask; + int fd, status; + + sigemptyset(&sigmask); + sigaddset(&sigmask, SIGTERM); + sigaddset(&sigmask, SIGINT); + sigaddset(&sigmask, SIGALRM); + + status = sigprocmask(SIG_BLOCK, &sigmask, NULL); + if (status < 0) + bb_perror_msg_and_die("sigprocmask"); + + fd = signalfd(-1, &sigmask, SFD_NONBLOCK | SFD_CLOEXEC); + if (fd < 0) + bb_perror_msg_and_die("signalfd"); + + return fd; +} + +static void handle_fanotify_events(void) +{ + char procpath[sizeof("/proc/self/fd/") + DECIMAL_STR_MAX(int) + 1]; + struct fanotify_event_metadata fan_data; + struct ra_item tmp_item, *item; + void *tf_ptr; + ssize_t rd; + + for (;;) { + memset(&fan_data, 0, FAN_EVENT_METADATA_LEN); + rd = read(G.fan_fd, &fan_data, FAN_EVENT_METADATA_LEN); + if (rd < 0) { + if (errno == EAGAIN) + break; /* No more events. 
*/ + if (errno == EINTR) + continue; + + bb_perror_msg_and_die("fanotify event read"); + } + + if (!FAN_EVENT_OK(&fan_data, sizeof(fan_data)) + || fan_data.fd == FAN_NOFD) + continue; + + snprintf(procpath, sizeof(procpath), + "/proc/self/fd/%d", fan_data.fd); + + memset(G.rdlink_buf, 0, PATH_MAX); /* readlink() won't NUL-terminate */ + rd = readlink(procpath, G.rdlink_buf, PATH_MAX - 1); + if (rd < 0) { + /* Don't complain if file was just removed. */ + if (errno != ENOENT) + bb_perror_msg("readlink"); + close(fan_data.fd); + continue; + } + + /* + * Make sure we don't store deleted files. + * Ignore files in /tmp as well as it's + * usually tmpfs. + */ + if (is_suffixed_with(G.rdlink_buf, " (deleted)") + || is_prefixed_with(G.rdlink_buf, "/tmp/")) { + close(fan_data.fd); + continue; + } + + tmp_item.path = G.rdlink_buf; + tf_ptr = tfind(&tmp_item, &G.root, tree_cmp); + if (tf_ptr == NULL) { + /* New file -> add it. */ + item = xzalloc(sizeof(struct ra_item)); + item->path = xstrdup(G.rdlink_buf); + item_set_event_time(item); + tree_add_item(item); + } + + close(fan_data.fd); + } +} + +static void readahead_collect(void) +{ + enum { FD_FANOTIFY = 0, FD_SIGNAL, FD_NUM }; + + struct pollfd fds[FD_NUM]; + int status; + + G.fan_fd = setup_fanotify(); + G.sig_fd = setup_signalfd(); + + fds[FD_FANOTIFY].fd = G.fan_fd; + fds[FD_FANOTIFY].events = POLLIN; + fds[FD_SIGNAL].fd = G.sig_fd; + fds[FD_SIGNAL].events = POLLIN; + + if (G.collect_time) + alarm(G.collect_time); + + for (;;) { + status = poll(fds, FD_NUM, FAN_POLL_INTERVAL); + if (status < 0) { + if (errno == EINTR) + continue; + bb_perror_msg_and_die("poll"); + } else if (status == 0) { + continue; /* timeout */ + } + + if (fds[FD_FANOTIFY].revents) + handle_fanotify_events(); + + if (fds[FD_SIGNAL].revents) { + /* + * Any expected signal will do - don't waste time + * and code reading the event data. 
+ */ + close(G.sig_fd); + break; + } + } + + close(G.fan_fd); +} + +static void move_item_to_array(const void *nodep, + const VISIT which, const int UNUSED_PARAM depth) +{ + struct ra_item *item = *(struct ra_item **)nodep; + + if (which == leaf || which == postorder) + G.items_array[G.item_index++] = item; +} + +static void sort_by_time(void) +{ + G.items_array = xmalloc(G.num_items * sizeof(struct ra_item *)); + twalk(G.root, move_item_to_array); + qsort(G.items_array, G.num_items, sizeof(struct ra_item *), qsort_cmp); +} + +static void save_lst_file(void) +{ + char cmd[sizeof(BZIP2_CMD RA_FILE_LIST) + 2]; + struct ra_item *item; + int i, rv; + FILE *fp; + + (void)mkdir("/etc/readahead", 0777); + + snprintf(cmd, sizeof(cmd), "%s %s", BZIP2_CMD, RA_FILE_LIST); + fp = popen(cmd, "w"); + if (!fp) + bb_perror_msg_and_die("popen saving lst file"); + + for (i = 0; i < G.num_items; i++) { + item = G.items_array[i]; + fprintf(fp, "%s:%ld:%ld\n", item->path, + item->access_time.tv_sec, item->access_time.tv_usec); + } + + rv = pclose(fp); + if (rv != EXIT_SUCCESS) + bb_error_msg_and_die("error saving to '%s'", RA_FILE_LIST); +} + +int readahead_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; +int readahead_main(int argc UNUSED_PARAM, char **argv) +{ + int retval = EXIT_SUCCESS; + unsigned opts; + + INIT_G(); + + opts = getopt32(argv, "f"); + argv += optind; + + if (argv[0]) + return readahead_compat(argv); + + if (getpid() == 1) { + /* + * If we are being run as init - spawn a separate process + * for readahead daemon and exec real init in pid 1. 
+ */ + fork_and_exec_init(); + } else if (!(opts & OPT_f)) { + daemonize(); + } + + parse_config(); + read_stamp(); + if (G.passes_done < G.collect_passes) + G.do_collect = 1; + + retval = readahead_files(); + + if (G.do_collect) { + gettimeofday(&G.start_time, NULL); + readahead_collect(); + sort_by_time(); + save_lst_file(); + G.passes_done++; + write_stamp(); + + IF_FEATURE_CLEAN_UP(file_tree_destroy()); + IF_FEATURE_CLEAN_UP(free(G.items_array)); + } + + return retval; +} + +#endif /* CONFIG_READAHEAD_DAEMON */ -- 2.1.4 _______________________________________________ busybox mailing list busybox@busybox.net http://lists.busybox.net/mailman/listinfo/busybox