The following commit has been merged in the master branch: commit 8e31b0f0fb6b14e605407b824f4e2f1b0c12bab6 Author: Morten Hustveit <mor...@debian.org> Date: Tue Nov 3 16:11:46 2009 +0100
dpkg: Use FIEMAP to sort .list files before scanning When running dpkg from a cold cache on a system where <admindir>/info/ lies on a hard disk, a lot of time is spent waiting for seeks between (typically) thousands of files. This patch changes the behavior of ensure_allinstfiles_available(), so that it accesses the packages in the order of their .list files' physical locations on the hard disk, greatly reducing drive head movements. The performance improvement is around 70% on my system: reinstalling a simple package takes 8 seconds instead of 27 seconds. The caches were dropped before each run, and 10 runs were done with consistent results. The performance is identical to the previous patch using FIBMAP, althought this one has the advantage of not needing root privileges. Signed-off-by: Guillem Jover <guil...@debian.org> diff --git a/configure.ac b/configure.ac index f854756..967f8c1 100644 --- a/configure.ac +++ b/configure.ac @@ -98,7 +98,7 @@ fi # Checks for header files. AC_HEADER_STDC AC_CHECK_HEADERS([stddef.h error.h locale.h libintl.h kvm.h \ - sys/cdefs.h sys/syscall.h]) + sys/cdefs.h sys/syscall.h linux/fiemap.h]) DPKG_CHECK_DEFINE(TIOCNOTTY, [sys/ioctl.h]) # Checks for typedefs, structures, and compiler characteristics. diff --git a/debian/changelog b/debian/changelog index a32f4e1..f498569 100644 --- a/debian/changelog +++ b/debian/changelog @@ -148,6 +148,9 @@ dpkg (1.15.6) UNRELEASED; urgency=low * Switch SE Linux support to explicitly set path context. This fixes the mislabeling of files under <admindir> on conffile extraction or on unpack errors, due to improper default context restoration. Closes: #498438 + * Use FIEMAP when available (on Linux based systems) to sort the .list + files loading order. With a cold cache it improves up to a 70%. + Thanks to Morten Hustveit <mor...@debian.org>. [ Modestas Vainius ] * Implement symbol patterns (Closes: #563752). From now on, it is possible to diff --git a/src/filesdb.c b/src/filesdb.c index 1ae10b2..1ec8e93 100644 --- a/src/filesdb.c +++ b/src/filesdb.c @@ -22,6 +22,11 @@ #include <config.h> #include <compat.h> +#ifdef HAVE_LINUX_FIEMAP_H +#include <linux/fiemap.h> +#include <linux/fs.h> +#include <sys/ioctl.h> +#endif #include <sys/types.h> #include <sys/stat.h> @@ -39,6 +44,7 @@ #include <dpkg/dpkg-db.h> #include <dpkg/path.h> #include <dpkg/buffer.h> +#include <dpkg/pkg-array.h> #include <dpkg/progress.h> #include "filesdb.h" @@ -59,6 +65,7 @@ ensure_package_clientdata(struct pkginfo *pkg) pkg->clientdata->color = white; pkg->clientdata->fileslistvalid = 0; pkg->clientdata->files = NULL; + pkg->clientdata->listfile_phys_offs = 0; pkg->clientdata->trigprocdeferred = NULL; } @@ -254,10 +261,81 @@ ensure_packagefiles_available(struct pkginfo *pkg) pkg->clientdata->fileslistvalid= 1; } +#if defined(HAVE_LINUX_FIEMAP_H) +static int +pkg_sorter_by_listfile_phys_offs(const void *a, const void *b) +{ + const struct pkginfo *pa = *(const struct pkginfo **)a; + const struct pkginfo *pb = *(const struct pkginfo **)b; + + /* We can't simply subtract, because the difference may be greater than + * INT_MAX. */ + if (pa->clientdata->listfile_phys_offs < pb->clientdata->listfile_phys_offs) + return -1; + else + return 1; +} + +static void +pkg_files_optimize_load(struct pkg_array *array) +{ + int i; + int blocksize = 0; + + /* Sort packages by the physical location of their list files, so that + * scanning them later will minimize disk drive head movements. */ + for (i = 0; i < array->n_pkgs; i++) { + struct pkginfo *pkg = array->pkgs[i]; + struct { + struct fiemap fiemap; + struct fiemap_extent extent; + } fm; + const char *listfile; + int fd; + + ensure_package_clientdata(pkg); + + if (pkg->status == stat_notinstalled || + pkg->clientdata->listfile_phys_offs != 0) + continue; + + pkg->clientdata->listfile_phys_offs = -1; + + listfile = pkgadminfile(pkg, LISTFILE); + + fd = open(listfile, O_RDONLY); + if (fd < 0) + continue; + + if (!blocksize && ioctl(fd, FIGETBSZ, &blocksize) < 0) + break; + + memset(&fm, 0, sizeof(fm)); + fm.fiemap.fm_start = 0; + fm.fiemap.fm_length = blocksize; + fm.fiemap.fm_flags = 0; + fm.fiemap.fm_extent_count = 1; + + if (ioctl(fd, FS_IOC_FIEMAP, (unsigned long)&fm) == 0) + pkg->clientdata->listfile_phys_offs = fm.fiemap.fm_extents[0].fe_physical; + + close(fd); + } + + pkg_array_sort(array, pkg_sorter_by_listfile_phys_offs); +} +#else +static void +pkg_files_optimize_load(struct pkg_array *array) +{ +} +#endif + void ensure_allinstfiles_available(void) { - struct pkgiterator *it; + struct pkg_array array; struct pkginfo *pkg; struct progress progress; + int i; if (allpackagesdone) return; if (saidread<2) { @@ -267,14 +345,20 @@ void ensure_allinstfiles_available(void) { progress_init(&progress, _("(Reading database ... "), max); } - it= iterpkgstart(); - while ((pkg = iterpkgnext(it)) != NULL) { + pkg_array_init_from_db(&array); + + pkg_files_optimize_load(&array); + + for (i = 0; i < array.n_pkgs; i++) { + pkg = array.pkgs[i]; ensure_packagefiles_available(pkg); if (saidread == 1) progress_step(&progress); } - iterpkgend(it); + + pkg_array_destroy(&array); + allpackagesdone= 1; if (saidread==1) { diff --git a/src/main.h b/src/main.h index 869c4a1..acb1a8a 100644 --- a/src/main.h +++ b/src/main.h @@ -50,6 +50,8 @@ struct perpackagestate { struct fileinlist *files; int replacingfilesandsaid; + off_t listfile_phys_offs; + /* Non-NULL iff in trigproc.c:deferred. */ struct pkg_list *trigprocdeferred; }; -- dpkg's main repository -- To UNSUBSCRIBE, email to debian-dpkg-cvs-requ...@lists.debian.org with a subject of "unsubscribe". Trouble? Contact listmas...@lists.debian.org