From: Minchan Kim <minc...@kernel.org>

Now squashfs have used for only one stream buffer for decompression
so it hurts parallel read performance so this patch supports
multiple decompressor to enhance performance parallel I/O.

Four 1G file dd read on KVM machine which has 2 CPU and 4G memory.

dd if=test/test1.dat of=/dev/null &
dd if=test/test2.dat of=/dev/null &
dd if=test/test3.dat of=/dev/null &
dd if=test/test4.dat of=/dev/null &

old : 1m39s -> new : 9s

* From v1
  * Change comp_strm with decomp_strm - Phillip
  * Change/add comments - Phillip

Signed-off-by: Minchan Kim <minc...@kernel.org>
Signed-off-by: Phillip Lougher <phil...@squashfs.org.uk>
---
 fs/squashfs/Kconfig              |   13 +++
 fs/squashfs/Makefile             |    9 +-
 fs/squashfs/decompressor_multi.c |  200 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 221 insertions(+), 1 deletion(-)
 create mode 100644 fs/squashfs/decompressor_multi.c

diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
index c70111e..1c6d340 100644
--- a/fs/squashfs/Kconfig
+++ b/fs/squashfs/Kconfig
@@ -63,6 +63,19 @@ config SQUASHFS_LZO
 
          If unsure, say N.
 
+config SQUASHFS_MULTI_DECOMPRESSOR
+       bool "Use multiple decompressors for handling parallel I/O"
+       depends on SQUASHFS
+       help
+         By default Squashfs uses a single decompressor but it gives
+         poor performance on parallel I/O workloads when using multiple CPU
+         machines due to waiting on decompressor availability.
+
+         If you have a parallel I/O workload and your system has enough memory,
+         using this option may improve overall I/O performance.
+
+         If unsure, say N.
+
 config SQUASHFS_XZ
        bool "Include support for XZ compressed file systems"
        depends on SQUASHFS
diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile
index c223c84..dfebc3b 100644
--- a/fs/squashfs/Makefile
+++ b/fs/squashfs/Makefile
@@ -4,8 +4,15 @@
 
 obj-$(CONFIG_SQUASHFS) += squashfs.o
 squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o
-squashfs-y += namei.o super.o symlink.o decompressor.o decompressor_single.o
+squashfs-y += namei.o super.o symlink.o decompressor.o
+
 squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o
 squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o
 squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o
 squashfs-$(CONFIG_SQUASHFS_ZLIB) += zlib_wrapper.o
+
+ifdef CONFIG_SQUASHFS_MULTI_DECOMPRESSOR
+       squashfs-y              += decompressor_multi.o
+else
+       squashfs-y              += decompressor_single.o
+endif
diff --git a/fs/squashfs/decompressor_multi.c b/fs/squashfs/decompressor_multi.c
new file mode 100644
index 0000000..462731d
--- /dev/null
+++ b/fs/squashfs/decompressor_multi.c
@@ -0,0 +1,200 @@
+/*
+ *  Copyright (c) 2013
+ *  Minchan Kim <minc...@kernel.org>
+ *
+ *  This work is licensed under the terms of the GNU GPL, version 2. See
+ *  the COPYING file in the top-level directory.
+ */
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/buffer_head.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/cpumask.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "decompressor.h"
+#include "squashfs.h"
+
+/*
+ * This file implements multi-threaded decompression in the
+ * decompressor framework
+ */
+
+
+/*
+ * The reason that multiply two is that a CPU can request new I/O
+ * while it is waiting previous request.
+ */
+#define MAX_DECOMPRESSOR       (num_online_cpus() * 2)
+
+
+int squashfs_max_decompressors(void)
+{
+       return MAX_DECOMPRESSOR;
+}
+
+
+struct squashfs_stream {
+       void                    *comp_opts;
+       struct list_head        strm_list;
+       struct mutex            mutex;
+       int                     avail_decomp;
+       wait_queue_head_t       wait;
+};
+
+
+struct decomp_stream {
+       void *stream;
+       struct list_head list;
+};
+
+
+static void put_decomp_stream(struct decomp_stream *decomp_strm,
+                               struct squashfs_stream *stream)
+{
+       mutex_lock(&stream->mutex);
+       list_add(&decomp_strm->list, &stream->strm_list);
+       mutex_unlock(&stream->mutex);
+       wake_up(&stream->wait);
+}
+
+void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
+                               void *comp_opts)
+{
+       struct squashfs_stream *stream;
+       struct decomp_stream *decomp_strm = NULL;
+       int err = -ENOMEM;
+
+       stream = kzalloc(sizeof(*stream), GFP_KERNEL);
+       if (!stream)
+               goto out;
+
+       stream->comp_opts = comp_opts;
+       mutex_init(&stream->mutex);
+       INIT_LIST_HEAD(&stream->strm_list);
+       init_waitqueue_head(&stream->wait);
+
+       /*
+        * We should have a decompressor at least as default
+        * so if we fail to allocate new decompressor dynamically,
+        * we could always fall back to default decompressor and
+        * file system works.
+        */
+       decomp_strm = kmalloc(sizeof(*decomp_strm), GFP_KERNEL);
+       if (!decomp_strm)
+               goto out;
+
+       decomp_strm->stream = msblk->decompressor->init(msblk,
+                                               stream->comp_opts);
+       if (IS_ERR(decomp_strm->stream)) {
+               err = PTR_ERR(decomp_strm->stream);
+               goto out;
+       }
+
+       list_add(&decomp_strm->list, &stream->strm_list);
+       stream->avail_decomp = 1;
+       return stream;
+
+out:
+       kfree(decomp_strm);
+       kfree(stream);
+       return ERR_PTR(err);
+}
+
+
+void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
+{
+       struct squashfs_stream *stream = msblk->stream;
+       if (stream) {
+               struct decomp_stream *decomp_strm;
+
+               while (!list_empty(&stream->strm_list)) {
+                       decomp_strm = list_entry(stream->strm_list.prev,
+                                               struct decomp_stream, list);
+                       list_del(&decomp_strm->list);
+                       msblk->decompressor->free(decomp_strm->stream);
+                       kfree(decomp_strm);
+                       stream->avail_decomp--;
+               }
+       }
+
+       WARN_ON(stream->avail_decomp);
+       kfree(stream->comp_opts);
+       kfree(stream);
+}
+
+
+static struct decomp_stream *get_decomp_stream(struct squashfs_sb_info *msblk,
+                                       struct squashfs_stream *stream)
+{
+       struct decomp_stream *decomp_strm;
+
+       while (1) {
+               mutex_lock(&stream->mutex);
+
+               /* There is available decomp_stream */
+               if (!list_empty(&stream->strm_list)) {
+                       decomp_strm = list_entry(stream->strm_list.prev,
+                               struct decomp_stream, list);
+                       list_del(&decomp_strm->list);
+                       mutex_unlock(&stream->mutex);
+                       break;
+               }
+
+               /*
+                * If there is no available decomp and already full,
+                * let's wait for releasing decomp from other users.
+                */
+               if (stream->avail_decomp >= MAX_DECOMPRESSOR)
+                       goto wait;
+
+               /* Let's allocate new decomp */
+               decomp_strm = kmalloc(sizeof(*decomp_strm), GFP_KERNEL);
+               if (!decomp_strm)
+                       goto wait;
+
+               decomp_strm->stream = msblk->decompressor->init(msblk,
+                                               stream->comp_opts);
+               if (IS_ERR(decomp_strm->stream)) {
+                       kfree(decomp_strm);
+                       goto wait;
+               }
+
+               stream->avail_decomp++;
+               WARN_ON(stream->avail_decomp > MAX_DECOMPRESSOR);
+
+               mutex_unlock(&stream->mutex);
+               break;
+wait:
+               /*
+                * If system memory is tough, let's for other's
+                * releasing instead of hurting VM because it could
+                * make page cache thrashing.
+                */
+               mutex_unlock(&stream->mutex);
+               wait_event(stream->wait,
+                       !list_empty(&stream->strm_list));
+       }
+
+       return decomp_strm;
+}
+
+
+int squashfs_decompress(struct squashfs_sb_info *msblk,
+       void **buffer, struct buffer_head **bh, int b, int offset, int length,
+       int srclength, int pages)
+{
+       int res;
+       struct squashfs_stream *stream = msblk->stream;
+       struct decomp_stream *decomp_stream = get_decomp_stream(msblk, stream);
+       res = msblk->decompressor->decompress(msblk, decomp_stream->stream,
+               buffer, bh, b, offset, length, srclength, pages);
+       put_decomp_stream(decomp_stream, stream);
+       if (res < 0)
+               ERROR("%s decompression failed, data probably corrupt\n",
+                       msblk->decompressor->name);
+       return res;
+}
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to