Hi,

On 01.03.2017 07:25, Timothy Arceri wrote:
This reduces the cache size for Deus Ex from ~160M to ~30M for
radeonsi.

I'm also seeing the following improvements in minimum fps in the
Shadow of Mordor benchmark:

no-cache:                    ~10fps
with-cache-no-compression:   ~15fps
with-cache-and-compression:  ~20fps

Note the with-cache results are from the second run after closing
and opening the game to avoid the in-memory cache.

Since we only really care about decompression I went with
Z_BEST_COMPRESSION as suggested on irc by Steinar H. Gunderson
who has benchmarked decompression speeds.

Did he try liblzo instead of zlib?

It should be faster than zlib while still having a fairly OK compression ratio.


        - Eero

---
 configure.ac          |   4 ++
 src/util/Makefile.am  |   2 +
 src/util/disk_cache.c | 173 +++++++++++++++++++++++++++++++++++++++++++-------
 3 files changed, 156 insertions(+), 23 deletions(-)

diff --git a/configure.ac b/configure.ac
index 890a379..9fde95f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -92,20 +92,21 @@ LIBVA_REQUIRED=0.38.0
 VDPAU_REQUIRED=1.1
 WAYLAND_REQUIRED=1.11
 XCB_REQUIRED=1.9.3
 XCBDRI2_REQUIRED=1.8
 XCBGLX_REQUIRED=1.8.1
 XDAMAGE_REQUIRED=1.1
 XSHMFENCE_REQUIRED=1.1
 XVMC_REQUIRED=1.0.6
 PYTHON_MAKO_REQUIRED=0.8.0
 LIBSENSORS_REQUIRED=4.0.0
+ZLIB_REQUIRED=1.2.8

 dnl LLVM versions
 LLVM_REQUIRED_GALLIUM=3.3.0
 LLVM_REQUIRED_OPENCL=3.6.0
 LLVM_REQUIRED_R600=3.6.0
 LLVM_REQUIRED_RADEONSI=3.6.0
 LLVM_REQUIRED_RADV=3.9.0
 LLVM_REQUIRED_SWR=3.6.0

 dnl Check for progs
@@ -777,20 +778,23 @@ darwin*)
     AC_CHECK_FUNCS([clock_gettime], [CLOCK_LIB=],
                    [AC_CHECK_LIB([rt], [clock_gettime], [CLOCK_LIB=-lrt],
                                  [AC_MSG_ERROR([Could not find clock_gettime])])])
     AC_SUBST([CLOCK_LIB])
     ;;
 esac

 dnl See if posix_memalign is available
 AC_CHECK_FUNC([posix_memalign], [DEFINES="$DEFINES -DHAVE_POSIX_MEMALIGN"])

+dnl Check for zlib
+PKG_CHECK_MODULES([ZLIB], [zlib >= $ZLIB_REQUIRED])
+
 dnl Check for pthreads
 AX_PTHREAD
 if test "x$ax_pthread_ok" = xno; then
     AC_MSG_ERROR([Building mesa on this platform requires pthreads])
 fi
 dnl AX_PTHREADS leaves PTHREAD_LIBS empty for gcc and sets PTHREAD_CFLAGS
 dnl to -pthread, which causes problems if we need -lpthread to appear in
 dnl pkgconfig files.  Since Android doesn't have a pthread lib, this check
 dnl is not valid for that platform.
 if test "x$android" = xno; then
diff --git a/src/util/Makefile.am b/src/util/Makefile.am
index ae50a3b..e46d893 100644
--- a/src/util/Makefile.am
+++ b/src/util/Makefile.am
@@ -36,20 +36,22 @@ libmesautil_la_CPPFLAGS = \
        -I$(top_srcdir)/src/mesa \
        -I$(top_srcdir)/src/gallium/include \
        -I$(top_srcdir)/src/gallium/auxiliary \
        $(VISIBILITY_CFLAGS) \
        $(MSVC2013_COMPAT_CFLAGS)

 libmesautil_la_SOURCES = \
        $(MESA_UTIL_FILES) \
        $(MESA_UTIL_GENERATED_FILES)

+libmesautil_la_LIBADD = -lz
+
 roundeven_test_LDADD = -lm

 check_PROGRAMS = u_atomic_test roundeven_test
 TESTS = $(check_PROGRAMS)

 BUILT_SOURCES = $(MESA_UTIL_GENERATED_FILES)
 CLEANFILES = $(BUILT_SOURCES)
 EXTRA_DIST = \
        format_srgb.py \
        SConscript \
diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c
index 2a0edca..03aae02 100644
--- a/src/util/disk_cache.c
+++ b/src/util/disk_cache.c
@@ -30,20 +30,21 @@
 #include <stdio.h>
 #include <sys/file.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/mman.h>
 #include <unistd.h>
 #include <fcntl.h>
 #include <pwd.h>
 #include <errno.h>
 #include <dirent.h>
+#include "zlib.h"

 #include "util/crc32.h"
 #include "util/u_atomic.h"
 #include "util/mesa-sha1.h"
 #include "util/ralloc.h"
 #include "main/errors.h"

 #include "disk_cache.h"

 /* Number of bits to mask off from a cache key to get an index. */
@@ -638,30 +639,106 @@ disk_cache_remove(struct disk_cache *cache, cache_key key)
       return;
    }

    unlink(filename);
    free(filename);

    if (sb.st_size)
       p_atomic_add(cache->size, - sb.st_size);
 }

+/* From the zlib docs:
+ *    "If the memory is available, buffers sizes on the order of 128K or 256K
+ *    bytes should be used."
+ */
+#define BUFSIZE 256 * 1024
+
+/**
+ * Compresses cache entry in memory and writes it to disk. Returns the size
+ * of the data written to disk.
+ */
+static size_t
+deflate_and_write_to_disk(const void *in_data, size_t in_data_size, int dest,
+                          char *filename)
+{
+   unsigned char out[BUFSIZE];
+
+   /* allocate deflate state */
+   z_stream strm;
+   strm.zalloc = Z_NULL;
+   strm.zfree = Z_NULL;
+   strm.opaque = Z_NULL;
+   strm.next_in = (uint8_t *) in_data;
+   strm.avail_in = in_data_size;
+
+   int ret = deflateInit(&strm, Z_BEST_COMPRESSION);
+   if (ret != Z_OK)
+       return 0;
+
+   /* compress until end of in_data */
+   size_t compressed_size = 0;
+   int flush;
+   do {
+      int remaining = in_data_size - BUFSIZE;
+      flush = remaining > 0 ? Z_NO_FLUSH : Z_FINISH;
+      in_data_size -= BUFSIZE;
+
+      /* Run deflate() on input until the output buffer is not full (which
+       * means there is no more data to deflate).
+       */
+      do {
+         strm.avail_out = BUFSIZE;
+         strm.next_out = out;
+
+         ret = deflate(&strm, flush);    /* no bad return value */
+         assert(ret != Z_STREAM_ERROR);  /* state not clobbered */
+
+         size_t have = BUFSIZE - strm.avail_out;
+         compressed_size += compressed_size + have;
+
+         size_t written = 0;
+         for (size_t len = 0; len < have; len += written) {
+            written = write(dest, out + len, have - len);
+            if (written == -1) {
+               (void)deflateEnd(&strm);
+               return 0;
+            }
+         }
+      } while (strm.avail_out == 0);
+
+      /* all input should be used */
+      assert(strm.avail_in == 0);
+
+   } while (flush != Z_FINISH);
+
+   /* stream should be complete */
+   assert(ret == Z_STREAM_END);
+
+   /* clean up and return */
+   (void)deflateEnd(&strm);
+   return compressed_size;
+}
+
+struct cache_entry_file_data {
+   uint32_t crc32;
+   uint32_t uncompressed_size;
+};
+
 void
 disk_cache_put(struct disk_cache *cache,
           cache_key key,
           const void *data,
           size_t size)
 {
    int fd = -1, fd_final = -1, err, ret;
    size_t len;
    char *filename = NULL, *filename_tmp = NULL;
-   const char *p = data;

    filename = get_cache_file(cache, key);
    if (filename == NULL)
       goto done;

    /* Write to a temporary file to allow for an atomic rename to the
     * final destination filename, (to prevent any readers from seeing
     * a partially written file).
     */
    if (asprintf(&filename_tmp, "%s.tmp", filename) == -1)
@@ -706,120 +783,170 @@ disk_cache_put(struct disk_cache *cache,
     *
     * Before we do that, if the cache is too large, evict something
     * else first.
     */
    if (*cache->size + size > cache->max_size)
       evict_random_item(cache);

    /* Create CRC of the data and store at the start of the file. We will
     * read this when restoring the cache and use it to check for corruption.
     */
-   uint32_t crc32 = util_hash_crc32(data, size);
-   size_t crc_size = sizeof(crc32);
-   for (len = 0; len < crc_size; len += ret) {
-      ret = write(fd, &crc32, crc_size - len);
+   struct cache_entry_file_data cf_data;
+   cf_data.crc32 = util_hash_crc32(data, size);
+   cf_data.uncompressed_size = size;
+
+   size_t cf_data_size = sizeof(cf_data);
+   for (len = 0; len < cf_data_size; len += ret) {
+      ret = write(fd, &cf_data, cf_data_size - len);
       if (ret == -1) {
          unlink(filename_tmp);
          goto done;
       }
    }

    /* Now, finally, write out the contents to the temporary file, then
     * rename them atomically to the destination filename, and also
     * perform an atomic increment of the total cache size.
     */
-   for (len = 0; len < size; len += ret) {
-      ret = write(fd, p + len, size - len);
-      if (ret == -1) {
-         unlink(filename_tmp);
-         goto done;
-      }
+   size_t file_size = deflate_and_write_to_disk(data, size, fd, filename_tmp);
+   if (file_size == 0) {
+      unlink(filename_tmp);
+      goto done;
    }
-
    rename(filename_tmp, filename);

-   size += crc_size;
-   p_atomic_add(cache->size, size);
+   file_size += cf_data_size;
+   p_atomic_add(cache->size, file_size);

  done:
    if (fd_final != -1)
       close(fd_final);
    /* This close finally releases the flock, (now that the final dile
     * has been renamed into place and the size has been added).
     */
    if (fd != -1)
       close(fd);
    if (filename_tmp)
       free(filename_tmp);
    if (filename)
       free(filename);
 }

+/**
+ * Decompresses cache entry, returns true if successful.
+ */
+static bool
+inflate_cache_data(uint8_t *in_data, size_t in_data_size,
+                   uint8_t *out_data, size_t out_data_size)
+{
+   z_stream strm;
+
+   /* allocate inflate state */
+   strm.zalloc = Z_NULL;
+   strm.zfree = Z_NULL;
+   strm.opaque = Z_NULL;
+   strm.next_in = in_data;
+   strm.avail_in = in_data_size;
+   strm.next_out = out_data;
+   strm.avail_out = out_data_size;
+
+   int ret = inflateInit(&strm);
+   if (ret != Z_OK)
+      return false;
+
+   ret = inflate(&strm, Z_NO_FLUSH);
+   assert(ret != Z_STREAM_ERROR);  /* state not clobbered */
+
+   /* Unless there was an error we should have decompressed everything in one
+    * go as we know the uncompressed file size.
+    */
+   if (ret != Z_STREAM_END) {
+      (void)inflateEnd(&strm);
+      return false;
+   }
+   assert(strm.avail_out == 0);
+
+   /* clean up and return */
+   (void)inflateEnd(&strm);
+   return true;
+}
+
 void *
 disk_cache_get(struct disk_cache *cache, cache_key key, size_t *size)
 {
    int fd = -1, ret, len;
    struct stat sb;
    char *filename = NULL;
    uint8_t *data = NULL;
+   uint8_t *uncompressed_data = NULL;

    if (size)
       *size = 0;

    filename = get_cache_file(cache, key);
    if (filename == NULL)
       goto fail;

    fd = open(filename, O_RDONLY | O_CLOEXEC);
    if (fd == -1)
       goto fail;

    if (fstat(fd, &sb) == -1)
       goto fail;

    data = malloc(sb.st_size);
    if (data == NULL)
       goto fail;

    /* Load the CRC that was created when the file was written. */
-   uint32_t crc32;
-   size_t crc_size = sizeof(crc32);
-   assert(sb.st_size > crc_size);
-   for (len = 0; len < crc_size; len += ret) {
-      ret = read(fd, &crc32 + len, crc_size - len);
+   struct cache_entry_file_data cf_data;
+   size_t cf_data_size = sizeof(cf_data);
+   assert(sb.st_size > cf_data_size);
+   for (len = 0; len < cf_data_size; len += ret) {
+      ret = read(fd, &cf_data + len, cf_data_size - len);
       if (ret == -1)
          goto fail;
    }

    /* Load the actual cache data. */
-   size_t cache_data_size = sb.st_size - crc_size;
+   size_t cache_data_size = sb.st_size - cf_data_size;
    for (len = 0; len < cache_data_size; len += ret) {
       ret = read(fd, data + len, cache_data_size - len);
       if (ret == -1)
          goto fail;
    }

+   /* Uncompress the cache data */
+   uncompressed_data = malloc(cf_data.uncompressed_size);
+   if (!inflate_cache_data(data, cache_data_size, uncompressed_data,
+                           cf_data.uncompressed_size))
+      goto fail;
+
    /* Check the data for corruption */
-   if (crc32 != util_hash_crc32(data, cache_data_size))
+   if (cf_data.crc32 != util_hash_crc32(uncompressed_data,
+                                        cf_data.uncompressed_size))
       goto fail;

+   free(data);
    free(filename);
    close(fd);

    if (size)
-      *size = cache_data_size;
+      *size = cf_data.uncompressed_size;

-   return data;
+   return uncompressed_data;

  fail:
    if (data)
       free(data);
+   if (uncompressed_data)
+      free(uncompressed_data);
    if (filename)
       free(filename);
    if (fd != -1)
       close(fd);

    return NULL;
 }

 void
 disk_cache_put_key(struct disk_cache *cache, cache_key key)


_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to