From: Zhaoxiu Zeng <zhaoxiu.z...@gmail.com>

This patch makes two changes:
1. Clean up the code and reduce the number of branches.
2. Use copy_from_back() to copy the matched bytes from further back in the
   output buffer.

Tested on 5.8.18-300.fc33.x86_64.
The performance of zlib_inflate() improves by about 7%.
With CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS disabled in copy_from_back.h,
the improvement is about 5%.

Signed-off-by: Zhaoxiu Zeng <zhaoxiu.z...@gmail.com>
---
 lib/zlib_inflate/inffast.c | 122 ++++++-------------------------------
 1 file changed, 17 insertions(+), 105 deletions(-)

diff --git a/lib/zlib_inflate/inffast.c b/lib/zlib_inflate/inffast.c
index ed1f3df27260..c27e45fc5335 100644
--- a/lib/zlib_inflate/inffast.c
+++ b/lib/zlib_inflate/inffast.c
@@ -4,29 +4,13 @@
  */
 
 #include <linux/zutil.h>
+#include <asm/copy_from_back.h>
 #include "inftrees.h"
 #include "inflate.h"
 #include "inffast.h"
 
 #ifndef ASMINF
 
-union uu {
-       unsigned short us;
-       unsigned char b[2];
-};
-
-/* Endian independed version */
-static inline unsigned short
-get_unaligned16(const unsigned short *p)
-{
-       union uu  mm;
-       unsigned char *b = (unsigned char *)p;
-
-       mm.b[0] = b[0];
-       mm.b[1] = b[1];
-       return mm.us;
-}
-
 /*
    Decode literal, length, and distance codes and write out the resulting
    literal and match bytes until either not enough input or output is
@@ -184,104 +168,32 @@ void inflate_fast(z_streamp strm, unsigned start)
                         state->mode = BAD;
                         break;
                     }
-                    from = window;
-                    if (write == 0) {           /* very common case */
-                        from += wsize - op;
-                        if (op < len) {         /* some from window */
-                            len -= op;
-                            do {
-                                *out++ = *from++;
-                            } while (--op);
-                            from = out - dist;  /* rest from output */
-                        }
-                    }
-                    else if (write < op) {      /* wrap around window */
-                        from += wsize + write - op;
-                        op -= write;
-                        if (op < len) {         /* some from end of window */
-                            len -= op;
-                            do {
-                                *out++ = *from++;
-                            } while (--op);
-                            from = window;
-                            if (write < len) {  /* some from start of window */
-                                op = write;
+                    from = window + write - op;
+                    if (write < op) {           /* very common case */
+                        from += wsize;
+                        if (write) {            /* wrap around window */
+                            op -= write;
+                            if (op < len) {     /* some from end of window */
                                 len -= op;
                                 do {
                                     *out++ = *from++;
                                 } while (--op);
-                                from = out - dist;      /* rest from output */
+                                from = window;  /* some from start of window */
+                                op = write;
                             }
                         }
                     }
-                    else {                      /* contiguous in window */
-                        from += write - op;
-                        if (op < len) {         /* some from window */
-                            len -= op;
-                            do {
-                                *out++ = *from++;
-                            } while (--op);
-                            from = out - dist;  /* rest from output */
-                        }
-                    }
-                    while (len > 2) {
-                        *out++ = *from++;
-                        *out++ = *from++;
-                        *out++ = *from++;
-                        len -= 3;
-                    }
-                    if (len) {
-                        *out++ = *from++;
-                        if (len > 1)
+                    if (op < len) {             /* some from window */
+                        len -= op;              /* rest from output */
+                        do {
                             *out++ = *from++;
+                        } while (--op);
+                    } else {
+                        dist = out - from;
                     }
                 }
-                else {
-                   unsigned short *sout;
-                   unsigned long loops;
-
-                    from = out - dist;          /* copy direct from output */
-                   /* minimum length is three */
-                   /* Align out addr */
-                   if (!((long)(out - 1) & 1)) {
-                       *out++ = *from++;
-                       len--;
-                   }
-                   sout = (unsigned short *)(out);
-                   if (dist > 2) {
-                       unsigned short *sfrom;
-
-                       sfrom = (unsigned short *)(from);
-                       loops = len >> 1;
-                       do
-#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-                           *sout++ = *sfrom++;
-#else
-                           *sout++ = get_unaligned16(sfrom++);
-#endif
-                       while (--loops);
-                       out = (unsigned char *)sout;
-                       from = (unsigned char *)sfrom;
-                   } else { /* dist == 1 or dist == 2 */
-                       unsigned short pat16;
-
-                       pat16 = *(sout-1);
-                       if (dist == 1) {
-                               union uu mm;
-                               /* copy one char pattern to both bytes */
-                               mm.us = pat16;
-                               mm.b[0] = mm.b[1];
-                               pat16 = mm.us;
-                       }
-                       loops = len >> 1;
-                       do
-                           *sout++ = pat16;
-                       while (--loops);
-                       out = (unsigned char *)sout;
-                   }
-                   if (len & 1)
-                       *out++ = *from++;
-                }
+                /* copy direct from output */
+                out = copy_from_back(out, dist, len);
             }
             else if ((op & 64) == 0) {          /* 2nd level distance code */
                 this = dcode[this.val + (hold & ((1U << op) - 1))];
-- 
2.28.0


Reply via email to