I have attached updated patches and ArrayUtil.java.
HC4 needed changes/optimizations in both locations.
I also found a better way to handle BT4 occasionally sending -1 as the length.
diff --git a/src/org/tukaani/xz/lz/BT4.java b/src/org/tukaani/xz/lz/BT4.java
index 6c46feb..7d78aef 100644
--- a/src/org/tukaani/xz/lz/BT4.java
+++ b/src/org/tukaani/xz/lz/BT4.java
@@ -11,6 +11,7 @@
 package org.tukaani.xz.lz;

 import org.tukaani.xz.ArrayCache;
+import org.tukaani.xz.common.ArrayUtil;

 final class BT4 extends LZEncoder {
     private final Hash234 hash;
@@ -46,6 +47,7 @@ final class BT4 extends LZEncoder {
         this.depthLimit = depthLimit > 0 ? depthLimit : 16 + niceLen / 2;
     }

+    @Override
     public void putArraysToCache(ArrayCache arrayCache) {
         arrayCache.putArray(tree);
         hash.putArraysToCache(arrayCache);
@@ -70,6 +72,7 @@ final class BT4 extends LZEncoder {
         return avail;
     }

+    @Override
     public Matches getMatches() {
         matches.count = 0;

@@ -118,9 +121,10 @@ final class BT4 extends LZEncoder {

         // If a match was found, see how long it is.
         if (matches.count > 0) {
-            while (lenBest < matchLenLimit && buf[readPos + lenBest - delta2]
-                                              == buf[readPos + lenBest])
-                ++lenBest;
+            lenBest += ArrayUtil.mismatch(buf,
+                                          readPos + lenBest - delta2,
+                                          readPos + lenBest,
+                                          matchLenLimit - lenBest);

             matches.len[matches.count - 1] = lenBest;

@@ -160,25 +164,27 @@ final class BT4 extends LZEncoder {
                         + (delta > cyclicPos ? cyclicSize : 0)) << 1;
             int len = Math.min(len0, len1);

-            if (buf[readPos + len - delta] == buf[readPos + len]) {
-                while (++len < matchLenLimit)
-                    if (buf[readPos + len - delta] != buf[readPos + len])
-                        break;
-
-                if (len > lenBest) {
-                    lenBest = len;
-                    matches.len[matches.count] = len;
-                    matches.dist[matches.count] = delta - 1;
-                    ++matches.count;
-
-                    if (len >= niceLenLimit) {
-                        tree[ptr1] = tree[pair];
-                        tree[ptr0] = tree[pair + 1];
-                        return matches;
-                    }
+            int mismatch = ArrayUtil.mismatch(buf,
+                                              readPos + len - delta,
+                                              readPos + len,
+                                              matchLenLimit - len);
+
+            len += mismatch;
+
+            if (len > lenBest) {
+                lenBest = len;
+                matches.len[matches.count] = len;
+                matches.dist[matches.count] = delta - 1;
+                ++matches.count;
+
+                if (len >= niceLenLimit) {
+                    tree[ptr1] = tree[pair];
+                    tree[ptr0] = tree[pair + 1];
+                    return matches;
                 }
             }

+
             if ((buf[readPos + len - delta] & 0xFF)
                     < (buf[readPos + len] & 0xFF)) {
                 tree[ptr1] = currentMatch;
@@ -215,18 +221,19 @@ final class BT4 extends LZEncoder {
                         + (delta > cyclicPos ? cyclicSize : 0)) << 1;
             int len = Math.min(len0, len1);

-            if (buf[readPos + len - delta] == buf[readPos + len]) {
-                // No need to look for longer matches than niceLenLimit
-                // because we only are updating the tree, not returning
-                // matches found to the caller.
-                do {
-                    if (++len == niceLenLimit) {
-                        tree[ptr1] = tree[pair];
-                        tree[ptr0] = tree[pair + 1];
-                        return;
-                    }
-                } while (buf[readPos + len - delta] == buf[readPos + len]);
+            // No need to look for longer matches than niceLenLimit
+            // because we only are updating the tree, not returning
+            // matches found to the caller.
+            int mismatch = ArrayUtil.mismatch(buf,
+                                              readPos + len - delta,
+                                              readPos + len,
+                                              niceLenLimit);
+            if (mismatch == niceLenLimit) {
+                tree[ptr1] = tree[pair];
+                tree[ptr0] = tree[pair + 1];
+                return;
             }
+            len += mismatch;

             if ((buf[readPos + len - delta] & 0xFF)
                     < (buf[readPos + len] & 0xFF)) {
@@ -243,6 +250,7 @@ final class BT4 extends LZEncoder {
         }
     }

+    @Override
     public void skip(int len) {
         while (len-- > 0) {
             int niceLenLimit = niceLen;



diff --git a/src/org/tukaani/xz/lz/HC4.java b/src/org/tukaani/xz/lz/HC4.java
index d2b4e84..0037067 100644
--- a/src/org/tukaani/xz/lz/HC4.java
+++ b/src/org/tukaani/xz/lz/HC4.java
@@ -11,6 +11,7 @@
 package org.tukaani.xz.lz;

 import org.tukaani.xz.ArrayCache;
+import org.tukaani.xz.common.ArrayUtil;

 final class HC4 extends LZEncoder {
     private final Hash234 hash;
@@ -57,6 +58,7 @@ final class HC4 extends LZEncoder {
         this.depthLimit = (depthLimit > 0) ? depthLimit : 4 + niceLen / 4;
     }

+    @Override
     public void putArraysToCache(ArrayCache arrayCache) {
         arrayCache.putArray(chain);
         hash.putArraysToCache(arrayCache);
@@ -87,6 +89,7 @@ final class HC4 extends LZEncoder {
         return avail;
     }

+    @Override
     public Matches getMatches() {
         matches.count = 0;
         int matchLenLimit = matchLenMax;
@@ -136,9 +139,12 @@ final class HC4 extends LZEncoder {

         // If a match was found, see how long it is.
         if (matches.count > 0) {
-            while (lenBest < matchLenLimit && buf[readPos + lenBest - delta2]
-                                              == buf[readPos + lenBest])
-                ++lenBest;
+            // this often differs on first byte, so we call mismatch option
+            // optimized for that
+            lenBest += ArrayUtil.checkFirstMismatch(buf,
+                                                    readPos + lenBest - delta2,
+                                                    readPos + lenBest,
+                                                    matchLenLimit - lenBest);

             matches.len[matches.count - 1] = lenBest;

@@ -167,34 +173,31 @@ final class HC4 extends LZEncoder {
             currentMatch = chain[cyclicPos - delta
                                  + (delta > cyclicPos ? cyclicSize : 0)];

-            // Test the first byte and the first new byte that would give us
-            // a match that is at least one byte longer than lenBest. This
-            // too short matches get quickly skipped.
-            if (buf[readPos + lenBest - delta] == buf[readPos + lenBest]
-                    && buf[readPos - delta] == buf[readPos]) {
-                // Calculate the length of the match.
-                int len = 0;
-                while (++len < matchLenLimit)
-                    if (buf[readPos + len - delta] != buf[readPos + len])
-                        break;
-
-                // Use the match if and only if it is better than the longest
-                // match found so far.
-                if (len > lenBest) {
-                    lenBest = len;
-                    matches.len[matches.count] = len;
+            // first check the byte past current lenBest, because it often will
+            // not match, in which case the rest of the match does not matter
+            if (buf[readPos + lenBest - delta] == buf[readPos + lenBest]) {
+                final int mismatch = ArrayUtil.mismatch(buf,
+                                                        readPos - delta,
+                                                        readPos,
+                                                        matchLenLimit);
+                // use the match only if it is better than the longest match
+                // found so far
+                if (mismatch > lenBest) {
+                    lenBest = mismatch;
+                    matches.len[matches.count] = mismatch;
                     matches.dist[matches.count] = delta - 1;
                     ++matches.count;

                     // Return if it is long enough (niceLen or reached the
                     // end of the dictionary).
-                    if (len >= niceLenLimit)
+                    if (mismatch >= niceLenLimit)
                         return matches;
                 }
             }
         }
     }

+    @Override
     public void skip(int len) {
         assert len >= 0;







index 0f13029..afa8185 100644
--- a/src/org/tukaani/xz/lz/LZEncoder.java
+++ b/src/org/tukaani/xz/lz/LZEncoder.java
@@ -10,9 +10,11 @@

 package org.tukaani.xz.lz;

-import java.io.OutputStream;
 import java.io.IOException;
+import java.io.OutputStream;
+
 import org.tukaani.xz.ArrayCache;
+import org.tukaani.xz.common.ArrayUtil;

 public abstract class LZEncoder {
     public static final int MF_HC4 = 0x04;
@@ -334,13 +336,7 @@ public abstract class LZEncoder {
      * @return      length of the match; it is in the range [0, lenLimit]
      */
     public int getMatchLen(int dist, int lenLimit) {
-        int backPos = readPos - dist - 1;
-        int len = 0;
-
-        while (len < lenLimit && buf[readPos + len] == buf[backPos + len])
-            ++len;
-
-        return len;
+        return ArrayUtil.mismatch(buf, readPos - dist - 1, readPos, lenLimit);
     }

     /**
@@ -353,14 +349,8 @@ public abstract class LZEncoder {
      * @return      length of the match; it is in the range [0, lenLimit]
      */
     public int getMatchLen(int forward, int dist, int lenLimit) {
-        int curPos = readPos + forward;
-        int backPos = curPos - dist - 1;
-        int len = 0;
-
-        while (len < lenLimit && buf[curPos + len] == buf[backPos + len])
-            ++len;
-
-        return len;
+        final int curPos = readPos + forward;
+        return ArrayUtil.mismatch(buf, curPos - dist - 1, curPos, lenLimit);
     }

     /**




Attachment: ArrayUtil.java
Description: Binary data

Reply via email to