We can make it look even more like liblzma :)
In my benchmark I observe no negative impact from using the functions;
that is, this is still 5-7% faster than the byte-by-byte approach.


/**
 * CRC64 check that consumes input four bytes at a time using four
 * 256-entry lookup tables, falling back to one byte at a time for the
 * unaligned head and the tail of each update.
 *
 * <p>Multi-byte reads and the final store go through native-order
 * {@link VarHandle} views. On big-endian platforms the table entries are
 * byte-reversed and the shift/extract helpers are mirrored, so the same
 * update code serves both byte orders.
 */
public class CRC64 extends Check {

    /** Native-order {@code int} view of a {@code byte[]}; fetches four input bytes at once. */
    private static final VarHandle INT_HANDLE =
            MethodHandles.byteArrayViewVarHandle(int[].class, ByteOrder.nativeOrder());

    /** Native-order {@code long} view of a {@code byte[]}; stores the finished value. */
    private static final VarHandle LONG_HANDLE =
            MethodHandles.byteArrayViewVarHandle(long[].class, ByteOrder.nativeOrder());

    /** Four lookup tables of 256 entries each; table {@code s} is derived from table {@code s - 1}. */
    private static final long[][] TABLE = new long[4][256];

    // Byte-order dependent helpers, chosen once in the static initializer.
    // A1 picks the index byte from the 64-bit state for single-byte steps.
    // A..D pick the four bytes of a 32-bit word.
    // S8/S32 shift the state by one byte / four bytes.
    // INT_OP picks the 32-bit half of the state that is combined with the input word.
    private static final LongToIntFunction A1;
    private static final IntUnaryOperator A;
    private static final IntUnaryOperator B;
    private static final IntUnaryOperator C;
    private static final IntUnaryOperator D;
    private static final LongUnaryOperator S8;
    private static final LongUnaryOperator S32;
    private static final LongToIntFunction INT_OP;

    static {
        final long poly64 = 0xC96C5795D7870F42L;

        for (int slice = 0; slice < 4; ++slice) {
            for (int value = 0; value < 256; ++value) {
                // The first table starts from the byte value itself; each
                // later table starts from the previous table's entry.
                long r = (slice == 0) ? value : TABLE[slice - 1][value];
                for (int bit = 0; bit < 8; ++bit) {
                    r = ((r & 1) == 1) ? ((r >>> 1) ^ poly64) : (r >>> 1);
                }
                TABLE[slice][value] = r;
            }
        }

        final boolean bigEndian = ByteOrder.nativeOrder() == ByteOrder.BIG_ENDIAN;
        if (bigEndian) {
            // Keep the tables byte-reversed so they line up with
            // native-order (big-endian) word reads.
            for (long[] row : TABLE) {
                for (int k = 0; k < row.length; ++k) {
                    row[k] = Long.reverseBytes(row[k]);
                }
            }

            A1 = x -> (int) ((x >>> 56) & 0xFF);
            A = x -> x >>> 24;
            B = x -> (x >>> 16) & 0xFF;
            C = x -> (x >>> 8) & 0xFF;
            D = x -> x & 0xFF;
            S8 = x -> x << 8;
            S32 = x -> x << 32;
            INT_OP = x -> (int) (x >>> 32);
        } else {
            A1 = x -> (int) (x & 0xFF);
            A = x -> x & 0xFF;
            B = x -> (x >>> 8) & 0xFF;
            C = x -> (x >>> 16) & 0xFF;
            D = x -> x >>> 24;
            S8 = x -> x >>> 8;
            S32 = x -> x >>> 32;
            INT_OP = x -> (int) x;
        }
    }

    /** Running check state; starts at, and is reset to, all ones. */
    private long crc = -1;

    public CRC64() {
        size = 8;
        name = "CRC64";
    }

    /**
     * Folds one input byte into the given state using the first table.
     *
     * @param state current check state
     * @param b     next input byte
     * @return the state after consuming {@code b}
     */
    private static long updateByte(long state, byte b) {
        return TABLE[0][(b & 0xFF) ^ A1.applyAsInt(state)] ^ S8.applyAsLong(state);
    }

    /**
     * Feeds {@code len} bytes of {@code buf}, starting at index {@code off},
     * into the running check.
     *
     * @param buf input bytes
     * @param off index of the first byte to consume
     * @param len number of bytes to consume
     */
    @Override
    public void update(byte[] buf, int off, int len) {
        final int end = off + len;
        int pos = off;

        if (len >= 8) {
            // Advance byte by byte until the index is a multiple of four.
            // This takes at most three steps, and with at least eight
            // bytes available it cannot run past 'end'.
            for (; (pos & 3) != 0; ++pos) {
                crc = updateByte(crc, buf[pos]);
            }

            // Consume whole 4-byte words while one still fits before 'end'.
            final int wordLimit = end - 3;
            while (pos < wordLimit) {
                final int tmp = INT_OP.applyAsInt(crc) ^ (int) INT_HANDLE.get(buf, pos);
                crc = TABLE[3][A.applyAsInt(tmp)]
                        ^ TABLE[2][B.applyAsInt(tmp)]
                        ^ S32.applyAsLong(crc)
                        ^ TABLE[1][C.applyAsInt(tmp)]
                        ^ TABLE[0][D.applyAsInt(tmp)];
                pos += 4;
            }
        }

        // Remaining bytes (or the whole input when it is shorter than eight bytes).
        for (; pos < end; ++pos) {
            crc = updateByte(crc, buf[pos]);
        }
    }

    /**
     * Returns the complemented check state as eight bytes and resets the
     * state so the instance can be reused.
     *
     * @return a new 8-byte array holding the finished value, written
     *         through the native-order {@code long} view
     */
    @Override
    public byte[] finish() {
        final long digest = ~crc;
        crc = -1;

        final byte[] out = new byte[8];
        LONG_HANDLE.set(out, 0, digest);
        return out;
    }
}

Reply via email to