The following fast_mix function, with the loop unrolling, is about 70%
slower than your proposed version, but it's still four times faster
than the original byte-based fast_mix function.  This is what I'm
considering using as a compromise.

Any comments or objections?

                                        - Ted

/*
 * fast_mix() - mix four 32-bit input words into the 4-word fast pool.
 *
 * Each input word is rotated by a running rotate count, XOR'd with the
 * corresponding pool word and its predecessor (circularly), and then
 * "twisted": the low 3 bits select an entry from twist_table which is
 * folded back into the shifted value.  The rotate count advances by 7
 * per word (14 for the first) and is carried across calls in f->rotate.
 * f->count is advanced by the 16 bytes consumed.
 *
 * The unrolled form below is equivalent to the reference loop kept
 * under #if 0, just faster.
 */
static void fast_mix(struct fast_pool *f, __u32 input[4])
{
        __u32           w;
        unsigned        input_rotate = f->rotate;

#if 0
        int i;

        for (i = 0; i < 4; i++) {
                w = rol32(input[i], input_rotate) ^ f->pool[i] ^
                        f->pool[(i + 3) & 3];
                f->pool[i] = (w >> 3) ^ twist_table[w & 7];
                input_rotate = (input_rotate + (i ? 7 : 14)) & 31;
        }
#else   /* loop unrolled for speed */
        w = rol32(input[0], input_rotate) ^ f->pool[0] ^ f->pool[3];
        f->pool[0] = (w >> 3) ^ twist_table[w & 7];
        input_rotate = (input_rotate + 14) & 31;
        w = rol32(input[1], input_rotate) ^ f->pool[1] ^ f->pool[0];
        f->pool[1] = (w >> 3) ^ twist_table[w & 7];
        input_rotate = (input_rotate + 7) & 31;
        w = rol32(input[2], input_rotate) ^ f->pool[2] ^ f->pool[1];
        f->pool[2] = (w >> 3) ^ twist_table[w & 7];
        input_rotate = (input_rotate + 7) & 31;
        w = rol32(input[3], input_rotate) ^ f->pool[3] ^ f->pool[2];
        f->pool[3] = (w >> 3) ^ twist_table[w & 7];
        input_rotate = (input_rotate + 7) & 31;
#endif
        f->count += 16;
        f->rotate = input_rotate;
}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to