The following fast_mix function, with the loop unrolled, is about 70% slower than your proposed version, but it's still four times faster than the original byte-based fast_mix function. This is what I'm considering using as a compromise.
Any comments or objections? - Ted static void fast_mix(struct fast_pool *f, __u32 input[4]) { __u32 w; int i; unsigned input_rotate = f->rotate; #if 0 for (i = 0; i < 4; i++) { w = rol32(input[i], input_rotate) ^ f->pool[i] ^ f->pool[(i + 3) & 3]; f->pool[i] = (w >> 3) ^ twist_table[w & 7]; input_rotate = (input_rotate + (i ? 7 : 14)) & 31; } #else /* loop unrolled for speed */ w = rol32(input[0], input_rotate) ^ f->pool[0] ^ f->pool[3]; f->pool[0] = (w >> 3) ^ twist_table[w & 7]; input_rotate = (input_rotate + 14) & 31; w = rol32(input[1], input_rotate) ^ f->pool[1] ^ f->pool[0]; f->pool[1] = (w >> 3) ^ twist_table[w & 7]; input_rotate = (input_rotate + 7) & 31; w = rol32(input[2], input_rotate) ^ f->pool[2] ^ f->pool[1]; f->pool[2] = (w >> 3) ^ twist_table[w & 7]; input_rotate = (input_rotate + 7) & 31; w = rol32(input[3], input_rotate) ^ f->pool[3] ^ f->pool[2]; f->pool[3] = (w >> 3) ^ twist_table[w & 7]; input_rotate = (input_rotate + 7) & 31; #endif f->count += 16; f->rotate = input_rotate; } -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/