Hi all, just a quick note: with a fletcher-4 checksum (the current version), the following algorithm determines the position of any single-bit error.
/*
 * Decide whether two fletcher-4 checksums differ in the way a single flipped
 * bit in the checksummed buffer would make them differ and, if so, print the
 * position of that bit.
 *
 * Fletcher-4 runs four accumulators over the buffer's 32-bit words:
 *	a += w; b += a; c += b; d += c;
 * so changing one word by `delta` when `k` words remain (counting the changed
 * word itself) shifts the accumulators by
 *	delta, k * delta, k(k+1)/2 * delta, k(k+1)(k+2)/6 * delta.
 * A single-bit flip makes `delta` a power of two that fits in 32 bits, which
 * lets `k` be recovered as b / a and then cross-checked against c and d.
 *
 *	base	reference checksum (presumably the expected one -- confirm
 *		against the caller)
 *	bad	checksum to compare against `base` (presumably computed over
 *		the corrupted data -- confirm against the caller)
 *	bufsize	size of the checksummed buffer in bytes
 *	bswap	non-zero to byte-swap the reported bit pattern
 *
 * Returns true (after printing the bit pattern and the zero-based word index)
 * when the difference is consistent with a single-bit error, false otherwise.
 *
 * NOTE(review): the c/d cross-check relies on word * (word + 1) * (word + 2)
 * not overflowing 64 bits before the divisions; that holds for buffers of a
 * few megabytes but should be confirmed for the largest supported block size.
 */
bool
has_1bit_err(zio_cksum_t *base, zio_cksum_t *bad, size_t bufsize, int bswap)
{
	uint64_t a, b, c, d;
	int neg;
	size_t nwords = bufsize / sizeof (uint32_t);
	uint64_t word;
	uint64_t tri;
	uint32_t pattern;

	/*
	 * Take the difference in whichever direction makes `a` non-negative.
	 * The other accumulators are subtracted in the same direction; they
	 * are unsigned, so any wrap-around is well defined.
	 */
	if (base->a < bad->a) {
		neg = 0;
		a = bad->a - base->a;
		b = bad->b - base->b;
		c = bad->c - base->c;
		d = bad->d - base->d;
	} else {
		neg = 1;
		a = base->a - bad->a;
		b = base->b - bad->b;
		c = base->c - bad->c;
		d = base->d - bad->d;
	}

	/* high bits set, or not a power-of-2 */
	if (a != (uint64_t)(uint32_t)a || a == 0 || (a & (a - 1)) != 0)
		return (false);

	/* b not a multiple of a (a is a power of two, so masking suffices) */
	if ((b & (a - 1)) != 0)
		return (false);

	/* number of words from the damaged word to the end, inclusive */
	word = b / a;
	if (word == 0 || word > nwords)
		return (false);		/* b out of range */

	/* c and d must match the triangular / tetrahedral multiples of a */
	tri = (word * (word + 1)) / 2;
	if (c != tri * a || d != (tri * (word + 2) / 3) * a)
		return (false);		/* c and d don't match up */

	pattern = bswap ? BSWAP_32((uint32_t)a) : (uint32_t)a;

	/* Cast so the arguments match the %lx / %ld conversions exactly. */
	printf("error is %c%lx in word %ld\n", neg ? '-' : '+',
	    (unsigned long)pattern, (long)(nwords - word));

	return (true);
}

/*
 * Handling multi-bit errors would be more complicated, of course, and this
 * would need a slight update to be used with (mod 2^32 - 1) calculations.
 *
 * In any case, this is effectively zero work; it wouldn't be hard to correct
 * single-bit errors when we read in a bad block, and self-heal the source.
 *
 * Cheers,
 * - jonathan
 */