Hi all,
Just a quick note; with a fletcher-4 checksum (the current version), the
following algorithm determines the position of any single-bit error.
/*
 * Test whether the difference between two fletcher-4 checksums is what a
 * single flipped bit in one 32-bit word of the buffer would produce, and if
 * so print the bit pattern and the index of the affected word.
 *
 * base    - reference checksum (fields a, b, c, d).
 * bad     - checksum to compare against base.
 * bufsize - size in bytes of the checksummed buffer; it is treated as
 *           bufsize / sizeof (uint32_t) words.
 * bswap   - non-zero to byte-swap the reported bit pattern with BSWAP_32.
 *           NOTE(review): presumably set when the checksum was computed
 *           over byte-swapped words; confirm against the caller.
 *
 * Returns 1 (and prints a line to stdout) when the four differences fit the
 * single-bit model, 0 otherwise.
 *
 * The model checked below: with delta = |bad->a - base->a| a power of two
 * that fits in 32 bits, and word = (b difference) / delta, the c and d
 * differences must equal delta * word(word+1)/2 and
 * delta * word(word+1)(word+2)/6 respectively.  'word' counts from the end
 * of the buffer (1 == last word), so the reported index is nwords - word.
 * All arithmetic is unsigned 64-bit and therefore wraps modulo 2^64.
 */
bool
has_1bit_err(zio_cksum_t *base, zio_cksum_t *bad, size_t bufsize, int bswap)
{
	uint64_t a, b, c, d;
	int neg;
	size_t nwords = bufsize / sizeof (uint32_t);
	uint64_t word;
	uint32_t pattern;

	/*
	 * Subtract the smaller 'a' from the larger so that 'a' holds the
	 * magnitude of the change; neg records the direction.  The b, c and
	 * d differences are taken in the same direction.
	 */
	if (base->a < bad->a) {
		neg = 0;
		a = bad->a - base->a;
		b = bad->b - base->b;
		c = bad->c - base->c;
		d = bad->d - base->d;
	} else {
		neg = 1;
		a = base->a - bad->a;
		b = base->b - bad->b;
		c = base->c - bad->c;
		d = base->d - bad->d;
	}

	if (a != (uint64_t)(uint32_t)a ||
	    a == 0 || (a & (a - 1)) != 0)
		return (0);	/* high bits set, or not a power-of-2 */

	/* a is a power of two here, so (a - 1) masks the remainder of b / a */
	if ((b & (a - 1)) != 0)
		return (0);	/* b not a multiple of a */

	word = b / a;
	if (word == 0 || word > nwords)
		return (0);	/* b out of range */

	/*
	 * Both divisions are exact (a product of consecutive integers), so
	 * dividing before multiplying by 'a' loses nothing.
	 */
	if (c != (word * (word + 1)) / 2 * a ||
	    d != ((word * (word + 1) / 2) * (word + 2) / 3) * a)
		return (0);	/* c and d don't match up */

	pattern = bswap ? BSWAP_32((uint32_t)a) : (uint32_t)a;

	/* Arguments are cast to the exact types %lx and %ld require. */
	printf("error is %c%lx in word %ld\n",
	    neg ? '-' : '+', (unsigned long)pattern, (long)(nwords - word));
	return (1);
}
Handling multi-bit errors would be more complicated, of course, and this
would need a slight update to be used with (mod 2^32 - 1) calculations.
In any case, this is effectively zero work; it wouldn't be hard to correct
single-bit errors when we read in a bad block, and self-heal the source.
Cheers,
- jonathan