> 887 mustbe0(xdl_recs_cmp(&dd1, 0, dd1.nrec, &dd2, 0,
> dd2.nrec,
> 888 kvdf, kvdb, (xp.flags & XDF_NEED_MINIMAL)
> != 0,
> 889 &xenv));
> 878 dd1.nrec = xe->xdf1.nreff; // this is the number of
> lines in file 1
> 879 dd1.ha = xe->xdf1.ha; // this is the "hash" of
> every line. it's actually an index into a hash list, same thing
> 880 dd1.rchg = xe->xdf1.rchg; // this is the change
> vector -- the algorithm sets 1 to indicate the line does not match
> 881 dd1.rindex = xe->xdf1.rindex; // this is the vector
> of pointers to line data including the actual content
> (gdb) s
> xdl_recs_cmp (dd1=0x7ffff4b08880, off1=0, lim1=2, dd2=0x7ffff4b088c0, off2=0,
> lim2=1, kvdf=0x50c000000290, kvdb=0x50c0000002c0, need_min=1,
> xenv=0x7ffff4b08840) at
> /home/karl3/projects/zinc/third_party/xdiff/xdiffi.c:259
> 259 unsigned long const *ha1 = dd1->ha, *ha2 = dd2->ha;
> (gdb) list
> 254 * (marking changed lines) is done in the two boundary reaching
> checks.
> 255 */
> 256 int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1,
> 257 diffdata_t *dd2, long off2, long lim2,
> 258 long *kvdf, long *kvdb, int need_min, xdalgoenv_t
> *xenv) {
> 259 unsigned long const *ha1 = dd1->ha, *ha2 = dd2->ha;
> 260
> 261 /*
> 262 * Shrink the box by walking through each diagonal snake (SW
> and NE).
> 263 */
> (gdb) list
> 264 for (; off1 < lim1 && off2 < lim2 && ha1[off1] == ha2[off2];
> off1++, off2++);
> 265 for (; off1 < lim1 && off2 < lim2 && ha1[lim1 - 1] ==
> ha2[lim2 - 1]; lim1--, lim2--);
> 266
> 267 /*
> 268 * If one dimension is empty, then all records on the other
> one must
> 269 * be obviously changed.
> 270 */
> 271 if (off1 == lim1) {
> 272 char *rchg2 = dd2->rchg;
> 273 long *rindex2 = dd2->rindex;
> (gdb) list
> 274
> 275 for (; off2 < lim2; off2++)
> 276 rchg2[rindex2[off2]] = 1;
> 277 } else if (off2 == lim2) {
> 278 char *rchg1 = dd1->rchg;
> 279 long *rindex1 = dd1->rindex;
> 280
> 281 for (; off1 < lim1; off1++)
> 282 rchg1[rindex1[off1]] = 1;
> 283 } else {
> (gdb) list
> 284 xdpsplit_t spl;
> 285 spl.i1 = spl.i2 = 0;
> 286
> 287 /*
> 288 * Divide ...
> 289 */
> 290 if (xdl_split(ha1, off1, lim1, ha2, off2, lim2, kvdf,
> kvdb,
> 291 need_min, &spl, xenv) < 0) {
> 292
> 293 return -1;
> (gdb) list
> 294 }
> 295
> 296 /*
> 297 * ... et Impera.
> 298 */
> 299 if (xdl_recs_cmp(dd1, off1, spl.i1, dd2, off2, spl.i2,
> 300 kvdf, kvdb, spl.min_lo, xenv) < 0 ||
> 301 xdl_recs_cmp(dd1, spl.i1, lim1, dd2, spl.i2, lim2,
> 302 kvdf, kvdb, spl.min_hi, xenv) < 0) {
> 303
> (gdb) list
> 304 return -1;
> 305 }
> 306 }
> 307
> 308 return 0;
> 309 }
> so in 264 and 265 above, it's looking for starting and ending lines that
> match, and increasing off and decreasing lim to narrow the problem domain
>
> after these lines it then runs some quick checks for unchangedness, if either
> pair of offs and lims end up touching. we won't have that condition as the
> window is only partly filled. the ending lines will mismatch; the condition
> ha1[lim1 - 1] == ha2[lim2 - 1] will be false.
>
> i guess i'd better step into that and verify i'm right!
So it turns out I'm _wrong_. The second loop indeed doesn't run to exhaustion,
because:
(gdb) p ha1[lim1 - 1]
$16 = 2
(gdb) p ha2[lim2 - 1]
$17 = 1
but the fact that the first loop runs to exhaustion (off2 reaches lim2) still
satisfies the condition:
277 } else if (off2 == lim2) {
which then causes the associated block to execute:
278 char *rchg1 = dd1->rchg;
279 long *rindex1 = dd1->rindex;
280
281 for (; off1 < lim1; off1++)
282 rchg1[rindex1[off1]] = 1;
and then that's the end of the function!
308 return 0;
309 }
So basically all the function did was assign 1 to rchg1[rindex1[i]] for each i
from off1 up to (but not including) lim1, and that's it!
It's completely relying on rchg to be filled with zeros before it's called :D
that's definitely the issue here. ... or it looks like it to me now ...
Hopefully if I zero-fill rchg before the call, then this assertion will pass
and a new one will rise later.
i'm expecting they won't all pass because the streaming window is too small
right now. but i do want it to produce outputs that are the best it could find
given that limit.