> which then causes the associated block to execute:
> 278 char *rchg1 = dd1->rchg;
> 279 long *rindex1 = dd1->rindex;
> 280
> 281 for (; off1 < lim1; off1++)
> 282 rchg1[rindex1[off1]] = 1;
> and then that's the end of the function!
> 308 return 0;
> 309 }
> It's completely relying on rchg to be filled with zeros before it's called :D
> that's definitely the issue here. ... or it looks like it to me now ...
> hopefully if i do that, then this assertion will pass and a new one will rise
> later.
Ok! So indeed wiping rchg did make that assertion pass.
@@ -839,6 +842,9 @@ private:
void do_diff()
{
auto xe = &this->xe;
+ // for now rchg is wiped as the git/xdiff algorithms assume this
+ std::memset(xe->xdf1.rchg - 1, 0, (size_t)(xe->xdf1.nrec + 2) *
sizeof(*xe->xdf1.rchg));
+ std::memset(xe->xdf2.rchg - 1, 0, (size_t)(xe->xdf2.nrec + 2) *
sizeof(*xe->xdf2.rchg));
switch (XDF_DIFF_ALG(xp.flags)) {
case XDF_PATIENCE_DIFF:
mustbe0(xdl_do_patience_diff(&xp, xe));
The next bug was a coding mistake in one of the consume loops, I wasn't walking
the linked list resulting in iterating the same node forever:
@@ -362,17 +362,20 @@ public:
/* if the classifier entry hash been consumed, recover it, then
classify the line. otherwise, classify the line, then update the line pointer
in case old lines are consumed. */
auto rchash_hi = (long) XDL_HASHLONG(crec->ha, cf->hbits);
- auto * rcrec_link = &cf.consumed_rchash[(size_t)rchash_hi];
+ xdlclass_t ** prev;
xdlclass_t * rcrec;
- while (*rcrec_link) {
- rcrec = *rcrec_link;
+ for (
+ prev = &cf.consumed_rchash[(size_t)rchash_hi];
+ (rcrec = *prev);
+ prev = &(*prev)->next
+ ) {
if (rcrec->ha == crec->ha && rcrec->size == crec->size) {
break;
};
}
- if (*rcrec_link) {
+ if (rcrec) {
assert(rcrec->next == nullptr && "hash collision in consumed
lines; should check for existing matching entry before using consumed entry");
- *rcrec_link = rcrec->next;
+ *prev = rcrec->next;
rcrec->line = crec->ptr;
That's two bugs fixed!
The next bug is a super-fun one: _a memory error_. I have so many memory checks
going on for this code. Here we have it:
$ ./diff_xdiff
[28/1799]
diff_main: Null case.
WINDOW RESIZE0=>2
consuming rec ptr 0x7f2942e00a00
consuming rec ptr 0x502000000250
WINDOW RESIZE2=>4
consuming rec ptr 0x7f2942e00a02
consuming rec ptr 0x502000000312
WINDOW RESIZE4=>8
consuming rec ptr 0x7f2942e00a04
consuming rec ptr 0x502000000334
diff_main: Equality.
WINDOW RESIZE0=>2
WINDOW RESIZE2=>4
WINDOW RESIZE4=>8
consuming rec ptr 0x7f2942e01200
consuming rec ptr 0x502000000550
consuming rec ptr 0x7f2942e01202
consuming rec ptr 0x502000000552
WINDOW RESIZE8=>16
consuming rec ptr 0x7f2942e01204
=================================================================
==15016==ERROR: AddressSanitizer: heap-use-after-free on address 0x5070000002f8
at pc 0x55d895dbd0a9 bp 0x7ffeba337450 sp 0x7ffeba337448
READ of size 8 at 0x5070000002f8 thread T0
#0 0x55d895dbd0a8 in
AsymmetricStreamingXDiff::assert_no_dangling_pointers()
/home/karl3/projects/zinc/src/diff_xdiff.cpp:765
#1 0x55d895dbf402 in
AsymmetricStreamingXDiff::extend_env(std::basic_string_view<char,
std::char_traits<char> >) /home/karl3/projects/zinc/src/diff_xdiff.cpp:818
#2 0x55d895d9667c in diff /home/karl3/projects/zinc/src/diff_xdiff.cpp:695
#3 0x55d895dec067 in
std::__n4861::coroutine_handle<zinc::__generator_promise_base<Diff,
zinc::generator<Diff, Diff, zinc::use_allocator_arg> > >::resume() const
/usr/include/c++/12/coroutine:244
#4 0x55d895dddad3 in zinc::__generator_promise_base<Diff,
zinc::generator<Diff, Diff, zinc::use_allocator_arg> >::resume()
../include/zinc/__generator.hpp:548
#5 0x55d895dcf32d in zinc::generator<Diff, Diff,
zinc::use_allocator_arg>::iterator::operator++()
../include/zinc/__generator.hpp:822
#6 0x55d895d9b7d3 in diff_main
/home/karl3/projects/zinc/src/diff_xdiff.cpp:1029
#7 0x55d895d9dbc8 in test_diff_xdiff()
/home/karl3/projects/zinc/src/diff_xdiff.cpp:1074
#8 0x55d895da4928 in main /home/karl3/projects/zinc/src/diff_xdiff.cpp:1108
#9 0x7f2944a33d67 in __libc_start_call_main
../sysdeps/nptl/libc_start_call_main.h:58
#10 0x7f2944a33e24 in __libc_start_main_impl ../csu/libc-start.c:360
#11 0x55d895d8c640 in _start
(/home/karl3/projects/zinc/src/diff_xdiff+0xb4640) (BuildId:
1dcce094fc42295839b19dfd2bae3a3a7afd2442)
0x5070000002f8 is located 56 bytes inside of 80-byte region
[0x5070000002c0,0x507000000310)
freed by thread T0 here:
#0 0x7f29458f3918 in free
../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:52
#1 0x55d895db5cb7 in DynamicXDFile::cha_consume_(long)
/home/karl3/projects/zinc/src/diff_xdiff.cpp:554
#2 0x55d895db331e in DynamicXDFile::consume(long)
/home/karl3/projects/zinc/src/diff_xdiff.cpp:463
#3 0x55d895d96edf in diff /home/karl3/projects/zinc/src/diff_xdiff.cpp:701
#4 0x55d895dec067 in
std::__n4861::coroutine_handle<zinc::__generator_promise_base<Diff,
zinc::generator<Diff, Diff, zinc::use_allocator_arg> > >::resume() const
/usr/include/c++/12/coroutine:244
#5 0x55d895dddad3 in zinc::__generator_promise_base<Diff,
zinc::generator<Diff, Diff, zinc::use_allocator_arg> >::resume()
../include/zinc/__generator.hpp:548
#6 0x55d895dcf32d in zinc::generator<Diff, Diff,
zinc::use_allocator_arg>::iterator::operator++()
../include/zinc/__generator.hpp:822
#7 0x55d895d9b7d3 in diff_main
/home/karl3/projects/zinc/src/diff_xdiff.cpp:1029
#8 0x55d895d9dbc8 in test_diff_xdiff()
/home/karl3/projects/zinc/src/diff_xdiff.cpp:1074
#9 0x55d895da4928 in main /home/karl3/projects/zinc/src/diff_xdiff.cpp:1108
#10 0x7f2944a33d67 in __libc_start_call_main
../sysdeps/nptl/libc_start_call_main.h:58
previously allocated by thread T0 here:
#0 0x7f29458f4c77 in malloc
../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:69
#1 0x55d895e12542 in xdl_cha_alloc
/home/karl3/projects/zinc/third_party/xdiff/xutils.c:101
#2 0x55d895dae6d4 in DynamicXDFile::extend_pre(s_xpparam*, long,
std::basic_string_view<char, std::char_traits<char> >)
/home/karl3/projects/zinc/src/diff_xdiff.cpp:357
#3 0x55d895db947b in
AsymmetricStreamingXDiff::AsymmetricStreamingXDiff(std::basic_string_view<char,
std::char_traits<char> >, long, unsigned long)
/home/karl3/projects/zinc/src/diff_xdiff.cpp:668
#4 0x55d895d9ab24 in diff_main
/home/karl3/projects/zinc/src/diff_xdiff.cpp:1018
#5 0x55d895d9dbc8 in test_diff_xdiff()
/home/karl3/projects/zinc/src/diff_xdiff.cpp:1074
#6 0x55d895da4928 in main /home/karl3/projects/zinc/src/diff_xdiff.cpp:1108
#7 0x7f2944a33d67 in __libc_start_call_main
../sysdeps/nptl/libc_start_call_main.h:58
SUMMARY: AddressSanitizer: heap-use-after-free
/home/karl3/projects/zinc/src/diff_xdiff.cpp:765 in
AsymmetricStreamingXDiff::assert_no_dangling_pointers()
One of the fun things about these memory errors (now in these modern times i
seem to have stumbled on when they actually get reported) is that there are
three separate stack traces: one for violation, one for deallocation, and one
for allocation.