On Fri, 30 Mar 2018 12:47:15 +0200 I wrote:
>  That suggests that Set.h doesn't work as expected.

No, that wasn't the case.  A couple of days ago I got a core dump with exactly
the same backtrace (bt), even though I was using std::set.

Core was generated by `/usr/sbin/famd -v -f -T 0'.
Program terminated with signal SIGSEGV, Segmentation fault.
#0  0x0000000000000000 in ?? ()
(gdb) bt
#0  0x0000000000000000 in ?? ()
#1  0x0000000000412aec in TCP_Client::unblock_handler (closure=0x1935d20) at 
TCP_Client.c++:270
#2  0x00000000004105ac in Scheduler::handle_io (fds=0x1c028f0, 
fds@entry=0x7ffd7bd92ef0, iotype=&Scheduler::FDInfo::read, 
    iotype@entry=&Scheduler::FDInfo::write) at Scheduler.c++:315
#3  0x00000000004107e1 in Scheduler::select () at Scheduler.c++:342
#4  0x0000000000402fa5 in loop () at Scheduler.h:89
#5  main (argc=<optimized out>, argv=0x7ffd7bd93168) at main.c++:306


Need to look more closely at ip.

(gdb) hd
Undefined command: "hd".  Try "help".
(gdb) define hd
Type commands for definition of "hd".
End with a line saying just "end".
>dump binary memory dump.tmp $arg0 $arg0+$arg1
>shell hd dump.tmp
>end

(gdb) p sizeof(Interest)
$11 = 200

(gdb) p sizeof(File)
$43 = 240

(gdb) hd (char*)ip 240
00000000  30 c1 73 02 00 00 00 00  50 56 02 02 00 00 00 00  |0.s.....PV......|
00000010  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
00000020  e0 29 c0 01 00 00 00 00  36 30 30 30 36 38 31 31  |.)......60006811|
00000030  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
*
000000c0  7f 00 00 01 01 00 00 00  f0 44 91 01 00 00 00 00  |.........D......|
000000d0  10 2b c0 01 00 00 00 00  01 00 00 00 00 00 00 00  |.+..............|
000000e0  f0 00 00 00 00 00 00 00  60 00 00 00 00 00 00 00  |........`.......|

We have the following layout  (from Interest.h, ClientInterest.h and `p 
&((Interest*)0).<name>'):

offset      Instance Variables              content found

00000000    _vtbl                           bad (virtual functions)
00000008    Interest *hashlink;             ??
00000010    dev_t dev;                      0
00000018    ino_t ino;                      0
00000020    char *const myname;             mixed buffer (see MYNAME below)
00000028    ScanState     scan_state: 1;    "6000?6811I?"
            ExecState cur_exec_state: 1;
            ExecState old_exec_state: 1;
00000029    char ci_char;
0000002a    char dir_char;
00000030    struct stat old_stat;
000000c0    in_addr myhost;                 127.0.0.1
000000c4    bool mypath_exported_to_host;   1 (remaining 3 bytes could have 
been set before)

000000c8    Client *myclient;               bad, 0x19144f0 (see CLIENT below)
000000d0    Request request;
000000d8    const Cred mycred;
000000e0    FileSystem *myfilesystem;
000000e8    Request fs_request;

MYNAME:
(gdb) hd 0x1c029e0 80
00000000  00 00 00 00 00 00 00 00  32 39 2e 4d 38 33 31 38  |........29.M8318|
00000010  34 37 50 32 37 31 36 38  56 30 30 30 30 30 30 30  |47P27168V0000000|
00000020  30 30 30 30 30 36 38 31  31 49 30 30 30 30 30 30  |000006811I000000|
00000030  30 30 30 30 34 45 35 33  37 32 5f 31 2e 6e 6f 72  |00004E5372_1.nor|
00000040  74 68 2c 53 3d 32 32 35  36 36 35 00 53 00 00 53  |th,S=225665.S..S|
00000050


The same is true for other Interest pointers still in to_be_scanned, $tbs0, ...:


(gdb) hd $tbs0 200
00000000  90 d6 73 02 00 00 00 00  c0 2c c0 01 00 00 00 00  |..s......,......|
00000010  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
00000020  40 2a c0 01 00 00 00 00  36 30 30 30 36 38 31 31  |@*......60006811|
00000030  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
*
000000c0  7f 00 00 01 01 35 34 33                           |.....543|
000000c8

(gdb) p ((Interest*)$tbs0)->myname
$13 = 0x1c02a40 ""
(gdb) hd 0x1c02a40 80
00000000  00 00 00 00 00 00 00 00  32 39 2e 4d 38 34 33 38  |........29.M8438|
00000010  38 38 50 32 37 31 37 30  56 30 30 30 30 30 30 30  |88P27170V0000000|
00000020  30 30 30 30 30 36 38 31  31 49 30 30 30 30 30 30  |000006811I000000|
00000030  30 30 30 30 34 45 35 33  38 43 5f 31 2e 6e 6f 72  |00004E538C_1.nor|
00000040  74 68 2c 53 3d 32 32 35  36 36 35 00 00 00 00 00  |th,S=225665.....|
00000050  b0 01 00 00 00 00 00 00  30 00 00 00 00 00 00 00  |........0.......|


(gdb) hd $tbs1 200
00000000  00 2b c0 01 00 00 00 00  80 2e c0 01 00 00 00 00  |.+..............|
00000010  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
00000020  00 2c c0 01 00 00 00 00  36 30 30 30 36 38 31 31  |.,......60006811|
00000030  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
*
000000c0  7f 00 00 01 01 35 34 33                           |.....543|
000000c8

(gdb) p ((Interest*)$tbs1)->myname
$14 = 0x1c02c00 ""
(gdb) hd 0x1c02c00 80
00000000  00 00 00 00 00 00 00 00  34 34 2e 4d 34 35 37 31  |........44.M4571|
00000010  36 50 32 37 32 30 35 56  30 30 30 30 30 30 30 30  |6P27205V00000000|
00000020  30 30 30 30 36 38 31 31  49 30 30 30 30 30 30 30  |00006811I0000000|
00000030  30 30 30 34 45 35 33 43  33 5f 31 2e 6e 6f 72 74  |0004E53C3_1.nort|
00000040  68 2c 53 3d 32 32 35 36  39 35 00 00 00 00 00 00  |h,S=225695......|
00000050  50 01 00 00 00 00 00 00  70 00 00 00 00 00 00 00  |P.......p.......|


(gdb) hd $tbs2 200
00000000  c0 2c c0 01 00 00 00 00  40 30 c0 01 00 00 00 00  |.,......@0......|
00000010  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
00000020  c0 2d c0 01 00 00 00 00  36 30 30 36 38 31 31 49  |.-......6006811I|
00000030  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
*
000000c0  7f 00 00 01 01 33 35 33                           |.....353|
000000c8

(gdb) p ((Interest*)$tbs2)->myname
$15 = 0x1c02dc0 ""
(gdb) hd 0x1c02dc0 80
00000000  00 00 00 00 00 00 00 00  35 39 2e 4d 36 32 36 37  |........59.M6267|
00000010  34 34 50 32 37 33 31 33  56 30 30 30 30 30 30 30  |44P27313V0000000|
00000020  30 30 30 30 30 36 38 31  31 49 30 30 30 30 30 30  |000006811I000000|
00000030  30 30 30 30 34 45 35 33  43 34 5f 31 2e 6e 6f 72  |00004E53C4_1.nor|
00000040  74 68 2c 53 3d 32 32 35  36 39 38 00 00 00 00 00  |th,S=225698.....|
00000050  50 01 00 00 00 00 00 00  30 00 00 00 00 00 00 00  |P.......0.......|


That file exists, so its name was written into myname before the 0's were
written at offsets 0-7 and 4b-4d:
  File: 
‘1523230959.M626744P27313V0000000000006811I00000000004E53C4_1.north,S=225698:2,’
  Size: 225698          Blocks: 456        IO Block: 4096   regular file
Device: 6811h/26641d    Inode: 5133252     Links: 1
Access: (0600/-rw-------)  Uid: (  117/ courier)   Gid: (  117/ courier)
Access: 2018-04-09 01:42:39.000000000 +0200
Modify: 2018-04-09 01:42:39.000000000 +0200
Change: 2018-04-09 09:55:12.000000000 +0200
 Birth: -

That change is likely a rename by the mail server from the "new" to the "cur"
Maildir directory.  That renaming takes place when a mail client
accesses the IMAP folder, typically in the morning.

The core file's epoch time is 1523260548, 36 seconds after that change.  In
fact, all core dumps are in the morning:
1517563957 = 2018-02-02T10:32:37+1:00
1521020492 = 2018-03-14T10:41:32+1:00
1522227453 = 2018-03-28T09:57:33+1:00
1522402673 = 2018-03-30T10:37:53+1:00
1523260548 = 2018-04-09T09:55:48+2:00




CLIENT:
0x19144f0 is not a Client:
Its vtable doesn't match.

(gdb) info vtbl *(Client*)0x19144f0
vtable for 'Client' @ 0x414f10 (subobject @ 0x19144f0):
[0]: 0x404d30 <Directory::~Directory()>
[1]: 0x404de0 <Directory::~Directory()>
[2]: 0x403630 <ClientInterest::active() const>
[3]: 0x4049d0 <Directory::do_scan()>
[4]: 0x403650 <ClientInterest::scan(Interest*)>
[5]: 0x4036b0 <ClientInterest::unscan(Interest*)>
[6]: 0x403800 <ClientInterest::post_event(Event const&, char const*)>
[7]: 0x403bc0 <ClientInterest::get_filesystem()>


It is not clear to me why member myclient could have been overwritten,
or whether an Interest can be recycled from one client to another.

And, since the same pointer is repeated for various Interests, it
might have been a valid Client* that was deleted and then overwritten.  However,
it is not $closure.  Can an Interest be scanned by multiple clients?

At any rate, ~MxClient() destroys all interests mapped in RequestMap,
but I found no attempt to dequeue an Interest that is waiting to be scanned.
Perhaps those Interests were enqueued, but then deleted before being scanned,
and the freed memory was reused for other tasks.  When scanning resumed,
the vtable had zero at index 4, which caused the SIGSEGV?

I further modified the sources like so:


--- fam2/fam-2.7.0/build-tree/fam-2.7.0/src/Client.h    2003-01-18 
15:18:12.000000000 +0100
+++ fam/fam-2.7.0/build-tree/fam-2.7.0/src/Client.h     2018-04-12 
10:59:14.000000000 +0200
@@ -58,7 +58,7 @@ public:
     virtual bool ready_for_events() = 0;
     virtual void post_event(const Event&, Request, const char *name) = 0;
     virtual void enqueue_for_scan(Interest *) = 0;
-    virtual void dequeue_from_scan(Interest *) = 0;
+    virtual void dequeue_from_scan(Interest *) {}
     virtual void enqueue_scanner(Scanner *) = 0;
     virtual void suggest_insecure_compat(const char *) { return; }
     in_addr host() const        { return myhost; }

--- fam2/fam-2.7.0/build-tree/fam-2.7.0/src/ClientInterest.c++  2003-01-18 
15:18:12.000000000 +0100
+++ fam/fam-2.7.0/build-tree/fam-2.7.0/src/ClientInterest.c++   2018-04-11 
18:03:57.000000000 +0200
@@ -62,6 +62,8 @@ ClientInterest::ClientInterest(const cha
 ClientInterest::~ClientInterest()
 {
     myfilesystem->cancel(this, fs_request);
+    if (myclient)
+       myclient->dequeue_from_scan(this);
 }
 
 void

--- fam2/fam-2.7.0/build-tree/fam-2.7.0/src/InternalClient.c++  2003-01-18 
15:18:12.000000000 +0100
+++ fam/fam-2.7.0/build-tree/fam-2.7.0/src/InternalClient.c++   2018-04-12 
11:04:23.000000000 +0200
@@ -60,7 +60,8 @@ InternalClient::enqueue_for_scan(Interes
 void
 InternalClient::dequeue_from_scan(Interest *)
 {
-    int function_is_never_called = 0; assert(function_is_never_called);
+    // not sure, since I added this call from Interest's d'tor
+    // int function_is_never_called = 0; assert(function_is_never_called);
 }




Waiting for next core dump...

Reply via email to