* De: Stefan Farfeleder <[EMAIL PROTECTED]> [ Data: 2002-10-04 ] [ Subjecte: Re: Junior Kernel Hacker page updated... ] > On Thu, Oct 03, 2002 at 04:41:46PM +0200, Poul-Henning Kamp wrote: > > > > Hi Stefan, > > > > I tried this patch and it paniced my (almost-) current machine with > > a pagefault in the kqueue code: Bravo! > > > > I can see that there is some amount of #ifdef stuff in your patch, > > The #ifdefs are already in the code, namely REMOTE and RMT_WILL_WATCH. > Is anybody using them? Building with -DREMOTE doesn't compile and with > -DRMT_WILL_WATCH the linker is complaining about the lack of the > functions Rmt_Ignore(), Rmt_Watch() and Rmt_Wait(). Can't we get rid of > those defines? I understand Juli Mallett wants to rewrite make, so maybe > this effort would be wasted. > > > in light of that, would it be possible to make an #ifdef'ed version > > of your patch which we could commit ? > > Ok, the new patch is attached. Compile with -DUSE_KQUEUE to use the new > code. > > > That way we give the kqueue hackers a good testcase, and we can > > easily enable when they have solved the problem. > > After Don Lewis fixed the 'could sleep with' problem (thanks!), I'm > still encountering freezes and panics. Here's one I caught: > > [warning: parts are typed in] > %%% > Fatal trap 12: page fault while in kernel mode > cpuid = 0; lapic.id = 00000000 > fault virtual address = 0x8 > fault code = supervisor read, page not present > instruction pointer = 0x8:0xc01a1212 > stack pointer = 0x10:0xe5226c14 > frame pointer = 0x10:0xe5226ca0 > code segment = base 0x0, limit 0xfffff, type 0x1b > = DPL 0, pres 1, def32 1, gran 1 > processor eflags = interrupt enabled, resume, IOPL = 0 > current process = 56525 (make) > > kernel: type 12 trap, code = 0 > > Stopped at kqueue_scan+0x242: cmpl $0,0x8(%ebx) > db> trace > kqueue_scan(c6472bf4,4,bfbfebc0,0,c70ecea0) at kqueue_scan+0x242 > kevent(c70ecea0,e5226d10,c0351d80,418,6) at kevent+0x1e1 > syscall(2f,2f,2f,818d780,818d960) at syscall+0x2be > %%%
Run the kqueue source file through gcc with -fverbose-asm -S and then look at the resulting .s file, grep for cmpl.*0x8( and look for what's being dereferenced without being checked for NULL. > The core file doesn't seem to be particularly helpful, as the > kqueue_scan frame seems to miss: > > %%% > (kgdb) bt > #0 doadump () at /freebsd/current/src/sys/kern/kern_shutdown.c:223 > #1 0xc01ba92a in boot (howto=260) > at /freebsd/current/src/sys/kern/kern_shutdown.c:355 > #2 0xc01babe7 in panic () at /freebsd/current/src/sys/kern/kern_shutdown.c:508 > #3 0xc013b1d2 in db_panic () at /freebsd/current/src/sys/ddb/db_command.c:450 > #4 0xc013b152 in db_command (last_cmdp=0xc035db20, cmd_table=0x0, > aux_cmd_tablep=0xc03577dc, aux_cmd_tablep_end=0xc03577e0) > at /freebsd/current/src/sys/ddb/db_command.c:346 > #5 0xc013b266 in db_command_loop () > at /freebsd/current/src/sys/ddb/db_command.c:472 > #6 0xc013deca in db_trap (type=12, code=0) > at /freebsd/current/src/sys/ddb/db_trap.c:72 > #7 0xc02e9f60 in kdb_trap (type=12, code=0, regs=0xe5226bd4) > at /freebsd/current/src/sys/i386/i386/db_interface.c:166 > #8 0xc0302602 in trap_fatal (frame=0xe5226bd4, eva=0) > at /freebsd/current/src/sys/i386/i386/trap.c:841 > #9 0xc03022e2 in trap_pfault (frame=0xe5226bd4, usermode=0, eva=8) > at /freebsd/current/src/sys/i386/i386/trap.c:760 > #10 0xc0301e0d in trap (frame= > {tf_fs = 24, tf_es = 16, tf_ds = 16, tf_edi = -943606528, tf_esi = -943606488, >tf_ebp = -450728800, tf_isp = -450728960, tf_ebx = 0, tf_edx = 4, tf_ecx = 1, tf_eax >= 0, tf_trapno = 12, tf_err = 0, tf_eip = -1072033262, tf_cs = 8, tf_eflags = 66050, >tf_esp = -966356416, tf_ss = 0}) > ---Type <return> to continue, or q <return> to quit--- > at /freebsd/current/src/sys/i386/i386/trap.c:446 > #11 0xc02eb768 in calltrap () at {standard input}:99 > #12 0xc01a0ad1 in kevent (td=0xc70ecea0, uap=0xe5226d10) > at /freebsd/current/src/sys/kern/kern_event.c:470 > #13 0xc030299e in syscall (frame= > {tf_fs = 47, tf_es = 47, tf_ds = 47, tf_edi = 135845760, tf_esi = 135846240, >tf_ebp = -1077941224, tf_isp = -450728588, tf_ebx = 134831872, tf_edx = 2184, tf_ecx >= 0, tf_eax = 363, tf_trapno = 12, tf_err = 2, tf_eip = 134626551, tf_cs = 31, >tf_eflags = 514, tf_esp = -1077941348, tf_ss = 47}) > at /freebsd/current/src/sys/i386/i386/trap.c:1050 > #14 0xc02eb7bd in Xint0x80_syscall () at {standard input}:141 > ---Can't read userspace from dump, or kernel process--- > %%% > > objdump -dS kern_event.o says the following about kqueue_scan+0x242 > [==0xfd2]: > > %%% > fce: 90 nop > fcf: 90 nop > kn = TAILQ_FIRST(&kq->kq_head); > fd0: 8b 1f mov (%edi),%ebx > TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); > fd2: 83 7b 08 00 cmpl $0x0,0x8(%ebx) > fd6: 74 0b je fe3 <kqueue_scan+0x253> > fd8: 8b 53 08 mov 0x8(%ebx),%edx > fdb: 8b 43 0c mov 0xc(%ebx),%eax > fde: 89 42 0c mov %eax,0xc(%edx) > fe1: eb 06 jmp fe9 <kqueue_scan+0x259> > fe3: 8b 43 0c mov 0xc(%ebx),%eax > fe6: 89 47 04 mov %eax,0x4(%edi) > fe9: 8b 43 0c mov 0xc(%ebx),%eax > fec: 8b 53 08 mov 0x8(%ebx),%edx > fef: 89 10 mov %edx,(%eax) > if (kn == &marker) { > ff1: 8d 45 b4 lea 0xffffffb4(%ebp),%eax > ff4: 39 c3 cmp %eax,%ebx > ff6: 75 18 jne 1010 <kqueue_scan+0x280> > splx(s); > [...] > %%% > > This seems to indicate that TAILQ_FIRST returns a null pointer and > TAILQ_REMOVE tries to dereference it, doesn't it? Maybe 'count' is not 0 > though it should be? > > Regards, > Stefan Farfeleder > Index: job.c > =================================================================== > RCS file: /home/ncvs/src/usr.bin/make/job.c,v > retrieving revision 1.43 > diff -u -r1.43 job.c > --- job.c 29 Sep 2002 00:20:28 -0000 1.43 > +++ job.c 4 Oct 2002 10:58:52 -0000 > @@ -106,6 +106,7 @@ > #include <sys/stat.h> > #include <sys/file.h> > #include <sys/time.h> > +#include <sys/event.h> > #include <sys/wait.h> > #include <err.h> > #include <errno.h> > @@ -237,9 +238,13 @@ > * (2) a job can only be run locally, but > * nLocal equals maxLocal */ > #ifndef RMT_WILL_WATCH > +#ifdef USE_KQUEUE > +static int kqfd; /* File descriptor obtained by kqueue() */ > +#else > static fd_set outputs; /* Set of descriptors of pipes connected to > * the output channels of children */ > #endif > +#endif > > STATIC GNode *lastNode; /* The node for which output was most recently > * produced. */ > @@ -692,7 +697,7 @@ > if (usePipes) { > #ifdef RMT_WILL_WATCH > Rmt_Ignore(job->inPipe); > -#else > +#elif !defined(USE_KQUEUE) > FD_CLR(job->inPipe, &outputs); > #endif > if (job->outPipe != job->inPipe) { > @@ -1267,10 +1272,22 @@ > * position in the buffer to the beginning and mark another > * stream to watch in the outputs mask > */ > +#ifdef USE_KQUEUE > + struct kevent kev[2]; > +#endif > job->curPos = 0; > > #ifdef RMT_WILL_WATCH > Rmt_Watch(job->inPipe, JobLocalInput, job); > +#elif defined(USE_KQUEUE) > + EV_SET(&kev[0], job->inPipe, EVFILT_READ, EV_ADD, 0, 0, job); > + EV_SET(&kev[1], job->pid, EVFILT_PROC, EV_ADD | EV_ONESHOT, > + NOTE_EXIT, 0, NULL); > + if (kevent(kqfd, kev, 2, NULL, 0, NULL) != 0) { > + /* kevent() will fail if the job is already finished */ > + if (errno != EBADF && errno != ESRCH) > + Punt("kevent: %s", strerror(errno)); > + } > #else > FD_SET(job->inPipe, &outputs); > #endif /* RMT_WILL_WATCH */ > @@ -2229,10 +2246,16 @@ > Job_CatchOutput() > { > int nfds; > +#ifdef USE_KQUEUE > +#define KEV_SIZE 4 > + struct kevent kev[KEV_SIZE]; > + int i; > +#else > struct timeval timeout; > fd_set readfds; > LstNode ln; > Job *job; > +#endif > #ifdef RMT_WILL_WATCH > int pnJobs; /* Previous nJobs */ > #endif > @@ -2262,6 +2285,27 @@ > } > #else > if (usePipes) { > +#ifdef USE_KQUEUE > + if ((nfds = kevent(kqfd, NULL, 0, kev, KEV_SIZE, NULL)) == -1) { > + Punt("kevent: %s", strerror(errno)); > + } else { > + for (i = 0; i < nfds; i++) { > + if (kev[i].flags & EV_ERROR) { > + warnc(kev[i].data, "kevent"); > + continue; > + } > + switch (kev[i].filter) { > + case EVFILT_READ: > + JobDoOutput(kev[i].udata, FALSE); > + break; > + case EVFILT_PROC: > + /* Just wake up and let Job_CatchChildren() collect the > + * terminated job. */ > + break; > + } > + } > + } > +#else > readfds = outputs; > timeout.tv_sec = SEL_SEC; > timeout.tv_usec = SEL_USEC; > @@ -2282,6 +2326,7 @@ > } > Lst_Close(jobs); > } > +#endif /* !USE_KQUEUE */ > } > #endif /* RMT_WILL_WATCH */ > } > @@ -2408,6 +2453,12 @@ > } > if (signal(SIGWINCH, SIG_IGN) != SIG_IGN) { > (void) signal(SIGWINCH, JobPassSig); > + } > +#endif > + > +#ifdef USE_KQUEUE > + if ((kqfd = kqueue()) == -1) { > + Punt("kqueue: %s", strerror(errno)); > } > #endif > > Index: job.h > =================================================================== > RCS file: /home/ncvs/src/usr.bin/make/job.h,v > retrieving revision 1.18 > diff -u -r1.18 job.h > --- job.h 26 Sep 2002 01:39:22 -0000 1.18 > +++ job.h 4 Oct 2002 10:55:11 -0000 > @@ -50,6 +50,7 @@ > > #define TMPPAT "/tmp/makeXXXXXXXXXX" > > +#ifndef USE_KQUEUE > /* > * The SEL_ constants determine the maximum amount of time spent in select > * before coming out to see if a child has finished. SEL_SEC is the number of > @@ -57,6 +58,7 @@ > */ > #define SEL_SEC 0 > #define SEL_USEC 100000 > +#endif /* !USE_KQUEUE */ > > > /*- -- Juli Mallett <[EMAIL PROTECTED]> | FreeBSD: The Power To Serve Will break world for fulltime employment. | finger [EMAIL PROTECTED] http://people.FreeBSD.org/~jmallett/ | Support my FreeBSD hacking! To Unsubscribe: send mail to [EMAIL PROTECTED] with "unsubscribe freebsd-current" in the body of the message