Okay, can you try the following patch? It puts back a flag for IRQ handling. If it works, I might have an idea what's happening. I think there is some rogue interrupt disturbing the state.
If it doesn't work, can you please try to compile kernel with ATADEBUG, and set atadebug_mask (possibly via ddb during boot) to 0x40? Jaromir 2017-10-15 23:10 GMT+02:00 Chavdar Ivanov <ci4...@gmail.com>: > Sorry, it still crashes the same way. I made sure all was updated before > trying, I do have > > ident /netbsd | grep wdc > $NetBSD: atapi_wdc.c,v 1.128 2017/10/10 21:37:49 jdolecek Exp $ > $NetBSD: ata_wdc.c,v 1.108 2017/10/15 11:27:14 jdolecek Exp $ > $NetBSD: wdc_isa.c,v 1.60 2017/10/07 16:05:32 jdolecek Exp $ > $NetBSD: wdc_pcmcia.c,v 1.125 2017/10/07 16:05:33 jdolecek Exp $ > $NetBSD: wdc.c,v 1.285 2017/10/15 18:02:33 jdolecek Exp $ > > and the panic is exactly the same. > > I am sure I will sort out my problem on this particular machine if I swap > the internal SSD and the one in the DVD bay, placing the NetBSD root in the > proper place, but nevertheless the panic may indicate some other unfinished > work, so I shall keep it as it is for testing. > > Chavdar Ivanov > > On Sun, 15 Oct 2017 at 19:03 Jaromír Doleček <jaromir.dole...@gmail.com> > wrote: > >> Hi, >> >> should be fixed in rev. 1.285 of dev/ic/wdc.c, can you please check? >> >> Jaromir >> >> 2017-10-14 17:48 GMT+02:00 Chavdar Ivanov <ci4...@gmail.com>: >> >>> It still panics the same way, no difference. >>> >>> On my other laptop, an HP EliteBook, I haven't the problem at all, only >>> on the two T61p's (one of them stopped working a week ago, though). >>> >>> Chavdar Ivanov >>> >>> >>> On Sat, 14 Oct 2017 at 15:45 Jaromír Doleček <jaromir.dole...@gmail.com> >>> wrote: >>> >>>> Sorry, this fixed patch >>>> >>>> 2017-10-14 16:23 GMT+02:00 Jaromír Doleček <jaromir.dole...@gmail.com>: >>>> >>>>> Can you try attached patch? >>>>> >>>>> Jaromir >>>>> >>>>> 2017-10-11 1:04 GMT+02:00 Chavdar Ivanov <ci4...@gmail.com>: >>>>> >>>>>> The timeouts when running under VirtualBox disappeared, but of course >>>>>> the panic on my T61p remains. 
>>>>>> >>>>>> Chavdar Ivanov >>>>>> >>>>>> On Tue, 10 Oct 2017 at 22:40 Jaromír Doleček < >>>>>> jaromir.dole...@gmail.com> wrote: >>>>>> >>>>>>> Hey, >>>>>>> >>>>>>> can you try with dev/scsipi/atapi_wdc.c 1.128? That should resolve >>>>>>> the timeouts for atapi, at least it did for me. >>>>>>> >>>>>>> Jaromir >>>>>>> >>>>>>> 2017-10-10 8:08 GMT+02:00 Rares Aioanei <bsdlis...@gmail.com>: >>>>>>> >>>>>>>> I get that also on VBox, except it doesn't try to add cd0a as a swap >>>>>>>> device, nor does it show an endless stream of "lost interrupt" >>>>>>>> messages; eventually I get a login prompt. This is with yesterday's >>>>>>>> latest -CURRENT. >>>>>>>> >>>>>>>> On Sun, Oct 8, 2017 at 5:17 PM, Chavdar Ivanov <ci4...@gmail.com> >>>>>>>> wrote: >>>>>>>> > I tried the same kernel on a VirtualBox guest - it doesn't crash, >>>>>>>> but one >>>>>>>> > gets endless >>>>>>>> > >>>>>>>> > piixide0:1:0: lost interrupt >>>>>>>> > type: atapi tc_bcount: 0 tc_skip: 0 >>>>>>>> > >>>>>>>> > stream of messages. Also /etc/rc.d/swap2 start hangs while trying >>>>>>>> to add >>>>>>>> > /dev/cd0a as a dump device... as shown by ktruss. >>>>>>>> > >>>>>>>> > Weird. >>>>>>>> > >>>>>>>> > Chavdar >>>>>>>> > >>>>>>>> > On Sun, 8 Oct 2017 at 11:55 Chavdar Ivanov <ci4...@gmail.com> >>>>>>>> wrote: >>>>>>>> >> >>>>>>>> >> System updated about two hours ago. I am getting: >>>>>>>> >> >>>>>>>> >> .... 
>>>>>>>> >> wd0 at atabus0 drive 0 >>>>>>>> >> wd0: <Hitachi HTS725032A9A364> >>>>>>>> >> wd0: drive supports 16-sector PIO transfers, LBA48 addressing >>>>>>>> >> wd0: 298 GB, 620181 cyl, 16 head, 63 sec, 512 bytes/sect x >>>>>>>> 625142448 >>>>>>>> >> sectors >>>>>>>> >> piixide0:0:0: bad state 0 in wdc_ata_bio_intr >>>>>>>> >> panic: wdc_ata_bio_intr: bad state >>>>>>>> >> fatal breakpoint trap in supervisor mode >>>>>>>> >> trap type 1 code 0 rip 0xffffffff8021c0c5 cs 0x8 rflags 0x246 >>>>>>>> cr2 0 ilevel >>>>>>>> >> 0x8 rsp 0xffffe40040003c38 >>>>>>>> >> curlwp 0xffffe4013bb27840 pid 0.2 lowest kstack >>>>>>>> 0xffffe400400002c0 >>>>>>>> >> Stopped at pid 0.2 (system) at netbsd:breakpoint+0x5: leave >>>>>>>> >> db{0}> bt >>>>>>>> >> breakpoint() at netbsd:breakpoint+0x5 >>>>>>>> >> vpanic() at netbsd:vpanic+0x140 >>>>>>>> >> snprintf() at netbsd:snprintf >>>>>>>> >> wdc_ata_bio_poll() at netbsd:wdc_ata_bio_poll >>>>>>>> >> intr_biglock_wrapper() at netbsd:intr_biglock_wrapper+0x1d >>>>>>>> >> Xintr_ioapic_edge10() at netbsd:Xintr_ioapic_edge10+0xee >>>>>>>> >> --- interrupt --- >>>>>>>> >> x86_mwait() at netbsd:x86_mwait+0xd >>>>>>>> >> acpicpu_cstate_idel_enter() at netbsd:acpicpu_cstate_idle_ >>>>>>>> enter+0xdb >>>>>>>> >> acpicpu_cstate_idle() at netbsd:acpicpu_cstate_idle+0xb6 >>>>>>>> >> idle_loop() at netbsd:idle_loop+0x18c >>>>>>>> >> db{0}> >>>>>>>> >> .... >>>>>>>> >> >>>>>>>> >> (that is on my usual ThinkPad T61p). >>>>>>>> >> >>>>>>>> >> Couldn't get a crash dump. >>>>>>>> >> >>>>>>>> >> Chavdar Ivanov >>>>>>>> >> >>>>>>>> > >>>>>>>> >>>>>>> >>>>>>> >>>>> >>>> >>
? dev/ata.c.locked_reset.diff ? dev/lst ? dev/n.zip ? dev/qq Index: dev/ata/ata_subr.c =================================================================== RCS file: /cvsroot/src/sys/dev/ata/ata_subr.c,v retrieving revision 1.1 diff -u -p -r1.1 ata_subr.c --- dev/ata/ata_subr.c 10 Oct 2017 17:19:38 -0000 1.1 +++ dev/ata/ata_subr.c 16 Oct 2017 18:01:50 -0000 @@ -334,7 +334,7 @@ ata_free_xfer(struct ata_channel *chp, s /* finish the busmastering PIO */ (*wdc->piobm_done)(wdc->dma_arg, chp->ch_channel, xfer->c_drive); - chp->ch_flags &= ~(ATACH_DMA_WAIT | ATACH_PIOBM_WAIT); + chp->ch_flags &= ~(ATACH_DMA_WAIT | ATACH_PIOBM_WAIT | ATACH_IRQ_WAIT); } #endif Index: dev/ata/ata_wdc.c =================================================================== RCS file: /cvsroot/src/sys/dev/ata/ata_wdc.c,v retrieving revision 1.108 diff -u -p -r1.108 ata_wdc.c --- dev/ata/ata_wdc.c 15 Oct 2017 11:27:14 -0000 1.108 +++ dev/ata/ata_wdc.c 16 Oct 2017 18:01:50 -0000 @@ -592,7 +592,12 @@ _wdc_ata_bio_start(struct ata_channel *c intr: #endif /* Wait for IRQ (either real or polled) */ - return (ata_bio->flags & ATA_POLL) ? 
ATASTART_POLL : ATASTART_STARTED; + if ((ata_bio->flags & ATA_POLL) == 0) { + chp->ch_flags |= ATACH_IRQ_WAIT; + return ATASTART_STARTED; + } else { + return ATASTART_POLL; + } timeout: printf("%s:%d:%d: not ready, st=0x%02x, err=0x%02x\n", Index: dev/ata/atavar.h =================================================================== RCS file: /cvsroot/src/sys/dev/ata/atavar.h,v retrieving revision 1.94 diff -u -p -r1.94 atavar.h --- dev/ata/atavar.h 10 Oct 2017 17:19:38 -0000 1.94 +++ dev/ata/atavar.h 16 Oct 2017 18:01:50 -0000 @@ -399,6 +399,7 @@ struct ata_channel { /* Our state */ volatile int ch_flags; #define ATACH_SHUTDOWN 0x02 /* channel is shutting down */ +#define ATACH_IRQ_WAIT 0x10 /* controller is waiting for irq */ #define ATACH_DMA_WAIT 0x20 /* controller is waiting for DMA */ #define ATACH_PIOBM_WAIT 0x40 /* controller is waiting for busmastering PIO */ #define ATACH_DISABLED 0x80 /* channel is disabled */ Index: dev/ic/wdc.c =================================================================== RCS file: /cvsroot/src/sys/dev/ic/wdc.c,v retrieving revision 1.286 diff -u -p -r1.286 wdc.c --- dev/ic/wdc.c 16 Oct 2017 05:52:43 -0000 1.286 +++ dev/ic/wdc.c 16 Oct 2017 18:01:50 -0000 @@ -879,6 +879,11 @@ wdcintr(void *arg) return (0); } + if ((chp->ch_flags & ATACH_IRQ_WAIT) == 0) { + ATADEBUG_PRINT(("wdcintr: irq not expected\n"), DEBUG_INTR); + goto ignore; + } + xfer = ata_queue_get_active_xfer(chp); if (xfer == NULL) { ATADEBUG_PRINT(("wdcintr: inactive controller\n"), DEBUG_INTR); @@ -915,8 +920,11 @@ ignore: chp->ch_flags &= ~ATACH_DMA_WAIT; } #endif + chp->ch_flags &= ~ATACH_IRQ_WAIT; KASSERT(xfer->c_intr != NULL); ret = xfer->c_intr(chp, xfer, 1); + if (ret == 0) /* irq was not for us, still waiting for irq */ + chp->ch_flags |= ATACH_IRQ_WAIT; return (ret); } @@ -943,6 +951,8 @@ wdc_reset_channel(struct ata_channel *ch struct wdc_softc *wdc = CHAN_TO_WDC(chp); #endif + chp->ch_flags &= ~ATACH_IRQ_WAIT; + /* * if the current command is on an ATAPI 
device, issue a * ATAPI_SOFT_RESET @@ -1465,6 +1475,7 @@ __wdccommand_start(struct ata_channel *c } if ((ata_c->flags & AT_POLL) == 0) { + chp->ch_flags |= ATACH_IRQ_WAIT; /* wait for interrupt */ callout_reset(&xfer->c_timo_callout, ata_c->timeout / 1000 * hz, wdctimeout, xfer); return ATASTART_STARTED; @@ -1587,6 +1598,7 @@ again: wdc->dataout_pio(chp, drive_flags, data, bcount); ata_c->flags |= AT_XFDONE; if ((ata_c->flags & AT_POLL) == 0) { + chp->ch_flags |= ATACH_IRQ_WAIT; /* wait for interrupt */ callout_reset(&xfer->c_timo_callout, mstohz(ata_c->timeout), wdctimeout, xfer); ata_channel_unlock(chp); Index: dev/pci/pciide_common.c =================================================================== RCS file: /cvsroot/src/sys/dev/pci/pciide_common.c,v retrieving revision 1.63 diff -u -p -r1.63 pciide_common.c --- dev/pci/pciide_common.c 7 Oct 2017 16:05:33 -0000 1.63 +++ dev/pci/pciide_common.c 16 Oct 2017 18:01:50 -0000 @@ -553,6 +553,10 @@ pciide_pci_intr(void *arg) if (cp->compat) continue; + /* if this channel not waiting for intr, skip */ + if ((wdc_cp->ch_flags & ATACH_IRQ_WAIT) == 0) + continue; + crv = wdcintr(wdc_cp); if (crv == 0) ; /* leave rv alone */ Index: dev/scsipi/atapi_wdc.c =================================================================== RCS file: /cvsroot/src/sys/dev/scsipi/atapi_wdc.c,v retrieving revision 1.128 diff -u -p -r1.128 atapi_wdc.c --- dev/scsipi/atapi_wdc.c 10 Oct 2017 21:37:49 -0000 1.128 +++ dev/scsipi/atapi_wdc.c 16 Oct 2017 18:01:50 -0000 @@ -672,8 +672,10 @@ ready: if ((sc_xfer->xs_periph->periph_cap & ATAPI_CFG_DRQ_MASK) != ATAPI_CFG_IRQ_DRQ || (sc_xfer->xs_control & XS_CTL_POLL)) return ATASTART_POLL; - else + else { + chp->ch_flags |= ATACH_IRQ_WAIT; return ATASTART_STARTED; + } timeout: printf("%s:%d:%d: %s timed out\n", @@ -884,6 +886,11 @@ again: chp->ch_flags |= ATACH_DMA_WAIT; } #endif + + if ((sc_xfer->xs_control & XS_CTL_POLL) == 0) { + chp->ch_flags |= ATACH_IRQ_WAIT; + } + ata_channel_unlock(chp); return 1; 
@@ -917,7 +924,8 @@ again: (*wdc->piobm_start)(wdc->dma_arg, chp->ch_channel, xfer->c_drive, xfer->c_skip, len, WDC_PIOBM_XFER_IRQ); - chp->ch_flags |= ATACH_DMA_WAIT | ATACH_PIOBM_WAIT; + chp->ch_flags |= ATACH_DMA_WAIT | ATACH_IRQ_WAIT | + ATACH_PIOBM_WAIT; ata_channel_unlock(chp); return 1; } @@ -934,6 +942,9 @@ again: xfer->c_skip += len; xfer->c_bcount -= len; + if ((sc_xfer->xs_control & XS_CTL_POLL) == 0) { + chp->ch_flags |= ATACH_IRQ_WAIT; + } ata_channel_unlock(chp); return 1; @@ -967,7 +978,8 @@ again: (*wdc->piobm_start)(wdc->dma_arg, chp->ch_channel, xfer->c_drive, xfer->c_skip, len, WDC_PIOBM_XFER_IRQ); - chp->ch_flags |= ATACH_DMA_WAIT | ATACH_PIOBM_WAIT; + chp->ch_flags |= ATACH_DMA_WAIT | ATACH_IRQ_WAIT | + ATACH_PIOBM_WAIT; ata_channel_unlock(chp); return 1; } @@ -983,6 +995,9 @@ again: xfer->c_skip += len; xfer->c_bcount -= len; + if ((sc_xfer->xs_control & XS_CTL_POLL) == 0) { + chp->ch_flags |= ATACH_IRQ_WAIT; + } ata_channel_unlock(chp); return 1; @@ -1080,7 +1095,6 @@ wdc_atapi_phase_complete(struct ata_xfer } } - /* * Some drive occasionally set WDCS_ERR with * "ATA illegal length indication" in the error