Okay, can you try the following patch? It puts back a flag for IRQ
handling. If it works, I might have an idea what's happening. I think there
is some rogue interrupt disturbing the state.

If it doesn't work, can you please try to compile a kernel with ATADEBUG, and
set atadebug_mask (possibly via ddb during boot) to 0x40?

Jaromir

2017-10-15 23:10 GMT+02:00 Chavdar Ivanov <ci4...@gmail.com>:

> Sorry, it still crashes the same way. I made sure all was updated before
> trying, I do have
>
> ident /netbsd  | grep wdc
>      $NetBSD: atapi_wdc.c,v 1.128 2017/10/10 21:37:49 jdolecek Exp $
>      $NetBSD: ata_wdc.c,v 1.108 2017/10/15 11:27:14 jdolecek Exp $
>      $NetBSD: wdc_isa.c,v 1.60 2017/10/07 16:05:32 jdolecek Exp $
>      $NetBSD: wdc_pcmcia.c,v 1.125 2017/10/07 16:05:33 jdolecek Exp $
>      $NetBSD: wdc.c,v 1.285 2017/10/15 18:02:33 jdolecek Exp $
>
> and the panic is exactly the same.
>
> I am sure I will sort out my problem on this particular machine if I swap
> the internal SSD and the one in the DVD bay, placing the NetBSD root in the
> proper place, but nevertheless the panic may indicate some other unfinished
> work, so I shall keep it as it is for testing.
>
> Chavdar Ivanov
>
> On Sun, 15 Oct 2017 at 19:03 Jaromír Doleček <jaromir.dole...@gmail.com>
> wrote:
>
>> Hi,
>>
>> should be fixed in rev. 1.285 of dev/ic/wdc.c, can you please check?
>>
>> Jaromir
>>
>> 2017-10-14 17:48 GMT+02:00 Chavdar Ivanov <ci4...@gmail.com>:
>>
>>> It still panics the same way, no difference.
>>>
>>> On my other laptop, an HP EliteBook, I haven't the problem at all, only
>>> on the two T61p's (one of them stopped working a week ago, though).
>>>
>>> Chavdar Ivanov
>>>
>>>
>>> On Sat, 14 Oct 2017 at 15:45 Jaromír Doleček <jaromir.dole...@gmail.com>
>>> wrote:
>>>
>>>> Sorry, here is the fixed patch.
>>>>
>>>> 2017-10-14 16:23 GMT+02:00 Jaromír Doleček <jaromir.dole...@gmail.com>:
>>>>
>>>>> Can you try attached patch?
>>>>>
>>>>> Jaromir
>>>>>
>>>>> 2017-10-11 1:04 GMT+02:00 Chavdar Ivanov <ci4...@gmail.com>:
>>>>>
>>>>>> The timeouts when running under VirtualBox disappeared, but of course
>>>>>> the panic on my T61p remains.
>>>>>>
>>>>>> Chavdar Ivanov
>>>>>>
>>>>>> On Tue, 10 Oct 2017 at 22:40 Jaromír Doleček <
>>>>>> jaromir.dole...@gmail.com> wrote:
>>>>>>
>>>>>>> Hey,
>>>>>>>
>>>>>>> can you try with dev/scsipi/atapi_wdc.c 1.128? That should resolve
>>>>>>> the timeouts for atapi, at least it did for me.
>>>>>>>
>>>>>>> Jaromir
>>>>>>>
>>>>>>> 2017-10-10 8:08 GMT+02:00 Rares Aioanei <bsdlis...@gmail.com>:
>>>>>>>
>>>>>>>> I get that also on VBox, except it doesn't try to add cd0a as a swap
>>>>>>>> device, nor does it show an endless stream of "lost interrupt"
>>>>>>>> messages; eventually I get a login prompt. This is with yesterday's
>>>>>>>> latest -CURRENT.
>>>>>>>>
>>>>>>>> On Sun, Oct 8, 2017 at 5:17 PM, Chavdar Ivanov <ci4...@gmail.com>
>>>>>>>> wrote:
>>>>>>>> > I tried the same kernel on a VirtualBox guest - it doesn't crash,
>>>>>>>> but one
>>>>>>>> > gets endless
>>>>>>>> >
>>>>>>>> > piixide0:1:0: lost interrupt
>>>>>>>> >         type: atapi tc_bcount: 0 tc_skip: 0
>>>>>>>> >
>>>>>>>> > stream of messages. Also /etc/rc.d/swap2 start hangs while trying
>>>>>>>> to add
>>>>>>>> > /dev/cd0a as a dump device... as shown by ktruss.
>>>>>>>> >
>>>>>>>> > Weird.
>>>>>>>> >
>>>>>>>> > Chavdar
>>>>>>>> >
>>>>>>>> > On Sun, 8 Oct 2017 at 11:55 Chavdar Ivanov <ci4...@gmail.com>
>>>>>>>> wrote:
>>>>>>>> >>
>>>>>>>> >> System updated about two hours ago. I am getting:
>>>>>>>> >>
>>>>>>>> >> ....
>>>>>>>> >> wd0 at atabus0 drive 0
>>>>>>>> >> wd0: <Hitachi HTS725032A9A364>
>>>>>>>> >> wd0: drive supports 16-sector PIO transfers, LBA48 addressing
>>>>>>>> >> wd0: 298 GB, 620181 cyl, 16 head, 63 sec, 512 bytes/sect x
>>>>>>>> 625142448
>>>>>>>> >> sectors
>>>>>>>> >> piixide0:0:0: bad state 0 in wdc_ata_bio_intr
>>>>>>>> >> panic: wdc_ata_bio_intr: bad state
>>>>>>>> >> fatal breakpoint trap in supervisor mode
>>>>>>>> >> trap type 1 code 0 rip 0xffffffff8021c0c5 cs 0x8 rflags 0x246
>>>>>>>> cr2 0 ilevel
>>>>>>>> >> 0x8 rsp 0xffffe40040003c38
>>>>>>>> >> curlwp 0xffffe4013bb27840 pid 0.2 lowest kstack
>>>>>>>> 0xffffe400400002c0
>>>>>>>> >> Stopped at pid 0.2 (system) at netbsd:breakpoint+0x5: leave
>>>>>>>> >> db{0}> bt
>>>>>>>> >> breakpoint() at netbsd:breakpoint+0x5
>>>>>>>> >> vpanic() at netbsd:vpanic+0x140
>>>>>>>> >> snprintf() at netbsd:snprintf
>>>>>>>> >> wdc_ata_bio_poll() at netbsd:wdc_ata_bio_poll
>>>>>>>> >> intr_biglock_wrapper() at netbsd:intr_biglock_wrapper+0x1d
>>>>>>>> >> Xintr_ioapic_edge10() at netbsd:Xintr_ioapic_edge10+0xee
>>>>>>>> >> --- interrupt ---
>>>>>>>> >> x86_mwait() at netbsd:x86_mwait+0xd
>>>>>>>> >> acpicpu_cstate_idle_enter() at netbsd:acpicpu_cstate_idle_
>>>>>>>> enter+0xdb
>>>>>>>> >> acpicpu_cstate_idle() at netbsd:acpicpu_cstate_idle+0xb6
>>>>>>>> >> idle_loop() at netbsd:idle_loop+0x18c
>>>>>>>> >> db{0}>
>>>>>>>> >> ....
>>>>>>>> >>
>>>>>>>> >> (that is on my usual ThinkPad T61p).
>>>>>>>> >>
>>>>>>>> >> Couldn't get a crash dump.
>>>>>>>> >>
>>>>>>>> >> Chavdar Ivanov
>>>>>>>> >>
>>>>>>>> >
>>>>>>>>
>>>>>>>
>>>>>>>
>>>>>
>>>>
>>
? dev/ata.c.locked_reset.diff
? dev/lst
? dev/n.zip
? dev/qq
Index: dev/ata/ata_subr.c
===================================================================
RCS file: /cvsroot/src/sys/dev/ata/ata_subr.c,v
retrieving revision 1.1
diff -u -p -r1.1 ata_subr.c
--- dev/ata/ata_subr.c  10 Oct 2017 17:19:38 -0000      1.1
+++ dev/ata/ata_subr.c  16 Oct 2017 18:01:50 -0000
@@ -334,7 +334,7 @@ ata_free_xfer(struct ata_channel *chp, s
                /* finish the busmastering PIO */
                (*wdc->piobm_done)(wdc->dma_arg,
                    chp->ch_channel, xfer->c_drive);
-               chp->ch_flags &= ~(ATACH_DMA_WAIT | ATACH_PIOBM_WAIT);
+               chp->ch_flags &= ~(ATACH_DMA_WAIT | ATACH_PIOBM_WAIT | ATACH_IRQ_WAIT);
        }
 #endif
 
Index: dev/ata/ata_wdc.c
===================================================================
RCS file: /cvsroot/src/sys/dev/ata/ata_wdc.c,v
retrieving revision 1.108
diff -u -p -r1.108 ata_wdc.c
--- dev/ata/ata_wdc.c   15 Oct 2017 11:27:14 -0000      1.108
+++ dev/ata/ata_wdc.c   16 Oct 2017 18:01:50 -0000
@@ -592,7 +592,12 @@ _wdc_ata_bio_start(struct ata_channel *c
 intr:
 #endif
        /* Wait for IRQ (either real or polled) */
-       return (ata_bio->flags & ATA_POLL) ? ATASTART_POLL : ATASTART_STARTED;
+       if ((ata_bio->flags & ATA_POLL) == 0) {
+               chp->ch_flags |= ATACH_IRQ_WAIT;
+               return ATASTART_STARTED;
+       } else {
+               return ATASTART_POLL;
+       }
 
 timeout:
        printf("%s:%d:%d: not ready, st=0x%02x, err=0x%02x\n",
Index: dev/ata/atavar.h
===================================================================
RCS file: /cvsroot/src/sys/dev/ata/atavar.h,v
retrieving revision 1.94
diff -u -p -r1.94 atavar.h
--- dev/ata/atavar.h    10 Oct 2017 17:19:38 -0000      1.94
+++ dev/ata/atavar.h    16 Oct 2017 18:01:50 -0000
@@ -399,6 +399,7 @@ struct ata_channel {
        /* Our state */
        volatile int ch_flags;
 #define ATACH_SHUTDOWN 0x02    /* channel is shutting down */
+#define ATACH_IRQ_WAIT 0x10    /* controller is waiting for irq */
 #define ATACH_DMA_WAIT 0x20    /* controller is waiting for DMA */
 #define ATACH_PIOBM_WAIT 0x40  /* controller is waiting for busmastering PIO */
 #define        ATACH_DISABLED 0x80     /* channel is disabled */
Index: dev/ic/wdc.c
===================================================================
RCS file: /cvsroot/src/sys/dev/ic/wdc.c,v
retrieving revision 1.286
diff -u -p -r1.286 wdc.c
--- dev/ic/wdc.c        16 Oct 2017 05:52:43 -0000      1.286
+++ dev/ic/wdc.c        16 Oct 2017 18:01:50 -0000
@@ -879,6 +879,11 @@ wdcintr(void *arg)
                return (0);
        }
 
+       if ((chp->ch_flags & ATACH_IRQ_WAIT) == 0) {
+               ATADEBUG_PRINT(("wdcintr: irq not expected\n"), DEBUG_INTR);
+               goto ignore;
+       }
+
        xfer = ata_queue_get_active_xfer(chp);
        if (xfer == NULL) {
                ATADEBUG_PRINT(("wdcintr: inactive controller\n"), DEBUG_INTR);
@@ -915,8 +920,11 @@ ignore:
                chp->ch_flags &= ~ATACH_DMA_WAIT;
        }
 #endif
+       chp->ch_flags &= ~ATACH_IRQ_WAIT;
        KASSERT(xfer->c_intr != NULL);
        ret = xfer->c_intr(chp, xfer, 1);
+       if (ret == 0) /* irq was not for us, still waiting for irq */
+               chp->ch_flags |= ATACH_IRQ_WAIT;
        return (ret);
 }
 
@@ -943,6 +951,8 @@ wdc_reset_channel(struct ata_channel *ch
        struct wdc_softc *wdc = CHAN_TO_WDC(chp);
 #endif
 
+       chp->ch_flags &= ~ATACH_IRQ_WAIT;
+
        /*
         * if the current command is on an ATAPI device, issue a
         * ATAPI_SOFT_RESET
@@ -1465,6 +1475,7 @@ __wdccommand_start(struct ata_channel *c
        }
 
        if ((ata_c->flags & AT_POLL) == 0) {
+               chp->ch_flags |= ATACH_IRQ_WAIT; /* wait for interrupt */
                callout_reset(&xfer->c_timo_callout, ata_c->timeout / 1000 * hz,
                    wdctimeout, xfer);
                return ATASTART_STARTED;
@@ -1587,6 +1598,7 @@ again:
                wdc->dataout_pio(chp, drive_flags, data, bcount);
                ata_c->flags |= AT_XFDONE;
                if ((ata_c->flags & AT_POLL) == 0) {
+                       chp->ch_flags |= ATACH_IRQ_WAIT; /* wait for interrupt */
                        callout_reset(&xfer->c_timo_callout,
                            mstohz(ata_c->timeout), wdctimeout, xfer);
                        ata_channel_unlock(chp);
Index: dev/pci/pciide_common.c
===================================================================
RCS file: /cvsroot/src/sys/dev/pci/pciide_common.c,v
retrieving revision 1.63
diff -u -p -r1.63 pciide_common.c
--- dev/pci/pciide_common.c     7 Oct 2017 16:05:33 -0000       1.63
+++ dev/pci/pciide_common.c     16 Oct 2017 18:01:50 -0000
@@ -553,6 +553,10 @@ pciide_pci_intr(void *arg)
                if (cp->compat)
                        continue;
 
+               /* if this channel not waiting for intr, skip */
+               if ((wdc_cp->ch_flags & ATACH_IRQ_WAIT) == 0)
+                       continue;
+
                crv = wdcintr(wdc_cp);
                if (crv == 0)
                        ;               /* leave rv alone */
Index: dev/scsipi/atapi_wdc.c
===================================================================
RCS file: /cvsroot/src/sys/dev/scsipi/atapi_wdc.c,v
retrieving revision 1.128
diff -u -p -r1.128 atapi_wdc.c
--- dev/scsipi/atapi_wdc.c      10 Oct 2017 21:37:49 -0000      1.128
+++ dev/scsipi/atapi_wdc.c      16 Oct 2017 18:01:50 -0000
@@ -672,8 +672,10 @@ ready:
        if ((sc_xfer->xs_periph->periph_cap & ATAPI_CFG_DRQ_MASK) !=
            ATAPI_CFG_IRQ_DRQ || (sc_xfer->xs_control & XS_CTL_POLL))
                return ATASTART_POLL;
-       else
+       else {
+               chp->ch_flags |= ATACH_IRQ_WAIT;
                return ATASTART_STARTED;
+       }
 
 timeout:
        printf("%s:%d:%d: %s timed out\n",
@@ -884,6 +886,11 @@ again:
                        chp->ch_flags |= ATACH_DMA_WAIT;
                }
 #endif
+
+               if ((sc_xfer->xs_control & XS_CTL_POLL) == 0) {
+                       chp->ch_flags |= ATACH_IRQ_WAIT;
+               }
+
                ata_channel_unlock(chp);
                return 1;
 
@@ -917,7 +924,8 @@ again:
                        (*wdc->piobm_start)(wdc->dma_arg,
                            chp->ch_channel, xfer->c_drive,
                            xfer->c_skip, len, WDC_PIOBM_XFER_IRQ);
-                       chp->ch_flags |= ATACH_DMA_WAIT | ATACH_PIOBM_WAIT;
+                       chp->ch_flags |= ATACH_DMA_WAIT | ATACH_IRQ_WAIT |
+                           ATACH_PIOBM_WAIT;
                        ata_channel_unlock(chp);
                        return 1;
                }
@@ -934,6 +942,9 @@ again:
 
                xfer->c_skip += len;
                xfer->c_bcount -= len;
+               if ((sc_xfer->xs_control & XS_CTL_POLL) == 0) {
+                       chp->ch_flags |= ATACH_IRQ_WAIT;
+               }
                ata_channel_unlock(chp);
                return 1;
 
@@ -967,7 +978,8 @@ again:
                        (*wdc->piobm_start)(wdc->dma_arg,
                            chp->ch_channel, xfer->c_drive,
                            xfer->c_skip, len, WDC_PIOBM_XFER_IRQ);
-                       chp->ch_flags |= ATACH_DMA_WAIT | ATACH_PIOBM_WAIT;
+                       chp->ch_flags |= ATACH_DMA_WAIT | ATACH_IRQ_WAIT |
+                           ATACH_PIOBM_WAIT;
                        ata_channel_unlock(chp);
                        return 1;
                }
@@ -983,6 +995,9 @@ again:
 
                xfer->c_skip += len;
                xfer->c_bcount -= len;
+               if ((sc_xfer->xs_control & XS_CTL_POLL) == 0) {
+                       chp->ch_flags |= ATACH_IRQ_WAIT;
+               }
                ata_channel_unlock(chp);
                return 1;
 
@@ -1080,7 +1095,6 @@ wdc_atapi_phase_complete(struct ata_xfer
                }
        }
 
-
        /*
         * Some drive occasionally set WDCS_ERR with
         * "ATA illegal length indication" in the error

Reply via email to