Stephen Rothwell wrote:
> Hi all,
> 
> I get an oops during initialisation of the pata_pdc2027x module on a
> POWER 285 machine.  This is on a very recent Linus kernel tree
> (ff51a98799931256b555446b2f5675db08de6229) with Paulus' powerpc tree
> (that has now been merged). The oops looks like this:
> 
> ----------------------------------------------------------------------------
> Synthesizing the initial hotplug events...done.
> Waiting for /dev to be fully populated...pata_pdc2027x 0001:cc:01.0: PLL 
> input clock 32721 kHz
> ata1: PATA max UDMA/133 cmd 0xD0000800801457C0 ctl 0xD000080080145FDA bmdma 
> 0xD000080080145000 irq 324
> ata2: PATA max UDMA/133 cmd 0xD0000800801455C0 ctl 0xD000080080145DDA bmdma 
> 0xD000080080145008 irq 324
> scsi1 : pata_pdc2027x
> ata1.00: ATAPI, max UDMA/66
> Unable to handle kernel paging request for data at address 0xc0000001007e8d80
> Faulting instruction address: 0xd00000000007b660
> Oops: Kernel access of bad area, sig: 11 [#1]
> SMP NR_CPUS=128 NUMA
> Modules linked in: pata_pdc2027x dm_mirror dm_snapshot ipr libata
> NIP: D00000000007B660 LR: D00000000007B628 CTR: 0000000000000000
> REGS: c0000000f4e3b5a0 TRAP: 0300   Not tainted  (2.6.19comb)
> MSR: 8000000000009032 <EE,ME,IR,DR>  CR: 24000080  XER: 20000009
> DAR: C0000001007E8D80, DSISR: 0000000040010000
> TASK = c000000007001040[3393] 'scsi_eh_1' THREAD: c0000000f4e38000 CPU: 2
> GPR00: 0000000100000000 C0000000F4E3B820 D0000000000A95C0 C00000000FA946E8
> GPR04: C0000000F4E3B960 0000000000000000 0000000000000003 C0000000F4E3B890
> GPR08: 0000000000000001 C0000000007E8D80 D000080080145110 C000000000033A34
> GPR12: 0000000000000000 C00000000056FF80 0000000000000000 C0000000004751C8
> GPR16: 4000000001C00000 C000000000473B90 0000000000000000 000000000035D800
> GPR20: 000000000213AF30 C00000000053AF30 C0000000F4E3BB10 0000000000000001
> GPR24: 0000000000000002 0000000000000000 C0000000F4E3B960 C00000000FA946E8
> GPR28: 0000000000000003 4000000000000000 D0000000000A7980 0000000000000000
> NIP [D00000000007B660] .ata_exec_internal+0x8c/0xf8 [libata]
> LR [D00000000007B628] .ata_exec_internal+0x54/0xf8 [libata]
> Call Trace:
> [C0000000F4E3B820] [D0000000000A7980] .ipr_store_update_fw+0x100/0x6f0 [ipr] 
> (unreliable)
> [C0000000F4E3B8F0] [D00000000007C15C] .ata_set_mode+0x4dc/0x6d0 [libata]
> [C0000000F4E3B9D0] [D0000000000860E0] .ata_do_eh+0x1538/0x1930 [libata]
> [C0000000F4E3BC20] [D000000000083774] .ata_bmdma_drive_eh+0x1f8/0x2e8 [libata]
> [C0000000F4E3BCD0] [D0000000000C17B4] .pdc2027x_error_handler+0x28/0x40 
> [pata_pdc2027x]
> [C0000000F4E3BD50] [D000000000086DC0] .ata_scsi_error+0x32c/0x660 [libata]
> [C0000000F4E3BE00] [C000000000312F7C] .scsi_error_handler+0xc8/0x80c
> [C0000000F4E3BEE0] [C00000000006A0F4] .kthread+0x124/0x174
> [C0000000F4E3BF90] [C000000000024D68] .kernel_thread+0x4c/0x68
> Instruction dump:
> 57ac053e e93e8020 7f63db78 7f44d378 780007c6 7f25cb78 7f86e378 38e10070
> 7fbd0214 39000001 7ba0f860 78001f24 <7d69002a> 7ba0a302 7bbd4602 39200000
>  done.
> ----------------------------------------------------------------------------
> 
> (The reference to ipr_store_update_fw is certainly not real)
> 
> The config, lspci and full boot dmesg output are available at
> http://ozlabs.org/~sfr/{config,lspci,dmesg}
> 
> This machine has a only cdrom drive attached to the pdc controller.
> 

It seems the opps occurred in the ata_exec_internal(). Could you please
try the attached patch and see if the older ata_exec_internal() works?
This can help to narrow it down.

--
albert

--- pdc2027x_ozlab/drivers/ata/libata-core.c    2006-12-06 15:01:13.000000000 
+0800
+++ exec_internal/drivers/ata/libata-core.c     2006-12-07 11:49:08.000000000 
+0800
@@ -1334,7 +1334,7 @@ unsigned ata_exec_internal_sg(struct ata
 }
 
 /**
- *     ata_exec_internal_sg - execute libata internal command
+ *     ata_exec_internal - execute libata internal command
  *     @dev: Device to which the command is sent
  *     @tf: Taskfile registers for the command and the result
  *     @cdb: CDB for packet command
@@ -1342,8 +1342,11 @@ unsigned ata_exec_internal_sg(struct ata
  *     @buf: Data buffer of the command
  *     @buflen: Length of data buffer
  *
- *     Wrapper around ata_exec_internal_sg() which takes simple
- *     buffer instead of sg list.
+ *     Executes libata internal command with timeout.  @tf contains
+ *     command on entry and result on return.  Timeout and error
+ *     conditions are reported via return value.  No recovery action
+ *     is taken after a command times out.  It's caller's duty to
+ *     clean up after timeout.
  *
  *     LOCKING:
  *     None.  Should be called with kernel context, might sleep.
@@ -1355,11 +1358,141 @@ unsigned ata_exec_internal(struct ata_de
                           struct ata_taskfile *tf, const u8 *cdb,
                           int dma_dir, void *buf, unsigned int buflen)
 {
-       struct scatterlist sg;
+       struct ata_port *ap = dev->ap;
+       u8 command = tf->command;
+       struct ata_queued_cmd *qc;
+       unsigned int tag, preempted_tag;
+       u32 preempted_sactive, preempted_qc_active;
+       DECLARE_COMPLETION_ONSTACK(wait);
+       unsigned long flags;
+       unsigned int err_mask;
+       int rc;
 
-       sg_init_one(&sg, buf, buflen);
+       spin_lock_irqsave(ap->lock, flags);
 
-       return ata_exec_internal_sg(dev, tf, cdb, dma_dir, &sg, 1);
+       /* no internal command while frozen */
+       if (ap->pflags & ATA_PFLAG_FROZEN) {
+               spin_unlock_irqrestore(ap->lock, flags);
+               return AC_ERR_SYSTEM;
+       }
+
+       /* initialize internal qc */
+
+       /* XXX: Tag 0 is used for drivers with legacy EH as some
+        * drivers choke if any other tag is given.  This breaks
+        * ata_tag_internal() test for those drivers.  Don't use new
+        * EH stuff without converting to it.
+        */
+       if (ap->ops->error_handler)
+               tag = ATA_TAG_INTERNAL;
+       else
+               tag = 0;
+
+       if (test_and_set_bit(tag, &ap->qc_allocated))
+               BUG();
+       qc = __ata_qc_from_tag(ap, tag);
+
+       qc->tag = tag;
+       qc->scsicmd = NULL;
+       qc->ap = ap;
+       qc->dev = dev;
+       ata_qc_reinit(qc);
+
+       preempted_tag = ap->active_tag;
+       preempted_sactive = ap->sactive;
+       preempted_qc_active = ap->qc_active;
+       ap->active_tag = ATA_TAG_POISON;
+       ap->sactive = 0;
+       ap->qc_active = 0;
+
+       /* prepare & issue qc */
+       qc->tf = *tf;
+       if (cdb)
+               memcpy(qc->cdb, cdb, ATAPI_CDB_LEN);
+       qc->flags |= ATA_QCFLAG_RESULT_TF;
+       qc->dma_dir = dma_dir;
+       if (dma_dir != DMA_NONE) {
+               ata_sg_init_one(qc, buf, buflen);
+               qc->nsect = buflen / ATA_SECT_SIZE;
+       }
+
+       qc->private_data = &wait;
+       qc->complete_fn = ata_qc_complete_internal;
+
+       ata_qc_issue(qc);
+
+       spin_unlock_irqrestore(ap->lock, flags);
+
+       rc = wait_for_completion_timeout(&wait, ata_probe_timeout);
+
+       ata_port_flush_task(ap);
+
+       if (!rc) {
+               spin_lock_irqsave(ap->lock, flags);
+
+               /* We're racing with irq here.  If we lose, the
+                * following test prevents us from completing the qc
+                * twice.  If we win, the port is frozen and will be
+                * cleaned up by ->post_internal_cmd().
+                */
+               if (qc->flags & ATA_QCFLAG_ACTIVE) {
+                       qc->err_mask |= AC_ERR_TIMEOUT;
+
+                       if (ap->ops->error_handler)
+                               ata_port_freeze(ap);
+                       else
+                               ata_qc_complete(qc);
+
+                       if (ata_msg_warn(ap))
+                               ata_dev_printk(dev, KERN_WARNING,
+                                       "qc timeout (cmd 0x%x)\n", command);
+               }
+
+               spin_unlock_irqrestore(ap->lock, flags);
+       }
+
+       /* do post_internal_cmd */
+       if (ap->ops->post_internal_cmd)
+               ap->ops->post_internal_cmd(qc);
+
+       if (qc->flags & ATA_QCFLAG_FAILED && !qc->err_mask) {
+               if (ata_msg_warn(ap))
+                       ata_dev_printk(dev, KERN_WARNING,
+                               "zero err_mask for failed "
+                               "internal command, assuming AC_ERR_OTHER\n");
+               qc->err_mask |= AC_ERR_OTHER;
+       }
+
+       /* finish up */
+       spin_lock_irqsave(ap->lock, flags);
+
+       *tf = qc->result_tf;
+       err_mask = qc->err_mask;
+
+       ata_qc_free(qc);
+       ap->active_tag = preempted_tag;
+       ap->sactive = preempted_sactive;
+       ap->qc_active = preempted_qc_active;
+
+       /* XXX - Some LLDDs (sata_mv) disable port on command failure.
+        * Until those drivers are fixed, we detect the condition
+        * here, fail the command with AC_ERR_SYSTEM and reenable the
+        * port.
+        *
+        * Note that this doesn't change any behavior as internal
+        * command failure results in disabling the device in the
+        * higher layer for LLDDs without new reset/EH callbacks.
+        *
+        * Kill the following code as soon as those drivers are fixed.
+        */
+       if (ap->flags & ATA_FLAG_DISABLED) {
+               err_mask |= AC_ERR_SYSTEM;
+               ata_port_probe(ap);
+       }
+
+       spin_unlock_irqrestore(ap->lock, flags);
+
+       return err_mask;
 }
 
 /**




-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to