Hello, Jeff.

 This is the updated version of mod15write workaround.  Changes are

 * sg list restoration on completion
 * debug code to verify sg list doesn't get changed
 * updated against the latest libata-dev-2.6
 * more comments

 I think the code is okay, but I left the debug code there just in
case.  The debug code is inside #ifdef and can be removed easily once
testing is complete.

 Thanks.


 Signed-off-by: Tejun Heo <[EMAIL PROTECTED]>


# This is a BitKeeper generated diff -Nru style patch.
#
# ChangeSet
#   2005/03/26 14:16:23+09:00 [EMAIL PROTECTED] 
#   cosmetic
# 
# drivers/scsi/sata_sil.c
#   2005/03/26 14:13:44+09:00 [EMAIL PROTECTED] +17 -4
#   xxx
# 
# ChangeSet
#   2005/03/26 12:32:07+09:00 [EMAIL PROTECTED] 
#   Merge htj.dyndns.org:/mnt/tj-work/os/ata/libata-dev-2.6
#   into htj.dyndns.org:/mnt/tj-work/os/ata/libata-work
# 
# drivers/scsi/sata_sil.c
#   2005/03/26 12:32:01+09:00 [EMAIL PROTECTED] +0 -0
#   Auto merged
# 
# ChangeSet
#   2005/03/26 12:29:29+09:00 [EMAIL PROTECTED] 
#   sg restoration implemented
#   M15W_DEBUG
# 
# drivers/scsi/sata_sil.c
#   2005/03/26 12:29:21+09:00 [EMAIL PROTECTED] +54 -2
#   sg restoration implemented
#   M15W_DEBUG
# 
# ChangeSet
#   2005/03/16 08:45:47+09:00 [EMAIL PROTECTED] 
#   comments
# 
# drivers/scsi/sata_sil.c
#   2005/03/16 08:45:39+09:00 [EMAIL PROTECTED] +36 -10
#   comments
# 
# ChangeSet
#   2005/03/16 02:14:47+09:00 [EMAIL PROTECTED] 
#   m15w workaround works now
# 
# drivers/scsi/sata_sil.c
#   2005/03/16 02:14:40+09:00 [EMAIL PROTECTED] +54 -25
#   m15w workaround works now
# 
# ChangeSet
#   2005/03/15 22:16:44+09:00 [EMAIL PROTECTED] 
#   xxx
# 
# drivers/scsi/sata_sil.c
#   2005/03/15 22:16:35+09:00 [EMAIL PROTECTED] +89 -36
#   xxx
# 
# ChangeSet
#   2005/03/15 16:36:29+09:00 [EMAIL PROTECTED] 
#   initial implementaion of mod15write workaround
# 
# drivers/scsi/sata_sil.c
#   2005/03/15 16:36:21+09:00 [EMAIL PROTECTED] +139 -11
#   initial implementaion of mod15write workaround
# 
diff -Nru a/drivers/scsi/sata_sil.c b/drivers/scsi/sata_sil.c
--- a/drivers/scsi/sata_sil.c   2005-03-26 14:17:50 +09:00
+++ b/drivers/scsi/sata_sil.c   2005-03-26 14:17:50 +09:00
@@ -71,9 +71,12 @@
 
 static int sil_init_one (struct pci_dev *pdev, const struct pci_device_id 
*ent);
 static void sil_dev_config(struct ata_port *ap, struct ata_device *dev);
+static void sil_qc_prep (struct ata_queued_cmd *qc);
+static void sil_eng_timeout (struct ata_port *ap);
 static u32 sil_scr_read (struct ata_port *ap, unsigned int sc_reg);
 static void sil_scr_write (struct ata_port *ap, unsigned int sc_reg, u32 val);
 static void sil_post_set_mode (struct ata_port *ap);
+static void sil_host_stop (struct ata_host_set *host_set);
 
 static struct pci_device_id sil_pci_tbl[] = {
        { 0x1095, 0x3112, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sil_3112 },
@@ -151,15 +154,16 @@
        .bmdma_start            = ata_bmdma_start,
        .bmdma_stop             = ata_bmdma_stop,
        .bmdma_status           = ata_bmdma_status,
-       .qc_prep                = ata_qc_prep,
+       .qc_prep                = sil_qc_prep,
        .qc_issue               = ata_qc_issue_prot,
-       .eng_timeout            = ata_eng_timeout,
+       .eng_timeout            = sil_eng_timeout,
        .irq_handler            = ata_interrupt,
        .irq_clear              = ata_bmdma_irq_clear,
        .scr_read               = sil_scr_read,
        .scr_write              = sil_scr_write,
        .port_start             = ata_port_start,
        .port_stop              = ata_port_stop,
+       .host_stop              = sil_host_stop,
 };
 
 static struct ata_port_info sil_port_info[] = {
@@ -202,6 +206,53 @@
        /* ... port 3 */
 };
 
+/*
+ * Context to loop over write requests > 15 sectors for Mod15Write bug.
+ *
+ * The following libata layer fields are saved at the beginning and
+ * mangled as necessary.
+ *
+ * qc->sg              : To fool ata_fill_sg().
+ * qc->n_elem          : ditto.
+ * qc->flags           : Except for the last iteration, ATA_QCFLAG_DMAMAP
+ *                       should be off on entering ata_interrupt() such
+ *                       that ata_qc_complete() doesn't call ata_sg_clean()
+ *                       before sil_m15w_chunk_complete(), but the flags
+ *                       should be set for ata_qc_prep() to work.  This flag
+ *                       handling is the hackiest part of this workaround.
+ * qc->complete_fn     : Overrided to sil_m15w_chunk_complete().
+ *
+ * The following cxt fields are used to iterate over write requests.
+ *
+ * next_block          : The starting block of the next chunk.
+ * next_sg             : The first sg entry of the next chunk.
+ * left                        : Total bytes left.
+ * cur_sg_ofs          : Number of processed bytes in the first sg entry
+ *                       of this chunk.
+ * next_sg_ofs         : Number of bytes to be processed in the last sg
+ *                       entry of this chunk.
+ * next_sg_len         : Number of bytes to be processed in the first sg
+ *                       entry of the next chunk.
+ */
+#define M15W_DEBUG
+struct sil_m15w_cxt {
+       u64                     next_block;
+       struct scatterlist *    next_sg;
+       unsigned int            left;
+       unsigned int            cur_sg_ofs;
+       unsigned int            next_sg_ofs;
+       unsigned int            next_sg_len;
+       int                     timedout;
+
+       struct scatterlist *    orig_sg;
+       unsigned int            orig_nelem;
+       unsigned long           orig_flags;
+       ata_qc_cb_t             orig_complete_fn;
+#ifdef M15W_DEBUG
+       struct scatterlist      sg_copy[LIBATA_MAX_PRD];
+#endif
+};
+
 MODULE_AUTHOR("Jeff Garzik");
 MODULE_DESCRIPTION("low-level driver for Silicon Image SATA controller");
 MODULE_LICENSE("GPL");
@@ -242,6 +293,227 @@
        readl(addr);    /* flush */
 }
 
+static inline u64 sil_m15w_read_tf_block (struct ata_taskfile *tf)
+{
+       u64 block = 0;
+
+       BUG_ON(!(tf->flags & ATA_TFLAG_LBA));
+
+       block |= (u64)tf->lbal;
+       block |= (u64)tf->lbam << 8;
+       block |= (u64)tf->lbah << 16;
+
+       if (tf->flags & ATA_TFLAG_LBA48) {
+               block |= (u64)tf->hob_lbal << 24;
+               block |= (u64)tf->hob_lbam << 32;
+               block |= (u64)tf->hob_lbah << 40;
+       } else
+               block |= (u64)(tf->device & 0xf) << 24;
+
+       return block;
+}
+
+static inline void sil_m15w_rewrite_tf (struct ata_taskfile *tf,
+                                       u64 block, u16 nsect)
+{
+       BUG_ON(!(tf->flags & ATA_TFLAG_LBA));
+
+       tf->nsect = nsect & 0xff;
+       tf->lbal = block & 0xff;
+       tf->lbam = (block >> 8) & 0xff;
+       tf->lbah = (block >> 16) & 0xff;
+
+       if (tf->flags & ATA_TFLAG_LBA48) {
+               tf->hob_nsect = (nsect >> 8) & 0xff;
+               tf->hob_lbal = (block >> 24) & 0xff;
+               tf->hob_lbam = (block >> 32) & 0xff;
+               tf->hob_lbah = (block >> 40) & 0xff;
+       } else {
+               tf->device &= ~0xf;
+               tf->device |= (block >> 24) & 0xf;
+       }
+}
+
+static void sil_m15w_next(struct ata_queued_cmd *qc)
+{
+       struct sil_m15w_cxt *cxt = qc->private_data;
+       struct scatterlist *sg;
+       unsigned int todo, res, nelem;
+
+       if (qc->sg != cxt->next_sg) {
+               sg_dma_address(qc->sg) -= cxt->cur_sg_ofs;
+               sg_dma_len(qc->sg) += cxt->cur_sg_ofs;
+               cxt->cur_sg_ofs = 0;
+       }
+       cxt->cur_sg_ofs += cxt->next_sg_ofs;
+
+       qc->sg = sg = cxt->next_sg;
+       sg_dma_address(sg) += cxt->next_sg_ofs;
+       sg_dma_len(sg) = cxt->next_sg_len;
+
+       res = todo = min_t(unsigned int, cxt->left, 15 << 9);
+
+       nelem = 0;
+       while (sg_dma_len(sg) <= res) {
+               res -= sg_dma_len(sg);
+               sg++;
+               nelem++;
+       }
+
+       if (todo < cxt->left) {
+               cxt->next_sg = sg;
+               cxt->next_sg_ofs = res;
+               cxt->next_sg_len = sg_dma_len(sg) - res;
+               if (res) {
+                       nelem++;
+                       sg_dma_len(sg) = res;
+               }
+       } else {
+               cxt->next_sg = NULL;
+               cxt->next_sg_ofs = 0;
+               cxt->next_sg_len = 0;
+       }
+
+       DPRINTK("block=%llu nelem=%u todo=%u left=%u\n",
+               cxt->next_block, nelem, todo, cxt->left);
+
+       qc->n_elem = nelem;
+       sil_m15w_rewrite_tf(&qc->tf, cxt->next_block, todo >> 9);
+       cxt->left -= todo;
+       cxt->next_block += todo >> 9;
+}
+
+static inline void sil_m15w_restore_qc (struct ata_queued_cmd *qc)
+{
+       struct sil_m15w_cxt *cxt = qc->private_data;
+
+       DPRINTK("ENTER\n");
+
+       sg_dma_address(qc->sg) -= cxt->cur_sg_ofs;
+       sg_dma_len(qc->sg) += cxt->cur_sg_ofs;
+       if (cxt->next_sg_ofs)
+               sg_dma_len(cxt->next_sg) += cxt->next_sg_len;
+       qc->sg = cxt->orig_sg;
+       qc->n_elem = cxt->orig_nelem;
+       qc->flags |= cxt->orig_flags;
+       qc->complete_fn = cxt->orig_complete_fn;
+#ifdef M15W_DEBUG
+       {
+               int i, j;
+               for (i = 0; i < qc->n_elem; i++)
+                       if (memcmp(&cxt->sg_copy[i], &qc->sg[i],
+                                  sizeof(qc->sg[0])))
+                               break;
+               if (i < qc->n_elem) {
+                       printk(KERN_ERR "sil_m15w: sg mismatch\n");
+                       printk(KERN_ERR "orig: ");
+                       for (j = 0; j < qc->n_elem; j++)
+                               printk("%s%08x:%04u ",
+                                      i == j ? "*" : "",
+                                      (u32)sg_dma_address(&cxt->sg_copy[j]),
+                                      sg_dma_len(&cxt->sg_copy[j]));
+                       printk("\n");
+                       printk(KERN_ERR "used: ");
+                       for (j = 0; j < qc->n_elem; j++)
+                               printk("%s%08x:%04u ",
+                                      i == j ? "*" : "",
+                                      (u32)sg_dma_address(&qc->sg[j]),
+                                      sg_dma_len(&qc->sg[j]));
+                       printk("\n");
+               }
+       }
+#endif
+}
+
+static int sil_m15w_chunk_complete (struct ata_queued_cmd *qc, u8 drv_stat)
+{
+       struct sil_m15w_cxt *cxt = qc->private_data;
+
+       DPRINTK("ENTER\n");
+
+       if (unlikely(cxt->timedout))
+               drv_stat |= ATA_BUSY;   /* Any better error status? */
+
+       /* Complete the command immediately on error */
+       if (unlikely(drv_stat & (ATA_ERR | ATA_BUSY | ATA_DRQ))) {
+               sil_m15w_restore_qc(qc);
+               ata_qc_complete(qc, drv_stat);
+               return 1;
+       }
+
+       sil_m15w_next(qc);
+       
+       qc->flags |= cxt->orig_flags;
+       ata_qc_prep(qc);
+       qc->flags &= ~ATA_QCFLAG_DMAMAP;
+
+       /* On last iteration, restore fields such that normal
+        * completion path is run */
+       if (!cxt->left)
+               sil_m15w_restore_qc(qc);
+       sil_ops.qc_issue(qc);
+       return 1;
+}
+
+static void sil_qc_prep (struct ata_queued_cmd *qc)
+{
+       struct sil_m15w_cxt *cxt = qc->private_data;
+
+       if (unlikely(cxt && qc->tf.flags & ATA_TFLAG_WRITE && qc->nsect > 15)) {
+               BUG_ON(cxt->left);
+               if (qc->tf.protocol == ATA_PROT_DMA) {
+                       /* Okay, begin mod15write workaround */
+                       cxt->next_block = sil_m15w_read_tf_block(&qc->tf);
+                       cxt->next_sg = qc->sg;
+                       cxt->left = qc->nsect << 9;
+                       cxt->cur_sg_ofs = 0;
+                       cxt->next_sg_ofs = 0;
+                       cxt->next_sg_len = sg_dma_len(qc->sg);
+                       cxt->timedout = 0;
+
+                       /* Save fields we're gonna mess with.  Read comments
+                        * above struct sil_m15w_cxt for more info. */
+                       cxt->orig_sg = qc->sg;
+                       cxt->orig_nelem = qc->n_elem;
+                       cxt->orig_flags = qc->flags & ATA_QCFLAG_DMAMAP;
+                       cxt->orig_complete_fn = qc->complete_fn;
+                       qc->complete_fn = sil_m15w_chunk_complete;
+#ifdef M15W_DEBUG
+                       {
+                               int i;
+                               for (i = 0; i < qc->n_elem; i++)
+                                       cxt->sg_copy[i] = qc->sg[i];
+                       }
+#endif
+                       DPRINTK("MOD15WRITE, block=%llu nsect=%u\n",
+                               cxt->next_block, qc->nsect);
+                       sil_m15w_next(qc);
+
+                       ata_qc_prep(qc);
+                       qc->flags &= ~ATA_QCFLAG_DMAMAP;
+                       return;
+               } else
+                       printk(KERN_WARNING "ata%u(%u): write request > 15 "
+                              "issued using non-DMA protocol.  Drive may "
+                              "lock up.\n", qc->ap->id, qc->dev->devno);
+       }
+
+       ata_qc_prep(qc);
+}
+
+static void sil_eng_timeout (struct ata_port *ap)
+{
+       struct ata_queued_cmd *qc = ata_qc_from_tag(ap, ap->active_tag);
+
+       if (qc && qc->private_data) {
+               struct sil_m15w_cxt *cxt = qc->private_data;
+               if (cxt->left)
+                       cxt->timedout = 1;
+       }
+
+       ata_eng_timeout(ap);
+}
+
 static inline unsigned long sil_scr_addr(struct ata_port *ap, unsigned int 
sc_reg)
 {
        unsigned long offset = ap->ioaddr.scr_addr;
@@ -276,6 +548,12 @@
                writel(val, mmio);
 }
 
+static void sil_host_stop (struct ata_host_set *host_set)
+{
+       /* Free mod15write context array. */
+       kfree(host_set->private_data);
+}
+
 /**
  *     sil_dev_config - Apply device/host-specific errata fixups
  *     @ap: Port containing device to be examined
@@ -286,17 +564,12 @@
  *     We apply two errata fixups which are specific to Silicon Image,
  *     a Seagate and a Maxtor fixup.
  *
- *     For certain Seagate devices, we must limit the maximum sectors
- *     to under 8K.
+ *     For certain Seagate devices, we cannot issue write requests
+ *     larger than 15 sectors.
  *
  *     For certain Maxtor devices, we must not program the drive
  *     beyond udma5.
  *
- *     Both fixups are unfairly pessimistic.  As soon as I get more
- *     information on these errata, I will create a more exhaustive
- *     list, and apply the fixups to only the specific
- *     devices/hosts/firmwares that need it.
- *
  *     20040111 - Seagate drives affected by the Mod15Write bug are blacklisted
  *     The Maxtor quirk is in the blacklist, but I'm keeping the original
  *     pessimistic fix for the following reasons...
@@ -304,6 +577,15 @@
  *     Windows driver, maybe only one is affected.  More info would be greatly
  *     appreciated.
  *     - But then again UDMA5 is hardly anything to complain about
+ *
+ *     20050316 Tejun Heo - Proper Mod15Write workaround implemented.
+ *     sata_sil doesn't report affected Seagate drives as having max
+ *     sectors of 15 anymore, but handle write requests larger than
+ *     15 sectors by looping over it inside this driver proper.  This
+ *     is messy but it's better to isolate this kind of peculiar bug
+ *     handling inside individual drivers than tainting libata layer.
+ *     This workaround results in unhampered read performance and
+ *     much better write performance.
  */
 static void sil_dev_config(struct ata_port *ap, struct ata_device *dev)
 {
@@ -311,6 +593,7 @@
        unsigned char model_num[40];
        const char *s;
        unsigned int len;
+       int i;
 
        ata_dev_id_string(dev->id, model_num, ATA_ID_PROD_OFS,
                          sizeof(model_num));
@@ -328,15 +611,23 @@
                        break;
                }
        
-       /* limit requests to 15 sectors */
+       /* Activate mod15write quirk workaround */
        if (quirks & SIL_QUIRK_MOD15WRITE) {
+               struct sil_m15w_cxt *cxt;
+
                printk(KERN_INFO "ata%u(%u): applying Seagate errata fix\n",
                       ap->id, dev->devno);
-               ap->host->max_sectors = 15;
-               ap->host->hostt->max_sectors = 15;
-               dev->flags |= ATA_DFLAG_LOCK_SECTORS;
+
+               cxt = ap->host_set->private_data;
+               cxt += ap->port_no * ATA_MAX_QUEUE;
+               for (i = 0; i < ATA_MAX_QUEUE; i++)
+                       ap->qcmd[i].private_data = cxt++;
+
                return;
        }
+       /* Clear qcmd->private_data if mod15write quirk isn't present */
+       for (i = 0; i < ATA_MAX_QUEUE; i++)
+               ap->qcmd[i].private_data = NULL;
 
        /* limit to udma5 */
        if (quirks & SIL_QUIRK_UDMA5MAX) {
@@ -350,7 +641,8 @@
 static int sil_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
 {
        static int printed_version;
-       struct ata_probe_ent *probe_ent = NULL;
+       struct ata_probe_ent *probe_ent;
+       struct sil_m15w_cxt *m15w_cxt;
        unsigned long base;
        void *mmio_base;
        int rc;
@@ -383,11 +675,17 @@
        if (rc)
                goto err_out_regions;
 
-       probe_ent = kmalloc(sizeof(*probe_ent), GFP_KERNEL);
-       if (probe_ent == NULL) {
-               rc = -ENOMEM;
+       rc = -ENOMEM;
+
+       tmp = sizeof(m15w_cxt[0]) * ATA_MAX_PORTS * ATA_MAX_QUEUE;
+       m15w_cxt = kmalloc(tmp, GFP_KERNEL);
+       if (m15w_cxt == NULL)
                goto err_out_regions;
-       }
+       memset(m15w_cxt, 0, tmp);
+
+       probe_ent = kmalloc(sizeof(*probe_ent), GFP_KERNEL);
+       if (probe_ent == NULL)
+               goto err_out_free_m15w;
 
        memset(probe_ent, 0, sizeof(*probe_ent));
        INIT_LIST_HEAD(&probe_ent->node);
@@ -401,6 +699,7 @@
                probe_ent->irq = pdev->irq;
                probe_ent->irq_flags = SA_SHIRQ;
        probe_ent->host_flags = sil_port_info[ent->driver_data].host_flags;
+       probe_ent->private_data = m15w_cxt;
 
        mmio_base = ioremap(pci_resource_start(pdev, 5),
                            pci_resource_len(pdev, 5));
@@ -467,6 +766,8 @@
 
 err_out_free_ent:
        kfree(probe_ent);
+err_out_free_m15w:
+       kfree(m15w_cxt);
 err_out_regions:
        pci_release_regions(pdev);
 err_out:

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to