PATA ATAPI detection debug

2007-01-15 Thread Tejun Heo
Hello, all.

Many people have been reporting libata PATA ATAPI detection problem.  In
many but not all cases, the ATAPI device was occupying the slave slot
while a disk drive occupies the master slot.  Based on that and J.
Taimr's nullify freeze on via fix, I made a cocktail patch which
contained four different fixes and it seemed to have fixed the problem
for (at least) several people, but the reports are not all consistent.

The attached patches contain the same four fixes but have a selector
parameter to enable each fix separately.  Both are equivalent but using
2.6.20-rc5 is recommended to rule out detection problems fixed by
polling IDENTIFY.

If your libata driver is compiled into the kernel, add
'libata.debug_cocktail=N' to your kernel parameters.  If you compile
libata.ko as a module, add 'debug_cocktail=N' to the module
parameters, e.g. 'modprobe libata.ko debug_cocktail=1'.

Please test

0: nothing
1: common PIO mask between devices sharing a channel
2: force PIO0 (DMA mode is unaffected)
4: clear NIEN on both devices
8: make ata_bmdma_freeze() a no-op

If none of the above works, try 5, 9, then 6, 10.

Please test each option and report the result.  It's best if you can
include dmesg's for each value but if you can't get the dmesg because
boot doesn't complete, just reporting that the option doesn't work
suffices.  Also, please don't forget to attach the result of 'lspci -nnvvv'.

Thanks for your patience.

-- 
tejun
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 915a55a..0eea6bd 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -90,6 +90,9 @@ static int ata_probe_timeout = ATA_TMOUT_INTERNAL / HZ;
 module_param(ata_probe_timeout, int, 0444);
 MODULE_PARM_DESC(ata_probe_timeout, "Set ATA probing timeout (seconds)");
 
+int ata_debug_cocktail = 0;
+module_param_named(debug_cocktail, ata_debug_cocktail, int, 0444);
+
 MODULE_AUTHOR("Jeff Garzik");
 MODULE_DESCRIPTION("Library module for ATA devices");
 MODULE_LICENSE("GPL");
@@ -2227,6 +2230,10 @@ int ata_set_mode(struct ata_port *ap, struct ata_device **r_failed_dev)
 		pio_mask = ata_pack_xfermask(dev->pio_mask, 0, 0);
 		dma_mask = ata_pack_xfermask(0, dev->mwdma_mask, dev->udma_mask);
 		dev->pio_mode = ata_xfer_mask2mode(pio_mask);
+		if (dev->pio_mode && ata_debug_cocktail & (1 << 1)) {
+			ata_port_printk(ap, KERN_INFO, "XXX force PIO0\n");
+			dev->pio_mode = XFER_PIO_0;
+		}
 		dev->dma_mode = ata_xfer_mask2mode(dma_mask);
 
 		found = 1;
@@ -3124,6 +3131,29 @@ static void ata_dev_xfermask(struct ata_device *dev)
    dev->mwdma_mask, dev->udma_mask);
 	xfer_mask &= ata_id_xfermask(dev->id);
 
+	if (ata_debug_cocktail & (1 << 0)) {
+		int i;
+
+		/* PIO xfermask limits are shared by all devices on the same
+		 * channel to avoid violating device selection timing.
+		 */
+		ata_dev_printk(dev, KERN_INFO, "XXX common PIO mode: pre: %lx\n",
+			   xfer_mask);
+		for (i = 0; i < ATA_MAX_DEVICES; i++) {
+			struct ata_device *d = &ap->device[i];
+			unsigned int pio_mask;
+
+			if (ata_dev_absent(d))
+continue;
+
+			ata_unpack_xfermask(ata_id_xfermask(d->id),
+	&pio_mask, NULL, NULL);
+			pio_mask &= d->pio_mask;
+			xfer_mask &= ata_pack_xfermask(pio_mask, UINT_MAX, UINT_MAX);
+		}
+		ata_dev_printk(dev, KERN_INFO, "XXX common PIO mode: post: %lx\n",
+			   xfer_mask);
+	}
 	/*
 	 *	CFA Advanced TrueIDE timings are not allowed on a shared
 	 *	cable
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index 7645f2b..5bb5bd4 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -39,6 +39,51 @@
 #include "libata.h"
 
 /**
+ *	ata_irq_on - Enable interrupts on a port.
+ *	@ap: Port on which interrupts are enabled.
+ *
+ *	Enable interrupts on a legacy IDE device using MMIO or PIO,
+ *	wait for idle, clear any pending interrupts.
+ *
+ *	LOCKING:
+ *	Inherited from caller.
+ */
+
+u8 ata_irq_on(struct ata_port *ap)
+{
+	struct ata_ioports *ioaddr = &ap->ioaddr;
+	u8 tmp;
+
+	ap->ctl &= ~ATA_NIEN;
+	ap->last_ctl = ap->ctl;
+
+	if (ata_debug_cocktail & (1 << 2)) {
+		ata_port_printk(ap, KERN_INFO, "XXX clear NIEN on both devices\n");
+		ap->ops->dev_select(ap, 1);
+	}
+	if (ap->flags & ATA_FLAG_MMIO)
+		writeb(ap->ctl, (void __iomem *) ioaddr->ctl_addr);
+	else
+		outb(ap->ctl, ioaddr->ctl_addr);
+	tmp = ata_wait_idle(ap);
+
+	if (ata_debug_cocktail & (1 << 2)) {
+		ap->ops->dev_select(ap, 0);
+		if (ap->flags & ATA_FLAG_MMIO)
+			writeb(ap->ctl, (void __iomem *) ioaddr->ctl_addr);
+		else
+			outb(ap->ctl, ioaddr->ctl_addr);
+		tmp = ata_wait_idle(ap);
+	}
+
+	ap->ops->irq_clear(ap);
+
+	return tmp;
+}
+
+
+
+/**
  *	ata_tf_load_pio - send taskfile registers to host controller
  *	@ap: Port to which output is sent
  *	@tf: ATA taskfile register set
@@ -664,6 +709,10 @@ void ata_bmdma_freeze(struct ata_port *ap)
 {
 	struct ata_ioports *ioaddr = &ap->ioaddr;
 
+	if (ata_debug_cocktail & (1 << 3)) {
+		ata_port_printk(ap, KERN_INFO, "XXX skip

[PATCH] ahci: improve and limit spurious interrupt messages

2007-01-15 Thread Tejun Heo
We're still seeing a lot of issues with NCQ implementation in drive
firmwares.  Spurious FISes during NCQ command phase occur on many
drives and some of them seem potentially dangerous (at least to me).
Until we find the solution, spurious messages can give us more info.
Improve and limit them such that more info can be reported while not
disturbing users too much.

Signed-off-by: Tejun Heo <[EMAIL PROTECTED]>

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 5998f74..00c6bcc 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -75,6 +75,7 @@ enum {
AHCI_CMD_CLR_BUSY   = (1 << 10),
 
RX_FIS_D2H_REG  = 0x40, /* offset of D2H Register FIS data */
+   RX_FIS_SDB  = 0x58, /* offset of SDB FIS data */
RX_FIS_UNK  = 0x60, /* offset of Unknown FIS data */
 
board_ahci  = 0,
@@ -202,6 +203,10 @@ struct ahci_port_priv {
dma_addr_t  cmd_tbl_dma;
void*rx_fis;
dma_addr_t  rx_fis_dma;
+   /* for NCQ spurious interrupt analysis */
+   int ncq_saw_spurious_sdb_cnt;
+   unsigned intncq_saw_d2h:1;
+   unsigned intncq_saw_dmas:1;
 };
 
 static u32 ahci_scr_read (struct ata_port *ap, unsigned int sc_reg);
@@ -1126,6 +1131,7 @@ static void ahci_host_intr(struct ata_port *ap)
void __iomem *mmio = ap->host->mmio_base;
void __iomem *port_mmio = ahci_port_base(mmio, ap->port_no);
struct ata_eh_info *ehi = &ap->eh_info;
+   struct ahci_port_priv *pp = ap->private_data;
u32 status, qc_active;
int rc;
 
@@ -1154,17 +1160,43 @@ static void ahci_host_intr(struct ata_port *ap)
 
/* hmmm... a spurious interupt */
 
-   /* some devices send D2H reg with I bit set during NCQ command phase */
-   if (ap->sactive && (status & PORT_IRQ_D2H_REG_FIS))
+   /* if !NCQ, ignore.  No modern ATA device has broken HSM
+* implementation for non-NCQ commands.
+*/
+   if (!ap->sactive)
return;
 
-   /* ignore interim PIO setup fis interrupts */
-   if (ata_tag_valid(ap->active_tag) && (status & PORT_IRQ_PIOS_FIS))
-   return;
+   if (status & PORT_IRQ_D2H_REG_FIS) {
+   if (!pp->ncq_saw_d2h)
+   ata_port_printk(ap, KERN_INFO,
+   "D2H reg with I during NCQ, "
+   "this message won't be printed again\n");
+   pp->ncq_saw_d2h = 1;
+   } else if (status & PORT_IRQ_DMAS_FIS) {
+   if (!pp->ncq_saw_dmas)
+   ata_port_printk(ap, KERN_INFO,
+   "DMAS FIS during NCQ, "
+   "this message won't be printed again\n");
+   pp->ncq_saw_dmas = 1;
+   } else if (status & PORT_IRQ_SDB_FIS &&
+  pp->ncq_saw_spurious_sdb_cnt < 10) {
+   /* SDB FIS containing spurious completions might be
+* dangerous, we need to know more about them.  Print
+* more of it.
+*/
+   const u32 *f = pp->rx_fis + RX_FIS_SDB;
+
+   pp->ncq_saw_spurious_sdb_cnt++;
 
-   if (ata_ratelimit())
+   ata_port_printk(ap, KERN_INFO, "Spurious SDB FIS during NCQ "
+   "issue=0x%x SAct=0x%x FIS=%08x:%08x%s\n",
+   readl(port_mmio + PORT_CMD_ISSUE),
+   readl(port_mmio + PORT_SCR_ACT), f[0], f[1],
+   pp->ncq_saw_spurious_sdb_cnt < 10 ?
+   "" : ", shutting up");
+   } else
ata_port_printk(ap, KERN_INFO, "spurious interrupt "
-   "(irq_stat 0x%x active_tag %d sactive 0x%x)\n",
+   "(irq_stat 0x%x active_tag 0x%x sactive 
0x%x)\n",
status, ap->active_tag, ap->sactive);
 }
 
-
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: ahci problems with sata disk.

2007-01-15 Thread Tejun Heo
kenneth johansson wrote:
> I changed my bios setting for SATA from IDE to AHCI.
> 
> This resulted in some "interesting" read throughput. 
> 
> plots can be found at http://kenjo.org/~ken/sata/
> The plots was done on a live disk so some noise is expected but in the
> ahci mode the throughput get stuck at 17 MB way to much.

It's probably not an ahci problem but more of NCQ implementation problem
in the drive firmware.  Please report the result of 'hdparm -I /dev/sdX'
and try adjusting the queue depth to see what happens.

http://linux-ata.org/faq.html

-- 
tejun
-
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: 2.6.20-rc4-mm1

2007-01-15 Thread Ingo Molnar

* Jens Axboe <[EMAIL PROTECTED]> wrote:

> > In a previous write invoked by: fsck.ext3(1896): WRITE block 8552 on 
> > sdb1 end_buffer_async_write() is invoked.
> > 
> > sdb1 is not a part of a raid device.
> 
> When I briefly tested this before I left (and found it broken), doing 
> a cat /proc/mdstat got things going again. Hard if that's your rootfs, 
> it's just a hint :-)

hm, so you knew it's broken, still you let Andrew pick it up, or am i 
misunderstanding something?

Ingo
-
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: ahci problems with sata disk.

2007-01-15 Thread kenneth johansson
On Mon, 2007-01-15 at 18:13 +0900, Tejun Heo wrote:
> kenneth johansson wrote:
> > I changed my bios setting for SATA from IDE to AHCI.
> > 
> > This resulted in some "interesting" read throughput. 
> > 
> > plots can be found at http://kenjo.org/~ken/sata/
> > The plots was done on a live disk so some noise is expected but in the
> > ahci mode the throughput get stuck at 17 MB way to much.
> 
> It's probably not an ahci problem but more of NCQ implementation problem
> in the drive firmware.  Please report the result of 'hdparm -I /dev/sdX'
> and try adjust queue depth and see what happens.
> 
> http://linux-ata.org/faq.html
> 

It was; when I turn off NCQ with
"echo 1 > /sys/block/sda/device/queue_depth" I get the same performance
as when the BIOS is set to IDE.

I thought that NCQ was intended to increase performance ??

also the disk is a Western Digital raptor and it's probably the most
benchmarked drive one could get so I was not expecting a problem with
the drive. 

---
ATA device, with non-removable media
Model Number:   WDC WD1500ADFD-00NLR1   
Serial Number:  WD-WMAP41269747
Firmware Revision:  20.07P20
Standards:
Used: ATA/ATAPI-7 published, ANSI INCITS 397-2005 
Supported: 7 6 5 4 
Configuration:
Logical max current
cylinders   16383   16383
heads   16  16
sectors/track   63  63
--
CHS current addressable sectors:   16514064
LBAuser addressable sectors:  268435455
LBA48  user addressable sectors:  293046768
device size with M = 1024*1024:  143089 MBytes
device size with M = 1000*1000:  150039 MBytes (150 GB)
Capabilities:
LBA, IORDY(can be disabled)
Queue depth: 32
Standby timer values: spec'd by Standard, with device specific
minimum
R/W multiple sector transfer: Max = 16  Current = 16
Recommended acoustic management value: 128, current value: 254
DMA: mdma0 mdma1 mdma2 udma0 udma1 udma2 udma3 udma4 udma5
*udma6 
 Cycle time: min=120ns recommended=120ns
PIO: pio0 pio1 pio2 pio3 pio4 
 Cycle time: no flow control=120ns  IORDY flow control=120ns
Commands/features:
Enabled Supported:
   *SMART feature set
Security Mode feature set
   *Power Management feature set
   *Write cache
   *Look-ahead
   *Host Protected Area feature set
   *WRITE_BUFFER command
   *READ_BUFFER command
   *NOP cmd
   *DOWNLOAD_MICROCODE
Power-Up In Standby feature set
   *SET_FEATURES required to spinup after power up
SET_MAX security extension
   *Automatic Acoustic Management feature set
   *48-bit Address feature set
   *Device Configuration Overlay feature set
   *Mandatory FLUSH_CACHE
   *FLUSH_CACHE_EXT
   *SMART error logging
   *SMART self-test
   *General Purpose Logging feature set
   *SATA-I signaling speed (1.5Gb/s)
   *Native Command Queueing (NCQ)
   *Host-initiated interface power management
   *Phy event counters
DMA Setup Auto-Activate optimization
   *Software settings preservation
   *SMART Command Transport (SCT) feature set
   *SCT Long Sector Access (AC1)
   *SCT LBA Segment Access (AC2)
   *SCT Error Recovery Control (AC3)
   *SCT Features Control (AC4)
   *SCT Data Tables (AC5)
unknown 206[12]
Security: 
Master password revision code = 65534
supported
not enabled
not locked
frozen
not expired: security count
not supported: enhanced erase
Checksum: correct


-
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: ahci problems with sata disk.

2007-01-15 Thread Alan
> also the disk is a Westen Digital raptor and it's probably the most
> benchmarked drive one could get so I was not expecting a problem with
> the drive. 

A lot of early NCQ firmware seems to reduce performance and cause
problems. At least one other raptor is in our "don't NCQ" list in the
kernel drivers.
-
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: ahci problems with sata disk.

2007-01-15 Thread Tejun Heo
kenneth johansson wrote:
> On Mon, 2007-01-15 at 18:13 +0900, Tejun Heo wrote:
>> kenneth johansson wrote:
>>> I changed my bios setting for SATA from IDE to AHCI.
>>>
>>> This resulted in some "interesting" read throughput. 
>>>
>>> plots can be found at http://kenjo.org/~ken/sata/
>>> The plots was done on a live disk so some noise is expected but in the
>>> ahci mode the throughput get stuck at 17 MB way to much.
>> It's probably not an ahci problem but more of NCQ implementation problem
>> in the drive firmware.  Please report the result of 'hdparm -I /dev/sdX'
>> and try adjust queue depth and see what happens.
>>
>> http://linux-ata.org/faq.html
>>
> 
> It was, when I turn of NCQ with "echo 1
>> /sys/block/sda/device/queue_depth" I get the same performance as when
> the BIOS is set to IDE.

Can you play with queue depth a bit?  e.g. Benchmark queue depth of 4, 8
and 16.

> I though that NCQ was intended to increase performance ??

Supposedly.

> also the disk is a Westen Digital raptor and it's probably the most
> benchmarked drive one could get so I was not expecting a problem with
> the drive. 

Most benchmarked doesn't make the firmware any better, it seems.  The
raptor Alan talked about, reportedly, locks up after hours of NCQ load too.

-- 
tejun
-
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: sata_vsc.c cache line size question

2007-01-15 Thread Dailey, Nate
Here's a patch that does what you suggest.

Because the default cache line size on my system is 0x10, I tested the
patch by checking against this value rather than 0... it worked as
expected.

This patch is against 2.6.19.2 that I just downloaded from kernel.org. I
actually tested on RHEL4 update 4, a 2.6.9 kernel, but I'll try building
the 2.6.19.2 on my system to make sure it works in that version as well.

Nate


--- sata_vsc.c.orig 2007-01-15 11:06:17.0 -0500
+++ sata_vsc.c  2007-01-15 11:10:29.0 -0500
@@ -340,6 +340,7 @@ static int __devinit vsc_sata_init_one (
int pci_dev_busy = 0;
void __iomem *mmio_base;
int rc;
+   u8 cls;

if (!printed_version++)
dev_printk(KERN_DEBUG, &pdev->dev, "version "
DRV_VERSION "\n");
@@ -389,9 +390,13 @@ static int __devinit vsc_sata_init_one (
base = (unsigned long) mmio_base;

/*
-* Due to a bug in the chip, the default cache line size can't
be used
+* Due to a bug in the chip, the default cache line size can't
be
+* used (unless the default is non-zero).
 */
-   pci_write_config_byte(pdev, PCI_CACHE_LINE_SIZE, 0x80);
+   pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &cls);
+   if (cls == 0x00) {
+   pci_write_config_byte(pdev, PCI_CACHE_LINE_SIZE, 0x80);
+   }

probe_ent->sht = &vsc_sata_sht;
probe_ent->port_flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY |





-Original Message-
From: Jeremy Higdon [mailto:[EMAIL PROTECTED] 
Sent: Sunday, January 14, 2007 3:03 AM
To: Dailey, Nate
Cc: linux-ide@vger.kernel.org
Subject: Re: sata_vsc.c cache line size question

On Fri, Jan 12, 2007 at 02:45:23PM -0500, Dailey, Nate wrote:
> Hoping someone on this list might shed some light on this...
> 
> I was investigating a problem of poor sequential write performance
> (IOmeter, various size sequential writes) with an embedded Vitesse
7174,
> maxing out (with disk write cache on) at around 10 MB/s...
> 
> After noticing that Windows on the same hardware was using 0x10 for
the
> cache line size, but Linux was using 0x80, I tried removing the
> following from sata_vsc.c:
> 
> 381 /*
> 382  * Due to a bug in the chip, the default cache line size
> can't be used
> 383  */
> 384 pci_write_config_byte(pdev, PCI_CACHE_LINE_SIZE, 0x80);
> 
> Now, with cache line size the same as Windows, Linux is doing more
like
> 43 MB/s.
> 
> Just wondering what the deal with this "bug in the chip" might be,
since
> for me it seems that the default cache line size is better? If there's
a
> real bug, I don't want to do anything dangerous by removing this code
> (though I've heard--haven't seen the code--that the Windows driver
> doesn't touch the cache line size, nor does the Linux non-libata
> reference driver from Vitesse).


The problem is that it can't be zero, which is the default value
after reset.

So I suppose the driver should be modified to set it to 0x80 only
if it's 0.  I believe that most PCI implementations will set it in
the BIOS or whatever.

Care to send a patch?

jeremy
-
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


pata_sis:sg_write errors

2007-01-15 Thread Jordan Neumeyer
Well recently I've been using libata since my distribution offered it when
they switched to 2.6.19( maybe? 18) in the initramfs image.  I have a sis 5513
controller, which after a couple of days started acting up and coming up with
the following error:

sg_write: data in/out 30576/30576 bytes for SCSI command 0xbe--guessing data i$;
 program grip not setting count and/or reply_len properly
printk: 319 messages suppressed.
sg_write: data in/out 30576/30576 bytes for SCSI command 0xbe--guessing data $n;
 program grip not setting count and/or reply_len properly
printk: 321 messages suppressed.
sg_write: data in/out 16464/16464 bytes for SCSI command 0xbe--guessing data $n;
 program grip not setting count and/or reply_len properly
printk: 323 messages suppressed.
sg_write: data in/out 16464/16464 bytes for SCSI command 0xbe--guessing data $n;
 program grip not setting count and/or reply_len properly
printk: 323 messages suppressed.
sg_write: data in/out 16464/16464 bytes for SCSI command 0xbe--guessing data $n;
 program grip not setting count and/or reply_len properly
printk: 324 messages suppressed.

It's repeated over, and over; differing amounts of bytes.  I'm unsure what
invoked such errors, because it only started to happen a few days after use of
the kernel.  Which was 2.6.19-beyond kernel.  I don't believe any additions made
by the beyond kernel affected the libata system.

Has this been fixed in the 2.6.20-rcXs?  

~Jordan Neumeyer 

-
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Proposed changes for libata speed handling

2007-01-15 Thread Jeff Garzik
BTW, for a solution to be complete, we need to halt all work on all 
other ports, when issuing SET FEATURES - XFER MODE.  On SiI and Promise 
controllers, possibly others, the command is snooped and side effects 
such as register setting occur.


Long standing to-do.  Currently we hack around this by serializing the 
bus probe, and preventing people from issuing SET FEATURES - XFER MODE 
from userspace.


Jeff



-
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH -mm] sata_nv: cleanup ADMA error handling

2007-01-15 Thread Robert Hancock

Robert Hancock wrote:
-In the error_handler function the code would always go through and do 
an ADMA channel reset and also dump out the state of all the CPBs. This 
reset seems heinous in this situation since we haven't even decided to 
reset anything yet. The output seems redundant at this point since 
libata already dumps the state of all active commands on errors (and it 
also triggers at times when it shouldn't, like when suspending). Do the 
ADMA reset only on hardreset and remove the output.


Actually, upon further thought some of this stuff really should be done 
in the error_handler method, just maybe not the channel reset. I'll cut 
another patch shortly.

-
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: ICH7m problem using libata

2007-01-15 Thread Matthew Stapleton
Tejun Heo wrote:
> Does the problem still persist?
> 
> -- 
> tejun
> 

With that kernel and the previous patches it does.  I'll try kernel 2.6.20-rc5 
and the which-cocktail-2.6.19.patch

-- 
Matthew Stapleton
-
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: ahci problems with sata disk.

2007-01-15 Thread kenneth johansson
On Mon, 2007-01-15 at 22:50 +0900, Tejun Heo wrote:
> kenneth johansson wrote:
> > On Mon, 2007-01-15 at 18:13 +0900, Tejun Heo wrote:
> >> kenneth johansson wrote:
> >>> I changed my bios setting for SATA from IDE to AHCI.
> >>>
> >>> This resulted in some "interesting" read throughput. 
> >>>
> >>> plots can be found at http://kenjo.org/~ken/sata/
> >>> The plots was done on a live disk so some noise is expected but in the
> >>> ahci mode the throughput get stuck at 17 MB way to much.
> >> It's probably not an ahci problem but more of NCQ implementation problem
> >> in the drive firmware.  Please report the result of 'hdparm -I /dev/sdX'
> >> and try adjust queue depth and see what happens.
> >>
> >> http://linux-ata.org/faq.html
> >>
> > 
> > It was, when I turn of NCQ with "echo 1
> >> /sys/block/sda/device/queue_depth" I get the same performance as when
> > the BIOS is set to IDE.
> 
> Can you play with queue depth a bit?  e.g. Benchmark queue depth of 4, 8
> and 16.

I did some more tests (http://kenjo.org/~ken/sata/), and queue depths 1 and
maybe 2 work, but everything larger than that has problems.



-
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: pata_sis:sg_write errors

2007-01-15 Thread Tejun Heo
Jordan Neumeyer wrote:
> Well recently I've been using libata since my my distribution offered it when
> they switched to 2.6.19( maybe? 18) in the initramfs image.  I have a sis 5513
> controller, which after a couple of days started acting up and coming up with
> the following error:
> 
> sg_write: data in/out 30576/30576 bytes for SCSI command 0xbe--guessing data 
> i$;
>  program grip not setting count and/or reply_len properly
> printk: 319 messages suppressed.
> sg_write: data in/out 30576/30576 bytes for SCSI command 0xbe--guessing data 
> $n;
>  program grip not setting count and/or reply_len properly
> printk: 321 messages suppressed.
> sg_write: data in/out 16464/16464 bytes for SCSI command 0xbe--guessing data 
> $n;
>  program grip not setting count and/or reply_len properly
> printk: 323 messages suppressed.
> sg_write: data in/out 16464/16464 bytes for SCSI command 0xbe--guessing data 
> $n;
>  program grip not setting count and/or reply_len properly
> printk: 323 messages suppressed.
> sg_write: data in/out 16464/16464 bytes for SCSI command 0xbe--guessing data 
> $n;
>  program grip not setting count and/or reply_len properly
> printk: 324 messages suppressed.
> 
> It's repeated over, and over; differing amounts of bytes.  I'm unsure what
> invoked such errors, because it only started to happen a few days after use of
> the kernel.  Which was 2.6.19-beyond kernel.  I don't believe any additions 
> made
> by the beyond kernel affected the libata system.
> 
> Has this been fixed in the 2.6.20-rcXs?  

This is not a kernel bug.  The SCSI midlayer is bitching that grip
hasn't set certain parameter while using the sg interface (which
previous kernels have ignored).  grip should be updated to use proper
parameter.  So, no, 2.6.20-rcX won't fix it.  You need to update grip.

-- 
tejun
-
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] sata_via: add PCI ID 0x5337

2007-01-15 Thread Tejun Heo
From: Luca Pedrielli <[EMAIL PROTECTED]>

Add PCI ID 0x5337 to the supported PCI ID table.  This is VT8237 in IDE mode.

Signed-off-by: Luca Pedrielli <[EMAIL PROTECTED]>
Signed-off-by: Tejun Heo <[EMAIL PROTECTED]>
---

Luca, I formatted the patch in the form Jeff can take.  Please format
patches like this next time.

This was verified by another bug reporter too.

diff --git a/drivers/ata/sata_via.c b/drivers/ata/sata_via.c
index 1f1d71e..4b24354 100644
--- a/drivers/ata/sata_via.c
+++ b/drivers/ata/sata_via.c
@@ -85,6 +85,7 @@ static void vt6421_set_dma_mode(struct ata_port *ap, struct 
ata_device *adev);
 static int vt6421_port_start(struct ata_port *ap);
 
 static const struct pci_device_id svia_pci_tbl[] = {
+   { PCI_VDEVICE(VIA, 0x5337), vt6420 },
{ PCI_VDEVICE(VIA, 0x0591), vt6420 },
{ PCI_VDEVICE(VIA, 0x3149), vt6420 },
{ PCI_VDEVICE(VIA, 0x3249), vt6421 },
-
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


sata_via: eSATA drive not recognised on VT6421 card

2007-01-15 Thread Ray Overdijk

Any ideas on what would cause these errors?
http://www.kernel.org/hg/linux-2.6/file/68ac6248e71b/drivers/ata/sata_via.c

dmesg
SCSI subsystem initialized
libata version 2.00 loaded.
sata_via :01:00.0: version 2.0
ACPI: PCI Interrupt :01:00.0[A] -> GSI 21 (level, low) -> IRQ 21
sata_via :01:00.0: routed to hard irq line 10
ata1: SATA max UDMA/133 cmd 0xDC00 ctl 0xDC0A bmdma 0xD400 irq 21
ata2: SATA max UDMA/133 cmd 0xD880 ctl 0xD88A bmdma 0xD408 irq 21
scsi0 : sata_via
ata1: SATA link down (SStatus 0 SControl 310)
ATA: abnormal status 0x7F on port 0xDC07
scsi1 : sata_via
ata2: port is slow to respond, please be patient
ata2: port failed to respond (30 secs)
ata2: SATA link up 1.5 Gbps (SStatus 113 SControl 310)
ATA: abnormal status 0xD0 on port 0xD887
ATA: abnormal status 0xD0 on port 0xD887
ATA: abnormal status 0xD0 on port 0xD887
ATA: abnormal status 0xD0 on port 0xD887
ATA: abnormal status 0xD0 on port 0xD887
ATA: abnormal status 0xD0 on port 0xD887
ata2.00: qc timeout (cmd 0xec)
ata2.00: failed to IDENTIFY (I/O error, err_mask=0x4)
ata2: SATA link up 1.5 Gbps (SStatus 113 SControl 310)
ata2.00: ATA-7, max UDMA/133, 625142448 sectors: LBA48 NCQ (depth 0/32)
ata2.00: qc timeout (cmd 0xec)
ata2.00: failed to IDENTIFY (I/O error, err_mask=0x4)
ata2.00: revalidation failed (errno=-5)
ata2.00: limiting speed to PIO0
ata2: failed to recover some devices, retrying in 5 secs
ata2: SATA link up 1.5 Gbps (SStatus 113 SControl 310)
ata2.00: qc timeout (cmd 0xec)
ata2.00: failed to IDENTIFY (I/O error, err_mask=0x4)
ata2.00: revalidation failed (errno=-5)
ata2.00: disabled

uname -r
2.6.18-1.2869.fc6xen

Thanks,
Ray
(please cc me directly on reply)

-
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] sata_uli: ignore SIMPLEX

2007-01-15 Thread Tejun Heo
Some uli controllers have stuck SIMPLEX bit which can't be cleared
with ata_pci_clear_simplex(), but the controller is capable of doing
DMAs on both channels simultaneously.  Ignore it.

Signed-off-by: Tejun Heo <[EMAIL PROTECTED]>

diff --git a/drivers/ata/sata_uli.c b/drivers/ata/sata_uli.c
index 5c603ca..62b9269 100644
--- a/drivers/ata/sata_uli.c
+++ b/drivers/ata/sata_uli.c
@@ -226,6 +226,13 @@ static int uli_init_one (struct pci_dev *pdev, const 
struct pci_device_id *ent)
 
probe_ent->private_data = hpriv;
 
+   /* these chips have stuck dummy simplex bit, ignore it */
+   if (probe_ent->_host_flags & ATA_HOST_SIMPLEX) {
+   dev_printk(KERN_INFO, &pdev->dev,
+  "BMDMA simplex set, ignored\n");
+   probe_ent->_host_flags &= ~ATA_HOST_SIMPLEX;
+   }
+
switch (board_idx) {
case uli_5287:
hpriv->scr_cfg_addr[0] = ULI5287_BASE;
-
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] ahci: improve and limit spurious interrupt messages, take#2

2007-01-15 Thread Tejun Heo
We're still seeing a lot of issues with NCQ implementation in drive
firmwares.  Spurious FISes during NCQ command phase occur on many
drives and some of them seem potentially dangerous (at least to me).
Until we find the solution, spurious messages can give us more info.
Improve and limit them such that more info can be reported while not
disturbing users too much.

Signed-off-by: Tejun Heo <[EMAIL PROTECTED]>
---
Updated to not use bitfields as requested.  Thanks.

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 5998f74..d5ea1c3 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -75,6 +75,7 @@ enum {
AHCI_CMD_CLR_BUSY   = (1 << 10),
 
RX_FIS_D2H_REG  = 0x40, /* offset of D2H Register FIS data */
+   RX_FIS_SDB  = 0x58, /* offset of SDB FIS data */
RX_FIS_UNK  = 0x60, /* offset of Unknown FIS data */
 
board_ahci  = 0,
@@ -202,6 +203,9 @@ struct ahci_port_priv {
dma_addr_t  cmd_tbl_dma;
void*rx_fis;
dma_addr_t  rx_fis_dma;
+   /* for NCQ spurious interrupt analysis */
+   int spurious_sdb_cnt;
+   u32 seen_status;
 };
 
 static u32 ahci_scr_read (struct ata_port *ap, unsigned int sc_reg);
@@ -1126,6 +1130,7 @@ static void ahci_host_intr(struct ata_port *ap)
void __iomem *mmio = ap->host->mmio_base;
void __iomem *port_mmio = ahci_port_base(mmio, ap->port_no);
struct ata_eh_info *ehi = &ap->eh_info;
+   struct ahci_port_priv *pp = ap->private_data;
u32 status, qc_active;
int rc;
 
@@ -1154,17 +1159,40 @@ static void ahci_host_intr(struct ata_port *ap)
 
/* hmmm... a spurious interupt */
 
-   /* some devices send D2H reg with I bit set during NCQ command phase */
-   if (ap->sactive && (status & PORT_IRQ_D2H_REG_FIS))
+   /* if !NCQ, ignore.  No modern ATA device has broken HSM
+* implementation for non-NCQ commands.
+*/
+   if (!ap->sactive)
return;
 
-   /* ignore interim PIO setup fis interrupts */
-   if (ata_tag_valid(ap->active_tag) && (status & PORT_IRQ_PIOS_FIS))
-   return;
+   if ((status & PORT_IRQ_D2H_REG_FIS) &&
+   !(pp->seen_status & PORT_IRQ_D2H_REG_FIS)) {
+   ata_port_printk(ap, KERN_INFO, "D2H reg with I during NCQ, "
+   "this message won't be printed again\n");
+   pp->seen_status |= PORT_IRQ_D2H_REG_FIS;
+   } else if ((status & PORT_IRQ_DMAS_FIS) &&
+  !(pp->seen_status & PORT_IRQ_DMAS_FIS)) {
+   ata_port_printk(ap, KERN_INFO, "DMAS FIS during NCQ, "
+   "this message won't be printed again\n");
+   pp->seen_status |= PORT_IRQ_DMAS_FIS;
+   } else if (status & PORT_IRQ_SDB_FIS && pp->spurious_sdb_cnt < 10) {
+   /* SDB FIS containing spurious completions might be
+* dangerous, we need to know more about them.  Print
+* more of it.
+*/
+   const u32 *f = pp->rx_fis + RX_FIS_SDB;
+
+   pp->spurious_sdb_cnt++;
 
-   if (ata_ratelimit())
+   ata_port_printk(ap, KERN_INFO, "Spurious SDB FIS during NCQ "
+   "issue=0x%x SAct=0x%x FIS=%08x:%08x%s\n",
+   readl(port_mmio + PORT_CMD_ISSUE),
+   readl(port_mmio + PORT_SCR_ACT), f[0], f[1],
+   pp->spurious_sdb_cnt < 10 ?
+   "" : ", shutting up");
+   } else
ata_port_printk(ap, KERN_INFO, "spurious interrupt "
-   "(irq_stat 0x%x active_tag %d sactive 0x%x)\n",
+   "(irq_stat 0x%x active_tag 0x%x sactive 
0x%x)\n",
status, ap->active_tag, ap->sactive);
 }
 
-
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: 2.6.20-rc4-mm1

2007-01-15 Thread Jens Axboe
On Mon, Jan 15 2007, Ingo Molnar wrote:
> 
> * Jens Axboe <[EMAIL PROTECTED]> wrote:
> 
> > > In a previous write invoked by: fsck.ext3(1896): WRITE block 8552 on 
> > > sdb1 end_buffer_async_write() is invoked.
> > > 
> > > sdb1 is not a part of a raid device.
> > 
> > When I briefly tested this before I left (and found it broken), doing 
> > a cat /proc/mdstat got things going again. Hard if that's your rootfs, 
> > it's just a hint :-)
> 
> hm, so you knew it's broken, still you let Andrew pick it up, or am i 
> misunderstanding something?

Well the raid issue wasn't known before it was in -mm.

-- 
Jens Axboe

-
To unsubscribe from this list: send the line "unsubscribe linux-ide" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html