Re: [PATCH 0/4] SCSI: Printing cleanups
Martin K. Petersen wrote: > This patch series is the first batch of cleanups in an attempt to make > the SCSI printing more consistent and suitable for human consumption. > > Previously a typical error looked like this: > > sd 0:0:0:0: SCSI error: return code = 0x0802 > sda: Current: sense key: Aborted Command > Additional sense: Logical block reference tag check failed > > You had to have the magic return value decoder ring handy to figure > out what had really happened. And you had to do the mapping between > sd 0:0:0:0 and sda yourself. > > > The following patches clean up various bits so that the same > information can be presented in a more readable form: > > sd 0:0:0:0: [sda] Result: hostbyte=DID_OK > driverbyte=DRIVER_SENSE,SUGGEST_OK > sd 0:0:0:0: [sda] Sense Key : Aborted Command [current] > sd 0:0:0:0: [sda] Add. Sense: Logical block reference tag check failed > > All printk's from sd.c now have the same prefix. If logging is turned > on, for instance, we also get: > > sd 0:0:0:0: [sda] Send: 0x0fb89180 > sd 0:0:0:0: [sda] CDB: Read(16): 88 20 00 00 00 00 00 00 00 20 00 00 00 > 08 00 00 > sd 0:0:0:0: [sda] Done: 0x0fb89180 SUCCESS > > The patches need to be applied in order. Martin, Looks good. If you need to revise anything, perhaps you could add a comment with this url near the list of additional sense codes: http://www.t10.org/lists/asc-num.txt That is the official list of SCSI additional sense codes. Based on the date of my last additional sense code update only this one is missing: 2Fh/02h DTLPWROMAEBKVF COMMANDS CLEARED BY DEVICE SERVER Doug Gilbert - To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 4/4] SCSI: Convert sd logging to new printk helpers
Convert the sd.c SCSI logging calls to scmd_printk()/sd_printk() instead of plain printk(). Signed-off-by: Martin K. Petersen <[EMAIL PROTECTED]> --- sd.c | 50 +- 1 files changed, 29 insertions(+), 21 deletions(-) Index: scsi-misc-2.6/drivers/scsi/sd.c === --- scsi-misc-2.6.orig/drivers/scsi/sd.c +++ scsi-misc-2.6/drivers/scsi/sd.c @@ -312,15 +312,19 @@ static int sd_init_command(struct scsi_c unsigned int this_count = SCpnt->request_bufflen >> 9; unsigned int timeout = sdp->timeout; - SCSI_LOG_HLQUEUE(1, printk("sd_init_command: disk=%s, block=%llu, " - "count=%d\n", disk->disk_name, -(unsigned long long)block, this_count)); + SCSI_LOG_HLQUEUE(1, scmd_printk(KERN_INFO, SCpnt, + "sd_init_command: block=%llu, " + "count=%d\n", + (unsigned long long)block, + this_count)); if (!sdp || !scsi_device_online(sdp) || block + rq->nr_sectors > get_capacity(disk)) { - SCSI_LOG_HLQUEUE(2, printk("Finishing %ld sectors\n", -rq->nr_sectors)); - SCSI_LOG_HLQUEUE(2, printk("Retry with 0x%p\n", SCpnt)); + SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt, + "Finishing %ld sectors\n", + rq->nr_sectors)); + SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt, + "Retry with 0x%p\n", SCpnt)); return 0; } @@ -332,8 +336,8 @@ static int sd_init_command(struct scsi_c /* printk("SCSI disk has been changed. Prohibiting further I/O.\n"); */ return 0; } - SCSI_LOG_HLQUEUE(2, printk("%s : block=%llu\n", - disk->disk_name, (unsigned long long)block)); + SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt, "block=%llu\n", + (unsigned long long)block)); /* * If we have a 1K hardware sectorsize, prevent access to single @@ -390,9 +394,11 @@ static int sd_init_command(struct scsi_c return 0; } - SCSI_LOG_HLQUEUE(2, printk("%s : %s %d/%ld 512 byte blocks.\n", - disk->disk_name, (rq_data_dir(rq) == WRITE) ? - "writing" : "reading", this_count, rq->nr_sectors)); + SCSI_LOG_HLQUEUE(2, scmd_printk(KERN_INFO, SCpnt, + "%s %d/%ld 512 byte blocks.\n", + (rq_data_dir(rq) == WRITE) ? 
+ "writing" : "reading", this_count, + rq->nr_sectors)); SCpnt->cmnd[1] = 0; @@ -494,7 +500,7 @@ static int sd_open(struct inode *inode, return -ENXIO; - SCSI_LOG_HLQUEUE(3, printk("sd_open: disk=%s\n", disk->disk_name)); + SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_open\n")); sdev = sdkp->device; @@ -564,7 +570,7 @@ static int sd_release(struct inode *inod struct scsi_disk *sdkp = scsi_disk(disk); struct scsi_device *sdev = sdkp->device; - SCSI_LOG_HLQUEUE(3, printk("sd_release: disk=%s\n", disk->disk_name)); + SCSI_LOG_HLQUEUE(3, sdkp_printk(KERN_INFO, sdkp, "sd_release\n")); if (!--sdkp->openers && sdev->removable) { if (scsi_block_when_processing_errors(sdev)) @@ -677,8 +683,7 @@ static int sd_media_changed(struct gendi struct scsi_device *sdp = sdkp->device; int retval; - SCSI_LOG_HLQUEUE(3, printk("sd_media_changed: disk=%s\n", - disk->disk_name)); + SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_media_changed\n")); if (!sdp->removable) return 0; @@ -871,12 +876,14 @@ static void sd_rw_intr(struct scsi_cmnd sense_deferred = scsi_sense_is_deferred(&sshdr); } #ifdef CONFIG_SCSI_LOGGING - SCSI_LOG_HLCOMPLETE(1, printk("sd_rw_intr: %s: res=0x%x\n", - SCpnt->request->rq_disk->disk_name, result)); + SCSI_LOG_HLCOMPLETE(1, scsi_print_result(SCpnt)); if (sense_valid) { - SCSI_LOG_HLCOMPLETE(1, printk("sd_rw_intr: sb[respc,sk,asc," - "ascq]=%x,%x,%x,%x\n", sshdr.response_code, - sshdr.sense_key, sshdr.asc, sshdr.ascq)); + SCSI_LOG_HLCOMPLETE(1, scmd_printk(KERN_INFO, SCpnt, + "sd_rw_intr: sb[respc,sk,asc," + "
[PATCH 3/4] SCSI: Make sd printing use a common prefix
Make SCSI disk printing more consistent: - Define sd_printk(), sd_print_sense_hdr() and sd_print_result() - Move relevant header bits into sd.h - Remove all the legacy disk_name passing and use scsi_disk pointers where possible - Switch printk() lines to the new sd_ functions so that output is consistent Signed-off-by: Martin K. Petersen <[EMAIL PROTECTED]> --- drivers/scsi/sd.c | 253 -- include/scsi/sd.h | 70 ++ 2 files changed, 165 insertions(+), 158 deletions(-) Index: scsi-misc-2.6/drivers/scsi/sd.c === --- scsi-misc-2.6.orig/drivers/scsi/sd.c +++ scsi-misc-2.6/drivers/scsi/sd.c @@ -58,16 +58,10 @@ #include #include #include +#include #include "scsi_logging.h" -/* - * More than enough for everybody ;) The huge number of majors - * is a leftover from 16bit dev_t days, we don't really need that - * much numberspace. - */ -#define SD_MAJORS 16 - MODULE_AUTHOR("Eric Youngdale"); MODULE_DESCRIPTION("SCSI disk (sd) driver"); MODULE_LICENSE("GPL"); @@ -89,45 +83,6 @@ MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK13_ MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK14_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK15_MAJOR); -/* - * This is limited by the naming scheme enforced in sd_probe, - * add another character to it if you really need more disks. - */ -#define SD_MAX_DISKS (((26 * 26) + 26 + 1) * 26) - -/* - * Time out in seconds for disks and Magneto-opticals (which are slower). 
- */ -#define SD_TIMEOUT (30 * HZ) -#define SD_MOD_TIMEOUT (75 * HZ) - -/* - * Number of allowed retries - */ -#define SD_MAX_RETRIES 5 -#define SD_PASSTHROUGH_RETRIES 1 - -/* - * Size of the initial data buffer for mode and read capacity data - */ -#define SD_BUF_SIZE512 - -struct scsi_disk { - struct scsi_driver *driver; /* always &sd_template */ - struct scsi_device *device; - struct class_device cdev; - struct gendisk *disk; - unsigned intopeners;/* protected by BKL for now, yuck */ - sector_tcapacity; /* size in 512-byte sectors */ - u32 index; - u8 media_present; - u8 write_prot; - unsignedWCE : 1;/* state of disk WCE bit */ - unsignedRCD : 1;/* state of disk RCD bit, unused */ - unsignedDPOFUA : 1; /* state of disk DPOFUA bit */ -}; -#define to_scsi_disk(obj) container_of(obj,struct scsi_disk,cdev) - static DEFINE_IDR(sd_index_idr); static DEFINE_SPINLOCK(sd_index_lock); @@ -136,20 +91,6 @@ static DEFINE_SPINLOCK(sd_index_lock); * object after last put) */ static DEFINE_MUTEX(sd_ref_mutex); -static int sd_revalidate_disk(struct gendisk *disk); -static void sd_rw_intr(struct scsi_cmnd * SCpnt); - -static int sd_probe(struct device *); -static int sd_remove(struct device *); -static void sd_shutdown(struct device *dev); -static void sd_rescan(struct device *); -static int sd_init_command(struct scsi_cmnd *); -static int sd_issue_flush(struct device *, sector_t *); -static void sd_prepare_flush(request_queue_t *, struct request *); -static void sd_read_capacity(struct scsi_disk *sdkp, char *diskname, -unsigned char *buffer); -static void scsi_disk_release(struct class_device *cdev); - static const char *sd_cache_types[] = { "write through", "none", "write back", "write back, no read (daft)" @@ -199,7 +140,7 @@ static ssize_t sd_store_cache_type(struc if (scsi_mode_select(sdp, 1, sp, 8, buffer_data, len, SD_TIMEOUT, SD_MAX_RETRIES, &data, &sshdr)) { if (scsi_sense_valid(&sshdr)) - scsi_print_sense_hdr(sdkp->disk->disk_name, &sshdr); + sd_print_sense_hdr(sdkp, 
&sshdr); return -EINVAL; } sd_revalidate_disk(sdkp->disk); @@ -407,7 +348,8 @@ static int sd_init_command(struct scsi_c */ if (sdp->sector_size == 1024) { if ((block & 1) || (rq->nr_sectors & 1)) { - printk(KERN_ERR "sd: Bad block number requested"); + scmd_printk(KERN_ERR, SCpnt, + "Bad block number requested\n"); return 0; } else { block = block >> 1; @@ -416,7 +358,8 @@ static int sd_init_command(struct scsi_c } if (sdp->sector_size == 2048) { if ((block & 3) || (rq->nr_sectors & 3)) { - printk(KERN_ERR "sd: Bad block number requested"); + scmd_printk(KERN_ERR, SCpnt, + "Bad block number requested\n"); return 0; } else { block = block >> 2; @@ -425,7 +368,8 @@ static in
[PATCH 2/4] SCSI: Make error printing more verbose
This patch enhances SCSI error printing by: - Making use of scsi_print_result() in the completion functions. - Having scmd_printk() output the disk name (when applicable). Signed-off-by: Martin K. Petersen <[EMAIL PROTECTED]> --- drivers/scsi/scsi.c| 47 - drivers/scsi/scsi_lib.c|4 --- include/scsi/scsi_device.h |8 +-- 3 files changed, 24 insertions(+), 35 deletions(-) Index: scsi-misc-2.6/drivers/scsi/scsi_lib.c === --- scsi-misc-2.6.orig/drivers/scsi/scsi_lib.c +++ scsi-misc-2.6/drivers/scsi/scsi_lib.c @@ -968,9 +968,7 @@ void scsi_io_completion(struct scsi_cmnd } if (result) { if (!(req->cmd_flags & REQ_QUIET)) { - scmd_printk(KERN_INFO, cmd, - "SCSI error: return code = 0x%08x\n", - result); + scsi_print_result(cmd); if (driver_byte(result) & DRIVER_SENSE) scsi_print_sense("", cmd); } Index: scsi-misc-2.6/include/scsi/scsi_device.h === --- scsi-misc-2.6.orig/include/scsi/scsi_device.h +++ scsi-misc-2.6/include/scsi/scsi_device.h @@ -5,6 +5,7 @@ #include #include #include +#include #include struct request_queue; @@ -153,8 +154,11 @@ struct scsi_device { #define sdev_printk(prefix, sdev, fmt, a...) \ dev_printk(prefix, &(sdev)->sdev_gendev, fmt, ##a) -#define scmd_printk(prefix, scmd, fmt, a...) \ - dev_printk(prefix, &(scmd)->device->sdev_gendev, fmt, ##a) +#define scmd_printk(prefix, scmd, fmt, a...) \ +(scmd)->request->rq_disk ? 
\ + sdev_printk(prefix, (scmd)->device, "[%s] " fmt,\ + (scmd)->request->rq_disk->disk_name, ##a) : \ + sdev_printk(prefix, (scmd)->device, fmt, ##a) enum scsi_target_state { STARGET_RUNNING = 1, Index: scsi-misc-2.6/drivers/scsi/scsi.c === --- scsi-misc-2.6.orig/drivers/scsi/scsi.c +++ scsi-misc-2.6/drivers/scsi/scsi.c @@ -344,7 +344,6 @@ void scsi_destroy_command_freelist(struc void scsi_log_send(struct scsi_cmnd *cmd) { unsigned int level; - struct scsi_device *sdev; /* * If ML QUEUE log level is greater than or equal to: @@ -361,22 +360,17 @@ void scsi_log_send(struct scsi_cmnd *cmd level = SCSI_LOG_LEVEL(SCSI_LOG_MLQUEUE_SHIFT, SCSI_LOG_MLQUEUE_BITS); if (level > 1) { - sdev = cmd->device; - sdev_printk(KERN_INFO, sdev, "send "); + scmd_printk(KERN_INFO, cmd, "Send: "); if (level > 2) printk("0x%p ", cmd); - /* -* spaces to match disposition and cmd->result -* output in scsi_log_completion. -*/ - printk(" "); + printk("\n"); scsi_print_command(cmd); if (level > 3) { printk(KERN_INFO "buffer = 0x%p, bufflen = %d," " done = 0x%p, queuecommand 0x%p\n", cmd->request_buffer, cmd->request_bufflen, cmd->done, - sdev->host->hostt->queuecommand); + cmd->device->host->hostt->queuecommand); } } @@ -386,7 +380,6 @@ void scsi_log_send(struct scsi_cmnd *cmd void scsi_log_completion(struct scsi_cmnd *cmd, int disposition) { unsigned int level; - struct scsi_device *sdev; /* * If ML COMPLETE log level is greater than or equal to: @@ -405,8 +398,7 @@ void scsi_log_completion(struct scsi_cmn SCSI_LOG_MLCOMPLETE_BITS); if (((level > 0) && (cmd->result || disposition != SUCCESS)) || (level > 1)) { - sdev = cmd->device; - sdev_printk(KERN_INFO, sdev, "done "); + scmd_printk(KERN_INFO, cmd, "Done: "); if (level > 2) printk("0x%p ", cmd); /* @@ -415,40 +407,35 @@ void scsi_log_completion(struct scsi_cmn */ switch (disposition) { case SUCCESS: - printk("SUCCESS"); + printk("SUCCES
[PATCH 1/4] SCSI: constants.c cleanup, verbose result printing
Clean up constants.c and make result printing more user friendly: - Refactor the command and sense functions so that the actual formatting can be called from the various helper functions with the correct prefix. - Replace scsi_print_hostbyte() and scsi_print_driverbyte() with scsi_print_result() which is verbose when CONFIG_SCSI_CONSTANTS is on. Signed-off-by: Martin K. Petersen <[EMAIL PROTECTED]> --- drivers/scsi/constants.c | 267 +-- include/scsi/scsi_dbg.h | 10 + 2 files changed, 151 insertions(+), 126 deletions(-) Index: scsi-misc-2.6/drivers/scsi/constants.c === --- scsi-misc-2.6.orig/drivers/scsi/constants.c +++ scsi-misc-2.6/drivers/scsi/constants.c @@ -202,31 +202,29 @@ static const char * get_sa_name(const st } /* attempt to guess cdb length if cdb_len==0 . No trailing linefeed. */ -static void print_opcode_name(unsigned char * cdbp, int cdb_len, - int start_of_line) +static void print_opcode_name(unsigned char * cdbp, int cdb_len) { int sa, len, cdb0; const char * name; - const char * leadin = start_of_line ? 
KERN_INFO : ""; cdb0 = cdbp[0]; switch(cdb0) { case VARIABLE_LENGTH_CMD: len = cdbp[7] + 8; if (len < 10) { - printk("%sshort variable length command, " - "len=%d ext_len=%d", leadin, len, cdb_len); + printk("short variable length command, " + "len=%d ext_len=%d", len, cdb_len); break; } sa = (cdbp[8] << 8) + cdbp[9]; name = get_sa_name(maint_in_arr, MAINT_IN_SZ, sa); if (name) { - printk("%s%s", leadin, name); + printk("%s", name); if ((cdb_len > 0) && (len != cdb_len)) printk(", in_cdb_len=%d, ext_len=%d", len, cdb_len); } else { - printk("%scdb[0]=0x%x, sa=0x%x", leadin, cdb0, sa); + printk("cdb[0]=0x%x, sa=0x%x", cdb0, sa); if ((cdb_len > 0) && (len != cdb_len)) printk(", in_cdb_len=%d, ext_len=%d", len, cdb_len); @@ -236,83 +234,80 @@ static void print_opcode_name(unsigned c sa = cdbp[1] & 0x1f; name = get_sa_name(maint_in_arr, MAINT_IN_SZ, sa); if (name) - printk("%s%s", leadin, name); + printk("%s", name); else - printk("%scdb[0]=0x%x, sa=0x%x", leadin, cdb0, sa); + printk("cdb[0]=0x%x, sa=0x%x", cdb0, sa); break; case MAINTENANCE_OUT: sa = cdbp[1] & 0x1f; name = get_sa_name(maint_out_arr, MAINT_OUT_SZ, sa); if (name) - printk("%s%s", leadin, name); + printk("%s", name); else - printk("%scdb[0]=0x%x, sa=0x%x", leadin, cdb0, sa); + printk("cdb[0]=0x%x, sa=0x%x", cdb0, sa); break; case SERVICE_ACTION_IN_12: sa = cdbp[1] & 0x1f; name = get_sa_name(serv_in12_arr, SERV_IN12_SZ, sa); if (name) - printk("%s%s", leadin, name); + printk("%s", name); else - printk("%scdb[0]=0x%x, sa=0x%x", leadin, cdb0, sa); + printk("cdb[0]=0x%x, sa=0x%x", cdb0, sa); break; case SERVICE_ACTION_OUT_12: sa = cdbp[1] & 0x1f; name = get_sa_name(serv_out12_arr, SERV_OUT12_SZ, sa); if (name) - printk("%s%s", leadin, name); + printk("%s", name); else - printk("%scdb[0]=0x%x, sa=0x%x", leadin, cdb0, sa); + printk("cdb[0]=0x%x, sa=0x%x", cdb0, sa); break; case SERVICE_ACTION_IN_16: sa = cdbp[1] & 0x1f; name = get_sa_name(serv_in16_arr, SERV_IN16_SZ, sa); if (name) - printk("%s%s", leadin, 
name); + printk("%s", name); else - printk("%scdb[0]=0x%x, sa=0x%x", leadin, cdb0, sa); + printk("cdb[0]=0x%x, sa=0x%x", cdb0, sa); break; case SERVICE_ACTION_OUT_16: sa = cdbp[1] & 0x1f; name = get_sa_name(serv_out16_arr, SERV_OUT16_SZ, sa); if (name) - printk("%s%s", leadin, name
[PATCH 0/4] SCSI: Printing cleanups
This patch series is the first batch of cleanups in an attempt to make the SCSI printing more consistent and suitable for human consumption. Previously a typical error looked like this: sd 0:0:0:0: SCSI error: return code = 0x0802 sda: Current: sense key: Aborted Command Additional sense: Logical block reference tag check failed You had to have the magic return value decoder ring handy to figure out what had really happened. And you had to do the mapping between sd 0:0:0:0 and sda yourself. The following patches clean up various bits so that the same information can be presented in a more readable form: sd 0:0:0:0: [sda] Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE,SUGGEST_OK sd 0:0:0:0: [sda] Sense Key : Aborted Command [current] sd 0:0:0:0: [sda] Add. Sense: Logical block reference tag check failed All printk's from sd.c now have the same prefix. If logging is turned on, for instance, we also get: sd 0:0:0:0: [sda] Send: 0x0fb89180 sd 0:0:0:0: [sda] CDB: Read(16): 88 20 00 00 00 00 00 00 00 20 00 00 00 08 00 00 sd 0:0:0:0: [sda] Done: 0x0fb89180 SUCCESS The patches need to be applied in order. -- Martin K. Petersen Oracle Linux Engineering - To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: end to end error recovery musings
> "Alan" == Alan <[EMAIL PROTECTED]> writes: >> Not sure you're up-to-date on the T10 data integrity feature. >> Essentially it's an extension of the 520 byte sectors common in >> disk [...] Alan> but here's a minor bit of passing bad news - quite a few older Alan> ATA controllers can't issue DMA transfers that are not a Alan> multiple of 512 bytes without crapping themselves (eg Alan> READ_LONG). Guess we may need to add Alan> ap-> i_do_not_suck or similar 8) I'm afraid it stops even before you get that far. There doesn't seem to be any interest in adopting the Data Integrity Feature (or anything similar) in the ATA camp. So for now it's a SCSI-only thing. I encourage people to lean on their favorite disk manufacturer. This would be a great feature to have on SATA too... -- Martin K. Petersen Oracle Linux Engineering - To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: end to end error recovery musings
> Not sure you're up-to-date on the T10 data integrity feature. > Essentially it's an extension of the 520 byte sectors common in disk I saw the basics but not the detail. Thanks for the explanation it was most helpful and promises to fix a few things for some controllers.. but here's a minor bit of passing bad news - quite a few older ATA controllers can't issue DMA transfers that are not a multiple of 512 bytes without crapping themselves (eg READ_LONG). Guess we may need to add ap->i_do_not_suck or similar 8) On the bright side I believe the Intel ICH is the only one with this problem (and a workaround) which is SATA capable 8) Alan - To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Patch 2/2] cciss: supercedes add shutdown support (replaces reboot notifier)
> On Fri, 23 Feb 2007 14:42:39 -0600 "Mike Miller (OS Dev)" <[EMAIL PROTECTED]> > wrote: > This patch supercedes yesterdays cciss-shutdown patch. The primary difference > is > removing __devexit from cciss_remove_one. Instead of create another function > I'd > rather use the code that was intended to perform the cleanup and cache flush. > I've > tested as a loadable module and statically linked without error. > Please consider this for inclusion. Please don't document patches like this. The entirety of your changelog and the Subject: are relative to a patch which will never hit the mainline git tree. Put yourself in the position of someone reading the git changelogs in a year's time. They're going to be left scratching their heads at the above, aren't they? Always include a complete and standalone, not-referential-to-an-old-patch changelog in each iteration of a patch. Always choose a suitable Subject: Yes, it's good to tell us things about how this patch differs from the previous one. That info can be placed after the ^--- which comes after your signed-off-by:, or can be placed at the top of the email, as long as the full permanent changelog is there too. Bottom line: you are submitting code and its documentation into the permanent kernel record, not just the mailing list. Try to make it appropriate, thanks. Please send a new Subject: and changelog for this patch. - To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: end to end error recovery musings
> "Alan" == Alan <[EMAIL PROTECTED]> writes: >> These features make the most sense in terms of WRITE. Disks >> already have plenty of CRC on the data so if a READ fails on a >> regular drive we already know about it. Alan> Don't bet on it. This is why I mentioned that I want to expose the protection data to the host. As written, DIF only protects the path between initiator and target. See below... Alan> If you want to do this seriously you need an end to end (media Alan> to host ram) checksum. We do see bizarre and quite evil things Alan> happen to people occasionally because they rely on bus level Alan> protection - both faulty network cards and faulty disk or Alan> controller RAM can cause very bad things to happen in a critical Alan> environment and are very very hard to detect and test for. Not sure you're up-to-date on the T10 data integrity feature. Essentially it's an extension of the 520 byte sectors common in disk arrays. For each 512 byte sector (or 4KB ditto) you get 8 bytes of protection data. There's a 2 byte CRC (GUARD tag), a 2 byte user-defined tag (APP) and a 4-byte reference tag (REF). Depending on how the drive is formatted, the REF tag usually needs to match the lower 32-bits of the target sector #. For each sector coming in the disk firmware verifies that the CRC and the reference tags are in accordance with the contents of the sector and the CDB start sector + offset. If they don't match the drive will reject the request. If an HBA is capable of exposing the protection tuples to the host we can precalculate the checksum and the LBA when submitting a WRITE. My current proposal involves passing them down in two separate buffers to minimize the risk of in-memory corruption (Besides, it would suck if you had to interleave data and protection data. The scatterlists would become long and twisted). And that's when the READ case becomes interesting. Because then the fs can verify that the checksum of the in-buffer matches the GUARD tag.
In that case we'll know there's been no corruption in the middle. And of course this also opens up using the APP field to tag sector contents. -- Martin K. Petersen Oracle Linux Engineering - To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: end to end error recovery musings
Martin K. Petersen wrote: "Eric" == Moore, Eric <[EMAIL PROTECTED]> writes: Eric> Martin K. Petersen on Data Intergrity Feature, which is also Eric> called EEDP(End to End Data Protection), which he presented some Eric> ideas/suggestions of adding an API in linux for this. T10 DIF is interesting for a few things: - Ensuring that the data integrity is preserved when writing a buffer to disk - Ensuring that the write ends up on the right hardware sector These features make the most sense in terms of WRITE. Disks already have plenty of CRC on the data so if a READ fails on a regular drive we already know about it. There are paths through a read that could still benefit from the extra data integrity. The CRC gets validated on the physical sector, but we don't have the same level of strict data checking once it is read into the disk's write cache or being transferred out of cache on the way to the transport... We can, however, leverage DIF with my proposal to expose the protection data to host memory. This will allow us to verify the data integrity information before passing it to the filesystem or application. We can say "this is really the information the disk sent. It hasn't been mangled along the way". And by using the APP tag we can mark a sector as - say - metadata or data to ease putting the recovery puzzle back together. It would be great if the app tag was more than 16 bits. Ted mentioned that ideally he'd like to store the inode number in the app tag. But as it stands there isn't room. In any case this is all slightly orthogonal to Ric's original post about finding the right persistence heuristics in the error handling path... Still all a very relevant discussion - I agree that we could really use more than just 16 bits... ric - To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: qla2xxx BUG: workqueue leaked lock or atomic
On Tue, 27 Feb 2007, Andre Noll wrote: > On 10:26, Andrew Vasquez wrote: > > You are loading some stale firmware that's left over on the card -- > > I'm not even sure what 4.00.70 is, as the latest release firmware is > > 4.00.27. > > That's the firmware which came with the card. Anyway, I just upgraded > the firmware, but the bug remains. The backtrace differs a bit though > as now the tg3 network driver seems to be involved as well. > > Thanks for your help > Andre ... > [ 68.532665] BUG: at kernel/lockdep.c:1860 trace_hardirqs_on() > [ 68.532784] > [ 68.532785] Call Trace: > [ 68.532979][] trace_hardirqs_on+0xd7/0x180 > [ 68.533168] [] _spin_unlock_irq+0x2b/0x40 > [ 68.533295] [] > :qla2xxx:qla2x00_process_completed_request+0x137/0x1d0 > [ 68.533457] [] :qla2xxx:qla2x00_status_entry+0x82/0xa40 > [ 68.533577] [] __lock_acquire+0xcdf/0xd90 > [ 68.533693] [] _spin_unlock_irqrestore+0x42/0x60 > [ 68.533816] [] :qla2xxx:qla24xx_intr_handler+0x4e/0x2b0 > [ 68.533942] [] > :qla2xxx:qla24xx_process_response_queue+0xc1/0x1c0 > [ 68.534102] [] :qla2xxx:qla24xx_intr_handler+0x1d4/0x2b0 Ok, since 2.6.20, there has been a patch added to qla2xxx which drops the spin_unlock_irq() call while attempting to ramp-up the queue-depth: commit befede3dabd204e9c546cbfbe391b29286c57da2 Author: Seokmann Ju <[EMAIL PROTECTED]> Date: Tue Jan 9 11:37:52 2007 -0800 [SCSI] qla2xxx: correct locking while call starget_for_each_device() Removed spin_unlock_irq()/spin_lock_irq() pairs surrounding starget_for_each_device() calls. As Matthew W. pointed out, starget_for_each_device() can be called under a spinlock being held. The change has been tested and verified on qla2xxx.ko module. Thanks Matthew W. and Hisashi H. for help. Signed-off-by: Andrew Vasquez <[EMAIL PROTECTED]> Signed-off-by: Seokmann Ju <[EMAIL PROTECTED]> Signed-off-by: James Bottomley <[EMAIL PROTECTED]> http://marc.theaimsgroup.com/?l=linux-scsi&m=116837234904583&w=2 Could you try the latest 2.6.21-rc which contains the correction?
Regards, Andrew Vasquez - To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: end to end error recovery musings
On Feb 27, 2007 19:02 +, Alan wrote: > > It would be great if the app tag was more than 16 bits. Ted mentioned > > that ideally he'd like to store the inode number in the app tag. But > > as it stands there isn't room. > > The lowest few bits are the most important with ext2/ext3 because you > normally lose a sector of inodes which means you've got dangly bits > associated with a sequence of inodes with the same upper bits. More > problematic is losing indirect blocks, and being able to keep some kind > of [inode low bits/block index] would help put stuff back together. In the ext4 extents format there is the ability (not implemented yet) to add some extra information into the extent index blocks (previously referred to as the ext3_extent_tail). This is planned to be a checksum of the index block, and a back-pointer to the inode which is using this extent block. This allows online detection of corrupt index blocks, and also detection of an index block that is written to the wrong location. There is as yet no plan that I'm aware of to have in-filesystem checksums of the extent data. Cheers, Andreas -- Andreas Dilger Principal Software Engineer Cluster File Systems, Inc. - To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: end to end error recovery musings
> These features make the most sense in terms of WRITE. Disks already > have plenty of CRC on the data so if a READ fails on a regular drive > we already know about it. Don't bet on it. If you want to do this seriously you need an end to end (media to host ram) checksum. We do see bizarre and quite evil things happen to people occasionally because they rely on bus level protection - both faulty network cards and faulty disk or controller RAM can cause very bad things to happen in a critical environment and are very very hard to detect and test for. IDE has another hideously evil feature in this area. Command blocks are sent by PIO cycles, and are therefore unprotected from corruption. So while a data burst with corruption will error and retry, a command which corrupts the block number, although very very much less likely (fewer bits and much lower speed), will not be caught on a PATA system for read or for write and will hit the wrong block. With networking you can turn off hardware IP checksumming (and many cluster people do); with disks we don't yet have a proper end to end checksum to media system in the fs or block layers. > It would be great if the app tag was more than 16 bits. Ted mentioned > that ideally he'd like to store the inode number in the app tag. But > as it stands there isn't room. The lowest few bits are the most important with ext2/ext3 because you normally lose a sector of inodes which means you've got dangly bits associated with a sequence of inodes with the same upper bits. More problematic is losing indirect blocks, and being able to keep some kind of [inode low bits/block index] would help put stuff back together. Alan - To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: end to end error recovery musings
> "Eric" == Moore, Eric <[EMAIL PROTECTED]> writes: Eric> Martin K. Petersen on Data Intergrity Feature, which is also Eric> called EEDP(End to End Data Protection), which he presented some Eric> ideas/suggestions of adding an API in linux for this. T10 DIF is interesting for a few things: - Ensuring that the data integrity is preserved when writing a buffer to disk - Ensuring that the write ends up on the right hardware sector These features make the most sense in terms of WRITE. Disks already have plenty of CRC on the data so if a READ fails on a regular drive we already know about it. We can, however, leverage DIF with my proposal to expose the protection data to host memory. This will allow us to verify the data integrity information before passing it to the filesystem or application. We can say "this is really the information the disk sent. It hasn't been mangled along the way". And by using the APP tag we can mark a sector as - say - metadata or data to ease putting the recovery puzzle back together. It would be great if the app tag was more than 16 bits. Ted mentioned that ideally he'd like to store the inode number in the app tag. But as it stands there isn't room. In any case this is all slightly orthogonal to Ric's original post about finding the right persistence heuristics in the error handling path... -- Martin K. Petersen Oracle Linux Engineering - To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: qla2xxx BUG: workqueue leaked lock or atomic
On 11:11, Andre Noll wrote: > On 10:26, Andrew Vasquez wrote: > > You are loading some stale firmware that's left over on the card -- > > I'm not even sure what 4.00.70 is, as the latest release firmware is > > 4.00.27. > > That's the firmware which came with the card. Anyway, I just upgraded > the firmware, but the bug remains. the system crashed again btw., this time resulting in a kernel panic instead of just locking up silently. Here's a screenshot: http://systemlinux.org/~maan/shots/qla2xxx-crash-huangho2.png Regards Andre signature.asc Description: Digital signature
RE: LSI Logic 40919o fibre channel: scsi works ip not
Mario, There appears to be a bug in the driver. Go to mpt_lan_post_receive_buckets(), in mptlan.c. Find: mf = mpt_get_msg_frame(LanCtx, mpt_dev); Later find: pRecvReq = (LANReceivePostRequest_t *) mf; Below that, should be: i = le16_to_cpu(mf->u.frame.hwhdr.msgctxu.fld.req_idx); mpt_dev->RequestNB[i] = 0; The above two lines of code are present in some versions of mptctl.c (the 2.06.xx versions and 3.02.xx versions) but are missing from some others (3.03.xx, 3.04.xx, and 4.00.xx). If you can add those two lines of code, rebuild mptctl.ko, and retest, I believe that should take care of your reported problems. Let me know, please. stephen -Original Message- From: Mario Giammarco [mailto:[EMAIL PROTECTED] Sent: Sunday, February 18, 2007 6:16 AM To: Shirron, Stephen; linux-scsi@vger.kernel.org Cc: Hickerson, Roger Subject: Re: LSI Logic 40919o fibre channel: scsi works ip not Shirron, Stephen ha scritto: > Hi Mario, > > Can you tell me what firmware version you have on your 40919 > card(s)? The easiest way is to "cat /proc/mpt/summary". This is summary: ioc0: LSIFC919, FwRev=02000f00h, Ports=1, MaxQ=1023,LanAddr=00:06:2B:07:FA:10, IRQ=209 > > How often do the errors occur? dmesg provides no timestamps, > while /var/log/messages does. > Sorry but in messages there is not this error. I can say you that the frequency of the error is proportional to data transferred. If I start an ftp dmesg output goes crazy. I have seen this in /var/log/messages (sorry no timestamps again): Fusion MPT LAN driver 3.04.01 mptlan: ioc0: PortNum=0, ProtocolFlags=0Fh (ITLB) mptlan: ioc0: Fusion MPT LAN device registered as 'fc0' mptlan: ioc0/fc0: LanAddr = 00:06:2B:07:FA:10 mptlan: ioc0/fc0: interface up & active mptbase: ioc0: LogInfo(0x2202): SubCl={LAN} mptlan: ioc0/fc0: ERROR - Got a non-TURBO ReceivePostReply w/ PacketLength zero! 
mptlan: MsgFlags = 80, IOCStatus = 8003 mptbase: ioc0: LogInfo(0x2202): SubCl={LAN} mptlan: ioc0/fc0: ERROR - Got a non-TURBO ReceivePostReply w/ PacketLength zero! mptlan: MsgFlags = 80, IOCStatus = 8003 mptbase: ioc0: LogInfo(0x2202): SubCl={LAN} mptlan: ioc0/fc0: ERROR - Got a non-TURBO ReceivePostReply w/ PacketLength zero! mptlan: MsgFlags = 80, IOCStatus = 8003 mptbase: ioc0: LogInfo(0x2202): SubCl={LAN} mptlan: ioc0/fc0: ERROR - Got a non-TURBO ReceivePostReply w/ PacketLength zero! mptlan: MsgFlags = 80, IOCStatus = 8003 mptbase: ioc0: LogInfo(0x2202): SubCl={LAN} mptlan: ioc0/fc0: ERROR - Got a non-TURBO ReceivePostReply w/ PacketLength zero! mptlan: MsgFlags = 80, IOCStatus = 8003 mptbase: ioc0: LogInfo(0x2202): SubCl={LAN} mptlan: ioc0/fc0: ERROR - Got a non-TURBO ReceivePostReply w/ PacketLength zero! mptlan: MsgFlags = 80, IOCStatus = 8003 mptbase: ioc0: LogInfo(0x2202): SubCl={LAN} mptlan: ioc0/fc0: ERROR - Got a non-TURBO ReceivePostReply w/ PacketLength zero! mptlan: MsgFlags = 80, IOCStatus = 8003 mptbase: ioc0: LogInfo(0x2202): SubCl={LAN} mptlan: ioc0/fc0: ERROR - Got a non-TURBO ReceivePostReply w/ PacketLength zero! mptlan: MsgFlags = 80, IOCStatus = 8003 mptbase: ioc0: LogInfo(0x2202): SubCl={LAN} mptlan: ioc0/fc0: ERROR - Got a non-TURBO ReceivePostReply w/ PacketLength zero! mptlan: MsgFlags = 80, IOCStatus = 8003 - To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: qla2xxx BUG: workqueue leaked lock or atomic
On 10:26, Andrew Vasquez wrote: > You are loading some stale firmware that's left over on the card -- > I'm not even sure what 4.00.70 is, as the latest release firmware is > 4.00.27. That's the firmware which came with the card. Anyway, I just upgraded the firmware, but the bug remains. The backtrace differs a bit though as now the tg3 network driver seems to be involved as well. Thanks for your help Andre [ 67.511167] qla2xxx :05:08.0: Allocated (64 KB) for EFT... [ 67.511434] qla2xxx :05:08.0: Allocated (1413 KB) for firmware dump... [ 67.531231] scsi0 : qla2xxx [ 67.854344] qla2xxx :05:08.0: [ 67.854346] QLogic Fibre Channel HBA Driver: 8.01.07-k4 [ 67.854347] QLogic HP AE369-60001 - QLA2340 [ 67.854348] ISP2422: PCI-X Mode 1 (133 MHz) @ :05:08.0 hdma+, host#=0, fw=4.00.27 [IP] [ 67.854881] ACPI: PCI Interrupt :05:08.1[B] -> GSI 33 (level, low) -> IRQ 33 [ 67.855230] qla2xxx :05:08.1: Found an ISP2422, irq 33, iobase 0xc2012000 [ 67.855645] qla2xxx :05:08.1: Configuring PCI space... [ 67.855907] qla2xxx :05:08.1: Configure NVRAM parameters... [ 67.862486] qla2xxx :05:08.1: Verifying loaded RISC code... [ 68.106663] qla2xxx :05:08.1: Allocated (64 KB) for EFT... [ 68.107058] qla2xxx :05:08.1: Allocated (1413 KB) for firmware dump... [ 68.126759] scsi1 : qla2xxx [ 68.196783] Adding 6540152k swap on /dev/md2. Priority:-1 extents:1 across:6540152k [ 68.260645] qla2xxx :05:08.0: LIP reset occured (f8f7). [ 68.296027] qla2xxx :05:08.0: LIP occured (f8f7). [ 68.298214] qla2xxx :05:08.0: LOOP UP detected (2 Gbps). 
[ 68.326627] qla2xxx :05:08.1: [ 68.326628] QLogic Fibre Channel HBA Driver: 8.01.07-k4 [ 68.326630] QLogic HP AE369-60001 - QLA2340 [ 68.326631] ISP2422: PCI-X Mode 1 (133 MHz) @ :05:08.1 hdma+, host#=1, fw=4.00.27 [IP] [ 68.504335] EXT3 FS on md1, internal journal [ 68.524627] PM: Writing back config space on device :03:06.0 at offset b (was 164814e4, writing d00e11) [ 68.524644] PM: Writing back config space on device :03:06.0 at offset 3 (was 804000, writing 804010) [ 68.524650] PM: Writing back config space on device :03:06.0 at offset 2 (was 200, writing 210) [ 68.524657] PM: Writing back config space on device :03:06.0 at offset 1 (was 2b0, writing 2b00146) [ 68.532665] BUG: at kernel/lockdep.c:1860 trace_hardirqs_on() [ 68.532784] [ 68.532785] Call Trace: [ 68.532979][] trace_hardirqs_on+0xd7/0x180 [ 68.533168] [] _spin_unlock_irq+0x2b/0x40 [ 68.533295] [] :qla2xxx:qla2x00_process_completed_request+0x137/0x1d0 [ 68.533457] [] :qla2xxx:qla2x00_status_entry+0x82/0xa40 [ 68.533577] [] __lock_acquire+0xcdf/0xd90 [ 68.533693] [] _spin_unlock_irqrestore+0x42/0x60 [ 68.533816] [] :qla2xxx:qla24xx_intr_handler+0x4e/0x2b0 [ 68.533942] [] :qla2xxx:qla24xx_process_response_queue+0xc1/0x1c0 [ 68.534102] [] :qla2xxx:qla24xx_intr_handler+0x1d4/0x2b0 [ 68.534224] [] handle_IRQ_event+0x20/0x60 [ 68.534339] [] handle_fasteoi_irq+0xbd/0x110 [ 68.534459] [] do_IRQ+0x132/0x1a0 [ 68.534574] [] ret_from_intr+0x0/0xf [ 68.534687][] __delay+0xc/0x20 [ 68.534862] [] __const_udelay+0x37/0x40 [ 68.534982] [] :tg3:tg3_chip_reset+0x547/0x670 [ 68.535103] [] :tg3:tg3_reset_hw+0x5d/0x1790 [ 68.535218] [] __udelay+0x37/0x40 [ 68.535333] [] :tg3:_tw32_flush+0x6d/0x80 [ 68.535451] [] :tg3:tg3_open+0x2d6/0x610 [ 68.535569] [] :tg3:tg3_init_hw+0x42/0x50 [ 68.535687] [] :tg3:tg3_open+0x2e3/0x610 [ 68.535804] [] dev_open+0x43/0x90 [ 68.535917] [] dev_change_flags+0x74/0x160 [ 68.536034] [] devinet_ioctl+0x2e6/0x730 [ 68.536149] [] dev_ioctl+0x302/0x340 [ 68.536264] [] __up_read+0x9b/0xb0 [ 
68.536378] [] inet_ioctl+0x4c/0x70 [ 68.536494] [] sock_ioctl+0x1fc/0x230 [ 68.536610] [] do_ioctl+0x31/0xa0 [ 68.536722] [] vfs_ioctl+0x2bb/0x2e0 [ 68.536836] [] sys_ioctl+0x4a/0x80 [ 68.536948] [] system_call+0x7e/0x83 [ 68.537059] [ 68.712832] scsi 0:0:0:0: Direct-Access transtec T6100F16R1-E 342I PQ: 0 ANSI: 5 [ 68.713384] sda : very big device. try to use READ CAPACITY(16). [ 68.713594] SCSI device sda: 11714863104 512-byte hdwr sectors (5998010 MB) [ 68.713976] sda: Write Protect is off [ 68.714079] sda: Mode Sense: 9b 00 00 08 [ 68.714483] SCSI device sda: write cache: disabled, read cache: enabled, doesn't support DPO or FUA [ 68.714876] sda : very big device. try to use READ CAPACITY(16). [ 68.715080] SCSI device sda: 11714863104 512-byte hdwr sectors (5998010 MB) [ 68.715436] sda: Write Protect is off [ 68.715539] sda: Mode Sense: 9b 00 00 08 [ 68.715944] SCSI device sda: write cache: disabled, read cache: enabled, doesn't support DPO or FUA [ 68.718244] sda: unknown partition table [ 68.718707] sd 0:0:0:0: Attached scsi disk sda [ 68.718945] sd 0:0:0:0: Attached scsi generic sg0 type 0 [ 68.719413]