Hi, The patch from pkg-sysvinit libata-fixes branch (r1054) that does global inhibition of -h halt option when `ls -1 /sys/class/scsi_disk/*/manage_start_stop' was true seems like an easy way out, but not a foolproof solution. That may not be true in all cases: one disk may have libata management of write flush and spindown, while another disk in the same system could conceivably not have that support.
Tejun Heo hinted[0] that a suse developer, Werner Fink, was working[1] on a more substantial effort to update hddown.c logic, providing per disk device decision on whether or not to give standby signal and cause flush of write-cache or not based on sysfs detection of attached disk devices. Attached is diff against the libata branch of pkg-sysvinit that adds Werner's last patch[2]. I thought it was at least worth a go on my own hardware, if only for my own curiosity's sake. Seems to do what it advertises too. Thanks, Kel. [0] http://lists.alioth.debian.org/pipermail/pkg-sysvinit-devel/2007-June/001963.html [1] https://bugzilla.novell.com/show_bug.cgi?id=229210 [2] https://bugzilla.novell.com/attachment.cgi?id=145904
Index: debian/patches/67_init_hddown_libata.dpatch =================================================================== --- debian/patches/67_init_hddown_libata.dpatch (revision 0) +++ debian/patches/67_init_hddown_libata.dpatch (revision 0) @@ -0,0 +1,357 @@ +#! /bin/sh /usr/share/dpatch/dpatch-run +## init_hddown_libata.patch by Kel Modderman <[EMAIL PROTECTED]> +## +## All lines beginning with `## DP:' are a description of the patch. +## DP: https://bugzilla.novell.com/show_bug.cgi?id=229210 +## DP: https://bugzilla.novell.com/attachment.cgi?id=145904 + [EMAIL PROTECTED]@ +diff -Nrup sysvinit-2.86.ds1.orig/src/hddown.c sysvinit-2.86.ds1.libata/src/hddown.c +--- sysvinit-2.86.ds1.orig/src/hddown.c 2004-06-09 22:47:45.000000000 +1000 ++++ sysvinit-2.86.ds1.libata/src/hddown.c 2007-06-27 19:40:11.000000000 +1000 +@@ -5,6 +5,9 @@ + */ + char *v_hddown = "@(#)hddown.c 1.02 22-Apr-2003 [EMAIL PROTECTED]"; + ++#ifndef _GNU_SOURCE ++#define _GNU_SOURCE ++#endif + #include <stdio.h> + #include <stdlib.h> + #include <unistd.h> +@@ -18,6 +21,326 @@ char *v_hddown = "@(#)hddown.c 1.02 22 + #include <sys/ioctl.h> + #include <linux/hdreg.h> + ++#define USE_SYSFS ++#ifdef USE_SYSFS ++/* ++ * sysfs part Find all disks on the system, list out IDE and unmanaged ++ * SATA disks, flush the cache of those and shut them down. ++ * Author: Werner Fink <[EMAIL PROTECTED]>, 2007/06/12 ++ * ++ */ ++#include <limits.h> ++#include <errno.h> ++#include <sys/stat.h> ++#include <sys/types.h> ++#ifdef WORDS_BIGENDIAN ++#include <byteswap.h> ++#endif ++ ++#define SYS_BLK "/sys/block" ++#define SYS_CLASS "/sys/class/scsi_disk" ++#define DEV_BASE "/dev" ++#define ISSPACE(c) (((c)==' ')||((c)=='\n')||((c)=='\t')||((c)=='\v')||((c)=='\r')||((c)=='\f')) ++ ++/* Used in flush_cache_ext(), compare with <linux/hdreg.h> */ ++#define IDBYTES 512 ++#define MASK_EXT 0xE000 /* Bit 15 shall be zero, bit 14 shall be one, bit 13 flush cache ext */ ++#define TEST_EXT 0x6000 ++ ++/* Maybe set in list_disks() and used in do_standby_idedisk() */ ++#define DISK_IS_IDE 0x00000001 ++#define DISK_IS_SATA 0x00000002 ++#define DISK_EXTFLUSH 0x00000004 ++ ++static char *strstrip(char *str); ++static FILE *hdopen(const char* const format, const char* const name); ++static int flush_cache_ext(const char *device); ++ ++/* ++ * Find all disks through /sys/block. ++ */ ++static char *list_disks(DIR* blk, unsigned int* flags) ++{ ++ struct dirent *d; ++ ++ while ((d = readdir(blk))) { ++ *flags = 0; ++ if (d->d_name[1] == 'd' && (d->d_name[0] == 'h' || d->d_name[0] == 's')) { ++ char buf[NAME_MAX+1], lnk[NAME_MAX+1], *ptr; ++ struct stat st; ++ FILE *fp; ++ int ret; ++ ++ fp = hdopen(SYS_BLK "/%s/removable", d->d_name); ++ if ((long)fp <= 0) { ++ if ((long)fp < 0) ++ goto empty; /* error */ ++ continue; /* no entry `removable' */ ++ } ++ ++ ret = getc(fp); ++ fclose(fp); ++ ++ if (ret != '0') ++ continue; /* not a hard disk */ ++ ++ if (d->d_name[0] == 'h') { ++ (*flags) |= DISK_IS_IDE; ++ if ((ret = flush_cache_ext(d->d_name))) { ++ if (ret < 0) ++ goto empty; ++ (*flags) |= DISK_EXTFLUSH; ++ } ++ break; /* old IDE disk not managed by kernel, out here */ ++ } ++ ++ ret = snprintf(buf, sizeof(buf), SYS_BLK "/%s/device", d->d_name); ++ if ((ret >= sizeof(buf)) || (ret < 0)) ++ goto empty; /* error */ ++ ++ ret = readlink(buf, lnk, sizeof(lnk)); ++ if (ret >= sizeof(lnk)) ++ goto empty; /* error */ ++ if (ret < 0) { ++ if (errno != ENOENT) ++ goto empty; /* error */ ++ continue; /* no entry `device' */ ++ } ++ lnk[ret] = '\0'; ++ ++ ptr = basename(lnk); ++ if (!ptr || !*ptr) ++ continue; /* should not happen */ ++ ++ ret = snprintf(buf, sizeof(buf), SYS_CLASS "/%s/manage_start_stop", ptr); ++ if ((ret >= sizeof(buf)) || (ret < 0)) ++ goto empty; /* error */ ++ ++ ret = stat(buf, &st); ++ if (ret == 0) ++ continue; /* disk found but managed by kernel */ ++ ++ if (errno != ENOENT) ++ goto empty; /* error */ ++ ++ fp = hdopen(SYS_BLK "/%s/device/vendor", d->d_name); ++ if ((long)fp <= 0) { ++ if ((long)fp < 0) ++ goto empty; /* error */ ++ continue; /* no entry `device/vendor' */ ++ } ++ ++ ptr = fgets(buf, sizeof(buf), fp); ++ fclose(fp); ++ if (ptr == (char*)0) ++ continue; /* should not happen */ ++ ++ ptr = strstrip(buf); ++ if (*ptr == '\0') ++ continue; /* should not happen */ ++ ++ if (strncmp(buf, "ATA", sizeof(buf))) ++ continue; /* no SATA but a real SCSI disk */ ++ ++ (*flags) |= (DISK_IS_IDE|DISK_IS_SATA); ++ if ((ret = flush_cache_ext(d->d_name))) { ++ if (ret < 0) ++ goto empty; ++ (*flags) |= DISK_EXTFLUSH; ++ } ++ break; /* new SATA disk to shutdown, out here */ ++ } ++ } ++ if (d == (struct dirent*)0) ++ goto empty; ++ return d->d_name; ++empty: ++ return (char*)0; ++} ++ ++/* ++ * Put an disk in standby mode. ++ * Code stolen from hdparm.c ++ */ ++static int do_standby_idedisk(char *device, unsigned int flags) ++{ ++#ifndef WIN_STANDBYNOW1 ++#define WIN_STANDBYNOW1 0xE0 ++#endif ++#ifndef WIN_STANDBYNOW2 ++#define WIN_STANDBYNOW2 0x94 ++#endif ++#ifndef WIN_FLUSH_CACHE_EXT ++#define WIN_FLUSH_CACHE_EXT 0xEA ++#endif ++#ifndef WIN_FLUSH_CACHE ++#define WIN_FLUSH_CACHE 0xE7 ++#endif ++ unsigned char flush1[4] = {WIN_FLUSH_CACHE_EXT,0,0,0}; ++ unsigned char flush2[4] = {WIN_FLUSH_CACHE,0,0,0}; ++ unsigned char stdby1[4] = {WIN_STANDBYNOW1,0,0,0}; ++ unsigned char stdby2[4] = {WIN_STANDBYNOW2,0,0,0}; ++ char buf[NAME_MAX+1]; ++ int fd, ret; ++ ++ ret = snprintf(buf, sizeof(buf), DEV_BASE "/%s", device); ++ if ((ret >= sizeof(buf)) || (ret < 0)) ++ return -1; ++ ++ if ((fd = open(buf, O_RDWR)) < 0) ++ return -1; ++ ++ switch (flags & DISK_EXTFLUSH) { ++ case DISK_EXTFLUSH: ++ if (ioctl(fd, HDIO_DRIVE_CMD, &flush1) == 0) ++ break; ++ /* Extend flush rejected, try standard flush */ ++ default: ++ ioctl(fd, HDIO_DRIVE_CMD, &flush2); ++ break; ++ } ++ ++ ret = ioctl(fd, HDIO_DRIVE_CMD, &stdby1) && ++ ioctl(fd, HDIO_DRIVE_CMD, &stdby2); ++ close(fd); ++ ++ if (ret) ++ return -1; ++ return 0; ++} ++ ++/* ++ * List all disks and put them in standby mode. ++ * This has the side-effect of flushing the writecache, ++ * which is exactly what we want on poweroff. ++ */ ++int hddown(void) ++{ ++ unsigned int flags; ++ char *disk; ++ DIR *blk; ++ ++ if ((blk = opendir(SYS_BLK)) == (DIR*)0) ++ return -1; ++ ++ while ((disk = list_disks(blk, &flags))) ++ do_standby_idedisk(disk, flags); ++ ++ return closedir(blk); ++} ++ ++/* ++ * Strip off trailing white spaces ++ */ ++static char *strstrip(char *str) ++{ ++ const size_t len = strlen(str); ++ if (len) { ++ char* end = str + len - 1; ++ while ((end != str) && ISSPACE(*end)) ++ end--; ++ *(end + 1) = '\0'; /* remove trailing white spaces */ ++ } ++ return str; ++} ++ ++/* ++ * Open a sysfs file without getting a controlling tty ++ * and return FILE* pointer. ++ */ ++static FILE *hdopen(const char* const format, const char* const name) ++{ ++ char buf[NAME_MAX+1]; ++ FILE *fp = (FILE*)-1; ++ int fd, ret; ++ ++ ret = snprintf(buf, sizeof(buf), format, name); ++ if ((ret >= sizeof(buf)) || (ret < 0)) ++ goto error; /* error */ ++ ++ fd = open(buf, O_RDONLY|O_NOCTTY); ++ if (fd < 0) { ++ if (errno != ENOENT) ++ goto error; /* error */ ++ fp = (FILE*)0; ++ goto error; /* no entry `removable' */ ++ } ++ ++ fp = fdopen(fd, "r"); ++ if (fp == (FILE*)0) ++ close(fd); /* should not happen */ ++error: ++ return fp; ++} ++ ++/* ++ * Check IDE/(S)ATA hard disk identity for ++ * the FLUSH CACHE EXT bit set. ++ */ ++static int flush_cache_ext(const char *device) ++{ ++#ifndef WIN_IDENTIFY ++#define WIN_IDENTIFY 0xEC ++#endif ++ unsigned char args[4+IDBYTES]; ++ unsigned short *id = (unsigned short*)(&args[4]); ++ char buf[NAME_MAX+1], *ptr; ++ int fd = -1, ret = 0; ++ FILE *fp; ++ ++ fp = hdopen(SYS_BLK "/%s/size", device); ++ if ((long)fp <= 0) { ++ if ((long)fp < 0) ++ return -1; /* error */ ++ goto out; /* no entry `size' */ ++ } ++ ++ ptr = fgets(buf, sizeof(buf), fp); ++ fclose(fp); ++ if (ptr == (char*)0) ++ goto out; /* should not happen */ ++ ++ ptr = strstrip(buf); ++ if (*ptr == '\0') ++ goto out; /* should not happen */ ++ ++ if ((size_t)atoll(buf) < (1<<28)) ++ goto out; /* small disk */ ++ ++ ret = snprintf(buf, sizeof(buf), DEV_BASE "/%s", device); ++ if ((ret >= sizeof(buf)) || (ret < 0)) ++ return -1; /* error */ ++ ++ if ((fd = open(buf, O_RDONLY|O_NONBLOCK)) < 0) ++ goto out; ++ ++ memset(&args[0], 0, sizeof(args)); ++ args[0] = WIN_IDENTIFY; ++ args[3] = 1; ++ if (ioctl(fd, HDIO_DRIVE_CMD, &args)) ++ goto out; ++#ifdef WORDS_BIGENDIAN ++# if 0 ++ { ++ const unsigned short *end = id + IDBYTES/2; ++ const unsigned short *from = id; ++ unsigned short *to = id; ++ ++ while (from < end) ++ *to++ = bswap_16(*from++); ++ } ++# else ++ id[83] = bswap_16(id[83]); ++# endif ++#endif ++ if ((id[83] & MASK_EXT) == TEST_EXT) ++ ret = 1; ++out: ++ if (fd >= 0) ++ close(fd); ++ return ret; ++} ++#else /* ! USE_SYSFS */ + #define MAX_DISKS 64 + #define PROC_IDE "/proc/ide" + #define DEV_BASE "/dev" +@@ -104,7 +427,7 @@ int hddown(void) + + return 0; + } +- ++#endif /* ! USE_SYSFS */ + #else /* __linux__ */ + + int hddown(void) Index: debian/patches/17_doc_halt_libata.dpatch =================================================================== --- debian/patches/17_doc_halt_libata.dpatch (revision 0) +++ debian/patches/17_doc_halt_libata.dpatch (revision 0) @@ -0,0 +1,41 @@ +#! /bin/sh /usr/share/dpatch/dpatch-run +## doc_halt_libata.patch by Kel Modderman <[EMAIL PROTECTED]> +## +## All lines beginning with `## DP:' are a description of the patch. +## DP: No description + [EMAIL PROTECTED]@ +diff -Nrup sysvinit-2.86.ds1.orig/man/halt.8 sysvinit-2.86.ds1.libata/man/halt.8 +--- sysvinit-2.86.ds1.orig/man/halt.8 2001-11-22 07:11:22.000000000 +1000 ++++ sysvinit-2.86.ds1.libata/man/halt.8 2007-06-27 20:36:19.000000000 +1000 +@@ -61,7 +61,7 @@ Force halt or reboot, don't call \fBshut + .IP \fB\-i\fP + Shut down all network interfaces just before halt or reboot. + .IP \fB\-h\fP +-Put all harddrives on the system in standby mode just before halt or poweroff. ++Put harddrives on the system in standby mode just before halt or poweroff. + .IP \fB\-p\fP + When halting the system, do a poweroff. This is the default when halt is + called as \fBpoweroff\fP. +@@ -80,14 +80,14 @@ runlevel (for example, when \fI/var/run/ + correctly) \fBshutdown\fP will be called, which might not be what you want. + Use the \fB-f\fP flag if you want to do a hard \fBhalt\fP or \fBreboot\fP. + .PP +-The \fB-h\fP flag puts all harddisks in standby mode just before halt +-or poweroff. Right now this is only implemented for IDE drives. A side +-effect of putting the drive in standby mode is that the write cache +-on the disk is flushed. This is important for IDE drives, since the +-kernel doesn't flush the write-cache itself before poweroff. ++The \fB-h\fP flag puts harddisks in standby mode just before halt or poweroff. ++This is only done for harddisks that do not have kernel ata support for ++managing the synchronization of write-cache and spindown. A side effect of ++putting the drive in standby mode is that the write cache on the disk is ++flushed. + .PP +-The \fBhalt\fP program uses /proc/ide/hd* to find all IDE disk devices, +-which means that /proc needs to be mounted when \fBhalt\fP or ++The \fBhalt\fP program uses /sys/block and /sys/class/scsi_disk/* to find all ++disk devices, which means that /sys needs to be mounted when \fBhalt\fP or + \fBpoweroff\fP is called or the \fB-h\fP switch will do nothing. + .PP + .\"}}} Index: debian/patches/00list =================================================================== --- debian/patches/00list (revision 1056) +++ debian/patches/00list (working copy) @@ -6,6 +6,7 @@ 14_doc_fsf_addr 15_doc_pidof 16_doc_runlevel +17_doc_halt_libata 20_pidof 21_ifdown_kfreebsd 25_last_sanify @@ -24,7 +25,8 @@ 64_init_set_PATH 65_init_u_in_06 66_init_emerg_tty -67_init_hddown +#67_init_hddown +67_init_hddown_libata #68_init_quiet 70_wall_ttyname 71_wall_hostname Index: debian/changelog =================================================================== --- debian/changelog (revision 1056) +++ debian/changelog (working copy) @@ -1,5 +1,6 @@ sysvinit (2.86.ds1-39) UNRELEASED; urgency=low + [ Henrique de Moraes Holschuh ] * libata shutdown handling fixes: Check http://linux-ata.org/shutdown.html for information * init.d/halt: do not issue -h to halt(8) when the kernel supports @@ -9,8 +10,25 @@ * Add halt(5) manpage, updated for HDDOWN. Thanks to Casper Gielen for the manpage. (Closes: #407211) - -- Henrique de Moraes Holschuh <[EMAIL PROTECTED]> Sat, 9 Jun 2007 11:29:42 -0300 + [ Kel Modderman ] + * Revert change to /etc/init.d/halt that inhibited -h halt option when + /sys/class/scsi_disk/*/manage_start_stop exists for at least one disk, but + possibly not _all_ disks. + * Use patch developed by Werner Fink of Novell fame to parse sysfs to detect + disk devices and decide whether or not to issue the legacy STANDBYNOW + signal on a per disk basis. See: + https://bugzilla.novell.com/show_bug.cgi?id=229210 + [debian/patches/67_init_hddown_libata.dpatch] + * Update halt(8) manpage to reflect that /sys/block/ and + /sys/class/scsi_disk/* are now used for disk device detection and that + STANDBYNOW signal is only sent to devices that do not have kernel support + for management of write-cache sync and spindown. + [debian/patches/17_doc_halt_libata.dpatch] + * Disable debian/patches/67_init_hddown.dpatch from being applied, it is + entirely useless after the above mentioned changes. + -- Kel Modderman <[EMAIL PROTECTED]> Wed, 27 Jun 2007 20:40:20 +1000 + sysvinit (2.86.ds1-38) unstable; urgency=medium * Medium urgency as it solve an RC bug in etch. Index: debian/initscripts/etc/init.d/halt =================================================================== --- debian/initscripts/etc/init.d/halt (revision 1056) +++ debian/initscripts/etc/init.d/halt (working copy) @@ -40,13 +40,11 @@ /etc/init.d/ups-monitor poweroff fi - # Don't shut down drives if we're using RAID, or if the kernel can - # do it properly by itself, see http://linux-ata.org/shutdown.html + # Don't shut down drives if we're using RAID # - # Allow the user to override it in the default file, however + # Also allow the user to override it in the default file, halt(5) hddown="-h" - if [ "$HDDOWN" = "NEVER" ] || grep -qs '^md.*active' /proc/mdstat || \ - ls -1 /sys/class/scsi_disk/*/manage_start_stop >/dev/null 2>&1 + if [ "$HDDOWN" = "NEVER" ] || grep -qs '^md.*active' /proc/mdstat then hddown="" fi