Module Name:    src
Committed By:   jdolecek
Date:           Fri Aug 10 22:43:22 UTC 2018

Modified Files:
        src/sys/dev/ata: ata_subr.c atavar.h wd.c

Log Message:
fix race in wd_lastclose() on systems with two ide disks on same
channel, which happened when one disk had pending I/O while the other
disk executed the final disk flush - need to restart bufq processing
once xfer is freed in this case

it could happen e.g. on boot when system executes fsck on different
partitions on the two drives in parallell and hence open and closes
the disk devices repeatedly

add KASSERT() for empty bufq on wd_lastclose(), and fix similar issue
also on suspend/standby path

this was introduced by the NCQ merge and not dksubr - before the merge
each drive had their own xfer, so they could not block each other

fixes PR kern/52783 by Onno van der Linden; many thanks for extensive
help with tracking this down


To generate a diff of this commit:
cvs rdiff -u -r1.5 -r1.6 src/sys/dev/ata/ata_subr.c
cvs rdiff -u -r1.98 -r1.99 src/sys/dev/ata/atavar.h
cvs rdiff -u -r1.440 -r1.441 src/sys/dev/ata/wd.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/dev/ata/ata_subr.c
diff -u src/sys/dev/ata/ata_subr.c:1.5 src/sys/dev/ata/ata_subr.c:1.6
--- src/sys/dev/ata/ata_subr.c:1.5	Mon Aug  6 20:07:05 2018
+++ src/sys/dev/ata/ata_subr.c	Fri Aug 10 22:43:22 2018
@@ -1,4 +1,4 @@
-/*	$NetBSD: ata_subr.c,v 1.5 2018/08/06 20:07:05 jdolecek Exp $	*/
+/*	$NetBSD: ata_subr.c,v 1.6 2018/08/10 22:43:22 jdolecek Exp $	*/
 
 /*
  * Copyright (c) 1998, 2001 Manuel Bouyer.  All rights reserved.
@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ata_subr.c,v 1.5 2018/08/06 20:07:05 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ata_subr.c,v 1.6 2018/08/10 22:43:22 jdolecek Exp $");
 
 #include "opt_ata.h"
 
@@ -377,7 +377,7 @@ out:
  * released.
  */ 
 void
-ata_channel_start(struct ata_channel *chp, int drive)
+ata_channel_start(struct ata_channel *chp, int drive, bool start_self)
 {
 	int i, s;
 	struct ata_drive_datas *drvp;
@@ -413,7 +413,8 @@ ata_channel_start(struct ata_channel *ch
 	}
 
 	/* Now try to kick off xfers on the current drive */
-	ATA_DRIVE_START(chp, drive);
+	if (start_self)
+		ATA_DRIVE_START(chp, drive);
 
 	splx(s);
 #undef ATA_DRIVE_START

Index: src/sys/dev/ata/atavar.h
diff -u src/sys/dev/ata/atavar.h:1.98 src/sys/dev/ata/atavar.h:1.99
--- src/sys/dev/ata/atavar.h:1.98	Mon Aug  6 20:07:05 2018
+++ src/sys/dev/ata/atavar.h	Fri Aug 10 22:43:22 2018
@@ -1,4 +1,4 @@
-/*	$NetBSD: atavar.h,v 1.98 2018/08/06 20:07:05 jdolecek Exp $	*/
+/*	$NetBSD: atavar.h,v 1.99 2018/08/10 22:43:22 jdolecek Exp $	*/
 
 /*
  * Copyright (c) 1998, 2001 Manuel Bouyer.
@@ -538,7 +538,7 @@ void	ata_kill_active(struct ata_channel 
 void	ata_reset_channel(struct ata_channel *, int);
 void	ata_channel_freeze(struct ata_channel *);
 void	ata_channel_thaw(struct ata_channel *);
-void	ata_channel_start(struct ata_channel *, int);
+void	ata_channel_start(struct ata_channel *, int, bool);
 void	ata_channel_lock(struct ata_channel *);
 void	ata_channel_unlock(struct ata_channel *);
 void	ata_channel_lock_owned(struct ata_channel *);

Index: src/sys/dev/ata/wd.c
diff -u src/sys/dev/ata/wd.c:1.440 src/sys/dev/ata/wd.c:1.441
--- src/sys/dev/ata/wd.c:1.440	Mon Aug  6 20:07:05 2018
+++ src/sys/dev/ata/wd.c	Fri Aug 10 22:43:22 2018
@@ -1,4 +1,4 @@
-/*	$NetBSD: wd.c,v 1.440 2018/08/06 20:07:05 jdolecek Exp $ */
+/*	$NetBSD: wd.c,v 1.441 2018/08/10 22:43:22 jdolecek Exp $ */
 
 /*
  * Copyright (c) 1998, 2001 Manuel Bouyer.  All rights reserved.
@@ -54,7 +54,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: wd.c,v 1.440 2018/08/06 20:07:05 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: wd.c,v 1.441 2018/08/10 22:43:22 jdolecek Exp $");
 
 #include "opt_ata.h"
 #include "opt_wd.h"
@@ -917,7 +917,7 @@ noerror:	if ((xfer->c_bio.flags & ATA_CO
 	ata_free_xfer(wd->drvp->chnl_softc, xfer);
 
 	dk_done(dksc, bp);
-	ata_channel_start(wd->drvp->chnl_softc, wd->drvp->drive);
+	ata_channel_start(wd->drvp->chnl_softc, wd->drvp->drive, true);
 }
 
 static void
@@ -1072,6 +1072,8 @@ wd_lastclose(device_t self)
 {
 	struct wd_softc *wd = device_private(self);
 
+	KASSERTMSG(bufq_peek(wd->sc_dksc.sc_bufq) == NULL, "bufq not empty");
+
 	wd_flushcache(wd, AT_WAIT, false);
 
 	wd->atabus->ata_delref(wd->drvp);
@@ -1667,7 +1669,7 @@ wd_setcache(struct wd_softc *wd, int bit
 
 out:
 	ata_free_xfer(wd->drvp->chnl_softc, xfer);
-	ata_channel_start(wd->drvp->chnl_softc, wd->drvp->drive);
+	ata_channel_start(wd->drvp->chnl_softc, wd->drvp->drive, true);
 	return error;
 }
 
@@ -1711,12 +1713,18 @@ wd_standby(struct wd_softc *wd, int flag
 
 out:
 	ata_free_xfer(wd->drvp->chnl_softc, xfer);
-	/* drive is supposed to go idle, do not call ata_channel_start() */
+
+	/*
+	 * Drive is supposed to go idle, start only other drives.
+	 * bufq might be actually already freed at this moment.
+	 */
+	ata_channel_start(wd->drvp->chnl_softc, wd->drvp->drive, false);
+
 	return error;
 }
 
 int
-wd_flushcache(struct wd_softc *wd, int flags, bool start)
+wd_flushcache(struct wd_softc *wd, int flags, bool start_self)
 {
 	struct dk_softc *dksc = &wd->sc_dksc;
 	struct ata_xfer *xfer;
@@ -1783,9 +1791,8 @@ out_xfer:
 	ata_free_xfer(wd->drvp->chnl_softc, xfer);
 
 out:
-	/* kick queue processing blocked while waiting for flush xfer */
-	if (start)
-		ata_channel_start(wd->drvp->chnl_softc, wd->drvp->drive);
+	/* start again I/O processing possibly stopped due to no xfer */
+	ata_channel_start(wd->drvp->chnl_softc, wd->drvp->drive, start_self);
 
 	return error;
 }
@@ -1848,7 +1855,7 @@ wd_trim(struct wd_softc *wd, daddr_t bno
 
 out:
 	ata_free_xfer(wd->drvp->chnl_softc, xfer);
-	ata_channel_start(wd->drvp->chnl_softc, wd->drvp->drive);
+	ata_channel_start(wd->drvp->chnl_softc, wd->drvp->drive, true);
 	return error;
 }
 
@@ -2060,7 +2067,7 @@ wdioctlstrategy(struct buf *bp)
 out:
 	ata_free_xfer(wi->wi_softc->drvp->chnl_softc, xfer);
 	ata_channel_start(wi->wi_softc->drvp->chnl_softc,
-	    wi->wi_softc->drvp->drive);
+	    wi->wi_softc->drvp->drive, true);
 out2:
 	bp->b_error = error;
 	if (error)

Reply via email to