commit 443d4d30d8700963f53180650c533b3334ccf1f1
Author: YAMAMOTO Takashi <imuwoto@gmail.com>
Date:   Wed Mar 4 15:38:03 2026 +0000

    zfs: port a fix for a data corruption issue from illumos
    
    See https://www.illumos.org/issues/17734 for the details.
    
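    Note: I didn't bother to patch the illumos/FreeBSD-only code in our
    tree, so conditionally compiled non-NetBSD paths (e.g. the
    "sync = ..." assignment in zvol_write()) are left as they were.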
    
    The original commit message:
    
    > commit f6559a18843abdfa5849b9e74f239f9bd15796d3
    > Author: Andy Fiddaman <illumos@fiddaman.net>
    > Date:   Mon Nov 10 22:52:05 2025 +0000
    >
    >     17734 ZFS fsync can trigger ZIL transaction reordering and data corruption
    >     Portions contributed by: Alexander Motin <mav@FreeBSD.org>
    >     Reviewed by: Ryan Zezeski <ryan@zinascii.com>
    >     Reviewed by: Toomas Soome <tsoome@me.com>
    >     Approved by: Dan McDonald <danmcd@edgecast.io>

diff --git a/external/cddl/osnet/dist/uts/common/fs/zfs/sys/zfs_vfsops.h b/external/cddl/osnet/dist/uts/common/fs/zfs/sys/zfs_vfsops.h
index 57cb61246c3a..5a324deb9821 100644
--- a/external/cddl/osnet/dist/uts/common/fs/zfs/sys/zfs_vfsops.h
+++ b/external/cddl/osnet/dist/uts/common/fs/zfs/sys/zfs_vfsops.h
@@ -138,7 +138,6 @@ typedef struct zfid_long {
 #define	SHORT_FID_LEN	(sizeof (zfid_short_t) - sizeof (uint16_t))
 #define	LONG_FID_LEN	(sizeof (zfid_long_t) - sizeof (uint16_t))
 
-extern uint_t zfs_fsyncer_key;
 extern int zfs_super_owner;
 
 extern int zfs_suspend_fs(zfsvfs_t *zfsvfs);
diff --git a/external/cddl/osnet/dist/uts/common/fs/zfs/sys/zfs_znode.h b/external/cddl/osnet/dist/uts/common/fs/zfs/sys/zfs_znode.h
index 128309d233cc..58018728ce2d 100644
--- a/external/cddl/osnet/dist/uts/common/fs/zfs/sys/zfs_znode.h
+++ b/external/cddl/osnet/dist/uts/common/fs/zfs/sys/zfs_znode.h
@@ -364,7 +364,7 @@ extern void zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
 extern void zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
     znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp);
 extern void zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
-    znode_t *zp, offset_t off, ssize_t len, int ioflag);
+    znode_t *zp, offset_t off, ssize_t len, boolean_t commit);
 extern void zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype,
     znode_t *zp, uint64_t off, uint64_t len);
 extern void zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
diff --git a/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_ioctl.c b/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_ioctl.c
index 786be8420436..67082015c771 100644
--- a/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_ioctl.c
+++ b/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_ioctl.c
@@ -221,7 +221,6 @@ dev_info_t *zfs_dip = &__zfs_devinfo;
 #define vfs_rel(x)	vfs_rele(x)
 #endif
 
-uint_t zfs_fsyncer_key;
 extern uint_t rrw_tsd_key;
 static uint_t zfs_allow_log_key;
 extern uint_t zfs_geom_probe_vdev_key;
@@ -6769,7 +6768,6 @@ _init(void)
 		return (error);
 	}
 
-	tsd_create(&zfs_fsyncer_key, NULL);
 	tsd_create(&zfs_putpages_key, NULL);
 	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
 	tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
@@ -6802,7 +6800,6 @@ _fini(void)
 	if (zfs_nfsshare_inited || zfs_smbshare_inited)
 		(void) ddi_modclose(sharefs_mod);
 
-	tsd_destroy(&zfs_fsyncer_key);
 	ldi_ident_release(zfs_li);
 	zfs_li = NULL;
 	mutex_destroy(&zfs_share_lock);
@@ -6869,7 +6866,6 @@ zfs__init(void)
 	zvol_init();
 	zfs_ioctl_init();
 
-	tsd_create(&zfs_fsyncer_key, NULL);
 	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
 	tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
 	tsd_create(&zfs_geom_probe_vdev_key, NULL);
@@ -6895,7 +6891,6 @@ zfs__fini(void)
 	zfs_fini();
 	spa_fini();
 
-	tsd_destroy(&zfs_fsyncer_key);
 	tsd_destroy(&rrw_tsd_key);
 	tsd_destroy(&zfs_allow_log_key);
 
@@ -7180,7 +7175,6 @@ zfs_modcmd(modcmd_t cmd, void *arg)
 		mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
 		mutex_init(&zfs_debug_mtx, NULL, MUTEX_DEFAULT, NULL);
 
-		tsd_create(&zfs_fsyncer_key, NULL);
 		tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
 		tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
 		tsd_create(&zfs_putpage_key, NULL);
@@ -7217,7 +7211,6 @@ attacherr:
 		spa_fini();
 
 		tsd_destroy(&zfs_putpage_key);
-		tsd_destroy(&zfs_fsyncer_key);
 		tsd_destroy(&rrw_tsd_key);
 		tsd_destroy(&zfs_allow_log_key);
 
diff --git a/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_log.c b/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_log.c
index 31054bac5998..191a9d62b207 100644
--- a/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_log.c
+++ b/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_log.c
@@ -462,11 +462,10 @@ SYSCTL_LONG(_vfs_zfs, OID_AUTO, immediate_write_sz, CTLFLAG_RWTUN,
 
 void
 zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
-    znode_t *zp, offset_t off, ssize_t resid, int ioflag)
+    znode_t *zp, offset_t off, ssize_t resid, boolean_t commit)
 {
 	uint32_t blocksize = zp->z_blksz;
 	itx_wr_state_t write_state;
-	uintptr_t fsync_cnt;
 
 	if (zil_replaying(zilog, tx) || zp->z_unlinked)
 		return;
@@ -476,15 +475,11 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
 	else if (!spa_has_slogs(zilog->zl_spa) &&
 	    resid >= zfs_immediate_write_sz)
 		write_state = WR_INDIRECT;
-	else if (ioflag & (FSYNC | FDSYNC))
+	else if (commit)
 		write_state = WR_COPIED;
 	else
 		write_state = WR_NEED_COPY;
 
-	if ((fsync_cnt = (uintptr_t)tsd_get(zfs_fsyncer_key)) != 0) {
-		(void) tsd_set(zfs_fsyncer_key, (void *)(fsync_cnt - 1));
-	}
-
 	while (resid) {
 		itx_t *itx;
 		lr_write_t *lr;
@@ -515,10 +510,7 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
 		BP_ZERO(&lr->lr_blkptr);
 
 		itx->itx_private = zp->z_zfsvfs;
-
-		if (!(ioflag & (FSYNC | FDSYNC)) && (zp->z_sync_cnt == 0) &&
-		    (fsync_cnt == 0))
-			itx->itx_sync = B_FALSE;
+		itx->itx_sync = (zp->z_sync_cnt != 0);
 
 		zil_itx_assign(zilog, itx, tx);
 
diff --git a/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vnops.c b/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vnops.c
index d239a92afcf5..1e1af6baa64e 100644
--- a/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vnops.c
+++ b/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vnops.c
@@ -218,9 +218,15 @@ zfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
 		}
 	}
 
-	/* Keep a count of the synchronous opens in the znode */
-	if (flag & (FSYNC | FDSYNC))
-		atomic_inc_32(&zp->z_sync_cnt);
+	/*
+	 * Keep a count of the synchronous opens in the znode. On first
+	 * synchronous open we must convert all previous async transactions
+	 * into sync to keep correct ordering.
+	 */
+	if (flag & (FSYNC | FDSYNC)) {
+		if (atomic_inc_32_nv(&zp->z_sync_cnt) == 1)
+			zil_async_to_sync(zfsvfs->z_log, zp->z_id);
+	}
 
 	ZFS_EXIT(zfsvfs);
 	return (0);
@@ -1044,6 +1050,7 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
 	sa_bulk_attr_t	bulk[4];
 	uint64_t	mtime[2], ctime[2];
 	int		segflg;
+	boolean_t	commit;
 
 #ifdef __NetBSD__
 	segflg = VMSPACE_IS_KERNEL_P(uio->uio_vmspace) ?
@@ -1181,6 +1188,9 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
 
 	end_size = MAX(zp->z_size, woff + n);
 
+	commit = ((ioflag & (FSYNC | FDSYNC)) != 0 ||
+	    zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS);
+
 	/*
 	 * Write the file in reasonable size chunks.  Each chunk is written
 	 * in a separate transaction; this keeps the intent log records small
@@ -1403,7 +1413,7 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
 		else
 			(void) sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
 
-		zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag);
+		zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, commit);
 		dmu_tx_commit(tx);
 
 		if (error != 0)
@@ -1439,8 +1449,7 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
 	}
 #endif
 
-	if (ioflag & (FSYNC | FDSYNC) ||
-	    zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+	if (commit)
 		zil_commit(zilog, zp->z_id);
 
 	ZFS_EXIT(zfsvfs);
@@ -3009,16 +3018,12 @@ update:
 	return (error);
 }
 
-ulong_t zfs_fsync_sync_cnt = 4;
-
 static int
 zfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
 {
 	znode_t	*zp = VTOZ(vp);
 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 
-	(void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt);
-
 	if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) {
 		ZFS_ENTER(zfsvfs);
 		ZFS_VERIFY_ZP(zp);
@@ -6213,7 +6218,8 @@ zfs_putapage(vnode_t *vp, page_t **pp, int count, int flags)
 		    B_TRUE);
 		err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
 		ASSERT0(err);
-		zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0);
+		zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len,
+		    B_FALSE);
 	}
 	dmu_tx_commit(tx);
 
diff --git a/external/cddl/osnet/dist/uts/common/fs/zfs/zvol.c b/external/cddl/osnet/dist/uts/common/fs/zfs/zvol.c
index 93ed199585fc..27f6e7dd76aa 100644
--- a/external/cddl/osnet/dist/uts/common/fs/zfs/zvol.c
+++ b/external/cddl/osnet/dist/uts/common/fs/zfs/zvol.c
@@ -1604,7 +1604,7 @@ SYSCTL_LONG(_vfs_zfs_vol, OID_AUTO, immediate_write_sz, CTLFLAG_RWTUN,
 
 static void
 zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t resid,
-    boolean_t sync)
+    boolean_t commit)
 {
 	uint32_t blocksize = zv->zv_volblocksize;
 	zilog_t *zilog = zv->zv_zilog;
@@ -1613,15 +1613,16 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t resid,
 	if (zil_replaying(zilog, tx))
 		return;
 
-	if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
+	if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT) {
 		write_state = WR_INDIRECT;
-	else if (!spa_has_slogs(zilog->zl_spa) &&
-	    resid >= blocksize && blocksize > zvol_immediate_write_sz)
+	} else if (!spa_has_slogs(zilog->zl_spa) &&
+	    resid >= blocksize && blocksize > zvol_immediate_write_sz) {
 		write_state = WR_INDIRECT;
-	else if (sync)
+	} else if (commit) {
 		write_state = WR_COPIED;
-	else
+	} else {
 		write_state = WR_NEED_COPY;
+	}
 
 	while (resid) {
 		itx_t *itx;
@@ -1654,9 +1655,6 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t resid,
 
 		itx->itx_private = zv;
 
-		if (!sync && (zv->zv_sync_cnt == 0))
-			itx->itx_sync = B_FALSE;
-
 		zil_itx_assign(zilog, itx, tx);
 
 		off += len;
@@ -1792,7 +1790,7 @@ zvol_strategy(buf_t *bp)
 	boolean_t doread = 0;
 #endif
 	boolean_t is_dumpified;
-	boolean_t sync;
+	boolean_t commit;
 
 #ifdef illumos
 	zfs_soft_state_t *zs = NULL;
@@ -1929,9 +1927,9 @@ zvol_strategy(buf_t *bp)
 	}
 
 	is_dumpified = B_FALSE;
-	sync = ((!(bp->b_flags & B_ASYNC) &&
+	commit = ((!(bp->b_flags & B_ASYNC) &&
 	    !(zv->zv_flags & ZVOL_WCE)) ||
-	    (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS)) &&
+	    zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS) &&
 	    !doread && !is_dumpified;
 
 	mutex_enter(&zv->zv_dklock);
@@ -1979,7 +1977,7 @@ zvol_strategy(buf_t *bp)
 				dmu_tx_abort(tx);
 			} else {
 				dmu_write(os, ZVOL_OBJ, off, size, addr, tx);
-				zvol_log_write(zv, tx, off, size, sync);
+				zvol_log_write(zv, tx, off, size, commit);
 				dmu_tx_commit(tx);
 			}
 		}
@@ -2027,7 +2025,7 @@ out:
 	if ((bp->b_resid = resid) == bp->b_bcount)
 		bioerror(bp, off > volsize ? EINVAL : error);
 
-	if (sync)
+	if (commit)
 		zil_commit(zv->zv_zilog, ZVOL_OBJ);
 	mutex_enter(&zv->zv_dklock);
 	disk_unbusy(&zv->zv_dk, bp->b_bcount - bp->b_resid, doread);
@@ -2174,7 +2172,7 @@ zvol_write(struct cdev *dev, struct uio *uio, int ioflag)
 	uint64_t volsize;
 	rl_t *rl;
 	int error = 0;
-	boolean_t sync;
+	boolean_t commit;
 
 #if defined(illumos) || defined(__NetBSD__)
 	minor_t minor = getminor(dev);
@@ -2205,7 +2203,7 @@ zvol_write(struct cdev *dev, struct uio *uio, int ioflag)
 	sync = (ioflag & IO_SYNC) ||
 #endif
 #ifdef __NetBSD__
-	sync = 1 ||
+	commit = 1 ||
 #endif
 	    (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS);
 
@@ -2233,14 +2231,14 @@ zvol_write(struct cdev *dev, struct uio *uio, int ioflag)
 		}
 		error = dmu_write_uio_dbuf(zv->zv_dbuf, uio, bytes, tx);
 		if (error == 0)
-			zvol_log_write(zv, tx, off, bytes, sync);
+			zvol_log_write(zv, tx, off, bytes, commit);
 		dmu_tx_commit(tx);
 
 		if (error)
 			break;
 	}
 	zfs_range_unlock(rl);
-	if (sync)
+	if (commit)
 		zil_commit(zv->zv_zilog, ZVOL_OBJ);
 #ifdef __NetBSD__
 	mutex_enter(&zv->zv_dklock);
