Module Name:    src
Committed By:   tkusumi
Date:           Wed Jan  1 08:56:41 UTC 2020

Modified Files:
        src/usr.sbin/fstyp: Makefile fstyp.8 fstyp.c fstyp.h
Added Files:
        src/usr.sbin/fstyp: hammer.c hammer2.c hammer2_disk.h hammer_disk.h

Log Message:
fstyp: Add HAMMER1/2 support

FreeBSD has recently imported HAMMER1/2 support from DragonFlyBSD,
so why not in NetBSD as well.

taken-from: DragonFlyBSD


To generate a diff of this commit:
cvs rdiff -u -r1.8 -r1.9 src/usr.sbin/fstyp/Makefile
cvs rdiff -u -r1.4 -r1.5 src/usr.sbin/fstyp/fstyp.8
cvs rdiff -u -r1.7 -r1.8 src/usr.sbin/fstyp/fstyp.c
cvs rdiff -u -r1.6 -r1.7 src/usr.sbin/fstyp/fstyp.h
cvs rdiff -u -r0 -r1.1 src/usr.sbin/fstyp/hammer.c \
    src/usr.sbin/fstyp/hammer2.c src/usr.sbin/fstyp/hammer2_disk.h \
    src/usr.sbin/fstyp/hammer_disk.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/usr.sbin/fstyp/Makefile
diff -u src/usr.sbin/fstyp/Makefile:1.8 src/usr.sbin/fstyp/Makefile:1.9
--- src/usr.sbin/fstyp/Makefile:1.8	Fri Dec 27 11:15:06 2019
+++ src/usr.sbin/fstyp/Makefile	Wed Jan  1 08:56:41 2020
@@ -1,9 +1,9 @@
-#	$NetBSD: Makefile,v 1.8 2019/12/27 11:15:06 tkusumi Exp $
+#	$NetBSD: Makefile,v 1.9 2020/01/01 08:56:41 tkusumi Exp $
 
 .include <bsd.own.mk>
 
 PROG=	fstyp
-SRCS=	apfs.c cd9660.c exfat.c ext2fs.c fstyp.c hfsplus.c msdosfs.c ntfs.c ufs.c
+SRCS=	apfs.c cd9660.c exfat.c ext2fs.c fstyp.c hammer.c hammer2.c hfsplus.c msdosfs.c ntfs.c ufs.c
 
 .if (${MKZFS} != "no")
 SRCS+=	zfs.c

Index: src/usr.sbin/fstyp/fstyp.8
diff -u src/usr.sbin/fstyp/fstyp.8:1.4 src/usr.sbin/fstyp/fstyp.8:1.5
--- src/usr.sbin/fstyp/fstyp.8:1.4	Thu Nov 21 15:24:17 2019
+++ src/usr.sbin/fstyp/fstyp.8	Wed Jan  1 08:56:41 2020
@@ -1,4 +1,4 @@
-.\" $NetBSD: fstyp.8,v 1.4 2019/11/21 15:24:17 tkusumi Exp $
+.\" $NetBSD: fstyp.8,v 1.5 2020/01/01 08:56:41 tkusumi Exp $
 .\"
 .\" Copyright (c) 2017 The NetBSD Foundation, Inc.
 .\" Copyright (c) 2016 The DragonFly Project
@@ -34,7 +34,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd November 19, 2019
+.Dd January 1, 2020
 .Dt FSTYP 8
 .Os
 .Sh NAME
@@ -48,7 +48,7 @@
 The
 .Nm
 utility is used to determine the file system type on a given device.
-It can recognize ISO-9660, exFAT, Ext2, FAT, NTFS, and UFS file systems.
+It can recognize ISO-9660, exFAT, Ext2, FAT, NTFS, UFS, HAMMER, and HAMMER2 file systems.
 When the
 .Fl u
 flag is specified,
@@ -74,9 +74,15 @@ ntfs
 .It
 ufs
 .It
+hammer
+.It
+hammer2
+.It
 zfs
 .El
 .Pp
+Note that a HAMMER file system consisting of more than one volume requires a path in blkdevs format.
+.Pp
 Because
 .Nm
 is built specifically to detect file system types, it differs from

Index: src/usr.sbin/fstyp/fstyp.c
diff -u src/usr.sbin/fstyp/fstyp.c:1.7 src/usr.sbin/fstyp/fstyp.c:1.8
--- src/usr.sbin/fstyp/fstyp.c:1.7	Sat Dec 28 08:22:30 2019
+++ src/usr.sbin/fstyp/fstyp.c	Wed Jan  1 08:56:41 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: fstyp.c,v 1.7 2019/12/28 08:22:30 tkusumi Exp $	*/
+/*	$NetBSD: fstyp.c,v 1.8 2020/01/01 08:56:41 tkusumi Exp $	*/
 
 /*-
  * Copyright (c) 2017 The NetBSD Foundation, Inc.
@@ -35,7 +35,7 @@
  *
  */
 #include <sys/cdefs.h>
-__RCSID("$NetBSD: fstyp.c,v 1.7 2019/12/28 08:22:30 tkusumi Exp $");
+__RCSID("$NetBSD: fstyp.c,v 1.8 2020/01/01 08:56:41 tkusumi Exp $");
 
 #include <sys/disklabel.h>
 #include <sys/dkio.h>
@@ -55,7 +55,7 @@ __RCSID("$NetBSD: fstyp.c,v 1.7 2019/12/
 
 #include "fstyp.h"
 
-#define	LABEL_LEN	256
+#define	LABEL_LEN	512
 
 bool show_label = false;
 
@@ -76,6 +76,8 @@ static struct {
 	{ "msdosfs", &fstyp_msdosfs, false, NULL },
 	{ "ntfs", &fstyp_ntfs, false, NTFS_ENC },
 	{ "ufs", &fstyp_ufs, false, NULL },
+	{ "hammer", &fstyp_hammer, false, NULL },
+	{ "hammer2", &fstyp_hammer2, false, NULL },
 #ifdef HAVE_ZFS
 	{ "zfs", &fstyp_zfs, true, NULL },
 #endif

Index: src/usr.sbin/fstyp/fstyp.h
diff -u src/usr.sbin/fstyp/fstyp.h:1.6 src/usr.sbin/fstyp/fstyp.h:1.7
--- src/usr.sbin/fstyp/fstyp.h:1.6	Sat Dec 28 08:22:30 2019
+++ src/usr.sbin/fstyp/fstyp.h	Wed Jan  1 08:56:41 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: fstyp.h,v 1.6 2019/12/28 08:22:30 tkusumi Exp $	*/
+/*	$NetBSD: fstyp.h,v 1.7 2020/01/01 08:56:41 tkusumi Exp $	*/
 
 /*-
  * Copyright (c) 2017 The NetBSD Foundation, Inc.
@@ -41,6 +41,9 @@
 
 #include <stdbool.h>
 
+/* Undefine this on FreeBSD and NetBSD. */
+//#define HAS_DEVPATH
+
 /* The spec doesn't seem to permit UTF-16 surrogates; definitely LE. */
 #define	EXFAT_ENC	"UCS-2LE"
 /*
@@ -63,8 +66,13 @@ int	fstyp_hfsp(FILE *fp, char *label, si
 int	fstyp_msdosfs(FILE *, char *, size_t);
 int	fstyp_ntfs(FILE *, char *, size_t);
 int	fstyp_ufs(FILE *, char *, size_t);
+int	fstyp_hammer(FILE *, char *, size_t);
+int	fstyp_hammer2(FILE *, char *, size_t);
 #ifdef HAVE_ZFS
 int	fstyp_zfs(FILE *, char *, size_t);
 #endif
 
+int	fsvtyp_hammer(const char *blkdevs, char *label, size_t size);
+int	fsvtyp_hammer_partial(const char *blkdevs, char *label, size_t size);
+
 #endif /* !FSTYP_H */

Added files:

Index: src/usr.sbin/fstyp/hammer.c
diff -u /dev/null src/usr.sbin/fstyp/hammer.c:1.1
--- /dev/null	Wed Jan  1 08:56:42 2020
+++ src/usr.sbin/fstyp/hammer.c	Wed Jan  1 08:56:41 2020
@@ -0,0 +1,217 @@
+/*        $NetBSD: hammer.c,v 1.1 2020/01/01 08:56:41 tkusumi Exp $      */
+
+/*-
+ * Copyright (c) 2016-2019 The DragonFly Project
+ * Copyright (c) 2016-2019 Tomohiro Kusumi <tkus...@netbsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: hammer.c,v 1.1 2020/01/01 08:56:41 tkusumi Exp $");
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <err.h>
+#include <assert.h>
+
+#include "fstyp.h"
+#include "hammer_disk.h"
+
+/*
+ * Read the HAMMER volume header found at offset 0 of fp.
+ *
+ * The buffer comes from read_buf() and is released by the callers with
+ * free().  A NULL result from read_buf() terminates the program through
+ * err(3), so this function never returns NULL.
+ */
+static hammer_volume_ondisk_t
+read_ondisk(FILE *fp)
+{
+	hammer_volume_ondisk_t hdr = read_buf(fp, 0, sizeof(*hdr));
+
+	if (hdr == NULL)
+		err(1, "failed to read ondisk");
+	return (hdr);
+}
+
+/*
+ * Validate a HAMMER volume header and compare it with earlier volumes.
+ *
+ * The first header that passes the stand-alone checks is remembered in
+ * static storage (volume count, fsid, fstype and label); every later
+ * header must agree with it.
+ *
+ * Returns 0 when the header is acceptable, otherwise a small positive
+ * code (1-8) identifying the check that failed.
+ */
+static int
+test_ondisk(const hammer_volume_ondisk_t ondisk)
+{
+	static int nvolumes = 0;
+	static hammer_uuid_t ref_fsid, ref_fstype;
+	static char ref_label[64];
+
+	/* Checks that only involve this header. */
+	if (!(ondisk->vol_signature == HAMMER_FSBUF_VOLUME ||
+	    ondisk->vol_signature == HAMMER_FSBUF_VOLUME_REV))
+		return (1);
+	if (ondisk->vol_rootvol != HAMMER_ROOT_VOLNO)
+		return (2);
+	if (ondisk->vol_no < 0 || ondisk->vol_no >= HAMMER_MAX_VOLUMES)
+		return (3);
+	if (ondisk->vol_count < 1 || ondisk->vol_count > HAMMER_MAX_VOLUMES)
+		return (4);
+
+	if (nvolumes != 0) {
+		/* Not the first volume: it must match the remembered one. */
+		if (ondisk->vol_count != nvolumes)
+			return (5);
+		if (memcmp(&ondisk->vol_fsid, &ref_fsid,
+		    sizeof(ref_fsid)) != 0)
+			return (6);
+		if (memcmp(&ondisk->vol_fstype, &ref_fstype,
+		    sizeof(ref_fstype)) != 0)
+			return (7);
+		if (strncmp(ondisk->vol_label, ref_label,
+		    sizeof(ref_label)) != 0)
+			return (8);
+		return (0);
+	}
+
+	/* First volume seen: remember its identity for later comparisons. */
+	nvolumes = ondisk->vol_count;
+	assert(nvolumes != 0);
+	memcpy(&ref_fsid, &ondisk->vol_fsid, sizeof(ref_fsid));
+	memcpy(&ref_fstype, &ondisk->vol_fstype, sizeof(ref_fstype));
+	strncpy(ref_label, ondisk->vol_label, sizeof(ref_label));
+
+	return (0);
+}
+
+/*
+ * Probe fp for a single-volume HAMMER file system.
+ *
+ * Only a root volume (vol_no == HAMMER_ROOT_VOLNO) with vol_count == 1
+ * that also passes test_ondisk() is accepted.
+ *
+ * On success the volume label is copied into label (at most size bytes,
+ * NUL-terminated) and 0 is returned.  Any other outcome, including a
+ * volume header that cannot be read, returns non-zero.
+ */
+int
+fstyp_hammer(FILE *fp, char *label, size_t size)
+{
+	hammer_volume_ondisk_t ondisk;
+	int error = 1;
+#ifdef HAS_DEVPATH
+	const char *p;
+#endif
+
+	/*
+	 * Read the header directly instead of going through read_ondisk():
+	 * that helper calls err(3) on failure, which would terminate the
+	 * program rather than report "not HAMMER" to the caller.
+	 */
+	ondisk = read_buf(fp, 0, sizeof(*ondisk));
+	if (ondisk == NULL)
+		return (error);
+
+	if (ondisk->vol_no != HAMMER_ROOT_VOLNO)
+		goto done;
+	if (ondisk->vol_count != 1)
+		goto done;
+	if (test_ondisk(ondisk))
+		goto done;
+
+	/*
+	 * fstyp_function in DragonFly takes an additional devpath argument
+	 * which doesn't exist in FreeBSD and NetBSD.
+	 */
+#ifdef HAS_DEVPATH
+	/* Add device name to help support multiple autofs -media mounts. */
+	p = strrchr(devpath, '/');
+	if (p) {
+		p++;
+		if (*p == 0)
+			strlcpy(label, ondisk->vol_label, size);
+		else
+			snprintf(label, size, "%s_%s", ondisk->vol_label, p);
+	} else
+		snprintf(label, size, "%s_%s", ondisk->vol_label, devpath);
+#else
+	strlcpy(label, ondisk->vol_label, size);
+#endif
+	error = 0;
+done:
+	free(ondisk);
+	return (error);
+}
+
+/*
+ * Open volpath, read its HAMMER volume header and validate it with
+ * test_ondisk().
+ *
+ * Returns the volume number stored in the header, or -1 when the header
+ * is rejected.  Failure to open or read the file terminates the program
+ * through err(3).
+ */
+static int
+test_volume(const char *volpath)
+{
+	hammer_volume_ondisk_t hdr;
+	FILE *stream;
+	int result;
+
+	stream = fopen(volpath, "r");
+	if (stream == NULL)
+		err(1, "failed to open %s", volpath);
+
+	hdr = read_ondisk(stream);
+	fclose(stream);
+
+	result = (test_ondisk(hdr) == 0) ? hdr->vol_no : -1;
+
+	free(hdr);
+	return (result);
+}
+
+/*
+ * Probe a colon-separated list of devices for a (possibly multi-volume)
+ * HAMMER file system.
+ *
+ * blkdevs - volume paths separated by ':'
+ * label   - receives the volume label on success (at most size bytes)
+ * partial - when non-zero, accept the list as soon as every listed volume
+ *           passes test_volume(), without checking that the set is
+ *           complete
+ *
+ * Returns 0 on success and 1 otherwise.  Failure to allocate memory, or
+ * to open or read a volume, terminates the program through err(3).
+ */
+static int
+__fsvtyp_hammer(const char *blkdevs, char *label, size_t size, int partial)
+{
+	hammer_volume_ondisk_t ondisk;
+	FILE *fp;
+	char *dup, *p, *volpath, x[HAMMER_MAX_VOLUMES];
+	int i, volno = -1, error = 1;
+
+	memset(x, 0, sizeof(x));
+	dup = strdup(blkdevs);
+	if (dup == NULL)
+		err(1, "strdup");
+	p = dup;
+
+	/* The loop body runs at least once, so volpath is always set. */
+	volpath = NULL;
+	while (p) {
+		volpath = p;
+		if ((p = strchr(p, ':')) != NULL)
+			*p++ = '\0';
+		if ((volno = test_volume(volpath)) == -1)
+			break;
+		/*
+		 * Only "absent", "once" and "more than once" matter below,
+		 * so saturate at 2 instead of letting the char counter wrap
+		 * when the same volume is listed many times.
+		 */
+		if (x[volno] < 2)
+			x[volno]++;
+	}
+
+	/* A listed volume was rejected; nothing more to look at. */
+	if (volno == -1) {
+		free(dup);
+		return (error);
+	}
+
+	/* Re-read the last volume's header for its label and volume count. */
+	if ((fp = fopen(volpath, "r")) == NULL)
+		err(1, "failed to open %s", volpath);
+	ondisk = read_ondisk(fp);
+	fclose(fp);
+
+	free(dup);
+
+	if (partial)
+		goto success;
+
+	/* No volume number may appear more than once. */
+	for (i = 0; i < HAMMER_MAX_VOLUMES; i++)
+		if (x[i] > 1)
+			goto done;
+	/* Volume numbers 0 .. vol_count-1 must all be present ... */
+	for (i = 0; i < HAMMER_MAX_VOLUMES; i++)
+		if (x[i] == 0)
+			break;
+	if (ondisk->vol_count != i)
+		goto done;
+	/* ... and none may appear beyond them. */
+	for (; i < HAMMER_MAX_VOLUMES; i++)
+		if (x[i] != 0)
+			goto done;
+success:
+	/* XXX autofs -media mount can't handle multiple mounts */
+	strlcpy(label, ondisk->vol_label, size);
+	error = 0;
+done:
+	free(ondisk);
+	return (error);
+}
+
+/*
+ * Probe blkdevs (colon-separated volume paths) for a HAMMER file system,
+ * with the "partial" mode of __fsvtyp_hammer() switched off.
+ *
+ * Returns 0 and fills label on success, non-zero otherwise.
+ */
+int
+fsvtyp_hammer(const char *blkdevs, char *label, size_t size)
+{
+	const int partial = 0;
+
+	return (__fsvtyp_hammer(blkdevs, label, size, partial));
+}
+
+/*
+ * Probe blkdevs (colon-separated volume paths) for a HAMMER file system,
+ * with the "partial" mode of __fsvtyp_hammer() switched on.
+ *
+ * Returns 0 and fills label on success, non-zero otherwise.
+ */
+int
+fsvtyp_hammer_partial(const char *blkdevs, char *label, size_t size)
+{
+	const int partial = 1;
+
+	return (__fsvtyp_hammer(blkdevs, label, size, partial));
+}
Index: src/usr.sbin/fstyp/hammer2.c
diff -u /dev/null src/usr.sbin/fstyp/hammer2.c:1.1
--- /dev/null	Wed Jan  1 08:56:42 2020
+++ src/usr.sbin/fstyp/hammer2.c	Wed Jan  1 08:56:41 2020
@@ -0,0 +1,319 @@
+/*        $NetBSD: hammer2.c,v 1.1 2020/01/01 08:56:41 tkusumi Exp $      */
+
+/*-
+ * Copyright (c) 2017-2019 The DragonFly Project
+ * Copyright (c) 2017-2019 Tomohiro Kusumi <tkus...@netbsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: hammer2.c,v 1.1 2020/01/01 08:56:41 tkusumi Exp $");
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <err.h>
+#include <assert.h>
+
+#include "fstyp.h"
+#include "hammer2_disk.h"
+
+/*
+ * Read the HAMMER2 volume data found at offset 0 of fp.
+ *
+ * The buffer comes from read_buf() and is released by the caller with
+ * free().  A NULL result from read_buf() terminates the program through
+ * err(3), so this function never returns NULL.
+ */
+static hammer2_volume_data_t*
+read_voldata(FILE *fp)
+{
+	hammer2_volume_data_t *hdr = read_buf(fp, 0, sizeof(*hdr));
+
+	if (hdr == NULL)
+		err(1, "failed to read volume data");
+	return (hdr);
+}
+
+/*
+ * Check the magic number of a HAMMER2 volume header.
+ *
+ * Returns 0 when the magic equals HAMMER2_VOLUME_ID_HBO or
+ * HAMMER2_VOLUME_ID_ABO, and 1 otherwise.
+ */
+static int
+test_voldata(const hammer2_volume_data_t *voldata)
+{
+	const bool known = voldata->magic == HAMMER2_VOLUME_ID_HBO ||
+	    voldata->magic == HAMMER2_VOLUME_ID_ABO;
+
+	return (known ? 0 : 1);
+}
+
+/*
+ * Read the media block referenced by bref.
+ *
+ * The bits of bref->data_off selected by HAMMER2_OFF_MASK_RADIX hold the
+ * log2 of the block size; the remaining bits are the byte offset.  I/O is
+ * issued on a HAMMER2_MINIOSIZE-aligned base with a power-of-two length,
+ * and the referenced data is then moved to the start of the buffer.
+ *
+ * *media_bytes receives the size of the referenced block (0 when the
+ * blockref carries no data or its radix is invalid).
+ *
+ * Returns a buffer the caller must free(), or NULL after a warning.
+ */
+static hammer2_media_data_t*
+read_media(FILE *fp, const hammer2_blockref_t *bref, size_t *media_bytes)
+{
+	hammer2_media_data_t *media;
+	hammer2_off_t io_off, io_base;
+	size_t bytes, io_bytes, boff;
+	unsigned int radix;
+
+	/*
+	 * The radix comes straight from disk.  Reject anything above the
+	 * largest allocation size before shifting: an oversized shift count
+	 * is undefined, and such a block could never pass the size check
+	 * below anyway.
+	 */
+	radix = (unsigned int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
+	if (radix > HAMMER2_RADIX_MAX) {
+		*media_bytes = 0;
+		warnx("Invalid I/O bytes");
+		return (NULL);
+	}
+	bytes = radix ? (size_t)1 << radix : 0;
+	*media_bytes = bytes;
+
+	if (!bytes) {
+		warnx("Blockref has no data");
+		return (NULL);
+	}
+
+	/* Split the offset into an aligned I/O base and the offset in it. */
+	io_off = bref->data_off & ~HAMMER2_OFF_MASK_RADIX;
+	io_base = io_off & ~(hammer2_off_t)(HAMMER2_MINIOSIZE - 1);
+	boff = io_off - io_base;
+
+	/* Grow the I/O size until it covers the whole referenced block. */
+	io_bytes = HAMMER2_MINIOSIZE;
+	while (io_bytes + boff < bytes)
+		io_bytes <<= 1;
+
+	if (io_bytes > sizeof(hammer2_media_data_t)) {
+		warnx("Invalid I/O bytes");
+		return (NULL);
+	}
+
+	/* fseeko() takes an off_t, so large offsets are not truncated. */
+	if (fseeko(fp, (off_t)io_base, SEEK_SET) == -1) {
+		warnx("Failed to seek media");
+		return (NULL);
+	}
+	media = read_buf(fp, (off_t)io_base, io_bytes);
+	if (media == NULL) {
+		warnx("Failed to read media");
+		return (NULL);
+	}
+	/* Source and destination overlap when boff < bytes: use memmove. */
+	if (boff)
+		memmove(media, (char *)media + boff, bytes);
+
+	return (media);
+}
+
+/*
+ * Recursively search the block topology below bref for a PFS root inode
+ * whose name matches pfs.
+ *
+ * fp   - stream the media blocks are read from
+ * bref - blockref to start from
+ * pfs  - PFS name to look for
+ * res  - set to true when a matching PFS root inode is found; never
+ *        cleared here
+ *
+ * Returns 0 when the walk completed and -1 when a media block could not
+ * be read.
+ */
+static int
+find_pfs(FILE *fp, const hammer2_blockref_t *bref, const char *pfs, bool *res)
+{
+	hammer2_media_data_t *blk;
+	hammer2_inode_data_t inode;
+	hammer2_blockref_t *children = NULL;
+	size_t nbytes, namelen;
+	bool terminated;
+	int idx, nchildren = 0, rc = 0;
+
+	blk = read_media(fp, bref, &nbytes);
+	if (blk == NULL)
+		return (-1);
+
+	if (bref->type == HAMMER2_BREF_TYPE_INODE) {
+		inode = blk->ipdata;
+		if (inode.meta.pfs_type & HAMMER2_PFSTYPE_SUPROOT) {
+			/* Descend through the inode's embedded blockset. */
+			children = &inode.u.blockset.blockref[0];
+			nchildren = HAMMER2_SET_COUNT;
+		} else if (inode.meta.op_flags & HAMMER2_OPFLAG_PFSROOT) {
+			/*
+			 * The stored name may fill the whole field without a
+			 * terminating NUL; in that case compare only the
+			 * leading strlen(pfs) bytes.
+			 */
+			terminated = memchr(inode.filename, 0,
+			    sizeof(inode.filename)) != NULL;
+			if (terminated) {
+				if (strcmp((const char*)inode.filename,
+				    pfs) == 0)
+					*res = true;
+			} else {
+				namelen = strlen(pfs);
+				if (namelen > 0 &&
+				    memcmp(inode.filename, pfs, namelen) == 0)
+					*res = true;
+			}
+		} else {
+			assert(0);
+		}
+	} else if (bref->type == HAMMER2_BREF_TYPE_INDIRECT) {
+		/* The block read above is itself an array of blockrefs. */
+		children = &blk->npdata[0];
+		nchildren = (int)(nbytes / sizeof(hammer2_blockref_t));
+	}
+
+	for (idx = 0; idx < nchildren; ++idx) {
+		if (children[idx].type == HAMMER2_BREF_TYPE_EMPTY)
+			continue;
+		if (find_pfs(fp, &children[idx], pfs, res) == -1) {
+			rc = -1;
+			break;
+		}
+	}
+
+	free(blk);
+	return (rc);
+}
+
+/*
+ * Derive a bare device name from a device path.
+ *
+ * Everything from the first '@' onwards is dropped, then the component
+ * following the last '/' is returned.  A path without any '/' is returned
+ * as is (minus the '@' suffix).
+ *
+ * Returns a malloc'ed string the caller must free(), or NULL when devpath
+ * is NULL, when nothing follows the last '/', or when memory cannot be
+ * allocated.
+ */
+static char*
+extract_device_name(const char *devpath)
+{
+	char *copy, *p, *name;
+
+	if (!devpath)
+		return NULL;
+
+	/* Work on a private copy; the '@' suffix is cut off in place. */
+	copy = strdup(devpath);
+	if (copy == NULL)
+		return NULL;
+
+	p = strchr(copy, '@');
+	if (p)
+		*p = 0;
+
+	p = strrchr(copy, '/');
+	if (p == NULL)
+		return copy;
+
+	/* A trailing '/' leaves no name to return. */
+	name = (p[1] != 0) ? strdup(p + 1) : NULL;
+	free(copy);
+	return name;
+}
+
+/*
+ * Derive a label for the HAMMER2 volume on fp.
+ *
+ * Up to HAMMER2_NUM_VOLHDRS volume headers, HAMMER2_ZONE_BYTES64 apart,
+ * are read and the one with the highest mirror_tid is used.  Its first
+ * sroot_blockset blockref must reference an inode.  When find_pfs()
+ * locates the wanted PFS name below that inode, the PFS name becomes the
+ * label; otherwise the filename of that inode is used.  With HAS_DEVPATH
+ * the device name is appended after an underscore.
+ *
+ * label receives at most size bytes and is always NUL-terminated when
+ * size is non-zero.  Returns 0 on success and 1 on failure.
+ */
+static int
+read_label(FILE *fp, char *label, size_t size)
+{
+	hammer2_blockref_t broot, best, *bref;
+	hammer2_media_data_t *vols[HAMMER2_NUM_VOLHDRS], *media = NULL;
+	size_t bytes, n;
+	bool res = false;
+	int i, best_i, error = 0;
+	const char *pfs;
+	char *devname = NULL;
+
+	best_i = -1;
+	memset(&best, 0, sizeof(best));
+
+	/* Start from NULL so the cleanup loop is safe on every path. */
+	for (i = 0; i < HAMMER2_NUM_VOLHDRS; i++)
+		vols[i] = NULL;
+
+	for (i = 0; i < HAMMER2_NUM_VOLHDRS; i++) {
+		memset(&broot, 0, sizeof(broot));
+		broot.type = HAMMER2_BREF_TYPE_VOLUME;
+		broot.data_off = ((hammer2_off_t)i *
+		    (hammer2_off_t)HAMMER2_ZONE_BYTES64) | HAMMER2_PBUFRADIX;
+		vols[i] = read_buf(fp,
+		    (off_t)(broot.data_off & ~HAMMER2_OFF_MASK_RADIX),
+		    sizeof(*vols[i]));
+		/*
+		 * A header that cannot be read (for instance because the
+		 * device ends before this zone) is skipped, not
+		 * dereferenced.
+		 */
+		if (vols[i] == NULL)
+			continue;
+		broot.mirror_tid = vols[i]->voldata.mirror_tid;
+		if (best_i < 0 || best.mirror_tid < broot.mirror_tid) {
+			best_i = i;
+			best = broot;
+		}
+	}
+	if (best_i == -1) {
+		warnx("Failed to find best zone");
+		error = 1;
+		goto done;
+	}
+
+	bref = &vols[best_i]->voldata.sroot_blockset.blockref[0];
+	if (bref->type != HAMMER2_BREF_TYPE_INODE) {
+		warnx("Blockref type is not inode");
+		error = 1;
+		goto done;
+	}
+
+	media = read_media(fp, bref, &bytes);
+	if (media == NULL) {
+		error = 1;
+		goto done;
+	}
+
+	/*
+	 * fstyp_function in DragonFly takes an additional devpath argument
+	 * which doesn't exist in FreeBSD and NetBSD.
+	 */
+#ifdef HAS_DEVPATH
+	pfs = strchr(devpath, '@');
+	if (!pfs) {
+		assert(strlen(devpath));
+		switch (devpath[strlen(devpath) - 1]) {
+		case 'a':
+			pfs = "BOOT";
+			break;
+		case 'd':
+			pfs = "ROOT";
+			break;
+		default:
+			pfs = "DATA";
+			break;
+		}
+	} else
+		pfs++;
+
+	if (strlen(pfs) > HAMMER2_INODE_MAXNAME) {
+		error = 1;
+		goto done;
+	}
+	devname = extract_device_name(devpath);
+#else
+	pfs = "";
+	devname = extract_device_name(NULL);
+	assert(!devname);
+#endif
+
+	/* Add device name to help support multiple autofs -media mounts. */
+	if (find_pfs(fp, bref, pfs, &res) == 0 && res) {
+		if (devname)
+			snprintf(label, size, "%s_%s", pfs, devname);
+		else
+			strlcpy(label, pfs, size);
+	} else {
+		/*
+		 * The on-disk filename need not be NUL-terminated, so zero
+		 * the label first and copy no more than fits while leaving
+		 * room for the terminator.
+		 */
+		memset(label, 0, size);
+		if (size > 0) {
+			n = sizeof(media->ipdata.filename);
+			if (n > size - 1)
+				n = size - 1;
+			memcpy(label, media->ipdata.filename, n);
+		}
+		if (devname) {
+			strlcat(label, "_", size);
+			strlcat(label, devname, size);
+		}
+	}
+done:
+	/* free(NULL) is a no-op, so every exit path can share this. */
+	free(devname);
+	free(media);
+	for (i = 0; i < HAMMER2_NUM_VOLHDRS; i++)
+		free(vols[i]);
+
+	return (error);
+}
+
+/*
+ * Probe fp for a HAMMER2 file system.
+ *
+ * The volume data at offset 0 is read with read_voldata(), which
+ * terminates the program if the read fails, and checked with
+ * test_voldata().  When the magic matches, read_label() fills label (at
+ * most size bytes) and its result is returned.
+ *
+ * Returns 0 on success and non-zero otherwise.
+ */
+int
+fstyp_hammer2(FILE *fp, char *label, size_t size)
+{
+	hammer2_volume_data_t *hdr;
+	int rc = 1;
+
+	hdr = read_voldata(fp);
+	if (test_voldata(hdr) == 0)
+		rc = read_label(fp, label, size);
+
+	free(hdr);
+	return (rc);
+}
Index: src/usr.sbin/fstyp/hammer2_disk.h
diff -u /dev/null src/usr.sbin/fstyp/hammer2_disk.h:1.1
--- /dev/null	Wed Jan  1 08:56:42 2020
+++ src/usr.sbin/fstyp/hammer2_disk.h	Wed Jan  1 08:56:41 2020
@@ -0,0 +1,1376 @@
+/*        $NetBSD: hammer2_disk.h,v 1.1 2020/01/01 08:56:41 tkusumi Exp $      */
+
+/*
+ * Copyright (c) 2011-2019 The DragonFly Project.  All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dil...@dragonflybsd.org>
+ * by Venkatesh Srinivas <vsrini...@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: hammer2_disk.h,v 1.1 2020/01/01 08:56:41 tkusumi Exp $");
+
+#ifndef _VFS_HAMMER2_DISK_H_
+#define _VFS_HAMMER2_DISK_H_
+
+#ifndef _SYS_UUID_H_
+#include <sys/uuid.h>
+#endif
+#if 0
+#ifndef _SYS_DMSG_H_
+#include <sys/dmsg.h>
+#endif
+#endif
+
+/*
+ * The structures below represent the on-disk media structures for the HAMMER2
+ * filesystem.  Note that all fields for on-disk structures are naturally
+ * aligned.  The host endian format is typically used - compatibility is
+ * possible if the implementation detects reversed endian and adjusts accesses
+ * accordingly.
+ *
+ * HAMMER2 primarily revolves around the directory topology:  inodes,
+ * directory entries, and block tables.  Block device buffer cache buffers
+ * are always 64KB.  Logical file buffers are typically 16KB.  All data
+ * references utilize 64-bit byte offsets.
+ *
+ * Free block management is handled independently using blocks reserved by
+ * the media topology.
+ */
+
+/*
+ * The data at the end of a file or directory may be a fragment in order
+ * to optimize storage efficiency.  The minimum fragment size is 1KB.
+ * Since allocations are in powers of 2 fragments must also be sized in
+ * powers of 2 (1024, 2048, ... 65536).
+ *
+ * For the moment the maximum allocation size is HAMMER2_PBUFSIZE (64K),
+ * which is 2^16.  Larger extents may be supported in the future.  Smaller
+ * fragments might be supported in the future (down to 64 bytes is possible),
+ * but probably will not be.
+ *
+ * A full indirect block supports 512 x 128-byte blockrefs in a 64KB
+ * buffer.  Indirect blocks down to 1KB are supported to keep small
+ * directories small.
+ *
+ * A maximally sized file (2^64-1 bytes) requires ~6 indirect block levels
+ * using 64KB indirect blocks (128 byte refs, 512 or radix 9 per indblk).
+ *
+ *	16(datablk) + 9 + 9 + 9 + 9 + 9 + 9 = ~70.
+ *	16(datablk) + 7 + 9 + 9 + 9 + 9 + 9 = ~68.  (smaller top level indblk)
+ *
+ * The actual depth depends on copies redundancy and whether the filesystem
+ * has chosen to use a smaller indirect block size at the top level or not.
+ */
+#define HAMMER2_ALLOC_MIN	1024	/* minimum allocation size */
+#define HAMMER2_RADIX_MIN	10	/* minimum allocation size 2^N */
+#define HAMMER2_ALLOC_MAX	65536	/* maximum allocation size */
+#define HAMMER2_RADIX_MAX	16	/* maximum allocation size 2^N */
+#define HAMMER2_RADIX_KEY	64	/* number of bits in key */
+
+/*
+ * MINALLOCSIZE		- The minimum allocation size.  This can be smaller
+ *		  	  or larger than the minimum physical IO size.
+ *
+ *			  NOTE: Should not be larger than 1K since inodes
+ *				are 1K.
+ *
+ * MINIOSIZE		- The minimum IO size.  This must be less than
+ *			  or equal to HAMMER2_LBUFSIZE.
+ *
+ * HAMMER2_LBUFSIZE	- Nominal buffer size for I/O rollups.
+ *
+ * HAMMER2_PBUFSIZE	- Topological block size used by files for all
+ *			  blocks except the block straddling EOF.
+ *
+ * HAMMER2_SEGSIZE	- Allocation map segment size, typically 4MB
+ *			  (space represented by a level0 bitmap).
+ */
+
+#define HAMMER2_SEGSIZE		(1 << HAMMER2_FREEMAP_LEVEL0_RADIX)
+#define HAMMER2_SEGRADIX	HAMMER2_FREEMAP_LEVEL0_RADIX
+
+#define HAMMER2_PBUFRADIX	16	/* physical buf (1<<16) bytes */
+#define HAMMER2_PBUFSIZE	65536
+#define HAMMER2_LBUFRADIX	14	/* logical buf (1<<14) bytes */
+#define HAMMER2_LBUFSIZE	16384
+
+/*
+ * Generally speaking we want to use 16K and 64K I/Os
+ */
+#define HAMMER2_MINIORADIX	HAMMER2_LBUFRADIX
+#define HAMMER2_MINIOSIZE	HAMMER2_LBUFSIZE
+
+#define HAMMER2_IND_BYTES_MIN	4096
+#define HAMMER2_IND_BYTES_NOM	HAMMER2_LBUFSIZE
+#define HAMMER2_IND_BYTES_MAX	HAMMER2_PBUFSIZE
+#define HAMMER2_IND_RADIX_MIN	12
+#define HAMMER2_IND_RADIX_NOM	HAMMER2_LBUFRADIX
+#define HAMMER2_IND_RADIX_MAX	HAMMER2_PBUFRADIX
+#define HAMMER2_IND_COUNT_MIN	(HAMMER2_IND_BYTES_MIN / \
+				 sizeof(hammer2_blockref_t))
+#define HAMMER2_IND_COUNT_MAX	(HAMMER2_IND_BYTES_MAX / \
+				 sizeof(hammer2_blockref_t))
+
+/*
+ * In HAMMER2, arrays of blockrefs are fully set-associative, meaning that
+ * any element can occur at any index and holes can be anywhere.  As a
+ * future optimization we will be able to flag that such arrays are sorted
+ * and thus optimize lookups, but for now we don't.
+ *
+ * Inodes embed either 512 bytes of direct data or an array of 4 blockrefs,
+ * resulting in highly efficient storage for files <= 512 bytes and for files
+ * <= 512KB.  Up to 4 directory entries can be referenced from a directory
+ * without requiring an indirect block.
+ *
+ * Indirect blocks are typically either 4KB (32 blockrefs / ~4MB represented),
+ * or 64KB (512 blockrefs / ~64MB represented).
+ */
+#define HAMMER2_SET_RADIX		2	/* radix 2 = 4 entries */
+#define HAMMER2_SET_COUNT		(1 << HAMMER2_SET_RADIX)
+#define HAMMER2_EMBEDDED_BYTES		512	/* inode blockset/dd size */
+#define HAMMER2_EMBEDDED_RADIX		9
+
+#define HAMMER2_PBUFMASK	(HAMMER2_PBUFSIZE - 1)
+#define HAMMER2_LBUFMASK	(HAMMER2_LBUFSIZE - 1)
+#define HAMMER2_SEGMASK		(HAMMER2_SEGSIZE - 1)
+
+#define HAMMER2_LBUFMASK64	((hammer2_off_t)HAMMER2_LBUFMASK)
+#define HAMMER2_PBUFSIZE64	((hammer2_off_t)HAMMER2_PBUFSIZE)
+#define HAMMER2_PBUFMASK64	((hammer2_off_t)HAMMER2_PBUFMASK)
+#define HAMMER2_SEGSIZE64	((hammer2_off_t)HAMMER2_SEGSIZE)
+#define HAMMER2_SEGMASK64	((hammer2_off_t)HAMMER2_SEGMASK)
+
+#define HAMMER2_UUID_STRING	"5cbb9ad1-862d-11dc-a94d-01301bb8a9f5"
+
+/*
+ * A 4MB segment is reserved at the beginning of each 2GB zone.  This segment
+ * contains the volume header (or backup volume header), the free block
+ * table, and possibly other information in the future.  A 4MB segment for
+ * freemap is reserved at the beginning of every 1GB.
+ *
+ * 4MB = 64 x 64K blocks.  Each 4MB segment is broken down as follows:
+ *
+ * ==========
+ *  0 volume header (for the first four 2GB zones)
+ *  1 freemap00 level1 FREEMAP_LEAF (256 x 128B bitmap data per 1GB)
+ *  2           level2 FREEMAP_NODE (256 x 128B indirect block per 256GB)
+ *  3           level3 FREEMAP_NODE (256 x 128B indirect block per 64TB)
+ *  4           level4 FREEMAP_NODE (256 x 128B indirect block per 16PB)
+ *  5           level5 FREEMAP_NODE (256 x 128B indirect block per 4EB)
+ *  6 freemap01 level1 (rotation)
+ *  7           level2
+ *  8           level3
+ *  9           level4
+ * 10           level5
+ * 11 freemap02 level1 (rotation)
+ * 12           level2
+ * 13           level3
+ * 14           level4
+ * 15           level5
+ * 16 freemap03 level1 (rotation)
+ * 17           level2
+ * 18           level3
+ * 19           level4
+ * 20           level5
+ * 21 freemap04 level1 (rotation)
+ * 22           level2
+ * 23           level3
+ * 24           level4
+ * 25           level5
+ * 26 freemap05 level1 (rotation)
+ * 27           level2
+ * 28           level3
+ * 29           level4
+ * 30           level5
+ * 31 freemap06 level1 (rotation)
+ * 32           level2
+ * 33           level3
+ * 34           level4
+ * 35           level5
+ * 36 freemap07 level1 (rotation)
+ * 37           level2
+ * 38           level3
+ * 39           level4
+ * 40           level5
+ * 41 unused
+ * .. unused
+ * 63 unused
+ * ==========
+ *
+ * The first four 2GB zones contain volume headers and volume header backups.
+ * After that the volume header block# is reserved for future use.  Similarly,
+ * there are many blocks related to various Freemap levels which are not
+ * used in every segment and those are also reserved for future use.
+ * Note that each FREEMAP_LEAF or FREEMAP_NODE uses 32KB out of 64KB slot.
+ *
+ *			Freemap (see the FREEMAP document)
+ *
+ * The freemap utilizes blocks #1-40 in 8 sets of 5 blocks.  Each block in
+ * a set represents a level of depth in the freemap topology.  Eight sets
+ * exist to prevent live updates from disturbing the state of the freemap
+ * were a crash/reboot to occur.  That is, a live update is not committed
+ * until the update's flush reaches the volume root.  There are FOUR volume
+ * roots representing the last four synchronization points, so the freemap
+ * must be consistent no matter which volume root is chosen by the mount
+ * code.
+ *
+ * Each freemap set is 5 x 64K blocks and represents the 1GB, 256GB, 64TB,
+ * 16PB and 4EB indirect map.  The volume header itself has a set of 4 freemap
+ * blockrefs representing another 2 bits, giving us a total 64 bits of
+ * representable address space.
+ *
+ * The Level 0 64KB block represents 1GB of storage represented by 32KB
+ * (256 x struct hammer2_bmap_data).  Each structure represents 4MB of storage
+ * and has a 512 bit bitmap, using 2 bits to represent a 16KB chunk of
+ * storage.  These 2 bits represent the following states:
+ *
+ *	00	Free
+ *	01	(reserved) (Possibly partially allocated)
+ *	10	Possibly free
+ *	11	Allocated
+ *
+ * One important thing to note here is that the freemap resolution is 16KB,
+ * but the minimum storage allocation size is 1KB.  The hammer2 vfs keeps
+ * track of sub-allocations in memory, which means that on an unmount or reboot
+ * the entire 16KB of a partially allocated block will be considered fully
+ * allocated.  It is possible for fragmentation to build up over time, but
+ * defragmentation is fairly easy to accomplish since all modifications
+ * allocate a new block.
+ *
+ * The second thing to note is that due to the way snapshots and inode
+ * replication works, deleting a file cannot immediately free the related
+ * space.  Furthermore, deletions often do not bother to traverse the
+ * block subhierarchy being deleted.  And to go even further, whole
+ * sub-directory trees can be deleted simply by deleting the directory inode
+ * at the top.  So even though we have a symbol to represent a 'possibly free'
+ * block (binary 10), only the bulk free scanning code can actually use it.
+ * Normal 'rm's or other deletions do not.
+ *
+ * WARNING!  ZONE_SEG and VOLUME_ALIGN must be a multiple of 1<<LEVEL0_RADIX
+ *	     (i.e. a multiple of 4MB).  VOLUME_ALIGN must be >= ZONE_SEG.
+ *
+ * In Summary:
+ *
+ * (1) Modifications to freemap blocks 'allocate' a new copy (aka use a block
+ *     from the next set).  The new copy is reused until a flush occurs at
+ *     which point the next modification will then rotate to the next set.
+ */
+#define HAMMER2_VOLUME_ALIGN		(8 * 1024 * 1024)	/* 8MB */
+#define HAMMER2_VOLUME_ALIGN64		((hammer2_off_t)HAMMER2_VOLUME_ALIGN)
+#define HAMMER2_VOLUME_ALIGNMASK	(HAMMER2_VOLUME_ALIGN - 1)
+#define HAMMER2_VOLUME_ALIGNMASK64     ((hammer2_off_t)HAMMER2_VOLUME_ALIGNMASK)
+
+#define HAMMER2_NEWFS_ALIGN		(HAMMER2_VOLUME_ALIGN)	/* same as volume alignment */
+#define HAMMER2_NEWFS_ALIGN64		((hammer2_off_t)HAMMER2_VOLUME_ALIGN)
+#define HAMMER2_NEWFS_ALIGNMASK		(HAMMER2_VOLUME_ALIGN - 1)
+#define HAMMER2_NEWFS_ALIGNMASK64	((hammer2_off_t)HAMMER2_NEWFS_ALIGNMASK)
+
+#define HAMMER2_ZONE_BYTES64		(2LLU * 1024 * 1024 * 1024)	/* 2GB */
+#define HAMMER2_ZONE_MASK64		(HAMMER2_ZONE_BYTES64 - 1)
+#define HAMMER2_ZONE_SEG		(4 * 1024 * 1024)	/* 4MB */
+#define HAMMER2_ZONE_SEG64		((hammer2_off_t)HAMMER2_ZONE_SEG)
+#define HAMMER2_ZONE_BLOCKS_SEG		(HAMMER2_ZONE_SEG / HAMMER2_PBUFSIZE)	/* PBUFSIZE-sized blocks per segment */
+
+#define HAMMER2_ZONE_FREEMAP_INC	5	/* 5 deep: stride between FREEMAP_xx sets */
+
+#define HAMMER2_ZONE_VOLHDR		0	/* volume header or backup */
+#define HAMMER2_ZONE_FREEMAP_00		1	/* normal freemap rotation, set 0 */
+#define HAMMER2_ZONE_FREEMAP_01		6	/* normal freemap rotation, set 1 */
+#define HAMMER2_ZONE_FREEMAP_02		11	/* normal freemap rotation, set 2 */
+#define HAMMER2_ZONE_FREEMAP_03		16	/* normal freemap rotation, set 3 */
+#define HAMMER2_ZONE_FREEMAP_04		21	/* normal freemap rotation, set 4 */
+#define HAMMER2_ZONE_FREEMAP_05		26	/* normal freemap rotation, set 5 */
+#define HAMMER2_ZONE_FREEMAP_06		31	/* normal freemap rotation, set 6 */
+#define HAMMER2_ZONE_FREEMAP_07		36	/* normal freemap rotation, set 7 */
+#define HAMMER2_ZONE_FREEMAP_END	41	/* (non-inclusive) == 1 + 8 * 5 */
+
+#define HAMMER2_ZONE_UNUSED41		41
+#define HAMMER2_ZONE_UNUSED42		42
+#define HAMMER2_ZONE_UNUSED43		43
+#define HAMMER2_ZONE_UNUSED44		44
+#define HAMMER2_ZONE_UNUSED45		45
+#define HAMMER2_ZONE_UNUSED46		46
+#define HAMMER2_ZONE_UNUSED47		47
+#define HAMMER2_ZONE_UNUSED48		48
+#define HAMMER2_ZONE_UNUSED49		49
+#define HAMMER2_ZONE_UNUSED50		50
+#define HAMMER2_ZONE_UNUSED51		51
+#define HAMMER2_ZONE_UNUSED52		52
+#define HAMMER2_ZONE_UNUSED53		53
+#define HAMMER2_ZONE_UNUSED54		54
+#define HAMMER2_ZONE_UNUSED55		55
+#define HAMMER2_ZONE_UNUSED56		56
+#define HAMMER2_ZONE_UNUSED57		57
+#define HAMMER2_ZONE_UNUSED58		58
+#define HAMMER2_ZONE_UNUSED59		59
+#define HAMMER2_ZONE_UNUSED60		60
+#define HAMMER2_ZONE_UNUSED61		61
+#define HAMMER2_ZONE_UNUSED62		62
+#define HAMMER2_ZONE_UNUSED63		63
+#define HAMMER2_ZONE_END		64	/* non-inclusive */
+
+#define HAMMER2_NFREEMAPS		8	/* FREEMAP_00 - FREEMAP_07 */
+
+						/* index relative to FREEMAP_x */
+#define HAMMER2_ZONEFM_LEVEL1		0	/* 1GB leafmap */
+#define HAMMER2_ZONEFM_LEVEL2		1	/* 256GB indmap */
+#define HAMMER2_ZONEFM_LEVEL3		2	/* 64TB indmap */
+#define HAMMER2_ZONEFM_LEVEL4		3	/* 16PB indmap */
+#define HAMMER2_ZONEFM_LEVEL5		4	/* 4EB indmap */
+/* LEVEL6 is a set of 4 blockrefs in the volume header, covering 16EB */
+
+/*
+ * Freemap radix.  Assumes a set-count of 4, 128-byte blockrefs,
+ * 32KB indirect block for freemap (LEVELN_PSIZE below).
+ *
+ * Leaf entry represents 4MB of storage broken down into a 512-bit
+ * bitmap, 2-bits per entry.  So each bitmap item represents 16KB.
+ */
+#if HAMMER2_SET_COUNT != 4
+#error "hammer2_disk.h - freemap assumes SET_COUNT is 4"
+#endif	/* HAMMER2_SET_COUNT != 4 */
+#define HAMMER2_FREEMAP_LEVEL6_RADIX	64	/* 16EB (end) */
+#define HAMMER2_FREEMAP_LEVEL5_RADIX	62	/* 4EB */
+#define HAMMER2_FREEMAP_LEVEL4_RADIX	54	/* 16PB */
+#define HAMMER2_FREEMAP_LEVEL3_RADIX	46	/* 64TB */
+#define HAMMER2_FREEMAP_LEVEL2_RADIX	38	/* 256GB */
+#define HAMMER2_FREEMAP_LEVEL1_RADIX	30	/* 1GB */
+#define HAMMER2_FREEMAP_LEVEL0_RADIX	22	/* 4MB (128 bytes in level-1 leaf) */
+
+#define HAMMER2_FREEMAP_LEVELN_PSIZE	32768	/* physical bytes (32KB) */
+
+#define HAMMER2_FREEMAP_LEVEL5_SIZE	((hammer2_off_t)1 <<		\
+					 HAMMER2_FREEMAP_LEVEL5_RADIX)
+#define HAMMER2_FREEMAP_LEVEL4_SIZE	((hammer2_off_t)1 <<		\
+					 HAMMER2_FREEMAP_LEVEL4_RADIX)
+#define HAMMER2_FREEMAP_LEVEL3_SIZE	((hammer2_off_t)1 <<		\
+					 HAMMER2_FREEMAP_LEVEL3_RADIX)
+#define HAMMER2_FREEMAP_LEVEL2_SIZE	((hammer2_off_t)1 <<		\
+					 HAMMER2_FREEMAP_LEVEL2_RADIX)
+#define HAMMER2_FREEMAP_LEVEL1_SIZE	((hammer2_off_t)1 <<		\
+					 HAMMER2_FREEMAP_LEVEL1_RADIX)
+#define HAMMER2_FREEMAP_LEVEL0_SIZE	((hammer2_off_t)1 <<		\
+					 HAMMER2_FREEMAP_LEVEL0_RADIX)
+
+#define HAMMER2_FREEMAP_LEVEL5_MASK	(HAMMER2_FREEMAP_LEVEL5_SIZE - 1)
+#define HAMMER2_FREEMAP_LEVEL4_MASK	(HAMMER2_FREEMAP_LEVEL4_SIZE - 1)
+#define HAMMER2_FREEMAP_LEVEL3_MASK	(HAMMER2_FREEMAP_LEVEL3_SIZE - 1)
+#define HAMMER2_FREEMAP_LEVEL2_MASK	(HAMMER2_FREEMAP_LEVEL2_SIZE - 1)
+#define HAMMER2_FREEMAP_LEVEL1_MASK	(HAMMER2_FREEMAP_LEVEL1_SIZE - 1)
+#define HAMMER2_FREEMAP_LEVEL0_MASK	(HAMMER2_FREEMAP_LEVEL0_SIZE - 1)
+
+#define HAMMER2_FREEMAP_COUNT		(int)(HAMMER2_FREEMAP_LEVELN_PSIZE / \
+					 sizeof(hammer2_bmap_data_t))	/* bmap entries per LEVELN_PSIZE block */
+
+/*
+ * XXX I made a mistake and made the reserved area begin at each LEVEL1 zone,
+ *     which is on a 1GB demark.  This will eat a little more space but for
+ *     now we retain compatibility and make FMZONEBASE every 1GB
+ */
+#define H2FMZONEBASE(key)	((key) & ~HAMMER2_FREEMAP_LEVEL1_MASK)	/* clear low 30 bits: 1GB zone base */
+#define H2FMBASE(key, radix)	rounddown2(key, (hammer2_off_t)1 << (radix))	/* rounddown2() comes from elsewhere; presumably aligns key down to 1<<radix */
+
+/*
+ * 16KB bitmap granularity (x2 bits per entry).
+ */
+#define HAMMER2_FREEMAP_BLOCK_RADIX	14	/* log2(16KB) */
+#define HAMMER2_FREEMAP_BLOCK_SIZE	(1 << HAMMER2_FREEMAP_BLOCK_RADIX)	/* 16KB */
+#define HAMMER2_FREEMAP_BLOCK_MASK	(HAMMER2_FREEMAP_BLOCK_SIZE - 1)
+
+/*
+ * bitmap[] structure.  2 bits per HAMMER2_FREEMAP_BLOCK_SIZE.
+ *
+ * 8 x 64-bit elements, 2 bits per block.
+ * 32 blocks (radix 5) per element.
+ * representing INDEX_SIZE bytes worth of storage per element.
+ */
+
+typedef uint64_t			hammer2_bitmap_t;	/* one 64-bit bitmap element */
+
+#define HAMMER2_BMAP_ALLONES		((hammer2_bitmap_t)-1)	/* all 64 bits set */
+#define HAMMER2_BMAP_ELEMENTS		8
+#define HAMMER2_BMAP_BITS_PER_ELEMENT	64
+#define HAMMER2_BMAP_INDEX_RADIX	5	/* 32 blocks per element */
+#define HAMMER2_BMAP_BLOCKS_PER_ELEMENT	(1 << HAMMER2_BMAP_INDEX_RADIX)
+
+#define HAMMER2_BMAP_INDEX_SIZE		(HAMMER2_FREEMAP_BLOCK_SIZE * \
+					 HAMMER2_BMAP_BLOCKS_PER_ELEMENT)	/* 16KB * 32 = 512KB */
+#define HAMMER2_BMAP_INDEX_MASK		(HAMMER2_BMAP_INDEX_SIZE - 1)
+
+#define HAMMER2_BMAP_SIZE		(HAMMER2_BMAP_INDEX_SIZE * \
+					 HAMMER2_BMAP_ELEMENTS)	/* 512KB * 8 = 4MB */
+#define HAMMER2_BMAP_MASK		(HAMMER2_BMAP_SIZE - 1)
+
+/*
+ * Two linear areas can be reserved after the initial 4MB segment in the base
+ * zone (the one starting at offset 0).  These areas are NOT managed by the
+ * block allocator and do not fall under HAMMER2 crc checking rules based
+ * at the volume header (but can be self-CRCd internally, depending).
+ */
+#define HAMMER2_BOOT_MIN_BYTES		HAMMER2_VOLUME_ALIGN
+#define HAMMER2_BOOT_NOM_BYTES		(64*1024*1024)	/* 64MB */
+#define HAMMER2_BOOT_MAX_BYTES		(256*1024*1024)	/* 256MB */
+
+#define HAMMER2_REDO_MIN_BYTES		HAMMER2_VOLUME_ALIGN
+#define HAMMER2_REDO_NOM_BYTES		(256*1024*1024)	/* 256MB */
+#define HAMMER2_REDO_MAX_BYTES		(1024*1024*1024)	/* 1GB */
+
+/*
+ * Most HAMMER2 types are implemented as unsigned 64-bit integers.
+ * Transaction ids are monotonic.
+ *
+ * We utilize 32-bit iSCSI CRCs.
+ */
+typedef uint64_t hammer2_tid_t;		/* transaction id */
+typedef uint64_t hammer2_off_t;		/* data offset */
+typedef uint64_t hammer2_key_t;		/* key */
+typedef uint32_t hammer2_crc32_t;	/* 32-bit CRC value */
+
+/*
+ * Miscellaneous ranges (all are unsigned).
+ */
+#define HAMMER2_TID_MIN		1ULL	/* note: starts at 1, not 0 */
+#define HAMMER2_TID_MAX		0xFFFFFFFFFFFFFFFFULL
+#define HAMMER2_KEY_MIN		0ULL
+#define HAMMER2_KEY_MAX		0xFFFFFFFFFFFFFFFFULL
+#define HAMMER2_OFFSET_MIN	0ULL
+#define HAMMER2_OFFSET_MAX	0xFFFFFFFFFFFFFFFFULL
+
+/*
+ * HAMMER2 data offset special cases and masking.
+ *
+ * All HAMMER2 data offsets have to be broken down into a 64K buffer base
+ * offset (HAMMER2_OFF_MASK_HI) and a 64K buffer index (HAMMER2_OFF_MASK_LO).
+ *
+ * Indexes into physical buffers are always 64-byte aligned.  The low 6 bits
+ * of the data offset field specifies how large the data chunk being pointed
+ * to as a power of 2.  The theoretical minimum radix is thus 6 (The space
+ * needed in the low bits of the data offset field).  However, the practical
+ * minimum allocation chunk size is 1KB (a radix of 10), so HAMMER2 sets
+ * HAMMER2_RADIX_MIN to 10.  The maximum radix is currently 16 (64KB), but
+ * we fully intend to support larger extents in the future.
+ *
+ * WARNING! A radix of 0 (such as when data_off is all 0's) is a special
+ *	    case which means no data associated with the blockref, and
+ *	    not the '1 byte' it would otherwise calculate to.
+ */
+#define HAMMER2_OFF_BAD		((hammer2_off_t)-1)	/* all bits set */
+#define HAMMER2_OFF_MASK	0xFFFFFFFFFFFFFFC0ULL	/* offset with the low 6 (radix) bits cleared */
+#define HAMMER2_OFF_MASK_LO	(HAMMER2_OFF_MASK & HAMMER2_PBUFMASK64)	/* index within the physical buffer */
+#define HAMMER2_OFF_MASK_HI	(~HAMMER2_PBUFMASK64)	/* physical buffer base offset */
+#define HAMMER2_OFF_MASK_RADIX	0x000000000000003FULL	/* low 6 bits: size radix */
+#define HAMMER2_MAX_COPIES	6
+
+/*
+ * HAMMER2 directory support and pre-defined keys
+ */
+#define HAMMER2_DIRHASH_VISIBLE	0x8000000000000000ULL	/* bit 63 */
+#define HAMMER2_DIRHASH_USERMSK	0x7FFFFFFFFFFFFFFFULL	/* bits 0-62 */
+#define HAMMER2_DIRHASH_LOMASK	0x0000000000007FFFULL	/* bits 0-14 */
+#define HAMMER2_DIRHASH_HIMASK	0xFFFFFFFFFFFF0000ULL	/* bits 16-63 */
+#define HAMMER2_DIRHASH_FORCED	0x0000000000008000ULL	/* bit 15, forced on */
+
+#define HAMMER2_SROOT_KEY	0x0000000000000000ULL	/* volume to sroot */
+#define HAMMER2_BOOT_KEY	0xd9b36ce135528000ULL	/* sroot to BOOT PFS */
+
+/************************************************************************
+ *				DMSG SUPPORT				*
+ ************************************************************************
+ * LNK_VOLCONF
+ *
+ * All HAMMER2 directories directly under the super-root on your local
+ * media can be mounted separately, even if they share the same physical
+ * device.
+ *
+ * When you do a HAMMER2 mount you are effectively tying into a HAMMER2
+ * cluster via local media.  The local media does not have to participate
+ * in the cluster, other than to provide the hammer2_volconf[] array and
+ * root inode for the mount.
+ *
+ * This is important: The mount device path you specify serves to bootstrap
+ * your entry into the cluster, but your mount will make active connections
+ * to ALL copy elements in the hammer2_volconf[] array which match the
+ * PFSID of the directory in the super-root that you specified.  The local
+ * media path does not have to be mentioned in this array but becomes part
+ * of the cluster based on its type and access rights.  ALL ELEMENTS ARE
+ * TREATED ACCORDING TO TYPE NO MATTER WHICH ONE YOU MOUNT FROM.
+ *
+ * The actual cluster may be far larger than the elements you list in the
+ * hammer2_volconf[] array.  You list only the elements you wish to
+ * directly connect to and you are able to access the rest of the cluster
+ * indirectly through those connections.
+ *
+ * WARNING!  This structure must be exactly 128 bytes long for its config
+ *	     array to fit in the volume header.
+ */
+struct hammer2_volconf {
+	uint8_t	copyid;		/* 00	 copyid 0-255 (must match slot) */
+	uint8_t inprog;		/* 01	 operation in progress, or 0 */
+	uint8_t chain_to;	/* 02	 operation chaining to, or 0 */
+	uint8_t chain_from;	/* 03	 operation chaining from, or 0 */
+	uint16_t flags;		/* 04-05 flags field (presumably DMSG_VOLF_ENABLED/INPROG; confirm) */
+	uint8_t error;		/* 06	 last operational error */
+	uint8_t priority;	/* 07	 priority and round-robin flag */
+	uint8_t remote_pfs_type;/* 08	 probed direct remote PFS type */
+	uint8_t reserved08[23];	/* 09-1F reserved (name says 08, field starts at 09) */
+	uuid_t	pfs_clid;	/* 20-2F copy target must match this uuid */
+	uint8_t label[16];	/* 30-3F import/export label */
+	uint8_t path[64];	/* 40-7F target specification string or key */
+} __packed;			/* offsets above end at 7F: 128 bytes total */
+
+typedef struct hammer2_volconf hammer2_volconf_t;
+
+#define DMSG_VOLF_ENABLED	0x0001	/* NOTE(review): presumably a hammer2_volconf.flags bit; confirm */
+#define DMSG_VOLF_INPROG	0x0002	/* NOTE(review): presumably a hammer2_volconf.flags bit; confirm */
+#define DMSG_VOLF_CONN_RR	0x80	/* round-robin at same priority */
+#define DMSG_VOLF_CONN_EF	0x40	/* media errors flagged */
+#define DMSG_VOLF_CONN_PRI	0x0F	/* select priority 0-15 (15=best) */
+
+#if 0	/* struct body is compiled out */
+struct dmsg_lnk_hammer2_volconf {
+	dmsg_hdr_t		head;
+	hammer2_volconf_t	copy;	/* copy spec */
+	int32_t			index;
+	int32_t			unused01;
+	uuid_t			mediaid;
+	int64_t			reserved02[32];
+} __packed;
+#endif	/* 0 */
+
+typedef struct dmsg_lnk_hammer2_volconf dmsg_lnk_hammer2_volconf_t;	/* incomplete type while the struct above is #if 0'd */
+
+#define DMSG_LNK_HAMMER2_VOLCONF DMSG_LNK(DMSG_LNK_CMD_HAMMER2_VOLCONF, \
+					  dmsg_lnk_hammer2_volconf)	/* DMSG_LNK() is not defined in this section */
+
+#define H2_LNK_VOLCONF(msg)	((dmsg_lnk_hammer2_volconf_t *)(msg)->any.buf)	/* view msg->any.buf as a volconf message */
+
+/*
+ * HAMMER2 directory entry header (embedded in blockref)  exactly 16 bytes
+ */
+struct hammer2_dirent_head {
+	hammer2_tid_t		inum;		/* 00-07 inode number */
+	uint16_t		namlen;		/* 08-09 name length */
+	uint8_t			type;		/* 0A OBJTYPE_*	*/
+	uint8_t			unused0B;	/* 0B */
+	uint8_t			unused0C[4];	/* 0C-0F */
+} __packed;
+
+typedef struct hammer2_dirent_head hammer2_dirent_head_t;
+
+/*
+ * The media block reference structure.  This forms the core of the HAMMER2
+ * media topology recursion.  This 128-byte data structure is embedded in the
+ * volume header, in inodes (which are also directory entries), and in
+ * indirect blocks.
+ *
+ * A blockref references a single media item, which typically can be a
+ * directory entry (aka inode), indirect block, or data block.
+ *
+ * The primary feature a blockref represents is the ability to validate
+ * the entire tree underneath it via its check code.  Any modification to
+ * anything propagates up the blockref tree all the way to the root, replacing
+ * the related blocks and compounding the generated check code.
+ *
+ * The check code can be a simple 32-bit iscsi code, a 64-bit crc, or as
+ * complex as a 512 bit cryptographic hash.  I originally used a 64-byte
+ * blockref but later expanded it to 128 bytes to be able to support the
+ * larger check code as well as to embed statistics for quota operation.
+ *
+ * Simple check codes are not sufficient for unverified dedup.  Even with
+ * a maximally-sized check code unverified dedup should only be used in
+ * subdirectory trees where you do not need 100% data integrity.
+ *
+ * Unverified dedup is deduping based on meta-data only without verifying
+ * that the data blocks are actually identical.  Verified dedup guarantees
+ * integrity but is a far more I/O-expensive operation.
+ *
+ * --
+ *
+ * mirror_tid - per cluster node modified (propagated upward by flush)
+ * modify_tid - clc record modified (not propagated).
+ * update_tid - clc record updated (propagated upward on verification)
+ *
+ * CLC - Stands for 'Cluster Level Change', identifiers which are identical
+ *	 within the topology across all cluster nodes (when fully
+ *	 synchronized).
+ *
+ * NOTE: The range of keys represented by the blockref is (key) to
+ *	 ((key) + (1LL << keybits) - 1).  HAMMER2 usually populates
+ *	 blocks bottom-up, inserting a new root when radix expansion
+ *	 is required.
+ *
+ * leaf_count  - Helps manage leaf collapse calculations when indirect
+ *		 blocks become mostly empty.  This value caps out at
+ *		 HAMMER2_BLOCKREF_LEAF_MAX (65535).
+ *
+ *		 Used by the chain code to determine when to pull leafs up
+ *		 from nearly empty indirect blocks.  For the purposes of this
+ *		 calculation, BREF_TYPE_INODE is considered a leaf, along
+ *		 with DIRENT and DATA.
+ *
+ *				    RESERVED FIELDS
+ *
+ * A number of blockref fields are reserved and should generally be set to
+ * 0 for future compatibility.
+ *
+ *				FUTURE BLOCKREF EXPANSION
+ *
+ * CONTENT ADDRESSABLE INDEXING (future) - Using a 256 or 512-bit check code.
+ */
+struct hammer2_blockref {		/* MUST BE EXACTLY 128 BYTES */
+	uint8_t		type;		/* 00 type of underlying item */
+	uint8_t		methods;	/* 01 check method & compression method */
+	uint8_t		copyid;		/* 02 specify which copy this is */
+	uint8_t		keybits;	/* 03 #of keybits masked off 0=leaf */
+	uint8_t		vradix;		/* 04 virtual data/meta-data size */
+	uint8_t		flags;		/* 05 blockref flags */
+	uint16_t	leaf_count;	/* 06-07 leaf aggregation count */
+	hammer2_key_t	key;		/* 08 key specification */
+	hammer2_tid_t	mirror_tid;	/* 10 media flush topology & freemap */
+	hammer2_tid_t	modify_tid;	/* 18 clc modify (not propagated) */
+	hammer2_off_t	data_off;	/* 20 low 6 bits is phys size (radix)*/
+	hammer2_tid_t	update_tid;	/* 28 clc modify (propagated upward) */
+	union {				/* 30-3F type-dependent embedded data */
+		char	buf[16];	/* raw view of the 16 embedded bytes */
+
+		/*
+		 * Directory entry header (BREF_TYPE_DIRENT)
+		 *
+		 * NOTE: check.buf contains filename if <= 64 bytes.  Longer
+		 *	 filenames are stored in a data reference of size
+		 *	 HAMMER2_ALLOC_MIN (at least 256, typically 1024).
+		 *
+		 * NOTE: inode structure may contain a copy of a recently
+		 *	 associated filename, for recovery purposes.
+		 *
+		 * NOTE: Superroot entries are INODEs, not DIRENTs.  Code
+		 *	 allows both cases.
+		 */
+		hammer2_dirent_head_t dirent;
+
+		/*
+		 * Statistics aggregation (BREF_TYPE_INODE, BREF_TYPE_INDIRECT)
+		 */
+		struct {
+			hammer2_key_t	data_count;
+			hammer2_key_t	inode_count;
+		} stats;
+	} embed;
+	union {				/* 40-7F check info */
+		char	buf[64];	/* raw view of the 64 check bytes */
+		struct {
+			uint32_t value;
+			uint32_t reserved[15];
+		} iscsi32;
+		struct {
+			uint64_t value;
+			uint64_t reserved[7];
+		} xxhash64;
+		struct {
+			char data[24];		/* 24 bytes = 192 bits */
+			char reserved[40];
+		} sha192;
+		struct {
+			char data[32];		/* 32 bytes = 256 bits */
+			char reserved[32];
+		} sha256;
+		struct {
+			char data[64];		/* 64 bytes = 512 bits */
+		} sha512;
+
+		/*
+		 * Freemap hints are embedded in addition to the icrc32.
+		 *
+		 * bigmask - Radixes available for allocation (0-31).
+		 *	     Heuristical (may be permissive but not
+		 *	     restrictive).  Typically only radix values
+		 *	     10-16 are used (i.e. (1<<10) through (1<<16)).
+		 *
+		 * avail   - Total available space remaining, in bytes
+		 */
+		struct {
+			uint32_t icrc32;
+			uint32_t bigmask;	/* available radixes */
+			uint64_t avail;		/* total available bytes */
+			char reserved[48];
+		} freemap;
+	} check;
+} __packed;
+
+typedef struct hammer2_blockref hammer2_blockref_t;
+
+#define HAMMER2_BLOCKREF_BYTES		128	/* blockref struct in bytes */
+#define HAMMER2_BLOCKREF_RADIX		7	/* 1 << 7 == 128 */
+
+#define HAMMER2_BLOCKREF_LEAF_MAX	65535	/* largest value of the uint16_t leaf_count */
+
+/*
+ * On-media and off-media blockref types.
+ *
+ * types >= 128 are pseudo values that should never be present on-media.
+ */
+#define HAMMER2_BREF_TYPE_EMPTY		0
+#define HAMMER2_BREF_TYPE_INODE		1	/* embed.stats applies */
+#define HAMMER2_BREF_TYPE_INDIRECT	2	/* embed.stats applies */
+#define HAMMER2_BREF_TYPE_DATA		3
+#define HAMMER2_BREF_TYPE_DIRENT	4	/* embed.dirent applies */
+#define HAMMER2_BREF_TYPE_FREEMAP_NODE	5
+#define HAMMER2_BREF_TYPE_FREEMAP_LEAF	6
+#define HAMMER2_BREF_TYPE_FREEMAP	254	/* pseudo-type (>= 128, never on-media) */
+#define HAMMER2_BREF_TYPE_VOLUME	255	/* pseudo-type (>= 128, never on-media) */
+
+#define HAMMER2_BREF_FLAG_PFSROOT	0x01	/* see also related opflag */
+#define HAMMER2_BREF_FLAG_ZERO		0x02
+#define HAMMER2_BREF_FLAG_EMERG_MIP	0x04	/* emerg modified-in-place */
+
+/*
+ * Encode/decode check mode and compression mode for
+ * bref.methods.  The compression level is not encoded in
+ * bref.methods.
+ */
+#define HAMMER2_ENC_CHECK(n)		(((n) & 15) << 4)	/* check mode -> bits 4-7 */
+#define HAMMER2_DEC_CHECK(n)		(((n) >> 4) & 15)	/* bits 4-7 -> check mode */
+#define HAMMER2_ENC_COMP(n)		((n) & 15)	/* comp mode -> bits 0-3 */
+#define HAMMER2_DEC_COMP(n)		((n) & 15)	/* bits 0-3 -> comp mode */
+
+#define HAMMER2_CHECK_NONE		0
+#define HAMMER2_CHECK_DISABLED		1
+#define HAMMER2_CHECK_ISCSI32		2
+#define HAMMER2_CHECK_XXHASH64		3
+#define HAMMER2_CHECK_SHA192		4
+#define HAMMER2_CHECK_FREEMAP		5	/* no entry in CHECK_STRINGS below */
+
+#define HAMMER2_CHECK_DEFAULT		HAMMER2_CHECK_XXHASH64
+
+/* user-specifiable check modes only */
+#define HAMMER2_CHECK_STRINGS		{ "none", "disabled", "crc32", \
+					  "xxhash64", "sha192" }	/* same order as HAMMER2_CHECK_* values 0-4 */
+#define HAMMER2_CHECK_STRINGS_COUNT	5
+
+/*
+ * Encode/decode check or compression algorithm request in
+ * ipdata->meta.check_algo and ipdata->meta.comp_algo.
+ */
+#define HAMMER2_ENC_ALGO(n)		(n)	/* passed through unmasked */
+#define HAMMER2_DEC_ALGO(n)		((n) & 15)	/* algorithm is bits 0-3 */
+#define HAMMER2_ENC_LEVEL(n)		((n) << 4)	/* level goes to bits 4 and up */
+#define HAMMER2_DEC_LEVEL(n)		(((n) >> 4) & 15)	/* level is bits 4-7 */
+
+#define HAMMER2_COMP_NONE		0
+#define HAMMER2_COMP_AUTOZERO		1
+#define HAMMER2_COMP_LZ4		2
+#define HAMMER2_COMP_ZLIB		3
+
+#define HAMMER2_COMP_NEWFS_DEFAULT	HAMMER2_COMP_LZ4
+#define HAMMER2_COMP_STRINGS		{ "none", "autozero", "lz4", "zlib" }	/* same order as HAMMER2_COMP_* values 0-3 */
+#define HAMMER2_COMP_STRINGS_COUNT	4
+
+/*
+ * Passed to hammer2_chain_create(), causes methods to be inherited from
+ * parent.
+ */
+#define HAMMER2_METH_DEFAULT		-1	/* inherit methods from parent */
+
+/*
+ * HAMMER2 block references are collected into sets of 4 blockrefs.  These
+ * sets are fully associative, meaning the elements making up a set are
+ * not sorted in any way and may contain duplicate entries, holes, or
+ * entries which shortcut multiple levels of indirection.  Sets are used
+ * in various ways:
+ *
+ * (1) When redundancy is desired a set may contain several duplicate
+ *     entries pointing to different copies of the same data.  Up to 4 copies
+ *     are supported.
+ *
+ * (2) The blockrefs in a set can shortcut multiple levels of indirections
+ *     within the bounds imposed by the parent of set.
+ *
+ * When a set fills up another level of indirection is inserted, moving
+ * some or all of the set's contents into indirect blocks placed under the
+ * set.  This is a top-down approach in that indirect blocks are not created
+ * until the set actually becomes full (that is, the entries in the set can
+ * shortcut the indirect blocks when the set is not full).  Depending on how
+ * things are filled multiple indirect blocks will eventually be created.
+ *
+ * Indirect blocks are typically 4KB (32 entries) or 64KB (512 entries,
+ * at 128 bytes per blockref) and are also treated as fully set-associative.
+ */
+struct hammer2_blockset {
+	hammer2_blockref_t	blockref[HAMMER2_SET_COUNT];	/* 4 blockrefs of 128 bytes each */
+};
+
+typedef struct hammer2_blockset hammer2_blockset_t;
+
+/*
+ * Catch programmer snafus
+ */
+#if (1 << HAMMER2_SET_RADIX) != HAMMER2_SET_COUNT
+#error "hammer2 direct radix is incorrect"
+#endif	/* SET_RADIX vs SET_COUNT */
+#if (1 << HAMMER2_PBUFRADIX) != HAMMER2_PBUFSIZE
+#error "HAMMER2_PBUFRADIX and HAMMER2_PBUFSIZE are inconsistent"
+#endif	/* PBUFRADIX vs PBUFSIZE */
+#if (1 << HAMMER2_RADIX_MIN) != HAMMER2_ALLOC_MIN
+#error "HAMMER2_RADIX_MIN and HAMMER2_ALLOC_MIN are inconsistent"
+#endif	/* RADIX_MIN vs ALLOC_MIN */
+
+/*
+ * hammer2_bmap_data - A freemap entry in the LEVEL1 block.
+ *
+ * Each 128-byte entry contains the bitmap and meta-data required to manage
+ * a LEVEL0 (4MB) block of storage.  The storage is managed in 256 x 16KB
+ * chunks.
+ *
+ * A smaller allocation granularity is supported via a linear iterator and/or
+ * must otherwise be tracked in ram.
+ *
+ * (data structure must be 128 bytes exactly)
+ *
+ * linear  - A BYTE linear allocation offset used for sub-16KB allocations
+ *	     only.  May contain values between 0 and 4MB.  Must be ignored
+ *	     if 16KB-aligned (i.e. force bitmap scan), otherwise may be
+ *	     used to sub-allocate within the 16KB block (which is already
+ *	     marked as allocated in the bitmap).
+ *
+ *	     Sub-allocations need only be 1KB-aligned and do not have to be
+ *	     size-aligned, and 16KB or larger allocations do not update this
+ *	     field, resulting in pretty good packing.
+ *
+ *	     Please note that file data granularity may be limited by
+ *	     other issues such as buffer cache direct-mapping and the
+ *	     desire to support sector sizes up to 16KB (so H2 only issues
+ *	     I/O's in multiples of 16KB anyway).
+ *
+ * class   - Clustering class.  Cleared to 0 only if the entire leaf becomes
+ *	     free.  Used to cluster device buffers so all elements must have
+ *	     the same device block size, but may mix logical sizes.
+ *
+ *	     Typically integrated with the blockref type in the upper 8 bits
+ *	     to localize inodes and indirect blocks, improving bulk free scans
+ *	     and directory scans.
+ *
+ * bitmap  - Two bits per 16KB allocation block arranged in arrays of
+ *	     64-bit elements, 256x2 bits representing ~4MB worth of media
+ *	     storage.  Bit patterns are as follows:
+ *
+ *	     00	Unallocated
+ *	     01 (reserved)
+ *	     10 Possibly free
+ *           11 Allocated
+ */
+struct hammer2_bmap_data {
+	int32_t linear;		/* 00-03 linear sub-granular allocation offset */
+	uint16_t class;		/* 04-05 clustering class ((type<<8)|radix) */
+	uint8_t reserved06;	/* 06 */
+	uint8_t reserved07;	/* 07 */
+	uint32_t reserved08;	/* 08 */
+	uint32_t reserved0C;	/* 0C */
+	uint32_t reserved10;	/* 10 */
+	uint32_t reserved14;	/* 14 */
+	uint32_t reserved18;	/* 18 */
+	uint32_t avail;		/* 1C NOTE(review): presumably bytes still available; confirm */
+	uint32_t reserved20[8];	/* 20-3F 256 bits manages 128K/1KB/2-bits */
+				/* 40-7F 512 bits manages 4MB of storage */
+	hammer2_bitmap_t bitmapq[HAMMER2_BMAP_ELEMENTS];
+} __packed;			/* offsets above end at 7F: 128 bytes total */
+
+typedef struct hammer2_bmap_data hammer2_bmap_data_t;
+
+/*
+ * XXX "Inodes ARE directory entries" is no longer the case.  Hardlinks are
+ * dirents which refer to the same inode#, which is how filesystems usually
+ * implement hardlink.  The following comments need to be updated.
+ *
+ * In HAMMER2 inodes ARE directory entries, with a special exception for
+ * hardlinks.  The inode number is stored in the inode rather than being
+ * based on the location of the inode (since the location moves every time
+ * the inode or anything underneath the inode is modified).
+ *
+ * The inode is 1024 bytes, made up of 256 bytes of meta-data, 256 bytes
+ * for the filename, and 512 bytes worth of direct file data OR an embedded
+ * blockset.  The in-memory hammer2_inode structure contains only the mostly-
+ * node-independent meta-data portion (some flags are node-specific and will
+ * not be synchronized).  The rest of the inode is node-specific and chain I/O
+ * is required to obtain it.
+ *
+ * Directories represent one inode per blockref.  Inodes are not laid out
+ * as a file but instead are represented by the related blockrefs.  The
+ * blockrefs, in turn, are indexed by the 64-bit directory hash key.  Remember
+ * that blocksets are fully associative, so a certain degree efficiency is
+ * achieved just from that.
+ *
+ * Up to 512 bytes of direct data can be embedded in an inode, and since
+ * inodes are essentially directory entries this also means that small data
+ * files end up simply being laid out linearly in the directory, resulting
+ * in fewer seeks and highly optimal access.
+ *
+ * The compression mode can be changed at any time in the inode and is
+ * recorded on a blockref-by-blockref basis.
+ *
+ * Hardlinks are supported via the inode map.  Essentially the way a hardlink
+ * works is that all individual directory entries representing the same file
+ * are special cased and specify the same inode number.  The actual file
+ * is placed in the nearest parent directory that is parent to all instances
+ * of the hardlink.  If all hardlinks to a file are in the same directory
+ * the actual file will also be placed in that directory.  This file uses
+ * the inode number as the directory entry key and is invisible to normal
+ * directory scans.  Real directory entry keys are differentiated from the
+ * inode number key via bit 63.  Access to the hardlink silently looks up
+ * the real file and forwards all operations to that file.  Removal of the
+ * last hardlink also removes the real file.
+ */
+#define HAMMER2_INODE_BYTES		1024	/* 256 meta + 256 name + 512 data (asserted by code) */
+#define HAMMER2_INODE_MAXNAME		256	/* maximum name in bytes */
+#define HAMMER2_INODE_VERSION_ONE	1
+
+#define HAMMER2_INODE_START		1024	/* dynamically allocated */
+
+struct hammer2_inode_meta {
+	uint16_t	version;	/* 0000 inode data version */
+	uint8_t		reserved02;	/* 0002 */
+	uint8_t		pfs_subtype;	/* 0003 pfs sub-type */
+
+	/*
+	 * core inode attributes, inode type, misc flags
+	 */
+	uint32_t	uflags;		/* 0004 chflags */
+	uint32_t	rmajor;		/* 0008 available for device nodes */
+	uint32_t	rminor;		/* 000C available for device nodes */
+	uint64_t	ctime;		/* 0010 inode change time */
+	uint64_t	mtime;		/* 0018 modified time */
+	uint64_t	atime;		/* 0020 access time (unsupported) */
+	uint64_t	btime;		/* 0028 birth time */
+	uuid_t		uid;		/* 0030 uid / degenerate unix uid */
+	uuid_t		gid;		/* 0040 gid / degenerate unix gid */
+
+	uint8_t		type;		/* 0050 object type */
+	uint8_t		op_flags;	/* 0051 operational flags */
+	uint16_t	cap_flags;	/* 0052 capability flags */
+	uint32_t	mode;		/* 0054 unix modes (typ low 16 bits) */
+
+	/*
+	 * inode size, identification, localized recursive configuration
+	 * for compression and backup copies.
+	 *
+	 * NOTE: Nominal parent inode number (iparent) is only applicable
+	 *	 for directories but can also help for files during
+	 *	 catastrophic recovery.
+	 */
+	hammer2_tid_t	inum;		/* 0058 inode number */
+	hammer2_off_t	size;		/* 0060 size of file */
+	uint64_t	nlinks;		/* 0068 hard links (typ only dirs) */
+	hammer2_tid_t	iparent;	/* 0070 nominal parent inum */
+	hammer2_key_t	name_key;	/* 0078 full filename key */
+	uint16_t	name_len;	/* 0080 filename length */
+	uint8_t		ncopies;	/* 0082 ncopies to local media */
+	uint8_t		comp_algo;	/* 0083 compression request & algo */
+
+	/*
+	 * These fields are currently only applicable to PFSROOTs.
+	 *
+	 * NOTE: We can't use {volume_data->fsid, pfs_clid} to uniquely
+	 *	 identify an instance of a PFS in the cluster because
+	 *	 a mount may contain more than one copy of the PFS as
+	 *	 a separate node.  {pfs_clid, pfs_fsid} must be used for
+	 *	 registration in the cluster.
+	 */
+	uint8_t		target_type;	/* 0084 hardlink target type */
+	uint8_t		check_algo;	/* 0085 check code request & algo */
+	uint8_t		pfs_nmasters;	/* 0086 (if PFSROOT) if multi-master */
+	uint8_t		pfs_type;	/* 0087 (if PFSROOT) node type */
+	hammer2_tid_t	pfs_inum;	/* 0088 (if PFSROOT) inum allocator */
+	uuid_t		pfs_clid;	/* 0090 (if PFSROOT) cluster uuid */
+	uuid_t		pfs_fsid;	/* 00A0 (if PFSROOT) unique uuid */
+
+	/*
+	 * Quotas and aggregate sub-tree inode and data counters.  Note that
+	 * quotas are not replicated downward, they are explicitly set by
+	 * the sysop and in-memory structures keep track of inheritance.
+	 */
+	hammer2_key_t	data_quota;	/* 00B0 subtree quota in bytes */
+	hammer2_key_t	unusedB8;	/* 00B8 subtree byte count */
+	hammer2_key_t	inode_quota;	/* 00C0 subtree quota inode count */
+	hammer2_key_t	unusedC8;	/* 00C8 subtree inode count */
+
+	/*
+	 * The last snapshot tid is tested against modify_tid to determine
+	 * when a copy must be made of a data block whose check mode has been
+	 * disabled (a disabled check mode allows data blocks to be updated
+	 * in place instead of copy-on-write).
+	 */
+	hammer2_tid_t	pfs_lsnap_tid;	/* 00D0 last snapshot tid */
+	hammer2_tid_t	reservedD8;	/* 00D8 (avail) */
+
+	/*
+	 * Tracks (possibly degenerate) free areas covering all sub-tree
+	 * allocations under inode, not counting the inode itself.
+	 * 0/0 indicates empty entry.  fully set-associative.
+	 *
+	 * (not yet implemented)
+	 *
+	 * NOTE(review): the two fields below are a decryption validator
+	 * and reserved space, which does not obviously match the
+	 * description above; presumably the description is stale - confirm.
+	 */
+	uint64_t	decrypt_check;	/* 00E0 decryption validator */
+	hammer2_off_t	reservedE0[3];	/* 00E8/F0/F8 (name says E0, starts at E8) */
+} __packed;
+
+typedef struct hammer2_inode_meta hammer2_inode_meta_t;
+
+struct hammer2_inode_data {
+	hammer2_inode_meta_t	meta;	/* 0000-00FF */
+	unsigned char	filename[HAMMER2_INODE_MAXNAME];
+					/* 0100-01FF (256 char, unterminated) */
+	union {				/* 0200-03FF (4 x 128-byte blockrefs = 512 bytes) */
+		hammer2_blockset_t blockset;
+		char data[HAMMER2_EMBEDDED_BYTES];	/* direct file data; EMBEDDED_BYTES presumably 512 - confirm */
+	} u;
+} __packed;
+
+typedef struct hammer2_inode_data hammer2_inode_data_t;
+
+#define HAMMER2_OPFLAG_DIRECTDATA	0x01	/* NOTE(review): presumably inode u.data holds file data; confirm */
+#define HAMMER2_OPFLAG_PFSROOT		0x02	/* (see also bref flag) */
+#define HAMMER2_OPFLAG_COPYIDS		0x04	/* copyids override parent */
+
+#define HAMMER2_OBJTYPE_UNKNOWN		0
+#define HAMMER2_OBJTYPE_DIRECTORY	1
+#define HAMMER2_OBJTYPE_REGFILE		2
+#define HAMMER2_OBJTYPE_FIFO		4	/* note: value 3 is not assigned here */
+#define HAMMER2_OBJTYPE_CDEV		5
+#define HAMMER2_OBJTYPE_BDEV		6
+#define HAMMER2_OBJTYPE_SOFTLINK	7
+#define HAMMER2_OBJTYPE_UNUSED08	8
+#define HAMMER2_OBJTYPE_SOCKET		9
+#define HAMMER2_OBJTYPE_WHITEOUT	10
+
+#define HAMMER2_COPYID_NONE		0
+#define HAMMER2_COPYID_LOCAL		((uint8_t)-1)	/* 255 */
+
+#define HAMMER2_COPYID_COUNT		256	/* copyid is a uint8_t: 0-255 */
+
+/*
+ * PFS types identify the role of a PFS within a cluster.  The PFS type
+ * is stored on media and in LNK_SPAN messages and used in other places.
+ *
+ * The low 4 bits specify the current active type while the high 4 bits
+ * specify the transition target if the PFS is being upgraded or downgraded.
+ * If the upper 4 bits are not zero it may affect how a PFS is used during
+ * the transition.
+ *
+ * Generally speaking, downgrading a MASTER to a SLAVE cannot complete until
+ * at least all MASTERs have updated their pfs_nmasters field.  And upgrading
+ * a SLAVE to a MASTER cannot complete until the new prospective master has
+ * been fully synchronized (though theoretically full synchronization is
+ * not required if a (new) quorum of other masters are fully synchronized).
+ *
+ * It generally does not matter which PFS element you actually mount, you
+ * are mounting 'the cluster'.  So, for example, a network mount will mount
+ * a DUMMY PFS type on a memory filesystem.  However, there are two exceptions.
+ * In order to gain the benefits of a SOFT_MASTER or SOFT_SLAVE, those PFSs
+ * must be directly mounted.
+ */
+#define HAMMER2_PFSTYPE_NONE		0x00
+#define HAMMER2_PFSTYPE_CACHE		0x01
+#define HAMMER2_PFSTYPE_UNUSED02	0x02
+#define HAMMER2_PFSTYPE_SLAVE		0x03
+#define HAMMER2_PFSTYPE_SOFT_SLAVE	0x04
+#define HAMMER2_PFSTYPE_SOFT_MASTER	0x05
+#define HAMMER2_PFSTYPE_MASTER		0x06
+#define HAMMER2_PFSTYPE_UNUSED07	0x07
+#define HAMMER2_PFSTYPE_SUPROOT		0x08
+#define HAMMER2_PFSTYPE_DUMMY		0x09
+#define HAMMER2_PFSTYPE_MAX		16	/* type is a 4-bit value: 0x00-0x0F */
+
+#define HAMMER2_PFSTRAN_NONE		0x00	/* no transition in progress */
+#define HAMMER2_PFSTRAN_CACHE		0x10	/* PFSTYPE_CACHE << 4 */
+#define HAMMER2_PFSTRAN_UNMUSED20	0x20	/* (sic) "UNMUSED"; renaming would change the interface */
+#define HAMMER2_PFSTRAN_SLAVE		0x30	/* PFSTYPE_SLAVE << 4 */
+#define HAMMER2_PFSTRAN_SOFT_SLAVE	0x40	/* PFSTYPE_SOFT_SLAVE << 4 */
+#define HAMMER2_PFSTRAN_SOFT_MASTER	0x50	/* PFSTYPE_SOFT_MASTER << 4 */
+#define HAMMER2_PFSTRAN_MASTER		0x60	/* PFSTYPE_MASTER << 4 */
+#define HAMMER2_PFSTRAN_UNUSED70	0x70
+#define HAMMER2_PFSTRAN_SUPROOT		0x80	/* PFSTYPE_SUPROOT << 4 */
+#define HAMMER2_PFSTRAN_DUMMY		0x90	/* PFSTYPE_DUMMY << 4 */
+
+#define HAMMER2_PFS_DEC(n)		((n) & 0x0F)	/* low 4 bits: active type */
+#define HAMMER2_PFS_DEC_TRANSITION(n)	(((n) >> 4) & 0x0F)	/* high 4 bits: transition target */
+#define HAMMER2_PFS_ENC_TRANSITION(n)	(((n) & 0x0F) << 4)	/* type -> high 4 bits */
+
+#define HAMMER2_PFSSUBTYPE_NONE		0
+#define HAMMER2_PFSSUBTYPE_SNAPSHOT	1	/* manual/managed snapshot */
+#define HAMMER2_PFSSUBTYPE_AUTOSNAP	2	/* automatic snapshot */
+
+/*
+ * PFS mode of operation is a bitmask.  This is typically not stored
+ * on-media, but defined here because the field may be used in dmsgs.
+ *
+ * The two values below are distinct bits and can therefore be combined.
+ */
+#define HAMMER2_PFSMODE_QUORUM		0x01
+#define HAMMER2_PFSMODE_RW		0x02
+
+/*
+ *				Allocation Table
+ *
+ */
+
+
+/*
+ * Flags (8 bits) - blockref, for freemap only
+ *
+ * Note that the minimum chunk size is 1KB so we could theoretically have
+ * 10 bits here, but we might have some future extension that allows a
+ * chunk size down to 256 bytes and if so we will need bits 8 and 9.
+ *
+ * HAMMER2_AVF_SELMASK (0x03) spans the four HAMMER2_AV_SELECT_* values
+ * (0-3).  HAMMER2_AVF_AVMASK32/64 keep everything except the low 8 bits,
+ * i.e. everything except the flag byte described here.
+ */
+#define HAMMER2_AVF_SELMASK		0x03	/* select group */
+#define HAMMER2_AVF_ALL_ALLOC		0x04	/* indicate all allocated */
+#define HAMMER2_AVF_ALL_FREE		0x08	/* indicate all free */
+#define HAMMER2_AVF_RESERVED10		0x10
+#define HAMMER2_AVF_RESERVED20		0x20
+#define HAMMER2_AVF_RESERVED40		0x40
+#define HAMMER2_AVF_RESERVED80		0x80
+#define HAMMER2_AVF_AVMASK32		((uint32_t)0xFFFFFF00LU)
+#define HAMMER2_AVF_AVMASK64		((uint64_t)0xFFFFFFFFFFFFFF00LLU)
+
+/* Values of the field selected by HAMMER2_AVF_SELMASK. */
+#define HAMMER2_AV_SELECT_A		0x00
+#define HAMMER2_AV_SELECT_B		0x01
+#define HAMMER2_AV_SELECT_C		0x02
+#define HAMMER2_AV_SELECT_D		0x03
+
+/*
+ * The volume header eats a 64K block.  There is currently an issue where
+ * we want to try to fit all nominal filesystem updates in a 512-byte section
+ * but it may be a lost cause due to the need for a blockset.
+ *
+ * All information is stored in host byte order.  The volume header's magic
+ * number may be checked to determine the byte order.  If you wish to mount
+ * between machines w/ different endian modes you'll need filesystem code
+ * which acts on the media data consistently (either all one way or all the
+ * other).  Our code currently does not do that.
+ *
+ * A read-write mount may have to recover missing allocations by doing an
+ * incremental mirror scan looking for modifications made after alloc_tid.
+ * If alloc_tid == last_tid then no recovery operation is needed.  Recovery
+ * operations are usually very, very fast.
+ *
+ * Read-only mounts do not need to do any recovery, access to the filesystem
+ * topology is always consistent after a crash (is always consistent, period).
+ * However, there may be shortcutted blockref updates present from deep in
+ * the tree which are stored in the volume header and must be tracked on
+ * the fly.
+ *
+ * NOTE: The copyinfo[] array contains the configuration for both the
+ *	 cluster connections and any local media copies.  The volume
+ *	 header will be replicated for each local media copy.
+ *
+ *	 The mount command may specify multiple medias or just one and
+ *	 allow HAMMER2 to pick up the others when it checks the copyinfo[]
+ *	 array on mount.
+ *
+ * NOTE: root_blockref points to the super-root directory, not the root
+ *	 directory.  The root directory will be a subdirectory under the
+ *	 super-root.
+ *
+ *	 The super-root directory contains all root directories and all
+ *	 snapshots (readonly or writable).  It is possible to do a
+ *	 null-mount of the super-root using special path constructions
+ *	 relative to your mounted root.
+ *
+ * NOTE: HAMMER2 allows any subdirectory tree to be managed as if it were
+ *	 a PFS, including mirroring and storage quota operations, and this is
+ *	 preferred over creating discrete PFSs in the super-root.  Instead
+ *	 the super-root is most typically used to create writable snapshots,
+ *	 alternative roots, and so forth.  The super-root is also used by
+ *	 the automatic snapshotting mechanism.
+ */
+/*
+ * Volume header signature.  HAMMER2_VOLUME_ID_ABO is HAMMER2_VOLUME_ID_HBO
+ * with its eight bytes reversed, so a magic field that reads back as _ABO
+ * presumably indicates media written in the opposite byte order.
+ */
+#define HAMMER2_VOLUME_ID_HBO	0x48414d3205172011LLU
+#define HAMMER2_VOLUME_ID_ABO	0x11201705324d4148LLU
+
+/*
+ * On-media volume header.  The structure is packed; the hexadecimal
+ * offsets in the per-field comments run from 0000 through FFFF.
+ */
+struct hammer2_volume_data {
+	/*
+	 * sector #0 - 512 bytes
+	 */
+	uint64_t	magic;			/* 0000 Signature */
+	hammer2_off_t	boot_beg;		/* 0008 Boot area (future) */
+	hammer2_off_t	boot_end;		/* 0010 (size = end - beg) */
+	hammer2_off_t	aux_beg;		/* 0018 Aux area (future) */
+	hammer2_off_t	aux_end;		/* 0020 (size = end - beg) */
+	hammer2_off_t	volu_size;		/* 0028 Volume size, bytes */
+
+	uint32_t	version;		/* 0030 */
+	uint32_t	flags;			/* 0034 */
+	uint8_t		copyid;			/* 0038 copyid of phys vol */
+	uint8_t		freemap_version;	/* 0039 freemap algorithm */
+	uint8_t		peer_type;		/* 003A HAMMER2_PEER_xxx */
+	uint8_t		reserved003B;		/* 003B */
+	uint32_t	reserved003C;		/* 003C */
+
+	uuid_t		fsid;			/* 0040 */
+	uuid_t		fstype;			/* 0050 */
+
+	/*
+	 * allocator_size is precalculated at newfs time and does not include
+	 * reserved blocks, boot, or redo areas.
+	 *
+	 * Initial non-reserved-area allocations do not use the freemap
+	 * but instead adjust alloc_iterator.  Dynamic allocations take
+	 * over starting at (allocator_beg).  This makes newfs_hammer2's
+	 * job a lot easier and can also serve as a testing jig.
+	 */
+	hammer2_off_t	allocator_size;		/* 0060 Total data space */
+	hammer2_off_t   allocator_free;		/* 0068	Free space */
+	hammer2_off_t	allocator_beg;		/* 0070 Initial allocations */
+
+	/*
+	 * mirror_tid reflects the highest committed change for this
+	 * block device regardless of whether it is to the super-root
+	 * or to a PFS or whatever.
+	 *
+	 * freemap_tid reflects the highest committed freemap change for
+	 * this block device.
+	 */
+	hammer2_tid_t	mirror_tid;		/* 0078 committed tid (vol) */
+	hammer2_tid_t	reserved0080;		/* 0080 */
+	hammer2_tid_t	reserved0088;		/* 0088 */
+	hammer2_tid_t	freemap_tid;		/* 0090 committed tid (fmap) */
+	hammer2_tid_t	bulkfree_tid;		/* 0098 bulkfree incremental */
+	hammer2_tid_t	reserved00A0[5];	/* 00A0-00C7 */
+
+	/*
+	 * Copyids are allocated dynamically from the copyexists bitmap.
+	 * An id from the active copies set (up to 8, see copyinfo later on)
+	 * may still exist after the copy set has been removed from the
+	 * volume header and its bit will remain active in the bitmap and
+	 * cannot be reused until it is 100% removed from the hierarchy.
+	 *
+	 * NOTE: the name reserved0140 does not match the field's offset
+	 *	 (00E8); the 248 bytes pad the sector up to icrc_sects[].
+	 */
+	uint32_t	copyexists[8];		/* 00C8-00E7 copy exists bmap */
+	char		reserved0140[248];	/* 00E8-01DF */
+
+	/*
+	 * 32 bit CRC array at the end of the first 512 byte sector.
+	 *
+	 * icrc_sects[7] - First 512-4 bytes of volume header (including all
+	 *		   the other icrc's except this one).
+	 *
+	 * icrc_sects[6] - Sector 1 (512 bytes) of volume header, which is
+	 *		   the blockset for the root.
+	 *
+	 * icrc_sects[5] - Sector 2
+	 * icrc_sects[4] - Sector 3
+	 * icrc_sects[3] - Sector 4 (the freemap blockset)
+	 */
+	hammer2_crc32_t	icrc_sects[8];		/* 01E0-01FF */
+
+	/*
+	 * sector #1 - 512 bytes
+	 *
+	 * The entire sector is used by a blockset.
+	 */
+	hammer2_blockset_t sroot_blockset;	/* 0200-03FF Superroot dir */
+
+	/*
+	 * sector #2-7
+	 */
+	char	sector2[512];			/* 0400-05FF reserved */
+	char	sector3[512];			/* 0600-07FF reserved */
+	hammer2_blockset_t freemap_blockset;	/* 0800-09FF freemap  */
+	char	sector5[512];			/* 0A00-0BFF reserved */
+	char	sector6[512];			/* 0C00-0DFF reserved */
+	char	sector7[512];			/* 0E00-0FFF reserved */
+
+	/*
+	 * sector #8-71	- 32768 bytes
+	 *
+	 * Contains the configuration for up to 256 copyinfo targets.  These
+	 * specify local and remote copies operating as masters or slaves.
+	 * copyid's 0 and 255 are reserved (0 indicates an empty slot and 255
+	 * indicates the local media).
+	 *
+	 * Each inode contains a set of up to 8 copyids, either inherited
+	 * from its parent or explicitly specified in the inode, which
+	 * indexes into this array.
+	 */
+						/* 1000-8FFF copyinfo config */
+	hammer2_volconf_t copyinfo[HAMMER2_COPYID_COUNT];
+
+	/*
+	 * Remaining sections are reserved for future use.
+	 */
+	char		reserved0400[0x6FFC];	/* 9000-FFFB reserved */
+
+	/*
+	 * icrc on entire volume header
+	 */
+	hammer2_crc32_t	icrc_volheader;		/* FFFC-FFFF full volume icrc*/
+} __packed;
+
+typedef struct hammer2_volume_data hammer2_volume_data_t;
+
+/*
+ * Various parts of the volume header have their own iCRCs.
+ *
+ * The first 512 bytes has its own iCRC stored at the end of the 512 bytes
+ * and not included in the icrc calculation.
+ *
+ * The second 512 bytes also has its own iCRC but it is stored in the first
+ * 512 bytes so it covers the entire second 512 bytes.
+ *
+ * The whole volume block (64KB) has an iCRC covering all but the last 4 bytes,
+ * which is where the iCRC for the whole volume is stored.  This is currently
+ * a catch-all for anything not individually iCRCd.
+ */
+/* Slots of icrc_sects[] holding the iCRCs of sector 0 and sector 1. */
+#define HAMMER2_VOL_ICRC_SECT1		6
+#define HAMMER2_VOL_ICRC_SECT0		7
+
+/* Size of the whole volume header. */
+#define HAMMER2_VOLUME_BYTES		65536
+
+/*
+ * Byte range covered by each iCRC, as an (offset, size) pair:
+ *   ICRC0  - sector 0 minus its trailing 4-byte iCRC
+ *   ICRC1  - all of sector 1
+ *   ICRCVH - the whole volume header minus its trailing 4-byte iCRC
+ */
+#define HAMMER2_VOLUME_ICRC0_OFF	0
+#define HAMMER2_VOLUME_ICRC0_SIZE	(512 - 4)
+
+#define HAMMER2_VOLUME_ICRC1_OFF	512
+#define HAMMER2_VOLUME_ICRC1_SIZE	(512)
+
+#define HAMMER2_VOLUME_ICRCVH_OFF	0
+#define HAMMER2_VOLUME_ICRCVH_SIZE	(HAMMER2_VOLUME_BYTES - 4)
+
+/* Volume header format versions. */
+#define HAMMER2_VOL_VERSION_MIN		1
+#define HAMMER2_VOL_VERSION_DEFAULT	1
+#define HAMMER2_VOL_VERSION_WIP		2
+
+#define HAMMER2_NUM_VOLHDRS		4
+
+/*
+ * Overlay of the media structures declared in this header on top of a
+ * raw byte buffer of HAMMER2_PBUFSIZE bytes, so the same storage can be
+ * accessed as whichever structure it currently holds.
+ */
+union hammer2_media_data {
+	hammer2_volume_data_t	voldata;
+        hammer2_inode_data_t    ipdata;
+	hammer2_blockset_t	blkset;
+	hammer2_blockref_t	npdata[HAMMER2_IND_COUNT_MAX];
+	hammer2_bmap_data_t	bmdata[HAMMER2_FREEMAP_COUNT];
+	char			buf[HAMMER2_PBUFSIZE];
+} __packed;
+
+typedef union hammer2_media_data hammer2_media_data_t;
+
+#endif /* !_VFS_HAMMER2_DISK_H_ */
Index: src/usr.sbin/fstyp/hammer_disk.h
diff -u /dev/null src/usr.sbin/fstyp/hammer_disk.h:1.1
--- /dev/null	Wed Jan  1 08:56:42 2020
+++ src/usr.sbin/fstyp/hammer_disk.h	Wed Jan  1 08:56:41 2020
@@ -0,0 +1,1092 @@
+/*        $NetBSD: hammer_disk.h,v 1.1 2020/01/01 08:56:41 tkusumi Exp $      */
+
+/*
+ * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dil...@backplane.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.55 2008/11/13 02:18:43 dillon Exp $
+ */
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: hammer_disk.h,v 1.1 2020/01/01 08:56:41 tkusumi Exp $");
+
+#ifndef VFS_HAMMER_DISK_H_
+#define VFS_HAMMER_DISK_H_
+
+#include <sys/endian.h>
+
+#ifndef _SYS_UUID_H_
+#include <sys/uuid.h>
+#endif
+
+/*
+ * The structures below represent the on-disk format for a HAMMER
+ * filesystem.  Note that all fields for on-disk structures are naturally
+ * aligned.  HAMMER uses little endian for fields in on-disk structures.
+ * HAMMER doesn't support big endian architectures yet, but support is planned.
+ *
+ * Most of HAMMER revolves around the concept of an object identifier.  An
+ * obj_id is a 64 bit quantity which uniquely identifies a filesystem object
+ * FOR THE ENTIRE LIFE OF THE FILESYSTEM.  This uniqueness allows backups
+ * and mirrors to retain varying amounts of filesystem history by removing
+ * any possibility of conflict through identifier reuse.
+ *
+ * A HAMMER filesystem may span multiple volumes.
+ *
+ * A HAMMER filesystem uses a 16K filesystem buffer size.  All filesystem
+ * I/O is done in multiples of 16K.
+ *
+ * 64K X-bufs are used for blocks >= a file's 1MB mark.
+ *
+ * Per-volume storage limit: 52 bits		4096 TB
+ * Per-Zone storage limit: 60 bits		1 MTB
+ * Per-filesystem storage limit: 60 bits	1 MTB
+ */
+/* Buffer sizes in bytes, with the matching low-bit masks. */
+#define HAMMER_BUFSIZE		(16 * 1024)
+#define HAMMER_XBUFSIZE		(64 * 1024)
+#define HAMMER_HBUFSIZE		(HAMMER_BUFSIZE / 2)
+#define HAMMER_XDEMARC		(1024 * 1024)
+#define HAMMER_BUFMASK		(HAMMER_BUFSIZE - 1)
+#define HAMMER_XBUFMASK		(HAMMER_XBUFSIZE - 1)
+
+/* The same quantities widened to 64 bits for offset arithmetic. */
+#define HAMMER_BUFSIZE64	((uint64_t)HAMMER_BUFSIZE)
+#define HAMMER_XBUFSIZE64	((uint64_t)HAMMER_XBUFSIZE)
+#define HAMMER_BUFMASK64	((uint64_t)HAMMER_BUFMASK)
+#define HAMMER_XBUFMASK64	((uint64_t)HAMMER_XBUFMASK)
+
+/* Bit fields of a 64-bit HAMMER offset. */
+#define HAMMER_OFF_ZONE_MASK	0xF000000000000000ULL /* zone portion */
+#define HAMMER_OFF_VOL_MASK	0x0FF0000000000000ULL /* volume portion */
+#define HAMMER_OFF_SHORT_MASK	0x000FFFFFFFFFFFFFULL /* offset portion */
+#define HAMMER_OFF_LONG_MASK	0x0FFFFFFFFFFFFFFFULL /* offset portion */
+
+/* All-ones offset used as the "bad offset" value. */
+#define HAMMER_OFF_BAD		((hammer_off_t)-1)
+
+/*
+ * Round (offset) up to the next buffer / X-buffer boundary.  A value
+ * that is already aligned is returned unchanged.
+ */
+#define HAMMER_BUFSIZE_DOALIGN(offset)				\
+	(((offset) + HAMMER_BUFMASK) & ~HAMMER_BUFMASK)
+#define HAMMER_XBUFSIZE_DOALIGN(offset)				\
+	(((offset) + HAMMER_XBUFMASK) & ~HAMMER_XBUFMASK)
+
+#define HAMMER_BUFSIZE64_DOALIGN(offset)			\
+	(((offset) + HAMMER_BUFMASK64) & ~HAMMER_BUFMASK64)
+#define HAMMER_XBUFSIZE64_DOALIGN(offset)			\
+	(((offset) + HAMMER_XBUFMASK64) & ~HAMMER_XBUFMASK64)
+
+/*
+ * The current limit of volumes that can make up a HAMMER FS
+ */
+#define HAMMER_MAX_VOLUMES	256
+
+/*
+ * Reserved space for (future) header junk after the volume header.
+ * Minimum, maximum and actual size are all the same 256 KB.
+ */
+#define HAMMER_MIN_VOL_JUNK	(HAMMER_BUFSIZE * 16)	/* 256 KB */
+#define HAMMER_MAX_VOL_JUNK	HAMMER_MIN_VOL_JUNK
+#define HAMMER_VOL_JUNK_SIZE	HAMMER_MIN_VOL_JUNK
+
+/*
+ * Hammer transaction ids are 64 bit unsigned integers and are usually
+ * synchronized with the time of day in nanoseconds.
+ *
+ * Hammer offsets are used for FIFO indexing and embed a cycle counter
+ * and volume number in addition to the offset.  Most offsets are required
+ * to be 16 KB aligned.
+ */
+typedef uint64_t hammer_tid_t;		/* transaction id */
+typedef uint64_t hammer_off_t;		/* encoded HAMMER offset */
+typedef uint32_t hammer_crc_t;		/* 32 bit CRC value */
+typedef uuid_t hammer_uuid_t;
+
+/*
+ * Smallest and largest values of the basic HAMMER key types.
+ *
+ * HAMMER_MIN_KEY is spelled (-MAX - 1) on purpose.  The constant
+ * 0x8000000000000000LL does not fit in long long, so C gives it type
+ * unsigned long long; negating it yields the same unsigned value and the
+ * macro would silently be unsigned.  Comparisons against a signed key
+ * (e.g. key < HAMMER_MIN_KEY, HAMMER_MIN_KEY < 0) would then be done in
+ * unsigned arithmetic.  The parentheses also keep the expansion safe
+ * inside larger expressions.
+ */
+#define HAMMER_MIN_TID		0ULL			/* unsigned */
+#define HAMMER_MAX_TID		0xFFFFFFFFFFFFFFFFULL	/* unsigned */
+#define HAMMER_MIN_KEY		(-0x7FFFFFFFFFFFFFFFLL - 1) /* signed */
+#define HAMMER_MAX_KEY		0x7FFFFFFFFFFFFFFFLL	/* signed */
+#define HAMMER_MIN_OBJID	HAMMER_MIN_KEY		/* signed */
+#define HAMMER_MAX_OBJID	HAMMER_MAX_KEY		/* signed */
+#define HAMMER_MIN_RECTYPE	0x0U			/* unsigned */
+#define HAMMER_MAX_RECTYPE	0xFFFFU			/* unsigned */
+#define HAMMER_MIN_OFFSET	0ULL			/* unsigned */
+#define HAMMER_MAX_OFFSET	0xFFFFFFFFFFFFFFFFULL	/* unsigned */
+
+/*
+ * hammer_off_t has several different encodings.  Note that not all zones
+ * encode a vol_no.  Zone bits are not a part of filesystem capacity as
+ * the zone bits aren't directly or indirectly mapped to physical volumes.
+ *
+ * In other words, HAMMER's logical filesystem offset consists of 64 bits,
+ * but the filesystem is considered a 60-bit filesystem, not a 64-bit one.
+ * The maximum filesystem capacity is 1EB, not 16EB.
+ *
+ * zone 0:		available, a big-block that contains the offset is unused
+ * zone 1 (z,v,o):	raw volume relative (offset 0 is the volume header)
+ * zone 2 (z,v,o):	raw buffer relative (offset 0 is the first buffer)
+ * zone 3 (z,o):	undo/redo fifo	- fixed zone-2 offset array in volume header
+ * zone 4 (z,v,o):	freemap		- only real blockmap
+ * zone 8 (z,v,o):	B-Tree		- actually zone-2 address
+ * zone 9 (z,v,o):	meta		- actually zone-2 address
+ * zone 10 (z,v,o):	large-data	- actually zone-2 address
+ * zone 11 (z,v,o):	small-data	- actually zone-2 address
+ * zone 15:		unavailable, usually the offset is beyond volume size
+ *
+ * layer1/layer2 direct map:
+ *	     Maximum HAMMER filesystem capacity from volume aspect
+ *	     2^8(max volumes) * 2^52(max volume size) = 2^60 = 1EB (long offset)
+ *	    <------------------------------------------------------------->
+ *	     8bits   52bits (short offset)
+ *	    <------><----------------------------------------------------->
+ *	zzzzvvvvvvvvoooo oooooooooooooooo oooooooooooooooo oooooooooooooooo
+ *	----111111111111 1111112222222222 222222222ooooooo oooooooooooooooo
+ *	    <-----------------><------------------><---------------------->
+ *	     18bits             19bits              23bits
+ *	    <------------------------------------------------------------->
+ *	     2^18(layer1) * 2^19(layer2) * 2^23(big-block) = 2^60 = 1EB
+ *	     Maximum HAMMER filesystem capacity from blockmap aspect
+ *
+ * volume#0 layout
+ *	+-------------------------> offset 0 of a device/partition
+ *	| volume header (1928 bytes)
+ *	| the rest of header junk space (HAMMER_BUFSIZE aligned)
+ *	+-------------------------> vol_bot_beg
+ *	| boot area (HAMMER_BUFSIZE aligned)
+ *	+-------------------------> vol_mem_beg
+ *	| memory log (HAMMER_BUFSIZE aligned)
+ *	+-------------------------> vol_buf_beg (physical offset of zone-2)
+ *	| zone-4 big-block for layer1
+ *	+-------------------------> vol_buf_beg + HAMMER_BIGBLOCK_SIZE
+ *	| zone-4 big-blocks for layer2
+ *	| ... (1 big-block per 4TB space)
+ *	+-------------------------> vol_buf_beg + HAMMER_BIGBLOCK_SIZE * ...
+ *	| zone-3 big-blocks for UNDO/REDO FIFO
+ *	| ... (max 128 big-blocks)
+ *	+-------------------------> vol_buf_beg + HAMMER_BIGBLOCK_SIZE * ...
+ *	| zone-8 big-block for root B-Tree node/etc
+ *	+-------------------------> vol_buf_beg + HAMMER_BIGBLOCK_SIZE * ...
+ *	| zone-9 big-block for root inode/PFS/etc
+ *	+-------------------------> vol_buf_beg + HAMMER_BIGBLOCK_SIZE * ...
+ *	| zone-X big-blocks
+ *	| ... (big-blocks for new zones after newfs_hammer)
+ *	| ...
+ *	| ...
+ *	| ...
+ *	| ...
+ *	+-------------------------> vol_buf_end (HAMMER_BUFSIZE aligned)
+ *	+-------------------------> end of a device/partition
+ *
+ * volume#N layout (0<N<256)
+ *	+-------------------------> offset 0 of a device/partition
+ *	| volume header (1928 bytes)
+ *	| the rest of header junk space (HAMMER_BUFSIZE aligned)
+ *	+-------------------------> vol_bot_beg
+ *	| boot area (HAMMER_BUFSIZE aligned)
+ *	+-------------------------> vol_mem_beg
+ *	| memory log (HAMMER_BUFSIZE aligned)
+ *	+-------------------------> vol_buf_beg (physical offset of zone-2)
+ *	| zone-4 big-blocks for layer2
+ *	| ... (1 big-block per 4TB space)
+ *	+-------------------------> vol_buf_beg + HAMMER_BIGBLOCK_SIZE * ...
+ *	| zone-X big-blocks
+ *	| ... (unused until volume#(N-1) runs out of space)
+ *	| ...
+ *	| ...
+ *	| ...
+ *	| ...
+ *	+-------------------------> vol_buf_end (HAMMER_BUFSIZE aligned)
+ *	+-------------------------> end of a device/partition
+ */
+
+/*
+ * Zone ids as they appear in the top four bits of a hammer_off_t
+ * (HAMMER_OFF_ZONE_MASK).  Each HAMMER_ZONE_xxx value equals the matching
+ * HAMMER_ZONE_xxx_INDEX shifted left by 60.
+ */
+#define HAMMER_ZONE_RAW_VOLUME		0x1000000000000000ULL
+#define HAMMER_ZONE_RAW_BUFFER		0x2000000000000000ULL
+#define HAMMER_ZONE_UNDO		0x3000000000000000ULL
+#define HAMMER_ZONE_FREEMAP		0x4000000000000000ULL
+#define HAMMER_ZONE_RESERVED05		0x5000000000000000ULL  /* not used */
+#define HAMMER_ZONE_RESERVED06		0x6000000000000000ULL  /* not used */
+#define HAMMER_ZONE_RESERVED07		0x7000000000000000ULL  /* not used */
+#define HAMMER_ZONE_BTREE		0x8000000000000000ULL
+#define HAMMER_ZONE_META		0x9000000000000000ULL
+#define HAMMER_ZONE_LARGE_DATA		0xA000000000000000ULL
+#define HAMMER_ZONE_SMALL_DATA		0xB000000000000000ULL
+#define HAMMER_ZONE_RESERVED0C		0xC000000000000000ULL  /* not used */
+#define HAMMER_ZONE_RESERVED0D		0xD000000000000000ULL  /* not used */
+#define HAMMER_ZONE_RESERVED0E		0xE000000000000000ULL  /* not used */
+#define HAMMER_ZONE_UNAVAIL		0xF000000000000000ULL
+
+/* The same zones as small integers (0-15); no index is given for reserved zones. */
+#define HAMMER_ZONE_RAW_VOLUME_INDEX	1
+#define HAMMER_ZONE_RAW_BUFFER_INDEX	2
+#define HAMMER_ZONE_UNDO_INDEX		3
+#define HAMMER_ZONE_FREEMAP_INDEX	4
+#define HAMMER_ZONE_BTREE_INDEX		8
+#define HAMMER_ZONE_META_INDEX		9
+#define HAMMER_ZONE_LARGE_DATA_INDEX	10
+#define HAMMER_ZONE_SMALL_DATA_INDEX	11
+#define HAMMER_ZONE_UNAVAIL_INDEX	15
+
+#define HAMMER_MAX_ZONES		16
+
+/* Isolate the zone bits of an offset (result is still shifted, not an index). */
+#define HAMMER_ZONE(offset)		((offset) & HAMMER_OFF_ZONE_MASK)
+
+/*
+ * Predicates on the zone of an offset.  Each compares the masked zone
+ * bits against one HAMMER_ZONE_xxx constant.
+ */
+#define hammer_is_zone_raw_volume(offset)		\
+	(HAMMER_ZONE(offset) == HAMMER_ZONE_RAW_VOLUME)
+#define hammer_is_zone_raw_buffer(offset)		\
+	(HAMMER_ZONE(offset) == HAMMER_ZONE_RAW_BUFFER)
+#define hammer_is_zone_undo(offset)			\
+	(HAMMER_ZONE(offset) == HAMMER_ZONE_UNDO)
+#define hammer_is_zone_freemap(offset)			\
+	(HAMMER_ZONE(offset) == HAMMER_ZONE_FREEMAP)
+#define hammer_is_zone_btree(offset)			\
+	(HAMMER_ZONE(offset) == HAMMER_ZONE_BTREE)
+#define hammer_is_zone_meta(offset)			\
+	(HAMMER_ZONE(offset) == HAMMER_ZONE_META)
+#define hammer_is_zone_large_data(offset)		\
+	(HAMMER_ZONE(offset) == HAMMER_ZONE_LARGE_DATA)
+#define hammer_is_zone_small_data(offset)		\
+	(HAMMER_ZONE(offset) == HAMMER_ZONE_SMALL_DATA)
+#define hammer_is_zone_unavail(offset)			\
+	(HAMMER_ZONE(offset) == HAMMER_ZONE_UNAVAIL)
+/* NOTE: expands (offset) twice; do not pass an expression with side effects. */
+#define hammer_is_zone_data(offset)			\
+	(hammer_is_zone_large_data(offset) || hammer_is_zone_small_data(offset))
+
+/*
+ * True for zone indices 8 through 15.
+ * NOTE: expands (zone) twice.
+ */
+#define hammer_is_index_record(zone)			\
+	((zone) >= HAMMER_ZONE_BTREE_INDEX &&		\
+	 (zone) < HAMMER_MAX_ZONES)
+
+/* Offset form of the above; relies on HAMMER_ZONE_DECODE defined below. */
+#define hammer_is_zone_record(offset)			\
+	hammer_is_index_record(HAMMER_ZONE_DECODE(offset))
+
+/*
+ * True for the raw-buffer zone, the freemap zone and zone indices 8-15.
+ * NOTE: expands (zone) several times.
+ */
+#define hammer_is_index_direct_xlated(zone)		\
+	(((zone) == HAMMER_ZONE_RAW_BUFFER_INDEX) ||	\
+	 ((zone) == HAMMER_ZONE_FREEMAP_INDEX) ||	\
+	 hammer_is_index_record(zone))
+
+/* Offset form of the above. */
+#define hammer_is_zone_direct_xlated(offset)		\
+	hammer_is_index_direct_xlated(HAMMER_ZONE_DECODE(offset))
+
+/*
+ * Zone index <-> top four bits of a hammer_off_t.
+ */
+#define HAMMER_ZONE_DECODE(ham_off)			\
+	((int)(((hammer_off_t)(ham_off) >> 60)))
+#define HAMMER_ZONE_ENCODE(zone, ham_off)		\
+	(((hammer_off_t)(zone) << 60) | (ham_off))
+
+/*
+ * Volume number <-> the eight bits below the zone.
+ */
+#define HAMMER_VOL_DECODE(ham_off)			\
+	((int)(((hammer_off_t)(ham_off) >> 52) & 255))
+#define HAMMER_VOL_ENCODE(vol_no)			\
+	((hammer_off_t)((vol_no) & 255) << 52)
+
+/*
+ * Truncate a value to the 52-bit (short) or 60-bit (long) offset field.
+ */
+#define HAMMER_OFF_SHORT_ENCODE(offset)			\
+	((hammer_off_t)(offset) & HAMMER_OFF_SHORT_MASK)
+#define HAMMER_OFF_LONG_ENCODE(offset)			\
+	((hammer_off_t)(offset) & HAMMER_OFF_LONG_MASK)
+
+/*
+ * Build a complete (zone, volume, short offset) address, plus
+ * convenience wrappers for the zones that are encoded directly.
+ */
+#define HAMMER_ENCODE(zone, vol_no, offset)		\
+	HAMMER_ZONE_ENCODE((zone),			\
+	    HAMMER_VOL_ENCODE(vol_no) | HAMMER_OFF_SHORT_ENCODE(offset))
+#define HAMMER_ENCODE_RAW_VOLUME(vol_no, offset)	\
+	HAMMER_ENCODE(HAMMER_ZONE_RAW_VOLUME_INDEX, vol_no, offset)
+#define HAMMER_ENCODE_RAW_BUFFER(vol_no, offset)	\
+	HAMMER_ENCODE(HAMMER_ZONE_RAW_BUFFER_INDEX, vol_no, offset)
+#define HAMMER_ENCODE_UNDO(offset)			\
+	HAMMER_ENCODE(HAMMER_ZONE_UNDO_INDEX, HAMMER_ROOT_VOLNO, offset)
+#define HAMMER_ENCODE_FREEMAP(vol_no, offset)		\
+	HAMMER_ENCODE(HAMMER_ZONE_FREEMAP_INDEX, vol_no, offset)
+
+/*
+ * Replace the zone bits of an address with zone-X, keeping the
+ * remaining 60 bits unchanged.
+ */
+#define hammer_xlate_to_zoneX(zone, offset)		\
+	HAMMER_ZONE_ENCODE((zone), (offset) & HAMMER_OFF_LONG_MASK)
+#define hammer_xlate_to_zone2(offset)			\
+	hammer_xlate_to_zoneX(HAMMER_ZONE_RAW_BUFFER_INDEX, (offset))
+
+/*
+ * Choose the data zone for a record of data_len bytes: anything smaller
+ * than one HAMMER_BUFSIZE is small-data, everything else is large-data.
+ */
+#define hammer_data_zone(data_len)			\
+	(((data_len) < HAMMER_BUFSIZE) ?		\
+	 HAMMER_ZONE_SMALL_DATA :			\
+	 HAMMER_ZONE_LARGE_DATA)
+#define hammer_data_zone_index(data_len)		\
+	(((data_len) < HAMMER_BUFSIZE) ?		\
+	 HAMMER_ZONE_SMALL_DATA_INDEX :			\
+	 HAMMER_ZONE_LARGE_DATA_INDEX)
+
+/*
+ * Big-Block backing store
+ *
+ * A blockmap is a two-level map which translates a blockmap-backed zone
+ * offset into a raw zone 2 offset.  The layer 1 handles 18 bits and the
+ * layer 2 handles 19 bits.  The 8M big-block size is 23 bits so two
+ * layers gives us 18+19+23 = 60 bits of address space.
+ *
+ * When using hinting for a blockmap lookup, the hint is lost when the
+ * scan leaves the HINTBLOCK, which is typically several BIGBLOCK's.
+ * HINTBLOCK is a heuristic.
+ */
+/* An 8MB big-block and its derived sizes and masks. */
+#define HAMMER_BIGBLOCK_BITS		23
+#define HAMMER_BIGBLOCK_SIZE		(8192 * 1024)
+#define HAMMER_BIGBLOCK_SIZE64		((uint64_t)HAMMER_BIGBLOCK_SIZE)
+#define HAMMER_BIGBLOCK_MASK		(HAMMER_BIGBLOCK_SIZE - 1)
+#define HAMMER_BIGBLOCK_MASK64		(HAMMER_BIGBLOCK_SIZE64 - 1)
+#if 0
+#define HAMMER_BIGBLOCK_OVERFILL	(6144 * 1024)
+#endif
+/* Compile-time check that the bit count and the byte size agree. */
+#if (1 << HAMMER_BIGBLOCK_BITS) != HAMMER_BIGBLOCK_SIZE
+#error "HAMMER_BIGBLOCK_BITS BROKEN"
+#endif
+
+/* The hint block spans four big-blocks. */
+#define HAMMER_HINTBLOCK_SIZE		(HAMMER_BIGBLOCK_SIZE * 4)
+#define HAMMER_HINTBLOCK_MASK64		((uint64_t)HAMMER_HINTBLOCK_SIZE - 1)
+
+/* Number of HAMMER_BUFSIZE buffers in one big-block, and the index mask. */
+#define HAMMER_BUFFERS_PER_BIGBLOCK			\
+	(HAMMER_BIGBLOCK_SIZE / HAMMER_BUFSIZE)
+#define HAMMER_BUFFERS_PER_BIGBLOCK_MASK		\
+	(HAMMER_BUFFERS_PER_BIGBLOCK - 1)
+#define HAMMER_BUFFERS_PER_BIGBLOCK_MASK64		\
+	((hammer_off_t)HAMMER_BUFFERS_PER_BIGBLOCK_MASK)
+
+/* Round (offset) up to a big-block boundary using 64-bit arithmetic. */
+#define HAMMER_BIGBLOCK_DOALIGN(offset)				\
+	(((offset) + HAMMER_BIGBLOCK_MASK64) & ~HAMMER_BIGBLOCK_MASK64)
+
+/*
+ * Maximum number of mirrors operating in master mode (multi-master
+ * clustering and mirroring). Note that HAMMER1 does not support
+ * multi-master clustering as of 2015.
+ */
+#define HAMMER_MAX_MASTERS		16
+
+/*
+ * The blockmap is somewhat of a degenerate structure.  HAMMER only actually
+ * uses it in its original incarnation to implement the freemap.
+ *
+ * zone:1	raw volume (no blockmap)
+ * zone:2	raw buffer (no blockmap)
+ * zone:3	undomap    (direct layer2 array in volume header)
+ * zone:4	freemap    (the only real blockmap)
+ * zone:8-15	zone id used to classify big-block only, address is actually
+ *		a zone-2 address.
+ */
+/*
+ * Per-zone blockmap descriptor: four 64-bit offsets followed by a
+ * reserved word and a CRC.  Note that hammer_blockmap_t is a POINTER
+ * to the structure, not the structure itself.
+ */
+typedef struct hammer_blockmap {
+	hammer_off_t	phys_offset;  /* zone-2 offset only used by zone-4 */
+	hammer_off_t	first_offset; /* zone-X offset only used by zone-3 */
+	hammer_off_t	next_offset;  /* zone-X offset for allocation */
+	hammer_off_t	alloc_offset; /* zone-X offset only used by zone-3 */
+	uint32_t	reserved01;
+	hammer_crc_t	entry_crc;
+} *hammer_blockmap_t;
+
+/*
+ * Number of bytes preceding entry_crc -- presumably the span the CRC is
+ * computed over.  Expanding this macro requires offsetof() (<stddef.h>).
+ */
+#define HAMMER_BLOCKMAP_CRCSIZE	\
+	offsetof(struct hammer_blockmap, entry_crc)
+
+/*
+ * The blockmap is a 2-layer entity made up of big-blocks.  The first layer
+ * contains 262144 32-byte entries (18 bits), the second layer contains
+ * 524288 16-byte entries (19 bits), representing 8MB (23 bit) blockmaps.
+ * 18+19+23 = 60 bits.  The top four bits are the zone id.
+ *
+ * Currently only the freemap utilizes both layers in all their glory.
+ * All primary data/meta-data zones actually encode a zone-2 address
+ * requiring no real blockmap translation.
+ *
+ * The freemap uses the upper 8 bits of layer-1 to identify the volume,
+ * thus any space allocated via the freemap can be directly translated
+ * to a zone:2 (or zone:8-15) address.
+ *
+ * zone-X blockmap offset: [zone:4][layer1:18][layer2:19][big-block:23]
+ */
+
+/*
+ * 32 bytes layer1 entry for 8MB big-block.
+ * A big-block can hold 2^23 / 2^5 = 2^18 layer1 entries,
+ * which equals bits assigned for layer1 in zone-2 address.
+ */
+/*
+ * Layer1 blockmap entry.  hammer_blockmap_layer1_t is a POINTER to the
+ * structure.
+ */
+typedef struct hammer_blockmap_layer1 {
+	hammer_off_t	blocks_free;	/* big-blocks free */
+	hammer_off_t	phys_offset;	/* UNAVAIL or zone-2 */
+	hammer_off_t	reserved01;
+	hammer_crc_t	layer2_crc;	/* xor'd crc's of HAMMER_BLOCKSIZE */
+					/* (not yet used) */
+	hammer_crc_t	layer1_crc;	/* MUST BE LAST FIELD OF STRUCTURE*/
+} *hammer_blockmap_layer1_t;
+
+/*
+ * Number of bytes preceding layer1_crc, which is why that field has to
+ * stay last.  Expanding this macro requires offsetof() (<stddef.h>).
+ */
+#define HAMMER_LAYER1_CRCSIZE	\
+	offsetof(struct hammer_blockmap_layer1, layer1_crc)
+
+/*
+ * 16 bytes layer2 entry for 8MB big-blocks.
+ * A big-block can hold 2^23 / 2^4 = 2^19 layer2 entries,
+ * which equals bits assigned for layer2 in zone-2 address.
+ *
+ * NOTE: bytes_free is signed and can legally go negative if/when data
+ *	 de-dup occurs.  This field will never go higher than
+ *	 HAMMER_BIGBLOCK_SIZE.  If exactly HAMMER_BIGBLOCK_SIZE
+ *	 the big-block is completely free.
+ */
+/*
+ * Layer2 blockmap entry, one per big-block.  hammer_blockmap_layer2_t is
+ * a POINTER to the structure.
+ */
+typedef struct hammer_blockmap_layer2 {
+	uint8_t		zone;		/* typed allocation zone */
+	uint8_t		reserved01;
+	uint16_t	reserved02;
+	uint32_t	append_off;	/* allocatable space index */
+	int32_t		bytes_free;	/* bytes free within this big-block */
+	hammer_crc_t	entry_crc;
+} *hammer_blockmap_layer2_t;
+
+/*
+ * Number of bytes preceding entry_crc.  Expanding this macro requires
+ * offsetof() (<stddef.h>).
+ */
+#define HAMMER_LAYER2_CRCSIZE	\
+	offsetof(struct hammer_blockmap_layer2, entry_crc)
+
+/* All-ones offset. */
+#define HAMMER_BLOCKMAP_UNAVAIL	((hammer_off_t)-1LL)
+
+/*
+ * Number of layer1 / layer2 entries that fit in one big-block.  The
+ * results are cast to int.
+ */
+#define HAMMER_BLOCKMAP_RADIX1	/* 2^18 = 262144 */	\
+	((int)(HAMMER_BIGBLOCK_SIZE / sizeof(struct hammer_blockmap_layer1)))
+#define HAMMER_BLOCKMAP_RADIX2	/* 2^19 = 524288 */	\
+	((int)(HAMMER_BIGBLOCK_SIZE / sizeof(struct hammer_blockmap_layer2)))
+
+/*
+ * Bytes of address space spanned by a whole layer1 / layer2 big-block.
+ * Both are 64-bit quantities because HAMMER_BLOCKMAP_LAYER2 multiplies by
+ * HAMMER_BIGBLOCK_SIZE64.  HAMMER_BLOCKMAP_LAYER1 refers to
+ * HAMMER_BLOCKMAP_LAYER2 before its definition, which is fine because
+ * macros are only expanded at the point of use.
+ */
+#define HAMMER_BLOCKMAP_LAYER1	/* 2^(18+19+23) = 1EB */	\
+	(HAMMER_BLOCKMAP_RADIX1 * HAMMER_BLOCKMAP_LAYER2)
+#define HAMMER_BLOCKMAP_LAYER2	/* 2^(19+23) = 4TB */		\
+	(HAMMER_BLOCKMAP_RADIX2 * HAMMER_BIGBLOCK_SIZE64)
+
+#define HAMMER_BLOCKMAP_LAYER1_MASK	(HAMMER_BLOCKMAP_LAYER1 - 1)
+#define HAMMER_BLOCKMAP_LAYER2_MASK	(HAMMER_BLOCKMAP_LAYER2 - 1)
+
+/* Round (offset) up to a multiple of HAMMER_BLOCKMAP_LAYER2. */
+#define HAMMER_BLOCKMAP_LAYER2_DOALIGN(offset)			\
+	(((offset) + HAMMER_BLOCKMAP_LAYER2_MASK) &		\
+	 ~HAMMER_BLOCKMAP_LAYER2_MASK)
+
+/*
+ * Index within layer1 or layer2 big-block for the entry representing
+ * a zone-2 physical offset.
+ */
+#define HAMMER_BLOCKMAP_LAYER1_INDEX(zone2_offset)		\
+	((int)(((zone2_offset) & HAMMER_BLOCKMAP_LAYER1_MASK) /	\
+	 HAMMER_BLOCKMAP_LAYER2))
+
+#define HAMMER_BLOCKMAP_LAYER2_INDEX(zone2_offset)		\
+	((int)(((zone2_offset) & HAMMER_BLOCKMAP_LAYER2_MASK) /	\
+	HAMMER_BIGBLOCK_SIZE64))
+
+/*
+ * Byte offset within layer1 or layer2 big-block for the entry representing
+ * a zone-2 physical offset.  Multiply the index by sizeof(blockmap_layer).
+ */
+#define HAMMER_BLOCKMAP_LAYER1_OFFSET(zone2_offset)		\
+	(HAMMER_BLOCKMAP_LAYER1_INDEX(zone2_offset) *		\
+	 sizeof(struct hammer_blockmap_layer1))
+
+#define HAMMER_BLOCKMAP_LAYER2_OFFSET(zone2_offset)		\
+	(HAMMER_BLOCKMAP_LAYER2_INDEX(zone2_offset) *		\
+	 sizeof(struct hammer_blockmap_layer2))
+
+/*
+ * Move on to offset 0 of the next layer1 or layer2.
+ *
+ * The LAYER1 variant advances by HAMMER_BLOCKMAP_LAYER2 bytes (the span
+ * of one layer1 entry); the LAYER2 variant advances by one big-block.
+ */
+#define HAMMER_ZONE_LAYER1_NEXT_OFFSET(offset)			\
+	(((offset) + HAMMER_BLOCKMAP_LAYER2) & ~HAMMER_BLOCKMAP_LAYER2_MASK)
+
+#define HAMMER_ZONE_LAYER2_NEXT_OFFSET(offset)			\
+	(((offset) + HAMMER_BIGBLOCK_SIZE) & ~HAMMER_BIGBLOCK_MASK64)
+
+/*
+ * HAMMER UNDO parameters.  The UNDO fifo is mapped directly in the volume
+ * header with an array of zone-2 offsets.  A maximum of (128x8MB) = 1GB,
+ * and minimum of (64x8MB) = 512MB may be reserved.  The size of the undo
+ * fifo is usually set at newfs time.
+ */
+#define HAMMER_MIN_UNDO_BIGBLOCKS		64
+#define HAMMER_MAX_UNDO_BIGBLOCKS		128
+
+/*
+ * All on-disk HAMMER structures which make up elements of the UNDO FIFO
+ * contain a hammer_fifo_head and hammer_fifo_tail structure.  This structure
+ * contains all the information required to validate the fifo element
+ * and to scan the fifo in either direction.  The head is typically embedded
+ * in higher level hammer on-disk structures while the tail is typically
+ * out-of-band.  hdr_size is the size of the whole mess, including the tail.
+ *
+ * All undo structures are guaranteed to not cross a 16K filesystem
+ * buffer boundary.  Most undo structures are fairly small.  Data spaces
+ * are not immediately reused by HAMMER so file data is not usually recorded
+ * as part of an UNDO.
+ *
+ * PAD elements are allowed to take up only 8 bytes of space as a special
+ * case, containing only hdr_signature, hdr_type, and hdr_size fields,
+ * and with the tail overloaded onto the head structure for 8 bytes total.
+ *
+ * Every undo record has a sequence number.  This number is unrelated to
+ * transaction ids and instead collects the undo transactions associated
+ * with a single atomic operation.  A larger transactional operation, such
+ * as a remove(), may consist of several smaller atomic operations
+ * representing raw meta-data operations.
+ *
+ *				HAMMER VERSION 4 CHANGES
+ *
+ * In HAMMER version 4 the undo structure alignment is reduced from 16384
+ * to 512 bytes in order to ensure that each 512 byte sector begins with
+ * a header.  The hdr_seq field in the header is a 32 bit sequence number
+ * which allows the recovery code to detect missing sectors
+ * without relying on the 32-bit crc and to definitively identify the current
+ * undo sequence space without having to rely on information from the volume
+ * header.  In addition, new REDO entries in the undo space are used to
+ * record write, write/extend, and transaction id updates.
+ *
+ * The grand result is:
+ *
+ * (1) The volume header no longer needs to be synchronized for most
+ *     flush and fsync operations.
+ *
+ * (2) Most fsync operations need only lay down REDO records
+ *
+ * (3) Data overwrite for nohistory operations covered by REDO records
+ *     can be supported (instead of rolling a new block allocation),
+ *     by rolling UNDO for the prior contents of the data.
+ *
+ *				HAMMER VERSION 5 CHANGES
+ *
+ * Hammer version 5 contains a minor adjustment making layer2's bytes_free
+ * field signed, allowing dedup to push it into the negative domain.
+ */
+#define HAMMER_HEAD_ALIGN		8
+#define HAMMER_HEAD_ALIGN_MASK		(HAMMER_HEAD_ALIGN - 1)
+#define HAMMER_HEAD_DOALIGN(bytes)	/* round up to 8 */	\
+	(((bytes) + HAMMER_HEAD_ALIGN_MASK) & ~HAMMER_HEAD_ALIGN_MASK)
+
+#define HAMMER_UNDO_ALIGN		512	/* v4+ undo alignment, bytes */
+#define HAMMER_UNDO_ALIGN64		((uint64_t)512)
+#define HAMMER_UNDO_MASK		(HAMMER_UNDO_ALIGN - 1)
+#define HAMMER_UNDO_MASK64		(HAMMER_UNDO_ALIGN64 - 1)
+#define HAMMER_UNDO_DOALIGN(offset)	/* round up to 512 */	\
+	(((offset) + HAMMER_UNDO_MASK) & ~HAMMER_UNDO_MASK64)
+
+typedef struct hammer_fifo_head {
+	uint16_t hdr_signature;	/* see HAMMER_HEAD_SIGNATURE */
+	uint16_t hdr_type;	/* see HAMMER_HEAD_TYPE_* */
+	uint32_t hdr_size;	/* Aligned size of the whole mess, incl. tail */
+	uint32_t hdr_seq;	/* Sequence number */
+	hammer_crc_t hdr_crc;	/* XOR crc up to field w/ crc after field */
+} *hammer_fifo_head_t;
+
+#define HAMMER_FIFO_HEAD_CRCOFF	offsetof(struct hammer_fifo_head, hdr_crc)
+
+typedef struct hammer_fifo_tail {
+	uint16_t tail_signature;	/* see HAMMER_TAIL_SIGNATURE */
+	uint16_t tail_type;
+	uint32_t tail_size;	/* aligned size of the whole mess */
+} *hammer_fifo_tail_t;
+
+/*
+ * Fifo header types.
+ *
+ * NOTE: The 0x8000U bit of HAMMER_HEAD_TYPE_PAD can be removed if the HAMMER
+ * version ever gets bumped again. It exists only to keep compatibility with
+ * older versions.
+ */
+#define HAMMER_HEAD_TYPE_PAD	(0x0040U | 0x8000U)	/* pad, may be 8 bytes */
+#define HAMMER_HEAD_TYPE_DUMMY	0x0041U		/* dummy entry w/seqno */
+#define HAMMER_HEAD_TYPE_UNDO	0x0043U		/* random UNDO information */
+#define HAMMER_HEAD_TYPE_REDO	0x0044U		/* data REDO / fast fsync */
+
+#define HAMMER_HEAD_SIGNATURE	0xC84EU
+#define HAMMER_TAIL_SIGNATURE	0xC74FU
+
+/*
+ * Misc FIFO structures.
+ *
+ * UNDO - Raw meta-data media updates.
+ */
+typedef struct hammer_fifo_undo {
+	struct hammer_fifo_head	head;
+	hammer_off_t		undo_offset;	/* zone-1,2 offset */
+	int32_t			undo_data_bytes; /* presumably data length */
+	int32_t			undo_reserved01;
+	/* followed by data */
+} *hammer_fifo_undo_t;
+
+/*
+ * REDO (HAMMER version 4+) - Logical file writes/truncates.
+ *
+ * REDOs contain information which will be duplicated in a later meta-data
+ * update, allowing fast write()+fsync() operations.  REDOs can be ignored
+ * without harming filesystem integrity but must be processed if fsync()
+ * semantics are desired.
+ *
+ * Unlike UNDOs which are processed backwards within the recovery span,
+ * REDOs must be processed forwards starting further back (starting outside
+ * the recovery span).
+ *
+ *	WRITE	- Write logical file (with payload).  Executed both
+ *		  out-of-span and in-span.  Out-of-span WRITEs may be
+ *		  filtered out by TERMs.
+ *
+ *	TRUNC	- Truncate logical file (no payload).  Executed both
+ *		  out-of-span and in-span.  Out-of-span WRITEs may be
+ *		  filtered out by TERMs.
+ *
+ *	TERM_*	- Indicates meta-data was committed (if out-of-span) or
+ *		  will be rolled-back (in-span).  Any out-of-span TERMs
+ *		  matching earlier WRITEs remove those WRITEs from
+ *		  consideration as they might conflict with a later data
+ *		  commit (which is not being rolled-back).
+ *
+ *	SYNC	- The earliest in-span SYNC (the last one when scanning
+ *		  backwards) tells the recovery code how far out-of-span
+ *		  it must go to run REDOs.
+ *
+ * NOTE: WRITEs do not always have matching TERMs even under
+ *	 perfect conditions because truncations might remove the
+ *	 buffers from consideration.  I/O problems can also remove
+ *	 buffers from consideration.
+ *
+ *	 TRUNCs do not always have matching TERMs because several
+ *	 truncations may be aggregated together into a single TERM.
+ */
+typedef struct hammer_fifo_redo {
+	struct hammer_fifo_head	head;
+	int64_t			redo_objid;	/* file being written */
+	hammer_off_t		redo_offset;	/* logical offset in file */
+	int32_t			redo_data_bytes;
+	uint32_t		redo_flags;	/* see HAMMER_REDO_* */
+	uint32_t		redo_localization;
+	uint32_t		redo_reserved01;
+	uint64_t		redo_reserved02;
+	/* followed by data */
+} *hammer_fifo_redo_t;
+
+#define HAMMER_REDO_WRITE	0x00000001	/* write, w/ payload */
+#define HAMMER_REDO_TRUNC	0x00000002	/* truncate, no payload */
+#define HAMMER_REDO_TERM_WRITE	0x00000004	/* TERM for WRITEs */
+#define HAMMER_REDO_TERM_TRUNC	0x00000008	/* TERM for TRUNCs */
+#define HAMMER_REDO_SYNC	0x00000010	/* how far back REDOs run */
+
+typedef union hammer_fifo_any {
+	struct hammer_fifo_head	head;
+	struct hammer_fifo_undo	undo;
+	struct hammer_fifo_redo	redo;
+} *hammer_fifo_any_t;
+
+/*
+ * Volume header types ("HAMMER01" with bit 7 set on 'H' and 'E')
+ */
+#define HAMMER_FSBUF_VOLUME	0xC8414D4DC5523031ULL	/* HAMMER01 */
+#define HAMMER_FSBUF_VOLUME_REV	0x313052C54D4D41C8ULL	/* (reverse endian) */
+
+/*
+ * HAMMER Volume header
+ *
+ * A HAMMER filesystem can be built from 1-256 block devices, each block
+ * device contains a volume header followed by however many buffers fit
+ * into the volume.
+ *
+ * One of the volumes making up a HAMMER filesystem is the root volume.
+ * The root volume is always volume #0 which is the first block device path
+ * specified by newfs_hammer(8).  All HAMMER volumes have a volume header,
+ * however the root volume may be the only volume that has valid values for
+ * some fields in the header.
+ *
+ * Special field notes:
+ *
+ *	vol_bot_beg - offset of boot area (mem_beg - bot_beg bytes)
+ *	vol_mem_beg - offset of memory log (buf_beg - mem_beg bytes)
+ *	vol_buf_beg - offset of the first buffer in volume
+ *	vol_buf_end - offset of volume EOF (on buffer boundary)
+ *
+ *	The memory log area allows a kernel to cache new records and data
+ *	in memory without allocating space in the actual filesystem to hold
+ *	the records and data.  In the event that a filesystem becomes full,
+ *	any records remaining in memory can be flushed to the memory log
+ *	area.  This allows the kernel to immediately return success.
+ *
+ *	The buffer offset is a physical offset of zone-2 offset. The lower
+ *	52 bits of the zone-2 offset is added to the buffer offset of each
+ *	volume to generate an actual I/O offset within the block device.
+ *
+ *	NOTE: boot area and memory log are currently not used.
+ */
+
+/*
+ * Filesystem type string
+ */
+#define HAMMER_FSTYPE_STRING		"DragonFly HAMMER"
+
+/*
+ * These macros are only used by userspace when userspace commands either
+ * initialize or add a new HAMMER volume.
+ */
+#define HAMMER_BOOT_MINBYTES		(32*1024)	/* 32KB */
+#define HAMMER_BOOT_NOMBYTES		(64LL*1024*1024)	/* 64MB */
+#define HAMMER_BOOT_MAXBYTES		(256LL*1024*1024)	/* 256MB */
+
+#define HAMMER_MEM_MINBYTES		(256*1024)	/* 256KB */
+#define HAMMER_MEM_NOMBYTES		(1LL*1024*1024*1024)	/* 1GB */
+#define HAMMER_MEM_MAXBYTES		(64LL*1024*1024*1024)	/* 64GB */
+
+typedef struct hammer_volume_ondisk {
+	uint64_t vol_signature;	/* HAMMER_FSBUF_VOLUME for a valid header */
+
+	/*
+	 * These are relative to block device offset, not zone offsets.
+	 */
+	int64_t vol_bot_beg;	/* offset of boot area */
+	int64_t vol_mem_beg;	/* offset of memory log */
+	int64_t vol_buf_beg;	/* offset of the first buffer in volume */
+	int64_t vol_buf_end;	/* offset of volume EOF (on buffer boundary) */
+	int64_t vol_reserved01;
+
+	hammer_uuid_t vol_fsid;	/* identify filesystem */
+	hammer_uuid_t vol_fstype; /* identify filesystem type */
+	char vol_label[64];	/* filesystem label */
+
+	int32_t vol_no;		/* volume number within filesystem */
+	int32_t vol_count;	/* number of volumes making up filesystem */
+
+	uint32_t vol_version;	/* version control information */
+	hammer_crc_t vol_crc;	/* header crc */
+	uint32_t vol_flags;	/* volume flags */
+	uint32_t vol_rootvol;	/* the root volume number (must be 0) */
+
+	uint32_t vol_reserved[8];
+
+	/*
+	 * These fields are initialized and space is reserved in every
+	 * volume making up a HAMMER filesystem, but only the root volume
+	 * contains valid data.  Note that vol0_stat_bigblocks does not
+	 * include big-blocks for freemap and undomap initially allocated
+	 * by newfs_hammer(8).
+	 */
+	int64_t vol0_stat_bigblocks;	/* total big-blocks when fs is empty */
+	int64_t vol0_stat_freebigblocks;/* number of free big-blocks */
+	int64_t	vol0_reserved01;
+	int64_t vol0_stat_inodes;	/* for statfs only */
+	int64_t vol0_reserved02;
+	hammer_off_t vol0_btree_root;	/* B-Tree root offset in zone-8 */
+	hammer_tid_t vol0_next_tid;	/* highest partially synchronized TID */
+	hammer_off_t vol0_reserved03;
+
+	/*
+	 * Blockmaps for zones.  Not all zones use a blockmap.  Note that
+	 * the entire root blockmap is cached in the hammer_mount structure.
+	 */
+	struct hammer_blockmap	vol0_blockmap[HAMMER_MAX_ZONES];
+
+	/*
+	 * Array of zone-2 addresses for undo FIFO, one per undo big-block.
+	 */
+	hammer_off_t		vol0_undo_array[HAMMER_MAX_UNDO_BIGBLOCKS];
+} *hammer_volume_ondisk_t;
+
+#define HAMMER_ROOT_VOLNO		0	/* root volume is always #0 */
+
+#define HAMMER_VOLF_NEEDFLUSH		0x0004	/* volume needs flush */
+
+#define HAMMER_VOL_CRCSIZE1	/* bytes preceding vol_crc */	\
+	offsetof(struct hammer_volume_ondisk, vol_crc)
+#define HAMMER_VOL_CRCSIZE2	/* bytes following vol_crc */	\
+	(sizeof(struct hammer_volume_ondisk) - HAMMER_VOL_CRCSIZE1 -	\
+	 sizeof(hammer_crc_t))
+
+#define HAMMER_VOL_VERSION_MIN		1	/* minimum supported version */
+#define HAMMER_VOL_VERSION_DEFAULT	7	/* newfs default version */
+#define HAMMER_VOL_VERSION_WIP		8	/* version >= this is WIP */
+#define HAMMER_VOL_VERSION_MAX		7	/* maximum supported version */
+
+#define HAMMER_VOL_VERSION_ONE		1
+#define HAMMER_VOL_VERSION_TWO		2	/* new dirent layout (2.3+) */
+#define HAMMER_VOL_VERSION_THREE	3	/* new snapshot layout (2.5+) */
+#define HAMMER_VOL_VERSION_FOUR		4	/* new undo/flush (2.5+) */
+#define HAMMER_VOL_VERSION_FIVE		5	/* dedup (2.9+) */
+#define HAMMER_VOL_VERSION_SIX		6	/* DIRHASH_ALG1 */
+#define HAMMER_VOL_VERSION_SEVEN	7	/* use the faster iscsi_crc */
+
+/*
+ * Translate a zone-2 address to physical (block device) address
+ */
+#define hammer_xlate_to_phys(volume, zone2_offset)	\
+	((volume)->vol_buf_beg + HAMMER_OFF_SHORT_ENCODE(zone2_offset))
+
+/*
+ * Translate a zone-3 address to zone-2 address (evaluates zone3_offset twice)
+ */
+#define HAMMER_UNDO_INDEX(zone3_offset)			\
+	(HAMMER_OFF_SHORT_ENCODE(zone3_offset) / HAMMER_BIGBLOCK_SIZE)
+
+#define hammer_xlate_to_undo(volume, zone3_offset)			\
+	((volume)->vol0_undo_array[HAMMER_UNDO_INDEX(zone3_offset)] +	\
+	 ((zone3_offset) & HAMMER_BIGBLOCK_MASK64))
+
+/*
+ * Effective per-volume filesystem capacity including big-blocks for layer1/2
+ */
+#define HAMMER_VOL_BUF_SIZE(volume)			\
+	((volume)->vol_buf_end - (volume)->vol_buf_beg)
+
+/*
+ * Record types are fairly straightforward.  The B-Tree includes the record
+ * type in its index sort.
+ */
+#define HAMMER_RECTYPE_UNKNOWN		0x0000
+#define HAMMER_RECTYPE_INODE		0x0001	/* inode in obj_id space */
+#define HAMMER_RECTYPE_DATA		0x0010
+#define HAMMER_RECTYPE_DIRENTRY		0x0011
+#define HAMMER_RECTYPE_DB		0x0012
+#define HAMMER_RECTYPE_EXT		0x0013	/* ext attributes */
+#define HAMMER_RECTYPE_FIX		0x0014	/* fixed attribute */
+#define HAMMER_RECTYPE_PFS		0x0015	/* PFS management */
+#define HAMMER_RECTYPE_SNAPSHOT		0x0016	/* Snapshot management */
+#define HAMMER_RECTYPE_CONFIG		0x0017	/* hammer cleanup config */
+#define HAMMER_RECTYPE_MAX		0xFFFF
+
+#define HAMMER_RECTYPE_ENTRY_START	(HAMMER_RECTYPE_INODE + 1) /* 0x0002 */
+#define HAMMER_RECTYPE_CLEAN_START	HAMMER_RECTYPE_EXT	/* 0x0013 */
+
+#define HAMMER_FIXKEY_SYMLINK		1
+
+#define HAMMER_OBJTYPE_UNKNOWN		0	/* never exists on-disk as unknown */
+#define HAMMER_OBJTYPE_DIRECTORY	1
+#define HAMMER_OBJTYPE_REGFILE		2
+#define HAMMER_OBJTYPE_DBFILE		3
+#define HAMMER_OBJTYPE_FIFO		4
+#define HAMMER_OBJTYPE_CDEV		5
+#define HAMMER_OBJTYPE_BDEV		6
+#define HAMMER_OBJTYPE_SOFTLINK		7
+#define HAMMER_OBJTYPE_PSEUDOFS		8	/* pseudo filesystem obj */
+#define HAMMER_OBJTYPE_SOCKET		9
+
+/*
+ * HAMMER inode attribute data
+ *
+ * The data reference for a HAMMER inode points to this structure.  Any
+ * modifications to the contents of this structure will result in a
+ * replacement operation.
+ *
+ * parent_obj_id is only valid for directories (which cannot be hard-linked),
+ * and specifies the parent directory obj_id.  This field will also be set
+ * for non-directory inodes as a recovery aid, but can wind up holding
+ * stale information.  However, since object id's are not reused, the worst
+ * that happens is that the recovery code is unable to use it.
+ * A parent_obj_id of 0 means it's a root inode of root or non-root PFS.
+ *
+ * NOTE: Future note on directory hardlinks.  We can implement a record type
+ * which allows us to point to multiple parent directories.
+ */
+typedef struct hammer_inode_data {
+	uint16_t version;	/* inode data version */
+	uint16_t mode;		/* basic unix permissions */
+	uint32_t uflags;	/* chflags */
+	uint32_t rmajor;	/* used by device nodes */
+	uint32_t rminor;	/* used by device nodes */
+	uint64_t ctime;
+	int64_t parent_obj_id;	/* parent dir obj_id, 0 = PFS root inode */
+	hammer_uuid_t uid;
+	hammer_uuid_t gid;
+
+	uint8_t obj_type;	/* presumably HAMMER_OBJTYPE_* */
+	uint8_t cap_flags;	/* HAMMER_INODE_CAP_* flags (extension) */
+	uint16_t reserved01;
+	uint32_t reserved02;
+	uint64_t nlinks;	/* hard links */
+	uint64_t size;		/* filesystem object size */
+	union {
+		char	symlink[24];	/* HAMMER_INODE_BASESYMLEN */
+	} ext;
+	uint64_t mtime;	/* must be second-to-last; not in inode CRC */
+	uint64_t atime;	/* must be last; not in inode CRC */
+} *hammer_inode_data_t;
+
+/*
+ * Neither mtime nor atime updates are CRCd by the B-Tree element.
+ * mtime updates have UNDO, atime updates do not.
+ */
+#define HAMMER_INODE_CRCSIZE	\
+	offsetof(struct hammer_inode_data, mtime)
+
+#define HAMMER_INODE_DATA_VERSION	1
+#define HAMMER_OBJID_ROOT		1	/* root inodes # */
+#define HAMMER_INODE_BASESYMLEN		24	/* see ext.symlink */
+
+/*
+ * Capability & implementation flags.
+ *
+ * HAMMER_INODE_CAP_DIR_LOCAL_INO - Use inode B-Tree localization
+ * for directory entries.  Also see HAMMER_DIR_INODE_LOCALIZATION().
+ */
+#define HAMMER_INODE_CAP_DIRHASH_MASK	0x03	/* directory: hash algorithm */
+#define HAMMER_INODE_CAP_DIRHASH_ALG0	0x00
+#define HAMMER_INODE_CAP_DIRHASH_ALG1	0x01
+#define HAMMER_INODE_CAP_DIRHASH_ALG2	0x02
+#define HAMMER_INODE_CAP_DIRHASH_ALG3	0x03
+#define HAMMER_INODE_CAP_DIR_LOCAL_INO	0x04	/* use inode localization */
+
+#define HAMMER_DATA_DOALIGN(offset)	/* round up to 16 */	\
+	(((offset) + 15) & ~15)
+#define HAMMER_DATA_DOALIGN_WITH(type, offset)	/* done in (type) */	\
+	(((type)(offset) + 15) & (~(type)15))
+
+/*
+ * A HAMMER directory entry associates a HAMMER filesystem object with a
+ * namespace.  It is hooked into a pseudo-filesystem (with its own inode
+ * numbering space) in the filesystem by setting the high 16 bits of the
+ * localization field.  The low 16 bits must be 0 and are reserved for
+ * future use.
+ *
+ * Directory entries are indexed with a 128 bit namekey rather than an
+ * offset.  A portion of the namekey is an iterator/randomizer to deal
+ * with collisions.
+ *
+ * NOTE: leaf.base.obj_type from the related B-Tree leaf entry holds
+ * the filesystem object type of obj_id, e.g. a den_type equivalent.
+ * It is not stored in hammer_direntry_data.
+ *
+ * NOTE: name field / the filename data reference is NOT terminated with \0.
+ */
+typedef struct hammer_direntry_data {
+	int64_t obj_id;			/* object being referenced */
+	uint32_t localization;		/* identify pseudo-filesystem */
+	uint32_t reserved01;
+	char	name[16];		/* name (extended), no trailing \0 */
+} *hammer_direntry_data_t;
+
+#define HAMMER_ENTRY_NAME_OFF	offsetof(struct hammer_direntry_data, name[0])
+#define HAMMER_ENTRY_SIZE(nlen)	offsetof(struct hammer_direntry_data, name[nlen])
+
+/*
+ * Symlink data which does not fit in the inode is stored in a separate
+ * FIX type record.
+ */
+typedef struct hammer_symlink_data {
+	char	name[16];		/* symlink data (extended) */
+} *hammer_symlink_data_t;
+
+#define HAMMER_SYMLINK_NAME_OFF	offsetof(struct hammer_symlink_data, name[0])
+
+/*
+ * The root inode for the primary filesystem and root inode for any
+ * pseudo-fs may be tagged with an optional data structure using
+ * HAMMER_RECTYPE_PFS and localization id.  This structure allows
+ * the node to be used as a mirroring master or slave.
+ *
+ * When operating as a slave CD's into the node automatically become read-only
+ * and as-of sync_end_tid.
+ *
+ * When operating as a master the read PFSD info sets sync_end_tid to
+ * the most recently flushed TID.
+ *
+ * sync_low_tid is not yet used but will represent the highest pruning
+ * end-point, after which full history is available.
+ *
+ * We need to pack this structure making it equally sized on both 32-bit and
+ * 64-bit machines as it is part of struct hammer_ioc_mrecord_pfs which is
+ * sent over the wire in hammer mirror operations. Only on 64-bit machines
+ * the size of this struct differs when packed or not. This leads us to the
+ * situation where old 64-bit systems (using the non-packed structure),
+ * which were never able to mirror to/from 32-bit systems, are now no longer
+ * able to mirror to/from newer 64-bit systems (using the packed structure).
+ */
+struct hammer_pseudofs_data {
+	hammer_tid_t	sync_low_tid;	/* full history beyond this point */
+	hammer_tid_t	sync_beg_tid;	/* earliest tid w/ full history avail */
+	hammer_tid_t	sync_end_tid;	/* current synchronization point */
+	uint64_t	sync_beg_ts;	/* real-time of last completed sync */
+	uint64_t	sync_end_ts;	/* initiation of current sync cycle */
+	hammer_uuid_t	shared_uuid;	/* shared uuid (match required) */
+	hammer_uuid_t	unique_uuid;	/* unique uuid of this master/slave */
+	int32_t		reserved01;	/* reserved for future master_id */
+	int32_t		mirror_flags;	/* HAMMER_PFSD_* flags */
+	char		label[64];	/* filesystem space label */
+	char		snapshots[64];	/* softlink dir for pruning */
+	int32_t		reserved02;	/* was prune_{time,freq} */
+	int32_t		reserved03;	/* was reblock_{time,freq} */
+	int32_t		reserved04;	/* was snapshot_freq */
+	int32_t		prune_min;	/* do not prune recent history */
+	int32_t		prune_max;	/* do not retain history beyond here */
+	int32_t		reserved[16];
+} __packed;
+
+typedef struct hammer_pseudofs_data *hammer_pseudofs_data_t;
+
+#define HAMMER_PFSD_SLAVE	0x00000001
+#define HAMMER_PFSD_DELETED	0x80000000
+
+#define hammer_is_pfs_slave(pfsd)			\
+	(((pfsd)->mirror_flags & HAMMER_PFSD_SLAVE) != 0)
+#define hammer_is_pfs_master(pfsd)			\
+	(!hammer_is_pfs_slave(pfsd))
+#define hammer_is_pfs_deleted(pfsd)			\
+	(((pfsd)->mirror_flags & HAMMER_PFSD_DELETED) != 0)
+
+#define HAMMER_MAX_PFS		65536
+#define HAMMER_MAX_PFSID	(HAMMER_MAX_PFS - 1)
+#define HAMMER_ROOT_PFSID	0
+
+/*
+ * Snapshot meta-data { Objid = HAMMER_OBJID_ROOT, Key = tid, rectype = SNAPSHOT }.
+ *
+ * Snapshot records replace the old <fs>/snapshots/<softlink> methodology.  Snapshot
+ * records are mirrored but may be independently managed once they are laid down on
+ * a slave.
+ *
+ * NOTE: The b-tree key is signed, the tid is not, so callers must still sort the
+ *	 results.
+ *
+ * NOTE: Reserved fields must be zero (as usual)
+ */
+typedef struct hammer_snapshot_data {
+	hammer_tid_t	tid;		/* the snapshot TID itself (== key) */
+	uint64_t	ts;		/* real-time when snapshot was made */
+	uint64_t	reserved01;
+	uint64_t	reserved02;
+	char		label[64];	/* user-supplied description */
+	uint64_t	reserved03[4];
+} *hammer_snapshot_data_t;
+
+/*
+ * Config meta-data { ObjId = HAMMER_OBJID_ROOT, Key = 0, rectype = CONFIG }.
+ *
+ * Used to store the hammer cleanup config.  This data is not mirrored.
+ */
+typedef struct hammer_config_data {
+	char		text[1024];
+} *hammer_config_data_t;
+
+/*
+ * Rollup various structures embedded as record data
+ */
+typedef union hammer_data_ondisk {
+	struct hammer_direntry_data entry;
+	struct hammer_inode_data inode;
+	struct hammer_symlink_data symlink;	/* FIX type record */
+	struct hammer_pseudofs_data pfsd;	/* HAMMER_RECTYPE_PFS */
+	struct hammer_snapshot_data snap;	/* rectype SNAPSHOT */
+	struct hammer_config_data config;	/* rectype CONFIG */
+} *hammer_data_ondisk_t;
+
+/*
+ * Ondisk layout of B-Tree related structures
+ */
+//#include "hammer_btree.h"
+
+#define HAMMER_DIR_INODE_LOCALIZATION(ino_data) /* dirent localization */ \
+	(((ino_data)->cap_flags & HAMMER_INODE_CAP_DIR_LOCAL_INO) ?	\
+	 HAMMER_LOCALIZE_INODE :					\
+	 HAMMER_LOCALIZE_MISC)
+
+#endif /* !VFS_HAMMER_DISK_H_ */

Reply via email to