[osv-dev] [PATCH] zfs: allow mounting and building on host

2022-07-16 Thread Waldemar Kozaczuk
This requires OpenZFS installed on host (see
https://openzfs.github.io/openzfs-docs/Getting%20Started/Fedora/index.html
for Fedora and
https://openzfs.github.io/openzfs-docs/Getting%20Started/Ubuntu/index.html
for Ubuntu).

In essence this patch adds a new script, zfs-image-on-host.sh, that allows
mounting and building ZFS images using OpenZFS without running OSv. It also
modifies the build script to support a new option, '--use-openzfs', that
delegates to zfs-image-on-host.sh to build the ZFS image. Please see the
examples below:

./scripts/build image=native-example fs=zfs -j$(nproc) --use-openzfs

./scripts/build image=native-example fs=rofs_with_zfs -j$(nproc) --use-openzfs

./scripts/build image=native-example fs=rofs -j$(nproc) --use-openzfs 
--create-zfs-disk

Fixes #1068

Signed-off-by: Waldemar Kozaczuk 
---
 modules/zfs-tools/usr.manifest |   1 +
 scripts/build  |  51 +---
 scripts/imgedit.py |   8 ++
 scripts/zfs-image-on-host.sh   | 227 +
 4 files changed, 269 insertions(+), 18 deletions(-)
 create mode 100755 scripts/zfs-image-on-host.sh

diff --git a/modules/zfs-tools/usr.manifest b/modules/zfs-tools/usr.manifest
index 8be0e5d0..ccc9becd 100644
--- a/modules/zfs-tools/usr.manifest
+++ b/modules/zfs-tools/usr.manifest
@@ -1,4 +1,5 @@
 [manifest]
 /zpool.so: zpool.so
+/zfs.so: zfs.so
 /libzfs.so: libzfs.so
 /libuutil.so: libuutil.so
diff --git a/scripts/build b/scripts/build
index 64a55516..df4b7c70 100755
--- a/scripts/build
+++ b/scripts/build
@@ -38,6 +38,7 @@ usage() {
  --append-manifest Append build//append.manifest to 
usr.manifest
  --create-disk Instead of usr.img create kernel-less 
disk.img
  --create-zfs-disk Create extra empty disk with ZFS 
filesystem
+ --use-openzfs Build and manipulate ZFS images using 
on host OpenZFS tools
 
Examples:
  ./scripts/build -j4 fs=rofs image=native-example   # Create image 
with native-example app
@@ -79,7 +80,7 @@ do
case $i in
--help|-h)
usage ;;
-   
image=*|modules=*|fs=*|usrskel=*|check|--append-manifest|--create-disk|--create-zfs-disk)
 ;;
+   
image=*|modules=*|fs=*|usrskel=*|check|--append-manifest|--create-disk|--create-zfs-disk|--use-openzfs)
 ;;
clean)
stage1_args=clean ;;
arch=*)
@@ -163,11 +164,13 @@ do
vars[create_disk]="true";;
--create-zfs-disk)
vars[create_zfs_disk]="true";;
+   --use-openzfs)
+   vars[use_openzfs]="true";;
esac
 done
 
 # fs_size_mb is in megabytes (1024*1024 bytes)
-fs_size_mb=${vars[fs_size_mb]-256}
+fs_size_mb=${vars[fs_size_mb]-512}
 # fs_size is in bytes
 fs_size=${vars[fs_size]-$(($fs_size_mb*1024*1024))}
 # size must be a multiple of 512. Round it down
@@ -316,10 +319,17 @@ fi
 create_zfs_disk() {
cp $bare $raw_disk.raw
"$SRC"/scripts/imgedit.py setpartition "-f raw ${raw_disk}.raw" 2 
$partition_offset $partition_size
-   qemu-img convert -f raw -O qcow2 $raw_disk.raw $qcow2_disk.img
-   qemu-img resize $qcow2_disk.img ${image_size}b >/dev/null 2>&1
-   "$SRC"/scripts/upload_manifest.py --arch=$arch -o $qcow2_disk.img -m 
usr.manifest -D libgcc_s_dir="$libgcc_s_dir"
-   #"$SRC"/scripts/zfs-image-on-host.sh build $qcow2_disk.img 
$partition_offset osv zfs
+   if [[ ${vars[use_openzfs]} == "true" ]]; then
+   #We use raw disk on purpose so that zfs-image-on-host.sh can 
use loop device which is faster to copy files to
+   qemu-img resize ${raw_disk}.raw ${image_size}b >/dev/null 2>&1
+   "$SRC"/scripts/zfs-image-on-host.sh build ${raw_disk}.raw 1 osv 
zfs true
+   qemu-img convert -f raw -O qcow2 $raw_disk.raw $qcow2_disk.img
+   else
+   qemu-img convert -f raw -O qcow2 $raw_disk.raw $qcow2_disk.img
+   qemu-img resize $qcow2_disk.img ${image_size}b >/dev/null 2>&1
+   "$SRC"/scripts/upload_manifest.py --arch=$arch -o 
$qcow2_disk.img -m usr.manifest -D libgcc_s_dir="$libgcc_s_dir"
+   fi
+   rm ${raw_disk}.raw
 }
 
 create_rofs_disk() {
@@ -332,18 +342,23 @@ create_rofs_disk() {
 
 create_zfs_filesystem() {
local image_path=$1
-   local device_path=$2
-   local qemu_arch=$arch
-   if [[ "$qemu_arch" == 'aarch64' ]]; then
-   console=''
-   zfs_builder_name='zfs_builder.img'
+   if [[ ${vars[use_openzfs]} == "true" ]]; then
+   local partition=$3
+   "$SRC"/scripts/zfs-image-on-host.sh build $image_path 
$partition osv zfs false
else
-   qemu_arch='x86_64'
-   console='--console=serial'
-   zfs_builder_name='zfs_builder-stripped.elf'
+   local device_path=$2
+   local qemu_arch=$arch
+   if [[ "$qemu_arch" == 'aarch64' ]]; then
+   

[osv-dev] [PATCH] tests: fix misc-zfs-arc.cc

2022-07-16 Thread Waldemar Kozaczuk
This patch also enhances this test to make it support running
with ZFS mounted from non-root.

Signed-off-by: Waldemar Kozaczuk 
---
 bsd/sys/cddl/compat/opensolaris/sys/mman.h|  1 +
 .../opensolaris/uts/common/fs/zfs/arc.c   |  4 +-
 modules/tests/Makefile| 11 +--
 tests/misc-zfs-arc.cc | 86 ---
 4 files changed, 63 insertions(+), 39 deletions(-)

diff --git a/bsd/sys/cddl/compat/opensolaris/sys/mman.h 
b/bsd/sys/cddl/compat/opensolaris/sys/mman.h
index ca746898..ec1f17ac 100644
--- a/bsd/sys/cddl/compat/opensolaris/sys/mman.h
+++ b/bsd/sys/cddl/compat/opensolaris/sys/mman.h
@@ -32,6 +32,7 @@
 
 #include_next 
 
+#undef mmap64
 #definemmap64(_a,_b,_c,_d,_e,_f)   mmap(_a,_b,_c,_d,_e,_f)
 
 #endif
diff --git a/bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c 
b/bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
index 15b7a59d..ba339ed1 100644
--- a/bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
+++ b/bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
@@ -134,6 +134,7 @@
 #include 
 
 #include 
+#include 
 
 #ifdef illumos
 #ifndef _KERNEL
@@ -444,6 +445,7 @@ static arc_stats_t arc_stats = {
}   \
}
 
+OSV_LIB_SOLARIS_API
 kstat_t*arc_ksp;
 static arc_state_t *arc_anon;
 static arc_state_t *arc_mru;
@@ -2328,7 +2330,7 @@ arc_flush(spa_t *spa)
ASSERT(spa || arc_eviction_list == NULL);
 }
 
-void
+OSV_LIB_SOLARIS_API void
 arc_shrink(void)
 {
if (arc_c > arc_c_min) {
diff --git a/modules/tests/Makefile b/modules/tests/Makefile
index d1732e75..d084e357 100644
--- a/modules/tests/Makefile
+++ b/modules/tests/Makefile
@@ -134,13 +134,9 @@ tests := tst-pthread.so misc-ramdisk.so tst-vblk.so 
tst-bsd-evh.so \
tst-elf-init.so tst-realloc.so tst-setjmp.so \
libtls.so libtls_gold.so tst-tls.so tst-tls-gold.so tst-tls-pie.so \
tst-sigaction.so tst-syscall.so tst-ifaddrs.so tst-getdents.so \
-   tst-netlink.so misc-zfs-io.so
+   tst-netlink.so misc-zfs-io.so misc-zfs-arc.so
 #  libstatic-thread-variable.so tst-static-thread-variable.so \
 
-#TODO For now let us disable these tests for aarch64 until
-# we support floating point numbers, TLS and correct syscall handling
-# The tst-ifaddrs.so is an exception and it does not compile due to some
-# missing headers
 ifeq ($(arch),x64)
 tests += tst-mmx-fpu.so
 endif
@@ -222,10 +218,9 @@ tests += $(boost-tests)
 
 solaris-tests := tst-solaris-taskq.so
 
-# FIXME: two of the test below can't compile now because of include path
-# (BSD and OSv header files get mixed up, etc.).
+#FIXME: the misc-zfs-disk.c does not compile due to some header issues
 #zfs-tests := misc-zfs-disk.so misc-zfs-io.so misc-zfs-arc.so
-zfs-tests := misc-zfs-io.so
+zfs-tests := misc-zfs-io.so misc-zfs-arc.so
 solaris-tests += $(zfs-tests)
 
 $(zfs-tests:%=$(out)/tests/%): COMMON+= \
diff --git a/tests/misc-zfs-arc.cc b/tests/misc-zfs-arc.cc
index b24dd56b..24ce2e26 100644
--- a/tests/misc-zfs-arc.cc
+++ b/tests/misc-zfs-arc.cc
@@ -5,9 +5,6 @@
  * BSD license as described in the LICENSE file in the top-level directory.
  */
 
-#include 
-#include 
-#include 
 #include 
 
 #include "stat.hh"
@@ -19,6 +16,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -31,6 +29,9 @@ typedef u_long ulong_t;
 #include 
 #include 
 
+#include 
+#include 
+
 #define MB (1024 * 1024)
 
 using namespace std;
@@ -52,7 +53,7 @@ struct arc_data {
 uint64_t size;
 };
 
-static mutex_t kstat_map_mutex;
+static pthread_mutex_t kstat_map_mutex;
 static unordered_map kstat_map;
 
 static struct kstat_named *kstat_map_lookup(const char *name)
@@ -86,18 +87,20 @@ static uint64_t *get_kstat_by_name(const kstat_t *ksp, 
const char *name)
 
 assert(ksp && ksp->ks_data);
 
-WITH_LOCK(kstat_map_mutex) {
-knp = kstat_map_lookup(name);
+pthread_mutex_lock(_map_mutex);
+knp = kstat_map_lookup(name);
 
-/* If knp is NULL, kstat_named wasn't found in the hash */
+/* If knp is NULL, kstat_named wasn't found in the hash */
+if (!knp) {
+/* Then do the manual search and insert it into the hash */
+knp = kstat_map_insert(ksp, name);
 if (!knp) {
-/* Then do the manual search and insert it into the hash */
-knp = kstat_map_insert(ksp, name);
-if (!knp) {
-return 0;
-}
+pthread_mutex_unlock(_map_mutex);
+return 0;
 }
 }
+pthread_mutex_unlock(_map_mutex);
+
 assert(knp->data_type == KSTAT_DATA_UINT64);
 
 return &(knp->value.ui64);
@@ -297,7 +300,6 @@ static int run_test(const kstat_t *ksp, int argc, char 
**argv)
 struct arc_data data;
 struct stat st;
 char path[PATH_MAX];
-int ret;
 
 snprintf(path, PATH_MAX, "%s/%s", TESTDIR, argv[0]);
 

[osv-dev] [PATCH] zpool import: do not try devices where ZFS is mounted

2022-07-16 Thread Waldemar Kozaczuk
Signed-off-by: Waldemar Kozaczuk 
---
 .../lib/libzfs/common/libzfs_import.c | 48 +--
 1 file changed, 43 insertions(+), 5 deletions(-)

diff --git a/bsd/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_import.c 
b/bsd/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_import.c
index 1f8fe36f..908d4b4d 100644
--- a/bsd/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_import.c
+++ b/bsd/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_import.c
@@ -1127,6 +1127,7 @@ zpool_clear_label(int fd)
  * poolname or guid (but not both) are provided by the caller when trying
  * to import a specific pool.
  */
+#define MAX_MOUNTED_DEVS 64
 static nvlist_t *
 zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg)
 {
@@ -1146,6 +1147,8 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t 
*iarg)
avl_tree_t slice_cache;
rdsk_node_t *slice;
void *cookie;
+   char *excluded_dev_names[MAX_MOUNTED_DEVS];
+   int excluded_dev_count = 0;
 
if (dirs == 0) {
dirs = 1;
@@ -1229,6 +1232,28 @@ zpool_find_import_impl(libzfs_handle_t *hdl, 
importargs_t *iarg)
}
 #endif
 
+#ifdef __OSV__
+   // Iterate over all mounts and identify the devices we wanted 
to exlude
+   FILE *ent = fopen("/etc/fstab", "r");
+   if (ent) {
+   struct mnttab m;
+   while (getmntent(ent, ) == 0) {
+   if (strcmp("none", m.mnt_special) == 0) {
+   continue;
+   }
+   char *dev_name = 
excluded_dev_names[excluded_dev_count++] = strdup(m.mnt_special + 5);
+   // If the device has a '.' in it it means it 
corresponds to a disk partion
+   // and in this case we should skip the parent 
disk as well as it will make
+   // the pool discovery slow. For example for 
'vblk0.1' exclude 'vblk0' as well but
+   // not 'vblk0.2'
+   char *dot_pos = strchr(dev_name, '.');
+   if (dot_pos) {
+   
excluded_dev_names[excluded_dev_count++] = strndup(dev_name, dot_pos - 
dev_name);
+   }
+   }
+   fclose(ent);
+   }
+#endif
/*
 * This is not MT-safe, but we have no MT consumers of libzfs
 */
@@ -1238,12 +1263,20 @@ zpool_find_import_impl(libzfs_handle_t *hdl, 
importargs_t *iarg)
(name[1] == 0 || (name[1] == '.' && name[2] == 0)))
continue;
 #ifdef __OSV__
-   /* In OSv, mount_zfs_roofs() always mounts /dev/vblk0.1
-* before calling zpool import, so this device is
-* already mounted, and trying to do it again while
-* it is already mounted is surprisingly slow.
+   /* Trying to call zpool import on a device that we
+* have already mounted ZFS root pool from before,
+* is surprisingly slow. So let us try to avoid it
+* by filtering it out using a list of mounted devices
+* identified before in excluded_dev_names.
 */
-   if (!strcmp(name, "vblk0.1"))
+   bool skip_entry = false;
+   for (int i = 0; i < excluded_dev_count; i++) {
+   if (!strcmp(name, excluded_dev_names[i])) {
+   skip_entry = true;
+   break;
+   }
+   }
+   if (skip_entry)
continue;
 #endif
 
@@ -1255,6 +1288,11 @@ zpool_find_import_impl(libzfs_handle_t *hdl, 
importargs_t *iarg)
slice->rn_nozpool = B_FALSE;
avl_add(_cache, slice);
}
+#ifdef __OSV__
+   while (excluded_dev_count) {
+   free(excluded_dev_names[--excluded_dev_count]);
+   }
+#endif
 
 #ifndef __OSV__
 skipdir:
-- 
2.35.1

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/20220717053624.96106-1-jwkozaczuk%40gmail.com.


[osv-dev] [PATCH] tests: add misc-zfs-io to all images

2022-07-16 Thread Waldemar Kozaczuk
Signed-off-by: Waldemar Kozaczuk 
---
 modules/tests/Makefile | 4 ++--
 tests/misc-zfs-io.cc   | 6 --
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/modules/tests/Makefile b/modules/tests/Makefile
index 7d15522c..d1732e75 100644
--- a/modules/tests/Makefile
+++ b/modules/tests/Makefile
@@ -91,7 +91,7 @@ rofs-only-tests := rofs/tst-chdir.so rofs/tst-symlink.so 
rofs/tst-readdir.so \
rofs/tst-concurrent-read.so
 
 zfs-only-tests := tst-readdir.so tst-fallocate.so tst-fs-link.so \
-   tst-concurrent-read.so misc-zfs-io.so tst-solaris-taskq.so
+   tst-concurrent-read.so tst-solaris-taskq.so
 
 specific-fs-tests := $($(fs_type)-only-tests)
 
@@ -134,7 +134,7 @@ tests := tst-pthread.so misc-ramdisk.so tst-vblk.so 
tst-bsd-evh.so \
tst-elf-init.so tst-realloc.so tst-setjmp.so \
libtls.so libtls_gold.so tst-tls.so tst-tls-gold.so tst-tls-pie.so \
tst-sigaction.so tst-syscall.so tst-ifaddrs.so tst-getdents.so \
-   tst-netlink.so
+   tst-netlink.so misc-zfs-io.so
 #  libstatic-thread-variable.so tst-static-thread-variable.so \
 
 #TODO For now let us disable these tests for aarch64 until
diff --git a/tests/misc-zfs-io.cc b/tests/misc-zfs-io.cc
index 96c4194d..ba0b81d0 100644
--- a/tests/misc-zfs-io.cc
+++ b/tests/misc-zfs-io.cc
@@ -72,7 +72,7 @@ static void seq_read(int fd, char *buf, unsigned long size, 
unsigned long offset
 
 int main(int argc, char **argv)
 {
-char fpath[64] = "/zfs-io-file";
+const char *fpath = "/zfs-io-file";
 char buf[BUF_SIZE];
 unsigned size;
 int fd;
@@ -90,6 +90,8 @@ int main(int argc, char **argv)
 all_cached = true;
 } else if (!strcmp("--no-unlink", argv[i])) {
 unlink_file = false;
+} else if (!strcmp("--file-path", argv[i]) && (i + 1) < argc) {
+fpath = argv[i + 1];
 }
 }
 
@@ -140,7 +142,7 @@ int main(int argc, char **argv)
 
 close(fd);
 if (unlink_file) {
-unlink("/zfs-io-file");
+unlink(fpath);
 }
 
 return 0;
-- 
2.35.1

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/20220717053540.96073-1-jwkozaczuk%40gmail.com.


[osv-dev] [PATCH] zfs: support building rofs+zfs image and second ZFS disk

2022-07-16 Thread Waldemar Kozaczuk
This patch enhances the build scripts and run.py to allow building the
images as described by #1200:

1. Run OSv from a single disk with two partitions: ROFS + ZFS (on /dev/vblk0.2)
  ./scripts/build image=tests,zfs,zfs-tools fs=rofs_with_zfs fs_size_mb=5000
  ./scripts/run.py --execute='--mount-fs=zfs,/dev/vblk0.2,/data 
/tests/misc-zfs-io.so --random --file-path /data/file'

2. Run OSv with 2 disks: 1st one with ROFS and second one with ZFS 
(/dev/vblk1.1):
  ./scripts/build image=tests,zfs,zfs-tools fs=rofs fs_size_mb=5000 
--create-zfs-disk
  ./scripts/run.py --execute='--mount-fs=zfs,/dev/vblk1.1,/data 
/tests/misc-zfs-io.so --random --file-path /data/file' --second-disk-image 
build/release/zfs_disk.img

Fixes #1200

Signed-off-by: Waldemar Kozaczuk 
---
 scripts/build  | 59 ++
 scripts/export_manifest.py |  7 ++---
 scripts/run.py | 12 
 tools/mkfs/mkfs.cc |  8 +++---
 4 files changed, 73 insertions(+), 13 deletions(-)

diff --git a/scripts/build b/scripts/build
index b31b8172..64a55516 100755
--- a/scripts/build
+++ b/scripts/build
@@ -26,7 +26,8 @@ usage() {
  mode=release|debugSpecify the build mode; default is 
release
  export=none|selected|all  If 'selected' or 'all' export the app 
files to 
  export_dir=  The directory to export the files to; 
default is build/export
- fs=zfs|rofs|ramfs|virtiofsSpecify the filesystem of the image 
partition
+ fs=zfs|rofs|rofs_with_zfs|Specify the filesystem of the image 
partition
+ramfs|virtiofs
  fs_size=N Specify the size of the image in bytes
  fs_size_mb=N  Specify the size of the image in MiB
  app_local_exec_tls_size=N Specify the size of app local TLS in 
bytes; the default is 64
@@ -36,6 +37,7 @@ usage() {
  -j Set number of parallel jobs for make
  --append-manifest Append build//append.manifest to 
usr.manifest
  --create-disk Instead of usr.img create kernel-less 
disk.img
+ --create-zfs-disk Create extra empty disk with ZFS 
filesystem
 
Examples:
  ./scripts/build -j4 fs=rofs image=native-example   # Create image 
with native-example app
@@ -77,7 +79,7 @@ do
case $i in
--help|-h)
usage ;;
-   image=*|modules=*|fs=*|usrskel=*|check|--append-manifest|--create-disk) 
;;
+   
image=*|modules=*|fs=*|usrskel=*|check|--append-manifest|--create-disk|--create-zfs-disk)
 ;;
clean)
stage1_args=clean ;;
arch=*)
@@ -159,6 +161,8 @@ do
vars[append_manifest]="true";;
--create-disk)
vars[create_disk]="true";;
+   --create-zfs-disk)
+   vars[create_zfs_disk]="true";;
esac
 done
 
@@ -195,7 +199,7 @@ usrskel_arg=
 case $fs_type in
 zfs)
;; # Nothing to change here. This is our default behavior
-rofs|virtiofs)
+rofs|rofs_with_zfs|virtiofs)
# Both are read-only (in OSv) and require nothing extra on bootfs to 
work
manifest=bootfs_empty.manifest.skel
usrskel_arg="--usrskel usr_rofs.manifest.skel";;
@@ -293,6 +297,7 @@ cd $OUT
 
 if [ "$export" != "none" ]; then
export_dir=${vars[export_dir]-$SRC/build/export}
+   rm -rf "$export_dir"
"$SRC"/scripts/export_manifest.py -e "$export_dir" -m usr.manifest -D 
libgcc_s_dir="$libgcc_s_dir"
 fi
 
@@ -314,6 +319,7 @@ create_zfs_disk() {
qemu-img convert -f raw -O qcow2 $raw_disk.raw $qcow2_disk.img
qemu-img resize $qcow2_disk.img ${image_size}b >/dev/null 2>&1
"$SRC"/scripts/upload_manifest.py --arch=$arch -o $qcow2_disk.img -m 
usr.manifest -D libgcc_s_dir="$libgcc_s_dir"
+   #"$SRC"/scripts/zfs-image-on-host.sh build $qcow2_disk.img 
$partition_offset osv zfs
 }
 
 create_rofs_disk() {
@@ -324,6 +330,22 @@ create_rofs_disk() {
qemu-img convert -f raw -O qcow2 $raw_disk.raw $qcow2_disk.img
 }
 
+create_zfs_filesystem() {
+   local image_path=$1
+   local device_path=$2
+   local qemu_arch=$arch
+   if [[ "$qemu_arch" == 'aarch64' ]]; then
+   console=''
+   zfs_builder_name='zfs_builder.img'
+   else
+   qemu_arch='x86_64'
+   console='--console=serial'
+   zfs_builder_name='zfs_builder-stripped.elf'
+   fi
+   "$SRC"/scripts/run.py -k --kernel-path $zfs_builder_name 
--arch=$qemu_arch --vnc none -m 512 -c1 -i ${image_path} \
+   --block-device-cache unsafe -s -e "${console} --norandom 
--nomount --noinit --preload-zfs-library /tools/mkfs.so ${device_path}; /zfs.so 
set compression=off osv"
+}
+
 if [[ "$arch" == 'aarch64' ]]; then
export STRIP=${CROSS_PREFIX:-aarch64-linux-gnu-}strip
 fi
@@ -332,13 +354,27 @@ case $fs_type in
 zfs)
partition_size=$((fs_size 

[osv-dev] [PATCH] vfs: support mounting ZFS from non-root partition

2022-07-16 Thread Waldemar Kozaczuk
This patch mainly enhances the VFS layer to support dynamically loading
the ZFS library libsolaris.so from another root filesystem like ROFS and
mounting a ZFS filesystem from devices other than /dev/vblk0.1. The
supported scenarios include specifying the loader option '--mount-fs' or
adding an entry to /etc/fstab.

In this patch we take advantage of the existing logic in VFS
pivot_rootfs() that implicitly loads shared libraries from the directory 
/usr/lib/fs
which is where we place libsolaris.so in the image. This was done as
part of the commit 4ffb0fa9329849cd587d62f91b6979bc0e0ce6d1 to support
dynamically loading the NFS filesystem library. To support a similar scenario
with ZFS, we additionally need to enhance mount_fs() to detect the
ZFS case and call zfsdev_init() and initialize the BSD shrinker. We also
enhance unmount_rootfs() to make it unmount ZFS from non-root mount
points.

This patch also adds a new module - zfs - which is intended to be used when
building the two types of images described by #1200. Please note the next
patch will enhance the build script to support building such images.

1. Run OSv from a single disk with two partitions: ROFS + ZFS (on /dev/vblk0.2)
  ./scripts/run.py --execute='--mount-fs=zfs,/dev/vblk0.2,/data /zpool.so list'

2. Run OSv with 2 disks: 1st one with ROFS and second one with ZFS 
(/dev/vblk1.1):
  ./scripts/run.py --execute='--mount-fs=zfs,/dev/vblk1.1,/data /zpool.so list' 
--second-disk-image build/release/zfs_disk.img

Refs #1200

Signed-off-by: Waldemar Kozaczuk 
---
 Makefile   |  2 +-
 bootfs.manifest.skel   |  2 +-
 drivers/zfs.cc | 12 -
 exported_symbols/osv_libsolaris.so.symbols |  1 +
 fs/vfs/main.cc | 52 +++---
 fs/zfs/zfs_initialize.c|  6 +++
 fs/zfs/zfs_null_vfsops.cc  |  5 ++-
 loader.cc  | 34 ++
 modules/zfs-tools/usr.manifest |  1 -
 modules/zfs/usr.manifest   |  2 +
 scripts/upload_manifest.py |  2 +-
 usr_rofs.manifest.skel |  1 +
 zfs_builder_bootfs.manifest.skel   |  2 +-
 13 files changed, 100 insertions(+), 22 deletions(-)
 create mode 100644 modules/zfs/usr.manifest

diff --git a/Makefile b/Makefile
index 6ac9c792..0625524b 100644
--- a/Makefile
+++ b/Makefile
@@ -2383,7 +2383,7 @@ 
$(out)/bsd/cddl/contrib/opensolaris/lib/libzfs/common/zprop_common.o: bsd/sys/cd
 
 $(out)/libzfs.so: $(libzfs-objects) $(out)/libuutil.so $(out)/libsolaris.so
$(makedir)
-   $(call quiet, $(CC) $(CFLAGS) -o $@ $(libzfs-objects) -L$(out) -luutil 
-lsolaris, LINK libzfs.so)
+   $(call quiet, $(CC) $(CFLAGS) -o $@ $(libzfs-objects) -L$(out) -luutil, 
LINK libzfs.so)
 
 #include $(src)/bsd/cddl/contrib/opensolaris/cmd/zpool/build.mk:
 zpool-cmd-file-list = zpool_iter  zpool_main  zpool_util  zpool_vdev
diff --git a/bootfs.manifest.skel b/bootfs.manifest.skel
index bab4c606..5e71fb3b 100644
--- a/bootfs.manifest.skel
+++ b/bootfs.manifest.skel
@@ -1,2 +1,2 @@
 [manifest]
-/libsolaris.so: libsolaris.so
+/usr/lib/fs/libsolaris.so: libsolaris.so
diff --git a/drivers/zfs.cc b/drivers/zfs.cc
index fb335340..02c011ac 100644
--- a/drivers/zfs.cc
+++ b/drivers/zfs.cc
@@ -61,16 +61,24 @@ zfs_device::~zfs_device()
 device_destroy(_zfs_dev);
 }
 
+static bool zfsdev_initialized = false;
+
 void zfsdev_init(void)
 {
-new zfs_device();
+if (!zfsdev_initialized) {
+new zfs_device();
+zfsdev_initialized = true;
+}
 }
 
 }
 
 extern "C" OSV_LIBSOLARIS_API void zfsdev_init()
 {
-new zfsdev::zfs_device();
+if (!zfsdev::zfsdev_initialized) {
+new zfsdev::zfs_device();
+zfsdev::zfsdev_initialized = true;
+}
 }
 
 
diff --git a/exported_symbols/osv_libsolaris.so.symbols 
b/exported_symbols/osv_libsolaris.so.symbols
index c115cb02..9b959675 100644
--- a/exported_symbols/osv_libsolaris.so.symbols
+++ b/exported_symbols/osv_libsolaris.so.symbols
@@ -91,4 +91,5 @@ vrele
 vttoif_tab
 wakeup
 zfsdev_init
+zfs_driver_initialized
 zfs_update_vfsops
diff --git a/fs/vfs/main.cc b/fs/vfs/main.cc
index 8fa99b00..df2becbe 100644
--- a/fs/vfs/main.cc
+++ b/fs/vfs/main.cc
@@ -62,6 +62,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include "vfs.h"
@@ -83,6 +84,9 @@
 #include 
 #include 
 
+#include "drivers/zfs.hh"
+#include "bsd/porting/shrinker.h"
+
 using namespace std;
 
 
@@ -2493,6 +2497,18 @@ static void mount_fs(mntent *m)
 return;
 }
 
+bool zfs = strcmp(m->mnt_type, "zfs") == 0;
+if (zfs) {
+// Ignore if ZFS root pool is already mounted because we can only have 
one root pool
+std::vector mounts = osv::current_mounts();
+for (auto  : mounts) {
+if (mount.type == "zfs" && mount.special.rfind("/dev")) {
+kprintf("ZFS root pool is already mounted at %s\n", 
m->mnt_dir);
+  

[osv-dev] [PATCH] devfs: print details of mounted partition

2022-07-16 Thread Waldemar Kozaczuk
Print information about the partition and offset the child device is created
for. This is very helpful for understanding the process of mounting the
filesystems.

Signed-off-by: Waldemar Kozaczuk 
---
 fs/devfs/device.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/devfs/device.cc b/fs/devfs/device.cc
index 3a5c9f7c..4730f411 100644
--- a/fs/devfs/device.cc
+++ b/fs/devfs/device.cc
@@ -142,6 +142,8 @@ void read_partition_table(struct device *dev)
new_dev->max_io_size = dev->max_io_size;
new_dev->private_data = dev->private_data;
device_set_softc(new_dev, device_get_softc(dev));
+
+   kprintf("devfs: created device %s for a partition at offset:%ld 
with size:%ld\n", dev_name, new_dev->offset, new_dev->size);
}
 
sched_unlock();
-- 
2.35.1

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/20220717043714.95626-1-jwkozaczuk%40gmail.com.


[osv-dev] [PATCH] zfs: refactor loader to handle loading libsolaris.so

2022-07-16 Thread Waldemar Kozaczuk
This patch refactors the code that loads libsolaris.so to mount ZFS
filesystem by extracting the common code into the
load_zfs_library_and_mount_zfs_root() function. This will help us
enhance the loader and VFS code to support mounting ZFS filesystem
from devices different than /dev/vblk0.1.

Refs #1200

Signed-off-by: Waldemar Kozaczuk 
---
 loader.cc | 61 ---
 1 file changed, 27 insertions(+), 34 deletions(-)

diff --git a/loader.cc b/loader.cc
index ee05033b..3db560e1 100644
--- a/loader.cc
+++ b/loader.cc
@@ -406,6 +406,31 @@ static void stop_all_remaining_app_threads()
 }
 }
 
+static void load_zfs_library_and_mount_zfs_root(const char* mount_error_msg, 
bool pivot_when_error = false)
+{
+// Load and initialize ZFS filesystem driver implemented in libsolaris.so
+const auto libsolaris_file_name = "libsolaris.so";
+//TODO: Consider calling dlclose() somewhere after ZFS is unmounted
+if (dlopen(libsolaris_file_name, RTLD_LAZY)) {
+zfsdev::zfsdev_init();
+auto error = mount_zfs_rootfs(opt_pivot, opt_extra_zfs_pools);
+if (error) {
+debug(mount_error_msg);
+if (pivot_when_error) {
+// Continue with ramfs (already mounted)
+// TODO: Avoid the hack of using pivot_rootfs() just for
+// mounting the fstab entries.
+pivot_rootfs("/");
+}
+} else {
+bsd_shrinker_init();
+boot_time.event("ZFS mounted");
+}
+} else {
+debug("Could not load and/or initialize %s.\n", libsolaris_file_name);
+}
+}
+
 void* do_main_thread(void *_main_args)
 {
 auto app_cmdline = static_cast(_main_args);
@@ -424,7 +449,6 @@ void* do_main_thread(void *_main_args)
 if (opt_mount) {
 unmount_devfs();
 
-const auto libsolaris_file_name = "libsolaris.so";
 if (opt_rootfs.compare("rofs") == 0) {
 auto error = mount_rofs_rootfs(opt_pivot);
 if (error) {
@@ -437,20 +461,7 @@ void* do_main_thread(void *_main_args)
 }
 boot_time.event("ROFS mounted");
 } else if (opt_rootfs.compare("zfs") == 0) {
-//Initialize ZFS filesystem driver implemented in libsolaris.so
-//TODO: Consider calling dlclose() somewhere after ZFS is unmounted
-if (dlopen(libsolaris_file_name, RTLD_LAZY)) {
-zfsdev::zfsdev_init();
-auto error = mount_zfs_rootfs(opt_pivot, opt_extra_zfs_pools);
-if (error) {
-debug("Could not mount zfs root filesystem.\n");
-}
-
-bsd_shrinker_init();
-boot_time.event("ZFS mounted");
-} else {
-debug("Could not load and/or initialize %s.\n", 
libsolaris_file_name);
-}
+load_zfs_library_and_mount_zfs_root("Could not mount zfs root 
filesystem.\n");
 } else if (opt_rootfs.compare("ramfs") == 0) {
 // NOTE: The ramfs is already mounted, we just need to mount fstab
 // entries. That's the only difference between this and --nomount.
@@ -476,25 +487,7 @@ void* do_main_thread(void *_main_args)
 } else if (mount_virtiofs_rootfs(opt_pivot) == 0) {
 boot_time.event("Virtio-fs mounted");
 } else {
-//Initialize ZFS filesystem driver implemented in libsolaris.so
-//TODO: Consider calling dlclose() somewhere after ZFS is 
unmounted
-if (dlopen("libsolaris.so", RTLD_LAZY)) {
-zfsdev::zfsdev_init();
-auto error = mount_zfs_rootfs(opt_pivot, 
opt_extra_zfs_pools);
-if (error) {
-debug("Could not mount zfs root filesystem (while "
-  "auto-discovering).\n");
-// Continue with ramfs (already mounted)
-// TODO: Avoid the hack of using pivot_rootfs() just 
for
-// mounting the fstab entries.
-pivot_rootfs("/");
-} else {
-bsd_shrinker_init();
-boot_time.event("ZFS mounted");
-}
-} else {
-debug("Could not load and/or initialize %s.\n", 
libsolaris_file_name);
-}
+load_zfs_library_and_mount_zfs_root("Could not mount zfs root 
filesystem (while auto-discovering).\n", true);
 }
 }
 }
-- 
2.35.1

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 

[osv-dev] [PATCH] zfs: use spa_dev_path instead of defaulting to /dev/vblk0.1

2022-07-16 Thread Waldemar Kozaczuk
The commit c9640a385c44704626a9169c03cff0752bfe764d, addressing issue
#918, tweaked the vdev disk mounting logic to default to importing the root
pool from the device /dev/vblk0.1. This was really a hack that was
satisfactory to support mounting a ZFS image created or modified on host.

However, if we want to be able to import the root pool and mount a ZFS
filesystem from an arbitrary device and partition like /dev/vblk0.2 or
/dev/vblk1.1, we have to pass the specific device path to all places
in ZFS code where it references it. There are 4 code paths that end up
calling vdev_alloc() but unfortunately changing all relevant functions
and their callers to pass the device path would be quite untenable.

So instead, this patch adds new field spa_dev_path to the spa structure
that holds the information about the Storage Pool Allocator in memory.
This new field is set to point to the device we want to import the ZFS
root pool from in spa_import_rootpool() function called by ZFS mount
disk process and then used by vdev_alloc() downstream.

Refs #1200

Signed-off-by: Waldemar Kozaczuk 
---
 bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c  | 1 +
 .../cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c | 1 +
 .../contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h  | 1 +
 bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c | 8 ++--
 .../contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c | 3 ---
 5 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c 
b/bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
index 6cee8352..70f0c5b1 100644
--- a/bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
+++ b/bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
@@ -4206,6 +4206,7 @@ spa_import_rootpool(const char *name)
}
spa->spa_is_root = B_TRUE;
spa->spa_import_flags = ZFS_IMPORT_VERBATIM;
+   spa->spa_dev_path = name;
 
/*
 * Build up a vdev tree based on the boot device's label config.
diff --git a/bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c 
b/bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
index 2ea8b577..b61c308c 100644
--- a/bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
+++ b/bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
@@ -464,6 +464,7 @@ spa_add(const char *name, nvlist_t *config, const char 
*altroot)
spa->spa_load_max_txg = UINT64_MAX;
spa->spa_proc = 
spa->spa_proc_state = SPA_PROC_NONE;
+   spa->spa_dev_path = NULL;
 
refcount_create(>spa_refcount);
spa_config_lock_init(spa);
diff --git a/bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h 
b/bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
index 043370e4..dfd0fc89 100644
--- a/bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
+++ b/bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
@@ -243,6 +243,7 @@ struct spa {
 #ifndef sun
boolean_t   spa_splitting_newspa;   /* creating new spa in split */
 #endif
+   const char  *spa_dev_path;  /* device spa is mounted */
 };
 
 extern const char *spa_config_path;
diff --git a/bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c 
b/bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
index 2a265f7a..9dc2278b 100644
--- a/bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
+++ b/bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
@@ -442,8 +442,12 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t 
*parent, uint_t id,
vd->vdev_islog = islog;
vd->vdev_nparity = nparity;
 
-   if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, >vdev_path) == 0)
-   vd->vdev_path = spa_strdup(vd->vdev_path);
+   if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, >vdev_path) == 0) {
+   if (spa->spa_dev_path)
+   vd->vdev_path = strdup(spa->spa_dev_path);
+   else
+   vd->vdev_path = spa_strdup(vd->vdev_path);
+   }
if (nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, >vdev_devid) == 0)
vd->vdev_devid = spa_strdup(vd->vdev_devid);
if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PHYS_PATH,
diff --git a/bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c 
b/bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c
index 3d0d6324..650c969c 100644
--- a/bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c
+++ b/bsd/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c
@@ -74,9 +74,6 @@ vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t 
*max_psize,
dvd = vd->vdev_tsd = kmem_zalloc(sizeof(struct vdev_disk), 
KM_SLEEP);
 
device_name = vd->vdev_path + 5;
-   if (strncmp(device_name, "vblk", 4) != 0) {
-   device_name = "vblk0.1";
-   }
error =