From: Waldemar Kozaczuk <jwkozac...@gmail.com>
Committer: Waldemar Kozaczuk <jwkozac...@gmail.com>
Branch: master

vfs: support mounting ZFS from non-root partition

This patch mainly enhances the VFS layer to support dynamically loading
the ZFS library libsolaris.so from another root filesystem such as ROFS and
mounting a ZFS filesystem from devices other than /dev/vblk0.1. The
supported scenarios include specifying the loader option '--mount-fs' or
adding an entry to /etc/fstab.

In this patch we take advantage of the existing logic in VFS
pivot_rootfs() that implicitly loads shared libraries from the directory
/usr/lib/fs, which is where we place libsolaris.so in the image. This was
done as part of the commit 4ffb0fa9329849cd587d62f91b6979bc0e0ce6d1 to
support dynamically loading the NFS filesystem library. To support a similar
scenario with ZFS we additionally need to enhance mount_fs() to detect
the ZFS case and call zfsdev_init() and initialize the BSD shrinker. We also
enhance unmount_rootfs() to make it unmount ZFS from non-root mount
points.

This patch also adds a new module - zfs - which is intended to be used when
building two types of images as described by #1200. Please note the next
patch will enhance the build script to support building such images.

1. Run OSv from a single disk with two partitions: ROFS + ZFS (on /dev/vblk0.2)
  ./scripts/run.py --execute='--mount-fs=zfs,/dev/vblk0.2,/data /zpool.so list'

2. Run OSv with 2 disks: the 1st one with ROFS and the second one with ZFS (/dev/vblk1.1):
  ./scripts/run.py --execute='--mount-fs=zfs,/dev/vblk1.1,/data /zpool.so list' --second-disk-image build/release/zfs_disk.img

Refs #1200

Signed-off-by: Waldemar Kozaczuk <jwkozac...@gmail.com>

---
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -2383,7 +2383,7 @@ 
$(out)/bsd/cddl/contrib/opensolaris/lib/libzfs/common/zprop_common.o: bsd/sys/cd
 
 $(out)/libzfs.so: $(libzfs-objects) $(out)/libuutil.so $(out)/libsolaris.so
        $(makedir)
-       $(call quiet, $(CC) $(CFLAGS) -o $@ $(libzfs-objects) -L$(out) -luutil 
-lsolaris, LINK libzfs.so)
+       $(call quiet, $(CC) $(CFLAGS) -o $@ $(libzfs-objects) -L$(out) -luutil, 
LINK libzfs.so)
 
 #include $(src)/bsd/cddl/contrib/opensolaris/cmd/zpool/build.mk:
 zpool-cmd-file-list = zpool_iter  zpool_main  zpool_util  zpool_vdev
diff --git a/bootfs.manifest.skel b/bootfs.manifest.skel
--- a/bootfs.manifest.skel
+++ b/bootfs.manifest.skel
@@ -1,2 +1,2 @@
 [manifest]
-/libsolaris.so: libsolaris.so
+/usr/lib/fs/libsolaris.so: libsolaris.so
diff --git a/drivers/zfs.cc b/drivers/zfs.cc
--- a/drivers/zfs.cc
+++ b/drivers/zfs.cc
@@ -61,16 +61,24 @@ zfs_device::~zfs_device()
     device_destroy(_zfs_dev);
 }
 
+static bool zfsdev_initialized = false;
+
 void zfsdev_init(void)
 {
-    new zfs_device();
+    if (!zfsdev_initialized) {
+        new zfs_device();
+        zfsdev_initialized = true;
+    }
 }
 
 }
 
 extern "C" OSV_LIBSOLARIS_API void zfsdev_init()
 {
-    new zfsdev::zfs_device();
+    if (!zfsdev::zfsdev_initialized) {
+        new zfsdev::zfs_device();
+        zfsdev::zfsdev_initialized = true;
+    }
 }
 
 
diff --git a/exported_symbols/osv_libsolaris.so.symbols 
b/exported_symbols/osv_libsolaris.so.symbols
--- a/exported_symbols/osv_libsolaris.so.symbols
+++ b/exported_symbols/osv_libsolaris.so.symbols
@@ -91,4 +91,5 @@ vrele
 vttoif_tab
 wakeup
 zfsdev_init
+zfs_driver_initialized
 zfs_update_vfsops
diff --git a/fs/vfs/main.cc b/fs/vfs/main.cc
--- a/fs/vfs/main.cc
+++ b/fs/vfs/main.cc
@@ -62,6 +62,7 @@
 #include <osv/ioctl.h>
 #include <osv/trace.hh>
 #include <osv/run.hh>
+#include <osv/mount.h>
 #include <drivers/console.hh>
 
 #include "vfs.h"
@@ -83,6 +84,9 @@
 #include <api/utime.h>
 #include <chrono>
 
+#include "drivers/zfs.hh"
+#include "bsd/porting/shrinker.h"
+
 using namespace std;
 
 
@@ -2493,6 +2497,18 @@ static void mount_fs(mntent *m)
         return;
     }
 
+    bool zfs = strcmp(m->mnt_type, "zfs") == 0;
+    if (zfs) {
+        // Ignore if ZFS root pool is already mounted because we can only have 
one root pool
+        std::vector<osv::mount_desc> mounts = osv::current_mounts();
+        for (auto &mount : mounts) {
+            if (mount.type == "zfs" && mount.special.rfind("/dev")) {
+                kprintf("ZFS root pool is already mounted at %s\n", 
m->mnt_dir);
+                return;
+            }
+        }
+    }
+
     auto mount_dir = opendir(m->mnt_dir);
     if (!mount_dir) {
         if (mkdir(m->mnt_dir, 0755) < 0) {
@@ -2505,14 +2521,23 @@ static void mount_fs(mntent *m)
         closedir(mount_dir);
     }
 
-    if ((m->mnt_opts != nullptr) && strcmp(m->mnt_opts, MNTOPT_DEFAULTS)) {
-        printf("Warning: opts %s, ignored for fs %s\n", m->mnt_opts, 
m->mnt_type);
+    if (zfs) {
+        m->mnt_opts = "osv/zfs";
+    } else {
+        if ((m->mnt_opts != nullptr) && strcmp(m->mnt_opts, MNTOPT_DEFAULTS)) {
+            printf("Warning: opts %s, ignored for fs %s\n", m->mnt_opts, 
m->mnt_type);
+        }
+        m->mnt_opts = nullptr;
     }
 
-    // FIXME: Right now, ignoring mntops. In the future we may have an option 
parser
-    auto ret = sys_mount(m->mnt_fsname, m->mnt_dir, m->mnt_type, 0, nullptr);
+    // FIXME: Right now, ignoring mntops except for ZFS. In the future we may 
have an option parser
+    auto ret = sys_mount(m->mnt_fsname, m->mnt_dir, m->mnt_type, 0, 
(void*)m->mnt_opts);
     if (ret) {
         printf("failed to mount %s, error = %s\n", m->mnt_type, strerror(ret));
+    } else {
+        if (zfs) {
+            bsd_shrinker_init();
+        }
     }
 }
 
@@ -2531,8 +2556,12 @@ extern "C" void pivot_rootfs(const char* path)
             if (len >= 3 && strcmp(dirent->d_name + (len - 3), ".so") == 0) {
                 auto lib_path = std::string("/usr/lib/fs/") + dirent->d_name;
                 auto module = dlopen(lib_path.c_str(), RTLD_LAZY);
-                if (module)
-                    debugf("VFS: Initialized filesystem library: %s\n", 
lib_path.c_str());
+                if (module) {
+                    if (strcmp(dirent->d_name, "libsolaris.so") == 0) {
+                        zfsdev::zfsdev_init();
+                    }
+                    debugf("VFS: initialized filesystem library: %s\n", 
lib_path.c_str());
+                }
             }
         }
 
@@ -2647,6 +2676,17 @@ extern "C" void unmount_rootfs(void)
             "error = %s\n", strerror(ret));
     }
 
+    std::vector<osv::mount_desc> mounts = osv::current_mounts();
+    for (auto &m : mounts) {
+        if (m.type == "zfs" && m.special.rfind("/dev") == 0 && m.path != "/") {
+            ret = sys_umount2(m.path.c_str(), MNT_FORCE);
+            if (ret) {
+                kprintf("Warning: unmount_rootfs: failed to unmount %s, "
+                    "error = %s\n", m.path.c_str(), strerror(ret));
+            }
+        }
+    }
+
     ret = sys_umount2("/", MNT_FORCE);
     if (ret) {
         kprintf("Warning: unmount_rootfs: failed to unmount /, "
diff --git a/fs/zfs/zfs_initialize.c b/fs/zfs/zfs_initialize.c
--- a/fs/zfs/zfs_initialize.c
+++ b/fs/zfs/zfs_initialize.c
@@ -52,11 +52,16 @@ extern void zfs_update_vfsops(struct vfsops* _vfsops);
 extern void start_pagecache_access_scanner();
 
 extern int zfs_init(void);
+extern bool zfs_driver_initialized;
 
 //This init function gets called on loading of libsolaris.so
 //and it initializes all necessary resources (threads, etc) used by the code in
 //libsolaris.so. This initialization is necessary before ZFS can be mounted.
 void __attribute__((constructor)) zfs_initialize(void) {
+    if (zfs_driver_initialized) {
+        debug("zfs: driver has been ALREADY initialized!\n");
+        return;
+    }
     // These 3 functions used to be called at the end of bsd_init()
     // and are intended to initialize various resources, mainly thread pools
     // (threads named 'system_taskq_*' and 'solthread-0x*')
@@ -85,6 +90,7 @@ void __attribute__((constructor)) zfs_initialize(void) {
     //functions in the kernel
     zfs_init();
 
+    zfs_driver_initialized = true;
     debug("zfs: driver has been initialized!\n");
 }
 
diff --git a/fs/zfs/zfs_null_vfsops.cc b/fs/zfs/zfs_null_vfsops.cc
--- a/fs/zfs/zfs_null_vfsops.cc
+++ b/fs/zfs/zfs_null_vfsops.cc
@@ -36,10 +36,13 @@ struct vfsops zfs_vfsops = {
     nullptr,        /* vnops */
 };
 
-extern "C" int zfs_init(void)
+extern "C" {
+OSV_LIBSOLARIS_API bool zfs_driver_initialized = false;
+int zfs_init(void)
 {
     return 0;
 }
+}
 
 //Normally (without ZFS enabled) the zfs_vfsops points to dummy
 //noop functions. So when libsolaris.so is loaded, we provide the
diff --git a/loader.cc b/loader.cc
--- a/loader.cc
+++ b/loader.cc
@@ -143,6 +143,7 @@ int main(int loader_argc, char **loader_argv)
     sched::init([=] { main_cont(loader_argc, loader_argv); });
 }
 
+static bool opt_preload_zfs_library = false;
 static bool opt_extra_zfs_pools = false;
 static bool opt_disable_rofs_cache = false;
 static bool opt_leak = false;
@@ -202,7 +203,8 @@ static void usage()
     std::cout << "  --disable_rofs_cache  disable ROFS memory cache\n";
     std::cout << "  --nopci               disable PCI enumeration\n";
     std::cout << "  --extra-zfs-pools     import extra ZFS pools\n";
-    std::cout << "  --mount-fs=arg        mount extra filesystem, 
format:<fs_type,url,path>\n\n";
+    std::cout << "  --mount-fs=arg        mount extra filesystem, 
format:<fs_type,url,path>\n";
+    std::cout << "  --preload-zfs-library preload ZFS library from 
/usr/lib/fs\n\n";
 }
 
 static void handle_parse_error(const std::string &message)
@@ -233,6 +235,10 @@ static void parse_options(int loader_argc, char** 
loader_argv)
         opt_disable_rofs_cache = true;
     }
 
+    if (extract_option_flag(options_values, "preload-zfs-library")) {
+        opt_preload_zfs_library = true;
+    }
+
     if (extract_option_flag(options_values, "extra-zfs-pools")) {
         opt_extra_zfs_pools = true;
     }
@@ -406,12 +412,22 @@ static void stop_all_remaining_app_threads()
     }
 }
 
-static void load_zfs_library_and_mount_zfs_root(const char* mount_error_msg, 
bool pivot_when_error = false)
+static void load_zfs_library(std::function<void()> on_load_fun = nullptr)
 {
     // Load and initialize ZFS filesystem driver implemented in libsolaris.so
-    const auto libsolaris_file_name = "libsolaris.so";
-    //TODO: Consider calling dlclose() somewhere after ZFS is unmounted
-    if (dlopen(libsolaris_file_name, RTLD_LAZY)) {
+    const auto libsolaris_path = "/usr/lib/fs/libsolaris.so";
+    if (dlopen(libsolaris_path, RTLD_LAZY)) {
+        if (on_load_fun) {
+           on_load_fun();
+        }
+    } else {
+        debug("Could not load and/or initialize %s.\n", libsolaris_path);
+    }
+}
+
+static void load_zfs_library_and_mount_zfs_root(const char* mount_error_msg, 
bool pivot_when_error = false)
+{
+    load_zfs_library([mount_error_msg, pivot_when_error]() {
         zfsdev::zfsdev_init();
         auto error = mount_zfs_rootfs(opt_pivot, opt_extra_zfs_pools);
         if (error) {
@@ -426,9 +442,7 @@ static void load_zfs_library_and_mount_zfs_root(const char* 
mount_error_msg, boo
             bsd_shrinker_init();
             boot_time.event("ZFS mounted");
         }
-    } else {
-        debug("Could not load and/or initialize %s.\n", libsolaris_file_name);
-    }
+    });
 }
 
 void* do_main_thread(void *_main_args)
@@ -492,6 +506,10 @@ void* do_main_thread(void *_main_args)
         }
     }
 
+    if (opt_preload_zfs_library) {
+        load_zfs_library();
+    }
+
     bool has_if = false;
     osv::for_each_if([&has_if] (std::string if_name) {
         if (if_name == "lo0")
diff --git a/modules/zfs-tools/usr.manifest b/modules/zfs-tools/usr.manifest
--- a/modules/zfs-tools/usr.manifest
+++ b/modules/zfs-tools/usr.manifest
@@ -2,4 +2,3 @@
 /zpool.so: zpool.so
 /libzfs.so: libzfs.so
 /libuutil.so: libuutil.so
-/libsolaris.so: libsolaris.so
diff --git a/modules/zfs/usr.manifest b/modules/zfs/usr.manifest
--- a/modules/zfs/usr.manifest
+++ b/modules/zfs/usr.manifest
@@ -0,0 +1,2 @@
+[manifest]
+/usr/lib/fs/libsolaris.so: libsolaris.so
diff --git a/scripts/upload_manifest.py b/scripts/upload_manifest.py
--- a/scripts/upload_manifest.py
+++ b/scripts/upload_manifest.py
@@ -164,7 +164,7 @@ def main():
         console = '--console=serial'
         zfs_builder_name = 'zfs_builder-stripped.elf'
 
-    osv = subprocess.Popen('cd ../..; scripts/run.py -k --kernel-path 
build/release/%s --arch=%s --vnc none -m 512 -c1 -i "%s" --block-device-cache 
unsafe -s -e "%s --norandom --nomount --noinit /tools/mkfs.so; /tools/cpiod.so 
--prefix /zfs/zfs/; /zfs.so set compression=off osv" --forward 
tcp:127.0.0.1:%s-:10000' % 
(zfs_builder_name,arch,image_path,console,upload_port), shell=True, 
stdout=subprocess.PIPE)
+    osv = subprocess.Popen('cd ../..; scripts/run.py -k --kernel-path 
build/release/%s --arch=%s --vnc none -m 512 -c1 -i "%s" --block-device-cache 
unsafe -s -e "%s --norandom --nomount --noinit --preload-zfs-library 
/tools/mkfs.so; /tools/cpiod.so --prefix /zfs/zfs/; /zfs.so set compression=off 
osv" --forward tcp:127.0.0.1:%s-:10000' % 
(zfs_builder_name,arch,image_path,console,upload_port), shell=True, 
stdout=subprocess.PIPE)
 
     upload(osv, manifest, depends, upload_port)
 
diff --git a/usr_rofs.manifest.skel b/usr_rofs.manifest.skel
--- a/usr_rofs.manifest.skel
+++ b/usr_rofs.manifest.skel
@@ -12,3 +12,4 @@
 /proc: ../../static
 /sys: ../../static
 /tmp: ../../static
+/data: ../../static
diff --git a/zfs_builder_bootfs.manifest.skel b/zfs_builder_bootfs.manifest.skel
--- a/zfs_builder_bootfs.manifest.skel
+++ b/zfs_builder_bootfs.manifest.skel
@@ -2,8 +2,8 @@
 /libvdso.so: libvdso.so
 /libuutil.so: libuutil.so
 /zpool.so: zpool.so
+/usr/lib/fs/libsolaris.so: libsolaris.so
 /libzfs.so: libzfs.so
-/libsolaris.so: libsolaris.so
 /zfs.so: zfs.so
 /tools/mkfs.so: tools/mkfs/mkfs.so
 /tools/cpiod.so: tools/cpiod/cpiod.so

-- 
You received this message because you are subscribed to the Google Groups "OSv 
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to osv-dev+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/osv-dev/000000000000f0b11a05e40176a6%40google.com.

Reply via email to