The following pull request was submitted through Github.
It can be accessed and reviewed at: https://github.com/lxc/lxd/pull/8136

This e-mail was sent by the LXC bot, direct replies will not reach the author
unless they happen to be subscribed to this list.

=== Description (from pull-request) ===
Avoid disk device cleanup issues due to container mounts still being active.
From bf653a26f2c1251e67566736c841d894eaf8fed9 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parr...@canonical.com>
Date: Tue, 10 Nov 2020 12:21:46 +0000
Subject: [PATCH 1/2] lxd/instance/drivers/driver/lxc: Stop devices in two
 phases

- Continue stopping NICs in the OnStopNS hook.
- Stop all other devices in the OnStop hook after the container has been fully 
stopped.

This is to prevent issues unmounting disk devices when the container hasn't 
been stopped yet.

Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com>
---
 lxd/instance/drivers/driver_lxc.go | 53 ++++++++++++++++++++----------
 1 file changed, 36 insertions(+), 17 deletions(-)

diff --git a/lxd/instance/drivers/driver_lxc.go 
b/lxd/instance/drivers/driver_lxc.go
index faf251a23e..282c4937b2 100644
--- a/lxd/instance/drivers/driver_lxc.go
+++ b/lxd/instance/drivers/driver_lxc.go
@@ -1578,7 +1578,9 @@ func (c *lxc) deviceUpdate(deviceName string, rawConfig 
deviceConfig.Device, old
 }
 
 // deviceStop loads a new device and calls its Stop() function.
-func (c *lxc) deviceStop(deviceName string, rawConfig deviceConfig.Device, 
stopHookNetnsPath string) error {
+// Accepts a stopHookNetnsPath argument which is required when run from the 
onStopNS hook before the
+// container's network namespace is unmounted (which is required for NIC 
device cleanup).
+func (c *lxc) deviceStop(deviceName string, rawConfig deviceConfig.Device, 
instanceRunning bool, stopHookNetnsPath string) error {
        logger := logging.AddContext(logger.Log, log.Ctx{"device": deviceName, 
"type": rawConfig["type"], "project": c.Project(), "instance": c.Name()})
        logger.Debug("Stopping device")
 
@@ -1604,7 +1606,7 @@ func (c *lxc) deviceStop(deviceName string, rawConfig 
deviceConfig.Device, stopH
        canHotPlug, _ := d.CanHotPlug()
 
        // An empty netns path means we haven't been called from the LXC stop 
hook, so are running.
-       if stopHookNetnsPath == "" && !canHotPlug {
+       if instanceRunning && !canHotPlug {
                return fmt.Errorf("Device cannot be stopped when container is 
running")
        }
 
@@ -1616,14 +1618,14 @@ func (c *lxc) deviceStop(deviceName string, rawConfig 
deviceConfig.Device, stopH
        if runConf != nil {
                // If network interface settings returned, then detach NIC from 
container.
                if len(runConf.NetworkInterface) > 0 {
-                       err = c.deviceDetachNIC(configCopy, 
runConf.NetworkInterface, stopHookNetnsPath)
+                       err = c.deviceDetachNIC(configCopy, 
runConf.NetworkInterface, instanceRunning, stopHookNetnsPath)
                        if err != nil {
                                return err
                        }
                }
 
                // Add cgroup rules if requested and container is running.
-               if len(runConf.CGroups) > 0 && stopHookNetnsPath == "" {
+               if len(runConf.CGroups) > 0 && instanceRunning {
                        err = c.deviceAddCgroupRules(runConf.CGroups)
                        if err != nil {
                                return err
@@ -1631,7 +1633,7 @@ func (c *lxc) deviceStop(deviceName string, rawConfig 
deviceConfig.Device, stopH
                }
 
                // Detach mounts if requested and container is running.
-               if len(runConf.Mounts) > 0 && stopHookNetnsPath == "" {
+               if len(runConf.Mounts) > 0 && instanceRunning {
                        err = c.deviceHandleMounts(runConf.Mounts)
                        if err != nil {
                                return err
@@ -1649,7 +1651,9 @@ func (c *lxc) deviceStop(deviceName string, rawConfig 
deviceConfig.Device, stopH
 }
 
 // deviceDetachNIC detaches a NIC device from a container.
-func (c *lxc) deviceDetachNIC(configCopy map[string]string, netIF 
[]deviceConfig.RunConfigItem, stopHookNetnsPath string) error {
+// Accepts a stopHookNetnsPath argument which is required when run from the 
onStopNS hook before the
+// container's network namespace is unmounted (which is required for NIC 
device cleanup).
+func (c *lxc) deviceDetachNIC(configCopy map[string]string, netIF 
[]deviceConfig.RunConfigItem, instanceRunning bool, stopHookNetnsPath string) 
error {
        // Get requested device name to detach interface back to on the host.
        devName := ""
        for _, dev := range netIF {
@@ -1664,7 +1668,7 @@ func (c *lxc) deviceDetachNIC(configCopy 
map[string]string, netIF []deviceConfig
        }
 
        // If container is running, perform live detach of interface back to 
host.
-       if stopHookNetnsPath == "" {
+       if instanceRunning {
                // For some reason, having network config confuses detach, so 
get our own go-lxc struct.
                cname := project.Instance(c.Project(), c.Name())
                cc, err := liblxc.NewContainer(cname, c.state.OS.LxcPath)
@@ -1686,9 +1690,13 @@ func (c *lxc) deviceDetachNIC(configCopy 
map[string]string, netIF []deviceConfig
 
                err = cc.DetachInterfaceRename(configCopy["name"], devName)
                if err != nil {
-                       return errors.Wrapf(err, "Failed to detach interface: 
%s to %s", configCopy["name"], devName)
+                       return errors.Wrapf(err, "Failed to detach interface: 
%q to %q", configCopy["name"], devName)
                }
        } else {
+               if stopHookNetnsPath != "" {
+                       return fmt.Errorf("Cannot detach NIC device %q without 
stopHookNetnsPath being provided", configCopy["name"])
+               }
+
                // Currently liblxc does not move devices back to the host on 
stop that were added
                // after the the container was started. For this reason we 
utilise the lxc.hook.stop
                // hook so that we can capture the netns path, enter the 
namespace and move the nics
@@ -1700,7 +1708,7 @@ func (c *lxc) deviceDetachNIC(configCopy 
map[string]string, netIF []deviceConfig
                if !shared.PathExists(fmt.Sprintf("/sys/class/net/%s", 
devName)) {
                        err := c.detachInterfaceRename(stopHookNetnsPath, 
configCopy["name"], devName)
                        if err != nil {
-                               return errors.Wrapf(err, "Failed to detach 
interface: %s to %s", configCopy["name"], devName)
+                               return errors.Wrapf(err, "Failed to detach 
interface: %q to %q", configCopy["name"], devName)
                        }
                }
        }
@@ -2141,7 +2149,7 @@ func (c *lxc) startCommon() (string, []func() error, 
error) {
                // Stop device on failure to setup container, pass non-empty 
stopHookNetnsPath so that stop code
                // doesn't think container is running.
                revert.Add(func() {
-                       err := c.deviceStop(dev.Name, dev.Config, "startfailed")
+                       err := c.deviceStop(dev.Name, dev.Config, false, "")
                        if err != nil {
                                logger.Errorf("Failed to cleanup device %q: 
%v", dev.Name, err)
                        }
@@ -2828,14 +2836,14 @@ func (c *lxc) onStopNS(args map[string]string) error {
        target := args["target"]
        netns := args["netns"]
 
-       // Validate target
+       // Validate target.
        if !shared.StringInSlice(target, []string{"stop", "reboot"}) {
                logger.Error("Container sent invalid target to OnStopNS", 
log.Ctx{"container": c.Name(), "target": target})
-               return fmt.Errorf("Invalid stop target: %s", target)
+               return fmt.Errorf("Invalid stop target %q", target)
        }
 
        // Clean up devices.
-       c.cleanupDevices(netns)
+       c.cleanupDevices(false, netns)
 
        return nil
 }
@@ -2860,6 +2868,9 @@ func (c *lxc) onStop(args map[string]string) error {
        // Make sure we can't call go-lxc functions by mistake
        c.fromHook = true
 
+       // Clean up devices.
+       c.cleanupDevices(false, "")
+
        // Remove directory ownership (to avoid issue if uidmap is re-used)
        err := os.Chown(c.Path(), 0, 0)
        if err != nil {
@@ -2968,14 +2979,22 @@ func (c *lxc) onStop(args map[string]string) error {
 }
 
 // cleanupDevices performs any needed device cleanup steps when container is 
stopped.
-func (c *lxc) cleanupDevices(netns string) {
+// Accepts a stopHookNetnsPath argument which is required when run from the 
onStopNS hook before the
+// container's network namespace is unmounted (which is required for NIC 
device cleanup).
+func (c *lxc) cleanupDevices(instanceRunning bool, stopHookNetnsPath string) {
        for _, dev := range c.expandedDevices.Reversed() {
+               // Only stop NIC devices when run from the onStopNS hook, and 
stop all other devices when run from
+               // the onStop hook. This way disk devices are stopped after the 
instance has been fully stopped.
+               if (stopHookNetnsPath != "" && dev.Config["type"] != "nic") || 
(stopHookNetnsPath == "" && dev.Config["type"] == "nic") {
+                       continue
+               }
+
                // Use the device interface if device supports it.
-               err := c.deviceStop(dev.Name, dev.Config, netns)
+               err := c.deviceStop(dev.Name, dev.Config, instanceRunning, 
stopHookNetnsPath)
                if err == device.ErrUnsupportedDevType {
                        continue
                } else if err != nil {
-                       logger.Errorf("Failed to stop device '%s': %v", 
dev.Name, err)
+                       logger.Errorf("Failed to stop device %q: %v", dev.Name, 
err)
                }
        }
 }
@@ -4572,7 +4591,7 @@ func (c *lxc) updateDevices(removeDevices 
deviceConfig.Devices, addDevices devic
        // Remove devices in reverse order to how they were added.
        for _, dev := range removeDevices.Reversed() {
                if isRunning {
-                       err := c.deviceStop(dev.Name, dev.Config, "")
+                       err := c.deviceStop(dev.Name, dev.Config, isRunning, "")
                        if err == device.ErrUnsupportedDevType {
                                continue // No point in trying to remove device 
below.
                        } else if err != nil {

From 9071005fd996e2da083aa764a4e9e753a161d3a3 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parr...@canonical.com>
Date: Tue, 10 Nov 2020 12:23:25 +0000
Subject: [PATCH 2/2] lxd/device/disk: Removes workaround for ceph disks now
 that disks are stopped after instance is stopped

Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com>
---
 lxd/device/disk.go | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/lxd/device/disk.go b/lxd/device/disk.go
index badf7b80a1..d303a5d02e 100644
--- a/lxd/device/disk.go
+++ b/lxd/device/disk.go
@@ -1293,16 +1293,10 @@ func (d *disk) postStop() error {
 
        if strings.HasPrefix(d.config["source"], "ceph:") {
                v := d.volatileGet()
-
-               // Run unmap async. This is needed as postStop is called from
-               // within a monitor hook, meaning that as we process this, the 
monitor
-               // process is still running, holding some references.
-               go func() {
-                       err := diskCephRbdUnmap(v["ceph_rbd"])
-                       if err != nil {
-                               logger.Errorf("Failed to unmap RBD volume %q 
for %q: %v", v["ceph_rbd"], project.Instance(d.inst.Project(), d.inst.Name()), 
err)
-                       }
-               }()
+               err := diskCephRbdUnmap(v["ceph_rbd"])
+               if err != nil {
+                       logger.Errorf("Failed to unmap RBD volume %q for %q: 
%v", v["ceph_rbd"], project.Instance(d.inst.Project(), d.inst.Name()), err)
+               }
        }
 
        return nil
_______________________________________________
lxc-devel mailing list
lxc-devel@lists.linuxcontainers.org
http://lists.linuxcontainers.org/listinfo/lxc-devel

Reply via email to