The following pull request was submitted through Github. It can be accessed and reviewed at: https://github.com/lxc/lxd/pull/8191
This e-mail was sent by the LXC bot, direct replies will not reach the author unless they happen to be subscribed to this list. === Description (from pull-request) ===
From ab85b3dcc245dfbcfc326bbf88fb9a13061133b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgra...@ubuntu.com> Date: Wed, 25 Nov 2020 18:44:25 -0500 Subject: [PATCH 1/6] lxd/instance/operations: Allow Wait/Done on nil struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Stéphane Graber <stgra...@ubuntu.com> --- lxd/instance/operationlock/operationlock.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/lxd/instance/operationlock/operationlock.go b/lxd/instance/operationlock/operationlock.go index 6b4a04c485..c9db4ba432 100644 --- a/lxd/instance/operationlock/operationlock.go +++ b/lxd/instance/operationlock/operationlock.go @@ -86,6 +86,10 @@ func (op *InstanceOperation) Reset() error { // Wait waits for an operation to finish. func (op *InstanceOperation) Wait() error { + if op == nil { + return nil + } + <-op.chanDone return op.err @@ -96,6 +100,11 @@ func (op *InstanceOperation) Done(err error) { instanceOperationsLock.Lock() defer instanceOperationsLock.Unlock() + // This function can be called on a nil struct. + if op == nil { + return + } + // Check if already done runningOp, ok := instanceOperations[op.id] if !ok || runningOp != op { From 5084fb9f6165af18c32777fedd13df01900cd8ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgra...@ubuntu.com> Date: Wed, 25 Nov 2020 18:48:12 -0500 Subject: [PATCH 2/6] lxd/instance/lxc: Improve use of operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Stéphane Graber <stgra...@ubuntu.com> --- lxd/instance/drivers/driver_lxc.go | 104 ++++++++++++----------------- 1 file changed, 42 insertions(+), 62 deletions(-) diff --git a/lxd/instance/drivers/driver_lxc.go b/lxd/instance/drivers/driver_lxc.go index ab7eeba24c..c5ecf02179 100644 --- a/lxd/instance/drivers/driver_lxc.go +++ b/lxd/instance/drivers/driver_lxc.go @@ -2227,12 +2227,15 @@ func (d *lxc) Start(stateful bool) error { defer op.Done(nil) if !daemon.SharedMountsSetup { - return fmt.Errorf("Daemon failed to setup shared mounts base: %v. Does security.nesting need to be turned on?", err) + err = fmt.Errorf("Daemon failed to setup shared mounts base. Does security.nesting need to be turned on?") + op.Done(err) + return err } // Run the shared start code configPath, postStartHooks, err := d.startCommon() if err != nil { + op.Done(err) return errors.Wrap(err, "Failed preparing container for start") } @@ -2250,7 +2253,9 @@ func (d *lxc) Start(stateful bool) error { // If stateful, restore now if stateful { if !d.stateful { - return fmt.Errorf("Container has no existing state to restore") + err = fmt.Errorf("Container has no existing state to restore") + op.Done(err) + return err } criuMigrationArgs := instance.CriuMigrationArgs{ @@ -2265,6 +2270,7 @@ func (d *lxc) Start(stateful bool) error { err := d.Migrate(&criuMigrationArgs) if err != nil && !d.IsRunning() { + op.Done(err) return errors.Wrap(err, "Migrate") } @@ -2273,7 +2279,7 @@ func (d *lxc) Start(stateful bool) error { err = d.state.Cluster.UpdateInstanceStatefulFlag(d.id, false) if err != nil { - logger.Error("Failed starting container", ctxMap) + op.Done(err) return errors.Wrap(err, "Start container") } @@ -2281,8 +2287,9 @@ func (d *lxc) Start(stateful bool) error { err = d.runHooks(postStartHooks) if err != nil { // Attempt to stop container. - op.Done(err) d.Stop(false) + + op.Done(err) return err } @@ -2292,12 +2299,14 @@ func (d *lxc) Start(stateful bool) error { /* stateless start required when we have state, let's delete it */ err := os.RemoveAll(d.StatePath()) if err != nil { + op.Done(err) return err } d.stateful = false err = d.state.Cluster.UpdateInstanceStatefulFlag(d.id, false) if err != nil { + op.Done(err) return errors.Wrap(err, "Persist stateful flag") } } @@ -2342,6 +2351,7 @@ func (d *lxc) Start(stateful bool) error { logger.Error("Failed starting container", ctxMap) // Return the actual error + op.Done(err) return err } @@ -2349,8 +2359,9 @@ func (d *lxc) Start(stateful bool) error { err = d.runHooks(postStartHooks) if err != nil { // Attempt to stop container. - op.Done(err) d.Stop(false) + + op.Done(err) return err } @@ -2482,7 +2493,6 @@ func (d *lxc) Stop(stateful bool) error { err := os.MkdirAll(stateDir, 0700) if err != nil { op.Done(err) - logger.Error("Failed stopping container", ctxMap) return err } @@ -2500,28 +2510,25 @@ func (d *lxc) Stop(stateful bool) error { err = d.Migrate(&criuMigrationArgs) if err != nil { op.Done(err) - logger.Error("Failed stopping container", ctxMap) return err } err = op.Wait() if err != nil && d.IsRunning() { - logger.Error("Failed stopping container", ctxMap) return err } d.stateful = true err = d.state.Cluster.UpdateInstanceStatefulFlag(d.id, true) if err != nil { - op.Done(err) logger.Error("Failed stopping container", ctxMap) return err } - op.Done(nil) logger.Info("Stopped container", ctxMap) d.state.Events.SendLifecycle(d.project, "container-stopped", fmt.Sprintf("/1.0/containers/%s", d.name), nil) + return nil } else if shared.PathExists(d.StatePath()) { os.RemoveAll(d.StatePath()) @@ -2532,14 +2539,12 @@ func (d *lxc) Stop(stateful bool) error { err = d.initLXC(true) if err != nil { op.Done(err) - logger.Error("Failed stopping container", ctxMap) return err } } else { err = d.initLXC(false) if err != nil { op.Done(err) - logger.Error("Failed stopping container", ctxMap) return err } } @@ -2570,15 +2575,14 @@ func (d *lxc) Stop(stateful bool) error { } } - if err := d.c.Stop(); err != nil { + err = d.c.Stop() + if err != nil { op.Done(err) - logger.Error("Failed stopping container", ctxMap) return err } err = op.Wait() if err != nil && d.IsRunning() { - logger.Error("Failed stopping container", ctxMap) return err } @@ -2620,27 +2624,24 @@ func (d *lxc) Shutdown(timeout time.Duration) error { err = d.initLXC(true) if err != nil { op.Done(err) - logger.Error("Failed stopping container", ctxMap) return err } } else { err = d.initLXC(false) if err != nil { op.Done(err) - logger.Error("Failed stopping container", ctxMap) return err } } - if err := d.c.Shutdown(timeout); err != nil { + err = d.c.Shutdown(timeout) + if err != nil { op.Done(err) - logger.Error("Failed shutting down container", ctxMap) return err } err = op.Wait() if err != nil && d.IsRunning() { - logger.Error("Failed shutting down container", ctxMap) return err } @@ -2705,13 +2706,15 @@ func (d *lxc) onStop(args map[string]string) error { "stateful": false} if op == nil { - logger.Info(fmt.Sprintf("Container initiated %s", target), ctxMap) + logger.Debug(fmt.Sprintf("Container initiated %s", target), ctxMap) } // Record power state err := d.state.Cluster.UpdateInstancePowerState(d.id, "STOPPED") if err != nil { - logger.Error("Failed to set container state", log.Ctx{"container": d.Name(), "err": err}) + err = errors.Wrap(err, "Failed to set container state") + op.Done(err) + return err } go func(d *lxc, target string, op *operationlock.InstanceOperation) { @@ -2719,13 +2722,11 @@ func (d *lxc) onStop(args map[string]string) error { err = nil // Unlock on return - if op != nil { - defer op.Done(err) - } + defer op.Done(nil) // Wait for other post-stop actions to be done and the container actually stopping. d.IsRunning() - logger.Debug("Container stopped, starting storage cleanup", log.Ctx{"container": d.Name()}) + logger.Debug("Container stopped, cleaning up", log.Ctx{"container": d.Name()}) // Clean up devices. d.cleanupDevices(false, "") @@ -2733,56 +2734,42 @@ func (d *lxc) onStop(args map[string]string) error { // Remove directory ownership (to avoid issue if uidmap is re-used) err := os.Chown(d.Path(), 0, 0) if err != nil { - if op != nil { - op.Done(err) - } - - logger.Error("Failed clearing ownernship", log.Ctx{"container": d.Name(), "err": err, "path": d.Path()}) + op.Done(errors.Wrap(err, "Failed clearing ownership")) + return } err = os.Chmod(d.Path(), 0100) if err != nil { - if op != nil { - op.Done(err) - } - - logger.Error("Failed clearing permissions", log.Ctx{"container": d.Name(), "err": err, "path": d.Path()}) + op.Done(errors.Wrap(err, "Failed clearing permissions")) + return } // Stop the storage for this container _, err = d.unmount() if err != nil { - if op != nil { - op.Done(err) - } - - logger.Error("Failed unnounting container", log.Ctx{"container": d.Name(), "err": err}) + op.Done(errors.Wrap(err, "Failed unmounting container")) + return } // Unload the apparmor profile err = apparmor.InstanceUnload(d.state, d) if err != nil { - logger.Error("Failed to destroy apparmor namespace", log.Ctx{"container": d.Name(), "err": err}) + op.Done(errors.Wrap(err, "Failed to destroy apparmor namespace")) + return } // Clean all the unix devices err = d.removeUnixDevices() if err != nil { - if op != nil { - op.Done(err) - } - - logger.Error("Unable to remove unix devices", log.Ctx{"container": d.Name(), "err": err}) + op.Done(errors.Wrap(err, "Failed to remove unix devices")) + return } // Clean all the disk devices err = d.removeDiskDevices() if err != nil { - if op != nil { - op.Done(err) - } - - logger.Error("Unable to remove disk devices", log.Ctx{"container": d.Name(), "err": err}) + op.Done(errors.Wrap(err, "Failed to remove disk devices")) + return } // Log and emit lifecycle if not user triggered @@ -2796,11 +2783,7 @@ func (d *lxc) onStop(args map[string]string) error { // Start the container again err = d.Start(false) if err != nil { - if op != nil { - op.Done(err) - } - - logger.Error("Failed restarting container", log.Ctx{"container": d.Name(), "err": err}) + op.Done(errors.Wrap(err, "Failed restarting container")) return } @@ -2815,11 +2798,8 @@ func (d *lxc) onStop(args map[string]string) error { if d.ephemeral { err = d.Delete() if err != nil { - if op != nil { - op.Done(err) - } - - logger.Error("Failed deleting ephemeral", log.Ctx{"container": d.Name(), "err": err}) + op.Done(errors.Wrap(err, "Failed deleting ephemeral container")) + return } } }(d, target, op) From 80763eba205580b8dc9dc989611121227f72926f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgra...@ubuntu.com> Date: Wed, 25 Nov 2020 18:48:44 -0500 Subject: [PATCH 3/6] lxd/instance/lxc: Improve locking on file ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Stéphane Graber <stgra...@ubuntu.com> --- lxd/instance/drivers/driver_lxc.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lxd/instance/drivers/driver_lxc.go b/lxd/instance/drivers/driver_lxc.go index c5ecf02179..08b5758128 100644 --- a/lxd/instance/drivers/driver_lxc.go +++ b/lxd/instance/drivers/driver_lxc.go @@ -4980,6 +4980,9 @@ func (d *lxc) inheritInitPidFd() (int, *os.File) { // FileExists returns whether file exists inside instance. func (d *lxc) FileExists(path string) error { + // Check for ongoing operations (that may involve shifting). + operationlock.Get(d.id).Wait() + // Setup container storage if needed _, err := d.mount() if err != nil { @@ -5026,10 +5029,7 @@ func (d *lxc) FileExists(path string) error { // FilePull gets a file from the instance. func (d *lxc) FilePull(srcpath string, dstpath string) (int64, int64, os.FileMode, string, []string, error) { // Check for ongoing operations (that may involve shifting). - op := operationlock.Get(d.id) - if op != nil { - op.Wait() - } + operationlock.Get(d.id).Wait() // Setup container storage if needed _, err := d.mount() @@ -5152,10 +5152,7 @@ func (d *lxc) FilePull(srcpath string, dstpath string) (int64, int64, os.FileMod // FilePush sends a file into the instance. func (d *lxc) FilePush(fileType string, srcpath string, dstpath string, uid int64, gid int64, mode int, write string) error { // Check for ongoing operations (that may involve shifting). - op := operationlock.Get(d.id) - if op != nil { - op.Wait() - } + operationlock.Get(d.id).Wait() var rootUID int64 var rootGID int64 @@ -5244,6 +5241,9 @@ func (d *lxc) FilePush(fileType string, srcpath string, dstpath string, uid int6 // FileRemove removes a file inside the instance. func (d *lxc) FileRemove(path string) error { + // Check for ongoing operations (that may involve shifting). + operationlock.Get(d.id).Wait() + var errStr string // Setup container storage if needed From 9a6f8f6d6f772149e1a72fb3a790065eab2fb047 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgra...@ubuntu.com> Date: Wed, 25 Nov 2020 18:50:25 -0500 Subject: [PATCH 4/6] lxd/instance/operations: Introduce CreateWaitGet MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Stéphane Graber <stgra...@ubuntu.com> --- lxd/instance/operationlock/operationlock.go | 39 +++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/lxd/instance/operationlock/operationlock.go b/lxd/instance/operationlock/operationlock.go index c9db4ba432..e55f04dffc 100644 --- a/lxd/instance/operationlock/operationlock.go +++ b/lxd/instance/operationlock/operationlock.go @@ -4,6 +4,8 @@ import ( "fmt" "sync" "time" + + "github.com/lxc/lxd/shared" ) var instanceOperationsLock sync.Mutex @@ -66,6 +68,43 @@ func Create(instanceID int, action string, reusable bool, reuse bool) (*Instance return op, nil } +// CreateWaitGet is a weird function which does what we happen to want most of the time. +// +// If the instance has an operation of the same type and it's not reusable +// or the caller doesn't want to reuse it, the function will wait and +// indicate that it did so. +// +// If the instance has an operation of one of the alternate types, then +// the operation is returned to the user. +// +// If the instance doesn't have an operation, has an operation of a different +// type that is not in the alternate list or has the right type and is +// being reused, then this behaves as a Create call. +func CreateWaitGet(instanceID int, action string, altActions []string, reusable bool, reuse bool) (bool, *InstanceOperation, error) { + op := Get(instanceID) + + // No existing operation, call create. + if op == nil { + op, err := Create(instanceID, action, reusable, reuse) + return false, op, err + } + + // Operation matches and not reusable or asked to reuse, wait. + if op.action == action && (!reuse || !op.reusable) { + err := op.Wait() + return true, nil, err + } + + // Operation matches one the alternate actions, return the operation. + if shared.StringInSlice(op.action, altActions) { + return false, op, nil + } + + // Send the rest to Create + op, err := Create(instanceID, action, reusable, reuse) + return false, op, err +} + // Get retrieves an existing lock or returns nil if no lock exists. func Get(instanceID int) *InstanceOperation { instanceOperationsLock.Lock() From c001bbb6e71d61f802599448edec71214e6d258d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgra...@ubuntu.com> Date: Wed, 25 Nov 2020 18:50:49 -0500 Subject: [PATCH 5/6] lxd/instance: Introduce restart tracking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #8010 Closes #8090 Signed-off-by: Stéphane Graber <stgra...@ubuntu.com> Signed-off-by: Daniel Segal <dlem...@gmail.com> --- lxd/instance/drivers/driver_common.go | 27 ++++- lxd/instance/drivers/driver_lxc.go | 118 +++++++++++++++----- lxd/instance/drivers/driver_qemu.go | 148 ++++++++++++++++++-------- 3 files changed, 217 insertions(+), 76 deletions(-) diff --git a/lxd/instance/drivers/driver_common.go b/lxd/instance/drivers/driver_common.go index 395a200cf7..ecb5d9bf55 100644 --- a/lxd/instance/drivers/driver_common.go +++ b/lxd/instance/drivers/driver_common.go @@ -14,6 +14,7 @@ import ( "github.com/lxc/lxd/lxd/device/nictype" "github.com/lxc/lxd/lxd/instance" "github.com/lxc/lxd/lxd/instance/instancetype" + "github.com/lxc/lxd/lxd/instance/operationlock" "github.com/lxc/lxd/lxd/operations" "github.com/lxc/lxd/lxd/project" "github.com/lxc/lxd/lxd/state" @@ -430,23 +431,45 @@ func (d *common) expandDevices(profiles []api.Profile) error { // restart handles instance restarts. func (d *common) restart(inst instance.Instance, timeout time.Duration) error { + // Setup a new operation for the stop/shutdown phase. + op, err := operationlock.Create(d.id, "restart", true, true) + if err != nil { + return errors.Wrap(err, "Create restart operation") + } + if timeout == 0 { err := inst.Stop(false) if err != nil { + op.Done(err) return err } } else { if inst.IsFrozen() { - return errors.New("Instance is not running") + err = fmt.Errorf("Instance is not running") + op.Done(err) + return err } err := inst.Shutdown(timeout * time.Second) if err != nil { + op.Done(err) return err } } - return inst.Start(false) + // Setup a new operation for the start phase. + op, err = operationlock.Create(d.id, "restart", true, true) + if err != nil { + return errors.Wrap(err, "Create restart operation") + } + + err = inst.Start(false) + if err != nil { + op.Done(err) + return err + } + + return nil } // runHooks executes the callback functions returned from a function. diff --git a/lxd/instance/drivers/driver_lxc.go b/lxd/instance/drivers/driver_lxc.go index 08b5758128..8ca678e9bf 100644 --- a/lxd/instance/drivers/driver_lxc.go +++ b/lxd/instance/drivers/driver_lxc.go @@ -2220,10 +2220,14 @@ func (d *lxc) Start(stateful bool) error { var ctxMap log.Ctx // Setup a new operation - op, err := operationlock.Create(d.id, "start", false, false) + exists, op, err := operationlock.CreateWaitGet(d.id, "start", []string{"restart"}, false, false) if err != nil { return errors.Wrap(err, "Create container start operation") } + if exists { + // An existing matching operation has now succeeded, return. + return nil + } defer op.Done(nil) if !daemon.SharedMountsSetup { @@ -2248,7 +2252,9 @@ func (d *lxc) Start(stateful bool) error { "used": d.lastUsedDate, "stateful": stateful} - logger.Info("Starting container", ctxMap) + if op.Action() == "start" { + logger.Info("Starting container", ctxMap) + } // If stateful, restore now if stateful { @@ -2293,7 +2299,11 @@ func (d *lxc) Start(stateful bool) error { return err } - logger.Info("Started container", ctxMap) + if op.Action() == "start" { + logger.Info("Started container", ctxMap) + d.state.Events.SendLifecycle(d.project, "container-started", + fmt.Sprintf("/1.0/containers/%s", d.name), nil) + } return nil } else if d.stateful { /* stateless start required when we have state, let's delete it */ @@ -2365,9 +2375,11 @@ func (d *lxc) Start(stateful bool) error { return err } - logger.Info("Started container", ctxMap) - d.state.Events.SendLifecycle(d.project, "container-started", - fmt.Sprintf("/1.0/containers/%s", d.name), nil) + if op.Action() == "start" { + logger.Info("Started container", ctxMap) + d.state.Events.SendLifecycle(d.project, "container-started", + fmt.Sprintf("/1.0/containers/%s", d.name), nil) + } return nil } @@ -2462,16 +2474,22 @@ func (d *lxc) onStart(_ map[string]string) error { func (d *lxc) Stop(stateful bool) error { var ctxMap log.Ctx - // Check that we're not already stopped - if !d.IsRunning() { - return fmt.Errorf("The container is already stopped") - } - // Setup a new operation - op, err := operationlock.Create(d.id, "stop", false, true) + exists, op, err := operationlock.CreateWaitGet(d.id, "stop", []string{"restart"}, false, true) if err != nil { return err } + if exists { + // An existing matching operation has now succeeded, return. + return nil + } + + // Check that we're not already stopped + if !d.IsRunning() { + err = fmt.Errorf("The container is already stopped") + op.Done(err) + return err + } ctxMap = log.Ctx{ "project": d.project, @@ -2482,7 +2500,9 @@ func (d *lxc) Stop(stateful bool) error { "used": d.lastUsedDate, "stateful": stateful} - logger.Info("Stopping container", ctxMap) + if op.Action() == "stop" { + logger.Info("Stopping container", ctxMap) + } // Handle stateful stop if stateful { @@ -2586,9 +2606,11 @@ func (d *lxc) Stop(stateful bool) error { return err } - logger.Info("Stopped container", ctxMap) - d.state.Events.SendLifecycle(d.project, "container-stopped", - fmt.Sprintf("/1.0/containers/%s", d.name), nil) + if op.Action() == "stop" { + logger.Info("Stopped container", ctxMap) + d.state.Events.SendLifecycle(d.project, "container-stopped", + fmt.Sprintf("/1.0/containers/%s", d.name), nil) + } return nil } @@ -2597,16 +2619,22 @@ func (d *lxc) Stop(stateful bool) error { func (d *lxc) Shutdown(timeout time.Duration) error { var ctxMap log.Ctx - // Check that we're not already stopped - if !d.IsRunning() { - return fmt.Errorf("The container is already stopped") - } - // Setup a new operation - op, err := operationlock.Create(d.id, "stop", true, true) + exists, op, err := operationlock.CreateWaitGet(d.id, "stop", []string{"restart"}, true, false) if err != nil { return err } + if exists { + // An existing matching operation has now succeeded, return. + return nil + } + + // Check that we're not already stopped + if !d.IsRunning() { + err = fmt.Errorf("The container is already stopped") + op.Done(err) + return err + } ctxMap = log.Ctx{ "project": d.project, @@ -2617,7 +2645,9 @@ func (d *lxc) Shutdown(timeout time.Duration) error { "used": d.lastUsedDate, "timeout": timeout} - logger.Info("Shutting down container", ctxMap) + if op.Action() == "stop" { + logger.Info("Shutting down container", ctxMap) + } // Load the go-lxc struct if d.expandedConfig["raw.lxc"] != "" { @@ -2645,16 +2675,38 @@ func (d *lxc) Shutdown(timeout time.Duration) error { return err } - logger.Info("Shut down container", ctxMap) - d.state.Events.SendLifecycle(d.project, "container-shutdown", - fmt.Sprintf("/1.0/containers/%s", d.name), nil) + if op.Action() == "stop" { + logger.Info("Shut down container", ctxMap) + d.state.Events.SendLifecycle(d.project, "container-shutdown", + fmt.Sprintf("/1.0/containers/%s", d.name), nil) + } return nil } // Restart restart the instance. func (d *lxc) Restart(timeout time.Duration) error { - return d.common.restart(d, timeout) + ctxMap := log.Ctx{ + "project": d.project, + "name": d.name, + "action": "shutdown", + "created": d.creationDate, + "ephemeral": d.ephemeral, + "used": d.lastUsedDate, + "timeout": timeout} + + logger.Info("Restarting container", ctxMap) + + err := d.common.restart(d, timeout) + if err != nil { + return err + } + + logger.Info("Restarted container", ctxMap) + d.state.Events.SendLifecycle(d.project, "container-restarted", + fmt.Sprintf("/1.0/containers/%s", d.name), nil) + + return nil } // onStopNS is triggered by LXC's stop hook once a container is shutdown but before the container's @@ -2678,6 +2730,7 @@ func (d *lxc) onStopNS(args map[string]string) error { // onStop is triggered by LXC's post-stop hook once a container is shutdown and after the // container's namespaces have been closed. func (d *lxc) onStop(args map[string]string) error { + var err error target := args["target"] // Validate target @@ -2688,10 +2741,17 @@ func (d *lxc) onStop(args map[string]string) error { // Pick up the existing stop operation lock created in Stop() function. op := operationlock.Get(d.id) - if op != nil && op.Action() != "stop" { + if op != nil && !shared.StringInSlice(op.Action(), []string{"stop", "restart"}) { return fmt.Errorf("Container is already running a %s operation", op.Action()) } + if op == nil && target == "reboot" { + op, err = operationlock.Create(d.id, "restart", false, false) + if err != nil { + return errors.Wrap(err, "Create restart operation") + } + } + // Make sure we can't call go-lxc functions by mistake d.fromHook = true @@ -2710,7 +2770,7 @@ func (d *lxc) onStop(args map[string]string) error { } // Record power state - err := d.state.Cluster.UpdateInstancePowerState(d.id, "STOPPED") + err = d.state.Cluster.UpdateInstancePowerState(d.id, "STOPPED") if err != nil { err = errors.Wrap(err, "Failed to set container state") op.Done(err) diff --git a/lxd/instance/drivers/driver_qemu.go b/lxd/instance/drivers/driver_qemu.go index 5bddefb8ba..54bad6669f 100644 --- a/lxd/instance/drivers/driver_qemu.go +++ b/lxd/instance/drivers/driver_qemu.go @@ -487,18 +487,21 @@ func (d *qemu) Freeze() error { // onStop is run when the instance stops. func (d *qemu) onStop(target string) error { - ctxMap := log.Ctx{ - "project": d.project, - "name": d.name, - "ephemeral": d.ephemeral, - } + var err error // Pick up the existing stop operation lock created in Stop() function. op := operationlock.Get(d.id) - if op != nil && op.Action() != "stop" { + if op != nil && !shared.StringInSlice(op.Action(), []string{"stop", "restart"}) { return fmt.Errorf("Instance is already running a %s operation", op.Action()) } + if op == nil && target == "reboot" { + op, err = operationlock.Create(d.id, "restart", false, false) + if err != nil { + return errors.Wrap(err, "Create restart operation") + } + } + // Cleanup. d.cleanupDevices() os.Remove(d.pidFilePath()) @@ -516,49 +519,61 @@ func (d *qemu) onStop(target string) error { // Record power state. err = d.state.Cluster.UpdateInstancePowerState(d.id, "STOPPED") if err != nil { - if op != nil { - op.Done(err) - } + op.Done(err) return err } // Unload the apparmor profile err = apparmor.InstanceUnload(d.state, d) if err != nil { - ctxMap["err"] = err - logger.Error("Failed to unload AppArmor profile", ctxMap) + op.Done(err) + return err } if target == "reboot" { err = d.Start(false) + if err != nil { + op.Done(err) + return err + } + d.state.Events.SendLifecycle(d.project, "virtual-machine-restarted", fmt.Sprintf("/1.0/virtual-machines/%s", d.name), nil) } else if d.ephemeral { // Destroy ephemeral virtual machines err = d.Delete() - } - if err != nil { - return err + if err != nil { + op.Done(err) + return err + } } - if op != nil { - op.Done(nil) + if target != "reboot" { + d.state.Events.SendLifecycle(d.project, "virtual-machine-shutdown", + fmt.Sprintf("/1.0/virtual-machines/%s", d.name), nil) } + op.Done(nil) return nil } // Shutdown shuts the instance down. func (d *qemu) Shutdown(timeout time.Duration) error { - if !d.IsRunning() { - return fmt.Errorf("The instance is already stopped") - } - // Setup a new operation - op, err := operationlock.Create(d.id, "stop", true, true) + exists, op, err := operationlock.CreateWaitGet(d.id, "stop", []string{"restart"}, true, false) if err != nil { return err } + if exists { + // An existing matching operation has now succeeded, return. + return nil + } + + if !d.IsRunning() { + err = fmt.Errorf("The instance is already stopped") + op.Done(err) + return err + } // Connect to the monitor. monitor, err := qmp.Connect(d.monitorPath(), qemuSerialChardevName, d.getMonitorEventHandler()) @@ -597,8 +612,9 @@ func (d *qemu) Shutdown(timeout time.Duration) error { case <-chDisconnect: break case <-time.After(timeout): - op.Done(fmt.Errorf("Instance was not shutdown after timeout")) - return fmt.Errorf("Instance was not shutdown after timeout") + err = fmt.Errorf("Instance was not shutdown after timeout") + op.Done(err) + return err } } else { <-chDisconnect // Block until VM is not running if no timeout provided. @@ -610,14 +626,24 @@ func (d *qemu) Shutdown(timeout time.Duration) error { return err } - op.Done(nil) - d.state.Events.SendLifecycle(d.project, "virtual-machine-shutdown", fmt.Sprintf("/1.0/virtual-machines/%s", d.name), nil) + if op.Action() == "stop" { + d.state.Events.SendLifecycle(d.project, "virtual-machine-shutdown", fmt.Sprintf("/1.0/virtual-machines/%s", d.name), nil) + } + return nil } // Restart restart the instance. func (d *qemu) Restart(timeout time.Duration) error { - return d.common.restart(d, timeout) + err := d.common.restart(d, timeout) + if err != nil { + return err + } + + d.state.Events.SendLifecycle(d.project, "virtual-machine-restarted", + fmt.Sprintf("/1.0/virtual-machines/%s", d.name), nil) + + return nil } func (d *qemu) ovmfPath() string { @@ -630,23 +656,29 @@ func (d *qemu) ovmfPath() string { // Start starts the instance. func (d *qemu) Start(stateful bool) error { + // Setup a new operation + exists, op, err := operationlock.CreateWaitGet(d.id, "start", []string{"restart"}, false, false) + if err != nil { + return errors.Wrap(err, "Create instance start operation") + } + if exists { + // An existing matching operation has now succeeded, return. + return nil + } + defer op.Done(nil) + // Ensure the correct vhost_vsock kernel module is loaded before establishing the vsock. - err := util.LoadModule("vhost_vsock") + err = util.LoadModule("vhost_vsock") if err != nil { + op.Done(err) return err } if d.IsRunning() { + op.Done(err) return fmt.Errorf("The instance is already running") } - // Setup a new operation - op, err := operationlock.Create(d.id, "start", false, false) - if err != nil { - return errors.Wrap(err, "Create instance start operation") - } - defer op.Done(nil) - revert := revert.New() defer revert.Fail() @@ -659,6 +691,7 @@ func (d *qemu) Start(stateful bool) error { os.Remove(logfile + ".old") err := os.Rename(logfile, logfile+".old") if err != nil { + op.Done(err) return err } } @@ -714,11 +747,13 @@ func (d *qemu) Start(stateful bool) error { // Start the virtiofsd process in non-daemon mode. proc, err := subprocess.NewProcess(cmd, []string{fmt.Sprintf("--socket-path=%s", sockPath), "-o", fmt.Sprintf("source=%s", filepath.Join(d.Path(), "config"))}, "", "") if err != nil { + op.Done(err) return err } err = proc.Start() if err != nil { + op.Done(err) return err } @@ -728,6 +763,7 @@ func (d *qemu) Start(stateful bool) error { err = proc.Save(pidPath) if err != nil { + op.Done(err) return err } @@ -741,7 +777,9 @@ func (d *qemu) Start(stateful bool) error { } if !shared.PathExists(sockPath) { - return fmt.Errorf("virtiofsd failed to bind socket within 1s") + err = fmt.Errorf("virtiofsd failed to bind socket within 1s") + op.Done(err) + return err } } else { logger.Warn("Unable to use virtio-fs for config drive, using 9p as a fallback: virtiofsd missing") @@ -905,6 +943,7 @@ func (d *qemu) Start(stateful bool) error { // Setup background process. p, err := subprocess.NewProcess(d.state.OS.ExecPath, append(forkLimitsCmd, qemuCmd...), d.EarlyLogFilePath(), d.EarlyLogFilePath()) if err != nil { + op.Done(err) return err } @@ -960,6 +999,7 @@ func (d *qemu) Start(stateful bool) error { err = p.StartWithFiles(files) if err != nil { + op.Done(err) return err } @@ -974,6 +1014,7 @@ func (d *qemu) Start(stateful bool) error { pid, err := d.pid() if err != nil { logger.Errorf(`Failed to get VM process ID "%d"`, pid) + op.Done(err) return err } @@ -1018,7 +1059,9 @@ func (d *qemu) Start(stateful bool) error { // Confirm nothing weird is going on. if len(pins) != len(pids) { - return fmt.Errorf("QEMU has less vCPUs than configured") + err = fmt.Errorf("QEMU has less vCPUs than configured") + op.Done(err) + return err } for i, pid := range pids { @@ -1068,7 +1111,11 @@ func (d *qemu) Start(stateful bool) error { } revert.Success() - d.state.Events.SendLifecycle(d.project, "virtual-machine-started", fmt.Sprintf("/1.0/virtual-machines/%s", d.name), nil) + + if op.Action() == "start" { + d.state.Events.SendLifecycle(d.project, "virtual-machine-started", fmt.Sprintf("/1.0/virtual-machines/%s", d.name), nil) + } + return nil } @@ -2462,19 +2509,27 @@ func (d *qemu) pid() (int, error) { // Stop the VM. func (d *qemu) Stop(stateful bool) error { + // Setup a new operation. + exists, op, err := operationlock.CreateWaitGet(d.id, "stop", []string{"restart"}, false, true) + if err != nil { + return err + } + if exists { + // An existing matching operation has now succeeded, return. + return nil + } + // Check that we're not already stopped. if !d.IsRunning() { - return fmt.Errorf("The instance is already stopped") + err = fmt.Errorf("The instance is already stopped") + op.Done(err) + return err } // Check that no stateful stop was requested. if stateful { - return fmt.Errorf("Stateful stop isn't supported for VMs at this time") - } - - // Setup a new operation. - op, err := operationlock.Create(d.id, "stop", false, true) - if err != nil { + err = fmt.Errorf("Stateful stop isn't supported for VMs at this time") + op.Done(err) return err } @@ -2519,7 +2574,10 @@ func (d *qemu) Stop(stateful bool) error { return err } - d.state.Events.SendLifecycle(d.project, "virtual-machine-stopped", fmt.Sprintf("/1.0/virtual-machines/%s", d.name), nil) + if op.Action() == "stop" { + d.state.Events.SendLifecycle(d.project, "virtual-machine-stopped", fmt.Sprintf("/1.0/virtual-machines/%s", d.name), nil) + } + return nil } From 619f8b327a170eb466ed710b76c3e53ea7b2042a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgra...@ubuntu.com> Date: Wed, 25 Nov 2020 19:51:10 -0500 Subject: [PATCH 6/6] Makefile: Fix golint URL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Stéphane Graber <stgra...@ubuntu.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index abf7d2d46a..592ada97dc 100644 --- a/Makefile +++ b/Makefile @@ -137,7 +137,7 @@ endif check: default go get -v -x github.com/rogpeppe/godeps go get -v -x github.com/tsenart/deadcode - go get -v -x github.com/golang/lint/golint + go get -v -x golang.org/x/lint/golint go test -v -tags "$(TAG_SQLITE3)" $(DEBUG) ./... cd test && ./main.sh
_______________________________________________ lxc-devel mailing list lxc-devel@lists.linuxcontainers.org http://lists.linuxcontainers.org/listinfo/lxc-devel