The following pull request was submitted through GitHub. It can be accessed and reviewed at: https://github.com/lxc/lxd/pull/2523
This e-mail was sent by the LXC bot; direct replies will not reach the author unless they happen to be subscribed to this list. === Description (from pull-request) === These two close #2512
From b24188b3749b67029dd64191b40f3defc3249227 Mon Sep 17 00:00:00 2001 From: Tycho Andersen <tycho.ander...@canonical.com> Date: Tue, 18 Oct 2016 09:37:20 -0600 Subject: [PATCH 1/2] don't use the operation as a marker of success This caused several problems similar to #2512. Instead, let's just use another channel. Signed-off-by: Tycho Andersen <tycho.ander...@canonical.com> --- lxd/migrate.go | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/lxd/migrate.go b/lxd/migrate.go index b0b3d68..061f2a5 100644 --- a/lxd/migrate.go +++ b/lxd/migrate.go @@ -379,6 +379,8 @@ func (s *migrationSourceWs) Do(migrateOp *operation) error { return abort(err) } + restoreSuccess := make(chan bool, 1) + dumpSuccess := make(chan error, 1) if s.live { if header.Criu == nil { return abort(fmt.Errorf("Got no CRIU socket type for live migration")) @@ -420,13 +422,9 @@ func (s *migrationSourceWs) Do(migrateOp *operation) error { nil, nil, func(op *operation) error { - _, err := migrateOp.WaitFinal(-1) - if err != nil { - return err - } - - if migrateOp.status != shared.Success { - return fmt.Errorf("restore failed: %s", op.status.String()) + result := <-restoreSuccess + if !result { + return fmt.Errorf("restore failed, failing CRIU") } return nil }, @@ -468,15 +466,14 @@ func (s *migrationSourceWs) Do(migrateOp *operation) error { return abort(err) } - migrateDone := make(chan error, 1) go func() { - migrateDone <- s.container.Migrate(lxc.MIGRATE_DUMP, checkpointDir, "migration", true, true) + dumpSuccess <- s.container.Migrate(lxc.MIGRATE_DUMP, checkpointDir, "migration", true, true) os.RemoveAll(checkpointDir) }() select { /* the checkpoint failed, let's just abort */ - case err = <-migrateDone: + case err = <-dumpSuccess: return abort(err) /* the dump finished, let's continue on to the restore */ case <-dumpDone: @@ -513,6 +510,14 @@ func (s *migrationSourceWs) Do(migrateOp *operation) error { return err } + if s.live { + restoreSuccess <- 
*msg.Success + err := <-dumpSuccess + if err != nil { + shared.LogErrorf("dump failed after successful restore?: %q", err) + } + } + if !*msg.Success { return fmt.Errorf(*msg.Message) } From 78cf0f9a94615161a37653311aeef186635cfc0b Mon Sep 17 00:00:00 2001 From: Tycho Andersen <tycho.ander...@canonical.com> Date: Tue, 18 Oct 2016 09:39:34 -0600 Subject: [PATCH 2/2] copy: wait on the source operation too The source might need to do some cleanup (e.g. wait for the container to actually be killed by CRIU in the case of a live migration), so let's wait for it too. Signed-off-by: Tycho Andersen <tycho.ander...@canonical.com> --- lxc/copy.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lxc/copy.go b/lxc/copy.go index 1f2eea4..ca69b62 100644 --- a/lxc/copy.go +++ b/lxc/copy.go @@ -215,6 +215,10 @@ func (c *copyCmd) copyContainer(config *lxd.Config, sourceResource string, destR continue } + if err := source.WaitForSuccess(sourceWSResponse.Operation); err != nil { + return err + } + // If push mode is implemented then MigrateFrom will return a // non-waitable operation. So this needs to be conditionalized // on pull mode.
_______________________________________________ lxc-devel mailing list lxc-devel@lists.linuxcontainers.org http://lists.linuxcontainers.org/listinfo/lxc-devel