The following pull request was submitted through GitHub. It can be accessed and reviewed at: https://github.com/lxc/lxd/pull/2122
This e-mail was sent by the LXC bot; direct replies will not reach the author unless they happen to be subscribed to this list. === Description (from pull-request) === Cherry-picks of a few commits from master.
From 26404a098eca76ac1c05cfe2b1acff563d96bc0f Mon Sep 17 00:00:00 2001 From: Tycho Andersen <tycho.ander...@canonical.com> Date: Wed, 1 Jun 2016 10:40:55 -0600 Subject: [PATCH 1/2] c/r: switch to the new ->migrate API We'll use this in the next patch to enable use of new liblxc features. Signed-off-by: Tycho Andersen <tycho.ander...@canonical.com> --- lxd/container.go | 5 ++--- lxd/container_lxc.go | 14 +++++++++----- lxd/migrate.go | 3 +-- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/lxd/container.go b/lxd/container.go index 9c196ca..cf7884f 100644 --- a/lxd/container.go +++ b/lxd/container.go @@ -341,7 +341,7 @@ type container interface { // Snapshots & migration Restore(sourceContainer container) error - Checkpoint(opts lxc.CheckpointOptions) error + Migrate(cmd uint, stateDir string, stop bool) error StartFromMigration(imagesDir string) error Snapshots() ([]container, error) @@ -527,8 +527,7 @@ func containerCreateAsSnapshot(d *Daemon, args containerArgs, sourceContainer co * after snapshotting will fail. 
*/ - opts := lxc.CheckpointOptions{Directory: stateDir, Stop: false, Verbose: true} - err = sourceContainer.Checkpoint(opts) + err = sourceContainer.Migrate(lxc.MIGRATE_DUMP, stateDir, false) err2 := CollectCRIULogFile(sourceContainer, stateDir, "snapshot", "dump") if err2 != nil { shared.Log.Warn("failed to collect criu log file", log.Ctx{"error": err2}) diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go index 2f48cfa..6f3f117 100644 --- a/lxd/container_lxc.go +++ b/lxd/container_lxc.go @@ -1385,8 +1385,7 @@ func (c *containerLXC) Stop(stateful bool) error { } // Checkpoint - opts := lxc.CheckpointOptions{Directory: stateDir, Stop: true, Verbose: true} - err = c.Checkpoint(opts) + err = c.Migrate(lxc.MIGRATE_DUMP, stateDir, true) err2 := CollectCRIULogFile(c, stateDir, "snapshot", "dump") if err2 != nil { shared.Log.Warn("failed to collect criu log file", log.Ctx{"error": err2}) @@ -2712,14 +2711,19 @@ func (c *containerLXC) Export(w io.Writer) error { return tw.Close() } -func (c *containerLXC) Checkpoint(opts lxc.CheckpointOptions) error { - // Load the go-lxc struct +func (c *containerLXC) Migrate(cmd uint, stateDir string, stop bool) error { err := c.initLXC() if err != nil { return err } - return c.c.Checkpoint(opts) + opts := lxc.MigrateOptions{ + Stop: stop, + Directory: stateDir, + Verbose: true, + } + + return c.c.Migrate(cmd, opts) } func (c *containerLXC) TemplateApply(trigger string) error { diff --git a/lxd/migrate.go b/lxd/migrate.go index fdaab35..627e6d0 100644 --- a/lxd/migrate.go +++ b/lxd/migrate.go @@ -368,8 +368,7 @@ func (s *migrationSourceWs) Do(op *operation) error { } defer os.RemoveAll(checkpointDir) - opts := lxc.CheckpointOptions{Stop: true, Directory: checkpointDir, Verbose: true} - err = s.container.Checkpoint(opts) + err = s.container.Migrate(lxc.MIGRATE_DUMP, checkpointDir, true) if err2 := CollectCRIULogFile(s.container, checkpointDir, "migration", "dump"); err2 != nil { shared.Debugf("Error collecting checkpoint log file 
%s", err) From aba85baec275f692c4d1adff044df577518a7055 Mon Sep 17 00:00:00 2001 From: Tycho Andersen <tycho.ander...@canonical.com> Date: Tue, 14 Jun 2016 17:40:34 +0000 Subject: [PATCH 2/2] simplify checkpoint/restore code everywhere Some problems: * We had various entry points for migration, each which collected logs in various different and inconsistent ways. * We also had the StartFromMigrate call, and a Migrate() to which you could pass lxc.MIGRATE_RESTORE, which wasn't an obvious API. * at each point we had a check that did the rootfs shifting if necessary * we had to do findCriu everywhere manually Now that we have a Migrate() call, let's just route everything through that, and handle all of this in a uniform way. Note that some findCriu calls are still prudent to do e.g. in snapshot restore, before we actually do all the filesystem work to restore stuff if the snapshot is stateful. I've left those sorts of calls in. Note: this is a modified version (dropping the preserveInodes bits) from 31f1e20ca1c97e5a0b857502697fb1f9c6a8b5af. 
Signed-off-by: Tycho Andersen <tycho.ander...@canonical.com> --- lxd/container.go | 12 +-- lxd/container_lxc.go | 234 +++++++++++++++++++++++++++++---------------------- lxd/migrate.go | 93 +------------------- 3 files changed, 137 insertions(+), 202 deletions(-) diff --git a/lxd/container.go b/lxd/container.go index cf7884f..166e53d 100644 --- a/lxd/container.go +++ b/lxd/container.go @@ -11,8 +11,6 @@ import ( "gopkg.in/lxc/go-lxc.v2" "github.com/lxc/lxd/shared" - - log "gopkg.in/inconshreveable/log15.v2" ) // Helper functions @@ -341,8 +339,7 @@ type container interface { // Snapshots & migration Restore(sourceContainer container) error - Migrate(cmd uint, stateDir string, stop bool) error - StartFromMigration(imagesDir string) error + Migrate(cmd uint, stateDir string, function string, stop bool) error Snapshots() ([]container, error) // Config handling @@ -527,12 +524,7 @@ func containerCreateAsSnapshot(d *Daemon, args containerArgs, sourceContainer co * after snapshotting will fail. 
*/ - err = sourceContainer.Migrate(lxc.MIGRATE_DUMP, stateDir, false) - err2 := CollectCRIULogFile(sourceContainer, stateDir, "snapshot", "dump") - if err2 != nil { - shared.Log.Warn("failed to collect criu log file", log.Ctx{"error": err2}) - } - + err = sourceContainer.Migrate(lxc.MIGRATE_DUMP, stateDir, "snapshot", false) if err != nil { os.RemoveAll(sourceContainer.StatePath()) return nil, err diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go index 6f3f117..7505ae2 100644 --- a/lxd/container_lxc.go +++ b/lxd/container_lxc.go @@ -2,6 +2,7 @@ package main import ( "archive/tar" + "bufio" "encoding/json" "fmt" "io" @@ -1157,30 +1158,7 @@ func (c *containerLXC) Start(stateful bool) error { return fmt.Errorf("Container has no existing state to restore.") } - if err := findCriu("snapshot"); err != nil { - return err - } - - if !c.IsPrivileged() { - if err := c.IdmapSet().ShiftRootfs(c.StatePath()); err != nil { - return err - } - } - - out, err := exec.Command( - execPath, - "forkmigrate", - c.name, - c.daemon.lxcpath, - configPath, - c.StatePath()).CombinedOutput() - if string(out) != "" { - for _, line := range strings.Split(strings.TrimRight(string(out), "\n"), "\n") { - shared.Debugf("forkmigrate: %s", line) - } - } - CollectCRIULogFile(c, c.StatePath(), "snapshot", "restore") - + err := c.Migrate(lxc.MIGRATE_RESTORE, c.StatePath(), "snapshot", false) if err != nil && !c.IsRunning() { return err } @@ -1228,41 +1206,6 @@ func (c *containerLXC) Start(stateful bool) error { return nil } -func (c *containerLXC) StartFromMigration(imagesDir string) error { - // Run the shared start code - configPath, err := c.startCommon() - if err != nil { - return err - } - - // Start the LXC container - out, err := exec.Command( - execPath, - "forkmigrate", - c.name, - c.daemon.lxcpath, - configPath, - imagesDir).CombinedOutput() - - if string(out) != "" { - for _, line := range strings.Split(strings.TrimRight(string(out), "\n"), "\n") { - shared.Debugf("forkmigrate: %s", 
line) - } - } - - if err != nil && !c.IsRunning() { - return fmt.Errorf( - "Error calling 'lxd forkmigrate %s %s %s %s': err='%v'", - c.name, - c.daemon.lxcpath, - filepath.Join(c.LogPath(), "lxc.conf"), - imagesDir, - err) - } - - return nil -} - func (c *containerLXC) OnStart() error { // Make sure we can't call go-lxc functions by mistake c.fromHook = true @@ -1371,10 +1314,6 @@ func (c *containerLXC) setupStopping() *sync.WaitGroup { func (c *containerLXC) Stop(stateful bool) error { // Handle stateful stop if stateful { - if err := findCriu("snapshot"); err != nil { - return err - } - // Cleanup any existing state stateDir := c.StatePath() os.RemoveAll(stateDir) @@ -1385,12 +1324,7 @@ func (c *containerLXC) Stop(stateful bool) error { } // Checkpoint - err = c.Migrate(lxc.MIGRATE_DUMP, stateDir, true) - err2 := CollectCRIULogFile(c, stateDir, "snapshot", "dump") - if err2 != nil { - shared.Log.Warn("failed to collect criu log file", log.Ctx{"error": err2}) - } - + err = c.Migrate(lxc.MIGRATE_DUMP, stateDir, "snapshot", true) if err != nil { return err } @@ -1761,32 +1695,7 @@ func (c *containerLXC) Restore(sourceContainer container) error { // If the container wasn't running but was stateful, should we restore // it as running? 
if shared.PathExists(c.StatePath()) { - configPath, err := c.startCommon() - if err != nil { - return err - } - - if !c.IsPrivileged() { - if err := c.IdmapSet().ShiftRootfs(c.StatePath()); err != nil { - return err - } - } - - out, err := exec.Command( - execPath, - "forkmigrate", - c.name, - c.daemon.lxcpath, - configPath, - c.StatePath()).CombinedOutput() - if string(out) != "" { - for _, line := range strings.Split(strings.TrimRight(string(out), "\n"), "\n") { - shared.Debugf("forkmigrate: %s", line) - } - } - CollectCRIULogFile(c, c.StatePath(), "snapshot", "restore") - - if err != nil { + if err := c.Migrate(lxc.MIGRATE_RESTORE, c.StatePath(), "snapshot", false); err != nil { return err } @@ -2711,19 +2620,140 @@ func (c *containerLXC) Export(w io.Writer) error { return tw.Close() } -func (c *containerLXC) Migrate(cmd uint, stateDir string, stop bool) error { - err := c.initLXC() +func collectCRIULogFile(c container, imagesDir string, function string, method string) error { + t := time.Now().Format(time.RFC3339) + newPath := shared.LogPath(c.Name(), fmt.Sprintf("%s_%s_%s.log", function, method, t)) + return shared.FileCopy(filepath.Join(imagesDir, fmt.Sprintf("%s.log", method)), newPath) +} + +func getCRIULogErrors(imagesDir string, method string) (string, error) { + f, err := os.Open(path.Join(imagesDir, fmt.Sprintf("%s.log", method))) + if err != nil { + return "", err + } + + defer f.Close() + + scanner := bufio.NewScanner(f) + ret := []string{} + for scanner.Scan() { + line := scanner.Text() + if strings.Contains(line, "Error") { + ret = append(ret, scanner.Text()) + } + } + + return strings.Join(ret, "\n"), nil +} + +func findCriu(host string) error { + _, err := exec.LookPath("criu") if err != nil { + return fmt.Errorf("CRIU is required for live migration but its binary couldn't be found on the %s server. 
Is it installed in LXD's path?", host) + } + + return nil +} + +func (c *containerLXC) Migrate(cmd uint, stateDir string, function string, stop bool) error { + if err := findCriu(function); err != nil { return err } - opts := lxc.MigrateOptions{ - Stop: stop, - Directory: stateDir, - Verbose: true, + prettyCmd := "" + switch cmd { + case lxc.MIGRATE_PRE_DUMP: + prettyCmd = "pre-dump" + case lxc.MIGRATE_DUMP: + prettyCmd = "dump" + case lxc.MIGRATE_RESTORE: + prettyCmd = "restore" + default: + prettyCmd = "unknown" + shared.Log.Warn("unknown migrate call", log.Ctx{"cmd": cmd}) + } + + var migrateErr error + + /* For restore, we need an extra fork so that we daemonize monitor + * instead of having it be a child of LXD, so let's hijack the command + * here and do the extra fork. + */ + if cmd == lxc.MIGRATE_RESTORE { + // Run the shared start + _, err := c.startCommon() + if err != nil { + return err + } + + /* + * For unprivileged containers we need to shift the + * perms on the images images so that they can be + * opened by the process after it is in its user + * namespace. 
+ */ + if !c.IsPrivileged() { + if err := c.IdmapSet().ShiftRootfs(stateDir); err != nil { + return err + } + } + + configPath := filepath.Join(c.LogPath(), "lxc.conf") + + var out []byte + out, migrateErr = exec.Command( + execPath, + "forkmigrate", + c.name, + c.daemon.lxcpath, + configPath, + stateDir).CombinedOutput() + + if string(out) != "" { + for _, line := range strings.Split(strings.TrimRight(string(out), "\n"), "\n") { + shared.Debugf("forkmigrate: %s", line) + } + } + + if migrateErr != nil && !c.IsRunning() { + migrateErr = fmt.Errorf( + "Error calling 'lxd forkmigrate %s %s %s %s': err='%v' out='%v'", + c.name, + c.daemon.lxcpath, + filepath.Join(c.LogPath(), "lxc.conf"), + stateDir, + err, + string(out)) + } + + } else { + err := c.initLXC() + if err != nil { + return err + } + + opts := lxc.MigrateOptions{ + Stop: stop, + Directory: stateDir, + Verbose: true, + } + + migrateErr = c.c.Migrate(cmd, opts) + } + + collectErr := collectCRIULogFile(c, stateDir, function, prettyCmd) + if collectErr != nil { + shared.Log.Error("Error collecting checkpoint log file", log.Ctx{"err": collectErr}) + } + + if migrateErr != nil { + log, err2 := getCRIULogErrors(stateDir, prettyCmd) + if err2 == nil { + migrateErr = fmt.Errorf("%s %s failed\n%s", function, prettyCmd, log) + } } - return c.c.Migrate(cmd, opts) + return migrateErr } func (c *containerLXC) TemplateApply(trigger string) error { diff --git a/lxd/migrate.go b/lxd/migrate.go index 627e6d0..5528aa1 100644 --- a/lxd/migrate.go +++ b/lxd/migrate.go @@ -6,18 +6,13 @@ package main import ( - "bufio" "fmt" "io/ioutil" "net/http" "net/url" "os" - "os/exec" - "path" - "path/filepath" "strings" "sync" - "time" "github.com/golang/protobuf/proto" "github.com/gorilla/websocket" @@ -71,15 +66,6 @@ func (c *migrationFields) send(m proto.Message) error { return shared.WriteAll(w, data) } -func findCriu(host string) error { - _, err := exec.LookPath("criu") - if err != nil { - return fmt.Errorf("CRIU is required for 
live migration but its binary couldn't be found on the %s server. Is it installed in LXD's path?", host) - } - - return nil -} - func (c *migrationFields) recv(m proto.Message) error { mt, r, err := c.controlConn.NextReader() if err != nil { @@ -158,32 +144,6 @@ func (c *migrationFields) controlChannel() <-chan MigrationControl { return ch } -func CollectCRIULogFile(c container, imagesDir string, function string, method string) error { - t := time.Now().Format(time.RFC3339) - newPath := shared.LogPath(c.Name(), fmt.Sprintf("%s_%s_%s.log", function, method, t)) - return shared.FileCopy(filepath.Join(imagesDir, fmt.Sprintf("%s.log", method)), newPath) -} - -func GetCRIULogErrors(imagesDir string, method string) (string, error) { - f, err := os.Open(path.Join(imagesDir, fmt.Sprintf("%s.log", method))) - if err != nil { - return "", err - } - - defer f.Close() - - scanner := bufio.NewScanner(f) - ret := []string{} - for scanner.Scan() { - line := scanner.Text() - if strings.Contains(line, "Error") { - ret = append(ret, scanner.Text()) - } - } - - return strings.Join(ret, "\n"), nil -} - type migrationSourceWs struct { migrationFields @@ -368,24 +328,8 @@ func (s *migrationSourceWs) Do(op *operation) error { } defer os.RemoveAll(checkpointDir) - err = s.container.Migrate(lxc.MIGRATE_DUMP, checkpointDir, true) - - if err2 := CollectCRIULogFile(s.container, checkpointDir, "migration", "dump"); err2 != nil { - shared.Debugf("Error collecting checkpoint log file %s", err) - } - + err = s.container.Migrate(lxc.MIGRATE_DUMP, checkpointDir, "migration", true) if err != nil { - driver.Cleanup() - log, err2 := GetCRIULogErrors(checkpointDir, "dump") - - /* couldn't find the CRIU log file which means we - * didn't even get that far; give back the liblxc - * error. 
*/ - if err2 != nil { - log = err.Error() - } - - err = fmt.Errorf("checkpoint failed:\n%s", log) s.sendControl(err) return err } @@ -601,36 +545,12 @@ func (c *migrationSink) do() error { return } - defer func() { - err := CollectCRIULogFile(c.container, imagesDir, "migration", "restore") - /* - * If the checkpoint fails, we won't have any log to collect, - * so don't warn about that. - */ - if err != nil && !os.IsNotExist(err) { - shared.Debugf("Error collectiong migration log file %s", err) - } - - os.RemoveAll(imagesDir) - }() + defer os.RemoveAll(imagesDir) if err := RsyncRecv(shared.AddSlash(imagesDir), c.criuConn); err != nil { restore <- err return } - - /* - * For unprivileged containers we need to shift the - * perms on the images images so that they can be - * opened by the process after it is in its user - * namespace. - */ - if !c.container.IsPrivileged() { - if err := c.container.IdmapSet().ShiftRootfs(imagesDir); err != nil { - restore <- err - return - } - } } err := <-fsTransfer @@ -640,15 +560,8 @@ func (c *migrationSink) do() error { } if c.live { - err := c.container.StartFromMigration(imagesDir) + err = c.container.Migrate(lxc.MIGRATE_RESTORE, imagesDir, "migration", false) if err != nil { - log, err2 := GetCRIULogErrors(imagesDir, "restore") - /* restore failed before CRIU was invoked, give - * back the liblxc error */ - if err2 != nil { - log = err.Error() - } - err = fmt.Errorf("restore failed:\n%s", log) restore <- err return }
_______________________________________________ lxc-devel mailing list lxc-devel@lists.linuxcontainers.org http://lists.linuxcontainers.org/listinfo/lxc-devel