The following pull request was submitted through Github. It can be accessed and reviewed at: https://github.com/lxc/lxd/pull/7746
This e-mail was sent by the LXC bot, direct replies will not reach the author unless they happen to be subscribed to this list. === Description (from pull-request) === Up until now there was no way (AFAIK) for the LXD client to indicate to the LXD server that the local network being created/updated was a precursor to joining the node to a cluster. This caused an issue because in LXD 4.4 new bridge networks have a stable volatile bridge MAC address generated on create/update. If a network on the existing cluster was created before the cluster was upgraded to LXD 4.4, then the network does not get a stable volatile MAC until it is updated (by design to avoid network interruption). Unfortunately this meant that when a fresh node was joined to the (now up to date) cluster, part of the process requires that the networks on the cluster be created on the new node before it is joined (and the cluster checks that the network configs match before allowing the join). What was happening is that the new local network request was being processed, and a stable volatile MAC address was being generated. When this was compared to the existing cluster network config (which didn't have the volatile MAC key) the mismatch was detected and the node was refused permission to join the cluster. This PR adds a `ClusterPreJoin` field to `NetworkPut`. This is used to indicate that network create/update request is being done as part of precursor steps to join node to a cluster, which inside the bridge driver is used to avoid generating volatile MAC keys.
From ffaf8b1fdeb8bd124c2dfa189e44d66e687aada8 Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Fri, 7 Aug 2020 18:02:39 +0100 Subject: [PATCH 01/14] lxd/instance/drivers/driver/qemu: Fix race in onStop getting operation Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- lxd/instance/drivers/driver_qemu.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lxd/instance/drivers/driver_qemu.go b/lxd/instance/drivers/driver_qemu.go index 5e671ceb52..cc12ed6d14 100644 --- a/lxd/instance/drivers/driver_qemu.go +++ b/lxd/instance/drivers/driver_qemu.go @@ -503,7 +503,9 @@ func (vm *qemu) onStop(target string) error { // Record power state. err := vm.state.Cluster.UpdateInstancePowerState(vm.id, "STOPPED") if err != nil { - op.Done(err) + if op != nil { + op.Done(err) + } return err } From 4e465dba77fb6fed8a6146dedd0c170c0e5b96fb Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Fri, 7 Aug 2020 18:17:14 +0100 Subject: [PATCH 02/14] api: Adds network_cluster_prejoin extension Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- doc/api-extensions.md | 6 ++++++ shared/version/api.go | 1 + 2 files changed, 7 insertions(+) diff --git a/doc/api-extensions.md b/doc/api-extensions.md index 1f39de3831..5492f02e90 100644 --- a/doc/api-extensions.md +++ b/doc/api-extensions.md @@ -1137,3 +1137,9 @@ specify which parent interface should be used for creating NIC device interfaces Also adds `network` configuration key support for `sriov` NICs to allow them to specify the associated network of the same type that they should use as the basis for the NIC device. + +## network\_cluster\_prejoin + +Introduces the `ClusterPreJoin` field to the `NetworkPut` API struct type. This allows a client to indicate that +this local network create/update request is a precursor to joining the node to a cluster (which can alter the way +that the network is created). diff --git a/shared/version/api.go b/shared/version/api.go index bbb6b9cf3a..6495eb57e3 100644 --- a/shared/version/api.go +++ b/shared/version/api.go @@ -222,6 +222,7 @@ var APIExtensions = []string{ "projects_limits_disk", "network_type_macvlan", "network_type_sriov", + "network_cluster_prejoin", } // APIExtensionsCount returns the number of available API extensions. From 8d2326543620810e5bf8d9ae5c4b177300f92b90 Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Fri, 7 Aug 2020 18:03:09 +0100 Subject: [PATCH 03/14] shared/api/network: Adds ClusterPreJoin to NetworkPut Used to indicate that network create/update request is being done as part of precursor steps to join node to a cluster. Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- shared/api/network.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/shared/api/network.go b/shared/api/network.go index 12b126dc33..203063c342 100644 --- a/shared/api/network.go +++ b/shared/api/network.go @@ -25,6 +25,9 @@ type NetworkPut struct { // API extension: entity_description Description string `json:"description" yaml:"description"` + + // API extension: network_cluster_prejoin + ClusterPreJoin bool `json:"clusterPreJoin" yaml:"clusterPreJoin"` } // NetworkStatusPending network is pending creation on other cluster nodes. From e314b84e62831bc18d795fff73622889c86d7e6f Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Fri, 7 Aug 2020 18:04:50 +0100 Subject: [PATCH 04/14] lxd/init: Adds clusterPreJoin argument to initDataNodeApply This is passed to the Network create/update request as ClusterPreJoin. Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- lxd/init.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lxd/init.go b/lxd/init.go index ba961a5b85..4bd90afc65 100644 --- a/lxd/init.go +++ b/lxd/init.go @@ -26,7 +26,7 @@ type initDataCluster struct { // It's used both by the 'lxd init' command and by the PUT /1.0/cluster API. // // In case of error, the returned function can be used to revert the changes. -func initDataNodeApply(d lxd.InstanceServer, config initDataNode) (func(), error) { +func initDataNodeApply(d lxd.InstanceServer, config initDataNode, clusterPreJoin bool) (func(), error) { // Handle reverts reverts := []func(){} revert := func() { @@ -78,6 +78,7 @@ func initDataNodeApply(d lxd.InstanceServer, config initDataNode) (func(), error // Network creator createNetwork := func(network api.NetworksPost) error { // Create the network if doesn't exist + network.ClusterPreJoin = clusterPreJoin err := d.CreateNetwork(network) if err != nil { return errors.Wrapf(err, "Failed to create network '%s'", network.Name) @@ -122,6 +123,7 @@ func initDataNodeApply(d lxd.InstanceServer, config initDataNode) (func(), error } // Apply it + newNetwork.ClusterPreJoin = clusterPreJoin err = d.UpdateNetwork(currentNetwork.Name, newNetwork, etag) if err != nil { return errors.Wrapf(err, "Failed to update network '%s'", network.Name) From ee2367d33b367a29ab698e4acd30a8f1a1df9b17 Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Fri, 7 Aug 2020 18:05:29 +0100 Subject: [PATCH 05/14] lxd/api/cluster: Set clusterPreJoin to true when calling initDataNodeApply for cluster join Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- lxd/api_cluster.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lxd/api_cluster.go b/lxd/api_cluster.go index 0416cc872f..f184fbffdb 100644 --- a/lxd/api_cluster.go +++ b/lxd/api_cluster.go @@ -792,7 +792,7 @@ func clusterInitMember(d, client lxd.InstanceServer, memberConfig []api.ClusterM data.Networks = append(data.Networks, post) } - revert, err := initDataNodeApply(d, data) + revert, err := initDataNodeApply(d, data, true) if err != nil { revert() return errors.Wrap(err, "Failed to initialize storage pools and networks") From 475af201c22d1f506faa3090aa3194a6bf6d9251 Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Fri, 7 Aug 2020 18:06:11 +0100 Subject: [PATCH 06/14] lxd/main/init: Set clusterPreJoin to false in initDataNodeApply when doing single node init Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- lxd/main_init.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lxd/main_init.go b/lxd/main_init.go index 9dd9a5f186..72f984595f 100644 --- a/lxd/main_init.go +++ b/lxd/main_init.go @@ -149,7 +149,7 @@ func (c *cmdInit) Run(cmd *cobra.Command, args []string) error { return nil } - revert, err := initDataNodeApply(d, config.Node) + revert, err := initDataNodeApply(d, config.Node, false) if err != nil { revert() return err From d84318a3e9e4c431696529f4a30bb0a575c4ee72 Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Fri, 7 Aug 2020 18:06:41 +0100 Subject: [PATCH 07/14] lxd/network/network/interface: Alters fillConfig to take api.NetworkPut This is to allow ClusterPreJoin to be passed through. Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- lxd/network/network_interface.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lxd/network/network_interface.go b/lxd/network/network_interface.go index 5d123ff0ac..8c471aa5e1 100644 --- a/lxd/network/network_interface.go +++ b/lxd/network/network_interface.go @@ -13,7 +13,7 @@ import ( type Network interface { // Load. init(state *state.State, id int64, name string, netType string, description string, config map[string]string, status string) - fillConfig(config map[string]string) error + fillConfig(net api.NetworkPut) error // Config. ValidateName(name string) error From 22a7979b5a9a478c47da14c1636cd0291e2d9db3 Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Fri, 7 Aug 2020 18:08:45 +0100 Subject: [PATCH 08/14] lxd/network/driver/common: fillConfig signature Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- lxd/network/driver_common.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lxd/network/driver_common.go b/lxd/network/driver_common.go index 9708c78057..0d9450331b 100644 --- a/lxd/network/driver_common.go +++ b/lxd/network/driver_common.go @@ -46,8 +46,8 @@ func (n *common) init(state *state.State, id int64, name string, netType string, n.status = status } -// fillConfig fills requested config with any default values, by default this is a no-op. -func (n *common) fillConfig(config map[string]string) error { +// fillConfig fills supplied network's Config map with any default values. This is a no-op. +func (n *common) fillConfig(net api.NetworkPut) error { return nil } From 763dc4cfb79d6671e85e6d7dec482a18a3bc61fb Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Fri, 7 Aug 2020 18:08:59 +0100 Subject: [PATCH 09/14] lxd/network/driver/common: Update comment clarification Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- lxd/network/driver_common.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lxd/network/driver_common.go b/lxd/network/driver_common.go index 0d9450331b..88cf17fab3 100644 --- a/lxd/network/driver_common.go +++ b/lxd/network/driver_common.go @@ -228,11 +228,10 @@ func (n *common) update(applyNetwork api.NetworkPut, targetNode string, clusterN // the config being supplied and not that in the database). n.init(n.state, n.id, n.name, n.netType, applyNetwork.Description, applyNetwork.Config, n.status) - // If this update isn't coming via a cluster notification itself, then notify all nodes of change and then - // update the database. + // If this update isn't coming via a cluster notification, then update the database. if !clusterNotification { + // If no target specified, notify all other nodes to update their local network before DB update. if targetNode == "" { - // Notify all other nodes to update the network if no target specified. notifier, err := cluster.NewNotifier(n.state, n.state.Endpoints.NetworkCert(), cluster.NotifyAll) if err != nil { return err From e4e22790e9c70ee0ed63398f4b2d83dfed3b502e Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Fri, 7 Aug 2020 18:09:23 +0100 Subject: [PATCH 10/14] lxd/network/driver/common: Adds logging to generic Create Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- lxd/network/driver_common.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lxd/network/driver_common.go b/lxd/network/driver_common.go index 88cf17fab3..0290e960df 100644 --- a/lxd/network/driver_common.go +++ b/lxd/network/driver_common.go @@ -357,6 +357,8 @@ func (n *common) delete(clusterNotification bool) error { // Create is a no-op. func (n *common) Create(clusterNotification bool) error { + n.logger.Debug("Create", log.Ctx{"clusterNotification": clusterNotification, "config": n.config}) + return nil } From 77d8ce443c4bbcaaf5cf37e535fe0f6c2ac3d402 Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Fri, 7 Aug 2020 18:09:40 +0100 Subject: [PATCH 11/14] lxd/network/network/load: Passes api.NetworkPut to n.fillConfig in FillConfig Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- lxd/network/network_load.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lxd/network/network_load.go b/lxd/network/network_load.go index d65d7c2842..af56233232 100644 --- a/lxd/network/network_load.go +++ b/lxd/network/network_load.go @@ -77,7 +77,7 @@ func FillConfig(req *api.NetworksPost) error { n := driverFunc() n.init(nil, 0, req.Name, req.Type, req.Description, req.Config, "Unknown") - err := n.fillConfig(req.Config) + err := n.fillConfig(req.NetworkPut) if err != nil { return err } From 30610a66aa8e8cdf15d29c2445863ae271766f23 Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Fri, 7 Aug 2020 18:12:50 +0100 Subject: [PATCH 12/14] lxd/network/driver: Update logging tweaks Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- lxd/network/driver_bridge.go | 2 +- lxd/network/driver_macvlan.go | 2 +- lxd/network/driver_sriov.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lxd/network/driver_bridge.go b/lxd/network/driver_bridge.go index cb098e4b88..24e48b6bfd 100644 --- a/lxd/network/driver_bridge.go +++ b/lxd/network/driver_bridge.go @@ -1439,7 +1439,7 @@ func (n *bridge) Stop() error { // Update updates the network. Accepts notification boolean indicating if this update request is coming from a // cluster notification, in which case do not update the database, just apply local changes needed. func (n *bridge) Update(newNetwork api.NetworkPut, targetNode string, clusterNotification bool) error { - n.logger.Debug("Update", log.Ctx{"clusterNotification": clusterNotification, "newNetwork": newNetwork}) + n.logger.Debug("Update", log.Ctx{"clusterNotification": clusterNotification, "config": newNetwork, "targetNode": targetNode}) // Populate default values if they are missing. err := n.fillConfig(newNetwork.Config) diff --git a/lxd/network/driver_macvlan.go b/lxd/network/driver_macvlan.go index 9581b3c255..9600d3ab18 100644 --- a/lxd/network/driver_macvlan.go +++ b/lxd/network/driver_macvlan.go @@ -78,7 +78,7 @@ func (n *macvlan) Stop() error { // Update updates the network. Accepts notification boolean indicating if this update request is coming from a // cluster notification, in which case do not update the database, just apply local changes needed. func (n *macvlan) Update(newNetwork api.NetworkPut, targetNode string, clusterNotification bool) error { - n.logger.Debug("Update", log.Ctx{"clusterNotification": clusterNotification, "newNetwork": newNetwork}) + n.logger.Debug("Update", log.Ctx{"clusterNotification": clusterNotification, "config": newNetwork, "targetNode": targetNode}) dbUpdateNeeeded, _, oldNetwork, err := n.common.configChanged(newNetwork) if err != nil { diff --git a/lxd/network/driver_sriov.go b/lxd/network/driver_sriov.go index f8edc3fd9f..f6ca39f44e 100644 --- a/lxd/network/driver_sriov.go +++ b/lxd/network/driver_sriov.go @@ -78,7 +78,7 @@ func (n *sriov) Stop() error { // Update updates the network. Accepts notification boolean indicating if this update request is coming from a // cluster notification, in which case do not update the database, just apply local changes needed. func (n *sriov) Update(newNetwork api.NetworkPut, targetNode string, clusterNotification bool) error { - n.logger.Debug("Update", log.Ctx{"clusterNotification": clusterNotification, "newNetwork": newNetwork}) + n.logger.Debug("Update", log.Ctx{"clusterNotification": clusterNotification, "config": newNetwork, "targetNode": targetNode}) dbUpdateNeeeded, _, oldNetwork, err := n.common.configChanged(newNetwork) if err != nil { From 4a269f70bf03963d278d360e6399544cab50f0d6 Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Fri, 7 Aug 2020 18:13:34 +0100 Subject: [PATCH 13/14] lxd/network/driver/bridge: Renames fillVolatileHwaddr for clarity Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- lxd/network/driver_bridge.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lxd/network/driver_bridge.go b/lxd/network/driver_bridge.go index 24e48b6bfd..0550f8226e 100644 --- a/lxd/network/driver_bridge.go +++ b/lxd/network/driver_bridge.go @@ -46,8 +46,8 @@ type bridge struct { common } -// fillHwaddr populates the volatile.bridge.hwaddr in config if it, nor bridge.hwaddr, are already set. -func (n *bridge) fillHwaddr(config map[string]string) error { +// fillVolatileHwaddr populates the volatile.bridge.hwaddr in config if it, nor bridge.hwaddr, are already set. +func (n *bridge) fillVolatileHwaddr(config map[string]string) error { // Fan bridge doesn't support having the same MAC on all nodes (it breaks host<->fan traffic). // Presumably because the host's MAC address is used for routing across the fan network. if config["bridge.mode"] == "fan" { From 266c8ce312c678d9638197796eb4c0b5c2e57dc8 Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Fri, 7 Aug 2020 18:14:09 +0100 Subject: [PATCH 14/14] lxd/network/driver/bridge: Adds ClustePreJoin logic to fillConfig Updates to accept api.NetworkPut. Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- lxd/network/driver_bridge.go | 42 ++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/lxd/network/driver_bridge.go b/lxd/network/driver_bridge.go index 0550f8226e..fa2155298d 100644 --- a/lxd/network/driver_bridge.go +++ b/lxd/network/driver_bridge.go @@ -87,39 +87,43 @@ func (n *bridge) stableMACSafe() bool { return true } -// fillConfig fills requested config with any default values. -func (n *bridge) fillConfig(config map[string]string) error { +// fillConfig fills supplied network's Config map with any default values. +func (n *bridge) fillConfig(net api.NetworkPut) error { // Set some default values where needed. - if config["bridge.mode"] == "fan" { - if config["fan.underlay_subnet"] == "" { - config["fan.underlay_subnet"] = "auto" + if net.Config["bridge.mode"] == "fan" { + if net.Config["fan.underlay_subnet"] == "" { + net.Config["fan.underlay_subnet"] = "auto" } } else { - if config["ipv4.address"] == "" { - config["ipv4.address"] = "auto" + if net.Config["ipv4.address"] == "" { + net.Config["ipv4.address"] = "auto" } - if config["ipv4.address"] == "auto" && config["ipv4.nat"] == "" { - config["ipv4.nat"] = "true" + if net.Config["ipv4.address"] == "auto" && net.Config["ipv4.nat"] == "" { + net.Config["ipv4.nat"] = "true" } - if config["ipv6.address"] == "" { + if net.Config["ipv6.address"] == "" { content, err := ioutil.ReadFile("/proc/sys/net/ipv6/conf/default/disable_ipv6") if err == nil && string(content) == "0\n" { - config["ipv6.address"] = "auto" + net.Config["ipv6.address"] = "auto" } } - if config["ipv6.address"] == "auto" && config["ipv6.nat"] == "" { - config["ipv6.nat"] = "true" + if net.Config["ipv6.address"] == "auto" && net.Config["ipv6.nat"] == "" { + net.Config["ipv6.nat"] = "true" } } - // If no static hwaddr specified generate a volatile one to store in DB record so that - // there are no races when starting the network at the same time on multiple cluster nodes. - err := n.fillHwaddr(config) - if err != nil { - return err + // Only generate volatile MAC address when not about to join a cluster. + // If we are about to join a cluster, then we will need to use any existing MAC address settings. + if !net.ClusterPreJoin { + // If no static hwaddr specified generate a volatile one to store in DB record so that + // there are no races when starting the network at the same time on multiple cluster nodes. + err := n.fillVolatileHwaddr(net.Config) + if err != nil { + return err + } } return nil @@ -1442,7 +1446,7 @@ func (n *bridge) Update(newNetwork api.NetworkPut, targetNode string, clusterNot n.logger.Debug("Update", log.Ctx{"clusterNotification": clusterNotification, "config": newNetwork, "targetNode": targetNode}) // Populate default values if they are missing. - err := n.fillConfig(newNetwork.Config) + err := n.fillConfig(newNetwork) if err != nil { return err }
_______________________________________________ lxc-devel mailing list lxc-devel@lists.linuxcontainers.org http://lists.linuxcontainers.org/listinfo/lxc-devel