On Mon, Dec 13, 2010 at 3:22 PM, Apollon Oikonomopoulos
<[email protected]> wrote:
> This patch introduces network configuration for KVM in Ganeti.
>
> There are three problems with having KVM perform network configuration via 
> ifup
> scripts:
>  a) Ganeti never gets to know the tap interface that is associated with an
>     instance's NIC
>  b) Migration of routed instances will cause network problems because the
>     incoming KVM side configures the network as soon as it is spawned and not
>     as soon as the migration finishes. This means that all routing
>     configuration will be present in both, primary and secondary, nodes at the
>     same time, possibly causing network disruption during the migration.
>  c) We never get to know if the network configuration succeeded or not.
>
> This patch moves network configuration from KVM to Ganeti, using KVM's ability
> to receive already open tap devices as file descriptors.
>

Wonderful.

> Minor modifications are made to _ExecKVMRuntime to handle tap device
> initialization. NIC <-> tap associations are stored under a new directory,
> _ROOT_DIR/nic in a file-per-nic fashion.
>
> The script generated by _WriteNetScript must be moved to live under
> _KVM_NET_SCRIPT as the default network configuration script.
>

Currently _KVM_NET_SCRIPT is an optional override for the net script.
You make it something the user must provide without documenting the
requirement or giving an example.
Either we ship/install an example, or even better we should provide a
script in /usr/lib/ganeti/ and make it optionally overridable (or make
it possible to optionally override parts of it) from /etc.
Requiring the user to do more work for the "simple" setup is not a
good idea. Also the way to override it should change from
all/or/nothing to hooks and various parts. :)

> Signed-off-by: Apollon Oikonomopoulos <[email protected]>
> ---
>  lib/hypervisor/hv_kvm.py |  219 
> +++++++++++++++++++---------------------------
>  1 files changed, 92 insertions(+), 127 deletions(-)
>
> diff --git a/lib/hypervisor/hv_kvm.py b/lib/hypervisor/hv_kvm.py
> index c5ed994..dd16a73 100644
> --- a/lib/hypervisor/hv_kvm.py
> +++ b/lib/hypervisor/hv_kvm.py
> @@ -33,6 +33,7 @@ import logging
>  import pwd
>  import struct
>  import fcntl
> +import glob
>  from cStringIO import StringIO
>
>  from ganeti import utils
> @@ -126,106 +127,6 @@ def _OpenTap(vnet_hdr=True):
>   return (ifname, tapfd)
>
>
> -def _WriteNetScript(instance, nic, index):
> -  """Write a script to connect a net interface to the proper bridge.
> -
> -  This can be used by any qemu-type hypervisor.
> -
> - �...@type instance: L{objects.Instance}
> - �...@param instance: Instance object
> - �...@type nic: L{objects.NIC}
> - �...@param nic: NIC object
> - �...@type index: int
> - �...@param index: NIC index
> - �...@return: Script
> - �...@rtype: string
> -
> -  """
> -  if instance.tags:
> -    tags = " ".join(instance.tags)
> -  else:
> -    tags = ""
> -
> -  buf = StringIO()
> -  sw = utils.ShellWriter(buf)
> -  sw.Write("#!/bin/sh")
> -  sw.Write("# this is autogenerated by Ganeti, please do not edit")
> -  sw.Write("export PATH=$PATH:/sbin:/usr/sbin")
> -  sw.Write("export INSTANCE=%s", utils.ShellQuote(instance.name))
> -  sw.Write("export MAC=%s", utils.ShellQuote(nic.mac))
> -  sw.Write("export MODE=%s",
> -           utils.ShellQuote(nic.nicparams[constants.NIC_MODE]))
> -  sw.Write("export INTERFACE=\"$1\"")
> -  sw.Write("export TAGS=%s", utils.ShellQuote(tags))
> -
> -  if nic.ip:
> -    sw.Write("export IP=%s", utils.ShellQuote(nic.ip))
> -
> -  if nic.nicparams[constants.NIC_LINK]:
> -    sw.Write("export LINK=%s",
> -             utils.ShellQuote(nic.nicparams[constants.NIC_LINK]))
> -
> -  if nic.nicparams[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
> -    sw.Write("export BRIDGE=%s",
> -             utils.ShellQuote(nic.nicparams[constants.NIC_LINK]))
> -
> -  # TODO: make this configurable at ./configure time
> -  sw.Write("if [ -x %s ]; then", utils.ShellQuote(_KVM_NETWORK_SCRIPT))
> -  sw.IncIndent()
> -  try:
> -    sw.Write("# Execute the user-specific vif file")
> -    sw.Write(_KVM_NETWORK_SCRIPT)
> -  finally:
> -    sw.DecIndent()
> -  sw.Write("else")
> -  sw.IncIndent()
> -  try:
> -    sw.Write("ifconfig $INTERFACE 0.0.0.0 up")
> -
> -    if nic.nicparams[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
> -      sw.Write("# Connect the interface to the bridge")
> -      sw.Write("brctl addif $BRIDGE $INTERFACE")
> -
> -    elif nic.nicparams[constants.NIC_MODE] == constants.NIC_MODE_ROUTED:
> -      if not nic.ip:
> -        raise errors.HypervisorError("nic/%d is routed, but has no IP"
> -                                     " address" % index)
> -
> -      sw.Write("# Route traffic targeted at the IP to the interface")
> -      if nic.nicparams[constants.NIC_LINK]:
> -        sw.Write("while ip rule del dev $INTERFACE; do :; done")
> -        sw.Write("ip rule add dev $INTERFACE table $LINK")
> -        sw.Write("ip route replace $IP table $LINK proto static"
> -                 " dev $INTERFACE")
> -      else:
> -        sw.Write("ip route replace $IP proto static dev $INTERFACE")
> -
> -      interface_v4_conf = "/proc/sys/net/ipv4/conf/$INTERFACE"
> -      sw.Write(" if [ -d %s ]; then", interface_v4_conf)
> -      sw.IncIndent()
> -      try:
> -        sw.Write("echo 1 > %s/proxy_arp", interface_v4_conf)
> -        sw.Write("echo 1 > %s/forwarding", interface_v4_conf)
> -      finally:
> -        sw.DecIndent()
> -      sw.Write("fi")
> -
> -      interface_v6_conf = "/proc/sys/net/ipv6/conf/$INTERFACE"
> -      sw.Write("if [ -d %s ]; then", interface_v6_conf)
> -      sw.IncIndent()
> -      try:
> -        sw.Write("echo 1 > %s/proxy_ndp", interface_v6_conf)
> -        sw.Write("echo 1 > %s/forwarding", interface_v6_conf)
> -      finally:
> -        sw.DecIndent()
> -      sw.Write("fi")
> -  finally:
> -    sw.DecIndent()
> -  sw.Write("fi")
> -
> -  return buf.getvalue()
> -
> -
>  class KVMHypervisor(hv_base.BaseHypervisor):
>   """KVM hypervisor interface"""
>   CAN_MIGRATE = True
> @@ -235,6 +136,7 @@ class KVMHypervisor(hv_base.BaseHypervisor):
>   _UIDS_DIR = _ROOT_DIR + "/uid" # contains instances reserved uids
>   _CTRL_DIR = _ROOT_DIR + "/ctrl" # contains instances control sockets
>   _CONF_DIR = _ROOT_DIR + "/conf" # contains instances startup data
> +  _NICS_DIR = _ROOT_DIR + "/nic" # contains instances nic <-> tap 
> associations
>   # KVM instances with chroot enabled are started in empty chroot directories.
>   _CHROOT_DIR = _ROOT_DIR + "/chroot" # for empty chroot directories
>   # After an instance is stopped, its chroot directory is removed.
> @@ -243,7 +145,7 @@ class KVMHypervisor(hv_base.BaseHypervisor):
>   # To support forensics, the non-empty chroot directory is quarantined in
>   # a separate directory, called 'chroot-quarantine'.
>   _CHROOT_QUARANTINE_DIR = _ROOT_DIR + "/chroot-quarantine"
> -  _DIRS = [_ROOT_DIR, _PIDS_DIR, _UIDS_DIR, _CTRL_DIR, _CONF_DIR,
> +  _DIRS = [_ROOT_DIR, _PIDS_DIR, _UIDS_DIR, _CTRL_DIR, _CONF_DIR, _NICS_DIR,
>            _CHROOT_DIR, _CHROOT_QUARANTINE_DIR]
>
>   PARAMETERS = {
> @@ -436,6 +338,20 @@ class KVMHypervisor(hv_base.BaseHypervisor):
>     return utils.PathJoin(cls._CHROOT_DIR, instance_name)
>
>   @classmethod
> +  def _InstanceNICFile(cls, instance_name, seq):
> +    """Returns the name of the file containing the tap device for a given NIC
> +
> +    """
> +    return utils.PathJoin(cls._NICS_DIR, "%s-%d" % (instance_name, seq))
> +
> + �...@classmethod
> +  def _InstanceNICFiles(cls, instance_name):
> +    """Returns all existing NIC filenames
> +
> +    """
> +    return glob.glob(utils.PathJoin(cls._NICS_DIR, "%s-[0-9]" % 
> instance_name))
> +

Ouch, these two functions differ by a single "s" at the end? Not good,
we'll end up confusing them.
_AllInstanceNICFiles(...) or _InstanceAllNICFiles(....) vs
_InstanceNICFile(...) would be better.

> + �...@classmethod
>   def _TryReadUidFile(cls, uid_file):
>     """Try to read a uid file
>
> @@ -464,6 +380,8 @@ class KVMHypervisor(hv_base.BaseHypervisor):
>     utils.RemoveFile(uid_file)
>     if uid is not None:
>       uidpool.ReleaseUid(uid)
> +    for nicfile in cls._InstanceNICFiles(instance_name):
> +      utils.RemoveFile(nicfile)

Good, for now. We should probably make sure a shutdown script is
called, before, now that we can.

>     try:
>       chroot_dir = cls._InstanceChrootDir(instance_name)
>       utils.RemoveDir(chroot_dir)
> @@ -484,10 +402,8 @@ class KVMHypervisor(hv_base.BaseHypervisor):
>         raise
>
>   @staticmethod
> -  def _WriteNetScriptFile(instance, seq, nic):
> -    """Write a script to connect a net interface to the proper bridge.
> -
> -    This can be used by any qemu-type hypervisor.
> +  def _ConfigureNIC(instance, seq, nic, tap):
> +    """Run the network configuration script for a specified NIC
>
>     @param instance: instance we're acting on
>     @type instance: instance object
> @@ -495,22 +411,39 @@ class KVMHypervisor(hv_base.BaseHypervisor):
>     @type seq: int
>     @param nic: nic we're acting on
>     @type nic: nic object
> -   �...@return: netscript file name
> -   �...@rtype: string
> +   �...@param tap: the host's tap interface this NIC corresponds to
> +   �...@type tap: str
>
>     """
> -    script = _WriteNetScript(instance, nic, seq)
>
> -    # As much as we'd like to put this in our _ROOT_DIR, that will happen to 
> be
> -    # mounted noexec sometimes, so we'll have to find another place.
> -    (tmpfd, tmpfile_name) = tempfile.mkstemp()
> -    tmpfile = os.fdopen(tmpfd, 'w')
> -    try:
> -      tmpfile.write(script)
> -    finally:
> -      tmpfile.close()
> -    os.chmod(tmpfile_name, 0755)
> -    return tmpfile_name
> +    if instance.tags:
> +      tags = " ".join(instance.tags)
> +    else:
> +      tags = ""
> +
> +    env = {
> +      "PATH": "%s:/sbin:/usr/sbin" % os.environ["PATH"],
> +      "INSTANCE": instance.name,
> +      "MAC": nic.mac,
> +      "MODE": nic.nicparams[constants.NIC_MODE],
> +      "INTERFACE": tap,
> +      "INTERFACE_INDEX": str(seq),
> +      "TAGS": tags,
> +    }
> +
> +    if nic.ip:
> +      env["IP"] = nic.ip
> +
> +    if nic.nicparams[constants.NIC_LINK]:
> +      env["LINK"] = nic.nicparams[constants.NIC_LINK]
> +
> +    if nic.nicparams[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
> +      env["BRIDGE"] = nic.nicparams[constants.NIC_LINK]
> +
> +    result = utils.RunCmd([_KVM_NETWORK_SCRIPT, tap], env=env)
> +    if result.failed:
> +      logging.warning("Failed to configure interface %s: %s (%s)" %
> +                      (tap, result.fail_reason, result.output))
>

See the comments in the change description.

>   def ListInstances(self):
>     """Get the list of running instances.
> @@ -761,16 +694,22 @@ class KVMHypervisor(hv_base.BaseHypervisor):
>     kvm_nics = [objects.NIC.FromDict(snic) for snic in serialized_nics]
>     return (kvm_cmd, kvm_nics, hvparams)
>
> -  def _RunKVMCmd(self, name, kvm_cmd):
> +  def _RunKVMCmd(self, name, kvm_cmd, tap_fds=None):
>     """Run the KVM cmd and check for errors
>
>     @type name: string
>     @param name: instance name
>     @type kvm_cmd: list of strings
>     @param kvm_cmd: runcmd input for kvm
> +   �...@type tap_fds: list of int
> +   �...@param tap_fds: fds of tap devices opened by Ganeti
>
>     """
> -    result = utils.RunCmd(kvm_cmd)
> +    result = utils.RunCmd(kvm_cmd, noclose_fds=tap_fds)
> +
> +    for fd in tap_fds:
> +      utils._CloseFDNoErr(fd)
> +
>     if result.failed:
>       raise errors.HypervisorError("Failed to start instance %s: %s (%s)" %
>                                    (name, result.fail_reason, result.output))
> @@ -816,15 +755,19 @@ class KVMHypervisor(hv_base.BaseHypervisor):
>     # We have reasons to believe changing something like the nic driver/type
>     # upon migration won't exactly fly with the instance kernel, so for nic
>     # related parameters we'll use up_hvp
> +    tapfds = []
> +    taps = []
>     if not kvm_nics:
>       kvm_cmd.extend(["-net", "none"])
>     else:
> +      vnet_hdr = False
>       tap_extra = ""
>       nic_type = up_hvp[constants.HV_NIC_TYPE]
>       if nic_type == constants.HT_NIC_PARAVIRTUAL:
>         # From version 0.12.0, kvm uses a new sintax for network 
> configuration.
>         if (v_major, v_min) >= (0, 12):
>           nic_model = "virtio-net-pci"
> +          vnet_hdr = True
>         else:
>           nic_model = "virtio"
>
> @@ -839,17 +782,17 @@ class KVMHypervisor(hv_base.BaseHypervisor):
>         nic_model = nic_type
>
>       for nic_seq, nic in enumerate(kvm_nics):
> -        script = self._WriteNetScriptFile(instance, nic_seq, nic)
> -        temp_files.append(script)
> +        tapname, tapfd = _OpenTap(vnet_hdr)
> +        tapfds.append(tapfd)
> +        taps.append(tapname)
>         if (v_major, v_min) >= (0, 12):
>           nic_val = "%s,mac=%s,netdev=netdev%s" % (nic_model, nic.mac, 
> nic_seq)
> -          tap_val = "type=tap,id=netdev%s,script=%s%s" % (nic_seq,
> -                                                          script, tap_extra)
> +          tap_val = "type=tap,id=netdev%s,fd=%d%s" % (nic_seq, tapfd, 
> tap_extra)
>           kvm_cmd.extend(["-netdev", tap_val, "-device", nic_val])
>         else:
>           nic_val = "nic,vlan=%s,macaddr=%s,model=%s" % (nic_seq,
>                                                          nic.mac, nic_model)
> -          tap_val = "tap,vlan=%s,script=%s" % (nic_seq, script)
> +          tap_val = "tap,vlan=%s,fd=%d" % (nic_seq, tapfd)
>           kvm_cmd.extend(["-net", tap_val, "-net", nic_val])
>
>     if incoming:
> @@ -880,7 +823,7 @@ class KVMHypervisor(hv_base.BaseHypervisor):
>       try:
>         username = pwd.getpwuid(uid.GetUid()).pw_name
>         kvm_cmd.extend(["-runas", username])
> -        self._RunKVMCmd(name, kvm_cmd)
> +        self._RunKVMCmd(name, kvm_cmd, tapfds)
>       except:
>         uidpool.ReleaseUid(uid)
>         raise
> @@ -888,7 +831,17 @@ class KVMHypervisor(hv_base.BaseHypervisor):
>         uid.Unlock()
>         utils.WriteFile(self._InstanceUidFile(name), data=str(uid))
>     else:
> -      self._RunKVMCmd(name, kvm_cmd)
> +      self._RunKVMCmd(name, kvm_cmd, tapfds)
> +
> +    for nic_seq, tap in enumerate(taps):
> +      utils.WriteFile(self._InstanceNICFile(instance.name, nic_seq),
> +                      data=tap)
> +
> +    if not incoming:
> +      # Configure the network now for starting instances, during
> +      # FinalizeMigration for incoming instances
> +      for nic_seq, nic in enumerate(kvm_nics):
> +        self._ConfigureNIC(instance, nic_seq, nic, taps[nic_seq])
>
>     if vnc_pwd:
>       change_cmd = 'change vnc password %s' % vnc_pwd
> @@ -1025,6 +978,18 @@ class KVMHypervisor(hv_base.BaseHypervisor):
>
>     """
>     if success:
> +      kvm_runtime = self._LoadKVMRuntime(instance, serialized_runtime=info)
> +      kvm_nics = kvm_runtime[1]
> +
> +      for nic_seq, nic in enumerate(kvm_nics):
> +        try:
> +          tap = utils.ReadFile(self._InstanceNICFile(instance.name, nic_seq))
> +        except:
> +          logging.warning("Failed to find host interface for %s NIC #%d" %
> +                          (instance.name, nic_seq))
> +          continue
> +        self._ConfigureNIC(instance, nic_seq, nic, tap)
> +
>       self._WriteKVMRuntime(instance.name, info)
>     else:
>       self.StopInstance(instance, force=True)

In general a very welcome change, once we sort out the issues above! :)

Thanks a lot for improving this!

Guido

Reply via email to