Hi

We needed to manage our guests' bandwidth, so I wrote a qemu hook
script that does this. If there is any interest, I would like to
contribute it to the project.

This script only works on Linux hosts. It effectively limits the
bandwidth a guest can _send_, but only somewhat effectively limits the
bandwidth a guest can _receive_. Well-behaved TCP connections slow down
when packets are delayed or dropped, but UDP is a different matter:
during my tests (using iperf -u), UDP was always far slower (by a factor
of 10) than the configured limit. Since only TCP matters to us in this
situation, we consider it "good enough".

Also: this mechanism by no means protects you from DDoS attacks or the like.

So what does the script do?

The script uses iptables to mark packets going from and to a virtual
machine. Depending on these marks, packets are filtered into tc classes
which limit bandwidth to a configurable amount. The configuration allows
for groups of VMs which share an amount of bandwidth, while each machine
keeps its guaranteed bandwidth in situations of high network usage.

Documentation is part of the script. If you like, feel free to include it.

If you have any comments or questions, talk! :)

Regards
Dominik
#!/bin/bash
# /etc/libvirt/hooks/qemu

# qemu hook script for libvirtd
# manages bandwidth limits for virtual machines
# see http://libvirt.org/hooks.html
#
# based on work by horms: http://events.linuxfoundation.org/linuxcon2010/horman

# path of the shell fragment that is sourced further down; the hook exits
# with status 1 if this file is not readable
configfile=/etc/libvirt/bandwidth.conf

# this configfile is supposed to hold these variables:
# ceil_<dev>="max bandwidth for dev"
# example:
# ceil_eth1="1000mbit"
# refer to tc(8) for UNITS

# <vmname>_<physdev>="<id> [<rate>] [<ceil>]"
# "id" is an arbitrary id used as iptables fw mark and classid for tc. it has
# to be unique; don't use 333 (that is the classid of the default class). the
# first digit of this id determines the group into which a vm is put. read
# more below.
# "rate" is the guaranteed bandwidth. optional; if not specified, the default
# is used (see below).
# "ceil" is the maximum bandwidth when other classes in this hierarchy don't
# use it all. optional; if not specified, the default is used (see below).
# each vm in the same group should have the same ceil. otherwise, the highest
# value is used.
# example:
# vm1_eth1="10 100mbit 1000mbit"

########################
# example configuration:
########################
#
# machine setup:
# eth0 - br0 - vnet[012]
#
# vms:
# cliff
# jason
# rob
#
# cliff and rob have to share 30mbit, 15 is guaranteed to each
# jason, since he's the coolest, can have 30 on its own
#
# configfile to achieve this:
#
# ceil_eth0="1000mbit"
# cliff_eth0="10 15mbit 30mbit"
# rob_eth0="11 15mbit 30mbit"
# jason_eth0="20 30mbit 30mbit"
#
# since cliff's and rob's ids both start with 1, they join group 1
# jason's id starts with 2, so he is in group 2
#
# this will form the following tc hierarchy for eth0 and the corresponding
# virtual device of the vm:
#
#                /--- 1:10 rate 15 ceil 30
#               /
#     /--- 1:1111 rate 30 ceil 30
#    /          \
#   /            \--- 1:11 rate 15 ceil 30
#   |
#   |            /--- 1:20 rate 30 ceil 30
#   |           /
# root--- 1:2222 rate 30 ceil 30
#   |
#   \
#    \
#     \--- 1:333 rate 1000 ceil 1000
#
### end example configuration

# fallbacks used when the config file does not provide a value:
# defaultrate/defaultceil apply to a vm entry without <rate>/<ceil>,
# defaultglobalceil applies when the device-wide ceil is not set
defaultrate=100mbit
defaultceil=1000mbit
defaultglobalceil=1000mbit

# no changes past this line

# pull the settings into this shell. the path is quoted so that an empty or
# space-containing value cannot turn the test into a bare "[ -r ]", which
# would succeed and then call source without an argument.
if [ -r "$configfile" ]; then
        source "$configfile"
else
        # diagnostics go to stderr, like every other error in this hook
        echo "configfile $configfile not found. exiting." >&2
        exit 1
fi

#######################################
# limits_start <n>
# set up bandwidth limits for the n-th <interface> of the domain being started
#
# globals read:  domaincfg (file holding the domain xml), domainname,
#                defaultrate, defaultceil, defaultglobalceil,
#                ceil_<physdev> and every <vmname>_<physdev> from the config
# commands used: xpath, brctl, iptables, tc
# returns 0 without doing anything if the interface has no target device or
# is not attached to a bridge
# exits the hook with status 1 if no usable id is configured for the domain
#
# characters that are not valid in a shell variable name (e.g. the "-" in
# "web-01" or the "." in "eth0.100") are looked up as "_", so the config
# entry for domain web-01 on eth0 is web_01_eth0
#######################################
limits_start() {
        # fall back to the caller's loop variable if no argument was given
        local idx=${1:-${i:-}}
        local virtdev sourcedev physdev devkey cfgvar ceilvar cfg
        local id rate ceil groupid globalceil
        local var mid mceil members group grouplimit dev

        # stderr of xpath is dropped on purpose, only the attribute is wanted
        virtdev=$(xpath "$domaincfg" "//interface[$idx]/target/@dev" 2>/dev/null | cut -d \" -f 2)
        [ -n "$virtdev" ] || return 0

        # NOTE(review): the predicate was mangled by the list archive
        # ("[$...@type='bridge']"); reconstructed as [n][@type='bridge'] - confirm
        sourcedev=$(xpath "$domaincfg" "//interface[$idx][@type='bridge']/source/@bridge" 2>/dev/null | cut -d \" -f 2)
        [ -n "$sourcedev" ] || return 0

        # assuming the first device added to the bridge is the physical device.
        # it is the 4th column on the bridge's own line; matching the bridge
        # name exactly keeps br0 from also matching br01, and taking the whole
        # column keeps names like enp3s0 intact
        physdev=$(brctl show | awk -v br="$sourcedev" 'NR > 1 && $1 == br { print $4; exit }')

        # names of the config variables for this device / this vm
        devkey=${physdev//[^A-Za-z0-9_]/_}
        cfgvar=${domainname//[^A-Za-z0-9_]/_}_${devkey}
        ceilvar=ceil_${devkey}

        # indirect expansion instead of eval; only attempted for a valid name
        cfg=
        if [[ $cfgvar =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
                cfg=${!cfgvar}
        fi
        read -r id rate ceil _ <<<"$cfg"

        # the id becomes a fw mark and a classid: it must be a positive number
        # without leading zero (a leading zero would be read as octal by iptables)
        if [[ ! $id =~ ^[1-9][0-9]*$ ]]; then
                echo "no id configured for domain $domainname. exiting qemu hook" >&2
                exit 1
        fi
        groupid=${id:0:1}
        rate=${rate:-$defaultrate}
        ceil=${ceil:-$defaultceil}
        globalceil=${!ceilvar:-$defaultglobalceil}

        # mark packets with iptables
        iptables -t mangle -A FORWARD -m physdev --physdev-in "$virtdev" --physdev-out "$physdev" -j MARK --set-mark "$id"
        iptables -t mangle -A FORWARD -m physdev --physdev-in "$physdev" --physdev-out "$virtdev" -j MARK --set-mark "$id"

        # root qdiscs on virt and phys device
        # these commands are most likely going to fail since the root qdisc
        # will probably exist
        for dev in "$physdev" "$virtdev"; do
                tc qdisc add dev "$dev" root handle 1: htb default 333
        done

        # default classes for unmarked traffic
        # these are also likely to fail
        for dev in "$physdev" "$virtdev"; do
                tc class add dev "$dev" parent 1: classid 1:333 htb rate "$globalceil" ceil "$globalceil"
        done

        ##### create first hierarchical level
        ##### one "<id> <ceil>" line per vm configured for this device; a vm
        ##### without its own ceil counts with the default ceil
        members=$(
                for var in $(compgen -A variable); do
                        [[ $var == *_"$devkey" && $var != "$ceilvar" ]] || continue
                        read -r mid _ mceil _ <<<"${!var}"
                        [[ $mid =~ ^[1-9][0-9]*$ ]] || continue
                        printf '%s %s\n' "$mid" "${mceil:-$defaultceil}"
                done
        )
        ##### the first digit of the id serves as the "group" id, the group
        ##### gets the highest ceil of its members
        for group in $(printf '%s\n' "$members" | cut -b 1 | sort -u); do
                grouplimit=$(printf '%s\n' "$members" | grep "^$group" | cut -d ' ' -f 2 | sort -n | tail -n 1)
                for dev in "$physdev" "$virtdev"; do
                        tc class add dev "$dev" parent 1: classid "1:$group$group$group$group" htb rate "$grouplimit" ceil "$grouplimit"
                done
        done

        # leaf classes with rate and ceil as configured
        for dev in "$physdev" "$virtdev"; do
                tc class add dev "$dev" parent "1:$groupid$groupid$groupid$groupid" classid "1:$id" htb rate "$rate" ceil "$ceil"
        done

        # filter on fw-mark to classify into classid as defined above
        for dev in "$physdev" "$virtdev"; do
                tc filter add dev "$dev" prio "$id" protocol ip parent 1: handle "$id" fw flowid "1:$id"
        done
}

#######################################
# limits_stop <n>
# remove the packet marks, the tc filter and the leaf class that belong to
# the n-th <interface> of the domain that was stopped
#
# globals read:  domaincfg (file holding the domain xml), domainname and
#                the <vmname>_<physdev> entry from the config
# commands used: xpath, brctl, iptables, tc
# returns 0 without doing anything if the interface has no target device, is
# not attached to a bridge, or if no usable id is configured for the domain
#
# characters that are not valid in a shell variable name (e.g. the "-" in
# "web-01") are looked up as "_", so domain web-01 on eth0 is web_01_eth0
#######################################
limits_stop() {
        # fall back to the caller's loop variable if no argument was given
        local idx=${1:-${i:-}}
        local virtdev sourcedev physdev cfgvar cfg id groupid

        # stderr of xpath is dropped on purpose, only the attribute is wanted
        virtdev=$(xpath "$domaincfg" "//interface[$idx]/target/@dev" 2>/dev/null | cut -d \" -f 2)
        [ -n "$virtdev" ] || return 0

        # NOTE(review): the predicate was mangled by the list archive
        # ("[$...@type='bridge']"); reconstructed as [n][@type='bridge'] - confirm
        sourcedev=$(xpath "$domaincfg" "//interface[$idx][@type='bridge']/source/@bridge" 2>/dev/null | cut -d \" -f 2)
        [ -n "$sourcedev" ] || return 0

        # assuming the first device added to the bridge is the physical device.
        # it is the 4th column on the bridge's own line of "brctl show"
        physdev=$(brctl show | awk -v br="$sourcedev" 'NR > 1 && $1 == br { print $4; exit }')

        # indirect expansion instead of eval; only attempted for a valid name
        cfgvar=${domainname//[^A-Za-z0-9_]/_}_${physdev//[^A-Za-z0-9_]/_}
        cfg=
        if [[ $cfgvar =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
                cfg=${!cfgvar}
        fi
        read -r id _ <<<"$cfg"

        # without a valid id there is nothing that could be removed, and
        # calling iptables/tc with an empty id would only produce errors
        if [[ ! $id =~ ^[1-9][0-9]*$ ]]; then
                echo "no id configured for domain $domainname. nothing to remove" >&2
                return 0
        fi
        groupid=${id:0:1}

        iptables -t mangle -D FORWARD -m physdev --physdev-in "$virtdev" --physdev-out "$physdev" -j MARK --set-mark "$id"
        iptables -t mangle -D FORWARD -m physdev --physdev-in "$physdev" --physdev-out "$virtdev" -j MARK --set-mark "$id"

        # removing from physdev is sufficient since virtdev does not exist any
        # more when this is called
        tc filter del dev "$physdev" protocol ip pref "$id" fw
        tc class del dev "$physdev" parent "1:$groupid$groupid$groupid$groupid" classid "1:$id"
}

# entry point: libvirt passes the domain name and the operation as the first
# two arguments and the domain xml on stdin
domainname=$1
domaintask=$2

# keep the xml in a private temp file. mktemp gives an unpredictable name
# (a fixed /tmp/...$domainname.$$ name can be pre-created by other users)
# and the trap removes the file on every exit path, including the "exit 1"
# inside limits_start
domaincfg=$(mktemp "${TMPDIR:-/tmp}/libvirt.qemuhook.intermediate.XXXXXX") || {
        echo "qemu hook: cannot create temporary file" >&2
        exit 1
}
trap 'rm -f -- "$domaincfg"' EXIT

# the entire xml config is on stdin
cat - > "$domaincfg"
numifaces=$(grep -E -o "<interface" "$domaincfg" | wc -l)

# the loop variable has to stay "i" and has to come from a fixed word list:
# the limits_* functions may read and even overwrite the global $i
case "$domaintask" in
# hook is called with <domainname> start begin -
start)
        for i in $(seq "$numifaces"); do
                limits_start "$i"
        done
        ;;
# hook is called with <domainname> stopped end -
stopped)
        for i in $(seq "$numifaces"); do
                limits_stop "$i"
        done
        ;;
*)
        echo "qemu hook called with unexpected options $*" >&2
        ;;
esac
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list

Reply via email to