On Mon, 26 Aug 2013 14:24:12 -0400, Andres Chavez
<fluxboxtrem...@gmail.com> wrote:
> Hi, can anyone tell me the best or at least the most used real time
> bandwidth monitoring tool, when using the PF+ALTQ solution please?
> 
> thanks in advance.

We use Graphite to display the data received by statsd. We run the
following script from cron every 60 seconds to transmit the PF queue
statistics to statsd. (I'm not a great coder by any stretch of the
imagination, so please feel free to clean/optimise/improve it - please share
and let me know if you do ;)

NB; test_master is a check to exit the script if it is not the CARP
master, as this script also runs on all our remote office firewalls which
have IPSec VPNs to head office (where the graphite/statsd server is
located). Currently, if a CARP backup firewall tries to send data back, it
will try to send the data itself (and not via the CARP master) even
though the IPSec tunnel is only valid on the CARP master. This causes the
firewalls back at head office to immediately distrust the IPSec VPN and so
break it completely, due to receiving the IPSec payload from the backup. I
have mentioned this before on here and think I need to build a gif tunnel
or do something else (someone gave a good suggestion for this but I haven't
had time to fix it).
NB; phys_iface is derived from all_iface, which in our case is generated by
'puppet facter'; this can of course be replaced with a manual string.
NB; The drop_rates_file_base file is monitored by Nagios to provide us an
Alarm if a particular queue is saturating and dropping packets heavily.
NB; Replace <%=@hostname-%> with the server's hostname, or let puppet do it.


#!/usr/local/bin/python

# Script to extract queue stats from the ALTQ PF Queues, and transmit to
# statsd for graphite graphing, and local logging for Nagios alerting
# Written and maintained by Andrew Lemin

import re
import subprocess
import socket
import logging

from subprocess import Popen, PIPE
from time import sleep, time
# Send all log output (DEBUG and above) to a dedicated log file. The format
# string must be a single literal on one line.
logging.basicConfig(
    filename='/var/log/pf_queue_monitor.log',
    level=logging.DEBUG,
    format='%(asctime)s %(levelname)s %(name)s %(message)s',
)
logger = logging.getLogger(__name__)

# Check is 'master'
import sys
def test_master():
    """Exit the script unless ifconfig reports at least one CARP master.

    Runs ``/sbin/ifconfig`` once and counts the output lines containing
    ``status: master``. When there are none, the process ends via
    ``sys.exit()``; otherwise the function simply returns.
    """
    proc = subprocess.Popen(["/sbin/ifconfig"], stdout=subprocess.PIPE)
    # communicate() drains the pipe while waiting for the child, so a large
    # ifconfig listing cannot fill the pipe buffer and stall the child.
    output = proc.communicate()[0]
    master = sum(
        1 for line in output.splitlines() if b"status: master" in line
    )
    if master == 0:
        sys.exit()

test_master()

## all_iface set by puppet from 'facter interfaces'
#all_iface = "<%=@interfaces%>".split(',')
all_iface = (
    "lo0,em0,em1,em2,em3,em4,em5,enc0,pflog0,pflog1,pflog2,"
    "pflow0,pfsync0,carp0,carp1,carp2,carp3,carp4"
).split(',')
# Keep only interfaces whose name does not start with lo, pf, carp or enc.
regex = re.compile('lo|pf|carp|enc')
phys_iface = [x for x in all_iface if not regex.match(x)]

# Prefix of the per-interface "<base>_<iface>.data" drop-rate files.
drop_rates_file_base = "/var/spool/pf_queue_drop_rate"
# Number of one-second sleeps between the first and second sample.
run_time = 55
# Host that receives the statsd datagrams.
graphite = "<IP ADDRESS of STATSD>"

def netcat(hostname, port, content):
    """Send *content* to ``hostname:port`` as a single UDP datagram.

    Args:
        hostname: Destination host name or IP address.
        port: Destination UDP port.
        content: Payload passed to ``socket.sendto`` (bytes on Python 3).

    ``socket.error`` raised while sending is logged and swallowed, so one
    failed datagram does not abort the run. Other exceptions propagate.
    """
    s = socket.socket(family=socket.AF_INET, type=socket.SOCK_DGRAM)
    try:
        s.settimeout(3)
        s.sendto(content, (hostname, port))
    except socket.error as err:
        logger.error(err)
    finally:
        # Always release the descriptor, including when an exception other
        # than socket.error (e.g. TypeError for a str payload) propagates.
        s.close()

# Counters printed beneath each queue definition by "pfctl -s queue -v".
# NOTE(review): the label layout is taken from pfctl's ALTQ statistics
# output; confirm against the pfctl version deployed on the firewalls.
_QUEUE_COUNTERS_RE = re.compile(
    r"pkts:\s*(\d+)\s+bytes:\s*(\d+)\s+dropped pkts:\s*(\d+)\s+bytes:\s*(\d+)"
)

# Multipliers that convert a pfctl bandwidth suffix to kilobits.
_BANDWIDTH_UNITS = {"Gb": 1024 * 1024, "Mb": 1024, "Kb": 1}


def _run_command(argv):
    """Run *argv* and return everything it wrote to stdout, as text."""
    proc = subprocess.Popen(argv, stdout=subprocess.PIPE)
    # communicate() reads while waiting, so large output cannot block the
    # child on a full pipe.
    output = proc.communicate()[0]
    return output.decode("utf-8", "replace")


def _bandwidth_to_kb(value):
    """Convert a bandwidth string such as "10Mb" to kilobits (0 if unknown)."""
    multiplier = _BANDWIDTH_UNITS.get(value[-2:])
    if multiplier is None:
        return 0
    # Scale before truncating so fractional values ("1.50Mb") keep their
    # fractional part instead of being rounded down to whole units first.
    return int(float(value[:-2]) * multiplier)


def _parse_queue_stats(pfctl_output, iface):
    """Build the per-queue counter dict for *iface* from pfctl output."""
    stats = {}
    lines = pfctl_output.splitlines()
    for index, line in enumerate(lines):
        # A record starts on a line beginning with "q" that mentions the
        # interface; its counters are expected on the line that follows.
        if not line.startswith("q") or iface not in line:
            continue
        follower = lines[index + 1] if index + 1 < len(lines) else ""
        if follower.startswith("q"):
            follower = ""
        record = line + " " + follower

        # Only "_wan_" queues are reported; root queues and parent queues
        # (those listing children in braces) are skipped.
        if "_wan_" not in record or "root_" in record:
            continue
        if re.search(r"\{.*\}", record):
            continue

        fields = line.split()
        if len(fields) < 2:
            continue
        queue_name = fields[1]

        counters = _QUEUE_COUNTERS_RE.search(record)
        if counters is None:
            logger.warning("no counters found for queue %s on %s",
                           queue_name, iface)
            continue
        pkts, byte_count, drop_pkts, drop_bytes = (
            int(group) for group in counters.groups()
        )

        # Field 5 of the definition line is used as the allocated bandwidth,
        # e.g. "queue <name> on <iface> bandwidth 10Mb ...".
        alloc_kb = _bandwidth_to_kb(fields[5]) if len(fields) > 5 else 0

        stats[queue_name] = [
            pkts,
            8 * byte_count / 1024,
            drop_pkts,
            8 * drop_bytes / 1024,
            alloc_kb,
        ]
    return stats


def _parse_interface_totals(netstat_output, iface):
    """Return (kb_in, kb_out) from the last netstat row mentioning *iface*."""
    rows = [row for row in netstat_output.splitlines() if iface in row]
    if not rows:
        raise ValueError("no netstat counters found for interface %s" % iface)
    columns = rows[-1].split()
    # Columns 4 and 5 are read as the input and output byte totals
    # (presumably Ibytes/Obytes of "netstat -b" -- TODO confirm).
    return 8 * int(columns[4]) / 1024, 8 * int(columns[5]) / 1024


def get_queue_stats(iface):
    """Sample the ALTQ queue counters and interface totals for *iface*.

    Args:
        iface: Interface name, e.g. "em0".

    Returns:
        A tuple ``(stats, kbtotalin, kbtotalout)`` where ``stats`` maps each
        reported queue name to ``[pkts, kb, drop_pkts, drop_kb, alloc_kb]``
        and the two totals are the interface byte counters converted to
        kilobits.

    Raises:
        OSError: if pfctl or netstat cannot be started (logged first).
        ValueError: if netstat prints no row for *iface*.
    """
    try:
        pfctl_output = _run_command(["/sbin/pfctl", "-s", "queue", "-v"])
        # One netstat run provides both the input and the output total.
        netstat_output = _run_command(["netstat", "-b", "-n", "-I", iface])
    except OSError as err:
        # Without the command output there is nothing to parse; log the
        # cause and let the caller see the real error.
        logger.error(err)
        raise

    stats = _parse_queue_stats(pfctl_output, iface)
    kbtotalin, kbtotalout = _parse_interface_totals(netstat_output, iface)

    return (stats, kbtotalin, kbtotalout)

def get_all_stats():
    """Sample every interface in ``phys_iface``.

    Returns:
        A tuple ``(all_stats, all_kbtotalin, all_kbtotalout)`` of dicts keyed
        by interface name, holding the three values that
        ``get_queue_stats`` returns for that interface.
    """
    all_stats = {}
    all_kbtotalin = {}
    all_kbtotalout = {}
    for iface in phys_iface:
        (all_stats[iface],
         all_kbtotalin[iface],
         all_kbtotalout[iface]) = get_queue_stats(iface)

    return (all_stats, all_kbtotalin, all_kbtotalout)

def _send_gauge(metric, value):
    """Send one statsd gauge, ``<metric>:<integer value>|g``, to port 8125."""
    netcat(graphite, 8125, bytes("%s:%d|g" % (metric, value), 'utf-8'))


def _write_drop_rates(iface, lines):
    """Rewrite the drop-rate file for *iface* with the given lines."""
    path = "%s_%s.data" % (drop_rates_file_base, iface)
    try:
        with open(path, 'w') as fh:
            fh.writelines(lines)
    except OSError as err:
        logger.error(err)


def transmit_stats(base_stats, current_stats, start_kbtotalin,
                   current_kbtotalin, start_kbtotalout, current_kbtotalout,
                   interval=1):
    """Send per-interface and per-queue rates to statsd and log drop ratios.

    Args:
        base_stats: Baseline ``{iface: {queue: [pkts, kb, drop_pkts,
            drop_kb, alloc_kb]}}`` sample.
        current_stats: Later sample with the same layout.
        start_kbtotalin / current_kbtotalin: ``{iface: total}`` input totals
            for the two samples.
        start_kbtotalout / current_kbtotalout: Same for output totals.
        interval: Divisor applied to every counter difference (the caller
            passes the seconds between the two samples).

    Interface rates are sent only when positive; queue gauges are sent when
    non-negative. For each interface, one ``"<queue> <drop ratio>"`` line per
    queue is written to ``<drop_rates_file_base>_<iface>.data``; the file is
    written once per interface so that every queue is present in it.
    """
    for iface in current_stats:
        prefix = "traffic.<%=@hostname-%>." + iface

        totin_rate = (current_kbtotalin[iface] - start_kbtotalin[iface]) / interval
        if totin_rate > 0:
            _send_gauge(prefix + ".ingress.kbps", totin_rate)

        totout_rate = (current_kbtotalout[iface] - start_kbtotalout[iface]) / interval
        if totout_rate > 0:
            _send_gauge(prefix + ".egress.kbps", totout_rate)

        prev_array = base_stats[iface]
        curr_array = current_stats[iface]
        drop_lines = []
        for queue_name in curr_array:
            if queue_name not in prev_array:
                # No baseline for this queue, so no rate can be computed.
                logger.warning("queue %s on %s has no baseline sample",
                               queue_name, iface)
                continue
            prev = prev_array[queue_name]
            curr = curr_array[queue_name]

            pkt_rate = (curr[0] - prev[0]) / interval
            kb_rate = (curr[1] - prev[1]) / interval
            pkt_drop_rate = (curr[2] - prev[2]) / interval
            kb_drop_rate = (curr[3] - prev[3]) / interval
            kb_alloc = curr[4]
            if pkt_rate > 0:
                pkt_drop_ratio = (pkt_drop_rate / pkt_rate) * 100
            else:
                pkt_drop_ratio = 0

            gauges = (
                ("pps", pkt_rate),
                ("kbps", kb_rate),
                ("drop_pps", pkt_drop_rate),
                ("drop_kbps", kb_drop_rate),
                ("drop_ratio", pkt_drop_ratio),
                ("queue_size", kb_alloc),
            )
            for suffix, value in gauges:
                # Negative values are not reported.
                if value >= 0:
                    _send_gauge("%s.%s.%s" % (prefix, queue_name, suffix), value)

            if pkt_drop_ratio >= 0:
                drop_lines.append("%s %d\n" % (queue_name, pkt_drop_ratio))

        if drop_lines:
            _write_drop_rates(iface, drop_lines)

# TODO: Verify drop_rates_file_base file age in nagios nrpe script

# First sample: counters at the start of the measurement window.
start_time = int(time())
(start_stats, start_kbtotalin, start_kbtotalout) = get_all_stats()
base_stats = start_stats

# Wait out the window in one-second steps, run_time steps in total.
count = 0
while count < run_time:
    sleep(1)
    # TODO: Analyze jitter values
    count += 1

# Second sample, then send the rates over the elapsed whole seconds.
end_time = int(time())
(current_stats, current_kbtotalin, current_kbtotalout) = get_all_stats()
transmit_stats(
    start_stats,
    current_stats,
    start_kbtotalin,
    current_kbtotalin,
    start_kbtotalout,
    current_kbtotalout,
    (end_time - start_time),
)


Hope this does what you're looking for,
Andrew Lemin

Reply via email to