#!/opt/csw/bin/perl
#
# Calculates percentiles using two different methods 

use strict;
use warnings;
use Perl6::Say;
use List::Util qw[max];
use File::Basename;
use Getopt::Long;
use RRDs;
use Carp;
use POSIX;

# Option defaults. GetOptions() below stores command-line overrides in this
# same hash; the subs further down read ds, dsi, cf, precision and verbose
# straight from it.
my %opt = (
    start      => -86400,
    end        => 'now',
    ds         => 'ds0',
    dsi        => 0,
    cf         => 'MAX',
    precision  => 2,
    percentile => 95,
    verbose    => 0,
);

GetOptions(\%opt, qw[start=s end=s ds=s dsi=i cf=s precision=f percentile=f verbose help])
    or die "Can't parse command line options";

# --help used to be parsed and then ignored; honour it.
if ($opt{help}) {
    usage();
    exit 0;
}

# The subs treat a false percentile as "not supplied", so 0 would silently
# fall back to their defaults; reject values outside the documented range.
die "--percentile must be greater than 0 and no more than 100\n"
    unless $opt{percentile} > 0 && $opt{percentile} <= 100;

say basename($0) . " using RRDs-$RRDs::VERSION" if $opt{verbose};

# Nothing to do without at least one rrd file: show the help instead of
# exiting silently.
unless (@ARGV) {
    usage();
    exit 1;
}

for my $rrdfile (@ARGV) {
    say "$rrdfile:";

    # Pass the requested percentile along; without the fourth argument both
    # subs fall back to their built-in 95th-percentile default.
    my $f = ptile_fetch($rrdfile, $opt{start}, $opt{end}, $opt{percentile});
    my $g = ptile_graph($rrdfile, $opt{start}, $opt{end}, $opt{percentile});

    say "   manual  calculation using fetch() = " . (defined $f ? $f : '-NaN');
    say "   PERCENT calculation using graph() = " . (defined $g ? $g : '-NaN');

    # Only do arithmetic when both methods produced a real number.
    my @unusable = grep { !defined($_) || /^-?nan$/i } $f, $g;
    if (@unusable) {
        say "   difference ~ n/a\n";
    }
    else {
        say "   difference ~ " . int($f - $g) . " (" . diff_pct($f, $g) . "%)\n";
    }
}


# ========
# = Subs =
# ========

# Computes a percentile by hand from the rows returned by RRDs::fetch().
#
# Usage: ptile_fetch(/path/to/file.rrd, start_time, end_time [, percentile])
#   start_time, end_time - passed to RRDs::fetch() as -s / -e (rrd time format)
#   percentile           - optional, defaults to the 95th. A value below 1 is
#                          taken as a fraction (0.95), a value of 1 or more as
#                          a percentage (95). Exactly 1 therefore means 1%,
#                          which is how ptile_graph() reads it as well.
#
# Reads $opt{cf} (consolidation function), $opt{dsi} (column of each fetched
# row to use), $opt{precision} (decimals in the result) and $opt{verbose}.
#
# Returns the percentile value formatted with $opt{precision} decimals, or the
# string '-NaN' when the fetch failed or left no usable rows.
sub ptile_fetch {
    my ($rrdfile, $start_time, $end_time, $percentile) = @_;
    $percentile ||= .95;

    # Keep both forms: the fraction drives the maths, the percentage is only
    # used for the verbose report.
    my $fraction = $percentile >= 1 ? $percentile / 100 : $percentile;
    my $percent  = $percentile >= 1 ? $percentile       : $percentile * 100;

    my ($start, $step, $names, $data) = RRDs::fetch(
        $rrdfile,
        "-s" => $start_time,
        "-e" => $end_time,
        $opt{cf},
    );

    # Stop here on failure rather than going on to dereference an undefined
    # $data below.
    if (my $err = RRDs::error()) {
        warn $err;
        return '-NaN';
    }
    return '-NaN' unless ref($data) eq 'ARRAY';

    my @sorted =
        sort { $a <=> $b }
        grep { defined && !/^-?nan$/i }    # ignore undefs/NaN's, any spelling
        map  { $_->[$opt{dsi}] }
        @$data;

    return '-NaN' unless @sorted;

    # Round off to get the index, then clamp it to the rows we actually have:
    # e.g. 10 rows at 95% rounds to 10, one past the last valid index (9).
    my $i = 0 + sprintf "%.0f", scalar(@sorted) * $fraction;
    $i = $#sorted if $i > $#sorted;
    $i = 0        if $i < 0;

    if ($opt{verbose}) {
        my $subname = (caller(0))[3];

        # convert times to m/d/y if they were supplied in epoch seconds.
        $start_time = strftime("%D", gmtime($start_time)) if $start_time =~ /^\d+$/;
        $end_time   = strftime("%D", gmtime($end_time))   if $end_time   =~ /^\d+$/;

        my $rows = scalar @sorted;

        say "\t$subname()";
        say "\t   start = $start_time";
        say "\t   end = $end_time";
        say "\t   rows = $rows";
        say "\t   $percent\% of $rows = " . $rows * $fraction;
        say "\t   ${percent}th \%-ile row index = $i";
        say "\t   discarded rows of -NaN's = " . (@$data - @sorted);

        # Show the chosen row and its neighbours, skipping neighbours that
        # fall outside the sorted list.
        for my $row ($i - 1, $i, $i + 1) {
            next if $row < 0 || $row > $#sorted;
            say sprintf "\trow %d = %.$opt{precision}lf", $row, $sorted[$row];
        }
    }

    return sprintf "%.$opt{precision}lf", $sorted[$i];   # our percentile value
}

# Computes a percentile with RRDs::graph()'s VDEF PERCENT function.
#
# Usage: ptile_graph(/path/to/file.rrd, start_time, end_time [, percentile])
#   start_time, end_time - passed to RRDs::graph() as --start / --end
#   percentile           - optional, defaults to 95. A value below 1 is taken
#                          as a fraction (0.95) and scaled up to a percentage.
#
# Reads $opt{ds} (datasource name), $opt{cf} (consolidation function) and
# $opt{precision} (decimals in the PRINT format).
#
# Returns the first PRINT line produced by graph(), or the string '-NaN' when
# graph() reported an error or printed nothing (the same sentinel that
# ptile_fetch() uses).
sub ptile_graph {
    my ($rrdfile, $start_time, $end_time, $percentile) = @_;

    $percentile ||= 95;
    $percentile  *= 100 if $percentile < 1;  # ptile_graph() accepts a float. graph() prefers an int.

    # Colons separate the fields of a DEF, so any colon inside the file name
    # has to be backslash-escaped.
    (my $def_path = $rrdfile) =~ s/:/\\:/g;

    my ($ptile) = RRDs::graph(
        "/dev/null",    # We don't need to actually make a graph
        "--start" => $start_time,
        "--end"   => $end_time,
        "DEF:$opt{ds}=$def_path:$opt{ds}:$opt{cf}",
        "VDEF:ptile=$opt{ds},$percentile,PERCENT",
        "PRINT:ptile:%.$opt{precision}lf",
    );

    if (my $err = RRDs::error()) {
        warn $err;
        return '-NaN';
    }

    # No PRINT output at all: hand back the sentinel instead of undef.
    return '-NaN' unless ref($ptile) eq 'ARRAY' && defined $ptile->[0];

    return $ptile->[0];
}

# Returns how far apart two values are, as a whole-number percentage of the
# larger one: 100 - int(smaller * 100 / larger).
#
#   diff_pct(100, 50) == 50      diff_pct(7, 7) == 0
#
# Returns 100 when either value is undefined or when the larger value is 0
# (the ratio is not computable), and 0 when the two values are equal.
sub diff_pct {
    # Deliberately not named $a/$b: those are sort()'s package variables.
    my ($first, $second) = @_;

    # Check for undef before any numeric comparison touches the values.
    return 100 if !defined($first) || !defined($second);

    # Equal values differ by 0%, including the 0-versus-0 case that would
    # otherwise hit the "larger value is 0" rule below.
    return 0 if $first == $second;

    my ($big, $small) = $first > $second
        ? ($first, $second)
        : ($second, $first);

    return 100 if $big == 0;
    return 100 - int($small * 100 / $big);
}

# Prints the command-line help to the currently selected output handle.
# Takes no arguments. The defaults listed here are written out by hand and
# must be kept in step with the %opt defaults at the top of the script.
sub usage {
    print <<EOB;
$0 [args] file.rrd [file.rrd ...]

    --start=      |   -s     # default: -86400
    --end=        |   -e     # default: now
    --cf=         |   -c     # default: MAX
    --precision=  | --pr     # default: 2
    --percentile= | --pe     # default: 95

    --ds=                    # default: ds0     for graph() only. name of the datasource
    --dsi=                   # default: 0       for fetch() only. column index of above datasource

    --verbose     |   -v
    --help        |   -h

This program calculates a percentile manually using RRDs::fetch and automatically 
using RRDs::graph's PERCENT function and displays any differences between the two
values.

--ds and --dsi must correspond to the same datasource. graph() understands data-
source names, so --ds is called with a name (e.g. ds0, ds1, ingress, etc). fetch() 
returns an array of values that correspond to all of the datasources in the RRD.
Like any other array, they're indexed from 0, so if you have an RRD with datasources
ds0 and ds1, you'd call this program with either --ds=ds0 --dsi=0 or --ds=ds1 --dsi=1.

--cf is typically called with either MAX or AVERAGE

--precision specifies the number of digits after the decimal point.

--percentile varies between 0 and 100.

EOB

}
