Here is the code that calculates the "average repurchase time", as David
suggested. It runs approximately twice as fast as the previous script
I posted, and it is much more elegant.

#!/usr/bin/perl
#
# Compute the global "average repurchase time" from pay.csv.
#
# For every subscriber with at least two purchases, the lapse between the
# end of one purchase (start time + duration) and the start of the next
# one is averaged; the reported figure is the unweighted mean of those
# per-subscriber averages.
#
# NOTE(review): the column meanings below are inferred from how the values
# are used (timestamps compared in seconds, durations multiplied by
# DAYSEC, i.e. presumably expressed in days) -- confirm against the CSV.
# NOTE(review): consecutive rows of one subscriber are treated as
# consecutive purchases, so the file is assumed to be in chronological
# order -- confirm.
use strict;
use warnings;
use PDL;
use PDL::NiceSlice;

$PDL::IO::Misc::colsep = ",";
use constant DAYSEC => 24 * 60 * 60;

print "Reading from CSV file...\n";
# Column 4 is read as a Perl array (perlcols) and discarded, as is the
# second column; only timestamp, subscriber id and duration are used.
my ($ts, undef, $sub, $time) = rcols("pay.csv", {
    perlcols => [4],
    DEFTYPE  => long,
});

my %count;
print "Counting subscriptions...\n";
# Number of purchase rows per subscriber id.
$count{$_}++ for $sub->list;

my $global = 0;    # running sum of the per-subscriber average lapses
my $inc    = 0;    # number of subscribers that contributed to $global
print "Starting repurchase analysis...\n";
my %avg_lapses;
for my $subscriber (keys %count) {
    # A lapse needs two purchases; this also guarantees that the slices
    # below are never empty.
    next if $count{$subscriber} < 2;

    my ($purchase_times, $purchase_durations)
        = where($ts, $time, $sub == $subscriber);

    # Pair every purchase but the last with the purchase that follows it.
    my $start_times     = $purchase_times(0:-2);
    my $start_durations = $purchase_durations(0:-2) * DAYSEC;
    my $next_times      = $purchase_times(1:-1);

    # Negative when the next purchase precedes the end of the previous one.
    my $lapses = $next_times - ($start_times + $start_durations);

    $avg_lapses{$subscriber} = $lapses->avg;
    $global += $avg_lapses{$subscriber};
    $inc++;
}

print "\nGlobal average repurchase time: ";
if ($inc == 0) {
    # No subscriber bought twice: avoid dividing by zero.
    print "n/a (no subscriber has two or more purchases)\n";
}
else {
    # Break the duration down arithmetically. gmtime is a calendar
    # function: its day-of-year field wraps after one year and it turns
    # negative values into 1969 dates, so it cannot format a duration.
    my $average   = int($global / $inc);
    my $sign      = $average < 0 ? "-" : "";
    my $remaining = abs $average;

    my $days    = int($remaining / DAYSEC);
    my $hours   = int($remaining % DAYSEC / 3600);
    my $minutes = int($remaining % 3600 / 60);
    my $seconds = $remaining % 60;

    printf "%s%d days, %d hours, %d minutes and %d seconds\n",
        $sign, $days, $hours, $minutes, $seconds;
}
print "($inc subscribers analyzed)\n";

_______________________________________________
Perldl mailing list
[email protected]
http://mailman.jach.hawaii.edu/mailman/listinfo/perldl

Reply via email to