Here is the code that calculates the "average repurchase time", as David
suggested. It runs approximately twice as fast as the previous script
I posted, and it is much more elegant.
#!/usr/bin/perl
# Compute the global "average repurchase time" from pay.csv.
#
# For every subscription id that occurs at least twice, the script measures
# the lapse between the end of one purchase (its time stamp plus its
# duration) and the time stamp of the next purchase, averages those lapses
# per subscription, and finally averages the per-subscription averages.
use strict;
use warnings;
use PDL;
use PDL::NiceSlice;
$PDL::IO::Misc::colsep = ",";
use constant DAYSEC => 24 * 60 * 60;

print "Reading from CSV file...\n";
# The first four columns are read as piddles of type long; column 4 is read
# as a Perl array and is not used below.
# NOTE(review): presumably $ts is in seconds and $time is a duration in days
# (it is multiplied by DAYSEC below) -- confirm against the CSV layout.
my ($ts, $pid, $sub, $time) = rcols ("pay.csv", { perlcols => [4],
    DEFTYPE => long });

print "Counting subscriptions...\n";
my %count = ();
$count{$_}++ for $sub->list;

print "Starting repurchase analysis...\n";
my $global = 0;    # running sum of the per-subscription average lapses
my $inc    = 0;    # number of subscriptions that contributed to $global
my %avg_lapses = ();
foreach (keys %count)
{
    # Copy the id into a named lexical; iterating over $_ keeps a
    # "$var (" sequence out of the loop header, which the NiceSlice source
    # filter might otherwise take for a slice expression.
    my $sub_id = $_;

    # A single purchase has no "next" purchase to measure a lapse against.
    next if $count{$sub_id} < 2;

    my ($purchase_times, $purchase_durations)
        = where ($ts, $time, $sub == $sub_id);

    # Pair every purchase with the one that follows it.
    # NOTE(review): this assumes the rows of one subscription appear in
    # chronological order in the file -- confirm.
    my $start_times     = $purchase_times(0:-2);
    my $start_durations = $purchase_durations(0:-2) * DAYSEC;
    my $next_times      = $purchase_times(1:-1);

    my $lapses = $next_times - ($start_times + $start_durations);
    $avg_lapses{$sub_id} = $lapses->avg;
    $global += $avg_lapses{$sub_id};
    $inc++;
}

if ($inc == 0)
{
    # Without this guard the division below dies with
    # "Illegal division by zero".
    print "\nGlobal average repurchase time: not available\n";
}
else
{
    # Break the duration down arithmetically instead of going through
    # gmtime: gmtime's day-of-year field wraps at a year boundary and a
    # negative average (renewal before expiry) would be interpreted as a
    # date before the epoch.
    my $mean      = int ($global / $inc);
    my $sign      = $mean < 0 ? "-" : "";
    my $remaining = abs $mean;

    my $days = int ($remaining / DAYSEC);
    $remaining -= $days * DAYSEC;
    my $hours = int ($remaining / 3600);
    $remaining -= $hours * 3600;
    my $minutes = int ($remaining / 60);
    my $seconds = $remaining - $minutes * 60;

    print "\nGlobal average repurchase time: ";
    printf "%s%d days, %d hours, %d minutes and %d seconds\n",
        $sign, $days, $hours, $minutes, $seconds;
}
print "($inc subscribers analyzed)\n";
_______________________________________________
Perldl mailing list
[email protected]
http://mailman.jach.hawaii.edu/mailman/listinfo/perldl