On Thu, Sep 20, 2012 at 01:13:42PM -0400, Chris Marshall wrote:
> There are no known bugs with either.
>
> If you have a test case showing a failure,
> please report it.
>
> Note: The comparisons are strictly
> numeric so if you need some sort of
> approximate equality threshold for
> floating point, you would need to do
> the conversion yourself.
In fact, I have written some code implementing some of PDL::Ufunc
sorting/uniq functions with thresholds. Did not spend too much time on
updating the documentation/copyright info, but you can try the attached PP
code.
--
c u
henning
use strict; # be careful
pp_addpm({At=>'Top'},<<'EOD');
=head1 NAME
PDL::Approx - primitive approximate ufunc operations for pdl
=head1 DESCRIPTION
This module provides approximate vector sorting functions. Approximate
means in this case: component-wise comparison respecting a user-supplied
epsilon. This is particularly useful for PDLs of floating point types.
=head1 SYNOPSIS
use PDL::Ufunc;
=cut
use PDL::Slices;
use Carp;
EOD
# check for bad value support
use PDL::Config;
my $bvalflag = $PDL::Config{WITH_BADVAL} || 0;
# should we use the finite() routine in libm ?
# (is the Windows version _finite() ?)
#
pp_addhdr(<<'EOD');
#define IsNaN(x) (x != x)
EOD
# Internal utility sorting routine for approx_qsortvec routines.
#
# note: we export them to the PDL Core structure for use in
# other modules (eg Image2D)
for (keys %PDL::Types::typehash) {
my $ctype = $PDL::Types::typehash{$_}{ctype};
my $ppsym = $PDL::Types::typehash{$_}{ppsym};
pp_addhdr(<<"FOO"
/*******
* approx_qsortvec helper routines
* --HG 07-May-2012
*/
/* per-type epsilons for approximate comparisons
*/
$ctype approx_epsilon_$ctype=0;
/* Compare a vector in lexicographic order, returning the
* equivalent of "<=>".
*/
signed char pdl_approx_cmpvec_$ppsym($ctype *a, $ctype *b, int n) {
int i;
for(i=0; i<n; a++,b++,i++) {
if( (*a-*b) < -approx_epsilon_$ctype ) return -1;
if( (*a-*b) > approx_epsilon_$ctype ) return 1;
}
return 0;
}
void pdl_approx_qsortvec_$ppsym($ctype *xx, int n, int a, int b) {
int i,j, median_ind;
$ctype t;
i = a;
j = b;
median_ind = (i+j)/2;
do {
while( pdl_approx_cmpvec_$ppsym( &(xx[n*i]), &(xx[n*median_ind]), n )
< 0 )
i++;
while( pdl_approx_cmpvec_$ppsym( &(xx[n*j]), &(xx[n*median_ind]), n )
> 0 )
j--;
if(i<=j) {
int k;
$ctype *aa = &xx[n*i];
$ctype *bb = &xx[n*j];
for( k=0; k<n; aa++,bb++,k++ ) {
$ctype z;
z = *aa;
*aa = *bb;
*bb = z;
}
if (median_ind==i)
median_ind=j;
else if (median_ind==j)
median_ind=i;
i++;
j--;
}
} while (i <= j);
if (a < j)
pdl_approx_qsortvec_$ppsym( xx, n, a, j );
if (i < b)
pdl_approx_qsortvec_$ppsym( xx, n, i, b );
}
void pdl_approx_bsortvec_$ppsym($ctype *xx, int n, int l) {
int min_idx,max_idx;
int first_swap,last_swap;
int i,k;
$ctype z;
min_idx = 0;
max_idx = l;
last_swap = 42;
while (last_swap >= 0) {
last_swap = -1;
// printf(" %5d %5d %5d\\n",min_idx,max_idx,n);
// fflush(stdout);
for (i=min_idx;i<max_idx;i++) {
if (pdl_approx_cmpvec_$ppsym( &(xx[n*i]), &(xx[n*(i+1)]), n ) > 0) {
if (last_swap < 0) {
min_idx = (i > 1 ? i-1 : 0);
}
last_swap = i;
$ctype *aa = &xx[n*i];
$ctype *bb = &xx[n*(i+1)];
for( k=0; k<n; aa++,bb++,k++ ) {
z = *aa;
*aa = *bb;
*bb = z;
}
}
}
max_idx=last_swap+1;
}
}
void pdl_approx_isortvec_$ppsym($ctype *xx, $ctype *yy, int n, int l)
{
int offset_in[l+1];
int offset_sorted[l+1];
int i,j,k,o,cmpres;
int mv_start;
for (i=0,o=0;i<=l;i++,o+=n)
offset_in[i]=o;
for (i=0,o=0;i<=l;i++,o+=n)
offset_sorted[i]=-1;
offset_sorted[0]=offset_in[0];
for (i=1;i<=l;i++) {
for (j=i;j>0;j--) {
cmpres=pdl_approx_cmpvec_$ppsym( &(xx[offset_in[i]]),
&(xx[offset_sorted[j-1]]), n );
if (cmpres >= 0)
break;
}
mv_start = (j > 0 ? j : 1);
for (k=i;k>=mv_start;k--) {
offset_sorted[k]=offset_sorted[k-1];
}
offset_sorted[j]=offset_in[i];
}
$ctype *aa = yy;
for (i=0;i<=l;i++) {
$ctype *bb = &xx[offset_sorted[i]];
for (j=0;j<n;j++,aa++,bb++) {
*aa = *bb;
}
}
}
void pdl_approx_markvec_$ppsym($ctype *xx,int *res, int n, int l) {
int i;
res[l]=42;
for (i=0;i<l;i++) {
res[i]=pdl_approx_cmpvec_$ppsym( &(xx[n*i]), &(xx[n*(i+1)]),
n );
}
}
void pdl_approx_qsortvec_ind_$ppsym($ctype *xx, int *ix, int n, int a,
int b) {
int i,j, median_ind;
$ctype t;
i = a;
j = b;
median_ind = (i+j)/2;
do {
while( pdl_approx_cmpvec_$ppsym( &(xx[n*ix[i]]),
&(xx[n*ix[median_ind]]), n ) < 0 )
i++;
while( pdl_approx_cmpvec_$ppsym( &(xx[n*ix[j]]),
&(xx[n*ix[median_ind]]), n ) > 0 )
j--;
if(i<=j) {
int k;
k = ix[i];
ix[i] = ix[j];
ix[j] = k;
if (median_ind==i)
median_ind=j;
else if (median_ind==j)
median_ind=i;
i++;
j--;
}
} while (i <= j);
if (a < j)
pdl_qsortvec_ind_$ppsym( xx, ix, n, a, j );
if (i < b)
pdl_qsortvec_ind_$ppsym( xx, ix, n, i, b );
}
FOO
);
}
# when copying the data over to the temporary array,
# ignore the bad values and then only send the number
# of good elements to the sort routines
#
sub generic_approx_qsortvec {
my $pdl = shift;
my $ndim = shift;
return
'$TBSULQFD(pdl_approx_qsortvec_B,pdl_approx_qsortvec_S,pdl_approx_qsortvec_U,
pdl_approx_qsortvec_L,pdl_approx_qsortvec_Q,pdl_approx_qsortvec_F,pdl_approx_qsortvec_D)
($P(' . $pdl . '), '. $ndim.', 0, nn);';
}
sub generic_approx_bsortvec {
my $pdl = shift;
my $ndim = shift;
return
'$TBSULQFD(pdl_approx_bsortvec_B,pdl_approx_bsortvec_S,pdl_approx_bsortvec_U,
pdl_approx_bsortvec_L,pdl_approx_bsortvec_Q,pdl_approx_bsortvec_F,pdl_approx_bsortvec_D)
($P(' . $pdl . '), '. $ndim.', nn);';
}
sub generic_approx_isortvec {
my $src = shift;
my $dst = shift;
my $ndim = shift;
return
'$TBSULQFD(pdl_approx_isortvec_B,pdl_approx_isortvec_S,pdl_approx_isortvec_U,
pdl_approx_isortvec_L,pdl_approx_isortvec_Q,pdl_approx_isortvec_F,pdl_approx_isortvec_D)
($P(' . $src . '), $P(' . $dst . '), ' . $ndim.', nn);';
}
sub generic_approx_markvec {
my $pdl = shift;
my $res = shift;
my $ndim = shift;
return
'$TBSULQFD(pdl_approx_markvec_B,pdl_approx_markvec_S,pdl_approx_markvec_U,
pdl_approx_markvec_L,pdl_approx_markvec_Q,pdl_approx_markvec_F,pdl_approx_markvec_D)
($P(' . $pdl . '), $P(' . $res . '), '. $ndim.', nn);';
}
sub generic_approx_cmpvec {
my $pdl1 = shift;
my $pdl2 = shift;
return
'$TBSULQFD(pdl_approx_cmpvec_B,pdl_approx_cmpvec_S,pdl_approx_cmpvec_U,
pdl_approx_cmpvec_L,pdl_approx_cmpvec_Q,pdl_approx_cmpvec_F,pdl_approx_cmpvec_D)
($P(' . $pdl1 . '), $P(' . $pdl2 . '), nd);';
}
# move all bad values to the end of the array
#
pp_def(
'approx_qsortvec',
HandleBad => 1,
Pars => 'a(n,m);epsilon(); [o]b(n,m);',
Code =>
'int nn;
int nd;
approx_epsilon_$GENERIC() = $epsilon();
loop(n,m) %{ $b() = $a(); %}
nn = ($COMP(__m_size))-1;
nd = $COMP(__n_size);
' . generic_approx_qsortvec('b','nd'),
Doc => '
=for ref
Sort a list of vectors lexicographically. Comparison operations include an
epsilon.
Vector a is only counted as greater/less than vector b if 2 components differ
by more than epsilon.
The 0th dimension of the source piddle is dimension in the vector;
the 1st dimension is list order. Higher dimensions are threaded over.
=for example
print
pdl([[1.0000000,3],[1.0000001,2],[1.0000002,1]])->approx_qsortvec(0.00001)
[
[ 1.0000002 1]
[ 1.0000001 2]
[ 1 3]
]
=cut
',
BadDoc =>
'
Vectors with bad components should be moved to the end of the array:
',
); # pp_def qsort
pp_def(
'approx_bsortvec',
HandleBad => 1,
Pars => 'a(n,m);epsilon(); [o]b(n,m);',
Code =>
'int nn;
int nd;
approx_epsilon_$GENERIC() = $epsilon();
loop(n,m) %{ $b() = $a(); %}
nn = ($COMP(__m_size))-1;
nd = $COMP(__n_size);
' . generic_approx_bsortvec('b','nd')
);
pp_def(
'approx_isortvec',
HandleBad => 1,
Pars => 'a(n,m);epsilon(); [o]b(n,m);',
Code =>
'int nn;
int nd;
approx_epsilon_$GENERIC() = $epsilon();
nn = ($COMP(__m_size))-1;
nd = $COMP(__n_size);
' . generic_approx_isortvec('a', 'b', 'nd')
);
pp_def(
'approx_markvec',
HandleBad => 1,
Pars => 'a(n,m);epsilon(); int [o]res(m);',
Code =>
'int nn;
int nd;
approx_epsilon_$GENERIC() = $epsilon();
nn = ($COMP(__m_size))-1;
nd = $COMP(__n_size);
' . generic_approx_markvec('a','res','nd')
);
pp_def(
'approx_cmpvec',
HandleBad => 1,
Pars => 'a(n);b(n);epsilon(); int [o]res();',
Code =>
'int nd;
approx_epsilon_$GENERIC() = $epsilon();
nd = $COMP(__n_size);
$res() = ' . generic_approx_cmpvec('a','b','nd')
);
sub generic_approx_qsortvec_ind {
my $pdl = shift;
my $ndim = shift;
return
'$TBSULQFD(pdl_approx_qsortvec_ind_B,pdl_approx_qsortvec_ind_S,pdl_approx_qsortvec_ind_U,
pdl_approx_qsortvec_ind_L,pdl_approx_qsortvec_ind_Q,pdl_approx_qsortvec_ind_F,pdl_approx_qsortvec_ind_D)
($P(' . $pdl . '), $P(indx), '. $ndim.', 0, nn);';
}
pp_def(
'approx_qsortveci',
HandleBad => 1,
Pars => 'a(n,m); epsilon(); int [o]indx(m);',
Code =>
'int nd;
int nn=$COMP(__m_size)-1;
approx_epsilon_$GENERIC() = $epsilon();
loop(m) %{
$indx()=m;
%}
nd = $COMP(__n_size);
' . generic_approx_qsortvec_ind('a','nd'),
Doc => '
=for ref
Sort a list of vectors lexicographically, returning the indices of the
sorted vectors rather than the sorted list itself. Comparison operations
include an epsilon.
Vector a is only counted as greater/less than vector b if 2 components differ
by more than epsilon.
As with C<approx_qsortvec>, the input PDL should be an NxM array containing M
separate N-dimensional vectors. The return value is an integer M-PDL
containing the M-indices of original array rows, in sorted order.
As with C<approx_qsortvec>, the zeroth element of the vectors runs slowest in
the
sorted list.
Additional dimensions are threaded over: each plane is sorted separately,
so approx_qsortveci may be thought of as a collapse operator of sorts (groan).
=cut
',
BadDoc =>
'
Vectors with bad components should be moved to the end of the array:
',
);
pp_add_exported ('', 'approx_uniq');
pp_addpm(<<'EOPM');
*approx_uniq = \&PDL::approx_uniq;
# return unique elements of array
# find as jumps in the sorted array
# flattens in the process
sub PDL::approx_uniq {
use PDL::Core 'barf';
my ($arr,$epsilon) = @_;
return $arr if($arr->nelem == 0); # The null list is unique (CED)
my $srt = $arr->clump(-1)->where($arr==$arr)->qsort(); # no NaNs or BADs
for qsort
my $nans = $arr->clump(-1)->where($arr!=$arr);
my $uniq = ($srt->nelem > 0) ? $srt->where(abs($srt-$srt->rotate(-1)) >
$epsilon) : $srt;
# make sure we return something if there is only one value
my $answ = $nans; # NaN values always uniq
if ( $uniq->nelem > 0 ) {
$answ = $uniq->append($answ);
} else {
$answ = ( ($srt->nelem == 0) ? $srt : pdl( [$srt->index(0)] )
)->append($answ);
}
return $answ;
}
EOPM
pp_add_exported ('', 'approx_uniqind');
pp_addpm(<<'EOPM');
*approx_uniqind = \&PDL::approx_uniqind;
# return unique elements of array
# find as jumps in the sorted array
# flattens in the process
sub PDL::approx_uniqind {
use PDL::Core 'barf';
my ($arr,$epsilon) = @_;
return $arr if($arr->nelem == 0); # The null list is unique (CED)
# Different from uniq we sort and store the result in an intermediary
my $aflat = $arr->flat;
my $nanind = which($aflat!=$aflat); # NaN indexes
my $good = $aflat->sequence->long->where($aflat==$aflat); # good indexes
my $i_srt = $aflat->where($aflat==$aflat)->qsorti; # no BAD or NaN
values for qsorti
my $srt = $aflat->where($aflat==$aflat)->index($i_srt);
my $uniqind;
if ($srt->nelem > 0) {
$uniqind = which(abs($srt-$srt->rotate(-1)) > $epsilon);
$uniqind = $i_srt->slice('0') if $uniqind->isempty;
} else {
$uniqind = which($srt);
}
# Now map back to the original space
my $ansind = $nanind;
if ( $uniqind->nelem > 0 ) {
$ansind = ($good->index($i_srt->index($uniqind)))->append($ansind);
} else {
$ansind = $uniqind->append($ansind);
}
return $ansind;
}
EOPM
pp_add_exported ('', 'approx_uniqvec');
pp_addpm(<<'EOPM');
sub PDL::approx_uniqvec {
my($pdl,$epsilon) = @_;
return $pdl if ( $pdl->nelem == 0 || $pdl->ndims < 2 );
return $pdl if ( $pdl->slice("(0)")->nelem < 2 ); #
slice isn't cheap but uniqvec isn't either
my $pdl2d = null;
$pdl2d = $pdl->mv(0,-1)->clump($pdl->ndims-1)->mv(-1,0); #
clump all but dim(0)
my $ngood = null;
$ngood = $pdl2d->ones->sumover;
$ngood = $pdl2d->ngoodover if ($PDL::Bad::Status && $pdl->badflag); #
number of good values each vector
my $ngood2 = null;
$ngood2 = $ngood->where($ngood); #
number of good values with no all-BADs
$pdl2d = $pdl2d->mv(0,-1)->dice($ngood->which)->mv(-1,0); #
remove all-BAD vectors
my $numnan = null;
$numnan = ($pdl2d!=$pdl2d)->sumover; #
works since no all-BADs to confuse
my $presrt = null;
$presrt = $pdl2d->mv(0,-1)->dice($numnan->not->which)->mv(0,-1); #
remove vectors with any NaN values
my $nanvec = null;
$nanvec = $pdl2d->mv(0,-1)->dice($numnan->which)->mv(0,-1); # the
vectors with any NaN values
# use dice instead of nslice since qsortvec might be packing
# the badvals to the front of the array instead of the end like
# the docs say. If that is the case and it gets fixed, it won't
# bust uniqvec. DAL 14-March 2006
my $srt = null;
$srt = $presrt->approx_qsortvec($epsilon)->mv(0,-1);
# BADs are sorted by qsortvec
my $srtdice = $srt;
my $somebad = null;
if ($PDL::Bad::Status && $srt->badflag) {
$srtdice = $srt->dice($srt->mv(0,-1)->nbadover->not->which);
$somebad = $srt->dice($srt->mv(0,-1)->nbadover->which);
}
my $uniq = null;
if ($srtdice->nelem > 0) {
$uniq = (abs($srtdice-$srtdice->rotate(-1)) >
$epsilon)->mv(0,-1)->orover->which;
} else {
$uniq = $srtdice->orover->which;
}
my $ans = null;
if ( $uniq->nelem > 0 ) {
$ans = $srtdice->dice($uniq);
} else {
$ans = ($srtdice->nelem > 0) ? $srtdice->slice("0,:") : $srtdice;
}
return $ans->append($somebad)->append($nanvec->mv(0,-1))->mv(0,-1);
}
EOPM
pp_addpm({At=>'Bot'},<<'EOD');
=head1 AUTHOR
Copyright (C) Tuomas J. Lukka 1997 ([email protected]).
Contributions by Christian Soeller ([email protected]),
Karl Glazebrook ([email protected]) and
Henning Glawe ([email protected]). All rights
reserved. There is no warranty. You are allowed to redistribute this
software / documentation under certain conditions. For details, see
the file COPYING in the PDL distribution. If this file is separated
from the PDL distribution, the copyright notice should be included in
the file.
=cut
EOD
pp_done();
_______________________________________________
Perldl mailing list
[email protected]
http://mailman.jach.hawaii.edu/mailman/listinfo/perldl