On Thu, 24 Apr 2008 13:31:47 +0100, Tim Bunce <[EMAIL PROTECTED]> wrote:
Hi Tim,
> So I'd be happy to see an API like this:
>
> SV *_concat_hash_sorted( HV *hv, char *kv_sep, char *pair_sep, SV
> *value_format, SV *sort_type)
>
Attached is the latest draft of _concat_hash_sorted(). I cleaned up the
types, fixed a few bugs, and formalized the test suite.
Also, while I was testing, I found what looks to be a memory leak in
neat_svpv:
if (SvIOK(sv))
- nsv = newSVpvf("%"IVdf, SvIVX(sv));
- else nsv = newSVpvf("%"NVgf, SvNVX(sv));
+ nsv = sv_2mortal(newSVpvf("%"IVdf, SvIVX(sv)));
+ else nsv = sv_2mortal(newSVpvf("%"NVgf, SvNVX(sv)));
If there is anything you would like changed, let me know. I was not sure
about using strcat/strncat, so if you'd like, I will change those.
Oh, And is there a way to attach a string to an SV w/o copying it?
-r
--- DBI-1.604/DBI.xs 2008-03-24 09:44:38.000000000 -0400
+++ DBI-1.604-concat_hash/DBI.xs 2008-04-28 14:57:57.000000000 -0400
@@ -209,6 +209,95 @@
return buf;
}
+static int
+_cmp_number (val1, val2)
+ const void *val1;
+ const void *val2;
+{
+ dTHX;
+ double first, second;
+ char **endptr = 0;
+ int old_err;
+
+ old_err = errno; /* needed ? */
+ errno = 0;
+ first = strtod(*(char **)val1, endptr);
+ if (0 != errno) {
+ croak(strerror(errno));
+ }
+ errno = 0;
+ second = strtod(*(char **)val2, endptr);
+ if (0 != errno) {
+ croak(strerror(errno));
+ }
+ errno = old_err;
+
+ if (first == second)
+ return 0;
+ else if (first > second)
+ return 1;
+ else
+ return -1;
+}
+
+static int _cmp_str (val1, val2)
+ const void *val1;
+ const void *val2;
+{
+ dTHX;
+ return strcmp( *(char **)val1, *(char **)val2);
+}
+
+char **
+_sort_hash_keys (hash, sort_order, total_length)
+ HV *hash;
+ char sort_order;
+ STRLEN *total_length;
+{
+ dTHX;
+ I32 hv_len, key_len;
+ SV *look_num;
+ HE *entry;
+ char **keys;
+ void *sort;
+ unsigned int idx = 0;
+ STRLEN tot_len = 0;
+
+ hv_len = hv_iterinit(hash);
+ if (!hv_len)
+ return 0;
+
+ keys = malloc(sizeof(char *)*hv_len);
+ if (!keys)
+ croak("Unable to allocate memory");
+
+ while ((entry = hv_iternext(hash))) {
+ *(keys+(idx++)) = hv_iterkey(entry, &key_len);
+ tot_len += key_len;
+ }
+ if (0 != total_length)
+ *total_length = tot_len;
+
+ /* replace with function table */
+ if (sort_order < 0) {
+ look_num = sv_2mortal(newSVpv(keys[0],0));
+ if (looks_like_number(look_num))
+ sort = _cmp_number;
+ else
+ sort = _cmp_str;
+ } else if (0 == sort_order) {
+ sort = _cmp_str;
+ } else if (1 == sort_order) {
+ sort = _cmp_number;
+ } else {
+ croak("Unknown sort order %i", sort_order);
+ }
+ qsort(keys, hv_len, sizeof(char*), sort);
+ return keys;
+}
+
+
+
/* handy for embedding into condition expression for debugging */
/*
static int warn1(char *s) { warn(s); return 1; }
@@ -374,8 +463,8 @@
}
/* we don't use SvPV here since we don't want to alter sv in _any_ way */
if (SvIOK(sv))
- nsv = newSVpvf("%"IVdf, SvIVX(sv));
- else nsv = newSVpvf("%"NVgf, SvNVX(sv));
+ nsv = sv_2mortal(newSVpvf("%"IVdf, SvIVX(sv)));
+ else nsv = sv_2mortal(newSVpvf("%"NVgf, SvNVX(sv)));
if (infosv)
sv_catsv(nsv, infosv);
return SvPVX(nsv);
@@ -4236,6 +4325,102 @@
RETVAL
+SV *
+_concat_hash_sorted (hash, kv_separator, pair_separator, value_format,sort_type)
+ HV *hash
+ SV *kv_separator
+ SV *pair_separator
+ SV *value_format
+ SV *sort_type
+
+ PREINIT:
+ I32 hv_len;
+ STRLEN kv_sep_len, pair_sep_len, hv_val_len, pos=0, total_len = 0;
+ char **keys;
+ char *joined, *kv_sep, *pair_sep, *hv_val;
+ unsigned int i = 0;
+ char sort;
+ SV **hash_svp;
+ SV *return_sv;
+ bool not_neat;
+ CODE:
+
+ kv_sep = SvPV(kv_separator, kv_sep_len);
+ pair_sep = SvPV(pair_separator, pair_sep_len);
+
+ if (SvGMAGICAL(value_format))
+ mg_get(value_format);
+ not_neat = SvTRUE(value_format);
+
+ sort = -1;
+ if (SvOK(sort_type)) {
+ sort = SvIV(sort_type);
+ }
+
+
+ keys = _sort_hash_keys(hash, sort, &total_len);
+ if (!keys) {
+ ST(0) = Nullsv;
+ return;
+ }
+ hv_len = hv_iterinit(hash);
+ /* total_len += Separators + quotes + term null */
+ total_len += kv_sep_len*hv_len + pair_sep_len*hv_len+2*hv_len+1;
+ joined = malloc(total_len*sizeof(char));
+
+ for (i=0; i<hv_len; ++i) {
+ if (i > 0) {
+ strcpy(joined+pos, pair_sep);
+ pos += pair_sep_len;
+ }
+ strcpy(joined+pos, keys[i]);
+ pos += strlen(keys[i]);
+
+ hash_svp = hv_fetch(hash, keys[i], strlen(keys[i]), 0);
+ if (hash_svp) {
+ strcpy(joined+pos, kv_sep);
+ pos += kv_sep_len;
+ if (not_neat) {
+ if (!SvOK(*hash_svp)) {
+ strcpy(joined+(pos), "''");
+ pos += 2;
+ continue;
+ }
+ hv_val = SvPV(*hash_svp, hv_val_len);
+ total_len += hv_val_len;
+ if (!(joined=realloc(joined, total_len)))
+ croak("Unable to allocate memory");
+ strcpy(joined+(pos++), "'");
+ strcpy(joined+pos, hv_val);
+ pos += hv_val_len;
+ strcpy(joined+(pos++), "'");
+ } else {
+ hv_val = neatsvpv(*hash_svp, 0);
+ hv_val_len = strlen(hv_val);
+ total_len += hv_val_len;
+ if (!(joined=realloc(joined, total_len)))
+ croak("Unable to allocate memory");
+ strcpy(joined+pos, hv_val);
+ pos += hv_val_len;
+ }
+ }
+ }
+
+ free(keys);
+ /* assert(pos+1 < total_len); */
+
+ return_sv = newSVpvn(joined, pos);
+ free(joined);
+ RETVAL=return_sv;
+
+ OUTPUT:
+ RETVAL
+
+
+
+
+
+
MODULE = DBI PACKAGE = DBI::var
void
# Before `make install' is performed this script should be runnable with
# `make test'. After `make install' it should work as `perl CatHash.t'
#########################
# change 'tests => 1' to 'tests => last_test_to_print';
use Test::More tests => 36;
BEGIN { use_ok('DBI') };
# null and undefs -- segfaults?;
is (DBI::_concat_hash_sorted({ }, "=", ":", 0, undef), undef);
eval {DBI::_concat_hash_sorted(undef, "=", ":", 0, undef), undef};
like ($@, qr/hash is not a hash reference/); #XXX check this
is (DBI::_concat_hash_sorted({ }, undef, ":", 0, undef), undef);
is (DBI::_concat_hash_sorted({ }, "=", undef, 0, undef), undef);
is (DBI::_concat_hash_sorted({ }, "=", ":", undef, undef), undef);
# Simple segfault tests?
ok(DBI::_concat_hash_sorted({bob=>'two', fred=>'one' }, "="x12000, ":", 1, undef));
ok(DBI::_concat_hash_sorted({bob=>'two', fred=>'one' }, "=", ":"x12000, 1, undef));
ok(DBI::_concat_hash_sorted({map {$_=>undef}x100000}, "="x12000, ":", 1, undef));
ok(DBI::_concat_hash_sorted({map {$_=>undef}x100000}, "=", ":"x12000, 1, undef), 'test');
ok(DBI::_concat_hash_sorted({map {$_=>undef}x100000}, "="x12000, ":"x12000, 1, undef), 'test');
my $simple_hash = {
bob=>"there",
jack=>12,
fred=>"there",
norman=>"there",
sam =>undef
};
my $simple_numeric = {
1=>"there",
2=>"there",
3=>"there",
32=>"there",
16 => 'yo',
07 => "buddy",
49 => undef,
};
my $simple_mixed = {
bob=>"there",
jack=>12,
fred=>"there",
norman=>"there",
sam =>undef,
1=>"there",
2=>"there",
3=>"there",
32=>"there",
16 => 'yo',
07 => "buddy",
49 => undef,
};
my $simple_float = {
1.12 =>"there",
3.1415926 =>"there",
2.718281828 =>"there",
32=>"there",
1.6 => 'yo',
0.78 => "buddy",
49 => undef,
};
eval {
DBI::_concat_hash_sorted($simple_hash, "=",,":",1,12);
};
like ($@, qr/Unknown sort order/, "Unknown sort order");
## Loopify and Add Neat
my %neats = ("Neat"=>0, "Not Neat"=> 0);
my %sort_types = (guess=>undef, numeric => 1, lexical=> 0);
my %hashes = (
Numeric=>$simple_numeric,
"Simple Hash" => $simple_hash,
"Mixed Hash" => $simple_mixed,
"Float Hash" => $simple_float
);
for $sort_type (keys %sort_types){
for $neat (keys %neats) {
for $hash(keys %hashes) {
test_concat_hash($hash, $neat, $sort_type);
}
}
}
sub test_concat_hash {
my ($hash, $neat, $sort_type) = @_;
is (
DBI::_concat_hash_sorted(
$hashes{$hash}, "=", ":",$neats{$neat}, $sort_types{$sort_type}
),
_concat_hash_sorted(
$hashes{$hash} , "=", ":",$neats{$neat}, $sort_types{$sort_type}
),
"$hash - $neat $sort_type"
);
}
if (0) {
eval {
use Benchmark qw(:all);
cmpthese(200_000, {
Perl => sub {_concat_hash_sorted($simple_hash, "=", ":",0,undef); },
C=> sub {DBI::_concat_hash_sorted($simple_hash, "=", ":",0,undef);}
});
print "\n";
cmpthese(200_000, {
NotNeat => sub {DBI::_concat_hash_sorted(
$simple_hash, "=", ":",1,undef);
},
Neat => sub {DBI::_concat_hash_sorted(
$simple_hash, "=", ":",0,undef);
}
});
};
}
#CatHash::_concat_hash_values({ }, ":-",,"::",1,1);
sub _concat_hash_sorted {
my ( $hash_ref, $kv_separator, $pair_separator, $value_format, $sort_type ) = @_;
# $value_format: false=use neat(), true=dumb quotes
# $sort_type: 0=lexical, 1=numeric, undef=try to guess
$keys = _get_sorted_hash_keys($hash_ref, $sort_type);
my $string = '';
for my $key (@$keys) {
$string .= $pair_separator if length $string > 0;
my $value = $hash_ref->{$key};
if ($value_format) {
$value = (defined $value) ? "'$value'" : 'undef';
}
else {
$value = DBI::neat($value,0);
}
$string .= $key . $kv_separator . $value;
}
return $string;
}
use Scalar::Util qw(looks_like_number);
sub _get_sorted_hash_keys {
my ($hash_ref, $sort_type) = @_;
if (not defined $sort_type) {
my $first_key = (each %$hash_ref)[0];
$sort_type = looks_like_number($first_key);
}
my @keys = keys %$hash_ref;
no warnings 'numeric';
my @keys = ($sort_type)
? sort {$a <=> $b} @keys
: sort @keys;
return [EMAIL PROTECTED];
}
#########################
# Insert your test code below, the Test::More module is use()ed here so read
# its man page ( perldoc Test::More ) for help writing this test script.