Christoph Otto wrote:
Nicholas Clark wrote:
On Sun, Nov 16, 2008 at 06:52:08PM -0800, chromatic wrote:
I'd rather remove the hash seed from the key calculation. Instead,
let's use a global seed (#defined somewhere) as the initial seed,
cache the calculated
You don't want a constant global seed, else you fall foul of Algorithmic
Complexity attacks: http://www.cs.rice.edu/~scrosby/hash/
Any mathematician is welcome to prove that this makes things worse,
however.
Scott A Crosby and Dan S Wallach appear to be computer scientists.
Will they
do? :-)
Nicholas Clark
What about a per-interp seed, set to some random value during Parrot's
initialization? This would keep any sharp edges away from users and
avoid those charming algorithmic complexity attacks (assuming
Parrot_int_rand can find sufficient randomness).
This patch starts to implement a per-interp hash seed by storing the seed as
part of the parrot_interp_t struct and initializing it in string_init(). It
also changes string_hash() to use the per-interp seed instead of an argument.
key_hash_STRING checks PObj_is_shared_FLAG and always recalculates the hash
if the flag is true, which should take care of STRINGs shared between
interpreters.
For now, the patch uses a fixed seed of 3793 because Parrot with a randomized
seed exhibits some new test failures. If the code is changed to use a
randomized seed, 11 new test failures appear, mostly in t/pmc/threads.t. It
appears that PObj_is_shared_FLAG is either the wrong flag to use, or some code
somewhere that marks STRINGs shared isn't doing its job.
With the fixed seed there is one new test failure (#5 of t/pmc/orderedhash.t),
which I'll be working on.
I'd appreciate comments or a quick code review as to whether I should apply
the patch as-is (sans randomization) once the failing OrderedHash test passes.
It's admittedly not a complete solution, but it does hide Parrot's hash seed
from any PARROT_EXPORT functions and makes testing and fixing the other bugs
easier.
Index: src/hash.c
===================================================================
--- src/hash.c (revision 33133)
+++ src/hash.c (working copy)
@@ -44,12 +44,14 @@
PARROT_WARN_UNUSED_RESULT
PARROT_MALLOC
static Hash * create_hash(
+ PARROT_INTERP,
PARROT_DATA_TYPE val_type,
Hash_key_type hkey_type,
ARGIN(hash_comp_fn compare),
ARGIN(hash_hash_key_fn keyhash))
- __attribute__nonnull__(3)
- __attribute__nonnull__(4);
+ __attribute__nonnull__(1)
+ __attribute__nonnull__(4)
+ __attribute__nonnull__(5);
PARROT_WARN_UNUSED_RESULT
PARROT_PURE_FUNCTION
@@ -146,11 +148,11 @@
static size_t
key_hash_STRING(PARROT_INTERP, ARGMOD(STRING *s), size_t seed)
{
- if (s->hashval) {
- return s->hashval;
+ if (PObj_is_shared_TEST(s) || s->hashval == 0) {
+ return string_hash(interp, s);
}
- return string_hash(interp, s, seed);
+ return s->hashval;
}
/*
@@ -730,9 +732,10 @@
PARROT_EXPORT
void
-parrot_new_hash(SHIM_INTERP, ARGOUT(Hash **hptr))
+parrot_new_hash(PARROT_INTERP, ARGOUT(Hash **hptr))
{
- parrot_new_hash_x(hptr,
+ parrot_new_hash_x(interp,
+ hptr,
enum_type_PMC,
Hash_key_type_STRING,
STRING_compare, /* STRING compare */
@@ -751,9 +754,10 @@
PARROT_EXPORT
void
-parrot_new_pmc_hash(SHIM_INTERP, ARGOUT(PMC *container))
+parrot_new_pmc_hash(PARROT_INTERP, ARGOUT(PMC *container))
{
- parrot_new_pmc_hash_x(container,
+ parrot_new_pmc_hash_x(interp,
+ container,
enum_type_PMC,
Hash_key_type_STRING,
STRING_compare, /* STRING compare */
@@ -772,9 +776,10 @@
PARROT_EXPORT
void
-parrot_new_cstring_hash(SHIM_INTERP, ARGOUT(Hash **hptr))
+parrot_new_cstring_hash(PARROT_INTERP, ARGOUT(Hash **hptr))
{
- parrot_new_hash_x(hptr,
+ parrot_new_hash_x(interp,
+ hptr,
enum_type_PMC,
Hash_key_type_cstring,
(hash_comp_fn)cstring_compare, /* cstring compare */
@@ -799,7 +804,7 @@
PARROT_WARN_UNUSED_RESULT
PARROT_MALLOC
static Hash *
-create_hash(PARROT_DATA_TYPE val_type, Hash_key_type hkey_type,
+create_hash(PARROT_INTERP, PARROT_DATA_TYPE val_type, Hash_key_type hkey_type,
ARGIN(hash_comp_fn compare), ARGIN(hash_hash_key_fn keyhash))
{
size_t i;
@@ -812,8 +817,7 @@
hash->entry_type = val_type;
hash->key_type = hkey_type;
- /* RT #59472 - randomize */
- hash->seed = 3793;
+ hash->seed = interp->hash_seed;
PARROT_ASSERT(INITIAL_BUCKETS % 4 == 0);
@@ -919,13 +923,14 @@
*/
void
-parrot_new_hash_x(ARGOUT(Hash **hptr),
+parrot_new_hash_x(PARROT_INTERP,
+ ARGOUT(Hash **hptr),
PARROT_DATA_TYPE val_type,
Hash_key_type hkey_type,
NOTNULL(hash_comp_fn compare),
NOTNULL(hash_hash_key_fn keyhash))
{
- *hptr = create_hash(val_type, hkey_type, compare, keyhash);
+ *hptr = create_hash(interp, val_type, hkey_type, compare, keyhash);
}
/*
@@ -941,13 +946,14 @@
*/
void
-parrot_new_pmc_hash_x(ARGMOD(PMC *container),
+parrot_new_pmc_hash_x(PARROT_INTERP,
+ ARGMOD(PMC *container),
PARROT_DATA_TYPE val_type,
Hash_key_type hkey_type,
NOTNULL(hash_comp_fn compare),
NOTNULL(hash_hash_key_fn keyhash))
{
- Hash * const hash = create_hash(val_type, hkey_type, compare, keyhash);
+ Hash * const hash = create_hash(interp, val_type, hkey_type, compare, keyhash);
PMC_struct_val(container) = hash;
hash->container = container;
}
@@ -964,9 +970,9 @@
PARROT_EXPORT
void
-parrot_new_pointer_hash(SHIM_INTERP, ARGOUT(Hash **hptr))
+parrot_new_pointer_hash(PARROT_INTERP, ARGOUT(Hash **hptr))
{
- parrot_new_hash_x(hptr, enum_type_ptr, Hash_key_type_ptr, pointer_compare, key_hash_pointer);
+ parrot_new_hash_x(interp, hptr, enum_type_ptr, Hash_key_type_ptr, pointer_compare, key_hash_pointer);
}
/*
@@ -991,7 +997,8 @@
? constant_pmc_new_noinit(interp, enum_class_Hash)
: pmc_new_noinit(interp, enum_class_Hash);
- parrot_new_pmc_hash_x(h, enum_type_INTVAL, Hash_key_type_int, int_compare, key_hash_int);
+ parrot_new_pmc_hash_x(interp, h, enum_type_INTVAL, Hash_key_type_int,
+ int_compare, key_hash_int);
PObj_active_destroy_SET(h);
return h;
}
Index: src/string.c
===================================================================
--- src/string.c (revision 33133)
+++ src/string.c (working copy)
@@ -262,6 +262,13 @@
const size_t n_parrot_cstrings =
sizeof (parrot_cstrings) / sizeof (parrot_cstrings[0]);
+ /* TODO: hash_seed should be randomized on a per-interp basis. Before this
+ * can happen, shared STRINGs need to always be marked as such.
+ * See RT #59810 and #59472
+ * interp->hash_seed = Parrot_uint_rand(0);
+ */
+ interp->hash_seed = 3793;
+
/* Set up the cstring cache, then load the basic encodings and charsets */
if (!interp->parent_interpreter) {
parrot_new_cstring_hash(interp, &const_cstring_hash);
@@ -2257,11 +2264,11 @@
PARROT_EXPORT
PARROT_WARN_UNUSED_RESULT
size_t
-string_hash(PARROT_INTERP, ARGMOD_NULLOK(STRING *s), size_t seed)
+string_hash(PARROT_INTERP, ARGMOD_NULLOK(STRING *s))
{
register size_t h;
+ UINTVAL seed = interp->hash_seed;
- /* TODO: #59810 (using seed != 3793 breaks things) */
if (!s)
return seed;
Index: src/pmc/addrregistry.pmc
===================================================================
--- src/pmc/addrregistry.pmc (revision 33133)
+++ src/pmc/addrregistry.pmc (working copy)
@@ -55,7 +55,7 @@
PMC_data(SELF) = addr_registry;
PObj_custom_mark_destroy_SETALL(SELF);
- parrot_new_pmc_hash_x(SELF, enum_type_int, Hash_key_type_PMC,
+ parrot_new_pmc_hash_x(INTERP, SELF, enum_type_int, Hash_key_type_PMC,
int_compare, key_hash_int);
registry = (Hash *)PMC_struct_val(SELF);
Index: src/pmc/lexinfo.pmc
===================================================================
--- src/pmc/lexinfo.pmc (revision 33133)
+++ src/pmc/lexinfo.pmc (working copy)
@@ -65,7 +65,7 @@
PARROT_ASSERT(PObj_constant_TEST(SELF));
PMC_pmc_val(SELF) = sub;
- parrot_new_pmc_hash_x(SELF,
+ parrot_new_pmc_hash_x(INTERP, SELF,
(PARROT_DATA_TYPE)enum_hash_int,
Hash_key_type_STRING,
(hash_comp_fn)string_equal, /* STRING compare */
Index: include/parrot/hash.h
===================================================================
--- include/parrot/hash.h (revision 33133)
+++ include/parrot/hash.h (working copy)
@@ -215,25 +215,29 @@
FUNC_MODIFIES(*hash);
void parrot_new_hash_x(
+ PARROT_INTERP,
ARGOUT(Hash **hptr),
PARROT_DATA_TYPE val_type,
Hash_key_type hkey_type,
NOTNULL(hash_comp_fn compare),
NOTNULL(hash_hash_key_fn keyhash))
__attribute__nonnull__(1)
- __attribute__nonnull__(4)
+ __attribute__nonnull__(2)
__attribute__nonnull__(5)
+ __attribute__nonnull__(6)
FUNC_MODIFIES(*hptr);
void parrot_new_pmc_hash_x(
+ PARROT_INTERP,
ARGMOD(PMC *container),
PARROT_DATA_TYPE val_type,
Hash_key_type hkey_type,
NOTNULL(hash_comp_fn compare),
NOTNULL(hash_hash_key_fn keyhash))
__attribute__nonnull__(1)
- __attribute__nonnull__(4)
+ __attribute__nonnull__(2)
__attribute__nonnull__(5)
+ __attribute__nonnull__(6)
FUNC_MODIFIES(*container);
/* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
Index: include/parrot/string_funcs.h
===================================================================
--- include/parrot/string_funcs.h (revision 33133)
+++ include/parrot/string_funcs.h (working copy)
@@ -291,7 +291,7 @@
PARROT_EXPORT
PARROT_WARN_UNUSED_RESULT
-size_t string_hash(PARROT_INTERP, ARGMOD_NULLOK(STRING *s), size_t seed)
+size_t string_hash(PARROT_INTERP, ARGMOD_NULLOK(STRING *s))
__attribute__nonnull__(1);
PARROT_EXPORT
Index: include/parrot/interpreter.h
===================================================================
--- include/parrot/interpreter.h (revision 33133)
+++ include/parrot/interpreter.h (working copy)
@@ -376,6 +376,8 @@
/* per interpreter global vars */
INTVAL world_inited; /* world_init_once() is done */
+ UINTVAL hash_seed; /* STRING hash seed */
+
PMC *iglobals; /* SArray of PMCs, containing: */
/* 0: PMC *Parrot_base_classname_hash; hash containing name->base_type */
/* 1: PMC *Parrot_compreg_hash; hash containing assembler/compilers */
Index: languages/lua/src/pmc/luatable.pmc
===================================================================
--- languages/lua/src/pmc/luatable.pmc (revision 33133)
+++ languages/lua/src/pmc/luatable.pmc (working copy)
@@ -101,7 +101,7 @@
h = PMC_int_val(key);
}
else if (PMC_type(key) == dynpmc_LuaString) {
- h = string_hash(interp, VTABLE_get_string(interp, key), 3793);
+ h = string_hash(interp, VTABLE_get_string(interp, key));
}
else {
h = (unsigned long)PMC_struct_val(key);
@@ -117,7 +117,7 @@
/* specialized version for strings */
static PMC** lua_getstr(PARROT_INTERP, const LuaHash *t, STRING *key)
{
- unsigned long h = string_hash(interp, key, 3793);
+ unsigned long h = string_hash(interp, key);
Node *n = &t->node[h&(t->size-1)];
do {
Index: languages/pipp/src/pipp_hash.c
===================================================================
--- languages/pipp/src/pipp_hash.c (revision 33133)
+++ languages/pipp/src/pipp_hash.c (working copy)
@@ -374,7 +374,7 @@
INTVAL key_hash, bucket_idx;
PippBucket *bucket;
- key_hash = string_hash(interp, key, PIPP_HASH_SEED);
+ key_hash = string_hash(interp, key);
bucket = ht->buckets[key_hash & ht->hashMask];
dprintf("pipp_hash_get_bucket called with key '%Ss', has hash 0x%X\n",
key, key_hash);
@@ -427,7 +427,7 @@
PippBucket *first_bucket, *curr_bucket;
INTVAL key_hash, bucket_idx;
- key_hash = string_hash(interp, key, PIPP_HASH_SEED);
+ key_hash = string_hash(interp, key);
bucket_idx = key_hash & ht->hashMask;
curr_bucket = ht->buckets[bucket_idx];
first_bucket = curr_bucket;
@@ -563,7 +563,7 @@
}
s_key = string_from_int(interp, ht->nextIndex);
- key_hash = string_hash(interp, s_key, PIPP_HASH_SEED);
+ key_hash = string_hash(interp, s_key);
bkt = mem_allocate_zeroed_typed(PippBucket);
bkt->key = s_key;
@@ -640,7 +640,7 @@
}
s_key = string_from_int(interp, 0);
- key_hash = string_hash(interp, s_key, PIPP_HASH_SEED);
+ key_hash = string_hash(interp, s_key);
bkt = mem_allocate_zeroed_typed(PippBucket);
bkt->key = s_key;