Christoph Otto wrote:
Nicholas Clark wrote:
On Sun, Nov 16, 2008 at 06:52:08PM -0800, chromatic wrote:

I'd rather remove the hash seed from the key calculation. Instead, let's use a global seed (#defined somewhere) as the initial seed, cache the calculated

You don't want a constant global seed, else you fall foul of Algorithmic
Complexity attacks:  http://www.cs.rice.edu/~scrosby/hash/

Any mathematician is welcome to prove that this makes things worse, however.

Scott A Crosby and Dan S Wallach appear to be computer scientists. Will they
do? :-)

Nicholas Clark


What about a per-interp seed, set to some random value during Parrot's initialization? This would keep any sharp edges away from users and avoid those charming algorithmic complexity attacks (assuming Parrot_int_rand can find sufficient randomness).


This patch starts to implement a per-interp hash seed by storing the seed as part of the parrot_interp_t struct and initializing it in string_init(). It also changes string_hash() to use the per-interp seed instead of an argument. key_hash_STRING checks PObj_is_shared_FLAG and always recalculates the hash if the flag is true, which should take care of STRINGs shared between interpreters.

For now, the patch uses a fixed seed of 3793 because Parrot with a randomized seed exhibits some new test failures. If the code is changed to use a randomized seed, 11 new test failures appear, mostly in t/pmc/threads.t. It appears that PObj_is_shared_FLAG is either the wrong flag to use, or some code somewhere that marks STRINGs shared isn't doing its job.

With the fixed seed there is one new test failure (#5 of t/pmc/orderedhash.t), which I'll be working on.

I'd appreciate comments or a quick code review as to whether I should apply the patch as-is (sans randomization) once the failing OrderedHash test passes. It's admittedly not a complete solution, but it does hide Parrot's hash seed from any PARROT_EXPORT functions and makes testing and fixing the other bugs easier.
Index: src/hash.c
===================================================================
--- src/hash.c	(revision 33133)
+++ src/hash.c	(working copy)
@@ -44,12 +44,14 @@
 PARROT_WARN_UNUSED_RESULT
 PARROT_MALLOC
 static Hash * create_hash(
+    PARROT_INTERP,
     PARROT_DATA_TYPE val_type,
     Hash_key_type hkey_type,
     ARGIN(hash_comp_fn compare),
     ARGIN(hash_hash_key_fn keyhash))
-        __attribute__nonnull__(3)
-        __attribute__nonnull__(4);
+        __attribute__nonnull__(1)
+        __attribute__nonnull__(4)
+        __attribute__nonnull__(5);
 
 PARROT_WARN_UNUSED_RESULT
 PARROT_PURE_FUNCTION
@@ -146,11 +148,11 @@
 static size_t
 key_hash_STRING(PARROT_INTERP, ARGMOD(STRING *s), size_t seed)
 {
-    if (s->hashval) {
-        return s->hashval;
+    if (PObj_is_shared_TEST(s) || s->hashval == 0) {
+        return string_hash(interp, s);
     }
 
-    return string_hash(interp, s, seed);
+    return s->hashval;
 }
 
 /*
@@ -730,9 +732,10 @@
 
 PARROT_EXPORT
 void
-parrot_new_hash(SHIM_INTERP, ARGOUT(Hash **hptr))
+parrot_new_hash(PARROT_INTERP, ARGOUT(Hash **hptr))
 {
-    parrot_new_hash_x(hptr,
+    parrot_new_hash_x(interp,
+            hptr,
             enum_type_PMC,
             Hash_key_type_STRING,
             STRING_compare,     /* STRING compare */
@@ -751,9 +754,10 @@
 
 PARROT_EXPORT
 void
-parrot_new_pmc_hash(SHIM_INTERP, ARGOUT(PMC *container))
+parrot_new_pmc_hash(PARROT_INTERP, ARGOUT(PMC *container))
 {
-    parrot_new_pmc_hash_x(container,
+    parrot_new_pmc_hash_x(interp,
+            container,
             enum_type_PMC,
             Hash_key_type_STRING,
             STRING_compare,     /* STRING compare */
@@ -772,9 +776,10 @@
 
 PARROT_EXPORT
 void
-parrot_new_cstring_hash(SHIM_INTERP, ARGOUT(Hash **hptr))
+parrot_new_cstring_hash(PARROT_INTERP, ARGOUT(Hash **hptr))
 {
-    parrot_new_hash_x(hptr,
+    parrot_new_hash_x(interp,
+            hptr,
             enum_type_PMC,
             Hash_key_type_cstring,
             (hash_comp_fn)cstring_compare,     /* cstring compare */
@@ -799,7 +804,7 @@
 PARROT_WARN_UNUSED_RESULT
 PARROT_MALLOC
 static Hash *
-create_hash(PARROT_DATA_TYPE val_type, Hash_key_type hkey_type,
+create_hash(PARROT_INTERP, PARROT_DATA_TYPE val_type, Hash_key_type hkey_type,
         ARGIN(hash_comp_fn compare), ARGIN(hash_hash_key_fn keyhash))
 {
     size_t      i;
@@ -812,8 +817,7 @@
     hash->entry_type = val_type;
     hash->key_type   = hkey_type;
 
-    /* RT #59472 - randomize */
-    hash->seed = 3793;
+    hash->seed = interp->hash_seed;
 
     PARROT_ASSERT(INITIAL_BUCKETS % 4 == 0);
 
@@ -919,13 +923,14 @@
 */
 
 void
-parrot_new_hash_x(ARGOUT(Hash **hptr),
+parrot_new_hash_x(PARROT_INTERP,
+        ARGOUT(Hash **hptr),
         PARROT_DATA_TYPE val_type,
         Hash_key_type hkey_type,
         NOTNULL(hash_comp_fn compare),
         NOTNULL(hash_hash_key_fn keyhash))
 {
-    *hptr = create_hash(val_type, hkey_type, compare, keyhash);
+    *hptr = create_hash(interp, val_type, hkey_type, compare, keyhash);
 }
 
 /*
@@ -941,13 +946,14 @@
 */
 
 void
-parrot_new_pmc_hash_x(ARGMOD(PMC *container),
+parrot_new_pmc_hash_x(PARROT_INTERP,
+        ARGMOD(PMC *container),
         PARROT_DATA_TYPE val_type,
         Hash_key_type hkey_type,
         NOTNULL(hash_comp_fn compare),
         NOTNULL(hash_hash_key_fn keyhash))
 {
-    Hash * const hash = create_hash(val_type, hkey_type, compare, keyhash);
+    Hash * const hash = create_hash(interp, val_type, hkey_type, compare, keyhash);
     PMC_struct_val(container) = hash;
     hash->container = container;
 }
@@ -964,9 +970,9 @@
 
 PARROT_EXPORT
 void
-parrot_new_pointer_hash(SHIM_INTERP, ARGOUT(Hash **hptr))
+parrot_new_pointer_hash(PARROT_INTERP, ARGOUT(Hash **hptr))
 {
-    parrot_new_hash_x(hptr, enum_type_ptr, Hash_key_type_ptr, pointer_compare, key_hash_pointer);
+    parrot_new_hash_x(interp, hptr, enum_type_ptr, Hash_key_type_ptr, pointer_compare, key_hash_pointer);
 }
 
 /*
@@ -991,7 +997,8 @@
             ? constant_pmc_new_noinit(interp, enum_class_Hash)
             : pmc_new_noinit(interp, enum_class_Hash);
 
-    parrot_new_pmc_hash_x(h, enum_type_INTVAL, Hash_key_type_int, int_compare, key_hash_int);
+    parrot_new_pmc_hash_x(interp, h, enum_type_INTVAL, Hash_key_type_int,
+            int_compare, key_hash_int);
     PObj_active_destroy_SET(h);
     return h;
 }
Index: src/string.c
===================================================================
--- src/string.c	(revision 33133)
+++ src/string.c	(working copy)
@@ -262,6 +262,13 @@
     const size_t n_parrot_cstrings =
         sizeof (parrot_cstrings) / sizeof (parrot_cstrings[0]);
 
+    /* TODO: hash_seed should be randomized on a per-interp basis.  Before this
+     * can happen, shared STRINGs need to always be marked as such.  
+     * See RT #59810 and #59472
+     * interp->hash_seed = Parrot_uint_rand(0);
+     */
+    interp->hash_seed = 3793;
+
     /* Set up the cstring cache, then load the basic encodings and charsets */
     if (!interp->parent_interpreter) {
         parrot_new_cstring_hash(interp, &const_cstring_hash);
@@ -2257,11 +2264,11 @@
 PARROT_EXPORT
 PARROT_WARN_UNUSED_RESULT
 size_t
-string_hash(PARROT_INTERP, ARGMOD_NULLOK(STRING *s), size_t seed)
+string_hash(PARROT_INTERP, ARGMOD_NULLOK(STRING *s))
 {
     register size_t h;
+    UINTVAL         seed = interp->hash_seed;
 
-    /* TODO: #59810 (using seed != 3793 breaks things) */
     if (!s)
         return seed;
 
Index: src/pmc/addrregistry.pmc
===================================================================
--- src/pmc/addrregistry.pmc	(revision 33133)
+++ src/pmc/addrregistry.pmc	(working copy)
@@ -55,7 +55,7 @@
         PMC_data(SELF)      = addr_registry;
         PObj_custom_mark_destroy_SETALL(SELF);
 
-        parrot_new_pmc_hash_x(SELF, enum_type_int, Hash_key_type_PMC,
+        parrot_new_pmc_hash_x(INTERP, SELF, enum_type_int, Hash_key_type_PMC,
                               int_compare, key_hash_int);
 
         registry            = (Hash *)PMC_struct_val(SELF);
Index: src/pmc/lexinfo.pmc
===================================================================
--- src/pmc/lexinfo.pmc	(revision 33133)
+++ src/pmc/lexinfo.pmc	(working copy)
@@ -65,7 +65,7 @@
         PARROT_ASSERT(PObj_constant_TEST(SELF));
         PMC_pmc_val(SELF) = sub;
 
-        parrot_new_pmc_hash_x(SELF,
+        parrot_new_pmc_hash_x(INTERP, SELF,
             (PARROT_DATA_TYPE)enum_hash_int,
             Hash_key_type_STRING,
             (hash_comp_fn)string_equal,     /* STRING compare */
Index: include/parrot/hash.h
===================================================================
--- include/parrot/hash.h	(revision 33133)
+++ include/parrot/hash.h	(working copy)
@@ -215,25 +215,29 @@
         FUNC_MODIFIES(*hash);
 
 void parrot_new_hash_x(
+    PARROT_INTERP,
     ARGOUT(Hash **hptr),
     PARROT_DATA_TYPE val_type,
     Hash_key_type hkey_type,
     NOTNULL(hash_comp_fn compare),
     NOTNULL(hash_hash_key_fn keyhash))
         __attribute__nonnull__(1)
-        __attribute__nonnull__(4)
+        __attribute__nonnull__(2)
         __attribute__nonnull__(5)
+        __attribute__nonnull__(6)
         FUNC_MODIFIES(*hptr);
 
 void parrot_new_pmc_hash_x(
+    PARROT_INTERP,
     ARGMOD(PMC *container),
     PARROT_DATA_TYPE val_type,
     Hash_key_type hkey_type,
     NOTNULL(hash_comp_fn compare),
     NOTNULL(hash_hash_key_fn keyhash))
         __attribute__nonnull__(1)
-        __attribute__nonnull__(4)
+        __attribute__nonnull__(2)
         __attribute__nonnull__(5)
+        __attribute__nonnull__(6)
         FUNC_MODIFIES(*container);
 
 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
Index: include/parrot/string_funcs.h
===================================================================
--- include/parrot/string_funcs.h	(revision 33133)
+++ include/parrot/string_funcs.h	(working copy)
@@ -291,7 +291,7 @@
 
 PARROT_EXPORT
 PARROT_WARN_UNUSED_RESULT
-size_t string_hash(PARROT_INTERP, ARGMOD_NULLOK(STRING *s), size_t seed)
+size_t string_hash(PARROT_INTERP, ARGMOD_NULLOK(STRING *s))
         __attribute__nonnull__(1);
 
 PARROT_EXPORT
Index: include/parrot/interpreter.h
===================================================================
--- include/parrot/interpreter.h	(revision 33133)
+++ include/parrot/interpreter.h	(working copy)
@@ -376,6 +376,8 @@
     /* per interpreter global vars */
     INTVAL world_inited;                      /* world_init_once() is done */
 
+    UINTVAL hash_seed;                        /* STRING hash seed */
+
     PMC *iglobals;                            /* SArray of PMCs, containing: */
     /* 0:   PMC *Parrot_base_classname_hash; hash containing name->base_type */
     /* 1:   PMC *Parrot_compreg_hash;    hash containing assembler/compilers */
Index: languages/lua/src/pmc/luatable.pmc
===================================================================
--- languages/lua/src/pmc/luatable.pmc	(revision 33133)
+++ languages/lua/src/pmc/luatable.pmc	(working copy)
@@ -101,7 +101,7 @@
         h = PMC_int_val(key);
     }
     else if (PMC_type(key) == dynpmc_LuaString) {
-        h = string_hash(interp, VTABLE_get_string(interp, key), 3793);
+        h = string_hash(interp, VTABLE_get_string(interp, key));
     }
     else {
         h = (unsigned long)PMC_struct_val(key);
@@ -117,7 +117,7 @@
 /* specialized version for strings */
 static PMC** lua_getstr(PARROT_INTERP, const LuaHash *t, STRING *key)
 {
-    unsigned long h = string_hash(interp, key, 3793);
+    unsigned long h = string_hash(interp, key);
     Node         *n = &t->node[h&(t->size-1)];
 
     do {
Index: languages/pipp/src/pipp_hash.c
===================================================================
--- languages/pipp/src/pipp_hash.c	(revision 33133)
+++ languages/pipp/src/pipp_hash.c	(working copy)
@@ -374,7 +374,7 @@
     INTVAL key_hash, bucket_idx;
     PippBucket *bucket;
 
-    key_hash = string_hash(interp, key, PIPP_HASH_SEED);
+    key_hash = string_hash(interp, key);
     bucket   = ht->buckets[key_hash & ht->hashMask];
     dprintf("pipp_hash_get_bucket called with key '%Ss', has hash 0x%X\n",
             key, key_hash);
@@ -427,7 +427,7 @@
     PippBucket *first_bucket, *curr_bucket;
     INTVAL      key_hash, bucket_idx;
 
-    key_hash     = string_hash(interp, key, PIPP_HASH_SEED);
+    key_hash     = string_hash(interp, key);
     bucket_idx   = key_hash & ht->hashMask;
     curr_bucket  = ht->buckets[bucket_idx];
     first_bucket = curr_bucket;
@@ -563,7 +563,7 @@
     }
 
     s_key    = string_from_int(interp, ht->nextIndex);
-    key_hash = string_hash(interp, s_key, PIPP_HASH_SEED);
+    key_hash = string_hash(interp, s_key);
     bkt      = mem_allocate_zeroed_typed(PippBucket);
 
     bkt->key       = s_key;
@@ -640,7 +640,7 @@
     }
 
     s_key    = string_from_int(interp, 0);
-    key_hash = string_hash(interp, s_key, PIPP_HASH_SEED);
+    key_hash = string_hash(interp, s_key);
     bkt      = mem_allocate_zeroed_typed(PippBucket);
 
     bkt->key       = s_key;

Reply via email to