In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/34dadc62d399176c3286094e10619c6300ab9243?hp=86f641010e0569b8d5a3f09a3011af1522b6c14a>

- Log -----------------------------------------------------------------
commit 34dadc62d399176c3286094e10619c6300ab9243
Author: David Mitchell <da...@iabyn.com>
Date:   Mon Jul 7 23:17:13 2014 +0100

    faster constant hash key lookups ($hash{const})
    
    On something like $hash{constantstring}, at compile-time the
    PVX string on the SV attached to the OP_CONST is converted into a
    HEK (with an appropriate offset shift).
    
    At run-time on hash keying, this HEK is used to speed up the bucket
    search; however it turns out that this can be improved. Currently,
    the main bucket loop does:
    
        for (; entry; entry = HeNEXT(entry)) {
        if (HeHASH(entry) != hash)
            continue;
        if (HeKLEN(entry) != (I32)klen)
            continue;
        if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen))
            continue;
        if ((HeKFLAGS(entry) ^ masked_flags) & HVhek_UTF8)
            continue;
    
    The 'HeKEY(entry) != key' test is the bit that allows us to skip the
    memNE() when 'key' is actually part of a HEK. However, this means that in
    the const HEK scenario, for a match, we do pointless hash, klen and
    HVhek_UTF8 tests, when HeKEY(entry) == key is sufficient for a
    match. Conversely, in the non-const-HEK scenario, the 'HeKEY(entry) !=
    key' will always fail, and so it's just dead weight in the loop.
    
    To work around this, this commit splits the code into two separate bucket
    search loops; one for const-HEKs that just compare HEK pointers, and a
    general loop that now doesn't have do the 'HeKEY(entry) != key' test.
    
    Analysing this code with cachegrind shows that with this commit, lookups
    of constant keys that exist (e.g. the typical perl object scenario,
    $self->{somefield}) takes 15% less instruction reads in hv_common(), 14%
    less data reads and 27% less writes.
    
    A lookup with a non-existing constant key ($hash{not_exist}) is about the
    same as before (0.7% improvement).
    
    Non-constant existing lookup ($hash{$existing_key}) is about 5% less
    instructions, while $hash{$non_existing_key} is about 0.7%.
-----------------------------------------------------------------------

Summary of changes:
 hv.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 89 insertions(+), 18 deletions(-)

diff --git a/hv.c b/hv.c
index a01cb76..5bab2d7 100644
--- a/hv.c
+++ b/hv.c
@@ -345,6 +345,7 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, 
STRLEN klen,
     bool is_utf8;
     int masked_flags;
     const int return_svp = action & HV_FETCH_JUST_SV;
+    HEK *keysv_hek = NULL;
 
     if (!hv)
        return NULL;
@@ -614,12 +615,13 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, 
STRLEN klen,
        }
     }
 
-    if (!hash) {
-        if (keysv && (SvIsCOW_shared_hash(keysv)))
-            hash = SvSHARED_HASH(keysv);
-        else
-            PERL_HASH(hash, key, klen);
+    if (keysv && (SvIsCOW_shared_hash(keysv))) {
+        if (HvSHAREKEYS(hv))
+            keysv_hek  = SvSHARED_HEK_FROM_PV(SvPVX_const(keysv));
+        hash = SvSHARED_HASH(keysv);
     }
+    else if (!hash)
+        PERL_HASH(hash, key, klen);
 
     masked_flags = (flags & HVhek_MASK);
 
@@ -630,16 +632,48 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, 
STRLEN klen,
     {
        entry = (HvARRAY(hv))[hash & (I32) HvMAX(hv)];
     }
+
+    if (!entry)
+        goto not_found;
+
+    if (keysv_hek) {
+        /* keysv is actually a HEK in disguise, so we can match just by
+         * comparing the HEK pointers in the HE chain. There is a slight
+         * caveat: on something like "\x80", which has both plain and utf8
+         * representations, perl's hashes do encoding-insensitive lookups,
+         * but preserve the encoding of the stored key. Thus a particular
+         * key could map to two different HEKs in PL_strtab. We only
+         * conclude 'not found' if all the flags are the same; otherwise
+         * we fall back to a full search (this should only happen in rare
+         * cases).
+         */
+        int keysv_flags = HEK_FLAGS(keysv_hek);
+        HE  *orig_entry = entry;
+
+        for (; entry; entry = HeNEXT(entry)) {
+            HEK *hek = HeKEY_hek(entry);
+            if (hek == keysv_hek)
+                goto found;
+            if (HEK_FLAGS(hek) != keysv_flags)
+                break; /* need to do full match */
+        }
+        if (!entry)
+            goto not_found;
+        /* failed on shortcut - do full search loop */
+        entry = orig_entry;
+    }
+
     for (; entry; entry = HeNEXT(entry)) {
        if (HeHASH(entry) != hash)              /* strings can't be equal */
            continue;
        if (HeKLEN(entry) != (I32)klen)
            continue;
-       if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen))        /* is 
this it? */
+       if (memNE(HeKEY(entry),key,klen))       /* is this it? */
            continue;
        if ((HeKFLAGS(entry) ^ masked_flags) & HVhek_UTF8)
            continue;
 
+      found:
         if (action & (HV_FETCH_LVALUE|HV_FETCH_ISSTORE)) {
            if (HeKFLAGS(entry) != masked_flags) {
                /* We match if HVhek_UTF8 bit in our flags and hash key's
@@ -708,6 +742,8 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, 
STRLEN klen,
        }
        return entry;
     }
+
+  not_found:
 #ifdef DYNAMIC_ENV_FETCH  /* %ENV lookup?  If so, try to fetch the value now */
     if (!(action & HV_FETCH_ISSTORE) 
        && SvRMAGICAL((const SV *)hv)
@@ -955,9 +991,14 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char 
*key, STRLEN klen,
     XPVHV* xhv;
     HE *entry;
     HE **oentry;
-    HE *const *first_entry;
+    HE **first_entry;
     bool is_utf8 = (k_flags & HVhek_UTF8) ? TRUE : FALSE;
     int masked_flags;
+    HEK *keysv_hek = NULL;
+    U8 mro_changes = 0; /* 1 = isa; 2 = package moved */
+    SV *sv;
+    GV *gv = NULL;
+    HV *stash = NULL;
 
     if (SvRMAGICAL(hv)) {
        bool needs_copy;
@@ -1022,32 +1063,60 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char 
*key, STRLEN klen,
         HvHASKFLAGS_on(MUTABLE_SV(hv));
     }
 
-    if (!hash) {
-        if (keysv && (SvIsCOW_shared_hash(keysv)))
-            hash = SvSHARED_HASH(keysv);
-        else
-            PERL_HASH(hash, key, klen);
+    if (keysv && (SvIsCOW_shared_hash(keysv))) {
+        if (HvSHAREKEYS(hv))
+            keysv_hek  = SvSHARED_HEK_FROM_PV(SvPVX_const(keysv));
+        hash = SvSHARED_HASH(keysv);
     }
+    else if (!hash)
+        PERL_HASH(hash, key, klen);
 
     masked_flags = (k_flags & HVhek_MASK);
 
     first_entry = oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)];
     entry = *oentry;
-    for (; entry; oentry = &HeNEXT(entry), entry = *oentry) {
-       SV *sv;
-       U8 mro_changes = 0; /* 1 = isa; 2 = package moved */
-       GV *gv = NULL;
-       HV *stash = NULL;
 
+    if (!entry)
+        goto not_found;
+
+    if (keysv_hek) {
+        /* keysv is actually a HEK in disguise, so we can match just by
+         * comparing the HEK pointers in the HE chain. There is a slight
+         * caveat: on something like "\x80", which has both plain and utf8
+         * representations, perl's hashes do encoding-insensitive lookups,
+         * but preserve the encoding of the stored key. Thus a particular
+         * key could map to two different HEKs in PL_strtab. We only
+         * conclude 'not found' if all the flags are the same; otherwise
+         * we fall back to a full search (this should only happen in rare
+         * cases).
+         */
+        int keysv_flags = HEK_FLAGS(keysv_hek);
+
+        for (; entry; oentry = &HeNEXT(entry), entry = *oentry) {
+            HEK *hek = HeKEY_hek(entry);
+            if (hek == keysv_hek)
+                goto found;
+            if (HEK_FLAGS(hek) != keysv_flags)
+                break; /* need to do full match */
+        }
+        if (!entry)
+            goto not_found;
+        /* failed on shortcut - do full search loop */
+        oentry = first_entry;
+        entry = *oentry;
+    }
+
+    for (; entry; oentry = &HeNEXT(entry), entry = *oentry) {
        if (HeHASH(entry) != hash)              /* strings can't be equal */
            continue;
        if (HeKLEN(entry) != (I32)klen)
            continue;
-       if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen))        /* is 
this it? */
+       if (memNE(HeKEY(entry),key,klen))       /* is this it? */
            continue;
        if ((HeKFLAGS(entry) ^ masked_flags) & HVhek_UTF8)
            continue;
 
+      found:
        if (hv == PL_strtab) {
            if (k_flags & HVhek_FREEKEY)
                Safefree(key);
@@ -1148,6 +1217,8 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char 
*key, STRLEN klen,
 
        return sv;
     }
+
+  not_found:
     if (SvREADONLY(hv)) {
        hv_notallowed(k_flags, key, klen,
                        "Attempt to delete disallowed key '%"SVf"' from"

--
Perl5 Master Repository

Reply via email to