In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/34dadc62d399176c3286094e10619c6300ab9243?hp=86f641010e0569b8d5a3f09a3011af1522b6c14a>
- Log ----------------------------------------------------------------- commit 34dadc62d399176c3286094e10619c6300ab9243 Author: David Mitchell <da...@iabyn.com> Date: Mon Jul 7 23:17:13 2014 +0100 faster constant hash key lookups ($hash{const}) On something like $hash{constantstring}, at compile-time the PVX string on the SV attached to the OP_CONST is converted into a HEK (with an appropriate offset shift). At run-time on hash keying, this HEK is used to speed up the bucket search; however it turns out that this can be improved. Currently, the main bucket loop does: for (; entry; entry = HeNEXT(entry)) { if (HeHASH(entry) != hash) continue; if (HeKLEN(entry) != (I32)klen) continue; if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen)) continue; if ((HeKFLAGS(entry) ^ masked_flags) & HVhek_UTF8) continue; The 'HeKEY(entry) != key' test is the bit that allows us to skip the memNE() when 'key' is actually part of a HEK. However, this means that in the const HEK scenario, for a match, we do pointless hash, klen and HVhek_UTF8 tests, when HeKEY(entry) == key is sufficient for a match. Conversely, in the non-const-HEK scenario, the 'HeKEY(entry) != key' will always fail, and so it's just dead weight in the loop. To work around this, this commit splits the code into two separate bucket search loops; one for const-HEKs that just compare HEK pointers, and a general loop that now doesn't have do the 'HeKEY(entry) != key' test. Analysing this code with cachegrind shows that with this commit, lookups of constant keys that exist (e.g. the typical perl object scenario, $self->{somefield}) takes 15% less instruction reads in hv_common(), 14% less data reads and 27% less writes. A lookup with a non-existing constant key ($hash{not_exist}) is about the same as before (0.7% improvement). Non-constant existing lookup ($hash{$existing_key}) is about 5% less instructions, while $hash{$non_existing_key} is about 0.7%. ----------------------------------------------------------------------- Summary of changes: hv.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 89 insertions(+), 18 deletions(-) diff --git a/hv.c b/hv.c index a01cb76..5bab2d7 100644 --- a/hv.c +++ b/hv.c @@ -345,6 +345,7 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, bool is_utf8; int masked_flags; const int return_svp = action & HV_FETCH_JUST_SV; + HEK *keysv_hek = NULL; if (!hv) return NULL; @@ -614,12 +615,13 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, } } - if (!hash) { - if (keysv && (SvIsCOW_shared_hash(keysv))) - hash = SvSHARED_HASH(keysv); - else - PERL_HASH(hash, key, klen); + if (keysv && (SvIsCOW_shared_hash(keysv))) { + if (HvSHAREKEYS(hv)) + keysv_hek = SvSHARED_HEK_FROM_PV(SvPVX_const(keysv)); + hash = SvSHARED_HASH(keysv); } + else if (!hash) + PERL_HASH(hash, key, klen); masked_flags = (flags & HVhek_MASK); @@ -630,16 +632,48 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, { entry = (HvARRAY(hv))[hash & (I32) HvMAX(hv)]; } + + if (!entry) + goto not_found; + + if (keysv_hek) { + /* keysv is actually a HEK in disguise, so we can match just by + * comparing the HEK pointers in the HE chain. There is a slight + * caveat: on something like "\x80", which has both plain and utf8 + * representations, perl's hashes do encoding-insensitive lookups, + * but preserve the encoding of the stored key. Thus a particular + * key could map to two different HEKs in PL_strtab. We only + * conclude 'not found' if all the flags are the same; otherwise + * we fall back to a full search (this should only happen in rare + * cases). + */ + int keysv_flags = HEK_FLAGS(keysv_hek); + HE *orig_entry = entry; + + for (; entry; entry = HeNEXT(entry)) { + HEK *hek = HeKEY_hek(entry); + if (hek == keysv_hek) + goto found; + if (HEK_FLAGS(hek) != keysv_flags) + break; /* need to do full match */ + } + if (!entry) + goto not_found; + /* failed on shortcut - do full search loop */ + entry = orig_entry; + } + for (; entry; entry = HeNEXT(entry)) { if (HeHASH(entry) != hash) /* strings can't be equal */ continue; if (HeKLEN(entry) != (I32)klen) continue; - if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen)) /* is this it? */ + if (memNE(HeKEY(entry),key,klen)) /* is this it? */ continue; if ((HeKFLAGS(entry) ^ masked_flags) & HVhek_UTF8) continue; + found: if (action & (HV_FETCH_LVALUE|HV_FETCH_ISSTORE)) { if (HeKFLAGS(entry) != masked_flags) { /* We match if HVhek_UTF8 bit in our flags and hash key's @@ -708,6 +742,8 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, } return entry; } + + not_found: #ifdef DYNAMIC_ENV_FETCH /* %ENV lookup? If so, try to fetch the value now */ if (!(action & HV_FETCH_ISSTORE) && SvRMAGICAL((const SV *)hv) @@ -955,9 +991,14 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, XPVHV* xhv; HE *entry; HE **oentry; - HE *const *first_entry; + HE **first_entry; bool is_utf8 = (k_flags & HVhek_UTF8) ? TRUE : FALSE; int masked_flags; + HEK *keysv_hek = NULL; + U8 mro_changes = 0; /* 1 = isa; 2 = package moved */ + SV *sv; + GV *gv = NULL; + HV *stash = NULL; if (SvRMAGICAL(hv)) { bool needs_copy; @@ -1022,32 +1063,60 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, HvHASKFLAGS_on(MUTABLE_SV(hv)); } - if (!hash) { - if (keysv && (SvIsCOW_shared_hash(keysv))) - hash = SvSHARED_HASH(keysv); - else - PERL_HASH(hash, key, klen); + if (keysv && (SvIsCOW_shared_hash(keysv))) { + if (HvSHAREKEYS(hv)) + keysv_hek = SvSHARED_HEK_FROM_PV(SvPVX_const(keysv)); + hash = SvSHARED_HASH(keysv); } + else if (!hash) + PERL_HASH(hash, key, klen); masked_flags = (k_flags & HVhek_MASK); first_entry = oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)]; entry = *oentry; - for (; entry; oentry = &HeNEXT(entry), entry = *oentry) { - SV *sv; - U8 mro_changes = 0; /* 1 = isa; 2 = package moved */ - GV *gv = NULL; - HV *stash = NULL; + if (!entry) + goto not_found; + + if (keysv_hek) { + /* keysv is actually a HEK in disguise, so we can match just by + * comparing the HEK pointers in the HE chain. There is a slight + * caveat: on something like "\x80", which has both plain and utf8 + * representations, perl's hashes do encoding-insensitive lookups, + * but preserve the encoding of the stored key. Thus a particular + * key could map to two different HEKs in PL_strtab. We only + * conclude 'not found' if all the flags are the same; otherwise + * we fall back to a full search (this should only happen in rare + * cases). + */ + int keysv_flags = HEK_FLAGS(keysv_hek); + + for (; entry; oentry = &HeNEXT(entry), entry = *oentry) { + HEK *hek = HeKEY_hek(entry); + if (hek == keysv_hek) + goto found; + if (HEK_FLAGS(hek) != keysv_flags) + break; /* need to do full match */ + } + if (!entry) + goto not_found; + /* failed on shortcut - do full search loop */ + oentry = first_entry; + entry = *oentry; + } + + for (; entry; oentry = &HeNEXT(entry), entry = *oentry) { if (HeHASH(entry) != hash) /* strings can't be equal */ continue; if (HeKLEN(entry) != (I32)klen) continue; - if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen)) /* is this it? */ + if (memNE(HeKEY(entry),key,klen)) /* is this it? */ continue; if ((HeKFLAGS(entry) ^ masked_flags) & HVhek_UTF8) continue; + found: if (hv == PL_strtab) { if (k_flags & HVhek_FREEKEY) Safefree(key); @@ -1148,6 +1217,8 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, return sv; } + + not_found: if (SvREADONLY(hv)) { hv_notallowed(k_flags, key, klen, "Attempt to delete disallowed key '%"SVf"' from" -- Perl5 Master Repository