Since we already compute the string's length in characters, use it to detect all-ASCII strings and hash them the same way we hash Latin-1 strings.
libguile/hash.c (scm_i_utf8_string_hash): When byte_len == char_len (i.e. every character is a single byte, so the string is pure ASCII), hash it via the existing narrow-string path. --- libguile/hash.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/libguile/hash.c b/libguile/hash.c index d92f60df8..bc65deb25 100644 --- a/libguile/hash.c +++ b/libguile/hash.c @@ -169,25 +169,29 @@ scm_i_latin1_string_hash (const char *str, size_t len) unsigned long scm_i_utf8_string_hash (const char *str, size_t len) { - const uint8_t *end, *ustr = (const uint8_t *) str; - unsigned long ret; - - /* The length of the string in characters. This name corresponds to - Jenkins' original name. */ - size_t length; - - uint32_t a, b, c, u32; - if (len == (size_t) -1) len = strlen (str); - end = ustr + len; - + const uint8_t *ustr = (const uint8_t *) str; if (u8_check (ustr, len) != NULL) /* Invalid UTF-8; punt. */ return scm_i_string_hash (scm_from_utf8_stringn (str, len)); - length = u8_mbsnlen (ustr, len); + /* The length of the string in characters. This name corresponds to + Jenkins' original name. */ + size_t length = u8_mbsnlen (ustr, len); + + if (len == length) // ascii, same as narrow_string_hash above + { + unsigned long ret; + JENKINS_LOOKUP3_HASHWORD2 (str, len, ret); + ret >>= 2; /* Ensure that it fits in a fixnum. */ + return ret; + } + + const uint8_t *end = ustr + len; + uint32_t a, b, c, u32; + unsigned long ret; /* Set up the internal state. */ a = b = c = 0xdeadbeef + ((uint32_t)(length<<2)) + 47; -- 2.43.0