Since we already compute the length in characters, use it to detect
all-ASCII strings and hash them the same way we hash Latin-1 strings.
libguile/hash.c (scm_i_utf8_string_hash): When byte_len == char_len
(i.e., every character is one byte, so the string is pure ASCII),
hash it via the existing narrow-string path.
---
libguile/hash.c | 28 ++++++++++++++++------------
1 file changed, 16 insertions(+), 12 deletions(-)
diff --git a/libguile/hash.c b/libguile/hash.c
index d92f60df8..bc65deb25 100644
--- a/libguile/hash.c
+++ b/libguile/hash.c
@@ -169,25 +169,29 @@ scm_i_latin1_string_hash (const char *str, size_t len)
unsigned long
scm_i_utf8_string_hash (const char *str, size_t len)
{
- const uint8_t *end, *ustr = (const uint8_t *) str;
- unsigned long ret;
-
- /* The length of the string in characters. This name corresponds to
- Jenkins' original name. */
- size_t length;
-
- uint32_t a, b, c, u32;
-
if (len == (size_t) -1)
len = strlen (str);
- end = ustr + len;
-
+ const uint8_t *ustr = (const uint8_t *) str;
if (u8_check (ustr, len) != NULL)
/* Invalid UTF-8; punt. */
return scm_i_string_hash (scm_from_utf8_stringn (str, len));
- length = u8_mbsnlen (ustr, len);
+ /* The length of the string in characters. This name corresponds to
+ Jenkins' original name. */
+ size_t length = u8_mbsnlen (ustr, len);
+
+ if (len == length) // ascii, same as narrow_string_hash above
+ {
+ unsigned long ret;
+ JENKINS_LOOKUP3_HASHWORD2 (str, len, ret);
+ ret >>= 2; /* Ensure that it fits in a fixnum. */
+ return ret;
+ }
+
+ const uint8_t *end = ustr + len;
+ uint32_t a, b, c, u32;
+ unsigned long ret;
/* Set up the internal state. */
a = b = c = 0xdeadbeef + ((uint32_t)(length<<2)) + 47;
--
2.43.0