[dpdk-dev] [PATCH] hash: update jhash function with the latest available

2015-04-17 Thread De Lara Guarch, Pablo


> -Original Message-
> From: Richardson, Bruce
> Sent: Thursday, April 16, 2015 3:01 PM
> To: De Lara Guarch, Pablo
> Cc: dev at dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] hash: update jhash function with the latest
> available
> 
> On Thu, Apr 16, 2015 at 02:26:59PM +0100, Pablo de Lara wrote:
> > Jenkins hash function was developed originally in 1996,
> > and was integrated in first versions of DPDK.
> > The function has been improved in 2006,
> > achieving up to 60% better performance, compared to the original one.
> >
> > Check out: http://burtleburtle.net/bob/c/lookup3.c
> >
> > This patch integrates that code in the rte_jhash library,
> > adding also a new function rte_jhash_word2,
> > that returns two different hash values, for a single key.
> >
> 
> Should the addition of the new functionality not be a separate patch from
> the
> update to the existing code?

True, actually, I miss one extra function (2 in total).

> Also, do the new functions return the exact same values as the previous
> versions,
> just faster?

The new functions return different values from the previous version
AND faster (some cases, MUCH faster)

[...]

> > +#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
> > +   case 12:
> > +   c += k[2]; b += k[1]; a += k[0]; break;
> > +   case 11:
> > +   c += k[2]&0xff; b += k[1]; a += k[0]; break;
> > +   case 10:
> > +   c += k[2]&0x; b += k[1]; a += k[0]; break;
> > +   case 9:
> > +   c += k[2]&0xff; b += k[1]; a += k[0]; break;
> > +   case 8:
> > +   b += k[1]; a += k[0]; break;
> > +   case 7:
> > +   b += k[1]&0xff; a += k[0]; break;
> > +   case 6:
> > +   b += k[1]&0x; a += k[0]; break;
> > +   case 5:
> > +   b += k[1]&0xff; a += k[0]; break;
> > +   case 4:
> > +   a += k[0]; break;
> > +   case 3:
> > +   a += k[0]&0xff; break;
> > +   case 2:
> > +   a += k[0]&0x; break;
> > +   case 1:
> > +   a += k[0]&0xff; break;
> > +#else
> > +   case 12:
> > +   c += k[2]; b += k[1]; a += k[0]; break;
> > +   case 11:
> > +   c += k[2]&0xff00; b += k[1]; a += k[0]; break;
> > +   case 10:
> > +   c += k[2]&0x; b += k[1]; a += k[0]; break;
> > +   case 9:
> > +   c += k[2]&0xff00; b += k[1]; a += k[0]; break;
> > +   case 8:
> > +   b += k[1]; a += k[0]; break;
> > +   case 7:
> > +   b += k[1]&0xff00; a += k[0]; break;
> > +   case 6:
> > +   b += k[1]&0x; a += k[0]; break;
> > +   case 5:
> > +   b += k[1]&0xff00; a += k[0]; break;
> > +   case 4:
> > +   a += k[0]; break;
> > +   case 3:
> > +   a += k[0]&0xff00; break;
> > +   case 2:
> > +   a += k[0]&0x; break;
> > +   case 1:
> > +   a += k[0]&0xff00; break;
> > +#endif
> 
> Only the constants seem different in this block. Can we get rid of the
> #ifdefs using rte_XX_to_cpu() calls instead?

Will add that in next version.

> 
> > +   /* zero length strings require no mixing */
> > +   case 0:
> > +   return c;
> > +   };
> > +#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
> > +   } else if ((u.i & 0x1) == 0) {
> > +   /* read 16-bit chunks */
> > +   const uint16_t *k = (const uint16_t *)key;
> > +   const uint8_t  *k8;
> > +
> > +   /* all but last block: aligned reads and different mixing */
> > +   while (length > 12) {
> > +   a += k[0] + (((uint32_t)k[1])<<16);
> > +   b += k[2] + (((uint32_t)k[3])<<16);
> > +   c += k[4] + (((uint32_t)k[5])<<16);
> > +
> > +   __rte_jhash_mix(a, b, c);
> > +
> > +   k += 6;
> > +   length -= 12;
> > +   }
> > +
> > +   /* handle the last (probably partial) block */
> > +   k8 = (c

[dpdk-dev] [PATCH] hash: update jhash function with the latest available

2015-04-16 Thread Bruce Richardson
On Thu, Apr 16, 2015 at 02:26:59PM +0100, Pablo de Lara wrote:
> Jenkins hash function was developed originally in 1996,
> and was integrated in first versions of DPDK.
> The function has been improved in 2006,
> achieving up to 60% better performance, compared to the original one.
> 
> Check out: http://burtleburtle.net/bob/c/lookup3.c
> 
> This patch integrates that code in the rte_jhash library,
> adding also a new function rte_jhash_word2,
> that returns two different hash values, for a single key.
> 

Should the addition of the new functionality not be a separate patch from the
update to the existing code?
Also, do the new functions return the exact same values as the previous 
versions,
just faster?

> Signed-off-by: Pablo de Lara 
> ---
>  lib/librte_hash/rte_jhash.h |  407 
> ---
>  1 files changed, 347 insertions(+), 60 deletions(-)
> 
> diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h
> index a4bf5a1..3de006d 100644
> --- a/lib/librte_hash/rte_jhash.h
> +++ b/lib/librte_hash/rte_jhash.h
> @@ -1,7 +1,7 @@
>  /*-
>   *   BSD LICENSE
>   *
> - *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
>   *   All rights reserved.
>   *
>   *   Redistribution and use in source and binary forms, with or without
> @@ -45,38 +45,51 @@ extern "C" {
>  #endif
>  
>  #include 
> +#include 
>  
>  /* jhash.h: Jenkins hash support.
>   *
> - * Copyright (C) 1996 Bob Jenkins (bob_jenkins at burtleburtle.net)
> + * Copyright (C) 2006 Bob Jenkins (bob_jenkins at burtleburtle.net)
>   *
>   * http://burtleburtle.net/bob/hash/
>   *
>   * These are the credits from Bob's sources:
>   *
> - * lookup2.c, by Bob Jenkins, December 1996, Public Domain.
> - * hash(), hash2(), hash3, and mix() are externally useful functions.
> - * Routines to test the hash are included if SELF_TEST is defined.
> - * You can use this free for any purpose.  It has no warranty.
> + * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
> + *
> + * These are functions for producing 32-bit hashes for hash table lookup.
> + * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
> + * are externally useful functions.  Routines to test the hash are included
> + * if SELF_TEST is defined.  You can use this free for any purpose.  It's in
> + * the public domain.  It has no warranty.
>   *
>   * $FreeBSD$
>   */
>  
> +#define rot(x, k) (((x)<<(k)) | ((x)>>(32-(k
> +
>  /** @internal Internal function. NOTE: Arguments are modified. */
>  #define __rte_jhash_mix(a, b, c) do { \
> - a -= b; a -= c; a ^= (c>>13); \
> - b -= c; b -= a; b ^= (a<<8); \
> - c -= a; c -= b; c ^= (b>>13); \
> - a -= b; a -= c; a ^= (c>>12); \
> - b -= c; b -= a; b ^= (a<<16); \
> - c -= a; c -= b; c ^= (b>>5); \
> - a -= b; a -= c; a ^= (c>>3); \
> - b -= c; b -= a; b ^= (a<<10); \
> - c -= a; c -= b; c ^= (b>>15); \
> + a -= c; a ^= rot(c, 4); c += b; \
> + b -= a; b ^= rot(a, 6); a += c; \
> + c -= b; c ^= rot(b, 8); b += a; \
> + a -= c; a ^= rot(c, 16); c += b; \
> + b -= a; b ^= rot(a, 19); a += c; \
> + c -= b; c ^= rot(b, 4); b += a; \
> +} while (0)
> +
> +#define __rte_jhash_final(a, b, c) do { \
> + c ^= b; c -= rot(b, 14); \
> + a ^= c; a -= rot(c, 11); \
> + b ^= a; b -= rot(a, 25); \
> + c ^= b; c -= rot(b, 16); \
> + a ^= c; a -= rot(c, 4);  \
> + b ^= a; b -= rot(a, 14); \
> + c ^= b; c -= rot(b, 24); \
>  } while (0)
>  
>  /** The golden ratio: an arbitrary value. */
> -#define RTE_JHASH_GOLDEN_RATIO  0x9e3779b9
> +#define RTE_JHASH_GOLDEN_RATIO  0xdeadbeef
>  
>  /**
>   * The most generic version, hashes an arbitrary sequence
> @@ -95,42 +108,256 @@ extern "C" {
>  static inline uint32_t
>  rte_jhash(const void *key, uint32_t length, uint32_t initval)
>  {
> - uint32_t a, b, c, len;
> - const uint8_t *k = (const uint8_t *)key;
> - const uint32_t *k32 = (const uint32_t *)key;
> + uint32_t a, b, c;
> + union {
> + const void *ptr;
> + size_t i;
> + } u;
>  
> - len = length;
> - a = b = RTE_JHASH_GOLDEN_RATIO;
> - c = initval;
> + /* Set up the internal state */
> + a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + initval;
>  
> - while (len >= 12) {
> - a += k32[0];
> - b += k32[1];
> - c += k32[2];
> + u.ptr = key;
>  
> - __rte_jhash_mix(a,b,c);
> + if ((u.i & 0x3) == 0) {
> + const uint32_t *k = (const uint32_t *)key;
>  
> - k += (3 * sizeof(uint32_t)), k32 += 3;
> - len -= (3 * sizeof(uint32_t));
> - }
> + while (length > 12) {
> + a += k[0];
> + b += k[1];
> + c += k[2];
>  
> - c += length;
> - switch (len) {
> - case 11: c += ((u

[dpdk-dev] [PATCH] hash: update jhash function with the latest available

2015-04-16 Thread Pablo de Lara
Jenkins hash function was developed originally in 1996,
and was integrated in first versions of DPDK.
The function has been improved in 2006,
achieving up to 60% better performance, compared to the original one.

Check out: http://burtleburtle.net/bob/c/lookup3.c

This patch integrates that code in the rte_jhash library,
adding also a new function rte_jhash_word2,
that returns two different hash values, for a single key.

Signed-off-by: Pablo de Lara 
---
 lib/librte_hash/rte_jhash.h |  407 ---
 1 files changed, 347 insertions(+), 60 deletions(-)

diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h
index a4bf5a1..3de006d 100644
--- a/lib/librte_hash/rte_jhash.h
+++ b/lib/librte_hash/rte_jhash.h
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -45,38 +45,51 @@ extern "C" {
 #endif

 #include 
+#include 

 /* jhash.h: Jenkins hash support.
  *
- * Copyright (C) 1996 Bob Jenkins (bob_jenkins at burtleburtle.net)
+ * Copyright (C) 2006 Bob Jenkins (bob_jenkins at burtleburtle.net)
  *
  * http://burtleburtle.net/bob/hash/
  *
  * These are the credits from Bob's sources:
  *
- * lookup2.c, by Bob Jenkins, December 1996, Public Domain.
- * hash(), hash2(), hash3, and mix() are externally useful functions.
- * Routines to test the hash are included if SELF_TEST is defined.
- * You can use this free for any purpose.  It has no warranty.
+ * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
+ *
+ * These are functions for producing 32-bit hashes for hash table lookup.
+ * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
+ * are externally useful functions.  Routines to test the hash are included
+ * if SELF_TEST is defined.  You can use this free for any purpose.  It's in
+ * the public domain.  It has no warranty.
  *
  * $FreeBSD$
  */

+#define rot(x, k) (((x)<<(k)) | ((x)>>(32-(k
+
 /** @internal Internal function. NOTE: Arguments are modified. */
 #define __rte_jhash_mix(a, b, c) do { \
-   a -= b; a -= c; a ^= (c>>13); \
-   b -= c; b -= a; b ^= (a<<8); \
-   c -= a; c -= b; c ^= (b>>13); \
-   a -= b; a -= c; a ^= (c>>12); \
-   b -= c; b -= a; b ^= (a<<16); \
-   c -= a; c -= b; c ^= (b>>5); \
-   a -= b; a -= c; a ^= (c>>3); \
-   b -= c; b -= a; b ^= (a<<10); \
-   c -= a; c -= b; c ^= (b>>15); \
+   a -= c; a ^= rot(c, 4); c += b; \
+   b -= a; b ^= rot(a, 6); a += c; \
+   c -= b; c ^= rot(b, 8); b += a; \
+   a -= c; a ^= rot(c, 16); c += b; \
+   b -= a; b ^= rot(a, 19); a += c; \
+   c -= b; c ^= rot(b, 4); b += a; \
+} while (0)
+
+#define __rte_jhash_final(a, b, c) do { \
+   c ^= b; c -= rot(b, 14); \
+   a ^= c; a -= rot(c, 11); \
+   b ^= a; b -= rot(a, 25); \
+   c ^= b; c -= rot(b, 16); \
+   a ^= c; a -= rot(c, 4);  \
+   b ^= a; b -= rot(a, 14); \
+   c ^= b; c -= rot(b, 24); \
 } while (0)

 /** The golden ratio: an arbitrary value. */
-#define RTE_JHASH_GOLDEN_RATIO  0x9e3779b9
+#define RTE_JHASH_GOLDEN_RATIO  0xdeadbeef

 /**
  * The most generic version, hashes an arbitrary sequence
@@ -95,42 +108,256 @@ extern "C" {
 static inline uint32_t
 rte_jhash(const void *key, uint32_t length, uint32_t initval)
 {
-   uint32_t a, b, c, len;
-   const uint8_t *k = (const uint8_t *)key;
-   const uint32_t *k32 = (const uint32_t *)key;
+   uint32_t a, b, c;
+   union {
+   const void *ptr;
+   size_t i;
+   } u;

-   len = length;
-   a = b = RTE_JHASH_GOLDEN_RATIO;
-   c = initval;
+   /* Set up the internal state */
+   a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + initval;

-   while (len >= 12) {
-   a += k32[0];
-   b += k32[1];
-   c += k32[2];
+   u.ptr = key;

-   __rte_jhash_mix(a,b,c);
+   if ((u.i & 0x3) == 0) {
+   const uint32_t *k = (const uint32_t *)key;

-   k += (3 * sizeof(uint32_t)), k32 += 3;
-   len -= (3 * sizeof(uint32_t));
-   }
+   while (length > 12) {
+   a += k[0];
+   b += k[1];
+   c += k[2];

-   c += length;
-   switch (len) {
-   case 11: c += ((uint32_t)k[10] << 24);
-   case 10: c += ((uint32_t)k[9] << 16);
-   case 9 : c += ((uint32_t)k[8] << 8);
-   case 8 : b += ((uint32_t)k[7] << 24);
-   case 7 : b += ((uint32_t)k[6] << 16);
-   case 6 : b += ((uint32_t)k[5] << 8);
-   case 5 : b += k[4];
-   case 4 : a += ((uint32_t)k[3] << 24);
-   case 3 : a += ((uint32_t)k[2] << 16);
-