subject:"\[dpdk\-dev\] \[PATCH\] Minor fixes in rte

[dpdk-dev] [PATCH] Minor fixes in rte_common.h file.

2014-12-12 Thread r k

Ravi Kerur (1):
  Minor fixes in rte_common.h file.

 lib/librte_eal/common/include/rte_common.h | 6 +++---
 lib/librte_pmd_e1000/igb_pf.c  | 4 ++--
 lib/librte_pmd_ixgbe/ixgbe_pf.c| 4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

--
1.9.1

[dpdk-dev] [PATCH] Minor fixes in rte_common.h file.

2014-12-12 Thread r k

Subject: [PATCH] Minor fixes in rte_common.h file.

Fix rte_is_power_of_2 since 0 is not.
Avoid branching instructions in RTE_MAX and RTE_MIN.

Signed-off-by: Ravi Kerur 
---
 lib/librte_eal/common/include/rte_common.h | 6 +++---
 lib/librte_pmd_e1000/igb_pf.c  | 4 ++--
 lib/librte_pmd_ixgbe/ixgbe_pf.c| 4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/lib/librte_eal/common/include/rte_common.h
b/lib/librte_eal/common/include/rte_common.h
index 921b91f..e163f35 100644
--- a/lib/librte_eal/common/include/rte_common.h
+++ b/lib/librte_eal/common/include/rte_common.h
@@ -203,7 +203,7 @@ extern int RTE_BUILD_BUG_ON_detected_error;  static
inline int  rte_is_power_of_2(uint32_t n)  {
-   return ((n-1) & n) == 0;
+   return n && !(n & (n - 1));
 }

 /**
@@ -259,7 +259,7 @@ rte_align64pow2(uint64_t v)  #define RTE_MIN(a, b) ({ \
typeof (a) _a = (a); \
typeof (b) _b = (b); \
-   _a < _b ? _a : _b; \
+_b ^ ((_a ^ _b) & -(_a < _b)); \
})

 /**
@@ -268,7 +268,7 @@ rte_align64pow2(uint64_t v)  #define RTE_MAX(a, b) ({ \
typeof (a) _a = (a); \
typeof (b) _b = (b); \
-   _a > _b ? _a : _b; \
+   _a ^ ((_a ^ _b) & -(_a < _b)); \
})

 /*** Other general functions / macros / diff --git
a/lib/librte_pmd_e1000/igb_pf.c b/lib/librte_pmd_e1000/igb_pf.c index
bc3816a..546499c 100644
--- a/lib/librte_pmd_e1000/igb_pf.c
+++ b/lib/librte_pmd_e1000/igb_pf.c
@@ -321,11 +321,11 @@ igb_vf_set_mac_addr(struct rte_eth_dev *dev, uint32_t
vf, uint32_t *msgbuf)  static int  igb_vf_set_multicast(struct rte_eth_dev
*dev, __rte_unused uint32_t vf, uint32_t *msgbuf)  {
-   int i;
+   int16_t i;
uint32_t vector_bit;
uint32_t vector_reg;
uint32_t mta_reg;
-   int entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
+   int32_t entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
E1000_VT_MSGINFO_SHIFT;
uint16_t *hash_list = (uint16_t *)&msgbuf[1];
struct e1000_hw *hw =
E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
b/lib/librte_pmd_ixgbe/ixgbe_pf.c index 51da1fd..426caf9 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
@@ -390,7 +390,7 @@ ixgbe_vf_set_multicast(struct rte_eth_dev *dev,
__rte_unused uint32_t vf, uint32
struct ixgbe_hw *hw =
IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
struct ixgbe_vf_info *vfinfo =
*(IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private));
-   int nb_entries = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >>
+   int32_t nb_entries = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >>
IXGBE_VT_MSGINFO_SHIFT;
uint16_t *hash_list = (uint16_t *)&msgbuf[1];
uint32_t mta_idx;
@@ -399,7 +399,7 @@ ixgbe_vf_set_multicast(struct rte_eth_dev *dev,
__rte_unused uint32_t vf, uint32
const uint32_t IXGBE_MTA_BIT_SHIFT = 5;
const uint32_t IXGBE_MTA_BIT_MASK = (0x1 << IXGBE_MTA_BIT_SHIFT) -
1;
uint32_t reg_val;
-   int i;
+   int16_t i;

/* only so many hash values supported */
nb_entries = RTE_MIN(nb_entries, IXGBE_MAX_VF_MC_ENTRIES);
--
1.9.1

[dpdk-dev] [PATCH] Minor fixes in rte_common.h file.

2014-12-13 Thread Neil Horman

On Fri, Dec 12, 2014 at 03:04:34PM -0800, r k wrote:
> Subject: [PATCH] Minor fixes in rte_common.h file.
> 
> Fix rte_is_power_of_2 since 0 is not.
> Avoid branching instructions in RTE_MAX and RTE_MIN.
> 
> Signed-off-by: Ravi Kerur 
> ---
>  lib/librte_eal/common/include/rte_common.h | 6 +++---
>  lib/librte_pmd_e1000/igb_pf.c  | 4 ++--
>  lib/librte_pmd_ixgbe/ixgbe_pf.c| 4 ++--
>  3 files changed, 7 insertions(+), 7 deletions(-)
> 
> diff --git a/lib/librte_eal/common/include/rte_common.h
> b/lib/librte_eal/common/include/rte_common.h
> index 921b91f..e163f35 100644
> --- a/lib/librte_eal/common/include/rte_common.h
> +++ b/lib/librte_eal/common/include/rte_common.h
> @@ -203,7 +203,7 @@ extern int RTE_BUILD_BUG_ON_detected_error;  static
> inline int  rte_is_power_of_2(uint32_t n)  {
> -   return ((n-1) & n) == 0;
> +   return n && !(n & (n - 1));
>  }
> 
>  /**
> @@ -259,7 +259,7 @@ rte_align64pow2(uint64_t v)  #define RTE_MIN(a, b) ({ \
> typeof (a) _a = (a); \
> typeof (b) _b = (b); \
> -   _a < _b ? _a : _b; \
> +_b ^ ((_a ^ _b) & -(_a < _b)); \
Are you sure this is actually faster than the branch version?  What about using
a cmov instead?

> })
> 
>  /**
> @@ -268,7 +268,7 @@ rte_align64pow2(uint64_t v)  #define RTE_MAX(a, b) ({ \
> typeof (a) _a = (a); \
> typeof (b) _b = (b); \
> -   _a > _b ? _a : _b; \
> +   _a ^ ((_a ^ _b) & -(_a < _b)); \
Same as above

> })
> 
>  /*** Other general functions / macros / diff --git
> a/lib/librte_pmd_e1000/igb_pf.c b/lib/librte_pmd_e1000/igb_pf.c index
> bc3816a..546499c 100644
> --- a/lib/librte_pmd_e1000/igb_pf.c
> +++ b/lib/librte_pmd_e1000/igb_pf.c
> @@ -321,11 +321,11 @@ igb_vf_set_mac_addr(struct rte_eth_dev *dev, uint32_t
> vf, uint32_t *msgbuf)  static int  igb_vf_set_multicast(struct rte_eth_dev
> *dev, __rte_unused uint32_t vf, uint32_t *msgbuf)  {
> -   int i;
> +   int16_t i;
> uint32_t vector_bit;
> uint32_t vector_reg;
> uint32_t mta_reg;
> -   int entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
> +   int32_t entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
> E1000_VT_MSGINFO_SHIFT;
NAK, this has nothing to do with the included changelog

> uint16_t *hash_list = (uint16_t *)&msgbuf[1];
> struct e1000_hw *hw =
> E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> b/lib/librte_pmd_ixgbe/ixgbe_pf.c index 51da1fd..426caf9 100644
> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> @@ -390,7 +390,7 @@ ixgbe_vf_set_multicast(struct rte_eth_dev *dev,
> __rte_unused uint32_t vf, uint32
> struct ixgbe_hw *hw =
> IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> struct ixgbe_vf_info *vfinfo =
> *(IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private));
> -   int nb_entries = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >>
> +   int32_t nb_entries = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >>
> IXGBE_VT_MSGINFO_SHIFT;
ditto
> uint16_t *hash_list = (uint16_t *)&msgbuf[1];
> uint32_t mta_idx;
> @@ -399,7 +399,7 @@ ixgbe_vf_set_multicast(struct rte_eth_dev *dev,
> __rte_unused uint32_t vf, uint32
> const uint32_t IXGBE_MTA_BIT_SHIFT = 5;
> const uint32_t IXGBE_MTA_BIT_MASK = (0x1 << IXGBE_MTA_BIT_SHIFT) -
> 1;
> uint32_t reg_val;
> -   int i;
> +   int16_t i;
ditto

> 
> /* only so many hash values supported */
> nb_entries = RTE_MIN(nb_entries, IXGBE_MAX_VF_MC_ENTRIES);
> --
> 1.9.1
>

[dpdk-dev] [PATCH] Minor fixes in rte_common.h file.

2014-12-16 Thread Ravi Kerur

On Sat, Dec 13, 2014 at 2:39 AM, Neil Horman  wrote:
>
> On Fri, Dec 12, 2014 at 03:04:34PM -0800, r k wrote:
> > Subject: [PATCH] Minor fixes in rte_common.h file.
> >
> > Fix rte_is_power_of_2 since 0 is not.
> > Avoid branching instructions in RTE_MAX and RTE_MIN.
> >
> > Signed-off-by: Ravi Kerur 
> > ---
> >  lib/librte_eal/common/include/rte_common.h | 6 +++---
> >  lib/librte_pmd_e1000/igb_pf.c  | 4 ++--
> >  lib/librte_pmd_ixgbe/ixgbe_pf.c| 4 ++--
> >  3 files changed, 7 insertions(+), 7 deletions(-)
> >
> > diff --git a/lib/librte_eal/common/include/rte_common.h
> > b/lib/librte_eal/common/include/rte_common.h
> > index 921b91f..e163f35 100644
> > --- a/lib/librte_eal/common/include/rte_common.h
> > +++ b/lib/librte_eal/common/include/rte_common.h
> > @@ -203,7 +203,7 @@ extern int RTE_BUILD_BUG_ON_detected_error;  static
> > inline int  rte_is_power_of_2(uint32_t n)  {
> > -   return ((n-1) & n) == 0;
> > +   return n && !(n & (n - 1));
> >  }
> >
> >  /**
> > @@ -259,7 +259,7 @@ rte_align64pow2(uint64_t v)  #define RTE_MIN(a, b)
> ({ \
> > typeof (a) _a = (a); \
> > typeof (b) _b = (b); \
> > -   _a < _b ? _a : _b; \
> > +_b ^ ((_a ^ _b) & -(_a < _b)); \
> Are you sure this is actually faster than the branch version?  What about
> using
> a cmov instead?
>
>
 i am pretty sure modified code is faster than branching. I remember
cmov had performance issues esp. on Pentuim-4 not sure how new intel cpu's
perform.

> })
> >
> >  /**
> > @@ -268,7 +268,7 @@ rte_align64pow2(uint64_t v)  #define RTE_MAX(a, b)
> ({ \
> > typeof (a) _a = (a); \
> > typeof (b) _b = (b); \
> > -   _a > _b ? _a : _b; \
> > +   _a ^ ((_a ^ _b) & -(_a < _b)); \
> Same as above
>
>  Same as above.

> > })
> >
> >  /*** Other general functions / macros / diff --git
> > a/lib/librte_pmd_e1000/igb_pf.c b/lib/librte_pmd_e1000/igb_pf.c index
> > bc3816a..546499c 100644
> > --- a/lib/librte_pmd_e1000/igb_pf.c
> > +++ b/lib/librte_pmd_e1000/igb_pf.c
> > @@ -321,11 +321,11 @@ igb_vf_set_mac_addr(struct rte_eth_dev *dev,
> uint32_t
> > vf, uint32_t *msgbuf)  static int  igb_vf_set_multicast(struct
> rte_eth_dev
> > *dev, __rte_unused uint32_t vf, uint32_t *msgbuf)  {
> > -   int i;
> > +   int16_t i;
> > uint32_t vector_bit;
> > uint32_t vector_reg;
> > uint32_t mta_reg;
> > -   int entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
> > +   int32_t entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
> > E1000_VT_MSGINFO_SHIFT;
> NAK, this has nothing to do with the included changelog
>

 It does, it causes compilation errors such as

/root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c: In function
\u2018igb_pf_mbx_process\u2019:
/root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c:350:23: error: array
subscript is above array bounds [-Werror=array-bounds]
   vfinfo->vf_mc_hashes[i] = hash_list[i];
   ^
cc1: all warnings being treated as errors

Also it is always better to use explicit int definitions esp. for 64bit
systems.



>
> > uint16_t *hash_list = (uint16_t *)&msgbuf[1];
> > struct e1000_hw *hw =
> > E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> > diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > b/lib/librte_pmd_ixgbe/ixgbe_pf.c index 51da1fd..426caf9 100644
> > --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > @@ -390,7 +390,7 @@ ixgbe_vf_set_multicast(struct rte_eth_dev *dev,
> > __rte_unused uint32_t vf, uint32
> > struct ixgbe_hw *hw =
> > IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> > struct ixgbe_vf_info *vfinfo =
> > *(IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private));
> > -   int nb_entries = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >>
> > +   int32_t nb_entries = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >>
> > IXGBE_VT_MSGINFO_SHIFT;
> ditto
> > uint16_t *hash_list = (uint16_t *)&msgbuf[1];
> > uint32_t mta_idx;
> > @@ -399,7 +399,7 @@ ixgbe_vf_set_multicast(struct rte_eth_dev *dev,
> > __rte_unused uint32_t vf, uint32
> > const uint32_t IXGBE_MTA_BIT_SHIFT = 5;
> > const uint32_t IXGBE_MTA_BIT_MASK = (0x1 << IXGBE_MTA_BIT_SHIFT)
> -
> > 1;
> > uint32_t reg_val;
> > -   int i;
> > +   int16_t i;
> ditto
>
>  Same as above.

> >
> > /* only so many hash values supported */
> > nb_entries = RTE_MIN(nb_entries, IXGBE_MAX_VF_MC_ENTRIES);
> > --
> > 1.9.1
> >
>

[dpdk-dev] [PATCH] Minor fixes in rte_common.h file.

2014-12-16 Thread Ananyev, Konstantin



> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Ravi Kerur
> Sent: Tuesday, December 16, 2014 4:47 PM
> To: Neil Horman
> Cc: dev at dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] Minor fixes in rte_common.h file.
> 
> On Sat, Dec 13, 2014 at 2:39 AM, Neil Horman  wrote:
> >
> > On Fri, Dec 12, 2014 at 03:04:34PM -0800, r k wrote:
> > > Subject: [PATCH] Minor fixes in rte_common.h file.
> > >
> > > Fix rte_is_power_of_2 since 0 is not.
> > > Avoid branching instructions in RTE_MAX and RTE_MIN.
> > >
> > > Signed-off-by: Ravi Kerur 
> > > ---
> > >  lib/librte_eal/common/include/rte_common.h | 6 +++---
> > >  lib/librte_pmd_e1000/igb_pf.c  | 4 ++--
> > >  lib/librte_pmd_ixgbe/ixgbe_pf.c| 4 ++--
> > >  3 files changed, 7 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/lib/librte_eal/common/include/rte_common.h
> > > b/lib/librte_eal/common/include/rte_common.h
> > > index 921b91f..e163f35 100644
> > > --- a/lib/librte_eal/common/include/rte_common.h
> > > +++ b/lib/librte_eal/common/include/rte_common.h
> > > @@ -203,7 +203,7 @@ extern int RTE_BUILD_BUG_ON_detected_error;  static
> > > inline int  rte_is_power_of_2(uint32_t n)  {
> > > -   return ((n-1) & n) == 0;
> > > +   return n && !(n & (n - 1));
> > >  }
> > >
> > >  /**
> > > @@ -259,7 +259,7 @@ rte_align64pow2(uint64_t v)  #define RTE_MIN(a, b)
> > ({ \
> > > typeof (a) _a = (a); \
> > > typeof (b) _b = (b); \
> > > -   _a < _b ? _a : _b; \
> > > +_b ^ ((_a ^ _b) & -(_a < _b)); \
> > Are you sure this is actually faster than the branch version?  What about
> > using
> > a cmov instead?
> >
> >
>  i am pretty sure modified code is faster than branching. I remember
> cmov had performance issues esp. on Pentuim-4 not sure how new intel cpu's
> perform.

I also think most modern compilers are smart enough to avoid any branching here 
and will use cmov instead.
And we are way ahead of Pentium 4 times these days.
Konstantin

> 
> > })
> > >
> > >  /**
> > > @@ -268,7 +268,7 @@ rte_align64pow2(uint64_t v)  #define RTE_MAX(a, b)
> > ({ \
> > > typeof (a) _a = (a); \
> > > typeof (b) _b = (b); \
> > > -   _a > _b ? _a : _b; \
> > > +   _a ^ ((_a ^ _b) & -(_a < _b)); \
> > Same as above
> >
> >  Same as above.
> 
> > > })
> > >
> > >  /*** Other general functions / macros / diff --git
> > > a/lib/librte_pmd_e1000/igb_pf.c b/lib/librte_pmd_e1000/igb_pf.c index
> > > bc3816a..546499c 100644
> > > --- a/lib/librte_pmd_e1000/igb_pf.c
> > > +++ b/lib/librte_pmd_e1000/igb_pf.c
> > > @@ -321,11 +321,11 @@ igb_vf_set_mac_addr(struct rte_eth_dev *dev,
> > uint32_t
> > > vf, uint32_t *msgbuf)  static int  igb_vf_set_multicast(struct
> > rte_eth_dev
> > > *dev, __rte_unused uint32_t vf, uint32_t *msgbuf)  {
> > > -   int i;
> > > +   int16_t i;
> > > uint32_t vector_bit;
> > > uint32_t vector_reg;
> > > uint32_t mta_reg;
> > > -   int entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
> > > +   int32_t entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
> > > E1000_VT_MSGINFO_SHIFT;
> > NAK, this has nothing to do with the included changelog
> >
> 
>  It does, it causes compilation errors such as
> 
> /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c: In function
> \u2018igb_pf_mbx_process\u2019:
> /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c:350:23: error: array
> subscript is above array bounds [-Werror=array-bounds]
>vfinfo->vf_mc_hashes[i] = hash_list[i];
>^
> cc1: all warnings being treated as errors
> 
> Also it is always better to use explicit int definitions esp. for 64bit
> systems.
> 
> 
> 
> >
> > > uint16_t *hash_list = (uint16_t *)&msgbuf[1];
> > > struct e1000_hw *hw =
> > > E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> > > diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > > b/lib/librte_pmd_ixgbe/ixgbe_pf.c index 51da1fd..426caf9 100644
> > > --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > > +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.

[dpdk-dev] [PATCH] Minor fixes in rte_common.h file.

2014-12-16 Thread Ravi Kerur

On Tue, Dec 16, 2014 at 9:23 AM, Ananyev, Konstantin <
konstantin.ananyev at intel.com> wrote:
>
>
>
> > -Original Message-
> > From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Ravi Kerur
> > Sent: Tuesday, December 16, 2014 4:47 PM
> > To: Neil Horman
> > Cc: dev at dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH] Minor fixes in rte_common.h file.
> >
> > On Sat, Dec 13, 2014 at 2:39 AM, Neil Horman 
> wrote:
> > >
> > > On Fri, Dec 12, 2014 at 03:04:34PM -0800, r k wrote:
> > > > Subject: [PATCH] Minor fixes in rte_common.h file.
> > > >
> > > > Fix rte_is_power_of_2 since 0 is not.
> > > > Avoid branching instructions in RTE_MAX and RTE_MIN.
> > > >
> > > > Signed-off-by: Ravi Kerur 
> > > > ---
> > > >  lib/librte_eal/common/include/rte_common.h | 6 +++---
> > > >  lib/librte_pmd_e1000/igb_pf.c  | 4 ++--
> > > >  lib/librte_pmd_ixgbe/ixgbe_pf.c| 4 ++--
> > > >  3 files changed, 7 insertions(+), 7 deletions(-)
> > > >
> > > > diff --git a/lib/librte_eal/common/include/rte_common.h
> > > > b/lib/librte_eal/common/include/rte_common.h
> > > > index 921b91f..e163f35 100644
> > > > --- a/lib/librte_eal/common/include/rte_common.h
> > > > +++ b/lib/librte_eal/common/include/rte_common.h
> > > > @@ -203,7 +203,7 @@ extern int RTE_BUILD_BUG_ON_detected_error;
> static
> > > > inline int  rte_is_power_of_2(uint32_t n)  {
> > > > -   return ((n-1) & n) == 0;
> > > > +   return n && !(n & (n - 1));
> > > >  }
> > > >
> > > >  /**
> > > > @@ -259,7 +259,7 @@ rte_align64pow2(uint64_t v)  #define RTE_MIN(a,
> b)
> > > ({ \
> > > > typeof (a) _a = (a); \
> > > > typeof (b) _b = (b); \
> > > > -   _a < _b ? _a : _b; \
> > > > +_b ^ ((_a ^ _b) & -(_a < _b)); \
> > > Are you sure this is actually faster than the branch version?  What
> about
> > > using
> > > a cmov instead?
> > >
> > >
> >  i am pretty sure modified code is faster than branching. I remember
> > cmov had performance issues esp. on Pentuim-4 not sure how new intel
> cpu's
> > perform.
>
> I also think most modern compilers are smart enough to avoid any branching
> here and will use cmov instead.
> And we are way ahead of Pentium 4 times these days.

Konstantin
>

Konstantin,  Can you please elaborate, is it something done
automatically with Intel's icc compiler? My understanding is branch
prediction can be influenced only by using compiler builtin i.e.
__builtin_expect() , without this compiler will generate regular
instructions(cmp/jump instructions). I wrote small program and compiled
with gcc -02/-03, don't see cmov instruction.


> >
> > > })
> > > >
> > > >  /**
> > > > @@ -268,7 +268,7 @@ rte_align64pow2(uint64_t v)  #define RTE_MAX(a,
> b)
> > > ({ \
> > > > typeof (a) _a = (a); \
> > > > typeof (b) _b = (b); \
> > > > -   _a > _b ? _a : _b; \
> > > > +   _a ^ ((_a ^ _b) & -(_a < _b)); \
> > > Same as above
> > >
> > >  Same as above.
> >
> > > > })
> > > >
> > > >  /*** Other general functions / macros / diff --git
> > > > a/lib/librte_pmd_e1000/igb_pf.c b/lib/librte_pmd_e1000/igb_pf.c index
> > > > bc3816a..546499c 100644
> > > > --- a/lib/librte_pmd_e1000/igb_pf.c
> > > > +++ b/lib/librte_pmd_e1000/igb_pf.c
> > > > @@ -321,11 +321,11 @@ igb_vf_set_mac_addr(struct rte_eth_dev *dev,
> > > uint32_t
> > > > vf, uint32_t *msgbuf)  static int  igb_vf_set_multicast(struct
> > > rte_eth_dev
> > > > *dev, __rte_unused uint32_t vf, uint32_t *msgbuf)  {
> > > > -   int i;
> > > > +   int16_t i;
> > > > uint32_t vector_bit;
> > > > uint32_t vector_reg;
> > > > uint32_t mta_reg;
> > > > -   int entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
> > > > +   int32_t entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
> > > > E1000_VT_MSGINFO_SHIFT;
> > > NAK, this has nothing to do with the included changelog
> > >
> >
> >

[dpdk-dev] [PATCH] Minor fixes in rte_common.h file.

2014-12-16 Thread Neil Horman

On Tue, Dec 16, 2014 at 08:46:51AM -0800, Ravi Kerur wrote:
> On Sat, Dec 13, 2014 at 2:39 AM, Neil Horman  wrote:
> >
> > On Fri, Dec 12, 2014 at 03:04:34PM -0800, r k wrote:
> > > Subject: [PATCH] Minor fixes in rte_common.h file.
> > >
> > > Fix rte_is_power_of_2 since 0 is not.
> > > Avoid branching instructions in RTE_MAX and RTE_MIN.
> > >
> > > Signed-off-by: Ravi Kerur 
> > > ---
> > >  lib/librte_eal/common/include/rte_common.h | 6 +++---
> > >  lib/librte_pmd_e1000/igb_pf.c  | 4 ++--
> > >  lib/librte_pmd_ixgbe/ixgbe_pf.c| 4 ++--
> > >  3 files changed, 7 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/lib/librte_eal/common/include/rte_common.h
> > > b/lib/librte_eal/common/include/rte_common.h
> > > index 921b91f..e163f35 100644
> > > --- a/lib/librte_eal/common/include/rte_common.h
> > > +++ b/lib/librte_eal/common/include/rte_common.h
> > > @@ -203,7 +203,7 @@ extern int RTE_BUILD_BUG_ON_detected_error;  static
> > > inline int  rte_is_power_of_2(uint32_t n)  {
> > > -   return ((n-1) & n) == 0;
> > > +   return n && !(n & (n - 1));
> > >  }
> > >
> > >  /**
> > > @@ -259,7 +259,7 @@ rte_align64pow2(uint64_t v)  #define RTE_MIN(a, b)
> > ({ \
> > > typeof (a) _a = (a); \
> > > typeof (b) _b = (b); \
> > > -   _a < _b ? _a : _b; \
> > > +_b ^ ((_a ^ _b) & -(_a < _b)); \
> > Are you sure this is actually faster than the branch version?  What about
> > using
> > a cmov instead?
> >
> >
>  i am pretty sure modified code is faster than branching. I remember
> cmov had performance issues esp. on Pentuim-4 not sure how new intel cpu's
> perform.
> 
Pretty sure isn't sure.  Theres no point in code churn if theres no obvious
advantage.  Some perf tests to deomonstrate the advantage here would be great.

> > })
> > >
> > >  /**
> > > @@ -268,7 +268,7 @@ rte_align64pow2(uint64_t v)  #define RTE_MAX(a, b)
> > ({ \
> > > typeof (a) _a = (a); \
> > > typeof (b) _b = (b); \
> > > -   _a > _b ? _a : _b; \
> > > +   _a ^ ((_a ^ _b) & -(_a < _b)); \
> > Same as above
> >
> >  Same as above.
> 
> > > })
> > >
> > >  /*** Other general functions / macros / diff --git
> > > a/lib/librte_pmd_e1000/igb_pf.c b/lib/librte_pmd_e1000/igb_pf.c index
> > > bc3816a..546499c 100644
> > > --- a/lib/librte_pmd_e1000/igb_pf.c
> > > +++ b/lib/librte_pmd_e1000/igb_pf.c
> > > @@ -321,11 +321,11 @@ igb_vf_set_mac_addr(struct rte_eth_dev *dev,
> > uint32_t
> > > vf, uint32_t *msgbuf)  static int  igb_vf_set_multicast(struct
> > rte_eth_dev
> > > *dev, __rte_unused uint32_t vf, uint32_t *msgbuf)  {
> > > -   int i;
> > > +   int16_t i;
> > > uint32_t vector_bit;
> > > uint32_t vector_reg;
> > > uint32_t mta_reg;
> > > -   int entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
> > > +   int32_t entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
> > > E1000_VT_MSGINFO_SHIFT;
> > NAK, this has nothing to do with the included changelog
> >
> 
>  It does, it causes compilation errors such as
> 
> /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c: In function
> \u2018igb_pf_mbx_process\u2019:
> /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c:350:23: error: array
> subscript is above array bounds [-Werror=array-bounds]
>vfinfo->vf_mc_hashes[i] = hash_list[i];
>^
> cc1: all warnings being treated as errors
> 
> Also it is always better to use explicit int definitions esp. for 64bit
> systems.
> 

This is your changelog:
=
Subject: [PATCH] Minor fixes in rte_common.h file.

Fix rte_is_power_of_2 since 0 is not.
Avoid branching instructions in RTE_MAX and RTE_MIN
=

Nowhere does your changelog indicate that you are fixing compliation errors.
That would in and of itself be far more serious that making micro optimizations.
If you want to fix build breaks, great, please do, but send a patch that clearly
indicates what the break is and how your fixing it. Don't just toss it in with
whatever other work you happen to be doing.

[dpdk-dev] [PATCH] Minor fixes in rte_common.h file.

2014-12-17 Thread Ananyev, Konstantin



> 
> From: Ravi Kerur [mailto:rkerur at gmail.com]
> Sent: Tuesday, December 16, 2014 8:14 PM
> To: Ananyev, Konstantin
> Cc: Neil Horman; dev at dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] Minor fixes in rte_common.h file.
> 
> 
> 
> On Tue, Dec 16, 2014 at 9:23 AM, Ananyev, Konstantin  intel.com> wrote:
> 
> 
> > -Original Message-
> > From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Ravi Kerur
> > Sent: Tuesday, December 16, 2014 4:47 PM
> > To: Neil Horman
> > Cc: dev at dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH] Minor fixes in rte_common.h file.
> >
> > On Sat, Dec 13, 2014 at 2:39 AM, Neil Horman  
> > wrote:
> > >
> > > On Fri, Dec 12, 2014 at 03:04:34PM -0800, r k wrote:
> > > > Subject: [PATCH] Minor fixes in rte_common.h file.
> > > >
> > > > Fix rte_is_power_of_2 since 0 is not.
> > > > Avoid branching instructions in RTE_MAX and RTE_MIN.
> > > >
> > > > Signed-off-by: Ravi Kerur 
> > > > ---
> > > >? lib/librte_eal/common/include/rte_common.h | 6 +++---
> > > >? lib/librte_pmd_e1000/igb_pf.c? ? ? ? ? ? ? | 4 ++--
> > > >? lib/librte_pmd_ixgbe/ixgbe_pf.c? ? ? ? ? ? | 4 ++--
> > > >? 3 files changed, 7 insertions(+), 7 deletions(-)
> > > >
> > > > diff --git a/lib/librte_eal/common/include/rte_common.h
> > > > b/lib/librte_eal/common/include/rte_common.h
> > > > index 921b91f..e163f35 100644
> > > > --- a/lib/librte_eal/common/include/rte_common.h
> > > > +++ b/lib/librte_eal/common/include/rte_common.h
> > > > @@ -203,7 +203,7 @@ extern int RTE_BUILD_BUG_ON_detected_error;? static
> > > > inline int? rte_is_power_of_2(uint32_t n)? {
> > > > -? ? ? ?return ((n-1) & n) == 0;
> > > > +? ? ? ?return n && !(n & (n - 1));
> > > >? }
> > > >
> > > >? /**
> > > > @@ -259,7 +259,7 @@ rte_align64pow2(uint64_t v)? #define RTE_MIN(a, b)
> > > ({ \
> > > >? ? ? ? ? ? ? ? ?typeof (a) _a = (a); \
> > > >? ? ? ? ? ? ? ? ?typeof (b) _b = (b); \
> > > > -? ? ? ? ? ? ? ?_a < _b ? _a : _b; \
> > > > +? ? ? ? ? ? ? ? _b ^ ((_a ^ _b) & -(_a < _b)); \
> > > Are you sure this is actually faster than the branch version?? What about
> > > using
> > > a cmov instead?
> > >
> > >
> >  i am pretty sure modified code is faster than branching. I remember
> > cmov had performance issues esp. on Pentuim-4 not sure how new intel cpu's
> > perform.
> I also think most modern compilers are smart enough to avoid any branching 
> here and will use cmov instead.
> And we are way ahead of Pentium 4 times these days.
> Konstantin
> 
> Konstantin, ?Can you please elaborate, is it something done automatically 
> with Intel's icc compiler? My understanding is branch
> prediction can be influenced only by using compiler builtin i.e. 
> __builtin_expect() , without this compiler will generate regular
> instructions(cmp/jump instructions). I wrote small program and compiled with 
> gcc -02/-03, don't see cmov instruction.

I am saying that there is probably no need to modify these macros.
On IA , for constructions like: "_a < _b ? _a : _b;"
modern compilers in many cases will avoid any branches and emit cmov instead.

$ cat tcmv1.c

#include 
#include 

#define RTE_MIN(a, b) ({ \
typeof (a) _a = (a); \
typeof (b) _b = (b); \
_a < _b ? _a : _b; \
})

int
fxmini32(int a, int b)
{
return RTE_MIN(a, b);
}

int
fxminu64(uint64_t a, uint64_t b)
{
return RTE_MIN(a, b);
}

$gcc -O3 -m64 -S tcmv1.c

$ cat tcmv1.s
.file   "tcmv1.c"
.text
.p2align 4,,15
.globl  fxmini32
.type   fxmini32, @function
fxmini32:
.LFB0:
.cfi_startproc
cmpl%esi, %edi
movl%esi, %eax
cmovle  %edi, %eax
ret
.cfi_endproc
.LFE0:
.size   fxmini32, .-fxmini32
.p2align 4,,15
.globl  fxminu64
.type   fxminu64, @function
fxminu64:
.LFB1:
.cfi_startproc
cmpq%rsi, %rdi
movq%rsi, %rax
cmovbe  %rdi, %rax
ret
.cfi_endproc

gcc version 4.8.3
clang produces similar code.

Konstantin

> 
> >
> > >? ? ? ? ?})
> > > >
> > > >? /**
> > > > @@ -268,7 +268,7 @@ rte_align64pow2(uint64_t v)? #define RTE_MAX(a, b)
> > > ({ \
> > > >? ? ? ? ? ? ? ? ?typeof (a) _a = (a); \
> > > >? ? ? ? ? ? ? ? ?typeof (b) _b = (b);

[dpdk-dev] [PATCH] Minor fixes in rte_common.h file.

2014-12-17 Thread Ravi Kerur

On Tue, Dec 16, 2014 at 5:05 PM, Ananyev, Konstantin <
konstantin.ananyev at intel.com> wrote:
>
>
>
> >
> > From: Ravi Kerur [mailto:rkerur at gmail.com]
> > Sent: Tuesday, December 16, 2014 8:14 PM
> > To: Ananyev, Konstantin
> > Cc: Neil Horman; dev at dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH] Minor fixes in rte_common.h file.
> >
> >
> >
> > On Tue, Dec 16, 2014 at 9:23 AM, Ananyev, Konstantin <
> konstantin.ananyev at intel.com> wrote:
> >
> >
> > > -Original Message-
> > > From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Ravi Kerur
> > > Sent: Tuesday, December 16, 2014 4:47 PM
> > > To: Neil Horman
> > > Cc: dev at dpdk.org
> > > Subject: Re: [dpdk-dev] [PATCH] Minor fixes in rte_common.h file.
> > >
> > > On Sat, Dec 13, 2014 at 2:39 AM, Neil Horman 
> wrote:
> > > >
> > > > On Fri, Dec 12, 2014 at 03:04:34PM -0800, r k wrote:
> > > > > Subject: [PATCH] Minor fixes in rte_common.h file.
> > > > >
> > > > > Fix rte_is_power_of_2 since 0 is not.
> > > > > Avoid branching instructions in RTE_MAX and RTE_MIN.
> > > > >
> > > > > Signed-off-by: Ravi Kerur 
> > > > > ---
> > > > >  lib/librte_eal/common/include/rte_common.h | 6 +++---
> > > > >  lib/librte_pmd_e1000/igb_pf.c  | 4 ++--
> > > > >  lib/librte_pmd_ixgbe/ixgbe_pf.c| 4 ++--
> > > > >  3 files changed, 7 insertions(+), 7 deletions(-)
> > > > >
> > > > > diff --git a/lib/librte_eal/common/include/rte_common.h
> > > > > b/lib/librte_eal/common/include/rte_common.h
> > > > > index 921b91f..e163f35 100644
> > > > > --- a/lib/librte_eal/common/include/rte_common.h
> > > > > +++ b/lib/librte_eal/common/include/rte_common.h
> > > > > @@ -203,7 +203,7 @@ extern int RTE_BUILD_BUG_ON_detected_error;
> static
> > > > > inline int  rte_is_power_of_2(uint32_t n)  {
> > > > > -   return ((n-1) & n) == 0;
> > > > > +   return n && !(n & (n - 1));
> > > > >  }
> > > > >
> > > > >  /**
> > > > > @@ -259,7 +259,7 @@ rte_align64pow2(uint64_t v)  #define
> RTE_MIN(a, b)
> > > > ({ \
> > > > > typeof (a) _a = (a); \
> > > > > typeof (b) _b = (b); \
> > > > > -   _a < _b ? _a : _b; \
> > > > > +_b ^ ((_a ^ _b) & -(_a < _b)); \
> > > > Are you sure this is actually faster than the branch version?  What
> about
> > > > using
> > > > a cmov instead?
> > > >
> > > >
> > >  i am pretty sure modified code is faster than branching. I
> remember
> > > cmov had performance issues esp. on Pentuim-4 not sure how new intel
> cpu's
> > > perform.
> > I also think most modern compilers are smart enough to avoid any
> branching here and will use cmov instead.
> > And we are way ahead of Pentium 4 times these days.
> > Konstantin
> >
> > Konstantin,  Can you please elaborate, is it something done
> automatically with Intel's icc compiler? My understanding is branch
> > prediction can be influenced only by using compiler builtin i.e.
> __builtin_expect() , without this compiler will generate regular
> > instructions(cmp/jump instructions). I wrote small program and compiled
> with gcc -02/-03, don't see cmov instruction.
>
> I am saying that there is probably no need to modify these macros.
> On IA , for constructions like: "_a < _b ? _a : _b;"
> modern compilers in many cases will avoid any branches and emit cmov
> instead.
>
> $ cat tcmv1.c
>
> #include 
> #include 
>
> #define RTE_MIN(a, b) ({ \
> typeof (a) _a = (a); \
> typeof (b) _b = (b); \
> _a < _b ? _a : _b; \
> })
>
> int
> fxmini32(int a, int b)
> {
> return RTE_MIN(a, b);
> }
>
> int
> fxminu64(uint64_t a, uint64_t b)
> {
> return RTE_MIN(a, b);
> }
>
> $gcc -O3 -m64 -S tcmv1.c
>
> $ cat tcmv1.s
> .file   "tcmv1.c"
> .text
> .p2align 4,,15
> .globl  fxmini32
> .type   fxmini32, @function
> fxmini32:
> .LFB0:
> .cfi_startproc
> cmpl%esi, %edi
> movl%esi, %eax
> cmovle  %

[dpdk-dev] [PATCH] Minor fixes in rte_common.h file.

2014-12-17 Thread Ravi Kerur

On Tue, Dec 16, 2014 at 1:40 PM, Neil Horman  wrote:
>
> On Tue, Dec 16, 2014 at 08:46:51AM -0800, Ravi Kerur wrote:
> > On Sat, Dec 13, 2014 at 2:39 AM, Neil Horman 
> wrote:
> > >
> > > On Fri, Dec 12, 2014 at 03:04:34PM -0800, r k wrote:
> > > > Subject: [PATCH] Minor fixes in rte_common.h file.
> > > >
> > > > Fix rte_is_power_of_2 since 0 is not.
> > > > Avoid branching instructions in RTE_MAX and RTE_MIN.
> > > >
> > > > Signed-off-by: Ravi Kerur 
> > > > ---
> > > >  lib/librte_eal/common/include/rte_common.h | 6 +++---
> > > >  lib/librte_pmd_e1000/igb_pf.c  | 4 ++--
> > > >  lib/librte_pmd_ixgbe/ixgbe_pf.c| 4 ++--
> > > >  3 files changed, 7 insertions(+), 7 deletions(-)
> > > >
> > > > diff --git a/lib/librte_eal/common/include/rte_common.h
> > > > b/lib/librte_eal/common/include/rte_common.h
> > > > index 921b91f..e163f35 100644
> > > > --- a/lib/librte_eal/common/include/rte_common.h
> > > > +++ b/lib/librte_eal/common/include/rte_common.h
> > > > @@ -203,7 +203,7 @@ extern int RTE_BUILD_BUG_ON_detected_error;
> static
> > > > inline int  rte_is_power_of_2(uint32_t n)  {
> > > > -   return ((n-1) & n) == 0;
> > > > +   return n && !(n & (n - 1));
> > > >  }
> > > >
> > > >  /**
> > > > @@ -259,7 +259,7 @@ rte_align64pow2(uint64_t v)  #define RTE_MIN(a,
> b)
> > > ({ \
> > > > typeof (a) _a = (a); \
> > > > typeof (b) _b = (b); \
> > > > -   _a < _b ? _a : _b; \
> > > > +_b ^ ((_a ^ _b) & -(_a < _b)); \
> > > Are you sure this is actually faster than the branch version?  What
> about
> > > using
> > > a cmov instead?
> > >
> > >
> >  i am pretty sure modified code is faster than branching. I remember
> > cmov had performance issues esp. on Pentuim-4 not sure how new intel
> cpu's
> > perform.
> >
> Pretty sure isn't sure.  Theres no point in code churn if theres no obvious
> advantage.  Some perf tests to deomonstrate the advantage here would be
> great.
>

 I have used this before with the intent to avoid branching and it was
part of other changes I did for performance improvement in our code.

>
> > > })
> > > >
> > > >  /**
> > > > @@ -268,7 +268,7 @@ rte_align64pow2(uint64_t v)  #define RTE_MAX(a,
> b)
> > > ({ \
> > > > typeof (a) _a = (a); \
> > > > typeof (b) _b = (b); \
> > > > -   _a > _b ? _a : _b; \
> > > > +   _a ^ ((_a ^ _b) & -(_a < _b)); \
> > > Same as above
> > >
> > >  Same as above.
> >
> > > > })
> > > >
> > > >  /*** Other general functions / macros / diff --git
> > > > a/lib/librte_pmd_e1000/igb_pf.c b/lib/librte_pmd_e1000/igb_pf.c index
> > > > bc3816a..546499c 100644
> > > > --- a/lib/librte_pmd_e1000/igb_pf.c
> > > > +++ b/lib/librte_pmd_e1000/igb_pf.c
> > > > @@ -321,11 +321,11 @@ igb_vf_set_mac_addr(struct rte_eth_dev *dev,
> > > uint32_t
> > > > vf, uint32_t *msgbuf)  static int  igb_vf_set_multicast(struct
> > > rte_eth_dev
> > > > *dev, __rte_unused uint32_t vf, uint32_t *msgbuf)  {
> > > > -   int i;
> > > > +   int16_t i;
> > > > uint32_t vector_bit;
> > > > uint32_t vector_reg;
> > > > uint32_t mta_reg;
> > > > -   int entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
> > > > +   int32_t entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
> > > > E1000_VT_MSGINFO_SHIFT;
> > > NAK, this has nothing to do with the included changelog
> > >
> >
> >  It does, it causes compilation errors such as
> >
> > /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c: In function
> > \u2018igb_pf_mbx_process\u2019:
> > /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c:350:23: error: array
> > subscript is above array bounds [-Werror=array-bounds]
> >vfinfo->vf_mc_hashes[i] = hash_list[i];
> >^
> > cc1: all warnings being treated as errors
> >
> > Also it is always better to use explicit int definitions esp. for 64bit
> > systems.
> >
>
> This is your changelog:
> =
> Subject: [PATCH] Minor fixes in rte_common.h file.
>
> Fix rte_is_power_of_2 since 0 is not.
> Avoid branching instructions in RTE_MAX and RTE_MIN
> =
>
> Nowhere does your changelog indicate that you are fixing compliation
> errors.
> That would in and of itself be far more serious that making micro
> optimizations.
> If you want to fix build breaks, great, please do, but send a patch that
> clearly
> indicates what the break is and how your fixing it. Don't just toss it in
> with
> whatever other work you happen to be doing.
>

 Main reason was to replace int with explicit sized int, it happened to
fix compiler errors as well. I will make sure comments cover everything
next time. Anyways I will drop this patch and just include fix for
power_of_2.

[dpdk-dev] [PATCH] Minor fixes in rte_common.h file.

2014-12-18 Thread Neil Horman

On Wed, Dec 17, 2014 at 08:40:17AM -0800, Ravi Kerur wrote:
> On Tue, Dec 16, 2014 at 1:40 PM, Neil Horman  wrote:
> >
> > On Tue, Dec 16, 2014 at 08:46:51AM -0800, Ravi Kerur wrote:
> > > On Sat, Dec 13, 2014 at 2:39 AM, Neil Horman 
> > wrote:
> > > >
> > > > On Fri, Dec 12, 2014 at 03:04:34PM -0800, r k wrote:
> > > > > Subject: [PATCH] Minor fixes in rte_common.h file.
> > > > >
> > > > > Fix rte_is_power_of_2 since 0 is not.
> > > > > Avoid branching instructions in RTE_MAX and RTE_MIN.
> > > > >
> > > > > Signed-off-by: Ravi Kerur 
> > > > > ---
> > > > >  lib/librte_eal/common/include/rte_common.h | 6 +++---
> > > > >  lib/librte_pmd_e1000/igb_pf.c  | 4 ++--
> > > > >  lib/librte_pmd_ixgbe/ixgbe_pf.c| 4 ++--
> > > > >  3 files changed, 7 insertions(+), 7 deletions(-)
> > > > >
> > > > > diff --git a/lib/librte_eal/common/include/rte_common.h
> > > > > b/lib/librte_eal/common/include/rte_common.h
> > > > > index 921b91f..e163f35 100644
> > > > > --- a/lib/librte_eal/common/include/rte_common.h
> > > > > +++ b/lib/librte_eal/common/include/rte_common.h
> > > > > @@ -203,7 +203,7 @@ extern int RTE_BUILD_BUG_ON_detected_error;
> > static
> > > > > inline int  rte_is_power_of_2(uint32_t n)  {
> > > > > -   return ((n-1) & n) == 0;
> > > > > +   return n && !(n & (n - 1));
> > > > >  }
> > > > >
> > > > >  /**
> > > > > @@ -259,7 +259,7 @@ rte_align64pow2(uint64_t v)  #define RTE_MIN(a,
> > b)
> > > > ({ \
> > > > > typeof (a) _a = (a); \
> > > > > typeof (b) _b = (b); \
> > > > > -   _a < _b ? _a : _b; \
> > > > > +_b ^ ((_a ^ _b) & -(_a < _b)); \
> > > > Are you sure this is actually faster than the branch version?  What
> > about
> > > > using
> > > > a cmov instead?
> > > >
> > > >
> > >  i am pretty sure modified code is faster than branching. I remember
> > > cmov had performance issues esp. on Pentuim-4 not sure how new intel
> > cpu's
> > > perform.
> > >
> > Pretty sure isn't sure.  Theres no point in code churn if theres no obvious
> > advantage.  Some perf tests to deomonstrate the advantage here would be
> > great.
> >
> 
>  I have used this before with the intent to avoid branching and it was
> part of other changes I did for performance improvement in our code.
> 

Then it should be pretty easy to provide the perf data demonstrating the
advantage in this code.

> >
> > > > })
> > > > >
> > > > >  /**
> > > > > @@ -268,7 +268,7 @@ rte_align64pow2(uint64_t v)  #define RTE_MAX(a,
> > b)
> > > > ({ \
> > > > > typeof (a) _a = (a); \
> > > > > typeof (b) _b = (b); \
> > > > > -   _a > _b ? _a : _b; \
> > > > > +   _a ^ ((_a ^ _b) & -(_a < _b)); \
> > > > Same as above
> > > >
> > > >  Same as above.
> > >
> > > > > })
> > > > >
> > > > >  /*** Other general functions / macros / diff --git
> > > > > a/lib/librte_pmd_e1000/igb_pf.c b/lib/librte_pmd_e1000/igb_pf.c index
> > > > > bc3816a..546499c 100644
> > > > > --- a/lib/librte_pmd_e1000/igb_pf.c
> > > > > +++ b/lib/librte_pmd_e1000/igb_pf.c
> > > > > @@ -321,11 +321,11 @@ igb_vf_set_mac_addr(struct rte_eth_dev *dev,
> > > > uint32_t
> > > > > vf, uint32_t *msgbuf)  static int  igb_vf_set_multicast(struct
> > > > rte_eth_dev
> > > > > *dev, __rte_unused uint32_t vf, uint32_t *msgbuf)  {
> > > > > -   int i;
> > > > > +   int16_t i;
> > > > > uint32_t vector_bit;
> > > > > uint32_t vector_reg;
> > > > > uint32_t mta_reg;
> > > > > -   int entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
> > > > > +   int32_t entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
> > > > > E1000_VT_MSGINFO_SHIFT;
> > > > NAK, this has nothing to do with the included changelog
> > > >
> > >
> > >  It does, it causes compilation errors such as
> > >
> > > /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c: In function
> > > \u2018igb_pf_mbx_process\u2019:
> > > /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c:350:23: error: array
> > > subscript is above array bounds [-Werror=array-bounds]
> > >vfinfo->vf_mc_hashes[i] = hash_list[i];
> > >^
> > > cc1: all warnings being treated as errors
> > >
> > > Also it is always better to use explicit int definitions esp. for 64bit
> > > systems.
> > >
> >
> > This is your changelog:
> > =
> > Subject: [PATCH] Minor fixes in rte_common.h file.
> >
> > Fix rte_is_power_of_2 since 0 is not.
> > Avoid branching instructions in RTE_MAX and RTE_MIN
> > =
> >
> > Nowhere does your changelog indicate that you are fixing compliation
> > errors.
> > That would in and of itself be far more serious that making micro
> > optimizations.
> > If you want to fix build breaks, great, please do, but send a patch that
> > clearly
> > indicates what the break is and how your fix

[dpdk-dev] [PATCH] Minor fixes in rte_common.h file.

2014-12-19 Thread Ravi Kerur

On Thu, Dec 18, 2014 at 11:07 AM, Neil Horman  wrote:

> On Wed, Dec 17, 2014 at 08:40:17AM -0800, Ravi Kerur wrote:
> > On Tue, Dec 16, 2014 at 1:40 PM, Neil Horman 
> wrote:
> > >
> > > On Tue, Dec 16, 2014 at 08:46:51AM -0800, Ravi Kerur wrote:
> > > > On Sat, Dec 13, 2014 at 2:39 AM, Neil Horman 
> > > wrote:
> > > > >
> > > > > On Fri, Dec 12, 2014 at 03:04:34PM -0800, r k wrote:
> > > > > > Subject: [PATCH] Minor fixes in rte_common.h file.
> > > > > >
> > > > > > Fix rte_is_power_of_2 since 0 is not.
> > > > > > Avoid branching instructions in RTE_MAX and RTE_MIN.
> > > > > >
> > > > > > Signed-off-by: Ravi Kerur 
> > > > > > ---
> > > > > >  lib/librte_eal/common/include/rte_common.h | 6 +++---
> > > > > >  lib/librte_pmd_e1000/igb_pf.c  | 4 ++--
> > > > > >  lib/librte_pmd_ixgbe/ixgbe_pf.c| 4 ++--
> > > > > >  3 files changed, 7 insertions(+), 7 deletions(-)
> > > > > >
> > > > > > diff --git a/lib/librte_eal/common/include/rte_common.h
> > > > > > b/lib/librte_eal/common/include/rte_common.h
> > > > > > index 921b91f..e163f35 100644
> > > > > > --- a/lib/librte_eal/common/include/rte_common.h
> > > > > > +++ b/lib/librte_eal/common/include/rte_common.h
> > > > > > @@ -203,7 +203,7 @@ extern int RTE_BUILD_BUG_ON_detected_error;
> > > static
> > > > > > inline int  rte_is_power_of_2(uint32_t n)  {
> > > > > > -   return ((n-1) & n) == 0;
> > > > > > +   return n && !(n & (n - 1));
> > > > > >  }
> > > > > >
> > > > > >  /**
> > > > > > @@ -259,7 +259,7 @@ rte_align64pow2(uint64_t v)  #define
> RTE_MIN(a,
> > > b)
> > > > > ({ \
> > > > > > typeof (a) _a = (a); \
> > > > > > typeof (b) _b = (b); \
> > > > > > -   _a < _b ? _a : _b; \
> > > > > > +_b ^ ((_a ^ _b) & -(_a < _b)); \
> > > > > Are you sure this is actually faster than the branch version?  What
> > > about
> > > > > using
> > > > > a cmov instead?
> > > > >
> > > > >
> > > >  i am pretty sure modified code is faster than branching. I
> remember
> > > > cmov had performance issues esp. on Pentuim-4 not sure how new intel
> > > cpu's
> > > > perform.
> > > >
> > > Pretty sure isn't sure.  Theres no point in code churn if theres no
> obvious
> > > advantage.  Some perf tests to deomonstrate the advantage here would be
> > > great.
> > >
> >
> >  I have used this before with the intent to avoid branching and it
> was
> > part of other changes I did for performance improvement in our code.
> >
>
> Then it should be pretty easy to provide the perf data demonstrating the
> advantage in this code.
>
>
 I decided to drop this change because

1. DPDK manual suggests gcc version 4.5.x or greater
2. I was testing code against gcc 4.4.6 (which generated cmp/jump
instructions) and Konstantin showed using gcc 4.8.3 it generates cmov
instructions

If you think it should be pursued further let me know. I can look into
difference between the code generated for both using gcc > 4.5 .x and
update.

Thanks,
Ravi


> > >
> > > > > })
> > > > > >
> > > > > >  /**
> > > > > > @@ -268,7 +268,7 @@ rte_align64pow2(uint64_t v)  #define
> RTE_MAX(a,
> > > b)
> > > > > ({ \
> > > > > > typeof (a) _a = (a); \
> > > > > > typeof (b) _b = (b); \
> > > > > > -   _a > _b ? _a : _b; \
> > > > > > +   _a ^ ((_a ^ _b) & -(_a < _b)); \
> > > > > Same as above
> > > > >
> > > > >  Same as above.
> > > >
> > > > > > })
> > > > > >
> > > > > >  /*** Other general functions / macros / diff
> --git
> > > > > > a/lib/librte_pmd_e1000/igb_pf.c b/lib/librte_pmd_e1000/igb_pf.c
> index
> > > > > > bc3816a..546499c 100644
> > > > > > --- a/lib/librte_pmd_e1000/igb_pf.c
> > > > > > +++ b/lib/librte_pmd_e1000/igb_pf.c
> > > > > > @@ -321,11 +321,11 @@ igb_vf_set_mac_addr(struct rte_eth_dev
> *dev,
> > > > > uint32_t
> > > > > > vf, uint32_t *msgbuf)  static int  igb_vf_set_multicast(struct
> > > > > rte_eth_dev
> > > > > > *dev, __rte_unused uint32_t vf, uint32_t *msgbuf)  {
> > > > > > -   int i;
> > > > > > +   int16_t i;
> > > > > > uint32_t vector_bit;
> > > > > > uint32_t vector_reg;
> > > > > > uint32_t mta_reg;
> > > > > > -   int entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
> > > > > > +   int32_t entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >>
> > > > > > E1000_VT_MSGINFO_SHIFT;
> > > > > NAK, this has nothing to do with the included changelog
> > > > >
> > > >
> > > >  It does, it causes compilation errors such as
> > > >
> > > > /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c: In function
> > > > \u2018igb_pf_mbx_process\u2019:
> > > > /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c:350:23: error:
> array
> > > > subscript is above array bounds [-Werror=array-bounds]
> > > >vfinfo->vf_mc_hashes[i] = hash_list[i];
> > > >^
> > > > cc1: all warnings being treated as errors
> > > >
> > > > Also it is always better

[dpdk-dev] [PATCH] Minor fixes in rte_common.h file.

[dpdk-dev] [PATCH] Minor fixes in rte_common.h file.

[dpdk-dev] [PATCH] Minor fixes in rte_common.h file.

[dpdk-dev] [PATCH] Minor fixes in rte_common.h file.

[dpdk-dev] [PATCH] Minor fixes in rte_common.h file.

[dpdk-dev] [PATCH] Minor fixes in rte_common.h file.

[dpdk-dev] [PATCH] Minor fixes in rte_common.h file.

[dpdk-dev] [PATCH] Minor fixes in rte_common.h file.

[dpdk-dev] [PATCH] Minor fixes in rte_common.h file.

[dpdk-dev] [PATCH] Minor fixes in rte_common.h file.

[dpdk-dev] [PATCH] Minor fixes in rte_common.h file.

[dpdk-dev] [PATCH] Minor fixes in rte_common.h file.

12 matches

Site Navigation

Mail list logo

Footer information