[dpdk-dev] [PATCH] Minor fixes in rte_common.h file.
Ravi Kerur (1): Minor fixes in rte_common.h file. lib/librte_eal/common/include/rte_common.h | 6 +++--- lib/librte_pmd_e1000/igb_pf.c | 4 ++-- lib/librte_pmd_ixgbe/ixgbe_pf.c| 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) -- 1.9.1
[dpdk-dev] [PATCH] Minor fixes in rte_common.h file.
Subject: [PATCH] Minor fixes in rte_common.h file. Fix rte_is_power_of_2 since 0 is not. Avoid branching instructions in RTE_MAX and RTE_MIN. Signed-off-by: Ravi Kerur --- lib/librte_eal/common/include/rte_common.h | 6 +++--- lib/librte_pmd_e1000/igb_pf.c | 4 ++-- lib/librte_pmd_ixgbe/ixgbe_pf.c| 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h index 921b91f..e163f35 100644 --- a/lib/librte_eal/common/include/rte_common.h +++ b/lib/librte_eal/common/include/rte_common.h @@ -203,7 +203,7 @@ extern int RTE_BUILD_BUG_ON_detected_error; static inline int rte_is_power_of_2(uint32_t n) { - return ((n-1) & n) == 0; + return n && !(n & (n - 1)); } /** @@ -259,7 +259,7 @@ rte_align64pow2(uint64_t v) #define RTE_MIN(a, b) ({ \ typeof (a) _a = (a); \ typeof (b) _b = (b); \ - _a < _b ? _a : _b; \ +_b ^ ((_a ^ _b) & -(_a < _b)); \ }) /** @@ -268,7 +268,7 @@ rte_align64pow2(uint64_t v) #define RTE_MAX(a, b) ({ \ typeof (a) _a = (a); \ typeof (b) _b = (b); \ - _a > _b ? _a : _b; \ + _a ^ ((_a ^ _b) & -(_a < _b)); \ }) /*** Other general functions / macros / diff --git a/lib/librte_pmd_e1000/igb_pf.c b/lib/librte_pmd_e1000/igb_pf.c index bc3816a..546499c 100644 --- a/lib/librte_pmd_e1000/igb_pf.c +++ b/lib/librte_pmd_e1000/igb_pf.c @@ -321,11 +321,11 @@ igb_vf_set_mac_addr(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf) static int igb_vf_set_multicast(struct rte_eth_dev *dev, __rte_unused uint32_t vf, uint32_t *msgbuf) { - int i; + int16_t i; uint32_t vector_bit; uint32_t vector_reg; uint32_t mta_reg; - int entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> + int32_t entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT; uint16_t *hash_list = (uint16_t *)&msgbuf[1]; struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c b/lib/librte_pmd_ixgbe/ixgbe_pf.c index 51da1fd..426caf9 100644 --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c @@ -390,7 +390,7 @@ ixgbe_vf_set_multicast(struct rte_eth_dev *dev, __rte_unused uint32_t vf, uint32 struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct ixgbe_vf_info *vfinfo = *(IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private)); - int nb_entries = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >> + int32_t nb_entries = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >> IXGBE_VT_MSGINFO_SHIFT; uint16_t *hash_list = (uint16_t *)&msgbuf[1]; uint32_t mta_idx; @@ -399,7 +399,7 @@ ixgbe_vf_set_multicast(struct rte_eth_dev *dev, __rte_unused uint32_t vf, uint32 const uint32_t IXGBE_MTA_BIT_SHIFT = 5; const uint32_t IXGBE_MTA_BIT_MASK = (0x1 << IXGBE_MTA_BIT_SHIFT) - 1; uint32_t reg_val; - int i; + int16_t i; /* only so many hash values supported */ nb_entries = RTE_MIN(nb_entries, IXGBE_MAX_VF_MC_ENTRIES); -- 1.9.1
[dpdk-dev] [PATCH] Minor fixes in rte_common.h file.
On Fri, Dec 12, 2014 at 03:04:34PM -0800, r k wrote: > Subject: [PATCH] Minor fixes in rte_common.h file. > > Fix rte_is_power_of_2 since 0 is not. > Avoid branching instructions in RTE_MAX and RTE_MIN. > > Signed-off-by: Ravi Kerur > --- > lib/librte_eal/common/include/rte_common.h | 6 +++--- > lib/librte_pmd_e1000/igb_pf.c | 4 ++-- > lib/librte_pmd_ixgbe/ixgbe_pf.c| 4 ++-- > 3 files changed, 7 insertions(+), 7 deletions(-) > > diff --git a/lib/librte_eal/common/include/rte_common.h > b/lib/librte_eal/common/include/rte_common.h > index 921b91f..e163f35 100644 > --- a/lib/librte_eal/common/include/rte_common.h > +++ b/lib/librte_eal/common/include/rte_common.h > @@ -203,7 +203,7 @@ extern int RTE_BUILD_BUG_ON_detected_error; static > inline int rte_is_power_of_2(uint32_t n) { > - return ((n-1) & n) == 0; > + return n && !(n & (n - 1)); > } > > /** > @@ -259,7 +259,7 @@ rte_align64pow2(uint64_t v) #define RTE_MIN(a, b) ({ \ > typeof (a) _a = (a); \ > typeof (b) _b = (b); \ > - _a < _b ? _a : _b; \ > +_b ^ ((_a ^ _b) & -(_a < _b)); \ Are you sure this is actually faster than the branch version? What about using a cmov instead? > }) > > /** > @@ -268,7 +268,7 @@ rte_align64pow2(uint64_t v) #define RTE_MAX(a, b) ({ \ > typeof (a) _a = (a); \ > typeof (b) _b = (b); \ > - _a > _b ? _a : _b; \ > + _a ^ ((_a ^ _b) & -(_a < _b)); \ Same as above > }) > > /*** Other general functions / macros / diff --git > a/lib/librte_pmd_e1000/igb_pf.c b/lib/librte_pmd_e1000/igb_pf.c index > bc3816a..546499c 100644 > --- a/lib/librte_pmd_e1000/igb_pf.c > +++ b/lib/librte_pmd_e1000/igb_pf.c > @@ -321,11 +321,11 @@ igb_vf_set_mac_addr(struct rte_eth_dev *dev, uint32_t > vf, uint32_t *msgbuf) static int igb_vf_set_multicast(struct rte_eth_dev > *dev, __rte_unused uint32_t vf, uint32_t *msgbuf) { > - int i; > + int16_t i; > uint32_t vector_bit; > uint32_t vector_reg; > uint32_t mta_reg; > - int entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> > + int32_t entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> > E1000_VT_MSGINFO_SHIFT; NAK, this has nothing to do with the included changelog > uint16_t *hash_list = (uint16_t *)&msgbuf[1]; > struct e1000_hw *hw = > E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); > diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c > b/lib/librte_pmd_ixgbe/ixgbe_pf.c index 51da1fd..426caf9 100644 > --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c > +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c > @@ -390,7 +390,7 @@ ixgbe_vf_set_multicast(struct rte_eth_dev *dev, > __rte_unused uint32_t vf, uint32 > struct ixgbe_hw *hw = > IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); > struct ixgbe_vf_info *vfinfo = > *(IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private)); > - int nb_entries = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >> > + int32_t nb_entries = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >> > IXGBE_VT_MSGINFO_SHIFT; ditto > uint16_t *hash_list = (uint16_t *)&msgbuf[1]; > uint32_t mta_idx; > @@ -399,7 +399,7 @@ ixgbe_vf_set_multicast(struct rte_eth_dev *dev, > __rte_unused uint32_t vf, uint32 > const uint32_t IXGBE_MTA_BIT_SHIFT = 5; > const uint32_t IXGBE_MTA_BIT_MASK = (0x1 << IXGBE_MTA_BIT_SHIFT) - > 1; > uint32_t reg_val; > - int i; > + int16_t i; ditto > > /* only so many hash values supported */ > nb_entries = RTE_MIN(nb_entries, IXGBE_MAX_VF_MC_ENTRIES); > -- > 1.9.1 >
[dpdk-dev] [PATCH] Minor fixes in rte_common.h file.
On Sat, Dec 13, 2014 at 2:39 AM, Neil Horman wrote: > > On Fri, Dec 12, 2014 at 03:04:34PM -0800, r k wrote: > > Subject: [PATCH] Minor fixes in rte_common.h file. > > > > Fix rte_is_power_of_2 since 0 is not. > > Avoid branching instructions in RTE_MAX and RTE_MIN. > > > > Signed-off-by: Ravi Kerur > > --- > > lib/librte_eal/common/include/rte_common.h | 6 +++--- > > lib/librte_pmd_e1000/igb_pf.c | 4 ++-- > > lib/librte_pmd_ixgbe/ixgbe_pf.c| 4 ++-- > > 3 files changed, 7 insertions(+), 7 deletions(-) > > > > diff --git a/lib/librte_eal/common/include/rte_common.h > > b/lib/librte_eal/common/include/rte_common.h > > index 921b91f..e163f35 100644 > > --- a/lib/librte_eal/common/include/rte_common.h > > +++ b/lib/librte_eal/common/include/rte_common.h > > @@ -203,7 +203,7 @@ extern int RTE_BUILD_BUG_ON_detected_error; static > > inline int rte_is_power_of_2(uint32_t n) { > > - return ((n-1) & n) == 0; > > + return n && !(n & (n - 1)); > > } > > > > /** > > @@ -259,7 +259,7 @@ rte_align64pow2(uint64_t v) #define RTE_MIN(a, b) > ({ \ > > typeof (a) _a = (a); \ > > typeof (b) _b = (b); \ > > - _a < _b ? _a : _b; \ > > +_b ^ ((_a ^ _b) & -(_a < _b)); \ > Are you sure this is actually faster than the branch version? What about > using > a cmov instead? > > i am pretty sure modified code is faster than branching. I remember cmov had performance issues esp. on Pentuim-4 not sure how new intel cpu's perform. > }) > > > > /** > > @@ -268,7 +268,7 @@ rte_align64pow2(uint64_t v) #define RTE_MAX(a, b) > ({ \ > > typeof (a) _a = (a); \ > > typeof (b) _b = (b); \ > > - _a > _b ? _a : _b; \ > > + _a ^ ((_a ^ _b) & -(_a < _b)); \ > Same as above > > Same as above. > > }) > > > > /*** Other general functions / macros / diff --git > > a/lib/librte_pmd_e1000/igb_pf.c b/lib/librte_pmd_e1000/igb_pf.c index > > bc3816a..546499c 100644 > > --- a/lib/librte_pmd_e1000/igb_pf.c > > +++ b/lib/librte_pmd_e1000/igb_pf.c > > @@ -321,11 +321,11 @@ igb_vf_set_mac_addr(struct rte_eth_dev *dev, > uint32_t > > vf, uint32_t *msgbuf) static int igb_vf_set_multicast(struct > rte_eth_dev > > *dev, __rte_unused uint32_t vf, uint32_t *msgbuf) { > > - int i; > > + int16_t i; > > uint32_t vector_bit; > > uint32_t vector_reg; > > uint32_t mta_reg; > > - int entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> > > + int32_t entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> > > E1000_VT_MSGINFO_SHIFT; > NAK, this has nothing to do with the included changelog > It does, it causes compilation errors such as /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c: In function \u2018igb_pf_mbx_process\u2019: /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c:350:23: error: array subscript is above array bounds [-Werror=array-bounds] vfinfo->vf_mc_hashes[i] = hash_list[i]; ^ cc1: all warnings being treated as errors Also it is always better to use explicit int definitions esp. for 64bit systems. > > > uint16_t *hash_list = (uint16_t *)&msgbuf[1]; > > struct e1000_hw *hw = > > E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); > > diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c > > b/lib/librte_pmd_ixgbe/ixgbe_pf.c index 51da1fd..426caf9 100644 > > --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c > > +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c > > @@ -390,7 +390,7 @@ ixgbe_vf_set_multicast(struct rte_eth_dev *dev, > > __rte_unused uint32_t vf, uint32 > > struct ixgbe_hw *hw = > > IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); > > struct ixgbe_vf_info *vfinfo = > > *(IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private)); > > - int nb_entries = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >> > > + int32_t nb_entries = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >> > > IXGBE_VT_MSGINFO_SHIFT; > ditto > > uint16_t *hash_list = (uint16_t *)&msgbuf[1]; > > uint32_t mta_idx; > > @@ -399,7 +399,7 @@ ixgbe_vf_set_multicast(struct rte_eth_dev *dev, > > __rte_unused uint32_t vf, uint32 > > const uint32_t IXGBE_MTA_BIT_SHIFT = 5; > > const uint32_t IXGBE_MTA_BIT_MASK = (0x1 << IXGBE_MTA_BIT_SHIFT) > - > > 1; > > uint32_t reg_val; > > - int i; > > + int16_t i; > ditto > > Same as above. > > > > /* only so many hash values supported */ > > nb_entries = RTE_MIN(nb_entries, IXGBE_MAX_VF_MC_ENTRIES); > > -- > > 1.9.1 > > >
[dpdk-dev] [PATCH] Minor fixes in rte_common.h file.
> -Original Message- > From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Ravi Kerur > Sent: Tuesday, December 16, 2014 4:47 PM > To: Neil Horman > Cc: dev at dpdk.org > Subject: Re: [dpdk-dev] [PATCH] Minor fixes in rte_common.h file. > > On Sat, Dec 13, 2014 at 2:39 AM, Neil Horman wrote: > > > > On Fri, Dec 12, 2014 at 03:04:34PM -0800, r k wrote: > > > Subject: [PATCH] Minor fixes in rte_common.h file. > > > > > > Fix rte_is_power_of_2 since 0 is not. > > > Avoid branching instructions in RTE_MAX and RTE_MIN. > > > > > > Signed-off-by: Ravi Kerur > > > --- > > > lib/librte_eal/common/include/rte_common.h | 6 +++--- > > > lib/librte_pmd_e1000/igb_pf.c | 4 ++-- > > > lib/librte_pmd_ixgbe/ixgbe_pf.c| 4 ++-- > > > 3 files changed, 7 insertions(+), 7 deletions(-) > > > > > > diff --git a/lib/librte_eal/common/include/rte_common.h > > > b/lib/librte_eal/common/include/rte_common.h > > > index 921b91f..e163f35 100644 > > > --- a/lib/librte_eal/common/include/rte_common.h > > > +++ b/lib/librte_eal/common/include/rte_common.h > > > @@ -203,7 +203,7 @@ extern int RTE_BUILD_BUG_ON_detected_error; static > > > inline int rte_is_power_of_2(uint32_t n) { > > > - return ((n-1) & n) == 0; > > > + return n && !(n & (n - 1)); > > > } > > > > > > /** > > > @@ -259,7 +259,7 @@ rte_align64pow2(uint64_t v) #define RTE_MIN(a, b) > > ({ \ > > > typeof (a) _a = (a); \ > > > typeof (b) _b = (b); \ > > > - _a < _b ? _a : _b; \ > > > +_b ^ ((_a ^ _b) & -(_a < _b)); \ > > Are you sure this is actually faster than the branch version? What about > > using > > a cmov instead? > > > > > i am pretty sure modified code is faster than branching. I remember > cmov had performance issues esp. on Pentuim-4 not sure how new intel cpu's > perform. I also think most modern compilers are smart enough to avoid any branching here and will use cmov instead. And we are way ahead of Pentium 4 times these days. Konstantin > > > }) > > > > > > /** > > > @@ -268,7 +268,7 @@ rte_align64pow2(uint64_t v) #define RTE_MAX(a, b) > > ({ \ > > > typeof (a) _a = (a); \ > > > typeof (b) _b = (b); \ > > > - _a > _b ? _a : _b; \ > > > + _a ^ ((_a ^ _b) & -(_a < _b)); \ > > Same as above > > > > Same as above. > > > > }) > > > > > > /*** Other general functions / macros / diff --git > > > a/lib/librte_pmd_e1000/igb_pf.c b/lib/librte_pmd_e1000/igb_pf.c index > > > bc3816a..546499c 100644 > > > --- a/lib/librte_pmd_e1000/igb_pf.c > > > +++ b/lib/librte_pmd_e1000/igb_pf.c > > > @@ -321,11 +321,11 @@ igb_vf_set_mac_addr(struct rte_eth_dev *dev, > > uint32_t > > > vf, uint32_t *msgbuf) static int igb_vf_set_multicast(struct > > rte_eth_dev > > > *dev, __rte_unused uint32_t vf, uint32_t *msgbuf) { > > > - int i; > > > + int16_t i; > > > uint32_t vector_bit; > > > uint32_t vector_reg; > > > uint32_t mta_reg; > > > - int entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> > > > + int32_t entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> > > > E1000_VT_MSGINFO_SHIFT; > > NAK, this has nothing to do with the included changelog > > > > It does, it causes compilation errors such as > > /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c: In function > \u2018igb_pf_mbx_process\u2019: > /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c:350:23: error: array > subscript is above array bounds [-Werror=array-bounds] >vfinfo->vf_mc_hashes[i] = hash_list[i]; >^ > cc1: all warnings being treated as errors > > Also it is always better to use explicit int definitions esp. for 64bit > systems. > > > > > > > > uint16_t *hash_list = (uint16_t *)&msgbuf[1]; > > > struct e1000_hw *hw = > > > E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); > > > diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c > > > b/lib/librte_pmd_ixgbe/ixgbe_pf.c index 51da1fd..426caf9 100644 > > > --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c > > > +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.
[dpdk-dev] [PATCH] Minor fixes in rte_common.h file.
On Tue, Dec 16, 2014 at 9:23 AM, Ananyev, Konstantin < konstantin.ananyev at intel.com> wrote: > > > > > -Original Message- > > From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Ravi Kerur > > Sent: Tuesday, December 16, 2014 4:47 PM > > To: Neil Horman > > Cc: dev at dpdk.org > > Subject: Re: [dpdk-dev] [PATCH] Minor fixes in rte_common.h file. > > > > On Sat, Dec 13, 2014 at 2:39 AM, Neil Horman > wrote: > > > > > > On Fri, Dec 12, 2014 at 03:04:34PM -0800, r k wrote: > > > > Subject: [PATCH] Minor fixes in rte_common.h file. > > > > > > > > Fix rte_is_power_of_2 since 0 is not. > > > > Avoid branching instructions in RTE_MAX and RTE_MIN. > > > > > > > > Signed-off-by: Ravi Kerur > > > > --- > > > > lib/librte_eal/common/include/rte_common.h | 6 +++--- > > > > lib/librte_pmd_e1000/igb_pf.c | 4 ++-- > > > > lib/librte_pmd_ixgbe/ixgbe_pf.c| 4 ++-- > > > > 3 files changed, 7 insertions(+), 7 deletions(-) > > > > > > > > diff --git a/lib/librte_eal/common/include/rte_common.h > > > > b/lib/librte_eal/common/include/rte_common.h > > > > index 921b91f..e163f35 100644 > > > > --- a/lib/librte_eal/common/include/rte_common.h > > > > +++ b/lib/librte_eal/common/include/rte_common.h > > > > @@ -203,7 +203,7 @@ extern int RTE_BUILD_BUG_ON_detected_error; > static > > > > inline int rte_is_power_of_2(uint32_t n) { > > > > - return ((n-1) & n) == 0; > > > > + return n && !(n & (n - 1)); > > > > } > > > > > > > > /** > > > > @@ -259,7 +259,7 @@ rte_align64pow2(uint64_t v) #define RTE_MIN(a, > b) > > > ({ \ > > > > typeof (a) _a = (a); \ > > > > typeof (b) _b = (b); \ > > > > - _a < _b ? _a : _b; \ > > > > +_b ^ ((_a ^ _b) & -(_a < _b)); \ > > > Are you sure this is actually faster than the branch version? What > about > > > using > > > a cmov instead? > > > > > > > > i am pretty sure modified code is faster than branching. I remember > > cmov had performance issues esp. on Pentuim-4 not sure how new intel > cpu's > > perform. > > I also think most modern compilers are smart enough to avoid any branching > here and will use cmov instead. > And we are way ahead of Pentium 4 times these days. Konstantin > Konstantin, Can you please elaborate, is it something done automatically with Intel's icc compiler? My understanding is branch prediction can be influenced only by using compiler builtin i.e. __builtin_expect() , without this compiler will generate regular instructions(cmp/jump instructions). I wrote small program and compiled with gcc -02/-03, don't see cmov instruction. > > > > > }) > > > > > > > > /** > > > > @@ -268,7 +268,7 @@ rte_align64pow2(uint64_t v) #define RTE_MAX(a, > b) > > > ({ \ > > > > typeof (a) _a = (a); \ > > > > typeof (b) _b = (b); \ > > > > - _a > _b ? _a : _b; \ > > > > + _a ^ ((_a ^ _b) & -(_a < _b)); \ > > > Same as above > > > > > > Same as above. > > > > > > }) > > > > > > > > /*** Other general functions / macros / diff --git > > > > a/lib/librte_pmd_e1000/igb_pf.c b/lib/librte_pmd_e1000/igb_pf.c index > > > > bc3816a..546499c 100644 > > > > --- a/lib/librte_pmd_e1000/igb_pf.c > > > > +++ b/lib/librte_pmd_e1000/igb_pf.c > > > > @@ -321,11 +321,11 @@ igb_vf_set_mac_addr(struct rte_eth_dev *dev, > > > uint32_t > > > > vf, uint32_t *msgbuf) static int igb_vf_set_multicast(struct > > > rte_eth_dev > > > > *dev, __rte_unused uint32_t vf, uint32_t *msgbuf) { > > > > - int i; > > > > + int16_t i; > > > > uint32_t vector_bit; > > > > uint32_t vector_reg; > > > > uint32_t mta_reg; > > > > - int entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> > > > > + int32_t entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> > > > > E1000_VT_MSGINFO_SHIFT; > > > NAK, this has nothing to do with the included changelog > > > > > > >
[dpdk-dev] [PATCH] Minor fixes in rte_common.h file.
On Tue, Dec 16, 2014 at 08:46:51AM -0800, Ravi Kerur wrote: > On Sat, Dec 13, 2014 at 2:39 AM, Neil Horman wrote: > > > > On Fri, Dec 12, 2014 at 03:04:34PM -0800, r k wrote: > > > Subject: [PATCH] Minor fixes in rte_common.h file. > > > > > > Fix rte_is_power_of_2 since 0 is not. > > > Avoid branching instructions in RTE_MAX and RTE_MIN. > > > > > > Signed-off-by: Ravi Kerur > > > --- > > > lib/librte_eal/common/include/rte_common.h | 6 +++--- > > > lib/librte_pmd_e1000/igb_pf.c | 4 ++-- > > > lib/librte_pmd_ixgbe/ixgbe_pf.c| 4 ++-- > > > 3 files changed, 7 insertions(+), 7 deletions(-) > > > > > > diff --git a/lib/librte_eal/common/include/rte_common.h > > > b/lib/librte_eal/common/include/rte_common.h > > > index 921b91f..e163f35 100644 > > > --- a/lib/librte_eal/common/include/rte_common.h > > > +++ b/lib/librte_eal/common/include/rte_common.h > > > @@ -203,7 +203,7 @@ extern int RTE_BUILD_BUG_ON_detected_error; static > > > inline int rte_is_power_of_2(uint32_t n) { > > > - return ((n-1) & n) == 0; > > > + return n && !(n & (n - 1)); > > > } > > > > > > /** > > > @@ -259,7 +259,7 @@ rte_align64pow2(uint64_t v) #define RTE_MIN(a, b) > > ({ \ > > > typeof (a) _a = (a); \ > > > typeof (b) _b = (b); \ > > > - _a < _b ? _a : _b; \ > > > +_b ^ ((_a ^ _b) & -(_a < _b)); \ > > Are you sure this is actually faster than the branch version? What about > > using > > a cmov instead? > > > > > i am pretty sure modified code is faster than branching. I remember > cmov had performance issues esp. on Pentuim-4 not sure how new intel cpu's > perform. > Pretty sure isn't sure. Theres no point in code churn if theres no obvious advantage. Some perf tests to deomonstrate the advantage here would be great. > > }) > > > > > > /** > > > @@ -268,7 +268,7 @@ rte_align64pow2(uint64_t v) #define RTE_MAX(a, b) > > ({ \ > > > typeof (a) _a = (a); \ > > > typeof (b) _b = (b); \ > > > - _a > _b ? _a : _b; \ > > > + _a ^ ((_a ^ _b) & -(_a < _b)); \ > > Same as above > > > > Same as above. > > > > }) > > > > > > /*** Other general functions / macros / diff --git > > > a/lib/librte_pmd_e1000/igb_pf.c b/lib/librte_pmd_e1000/igb_pf.c index > > > bc3816a..546499c 100644 > > > --- a/lib/librte_pmd_e1000/igb_pf.c > > > +++ b/lib/librte_pmd_e1000/igb_pf.c > > > @@ -321,11 +321,11 @@ igb_vf_set_mac_addr(struct rte_eth_dev *dev, > > uint32_t > > > vf, uint32_t *msgbuf) static int igb_vf_set_multicast(struct > > rte_eth_dev > > > *dev, __rte_unused uint32_t vf, uint32_t *msgbuf) { > > > - int i; > > > + int16_t i; > > > uint32_t vector_bit; > > > uint32_t vector_reg; > > > uint32_t mta_reg; > > > - int entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> > > > + int32_t entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> > > > E1000_VT_MSGINFO_SHIFT; > > NAK, this has nothing to do with the included changelog > > > > It does, it causes compilation errors such as > > /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c: In function > \u2018igb_pf_mbx_process\u2019: > /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c:350:23: error: array > subscript is above array bounds [-Werror=array-bounds] >vfinfo->vf_mc_hashes[i] = hash_list[i]; >^ > cc1: all warnings being treated as errors > > Also it is always better to use explicit int definitions esp. for 64bit > systems. > This is your changelog: = Subject: [PATCH] Minor fixes in rte_common.h file. Fix rte_is_power_of_2 since 0 is not. Avoid branching instructions in RTE_MAX and RTE_MIN = Nowhere does your changelog indicate that you are fixing compliation errors. That would in and of itself be far more serious that making micro optimizations. If you want to fix build breaks, great, please do, but send a patch that clearly indicates what the break is and how your fixing it. Don't just toss it in with whatever other work you happen to be doing.
[dpdk-dev] [PATCH] Minor fixes in rte_common.h file.
> > From: Ravi Kerur [mailto:rkerur at gmail.com] > Sent: Tuesday, December 16, 2014 8:14 PM > To: Ananyev, Konstantin > Cc: Neil Horman; dev at dpdk.org > Subject: Re: [dpdk-dev] [PATCH] Minor fixes in rte_common.h file. > > > > On Tue, Dec 16, 2014 at 9:23 AM, Ananyev, Konstantin intel.com> wrote: > > > > -Original Message- > > From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Ravi Kerur > > Sent: Tuesday, December 16, 2014 4:47 PM > > To: Neil Horman > > Cc: dev at dpdk.org > > Subject: Re: [dpdk-dev] [PATCH] Minor fixes in rte_common.h file. > > > > On Sat, Dec 13, 2014 at 2:39 AM, Neil Horman > > wrote: > > > > > > On Fri, Dec 12, 2014 at 03:04:34PM -0800, r k wrote: > > > > Subject: [PATCH] Minor fixes in rte_common.h file. > > > > > > > > Fix rte_is_power_of_2 since 0 is not. > > > > Avoid branching instructions in RTE_MAX and RTE_MIN. > > > > > > > > Signed-off-by: Ravi Kerur > > > > --- > > > >? lib/librte_eal/common/include/rte_common.h | 6 +++--- > > > >? lib/librte_pmd_e1000/igb_pf.c? ? ? ? ? ? ? | 4 ++-- > > > >? lib/librte_pmd_ixgbe/ixgbe_pf.c? ? ? ? ? ? | 4 ++-- > > > >? 3 files changed, 7 insertions(+), 7 deletions(-) > > > > > > > > diff --git a/lib/librte_eal/common/include/rte_common.h > > > > b/lib/librte_eal/common/include/rte_common.h > > > > index 921b91f..e163f35 100644 > > > > --- a/lib/librte_eal/common/include/rte_common.h > > > > +++ b/lib/librte_eal/common/include/rte_common.h > > > > @@ -203,7 +203,7 @@ extern int RTE_BUILD_BUG_ON_detected_error;? static > > > > inline int? rte_is_power_of_2(uint32_t n)? { > > > > -? ? ? ?return ((n-1) & n) == 0; > > > > +? ? ? ?return n && !(n & (n - 1)); > > > >? } > > > > > > > >? /** > > > > @@ -259,7 +259,7 @@ rte_align64pow2(uint64_t v)? #define RTE_MIN(a, b) > > > ({ \ > > > >? ? ? ? ? ? ? ? ?typeof (a) _a = (a); \ > > > >? ? ? ? ? ? ? ? ?typeof (b) _b = (b); \ > > > > -? ? ? ? ? ? ? ?_a < _b ? _a : _b; \ > > > > +? ? ? ? ? ? ? ? _b ^ ((_a ^ _b) & -(_a < _b)); \ > > > Are you sure this is actually faster than the branch version?? What about > > > using > > > a cmov instead? > > > > > > > > i am pretty sure modified code is faster than branching. I remember > > cmov had performance issues esp. on Pentuim-4 not sure how new intel cpu's > > perform. > I also think most modern compilers are smart enough to avoid any branching > here and will use cmov instead. > And we are way ahead of Pentium 4 times these days. > Konstantin > > Konstantin, ?Can you please elaborate, is it something done automatically > with Intel's icc compiler? My understanding is branch > prediction can be influenced only by using compiler builtin i.e. > __builtin_expect() , without this compiler will generate regular > instructions(cmp/jump instructions). I wrote small program and compiled with > gcc -02/-03, don't see cmov instruction. I am saying that there is probably no need to modify these macros. On IA , for constructions like: "_a < _b ? _a : _b;" modern compilers in many cases will avoid any branches and emit cmov instead. $ cat tcmv1.c #include #include #define RTE_MIN(a, b) ({ \ typeof (a) _a = (a); \ typeof (b) _b = (b); \ _a < _b ? _a : _b; \ }) int fxmini32(int a, int b) { return RTE_MIN(a, b); } int fxminu64(uint64_t a, uint64_t b) { return RTE_MIN(a, b); } $gcc -O3 -m64 -S tcmv1.c $ cat tcmv1.s .file "tcmv1.c" .text .p2align 4,,15 .globl fxmini32 .type fxmini32, @function fxmini32: .LFB0: .cfi_startproc cmpl%esi, %edi movl%esi, %eax cmovle %edi, %eax ret .cfi_endproc .LFE0: .size fxmini32, .-fxmini32 .p2align 4,,15 .globl fxminu64 .type fxminu64, @function fxminu64: .LFB1: .cfi_startproc cmpq%rsi, %rdi movq%rsi, %rax cmovbe %rdi, %rax ret .cfi_endproc gcc version 4.8.3 clang produces similar code. Konstantin > > > > > >? ? ? ? ?}) > > > > > > > >? /** > > > > @@ -268,7 +268,7 @@ rte_align64pow2(uint64_t v)? #define RTE_MAX(a, b) > > > ({ \ > > > >? ? ? ? ? ? ? ? ?typeof (a) _a = (a); \ > > > >? ? ? ? ? ? ? ? ?typeof (b) _b = (b);
[dpdk-dev] [PATCH] Minor fixes in rte_common.h file.
On Tue, Dec 16, 2014 at 5:05 PM, Ananyev, Konstantin < konstantin.ananyev at intel.com> wrote: > > > > > > > From: Ravi Kerur [mailto:rkerur at gmail.com] > > Sent: Tuesday, December 16, 2014 8:14 PM > > To: Ananyev, Konstantin > > Cc: Neil Horman; dev at dpdk.org > > Subject: Re: [dpdk-dev] [PATCH] Minor fixes in rte_common.h file. > > > > > > > > On Tue, Dec 16, 2014 at 9:23 AM, Ananyev, Konstantin < > konstantin.ananyev at intel.com> wrote: > > > > > > > -Original Message- > > > From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Ravi Kerur > > > Sent: Tuesday, December 16, 2014 4:47 PM > > > To: Neil Horman > > > Cc: dev at dpdk.org > > > Subject: Re: [dpdk-dev] [PATCH] Minor fixes in rte_common.h file. > > > > > > On Sat, Dec 13, 2014 at 2:39 AM, Neil Horman > wrote: > > > > > > > > On Fri, Dec 12, 2014 at 03:04:34PM -0800, r k wrote: > > > > > Subject: [PATCH] Minor fixes in rte_common.h file. > > > > > > > > > > Fix rte_is_power_of_2 since 0 is not. > > > > > Avoid branching instructions in RTE_MAX and RTE_MIN. > > > > > > > > > > Signed-off-by: Ravi Kerur > > > > > --- > > > > > lib/librte_eal/common/include/rte_common.h | 6 +++--- > > > > > lib/librte_pmd_e1000/igb_pf.c | 4 ++-- > > > > > lib/librte_pmd_ixgbe/ixgbe_pf.c| 4 ++-- > > > > > 3 files changed, 7 insertions(+), 7 deletions(-) > > > > > > > > > > diff --git a/lib/librte_eal/common/include/rte_common.h > > > > > b/lib/librte_eal/common/include/rte_common.h > > > > > index 921b91f..e163f35 100644 > > > > > --- a/lib/librte_eal/common/include/rte_common.h > > > > > +++ b/lib/librte_eal/common/include/rte_common.h > > > > > @@ -203,7 +203,7 @@ extern int RTE_BUILD_BUG_ON_detected_error; > static > > > > > inline int rte_is_power_of_2(uint32_t n) { > > > > > - return ((n-1) & n) == 0; > > > > > + return n && !(n & (n - 1)); > > > > > } > > > > > > > > > > /** > > > > > @@ -259,7 +259,7 @@ rte_align64pow2(uint64_t v) #define > RTE_MIN(a, b) > > > > ({ \ > > > > > typeof (a) _a = (a); \ > > > > > typeof (b) _b = (b); \ > > > > > - _a < _b ? _a : _b; \ > > > > > +_b ^ ((_a ^ _b) & -(_a < _b)); \ > > > > Are you sure this is actually faster than the branch version? What > about > > > > using > > > > a cmov instead? > > > > > > > > > > > i am pretty sure modified code is faster than branching. I > remember > > > cmov had performance issues esp. on Pentuim-4 not sure how new intel > cpu's > > > perform. > > I also think most modern compilers are smart enough to avoid any > branching here and will use cmov instead. > > And we are way ahead of Pentium 4 times these days. > > Konstantin > > > > Konstantin, Can you please elaborate, is it something done > automatically with Intel's icc compiler? My understanding is branch > > prediction can be influenced only by using compiler builtin i.e. > __builtin_expect() , without this compiler will generate regular > > instructions(cmp/jump instructions). I wrote small program and compiled > with gcc -02/-03, don't see cmov instruction. > > I am saying that there is probably no need to modify these macros. > On IA , for constructions like: "_a < _b ? _a : _b;" > modern compilers in many cases will avoid any branches and emit cmov > instead. > > $ cat tcmv1.c > > #include > #include > > #define RTE_MIN(a, b) ({ \ > typeof (a) _a = (a); \ > typeof (b) _b = (b); \ > _a < _b ? _a : _b; \ > }) > > int > fxmini32(int a, int b) > { > return RTE_MIN(a, b); > } > > int > fxminu64(uint64_t a, uint64_t b) > { > return RTE_MIN(a, b); > } > > $gcc -O3 -m64 -S tcmv1.c > > $ cat tcmv1.s > .file "tcmv1.c" > .text > .p2align 4,,15 > .globl fxmini32 > .type fxmini32, @function > fxmini32: > .LFB0: > .cfi_startproc > cmpl%esi, %edi > movl%esi, %eax > cmovle %
[dpdk-dev] [PATCH] Minor fixes in rte_common.h file.
On Tue, Dec 16, 2014 at 1:40 PM, Neil Horman wrote: > > On Tue, Dec 16, 2014 at 08:46:51AM -0800, Ravi Kerur wrote: > > On Sat, Dec 13, 2014 at 2:39 AM, Neil Horman > wrote: > > > > > > On Fri, Dec 12, 2014 at 03:04:34PM -0800, r k wrote: > > > > Subject: [PATCH] Minor fixes in rte_common.h file. > > > > > > > > Fix rte_is_power_of_2 since 0 is not. > > > > Avoid branching instructions in RTE_MAX and RTE_MIN. > > > > > > > > Signed-off-by: Ravi Kerur > > > > --- > > > > lib/librte_eal/common/include/rte_common.h | 6 +++--- > > > > lib/librte_pmd_e1000/igb_pf.c | 4 ++-- > > > > lib/librte_pmd_ixgbe/ixgbe_pf.c| 4 ++-- > > > > 3 files changed, 7 insertions(+), 7 deletions(-) > > > > > > > > diff --git a/lib/librte_eal/common/include/rte_common.h > > > > b/lib/librte_eal/common/include/rte_common.h > > > > index 921b91f..e163f35 100644 > > > > --- a/lib/librte_eal/common/include/rte_common.h > > > > +++ b/lib/librte_eal/common/include/rte_common.h > > > > @@ -203,7 +203,7 @@ extern int RTE_BUILD_BUG_ON_detected_error; > static > > > > inline int rte_is_power_of_2(uint32_t n) { > > > > - return ((n-1) & n) == 0; > > > > + return n && !(n & (n - 1)); > > > > } > > > > > > > > /** > > > > @@ -259,7 +259,7 @@ rte_align64pow2(uint64_t v) #define RTE_MIN(a, > b) > > > ({ \ > > > > typeof (a) _a = (a); \ > > > > typeof (b) _b = (b); \ > > > > - _a < _b ? _a : _b; \ > > > > +_b ^ ((_a ^ _b) & -(_a < _b)); \ > > > Are you sure this is actually faster than the branch version? What > about > > > using > > > a cmov instead? > > > > > > > > i am pretty sure modified code is faster than branching. I remember > > cmov had performance issues esp. on Pentuim-4 not sure how new intel > cpu's > > perform. > > > Pretty sure isn't sure. Theres no point in code churn if theres no obvious > advantage. Some perf tests to deomonstrate the advantage here would be > great. > I have used this before with the intent to avoid branching and it was part of other changes I did for performance improvement in our code. > > > > }) > > > > > > > > /** > > > > @@ -268,7 +268,7 @@ rte_align64pow2(uint64_t v) #define RTE_MAX(a, > b) > > > ({ \ > > > > typeof (a) _a = (a); \ > > > > typeof (b) _b = (b); \ > > > > - _a > _b ? _a : _b; \ > > > > + _a ^ ((_a ^ _b) & -(_a < _b)); \ > > > Same as above > > > > > > Same as above. > > > > > > }) > > > > > > > > /*** Other general functions / macros / diff --git > > > > a/lib/librte_pmd_e1000/igb_pf.c b/lib/librte_pmd_e1000/igb_pf.c index > > > > bc3816a..546499c 100644 > > > > --- a/lib/librte_pmd_e1000/igb_pf.c > > > > +++ b/lib/librte_pmd_e1000/igb_pf.c > > > > @@ -321,11 +321,11 @@ igb_vf_set_mac_addr(struct rte_eth_dev *dev, > > > uint32_t > > > > vf, uint32_t *msgbuf) static int igb_vf_set_multicast(struct > > > rte_eth_dev > > > > *dev, __rte_unused uint32_t vf, uint32_t *msgbuf) { > > > > - int i; > > > > + int16_t i; > > > > uint32_t vector_bit; > > > > uint32_t vector_reg; > > > > uint32_t mta_reg; > > > > - int entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> > > > > + int32_t entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> > > > > E1000_VT_MSGINFO_SHIFT; > > > NAK, this has nothing to do with the included changelog > > > > > > > It does, it causes compilation errors such as > > > > /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c: In function > > \u2018igb_pf_mbx_process\u2019: > > /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c:350:23: error: array > > subscript is above array bounds [-Werror=array-bounds] > >vfinfo->vf_mc_hashes[i] = hash_list[i]; > >^ > > cc1: all warnings being treated as errors > > > > Also it is always better to use explicit int definitions esp. for 64bit > > systems. > > > > This is your changelog: > = > Subject: [PATCH] Minor fixes in rte_common.h file. > > Fix rte_is_power_of_2 since 0 is not. > Avoid branching instructions in RTE_MAX and RTE_MIN > = > > Nowhere does your changelog indicate that you are fixing compliation > errors. > That would in and of itself be far more serious that making micro > optimizations. > If you want to fix build breaks, great, please do, but send a patch that > clearly > indicates what the break is and how your fixing it. Don't just toss it in > with > whatever other work you happen to be doing. > Main reason was to replace int with explicit sized int, it happened to fix compiler errors as well. I will make sure comments cover everything next time. Anyways I will drop this patch and just include fix for power_of_2.
[dpdk-dev] [PATCH] Minor fixes in rte_common.h file.
On Wed, Dec 17, 2014 at 08:40:17AM -0800, Ravi Kerur wrote: > On Tue, Dec 16, 2014 at 1:40 PM, Neil Horman wrote: > > > > On Tue, Dec 16, 2014 at 08:46:51AM -0800, Ravi Kerur wrote: > > > On Sat, Dec 13, 2014 at 2:39 AM, Neil Horman > > wrote: > > > > > > > > On Fri, Dec 12, 2014 at 03:04:34PM -0800, r k wrote: > > > > > Subject: [PATCH] Minor fixes in rte_common.h file. > > > > > > > > > > Fix rte_is_power_of_2 since 0 is not. > > > > > Avoid branching instructions in RTE_MAX and RTE_MIN. > > > > > > > > > > Signed-off-by: Ravi Kerur > > > > > --- > > > > > lib/librte_eal/common/include/rte_common.h | 6 +++--- > > > > > lib/librte_pmd_e1000/igb_pf.c | 4 ++-- > > > > > lib/librte_pmd_ixgbe/ixgbe_pf.c| 4 ++-- > > > > > 3 files changed, 7 insertions(+), 7 deletions(-) > > > > > > > > > > diff --git a/lib/librte_eal/common/include/rte_common.h > > > > > b/lib/librte_eal/common/include/rte_common.h > > > > > index 921b91f..e163f35 100644 > > > > > --- a/lib/librte_eal/common/include/rte_common.h > > > > > +++ b/lib/librte_eal/common/include/rte_common.h > > > > > @@ -203,7 +203,7 @@ extern int RTE_BUILD_BUG_ON_detected_error; > > static > > > > > inline int rte_is_power_of_2(uint32_t n) { > > > > > - return ((n-1) & n) == 0; > > > > > + return n && !(n & (n - 1)); > > > > > } > > > > > > > > > > /** > > > > > @@ -259,7 +259,7 @@ rte_align64pow2(uint64_t v) #define RTE_MIN(a, > > b) > > > > ({ \ > > > > > typeof (a) _a = (a); \ > > > > > typeof (b) _b = (b); \ > > > > > - _a < _b ? _a : _b; \ > > > > > +_b ^ ((_a ^ _b) & -(_a < _b)); \ > > > > Are you sure this is actually faster than the branch version? What > > about > > > > using > > > > a cmov instead? > > > > > > > > > > > i am pretty sure modified code is faster than branching. I remember > > > cmov had performance issues esp. on Pentuim-4 not sure how new intel > > cpu's > > > perform. > > > > > Pretty sure isn't sure. Theres no point in code churn if theres no obvious > > advantage. Some perf tests to deomonstrate the advantage here would be > > great. > > > > I have used this before with the intent to avoid branching and it was > part of other changes I did for performance improvement in our code. > Then it should be pretty easy to provide the perf data demonstrating the advantage in this code. > > > > > > }) > > > > > > > > > > /** > > > > > @@ -268,7 +268,7 @@ rte_align64pow2(uint64_t v) #define RTE_MAX(a, > > b) > > > > ({ \ > > > > > typeof (a) _a = (a); \ > > > > > typeof (b) _b = (b); \ > > > > > - _a > _b ? _a : _b; \ > > > > > + _a ^ ((_a ^ _b) & -(_a < _b)); \ > > > > Same as above > > > > > > > > Same as above. > > > > > > > > }) > > > > > > > > > > /*** Other general functions / macros / diff --git > > > > > a/lib/librte_pmd_e1000/igb_pf.c b/lib/librte_pmd_e1000/igb_pf.c index > > > > > bc3816a..546499c 100644 > > > > > --- a/lib/librte_pmd_e1000/igb_pf.c > > > > > +++ b/lib/librte_pmd_e1000/igb_pf.c > > > > > @@ -321,11 +321,11 @@ igb_vf_set_mac_addr(struct rte_eth_dev *dev, > > > > uint32_t > > > > > vf, uint32_t *msgbuf) static int igb_vf_set_multicast(struct > > > > rte_eth_dev > > > > > *dev, __rte_unused uint32_t vf, uint32_t *msgbuf) { > > > > > - int i; > > > > > + int16_t i; > > > > > uint32_t vector_bit; > > > > > uint32_t vector_reg; > > > > > uint32_t mta_reg; > > > > > - int entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> > > > > > + int32_t entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> > > > > > E1000_VT_MSGINFO_SHIFT; > > > > NAK, this has nothing to do with the included changelog > > > > > > > > > > It does, it causes compilation errors such as > > > > > > /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c: In function > > > \u2018igb_pf_mbx_process\u2019: > > > /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c:350:23: error: array > > > subscript is above array bounds [-Werror=array-bounds] > > >vfinfo->vf_mc_hashes[i] = hash_list[i]; > > >^ > > > cc1: all warnings being treated as errors > > > > > > Also it is always better to use explicit int definitions esp. for 64bit > > > systems. > > > > > > > This is your changelog: > > = > > Subject: [PATCH] Minor fixes in rte_common.h file. > > > > Fix rte_is_power_of_2 since 0 is not. > > Avoid branching instructions in RTE_MAX and RTE_MIN > > = > > > > Nowhere does your changelog indicate that you are fixing compliation > > errors. > > That would in and of itself be far more serious that making micro > > optimizations. > > If you want to fix build breaks, great, please do, but send a patch that > > clearly > > indicates what the break is and how your fix
[dpdk-dev] [PATCH] Minor fixes in rte_common.h file.
On Thu, Dec 18, 2014 at 11:07 AM, Neil Horman wrote: > On Wed, Dec 17, 2014 at 08:40:17AM -0800, Ravi Kerur wrote: > > On Tue, Dec 16, 2014 at 1:40 PM, Neil Horman > wrote: > > > > > > On Tue, Dec 16, 2014 at 08:46:51AM -0800, Ravi Kerur wrote: > > > > On Sat, Dec 13, 2014 at 2:39 AM, Neil Horman > > > wrote: > > > > > > > > > > On Fri, Dec 12, 2014 at 03:04:34PM -0800, r k wrote: > > > > > > Subject: [PATCH] Minor fixes in rte_common.h file. > > > > > > > > > > > > Fix rte_is_power_of_2 since 0 is not. > > > > > > Avoid branching instructions in RTE_MAX and RTE_MIN. > > > > > > > > > > > > Signed-off-by: Ravi Kerur > > > > > > --- > > > > > > lib/librte_eal/common/include/rte_common.h | 6 +++--- > > > > > > lib/librte_pmd_e1000/igb_pf.c | 4 ++-- > > > > > > lib/librte_pmd_ixgbe/ixgbe_pf.c| 4 ++-- > > > > > > 3 files changed, 7 insertions(+), 7 deletions(-) > > > > > > > > > > > > diff --git a/lib/librte_eal/common/include/rte_common.h > > > > > > b/lib/librte_eal/common/include/rte_common.h > > > > > > index 921b91f..e163f35 100644 > > > > > > --- a/lib/librte_eal/common/include/rte_common.h > > > > > > +++ b/lib/librte_eal/common/include/rte_common.h > > > > > > @@ -203,7 +203,7 @@ extern int RTE_BUILD_BUG_ON_detected_error; > > > static > > > > > > inline int rte_is_power_of_2(uint32_t n) { > > > > > > - return ((n-1) & n) == 0; > > > > > > + return n && !(n & (n - 1)); > > > > > > } > > > > > > > > > > > > /** > > > > > > @@ -259,7 +259,7 @@ rte_align64pow2(uint64_t v) #define > RTE_MIN(a, > > > b) > > > > > ({ \ > > > > > > typeof (a) _a = (a); \ > > > > > > typeof (b) _b = (b); \ > > > > > > - _a < _b ? _a : _b; \ > > > > > > +_b ^ ((_a ^ _b) & -(_a < _b)); \ > > > > > Are you sure this is actually faster than the branch version? What > > > about > > > > > using > > > > > a cmov instead? > > > > > > > > > > > > > > i am pretty sure modified code is faster than branching. I > remember > > > > cmov had performance issues esp. on Pentuim-4 not sure how new intel > > > cpu's > > > > perform. > > > > > > > Pretty sure isn't sure. Theres no point in code churn if theres no > obvious > > > advantage. Some perf tests to deomonstrate the advantage here would be > > > great. > > > > > > > I have used this before with the intent to avoid branching and it > was > > part of other changes I did for performance improvement in our code. > > > > Then it should be pretty easy to provide the perf data demonstrating the > advantage in this code. > > I decided to drop this change because 1. DPDK manual suggests gcc version 4.5.x or greater 2. I was testing code against gcc 4.4.6 (which generated cmp/jump instructions) and Konstantin showed using gcc 4.8.3 it generates cmov instructions If you think it should be pursued further let me know. I can look into difference between the code generated for both using gcc > 4.5 .x and update. Thanks, Ravi > > > > > > > > }) > > > > > > > > > > > > /** > > > > > > @@ -268,7 +268,7 @@ rte_align64pow2(uint64_t v) #define > RTE_MAX(a, > > > b) > > > > > ({ \ > > > > > > typeof (a) _a = (a); \ > > > > > > typeof (b) _b = (b); \ > > > > > > - _a > _b ? _a : _b; \ > > > > > > + _a ^ ((_a ^ _b) & -(_a < _b)); \ > > > > > Same as above > > > > > > > > > > Same as above. > > > > > > > > > > }) > > > > > > > > > > > > /*** Other general functions / macros / diff > --git > > > > > > a/lib/librte_pmd_e1000/igb_pf.c b/lib/librte_pmd_e1000/igb_pf.c > index > > > > > > bc3816a..546499c 100644 > > > > > > --- a/lib/librte_pmd_e1000/igb_pf.c > > > > > > +++ b/lib/librte_pmd_e1000/igb_pf.c > > > > > > @@ -321,11 +321,11 @@ igb_vf_set_mac_addr(struct rte_eth_dev > *dev, > > > > > uint32_t > > > > > > vf, uint32_t *msgbuf) static int igb_vf_set_multicast(struct > > > > > rte_eth_dev > > > > > > *dev, __rte_unused uint32_t vf, uint32_t *msgbuf) { > > > > > > - int i; > > > > > > + int16_t i; > > > > > > uint32_t vector_bit; > > > > > > uint32_t vector_reg; > > > > > > uint32_t mta_reg; > > > > > > - int entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> > > > > > > + int32_t entries = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> > > > > > > E1000_VT_MSGINFO_SHIFT; > > > > > NAK, this has nothing to do with the included changelog > > > > > > > > > > > > > It does, it causes compilation errors such as > > > > > > > > /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c: In function > > > > \u2018igb_pf_mbx_process\u2019: > > > > /root/dpdk-new/dpdk/lib/librte_pmd_e1000/igb_pf.c:350:23: error: > array > > > > subscript is above array bounds [-Werror=array-bounds] > > > >vfinfo->vf_mc_hashes[i] = hash_list[i]; > > > >^ > > > > cc1: all warnings being treated as errors > > > > > > > > Also it is always better