> Remove unnecessary type casting in functions.
> 
> Tested on Ubuntu (14.04 x86_64) with "make test".
> "make test" results match the results with baseline.
> "Memcpy perf" results match the results with baseline.
> 
> Signed-off-by: Ravi Kerur <rkerur at gmail.com>
> Acked-by: Stephen Hemminger <stephen at networkplumber.org>
> 
> ---
> .../common/include/arch/x86/rte_memcpy.h           | 340 +++++++++++----------
>  1 file changed, 175 insertions(+), 165 deletions(-)
> 
> diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
> index 6a57426..839d4ec 100644
> --- a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
> +++ b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h

[...]

>  /**
> @@ -150,13 +150,16 @@ rte_mov64blocks(uint8_t *dst, const uint8_t *src, size_t n)
>       __m256i ymm0, ymm1;
>  
>       while (n >= 64) {
> -             ymm0 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 0 * 32));
> +
> +             ymm0 = _mm256_loadu_si256((const __m256i *)(src + 0 * 32));
> +             ymm1 = _mm256_loadu_si256((const __m256i *)(src + 1 * 32));
> +
> +             _mm256_storeu_si256((__m256i *)(dst + 0 * 32), ymm0);
> +             _mm256_storeu_si256((__m256i *)(dst + 1 * 32), ymm1);
> +

Any particular reason to change the order of the statements here? :)
Overall this patch looks good.

>               n -= 64;
> -             ymm1 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 1 * 32));
> -             src = (const uint8_t *)src + 64;
> -             _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 0 * 32), ymm0);
> -             _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 1 * 32), ymm1);
> -             dst = (uint8_t *)dst + 64;
> +             src = src + 64;
> +             dst = dst + 64;
>       }
>  }
>  

Reply via email to