From: Vijay <vija...@cavium.com> Use Neon instructions to perform zero checking of a buffer. This helps reduce downtime during live migration.
Signed-off-by: Vijaya Kumar K <vija...@caviumnetworks.com> Signed-off-by: Suresh <ksur...@caviumnetworks.com> --- util/cutils.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/util/cutils.c b/util/cutils.c index 43d1afb..bb61c91 100644 --- a/util/cutils.c +++ b/util/cutils.c @@ -352,6 +352,80 @@ static void *can_use_buffer_find_nonzero_offset_ifunc(void) return func; } #pragma GCC pop_options + +#elif defined __aarch64__ +#include "arm_neon.h" + +#define NEON_VECTYPE uint64x2_t +#define NEON_LOAD_N_ORR(v1, v2) (vld1q_u64(&v1) | vld1q_u64(&v2)) +#define NEON_ORR(v1, v2) ((v1) | (v2)) +#define NEON_NOT_EQ_ZERO(v1) \ + ((vgetq_lane_u64(v1, 0) != 0) || (vgetq_lane_u64(v1, 1) != 0)) + +#define BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR_NEON 16 + +/* + * Zero page/buffer checking using SIMD(Neon) + */ + +static bool +can_use_buffer_find_nonzero_offset_neon(const void *buf, size_t len) +{ + return (len % (BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR_NEON + * sizeof(NEON_VECTYPE)) == 0 + && ((uintptr_t) buf) % sizeof(NEON_VECTYPE) == 0); +} + +static size_t buffer_find_nonzero_offset_neon(const void *buf, size_t len) +{ + size_t i; + NEON_VECTYPE qword0, qword1, qword2, qword3, qword4, qword5, qword6; + uint64_t const *data = buf; + + if (!len) { + return 0; + } + + assert(can_use_buffer_find_nonzero_offset_neon(buf, len)); + len /= sizeof(unsigned long); + + for (i = 0; i < len; i += BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR_NEON) { + qword0 = NEON_LOAD_N_ORR(data[i], data[i + 2]); + qword1 = NEON_LOAD_N_ORR(data[i + 4], data[i + 6]); + qword2 = NEON_LOAD_N_ORR(data[i + 8], data[i + 10]); + qword3 = NEON_LOAD_N_ORR(data[i + 12], data[i + 14]); + qword4 = NEON_ORR(qword0, qword1); + qword5 = NEON_ORR(qword2, qword3); + qword6 = NEON_ORR(qword4, qword5); + + if (NEON_NOT_EQ_ZERO(qword6)) { + break; + } + } + + return i * sizeof(unsigned long); +} + +static inline bool neon_support(void) +{ + /* + * Check if neon feature 
is supported. + * By default neon is supported for aarch64. + */ + return true; +} + +bool can_use_buffer_find_nonzero_offset(const void *buf, size_t len) +{ + return neon_support() ? can_use_buffer_find_nonzero_offset_neon(buf, len) : + can_use_buffer_find_nonzero_offset_inner(buf, len); +} + +size_t buffer_find_nonzero_offset(const void *buf, size_t len) +{ + return neon_support() ? buffer_find_nonzero_offset_neon(buf, len) : + buffer_find_nonzero_offset_inner(buf, len); +} #else bool can_use_buffer_find_nonzero_offset(const void *buf, size_t len) { -- 1.7.9.5