> > # detect extensions
> > # Requires intrinsics available in GCC 14.1.0+ and Clang 18.1.0+
> > if (riscv_extension_macros and
> >     (cc.get_define('__riscv_zicbop', args: machine_args) != ''))
> >   if ((cc.get_id() == 'gcc' and cc.version().version_compare('>=14.1.0'))
> >       or (cc.get_id() == 'clang' and cc.version().version_compare('>=18.1.0')))
> >       message('Compiling with the zicbop extension')
> >       machine_args += ['-DRTE_RISCV_FEATURE_PREFETCH']
> >   else
> >     warning('Detected zicbop extension but cannot use because intrinsics are not available (present in GCC 14.1.0+ and Clang 18.1.0+)')
> >   endif
> > endif
>
> The implementation does not involve intrinsics

It looks like nothing has been changed here yet: the check still gates on the compiler versions that provide the intrinsics.

> #if defined(RTE_RISCV_FEATURE_V) && !(defined(RTE_RISCV_FEATURE_PREFETCH))
> #undef RTE_RISCV_FEATURE_V
> #endif
>
> static __rte_always_inline void
> _rte_mov128blocks(uint8_t *dst, const uint8_t *src, size_t n)
> {
>     asm volatile (
>         "prefetch.r 64(%1)\n"
>         "prefetch.w 64(%0)\n"
>         "prefetch.r 128(%1)\n"
>         "prefetch.w 128(%0)\n"
>         "prefetch.r 192(%1)\n"
>         "prefetch.w 192(%0)\n"
>         "prefetch.r 256(%1)\n"
>         "prefetch.w 256(%0)\n"
>         "prefetch.r 320(%1)\n"
>         "prefetch.w 320(%0)\n"
>         "prefetch.r 384(%1)\n"
>         "prefetch.w 384(%0)\n"
>         "prefetch.r 448(%1)\n"
>         "prefetch.w 448(%0)\n"
>         "prefetch.r 512(%1)\n"
>         "li t6, 512\n"
>         "3:\n"
>         "li t5, 128;"
>         "vsetvli zero, t5, e8, m8, ta, ma\n"

With the current compilation conditions, if Zicbop isn't supported, the V optimization won't be compiled either. Have you measured the performance difference when these prefetches are removed and only V is used?
Could we use a condition like the following, so that V alone is also supported?

#if defined(RTE_RISCV_FEATURE_V)
   #if (defined(RTE_RISCV_FEATURE_PREFETCH))
        ...
   #endif
    ...
#endif

Reply via email to