Re: [FFmpeg-devel] [PATCH] af_afir: RISC-V V fcmul_add

2023-11-15 Thread flow gg
Okay, I have modified them to 64 and added some descriptions.

Rémi Denis-Courmont  于2023年11月15日周三 23:06写道:

> Le keskiviikkona 15. marraskuuta 2023, 10.59.55 EET flow gg a écrit :
> > Okay, I have updated these issues in the patch.
>
> It does not assemble but I can fix it locally. The narrowing shift
> trickery
> require Zve64x, or rather Zve64f in this case.
>
> The performance improvement is much better on newer hardware:
> fcmul_add_c: 4891.2
> fcmul_add_rvv_f64: 2399.5
>
> FWIW, VLSEG2E32.V remains slightly worse than with shifting:
> fcmul_add_c: 4891.2
> fcmul_add_rvv_f32: 2877.5
>
> --
> 雷米‧德尼-库尔蒙
> http://www.remlab.net/
>
>
>
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
>
From 6b88fbf9b94c098841197c9fcb467006177ee4c6 Mon Sep 17 00:00:00 2001
From: sunyuechi 
Date: Fri, 3 Nov 2023 10:35:53 +0800
Subject: [PATCH] af_afir: RISC-V V fcmul_add

Segmented loads are slow, so here we use unit-strided load and narrowing shifts.

c910:
fcmul_add_c: 2179
fcmul_add_rvv_f64: 1652

c908:
fcmul_add_c: 4891.2
fcmul_add_rvv_f64: 2399.5
---
 libavfilter/af_afirdsp.h |  5 ++-
 libavfilter/riscv/Makefile   |  2 ++
 libavfilter/riscv/af_afir_init.c | 42 
 libavfilter/riscv/af_afir_rvv.S  | 55 
 4 files changed, 103 insertions(+), 1 deletion(-)
 create mode 100644 libavfilter/riscv/Makefile
 create mode 100644 libavfilter/riscv/af_afir_init.c
 create mode 100644 libavfilter/riscv/af_afir_rvv.S

diff --git a/libavfilter/af_afirdsp.h b/libavfilter/af_afirdsp.h
index 4208501393..827e067a9b 100644
--- a/libavfilter/af_afirdsp.h
+++ b/libavfilter/af_afirdsp.h
@@ -33,6 +33,7 @@ typedef struct AudioFIRDSPContext {
   ptrdiff_t len);
 } AudioFIRDSPContext;
 
+void ff_afir_init_riscv(AudioFIRDSPContext *s);
 void ff_afir_init_x86(AudioFIRDSPContext *s);
 
 static void fcmul_add_c(float *sum, const float *t, const float *c, ptrdiff_t len)
@@ -74,7 +75,9 @@ static av_unused void ff_afir_init(AudioFIRDSPContext *dsp)
 dsp->fcmul_add = fcmul_add_c;
 dsp->dcmul_add = dcmul_add_c;
 
-#if ARCH_X86
+#if ARCH_RISCV
+ff_afir_init_riscv(dsp);
+#elif ARCH_X86
 ff_afir_init_x86(dsp);
 #endif
 }
diff --git a/libavfilter/riscv/Makefile b/libavfilter/riscv/Makefile
new file mode 100644
index 00..0b968a9c0d
--- /dev/null
+++ b/libavfilter/riscv/Makefile
@@ -0,0 +1,2 @@
+OBJS += riscv/af_afir_init.o
+RVV-OBJS += riscv/af_afir_rvv.o
diff --git a/libavfilter/riscv/af_afir_init.c b/libavfilter/riscv/af_afir_init.c
new file mode 100644
index 00..52aa18c126
--- /dev/null
+++ b/libavfilter/riscv/af_afir_init.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2023 Institue of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include 
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavfilter/af_afirdsp.h"
+
+void ff_fcmul_add_rvv(float *sum, const float *t, const float *c,
+   ptrdiff_t len);
+
+av_cold void ff_afir_init_riscv(AudioFIRDSPContext *s)
+{
+#if HAVE_RVV
+int flags = av_get_cpu_flags();
+
+if (flags & AV_CPU_FLAG_RVV_F64) {
+if (flags & AV_CPU_FLAG_RVB_ADDR) {
+s->fcmul_add = ff_fcmul_add_rvv;
+}
+}
+#endif
+}
diff --git a/libavfilter/riscv/af_afir_rvv.S b/libavfilter/riscv/af_afir_rvv.S
new file mode 100644
index 00..04ec2e50d8
--- /dev/null
+++ b/libavfilter/riscv/af_afir_rvv.S
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2023 Institue of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS 

Re: [FFmpeg-devel] [PATCH] af_afir: RISC-V V fcmul_add

2023-11-15 Thread Rémi Denis-Courmont
Le keskiviikkona 15. marraskuuta 2023, 10.59.55 EET flow gg a écrit :
> Okay, I have updated these issues in the patch.

It does not assemble but I can fix it locally. The narrowing shift trickery 
require Zve64x, or rather Zve64f in this case.

The performance improvement is much better on newer hardware:
fcmul_add_c: 4891.2
fcmul_add_rvv_f64: 2399.5

FWIW, VLSEG2E32.V remains slightly worse than with shifting:
fcmul_add_c: 4891.2
fcmul_add_rvv_f32: 2877.5

-- 
雷米‧德尼-库尔蒙
http://www.remlab.net/



___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] af_afir: RISC-V V fcmul_add

2023-11-15 Thread flow gg
Okay, I have updated these issues in the patch.

Rémi Denis-Courmont  于2023年11月13日周一 23:35写道:

>Hi,
>
> Le maanantaina 13. marraskuuta 2023, 11.43.01 EET flow gg a écrit :
> > Sorry for the long delay in responding.
>
> No problem. Working with T-Head C910 (or C920?) cores is very tedious. I
> gave
> up on that and switched over to Kendryte K230 (based on C908) now.
>
> > How is the modified patch now?
>
> It looks better, but some minute improvements are still possible.
>
> > no longer using register stride(learn from your code) and have switched
> to
> > shNadd instead.
> >
> > (using m4 and m2 as they are slightly faster than m8 and m4)
> >
> > benchmark:
> > fcmul_add_c: 2179
> > fcmul_add_rvv_f32: 1652
>
> > diff --git a/libavfilter/af_afirdsp.h b/libavfilter/af_afirdsp.h
> > index 4208501393..d2d1e909c1 100644
> > --- a/libavfilter/af_afirdsp.h
> > +++ b/libavfilter/af_afirdsp.h
> > @@ -34,6 +34,7 @@ typedef struct AudioFIRDSPContext {
> >  } AudioFIRDSPContext;
> >
> >  void ff_afir_init_x86(AudioFIRDSPContext *s);
> > +void ff_afir_init_riscv(AudioFIRDSPContext *s);
>
> Nit: please stick to alphabetical order like most similar code.
>
> >
> >  static void fcmul_add_c(float *sum, const float *t, const float *c,
> > ptrdiff_t len)
> >  {
> > @@ -76,6 +77,8 @@ static av_unused void ff_afir_init(AudioFIRDSPContext
> > *dsp)
> >
> >  #if ARCH_X86
> >  ff_afir_init_x86(dsp);
> > +#elif ARCH_RISCV
> > +ff_afir_init_riscv(dsp);
>
> Ditto.
>
> >  #endif
> >  }
> >
> > diff --git a/libavfilter/riscv/Makefile b/libavfilter/riscv/Makefile
> > new file mode 100644
> > index 00..0b968a9c0d
> > --- /dev/null
> > +++ b/libavfilter/riscv/Makefile
> > @@ -0,0 +1,2 @@
> > +OBJS += riscv/af_afir_init.o
> > +RVV-OBJS += riscv/af_afir_rvv.o
> > diff --git a/libavfilter/riscv/af_afir_init.c
> > b/libavfilter/riscv/af_afir_init.c new file mode 100644
> > index 00..13df8341e7
> > --- /dev/null
> > +++ b/libavfilter/riscv/af_afir_init.c
> > @@ -0,0 +1,39 @@
> > +/*
> > + * Copyright (c) 2023 Institue of Software Chinese Academy of Sciences
> > (ISCAS).
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301
> > USA
> > + */
> > +
> > +#include 
> > +
> > +#include "config.h"
> > +#include "libavutil/attributes.h"
> > +#include "libavutil/cpu.h"
> > +#include "libavfilter/af_afirdsp.h"
> > +
> > +void ff_fcmul_add_rvv(float *sum, const float *t, const float *c,
> > +   ptrdiff_t len);
> > +
> > +av_cold void ff_afir_init_riscv(AudioFIRDSPContext *s)
> > +{
> > +#if HAVE_RVV
> > +int flags = av_get_cpu_flags();
> > +
> > +if (flags & AV_CPU_FLAG_RVV_F32)
>
> You need to check for Zba as well here. I doubt that we'll see hardware
> with V
> and without Zba in real life, but for the sake of correctness...
>
> > +s->fcmul_add = ff_fcmul_add_rvv;
> > +#endif
> > +}
> > diff --git a/libavfilter/riscv/af_afir_rvv.S
> > b/libavfilter/riscv/af_afir_rvv.S new file mode 100644
> > index 00..078cac8e7e
> > --- /dev/null
> > +++ b/libavfilter/riscv/af_afir_rvv.S
> > @@ -0,0 +1,61 @@
> > +/*
> > + * Copyright (c) 2023 Institue of Software Chinese Academy of Sciences
> > (ISCAS).
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301
> > USA
> > + */
> > +
> > +#include "libavutil/riscv/asm.S"
> > +
> > +//  void ff_fcmul_add(float *sum, const float *t, const float *c, int
> len)
> > +func ff_fcmul_add_rvv, zve32f
> > +li   

Re: [FFmpeg-devel] [PATCH] af_afir: RISC-V V fcmul_add

2023-11-15 Thread flow gg
Okay, I have updated these issues in the patch.

Rémi Denis-Courmont  于2023年11月13日周一 23:35写道:

>Hi,
>
> Le maanantaina 13. marraskuuta 2023, 11.43.01 EET flow gg a écrit :
> > Sorry for the long delay in responding.
>
> No problem. Working with T-Head C910 (or C920?) cores is very tedious. I
> gave
> up on that and switched over to Kendryte K230 (based on C908) now.
>
> > How is the modified patch now?
>
> It looks better, but some minute improvements are still possible.
>
> > no longer using register stride(learn from your code) and have switched
> to
> > shNadd instead.
> >
> > (using m4 and m2 as they are slightly faster than m8 and m4)
> >
> > benchmark:
> > fcmul_add_c: 2179
> > fcmul_add_rvv_f32: 1652
>
> > diff --git a/libavfilter/af_afirdsp.h b/libavfilter/af_afirdsp.h
> > index 4208501393..d2d1e909c1 100644
> > --- a/libavfilter/af_afirdsp.h
> > +++ b/libavfilter/af_afirdsp.h
> > @@ -34,6 +34,7 @@ typedef struct AudioFIRDSPContext {
> >  } AudioFIRDSPContext;
> >
> >  void ff_afir_init_x86(AudioFIRDSPContext *s);
> > +void ff_afir_init_riscv(AudioFIRDSPContext *s);
>
> Nit: please stick to alphabetical order like most similar code.
>
> >
> >  static void fcmul_add_c(float *sum, const float *t, const float *c,
> > ptrdiff_t len)
> >  {
> > @@ -76,6 +77,8 @@ static av_unused void ff_afir_init(AudioFIRDSPContext
> > *dsp)
> >
> >  #if ARCH_X86
> >  ff_afir_init_x86(dsp);
> > +#elif ARCH_RISCV
> > +ff_afir_init_riscv(dsp);
>
> Ditto.
>
> >  #endif
> >  }
> >
> > diff --git a/libavfilter/riscv/Makefile b/libavfilter/riscv/Makefile
> > new file mode 100644
> > index 00..0b968a9c0d
> > --- /dev/null
> > +++ b/libavfilter/riscv/Makefile
> > @@ -0,0 +1,2 @@
> > +OBJS += riscv/af_afir_init.o
> > +RVV-OBJS += riscv/af_afir_rvv.o
> > diff --git a/libavfilter/riscv/af_afir_init.c
> > b/libavfilter/riscv/af_afir_init.c new file mode 100644
> > index 00..13df8341e7
> > --- /dev/null
> > +++ b/libavfilter/riscv/af_afir_init.c
> > @@ -0,0 +1,39 @@
> > +/*
> > + * Copyright (c) 2023 Institue of Software Chinese Academy of Sciences
> > (ISCAS).
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301
> > USA
> > + */
> > +
> > +#include 
> > +
> > +#include "config.h"
> > +#include "libavutil/attributes.h"
> > +#include "libavutil/cpu.h"
> > +#include "libavfilter/af_afirdsp.h"
> > +
> > +void ff_fcmul_add_rvv(float *sum, const float *t, const float *c,
> > +   ptrdiff_t len);
> > +
> > +av_cold void ff_afir_init_riscv(AudioFIRDSPContext *s)
> > +{
> > +#if HAVE_RVV
> > +int flags = av_get_cpu_flags();
> > +
> > +if (flags & AV_CPU_FLAG_RVV_F32)
>
> You need to check for Zba as well here. I doubt that we'll see hardware
> with V
> and without Zba in real life, but for the sake of correctness...
>
> > +s->fcmul_add = ff_fcmul_add_rvv;
> > +#endif
> > +}
> > diff --git a/libavfilter/riscv/af_afir_rvv.S
> > b/libavfilter/riscv/af_afir_rvv.S new file mode 100644
> > index 00..078cac8e7e
> > --- /dev/null
> > +++ b/libavfilter/riscv/af_afir_rvv.S
> > @@ -0,0 +1,61 @@
> > +/*
> > + * Copyright (c) 2023 Institue of Software Chinese Academy of Sciences
> > (ISCAS).
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301
> > USA
> > + */
> > +
> > +#include "libavutil/riscv/asm.S"
> > +
> > +//  void ff_fcmul_add(float *sum, const float *t, const float *c, int
> len)
> > +func ff_fcmul_add_rvv, zve32f
> > +li   

Re: [FFmpeg-devel] [PATCH] af_afir: RISC-V V fcmul_add

2023-11-13 Thread Paul B Mahol
On Mon, Nov 13, 2023 at 4:35 PM Rémi Denis-Courmont  wrote:

>Hi,
>
> Le maanantaina 13. marraskuuta 2023, 11.43.01 EET flow gg a écrit :
> > Sorry for the long delay in responding.
>
> No problem. Working with T-Head C910 (or C920?) cores is very tedious. I
> gave
> up on that and switched over to Kendryte K230 (based on C908) now.
>
> > How is the modified patch now?
>
> It looks better, but some minute improvements are still possible.
>
> > no longer using register stride(learn from your code) and have switched
> to
> > shNadd instead.
> >
> > (using m4 and m2 as they are slightly faster than m8 and m4)
> >
> > benchmark:
> > fcmul_add_c: 2179
> > fcmul_add_rvv_f32: 1652
>
> > diff --git a/libavfilter/af_afirdsp.h b/libavfilter/af_afirdsp.h
> > index 4208501393..d2d1e909c1 100644
> > --- a/libavfilter/af_afirdsp.h
> > +++ b/libavfilter/af_afirdsp.h
> > @@ -34,6 +34,7 @@ typedef struct AudioFIRDSPContext {
> >  } AudioFIRDSPContext;
> >
> >  void ff_afir_init_x86(AudioFIRDSPContext *s);
> > +void ff_afir_init_riscv(AudioFIRDSPContext *s);
>
> Nit: please stick to alphabetical order like most similar code.
>
> >
> >  static void fcmul_add_c(float *sum, const float *t, const float *c,
> > ptrdiff_t len)
> >  {
> > @@ -76,6 +77,8 @@ static av_unused void ff_afir_init(AudioFIRDSPContext
> > *dsp)
> >
> >  #if ARCH_X86
> >  ff_afir_init_x86(dsp);
> > +#elif ARCH_RISCV
> > +ff_afir_init_riscv(dsp);
>
> Ditto.
>
> >  #endif
> >  }
> >
> > diff --git a/libavfilter/riscv/Makefile b/libavfilter/riscv/Makefile
> > new file mode 100644
> > index 00..0b968a9c0d
> > --- /dev/null
> > +++ b/libavfilter/riscv/Makefile
> > @@ -0,0 +1,2 @@
> > +OBJS += riscv/af_afir_init.o
> > +RVV-OBJS += riscv/af_afir_rvv.o
> > diff --git a/libavfilter/riscv/af_afir_init.c
> > b/libavfilter/riscv/af_afir_init.c new file mode 100644
> > index 00..13df8341e7
> > --- /dev/null
> > +++ b/libavfilter/riscv/af_afir_init.c
> > @@ -0,0 +1,39 @@
> > +/*
> > + * Copyright (c) 2023 Institue of Software Chinese Academy of Sciences
> > (ISCAS).
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301
> > USA
> > + */
> > +
> > +#include 
> > +
> > +#include "config.h"
> > +#include "libavutil/attributes.h"
> > +#include "libavutil/cpu.h"
> > +#include "libavfilter/af_afirdsp.h"
> > +
> > +void ff_fcmul_add_rvv(float *sum, const float *t, const float *c,
> > +   ptrdiff_t len);
> > +
> > +av_cold void ff_afir_init_riscv(AudioFIRDSPContext *s)
> > +{
> > +#if HAVE_RVV
> > +int flags = av_get_cpu_flags();
> > +
> > +if (flags & AV_CPU_FLAG_RVV_F32)
>
> You need to check for Zba as well here. I doubt that we'll see hardware
> with V
> and without Zba in real life, but for the sake of correctness...
>
> > +s->fcmul_add = ff_fcmul_add_rvv;
> > +#endif
> > +}
> > diff --git a/libavfilter/riscv/af_afir_rvv.S
> > b/libavfilter/riscv/af_afir_rvv.S new file mode 100644
> > index 00..078cac8e7e
> > --- /dev/null
> > +++ b/libavfilter/riscv/af_afir_rvv.S
> > @@ -0,0 +1,61 @@
> > +/*
> > + * Copyright (c) 2023 Institue of Software Chinese Academy of Sciences
> > (ISCAS).
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301
> > USA
> > + */
> > +
> > +#include "libavutil/riscv/asm.S"
> > +
> > +//  void ff_fcmul_add(float *sum, const float *t, const float *c, int
> len)
> > +func ff_fcmul_add_rvv, zve32f
> > +li  t1, 32
> > +1:
> > +

Re: [FFmpeg-devel] [PATCH] af_afir: RISC-V V fcmul_add

2023-11-13 Thread Rémi Denis-Courmont
   Hi,

Le maanantaina 13. marraskuuta 2023, 11.43.01 EET flow gg a écrit :
> Sorry for the long delay in responding.

No problem. Working with T-Head C910 (or C920?) cores is very tedious. I gave 
up on that and switched over to Kendryte K230 (based on C908) now.

> How is the modified patch now?

It looks better, but some minute improvements are still possible.

> no longer using register stride(learn from your code) and have switched to
> shNadd instead.
> 
> (using m4 and m2 as they are slightly faster than m8 and m4)
> 
> benchmark:
> fcmul_add_c: 2179
> fcmul_add_rvv_f32: 1652

> diff --git a/libavfilter/af_afirdsp.h b/libavfilter/af_afirdsp.h
> index 4208501393..d2d1e909c1 100644
> --- a/libavfilter/af_afirdsp.h
> +++ b/libavfilter/af_afirdsp.h
> @@ -34,6 +34,7 @@ typedef struct AudioFIRDSPContext {
>  } AudioFIRDSPContext;
> 
>  void ff_afir_init_x86(AudioFIRDSPContext *s);
> +void ff_afir_init_riscv(AudioFIRDSPContext *s);

Nit: please stick to alphabetical order like most similar code.

> 
>  static void fcmul_add_c(float *sum, const float *t, const float *c,
> ptrdiff_t len)
>  {
> @@ -76,6 +77,8 @@ static av_unused void ff_afir_init(AudioFIRDSPContext
> *dsp)
> 
>  #if ARCH_X86
>  ff_afir_init_x86(dsp);
> +#elif ARCH_RISCV
> +ff_afir_init_riscv(dsp);

Ditto.

>  #endif
>  }
> 
> diff --git a/libavfilter/riscv/Makefile b/libavfilter/riscv/Makefile
> new file mode 100644
> index 00..0b968a9c0d
> --- /dev/null
> +++ b/libavfilter/riscv/Makefile
> @@ -0,0 +1,2 @@
> +OBJS += riscv/af_afir_init.o
> +RVV-OBJS += riscv/af_afir_rvv.o
> diff --git a/libavfilter/riscv/af_afir_init.c
> b/libavfilter/riscv/af_afir_init.c new file mode 100644
> index 00..13df8341e7
> --- /dev/null
> +++ b/libavfilter/riscv/af_afir_init.c
> @@ -0,0 +1,39 @@
> +/*
> + * Copyright (c) 2023 Institue of Software Chinese Academy of Sciences
> (ISCAS).
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
> USA
> + */
> +
> +#include 
> +
> +#include "config.h"
> +#include "libavutil/attributes.h"
> +#include "libavutil/cpu.h"
> +#include "libavfilter/af_afirdsp.h"
> +
> +void ff_fcmul_add_rvv(float *sum, const float *t, const float *c,
> +   ptrdiff_t len);
> +
> +av_cold void ff_afir_init_riscv(AudioFIRDSPContext *s)
> +{
> +#if HAVE_RVV
> +int flags = av_get_cpu_flags();
> +
> +if (flags & AV_CPU_FLAG_RVV_F32)

You need to check for Zba as well here. I doubt that we'll see hardware with V 
and without Zba in real life, but for the sake of correctness...

> +s->fcmul_add = ff_fcmul_add_rvv;
> +#endif
> +}
> diff --git a/libavfilter/riscv/af_afir_rvv.S
> b/libavfilter/riscv/af_afir_rvv.S new file mode 100644
> index 00..078cac8e7e
> --- /dev/null
> +++ b/libavfilter/riscv/af_afir_rvv.S
> @@ -0,0 +1,61 @@
> +/*
> + * Copyright (c) 2023 Institue of Software Chinese Academy of Sciences
> (ISCAS).
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
> USA
> + */
> +
> +#include "libavutil/riscv/asm.S"
> +
> +//  void ff_fcmul_add(float *sum, const float *t, const float *c, int len)
> +func ff_fcmul_add_rvv, zve32f
> +li  t1, 32
> +1:
> +vsetvli t0, a3, e64, m4, ta, ma

You can set SEW=32 and corresponding LMUL here. Then you can remove all other 
VSETVLI instances below. (Note that this will NOT work on draft 0.7.1 
hardware, but it does work on conformant hardware.)

> +vle64.v v12, (a0)

This requires 64-bit alignment. I don't know if this is correct for 

Re: [FFmpeg-devel] [PATCH] af_afir: RISC-V V fcmul_add

2023-11-13 Thread flow gg
Sorry for the long delay in responding.

How is the modified patch now?

no longer using register stride(learn from your code) and have switched to
shNadd instead.

(using m4 and m2 as they are slightly faster than m8 and m4)

benchmark:
fcmul_add_c: 2179
fcmul_add_rvv_f32: 1652

Rémi Denis-Courmont  于2023年9月28日周四 21:33写道:

>
>
> Le 28 septembre 2023 08:45:44 GMT+03:00, flow gg  a
> écrit :
> >Okay, I revert the volatile in ff_read_time
> >
> >How about this version?
>
> It's still using register stride which is all but guaranteed to be slow on
> any hardware and should only be used as a last resort.
>
> The code is also missing scheduling for multi-issue and unrolling with the
> group multiplier.
>
> And lastly, while that probably won't change much, there are no reasons to
> use mul here. You can use shNadd like existing code does.
>
>
> >
> >use vls instead vlseg, and use vfmacc
> >
> >The benchmark is sometimes better, sometimes the same
> >
> >fcmul_add_c: 3.5
> >fcmul_add_rvv_f32: 3.5
> > - af_afir.fcmul_add [OK]
> >fcmul_add_c: 4.5
> >fcmul_add_rvv_f32: 4.2
> > - af_afir.fcmul_add [OK]
> >fcmul_add_c: 4.2
> >fcmul_add_rvv_f32: 4.2
> > - af_afir.fcmul_add [OK]
> >fcmul_add_c: 4.5
> >fcmul_add_rvv_f32: 4.2
> > - af_afir.fcmul_add [OK]
> >fcmul_add_c: 4.7
> >fcmul_add_rvv_f32: 3.5
> >
> >
> >Rémi Denis-Courmont  于2023年9月28日周四 00:41写道:
> >
> >> Le tiistaina 26. syyskuuta 2023, 12.24.58 EEST flow gg a écrit :
> >> > benchmark:
> >> > fcmul_add_c: 19.7
> >> > fcmul_add_rvv_f32: 6.7
> >>
> >> With optimisations enabled and the benchmarking fix, I get this (on the
> >> same
> >> hardware, I believe):
> >>
> >> fcmul_add_c: 3.5
> >> fcmul_add_rvv_f32: 6.7
> >>
> >> For sure unfortunate design limitations of T-Head C910 are to blame to
> no
> >> small extent. It is not the first occurrence of an RVV optimisation that
> >> turns
> >> out worse than scalar due to those, and I still have honest hopes that
> >> newer
> >> (and conformant) IP would give saner results, but... I also believe that
> >> the
> >> code could be improved regardless.
> >>
> >> --
> >> Rémi Denis-Courmont
> >> http://www.remlab.net/
> >>
> >>
> >>
> >> ___
> >> ffmpeg-devel mailing list
> >> ffmpeg-devel@ffmpeg.org
> >> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> >>
> >> To unsubscribe, visit link above, or email
> >> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
> >>
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
>
From 4199887247d31348385cd864b4efd6f4c02740f2 Mon Sep 17 00:00:00 2001
From: sunyuechi 
Date: Fri, 3 Nov 2023 10:35:53 +0800
Subject: [PATCH] af_afir: RISC-V V fcmul_add

benchmark:
fcmul_add_c: 2179
fcmul_add_rvv_f32: 1652
---
 libavfilter/af_afirdsp.h |  3 ++
 libavfilter/riscv/Makefile   |  2 ++
 libavfilter/riscv/af_afir_init.c | 39 
 libavfilter/riscv/af_afir_rvv.S  | 61 
 4 files changed, 105 insertions(+)
 create mode 100644 libavfilter/riscv/Makefile
 create mode 100644 libavfilter/riscv/af_afir_init.c
 create mode 100644 libavfilter/riscv/af_afir_rvv.S

diff --git a/libavfilter/af_afirdsp.h b/libavfilter/af_afirdsp.h
index 4208501393..d2d1e909c1 100644
--- a/libavfilter/af_afirdsp.h
+++ b/libavfilter/af_afirdsp.h
@@ -34,6 +34,7 @@ typedef struct AudioFIRDSPContext {
 } AudioFIRDSPContext;
 
 void ff_afir_init_x86(AudioFIRDSPContext *s);
+void ff_afir_init_riscv(AudioFIRDSPContext *s);
 
 static void fcmul_add_c(float *sum, const float *t, const float *c, ptrdiff_t len)
 {
@@ -76,6 +77,8 @@ static av_unused void ff_afir_init(AudioFIRDSPContext *dsp)
 
 #if ARCH_X86
 ff_afir_init_x86(dsp);
+#elif ARCH_RISCV
+ff_afir_init_riscv(dsp);
 #endif
 }
 
diff --git a/libavfilter/riscv/Makefile b/libavfilter/riscv/Makefile
new file mode 100644
index 00..0b968a9c0d
--- /dev/null
+++ b/libavfilter/riscv/Makefile
@@ -0,0 +1,2 @@
+OBJS += riscv/af_afir_init.o
+RVV-OBJS += riscv/af_afir_rvv.o
diff --git a/libavfilter/riscv/af_afir_init.c b/libavfilter/riscv/af_afir_init.c
new file mode 100644
index 00..13df8341e7
--- /dev/null
+++ b/libavfilter/riscv/af_afir_init.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2023 Institue of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser 

Re: [FFmpeg-devel] [PATCH] af_afir: RISC-V V fcmul_add

2023-09-28 Thread Rémi Denis-Courmont


Le 28 septembre 2023 08:45:44 GMT+03:00, flow gg  a écrit 
:
>Okay, I revert the volatile in ff_read_time
>
>How about this version?

It's still using register stride which is all but guaranteed to be slow on any 
hardware and should only be used as a last resort.

The code is also missing scheduling for multi-issue and unrolling with the 
group multiplier.

And lastly, while that probably won't change much, there are no reasons to use 
mul here. You can use shNadd like existing code does.


>
>use vls instead vlseg, and use vfmacc
>
>The benchmark is sometimes better, sometimes the same
>
>fcmul_add_c: 3.5
>fcmul_add_rvv_f32: 3.5
> - af_afir.fcmul_add [OK]
>fcmul_add_c: 4.5
>fcmul_add_rvv_f32: 4.2
> - af_afir.fcmul_add [OK]
>fcmul_add_c: 4.2
>fcmul_add_rvv_f32: 4.2
> - af_afir.fcmul_add [OK]
>fcmul_add_c: 4.5
>fcmul_add_rvv_f32: 4.2
> - af_afir.fcmul_add [OK]
>fcmul_add_c: 4.7
>fcmul_add_rvv_f32: 3.5
>
>
>Rémi Denis-Courmont  于2023年9月28日周四 00:41写道:
>
>> Le tiistaina 26. syyskuuta 2023, 12.24.58 EEST flow gg a écrit :
>> > benchmark:
>> > fcmul_add_c: 19.7
>> > fcmul_add_rvv_f32: 6.7
>>
>> With optimisations enabled and the benchmarking fix, I get this (on the
>> same
>> hardware, I believe):
>>
>> fcmul_add_c: 3.5
>> fcmul_add_rvv_f32: 6.7
>>
>> For sure unfortunate design limitations of T-Head C910 are to blame to no
>> small extent. It is not the first occurrence of an RVV optimisation that
>> turns
>> out worse than scalar due to those, and I still have honest hopes that
>> newer
>> (and conformant) IP would give saner results, but... I also believe that
>> the
>> code could be improved regardless.
>>
>> --
>> Rémi Denis-Courmont
>> http://www.remlab.net/
>>
>>
>>
>> ___
>> ffmpeg-devel mailing list
>> ffmpeg-devel@ffmpeg.org
>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>
>> To unsubscribe, visit link above, or email
>> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
>>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] af_afir: RISC-V V fcmul_add

2023-09-27 Thread flow gg
Okay, I revert the volatile in ff_read_time

How about this version?

use vls instead vlseg, and use vfmacc

The benchmark is sometimes better, sometimes the same

fcmul_add_c: 3.5
fcmul_add_rvv_f32: 3.5
 - af_afir.fcmul_add [OK]
fcmul_add_c: 4.5
fcmul_add_rvv_f32: 4.2
 - af_afir.fcmul_add [OK]
fcmul_add_c: 4.2
fcmul_add_rvv_f32: 4.2
 - af_afir.fcmul_add [OK]
fcmul_add_c: 4.5
fcmul_add_rvv_f32: 4.2
 - af_afir.fcmul_add [OK]
fcmul_add_c: 4.7
fcmul_add_rvv_f32: 3.5


Rémi Denis-Courmont  于2023年9月28日周四 00:41写道:

> Le tiistaina 26. syyskuuta 2023, 12.24.58 EEST flow gg a écrit :
> > benchmark:
> > fcmul_add_c: 19.7
> > fcmul_add_rvv_f32: 6.7
>
> With optimisations enabled and the benchmarking fix, I get this (on the
> same
> hardware, I believe):
>
> fcmul_add_c: 3.5
> fcmul_add_rvv_f32: 6.7
>
> For sure unfortunate design limitations of T-Head C910 are to blame to no
> small extent. It is not the first occurrence of an RVV optimisation that
> turns
> out worse than scalar due to those, and I still have honest hopes that
> newer
> (and conformant) IP would give saner results, but... I also believe that
> the
> code could be improved regardless.
>
> --
> Rémi Denis-Courmont
> http://www.remlab.net/
>
>
>
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
>
From 3e956958b01e780b7360dace59b6248f61a6f12c Mon Sep 17 00:00:00 2001
From: h 
Date: Tue, 26 Sep 2023 15:03:12 +0800
Subject: [PATCH] af_afir: RISC-V V fcmul_add

---
 libavfilter/af_afirdsp.h |  3 ++
 libavfilter/riscv/Makefile   |  2 ++
 libavfilter/riscv/af_afir_init.c | 39 
 libavfilter/riscv/af_afir_rvv.S  | 61 
 4 files changed, 105 insertions(+)
 create mode 100644 libavfilter/riscv/Makefile
 create mode 100644 libavfilter/riscv/af_afir_init.c
 create mode 100644 libavfilter/riscv/af_afir_rvv.S

diff --git a/libavfilter/af_afirdsp.h b/libavfilter/af_afirdsp.h
index 4208501393..d2d1e909c1 100644
--- a/libavfilter/af_afirdsp.h
+++ b/libavfilter/af_afirdsp.h
@@ -34,6 +34,7 @@ typedef struct AudioFIRDSPContext {
 } AudioFIRDSPContext;
 
 void ff_afir_init_x86(AudioFIRDSPContext *s);
+void ff_afir_init_riscv(AudioFIRDSPContext *s);
 
 static void fcmul_add_c(float *sum, const float *t, const float *c, ptrdiff_t len)
 {
@@ -76,6 +77,8 @@ static av_unused void ff_afir_init(AudioFIRDSPContext *dsp)
 
 #if ARCH_X86
 ff_afir_init_x86(dsp);
+#elif ARCH_RISCV
+ff_afir_init_riscv(dsp);
 #endif
 }
 
diff --git a/libavfilter/riscv/Makefile b/libavfilter/riscv/Makefile
new file mode 100644
index 00..0b968a9c0d
--- /dev/null
+++ b/libavfilter/riscv/Makefile
@@ -0,0 +1,2 @@
+OBJS += riscv/af_afir_init.o
+RVV-OBJS += riscv/af_afir_rvv.o
diff --git a/libavfilter/riscv/af_afir_init.c b/libavfilter/riscv/af_afir_init.c
new file mode 100644
index 00..ffa176abd2
--- /dev/null
+++ b/libavfilter/riscv/af_afir_init.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright © 2023 hleft
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include 
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavfilter/af_afirdsp.h"
+
+void ff_fcmul_add_rvv(float *sum, const float *t, const float *c,
+   ptrdiff_t len);
+
+av_cold void ff_afir_init_riscv(AudioFIRDSPContext *s)
+{
+#if HAVE_RVV
+int flags = av_get_cpu_flags();
+
+if (flags & AV_CPU_FLAG_RVV_F32)
+s->fcmul_add = ff_fcmul_add_rvv;
+#endif
+}
diff --git a/libavfilter/riscv/af_afir_rvv.S b/libavfilter/riscv/af_afir_rvv.S
new file mode 100644
index 00..6c15586007
--- /dev/null
+++ b/libavfilter/riscv/af_afir_rvv.S
@@ -0,0 +1,61 @@
+/*
+ * Copyright © 2023 hleft
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY 

Re: [FFmpeg-devel] [PATCH] af_afir: RISC-V V fcmul_add

2023-09-27 Thread Rémi Denis-Courmont
Le tiistaina 26. syyskuuta 2023, 12.24.58 EEST flow gg a écrit :
> benchmark:
> fcmul_add_c: 19.7
> fcmul_add_rvv_f32: 6.7

With optimisations enabled and the benchmarking fix, I get this (on the same 
hardware, I believe):

fcmul_add_c: 3.5
fcmul_add_rvv_f32: 6.7

For sure unfortunate design limitations of T-Head C910 are to blame to no 
small extent. It is not the first occurrence of an RVV optimisation that turns 
out worse than scalar due to those, and I still have honest hopes that newer 
(and conformant) IP would give saner results, but... I also believe that the 
code could be improved regardless.

-- 
Rémi Denis-Courmont
http://www.remlab.net/



___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] af_afir: RISC-V V fcmul_add

2023-09-27 Thread Rémi Denis-Courmont
Le keskiviikkona 27. syyskuuta 2023, 4.47.26 EEST flow gg a écrit :
> >>> please pad mnemonics to at least 8 columns for consistency
> 
> okay, changed
> 
> >>> It seems that you could just as well use vlseg2 without register
> 
> stride, no?
> 
> yes, vlseg will better, changed
> 
> >>> Note that you could do the double versions with very little extra
> 
> efforts.
> 
> okay
> 
> >>> But really, DO NOT use a fixed vector length here. At best, you're
> 
> wasting half
> 
> >>> the vector width. Your input has a variable size, use it.
> 
> okay, changed
> 
> >>> I'm a bit surprised that the performance improves this much,
> 
> considering that
> 
> >>> the C910 is notoriously bad at both segmented strided loads. It might
> 
> be that
> 
> >>> the C versions is just very bad due to lack of aliasing optimisations.
> 
> thanks, You reminded me.
> Sorry I had forgotten that there was a problem..
> A few days ago, I wanted to try running some existing benchmarks,
> 
> ```
> tests/checkasm/checkasm --bench --test=aacpsdsp
> tests/checkasm/checkasm --bench --test=alacdsp
> tests/checkasm/checkasm --bench --test=audiodsp
> tests/checkasm/checkasm --bench --test=g722dsp
> tests/checkasm/checkasm --bench --test=vorbisdsp
> tests/checkasm/checkasm --bench --test=float_dsp
> tests/checkasm/checkasm --bench --test=fixed_dsp
> tests/checkasm/checkasm --bench --test=af_afir
> ```
> 
> but they all returned 0.0.
> 
> For example,
> 
> ```
> butterflies_float_c: 0.0
> butterflies_float_rvv_f32: 0.0
> scalarproduct_float_c: 0.0
> scalarproduct_float_rvv_f32: 0.0
> vector_dmac_scalar_c: 0.0
> vector_dmac_scalar_rvv_f64: 0.0
> ...

OK, this reproduces on both SiFive and T-Head hardware here. You need to 
revert 09731fbfc3a914ec4f6ffad60aa9062db6a8f6aa.


-- 
レミ・デニ-クールモン
http://www.remlab.net/



___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] af_afir: RISC-V V fcmul_add

2023-09-27 Thread Rémi Denis-Courmont
Le keskiviikkona 27. syyskuuta 2023, 4.47.26 EEST flow gg a écrit :
> ```
> tests/checkasm/checkasm --bench --test=aacpsdsp
> tests/checkasm/checkasm --bench --test=alacdsp
> tests/checkasm/checkasm --bench --test=audiodsp
> tests/checkasm/checkasm --bench --test=g722dsp
> tests/checkasm/checkasm --bench --test=vorbisdsp
> tests/checkasm/checkasm --bench --test=float_dsp
> tests/checkasm/checkasm --bench --test=fixed_dsp
> tests/checkasm/checkasm --bench --test=af_afir
> ```
> 
> but they all returned 0.0.

Your checkasm setup is broken however why (I have not tested on C910 recently 
so maybe it's just how it is). But in any case, performance metrics from C 
code compiled with -O0 are worthless.

-- 
Rémi Denis-Courmont
http://www.remlab.net/



___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] af_afir: RISC-V V fcmul_add

2023-09-26 Thread flow gg
>>> please pad mnemonics to at least 8 columns for consistency

okay, changed

>>> It seems that you could just as well use vlseg2 without register
stride, no?

yes, vlseg will better, changed

>>> Note that you could do the double versions with very little extra
efforts.

okay

>>> But really, DO NOT use a fixed vector length here. At best, you're
wasting half
>>> the vector width. Your input has a variable size, use it.

okay, changed

>>> I'm a bit surprised that the performance improves this much,
considering that
>>> the C910 is notoriously bad at both segmented strided loads. It might
be that
>>> the C versions is just very bad due to lack of aliasing optimisations.

thanks, You reminded me.
Sorry I had forgotten that there was a problem..
A few days ago, I wanted to try running some existing benchmarks,

```
tests/checkasm/checkasm --bench --test=aacpsdsp
tests/checkasm/checkasm --bench --test=alacdsp
tests/checkasm/checkasm --bench --test=audiodsp
tests/checkasm/checkasm --bench --test=g722dsp
tests/checkasm/checkasm --bench --test=vorbisdsp
tests/checkasm/checkasm --bench --test=float_dsp
tests/checkasm/checkasm --bench --test=fixed_dsp
tests/checkasm/checkasm --bench --test=af_afir
```

but they all returned 0.0.

For example,

```
butterflies_float_c: 0.0
butterflies_float_rvv_f32: 0.0
scalarproduct_float_c: 0.0
scalarproduct_float_rvv_f32: 0.0
vector_dmac_scalar_c: 0.0
vector_dmac_scalar_rvv_f64: 0.0
...
```

I tried changing the -O3 in configure to -O2 or -O1, but still got 0.0.

Only by changing to -O0 did I receive non-zero results.

So, the benchmark I conducted was based on this, and I obtained the initial
results…

fcmul_add_c: 19.7
fcmul_add_rvv_f32: 6.7

Rémi Denis-Courmont  于2023年9月27日周三 02:44写道:

> Le tiistaina 26. syyskuuta 2023, 21.40.12 EEST Paul B Mahol a écrit :
> > On Tue, Sep 26, 2023 at 8:35 PM Rémi Denis-Courmont 
> wrote:
> > > Le tiistaina 26. syyskuuta 2023, 12.24.58 EEST flow gg a écrit :
> > > > benchmark:
> > > > fcmul_add_c: 19.7
> > > > fcmul_add_rvv_f32: 6.7
> > >
> > > Nit: please pad mnemonics to at least 8 columns for consistency.
> > >
> > > I'm a bit surprised that the performance improves this much,
> considering
> > > that
> > > the C910 is notoriously bad at both segmented strided loads. It might
> be
> > > that
> > > the C versions is just very bad due to lack of aliasing optimisations.
> Oh
> > > well.
> >
> > What you mean exactly that C version is missing?
>
> The C version does not have any restrict qualifier. This potentially
> prevents
> the C compiler from unrolling. Adding the keyword can improve performance
> gains of 20-30% on RISC-V scalar floating point.
>
> That said, sometimes you can't validly use restrict, and you simply can't
> tell
> the C compiler how to optimise properly. In those cases, even scalar
> floating
> point optimisations improve performance.
>
> --
> Rémi Denis-Courmont
> http://www.remlab.net/
>
>
>
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
>
From 5343b40c2e1849daadd79d6d531aeb7a762e0e06 Mon Sep 17 00:00:00 2001
From: h 
Date: Tue, 26 Sep 2023 15:03:12 +0800
Subject: [PATCH] af_afir: RISC-V V fcmul_add

fcmul_add_c: 19.7
fcmul_add_rvv_f32: 6.7
---
 libavfilter/af_afirdsp.h |  3 ++
 libavfilter/riscv/Makefile   |  2 ++
 libavfilter/riscv/af_afir_init.c | 39 ++
 libavfilter/riscv/af_afir_rvv.S  | 55 
 4 files changed, 99 insertions(+)
 create mode 100644 libavfilter/riscv/Makefile
 create mode 100644 libavfilter/riscv/af_afir_init.c
 create mode 100644 libavfilter/riscv/af_afir_rvv.S

diff --git a/libavfilter/af_afirdsp.h b/libavfilter/af_afirdsp.h
index 4208501393..d2d1e909c1 100644
--- a/libavfilter/af_afirdsp.h
+++ b/libavfilter/af_afirdsp.h
@@ -34,6 +34,7 @@ typedef struct AudioFIRDSPContext {
 } AudioFIRDSPContext;
 
 void ff_afir_init_x86(AudioFIRDSPContext *s);
+void ff_afir_init_riscv(AudioFIRDSPContext *s);
 
 static void fcmul_add_c(float *sum, const float *t, const float *c, ptrdiff_t len)
 {
@@ -76,6 +77,8 @@ static av_unused void ff_afir_init(AudioFIRDSPContext *dsp)
 
 #if ARCH_X86
 ff_afir_init_x86(dsp);
+#elif ARCH_RISCV
+ff_afir_init_riscv(dsp);
 #endif
 }
 
diff --git a/libavfilter/riscv/Makefile b/libavfilter/riscv/Makefile
new file mode 100644
index 00..0b968a9c0d
--- /dev/null
+++ b/libavfilter/riscv/Makefile
@@ -0,0 +1,2 @@
+OBJS += riscv/af_afir_init.o
+RVV-OBJS += riscv/af_afir_rvv.o
diff --git a/libavfilter/riscv/af_afir_init.c b/libavfilter/riscv/af_afir_init.c
new file mode 100644
index 00..ffa176abd2
--- /dev/null
+++ b/libavfilter/riscv/af_afir_init.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright © 2023 hleft
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it 

Re: [FFmpeg-devel] [PATCH] af_afir: RISC-V V fcmul_add

2023-09-26 Thread Rémi Denis-Courmont
Le tiistaina 26. syyskuuta 2023, 12.24.58 EEST flow gg a écrit :
> benchmark:
> fcmul_add_c: 19.7
> fcmul_add_rvv_f32: 6.7

+li t1, 4
+vsetvli  t0, t1, e32, m1, ta, ma

vsetivli t0, 4, ...

But really, DO NOT use a fixed vector length here. At best, you're wasting half 
the vector width. Your input has a variable size, use it.

+
+li t2, 8
+
+vlsseg2e32.v v0, (a1), t2

I'm not sure what you are trying to achieve here. It seems that you could just 
as well use vlseg2 without register stride, no?

+vlsseg2e32.v v2, (a2), t2
+vlsseg2e32.v v4, (a0), t2
+
+vfmul.vv v6, v0, v2
+vfmul.vv v7, v1, v3
+vfmul.vv v8, v0, v3
+vfmul.vv v9, v1, v2
+
+vfadd.vv v4, v4, v6
+vfsub.vv v4, v4, v7
+vfadd.vv v5, v5, v8
+vfadd.vv v5, v5, v9
+
+vssseg2e32.v v4, (a0), t2

Same here.


-- 
レミ・デニ-クールモン
http://www.remlab.net/



___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] af_afir: RISC-V V fcmul_add

2023-09-26 Thread Rémi Denis-Courmont
Le tiistaina 26. syyskuuta 2023, 21.40.12 EEST Paul B Mahol a écrit :
> On Tue, Sep 26, 2023 at 8:35 PM Rémi Denis-Courmont  wrote:
> > Le tiistaina 26. syyskuuta 2023, 12.24.58 EEST flow gg a écrit :
> > > benchmark:
> > > fcmul_add_c: 19.7
> > > fcmul_add_rvv_f32: 6.7
> > 
> > Nit: please pad mnemonics to at least 8 columns for consistency.
> > 
> > I'm a bit surprised that the performance improves this much, considering
> > that
> > the C910 is notoriously bad at both segmented strided loads. It might be
> > that
> > the C versions is just very bad due to lack of aliasing optimisations. Oh
> > well.
> 
> What you mean exactly that C version is missing?

The C version does not have any restrict qualifier. This potentially prevents 
the C compiler from unrolling. Adding the keyword can improve performance 
gains of 20-30% on RISC-V scalar floating point.

That said, sometimes you can't validly use restrict, and you simply can't tell 
the C compiler how to optimise properly. In those cases, even scalar floating 
point optimisations improve performance.

-- 
Rémi Denis-Courmont
http://www.remlab.net/



___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] af_afir: RISC-V V fcmul_add

2023-09-26 Thread Paul B Mahol
On Tue, Sep 26, 2023 at 8:35 PM Rémi Denis-Courmont  wrote:

> Le tiistaina 26. syyskuuta 2023, 12.24.58 EEST flow gg a écrit :
> > benchmark:
> > fcmul_add_c: 19.7
> > fcmul_add_rvv_f32: 6.7
>
> Nit: please pad mnemonics to at least 8 columns for consistency.
>
> I'm a bit surprised that the performance improves this much, considering
> that
> the C910 is notoriously bad at both segmented strided loads. It might be
> that
> the C versions is just very bad due to lack of aliasing optimisations. Oh
> well.
>

What you mean exactly that C version is missing?


>
> Note that you could do the double versions with very little extra efforts.
>
> --
> 雷米‧德尼-库尔蒙
> http://www.remlab.net/
>
>
>
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] af_afir: RISC-V V fcmul_add

2023-09-26 Thread Rémi Denis-Courmont
Le tiistaina 26. syyskuuta 2023, 12.24.58 EEST flow gg a écrit :
> benchmark:
> fcmul_add_c: 19.7
> fcmul_add_rvv_f32: 6.7

Nit: please pad mnemonics to at least 8 columns for consistency.

I'm a bit surprised that the performance improves this much, considering that 
the C910 is notoriously bad at both segmented strided loads. It might be that 
the C versions is just very bad due to lack of aliasing optimisations. Oh 
well.

Note that you could do the double versions with very little extra efforts.

-- 
雷米‧德尼-库尔蒙
http://www.remlab.net/



___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH] af_afir: RISC-V V fcmul_add

2023-09-26 Thread flow gg
benchmark:
fcmul_add_c: 19.7
fcmul_add_rvv_f32: 6.7
From 6bef2523728a472bb803ce085a1aafdfd624e212 Mon Sep 17 00:00:00 2001
From: h 
Date: Tue, 26 Sep 2023 15:03:12 +0800
Subject: [PATCH] af_afir: RISC-V V fcmul_add

fcmul_add_c: 19.7
fcmul_add_rvv_f32: 6.7
---
 libavfilter/af_afirdsp.h |  3 ++
 libavfilter/riscv/Makefile   |  2 +
 libavfilter/riscv/af_afir_init.c | 39 +++
 libavfilter/riscv/af_afir_rvv.S  | 65 
 4 files changed, 109 insertions(+)
 create mode 100644 libavfilter/riscv/Makefile
 create mode 100644 libavfilter/riscv/af_afir_init.c
 create mode 100644 libavfilter/riscv/af_afir_rvv.S

diff --git a/libavfilter/af_afirdsp.h b/libavfilter/af_afirdsp.h
index 4208501393..d2d1e909c1 100644
--- a/libavfilter/af_afirdsp.h
+++ b/libavfilter/af_afirdsp.h
@@ -34,6 +34,7 @@ typedef struct AudioFIRDSPContext {
 } AudioFIRDSPContext;
 
 void ff_afir_init_x86(AudioFIRDSPContext *s);
+void ff_afir_init_riscv(AudioFIRDSPContext *s);
 
 static void fcmul_add_c(float *sum, const float *t, const float *c, ptrdiff_t len)
 {
@@ -76,6 +77,8 @@ static av_unused void ff_afir_init(AudioFIRDSPContext *dsp)
 
 #if ARCH_X86
 ff_afir_init_x86(dsp);
+#elif ARCH_RISCV
+ff_afir_init_riscv(dsp);
 #endif
 }
 
diff --git a/libavfilter/riscv/Makefile b/libavfilter/riscv/Makefile
new file mode 100644
index 00..0b968a9c0d
--- /dev/null
+++ b/libavfilter/riscv/Makefile
@@ -0,0 +1,2 @@
+OBJS += riscv/af_afir_init.o
+RVV-OBJS += riscv/af_afir_rvv.o
diff --git a/libavfilter/riscv/af_afir_init.c b/libavfilter/riscv/af_afir_init.c
new file mode 100644
index 00..ffa176abd2
--- /dev/null
+++ b/libavfilter/riscv/af_afir_init.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright © 2023 hleft
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include 
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavfilter/af_afirdsp.h"
+
+void ff_fcmul_add_rvv(float *sum, const float *t, const float *c,
+   ptrdiff_t len);
+
+av_cold void ff_afir_init_riscv(AudioFIRDSPContext *s)
+{
+#if HAVE_RVV
+int flags = av_get_cpu_flags();
+
+if (flags & AV_CPU_FLAG_RVV_F32)
+s->fcmul_add = ff_fcmul_add_rvv;
+#endif
+}
diff --git a/libavfilter/riscv/af_afir_rvv.S b/libavfilter/riscv/af_afir_rvv.S
new file mode 100644
index 00..06c3979575
--- /dev/null
+++ b/libavfilter/riscv/af_afir_rvv.S
@@ -0,0 +1,65 @@
+/*
+ * Copyright © 2023 hleft
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+//  void ff_fcmul_add(float *sum, const float *t, const float *c, int len)
+func ff_fcmul_add_rvv, zve32f
+1:
+li t1, 4
+vsetvli  t0, t1, e32, m1, ta, ma
+
+li t2, 8
+
+vlsseg2e32.v v0, (a1), t2
+vlsseg2e32.v v2, (a2), t2
+vlsseg2e32.v v4, (a0), t2
+
+vfmul.vv v6, v0, v2
+vfmul.vv v7, v1, v3
+vfmul.vv v8, v0, v3
+vfmul.vv v9, v1, v2
+
+vfadd.vv v4, v4, v6
+vfsub.vv v4, v4, v7
+vfadd.vv v5, v5, v8
+vfadd.vv v5, v5, v9
+
+vssseg2e32.v v4, (a0), t2
+
+mul t3, t2, t1
+add a0, a0, t3
+add a1, a1, t3
+add a2, a2, t3
+
+sub a3, a3, t0
+bgtz a3, 1b
+
+flw fa0, 0(a1)
+flw fa1, 0(a2)
+flw fa2, 0(a0)
+
+fmul.s  fa0, fa0, fa1
+fadd.s  fa2, fa2, fa0
+
+fsw fa2, 0(a0)
+
+ret
+endfunc
-- 
2.42.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To