On Thu, May 20, 2021 at 11:48:20PM +0200, Christian Weisgerber wrote:
> > On Tuesday, May 18, 2021, 10:42:53 AM EDT, Alexandre Ratchov
> > <[email protected]> wrote:
> > >On Tue, May 18, 2021 at 02:19:49PM +0000, Doug Moss wrote:
> > >> Another topic:
> > >> arm64 sndiod cannot be built with 24bit
> > >>
> > >> in /usr/src/usr.bin/sndiod
> > >> make COPTS=-DADATA_BITS=24
> > >> produces:
> > >> dsp.h:85:2: error "no 24-bit code for this architecture"
> > >
> > >You could just add arm64 to the users of this code (or should we just
> > >enable it for all non-i386 archs?). Let us know if this works well on arm64.
>
> I think this should be enabled for all other archs.
>
> In fact, it should be used for _all_ archs. If you compare the
> inline assembly for fp24_mul() with the code clang generates on
> i386, there is no difference in principle. It's imull followed by a
> double-register shift. There is no point in trying to hand-optimize
> this.
>
> Now, for fp24_muldiv() clang generates much longer code involving
> a subroutine call to __divdi3(). But the switch to the FIR filter
> for resampling has removed the only use of ADATA_MULDIV().
Here's the diff to drop the assembly. OK?
Index: sndiod/dsp.h
===================================================================
RCS file: /cvs/src/usr.bin/sndiod/dsp.h,v
retrieving revision 1.9
diff -u -p -u -p -r1.9 dsp.h
--- sndiod/dsp.h 12 Jan 2021 15:46:53 -0000 1.9
+++ sndiod/dsp.h 21 May 2021 05:11:02 -0000
@@ -40,50 +40,10 @@ typedef short adata_t;
#elif ADATA_BITS == 24
-#if defined(__i386__) && defined(__GNUC__)
-
-static inline int
-fp24_mul(int x, int a)
-{
- int res;
-
- asm volatile (
- "imull %2\n\t"
- "shrdl $23, %%edx, %%eax\n\t"
- : "=a" (res)
- : "a" (x), "r" (a)
- : "%edx"
- );
- return res;
-}
-
-static inline int
-fp24_muldiv(int x, int a, int b)
-{
- int res;
-
- asm volatile (
- "imull %2\n\t"
- "idivl %3\n\t"
- : "=a" (res)
- : "a" (x), "d" (a), "r" (b)
- );
- return res;
-}
-
-#define ADATA_MUL(x,y) fp24_mul(x, y)
-#define ADATA_MULDIV(x,y,z) fp24_muldiv(x, y, z);
-
-#elif defined(__amd64__) || defined(__sparc64__)
-
#define ADATA_MUL(x,y) \
((int)(((long long)(x) * (long long)(y)) >> (ADATA_BITS - 1)))
#define ADATA_MULDIV(x,y,z) \
((int)((long long)(x) * (long long)(y) / (long long)(z)))
-
-#else
-#error "no 24-bit code for this architecture"
-#endif
typedef int adata_t;
Index: aucat/dsp.h
===================================================================
RCS file: /cvs/src/usr.bin/aucat/dsp.h,v
retrieving revision 1.7
diff -u -p -u -p -r1.7 dsp.h
--- aucat/dsp.h 12 Jan 2021 15:46:53 -0000 1.7
+++ aucat/dsp.h 21 May 2021 05:11:02 -0000
@@ -40,50 +40,10 @@ typedef short adata_t;
#elif ADATA_BITS == 24
-#if defined(__i386__) && defined(__GNUC__)
-
-static inline int
-fp24_mul(int x, int a)
-{
- int res;
-
- asm volatile (
- "imull %2\n\t"
- "shrdl $23, %%edx, %%eax\n\t"
- : "=a" (res)
- : "a" (x), "r" (a)
- : "%edx"
- );
- return res;
-}
-
-static inline int
-fp24_muldiv(int x, int a, int b)
-{
- int res;
-
- asm volatile (
- "imull %2\n\t"
- "idivl %3\n\t"
- : "=a" (res)
- : "a" (x), "d" (a), "r" (b)
- );
- return res;
-}
-
-#define ADATA_MUL(x,y) fp24_mul(x, y)
-#define ADATA_MULDIV(x,y,z) fp24_muldiv(x, y, z);
-
-#elif defined(__amd64__) || defined(__sparc64__)
-
#define ADATA_MUL(x,y) \
((int)(((long long)(x) * (long long)(y)) >> (ADATA_BITS - 1)))
#define ADATA_MULDIV(x,y,z) \
((int)((long long)(x) * (long long)(y) / (long long)(z)))
-
-#else
-#error "no 24-bit code for this architecture"
-#endif
typedef int adata_t;