On Thu, May 20, 2021 at 11:48:20PM +0200, Christian Weisgerber wrote:
> > On Tuesday, May 18, 2021, 10:42:53 AM EDT, Alexandre Ratchov 
> > <[email protected]> wrote: 
> > >On Tue, May 18, 2021 at 02:19:49PM +0000, Doug Moss wrote:
> > >> Another topic:
> > >> arm64 sndiod cannot be built with 24bit
> > >> 
> > >> in /usr/src/usr.bin/sndiod
> > >> make COPTS=-DADATA_BITS=24
> > >> produces:
> > >> dsp.h:85:2: error "no 24-bit code for this architecture"
> > >
> > >You could just add arm64 to the users of this code (or should we just
> > >enable it for all non-i386 archs?). Let us know if this works well on arm64.
> 
> I think this should be enabled for all other archs.
> 
> In fact, it should be used for _all_ archs.  If you compare the
> inline assembly for fp24_mul() with the code clang generates on
> i386, there is no fundamental difference.  It's imull followed by a
> double-register shift.  There is no point in trying to hand-optimize
> this.
> 
> Now, for fp24_muldiv() clang generates much longer code involving
> a subroutine call to __divdi3().  But the switch to the FIR filter
> for resampling has removed the only use of ADATA_MULDIV(). 

Here's the diff to drop the assembly. OK?

Index: sndiod/dsp.h
===================================================================
RCS file: /cvs/src/usr.bin/sndiod/dsp.h,v
retrieving revision 1.9
diff -u -p -u -p -r1.9 dsp.h
--- sndiod/dsp.h        12 Jan 2021 15:46:53 -0000      1.9
+++ sndiod/dsp.h        21 May 2021 05:11:02 -0000
@@ -40,50 +40,10 @@ typedef short adata_t;
 
 #elif ADATA_BITS == 24
 
-#if defined(__i386__) && defined(__GNUC__)
-
-static inline int
-fp24_mul(int x, int a)
-{
-       int res;
-
-       asm volatile (
-               "imull  %2\n\t"
-               "shrdl $23, %%edx, %%eax\n\t"
-               : "=a" (res)
-               : "a" (x), "r" (a)
-               : "%edx"
-               );
-       return res;
-}
-
-static inline int
-fp24_muldiv(int x, int a, int b)
-{
-       int res;
-
-       asm volatile (
-               "imull %2\n\t"
-               "idivl %3\n\t"
-               : "=a" (res)
-               : "a" (x), "d" (a), "r" (b)
-               );
-       return res;
-}
-
-#define ADATA_MUL(x,y)         fp24_mul(x, y)
-#define ADATA_MULDIV(x,y,z)    fp24_muldiv(x, y, z);
-
-#elif defined(__amd64__) || defined(__sparc64__)
-
 #define ADATA_MUL(x,y)         \
        ((int)(((long long)(x) * (long long)(y)) >> (ADATA_BITS - 1)))
 #define ADATA_MULDIV(x,y,z)    \
        ((int)((long long)(x) * (long long)(y) / (long long)(z)))
-
-#else
-#error "no 24-bit code for this architecture"
-#endif
 
 typedef int adata_t;
 
Index: aucat/dsp.h
===================================================================
RCS file: /cvs/src/usr.bin/aucat/dsp.h,v
retrieving revision 1.7
diff -u -p -u -p -r1.7 dsp.h
--- aucat/dsp.h 12 Jan 2021 15:46:53 -0000      1.7
+++ aucat/dsp.h 21 May 2021 05:11:02 -0000
@@ -40,50 +40,10 @@ typedef short adata_t;
 
 #elif ADATA_BITS == 24
 
-#if defined(__i386__) && defined(__GNUC__)
-
-static inline int
-fp24_mul(int x, int a)
-{
-       int res;
-
-       asm volatile (
-               "imull  %2\n\t"
-               "shrdl $23, %%edx, %%eax\n\t"
-               : "=a" (res)
-               : "a" (x), "r" (a)
-               : "%edx"
-               );
-       return res;
-}
-
-static inline int
-fp24_muldiv(int x, int a, int b)
-{
-       int res;
-
-       asm volatile (
-               "imull %2\n\t"
-               "idivl %3\n\t"
-               : "=a" (res)
-               : "a" (x), "d" (a), "r" (b)
-               );
-       return res;
-}
-
-#define ADATA_MUL(x,y)         fp24_mul(x, y)
-#define ADATA_MULDIV(x,y,z)    fp24_muldiv(x, y, z);
-
-#elif defined(__amd64__) || defined(__sparc64__)
-
 #define ADATA_MUL(x,y)         \
        ((int)(((long long)(x) * (long long)(y)) >> (ADATA_BITS - 1)))
 #define ADATA_MULDIV(x,y,z)    \
        ((int)((long long)(x) * (long long)(y) / (long long)(z)))
-
-#else
-#error "no 24-bit code for this architecture"
-#endif
 
 typedef int adata_t;
 

Reply via email to