At Wed, 8 May 2019 18:14:28 +0000,
m...@netbsd.org wrote:
> Is there some scenario where this made a difference?
> it sounds like an optimization that a compiler should already do
> 
> 5234:#if defined(AUDIO_USE_C_IMPLEMENTATION_DEFINED_BEHAVIOR) && 
> defined(__GNUC__)
> 5235-                         *d++ += ((aint2_t)*s++) * track->volume >> 8;
> 5236-#else
> 5237-                         *d++ += ((aint2_t)*s++) * track->volume / 256;
> 5238-#endif

It's signed integer so these two output different code.

 Here is the fragment of amd64 asm of "(x * vol >> 8)"
    96a5:       0f af d1                imul   %ecx,%edx
    96a8:       c1 fa 08                sar    $0x8,%edx
    96ab:       66 89 14 07             mov    %dx,(%rdi,%rax,1)

 Here is "(x * vol / 256)"
    96a5:       0f af c1                imul   %ecx,%eax
    96a8:       85 c0                   test   %eax,%eax
    96aa:       79 05                   jns    96b1 <audio_track_chvol+0x82>
    96ac:       05 ff 00 00 00          add    $0xff,%eax
    96b1:       c1 f8 08                sar    $0x8,%eax
    96b4:       66 89 04 17             mov    %ax,(%rdi,%rdx,1)

I evaluated these (a year ago).  The former is 1.9 times faster on
my old amd64 than the later, 1.3 times faster on m68k, at that point.
# Although, above code is from current but evaluation is a year ago.

Let's consider about (-1 / 2).  The mathematical answer is -0.5 so
it's 0 for integers.  And (-1 >> 1) is -1.  These two answers are
different but this value is audio wave that human hear.  I took
performance.

However, I understand that my code looks strange.
How about this?

Index: sys/dev/audio/audio.c
===================================================================
RCS file: /cvsroot/src/sys/dev/audio/audio.c,v
retrieving revision 1.8
diff -u -r1.8 audio.c
--- sys/dev/audio/audio.c       21 May 2019 12:52:57 -0000      1.8
+++ sys/dev/audio/audio.c       23 May 2019 03:31:36 -0000
@@ -465,6 +465,29 @@
 #define SPECIFIED(x)   ((x) != ~0)
 #define SPECIFIED_CH(x)        ((x) != (u_char)~0)
 
+/*
+ * AUDIO_ASR() does Arithmetic Shift Right operation.
+ * This macro should be used for audio wave data only.
+ *
+ * Division by power of two is replaced with shift operation in the most
+ * compiler, but even then rounding-to-zero occurs on negative value.
+ * What we handle here is the audio wave data that human hear, so we can
+ * ignore the rounding difference.  Therefore we want to use faster
+ * arithmetic shift right operation.  But the right shift operator ('>>')
+ * for negative integer is "implementation defined" behavior in C (note
+ * that it's not "undefined" behavior).  So if implementation defines '>>'
+ * as ASR, we use it.
+ *
+ * Using ASR is 1.9 times faster than division on my amd64, and 1.3 times
+ * faster on my m68k.  -- isaki 201801.
+ */
+#if defined(__GNUC__)
+/* gcc defines '>>' as ASR. */
+#define AUDIO_ASR(value, shift)        ((value) >> (shift))
+#else
+#define AUDIO_ASR(value, shift)        ((value) / (1 << (shift)))
+#endif
+
 /* Device timeout in msec */
 #define AUDIO_TIMEOUT  (3000)
 
@@ -3304,11 +3327,7 @@
                for (ch = 0; ch < channels; ch++) {
                        aint2_t val;
                        val = *s++;
-#if defined(AUDIO_USE_C_IMPLEMENTATION_DEFINED_BEHAVIOR) && defined(__GNUC__)
-                       val = val * ch_volume[ch] >> 8;
-#else
-                       val = val * ch_volume[ch] / 256;
-#endif
+                       val = AUDIO_ASR(val * ch_volume[ch], 8);
                        *d++ = (aint_t)val;
                }
        }
@@ -3330,11 +3349,7 @@
        d = arg->dst;
 
        for (i = 0; i < arg->count; i++) {
-#if defined(AUDIO_USE_C_IMPLEMENTATION_DEFINED_BEHAVIOR) && defined(__GNUC__)
-               *d++ = (s[0] >> 1) + (s[1] >> 1);
-#else
-               *d++ = (s[0] / 2) + (s[1] / 2);
-#endif
+               *d++ = AUDIO_ASR(s[0], 1) + AUDIO_ASR(s[1], 1);
                s += arg->srcfmt->channels;
        }
 }
@@ -5131,11 +5146,7 @@
                if (vol != 256) {
                        m = mixer->mixsample;
                        for (i = 0; i < sample_count; i++) {
-#if defined(AUDIO_USE_C_IMPLEMENTATION_DEFINED_BEHAVIOR) && defined(__GNUC__)
-                               *m = *m * vol >> 8;
-#else
-                               *m = *m * vol / 256;
-#endif
+                               *m = AUDIO_ASR(*m * vol, 8);
                                m++;
                        }
                }
@@ -5223,11 +5234,9 @@
 #if defined(AUDIO_SUPPORT_TRACK_VOLUME)
                if (track->volume != 256) {
                        for (i = 0; i < sample_count; i++) {
-#if defined(AUDIO_USE_C_IMPLEMENTATION_DEFINED_BEHAVIOR) && defined(__GNUC__)
-                               *d++ = ((aint2_t)*s++) * track->volume >> 8;
-#else
-                               *d++ = ((aint2_t)*s++) * track->volume / 256;
-#endif
+                               aint2_t v;
+                               v = *s++;
+                               *d++ = AUDIO_ASR(v * track->volume, 8)
                        }
                } else
 #endif
@@ -5241,11 +5250,9 @@
 #if defined(AUDIO_SUPPORT_TRACK_VOLUME)
                if (track->volume != 256) {
                        for (i = 0; i < sample_count; i++) {
-#if defined(AUDIO_USE_C_IMPLEMENTATION_DEFINED_BEHAVIOR) && defined(__GNUC__)
-                               *d++ += ((aint2_t)*s++) * track->volume >> 8;
-#else
-                               *d++ += ((aint2_t)*s++) * track->volume / 256;
-#endif
+                               aint2_t v;
+                               v = *s++;
+                               *d++ += AUDIO_ASR(v * track->volume, 8);
                        }
                } else
 #endif
Index: sys/dev/audio/audiodef.h
===================================================================
RCS file: /cvsroot/src/sys/dev/audio/audiodef.h,v
retrieving revision 1.2
diff -u -r1.2 audiodef.h
--- sys/dev/audio/audiodef.h    8 May 2019 13:40:17 -0000       1.2
+++ sys/dev/audio/audiodef.h    23 May 2019 03:31:36 -0000
@@ -63,12 +63,6 @@
  */
 /* #define AUDIO_SUPPORT_TRACK_VOLUME */
 
-/*
- * Whether use C language's "implementation defined" behavior (note that
- * it's not "undefined" behavior).  It improves performance well.
- */
-#define AUDIO_USE_C_IMPLEMENTATION_DEFINED_BEHAVIOR
-
 /* conversion stage */
 typedef struct {
        audio_ring_t srcbuf;

---
Tetsuya Isaki <is...@pastel-flower.jp / is...@netbsd.org>

Reply via email to