At Wed, 8 May 2019 18:14:28 +0000, m...@netbsd.org wrote: > Is there some scenario where this made a difference? > it sounds like an optimization that a compiler should already do > > 5234:#if defined(AUDIO_USE_C_IMPLEMENTATION_DEFINED_BEHAVIOR) && > defined(__GNUC__) > 5235- *d++ += ((aint2_t)*s++) * track->volume >> 8; > 5236-#else > 5237- *d++ += ((aint2_t)*s++) * track->volume / 256; > 5238-#endif
It's signed integer so these two output different code. Here is the fragment of amd64 asm of "(x * vol >> 8)" 96a5: 0f af d1 imul %ecx,%edx 96a8: c1 fa 08 sar $0x8,%edx 96ab: 66 89 14 07 mov %dx,(%rdi,%rax,1) Here is "(x * vol / 256)" 96a5: 0f af c1 imul %ecx,%eax 96a8: 85 c0 test %eax,%eax 96aa: 79 05 jns 96b1 <audio_track_chvol+0x82> 96ac: 05 ff 00 00 00 add $0xff,%eax 96b1: c1 f8 08 sar $0x8,%eax 96b4: 66 89 04 17 mov %ax,(%rdi,%rdx,1) I evaluated these (a year ago). The former is 1.9 times faster on my old amd64 than the later, 1.3 times faster on m68k, at that point. # Although, above code is from current but evaluation is a year ago. Let's consider about (-1 / 2). The mathematical answer is -0.5 so it's 0 for integers. And (-1 >> 1) is -1. These two answers are different but this value is audio wave that human hear. I took performance. However, I understand that my code looks strange. How about this? Index: sys/dev/audio/audio.c =================================================================== RCS file: /cvsroot/src/sys/dev/audio/audio.c,v retrieving revision 1.8 diff -u -r1.8 audio.c --- sys/dev/audio/audio.c 21 May 2019 12:52:57 -0000 1.8 +++ sys/dev/audio/audio.c 23 May 2019 03:31:36 -0000 @@ -465,6 +465,29 @@ #define SPECIFIED(x) ((x) != ~0) #define SPECIFIED_CH(x) ((x) != (u_char)~0) +/* + * AUDIO_ASR() does Arithmetic Shift Right operation. + * This macro should be used for audio wave data only. + * + * Division by power of two is replaced with shift operation in the most + * compiler, but even then rounding-to-zero occurs on negative value. + * What we handle here is the audio wave data that human hear, so we can + * ignore the rounding difference. Therefore we want to use faster + * arithmetic shift right operation. But the right shift operator ('>>') + * for negative integer is "implementation defined" behavior in C (note + * that it's not "undefined" behavior). So if implementation defines '>>' + * as ASR, we use it. + * + * Using ASR is 1.9 times faster than division on my amd64, and 1.3 times + * faster on my m68k. -- isaki 201801. + */ +#if defined(__GNUC__) +/* gcc defines '>>' as ASR. */ +#define AUDIO_ASR(value, shift) ((value) >> (shift)) +#else +#define AUDIO_ASR(value, shift) ((value) / (1 << (shift))) +#endif + /* Device timeout in msec */ #define AUDIO_TIMEOUT (3000) @@ -3304,11 +3327,7 @@ for (ch = 0; ch < channels; ch++) { aint2_t val; val = *s++; -#if defined(AUDIO_USE_C_IMPLEMENTATION_DEFINED_BEHAVIOR) && defined(__GNUC__) - val = val * ch_volume[ch] >> 8; -#else - val = val * ch_volume[ch] / 256; -#endif + val = AUDIO_ASR(val * ch_volume[ch], 8); *d++ = (aint_t)val; } } @@ -3330,11 +3349,7 @@ d = arg->dst; for (i = 0; i < arg->count; i++) { -#if defined(AUDIO_USE_C_IMPLEMENTATION_DEFINED_BEHAVIOR) && defined(__GNUC__) - *d++ = (s[0] >> 1) + (s[1] >> 1); -#else - *d++ = (s[0] / 2) + (s[1] / 2); -#endif + *d++ = AUDIO_ASR(s[0], 1) + AUDIO_ASR(s[1], 1); s += arg->srcfmt->channels; } } @@ -5131,11 +5146,7 @@ if (vol != 256) { m = mixer->mixsample; for (i = 0; i < sample_count; i++) { -#if defined(AUDIO_USE_C_IMPLEMENTATION_DEFINED_BEHAVIOR) && defined(__GNUC__) - *m = *m * vol >> 8; -#else - *m = *m * vol / 256; -#endif + *m = AUDIO_ASR(*m * vol, 8); m++; } } @@ -5223,11 +5234,9 @@ #if defined(AUDIO_SUPPORT_TRACK_VOLUME) if (track->volume != 256) { for (i = 0; i < sample_count; i++) { -#if defined(AUDIO_USE_C_IMPLEMENTATION_DEFINED_BEHAVIOR) && defined(__GNUC__) - *d++ = ((aint2_t)*s++) * track->volume >> 8; -#else - *d++ = ((aint2_t)*s++) * track->volume / 256; -#endif + aint2_t v; + v = *s++; + *d++ = AUDIO_ASR(v * track->volume, 8) } } else #endif @@ -5241,11 +5250,9 @@ #if defined(AUDIO_SUPPORT_TRACK_VOLUME) if (track->volume != 256) { for (i = 0; i < sample_count; i++) { -#if defined(AUDIO_USE_C_IMPLEMENTATION_DEFINED_BEHAVIOR) && defined(__GNUC__) - *d++ += ((aint2_t)*s++) * track->volume >> 8; -#else - *d++ += ((aint2_t)*s++) * track->volume / 256; -#endif + aint2_t v; + v = *s++; + *d++ += AUDIO_ASR(v * track->volume, 8); } } else #endif Index: sys/dev/audio/audiodef.h =================================================================== RCS file: /cvsroot/src/sys/dev/audio/audiodef.h,v retrieving revision 1.2 diff -u -r1.2 audiodef.h --- sys/dev/audio/audiodef.h 8 May 2019 13:40:17 -0000 1.2 +++ sys/dev/audio/audiodef.h 23 May 2019 03:31:36 -0000 @@ -63,12 +63,6 @@ */ /* #define AUDIO_SUPPORT_TRACK_VOLUME */ -/* - * Whether use C language's "implementation defined" behavior (note that - * it's not "undefined" behavior). It improves performance well. - */ -#define AUDIO_USE_C_IMPLEMENTATION_DEFINED_BEHAVIOR - /* conversion stage */ typedef struct { audio_ring_t srcbuf; --- Tetsuya Isaki <is...@pastel-flower.jp / is...@netbsd.org>