Hello everyone,

I've come up with a patch for TiMidity++ 2.13.2 which reduces the running
time of .wav generation by about 4% on my machine (the patch is attached).

The speedup is enabled with the "configure" switch "--with-ccg". It requires
Ian Piumarta's ccg dynamic code generator
(http://www.iam.unibe.ch/~denker/Squeak/j3/ccg/) and works only on x86.

I tested it by using it to generate a .wav file from a .mid of Rakhmaninov's
G minor prelude (http://www.kunstderfuge.com/rachmaninov.htm -- Op. 23, No.
5); specifically:

        CFLAGS="-O6 -g" ./configure --with-ccg
        cd timidity
        make
        time ./timidity -Ow -o o.wav \
            ~/rachmaninov_57525a_prelude_\(nc\)smythe.mid

Anyway, is there any chance this patch might be included in the official
tree some way or other? :) I'm curious because I see that there's not been a
new release of TiMidity++ for quite some time...

Thanks!
diff -r -U5 --new-file --exclude=configure --exclude='*m4*' --exclude=Makefile.in TiMidity++-2.13.2/configure.in TiMidity++-2.13.2-ccg/configure.in
--- TiMidity++-2.13.2/configure.in      2004-10-03 20:39:52.000000000 +0800
+++ TiMidity++-2.13.2-ccg/configure.in  2007-09-29 12:25:45.000000000 +0800
@@ -752,10 +752,17 @@
     eval "au_enable_$DEFAULT_PLAYMODE=yes"
   else
     AC_MSG_WARN(--with-default-output=$withval: audio is not enabled)
   fi])
 
+AC_ARG_WITH(ccg,
+  [  --with-ccg              use Piumarta's ccg for code speedup (x86 only)],
+  [ if test x"$withval" = xyes; then
+      EXTRADEFS="$EXTRADEFS -DCCG"
+      SYSEXTRAS="$SYSEXTRAS jit_i386.c"
+    fi])
+
 dnl compatibility matters.
 dnl AC_ARG_ENABLE(esd,
 dnl   [  --enable-esd         EsounD (Obsoleted.  Use --enable-audio=esd)],
 dnl   [ au_enable_esd=$enableval ])
 dnl AC_ARG_ENABLE(nas,
@@ -1923,10 +1930,12 @@
 AM_CONDITIONAL(BORLANDC, test "x$BORLANDC" = xyes)
 AM_CONDITIONAL(WATCOM_C, test "x$WATCOM_C" = xyes)
 
 AM_CONDITIONAL(W32READDIR, test "x$W32READDIR" = "xyes")
 
+AM_CONDITIONAL(CCG, test "x$with_ccg" = xyes)
+
 SET_UNIQ_WORDS(LDFLAGS,$LDFLAGS)
 SET_UNIQ_WORDS(SHLDFLAGS,$SHLDFLAGS)
 SET_UNIQ_WORDS(CFLAGS,$EXTRACFLAGS $CFLAGS)
 SET_UNIQ_WORDS(CPPFLAGS,$CPPFLAGS $EXTRADEFS)
 
diff -r -U5 --new-file --exclude=configure --exclude='*m4*' --exclude=Makefile.in TiMidity++-2.13.2/timidity/jit_i386.cg TiMidity++-2.13.2-ccg/timidity/jit_i386.cg
--- TiMidity++-2.13.2/timidity/jit_i386.cg      1970-01-01 07:30:00.000000000 +0730
+++ TiMidity++-2.13.2-ccg/timidity/jit_i386.cg  2007-09-29 15:21:10.000000000 +0800
@@ -0,0 +1,336 @@
+#include <limits.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include "timidity.h"
+#include "reverb.h"
+
+#ifdef __GNUC__
+#localpc
+register int asm_pass __asm__("%esi");
+register unsigned char *asm_pc __asm__("%edi");
+#endif
+
+#define __STRICT_ANSI__
+#cpu pentium
+
+#ifdef iflush
+#undef iflush
+static void iflush(insn *start, insn *end)
+{      /* stands in for the iflush macro: marks the pages spanning start..end RWX */
+       int page_size = getpagesize();
+       int page = (long)start & -page_size;    /* start rounded down to a page */
+       int length = ((long)end - page + page_size - 1) & -page_size; /* whole pages */
+       mprotect((void *)page, length, PROT_READ | PROT_WRITE | PROT_EXEC);
+}      /* NOTE(review): mprotect's return value is not checked */
+#endif
+
+extern int32 reverb_effect_buffer[];
+
+/*
+ * writes code for %eax = (%eax * val) >> 24, a signed 8.24 multiply; imull
+ * leaves the product in %edx:%eax, shifts/or keep bits 24..55; trashes %edx
+ */
+static void imuldiv24_jit(int32 val)
+{
+       #[                                              # no. of bytes:
+               movl    $val,%edx                       # 5
+!_W(0xeaf7); /*        imull   %edx                            # 2 */
+               shll    $8,%edx                         # 3
+               shrl    $24,%eax                        # 3
+               orl     %edx,%eax                       # 2
+       ]#
+}
+
+enum { imuldiv24_n_insns = 15 };       /* sum of the byte counts above */
+
+/*
+ * writes code for %(rd) = &buf[(%(rs) / sizeof(int32) + incr) % size];
+ * %(rs) holds a byte offset (a multiple of sizeof(int32)) below
+ * upp * sizeof(int32); upp picks idivl or repeated subtraction for the wrap
+ */
+static void circ_ptr_jit(int rd, int rs, int32 *buf, size_t size,
+size_t upp, size_t incr)
+{
+       size_t s = size * sizeof(int32), n = incr * sizeof(int32);
+       insn *wrap;
+       if (size == 1) #[
+               movl    $(long)buf,%(rd)                # 5
+       ]# else if ((size & (size - 1)) == 0) #[
+               # power of 2: wrap with a mask
+               leal    n(%(rs)),%(rd)                  # 7
+               andl    $(s - 1),%(rd)                  # 6
+               addl    $(long)buf,%(rd)                # 6
+       ]# else if ((upp + incr) / size > 3) #[
+               leal    n(%(rs)),%(rd)                  # 7
+               xchgl   %eax,%(rd)                      # 1
+               pushl   %edx                            # 1
+               cltd                                    # 1
+               pushl   $(s)                            # 5
+               idivl   (%esp),%eax                     # 3
+               addl    $4,%esp                         # 3
+               movl    %edx,%eax                       # 2
+               popl    %edx                            # 1
+               xchgl   %eax,%(rd)                      # 1
+               addl    $(long)buf,%(rd)                # 6
+       ]# else #[
+               leal    n(%(rs)),%(rd)                  # 7
+       wrap:   subl    $(s),%(rd)                      # 6
+               jnb     wrap                            # 2
+               addl    $((long)buf+s),%(rd)            # 6
+       ]#
+}
+
+enum { circ_ptr_n_insns = 31 };        /* bytes in the longest (idivl) variant */
+
+static void delay_jit(int stream_reg, delay *dl, int32 upp)
+{      /* emits one delay-line step on %(stream_reg); trashes %eax and %edx */
+       #[
+       ## int32 output;
+       ## output = buf[*index];
+       ]#
+               circ_ptr_jit(#(%edx)#, #(%edi)#, dl->buf, dl->size, upp,
+                   dl->index);         /* dl->index is read at generation time */
+       #[
+               movl    (%edx),%eax                     # 2
+       ##
+       ## buf[*index] = *stream;
+       ## if (++*index >= size) {*index = 0;}
+       ##
+               movl    %(stream_reg),(%edx)            # 2
+       ##
+       ## *stream = output;
+       ##
+               movl    %eax,%(stream_reg)              # 2
+       ]#
+}      /* note: no code is emitted for the ++*index step quoted above */
+
+enum { delay_n_insns = 6 + circ_ptr_n_insns }; /* three 2-byte movl + circ_ptr */
+
+static void freeverb_comb_jit(int input_reg, int stream_reg, comb *cb,
+int32 upp)
+{      /* emits one comb-filter step for cb; trashes %eax, %ebx, %ecx, %edx */
+       #[
+       ## int32 output;
+       ## output = buf[*index];
+       ]#
+               circ_ptr_jit(#(%ebx)#, #(%edi)#, cb->buf, cb->size, upp,
+                   cb->index);
+       #[
+               movl    (%ebx),%eax                     # 2
+       ##
+       ## *stream += output;
+       ##
+               addl    %eax,%(stream_reg)              # 2
+       ##
+       ## *fs = imuldiv24(output, damp2) + imuldiv24(*fs, damp1);
+       ]#
+               imuldiv24_jit(cb->damp2i);
+       #[
+               movl    %eax,%ecx                       # 2
+               movl    (&cb->filterstore),%eax         # 5
+       ]#
+               imuldiv24_jit(cb->damp1i);
+       #[
+               addl    %ecx,%eax                       # 2
+               movl    %eax,(&cb->filterstore)         # 5
+       ##
+       ## buf[*index] = input + imuldiv24(*fs, feedback);
+       ]#
+               imuldiv24_jit(cb->feedbacki);
+       #[
+               movl    %(input_reg),%ecx               # 2
+               addl    %ecx,%eax                       # 2
+               movl    %eax,(%ebx)                     # 2
+       ##
+       ## if (++*index >= size) {*index = 0;}
+       ##
+       ## ### not emitted: do_ch_freeverb_jit advances cb->index after the run
+       ]#
+}
+
+enum { freeverb_comb_n_insns = 24 + circ_ptr_n_insns + 3*imuldiv24_n_insns };
+
+static void freeverb_allpass_jit(int stream_reg, allpass *ap, int32 upp)
+{      /* emits one allpass step on %(stream_reg); trashes %eax-%edx */
+       #[
+       ## int32 bufout, output;
+       ## bufout = buf[*index];
+       ]#
+               circ_ptr_jit(#(%ebx)#, #(%edi)#, ap->buf, ap->size, upp,
+                   ap->index);         /* ap->index is read at generation time */
+       #[
+               movl    (%ebx),%ecx                     # 2
+       ##
+       ## buf[*index] = *stream + imuldiv24(bufout, feedback);
+       ##
+               movl    %ecx,%eax                       # 2
+       ]#
+               imuldiv24_jit(ap->feedbacki);
+       #[
+               addl    %(stream_reg),%eax              # 2
+               movl    %eax,(%ebx)                     # 2
+       ##
+       ## output = -*stream + bufout;
+       ## if (++*index >= size) {*index = 0;}
+       ## *stream = output;
+       ##
+               subl    %ecx,%(stream_reg)              # 2
+               negl    %(stream_reg)                   # 2
+       ]#
+}      /* note: no code is emitted for the ++*index step quoted above */
+
+enum { freeverb_allpass_n_insns = 12 + circ_ptr_n_insns + imuldiv24_n_insns };
+
+void do_ch_freeverb_jit(int32 *buf, int32 count, InfoFreeverb *rev)
+{      /* generates the freeverb loop for this call, runs it, then syncs indices */
+       int32 c2 = count / 2, j, outl;
+       comb *combL = rev->combL, *combR = rev->combR;
+       allpass *allpassL = rev->allpassL, *allpassR = rev->allpassR;
+       insn insn_buf[90 + 4 * imuldiv24_n_insns +
+                     delay_n_insns +
+                     2 * numcombs * freeverb_comb_n_insns +
+                     2 * numallpasses * freeverb_allpass_n_insns];
+       void (*frob)(void);
+       insn *loop, *save_asm_pc = asm_pc;
+       int save_asm_pass = asm_pass;
+       #[
+       ## ### %eax, %ecx, %edx scratch regs.
+       ## ### %ebp == input
+       ## ### %esi == outl
+       ## ### %edi == k * sizeof(int32)
+       ## ### code is assembled into insn_buf and entered through frob
+               .org    insn_buf
+       frob:   pushl   %ebx                            # 1
+               pushl   %ebp                            # 1
+               pushl   %esi                            # 1
+               pushl   %edi                            # 1
+       ##
+       ## for (k = 0; k < count; k++)
+       ## {
+       ##
+               xorl    %edi,%edi                       # 2
+       ##
+       ##      input = reverb_effect_buffer[k] + reverb_effect_buffer[k + 1];
+       ##
+       loop:   leal    ((long)reverb_effect_buffer)(%edi),%eax # 6
+               movl    (%eax),%ebp                     # 2
+               addl    4(%eax),%ebp                    # 3
+       ##
+       ##      outl = outr = reverb_effect_buffer[k] =
+       ##                    reverb_effect_buffer[k + 1] = 0;
+       ##
+               xorl    %esi,%esi                       # 2
+               movl    %esi,(%eax)                     # 2
+               movl    %esi,4(%eax)                    # 3
+       ##
+       ##      do_delay(&input, pdelay->buf, pdelay->size, &pdelay->index);
+       ##
+       ## ### %edi == (k / 2) * sizeof(int32)
+       ##
+               shrl    $1,%edi                         # 2
+       ]#
+               delay_jit(#(%ebp)#, &rev->pdelay, c2);
+       #[
+       ##      for (i = 0; i < numcombs; i++) {
+       ##              do_freeverb_comb(input, &outl, combL[i].buf,
+       ##                      combL[i].size, &combL[i].index,
+       ##                      combL[i].damp1i, combL[i].damp2i,
+       ##                      &combL[i].filterstore, combL[i].feedbacki);
+       ##
+       ##              do_freeverb_comb(input, &outr, combR[i].buf,
+       ##                      combR[i].size, &combR[i].index,
+       ##                      combR[i].damp1i, combR[i].damp2i,
+       ##                      &combR[i].filterstore, combR[i].feedbacki);
+       ##      }
+       ##      for (i = 0; i < numallpasses; i++) {
+       ##              do_freeverb_allpass(&outl, allpassL[i].buf,
+       ##                      allpassL[i].size, &allpassL[i].index,
+       ##                      allpassL[i].feedbacki);
+       ##              do_freeverb_allpass(&outr, allpassR[i].buf,
+       ##                      allpassR[i].size, &allpassR[i].index,
+       ##                      allpassR[i].feedbacki);
+       ##      }
+       ##
+       ## ### %ebx scratch reg.
+       ]#
+       for (j = 0; j < numcombs; ++j)
+               freeverb_comb_jit(#(%ebp)#, #(%esi)#, &combL[j], c2);
+       for (j = 0; j < numallpasses; ++j)
+               freeverb_allpass_jit(#(%esi)#, &allpassL[j], c2);
+       #[
+               movl    %esi,(&outl)                    # 6
+       ##
+       ## ### %esi == outr
+       ##
+               xorl    %esi,%esi                       # 2
+       ]#
+       for (j = 0; j < numcombs; ++j)
+               freeverb_comb_jit(#(%ebp)#, #(%esi)#, &combR[j], c2);
+       for (j = 0; j < numallpasses; ++j)
+               freeverb_allpass_jit(#(%esi)#, &allpassR[j], c2);
+       #[
+       ##      buf[k] += imuldiv24(outl, rev->wet1i) +
+       ##                      imuldiv24(outr, rev->wet2i);
+       ##
+       ## ### %ebx == outl
+       ## ### %ebp == &buf[k]
+       ## ### %edi == k * sizeof(int32)
+       ##
+               shll    $1,%edi                         # 2
+               movl    (&outl),%ebx                    # 6
+               leal    ((long)buf)(%edi),%ebp          # 6
+               movl    %ebx,%eax                       # 2
+       ]#
+               imuldiv24_jit(rev->wet1i);
+       #[
+               movl    %eax,%ecx                       # 2
+               movl    %esi,%eax                       # 2
+       ]#
+               imuldiv24_jit(rev->wet2i);
+       #[
+               addl    %eax,%ecx                       # 2
+!_W(0x4d01);_B(0x00);
+!          /*  addl    %ecx,(%ebp)                     # 3 */
+       #[
+       ##      buf[k + 1] += imuldiv24(outr, rev->wet1i) +
+       ##                      imuldiv24(outl, rev->wet2i);
+       ##
+               movl    %esi,%eax                       # 2
+       ]#
+               imuldiv24_jit(rev->wet1i);
+       #[
+               movl    %eax,%ecx                       # 2
+               movl    %ebx,%eax                       # 2
+       ]#
+               imuldiv24_jit(rev->wet2i);
+       #[
+               addl    %eax,%ecx                       # 2
+               addl    %ecx,4(%ebp)                    # 3
+       ##
+       ##      ++k;
+       ## }
+       ##
+               addl    $8,%edi                         # 3
+               cmpl    $(count*sizeof(int32)),%edi     # 6
+               jb      loop                            # 6
+               popl    %edi                            # 1
+               popl    %esi                            # 1
+               popl    %ebp                            # 1
+               popl    %ebx                            # 1
+               ret                                     # 1
+       ]#
+       iflush(insn_buf, asm_pc);
+       frob();
+       rev->pdelay.index = (rev->pdelay.index + c2) % rev->pdelay.size;
+       for (j = 0; j < numcombs; ++j) {
+               combL[j].index = (combL[j].index + c2) % combL[j].size;
+               combR[j].index = (combR[j].index + c2) % combR[j].size;
+       }
+       for (j = 0; j < numallpasses; ++j) {
+               allpassL[j].index = (allpassL[j].index+c2) % allpassL[j].size;
+               allpassR[j].index = (allpassR[j].index+c2) % allpassR[j].size;
+       }
+       asm_pass = save_asm_pass;
+       asm_pc = save_asm_pc;
+}
diff -r -U5 --new-file --exclude=configure --exclude='*m4*' --exclude=Makefile.in TiMidity++-2.13.2/timidity/Makefile.am TiMidity++-2.13.2-ccg/timidity/Makefile.am
--- TiMidity++-2.13.2/timidity/Makefile.am      2004-06-01 08:05:29.000000000 +0800
+++ TiMidity++-2.13.2-ccg/timidity/Makefile.am  2007-09-29 12:54:35.000000000 +0800
@@ -124,10 +124,11 @@
        bsd20_a.c \
        esd_a.c \
        hpux_a.c \
        hpux_d_a.c \
        jack_a.c \
+       jit_i386.cg \
        oss_a.c \
        mac_a.c \
        mac_qt_a.c \
        mac_com.h \
        mac_dlog.c \
@@ -161,10 +162,22 @@
        w32_libOggFLAC_dll_g.h \
        w32_libOggFLAC_dll_i.h
 
 calcnewt_SOURCES = calcnewt.c
 
+CLEANFILES =
+
+if CCG
+
+SUFFIXES = .cg
+CLEANFILES += jit_i386.c
+
+.cg.c:
+       ccg -q -o $@ $<
+
+endif
+
 if BORLANDC
 
 if ENABLE_W32GUI
 W32GUI_RES = w32g_res.res
 else
@@ -173,11 +186,11 @@
 endif
 endif
 
 else
 
-CLEANFILES = newton_table.c
+CLEANFILES += newton_table.c
 
 if ENABLE_W32GUI
 W32GUI_RES = $(top_builddir)/interface/w32g_res.res
 else
 if ENABLE_W32G_SYN
diff -r -U5 --new-file --exclude=configure --exclude='*m4*' --exclude=Makefile.in TiMidity++-2.13.2/timidity/reverb.c TiMidity++-2.13.2-ccg/timidity/reverb.c
--- TiMidity++-2.13.2/timidity/reverb.c 2004-08-28 13:38:19.000000000 +0800
+++ TiMidity++-2.13.2-ccg/timidity/reverb.c     2007-09-28 19:09:44.000000000 +0800
@@ -914,11 +914,11 @@
 #define REV_VAL0         5.3
 #define REV_VAL1        10.5
 #define REV_VAL2        44.12
 #define REV_VAL3        21.0
 
-static int32  reverb_effect_buffer[AUDIO_BUFFER_SIZE * 2];
+int32  reverb_effect_buffer[AUDIO_BUFFER_SIZE * 2];
 static int32  reverb_effect_bufsize = sizeof(reverb_effect_buffer);
 
 #if OPT_MODE != 0
 #if defined(_MSC_VER) || defined(__WATCOMC__) || ( defined(__BORLANDC__) &&(__BORLANDC__ >= 1380) )
 void set_ch_reverb(int32 *buf, int32 count, int32 level)
@@ -1547,10 +1547,15 @@
        } else if(count == MAGIC_FREE_EFFECT_INFO) {
                free_freeverb_buf(rev);
                return;
        }
 
+#ifdef CCG
+       void do_ch_freeverb_jit(int32 *, int32, InfoFreeverb *);
+       if (count != 0)
+               do_ch_freeverb_jit(buf, count, rev);
+#else
        for (k = 0; k < count; k++)
        {
                input = reverb_effect_buffer[k] + reverb_effect_buffer[k + 1];
                outl = outr = reverb_effect_buffer[k] = reverb_effect_buffer[k + 1] = 0;
 
@@ -1568,10 +1573,11 @@
                }
                buf[k] += imuldiv24(outl, rev->wet1i) + imuldiv24(outr, rev->wet2i);
                buf[k + 1] += imuldiv24(outr, rev->wet1i) + imuldiv24(outl, rev->wet2i);
                ++k;
        }
+#endif
 }
 
 /*                                 */
 /*  Reverb: Delay & Panning Delay  */
 /*                                 */
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
_______________________________________________
Timidity-talk mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/timidity-talk

Reply via email to