Hello everyone,
I've come up with a patch for TiMidity++ 2.13.2 which reduces the running
time of .wav generation by about 4% on my machine (the patch is attached).
The speedup is enabled with the "configure" switch "--with-ccg". It requires
Ian Piumarta's ccg dynamic code generator
(http://www.iam.unibe.ch/~denker/Squeak/j3/ccg/) and works only on x86.
I tested it by using it to generate a .wav file from a .mid of Rakhmaninov's
G minor prelude (http://www.kunstderfuge.com/rachmaninov.htm -- Op. 23, No.
5); specifically:
CFLAGS="-O6 -g" ./configure --with-ccg
cd timidity
make
time ./timidity -Ow -o o.wav \
~/rachmaninov_57525a_prelude_\(nc\)smythe.mid
Anyway, is there any chance this patch might be included in the official
tree some way or other? :) I'm curious because I see that there's not been a
new release of TiMidity++ for quite some time...
Thanks!
diff -r -U5 --new-file --exclude=configure --exclude='*m4*'
--exclude=Makefile.in TiMidity++-2.13.2/configure.in
TiMidity++-2.13.2-ccg/configure.in
--- TiMidity++-2.13.2/configure.in 2004-10-03 20:39:52.000000000 +0800
+++ TiMidity++-2.13.2-ccg/configure.in 2007-09-29 12:25:45.000000000 +0800
@@ -752,10 +752,17 @@
eval "au_enable_$DEFAULT_PLAYMODE=yes"
else
AC_MSG_WARN(--with-default-output=$withval: audio is not enabled)
fi])
+AC_ARG_WITH(ccg,
+ [ --with-ccg use Piumarta's ccg for code speedup (x86 only)],
+ [ if test x"$withval" = xyes; then
+ EXTRADEFS="$EXTRADEFS -DCCG"
+ SYSEXTRAS="$SYSEXTRAS jit_i386.c"
+ fi])
+
dnl compatibility matters.
dnl AC_ARG_ENABLE(esd,
dnl [ --enable-esd EsounD (Obsoleted. Use --enable-audio=esd)],
dnl [ au_enable_esd=$enableval ])
dnl AC_ARG_ENABLE(nas,
@@ -1923,10 +1930,12 @@
AM_CONDITIONAL(BORLANDC, test "x$BORLANDC" = xyes)
AM_CONDITIONAL(WATCOM_C, test "x$WATCOM_C" = xyes)
AM_CONDITIONAL(W32READDIR, test "x$W32READDIR" = "xyes")
+AM_CONDITIONAL(CCG, test "x$with_ccg" = xyes)
+
SET_UNIQ_WORDS(LDFLAGS,$LDFLAGS)
SET_UNIQ_WORDS(SHLDFLAGS,$SHLDFLAGS)
SET_UNIQ_WORDS(CFLAGS,$EXTRACFLAGS $CFLAGS)
SET_UNIQ_WORDS(CPPFLAGS,$CPPFLAGS $EXTRADEFS)
diff -r -U5 --new-file --exclude=configure --exclude='*m4*'
--exclude=Makefile.in TiMidity++-2.13.2/timidity/jit_i386.cg
TiMidity++-2.13.2-ccg/timidity/jit_i386.cg
--- TiMidity++-2.13.2/timidity/jit_i386.cg 1970-01-01 07:30:00.000000000
+0730
+++ TiMidity++-2.13.2-ccg/timidity/jit_i386.cg 2007-09-29 15:21:10.000000000
+0800
@@ -0,0 +1,336 @@
+#include <limits.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include "timidity.h"
+#include "reverb.h"
+
+#ifdef __GNUC__
+#localpc
+register int asm_pass __asm__("%esi");
+register unsigned char *asm_pc __asm__("%edi");
+#endif
+
+#define __STRICT_ANSI__
+#cpu pentium
+
+#ifdef iflush
+#undef iflush
+/*
+ * Stand-in for the iflush macro that the surrounding ifdef/undef removes:
+ * requests read, write and execute permission on every page overlapping
+ * [start, end). The result of mprotect() is not examined.
+ */
+static void iflush(insn *start, insn *end)
+{
+ const int pgsz = getpagesize();
+ /* start address rounded down to a page boundary */
+ const int base = (long)start & -pgsz;
+ /* distance from base to end, rounded up to whole pages */
+ const int span = ((long)end - base + pgsz - 1) & -pgsz;
+
+ mprotect((void *)base, span, PROT_READ | PROT_WRITE | PROT_EXEC);
+}
+#endif
+
+extern int32 reverb_effect_buffer[];
+
+/*
+ * writes code to multiply %eax by signed 8.24 operand val, store 8.24 result
+ * in %eax, and trash %edx
+ *
+ * The emitted sequence loads val into %edx and issues a one-operand imull
+ * (its opcode word is written out by hand with _W; per the inline note it
+ * stands for "imull %edx"), which leaves the 64-bit product in %edx:%eax.
+ * The product shifted right by 24 bits is then rebuilt in %eax as
+ * (%edx << 8) | (%eax >> 24).
+ *
+ * val is baked into the generated code as an immediate, so the code must be
+ * regenerated whenever the coefficient changes.
+ */
+static void imuldiv24_jit(int32 val)
+{
+ #[ # no. of bytes:
+ movl $val,%edx # 5
+!_W(0xeaf7); /* imull %edx # 2 */
+ shll $8,%edx # 3
+ shrl $24,%eax # 3
+ orl %edx,%eax # 2
+ ]#
+}
+
+enum { imuldiv24_n_insns = 15 }; /* despite the name, a byte count: 5+2+3+3+2 from the column above */
+
+/*
+ * writes code to compute
+ * %(rd) = buf + (((%(rs) + incr) / sizeof(int32)) % size)
+ * where the value of %(rs) will be a multiple of 32 and smaller than upp
+ *
+ * NOTE(review): the callers in this file pass %edi for rs and describe it
+ * as (k / 2) * sizeof(int32), so "multiple of 32" presumably means a
+ * multiple of sizeof(int32) -- confirm.
+ *
+ * buf, size and incr are baked into the generated code as constants. One
+ * of four code shapes is picked at generation time:
+ *  - size == 1: the result is simply the constant buf;
+ *  - size is a power of two: add the byte offset, mask with the byte size
+ *    minus one, add buf;
+ *  - (upp + incr) / size > 3: reduce with idivl, preserving %eax and %edx
+ *    around the division by exchanging and pushing them;
+ *  - otherwise: keep subtracting the byte size until the subtraction
+ *    borrows, then add buf plus the byte size back.
+ * s and n below are size and incr converted from int32 elements to bytes.
+ */
+static void circ_ptr_jit(int rd, int rs, int32 *buf, size_t size,
+size_t upp, size_t incr)
+{
+ size_t s = size * sizeof(int32), n = incr * sizeof(int32);
+ insn *wrap;
+ if (size == 1) #[
+ movl $(long)buf,%(rd) # 5
+ ]# else if ((size & (size - 1)) == 0) #[
+ # power of 2
+ leal n(%(rs)),%(rd) # 7
+ andl $(s - 1),%(rd) # 6
+ addl $(long)buf,%(rd) # 6
+ ]# else if ((upp + incr) / size > 3) #[
+ leal n(%(rs)),%(rd) # 7
+ xchgl %eax,%(rd) # 1
+ pushl %edx # 1
+ cltd # 1
+ pushl $(s) # 5
+ idivl (%esp),%eax # 3
+ addl $4,%esp # 3
+ movl %edx,%eax # 2
+ popl %edx # 1
+ xchgl %eax,%(rd) # 1
+ addl $(long)buf,%(rd) # 6
+ ]# else #[
+ leal n(%(rs)),%(rd) # 7
+ wrap: subl $(s),%(rd) # 6
+ jnb wrap # 2
+ addl $((long)buf+s),%(rd) # 6
+ ]#
+}
+
+enum { circ_ptr_n_insns = 31 }; /* byte count of the longest variant above (the idivl one) */
+/*
+ * writes code for one step of a delay line: the register stream_reg and the
+ * delay slot selected by circ_ptr_jit (from %edi and dl->index) swap
+ * values, so the register ends up holding the delayed sample and the slot
+ * the new one. %eax and %edx are used as scratch.
+ *
+ * stream_reg - register operand holding the sample
+ * dl         - delay line; its buf, size and index are baked into the code
+ * upp        - forwarded unchanged to circ_ptr_jit
+ *
+ * No code is emitted for the index increment shown in the quoted C; the
+ * slot is derived from %edi on every pass instead.
+ */
+static void delay_jit(int stream_reg, delay *dl, int32 upp)
+{
+ #[
+ ## int32 output;
+ ## output = buf[*index];
+ ]#
+ circ_ptr_jit(#(%edx)#, #(%edi)#, dl->buf, dl->size, upp,
+ dl->index);
+ #[
+ movl (%edx),%eax # 2
+ ##
+ ## buf[*index] = *stream;
+ ## if (++*index >= size) {*index = 0;}
+ ##
+ movl %(stream_reg),(%edx) # 2
+ ##
+ ## *stream = output;
+ ##
+ movl %eax,%(stream_reg) # 2
+ ]#
+}
+
+enum { delay_n_insns = 6 + circ_ptr_n_insns }; /* bytes: 2+2+2 emitted here plus the circ_ptr_jit allowance */
+/*
+ * writes code for one comb-filter step, following the C statements quoted
+ * in the assembler comments below:
+ *  - %ebx is pointed at the current slot by circ_ptr_jit (from %edi and
+ *    cb->index) and the slot value is loaded as output;
+ *  - output is added to stream_reg;
+ *  - cb->filterstore, addressed absolutely, becomes
+ *    imuldiv24(output, damp2i) + imuldiv24(filterstore, damp1i);
+ *  - the slot is overwritten with
+ *    input_reg + imuldiv24(filterstore, feedbacki).
+ * %eax, %ebx and %ecx are used as scratch, and %edx is trashed by
+ * imuldiv24_jit. The coefficients and addresses are baked into the code.
+ *
+ * input_reg  - register operand holding the filter input
+ * stream_reg - register operand accumulating the output
+ * cb         - comb filter state
+ * upp        - forwarded unchanged to circ_ptr_jit
+ */
+static void freeverb_comb_jit(int input_reg, int stream_reg, comb *cb,
+int32 upp)
+{
+ #[
+ ## int32 output;
+ ## output = buf[*index];
+ ]#
+ circ_ptr_jit(#(%ebx)#, #(%edi)#, cb->buf, cb->size, upp,
+ cb->index);
+ #[
+ movl (%ebx),%eax # 2
+ ##
+ ## *stream += output;
+ ##
+ addl %eax,%(stream_reg) # 2
+ ##
+ ## *fs = imuldiv24(output, damp2) + imuldiv24(*fs, damp1);
+ ]#
+ imuldiv24_jit(cb->damp2i);
+ #[
+ movl %eax,%ecx # 2
+ movl (&cb->filterstore),%eax # 5
+ ]#
+ imuldiv24_jit(cb->damp1i);
+ #[
+ addl %ecx,%eax # 2
+ movl %eax,(&cb->filterstore) # 5
+ ##
+ ## buf[*index] = input + imuldiv24(*fs, feedback);
+ ]#
+ imuldiv24_jit(cb->feedbacki);
+ #[
+ movl %(input_reg),%ecx # 2
+ addl %ecx,%eax # 2
+ movl %eax,(%ebx) # 2
+ ##
+ ## if (++*index >= size) {*index = 0;}
+ ##
+ ## ### unneeded
+ ]#
+}
+
+enum { freeverb_comb_n_insns = 24 + circ_ptr_n_insns + 3*imuldiv24_n_insns }; /* bytes: 24 emitted here plus the helpers' allowances */
+/*
+ * writes code for one allpass-filter step, following the C statements
+ * quoted in the assembler comments below:
+ *  - %ebx is pointed at the current slot by circ_ptr_jit (from %edi and
+ *    ap->index) and the slot value is loaded into %ecx as bufout;
+ *  - the slot is overwritten with
+ *    stream_reg + imuldiv24(bufout, feedbacki);
+ *  - stream_reg becomes bufout - stream_reg (a subl followed by a negl).
+ * %eax, %ebx and %ecx are used as scratch, and %edx is trashed by
+ * imuldiv24_jit. No code is emitted for the index increment.
+ *
+ * stream_reg - register operand holding the sample, updated in place
+ * ap         - allpass filter state
+ * upp        - forwarded unchanged to circ_ptr_jit
+ */
+static void freeverb_allpass_jit(int stream_reg, allpass *ap, int32 upp)
+{
+ #[
+ ## int32 bufout, output;
+ ## bufout = buf[*index];
+ ]#
+ circ_ptr_jit(#(%ebx)#, #(%edi)#, ap->buf, ap->size, upp,
+ ap->index);
+ #[
+ movl (%ebx),%ecx # 2
+ ##
+ ## buf[*index] = *stream + imuldiv24(bufout, feedback);
+ ##
+ movl %ecx,%eax # 2
+ ]#
+ imuldiv24_jit(ap->feedbacki);
+ #[
+ addl %(stream_reg),%eax # 2
+ movl %eax,(%ebx) # 2
+ ##
+ ## output = -*stream + bufout;
+ ## if (++*index >= size) {*index = 0;}
+ ## *stream = output;
+ ##
+ subl %ecx,%(stream_reg) # 2
+ negl %(stream_reg) # 2
+ ]#
+}
+
+enum { freeverb_allpass_n_insns = 12 + circ_ptr_n_insns + imuldiv24_n_insns }; /* bytes: 12 emitted here plus the helpers' allowances */
+
+/*
+ * JIT counterpart of the Freeverb sample loop in reverb.c. Assembles, into
+ * a buffer on the stack, one x86 routine specialised for the current
+ * pre-delay, comb and allpass buffers and coefficients, runs it once over
+ * reverb_effect_buffer, and then advances the ring-buffer indices, which
+ * the generated code only ever reads as constants.
+ *
+ * buf   - output samples; buf[k] and buf[k + 1] receive the two wet mixes
+ *         in place
+ * count - number of int32 samples to process; the generated loop tests its
+ *         condition at the bottom, so the caller must not pass 0
+ * rev   - Freeverb state (pdelay, combL/R, allpassL/R, wet1i, wet2i)
+ *
+ * Each pass of the generated loop consumes two samples, so every ring
+ * buffer moves by c2 = count / 2 slots per call.
+ *
+ * asm_pass and asm_pc are saved and restored because, under GCC, they are
+ * declared at the top of this file as global register variables.
+ *
+ * NOTE(review): near the first "buf[k] +=" store, an assembler block is
+ * opened a second time without an intervening closer; this is reproduced
+ * unchanged -- verify against ccg's syntax.
+ */
+void do_ch_freeverb_jit(int32 *buf, int32 count, InfoFreeverb *rev)
+{
+ int32 c2 = count / 2, j, outl;
+ comb *combL = rev->combL, *combR = rev->combR;
+ allpass *allpassL = rev->allpassL, *allpassR = rev->allpassR;
+ /* 90 = bytes emitted directly below; the other terms are the helpers' byte allowances */
+ insn insn_buf[90 + 4 * imuldiv24_n_insns +
+ delay_n_insns +
+ 2 * numcombs * freeverb_comb_n_insns +
+ 2 * numallpasses * freeverb_allpass_n_insns];
+ void (*frob)(void);
+ insn *loop;
+ int save_asm_pass = asm_pass;
+ insn *save_asm_pc = asm_pc;
+ #[
+ ## ### %eax, %ecx, %edx scratch regs.
+ ## ### %ebp == input
+ ## ### %esi == outl
+ ## ### %edi == k * sizeof(int32)
+ ##
+ .org insn_buf
+ frob: pushl %ebx # 1
+ pushl %ebp # 1
+ pushl %esi # 1
+ pushl %edi # 1
+ ##
+ ## for (k = 0; k < count; k++)
+ ## {
+ ##
+ xorl %edi,%edi # 2
+ ##
+ ## input = reverb_effect_buffer[k] + reverb_effect_buffer[k + 1];
+ ##
+ loop: leal ((long)reverb_effect_buffer)(%edi),%eax # 6
+ movl (%eax),%ebp # 2
+ addl 4(%eax),%ebp # 3
+ ##
+ ## outl = outr = reverb_effect_buffer[k] =
+ ## reverb_effect_buffer[k + 1] = 0;
+ ##
+ xorl %esi,%esi # 2
+ movl %esi,(%eax) # 2
+ movl %esi,4(%eax) # 3
+ ##
+ ## do_delay(&input, pdelay->buf, pdelay->size, &pdelay->index);
+ ##
+ ## ### %edi == (k / 2) * sizeof(int32)
+ ##
+ shrl $1,%edi # 2
+ ]#
+ delay_jit(#(%ebp)#, &rev->pdelay, c2);
+ #[
+ ## for (i = 0; i < numcombs; i++) {
+ ## do_freeverb_comb(input, &outl, combL[i].buf,
+ ## combL[i].size, &combL[i].index,
+ ## combL[i].damp1i, combL[i].damp2i,
+ ## &combL[i].filterstore, combL[i].feedbacki);
+ ##
+ ## do_freeverb_comb(input, &outr, combR[i].buf,
+ ## combR[i].size, &combR[i].index,
+ ## combR[i].damp1i, combR[i].damp2i,
+ ## &combR[i].filterstore, combR[i].feedbacki);
+ ## }
+ ## for (i = 0; i < numallpasses; i++) {
+ ## do_freeverb_allpass(&outl, allpassL[i].buf,
+ ## allpassL[i].size, &allpassL[i].index,
+ ## allpassL[i].feedbacki);
+ ## do_freeverb_allpass(&outr, allpassR[i].buf,
+ ## allpassR[i].size, &allpassR[i].index,
+ ## allpassR[i].feedbacki);
+ ## }
+ ##
+ ## ### %ebx scratch reg.
+ ]#
+ for (j = 0; j < numcombs; ++j)
+ freeverb_comb_jit(#(%ebp)#, #(%esi)#, &combL[j], c2);
+ for (j = 0; j < numallpasses; ++j)
+ freeverb_allpass_jit(#(%esi)#, &allpassL[j], c2);
+ #[
+ movl %esi,(&outl) # 6
+ ##
+ ## ### %esi == outr
+ ##
+ xorl %esi,%esi # 2
+ ]#
+ for (j = 0; j < numcombs; ++j)
+ freeverb_comb_jit(#(%ebp)#, #(%esi)#, &combR[j], c2);
+ for (j = 0; j < numallpasses; ++j)
+ freeverb_allpass_jit(#(%esi)#, &allpassR[j], c2);
+ #[
+ ## buf[k] += imuldiv24(outl, rev->wet1i) +
+ ## imuldiv24(outr, rev->wet2i);
+ ##
+ ## ### %ebx == outl
+ ## ### %ebp == &buf[k]
+ ## ### %edi == k * sizeof(int32)
+ ##
+ shll $1,%edi # 2
+ movl (&outl),%ebx # 6
+ leal ((long)buf)(%edi),%ebp # 6
+ movl %ebx,%eax # 2
+ ]#
+ imuldiv24_jit(rev->wet1i);
+ #[
+ movl %eax,%ecx # 2
+ movl %esi,%eax # 2
+ ]#
+ imuldiv24_jit(rev->wet2i);
+ #[
+ addl %eax,%ecx # 2
+!_W(0x4d01);_B(0x00);
+! /* addl %ecx,(%ebp) # 3 */
+ #[
+ ## buf[k + 1] += imuldiv24(outr, rev->wet1i) +
+ ## imuldiv24(outl, rev->wet2i);
+ ##
+ movl %esi,%eax # 2
+ ]#
+ imuldiv24_jit(rev->wet1i);
+ #[
+ movl %eax,%ecx # 2
+ movl %ebx,%eax # 2
+ ]#
+ imuldiv24_jit(rev->wet2i);
+ #[
+ addl %eax,%ecx # 2
+ addl %ecx,4(%ebp) # 3
+ ##
+ ## ++k;
+ ## }
+ ##
+ addl $8,%edi # 3
+ cmpl $(count*sizeof(int32)),%edi # 6
+ jb loop # 6
+ popl %edi # 1
+ popl %esi # 1
+ popl %ebp # 1
+ popl %ebx # 1
+ ret # 1
+ ]#
+ iflush(insn_buf, asm_pc);
+ frob();
+ /*
+  * The generated code never stores an index back, so every ring buffer it
+  * walked is advanced here, including the pre-delay line used by delay_jit.
+  */
+ rev->pdelay.index = (rev->pdelay.index + c2) % rev->pdelay.size;
+ for (j = 0; j < numcombs; ++j) {
+ combL[j].index = (combL[j].index + c2) % combL[j].size;
+ combR[j].index = (combR[j].index + c2) % combR[j].size;
+ }
+ for (j = 0; j < numallpasses; ++j) {
+ allpassL[j].index = (allpassL[j].index+c2) % allpassL[j].size;
+ allpassR[j].index = (allpassR[j].index+c2) % allpassR[j].size;
+ }
+ asm_pass = save_asm_pass;
+ asm_pc = save_asm_pc;
+}
diff -r -U5 --new-file --exclude=configure --exclude='*m4*'
--exclude=Makefile.in TiMidity++-2.13.2/timidity/Makefile.am
TiMidity++-2.13.2-ccg/timidity/Makefile.am
--- TiMidity++-2.13.2/timidity/Makefile.am 2004-06-01 08:05:29.000000000
+0800
+++ TiMidity++-2.13.2-ccg/timidity/Makefile.am 2007-09-29 12:54:35.000000000
+0800
@@ -124,10 +124,11 @@
bsd20_a.c \
esd_a.c \
hpux_a.c \
hpux_d_a.c \
jack_a.c \
+ jit_i386.cg \
oss_a.c \
mac_a.c \
mac_qt_a.c \
mac_com.h \
mac_dlog.c \
@@ -161,10 +162,22 @@
w32_libOggFLAC_dll_g.h \
w32_libOggFLAC_dll_i.h
calcnewt_SOURCES = calcnewt.c
+CLEANFILES =
+
+if CCG
+
+SUFFIXES = .cg
+CLEANFILES += jit_i386.c
+
+.cg.c:
+ ccg -q -o $@ $<
+
+endif
+
if BORLANDC
if ENABLE_W32GUI
W32GUI_RES = w32g_res.res
else
@@ -173,11 +186,11 @@
endif
endif
else
-CLEANFILES = newton_table.c
+CLEANFILES += newton_table.c
if ENABLE_W32GUI
W32GUI_RES = $(top_builddir)/interface/w32g_res.res
else
if ENABLE_W32G_SYN
diff -r -U5 --new-file --exclude=configure --exclude='*m4*'
--exclude=Makefile.in TiMidity++-2.13.2/timidity/reverb.c
TiMidity++-2.13.2-ccg/timidity/reverb.c
--- TiMidity++-2.13.2/timidity/reverb.c 2004-08-28 13:38:19.000000000 +0800
+++ TiMidity++-2.13.2-ccg/timidity/reverb.c 2007-09-28 19:09:44.000000000
+0800
@@ -914,11 +914,11 @@
#define REV_VAL0 5.3
#define REV_VAL1 10.5
#define REV_VAL2 44.12
#define REV_VAL3 21.0
-static int32 reverb_effect_buffer[AUDIO_BUFFER_SIZE * 2];
+int32 reverb_effect_buffer[AUDIO_BUFFER_SIZE * 2];
static int32 reverb_effect_bufsize = sizeof(reverb_effect_buffer);
#if OPT_MODE != 0
#if defined(_MSC_VER) || defined(__WATCOMC__) || ( defined(__BORLANDC__)
&&(__BORLANDC__ >= 1380) )
void set_ch_reverb(int32 *buf, int32 count, int32 level)
@@ -1547,10 +1547,15 @@
} else if(count == MAGIC_FREE_EFFECT_INFO) {
free_freeverb_buf(rev);
return;
}
+#ifdef CCG
+ void do_ch_freeverb_jit(int32 *, int32, InfoFreeverb *);
+ if (count != 0)
+ do_ch_freeverb_jit(buf, count, rev);
+#else
for (k = 0; k < count; k++)
{
input = reverb_effect_buffer[k] + reverb_effect_buffer[k + 1];
outl = outr = reverb_effect_buffer[k] = reverb_effect_buffer[k
+ 1] = 0;
@@ -1568,10 +1573,11 @@
}
buf[k] += imuldiv24(outl, rev->wet1i) + imuldiv24(outr,
rev->wet2i);
buf[k + 1] += imuldiv24(outr, rev->wet1i) + imuldiv24(outl,
rev->wet2i);
++k;
}
+#endif
}
/* */
/* Reverb: Delay & Panning Delay */
/* */
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
_______________________________________________
Timidity-talk mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/timidity-talk