On Tue, 9 Oct 2001, Dan Sugalski wrote: > At 01:20 PM 10/9/2001 +0200, Paolo Molaro wrote: > >On 10/07/01 Bryan C. Warnock wrote: > > > while (*pc) { > > > switch (*pc) { > > > } > > > } > If anyone wants an official ruling... > > DO_OP can contain any code you like as long as it: > > *) Some version ultimately compiles everywhere > *) Allows opcodes above a certain point (probably 256 or 512) to be > overridden at runtime > > Computed gotos, switches, function table dispatches, generated machine > code, or some other form of coding madness are all OK. I don't care which > way a particular platform goes as long as it doesn't constrain another > platform's ability to go another way.
Hehe. Ok, I delved into the particulars of gcc and found: void runloop(int**code) { static void* array[] = { &&op_end, &&op1, &&op2 }; start: goto *array[*code]; op1: foo; goto start; op2: foo; goto start; op_end: return; } // end code The key was &&lbl which returns the physical address of the a jumpable label. This is highly gcc-specific, but given that we can use #ifdef's I don't see a problem with it. :) I even wrote an ugly version that avoided that array indirection. What it did was "convert" the code-base to change the op-codes into physical addresses. This obviously has some negative ramifications, but is definately doable. By removing that extra indirection I got a couple percent extra improvement (12%). I can't imagine you'll get much faster than that (except for an inlining jit or perl6->C compiler). The only problem was that the &&lbl's didn't want to be non-local, so I had to perform some evil magic. The unpolished code is attached with a cheesy instruction set which was just designed to prevent the optimizer from optimizing away my op-codes. Here are the benchmarks on a 466MHZ dual proc celeron: w/ code=500,000,000 and loop_cnt = 10 (cache-bound) gcc speeds.c ----- switcher time delta=45 caller time delta=47 jumper time delta=43 code compile time delta=4 fast jumper time delta=34 gcc -O speeds.c ----- switcher time delta=30 caller time delta=41 jumper time delta=28 code compile time delta=3 fast jumper time delta=26 gcc -O2 speeds.c ------- switcher time delta=30 caller time delta=41 jumper time delta=28 code compile time delta=3 fast jumper time delta=25 gcc -O3 speeds.c -------- switcher time delta=29 caller time delta=41 jumper time delta=28 code compile time delta=2 fast jumper time delta=26 ========== ========== w/ code=1000, loop_cnt = 5,000,000 gcc -O3 speeds.c ------ switcher time delta=195 caller time delta=268 jumper time delta=185 code compile time delta=0 fast jumper time delta=154 # Note a marginal improvement from switch -> jump, yet an even bigger improvement in jump to fast-jump. It's unlikely to have a 50M (x4B+) code-base. But it's possible to have an inner loop that has 500,000 iterations. -Michael
#include<time.h> #include<stdio.h> #include<stdlib.h> #include <unistd.h> #include <sys/mman.h> #include <fcntl.h> #include <sys/stat.h> #include <sys/types.h> //#define SIZE 50000000 #define SIZE 1000 int codep[SIZE]; void dump_mappings() { char strbuf[1024]; int pid = getpid(); sprintf( strbuf, "cat /proc/%i/maps", pid ); system( strbuf ); sprintf( strbuf, "ls -l /proc/%i/fd" ); system( strbuf ); printf("press enter to continue"); scanf("%c",strbuf); } int x,y,z; void switcher( int*code) { while(1) switch(*code) { case 0: goto DONE; break; case 1: x++; code += 1; break; case 2: y++; code += 1; break; case 3: z++; code += 1; break; case 4: x = y + z; code += 1; break; } DONE: } // end switcher int* end(int*code) { return 0; } int* o1(int*code) { x++; return code+1; } int* o2(int*code) { y++; return code+1; } int* o3(int*code) { z++; return code+1; } int* o4(int*code) { x = y + z; return code+1; } typedef int* (*op_handler_t)(int*code); op_handler_t op_codes[] = { end, o1, o2, o3, o4 }; void caller( int*code) { while(*code) code = op_codes[ *code ](code); } // end caller void gotoer( int*code) { static int* array[] = { &&lend, &&lo1, &&lo2, &&lo3, &&lo4 }; start: /* if ( !code ) { puts("null code"); exit(-1); } if ( *code > 3 ) { puts("Invalid idx"); exit(-1); } */ goto *array[ *code ]; lo1: x++; code += 1; goto start; lo2: y++; code += 1; goto start; lo3: z++; code += 1; goto start; lo4: z++; code += 1; goto start; lend: return; } // end gotoer int* convert( int size, int*code, int*map ) { int* retval, *base; int idx =0; //dump_mappings(); retval = base = (int*)malloc(size * sizeof(int)); if ( !retval ) { puts("out of memory"); exit(-1); } //dump_mappings(); //printf("retval=%p, codep=%p\n", retval, codep ); /* if (!retval) { puts("couldn't allocate retval"); exit(-1); } */ code[size - 1] = 0; // to make sure while( *code ) { *retval++ = map[ *code++ ]; //idx++; } //printf("size=%i, idx=%i\n", size, idx ); //exit(0); *retval++ = map[ 0 ]; return base; } // end convert int** fgotoer( int* code, int f_fake ) { static int* array[] = { &&fend, &&fo1, &&fo2, &&fo3, &&fo4 }; if ( f_fake ) { return (int**)array; } fstart: goto **code; fo1: x++; code += 1; //puts("o1"); goto fstart; fo2: y++; code += 1; //puts("o2"); goto fstart; fo3: z++; code += 1; //puts("o3"); goto fstart; fo4: z++; code += 1; //puts("o4"); goto fstart; fend: //puts("end"); return; } // end fgotoer int main() { time_t start_time, stop_time; int* code_ref = codep; int i; int** compiled_code; //int cnt = 10; int cnt = 500000; for( i = 0; i < SIZE ; i++ ) *code_ref++ = ( i % 3 ) + 1; codep[ SIZE - 1 ] = 0; time(&start_time); for( i = 0; i < cnt; i++ ) switcher( codep ); time(&stop_time); printf("switcher time delta=%i\n", stop_time - start_time ); start_time = stop_time; for( i = 0; i < cnt; i++ ) caller( codep ); time(&stop_time); printf("caller time delta=%i\n", stop_time - start_time ); start_time = stop_time; for( i = 0; i < cnt; i++ ) gotoer( codep ); time(&stop_time); printf("jumper time delta=%i\n", stop_time - start_time ); start_time = stop_time; compiled_code = (int**)convert( SIZE, codep, (int*)fgotoer(0, 1) ); time(&stop_time); printf("code compile time delta=%i\n", stop_time - start_time ); start_time = stop_time; for( i = 0; i < cnt; i++ ) fgotoer( (int*)compiled_code, 0 ); time(&stop_time); printf("code jumper time delta=%i\n", stop_time - start_time ); start_time = stop_time; return 0; }