There are two main purposes to this reorg:
* Split up the tremendously huge xor.c.
* Make it easier to write pure assembly routines without having
to interface with C structure offsets. You can see how nasty
the Alpha and Sparc64 routines were; it promised to be even
worse for IA-64.
The bulk of the patch is just moving code around. But additionally,
* Bug fixed in the SSE code ("8*(x*2)" isn't the same as "16*(x)"
when x expands to "4+1").
* Alpha code doesn't run into problems with gcc 2.95.
* New IA-64 routines.
Jakub, I've inspected the restructured VIS routines by eye, and
run the result through a cross-assembler, but couldn't test it.
All the others have been run through the attached user-land test
harness, which someone else might find useful.
r~
#include <sys/types.h>
#include <sys/mman.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/*
 * One set of routines to be exercised.  Each do_N entry takes a byte count
 * followed by N buffer pointers; main() passes the destination buffer first
 * and checks that buffer after the call.
 */
struct xor_block_template {
	struct xor_block_template *next;	/* link to another template */
	const char *name;			/* label for this routine set */
	int speed;				/* not read by this harness */

	/* Two-buffer variant. */
	void (*do_2)(unsigned long bytes,
		     unsigned long *p1, unsigned long *p2);

	/* Three-buffer variant. */
	void (*do_3)(unsigned long bytes,
		     unsigned long *p1, unsigned long *p2,
		     unsigned long *p3);

	/* Four-buffer variant. */
	void (*do_4)(unsigned long bytes,
		     unsigned long *p1, unsigned long *p2,
		     unsigned long *p3, unsigned long *p4);

	/* Five-buffer variant. */
	void (*do_5)(unsigned long bytes,
		     unsigned long *p1, unsigned long *p2,
		     unsigned long *p3, unsigned long *p4,
		     unsigned long *p5);
};
/*
 * Template under test.  TMPL is not defined anywhere in this file;
 * presumably it is supplied as a macro on the compiler command line
 * (e.g. -DTMPL=<symbol>) -- TODO confirm against the build command.
 */
extern struct xor_block_template TMPL;
/*
 * Page size in bytes.  Assigned at the start of main() and used as the
 * length of every test buffer.
 */
static unsigned long pagesize;
/*
 * Allocate one readable/writable page sandwiched between two pages that
 * stay mapped PROT_NONE, so a routine that strays past either end of the
 * buffer touches inaccessible memory.
 *
 * Relies on the file-level `pagesize` having been set beforehand.
 *
 * Returns a pointer to the usable middle page, or NULL if either the
 * mapping or the protection change fails.  On success the mapping is
 * never released; the harness keeps it for the life of the process.
 */
static void *
get_page(void)
{
	char *base;

	/* Reserve three inaccessible pages; only the middle one is opened up. */
	base = mmap(NULL, 3 * pagesize, PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0);
	if (base == MAP_FAILED) {
		return NULL;
	}

	/*
	 * If the middle page cannot be made accessible, report failure
	 * instead of handing back a page that faults on first use.
	 */
	if (mprotect(base + pagesize, pagesize, PROT_READ | PROT_WRITE) != 0) {
		munmap(base, 3 * pagesize);
		return NULL;
	}

	return base + pagesize;
}
/*
 * Verify that the `pagesize` bytes starting at c_base all equal c_expect.
 * The buffer is compared one unsigned long at a time against a word made
 * of c_expect repeated in every byte.
 *
 * Does not return on mismatch: calls abort() at the first differing word.
 */
static void
check_contents (void *c_base, char c_expect)
{
	const unsigned long *word = c_base;
	const unsigned long *end = word + pagesize / sizeof(*word);
	unsigned long pattern = 0;
	size_t i;

	/*
	 * Replicate the byte across the word.  Go through unsigned char so
	 * that a value of 0x80 or above is not sign-extended on targets
	 * where plain char is signed.
	 */
	for (i = 0; i < sizeof(pattern); ++i) {
		pattern = (pattern << 8) | (unsigned char)c_expect;
	}

	for (; word < end; ++word) {
		if (*word != pattern) {
			abort();
		}
	}
}
/*
 * Exercise do_2 .. do_5 of the template under test.
 *
 * Five guarded one-page buffers are allocated.  Buffers 2..5 are filled
 * with the bytes 2, 4, 8 and 16; before each call buffer 1 is reset to 1.
 * After each call buffer 1 must hold 3, 7, 15 and 31 respectively, i.e.
 * the single-bit fill values of all buffers passed to that call combined.
 *
 * Returns 0 when every check passes and EXIT_FAILURE if the page size
 * cannot be determined or a buffer cannot be allocated.  A wrong result
 * aborts the process inside check_contents().
 */
int
main(void)
{
	struct xor_block_template *tmpl = &TMPL;
	void *p1, *p2, *p3, *p4, *p5;
	long sz;

	sz = sysconf(_SC_PAGESIZE);
	if (sz <= 0) {
		return EXIT_FAILURE;
	}
	pagesize = (unsigned long)sz;

	p1 = get_page();
	p2 = get_page();
	p3 = get_page();
	p4 = get_page();
	p5 = get_page();
	/* get_page() returns NULL on failure; never hand that to memset(). */
	if (p1 == NULL || p2 == NULL || p3 == NULL || p4 == NULL || p5 == NULL) {
		return EXIT_FAILURE;
	}

	/* Source buffers keep their contents across all four calls. */
	memset(p2, 2, pagesize);
	memset(p3, 4, pagesize);
	memset(p4, 8, pagesize);
	memset(p5, 16, pagesize);

	memset(p1, 1, pagesize);
	tmpl->do_2(pagesize, p1, p2);
	check_contents(p1, 3);

	memset(p1, 1, pagesize);
	tmpl->do_3(pagesize, p1, p2, p3);
	check_contents(p1, 7);

	memset(p1, 1, pagesize);
	tmpl->do_4(pagesize, p1, p2, p3, p4);
	check_contents(p1, 15);

	memset(p1, 1, pagesize);
	tmpl->do_5(pagesize, p1, p2, p3, p4, p5);
	check_contents(p1, 31);

	return 0;
}
d-xor-2.gz