There are two main purposes to this reorg: 

 * Split up the tremendously huge xor.c.

 * Make it easier to write pure assembly routines without having
   to interface with C structure offsets.  You can see how nasty
   the Alpha and Sparc64 routines were; it promised to be even
   worse for IA-64.

The bulk of the patch is just moving code around.  But additionally,

 * Bug fixed in the SSE code ("8*(x*2)" isn't the same as "16*(x)"
   when x expands to "4+1").

 * Alpha code doesn't run into problems with gcc 2.95.

 * New IA-64 routines.

Jakub, I've inspected the restructured VIS routines by eye, and 
run the result through a cross-assembler, but couldn't test it.
All the others have been run through the attached user-land test
harness, which someone else might find useful.



r~
#include <sys/types.h>
#include <sys/mman.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/*
 * Descriptor for one implementation of the block routines under test.
 *
 * The layout must match the definition used by whatever object file
 * provides the template, so fields must not be reordered here.
 *
 * As exercised by main() in this harness, each do_N hook takes a length
 * in bytes followed by N buffers, and leaves its result in the first
 * buffer.
 */
struct xor_block_template {
        /* Not used by this harness; presumably links templates into a list. */
        struct xor_block_template *next;
        /* Not used by this harness; presumably a human-readable label. */
        const char *name;
        /* Not used by this harness; presumably a benchmark result -- TODO confirm. */
        int speed;
        /* Combine two buffers; result in the first. */
        void (*do_2)(unsigned long, unsigned long *, unsigned long *);
        /* Combine three buffers; result in the first. */
        void (*do_3)(unsigned long, unsigned long *, unsigned long *,
                     unsigned long *);
        /* Combine four buffers; result in the first. */
        void (*do_4)(unsigned long, unsigned long *, unsigned long *,
                     unsigned long *, unsigned long *);
        /* Combine five buffers; result in the first. */
        void (*do_5)(unsigned long, unsigned long *, unsigned long *,
                     unsigned long *, unsigned long *, unsigned long *);
};

/*
 * Template under test, defined outside this file.
 * NOTE(review): TMPL is presumably supplied as a macro on the compiler
 * command line (e.g. -DTMPL=...) to select the implementation -- confirm.
 */
extern struct xor_block_template TMPL;

/* System page size in bytes; set once at the start of main(). */
static unsigned long pagesize;

/*
 * Allocate one readable/writable page that is surrounded on both sides
 * by an inaccessible guard page, so that a routine running off either
 * end of the buffer faults immediately.
 *
 * Relies on the file-level `pagesize` having been initialised.
 *
 * Returns a pointer to the usable (middle) page, or NULL if the mapping
 * could not be created or its protection could not be changed.
 */
static void *
get_page(void)
{
  char *base;

  /* Reserve three pages with no access rights at all. */
  base = mmap(0, 3*pagesize, PROT_NONE, MAP_PRIVATE|MAP_ANON, -1, 0);
  if (base == MAP_FAILED)
    return NULL;

  /* Open up only the middle page; the outer two remain as guards. */
  if (mprotect (base + pagesize, pagesize, PROT_READ|PROT_WRITE) != 0)
    {
      /* Don't hand back a page that would fault on first use. */
      munmap (base, 3*pagesize);
      return NULL;
    }

  return base + pagesize;
}

/*
 * Verify that every byte of the page at c_base equals c_expect.
 *
 * The page is compared one unsigned long at a time against a word whose
 * bytes are all c_expect.  The number of words checked is derived from
 * the file-level `pagesize`.
 *
 * Calls abort() on the first mismatch; returns normally otherwise.
 */
static void
check_contents (void *c_base, char c_expect)
{
  unsigned long *l_base, l_expect;
  unsigned long i, nwords;

  /*
   * Replicate the byte across a full word.  Convert through unsigned
   * char first so that a value with the top bit set is not
   * sign-extended on platforms where plain char is signed.
   */
  for (l_expect = 0, i = 0; i < sizeof(long); ++i)
    l_expect = (l_expect << 8) | (unsigned char) c_expect;

  l_base = (unsigned long *)c_base;
  nwords = pagesize / sizeof(long);
  for (i = 0; i < nwords; ++i)
    if (l_base[i] != l_expect)
      abort ();
}

/*
 * Exercise the do_2 .. do_5 hooks of the template under test.
 *
 * Five guarded pages are filled with the byte values 1, 2, 4, 8 and 16.
 * After each hook runs over a full page, the first page is expected to
 * hold the combination of the inputs involved (3, 7, 15, 31), which
 * check_contents() verifies, aborting on mismatch.
 *
 * Returns 0 on success, 1 if the test pages could not be allocated.
 */
int
main(void)
{
  void *p1, *p2, *p3, *p4, *p5;
  struct xor_block_template *tmpl = &TMPL;

  pagesize = getpagesize();
  p1 = get_page();
  p2 = get_page();
  p3 = get_page();
  p4 = get_page();
  p5 = get_page();

  /* get_page() returns NULL on failure; don't memset through it. */
  if (!p1 || !p2 || !p3 || !p4 || !p5)
    {
      fprintf(stderr, "failed to allocate guarded test pages\n");
      return 1;
    }

  /* Source pages keep their contents for the whole run. */
  memset(p2, 2, pagesize);
  memset(p3, 4, pagesize);
  memset(p4, 8, pagesize);
  memset(p5, 16, pagesize);

  /* The destination page is reset before every hook. */
  memset(p1, 1, pagesize);
  tmpl->do_2(pagesize, p1, p2);
  check_contents(p1, 3);

  memset(p1, 1, pagesize);
  tmpl->do_3(pagesize, p1, p2, p3);
  check_contents(p1, 7);

  memset(p1, 1, pagesize);
  tmpl->do_4(pagesize, p1, p2, p3, p4);
  check_contents(p1, 15);

  memset(p1, 1, pagesize);
  tmpl->do_5(pagesize, p1, p2, p3, p4, p5);
  check_contents(p1, 31);

  return 0;
}

d-xor-2.gz

Reply via email to