The attached test program shows some additional effects of PMC size and
timing. A PMC is 32 bytes and an SPMC is 16 bytes, matching the current and the minimal PMC sizes for i386 (or a typical 32-bit system).

1) For linear access, half-sized PMCs give double the mark speed. This
agrees well with the results from stress.pasm.

2) As we have only one free_list per pool, memory usage will become more
and more random when, e.g., parts of arrays are rewritten with new
values, which come from the free_list. The worst-case behaviour will be
totally random access to our pools' data.

For PMCs, the worst-case random access takes about double the time of
a linear access. SPMCs take almost 3 times as long, but are still faster than
PMCs. But the advantage of smaller PMCs is almost gone.

3) Conclusion
These numbers seem to indicate that, to keep PMCs more tightly together
when reusing them, we might need a free_list per pool->arena,
which would need a pool->arena pointer in the PMC.

4) With a pool->arena pointer in the PMC, we could also try to use a
separate flags field, which could be 1 byte per PMC in size.

5) On my system pools of different sizes are in totally different
ranges of memory (sbrk vs. mmap). This makes the quick bit mask test
in trace_mem_block totally unusable.

Comments, further experiments with 4) and so on welcome,

leo
/* test program for pmc flag access */
/* run program with
 *  cc -o tpmc -Wall tpmc.c -O3 && ./tpmc
 *
 * the timing macro needs adjustment for !i386
 */

#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>
#include <stdarg.h>

#ifndef NO_USE_RDTSC
/*
 * Read the x86 time-stamp counter: the low 32 bits land in `low` (EAX),
 * the high 32 bits in `hi` (EDX).  Both arguments must be lvalues.
 */
#define rdtsclh(low, hi) \
    __asm__ __volatile__ ("rdtsc" : "=a" (low), "=d" (hi) )

/*
 * Store the current time-stamp counter, divided by 1000, in *t.
 *
 * The two 32-bit halves are read as unsigned values and merged into one
 * 64-bit count before scaling.  Scaling the halves separately would make
 * the result jump backwards every time the low word wraps around, and a
 * signed low word would turn negative once it exceeds 2^31.
 *
 * NOTE: where long is only 32 bits wide the scaled count may not fit;
 * the conversion result is then implementation-defined.
 */
void rdtsc(long *t)
{
    unsigned int lo, hi;
    unsigned long long ticks;

    rdtsclh(lo, hi);
    ticks = ((unsigned long long)hi << 32) | lo;
    *t = (long)(ticks / 1000);
}
#else
#include <sys/time.h>
/*
 * Return the current wall-clock time from gettimeofday() as seconds,
 * with the microsecond part folded in as a fraction.
 */
double
floatval_time(void)
{
    struct timeval now;
    double whole, fraction;

    gettimeofday(&now, NULL);
    whole = (double)now.tv_sec;
    fraction = (double)now.tv_usec / 1000000.0;
    return whole + fraction;
}
/*
 * Fallback timer: store elapsed wall-clock milliseconds in *t.
 *
 * The value is counted from the first call instead of from the epoch.
 * Milliseconds since the epoch (about 1e12) do not fit into a 32-bit
 * long, and converting an out-of-range double to an integer type is
 * undefined behaviour.  The callers in this file only subtract two
 * readings, so the choice of origin does not affect their results.
 */
void rdtsc(long *t)
{
    static double origin;
    static int have_origin = 0;
    double now = floatval_time();

    if (!have_origin) {
        origin = now;
        have_origin = 1;
    }
    *t = (long)((now - origin) * 1000.0);
}
#endif

/* Number of pools, and number of objects held by each pool. */
#define N 16
#define SIZE 65536

/* One pool: an object buffer plus a separate buffer of per-object flags. */
struct pool {
    char *mem;
    char *flags;
};

struct pool pool[N];

/* Full-size test object: one flags word followed by seven filler words. */
struct pmc {
    int flags;
    int fill[7];
};
typedef struct pmc PMC;

/* Small test object: one flags word followed by three filler words. */
struct spmc {
    int flags;
    int fill[3];
};
typedef struct spmc SPMC;


/* Release both buffers of every pool and reset the pointers. */
static void free_pools(void)
{
    int p;

    for (p = 0; p < N; p++) {
        free(pool[p].mem);
        free(pool[p].flags);
        pool[p].mem = NULL;
        pool[p].flags = NULL;
    }
}

/* Stop the run with a message if pool p is missing one of its buffers. */
static void check_pool(int p)
{
    if (pool[p].mem == NULL || pool[p].flags == NULL) {
        fprintf(stderr, "pool %d: out of memory\n", p);
        exit(EXIT_FAILURE);
    }
}

/*
 * Time flag updates on N pools of SIZE objects each, first with PMC-sized
 * and then with SPMC-sized objects, in linear and in random order.
 *
 * Every timed loop draws the same two random numbers per step as the
 * baseline ("empty") loop, so that subtracting the baseline time `e`
 * leaves the cost of the memory accesses.  Each measurement pair is
 * repeated three times.
 *
 * Returns 0 on success; exits with EXIT_FAILURE if an allocation fails.
 */
int main(int argc, char *argv[])
{
    int i, j, k, l, n;
    long a, b, e;

    (void)argc;
    (void)argv;

    /* Baseline: loop and rand() overhead without any memory access. */
    rdtsc(&a);
    for (j = 0; j < N; j++) {
        l = (int) ((double)N * rand()/(RAND_MAX+1.0));
        for (i = 0; i < SIZE; i++) {
            k = (int) ((double)SIZE * rand()/(RAND_MAX+1.0));
        }
    }
    rdtsc(&b);
    e = b - a;
    printf("%d empty       ticks %10ld\n", i, e);

    /* Pools of full-size objects. */
    for (i = 0; i < N; i++) {
        pool[i].mem = calloc(SIZE, sizeof(PMC));
        pool[i].flags = calloc(SIZE, sizeof(char));
        check_pool(i);
    }

    for  (n = 0; n < 3; n++) {
        /* Linear: pool j, object i; l and k are drawn but not used. */
        rdtsc(&a);
        for (j = 0; j < N; j++) {
            l = (int) ((double)N * rand()/(RAND_MAX+1.0));
            for (i = 0; i < SIZE; i++) {
                k = (int) ((double)SIZE * rand()/(RAND_MAX+1.0));
                ((PMC*)pool[j].mem)[i].flags |= 1;
            }
        }
        rdtsc(&b);
        printf("%d linear  PMC ticks %10ld\n", i, b - a - e);

        /* Random: pool l (one draw per outer step), object k. */
        rdtsc(&a);
        for (j = 0; j < N; j++) {
            l = (int) ((double)N * rand()/(RAND_MAX+1.0));
            for (i = 0; i < SIZE; i++) {
                k = (int) ((double)SIZE * rand()/(RAND_MAX+1.0));
                ((PMC*)pool[l].mem)[k].flags |= 1;
            }
        }
        rdtsc(&b);
        printf("%d random  PMC ticks %10ld\n", i, b - a - e);
    }

    /* Pools of small objects. */
    free_pools();
    for (i = 0; i < N; i++) {
        pool[i].mem = calloc(SIZE, sizeof(SPMC));
        pool[i].flags = calloc(SIZE, sizeof(char));
        check_pool(i);
    }

    for  (n = 0; n < 3; n++) {
        rdtsc(&a);
        for (j = 0; j < N; j++) {
            l = (int) ((double)N * rand()/(RAND_MAX+1.0));
            for (i = 0; i < SIZE; i++) {
                k = (int) ((double)SIZE * rand()/(RAND_MAX+1.0));
                ((SPMC*)pool[j].mem)[i].flags |= 1;
            }
        }
        rdtsc(&b);
        printf("%d linear SPMC ticks %10ld\n", i, b - a - e);

        rdtsc(&a);
        for (j = 0; j < N; j++) {
            l = (int) ((double)N * rand()/(RAND_MAX+1.0));
            for (i = 0; i < SIZE; i++) {
                k = (int) ((double)SIZE * rand()/(RAND_MAX+1.0));
                ((SPMC*)pool[l].mem)[k].flags |= 1;
            }
        }
        rdtsc(&b);
        printf("%d random SPMC ticks %10ld\n", i, b - a - e);
    }

    /* now aligned pool with sep flags */

    free_pools();
    for (i = 0; i < N; i++) {
        /* Each pool is aligned to its own size; its contents are not
         * initialised and are not accessed by the loop below. */
        pool[i].mem = memalign(SIZE*sizeof(PMC), SIZE*sizeof(PMC));
        pool[i].flags = calloc(SIZE, sizeof(char));
        check_pool(i);
    }

    /* Only the empty loop is timed here; the printed value is its time
     * minus the baseline. */
    rdtsc(&a);
    for (j = 0; j < N; j++) {
        l = (int) ((double)N * rand()/(RAND_MAX+1.0));
        for (i = 0; i < SIZE; i++) {
            k = (int) ((double)SIZE * rand()/(RAND_MAX+1.0));
        }
    }
    rdtsc(&b);
    printf("%d empty       ticks %10ld\n", i, b - a - e);

    free_pools();
    return 0;
}

/*
 * Local variables:
 * c-indentation-style: bsd
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 *
 * vim: expandtab shiftwidth=4:
*/

Reply via email to