On Fri, May 03, 2013 at 11:57:51AM -0700, Tejun Heo wrote:
> On Fri, May 03, 2013 at 01:56:52PM -0400, Vivek Goyal wrote:
> > > Yeah, I think that's what *should* be happening but not what I'm
> > > seeing.  I'm seeing ~15% penalty.
> > 
> > What test are you running. I am running a simple dd with directIO and
> > I am not seeing any penalty.
> 
> Combination of dd and a test program that I've been using for some
> while which can generate concurrent direct random IOs.  Attaching the
> source code for the latter.

And actually attaching...

-- 
tejun
#define _GNU_SOURCE
#define _FILE_OFFSET_BITS 64

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <ctype.h>
#include <unistd.h>
#include <inttypes.h>
#include <sys/ioctl.h>
#include <signal.h>
#include <pthread.h>
#include <time.h>
#include <string.h>
#include <sys/time.h>

#include <sys/user.h>
#include <linux/fs.h>

static int dev_fd, blocks_per_rq, concurrency, do_write;
static int block_size;
static uint64_t device_size, nr_blocks;

static int exiting, nr_exited;

static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
static uint64_t *dispenser_ar;
static unsigned nr_succeeded, nr_failed;

static void sigexit_handler(int dummy)
{
        exiting = 1;
}

static uint64_t dispense_block(int idx)
{
        while (1) {
                uint64_t block;
                int i;
                block = ((uint64_t)random() << 31 | random())
                        % (nr_blocks - blocks_per_rq + 1);
                for (i = 0; i < concurrency; i++) {
                        if (block + blocks_per_rq > dispenser_ar[i] &&
                            block < dispenser_ar[i] + blocks_per_rq)
                                break;
                }
                if (i == concurrency) {
                        dispenser_ar[idx] = block;
                        return block;
                }
        }
}

static void * do_rawio(void *arg)
{
        int idx = (int)(unsigned long)arg, my_exiting = 0, i;
        size_t bufsz = blocks_per_rq * block_size;
        char *rbuf, *wbuf = NULL;
        uint64_t block;
        ssize_t ret;

        if ((rbuf = malloc(bufsz + PAGE_SIZE)) == NULL ||
            (do_write && (wbuf = malloc(bufsz + PAGE_SIZE)) == NULL)) {
                perror("malloc");
                exit(1);
        }

        rbuf = (void *)((unsigned long)(rbuf + PAGE_SIZE-1) & ~(PAGE_SIZE-1));
        wbuf = (void *)((unsigned long)(wbuf + PAGE_SIZE-1) & ~(PAGE_SIZE-1));

        if (do_write)
                for (i = 0; i < bufsz / sizeof(int); i++)
                        wbuf[i] = idx + i;

        pthread_mutex_lock(&mutex);
 again:
        if (exiting || my_exiting) {
                nr_exited++;
                pthread_mutex_unlock(&mutex);
                return NULL;
        }
        block = dispense_block(idx);
        pthread_mutex_unlock(&mutex);

        if (do_write) {
                ret = pwrite(dev_fd, wbuf, bufsz, block * block_size);
                if (ret != bufsz) {
                        fprintf(stderr, "\rThread %02d: write failed on "
                                "block %"PRIu64" ret=%zd errno=%d wbuf=%p\n",
                                idx, block, ret, errno, wbuf);
                        goto failed;
                }
        }

        ret = pread(dev_fd, rbuf, bufsz, block * block_size);
        if (ret != bufsz) {
                fprintf(stderr, "\rThread %02d: read failed on block "
                        "%"PRIu64" ret=%zd errno=%d rbuf=%p\n",
                        idx, block, ret, errno, rbuf);
                goto failed;
        }

        if (do_write && memcmp(wbuf, rbuf, bufsz) != 0) {
                fprintf(stderr, "\rThread %02d: data mismatch on block "
                        "%"PRIu64" ret=%zd errno=%d\n", idx, block, ret, errno);
                goto failed;
        }

        nr_succeeded++;
        pthread_mutex_lock(&mutex);
        goto again;

 failed:
        nr_failed++;
        my_exiting = 1;
        pthread_mutex_lock(&mutex);
        goto again;
}

static uint64_t now_in_usec(void)
{
        struct timeval tv;

        gettimeofday(&tv, NULL);
        return (uint64_t)tv.tv_sec * 1000000 + tv.tv_usec;
}

int main(int argc, char **argv)
{
        struct stat sbuf;
        int i, summary_only;
        pthread_t *thrs;
        uint64_t started_at, last_tstmp;
        unsigned last_succeeded = 0;
        double iops = 0;

        if (argc < 5) {
                fprintf(stderr,
                "Usage: test_rawio BLOCKDEV BLOCKS_PER_RQ CONCURRENCY (r|w) 
[s(ummary)|w(ait)]\n");
                return 1;
        }

        blocks_per_rq = atoi(argv[2]);
        concurrency = atoi(argv[3]);

        if (blocks_per_rq <= 0 || concurrency <= 0) {
                fprintf(stderr, "invalid parameters\n");
                return 1;
        }

        if (!(dispenser_ar = malloc(sizeof(dispenser_ar[0]) * concurrency)) ||
            !(thrs = malloc(sizeof(thrs[0]) * concurrency))) {
                perror("malloc");
                return 1;
        }
        memset(dispenser_ar, 0, sizeof(dispenser_ar[0]) * concurrency);

        do_write = tolower(argv[4][0]) == 'w';

        summary_only = 0;
        if (argc >= 6 && strchr(argv[5], 's'))
                summary_only = 1;

        if (argc >= 6 && strchr(argv[5], 'w')) {
                char buf[64];
                printf("press enter to continue\n");
                fgets(buf, sizeof(buf), stdin);
        }

        dev_fd = open(argv[1], (do_write ? O_RDWR : O_RDONLY) | O_DIRECT);
        if (dev_fd < 0) {
                perror("open");
                return 1;
        }

        if (fstat(dev_fd, &sbuf) < 0) {
                perror("fstat");
                return 1;
        }

        if (!S_ISBLK(sbuf.st_mode)) {
                fprintf(stderr, "not a block device\n");
                return 1;
        }

        if (ioctl(dev_fd, BLKSSZGET, &block_size) < 0 ||
            ioctl(dev_fd, BLKGETSIZE64, &device_size) < 0) {
                perror("ioctl");
                return 1;
        }
        nr_blocks = device_size / block_size;

        if (!summary_only)
                printf("%s block_size=%d nr_blocks=%"PRIu64" (%.2lfGiB)\n",
                       argv[1], block_size, nr_blocks,
                       (double)device_size / (1 << 30));

        if (signal(SIGINT, sigexit_handler) == SIG_ERR) {
                perror("signal");
                return 1;
        }

        srandom(getpid());

        for (i = 0; i < concurrency; i++)
                if ((errno = pthread_create(&thrs[i], NULL, do_rawio,
                                            (void *)(unsigned long)i))) {
                        perror("pthread_create");
                        return 1;
                }

        started_at = last_tstmp = now_in_usec();

        while (nr_exited < concurrency) {
                struct timespec ts_200ms = { 0, 200 * 1000 * 1000 };
                const char pgstr[] = "|/-\\";

                if (!summary_only) {
                        uint64_t now = now_in_usec();
                        double time_delta = ((double)now - last_tstmp) / 
1000000;
                        double io_delta = nr_succeeded - last_succeeded;

                        if (last_tstmp - started_at < 1000000)
                                iops = io_delta / time_delta;
                        else
                                iops = iops * 0.9 + io_delta / time_delta * 0.1;

                        printf("\rnr_succeeded=%-8u nr_failed=%-8u iops=%7.03lf 
kbps=%9.03lf %s%c",
                               nr_succeeded, nr_failed, iops,
                               iops * block_size * blocks_per_rq / 1024,
                               exiting ? "exiting..." : "",
                               pgstr[i++%(sizeof(pgstr)-1)]);

                        last_tstmp = now;
                        last_succeeded += io_delta;
                }

                fflush(stdout);
                nanosleep(&ts_200ms, NULL);
        }

        if (!summary_only)
                printf("\n");
        else
                printf("nr_succeeded=%u nr_failed=%8u iops=%03.03lf\n",
                       nr_succeeded, nr_failed,
                       (double)nr_succeeded /
                       (((double)now_in_usec() - started_at) / 1000000));

        return 0;
}

Reply via email to