Josh Berkus wrote: > On 12/6/10 6:13 PM, Tom Lane wrote: > > Josh Berkus <j...@agliodbs.com> writes: > >> OK, patch coming then. Right now test_fsync aborts when O_DIRECT fails. > >> What should I have it do instead? > > > > Report that it fails, and keep testing the other methods. > > Patch attached. Includes a fair amount of comment cleanup, since > existing comments did not meet our current project standards. Tests all > 6 of the methods we support separately. > > Some questions, though: > > (1) Why are we doing the open_sync different-size write test? AFAIK, > this doesn't match any behavior which PostgreSQL has.
I added program output to explain this. > (2) In this patch, I'm stepping down the number of loops which > fsync_writethrough does by 90%. The reason for that was that on the > platforms where I tested writethrough (desktop machines), doing 10,000 > loops took 15-20 *minutes*, which seems hard on the user. Would be easy > to revert if you think it's a bad idea. > Possibly auto-sizing the number of loops based on the first fsync test > might be a good idea, but seems like going a bit too far. I did not know why writethough we always be much slower than other sync methods so I just reduced the loop count to 2k. > (3) Should the multi-descriptor test be using writethrough on platforms > which support it? Thank you for your patch. I have applied most of it, attached. -- Bruce Momjian <br...@momjian.us> http://momjian.us EnterpriseDB http://enterprisedb.com + It's impossible for everything to be true. +
diff --git a/src/tools/fsync/Makefile b/src/tools/fsync/Makefile index fe3e626..44419ee 100644 *** /tmp/pgdiff.17122/M3047c_Makefile Sat Jan 15 11:53:43 2011 --- src/tools/fsync/Makefile Fri Jan 14 22:35:30 2011 *************** override CPPFLAGS := -I$(libpq_srcdir) $ *** 16,24 **** OBJS= test_fsync.o ! all: test_fsync ! test_fsync: test_fsync.o | submake-libpq submake-libpgport $(CC) $(CFLAGS) test_fsync.o $(libpq_pgport) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X) clean distclean maintainer-clean: --- 16,24 ---- OBJS= test_fsync.o ! all: submake-libpq submake-libpgport test_fsync ! test_fsync: test_fsync.o $(libpq_builddir)/libpq.a $(CC) $(CFLAGS) test_fsync.o $(libpq_pgport) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X) clean distclean maintainer-clean: diff --git a/src/tools/fsync/README b/src/tools/fsync/README index 6d9acd3..a1c2ae4 100644 *** /tmp/pgdiff.17122/wFeqrc_README Sat Jan 15 11:53:43 2011 --- src/tools/fsync/README Sat Jan 15 11:34:58 2011 *************** *** 1,11 **** ! src/tools/fsync/README ! ! fsync ! ===== This program tests fsync. The tests are described as part of the program output. Usage: test_fsync [-f filename] [loops] - Loops defaults to 5000. The default output file is /var/tmp/test_fsync.out. - Consider that /tmp or /var/tmp might be memory-based file systems. --- 1,20 ---- ! test_fsync ! ========== This program tests fsync. The tests are described as part of the program output. Usage: test_fsync [-f filename] [loops] + + test_fsync is intended to give you a reasonable idea of what the fastest + fsync_method is on your specific system, as well as supplying diagnostic + information in the event of an identified I/O problem. However, + differences shown by test_fsync might not make any difference in real + database throughput, especially since many database servers are not + speed-limited by their transaction logs. + + The output filename defaults to test_fsync.out in the current directory. + test_fsync should be run in the same filesystem as your transaction log + directory (pg_xlog). + + Loops default to 2000. Increase this to get more accurate measurements. diff --git a/src/tools/fsync/test_fsync.c b/src/tools/fsync/test_fsync.c index 28c2119..831e2a0 100644 *** /tmp/pgdiff.17122/OSeljb_test_fsync.c Sat Jan 15 11:53:43 2011 --- src/tools/fsync/test_fsync.c Sat Jan 15 11:52:03 2011 *************** *** 3,9 **** * * * test_fsync.c ! * test various fsync() methods */ #include "postgres.h" --- 3,9 ---- * * * test_fsync.c ! * tests all supported fsync() methods */ #include "postgres.h" *************** *** 22,40 **** #include <unistd.h> #include <string.h> ! ! #ifdef WIN32 #define FSYNC_FILENAME "./test_fsync.out" - #else - /* /tmp might be a memory file system */ - #define FSYNC_FILENAME "/var/tmp/test_fsync.out" - #endif #define WRITE_SIZE (8 * 1024) /* 8k */ #define LABEL_FORMAT "\t%-30s" ! int loops = 10000; void die(char *str); void print_elapse(struct timeval start_t, struct timeval stop_t); --- 22,39 ---- #include <unistd.h> #include <string.h> ! /* ! * put the temp files in the local directory ! * unless the user specifies otherwise ! */ #define FSYNC_FILENAME "./test_fsync.out" #define WRITE_SIZE (8 * 1024) /* 8k */ #define LABEL_FORMAT "\t%-30s" ! ! int loops = 2000; void die(char *str); void print_elapse(struct timeval start_t, struct timeval stop_t); *************** void print_elapse(struct timeval start_ *** 42,55 **** int main(int argc, char *argv[]) { ! struct timeval start_t; ! struct timeval stop_t; ! int tmpfile, ! i; char *full_buf = (char *) malloc(XLOG_SEG_SIZE), ! *buf; ! char *filename = FSYNC_FILENAME; if (argc > 2 && strcmp(argv[1], "-f") == 0) { filename = argv[2]; --- 41,54 ---- int main(int argc, char *argv[]) { ! struct timeval start_t, stop_t; ! int tmpfile, i; char *full_buf = (char *) malloc(XLOG_SEG_SIZE), ! *buf, *filename = FSYNC_FILENAME; + /* + * arguments: loops and filename (optional) + */ if (argc > 2 && strcmp(argv[1], "-f") == 0) { filename = argv[2]; *************** main(int argc, char *argv[]) *** 57,73 **** argc -= 2; } ! if (argc > 1) loops = atoi(argv[1]); for (i = 0; i < XLOG_SEG_SIZE; i++) full_buf[i] = random(); if ((tmpfile = open(filename, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR)) == -1) die("Cannot open output file."); if (write(tmpfile, full_buf, XLOG_SEG_SIZE) != XLOG_SEG_SIZE) die("write failed"); ! /* fsync now so later fsync's don't have to do it */ if (fsync(tmpfile) != 0) die("fsync failed"); close(tmpfile); --- 56,77 ---- argc -= 2; } ! if (argc > 1) loops = atoi(argv[1]); for (i = 0; i < XLOG_SEG_SIZE; i++) full_buf[i] = random(); + /* + * test if we can open the target file + */ if ((tmpfile = open(filename, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR)) == -1) die("Cannot open output file."); if (write(tmpfile, full_buf, XLOG_SEG_SIZE) != XLOG_SEG_SIZE) die("write failed"); ! /* ! * fsync now so that dirty buffers don't skew later tests ! */ if (fsync(tmpfile) != 0) die("fsync failed"); close(tmpfile); *************** main(int argc, char *argv[]) *** 77,83 **** printf("Loops = %d\n\n", loops); /* ! * Simple write */ printf("Simple write:\n"); printf(LABEL_FORMAT, "8k write"); --- 81,87 ---- printf("Loops = %d\n\n", loops); /* ! * Test a simple write without fsync */ printf("Simple write:\n"); printf(LABEL_FORMAT, "8k write"); *************** main(int argc, char *argv[]) *** 95,104 **** print_elapse(start_t, stop_t); /* ! * Compare file sync methods with one 8k write */ printf("\nCompare file sync methods using one write:\n"); #ifdef OPEN_DATASYNC_FLAG printf(LABEL_FORMAT, "open_datasync 8k write"); fflush(stdout); --- 99,111 ---- print_elapse(start_t, stop_t); /* ! * Test all fsync methods using single 8k writes */ printf("\nCompare file sync methods using one write:\n"); + /* + * Test open_datasync if available + */ #ifdef OPEN_DATASYNC_FLAG printf(LABEL_FORMAT, "open_datasync 8k write"); fflush(stdout); *************** main(int argc, char *argv[]) *** 115,124 **** --- 122,161 ---- gettimeofday(&stop_t, NULL); close(tmpfile); print_elapse(start_t, stop_t); + + /* + * If O_DIRECT is enabled, test that with open_datasync + */ + #if PG_O_DIRECT != 0 + fflush(stdout); + if ((tmpfile = open(filename, O_RDWR | O_DSYNC | PG_O_DIRECT, 0)) == -1) + printf("\t(unavailable: o_direct on this filesystem)\n"); + else + { + printf(LABEL_FORMAT, "open_datasync 8k direct I/O write"); + gettimeofday(&start_t, NULL); + for (i = 0; i < loops; i++) + { + if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) + die("write failed"); + if (lseek(tmpfile, 0, SEEK_SET) == -1) + die("seek failed"); + } + gettimeofday(&stop_t, NULL); + close(tmpfile); + print_elapse(start_t, stop_t); + } + #else + printf("\t(unavailable: o_direct)\n"); + #endif + #else printf("\t(unavailable: open_datasync)\n"); #endif + /* + * Test open_sync if available + */ #ifdef OPEN_SYNC_FLAG printf(LABEL_FORMAT, "open_sync 8k write"); fflush(stdout); *************** main(int argc, char *argv[]) *** 135,144 **** --- 172,211 ---- gettimeofday(&stop_t, NULL); close(tmpfile); print_elapse(start_t, stop_t); + + /* + * If O_DIRECT is enabled, test that with open_sync + */ + #if PG_O_DIRECT != 0 + printf(LABEL_FORMAT, "open_sync 8k direct I/O write"); + fflush(stdout); + if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT, 0)) == -1) + printf("\t(unavailable: o_direct on this filesystem)\n"); + else + { + gettimeofday(&start_t, NULL); + for (i = 0; i < loops; i++) + { + if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) + die("write failed"); + if (lseek(tmpfile, 0, SEEK_SET) == -1) + die("seek failed"); + } + gettimeofday(&stop_t, NULL); + close(tmpfile); + print_elapse(start_t, stop_t); + } + #else + printf("\t(unavailable: o_direct)\n"); + #endif + #else printf("\t(unavailable: open_sync)\n"); #endif + /* + * Test fdatasync if available + */ #ifdef HAVE_FDATASYNC printf(LABEL_FORMAT, "8k write, fdatasync"); fflush(stdout); *************** main(int argc, char *argv[]) *** 160,165 **** --- 227,235 ---- printf("\t(unavailable: fdatasync)\n"); #endif + /* + * Test fsync + */ printf(LABEL_FORMAT, "8k write, fsync"); fflush(stdout); if ((tmpfile = open(filename, O_RDWR, 0)) == -1) *************** main(int argc, char *argv[]) *** 177,190 **** gettimeofday(&stop_t, NULL); close(tmpfile); print_elapse(start_t, stop_t); /* ! * Compare file sync methods with two 8k write */ printf("\nCompare file sync methods using two writes:\n"); #ifdef OPEN_DATASYNC_FLAG ! printf(LABEL_FORMAT, "2 open_datasync 8k writes"); fflush(stdout); if ((tmpfile = open(filename, O_RDWR | O_DSYNC, 0)) == -1) die("Cannot open output file."); --- 247,289 ---- gettimeofday(&stop_t, NULL); close(tmpfile); print_elapse(start_t, stop_t); + + /* + * If fsync_writethrough is available, test as well + */ + #ifdef HAVE_FSYNC_WRITETHROUGH + printf(LABEL_FORMAT, "8k write, fsync_writethrough"); + fflush(stdout); + if ((tmpfile = open(filename, O_RDWR, 0)) == -1) + die("Cannot open output file."); + gettimeofday(&start_t, NULL); + for (i = 0; i < loops; i++) + { + if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) + die("write failed"); + if (fcntl(tmpfile, F_FULLFSYNC ) != 0) + die("fsync failed"); + if (lseek(tmpfile, 0, SEEK_SET) == -1) + die("seek failed"); + } + gettimeofday(&stop_t, NULL); + close(tmpfile); + print_elapse(start_t, stop_t); + #else + printf("\t(unavailable: fsync_writethrough)\n"); + #endif /* ! * Compare some of the file sync methods with ! * two 8k writes to see if timing is different */ printf("\nCompare file sync methods using two writes:\n"); + /* + * Test open_datasync with and without o_direct + */ #ifdef OPEN_DATASYNC_FLAG ! printf(LABEL_FORMAT, "2 open_datasync 8k writes"); fflush(stdout); if ((tmpfile = open(filename, O_RDWR | O_DSYNC, 0)) == -1) die("Cannot open output file."); *************** main(int argc, char *argv[]) *** 201,210 **** --- 300,335 ---- gettimeofday(&stop_t, NULL); close(tmpfile); print_elapse(start_t, stop_t); + + #if PG_O_DIRECT != 0 + printf(LABEL_FORMAT, "2 open_datasync direct I/O 8k writes"); + fflush(stdout); + if ((tmpfile = open(filename, O_RDWR | O_DSYNC | PG_O_DIRECT, 0)) == -1) + die("Cannot open output file."); + gettimeofday(&start_t, NULL); + for (i = 0; i < loops; i++) + { + if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) + die("write failed"); + if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) + die("write failed"); + if (lseek(tmpfile, 0, SEEK_SET) == -1) + die("seek failed"); + } + gettimeofday(&stop_t, NULL); + close(tmpfile); + print_elapse(start_t, stop_t); + #else + printf("\t(unavailable: o_direct)\n"); + #endif + #else printf("\t(unavailable: open_datasync)\n"); #endif + /* + * Test open_sync with and without o_direct + */ #ifdef OPEN_SYNC_FLAG printf(LABEL_FORMAT, "2 open_sync 8k writes"); fflush(stdout); *************** main(int argc, char *argv[]) *** 223,230 **** --- 348,383 ---- gettimeofday(&stop_t, NULL); close(tmpfile); print_elapse(start_t, stop_t); + + #if PG_O_DIRECT != 0 + printf(LABEL_FORMAT, "2 open_sync direct I/O 8k writes"); + fflush(stdout); + if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT, 0)) == -1) + die("Cannot open output file."); + gettimeofday(&start_t, NULL); + for (i = 0; i < loops; i++) + { + if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) + die("write failed"); + if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) + die("write failed"); + if (lseek(tmpfile, 0, SEEK_SET) == -1) + die("seek failed"); + } + gettimeofday(&stop_t, NULL); + close(tmpfile); + print_elapse(start_t, stop_t); + #else + printf("\t(unavailable: o_direct)\n"); + #endif + + #else + printf("\t(unavailable: open_sync)\n"); #endif + /* + * Test fdatasync + */ #ifdef HAVE_FDATASYNC printf(LABEL_FORMAT, "8k write, 8k write, fdatasync"); fflush(stdout); *************** main(int argc, char *argv[]) *** 248,253 **** --- 401,409 ---- printf("\t(unavailable: fdatasync)\n"); #endif + /* + * Test basic fsync + */ printf(LABEL_FORMAT, "8k write, 8k write, fsync"); fflush(stdout); if ((tmpfile = open(filename, O_RDWR, 0)) == -1) *************** main(int argc, char *argv[]) *** 267,278 **** --- 423,466 ---- gettimeofday(&stop_t, NULL); close(tmpfile); print_elapse(start_t, stop_t); + + /* + * Test fsync_writethrough if available + */ + #ifdef HAVE_FSYNC_WRITETHROUGH + printf(LABEL_FORMAT, "8k write, 8k write, fsync_writethrough"); + fflush(stdout); + if ((tmpfile = open(filename, O_RDWR, 0)) == -1) + die("Cannot open output file."); + gettimeofday(&start_t, NULL); + for (i = 0; i < loops; i++) + { + if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) + die("write failed"); + if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE) + die("write failed"); + if (fcntl(tmpfile, F_FULLFSYNC) != 0) + die("fsync failed"); + if (lseek(tmpfile, 0, SEEK_SET) == -1) + die("seek failed"); + } + gettimeofday(&stop_t, NULL); + close(tmpfile); + print_elapse(start_t, stop_t); + #else + printf("\t(unavailable: fsync_writethrough)\n"); + #endif /* * Compare 1 to 2 writes */ printf("\nCompare open_sync with different sizes:\n"); + printf("(This is designed to compare the cost of one large\n"); + printf("sync'ed write and two smaller sync'ed writes.)\n"); + /* + * Test open_sync with different size files + */ #ifdef OPEN_SYNC_FLAG printf(LABEL_FORMAT, "open_sync 16k write"); fflush(stdout); *************** main(int argc, char *argv[]) *** 312,323 **** #endif /* ! * Fsync another file descriptor? */ printf("\nTest if fsync on non-write file descriptor is honored:\n"); printf("(If the times are similar, fsync() can sync data written\n"); printf("on a different descriptor.)\n"); printf(LABEL_FORMAT, "8k write, fsync, close"); fflush(stdout); gettimeofday(&start_t, NULL); --- 500,519 ---- #endif /* ! * Test whether fsync can sync data written on a different ! * descriptor for the same file. This checks the efficiency ! * of multi-process fsyncs against the same file. ! * Possibly this should be done with writethrough on platforms ! * which support it. */ printf("\nTest if fsync on non-write file descriptor is honored:\n"); printf("(If the times are similar, fsync() can sync data written\n"); printf("on a different descriptor.)\n"); + /* + * first write, fsync and close, which is the + * normal behavior without multiple descriptors + */ printf(LABEL_FORMAT, "8k write, fsync, close"); fflush(stdout); gettimeofday(&start_t, NULL); *************** main(int argc, char *argv[]) *** 330,343 **** if (fsync(tmpfile) != 0) die("fsync failed"); close(tmpfile); if ((tmpfile = open(filename, O_RDWR, 0)) == -1) die("Cannot open output file."); - /* do nothing but the open/close the tests are consistent. */ close(tmpfile); } gettimeofday(&stop_t, NULL); print_elapse(start_t, stop_t); printf(LABEL_FORMAT, "8k write, close, fsync"); fflush(stdout); gettimeofday(&start_t, NULL); --- 526,547 ---- if (fsync(tmpfile) != 0) die("fsync failed"); close(tmpfile); + /* + * open and close the file again to be consistent + * with the following test + */ if ((tmpfile = open(filename, O_RDWR, 0)) == -1) die("Cannot open output file."); close(tmpfile); } gettimeofday(&stop_t, NULL); print_elapse(start_t, stop_t); + /* + * Now open, write, close, open again and fsync + * This simulates processes fsyncing each other's + * writes. + */ printf(LABEL_FORMAT, "8k write, close, fsync"); fflush(stdout); gettimeofday(&start_t, NULL); *************** main(int argc, char *argv[]) *** 358,375 **** gettimeofday(&stop_t, NULL); print_elapse(start_t, stop_t); ! /* cleanup */ free(full_buf); unlink(filename); return 0; } void print_elapse(struct timeval start_t, struct timeval stop_t) { double total_time = (stop_t.tv_sec - start_t.tv_sec) + - /* usec subtraction might be negative, e.g. 5.4 - 4.8 */ (stop_t.tv_usec - start_t.tv_usec) * 0.000001; double per_second = loops / total_time; --- 562,583 ---- gettimeofday(&stop_t, NULL); print_elapse(start_t, stop_t); ! /* ! * cleanup ! */ free(full_buf); unlink(filename); return 0; } + /* + * print out the writes per second for tests + */ void print_elapse(struct timeval start_t, struct timeval stop_t) { double total_time = (stop_t.tv_sec - start_t.tv_sec) + (stop_t.tv_usec - start_t.tv_usec) * 0.000001; double per_second = loops / total_time;
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers