Josh Berkus wrote:
> On 12/6/10 6:13 PM, Tom Lane wrote:
> > Josh Berkus <[email protected]> writes:
> >> OK, patch coming then. Right now test_fsync aborts when O_DIRECT fails.
> >> What should I have it do instead?
> >
> > Report that it fails, and keep testing the other methods.
>
> Patch attached. Includes a fair amount of comment cleanup, since
> existing comments did not meet our current project standards. Tests all
> 6 of the methods we support separately.
>
> Some questions, though:
>
> (1) Why are we doing the open_sync different-size write test? AFAIK,
> this doesn't match any behavior which PostgreSQL has.
I added program output to explain this.
> (2) In this patch, I'm stepping down the number of loops which
> fsync_writethrough does by 90%. The reason for that was that on the
> platforms where I tested writethrough (desktop machines), doing 10,000
> loops took 15-20 *minutes*, which seems hard on the user. Would be easy
> to revert if you think it's a bad idea.
> Possibly auto-sizing the number of loops based on the first fsync test
> might be a good idea, but seems like going a bit too far.
I did not know why writethough we always be much slower than other sync
methods so I just reduced the loop count to 2k.
> (3) Should the multi-descriptor test be using writethrough on platforms
> which support it?
Thank you for your patch. I have applied most of it, attached.
--
Bruce Momjian <[email protected]> http://momjian.us
EnterpriseDB http://enterprisedb.com
+ It's impossible for everything to be true. +
diff --git a/src/tools/fsync/Makefile b/src/tools/fsync/Makefile
index fe3e626..44419ee 100644
*** /tmp/pgdiff.17122/M3047c_Makefile Sat Jan 15 11:53:43 2011
--- src/tools/fsync/Makefile Fri Jan 14 22:35:30 2011
*************** override CPPFLAGS := -I$(libpq_srcdir) $
*** 16,24 ****
OBJS= test_fsync.o
! all: test_fsync
! test_fsync: test_fsync.o | submake-libpq submake-libpgport
$(CC) $(CFLAGS) test_fsync.o $(libpq_pgport) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X)
clean distclean maintainer-clean:
--- 16,24 ----
OBJS= test_fsync.o
! all: submake-libpq submake-libpgport test_fsync
! test_fsync: test_fsync.o $(libpq_builddir)/libpq.a
$(CC) $(CFLAGS) test_fsync.o $(libpq_pgport) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X)
clean distclean maintainer-clean:
diff --git a/src/tools/fsync/README b/src/tools/fsync/README
index 6d9acd3..a1c2ae4 100644
*** /tmp/pgdiff.17122/wFeqrc_README Sat Jan 15 11:53:43 2011
--- src/tools/fsync/README Sat Jan 15 11:34:58 2011
***************
*** 1,11 ****
! src/tools/fsync/README
!
! fsync
! =====
This program tests fsync. The tests are described as part of the program output.
Usage: test_fsync [-f filename] [loops]
- Loops defaults to 5000. The default output file is /var/tmp/test_fsync.out.
- Consider that /tmp or /var/tmp might be memory-based file systems.
--- 1,20 ----
! test_fsync
! ==========
This program tests fsync. The tests are described as part of the program output.
Usage: test_fsync [-f filename] [loops]
+
+ test_fsync is intended to give you a reasonable idea of what the fastest
+ fsync_method is on your specific system, as well as supplying diagnostic
+ information in the event of an identified I/O problem. However,
+ differences shown by test_fsync might not make any difference in real
+ database throughput, especially since many database servers are not
+ speed-limited by their transaction logs.
+
+ The output filename defaults to test_fsync.out in the current directory.
+ test_fsync should be run in the same filesystem as your transaction log
+ directory (pg_xlog).
+
+ Loops default to 2000. Increase this to get more accurate measurements.
diff --git a/src/tools/fsync/test_fsync.c b/src/tools/fsync/test_fsync.c
index 28c2119..831e2a0 100644
*** /tmp/pgdiff.17122/OSeljb_test_fsync.c Sat Jan 15 11:53:43 2011
--- src/tools/fsync/test_fsync.c Sat Jan 15 11:52:03 2011
***************
*** 3,9 ****
*
*
* test_fsync.c
! * test various fsync() methods
*/
#include "postgres.h"
--- 3,9 ----
*
*
* test_fsync.c
! * tests all supported fsync() methods
*/
#include "postgres.h"
***************
*** 22,40 ****
#include <unistd.h>
#include <string.h>
!
! #ifdef WIN32
#define FSYNC_FILENAME "./test_fsync.out"
- #else
- /* /tmp might be a memory file system */
- #define FSYNC_FILENAME "/var/tmp/test_fsync.out"
- #endif
#define WRITE_SIZE (8 * 1024) /* 8k */
#define LABEL_FORMAT "\t%-30s"
! int loops = 10000;
void die(char *str);
void print_elapse(struct timeval start_t, struct timeval stop_t);
--- 22,39 ----
#include <unistd.h>
#include <string.h>
! /*
! * put the temp files in the local directory
! * unless the user specifies otherwise
! */
#define FSYNC_FILENAME "./test_fsync.out"
#define WRITE_SIZE (8 * 1024) /* 8k */
#define LABEL_FORMAT "\t%-30s"
!
! int loops = 2000;
void die(char *str);
void print_elapse(struct timeval start_t, struct timeval stop_t);
*************** void print_elapse(struct timeval start_
*** 42,55 ****
int
main(int argc, char *argv[])
{
! struct timeval start_t;
! struct timeval stop_t;
! int tmpfile,
! i;
char *full_buf = (char *) malloc(XLOG_SEG_SIZE),
! *buf;
! char *filename = FSYNC_FILENAME;
if (argc > 2 && strcmp(argv[1], "-f") == 0)
{
filename = argv[2];
--- 41,54 ----
int
main(int argc, char *argv[])
{
! struct timeval start_t, stop_t;
! int tmpfile, i;
char *full_buf = (char *) malloc(XLOG_SEG_SIZE),
! *buf, *filename = FSYNC_FILENAME;
+ /*
+ * arguments: loops and filename (optional)
+ */
if (argc > 2 && strcmp(argv[1], "-f") == 0)
{
filename = argv[2];
*************** main(int argc, char *argv[])
*** 57,73 ****
argc -= 2;
}
! if (argc > 1)
loops = atoi(argv[1]);
for (i = 0; i < XLOG_SEG_SIZE; i++)
full_buf[i] = random();
if ((tmpfile = open(filename, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR)) == -1)
die("Cannot open output file.");
if (write(tmpfile, full_buf, XLOG_SEG_SIZE) != XLOG_SEG_SIZE)
die("write failed");
! /* fsync now so later fsync's don't have to do it */
if (fsync(tmpfile) != 0)
die("fsync failed");
close(tmpfile);
--- 56,77 ----
argc -= 2;
}
! if (argc > 1)
loops = atoi(argv[1]);
for (i = 0; i < XLOG_SEG_SIZE; i++)
full_buf[i] = random();
+ /*
+ * test if we can open the target file
+ */
if ((tmpfile = open(filename, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR)) == -1)
die("Cannot open output file.");
if (write(tmpfile, full_buf, XLOG_SEG_SIZE) != XLOG_SEG_SIZE)
die("write failed");
! /*
! * fsync now so that dirty buffers don't skew later tests
! */
if (fsync(tmpfile) != 0)
die("fsync failed");
close(tmpfile);
*************** main(int argc, char *argv[])
*** 77,83 ****
printf("Loops = %d\n\n", loops);
/*
! * Simple write
*/
printf("Simple write:\n");
printf(LABEL_FORMAT, "8k write");
--- 81,87 ----
printf("Loops = %d\n\n", loops);
/*
! * Test a simple write without fsync
*/
printf("Simple write:\n");
printf(LABEL_FORMAT, "8k write");
*************** main(int argc, char *argv[])
*** 95,104 ****
print_elapse(start_t, stop_t);
/*
! * Compare file sync methods with one 8k write
*/
printf("\nCompare file sync methods using one write:\n");
#ifdef OPEN_DATASYNC_FLAG
printf(LABEL_FORMAT, "open_datasync 8k write");
fflush(stdout);
--- 99,111 ----
print_elapse(start_t, stop_t);
/*
! * Test all fsync methods using single 8k writes
*/
printf("\nCompare file sync methods using one write:\n");
+ /*
+ * Test open_datasync if available
+ */
#ifdef OPEN_DATASYNC_FLAG
printf(LABEL_FORMAT, "open_datasync 8k write");
fflush(stdout);
*************** main(int argc, char *argv[])
*** 115,124 ****
--- 122,161 ----
gettimeofday(&stop_t, NULL);
close(tmpfile);
print_elapse(start_t, stop_t);
+
+ /*
+ * If O_DIRECT is enabled, test that with open_datasync
+ */
+ #if PG_O_DIRECT != 0
+ fflush(stdout);
+ if ((tmpfile = open(filename, O_RDWR | O_DSYNC | PG_O_DIRECT, 0)) == -1)
+ printf("\t(unavailable: o_direct on this filesystem)\n");
+ else
+ {
+ printf(LABEL_FORMAT, "open_datasync 8k direct I/O write");
+ gettimeofday(&start_t, NULL);
+ for (i = 0; i < loops; i++)
+ {
+ if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE)
+ die("write failed");
+ if (lseek(tmpfile, 0, SEEK_SET) == -1)
+ die("seek failed");
+ }
+ gettimeofday(&stop_t, NULL);
+ close(tmpfile);
+ print_elapse(start_t, stop_t);
+ }
+ #else
+ printf("\t(unavailable: o_direct)\n");
+ #endif
+
#else
printf("\t(unavailable: open_datasync)\n");
#endif
+ /*
+ * Test open_sync if available
+ */
#ifdef OPEN_SYNC_FLAG
printf(LABEL_FORMAT, "open_sync 8k write");
fflush(stdout);
*************** main(int argc, char *argv[])
*** 135,144 ****
--- 172,211 ----
gettimeofday(&stop_t, NULL);
close(tmpfile);
print_elapse(start_t, stop_t);
+
+ /*
+ * If O_DIRECT is enabled, test that with open_sync
+ */
+ #if PG_O_DIRECT != 0
+ printf(LABEL_FORMAT, "open_sync 8k direct I/O write");
+ fflush(stdout);
+ if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT, 0)) == -1)
+ printf("\t(unavailable: o_direct on this filesystem)\n");
+ else
+ {
+ gettimeofday(&start_t, NULL);
+ for (i = 0; i < loops; i++)
+ {
+ if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE)
+ die("write failed");
+ if (lseek(tmpfile, 0, SEEK_SET) == -1)
+ die("seek failed");
+ }
+ gettimeofday(&stop_t, NULL);
+ close(tmpfile);
+ print_elapse(start_t, stop_t);
+ }
+ #else
+ printf("\t(unavailable: o_direct)\n");
+ #endif
+
#else
printf("\t(unavailable: open_sync)\n");
#endif
+ /*
+ * Test fdatasync if available
+ */
#ifdef HAVE_FDATASYNC
printf(LABEL_FORMAT, "8k write, fdatasync");
fflush(stdout);
*************** main(int argc, char *argv[])
*** 160,165 ****
--- 227,235 ----
printf("\t(unavailable: fdatasync)\n");
#endif
+ /*
+ * Test fsync
+ */
printf(LABEL_FORMAT, "8k write, fsync");
fflush(stdout);
if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
*************** main(int argc, char *argv[])
*** 177,190 ****
gettimeofday(&stop_t, NULL);
close(tmpfile);
print_elapse(start_t, stop_t);
/*
! * Compare file sync methods with two 8k write
*/
printf("\nCompare file sync methods using two writes:\n");
#ifdef OPEN_DATASYNC_FLAG
! printf(LABEL_FORMAT, "2 open_datasync 8k writes");
fflush(stdout);
if ((tmpfile = open(filename, O_RDWR | O_DSYNC, 0)) == -1)
die("Cannot open output file.");
--- 247,289 ----
gettimeofday(&stop_t, NULL);
close(tmpfile);
print_elapse(start_t, stop_t);
+
+ /*
+ * If fsync_writethrough is available, test as well
+ */
+ #ifdef HAVE_FSYNC_WRITETHROUGH
+ printf(LABEL_FORMAT, "8k write, fsync_writethrough");
+ fflush(stdout);
+ if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
+ die("Cannot open output file.");
+ gettimeofday(&start_t, NULL);
+ for (i = 0; i < loops; i++)
+ {
+ if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE)
+ die("write failed");
+ if (fcntl(tmpfile, F_FULLFSYNC ) != 0)
+ die("fsync failed");
+ if (lseek(tmpfile, 0, SEEK_SET) == -1)
+ die("seek failed");
+ }
+ gettimeofday(&stop_t, NULL);
+ close(tmpfile);
+ print_elapse(start_t, stop_t);
+ #else
+ printf("\t(unavailable: fsync_writethrough)\n");
+ #endif
/*
! * Compare some of the file sync methods with
! * two 8k writes to see if timing is different
*/
printf("\nCompare file sync methods using two writes:\n");
+ /*
+ * Test open_datasync with and without o_direct
+ */
#ifdef OPEN_DATASYNC_FLAG
! printf(LABEL_FORMAT, "2 open_datasync 8k writes");
fflush(stdout);
if ((tmpfile = open(filename, O_RDWR | O_DSYNC, 0)) == -1)
die("Cannot open output file.");
*************** main(int argc, char *argv[])
*** 201,210 ****
--- 300,335 ----
gettimeofday(&stop_t, NULL);
close(tmpfile);
print_elapse(start_t, stop_t);
+
+ #if PG_O_DIRECT != 0
+ printf(LABEL_FORMAT, "2 open_datasync direct I/O 8k writes");
+ fflush(stdout);
+ if ((tmpfile = open(filename, O_RDWR | O_DSYNC | PG_O_DIRECT, 0)) == -1)
+ die("Cannot open output file.");
+ gettimeofday(&start_t, NULL);
+ for (i = 0; i < loops; i++)
+ {
+ if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE)
+ die("write failed");
+ if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE)
+ die("write failed");
+ if (lseek(tmpfile, 0, SEEK_SET) == -1)
+ die("seek failed");
+ }
+ gettimeofday(&stop_t, NULL);
+ close(tmpfile);
+ print_elapse(start_t, stop_t);
+ #else
+ printf("\t(unavailable: o_direct)\n");
+ #endif
+
#else
printf("\t(unavailable: open_datasync)\n");
#endif
+ /*
+ * Test open_sync with and without o_direct
+ */
#ifdef OPEN_SYNC_FLAG
printf(LABEL_FORMAT, "2 open_sync 8k writes");
fflush(stdout);
*************** main(int argc, char *argv[])
*** 223,230 ****
--- 348,383 ----
gettimeofday(&stop_t, NULL);
close(tmpfile);
print_elapse(start_t, stop_t);
+
+ #if PG_O_DIRECT != 0
+ printf(LABEL_FORMAT, "2 open_sync direct I/O 8k writes");
+ fflush(stdout);
+ if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT, 0)) == -1)
+ die("Cannot open output file.");
+ gettimeofday(&start_t, NULL);
+ for (i = 0; i < loops; i++)
+ {
+ if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE)
+ die("write failed");
+ if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE)
+ die("write failed");
+ if (lseek(tmpfile, 0, SEEK_SET) == -1)
+ die("seek failed");
+ }
+ gettimeofday(&stop_t, NULL);
+ close(tmpfile);
+ print_elapse(start_t, stop_t);
+ #else
+ printf("\t(unavailable: o_direct)\n");
+ #endif
+
+ #else
+ printf("\t(unavailable: open_sync)\n");
#endif
+ /*
+ * Test fdatasync
+ */
#ifdef HAVE_FDATASYNC
printf(LABEL_FORMAT, "8k write, 8k write, fdatasync");
fflush(stdout);
*************** main(int argc, char *argv[])
*** 248,253 ****
--- 401,409 ----
printf("\t(unavailable: fdatasync)\n");
#endif
+ /*
+ * Test basic fsync
+ */
printf(LABEL_FORMAT, "8k write, 8k write, fsync");
fflush(stdout);
if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
*************** main(int argc, char *argv[])
*** 267,278 ****
--- 423,466 ----
gettimeofday(&stop_t, NULL);
close(tmpfile);
print_elapse(start_t, stop_t);
+
+ /*
+ * Test fsync_writethrough if available
+ */
+ #ifdef HAVE_FSYNC_WRITETHROUGH
+ printf(LABEL_FORMAT, "8k write, 8k write, fsync_writethrough");
+ fflush(stdout);
+ if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
+ die("Cannot open output file.");
+ gettimeofday(&start_t, NULL);
+ for (i = 0; i < loops; i++)
+ {
+ if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE)
+ die("write failed");
+ if (write(tmpfile, buf, WRITE_SIZE) != WRITE_SIZE)
+ die("write failed");
+ if (fcntl(tmpfile, F_FULLFSYNC) != 0)
+ die("fsync failed");
+ if (lseek(tmpfile, 0, SEEK_SET) == -1)
+ die("seek failed");
+ }
+ gettimeofday(&stop_t, NULL);
+ close(tmpfile);
+ print_elapse(start_t, stop_t);
+ #else
+ printf("\t(unavailable: fsync_writethrough)\n");
+ #endif
/*
* Compare 1 to 2 writes
*/
printf("\nCompare open_sync with different sizes:\n");
+ printf("(This is designed to compare the cost of one large\n");
+ printf("sync'ed write and two smaller sync'ed writes.)\n");
+ /*
+ * Test open_sync with different size files
+ */
#ifdef OPEN_SYNC_FLAG
printf(LABEL_FORMAT, "open_sync 16k write");
fflush(stdout);
*************** main(int argc, char *argv[])
*** 312,323 ****
#endif
/*
! * Fsync another file descriptor?
*/
printf("\nTest if fsync on non-write file descriptor is honored:\n");
printf("(If the times are similar, fsync() can sync data written\n");
printf("on a different descriptor.)\n");
printf(LABEL_FORMAT, "8k write, fsync, close");
fflush(stdout);
gettimeofday(&start_t, NULL);
--- 500,519 ----
#endif
/*
! * Test whether fsync can sync data written on a different
! * descriptor for the same file. This checks the efficiency
! * of multi-process fsyncs against the same file.
! * Possibly this should be done with writethrough on platforms
! * which support it.
*/
printf("\nTest if fsync on non-write file descriptor is honored:\n");
printf("(If the times are similar, fsync() can sync data written\n");
printf("on a different descriptor.)\n");
+ /*
+ * first write, fsync and close, which is the
+ * normal behavior without multiple descriptors
+ */
printf(LABEL_FORMAT, "8k write, fsync, close");
fflush(stdout);
gettimeofday(&start_t, NULL);
*************** main(int argc, char *argv[])
*** 330,343 ****
if (fsync(tmpfile) != 0)
die("fsync failed");
close(tmpfile);
if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
die("Cannot open output file.");
- /* do nothing but the open/close the tests are consistent. */
close(tmpfile);
}
gettimeofday(&stop_t, NULL);
print_elapse(start_t, stop_t);
printf(LABEL_FORMAT, "8k write, close, fsync");
fflush(stdout);
gettimeofday(&start_t, NULL);
--- 526,547 ----
if (fsync(tmpfile) != 0)
die("fsync failed");
close(tmpfile);
+ /*
+ * open and close the file again to be consistent
+ * with the following test
+ */
if ((tmpfile = open(filename, O_RDWR, 0)) == -1)
die("Cannot open output file.");
close(tmpfile);
}
gettimeofday(&stop_t, NULL);
print_elapse(start_t, stop_t);
+ /*
+ * Now open, write, close, open again and fsync
+ * This simulates processes fsyncing each other's
+ * writes.
+ */
printf(LABEL_FORMAT, "8k write, close, fsync");
fflush(stdout);
gettimeofday(&start_t, NULL);
*************** main(int argc, char *argv[])
*** 358,375 ****
gettimeofday(&stop_t, NULL);
print_elapse(start_t, stop_t);
! /* cleanup */
free(full_buf);
unlink(filename);
return 0;
}
void
print_elapse(struct timeval start_t, struct timeval stop_t)
{
double total_time = (stop_t.tv_sec - start_t.tv_sec) +
- /* usec subtraction might be negative, e.g. 5.4 - 4.8 */
(stop_t.tv_usec - start_t.tv_usec) * 0.000001;
double per_second = loops / total_time;
--- 562,583 ----
gettimeofday(&stop_t, NULL);
print_elapse(start_t, stop_t);
! /*
! * cleanup
! */
free(full_buf);
unlink(filename);
return 0;
}
+ /*
+ * print out the writes per second for tests
+ */
void
print_elapse(struct timeval start_t, struct timeval stop_t)
{
double total_time = (stop_t.tv_sec - start_t.tv_sec) +
(stop_t.tv_usec - start_t.tv_usec) * 0.000001;
double per_second = loops / total_time;
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers