From 931ab8fa7e9181f6b69601ad279e0ee5acb103d4 Mon Sep 17 00:00:00 2001
From: Takashi Menjo <takashi.menjou.vg@hco.ntt.co.jp>
Date: Wed, 24 Jun 2020 15:07:56 +0900
Subject: [PATCH v3 1/5] Support GUCs for external WAL buffer

To implement a non-volatile WAL buffer, we add two new GUCs, nvwal_path
and nvwal_size.  postgres now maps the file at nvwal_path into memory
and uses it as the WAL buffer.  Note that the buffer is still volatile
for now.
---
 configure                                     | 262 ++++++++++++++++++
 configure.in                                  |  43 +++
 src/backend/access/transam/Makefile           |   3 +-
 src/backend/access/transam/nv_xlog_buffer.c   |  95 +++++++
 src/backend/access/transam/xlog.c             | 164 ++++++++++-
 src/backend/utils/misc/guc.c                  |  23 +-
 src/backend/utils/misc/postgresql.conf.sample |   2 +
 src/bin/initdb/initdb.c                       |  93 ++++++-
 src/include/access/nv_xlog_buffer.h           |  71 +++++
 src/include/access/xlog.h                     |   2 +
 src/include/pg_config.h.in                    |   6 +
 src/include/utils/guc.h                       |   4 +
 12 files changed, 747 insertions(+), 21 deletions(-)
 create mode 100644 src/backend/access/transam/nv_xlog_buffer.c
 create mode 100644 src/include/access/nv_xlog_buffer.h

diff --git a/configure b/configure
index 2feff37fe3..3f16feeb54 100755
--- a/configure
+++ b/configure
@@ -866,6 +866,7 @@ with_libxml
 with_libxslt
 with_system_tzdata
 with_zlib
+with_nvwal
 with_gnu_ld
 enable_largefile
 '
@@ -1570,6 +1571,7 @@ Optional Packages:
   --with-system-tzdata=DIR
                           use system time zone data in DIR
   --without-zlib          do not use Zlib
+  --with-nvwal            use non-volatile WAL buffer (NVWAL)
   --with-gnu-ld           assume the C compiler uses GNU ld [default=no]
 
 Some influential environment variables:
@@ -8504,6 +8506,203 @@ fi
 
 
 
+#
+# Non-volatile WAL buffer (NVWAL)
+#
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build with non-volatile WAL buffer (NVWAL)" >&5
+$as_echo_n "checking whether to build with non-volatile WAL buffer (NVWAL)... " >&6; }
+
+
+
+# Check whether --with-nvwal was given.
+if test "${with_nvwal+set}" = set; then :
+  withval=$with_nvwal;
+  case $withval in
+    yes)
+
+$as_echo "#define USE_NVWAL 1" >>confdefs.h
+
+      ;;
+    no)
+      :
+      ;;
+    *)
+      as_fn_error $? "no argument expected for --with-nvwal option" "$LINENO" 5
+      ;;
+  esac
+
+else
+  with_nvwal=no
+
+fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $with_nvwal" >&5
+$as_echo "$with_nvwal" >&6; }
+
+#
+# Elf
+#
+
+# Assume system is ELF if it predefines __ELF__ as 1,
+# otherwise believe host_os based default.
+case $host_os in
+    freebsd1*|freebsd2*) elf=no;;
+    freebsd3*|freebsd4*) elf=yes;;
+esac
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5
+$as_echo_n "checking for grep that handles long lines and -e... " >&6; }
+if ${ac_cv_path_GREP+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -z "$GREP"; then
+  ac_path_GREP_found=false
+  # Loop through the user's path and test for each of PROGNAME-LIST
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_prog in grep ggrep; do
+    for ac_exec_ext in '' $ac_executable_extensions; do
+      ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext"
+      as_fn_executable_p "$ac_path_GREP" || continue
+# Check for GNU ac_path_GREP and select it if it is found.
+  # Check for GNU $ac_path_GREP
+case `"$ac_path_GREP" --version 2>&1` in
+*GNU*)
+  ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;;
+*)
+  ac_count=0
+  $as_echo_n 0123456789 >"conftest.in"
+  while :
+  do
+    cat "conftest.in" "conftest.in" >"conftest.tmp"
+    mv "conftest.tmp" "conftest.in"
+    cp "conftest.in" "conftest.nl"
+    $as_echo 'GREP' >> "conftest.nl"
+    "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break
+    diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+    as_fn_arith $ac_count + 1 && ac_count=$as_val
+    if test $ac_count -gt ${ac_path_GREP_max-0}; then
+      # Best one so far, save it but keep looking for a better one
+      ac_cv_path_GREP="$ac_path_GREP"
+      ac_path_GREP_max=$ac_count
+    fi
+    # 10*(2^10) chars as input seems more than enough
+    test $ac_count -gt 10 && break
+  done
+  rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+      $ac_path_GREP_found && break 3
+    done
+  done
+  done
+IFS=$as_save_IFS
+  if test -z "$ac_cv_path_GREP"; then
+    as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+  fi
+else
+  ac_cv_path_GREP=$GREP
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5
+$as_echo "$ac_cv_path_GREP" >&6; }
+ GREP="$ac_cv_path_GREP"
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5
+$as_echo_n "checking for egrep... " >&6; }
+if ${ac_cv_path_EGREP+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if echo a | $GREP -E '(a|b)' >/dev/null 2>&1
+   then ac_cv_path_EGREP="$GREP -E"
+   else
+     if test -z "$EGREP"; then
+  ac_path_EGREP_found=false
+  # Loop through the user's path and test for each of PROGNAME-LIST
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_prog in egrep; do
+    for ac_exec_ext in '' $ac_executable_extensions; do
+      ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext"
+      as_fn_executable_p "$ac_path_EGREP" || continue
+# Check for GNU ac_path_EGREP and select it if it is found.
+  # Check for GNU $ac_path_EGREP
+case `"$ac_path_EGREP" --version 2>&1` in
+*GNU*)
+  ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;;
+*)
+  ac_count=0
+  $as_echo_n 0123456789 >"conftest.in"
+  while :
+  do
+    cat "conftest.in" "conftest.in" >"conftest.tmp"
+    mv "conftest.tmp" "conftest.in"
+    cp "conftest.in" "conftest.nl"
+    $as_echo 'EGREP' >> "conftest.nl"
+    "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break
+    diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+    as_fn_arith $ac_count + 1 && ac_count=$as_val
+    if test $ac_count -gt ${ac_path_EGREP_max-0}; then
+      # Best one so far, save it but keep looking for a better one
+      ac_cv_path_EGREP="$ac_path_EGREP"
+      ac_path_EGREP_max=$ac_count
+    fi
+    # 10*(2^10) chars as input seems more than enough
+    test $ac_count -gt 10 && break
+  done
+  rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+      $ac_path_EGREP_found && break 3
+    done
+  done
+  done
+IFS=$as_save_IFS
+  if test -z "$ac_cv_path_EGREP"; then
+    as_fn_error $? "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+  fi
+else
+  ac_cv_path_EGREP=$EGREP
+fi
+
+   fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5
+$as_echo "$ac_cv_path_EGREP" >&6; }
+ EGREP="$ac_cv_path_EGREP"
+
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#if __ELF__
+  yes
+#endif
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+  $EGREP "yes" >/dev/null 2>&1; then :
+  ELF_SYS=true
+else
+  if test "X$elf" = "Xyes" ; then
+  ELF_SYS=true
+else
+  ELF_SYS=
+fi
+fi
+rm -f conftest*
+
+
+
 #
 # Assignments
 #
@@ -12861,6 +13060,57 @@ fi
 fi
 
 
+# for non-volatile WAL buffer (NVWAL)
+if test "$with_nvwal" = yes; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for pmem_map_file in -lpmem" >&5
+$as_echo_n "checking for pmem_map_file in -lpmem... " >&6; }
+if ${ac_cv_lib_pmem_pmem_map_file+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lpmem  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char pmem_map_file ();
+int
+main ()
+{
+return pmem_map_file ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_pmem_pmem_map_file=yes
+else
+  ac_cv_lib_pmem_pmem_map_file=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_pmem_pmem_map_file" >&5
+$as_echo "$ac_cv_lib_pmem_pmem_map_file" >&6; }
+if test "x$ac_cv_lib_pmem_pmem_map_file" = xyes; then :
+  cat >>confdefs.h <<_ACEOF
+#define HAVE_LIBPMEM 1
+_ACEOF
+
+  LIBS="-lpmem $LIBS"
+
+else
+  as_fn_error $? "library 'libpmem' is required for non-volatile WAL buffer (NVWAL)" "$LINENO" 5
+fi
+
+fi
+
 
 ##
 ## Header files
@@ -13540,6 +13790,18 @@ fi
 
 done
 
+fi
+
+# for non-volatile WAL buffer (NVWAL)
+if test "$with_nvwal" = yes ; then
+  ac_fn_c_check_header_mongrel "$LINENO" "libpmem.h" "ac_cv_header_libpmem_h" "$ac_includes_default"
+if test "x$ac_cv_header_libpmem_h" = xyes; then :
+
+else
+  as_fn_error $? "header file <libpmem.h> is required for non-volatile WAL buffer (NVWAL)" "$LINENO" 5
+fi
+
+
 fi
 
 if test "$PORTNAME" = "win32" ; then
diff --git a/configure.in b/configure.in
index 0188c6ff07..a5f9c9fb9d 100644
--- a/configure.in
+++ b/configure.in
@@ -992,6 +992,38 @@ PGAC_ARG_BOOL(with, zlib, yes,
               [do not use Zlib])
 AC_SUBST(with_zlib)
 
+#
+# Non-volatile WAL buffer (NVWAL)
+#
+AC_MSG_CHECKING([whether to build with non-volatile WAL buffer (NVWAL)])
+PGAC_ARG_BOOL(with, nvwal, no, [use non-volatile WAL buffer (NVWAL)],
+              [AC_DEFINE([USE_NVWAL], 1, [Define to 1 to use non-volatile WAL buffer (NVWAL). (--with-nvwal)])])
+AC_MSG_RESULT([$with_nvwal])
+
+#
+# Elf
+#
+
+# Assume system is ELF if it predefines __ELF__ as 1,
+# otherwise believe host_os based default.
+case $host_os in
+    freebsd1*|freebsd2*) elf=no;;
+    freebsd3*|freebsd4*) elf=yes;;
+esac
+
+AC_EGREP_CPP(yes,
+[#if __ELF__
+  yes
+#endif
+],
+[ELF_SYS=true],
+[if test "X$elf" = "Xyes" ; then
+  ELF_SYS=true
+else
+  ELF_SYS=
+fi])
+AC_SUBST(ELF_SYS)
+
 #
 # Assignments
 #
@@ -1293,6 +1325,12 @@ elif test "$with_uuid" = ossp ; then
 fi
 AC_SUBST(UUID_LIBS)
 
+# for non-volatile WAL buffer (NVWAL)
+if test "$with_nvwal" = yes; then
+  AC_CHECK_LIB(pmem, pmem_map_file, [],
+               [AC_MSG_ERROR([library 'libpmem' is required for non-volatile WAL buffer (NVWAL)])])
+fi
+
 
 ##
 ## Header files
@@ -1470,6 +1508,11 @@ elif test "$with_uuid" = ossp ; then
       [AC_MSG_ERROR([header file <ossp/uuid.h> or <uuid.h> is required for OSSP UUID])])])
 fi
 
+# for non-volatile WAL buffer (NVWAL)
+if test "$with_nvwal" = yes ; then
+  AC_CHECK_HEADER(libpmem.h, [], [AC_MSG_ERROR([header file <libpmem.h> is required for non-volatile WAL buffer (NVWAL)])])
+fi
+
 if test "$PORTNAME" = "win32" ; then
    AC_CHECK_HEADERS(crtdefs.h)
 fi
diff --git a/src/backend/access/transam/Makefile b/src/backend/access/transam/Makefile
index 595e02de72..b41a710e7e 100644
--- a/src/backend/access/transam/Makefile
+++ b/src/backend/access/transam/Makefile
@@ -32,7 +32,8 @@ OBJS = \
 	xlogfuncs.o \
 	xloginsert.o \
 	xlogreader.o \
-	xlogutils.o
+	xlogutils.o \
+	nv_xlog_buffer.o
 
 include $(top_srcdir)/src/backend/common.mk
 
diff --git a/src/backend/access/transam/nv_xlog_buffer.c b/src/backend/access/transam/nv_xlog_buffer.c
new file mode 100644
index 0000000000..cfc6a6376b
--- /dev/null
+++ b/src/backend/access/transam/nv_xlog_buffer.c
@@ -0,0 +1,95 @@
+/*-------------------------------------------------------------------------
+ *
+ * nv_xlog_buffer.c
+ *		PostgreSQL non-volatile WAL buffer
+ *
+ *
+ * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/backend/access/transam/nv_xlog_buffer.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#ifdef USE_NVWAL
+
+#include <libpmem.h>
+#include "access/nv_xlog_buffer.h"
+
+#include "miscadmin.h" /* IsBootstrapProcessingMode */
+#include "common/file_perm.h" /* pg_file_create_mode */
+
+/*
+ * Maps the file at fname as the non-volatile WAL buffer via libpmem.
+ *
+ * In bootstrap mode the file is created with fsize bytes and must not exist
+ * yet; otherwise an existing file is mapped.  Returns the mapped address.
+ * PANICs, and so never returns, if the mapping fails, if the mapped length
+ * differs from fsize, or if the file is not on persistent memory.
+ */
+void *
+MapNonVolatileXLogBuffer(const char *fname, Size fsize)
+{
+	void	   *base;
+	size_t		mapped_len = 0;
+	int			on_pmem = 0;
+	size_t		req_len = 0;
+	int			flags = 0;
+	mode_t		mode = 0;
+
+	Assert(fname != NULL);
+	Assert(fsize > 0);
+
+	/*
+	 * By default map an existing file: len must be zero, flags must have
+	 * neither PMEM_FILE_CREATE nor PMEM_FILE_EXCL, and mode is ignored.
+	 */
+	if (IsBootstrapProcessingMode())
+	{
+		/* Bootstrap (typically run by initdb) creates a brand-new file. */
+		req_len = fsize;
+		flags = PMEM_FILE_CREATE | PMEM_FILE_EXCL;
+		mode = pg_file_create_mode;
+	}
+	base = pmem_map_file(fname, req_len, flags, mode, &mapped_len, &on_pmem);
+
+	if (base == NULL)
+		elog(PANIC, "could not map non-volatile WAL buffer '%s': %m", fname);
+
+	if (mapped_len != fsize)
+		elog(PANIC, "size of non-volatile WAL buffer '%s' is invalid; "
+					"expected %zu; actual %zu",
+			 fname, fsize, mapped_len);
+
+	if (!on_pmem)
+		elog(PANIC, "non-volatile WAL buffer '%s' is not on persistent memory",
+			 fname);
+
+	/*
+	 * The buffer must start on a WAL page boundary (XLOG_BLCKSZ).  This is
+	 * expected to hold since PMDK aligns mappings to hugepage boundaries.
+	 */
+	Assert((uint64) base % XLOG_BLCKSZ == 0);
+
+	elog(LOG, "non-volatile WAL buffer '%s' is mapped on [%p-%p)",
+		 fname, base, (char *) base + mapped_len);
+	return base;
+}
+
+/* Unmaps fsize bytes at addr via pmem_unmap; a failure only raises a WARNING. */
+void
+UnmapNonVolatileXLogBuffer(void *addr, Size fsize)
+{
+	int			rc;
+
+	Assert(addr != NULL);
+	rc = pmem_unmap(addr, fsize);
+	if (rc < 0)
+		elog(WARNING, "could not unmap non-volatile WAL buffer: %m");
+	else
+		elog(LOG, "non-volatile WAL buffer unmapped");
+}
+
+#endif
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index a1256a103b..0681ba1262 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -37,6 +37,7 @@
 #include "access/xloginsert.h"
 #include "access/xlogreader.h"
 #include "access/xlogutils.h"
+#include "access/nv_xlog_buffer.h"
 #include "catalog/catversion.h"
 #include "catalog/pg_control.h"
 #include "catalog/pg_database.h"
@@ -873,6 +874,12 @@ static bool InRedo = false;
 /* Have we launched bgwriter during recovery? */
 static bool bgwriterLaunched = false;
 
+/* For non-volatile WAL buffer (NVWAL) */
+char	   *NvwalPath = NULL;	/* a GUC parameter */
+int			NvwalSizeMB = 1024;	/* a direct GUC parameter */
+static Size	NvwalSize = 0;		/* an indirect GUC parameter */
+static bool	NvwalAvail = false;
+
 /* For WALInsertLockAcquire/Release functions */
 static int	MyLockNo = 0;
 static bool holdingAllLocks = false;
@@ -5014,6 +5021,76 @@ check_wal_buffers(int *newval, void **extra, GucSource source)
 	return true;
 }
 
+/*
+ * GUC check_hook for nvwal_path: a build without USE_NVWAL accepts only the
+ * empty string, while a build with it accepts any value.
+ */
+bool
+check_nvwal_path(char **newval, void **extra, GucSource source)
+{
+#ifndef USE_NVWAL
+	Assert(!NvwalAvail);
+
+	if ((*newval)[0] == '\0')
+		return true;
+	GUC_check_errcode(ERRCODE_INVALID_PARAMETER_VALUE);
+	GUC_check_errmsg("nvwal_path is invalid parameter without NVWAL");
+	return false;
+#else
+	return true;
+#endif
+}
+
+void
+assign_nvwal_path(const char *newval, void *extra)
+{
+	/* GUC assign_hook: NVWAL is in use exactly when the path is non-empty */
+	NvwalAvail = (newval[0] != '\0');
+}
+
+/*
+ * GUC check_hook for nvwal_size.
+ *
+ * Only the upper bound is validated here: the buffer size, counted in WAL
+ * pages, must fit in an int.  Whether the size is a multiple of
+ * wal_segment_size is NOT checked, because the segment size has not been
+ * set properly yet at this point; XLOGShmemSize does that validation.
+ * Without USE_NVWAL every value is accepted.
+ */
+bool
+check_nvwal_size(int *newval, void **extra, GucSource source)
+{
+#ifdef USE_NVWAL
+	Size		nbytes = (Size) (*newval) * 1024 * 1024;
+	int64		page_count = (int64) nbytes / XLOG_BLCKSZ;
+
+	Assert(*newval > 0);
+	Assert(page_count > 0);
+
+	/*
+	 * XLOGShmemSize later stores the page count in wal_buffers as an int,
+	 * and a small XLOG_BLCKSZ could push the count past INT_MAX.
+	 */
+	if (page_count > INT_MAX)
+	{
+		GUC_check_errcode(ERRCODE_INVALID_PARAMETER_VALUE);
+		GUC_check_errmsg("invalid value for nvwal_size (%dMB): "
+						 "the number of WAL pages too large; "
+						 "buf_size %zu; XLOG_BLCKSZ %d",
+						 *newval, nbytes, (int) XLOG_BLCKSZ);
+		return false;
+	}
+#endif
+
+	return true;
+}
+
+void
+assign_nvwal_size(int newval, void *extra)
+{
+	NvwalSize = ((Size) newval) << 20;	/* GUC assign_hook: MB -> bytes */
+}
+
 /*
  * Read the control file, set respective GUCs.
  *
@@ -5042,13 +5119,49 @@ XLOGShmemSize(void)
 {
 	Size		size;
 
+	/*
+	 * If we use non-volatile WAL buffer, we don't use the given wal_buffers.
+	 * Instead, we set it the value based on the size of the file for the
+	 * buffer. This should be done here because of xlblocks array calculation.
+	 */
+	if (NvwalAvail)
+	{
+		char		buf[32];
+		int64		npages;
+
+		Assert(NvwalSizeMB > 0);
+		Assert(NvwalSize > 0);
+		Assert(wal_segment_size > 0);
+		Assert(wal_segment_size % XLOG_BLCKSZ == 0);
+
+		/*
+		 * At last, we can check if the size of non-volatile WAL buffer
+		 * (nvwal_size) is multiple of WAL segment size.
+		 *
+		 * Note that NvwalSize has already been calculated in assign_nvwal_size.
+		 */
+		if (NvwalSize % wal_segment_size != 0)
+		{
+			elog(PANIC,
+				 "invalid value for nvwal_size (%dMB): "
+				 "it should be multiple of WAL segment size; "
+				 "NvwalSize %zu; wal_segment_size %d",
+				 NvwalSizeMB, NvwalSize, wal_segment_size);
+		}
+
+		npages = (int64) NvwalSize / XLOG_BLCKSZ;
+		Assert(npages > 0 && npages <= INT_MAX);
+
+		snprintf(buf, sizeof(buf), "%d", (int) npages);
+		SetConfigOption("wal_buffers", buf, PGC_POSTMASTER, PGC_S_OVERRIDE);
+	}
 	/*
 	 * If the value of wal_buffers is -1, use the preferred auto-tune value.
 	 * This isn't an amazingly clean place to do this, but we must wait till
 	 * NBuffers has received its final value, and must do it before using the
 	 * value of XLOGbuffers to do anything important.
 	 */
-	if (XLOGbuffers == -1)
+	else if (XLOGbuffers == -1)
 	{
 		char		buf[32];
 
@@ -5064,10 +5177,13 @@ XLOGShmemSize(void)
 	size = add_size(size, mul_size(sizeof(WALInsertLockPadded), NUM_XLOGINSERT_LOCKS + 1));
 	/* xlblocks array */
 	size = add_size(size, mul_size(sizeof(XLogRecPtr), XLOGbuffers));
-	/* extra alignment padding for XLOG I/O buffers */
-	size = add_size(size, XLOG_BLCKSZ);
-	/* and the buffers themselves */
-	size = add_size(size, mul_size(XLOG_BLCKSZ, XLOGbuffers));
+	if (!NvwalAvail)
+	{
+		/* extra alignment padding for XLOG I/O buffers */
+		size = add_size(size, XLOG_BLCKSZ);
+		/* and the buffers themselves */
+		size = add_size(size, mul_size(XLOG_BLCKSZ, XLOGbuffers));
+	}
 
 	/*
 	 * Note: we don't count ControlFileData, it comes out of the "slop factor"
@@ -5161,13 +5277,32 @@ XLOGShmemInit(void)
 	}
 
 	/*
-	 * Align the start of the page buffers to a full xlog block size boundary.
-	 * This simplifies some calculations in XLOG insertion. It is also
-	 * required for O_DIRECT.
+	 * Open and memory-map a file for non-volatile XLOG buffer. The PMDK will
+	 * align the start of the buffer to 2-MiB boundary if the size of the
+	 * buffer is larger than or equal to 4 MiB.
 	 */
-	allocptr = (char *) TYPEALIGN(XLOG_BLCKSZ, allocptr);
-	XLogCtl->pages = allocptr;
-	memset(XLogCtl->pages, 0, (Size) XLOG_BLCKSZ * XLOGbuffers);
+	if (NvwalAvail)
+	{
+		/* Logging and error-handling should be done in the function */
+		XLogCtl->pages = MapNonVolatileXLogBuffer(NvwalPath, NvwalSize);
+
+		/*
+		 * Do not memset non-volatile XLOG buffer (XLogCtl->pages) here
+		 * because it would contain records for recovery. We should do so in
+		 * checkpoint after the recovery completes successfully.
+		 */
+	}
+	else
+	{
+		/*
+		 * Align the start of the page buffers to a full xlog block size
+		 * boundary. This simplifies some calculations in XLOG insertion. It
+		 * is also required for O_DIRECT.
+		 */
+		allocptr = (char *) TYPEALIGN(XLOG_BLCKSZ, allocptr);
+		XLogCtl->pages = allocptr;
+		memset(XLogCtl->pages, 0, (Size) XLOG_BLCKSZ * XLOGbuffers);
+	}
 
 	/*
 	 * Do basic initialization of XLogCtl shared data. (StartupXLOG will fill
@@ -8522,6 +8657,13 @@ ShutdownXLOG(int code, Datum arg)
 
 		CreateCheckPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE);
 	}
+
+	/*
+	 * If we use non-volatile XLOG buffer, unmap it.
+	 */
+	if (NvwalAvail)
+		UnmapNonVolatileXLogBuffer(XLogCtl->pages, NvwalSize);
+
 	ShutdownCLOG();
 	ShutdownCommitTs();
 	ShutdownSUBTRANS();
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 75fc6f11d6..140a99faee 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -2707,7 +2707,7 @@ static struct config_int ConfigureNamesInt[] =
 			GUC_UNIT_XBLOCKS
 		},
 		&XLOGbuffers,
-		-1, -1, (INT_MAX / XLOG_BLCKSZ),
+		-1, -1, INT_MAX,
 		check_wal_buffers, NULL, NULL
 	},
 
@@ -3381,6 +3381,17 @@ static struct config_int ConfigureNamesInt[] =
 		NULL, assign_tcp_user_timeout, show_tcp_user_timeout
 	},
 
+	{
+		{"nvwal_size", PGC_POSTMASTER, WAL_SETTINGS,
+			gettext_noop("Size of non-volatile WAL buffer (NVWAL)."),
+			NULL,
+			GUC_UNIT_MB
+		},
+		&NvwalSizeMB,
+		1024, 1, INT_MAX,
+		check_nvwal_size, assign_nvwal_size, NULL
+	},
+
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
@@ -4419,6 +4430,16 @@ static struct config_string ConfigureNamesString[] =
 		check_backtrace_functions, assign_backtrace_functions, NULL
 	},
 
+	{
+		{"nvwal_path", PGC_POSTMASTER, WAL_SETTINGS,
+			gettext_noop("Path to file for non-volatile WAL buffer (NVWAL)."),
+			NULL
+		},
+		&NvwalPath,
+		"",
+		check_nvwal_path, assign_nvwal_path, NULL
+	},
+
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, NULL, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 3a25287a39..866f77828d 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -226,6 +226,8 @@
 #checkpoint_timeout = 5min		# range 30s-1d
 #max_wal_size = 1GB
 #min_wal_size = 80MB
+#nvwal_path = '/path/to/nvwal'
+#nvwal_size = 1GB
 #checkpoint_completion_target = 0.5	# checkpoint target duration, 0.0 - 1.0
 #checkpoint_flush_after = 0		# measured in pages, 0 disables
 #checkpoint_warning = 30s		# 0 disables
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 786672b1b6..1b18097580 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -144,7 +144,10 @@ static bool show_setting = false;
 static bool data_checksums = false;
 static char *xlog_dir = NULL;
 static char *str_wal_segment_size_mb = NULL;
+static char *nvwal_path = NULL;
+static char *str_nvwal_size_mb = NULL;
 static int	wal_segment_size_mb;
+static int	nvwal_size_mb;
 
 
 /* internal vars */
@@ -1109,14 +1112,78 @@ setup_config(void)
 	conflines = replace_token(conflines, "#port = 5432", repltok);
 #endif
 
-	/* set default max_wal_size and min_wal_size */
-	snprintf(repltok, sizeof(repltok), "min_wal_size = %s",
-			 pretty_wal_size(DEFAULT_MIN_WAL_SEGS));
-	conflines = replace_token(conflines, "#min_wal_size = 80MB", repltok);
+	if (nvwal_path != NULL)
+	{
+		int nr_segs;
+
+		if (str_nvwal_size_mb == NULL)
+			nvwal_size_mb = 1024;
+		else
+		{
+			char *endptr;
+			long val;
+
+			/* parse into a long so ERANGE or a value too wide for int is caught */
+			errno = 0;
+			val = strtol(str_nvwal_size_mb, &endptr, 10);
+			/* verify that the size of non-volatile WAL buffer is valid */
+			if (endptr == str_nvwal_size_mb || *endptr != '\0')
+			{
+				pg_log_error("argument of --nvwal-size must be a number; "
+							 "str_nvwal_size_mb '%s'", str_nvwal_size_mb);
+				exit(1);
+			}
+			if (errno == ERANGE || val <= 0 || val != (long) (int) val)
+			{
+				pg_log_error("argument of --nvwal-size must be a positive number "
+							 "that fits in an int; str_nvwal_size_mb '%s'", str_nvwal_size_mb);
+				exit(1);
+			}
+			nvwal_size_mb = (int) val;
+			if (nvwal_size_mb % wal_segment_size_mb != 0)
+			{
+				pg_log_error("argument of --nvwal-size must be multiple of WAL segment size; "
+							 "str_nvwal_size_mb '%s'; nvwal_size_mb %d; wal_segment_size_mb %d",
+							 str_nvwal_size_mb, nvwal_size_mb, wal_segment_size_mb);
+				exit(1);
+			}
+		}
+
+		/*
+		 * XXX We set {min_,max_,nv}wal_size to the same value.  Note that
+		 * postgres might bootstrap and run if the three config does not have
+		 * the same value, but have not been tested yet.
+		 */
+		nr_segs = nvwal_size_mb / wal_segment_size_mb;
 
-	snprintf(repltok, sizeof(repltok), "max_wal_size = %s",
-			 pretty_wal_size(DEFAULT_MAX_WAL_SEGS));
-	conflines = replace_token(conflines, "#max_wal_size = 1GB", repltok);
+		snprintf(repltok, sizeof(repltok), "min_wal_size = %s",
+				 pretty_wal_size(nr_segs));
+		conflines = replace_token(conflines, "#min_wal_size = 80MB", repltok);
+
+		snprintf(repltok, sizeof(repltok), "max_wal_size = %s",
+				 pretty_wal_size(nr_segs));
+		conflines = replace_token(conflines, "#max_wal_size = 1GB", repltok);
+
+		/* escape the path for the quoted value, as is done for lc_messages */
+		snprintf(repltok, sizeof(repltok), "nvwal_path = '%s'",
+				 escape_quotes(nvwal_path));
+		conflines = replace_token(conflines, "#nvwal_path = '/path/to/nvwal'", repltok);
+
+		snprintf(repltok, sizeof(repltok), "nvwal_size = %s",
+				 pretty_wal_size(nr_segs));
+		conflines = replace_token(conflines, "#nvwal_size = 1GB", repltok);
+	}
+	else
+	{
+		/* set default max_wal_size and min_wal_size */
+		snprintf(repltok, sizeof(repltok), "min_wal_size = %s",
+				 pretty_wal_size(DEFAULT_MIN_WAL_SEGS));
+		conflines = replace_token(conflines, "#min_wal_size = 80MB", repltok);
+
+		snprintf(repltok, sizeof(repltok), "max_wal_size = %s",
+				 pretty_wal_size(DEFAULT_MAX_WAL_SEGS));
+		conflines = replace_token(conflines, "#max_wal_size = 1GB", repltok);
+	}
 
 	snprintf(repltok, sizeof(repltok), "lc_messages = '%s'",
 			 escape_quotes(lc_messages));
@@ -2321,6 +2388,8 @@ usage(const char *progname)
 	printf(_("  -W, --pwprompt            prompt for a password for the new superuser\n"));
 	printf(_("  -X, --waldir=WALDIR       location for the write-ahead log directory\n"));
 	printf(_("      --wal-segsize=SIZE    size of WAL segments, in megabytes\n"));
+	printf(_("  -P, --nvwal-path=FILE     path to file for non-volatile WAL buffer (NVWAL)\n"));
+	printf(_("  -Q, --nvwal-size=SIZE     size of NVWAL, in megabytes\n"));
 	printf(_("\nLess commonly used options:\n"));
 	printf(_("  -d, --debug               generate lots of debugging output\n"));
 	printf(_("  -k, --data-checksums      use data page checksums\n"));
@@ -2989,6 +3058,8 @@ main(int argc, char *argv[])
 		{"sync-only", no_argument, NULL, 'S'},
 		{"waldir", required_argument, NULL, 'X'},
 		{"wal-segsize", required_argument, NULL, 12},
+		{"nvwal-path", required_argument, NULL, 'P'},
+		{"nvwal-size", required_argument, NULL, 'Q'},
 		{"data-checksums", no_argument, NULL, 'k'},
 		{"allow-group-access", no_argument, NULL, 'g'},
 		{NULL, 0, NULL, 0}
@@ -3032,7 +3103,7 @@ main(int argc, char *argv[])
 
 	/* process command-line options */
 
-	while ((c = getopt_long(argc, argv, "dD:E:kL:nNU:WA:sST:X:g", long_options, &option_index)) != -1)
+	while ((c = getopt_long(argc, argv, "dD:E:kL:nNU:WA:sST:X:P:Q:g", long_options, &option_index)) != -1)
 	{
 		switch (c)
 		{
@@ -3126,6 +3197,12 @@ main(int argc, char *argv[])
 			case 12:
 				str_wal_segment_size_mb = pg_strdup(optarg);
 				break;
+			case 'P':
+				nvwal_path = pg_strdup(optarg);
+				break;
+			case 'Q':
+				str_nvwal_size_mb = pg_strdup(optarg);
+				break;
 			case 'g':
 				SetDataDirectoryCreatePerm(PG_DIR_MODE_GROUP);
 				break;
diff --git a/src/include/access/nv_xlog_buffer.h b/src/include/access/nv_xlog_buffer.h
new file mode 100644
index 0000000000..b58878c92b
--- /dev/null
+++ b/src/include/access/nv_xlog_buffer.h
@@ -0,0 +1,71 @@
+/*
+ * nv_xlog_buffer.h
+ *
+ * PostgreSQL non-volatile WAL buffer
+ *
+ * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/access/nv_xlog_buffer.h
+ */
+#ifndef NV_XLOG_BUFFER_H
+#define NV_XLOG_BUFFER_H
+
+#ifdef USE_NVWAL
+#include <libpmem.h>
+
+extern void *MapNonVolatileXLogBuffer(const char *fname, Size fsize);
+extern void	UnmapNonVolatileXLogBuffer(void *addr, Size fsize);
+
+#define nv_memset_persist	pmem_memset_persist
+#define nv_memcpy_nodrain	pmem_memcpy_nodrain
+#define nv_flush			pmem_flush
+#define nv_drain			pmem_drain
+#define nv_persist			pmem_persist
+
+#else
+/* Without USE_NVWAL nv_xlog_buffer.c is empty; use per-includer inline stubs. */
+static inline void *
+MapNonVolatileXLogBuffer(const char *fname, Size fsize)
+{
+	return NULL;
+}
+
+static inline void
+UnmapNonVolatileXLogBuffer(void *addr, Size fsize)
+{
+	return;
+}
+
+static inline void *
+nv_memset_persist(void *pmemdest, int c, size_t len)
+{
+	return NULL;
+}
+
+static inline void *
+nv_memcpy_nodrain(void *pmemdest, const void *src, size_t len)
+{
+	return NULL;
+}
+
+static inline void
+nv_flush(void *pmemdest, size_t len)
+{
+	return;
+}
+
+static inline void
+nv_drain(void)
+{
+	return;
+}
+
+static inline void
+nv_persist(const void *addr, size_t len)
+{
+	return;
+}
+
+#endif							/* USE_NVWAL */
+#endif							/* NV_XLOG_BUFFER_H */
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 347a38f57c..0a05e79524 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -131,6 +131,8 @@ extern int	recovery_min_apply_delay;
 extern char *PrimaryConnInfo;
 extern char *PrimarySlotName;
 extern bool wal_receiver_create_temp_slot;
+extern char *NvwalPath;
+extern int  NvwalSizeMB;
 
 /* indirectly set via GUC system */
 extern TransactionId recoveryTargetXid;
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index c199cd46d2..90d23b46d1 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -325,6 +325,9 @@
 /* Define to 1 if you have the `pam' library (-lpam). */
 #undef HAVE_LIBPAM
 
+/* Define to 1 if you have the `pmem' library (-lpmem). */
+#undef HAVE_LIBPMEM
+
 /* Define if you have a function readline library */
 #undef HAVE_LIBREADLINE
 
@@ -880,6 +883,9 @@
 /* Define to select named POSIX semaphores. */
 #undef USE_NAMED_POSIX_SEMAPHORES
 
+/* Define to 1 to use non-volatile WAL buffer (NVWAL). (--with-nvwal) */
+#undef USE_NVWAL
+
 /* Define to build with OpenSSL support. (--with-openssl) */
 #undef USE_OPENSSL
 
diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h
index 2819282181..d941a76d43 100644
--- a/src/include/utils/guc.h
+++ b/src/include/utils/guc.h
@@ -438,6 +438,10 @@ extern void assign_search_path(const char *newval, void *extra);
 
 /* in access/transam/xlog.c */
 extern bool check_wal_buffers(int *newval, void **extra, GucSource source);
+extern bool check_nvwal_path(char **newval, void **extra, GucSource source);
+extern void assign_nvwal_path(const char *newval, void *extra);
+extern bool check_nvwal_size(int *newval, void **extra, GucSource source);
+extern void assign_nvwal_size(int newval, void *extra);
 extern void assign_xlog_sync_method(int new_sync_method, void *extra);
 
 #endif							/* GUC_H */
-- 
2.17.1

