Forking thread "WAL logging problem in 9.4.3?" for this tangent:
On Mon, Dec 09, 2019 at 06:04:06PM +0900, Kyotaro Horiguchi wrote:
> I don't understand why mdclose checks for (v->mdfd_vfd >= 0) of open
> segment but anyway mdimmedsync is believing that that won't happen and
> I follow the assumption. (I suspect that the if condition in mdclose
> should be an assertion..)
That check helps when data_sync_retry=on and FileClose() raised an error in a
previous mdclose() invocation. However, the check is not sufficient to make
that case work; the attached test case (not for commit) gets an assertion
failure or SIGSEGV.
I am inclined to fix this by decrementing md_num_open_segs before modifying
md_seg_fds (second attachment). An alternative would be to call
_fdvec_resize() after every FileClose(), like mdtruncate() does; however, the
repalloc() overhead could be noticeable. (mdclose() is called much more
frequently than mdtruncate().)
Incidentally, _mdfd_openseg() has this:
if (segno <= reln->md_num_open_segs[forknum])
_fdvec_resize(reln, forknum, segno + 1);
That should be >=, not <=. If the less-than case happened, this would delete
the record of a vfd for a higher-numbered segno. There's no live bug, because
only segno == reln->md_num_open_segs[forknum] actually happens. I am inclined
to make an assertion of that and remove the condition:
Assert(segno == reln->md_num_open_segs[forknum]);
_fdvec_resize(reln, forknum, segno + 1);
diff --git a/configure b/configure
index 9de5037..a1f09e8 100755
--- a/configure
+++ b/configure
@@ -3725,7 +3725,7 @@ $as_echo "${segsize}GB" >&6; }
cat >>confdefs.h <<_ACEOF
-#define RELSEG_SIZE ${RELSEG_SIZE}
+#define RELSEG_SIZE 2
_ACEOF
diff --git a/configure.in b/configure.in
index 9c5e5e7..86f3604 100644
--- a/configure.in
+++ b/configure.in
@@ -289,7 +289,7 @@ RELSEG_SIZE=`expr '(' 1024 / ${blocksize} ')' '*'
${segsize} '*' 1024`
test $? -eq 0 || exit 1
AC_MSG_RESULT([${segsize}GB])
-AC_DEFINE_UNQUOTED([RELSEG_SIZE], ${RELSEG_SIZE}, [
+AC_DEFINE_UNQUOTED([RELSEG_SIZE], 2, [
RELSEG_SIZE is the maximum number of blocks allowed in one disk file.
Thus, the maximum size of a single file is RELSEG_SIZE * BLCKSZ;
relations bigger than that are divided into multiple files.
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index f4ab19f..350ce7f 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -1741,6 +1741,8 @@ PathNameDeleteTemporaryFile(const char *path, bool
error_on_failure)
return true;
}
+char *fail_to_close_file;
+
/*
* close a file when done with it
*/
@@ -1751,8 +1753,11 @@ FileClose(File file)
Assert(FileIsValid(file));
- DO_DB(elog(LOG, "FileClose: %d (%s)",
- file, VfdCache[file].fileName));
+ elog(LOG, "FileClose: %d (%s)",
+ file, VfdCache[file].fileName);
+
+ if (strcmp(VfdCache[file].fileName, fail_to_close_file) == 0)
+ elog(ERROR, "failing to close %s, as requested",
fail_to_close_file);
vfdP = &VfdCache[file];
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 8d951ce..c9f5f53 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3504,6 +3504,16 @@ static struct config_real ConfigureNamesReal[] =
static struct config_string ConfigureNamesString[] =
{
{
+ {"fail_to_close_file", PGC_SUSET, DEVELOPER_OPTIONS,
+ gettext_noop("Raise an error when attempt to close this
file."),
+ gettext_noop("This is a temporary GUC to demonstrate a
bug."),
+ GUC_NOT_IN_SAMPLE
+ },
+ &fail_to_close_file,
+ "",
+ NULL, NULL, NULL
+ },
+ {
{"archive_command", PGC_SIGHUP, WAL_ARCHIVING,
gettext_noop("Sets the shell command that will be
called to archive a WAL file."),
NULL
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index 625fbc3..8c21d76 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -48,6 +48,7 @@ typedef int File;
/* GUC parameter */
extern PGDLLIMPORT int max_files_per_process;
extern PGDLLIMPORT bool data_sync_retry;
+extern PGDLLIMPORT char *fail_to_close_file;
/*
* This is private to fd.c, but exported for save/restore_backend_variables()
diff --git a/src/test/regress/sql/boolean.sql b/src/test/regress/sql/boolean.sql
index df61fa4..50a4550 100644
--- a/src/test/regress/sql/boolean.sql
+++ b/src/test/regress/sql/boolean.sql
@@ -2,6 +2,22 @@
-- BOOLEAN
--
+-- Row count must be large enough to create more than one segment. This value
+-- is enough at RELSEG_SIZE=2. That is unrealistic for production use, but it
+-- makes the test finish quickly, with low disk consumption.
+create table fileclose (c) as select * from generate_series(1, 2000);
+
+-- This closes segno==1, then fails the attempt to close segno==0. This
+-- leaves reln->md_num_open_segs[MAIN_FORKNUM]==2 despite
+-- reln->md_seg_fds[MAIN_FORKNUM][1]==-1; md.c can't cope with that.
+begin;
+select set_config('fail_to_close_file', pg_relation_filepath('fileclose'),
true);
+analyze fileclose;
+commit;
+
+analyze fileclose; -- TRAP: FailedAssertion("FileIsValid(file)", File:
"fd.c", Line: 2039)
+drop table fileclose;
+
--
-- sanity check - if this fails go insane!
--
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 82442db..02983cd 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -516,12 +516,9 @@ mdclose(SMgrRelation reln, ForkNumber forknum)
{
MdfdVec *v = &reln->md_seg_fds[forknum][nopensegs - 1];
- /* if not closed already */
- if (v->mdfd_vfd >= 0)
- {
- FileClose(v->mdfd_vfd);
- v->mdfd_vfd = -1;
- }
+ FileClose(v->mdfd_vfd);
+ reln->md_num_open_segs[forknum] = nopensegs - 1;
+ v->mdfd_vfd = -1;
nopensegs--;
}