Re: [PG13] Planning (time + buffers) data structure in explain plan (format text)

2020-08-20 Thread Pierre Giraud
Le 20/08/2020 à 17:41, Fujii Masao a écrit :
> 
> 
> On 2020/08/20 22:34, Julien Rouhaud wrote:
>> On Thu, Aug 20, 2020 at 12:29 PM David Rowley 
>> wrote:
>>>
>>> On Thu, 20 Aug 2020 at 19:58, Fujii Masao
>>>  wrote:

 On 2020/08/20 13:00, David Rowley wrote:
> I forgot to mention earlier, you'll also need to remove the part in
> the docs that mentions BUFFERS requires ANALYZE.

 Thanks for the review! I removed that.
 Attached is the updated version of the patch.
 I also added the regression test performing "explain (buffers on)"
 without "analyze" option.
>>>
>>> Looks good to me.
>>
>> Looks good to me too.
> 
> David and Julien, thanks for the review! I'd like to wait for
> Pierre's opinion about this change before committing the patch.
> 
> Pierre,
> could you share your opinion about this change?

It looks good to me too. Thanks a lot!
Let's not forget to notify Hubert (depesz) once integrated.

> 
> Regards,
> 




Re: PATCH: logical_work_mem and logical streaming of large in-progress transactions

2020-08-20 Thread Dilip Kumar
On Fri, Aug 21, 2020 at 10:20 AM Amit Kapila  wrote:
>
> On Fri, Aug 21, 2020 at 9:14 AM Amit Kapila  wrote:
> >
> > On Thu, Aug 20, 2020 at 5:42 PM Dilip Kumar  wrote:
> > >
> > > On Thu, Aug 20, 2020 at 2:30 PM Amit Kapila  
> > > wrote:
> > > >
> > > >
> > > > Right, I think this can happen if one has changed those by BufFileSeek
> > > > before doing truncate. We should fix that case as well.
> > >
> > > Right.
> > >
> > > > >  I will work on those along with your other comments and
> > > > > submit the updated patch.
> > >
> > > I have fixed this in the attached patch along with your other
> > > comments.  I have also attached a contrib module that is just used for
> > > testing the truncate API.
> > >
> >
> > Few comments:
> > ==
> > +void
> > +BufFileTruncateShared(BufFile *file, int fileno, off_t offset)
> > {
> > ..
> > + if ((i != fileno || offset == 0) && i != 0)
> > + {
> > + SharedSegmentName(segment_name, file->name, i);
> > + FileClose(file->files[i]);
> > + if (!SharedFileSetDelete(file->fileset, segment_name, true))
> > + ereport(ERROR,
> > + (errcode_for_file_access(),
> > + errmsg("could not delete shared fileset \"%s\": %m",
> > + segment_name)));
> > + numFiles--;
> > + newOffset = MAX_PHYSICAL_FILESIZE;
> > +
> > + if (i == fileno)
> > + newFile--;
> > + }
> >
> > Here, shouldn't it be i <= fileno? Because we need to move back the
> > curFile up to newFile whenever curFile is greater than newFile
> >
>
> I think now I have understood why you have added this condition, but
> probably a comment along the lines of "This is required to indicate that we
> have removed the given fileno" would be better for future readers.

Okay.


-- 
Regards,
Dilip Kumar
EnterpriseDB: http://www.enterprisedb.com




Re: PATCH: logical_work_mem and logical streaming of large in-progress transactions

2020-08-20 Thread Dilip Kumar
On Fri, Aug 21, 2020 at 9:14 AM Amit Kapila  wrote:
>
> On Thu, Aug 20, 2020 at 5:42 PM Dilip Kumar  wrote:
> >
> > On Thu, Aug 20, 2020 at 2:30 PM Amit Kapila  wrote:
> > >
> > >
> > > Right, I think this can happen if one has changed those by BufFileSeek
> > > before doing truncate. We should fix that case as well.
> >
> > Right.
> >
> > > >  I will work on those along with your other comments and
> > > > submit the updated patch.
> >
> > I have fixed this in the attached patch along with your other
> > comments.  I have also attached a contrib module that is just used for
> > testing the truncate API.
> >
>
> Few comments:
> ==
> +void
> +BufFileTruncateShared(BufFile *file, int fileno, off_t offset)
> {
> ..
> + if ((i != fileno || offset == 0) && i != 0)
> + {
> + SharedSegmentName(segment_name, file->name, i);
> + FileClose(file->files[i]);
> + if (!SharedFileSetDelete(file->fileset, segment_name, true))
> + ereport(ERROR,
> + (errcode_for_file_access(),
> + errmsg("could not delete shared fileset \"%s\": %m",
> + segment_name)));
> + numFiles--;
> + newOffset = MAX_PHYSICAL_FILESIZE;
> +
> + if (i == fileno)
> + newFile--;
> + }
>
> Here, shouldn't it be i <= fileno? Because we need to move back the
> curFile up to newFile whenever curFile is greater than newFile

+/* Loop over all the files upto the fileno which we want to truncate. */
+for (i = file->numFiles - 1; i >= fileno; i--)

Because the above loop only goes down to the fileno, I feel there is no
point in that check or any assert.

> 2.
> + /*
> + * If the new location is smaller then the current location in file then
> + * we need to set the curFile and the curOffset to the new values and also
> + * reset the pos and nbytes.  Otherwise nothing to do.
> + */
> + else if ((newFile < file->curFile) ||
> + newOffset < file->curOffset + file->pos)
> + {
> + file->curFile = newFile;
> + file->curOffset = newOffset;
> + file->pos = 0;
> + file->nbytes = 0;
> + }
>
> Shouldn't there be && instead of || because if newFile is greater than
> curFile then there is no meaning to update it?

I think this condition is wrong; it should be:

else if ((newFile < file->curFile) || ((newFile == file->curFile) &&
(newOffset < file->curOffset + file->pos)))

Basically, either the new file is smaller, or if it is the same, then
the new offset should be smaller.

-- 
Regards,
Dilip Kumar
EnterpriseDB: http://www.enterprisedb.com




Re: Libpq support to connect to standby server as priority

2020-08-20 Thread Greg Nancarrow
Hi Peter,

Thanks for the further review; an updated patch is attached. Please
see my responses to your comments below:


On Thu, Aug 20, 2020 at 11:36 AM Peter Smith  wrote:
>

>
> COMMENT (help text)
>
> The help text is probably accurate but it does seem a bit confusing still.
>
> ...
>
> IMO if there was some up-front paragraphs to say how different
> versions calculate the r/w support and recovery mode, then all the
> different parameter values can be expressed in a much simpler way and
> have less repetition (e.g they can all look like the "read-only" one
> does now).
>

GN RESPONSE:
I have updated the documentation, taking this view into account.


> 
>
> COMMENT fe-connect.c (sizeof)
>
> - "Target-Session-Attrs", "", 11, /* sizeof("read-write") = 11 */
> + "Target-Session-Attrs", "", 15, /* sizeof("prefer-standby") = 15 */
>
> You said changing this 15 to 17 is debatable. So I will debate it.
>
> IIUC the dispsize is defined as /* Field size in characters for dialog */
> I imagine this could be used as potential max character length of a
> text input field.
>
> Therefore, so long as "prefer-secondary" remains a valid value for
> target_session_attrs then I think dispsize ought to be 17 (not 15) to
> accommodate it.
> Otherwise setting to 15 may be preventing dialog entry of this
> perfectly valid (albeit uncommon) value.
>

GN RESPONSE:
My initial reasoning was that even though "prefer-secondary" is a
valid value, a GUI for target_session_attrs probably wouldn't present
that option, it would present "prefer-standby" instead (I was
imagining a drop-down menu, and it certainly wouldn't present both
"prefer-standby" and "prefer-secondary", as they are synonyms). If the
GUI did want to present the PGJDBC-compatible option values, it should
be looking at the dispsize for "Target-Server-Type" (which is 17, for
"prefer-secondary").
However, I guess there could be a number of ways to specify the option
value, even explicitly typing it into a textbox in the "database
connection dialog" that uses this information.
So in that case, I've updated the code, as you suggested, to use
dispsize=17 (for "prefer-secondary") in this case.


> 
>
> COMMENT (typo)
>
> + /*
> + * Type of server to connect to. Possible values: "any", "primary",
> + * "prefer-secondary", "secondary" This overrides any connection type
> + * specified by target_session_attrs. This option supports a subset of the
>
> Missing period before "This overrides"
>

GN RESPONSE: Fixed.

> 
>
> COMMENT (comment)
>
> + /*
> + * Type of server to connect to. Possible values: "any", "primary",
> + * "prefer-secondary", "secondary" This overrides any connection type
> + * specified by target_session_attrs. This option supports a subset of the
> + * target_session_attrs option values, and its purpose is to closely
> + * reflect the similar PGJDBC targetServerType option. Note also that this
> + * option only accepts single option values, whereas in future,
> + * target_session_attrs may accept multiple session attribute values.
> + */
> + char*target_server_type;
>
> Perhaps the part saying "... in future, target_session_attrs may
> accept multiple session attribute values." more rightly belongs as a
> comment for the *target_session_attrs field.
>

GN RESPONSE:
I can't really compare and contrast the two parameters without
mentioning "target_session_attrs" here.
"target_session_attrs" implies the possibility of multiple attributes.
If the difference between the attributes is provided in separate bits
of information for each attribute, the reader may not pick up on this
subtle difference between them.

> 
>
> COMMENT (comments)
>
> @@ -436,6 +486,8 @@ struct pg_conn
>   pgParameterStatus *pstatus; /* ParameterStatus data */
>   int client_encoding; /* encoding id */
>   bool std_strings; /* standard_conforming_strings */
> + bool transaction_read_only; /* transaction_read_only */
> + bool in_recovery; /* in_recovery */
>
> Just repeating the field name does not make for a very useful comment.
> Can it be improved?
>

GN RESPONSE: Yes, improved.


>
> COMMENT (blank line removal?)
>
> @@ -540,7 +592,6 @@ struct pg_cancel
>   int be_key; /* key of backend --- needed for cancels */
>  };
>
> -
>
> Removal of this blank line is cleanup in some place unrelated to this patch.
>

GN RESPONSE:
Blank line put back - but this removal appears to be because pg_indent
had NOT been run on this code prior to me running it.

>
> COMMENT (typo in test comment)
>
> +# Connect to standby1 in "prefer-ssecondary" mode with standby1,primary list.
> +test_target_session_attrs($node_standby_1, $node_primary, $node_standby_1,
> + "prefer-secondary", 0);
> +
>
> Typo: "prefer-ssecondary"
>

GN RESPONSE: Fixed.


>
> COMMENT (fe-connect.c - suggest if/else instead of if/if)
>
> + /*
> + * For servers before 7.4 (which don't support read-only), if
> + * the requested type of connection is prefer-standby, then
> + * record this host index and try other specified 

Re: PATCH: logical_work_mem and logical streaming of large in-progress transactions

2020-08-20 Thread Amit Kapila
On Fri, Aug 21, 2020 at 9:14 AM Amit Kapila  wrote:
>
> On Thu, Aug 20, 2020 at 5:42 PM Dilip Kumar  wrote:
> >
> > On Thu, Aug 20, 2020 at 2:30 PM Amit Kapila  wrote:
> > >
> > >
> > > Right, I think this can happen if one has changed those by BufFileSeek
> > > before doing truncate. We should fix that case as well.
> >
> > Right.
> >
> > > >  I will work on those along with your other comments and
> > > > submit the updated patch.
> >
> > I have fixed this in the attached patch along with your other
> > comments.  I have also attached a contrib module that is just used for
> > testing the truncate API.
> >
>
> Few comments:
> ==
> +void
> +BufFileTruncateShared(BufFile *file, int fileno, off_t offset)
> {
> ..
> + if ((i != fileno || offset == 0) && i != 0)
> + {
> + SharedSegmentName(segment_name, file->name, i);
> + FileClose(file->files[i]);
> + if (!SharedFileSetDelete(file->fileset, segment_name, true))
> + ereport(ERROR,
> + (errcode_for_file_access(),
> + errmsg("could not delete shared fileset \"%s\": %m",
> + segment_name)));
> + numFiles--;
> + newOffset = MAX_PHYSICAL_FILESIZE;
> +
> + if (i == fileno)
> + newFile--;
> + }
>
> Here, shouldn't it be i <= fileno? Because we need to move back the
> curFile up to newFile whenever curFile is greater than newFile
>

I think now I have understood why you have added this condition, but
probably a comment along the lines of "This is required to indicate that we
have removed the given fileno" would be better for future readers.

-- 
With Regards,
Amit Kapila.




Re: PATCH: logical_work_mem and logical streaming of large in-progress transactions

2020-08-20 Thread Amit Kapila
On Fri, Aug 21, 2020 at 9:14 AM Amit Kapila  wrote:
>
> On Thu, Aug 20, 2020 at 5:42 PM Dilip Kumar  wrote:
> >
> > On Thu, Aug 20, 2020 at 2:30 PM Amit Kapila  wrote:
> > >
> > >
> > > Right, I think this can happen if one has changed those by BufFileSeek
> > > before doing truncate. We should fix that case as well.
> >
> > Right.
> >
> > > >  I will work on those along with your other comments and
> > > > submit the updated patch.
> >
> > I have fixed this in the attached patch along with your other
> > comments.  I have also attached a contrib module that is just used for
> > testing the truncate API.
> >
>
> Few comments:
> ==
> +void
> +BufFileTruncateShared(BufFile *file, int fileno, off_t offset)
> {
> ..
> + if ((i != fileno || offset == 0) && i != 0)
> + {
> + SharedSegmentName(segment_name, file->name, i);
> + FileClose(file->files[i]);
> + if (!SharedFileSetDelete(file->fileset, segment_name, true))
> + ereport(ERROR,
> + (errcode_for_file_access(),
> + errmsg("could not delete shared fileset \"%s\": %m",
> + segment_name)));
> + numFiles--;
> + newOffset = MAX_PHYSICAL_FILESIZE;
> +
> + if (i == fileno)
> + newFile--;
> + }
>
> Here, shouldn't it be i <= fileno? Because we need to move back the
> curFile up to newFile whenever curFile is greater than newFile
>
> 2.
> + /*
> + * If the new location is smaller then the current location in file then
> + * we need to set the curFile and the curOffset to the new values and also
> + * reset the pos and nbytes.  Otherwise nothing to do.
> + */
> + else if ((newFile < file->curFile) ||
> + newOffset < file->curOffset + file->pos)
> + {
> + file->curFile = newFile;
> + file->curOffset = newOffset;
> + file->pos = 0;
> + file->nbytes = 0;
> + }
>
> Shouldn't there be && instead of || because if newFile is greater than
> curFile then there is no meaning to update it?
>

Wait, actually, it is not clear to me which case the second condition
(newOffset < file->curOffset + file->pos) is trying to cover, so I
can't recommend anything for this. Can you please explain to me why
you added the second condition in the above check?


-- 
With Regards,
Amit Kapila.




Re: PATCH: logical_work_mem and logical streaming of large in-progress transactions

2020-08-20 Thread Amit Kapila
On Thu, Aug 20, 2020 at 5:42 PM Dilip Kumar  wrote:
>
> On Thu, Aug 20, 2020 at 2:30 PM Amit Kapila  wrote:
> >
> >
> > Right, I think this can happen if one has changed those by BufFileSeek
> > before doing truncate. We should fix that case as well.
>
> Right.
>
> > >  I will work on those along with your other comments and
> > > submit the updated patch.
>
> I have fixed this in the attached patch along with your other
> comments.  I have also attached a contrib module that is just used for
> testing the truncate API.
>

Few comments:
==
+void
+BufFileTruncateShared(BufFile *file, int fileno, off_t offset)
{
..
+ if ((i != fileno || offset == 0) && i != 0)
+ {
+ SharedSegmentName(segment_name, file->name, i);
+ FileClose(file->files[i]);
+ if (!SharedFileSetDelete(file->fileset, segment_name, true))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not delete shared fileset \"%s\": %m",
+ segment_name)));
+ numFiles--;
+ newOffset = MAX_PHYSICAL_FILESIZE;
+
+ if (i == fileno)
+ newFile--;
+ }

Here, shouldn't it be i <= fileno? Because we need to move back the
curFile up to newFile whenever curFile is greater than newFile

2.
+ /*
+ * If the new location is smaller then the current location in file then
+ * we need to set the curFile and the curOffset to the new values and also
+ * reset the pos and nbytes.  Otherwise nothing to do.
+ */
+ else if ((newFile < file->curFile) ||
+ newOffset < file->curOffset + file->pos)
+ {
+ file->curFile = newFile;
+ file->curOffset = newOffset;
+ file->pos = 0;
+ file->nbytes = 0;
+ }

Shouldn't there be && instead of || because if newFile is greater than
curFile then there is no meaning to update it?

-- 
With Regards,
Amit Kapila.




Re: Fix typo in procarrary.c

2020-08-20 Thread Fujii Masao




On 2020/08/21 12:29, Masahiko Sawada wrote:

On Fri, 21 Aug 2020 at 11:18, Fujii Masao  wrote:




On 2020/08/21 10:58, Masahiko Sawada wrote:

Hi,

I've attached the patch for $subject.

s/replications lots/replication slots/


Thanks for the patch!

Also it's better to s/replications slots/replication slots/ ?

--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -198,7 +198,7 @@ typedef struct ComputeXidHorizonsResult
   * be removed.
   *
  * This likely should only be needed to determine whether pg_subtrans can
-* be truncated. It currently includes the effects of replications slots,
+* be truncated. It currently includes the effects of replication slots,
   * for historical reasons. But that could likely be changed.
   */
  TransactionId oldest_considered_running;



Indeed. I agree with you.


Thanks! So I pushed both fixes.

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION




Re: Fix typo in procarrary.c

2020-08-20 Thread Masahiko Sawada
On Fri, 21 Aug 2020 at 11:18, Fujii Masao  wrote:
>
>
>
> On 2020/08/21 10:58, Masahiko Sawada wrote:
> > Hi,
> >
> > I've attached the patch for $subject.
> >
> > s/replications lots/replication slots/
>
> Thanks for the patch!
>
> Also it's better to s/replications slots/replication slots/ ?
>
> --- a/src/backend/storage/ipc/procarray.c
> +++ b/src/backend/storage/ipc/procarray.c
> @@ -198,7 +198,7 @@ typedef struct ComputeXidHorizonsResult
>   * be removed.
>   *
>   * This likely should only be needed to determine whether pg_subtrans can
> -* be truncated. It currently includes the effects of replications slots,
> +* be truncated. It currently includes the effects of replication slots,
>   * for historical reasons. But that could likely be changed.
>   */
>  TransactionId oldest_considered_running;
>

Indeed. I agree with you.

Regards,

-- 
Masahiko Sawada    http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services




Re: New statistics for tuning WAL buffer size

2020-08-20 Thread Fujii Masao




On 2020/08/21 12:08, tsunakawa.ta...@fujitsu.com wrote:

From: Fujii Masao 

Just an idea; it may be worth exposing the number of times a new WAL file is
created and zero-filled. This initialization may have an impact on
the performance of write-heavy workloads generating lots of WAL. If this
number is reported high, then to reduce the number of these initializations,
we can tune WAL-related parameters so that more "recycled" WAL files
can be held.


Sounds good.  Actually, I want to know how much that zeroing affected the 
transaction response times, but it may be the target of the wait event 
statistics that Imai-san is addressing.


Maybe, so I'm ok if the first pg_stat_walwriter patch doesn't expose
this number. We can extend it to include that later after we confirm
that number is really useful.

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION




RE: New statistics for tuning WAL buffer size

2020-08-20 Thread tsunakawa.ta...@fujitsu.com
From: Fujii Masao 
> Just an idea; it may be worth exposing the number of times a new WAL file is
> created and zero-filled. This initialization may have an impact on
> the performance of write-heavy workloads generating lots of WAL. If this
> number is reported high, then to reduce the number of these initializations,
> we can tune WAL-related parameters so that more "recycled" WAL files
> can be held.

Sounds good.  Actually, I want to know how much that zeroing affected the 
transaction response times, but it may be the target of the wait event 
statistics that Imai-san is addressing.

(I wonder how the fallocate() patch went that tries to minimize the zeroing 
time.)


Regards
Takayuki Tsunakawa



RE: New statistics for tuning WAL buffer size

2020-08-20 Thread tsunakawa.ta...@fujitsu.com
From: Fujii Masao 
> I agree to expose the number of WAL write caused by full of WAL buffers.
> It's helpful when tuning wal_buffers size. Haribabu separated that number
> into two fields in his patch; one is the number of WAL write by backend,
> and another is by background processes and workers. But I'm not sure
> how useful such separation is. I'm ok with just one field for that number.

I agree with you.  I don't think we need to separate the numbers for foreground 
processes and background ones.  WAL buffer is a single resource.  So "Writes 
due to full WAL buffer are happening.  We may be able to boost performance by 
increasing wal_buffers" would be enough.


Regards
Takayuki Tsunakawa



Re: Creating foreign key on partitioned table is too slow

2020-08-20 Thread Amit Langote
On Thu, Aug 20, 2020 at 10:50 AM Amit Langote  wrote:
 On Thu, Aug 20, 2020 at 3:06 AM Alvaro Herrera
 wrote:
> > On 2020-Aug-19, Amit Langote wrote:
> > > On Thu, Aug 6, 2020 at 4:25 PM kato-...@fujitsu.com
> > >  wrote:
> > > > On Wednesday, August 5, 2020 9:43 AM I wrote:
> > > > > I'll report the result before the end of August .
> > > >
> > > > I test v2-0001-build-partdesc-memcxt.patch at 9a9db08ae4 and it is ok.
>
> Fwiw, I am fine with applying the memory-leak fix in all branches down
> to v12 if we are satisfied with the implementation.

I have revised the above patch slightly to introduce a variable for
the condition whether to use a temporary memory context.

-- 
Amit Langote
EnterpriseDB: http://www.enterprisedb.com


v3-0001-build-partdesc-memcxt.patch
Description: Binary data


Re: Fix typo in procarrary.c

2020-08-20 Thread Fujii Masao




On 2020/08/21 10:58, Masahiko Sawada wrote:

Hi,

I've attached the patch for $subject.

s/replications lots/replication slots/


Thanks for the patch!

Also it's better to s/replications slots/replication slots/ ?

--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -198,7 +198,7 @@ typedef struct ComputeXidHorizonsResult
 * be removed.
 *
 * This likely should only be needed to determine whether pg_subtrans can
-* be truncated. It currently includes the effects of replications slots,
+* be truncated. It currently includes the effects of replication slots,
 * for historical reasons. But that could likely be changed.
 */
TransactionId oldest_considered_running;

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION




Fix typo in procarrary.c

2020-08-20 Thread Masahiko Sawada
Hi,

I've attached the patch for $subject.

s/replications lots/replication slots/

Regards,

-- 
Masahiko Sawada    http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


fix_typo.patch
Description: Binary data


Re: SyncRepLock acquired exclusively in default configuration

2020-08-20 Thread Masahiko Sawada
On Wed, 19 Aug 2020 at 21:41, Fujii Masao  wrote:
>
>
>
> On 2020/08/12 15:32, Masahiko Sawada wrote:
> > On Wed, 12 Aug 2020 at 14:06, Asim Praveen  wrote:
> >>
> >>
> >>
> >>> On 11-Aug-2020, at 8:57 PM, Robert Haas  wrote:
> >>>
> >>> I think this gets to the root of the issue. If we check the flag
> >>> without a lock, we might see a slightly stale value. But, considering
> >>> that there's no particular amount of time within which configuration
> >>> changes are guaranteed to take effect, maybe that's OK. However, there
> >>> is one potential gotcha here: if the walsender declares the standby to
> >>> be synchronous, a user can see that, right? So maybe there's this
> >>> problem: a user sees that the standby is synchronous and expects a
> >>> transaction committing afterward to provoke a wait, but really it
> >>> doesn't. Now the user is unhappy, feeling that the system didn't
> >>> perform according to expectations.
> >>
> >> Yes, pg_stat_replication reports a standby in sync as soon as walsender 
> >> updates priority of the standby to something other than 0.
> >>
> >> The potential gotcha referred above doesn’t seem too severe.  What is the 
> >> likelihood of someone setting synchronous_standby_names GUC with either 
> >> “*” or a standby name and then immediately promoting that standby?  If the 
> >> standby is promoted before the checkpointer on master gets a chance to 
> >> update sync_standbys_defined in shared memory, commits made during this 
> >> interval on master may not make it to standby.  Upon promotion, those 
> >> commits may be lost.
> >
> > I think that if the standby is quite behind the primary and in case of
> > the primary crashes, the likelihood of losing commits might get
> > higher. The user can see the standby became synchronous standby via
> > pg_stat_replication but commit completes without a wait because the
> > checkpointer doesn't update sync_standbys_defined yet. If the primary
> > crashes before standby catching up and the user does failover, the
> > committed transaction will be lost, even though the user expects that
> > transaction commit has been replicated to the standby synchronously.
> > And this can happen even without the patch, right?
>
> I think you're right. This issue can happen even without the patch.
>
> Maybe we should not mark the standby as "sync" whenever sync_standbys_defined
> is false even if synchronous_standby_names is actually set and walsenders have
> already detect that?

It seems good. I guess that we can set 'async' to sync_status and 0 to
sync_priority when sync_standbys_defined is not true regardless of
walsender's actual priority value. We print the message "standby
\"%s\" now has synchronous standby priority %u" in SyncRepInitConfig()
regardless of sync_standbys_defined, but maybe it's fine as the message
isn't incorrect and it's a DEBUG1 message.

> Or we need more aggressive approach;
> make the checkpointer update sync_standby_priority values of
> all the walsenders? ISTM that the latter looks overkill...

I think so too.

Regards,

-- 
Masahiko Sawada    http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services




Problems with the FSM, heap fillfactor, and temporal locality

2020-08-20 Thread Peter Geoghegan
I'm concerned about how the FSM gives out pages to heapam. Disabling
the FSM entirely helps TPC-C/BenchmarkSQL, which uses non-default heap
fillfactors for most tables [1]. Testing has shown that this actually
increases throughput for the benchmark (as measured in TPM) by 5% -
9%, even though my approach is totally naive and stupid. My approach
makes one or two small tables much bigger, but that doesn't have that
much downside for the workload in practice. My approach helps by
accidentally improving temporal locality -- related records are more
consistently located on the same block, which in practice reduces the
number of pages dirtied and the number of FPIs generated. TPC-C has a
tendency to insert a set of related tuples together (e.g., order lines
from an order), while later updating all of those records together.

Interestingly, the problems start before we even begin the benchmark
proper, and can be observed directly using simple ad-hoc queries (I
developed some simple SQL queries involving ctid for this).
BenchmarkSQL's initial bulk loading is performed by concurrent workers
that insert related groups of tuples into tables, so that we start out
with a realistic amount of old orders to refer back to, etc. I can
clearly observe that various natural groupings (e.g., grouping order
lines by order number, grouping customers by district + warehouse)
actually get initially inserted in a way that leaves tuples in a
grouping spread around an excessive number of heap blocks. For
example, while most order lines do fit on one block, there is a
significant minority of orderlines that span two or more blocks for
the master branch case. Whereas with the FSM artificially disabled,
the heap relation looks more "pristine" in that related tuples are
located on the same blocks (or at least on neighboring blocks). It's
possible that one orderline will span two neighboring blocks here, but
it will never span three or more blocks. Each order has 5 - 15 order
lines, and so I was surprised to see that a small minority of order
line tuples end up occupying as many as 5 or 7 heap pages on the
master branch (i.e. with the current FSM intact during bulk loading).

The underlying cause of this "bulk inserts are surprisingly
indifferent to locality" issue seems to be that heap am likes to
remember small amounts of space from earlier pages when the backend
couldn't fit one last tuple on an earlier target page (before
allocating a new page that became the new relcache target page in
turn). This is penny wise and pound foolish, because we're eagerly
using a little bit more space in a case where we are applying a heap
fill factor anyway. I think that we also have a number of related
problems.

It seems to me that we don't make enough effort to store related heap
tuples together -- both during bulk inserts like this, but also during
subsequent updates that cannot fit successor tuples on the same heap
page. The current design of the FSM seems to assume that it doesn't
matter where the free space comes from, as long as you get it from
somewhere and as long as fill factor isn't violated -- it cares about
the letter of the fill factor law without caring about its spirit or
intent.

If the FSM tried to get free space from a close-by block, then we
might at least see related updates that cannot fit a successor tuple
on the same block behave in a coordinated fashion. We might at least
have both updates relocate the successor tuple to the same
mostly-empty block -- they both notice the same nearby free block, so
both sets of successor tuples end up going on the same most-empty
block. The two updating backends don't actually coordinate, of course
-- this just happens as a consequence of looking for nearby free
space.

The FSM should probably be taught to treat pages as free space
candidates (candidates to give out free space from) based on more
sophisticated, locality-aware criteria. The FSM should care about the
*rate of change* for a block over time. Suppose you have heap fill
factor set to 90. Once a heap block reaches 90% full, it
ought to not be used to insert new tuples unless and until the used
space on the block shrinks *significantly* -- the free space is now
supposed to be reserved. It should not be enough for the space used on
the page to shrink by just 1% (to 89%). Maybe it should have to reach
as low as 50% or less before we flip it back to "free to take space
from for new unrelated tuples". The idea is that fill factor space is
truly reserved for updates -- that should be "sticky" for all of this
to work well.

What's the point in having the concept of a heap fill factor at all if
we don't make any real effort to enforce that the extra free space
left behind is used as intended, for updates of tuples located on the
same heap page?

Thoughts?

[1] 
https://github.com/petergeoghegan/benchmarksql/commit/3ef4fe71077b40f56b91286d4b874a15835c241e
--
Peter Geoghegan




Re: Fix a couple of typos in JIT

2020-08-20 Thread David Rowley
On Fri, 21 Aug 2020 at 02:25, Andres Freund  wrote:
> David, sounds good, after adapting to Abhijit's concerns.

Thank you both for having a look. Now pushed.

David




Re: deb repo doesn't have latest. or possible to update web page?

2020-08-20 Thread Roger Pack
On Wed, Aug 19, 2020 at 11:23 AM Magnus Hagander  wrote:
>
>
>
> On Wed, Aug 19, 2020 at 7:04 PM Roger Pack  wrote:
>>
>> As a note I tried to use the deb repo today:
>>
>> https://www.postgresql.org/download/linux/debian/
>>
>> with an old box on Wheezy.
>> It only seems to have binaries up to postgres 10.
>>
>> Might be nice to make a note on the web page so people realize some
>> distro's aren't supported fully instead of (if they're like me)
>> wondering "why don't these instructions work? It says to run  apt-get
>> install postgresql-12" ...
>
>
> The page lists which distros *are* supported. You can assume that anything 
> *not* listed is unsupported.
>
> In the case of wheezy, whatever was the latest when it stopped being 
> supported, is still there. Which I guess can cause some confusion if you just
> run the script without reading the note that's there. I'm unsure how to fix
> that though.

The confusion in my case is that I wasn't sure why my distro was named;
I tried the instructions and they... half worked.

Maybe something like this?

The PostgreSQL apt repository supports the currently supported stable
versions of Debian with the latest versions of Postgres:

xxx
xxx

Older versions of Debian may also be supported with older versions of Postgres.



Or get rid of the wheezy side altogether?

Cheers!




Re: proposal - function string_to_table

2020-08-20 Thread Pavel Stehule
Hi

On Thu, 20 Aug 2020 at 4:07, Peter Smith  wrote:

> Hi.
>
> I have been looking at the patch: string_to_table-20200706-2.patch
>
> Below are some review comments for your consideration.
>
> 
>
> COMMENT func.sgml (style)
>
> +   
> +splits string into table using supplied delimiter and
> +optional null string.
> +   
>
> The format style of the short description is inconsistent with the
> other functions.
> e.g. Should start with Capital letter.
> e.g. Should tag the parameter names properly
>
> Something like:
> 
> Splits string into a table
> using supplied delimiter
> and optional null string nullstr.
> 
>
>
done


> 
>
> COMMENT func.sgml (what does nullstr do)
>
> The description does not sufficiently describe the purpose/behaviour
> of the nullstr.
>
> e.g. Firstly I thought that it meant if 2 consecutive delimiters were
> encountered it would substitute this string as the row value. But it
> is doing the opposite of what I guessed - if the extracted row value
> is the same as nullstr then a NULL row is inserted instead.
>
>
done
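For reference, the nullstr rule being documented here (a field that matches nullstr becomes NULL in the output, not the other way round) can be sketched in plain C. This is a hypothetical helper for illustration, not the patch's implementation:

```c
#include <string.h>
#include <stdbool.h>

/* Copy the n-th comma-separated field of s into buf, and report via
 * *is_null whether it matched the given null string.  Hypothetical
 * illustration of string_to_table's nullstr rule. */
bool
get_field(const char *s, int n, const char *nullstr,
          char *buf, size_t buflen, bool *is_null)
{
    const char *start = s;

    for (int i = 0; i < n; i++)
    {
        const char *comma = strchr(start, ',');

        if (comma == NULL)
            return false;       /* fewer than n+1 fields */
        start = comma + 1;
    }

    const char *end = strchr(start, ',');
    size_t len = end ? (size_t) (end - start) : strlen(start);

    if (len >= buflen)
        len = buflen - 1;
    memcpy(buf, start, len);
    buf[len] = '\0';

    /* the extracted value itself decides whether the row becomes NULL */
    *is_null = (nullstr != NULL && strcmp(buf, nullstr) == 0);
    return true;
}
```

So for input "xx,yy,zz" with nullstr "yy", the second field is returned as NULL, matching the corrected sample output discussed below.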


> 
>
> COMMENT func.sgml (wrong sample output)
>
> +xx
> +yy,
> +zz
>
> This output is incorrect for the sample given. There is no "yy," in
> the output because there is a 'yy' nullstr substitution.
>
> Should be:
> ---
> xx
> NULL
> zz
> ---
>

fixed


> 
>
> COMMENT func.sgml (related to regexp_split_to_table)
>
> Because this new function is similar to the existing
> regexp_split_to_table, perhaps they should cross-reference each other
> so a reader of this documentation is made aware of the alternative
> function?
>

I wrote a new sentence with a cross-reference


>
> 
>
> COMMENT (test cases)
>
> It is impossible to tell difference in the output between empty
> strings and nulls currently, so maybe you can change all the tests to
> have a form like below so they can be validated properly:
>
> # select v, v IS NULL as "is null" from
> string_to_table('a,b,*,c,d,',',','*') g(v);
>  v | is null
> ---+-
>  a | f
>  b | f
>| t
>  c | f
>  d | f
>| f
> (6 rows)
>
> or maybe like this is even easier:
>
> # select quote_nullable(string_to_table('a|*||c|d|','|','*'));
>  quote_nullable
> 
>  'a'
>  NULL
>  ''
>  'c'
>  'd'
>  ''
> (6 rows)
>

I prefer the first variant; it is clean. It is a good idea; done


> Something similar was already proposed before [1] but that never got
> put into the test code.
> [1]
> https://www.postgresql.org/message-id/CAFj8pRDSzDYmaS06dfMXBfbr8x%2B3xjDJxA5kbL3h8%2BeOGoRUcA%40mail.gmail.com
>
> 
>
> COMMENT (test cases)
>
> There are multiple combinations of the parameters to this function and
> MANY different results depending on different values they can take, so
> the existing tests only cover a small sample.
>
> I have attached a lot more test scenarios that you may want to include
> for better test coverage. Everything seemed to work as expected.
>

ok, merged


> PSA test-cases.pdf
>
> 
>
> COMMENT (accum_result)
>
> + Datum values[1];
> + bool nulls[1];
> +
> + values[0] = PointerGetDatum(result_text);
> + nulls[0] = is_null;
>
> Why not use variables instead of arrays with only 1 element?
>

Technically it is equivalent, but I think using a one-element array is
more correct, because heap_form_tuple expects an array. Sure, in the C
language there is no difference between a pointer to a value and a
pointer to an array, but at minimum the name of the argument, "values",
implies that the argument is an array.

This pattern is used in more places in Postgres. You can find fragments
where, although we know the array has only one field, we still work with
an array:

misc.c
hash.c
execTuples.c

but I can simplify this code a little bit - I can use the function
tuplestore_putvalues(tupstore, tupdesc, values, nulls);

I see, so this code can be reduced further, and I don't need the local
variables, but I prefer to be consistent with other parts, and I feel
better passing an array where an array is expected.

This is not extra important, and I can change it; I just think this
variant is a little cleaner.
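The point that a one-element array and a pointer to a single variable are interchangeable at the call site can be shown outside PostgreSQL. Here sum_non_null is a hypothetical stand-in for a function like heap_form_tuple() that takes parallel values/nulls arrays:

```c
#include <stddef.h>

/* Hypothetical consumer of parallel values/nulls arrays, standing in
 * for a function like heap_form_tuple(); plain C, not PostgreSQL code. */
long
sum_non_null(const long *values, const int *nulls, size_t nattrs)
{
    long total = 0;

    for (size_t i = 0; i < nattrs; i++)
        if (!nulls[i])
            total += values[i];
    return total;
}
```

Calling it with `long values[1]` or with `&v` for a scalar `long v` produces identical code; the array form merely documents the intent, which is the stylistic argument made above.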



> 
>
> COMMENT (text_to_array_internal)
>
> + if (!tstate.astate)
> + PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID));
>
> Maybe the condition is more readable when expressed as:
> if (tstate.astate == NULL)
>
>
done


new patch attached

Thank you for the valuable review

Regards

Pavel


>
> Kind Regards,
> Peter Smith.
> Fujitsu Australia
>
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 9a4ac5a1ea..bead1b6b74 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -3458,6 +3458,38 @@ repeat('Pg', 4) PgPgPgPg

   
 
+  
+   
+
+ string_to_table
+
+string_to_table ( string text, delimiter text  , nullstr text   )
+set of text
+   
+   
+Splits the string at occurrences
+of delimiter and forms the remaining data
+into a text table.
+If delimiter is NULL,
+   

Re: language cleanups in code and docs

2020-08-20 Thread Ashwin Agrawal
On Wed, Jun 17, 2020 at 9:27 AM David Steele  wrote:

> On 6/17/20 12:08 PM, Magnus Hagander wrote:
> > On Wed, Jun 17, 2020 at 4:15 PM Andrew Dunstan
> > mailto:andrew.duns...@2ndquadrant.com>>
>
> >
> > I'm not sure I like doing s/Black/Block/ here. It reads oddly. There
> are
> > too many other uses of Block in the sources. Forbidden might be a
> better
> > substitution, or Banned maybe. BanList is even less characters than
> > BlackList.
> >
> > I'd be OK with either of those really -- I went with block because it
> > was the easiest one :)
> >
> > Not sure the number of characters is the important part :) Banlist does
> > make sense to me for other reasons though -- it's what it is, isn't it?
> > It bans those oids from being used in the current session -- I don't
> > think there's any struggle to "make that sentence work", which means
> > that seems like the relevant term.
>
> I've seen also seen allowList/denyList as an alternative. I do agree
> that blockList is a bit confusing since we often use block in a very
> different context.
>

+1 for allowList/denyList as alternative

> I do think it's worth doing -- it's a small round of changes, and it
> > doesn't change anything user-exposed, so the cost for us is basically
> zero.
>
> +1


Agreed, the number of occurrences of whitelist and blacklist is not
large, so cleaning these up would be helpful, and patches have already
been proposed for it

git grep whitelist | wc -l
10
git grep blacklist | wc -l
40

Thanks a lot for the language cleanups. Greenplum, a fork of PostgreSQL,
wishes to perform similar cleanups, and upstream doing it really helps
us downstream.


Re: "ccold" left by reindex concurrently are droppable?

2020-08-20 Thread Alvaro Herrera
Thanks, Michael and Julien!  Pushed to 12-master, with a slight
rewording to use the passive voice, hopefully matching the surrounding
text.  I also changed "temporary" to "transient" in another line, for
consistency.

-- 
Álvaro Herrera                https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services




ReplicationSlotsComputeRequiredXmin seems pretty questionable

2020-08-20 Thread Tom Lane
While trying to make sense of Adam Sjøgren's problem [1], I found
myself staring at ReplicationSlotsComputeRequiredXmin() in slot.c.
It seems to me that that is very shaky code, on two different
grounds:

1. Sometimes it's called with ProcArrayLock already held exclusively.
This means that any delay in acquiring the ReplicationSlotControlLock
translates directly into a hold on ProcArrayLock; in other words,
every acquisition of the ReplicationSlotControlLock is just as bad
for concurrency as an acquisition of ProcArrayLock.  While I didn't
see any places that were doing really obviously slow things while
holding ReplicationSlotControlLock, this is disturbing.  Do we really
need it to be like that?

2. On the other hand, the code is *releasing* the
ReplicationSlotControlLock before it calls
ProcArraySetReplicationSlotXmin, and that seems like a flat out
concurrency bug.  How can we be sure that the values we're storing
into the shared xmin fields aren't stale by the time we acquire
the ProcArrayLock (in the case where we didn't hold it already)?
I'm concerned that in the worst case this code could make the
shared xmin fields go backwards.

Both of these issues could be solved, I think, if we got rid of
the provision for calling with ProcArrayLock already held and
moved the ProcArraySetReplicationSlotXmin call inside the hold
of ReplicationSlotControlLock.  But maybe I'm missing something
about why that would be worse.
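The ordering fix suggested here can be illustrated in miniature (a hypothetical, single-threaded sketch; lock_acquire()/lock_release() are no-op stand-ins for LWLockAcquire()/LWLockRelease(), not PostgreSQL code):

```c
/* Hypothetical illustration of the fix: the computed xmin is published
 * while the lock that protected the computation is still held, so a
 * staler computation can never overwrite a fresher one after the fact. */

long slot_xmin_min = 100;   /* stand-in for the minimum xmin across slots */
long shared_xmin = 0;       /* stand-in for the shared xmin field */

void lock_acquire(void) {}  /* no-op stand-in for LWLockAcquire() */
void lock_release(void) {}  /* no-op stand-in for LWLockRelease() */

long
compute_xmin(void)
{
    return slot_xmin_min;   /* real code would scan all slots here */
}

void
recompute_and_publish(void)
{
    lock_acquire();
    long xmin = compute_xmin();
    shared_xmin = xmin;     /* publish BEFORE releasing the lock */
    lock_release();
}
```

If the store into shared_xmin instead happened after lock_release(), two concurrent callers could compute in one order and publish in the other, which is exactly the "shared xmin fields go backwards" hazard described above.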

regards, tom lane

[1] 
https://www.postgresql.org/message-id/flat/87364kdsim.fsf%40tullinup.koldfront.dk




Re: [PG13] Planning (time + buffers) data structure in explain plan (format text)

2020-08-20 Thread Fujii Masao




On 2020/08/20 22:34, Julien Rouhaud wrote:

On Thu, Aug 20, 2020 at 12:29 PM David Rowley  wrote:


On Thu, 20 Aug 2020 at 19:58, Fujii Masao  wrote:


On 2020/08/20 13:00, David Rowley wrote:

I forgot to mention earlier, you'll also need to remove the part in
the docs that mentions BUFFERS requires ANALYZE.


Thanks for the review! I removed that.
Attached is the updated version of the patch.
I also added the regression test performing "explain (buffers on)"
without "analyze" option.


Looks good to me.


Looks good to me too.


David and Julien, thanks for the review! I'd like to wait for
Pierre's opinion about this change before committing the patch.

Pierre,
could you share your opinion about this change?

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION




Re: Transactions involving multiple postgres foreign servers, take 2

2020-08-20 Thread Fujii Masao




On 2020/07/27 15:59, Masahiko Sawada wrote:

On Thu, 23 Jul 2020 at 22:51, Muhammad Usama  wrote:




On Wed, Jul 22, 2020 at 12:42 PM Masahiko Sawada 
 wrote:


On Sat, 18 Jul 2020 at 01:55, Fujii Masao  wrote:




On 2020/07/16 14:47, Masahiko Sawada wrote:

On Tue, 14 Jul 2020 at 11:19, Fujii Masao  wrote:




On 2020/07/14 9:08, Masahiro Ikeda wrote:

I've attached the latest version patches. I've incorporated the review
comments I got so far and improved locking strategy.


Thanks for updating the patch!


+1
I'm interested in these patches and now studying them. While checking
the behaviors of the patched PostgreSQL, I got three comments.


Thank you for testing this patch!



1. We can access the foreign table even during recovery in HEAD.
But in the patched version, when I did that, I got the following error.
Is this intentional?

ERROR:  cannot assign TransactionIds during recovery


No, it should be fixed. I'm going to fix this by not collecting
participants for atomic commit during recovery.


Thanks for trying to fix the issues!

I'd like to report one more issue. When I started a new transaction
on the local server, executed an INSERT on the remote server via
postgres_fdw, and then quit psql, I got the following assertion failure.

TRAP: FailedAssertion("fdwxact", File: "fdwxact.c", Line: 1570)
0   postgres0x00010d52f3c0 ExceptionalCondition 
+ 160
1   postgres0x00010cefbc49 
ForgetAllFdwXactParticipants + 313
2   postgres0x00010cefff14 AtProcExit_FdwXact + 
20
3   postgres0x00010d313fe3 shmem_exit + 179
4   postgres0x00010d313e7a proc_exit_prepare + 
122
5   postgres0x00010d313da3 proc_exit + 19
6   postgres0x00010d35112f PostgresMain + 3711
7   postgres0x00010d27bb3a BackendRun + 570
8   postgres0x00010d27af6b BackendStartup + 475
9   postgres0x00010d279ed1 ServerLoop + 593
10  postgres0x00010d277940 PostmasterMain + 6016
11  postgres0x00010d1597b9 main + 761
12  libdyld.dylib   0x7fff7161e3d5 start + 1
13  ??? 0x0003 0x0 + 3



Thank you for reporting the issue!

I've attached the latest version patch that incorporated all comments
I got so far. I've removed the patch adding the 'prefer' mode of
foreign_twophase_commit to keep the patch set simple.



I have started to review the patchset. Just a quick comment.

Patch v24-0002-Support-atomic-commit-among-multiple-foreign-ser.patch
contains changes (adding fdwxact includes) for
src/backend/executor/nodeForeignscan.c,  src/backend/executor/nodeModifyTable.c
and  src/backend/executor/execPartition.c files that doesn't seem to be
required with the latest version.


Thanks for your comment.

Right. I've removed these changes on the local branch.


The latest patches fail to apply to the master branch. Could you rebase
them?

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION




Re: Fix a couple of typos in JIT

2020-08-20 Thread Andres Freund
Hi,

On 2020-08-20 15:59:26 +0530, Abhijit Menon-Sen wrote:
> The original sentence may not be the most shining example of
> sentence-ry, but it is correct, and removing the "That" breaks it.

That made me laugh ;)

David, sounds good, after adapting to Abhijit's concerns.

Greetings,

Andres Freund




Re: [PG13] Planning (time + buffers) data structure in explain plan (format text)

2020-08-20 Thread Julien Rouhaud
On Thu, Aug 20, 2020 at 12:29 PM David Rowley  wrote:
>
> On Thu, 20 Aug 2020 at 19:58, Fujii Masao  wrote:
> >
> > On 2020/08/20 13:00, David Rowley wrote:
> > > I forgot to mention earlier, you'll also need to remove the part in
> > > the docs that mentions BUFFERS requires ANALYZE.
> >
> > Thanks for the review! I removed that.
> > Attached is the updated version of the patch.
> > I also added the regression test performing "explain (buffers on)"
> > without "analyze" option.
>
> Looks good to me.

Looks good to me too.




Re: Small doubt on update a partition when some rows need to move among partition

2020-08-20 Thread Ashutosh Bapat
On Thu, Aug 20, 2020 at 5:22 PM movead...@highgo.ca  wrote:
>
> Hello,
>
> I am trying to handle the limitation that we can't move a tuple to another
> partition when the move is caused by a BEFORE trigger, and I have two
> questions:
>
> The first issue:
> In the ExecBRUpdateTriggers() and ExecBRInsertTriggers() functions, why do
> we need to check the result slot after every trigger call? Shouldn't we
> check the result slot only after all triggers have been called?
>
> For example, suppose we have a table t1(i int, j int), a BEFORE INSERT
> TRIGGER on t1 that makes i - 1, and another BEFORE INSERT TRIGGER on t1
> that makes i + 1. If the first trigger causes a partition move, the insert
> query will be interrupted. However, the partition would not change after
> all triggers are called.

This was discussed at
https://www.postgresql.org/message-id/20200318210213.GA9781@alvherre.pgsql.

-- 
Best Wishes,
Ashutosh Bapat




Re: display offset along with block number in vacuum errors

2020-08-20 Thread Amit Kapila
On Thu, Aug 20, 2020 at 12:32 PM Amit Kapila  wrote:
>
> On Thu, Aug 20, 2020 at 12:18 PM Masahiko Sawada
>  wrote:
> >
> > On Thu, 20 Aug 2020 at 14:01, Amit Kapila  wrote:
> > >
> > > On Wed, Aug 19, 2020 at 12:54 PM Masahiko Sawada
> > >  wrote:
> > >
> > > Here, we can notice that for the index, we are getting context
> > > information but not for the heap. The reason is that in
> > > vacuum_error_callback, we are not printing additional information for
> > > phases VACUUM_ERRCB_PHASE_SCAN_HEAP and VACUUM_ERRCB_PHASE_VACUUM_HEAP
> > > when the block number is invalid. If we want to cover the 'info' messages,
> > > then won't it be better if we print a message in those phases even when
> > > the block number is invalid (something like 'while scanning relation
> > > \"%s.%s\"')
> >
> > Yeah, there is an inconsistency. I agree to print the message even
> > when the block number is invalid.
> >
>
> Okay, I will update this and send this patch and rebased patch to
> display offsets later today or tomorrow.
>

Attached are both the patches. The first one is to improve existing
error context information, so I think we should back-patch to 13. The
second one is to add additional vacuum error context information, so
that is for only HEAD. Does that make sense? Also, let me know if you
have any more comments.

-- 
With Regards,
Amit Kapila.


v7-0001-Improve-the-vacuum-error-context-phase-informatio.patch
Description: Binary data


v7-0002-Add-additional-information-in-the-vacuum-error-co.patch
Description: Binary data


Small doubt on update a partition when some rows need to move among partition

2020-08-20 Thread movead...@highgo.ca
Hello,

I am trying to handle the limitation that we can't move a tuple to another
partition when the move is caused by a BEFORE trigger, and I have two
questions:

The first issue:
In the ExecBRUpdateTriggers() and ExecBRInsertTriggers() functions, why do
we need to check the result slot after every trigger call? Shouldn't we
check the result slot only after all triggers have been called?

For example, suppose we have a table t1(i int, j int), a BEFORE INSERT
TRIGGER on t1 that makes i - 1, and another BEFORE INSERT TRIGGER on t1
that makes i + 1. If the first trigger causes a partition move, the insert
query will be interrupted. However, the partition would not change after
all triggers are called.

The second issue:
I read the code for a partition move caused by an update: it deletes the
tuple from the old partition and inserts a new tuple into another
partition. But during the insert, the triggers on the new partition fire,
so the result value may be changed again. I want to know whether this is
intentional. In my mind, an insert produced by a partition move should
not fire BEFORE triggers again.


I made an initial patch along these lines; sorry if I am missing some of
the historical discussion.




Regards,
Highgo Software (Canada/China/Pakistan) 
URL : www.highgo.ca 
EMAIL: mailto:movead(dot)li(at)highgo(dot)ca


partition_move_issues.patch
Description: Binary data


Re: Libpq support to connect to standby server as priority

2020-08-20 Thread Peter Smith
On Thu, Aug 20, 2020 at 10:26 AM Greg Nancarrow  wrote:

> I have updated the patch (attached) based on your comments, with
> adjustments made for additional changes based on feedback (which I
> tend to agree with) from Robert Haas and Tsunakawa san, who suggested
> read-write/read-only should be functionally different to
> primary/standby, and not just have "read-write" a synonym for
> "primary".
> I also thought it appropriate to remove "read-write", "standby" and
> "prefer-standby" from accepted values for "target_server_type"
> (instead just support "secondary" and "prefer-secondary") to match the
> similar targetServerType PGJDBC option.
> So currently have as supported option values:
>
> target_session_attrs:
> any/read-write/read-only/primary/standby(/secondary)/prefer-standby(/prefer-secondary)
> target_server_type: any/primary/secondary/prefer-secondary
>

+1 to your changes for the option values of these 2 variables.

Thanks for addressing my previous review comments in the v18 patch.

I have re-reviewed v18. Below are some additional (mostly minor)
things I noticed.



COMMENT (help text)

The help text is probably accurate but it does seem a bit confusing still.

Example1:

+   
+If this parameter is set to read-write,
only a connection in which
+read-write transactions are accepted by default is considered
acceptable. To determine
+whether the server supports read-write transactions, then if
the server is version 14
+or greater, the support of read-write transactions is
determined by the value of the
+transaction_read_only configuration
parameter that is reported by
+the server upon successful connection. Otherwise if the
server is prior to version 14,
+the query SHOW transaction_read_only will
be sent upon any successful
+connection; if it returns on, it means the
server doesn't support
+read-write transactions.
+   

That fragment "To determine whether the server supports read-write
transactions, then" seems redundant.

Example2:

The Parameter Value descriptions seem inconsistently worded. e.g.
* "read-write" gives details about how "SHOW transaction_read_only"
can be called to decide r/w server.
* but then "read-only" doesn't mention about it
* but then "primary" does
* but then "standby" doesn't

IMO if there were some up-front paragraphs saying how the different
versions calculate the r/w support and recovery mode, then all the
different parameter values could be expressed in a much simpler way with
less repetition (e.g. they could all look like the "read-only" one
does now).

e.g. I mean something similar to this (which is the same wording as yours,
just rearranged a bit):
--
SERVER STATES

If the server is version 14 or greater, the support of read-write
transactions is determined by the value of the transaction_read_only
configuration parameter that is reported by the server upon successful
connection. Otherwise if the server is prior to version 14, the query
SHOW transaction_read_only will be sent upon any successful
connection; if it returns on, it means the server doesn't support
read-write transactions.

The recovery mode state is determined by the value of the in_recovery
configuration parameter that is reported by the server upon successful
connection.

PARAMETER VALUES

If this parameter is set to read-write, only a connection in which
read-write transactions are accepted by default is considered
acceptable.

If this parameter is set to read-only, only a connection in which
read-only transactions are accepted by default is considered
acceptable.

If this parameter is set to primary, then if the server is version 14
or greater, only a connection in which the server is not in recovery
mode is considered acceptable. Otherwise, if the server is prior to
version 14, only a connection in which read-write transactions are
accepted by default is considered acceptable.

If this parameter is set to standby, then if the server is version 14
or greater, only a connection in which the server is in recovery mode
is considered acceptable. Otherwise, if the server is prior to version
14, only a connection for which the server only supports read-only
transactions is considered acceptable.

If this parameter is set to prefer-standby, then if the server is
version 14 or greater, a connection in which the server is in recovery
mode is preferred. Otherwise, if the server is prior to version 14, a
connection for which the server only supports read-only transactions
is preferred. If no such connections can be found, then a connection
in which the server is not in recovery mode (server is version 14 or
greater) or a connection for which the server supports read-write
transactions (server is prior to version 14) will be considered acceptable.
--
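The version-dependent detection in the suggested wording above boils down to a simple branch. A hypothetical sketch of the decision (illustrative only, not libpq's actual code or API):

```c
#include <stdbool.h>

/* Hypothetical sketch of the version-dependent read-only detection
 * described in the suggested wording; not libpq's actual code. */
bool
server_is_read_only(int server_version_num,
                    bool reported_transaction_read_only,
                    bool show_query_returned_on)
{
    if (server_version_num >= 140000)
        /* v14+: use the transaction_read_only value the server reports
         * at connection time */
        return reported_transaction_read_only;

    /* pre-14: fall back to sending "SHOW transaction_read_only" */
    return show_query_returned_on;
}
```

Factoring the check this way mirrors the documentation restructuring proposed above: state the detection rule once, then describe each parameter value in terms of it.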




COMMENT fe-connect.c (sizeof)

- "Target-Session-Attrs", "", 11, /* sizeof("read-write") = 11 */
+ "Target-Session-Attrs", "", 15, /* sizeof("prefer-standby") = 15 */

You said changing 

Re: extension patch of CREATE OR REPLACE TRIGGER

2020-08-20 Thread Peter Smith
On Thu, Aug 20, 2020 at 12:11 PM osumi.takami...@fujitsu.com
 wrote:
> I have fixed my patch again.

Hi Osumi-san,

FYI - The latest patch (v06) has conflicts when applied.

Kind Regards,
Peter Smith.
Fujitsu Australia




Re: new heapcheck contrib module

2020-08-20 Thread Amul Sul
On Thu, Aug 20, 2020 at 8:00 AM Mark Dilger
 wrote:
>
>
>
> > On Aug 16, 2020, at 9:37 PM, Amul Sul  wrote:
> >
> > In addition to this, I found a few more things while reading the v13
> > patch, as below:
> >
> > Patch v13-0001:
> >
> > -
> > +#include "amcheck.h"
> >
> > Not in correct order.
>
> Fixed.
>
> > +typedef struct BtreeCheckContext
> > +{
> > + TupleDesc tupdesc;
> > + Tuplestorestate *tupstore;
> > + bool is_corrupt;
> > + bool on_error_stop;
> > +} BtreeCheckContext;
> >
> > Unnecessary spaces/tabs between } and BtreeCheckContext.
>
> This refers to a change in verify_nbtree.c that has been removed.  Per 
> discussions with Peter and Robert, I have simply withdrawn that portion of 
> the patch.
>
> > static void bt_index_check_internal(Oid indrelid, bool parentcheck,
> > - bool heapallindexed, bool rootdescend);
> > + bool heapallindexed, bool rootdescend,
> > + BtreeCheckContext * ctx);
> >
> > Unnecessary space between * and ctx. The same changes needed for other 
> > places as
> > well.
>
> Same as above.  The changes to verify_nbtree.c have been withdrawn.
>
> > ---
> >
> > Patch v13-0002:
> >
> > +-- partitioned tables (the parent ones) don't have visibility maps
> > +create table test_partitioned (a int, b text default repeat('x', 5000))
> > + partition by list (a);
> > +-- these should all fail
> > +select * from verify_heapam('test_partitioned',
> > + on_error_stop := false,
> > + skip := NULL,
> > + startblock := NULL,
> > + endblock := NULL);
> > +ERROR:  "test_partitioned" is not a table, materialized view, or TOAST 
> > table
> > +create table test_partition partition of test_partitioned for values in 
> > (1);
> > +create index test_index on test_partition (a);
> >
> > Can't we make it work? If the input is partitioned, I think we could
> > collect all its leaf partitions and process them one by one. Thoughts?
>
> I was following the example from pg_visibility.  I haven't thought about your 
> proposal enough to have much opinion as yet, except that if we do this for 
> pg_amcheck we should do likewise to pg_visibility, for consistency of the 
> user interface.
>

pg_visibility has existed since before declarative partitioning came in;
I think it's time to improve that as well.

> > + ctx->chunkno++;
> >
> > Instead of incrementing chunkno in check_toast_tuple(), I think the
> > increment should happen at the caller -- just after the check_toast_tuple() call.
>
> I agree.
>
> > ---
> >
> > Patch v13-0003:
> >
> > + resetPQExpBuffer(query);
> > + destroyPQExpBuffer(query);
> >
> > resetPQExpBuffer() will be unnecessary if the next call is 
> > destroyPQExpBuffer().
>
> Thanks.  I removed it in cases where destroyPQExpBuffer is obviously the very 
> next call.
>
> > + appendPQExpBuffer(query,
> > +   "SELECT c.relname, v.blkno, v.offnum, v.lp_off, "
> > +   "v.lp_flags, v.lp_len, v.attnum, v.chunk, v.msg"
> > +   "\nFROM verify_heapam(rel := %u, on_error_stop := %s, "
> > +   "skip := %s, startblock := %s, endblock := %s) v, "
> > +   "pg_class c"
> > +   "\nWHERE c.oid = %u",
> > +   tbloid, stop, skip, settings.startblock,
> > +   settings.endblock, tbloid
> >
> > pg_class should be schema-qualified like elsewhere.
>
> Agreed, and changed.
>
> > IIUC, pg_class is meant to
> > get relname only, instead, we could use '%u'::pg_catalog.regclass in the 
> > target
> > list for the relname. Thoughts?
>
> get_table_check_list() creates the list of all tables to be checked, which 
> check_tables() then iterates over, calling check_table() for each one.  I 
> think some verification that the table still exists is in order.  Using 
> '%u'::pg_catalog.regclass for a table that has since been dropped would pass 
> in the old table Oid and draw an error of the 'ERROR:  could not open 
> relation with OID 36311' variety, whereas the current coding will just skip 
> the dropped table.
>
> > Also I think we should skip '\n' from the query string (see 
> > appendPQExpBuffer()
> > in pg_dump.c)
>
> I'm not sure I understand.  pg_dump.c uses "\n" in query strings it passes to 
> appendPQExpBuffer(), in a manner very similar to what this patch does.
>

I see there is a mix of styles; I was referring to dumpDatabase() from
pg_dump.c, which doesn't include '\n'.

> > + appendPQExpBuffer(query,
> > +   "SELECT i.indexrelid"
> > +   "\nFROM pg_catalog.pg_index i, pg_catalog.pg_class c"
> > +   "\nWHERE i.indexrelid = c.oid"
> > +   "\nAND c.relam = %u"
> > +   "\nAND i.indrelid = %u",
> > +   BTREE_AM_OID, tbloid);
> > +
> > + ExecuteSqlStatement("RESET search_path");
> > + res = ExecuteSqlQuery(query->data, PGRES_TUPLES_OK);
> > + PQclear(ExecuteSqlQueryForSingleRow(ALWAYS_SECURE_SEARCH_PATH_SQL));
> >
> > I don't think we need the search_path query. The main query doesn't have any
> > dependencies on it.  Same is in check_indexes(), check_index (),
> > expand_table_name_patterns() & get_table_check_list().
> > Correct me if I am missing something.
>
> Right.
>
> > + output = PageOutput(lines + 2, 

Re: Fix a couple of typos in JIT

2020-08-20 Thread Abhijit Menon-Sen
At 2020-08-20 22:51:41 +1200, dgrowle...@gmail.com wrote:
>
> > +This is done at query execution time, possibly even only in cases where
> > +the relevant task is done a number of times, makes it JIT, rather than
> > +ahead-of-time (AOT). Given the way JIT compilation is used in PostgreSQL,
> > +the lines between interpretation, AOT and JIT are somewhat blurry.
> > […]
> 
> Oh, I see. I missed that. Perhaps it would be better changed to "The
> fact that this"

Or maybe even:

This is JIT, rather than ahead-of-time (AOT) compilation, because it
is done at query execution time, and perhaps only in cases where the
relevant task is repeated a number of times. Given the way …

-- Abhijit




Re: New statistics for tuning WAL buffer size

2020-08-20 Thread Fujii Masao




On 2020/08/20 20:01, Fujii Masao wrote:



On 2020/08/19 14:10, Masahiro Ikeda wrote:

On 2020-08-19 13:49, tsunakawa.ta...@fujitsu.com wrote:

From: Masahiro Ikeda 

If my understanding is correct, we have to measure the performance
impact first.
Do you know if HariBabu is still trying to solve it? If not, I will try to
modify the patches to apply to HEAD.


No, he's not doing it anymore.  It'd be great if you could resume it.


OK, thanks.


However, I recommend sharing your understanding about what were the
issues with those two threads and how you're trying to solve them.
Was the performance overhead the blocker in both of the threads?


In my understanding, some comments are not yet resolved in either thread.
I think the following work remains:

1) Modify the patches to apply to HEAD
2) Get consensus on what metrics we collect and how to use them for tuning.


I agree with exposing the number of WAL writes caused by the WAL buffers
being full. It's helpful when tuning wal_buffers size. Haribabu separated
that number into two fields in his patch; one is the number of WAL writes
by backends, and the other is by background processes and workers. But I'm
not sure how useful such a separation is. I'm ok with just one field for
that number.


Just an idea: it may be worth exposing the number of times a new WAL file
is created and zero-filled. This initialization may have an impact on the
performance of write-heavy workloads generating lots of WAL. If this
number is reported high, we can tune WAL-related parameters so that more
"recycled" WAL files are kept, reducing the number of these
initializations.
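Conceptually, either proposed metric is just a counter bumped on the relevant code path. A hypothetical sketch of the accounting (illustrative names, not PostgreSQL's):

```c
#include <stdbool.h>

/* Hypothetical accounting structure for the counter discussed above;
 * these names are illustrative, not PostgreSQL's. */
typedef struct WalWriteStats
{
    long writes_total;        /* all WAL writes */
    long writes_buffer_full;  /* writes forced because wal_buffers was full */
} WalWriteStats;

WalWriteStats wal_stats = {0, 0};

/* Bump the counters on every WAL write; the caller indicates whether the
 * write happened because the WAL buffers filled up. */
void
record_wal_write(bool caused_by_full_buffers)
{
    wal_stats.writes_total++;
    if (caused_by_full_buffers)
        wal_stats.writes_buffer_full++;
}
```

A high writes_buffer_full relative to writes_total would then suggest increasing wal_buffers, which is the tuning signal discussed in this thread.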

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION




Re: New statistics for tuning WAL buffer size

2020-08-20 Thread Fujii Masao




On 2020/08/19 14:10, Masahiro Ikeda wrote:

On 2020-08-19 13:49, tsunakawa.ta...@fujitsu.com wrote:

From: Masahiro Ikeda 

If my understanding is correct, we have to measure the performance
impact first.
Do you know if HariBabu is still trying to solve it? If not, I will try to
modify the patches to apply to HEAD.


No, he's not doing it anymore.  It'd be great if you could resume it.


OK, thanks.


However, I recommend sharing your understanding of what the issues
with those two threads were and how you're trying to solve them.
Was the performance overhead the blocker in both of the threads?


In my understanding, some comments in both of the threads are not yet
resolved. I think the following work remains:

1) Modify the patches to apply to HEAD
2) Get consensus on what metrics we collect and how to use them for tuning.


I agree to expose the number of WAL writes caused by the WAL buffers
being full. It's helpful when tuning the wal_buffers size. Haribabu
separated that number into two fields in his patch: one is the number of
WAL writes by backends, and the other is by background processes and
workers. But I'm not sure how useful such a separation is. I'm OK with
just one field for that number.

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION




Re: Fix a couple of typos in JIT

2020-08-20 Thread David Rowley
On Thu, 20 Aug 2020 at 22:29, Abhijit Menon-Sen  wrote:
>
> At 2020-08-20 22:19:49 +1200, dgrowle...@gmail.com wrote:
> >
> > I was just looking over the JIT code and noticed a few comment and
> > documentation typos.  The attached fixes them.
>
> The first change does not seem to be correct:
>
> -That this is done at query execution time, possibly even only in cases
> -where the relevant task is done a number of times, makes it JIT,
> -rather than ahead-of-time (AOT). Given the way JIT compilation is used
> -in PostgreSQL, the lines between interpretation, AOT and JIT are
> -somewhat blurry.
> +This is done at query execution time, possibly even only in cases where
> +the relevant task is done a number of times, makes it JIT, rather than
> +ahead-of-time (AOT). Given the way JIT compilation is used in PostgreSQL,
> +the lines between interpretation, AOT and JIT are somewhat blurry.
>
> The original sentence may not be the most shining example of
> sentence-ry, but it is correct, and removing the "That" breaks it.

Oh, I see. I missed that. Perhaps it would be better changed to "The
fact that this"

David




Re: Fix a couple of typos in JIT

2020-08-20 Thread Abhijit Menon-Sen
At 2020-08-20 22:19:49 +1200, dgrowle...@gmail.com wrote:
>
> I was just looking over the JIT code and noticed a few comment and
> documentation typos.  The attached fixes them.

The first change does not seem to be correct:

-That this is done at query execution time, possibly even only in cases
-where the relevant task is done a number of times, makes it JIT,
-rather than ahead-of-time (AOT). Given the way JIT compilation is used
-in PostgreSQL, the lines between interpretation, AOT and JIT are
-somewhat blurry.
+This is done at query execution time, possibly even only in cases where
+the relevant task is done a number of times, makes it JIT, rather than
+ahead-of-time (AOT). Given the way JIT compilation is used in PostgreSQL,
+the lines between interpretation, AOT and JIT are somewhat blurry.

The original sentence may not be the most shining example of
sentence-ry, but it is correct, and removing the "That" breaks it.

-- Abhijit




Re: [PG13] Planning (time + buffers) data structure in explain plan (format text)

2020-08-20 Thread David Rowley
On Thu, 20 Aug 2020 at 19:58, Fujii Masao  wrote:
>
> On 2020/08/20 13:00, David Rowley wrote:
> > I forgot to mention earlier, you'll also need to remove the part in
> > the docs that mentions BUFFERS requires ANALYZE.
>
> Thanks for the review! I removed that.
> Attached is the updated version of the patch.
> I also added the regression test performing "explain (buffers on)"
> without "analyze" option.

Looks good to me.

David




Fix a couple of typos in JIT

2020-08-20 Thread David Rowley
Hi,

I was just looking over the JIT code and noticed a few comment and
documentation typos.  The attached fixes them.

I'll push this in my UTC+12 morning if nobody objects to any of the
changes before then.

Unsure if it'll be worth backpatching or not.

David


fix_a_few_jit_typos.patch
Description: Binary data


Re: PATCH: logical_work_mem and logical streaming of large in-progress transactions

2020-08-20 Thread Amit Kapila
On Thu, Aug 20, 2020 at 1:41 PM Dilip Kumar  wrote:
>
> On Wed, Aug 19, 2020 at 1:35 PM Dilip Kumar  wrote:
> >
> > On Wed, Aug 19, 2020 at 12:20 PM Amit Kapila  
> > wrote:
> > >
> > > On Wed, Aug 19, 2020 at 10:10 AM Amit Kapila  
> > > wrote:
> > > >
> > > > On Mon, Aug 17, 2020 at 6:29 PM Dilip Kumar  
> > > > wrote:
> > > > >
> > > > >
> > > > > In last patch v49-0001, there is one issue,  Basically, I have called
> > > > > BufFileFlush in all the cases.  But, ideally, we can not call this if
> > > > > the underlying files are deleted/truncated because those files/blocks
> > > > > might not exist now.  So I think if the truncate position is within
> > > > > the same buffer we just need to adjust the buffer,  otherwise we just
> > > > > need to set the currFile and currOffset to the absolute number and set
> > > > > the pos and nbytes 0.  Attached patch fixes this issue.
> > > > >
> > > >
> > > > Few comments on the latest patch v50-0001-Extend-the-BufFile-interface
> > > > 1.
> > > > +
> > > > + /*
> > > > + * If the truncate point is within existing buffer then we can just
> > > > + * adjust pos-within-buffer, without flushing buffer.  Otherwise,
> > > > + * we don't need to do anything because we have already 
> > > > deleted/truncated
> > > > + * the underlying files.
> > > > + */
> > > > + if (curFile == file->curFile &&
> > > > + curOffset >= file->curOffset &&
> > > > + curOffset <= file->curOffset + file->nbytes)
> > > > + {
> > > > + file->pos = (int) (curOffset - file->curOffset);
> > > > + return;
> > > > + }
> > > >
> > > > I think in this case you have set the position correctly but what
> > > > about file->nbytes? In BufFileSeek, it was okay not to update 'nbytes'
> > > > because the contents of the buffer are still valid but I don't think
> > > > the same is true here.
> > > >
> > >
> > > I think you need to set 'nbytes' to curOffset as per your current
> > > patch as that is the new size of the file.
> > > --- a/src/backend/storage/file/buffile.c
> > > +++ b/src/backend/storage/file/buffile.c
> > > @@ -912,6 +912,7 @@ BufFileTruncateShared(BufFile *file, int fileno,
> > > off_t offset)
> > > curOffset <= file->curOffset + file->nbytes)
> > > {
> > > file->pos = (int) (curOffset - file->curOffset);
> > > +   file->nbytes = (int) curOffset;
> > > return;
> > > }
> > >
> > > Also, what about file 'numFiles', that can also change due to the
> > > removal of certain files, shouldn't that be also set in this case
> >
> > Right, we need to set the numFile.  I will fix this as well.
>
> I think there are a couple more problems in the truncate APIs:
> basically, if the curFile and curOffset are already smaller than the
> truncate location, the truncate should not change them.  So the
> truncate should only change the curFile and curOffset if it is
> truncating the part of the file where the curFile or curOffset is
> pointing.
>

Right, I think this can happen if one has changed those by BufFileSeek
before doing truncate. We should fix that case as well.

>  I will work on those along with your other comments and
> submit the updated patch.
>

Thanks.

-- 
With Regards,
Amit Kapila.




Re: Refactor pg_rewind code and make it work against a standby

2020-08-20 Thread Kyotaro Horiguchi
Hello.

At Wed, 19 Aug 2020 15:50:16 +0300, Heikki Linnakangas  wrote 
in 
> Hi,
> 
> I started to hack on making pg_rewind crash-safe (see [1]), but I
> quickly got side-tracked into refactoring and tidying up the code
> in general. I ended up with this series of patches:

^^;

> The first four patches are just refactoring that make the code and the
> logic more readable. Tom Lane commented about the messy comments
> earlier (see [2]), and I hope these patches will alleviate that
> confusion. See commit messages for details.

0001: It looks fine. The new location is reasonable, but adding one
extern is a bit annoying. I don't object to it, though.

0002: Rewording old->target and new->source makes the meaning far
   clearer. Moving the decision code into filemap_finalize is
   reasonable.

By the way, some of the rules remain in
process_source/target_file, for example, pg_wal when it is a
symlink, temporary directories, and excluded files.  The number of
excluded files doesn't seem so large that the early exclusion
gives much of an advantage.  They seem movable to
filemap_finalize, and we could get rid of the callbacks by doing
so.  Is there any reason the remaining rules should stay in the
callbacks?

0003: Thomas is proposing a sort template. It could be used here if committed.

0004:

 The names of many of the functions get an additional word "local",
 but I don't clearly get the meaning. But that's about linguistic sense
 and I'm not the best judge of it..
 
-rewind_copy_file_range(const char *path, off_t begin, off_t end, bool trunc)
+local_fetch_file_range(rewind_source *source, const char *path, uint64 off,

 The function actually copies the source range to the target file, so
 "fetch" might give a somewhat different meaning, but again that's a
 linguistic point (omitted..).


> The last patch refactors the logic in libpq_fetch.c, so that it no
> longer uses a temporary table in the source system. That allows using
> a hot standby server as the pg_rewind source.

That sounds nice.

> This doesn't do anything about pg_rewind's crash-safety yet, but I'll
> try work on that after these patches.
> 
> [1]
> https://www.postgresql.org/message-id/d8dcc760-8780-5084-f066-6d663801d2e2%40iki.fi
> 
> [2]
> https://www.postgresql.org/message-id/30255.1522711675%40sss.pgh.pa.us
> 
> - Heikki

I'm going to continue reviewing this later.

regards.

-- 
Kyotaro Horiguchi
NTT Open Source Software Center




Re: "ccold" left by reindex concurrently are droppable?

2020-08-20 Thread Julien Rouhaud
On Thu, Aug 20, 2020 at 7:17 AM Michael Paquier  wrote:
>
> On Wed, Aug 19, 2020 at 05:13:12PM -0400, Alvaro Herrera wrote:
> > In other words I propose to reword this paragraph as follows:
> >
> >If the transient index created during the concurrent operation is
> >suffixed ccnew, the recommended recovery method
> >is to drop the invalid index using DROP INDEX,
> >and try to perform REINDEX CONCURRENTLY again.
> >If the transient index is instead suffixed ccold,
> >it corresponds to the original index which we failed to drop;
> >the recommended recovery method is to just drop said index, since the
> >rebuild proper has been successful.
>
> Yes, that's an improvement.  It would be better to backpatch that.  So
> +1 from me.

+1, that's an improvement and should be backpatched.

>
> > (The original talks about "the concurrent index", which seems somewhat
> > sloppy thinking.  I used the term "transient index" instead.)
>
> Using transient to refer to an index aimed at being ephemeral sounds
> fine to me in this context.

Agreed.




Re: Include access method in listTables output

2020-08-20 Thread Georgios

‐‐‐ Original Message ‐‐‐
On Thursday, 20 August 2020 07:31, Justin Pryzby  wrote:

> On Mon, Aug 17, 2020 at 11:10:05PM +0530, vignesh C wrote:
>
> > On Sat, Aug 1, 2020 at 8:12 AM vignesh C vignes...@gmail.com wrote:
> >
> > > > > +-- access method column should not be displayed for sequences
> > > > > +\ds+
> > > > >
> > > > > -List of relations
> > > > >
> > > > >
> > > > > -   Schema | Name | Type | Owner | Persistence | Size | Description
> > > > > ++--+--+---+-+--+-
> > > > > +(0 rows)
> > > > > We can include one test for view.
> > > > >
> >
> > I felt adding one test for view is good and added it.
> > Attached a new patch for the same.
> > I felt patch is in shape for committer to have a look at this.
>
> The patch tester shows it's failing xmllint; could you send a fixed patch?
>
> /usr/bin/xmllint --path . --noout --valid postgres.sgml
> ref/psql-ref.sgml:1189: parser error : Opening and ending tag mismatch: link 
> line 1187 and para
>
> http://cfbot.cputube.org/georgios-kokolatos.html

Thank you for pointing it out!

Please find version 7 attached, which hopefully addresses the error,
along with a proper expansion of the test coverage and removal of
recently introduced whitespace warnings.

>
> --
>
> Justin



0001-Include-access-method-in-listTables-output-v7.patch
Description: Binary data


Re: PATCH: logical_work_mem and logical streaming of large in-progress transactions

2020-08-20 Thread Dilip Kumar
On Wed, Aug 19, 2020 at 1:35 PM Dilip Kumar  wrote:
>
> On Wed, Aug 19, 2020 at 12:20 PM Amit Kapila  wrote:
> >
> > On Wed, Aug 19, 2020 at 10:10 AM Amit Kapila  
> > wrote:
> > >
> > > On Mon, Aug 17, 2020 at 6:29 PM Dilip Kumar  wrote:
> > > >
> > > >
> > > > In last patch v49-0001, there is one issue,  Basically, I have called
> > > > BufFileFlush in all the cases.  But, ideally, we can not call this if
> > > > the underlying files are deleted/truncated because those files/blocks
> > > > might not exist now.  So I think if the truncate position is within
> > > > the same buffer we just need to adjust the buffer,  otherwise we just
> > > > need to set the currFile and currOffset to the absolute number and set
> > > > the pos and nbytes 0.  Attached patch fixes this issue.
> > > >
> > >
> > > Few comments on the latest patch v50-0001-Extend-the-BufFile-interface
> > > 1.
> > > +
> > > + /*
> > > + * If the truncate point is within existing buffer then we can just
> > > + * adjust pos-within-buffer, without flushing buffer.  Otherwise,
> > > + * we don't need to do anything because we have already deleted/truncated
> > > + * the underlying files.
> > > + */
> > > + if (curFile == file->curFile &&
> > > + curOffset >= file->curOffset &&
> > > + curOffset <= file->curOffset + file->nbytes)
> > > + {
> > > + file->pos = (int) (curOffset - file->curOffset);
> > > + return;
> > > + }
> > >
> > > I think in this case you have set the position correctly but what
> > > about file->nbytes? In BufFileSeek, it was okay not to update 'nbytes'
> > > because the contents of the buffer are still valid but I don't think
> > > the same is true here.
> > >
> >
> > I think you need to set 'nbytes' to curOffset as per your current
> > patch as that is the new size of the file.
> > --- a/src/backend/storage/file/buffile.c
> > +++ b/src/backend/storage/file/buffile.c
> > @@ -912,6 +912,7 @@ BufFileTruncateShared(BufFile *file, int fileno,
> > off_t offset)
> > curOffset <= file->curOffset + file->nbytes)
> > {
> > file->pos = (int) (curOffset - file->curOffset);
> > +   file->nbytes = (int) curOffset;
> > return;
> > }
> >
> > Also, what about file 'numFiles', that can also change due to the
> > removal of certain files, shouldn't that be also set in this case
>
> Right, we need to set the numFile.  I will fix this as well.

I think there are a couple more problems in the truncate APIs:
basically, if the curFile and curOffset are already smaller than the
truncate location, the truncate should not change them.  So the
truncate should only change the curFile and curOffset if it is
truncating the part of the file where the curFile or curOffset is
pointing.  I will work on those along with your other comments and
submit the updated patch.

-- 
Regards,
Dilip Kumar
EnterpriseDB: http://www.enterprisedb.com




Re: please update ps display for recovery checkpoint

2020-08-20 Thread Michael Paquier
On Wed, Aug 19, 2020 at 12:20:50AM +, k.jami...@fujitsu.com wrote:
> On Wednesday, August 19, 2020 7:53 AM (GMT+9), Justin Pryzby wrote: 
>> During crash recovery, the server writes this to log:
>> Please change to say "recovery checkpoint" or similar, as I mentioned here.
>> https://www.postgresql.org/message-id/2020011820.GP26045@telsasoft.com
> 
> Yes, I agree that it is helpful to tell users about that.

That could be helpful.  Wouldn't it be better to use "end-of-recovery
checkpoint" instead?  That's the common wording in the code comments.

I don't see the point of patch 0002.  In the same paragraph, we
already know that this applies to any checkpoints.

> About 0003 patch, there are similar phrases in bgwriter_flush_after and 
> backend_flush_after. Should those be updated too?

Yep, makes sense.
--
Michael


signature.asc
Description: PGP signature


Re: [PG13] Planning (time + buffers) data structure in explain plan (format text)

2020-08-20 Thread Fujii Masao




On 2020/08/20 17:03, Pierre Giraud wrote:

Can you please show what the plan would look like for the following?

=# explain (buffers on, summary on, format JSON) select * from t;


With my patch, the following is reported in that case.

=# explain (buffers on, summary on, format JSON) select * from pg_class;
 QUERY PLAN

 [ +
   {   +
 "Plan": { +
   "Node Type": "Seq Scan",+
   "Parallel Aware": false,+
   "Relation Name": "pg_class",+
   "Alias": "pg_class",+
   "Startup Cost": 0.00,   +
   "Total Cost": 16.87,+
   "Plan Rows": 387,   +
   "Plan Width": 265,  +
   "Shared Hit Blocks": 0, +
   "Shared Read Blocks": 0,+
   "Shared Dirtied Blocks": 0, +
   "Shared Written Blocks": 0, +
   "Local Hit Blocks": 0,  +
   "Local Read Blocks": 0, +
   "Local Dirtied Blocks": 0,  +
   "Local Written Blocks": 0,  +
   "Temp Read Blocks": 0,  +
   "Temp Written Blocks": 0+
 },+
 "Planning": { +
   "Shared Hit Blocks": 103,   +
   "Shared Read Blocks": 12,   +
   "Shared Dirtied Blocks": 0, +
   "Shared Written Blocks": 0, +
   "Local Hit Blocks": 0,  +
   "Local Read Blocks": 0, +
   "Local Dirtied Blocks": 0,  +
   "Local Written Blocks": 0,  +
   "Temp Read Blocks": 0,  +
   "Temp Written Blocks": 0+
 },+
 "Planning Time": 8.132+
   }   +
 ]
(1 row)

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION




Re: [PG13] Planning (time + buffers) data structure in explain plan (format text)

2020-08-20 Thread Pierre Giraud
Can you please show what the plan would look like for the following?

=# explain (buffers on, summary on, format JSON) select * from t;



Le 20/08/2020 à 09:58, Fujii Masao a écrit :
> 
> 
> On 2020/08/20 13:00, David Rowley wrote:
>> On Thu, 20 Aug 2020 at 03:31, Fujii Masao
>>  wrote:
>>> With the patch, for example, whatever "summary" setting is, "buffers on"
>>> displays the planner's buffer usage if it happens.
>>
>> I forgot to mention earlier, you'll also need to remove the part in
>> the docs that mentions BUFFERS requires ANALYZE.
> 
> Thanks for the review! I removed that.
> Attached is the updated version of the patch.
> I also added the regression test performing "explain (buffers on)"
> without "analyze" option.
> 
> Regards,
> 




Re: [PG13] Planning (time + buffers) data structure in explain plan (format text)

2020-08-20 Thread Fujii Masao



On 2020/08/20 13:00, David Rowley wrote:

On Thu, 20 Aug 2020 at 03:31, Fujii Masao  wrote:

With the patch, for example, whatever "summary" setting is, "buffers on"
displays the planner's buffer usage if it happens.


I forgot to mention earlier, you'll also need to remove the part in
the docs that mentions BUFFERS requires ANALYZE.


Thanks for the review! I removed that.
Attached is the updated version of the patch.
I also added the regression test performing "explain (buffers on)"
without "analyze" option.

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION
diff --git a/doc/src/sgml/ref/explain.sgml b/doc/src/sgml/ref/explain.sgml
index 1c19e254dc..906b2ccd50 100644
--- a/doc/src/sgml/ref/explain.sgml
+++ b/doc/src/sgml/ref/explain.sgml
@@ -187,8 +187,7 @@ ROLLBACK;
   query processing.
   The number of blocks shown for an
   upper-level node includes those used by all its child nodes.  In text
-  format, only non-zero values are printed.  This parameter may only be
-  used when ANALYZE is also enabled.  It defaults to
+  format, only non-zero values are printed.  It defaults to
   FALSE.
  
 
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 30e0a7ee7f..c98c9b5547 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -116,7 +116,8 @@ static void show_instrumentation_count(const char *qlabel, int which,
 static void show_foreignscan_info(ForeignScanState *fsstate, ExplainState *es);
 static void show_eval_params(Bitmapset *bms_params, ExplainState *es);
 static const char *explain_get_index_name(Oid indexId);
-static void show_buffer_usage(ExplainState *es, const BufferUsage *usage);
+static void show_buffer_usage(ExplainState *es, const BufferUsage *usage,
+ bool planning);
 static void show_wal_usage(ExplainState *es, const WalUsage *usage);
 static void ExplainIndexScanDetails(Oid indexid, ScanDirection indexorderdir,

ExplainState *es);
@@ -221,11 +222,6 @@ ExplainQuery(ParseState *pstate, ExplainStmt *stmt,
parser_errposition(pstate, opt->location)));
}
 
-   if (es->buffers && !es->analyze)
-   ereport(ERROR,
-   (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-errmsg("EXPLAIN option BUFFERS requires ANALYZE")));
-
if (es->wal && !es->analyze)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
@@ -586,8 +582,13 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es,
/* Create textual dump of plan tree */
ExplainPrintPlan(es, queryDesc);
 
-   if (es->summary && (planduration || bufusage))
+   /* Show buffer usage in planning */
+   if (bufusage)
+   {
ExplainOpenGroup("Planning", "Planning", true, es);
+   show_buffer_usage(es, bufusage, true);
+   ExplainCloseGroup("Planning", "Planning", true, es);
+   }
 
if (es->summary && planduration)
{
@@ -596,19 +597,6 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es,
ExplainPropertyFloat("Planning Time", "ms", 1000.0 * plantime, 3, es);
}
 
-   /* Show buffer usage */
-   if (es->summary && bufusage)
-   {
-   if (es->format == EXPLAIN_FORMAT_TEXT)
-   es->indent++;
-   show_buffer_usage(es, bufusage);
-   if (es->format == EXPLAIN_FORMAT_TEXT)
-   es->indent--;
-   }
-
-   if (es->summary && (planduration || bufusage))
-   ExplainCloseGroup("Planning", "Planning", true, es);
-
/* Print info about runtime of triggers */
if (es->analyze)
ExplainPrintTriggers(es, queryDesc);
@@ -1996,7 +1984,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 
/* Show buffer/WAL usage */
if (es->buffers && planstate->instrument)
-   show_buffer_usage(es, &planstate->instrument->bufusage);
+   show_buffer_usage(es, &planstate->instrument->bufusage, false);
if (es->wal && planstate->instrument)
show_wal_usage(es, &planstate->instrument->walusage);
 
@@ -2015,7 +2003,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 
ExplainOpenWorker(n, es);
if (es->buffers)
-   show_buffer_usage(es, &instrument->bufusage);
+   show_buffer_usage(es, &instrument->bufusage, false);
if (es->wal)
show_wal_usage(es, &instrument->walusage);
ExplainCloseWorker(n, es);
@@ -3301,7 +3289,7 @@ explain_get_index_name(Oid indexId)
  

Re: Asynchronous Append on postgres_fdw nodes.

2020-08-20 Thread Kyotaro Horiguchi
At Wed, 19 Aug 2020 23:25:36 -0500, Justin Pryzby  wrote 
in 
> On Thu, Jul 02, 2020 at 11:14:48AM +0900, Kyotaro Horiguchi wrote:
> > As the result of a discussion with Fujita-san off-list, I'm going to
> > hold off development until he decides whether mine or Thomas' is
> > better.
> 
> The latest patch doesn't apply so I set as WoA.
> https://commitfest.postgresql.org/29/2491/

Thanks. This is a rebased version.

At Fri, 14 Aug 2020 13:29:16 +1200, Thomas Munro  wrote 
in 
> Either way, we definitely need patch 0001.  One comment:
> 
> -CreateWaitEventSet(MemoryContext context, int nevents)
> +CreateWaitEventSet(MemoryContext context, ResourceOwner res, int nevents)
> 
> I wonder if it's better to have it receive ResourceOwner like that, or
> to have it capture CurrentResourceOwner.  I think the latter is more
> common in existing code.

There are no existing WaitEventSets belonging to a resowner, so
unconditionally capturing CurrentResourceOwner doesn't work well. I
could pass a bool instead, but that makes things more complex.

Come to think of "complex", the ExecAsync stuff in this patch might be
too much for a short-term solution if the executor overhaul comes
shortly (the patch of mine here as a whole is like that, though..). The
queueing stuff in postgres_fdw is, too.

regards.

-- 
Kyotaro Horiguchi
NTT Open Source Software Center
From 18176c9caa856c707ef6e8ab64bfc7f8abd9aea6 Mon Sep 17 00:00:00 2001
From: Kyotaro Horiguchi 
Date: Mon, 22 May 2017 12:42:58 +0900
Subject: [PATCH v6 1/3] Allow wait event set to be registered to resource
 owner

WaitEventSet needs to be released using a resource owner in certain
cases. This change adds a resowner field to WaitEventSet and allows the
creator of a WaitEventSet to specify a resource owner.
---
 src/backend/libpq/pqcomm.c|  2 +-
 src/backend/postmaster/pgstat.c   |  2 +-
 src/backend/postmaster/syslogger.c|  2 +-
 src/backend/storage/ipc/latch.c   | 20 ++--
 src/backend/utils/resowner/resowner.c | 67 +++
 src/include/storage/latch.h   |  4 +-
 src/include/utils/resowner_private.h  |  8 
 7 files changed, 98 insertions(+), 7 deletions(-)

diff --git a/src/backend/libpq/pqcomm.c b/src/backend/libpq/pqcomm.c
index ac986c0505..799fa5006d 100644
--- a/src/backend/libpq/pqcomm.c
+++ b/src/backend/libpq/pqcomm.c
@@ -218,7 +218,7 @@ pq_init(void)
 (errmsg("could not set socket to nonblocking mode: %m")));
 #endif
 
-	FeBeWaitSet = CreateWaitEventSet(TopMemoryContext, 3);
+	FeBeWaitSet = CreateWaitEventSet(TopMemoryContext, NULL, 3);
 	AddWaitEventToSet(FeBeWaitSet, WL_SOCKET_WRITEABLE, MyProcPort->sock,
 	  NULL, NULL);
 	AddWaitEventToSet(FeBeWaitSet, WL_LATCH_SET, -1, MyLatch, NULL);
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 73ce944fb1..9d6b3778b4 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -4488,7 +4488,7 @@ PgstatCollectorMain(int argc, char *argv[])
 	pgStatDBHash = pgstat_read_statsfiles(InvalidOid, true, true);
 
 	/* Prepare to wait for our latch or data in our socket. */
-	wes = CreateWaitEventSet(CurrentMemoryContext, 3);
+	wes = CreateWaitEventSet(CurrentMemoryContext, NULL, 3);
 	AddWaitEventToSet(wes, WL_LATCH_SET, PGINVALID_SOCKET, MyLatch, NULL);
 	AddWaitEventToSet(wes, WL_POSTMASTER_DEATH, PGINVALID_SOCKET, NULL, NULL);
 	AddWaitEventToSet(wes, WL_SOCKET_READABLE, pgStatSock, NULL, NULL);
diff --git a/src/backend/postmaster/syslogger.c b/src/backend/postmaster/syslogger.c
index ffcb54968f..a4de6d90e2 100644
--- a/src/backend/postmaster/syslogger.c
+++ b/src/backend/postmaster/syslogger.c
@@ -300,7 +300,7 @@ SysLoggerMain(int argc, char *argv[])
 	 * syslog pipe, which implies that all other backends have exited
 	 * (including the postmaster).
 	 */
-	wes = CreateWaitEventSet(CurrentMemoryContext, 2);
+	wes = CreateWaitEventSet(CurrentMemoryContext, NULL, 2);
 	AddWaitEventToSet(wes, WL_LATCH_SET, PGINVALID_SOCKET, MyLatch, NULL);
 #ifndef WIN32
 	AddWaitEventToSet(wes, WL_SOCKET_READABLE, syslogPipe[0], NULL, NULL);
diff --git a/src/backend/storage/ipc/latch.c b/src/backend/storage/ipc/latch.c
index 4153cc8557..e771ac9610 100644
--- a/src/backend/storage/ipc/latch.c
+++ b/src/backend/storage/ipc/latch.c
@@ -57,6 +57,7 @@
 #include "storage/pmsignal.h"
 #include "storage/shmem.h"
 #include "utils/memutils.h"
+#include "utils/resowner_private.h"
 
 /*
  * Select the fd readiness primitive to use. Normally the "most modern"
@@ -85,6 +86,8 @@ struct WaitEventSet
 	int			nevents;		/* number of registered events */
 	int			nevents_space;	/* maximum number of events in this set */
 
+	ResourceOwner	resowner;	/* Resource owner */
+
 	/*
 	 * Array, of nevents_space length, storing the definition of events this
 	 * set is waiting for.
@@ -257,7 +260,7 @@ InitializeLatchWaitSet(void)
 	Assert(LatchWaitSet == NULL);
 
 	/* Set up the WaitEventSet used by WaitLatch(). */
-	LatchWaitSet = 

Re: display offset along with block number in vacuum errors

2020-08-20 Thread Amit Kapila
On Thu, Aug 20, 2020 at 12:18 PM Masahiko Sawada
 wrote:
>
> On Thu, 20 Aug 2020 at 14:01, Amit Kapila  wrote:
> >
> > On Wed, Aug 19, 2020 at 12:54 PM Masahiko Sawada
> >  wrote:
> >
> > Here, we can notice that for the index, we are getting context
> > information but not for the heap. The reason is that in
> > vacuum_error_callback, we are not printing additional information for
> > phases VACUUM_ERRCB_PHASE_SCAN_HEAP and VACUUM_ERRCB_PHASE_VACUUM_HEAP
> > when block number is invalid. If we want to cover the 'info' messages
> > then won't it be better if we print a message in those phases even
> > block number is invalid (something like 'while scanning relation
> > \"%s.%s\"")
>
> Yeah, there is an inconsistency. I agree to print the message even
> when the block number is invalid.
>

Okay, I will update this and send this patch along with the rebased
patch to display offsets later today or tomorrow.

-- 
With Regards,
Amit Kapila.




Re: display offset along with block number in vacuum errors

2020-08-20 Thread Masahiko Sawada
On Thu, 20 Aug 2020 at 14:01, Amit Kapila  wrote:
>
> On Wed, Aug 19, 2020 at 12:54 PM Masahiko Sawada
>  wrote:
> >
> > On Tue, 18 Aug 2020 at 13:06, Amit Kapila  wrote:
> > >
> > >
> > > I don't think we need to expose LVRelStats. We can just pass the
> > > address of vacrelstats->offset_num to achieve what we want. I have
> > > tried that and it works, see the
> > > v6-0002-additinal-error-context-information-in-heap_page_ patch
> > > attached. Do you see any problem with it?
> >
> > Yes, you're right. I'm a bit concerned that the number of arguments
> > passed to heap_page_prune() might get higher when we need other values
> > to update for errcontext, but I'm okay with the current patch.
> >
>
> Yeah, we might need to think if we want to increase the number of
> parameters but not sure we need to worry at this stage. If required, I
> think we can either expose LVRelStats or extract a few parameters from
> it and form a separate structure that could be passed to
> heap_page_prune.

Agreed.

>
> > Currently, we're in SCAN_HEAP phase in heap_page_prune() but should it
> > be VACUUM_HEAP instead?
> >
>
> I think it is currently similar to what we do in progress reporting.
> We set to VACUUM_HEAP phase where the progress reporting is also set
> to *HEAP_BLKS_VACUUMED. Currently, heap_page_prune() is covered under
> *HEAP_BLKS_SCANNED, so I don't see a pressing need to change the error
> context phase for heap_page_prune(). And also, we need to add some
> more smarts in heap_page_prune() for this which I want to avoid.

Agreed.

>
> > Also, I've tested the patch with log_min_messages = 'info' and get the
> > following sever logs:
> >
> ..
> >
> > This is not directly related to the patch but it looks like we can
> > improve the current errcontext settings. For instance, the message
> > from lazy_vacuum_index(): there are two messages reporting the phases.
> > I've attached the patch that improves the current errcontext setting,
> > which can be applied before the patch adding offset number.
> >
>
> After your patch, I see output like below with log_min_messages=info,
>
> 2020-08-20 10:11:46.769 IST [2640] INFO:  scanned index "idx_test_c1"
> to remove 1 row versions
> 2020-08-20 10:11:46.769 IST [2640] DETAIL:  CPU: user: 0.06 s, system:
> 0.01 s, elapsed: 0.06 s
> 2020-08-20 10:11:46.769 IST [2640] CONTEXT:  while vacuuming index
> "idx_test_c1" of relation "public.test_vac"
>
> 2020-08-20 10:11:46.901 IST [2640] INFO:  scanned index "idx_test_c2"
> to remove 1 row versions
> 2020-08-20 10:11:46.901 IST [2640] DETAIL:  CPU: user: 0.10 s, system:
> 0.01 s, elapsed: 0.13 s
> 2020-08-20 10:11:46.901 IST [2640] CONTEXT:  while vacuuming index
> "idx_test_c2" of relation "public.test_vac"
>
> 2020-08-20 10:11:46.917 IST [2640] INFO:  "test_vac": removed 1
> row versions in 667 pages
> 2020-08-20 10:11:46.917 IST [2640] DETAIL:  CPU: user: 0.01 s, system:
> 0.00 s, elapsed: 0.01 s
>
> 2020-08-20 10:11:46.928 IST [2640] INFO:  index "idx_test_c1" now
> contains 5 row versions in 276 pages
> 2020-08-20 10:11:46.928 IST [2640] DETAIL:  1 index row versions
> were removed.
> 136 index pages have been deleted, 109 are currently reusable.
> CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s.
> 2020-08-20 10:11:46.928 IST [2640] CONTEXT:  while cleaning up index
> "idx_test_c1" of relation "public.test_vac"
>
> Here, we can notice that for the index, we are getting context
> information but not for the heap. The reason is that in
> vacuum_error_callback, we are not printing additional information for
> phases VACUUM_ERRCB_PHASE_SCAN_HEAP and VACUUM_ERRCB_PHASE_VACUUM_HEAP
> when the block number is invalid. If we want to cover the 'info'
> messages then won't it be better to print a message in those phases
> even when the block number is invalid (something like 'while scanning
> relation \"%s.%s\"')?

Yeah, there is an inconsistency. I agree to print the message even
when the block number is invalid. We're not actually doing any vacuum
work when printing the message, but it would be less confusing and
more consistent than printing the wrong phase.

Regards,

-- 
Masahiko Sawada
http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services




Re: Implement UNLOGGED clause for COPY FROM

2020-08-20 Thread Kyotaro Horiguchi
At Thu, 20 Aug 2020 00:18:52 +, "osumi.takami...@fujitsu.com" 
 wrote in 
> Hello.
> 
> Apologies for the delay.
> > > When a server crash occurs during data loading of COPY UNLOGGED,
> > > it's a must to keep indexes consistent, of course.
> > > I'm thinking that rebuilding the indexes on the target table would work.
> > >
> > > In my opinion, the UNLOGGED clause must be designed to guarantee that
> > > the data loaded by this clause is written starting from the end of
> > > all other data blocks.
> > > Plus, those blocks need to be protected from writes by other
> > > transactions during the copy.
> > > Apart from that, the server must be aware of which block is the first
> > > block, or the range of where it started and ended, in preparation for
> > > a crash.
> > >
> > > During crash recovery, those points are helpful to recognize and
> > > detach such blocks in order to resolve a situation where the loaded
> > > data is partially synced to disk and the rest isn't.
> > 
> > How do online backup and archive recovery work?
> > 
> > Suppose that the user executes pg_basebackup while COPY UNLOGGED is running,
> > the physical backup might have the portion of tuples loaded by COPY UNLOGGED
> > but these data are not recovered. It might not be a problem because the 
> > operation
> > is performed without WAL records. But what if an insertion happens after 
> > COPY
> > UNLOGGED but before pg_stop_backup()? I think that a new tuple could be
> > inserted at the end of the table, following the data loaded by COPY 
> > UNLOGGED.
> > With your approach described above, the newly inserted tuple will be 
> > recovered
> > during archive recovery, but it either will be removed if we replay the 
> > insertion
> > WAL then truncate the table or won’t be inserted due to missing block if we
> > truncate the table then replay the insertion WAL, resulting in losing
> > the tuple although the user's insertion succeeded.
> I consider that from the point in time when COPY UNLOGGED is executed,
> any subsequent operations on the data that comes from the UNLOGGED
> operation also cannot be recovered, even if they issued WAL.
> 
> This is basically inevitable because subsequent operations
> after COPY UNLOGGED depend on blocks of data loaded without WAL,
> which means we cannot replay the exact operations.
> 
> Therefore, all I can do is guarantee that
> when a recovery process ends, the target table returns to the state
> immediately before COPY UNLOGGED was executed.
> This could be achieved by issuing an invalidation WAL record, an
> indicator telling the server to stop applying WAL to one specific
> table once this new type of record is seen.
> I think I need to implement this mechanism as well for this feature.
> Thus, I'll take measures against your concern about confusing data loss.
> 
> For recovery of the loaded data itself, the user of this clause,
> like a DBA or a data warehouse administrator for instance,
> would need to make a backup just after the data loading.
> For some developers, this behavior would seem incomplete because of the
> heavy burden it puts on the user.
> 
> On the other hand, I'm aware that Oracle Database has an UNRECOVERABLE
> clause, which is equivalent to what I'm suggesting now in this thread.
> 
> Data loading without REDO logging via that clause is more convenient
> than what I said above, because it's supported by a tool named Recovery
> Manager, which enables users to make incremental backups.
> This backs up only the blocks changed since the previous backup and
> removes the manual burden from the user mentioned above.
> Here, I have to admit that I cannot design and implement
> this kind of synergistic pair of features all at once for data warehousing.
> So I'd like to make COPY UNLOGGED the first step.
> 
> This is the URL describing how Oracle Database for data warehousing
> achieves backup of no-log operations while keeping data loading fast.
> https://docs.oracle.com/database/121/VLDBG/GUID-42825ED1-C4C5-449B-870F-D2C8627CBF86.htm#VLDBG1578

Anyway, if the target table is turned back to LOGGED, the succeeding
WAL stream can be polluted by the logs on the table. So any operations
relying on WAL records are assumed not to be continuable. Not only is
this useless, it is harmful. I don't think we can accept intentionally
emitting an inconsistent WAL stream while wal_level > minimal.

You assert that we could prevent the WAL from being redone by the
"invalidation", but that is equivalent to turning the table into
UNLOGGED. Why do you insist that a table should be labeled as "LOGGED"
while it is virtually UNLOGGED? That costs nothing (if the table is
almost empty).

If you want to get the table back to LOGGED without emitting WAL,
wal_level=minimal works. That requires two restarts and table copying,
though. It seems like we could skip the copying in this case, but I'm
not sure.

regards.

-- 
Kyotaro Horiguchi
NTT Open Source Software 

Re: recovering from "found xmin ... from before relfrozenxid ..."

2020-08-20 Thread Ashutosh Sharma
On Thu, Aug 20, 2020 at 11:04 AM Masahiko Sawada
 wrote:
>
> On Wed, 19 Aug 2020 at 20:45, Ashutosh Sharma  wrote:
> >
> > On Wed, Aug 19, 2020 at 3:55 PM Masahiko Sawada
> >  wrote:
> > >
> > > On Wed, 19 Aug 2020 at 15:09, Ashutosh Sharma  
> > > wrote:
> > > >
> > > > On Wed, Aug 19, 2020 at 9:27 AM Masahiko Sawada
> > > >  wrote:
> > > > >
> > > > > On Mon, 17 Aug 2020 at 15:05, Ashutosh Sharma  
> > > > > wrote:
> > > > > >
> > > > > > > pg_force_freeze() can revive a tuple that is already deleted
> > > > > > > but not
> > > > > > > vacuumed yet. Therefore, the user might need to reindex indexes 
> > > > > > > after
> > > > > > > using that function. For instance, with the following script, the 
> > > > > > > last
> > > > > > > two queries: index scan and seq scan, will return different 
> > > > > > > results.
> > > > > > >
> > > > > > > set enable_seqscan to off;
> > > > > > > set enable_bitmapscan to off;
> > > > > > > set enable_indexonlyscan to off;
> > > > > > > create table tbl (a int primary key);
> > > > > > > insert into tbl values (1);
> > > > > > >
> > > > > > > update tbl set a = a + 100 where a = 1;
> > > > > > >
> > > > > > > explain analyze select * from tbl where a < 200;
> > > > > > >
> > > > > > > -- revive deleted tuple on heap
> > > > > > > select heap_force_freeze('tbl', array['(0,1)'::tid]);
> > > > > > >
> > > > > > > -- index scan returns 2 tuples
> > > > > > > explain analyze select * from tbl where a < 200;
> > > > > > >
> > > > > > > -- seq scan returns 1 tuple
> > > > > > > set enable_seqscan to on;
> > > > > > > explain analyze select * from tbl;
> > > > > > >
> > > > > >
> > > > > > I am not sure if this is the right use-case of pg_force_freeze
> > > > > > function. I think we should only be running pg_force_freeze function
> > > > > > on a tuple for which VACUUM reports "found xmin ABC from before
> > > > > > relfrozenxid PQR" sort of error otherwise it might worsen the things
> > > > > > instead of making it better.
> > > > >
> > > > > Should this also be documented? I think that it's hard to force the
> > > > > user to always use this module in the right situation but we need to
> > > > > show at least when to use it.
> > > > >
> > > >
> > > > I've already added some examples in the documentation explaining the
> > > > use-case of force_freeze function. If required, I will also add a note
> > > > about it.
> > > >
> > > > > > > Also, if a tuple updated and moved to another partition is 
> > > > > > > revived by
> > > > > > > heap_force_freeze(), its ctid still has special values:
> > > > > > > MovedPartitionsOffsetNumber and MovedPartitionsBlockNumber. I 
> > > > > > > don't
> > > > > > > see a problem yet caused by a visible tuple having the special 
> > > > > > > ctid
> > > > > > > value, but it might be worth considering either to reset ctid 
> > > > > > > value as
> > > > > > > well or to not freezing already-deleted tuple.
> > > > > > >
> > > > > >
> > > > > > For this as well, the answer remains the same as above.
> > > > >
> > > > > Perhaps the same is true when a tuple header is corrupted including
> > > > > xmin and ctid for some reason and the user wants to fix it? I'm
> > > > > concerned that a live tuple having the wrong ctid will cause SEGV or
> > > > > PANIC error in the future.
> > > > >
> > > >
> > > > If a tuple header itself is corrupted, then I think we must kill that
> > > > tuple. If only xmin and t_ctid fields are corrupted, then probably we
> > > > can think of resetting the ctid value of that tuple. However, it won't
> > > > be always possible to detect the corrupted ctid value. It's quite
> > > > possible that the corrupted ctid field has valid values for block
> > > > number and offset number in it, but it's actually corrupted and it
> > > > would be difficult to consider such ctid as corrupted. Hence, we can't
> > > > do anything about such types of corruption. Probably in such cases we
> > > > need to run VACUUM FULL on such tables so that new ctid gets generated
> > > > for each tuple in the table.
> > >
> > > Understood.
> > >
> > > Perhaps such corruption will be able to be detected by the new heapam
> > > check functions discussed on another thread. My point was that it
> > > might be better to attempt to make the tuple header as sane as
> > > possible when fixing a live tuple, in order to prevent further
> > > problems such as database crashes caused by malicious attackers.
> > >
> >
> > Agreed. So, to handle the ctid related concern that you raised, I'm
> > planning to do the following changes to ensure that the tuple being
> > marked as frozen contains the correct item pointer value. Please let
> > me know if you are okay with these changes.
>
> Given that a live tuple never indicates to have moved partitions, I
> guess the first condition in the if statement is not necessary. The
> rest looks good to me, although other hackers might think differently.
>

Okay, thanks for confirming. I am planning to go ahead with this
approach. Will later 

Re: remove spurious CREATE INDEX CONCURRENTLY wait

2020-08-20 Thread Michael Paquier
On Wed, Aug 19, 2020 at 02:16:46PM -0400, Alvaro Herrera wrote:
> I did not set the flag in REINDEX CONCURRENTLY, but as I understand it
> can be done too, since in essence it's the same thing as a CIC from a
> snapshot management point of view.

Yes, I see no problems for REINDEX CONCURRENTLY as well, as long as
there are no predicates or expressions involved.  The transactions
that should be patched are all started in ReindexRelationConcurrently.
The transaction of index_concurrently_swap() cannot set that up,
though.  The only thing to be careful about is to make sure that
safe_flag is correct depending on the list of indexes worked on.

> Also, per [1], ISTM this flag could be used to tell lazy VACUUM to
> ignore the Xmin of this process too, which the previous formulation
> (where all CICs were so marked) could not.  This patch doesn't do that
> yet, but it seems the natural next step to take.
> 
> [1] https://postgr.es/m/20191101203310.GA12239@alvherre.pgsql

Could we consider renaming vacuumFlags?  With more flags associated
with a PGPROC entry that are not related to vacuum, the current naming
makes things confusing.  Something like statusFlags could fit better
in the picture?
--
Michael
