Re: [HACKERS] Parallel Plans and Cost of non-filter functions

2017-11-04 Thread Amit Kapila
On Sat, Nov 4, 2017 at 4:43 AM, Tom Lane  wrote:
> Paul Ramsey  writes:
>>> Whether I get a parallel aggregate seems entirely determined by the number
>>> of rows, not the cost of preparing those rows.
>
>> This is true, as far as I can tell and unfortunate. Feeding tables with
>> 100ks of rows, I get parallel plans, feeding 10ks of rows, never do, no
>> matter how costly the work going on within. That's true of changing costs
>> on the subquery select list, and on the aggregate transfn.
>
> This sounds like it might be the same issue being discussed in
>
> https://www.postgresql.org/message-id/flat/CAMkU=1ycXNipvhWuweUVpKuyu6SpNjF=yhwu4c4us5jgvgx...@mail.gmail.com
>

I have rebased the patch being discussed on that thread.

Paul, you might want to check once with the recent patch [1] posted on
the thread mentioned by Tom.

[1] - 
https://www.postgresql.org/message-id/CAA4eK1%2B1H5Urm0_Wp-n5XszdLX1YXBqS_zW0f-vvWKwdh3eCJA%40mail.gmail.com

-- 
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] why not parallel seq scan for slow functions

2017-11-04 Thread Amit Kapila
On Thu, Sep 21, 2017 at 2:35 AM, Jeff Janes  wrote:
> On Tue, Sep 19, 2017 at 9:15 PM, Amit Kapila 
> wrote:
>>
>> On Wed, Sep 20, 2017 at 3:05 AM, Jeff Janes  wrote:
>> > On Tue, Sep 19, 2017 at 1:17 PM, Thomas Munro
>> >  wrote:
>> >>
>> >> On Thu, Sep 14, 2017 at 3:19 PM, Amit Kapila 
>> >> wrote:
>> >> > The attached patch fixes both the review comments as discussed above.
>> >
>> >
>> > that should be fixed by turning costs on the explain, as is the
>> > tradition.
>> >
>>
>> Right.  BTW, did you get a chance to run the original test (for which
>> you have reported the problem) with this patch?
>
>
> Yes, this patch makes it use a parallel scan, with great improvement.
>

Thanks for the confirmation.  Find rebased patch attached.


-- 
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com


parallel_paths_include_tlist_cost_v5.patch
Description: Binary data

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] possible encoding issues with libxml2 functions

2017-11-04 Thread Noah Misch
On Tue, Oct 17, 2017 at 06:06:40AM +0200, Pavel Stehule wrote:
> Please, if you can, try to write it. I am a little bit lost :)

I'm attaching the patch I desired.  Please review.  This will probably miss
this week's minor releases.  If there's significant support, I could instead
push before the wrap.
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index 24229c2..3e3aed8 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -3845,6 +3845,7 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
int32   xpath_len;
xmlChar*string;
xmlChar*xpath_expr;
+   size_t  xmldecl_len = 0;
int i;
int ndim;
Datum  *ns_names_uris;
@@ -3900,6 +3901,16 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
string = pg_xmlCharStrndup(datastr, len);
xpath_expr = pg_xmlCharStrndup(VARDATA_ANY(xpath_expr_text), xpath_len);
 
+   /*
+* In a UTF8 database, skip any xml declaration, which might assert
+* another encoding.  Ignore parse_xml_decl() failure, letting
+* xmlCtxtReadMemory() report parse errors.  Documentation disclaims
+* xpath() support for non-ASCII data in non-UTF8 databases, so leave
+* those scenarios bug-compatible with historical behavior.
+*/
+   if (GetDatabaseEncoding() == PG_UTF8)
+   parse_xml_decl(string, &xmldecl_len, NULL, NULL, NULL);
+
xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
 
PG_TRY();
@@ -3914,7 +3925,8 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
if (ctxt == NULL || xmlerrcxt->err_occurred)
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
"could not allocate parser 
context");
-   doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 
0);
+   doc = xmlCtxtReadMemory(ctxt, (char *) string + xmldecl_len,
+   len - 
xmldecl_len, NULL, NULL, 0);
if (doc == NULL || xmlerrcxt->err_occurred)
xml_ereport(xmlerrcxt, ERROR, 
ERRCODE_INVALID_XML_DOCUMENT,
"could not parse XML document");
diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out
index bcc585d..f7a8c38 100644
--- a/src/test/regress/expected/xml.out
+++ b/src/test/regress/expected/xml.out
@@ -670,6 +670,37 @@ SELECT xpath('/nosuchtag', '');
  {}
 (1 row)
 
+-- Round-trip non-ASCII data through xpath().
+DO $$
+DECLARE
+  xml_declaration text := '';
+  degree_symbol text;
+  res xml[];
+BEGIN
+  -- Per the documentation, xpath() doesn't work on non-ASCII data when
+  -- the server encoding is not UTF8.  The EXCEPTION block below,
+  -- currently dead code, will be relevant if we remove this limitation.
+  IF current_setting('server_encoding') <> 'UTF8' THEN
+RAISE LOG 'skip: encoding % unsupported for xml',
+  current_setting('server_encoding');
+RETURN;
+  END IF;
+
+  degree_symbol := convert_from('\xc2b0', 'UTF8');
+  res := xpath('text()', (xml_declaration ||
+'' || degree_symbol || '')::xml);
+  IF degree_symbol <> res[1]::text THEN
+RAISE 'expected % (%), got % (%)',
+  degree_symbol, convert_to(degree_symbol, 'UTF8'),
+  res[1], convert_to(res[1]::text, 'UTF8');
+  END IF;
+EXCEPTION
+  -- character with byte sequence 0xc2 0xb0 in encoding "UTF8" has no equivalent in encoding "LATIN8"
+  WHEN untranslatable_character THEN RAISE LOG 'skip: %', SQLERRM;
+  -- default conversion function for encoding "UTF8" to "MULE_INTERNAL" does not exist
+  WHEN undefined_function THEN RAISE LOG 'skip: %', SQLERRM;
+END
+$$;
 -- Test xmlexists and xpath_exists
 SELECT xmlexists('//town[text() = ''Toronto'']' PASSING BY REF 
'Bidford-on-AvonCwmbranBristol');
  xmlexists 
diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out
index d3bd8c9..1a9efa2 100644
--- a/src/test/regress/expected/xml_1.out
+++ b/src/test/regress/expected/xml_1.out
@@ -576,6 +576,41 @@ LINE 1: SELECT xpath('/nosuchtag', '');
^
DETAIL:  This functionality requires the server to be built with libxml support.
 HINT:  You need to rebuild PostgreSQL using --with-libxml.
+-- Round-trip non-ASCII data through xpath().
+DO $$
+DECLARE
+  xml_declaration text := '';
+  degree_symbol text;
+  res xml[];
+BEGIN
+  -- Per the documentation, xpath() doesn't work on non-ASCII data when
+  -- the server encoding is not UTF8.  The EXCEPTION block below,
+  -- currently dead code, will be relevant if we remove this limitation.
+  IF current_setting('server_encoding') <> 'UTF8' THEN
+RAISE LOG 'skip: encoding % unsupported for xml',
+  

Re: [HACKERS] [POC] Faster processing at Gather node

2017-11-04 Thread Andres Freund
On 2017-11-05 01:05:59 +0100, Robert Haas wrote:
> skip-gather-project-v1.patch does what it says on the tin.  I still
> don't have a test case for this, and I didn't find that it helped very
> much, but it would probably help more in a test case with more
> columns, and you said this looked like a big bottleneck in your
> testing, so here you go.

The query where that showed a big benefit was

SELECT * FROM lineitem WHERE l_suppkey > '5012' OFFSET 10 LIMIT 1;

(i.e. a not very selective filter, and then just throwing the results away)

still shows quite massive benefits:

before:
set parallel_setup_cost=0;set parallel_tuple_cost=0;set 
min_parallel_table_scan_size=0;set max_parallel_workers_per_gather=8;
tpch_5[17938][1]=# explain analyze SELECT * FROM lineitem WHERE l_suppkey > 
'5012' OFFSET 10 LIMIT 1;
┌
│ QUERY PLAN
├
│ Limit  (cost=635802.67..635802.69 rows=1 width=127) (actual 
time=8675.097..8675.097 rows=0 loops=1)
│   ->  Gather  (cost=0.00..635802.67 rows=27003243 width=127) (actual 
time=0.289..7904.849 rows=26989780 loops=1)
│ Workers Planned: 8
│ Workers Launched: 7
│ ->  Parallel Seq Scan on lineitem  (cost=0.00..635802.67 rows=3375405 
width=127) (actual time=0.124..528.667 rows=3373722 loops=8)
│   Filter: (l_suppkey > 5012)
│   Rows Removed by Filter: 376252
│ Planning time: 0.098 ms
│ Execution time: 8676.125 ms
└
(9 rows)
after:
tpch_5[19754][1]=# EXPLAIN ANALYZE SELECT * FROM lineitem WHERE l_suppkey > 
'5012' OFFSET 10 LIMIT 1;
┌
│ QUERY PLAN
├
│ Limit  (cost=635802.67..635802.69 rows=1 width=127) (actual 
time=5984.916..5984.916 rows=0 loops=1)
│   ->  Gather  (cost=0.00..635802.67 rows=27003243 width=127) (actual 
time=0.214..5123.238 rows=26989780 loops=1)
│ Workers Planned: 8
│ Workers Launched: 7
│ ->  Parallel Seq Scan on lineitem  (cost=0.00..635802.67 rows=3375405 
width=127) (actual time=0.025..649.887 rows=3373722 loops=8)
│   Filter: (l_suppkey > 5012)
│   Rows Removed by Filter: 376252
│ Planning time: 0.076 ms
│ Execution time: 5986.171 ms
└
(9 rows)

so there clearly is still benefit (this is scale 100, but that shouldn't
make much of a difference).

Did not review the code.

> shm-mq-reduce-receiver-latch-set-v1.patch causes the receiver to only
> consume input from the shared queue when the amount of unconsumed
> input exceeds 1/4 of the queue size.  This caused a large performance
> improvement in my testing because it causes the number of times the
> latch gets set to drop dramatically. I experimented a bit with
> thresholds of 1/8 and 1/2 before settling on 1/4; 1/4 seems to be
> enough to capture most of the benefit.

Hm. Is consuming the relevant part, or notifying the sender about it?  I
suspect most of the benefit can be captured by updating bytes read (and
similarly on the other side w/ bytes written), but not setting the latch
unless thresholds are reached.  The advantage of updating the value,
even without notifying the other side, is that in the common case that
the other side gets around to checking the queue without having blocked,
it'll see the updated value.  If that works, that'd address the issue
that we might wait unnecessarily in a number of common cases.
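To make that concrete, here is a rough standalone sketch of the scheme being
suggested (illustrative only - not the shm_mq code; the ring_queue type, the
field names and wake_sender() are made up for this example): the receiver
always publishes its new read position, but only wakes the sender once at
least a quarter of the ring has been freed since the last wakeup.

  #include <stdatomic.h>
  #include <stdint.h>

  typedef struct ring_queue
  {
      _Atomic uint64_t bytes_read;      /* read position, published to the sender */
      uint64_t    last_notified_read;   /* receiver-local bookkeeping */
      uint64_t    ring_size;
  } ring_queue;

  /* Stands in for setting the sender's latch. */
  extern void wake_sender(ring_queue *q);

  static void
  receiver_consumed(ring_queue *q, uint64_t nbytes)
  {
      uint64_t    newread;

      /* Always publish the new read position, so a sender that checks the
       * queue without blocking sees up-to-date numbers. */
      newread = atomic_load(&q->bytes_read) + nbytes;
      atomic_store(&q->bytes_read, newread);

      /* Only wake the sender once enough space has been freed. */
      if (newread - q->last_notified_read >= q->ring_size / 4)
      {
          q->last_notified_read = newread;
          wake_sender(q);
      }
  }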

Did not review the code.

> remove-memory-leak-protection-v1.patch removes the memory leak
> protection that Tom installed upon discovering that the original
> version of tqueue.c leaked memory like crazy.  I think that it
> shouldn't do that any more, courtesy of
> 6b65a7fe62e129d5c2b85cd74d6a91d8f7564608.  Assuming that's correct, we
> can avoid a whole lot of tuple copying in Gather Merge and a much more
> modest amount of overhead in Gather.

Yup, that conceptually makes sense.

Did not review the code.


> Even with all of these patches applied, there's clearly still room for
> more optimization, but MacOS's "sample" profiler seems to show that
> the bottlenecks are largely shifting elsewhere:
>
> Sort by top of stack, same 

Re: [HACKERS] [POC] Faster processing at Gather node

2017-11-04 Thread Robert Haas
On Sat, Nov 4, 2017 at 5:55 PM, Andres Freund  wrote:
>> master: 21436.745, 20978.355, 19918.617
>> patch: 15896.573, 15880.652, 15967.176
>>
>> Median-to-median, that's about a 24% improvement.
>
> Neat!

With the attached stack of 4 patches, I get: 10811.768 ms, 10743.424
ms, 10632.006 ms, about a 49% improvement median-to-median.  Haven't
tried it on hydra or any other test cases yet.

skip-gather-project-v1.patch does what it says on the tin.  I still
don't have a test case for this, and I didn't find that it helped very
much, but it would probably help more in a test case with more
columns, and you said this looked like a big bottleneck in your
testing, so here you go.

shm-mq-less-spinlocks-v2.patch is updated from the version I posted
before based on your review comments.  I don't think it's really
necessary to mention that the 8-byte atomics have fallbacks here;
whatever needs to be said about that should be said in some central
place that talks about atomics, not in each user individually.  I
agree that there might be some further speedups possible by caching
some things in local memory, but I haven't experimented with that.

shm-mq-reduce-receiver-latch-set-v1.patch causes the receiver to only
consume input from the shared queue when the amount of unconsumed
input exceeds 1/4 of the queue size.  This caused a large performance
improvement in my testing because it causes the number of times the
latch gets set to drop dramatically. I experimented a bit with
thresholds of 1/8 and 1/2 before settling on 1/4; 1/4 seems to be
enough to capture most of the benefit.

remove-memory-leak-protection-v1.patch removes the memory leak
protection that Tom installed upon discovering that the original
version of tqueue.c leaked memory like crazy.  I think that it
shouldn't do that any more, courtesy of
6b65a7fe62e129d5c2b85cd74d6a91d8f7564608.  Assuming that's correct, we
can avoid a whole lot of tuple copying in Gather Merge and a much more
modest amount of overhead in Gather.  Since my test case exercised
Gather Merge, this bought ~400 ms or so.

Even with all of these patches applied, there's clearly still room for
more optimization, but MacOS's "sample" profiler seems to show that
the bottlenecks are largely shifting elsewhere:

Sort by top of stack, same collapsed (when >= 5):
slot_getattr  (in postgres)706
slot_deform_tuple  (in postgres)560
ExecAgg  (in postgres)378
ExecInterpExpr  (in postgres)372
AllocSetAlloc  (in postgres)319
_platform_memmove$VARIANT$Haswell  (in
libsystem_platform.dylib)314
read  (in libsystem_kernel.dylib)303
heap_compare_slots  (in postgres)296
combine_aggregates  (in postgres)273
shm_mq_receive_bytes  (in postgres)272

I'm probably not super-excited about spending too much more time
trying to make the _platform_memmove time (only 20% or so of which
seems to be due to the shm_mq stuff) or the shm_mq_receive_bytes time
go down until, say, somebody JIT's slot_getattr and slot_deform_tuple.
:-)

One thing that might be worth doing is hammering on the AllocSetAlloc
time.  I think that's largely caused by allocating space for heap
tuples and then freeing them and allocating space for new heap tuples.
Gather/Gather Merge are guilty of that, but I think there may be other
places in the executor with the same issue. Maybe we could have
fixed-size buffers for small tuples that just get reused and only
palloc for large tuples (cf. SLAB_SLOT_SIZE).
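A rough standalone sketch of that buffer-reuse idea (purely illustrative;
malloc/free stand in for palloc/pfree here, and the struct, function name and
the 1 kB threshold are made up for this example; the struct is assumed to
start out zeroed):

  #include <stdlib.h>

  #define FIXED_TUPLE_BUF_SIZE 1024

  typedef struct tuple_buf
  {
      char    fixed[FIXED_TUPLE_BUF_SIZE];  /* reused for small tuples */
      char   *big;                          /* allocated only when needed */
      size_t  big_size;
  } tuple_buf;

  /* Return space for len bytes, allocating only for oversized tuples. */
  static char *
  tuple_buf_space(tuple_buf *buf, size_t len)
  {
      if (len <= FIXED_TUPLE_BUF_SIZE)
          return buf->fixed;        /* common case: no allocation at all */

      if (len > buf->big_size)
      {
          free(buf->big);           /* free(NULL) is a no-op */
          buf->big = malloc(len);
          buf->big_size = (buf->big != NULL) ? len : 0;
      }
      return buf->big;
  }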

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company


skip-gather-project-v1.patch
Description: Binary data


shm-mq-less-spinlocks-v2.patch
Description: Binary data


shm-mq-reduce-receiver-latch-set-v1.patch
Description: Binary data


remove-memory-leak-protection-v1.patch
Description: Binary data

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Small improvement to compactify_tuples

2017-11-04 Thread Peter Geoghegan

Юрий Соколов  wrote:

tps also reflects the changes:
~17ktps with qsort
~19ktps with bucket sort

Vacuum of the benchmark's table is also improved:
~3s with qsort,
~2.4s with bucket sort


One thing that you have to be careful with when it comes to our qsort
with partially presorted inputs is what I like to call "banana skin
effects":

https://postgr.es/m/cah2-wzku2xk2dpz7n8-a1mvuuttuvhqkfna+eutwnwctgyc...@mail.gmail.com

This may have nothing at all to do with your results; I'm just pointing
it out as a possibility.

--
Peter Geoghegan


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Small improvement to compactify_tuples

2017-11-04 Thread Юрий Соколов
2017-11-03 5:46 GMT+03:00 Tom Lane :
>
> Sokolov Yura  writes:
> > [ 0001-Improve-compactify_tuples.patch, v5 or thereabouts ]
>
> I went to check the shellsort algorithm against Wikipedia's entry,
> and found that this appears to be an incorrect implementation of
> shellsort: where pg_shell_sort_pass has
>
> for (_i = off; _i < _n; _i += off) \
>
> it seems to me that we need to have
>
> for (_i = off; _i < _n; _i += 1) \
>
> or maybe just _i++.


Shame on me :-(
I've written shell sort several times, so I forgot to recheck myself once
again.
And it looks like the best gap sequence from Wikipedia really is the best
({301, 132, 57, 23, 10, 4} in my notation).
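For reference, a plain-C shellsort with the loop structure Tom describes (the
outer index advances by 1; only the comparisons step back by the gap). This
is just an illustration, not the patch's pg_shell_sort macros, and it appends
a final gap of 1 so it sorts completely on its own:

  static void
  shell_sort_ints(int *a, int n)
  {
      static const int gaps[] = {301, 132, 57, 23, 10, 4, 1};
      int     g, i, j;

      for (g = 0; g < (int) (sizeof(gaps) / sizeof(gaps[0])); g++)
      {
          int     gap = gaps[g];

          for (i = gap; i < n; i++)     /* i += 1, not i += gap */
          {
              int     tmp = a[i];

              for (j = i; j >= gap && a[j - gap] > tmp; j -= gap)
                  a[j] = a[j - gap];
              a[j] = tmp;
          }
      }
  }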


2017-11-03 17:37 GMT+03:00 Claudio Freire :
> On Thu, Nov 2, 2017 at 11:46 PM, Tom Lane  wrote:
>> BTW, the originally given test case shows no measurable improvement
>> on my box.
>
> I did manage to reproduce the original test and got a consistent
improvement.

I've rechecked myself using my benchmark.
Without memmove, compactify_tuples consumes:
- with qsort 11.66% cpu (pg_qsort + med3 + swapfunc + itemoffcompare +
compactify_tuples = 5.97 + 0.51 + 2.87 + 1.88 + 0.44)
- with just insertion sort 6.65% cpu (sort is inlined, itemoffcompare also
inlined, so whole is compactify_tuples)
- with just shell sort 5.98% cpu (sort is inlined again)
- with bucket sort 1.76% cpu (sort_itemIds + compactify_tuples = 1.30 +
0.46)

(memmove consumes 1.29% cpu)

tps also reflects the changes:
~17ktps with qsort
~19ktps with bucket sort

Vacuum of the benchmark's table is also improved:
~3s with qsort,
~2.4s with bucket sort

Of course, this benchmark is quite synthetic: the table is unlogged, the
tuples are small, and synchronous commit is off. Still, such a table is
useful in some situations (think of not-too-important but useful counters,
like a "photo watch count"). And the patch affects more than this synthetic
benchmark: it improves restore performance, as Heikki mentioned, and reduces
the CPU consumption of vacuum (though vacuum is more I/O bound).

> I think we should remove pg_shell_sort and just use pg_insertion_sort.

Using shell sort is just a bit safer. A worst-case pattern for insertion
sort is unlikely to appear, but what if it does? Shellsort is a bit better
on the whole array (5.98% vs 6.65%), though on a small array the difference
will be much smaller.

With regards,
Sokolov Yura aka funny_falcon


[HACKERS] Release notes for next week's minor releases are up for review

2017-11-04 Thread Tom Lane
... at
https://git.postgresql.org/gitweb/?p=postgresql.git;a=commitdiff;h=42de8a0255c2509bf179205e94b9d65f9d6f3cf9

regards, tom lane


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Support to COMMENT ON DATABASE CURRENT_DATABASE

2017-11-04 Thread Bossart, Nathan
On 10/5/17, 11:53 PM, "Jing Wang"  wrote:
> The patch has been updated according to Nathan's comments. 
> Thanks for Nathan's review.

Thanks for the new versions of the patches.  I apologize for
the long delay for this new review.

It looks like the no-pgdump patch needs a rebase at this point.
I was able to apply the code portions with a 3-way merge, but
the documentation changes still did not apply.  I didn't have
any problems applying the pgdump patch.

+
+ 
+  The name of a database or keyword current_database. When using 
+  current_database, it means using the name of the connecting database.
+ 
+

For commands that accept the CURRENT_USER and SESSION_USER
keywords, the keywords are typically listed in the 'Synopsis'
section.  I think CURRENT_DATABASE should be no different.  For
example, the parameter type above could be
"database_specification," and the following definition could be
included at the bottom of the synopsis:

where database_specification can be:

object_name
  | CURRENT_DATABASE

Then, in the parameters section, the CURRENT_DATABASE keyword
would be defined:

CURRENT_DATABASE

Comment on the current database instead of an
explicitly identified database.

Also, it looks like only the COMMENT and SECURITY LABEL
documentation is being updated in this patch set.  However, this
keyword seems applicable to many other commands, too (e.g.
GRANT, ALTER DATABASE, ALTER ROLE, etc.).

+static ObjectAddress
+get_object_address_database(ObjectType objtype, DbSpec *object, bool 
missing_ok)
+{
+   char*dbname;
+   ObjectAddress   address;
+
+   dbname = get_dbspec_name(object);
+
+   address.classId = DatabaseRelationId;
+   address.objectId = get_database_oid(dbname, missing_ok);
+   address.objectSubId = 0;
+
+   return address;
+}

This helper function is only used once, and it seems simple
enough to build the ObjectAddress in the switch statement.
Also, instead of get_database_oid(), could we use
get_dbspec_oid()?

if (stmt->objtype == OBJECT_DATABASE)
{
-   char   *database = strVal((Value *) stmt->object);
-
-   if (!OidIsValid(get_database_oid(database, true)))
+   if (!OidIsValid(get_dbspec_oid((DbSpec*)stmt->object, true)))
{
+   char*dbname = NULL;
+
+   dbname = get_dbspec_name((DbSpec*)stmt->object);
+
ereport(WARNING,
(errcode(ERRCODE_UNDEFINED_DATABASE),
-errmsg("database \"%s\" does not exist", database)));
+errmsg("database \"%s\" does not exist", dbname)));
return address;
}
}

This section seems to assume that the DbSpec will be of type
DBSPEC_CSTRING in the error handling.  That should be safe for
now, as you cannot drop the current database, but I would
suggest adding assertions here to be sure.

+   dbname = get_dbspec_name((DbSpec*)stmt->dbspec);

As a general note, casts are typically styled as "(DbSpec *)
stmt" (with the spaces) in PostgreSQL.

+   case DBSPEC_CURRENT_DATABASE:
+   {
+   HeapTuple   tuple;
+   Form_pg_database dbForm;

Can we just declare "tuple" and "dbForm" at the beginning of
get_dbspec_name() so we don't need the extra set of braces?

+   if (fout->remoteVersion >= 10)
+   appendPQExpBuffer(dbQry, "COMMENT ON DATABASE CURRENT_DATABASE IS ");
+   else
+   appendPQExpBuffer(dbQry, "COMMENT ON DATABASE %s IS ", fmtId(datname));

This feature would probably only be added to v11, so the version
checks in the pgdump patch will need to be updated.

Nathan


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Add some const decorations to prototypes

2017-11-04 Thread Fabien COELHO



Just leave it as char*.  If you change the endptr argument you're going to
force every call site to change their return variable, and some of them
would end up having to cast away the const on their end.


OK, here is an updated patch with the controversial bits removed.


I'm generally in favor of helping compilers, but not if you have to cheat.

ISTM that there is still at least one strange cast:

 +static const char **LWLockTrancheArray = NULL;
 +   LWLockTrancheArray = (const char **) // twice

Maybe some function should return a "const char **", or the const is not 
really justified?


--
Fabien.


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


[HACKERS] Display number of heap accesses for index scans

2017-11-04 Thread Andres Freund
Hi,

Right now it's hard to figure out whether a plain index scan returns
matches that then get eliminated by visibility checks in the heap. For
index-only scans (via "Heap Fetches") and bitmap scans (via row-count
mismatches between the bitmap heap and index scan nodes) one can gather
that to some degree from EXPLAIN ANALYZE.

The number of index lookups that failed to return anything can be a
critical performance factor in OLTP workloads.  Therefore it seems like
it'd be a good idea to extend the explain analyze output to include that
information.

Greetings,

Andres Freund


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [PATCH] A hook for session start

2017-11-04 Thread Fabrízio de Royes Mello
On Sat, Nov 4, 2017 at 1:23 AM, Michael Paquier 
wrote:
>
> On Fri, Nov 3, 2017 at 1:55 PM, Fabrízio de Royes Mello
>  wrote:
> >> Passing the database name and user name does not look much useful to
> >> me. You can have access to this data already with CurrentUserId and
> >> MyDatabaseId.
> >
> > This way we don't need to convert oid to names inside hook code.
>
> Well, arguments of hooks are useful if they are used. Now if I look at
> the two examples mainly proposed in this thread, be it in your set of
> patches or the possibility to do some SQL transaction, I see nothing
> using them. So I'd vote for keeping an interface minimal.
>

Maybe the attached patch with an improved test module can illustrate the
feature better.
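For illustration, a minimal sketch of what a module using the proposed hook
could look like (based only on the hook signature in the attached patch; the
logging body is just an example):

  #include "postgres.h"
  #include "fmgr.h"
  #include "tcop/tcopprot.h"

  PG_MODULE_MAGIC;

  static session_start_hook_type prev_session_start_hook = NULL;

  static void
  my_session_start(const char *dbname, const char *username)
  {
      if (prev_session_start_hook)
          prev_session_start_hook(dbname, username);

      /* dbname/username come straight from the hook, no OID lookups needed */
      elog(LOG, "session start: database=%s user=%s", dbname, username);
  }

  void
  _PG_init(void)
  {
      prev_session_start_hook = session_start_hook;
      session_start_hook = my_session_start;
  }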

Regards,

--
Fabrízio de Royes Mello
Consultoria/Coaching PostgreSQL
>> Timbira: http://www.timbira.com.br
>> Blog: http://fabriziomello.github.io
>> Linkedin: http://br.linkedin.com/in/fabriziomello
>> Twitter: http://twitter.com/fabriziomello
>> Github: http://github.com/fabriziomello
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 2c7260e..24346fb 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -169,6 +169,9 @@ static ProcSignalReason RecoveryConflictReason;
 static MemoryContext row_description_context = NULL;
 static StringInfoData row_description_buf;
 
+/* Hook for plugins to get control at start of session */
+session_start_hook_type session_start_hook = NULL;
+
 /* 
  *		decls for routines only used in this file
  * 
@@ -3857,6 +3860,9 @@ PostgresMain(int argc, char *argv[],
 	if (!IsUnderPostmaster)
 		PgStartTime = GetCurrentTimestamp();
 
+	if (session_start_hook)
+		(*session_start_hook) (dbname, username);
+
 	/*
 	 * POSTGRES main processing loop begins here
 	 *
diff --git a/src/include/tcop/tcopprot.h b/src/include/tcop/tcopprot.h
index f8c535c..d349592 100644
--- a/src/include/tcop/tcopprot.h
+++ b/src/include/tcop/tcopprot.h
@@ -35,6 +35,11 @@ extern PGDLLIMPORT const char *debug_query_string;
 extern int	max_stack_depth;
 extern int	PostAuthDelay;
 
+/* Hook for plugins to get control at start of session */
+typedef void (*session_start_hook_type) (const char *dbname,
+		 const char *username);
+extern PGDLLIMPORT session_start_hook_type session_start_hook;
+
 /* GUC-configurable parameters */
 
 typedef enum
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 20f1d27..029eeb4 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -76,6 +76,8 @@ static bool ThereIsAtLeastOneRole(void);
 static void process_startup_options(Port *port, bool am_superuser);
 static void process_settings(Oid databaseid, Oid roleid);
 
+/* Hook for plugins to get control at end of session */
+session_end_hook_type session_end_hook = NULL;
 
 /*** InitPostgres support ***/
 
@@ -1154,6 +1156,16 @@ ShutdownPostgres(int code, Datum arg)
 	 * them explicitly.
 	 */
 	LockReleaseAll(USER_LOCKMETHOD, true);
+
+	/* Hook just normal backends */
+	if (session_end_hook && MyBackendId != InvalidBackendId)
+	{
+		(*session_end_hook) (MyProcPort->database_name, MyProcPort->user_name);
+
+		/* Make sure we don't leave any active transactions and/or locks behind */
+		AbortOutOfAnyTransaction();
+		LockReleaseAll(USER_LOCKMETHOD, true);
+	}
 }
 
 
diff --git a/src/include/tcop/tcopprot.h b/src/include/tcop/tcopprot.h
index d349592..b7fb8c3 100644
--- a/src/include/tcop/tcopprot.h
+++ b/src/include/tcop/tcopprot.h
@@ -35,10 +35,14 @@ extern PGDLLIMPORT const char *debug_query_string;
 extern int	max_stack_depth;
 extern int	PostAuthDelay;
 
-/* Hook for plugins to get control at start of session */
+/* Hook for plugins to get control at start and end of session */
 typedef void (*session_start_hook_type) (const char *dbname,
 		 const char *username);
+typedef void (*session_end_hook_type) (const char *dbname,
+	   const char *username);
+
 extern PGDLLIMPORT session_start_hook_type session_start_hook;
+extern PGDLLIMPORT session_end_hook_type session_end_hook;
 
 /* GUC-configurable parameters */
 
diff --git a/src/test/modules/Makefile b/src/test/modules/Makefile
index b7ed0af..a3c8c1e 100644
--- a/src/test/modules/Makefile
+++ b/src/test/modules/Makefile
@@ -11,6 +11,7 @@ SUBDIRS = \
 		  snapshot_too_old \
 		  test_ddl_deparse \
 		  test_extensions \
+		  test_session_hooks \
 		  test_parser \
 		  test_pg_dump \
 		  test_rbtree \
diff --git a/src/test/modules/test_session_hooks/.gitignore b/src/test/modules/test_session_hooks/.gitignore
new file mode 100644
index 000..5dcb3ff
--- /dev/null
+++ b/src/test/modules/test_session_hooks/.gitignore
@@ -0,0 +1,4 @@
+# Generated subdirectories
+/log/
+/results/
+/tmp_check/
diff --git a/src/test/modules/test_session_hooks/Makefile 

Re: [HACKERS] [COMMITTERS] pgsql: Fix freezing of a dead HOT-updated tuple

2017-11-04 Thread Andres Freund
On 2017-11-04 06:15:00 -0700, Andres Freund wrote:
> The current testcase, and I think the descriptions in the relevant
> threads, all actually fail to point out the precise way the bug is
> triggered.  As e.g. evidenced that the freeze-the-dead case appears to
> not cause any failures in !assertion builds even if the bug is present.

Trying to write up tests reproducing more of the issues in the area, I
think I might have found a third issue - although I'm not sure how
practically relevant it is:

When FreezeMultiXactId() decides it needs to create a new multi because
the old one is below the cutoff, that attempt can be defeated by the
multixact id caching. If the new multi has exactly the same members the
multixact id cache will just return the existing multi with the same
members. The cache will routinely be primed from the lookup of its
members.

I'm not yet sure how easily this can be hit in practice, because
commonly the multixact horizon should prevent a multi whose members are
all still live from being below the horizon. I found a situation where
that's not the case with the current bug, but I'm not sure if that can
happen otherwise.

Greetings,

Andres Freund


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [POC] Faster processing at Gather node

2017-11-04 Thread Andres Freund
Hi,

On 2017-11-04 16:38:31 +0530, Robert Haas wrote:
> On hydra (PPC), these changes didn't help much.  Timings:
>
> master: 29605.582, 29753.417, 30160.485
> patch: 28218.396, 27986.951, 26465.584
>
> That's about a 5-6% improvement.  On my MacBook, though, the
> improvement was quite a bit more:

Hm. I wonder why that is. Random unverified theories (this plane doesn't
have power supplies for us mere mortals in coach, therefore I'm not
going to run benchmarks):

- Due to the lower per-core performance the leader backend is so
  bottlenecked that there's just not a whole lot of
  contention. Therefore removing the lock doesn't help much. That might
  be a bit different if the redundant projection is removed.
- IO performance on hydra is notoriously bad so there might just not be
  enough data available for workers to process rows fast enough to cause
  contention.

> master: 21436.745, 20978.355, 19918.617
> patch: 15896.573, 15880.652, 15967.176
>
> Median-to-median, that's about a 24% improvement.

Neat!


> - * mq_detached can be set by either the sender or the receiver, so the mutex
> - * must be held to read or write it.  Memory barriers could be used here as
> - * well, if needed.
> + * mq_bytes_read and mq_bytes_written are not protected by the mutex.  Instead,
> + * they are written atomically using 8 byte loads and stores.  Memory barriers
> + * must be carefully used to synchronize reads and writes of these values with
> + * reads and writes of the actual data in mq_ring.

Maybe mention that there's a fallback for ancient platforms?


> @@ -900,15 +921,12 @@ shm_mq_send_bytes(shm_mq_handle *mqh, Size nbytes, const void *data,
>   }
>   else if (available == 0)
>   {
> - shm_mq_result res;
> -
> - /* Let the receiver know that we need them to read some data. */
> - res = shm_mq_notify_receiver(mq);
> - if (res != SHM_MQ_SUCCESS)
> - {
> - *bytes_written = sent;
> - return res;
> - }
> + /*
> +  * Since mq->mqh_counterparty_attached is known to be true at this
> +  * point, mq_receiver has been set, and it can't change once set.
> +  * Therefore, we can read it without acquiring the spinlock.
> +  */
> + SetLatch(&mq->mq_receiver->procLatch);

Might not hurt to assert mqh_counterparty_attached, just for slightly
easier debugging.

> @@ -983,19 +1009,27 @@ shm_mq_receive_bytes(shm_mq *mq, Size bytes_needed, bool nowait,
>   for (;;)
>   {
>   Sizeoffset;
> - booldetached;
> + uint64  read;
>
>   /* Get bytes written, so we can compute what's available to read. */
>   written = shm_mq_get_bytes_written(mq, &detached);
>   used = written - mq->mq_bytes_read;
> - written = pg_atomic_read_u64(&mq->mq_bytes_written);
> - read = pg_atomic_read_u64(&mq->mq_bytes_read);

Theoretically we don't actually need to re-read this from shared memory,
we could just have the information in the local memory too. Right?
Doubtful however that it's important enough to bother given that we have
to move the cacheline for `mq_bytes_written` anyway, and will later also
dirty it to *update* `mq_bytes_read`.  Similarly on the write side.


> -/*
>   * Increment the number of bytes read.
>   */
>  static void
> @@ -1157,63 +1164,51 @@ shm_mq_inc_bytes_read(volatile shm_mq *mq, Size n)
>  {
>   PGPROC *sender;
>
> - SpinLockAcquire(&mq->mq_mutex);
> - mq->mq_bytes_read += n;
> + /*
> +  * Separate prior reads of mq_ring from the increment of mq_bytes_read
> +  * which follows.  Pairs with the full barrier in shm_mq_send_bytes().
> +  * We only need a read barrier here because the increment of mq_bytes_read
> +  * is actually a read followed by a dependent write.
> +  */
> + pg_read_barrier();
> +
> + /*
> +  * There's no need to use pg_atomic_fetch_add_u64 here, because nobody
> +  * else can be changing this value.  This method avoids taking the bus
> +  * lock unnecessarily.
> +  */

s/the bus lock/a bus lock/?  Might also be worth rephrasing away from
bus locks - there are a lot of different ways atomics are implemented.

>  /*
> - * Get the number of bytes written.  The sender need not use this to access
> - * the count of bytes written, but the receiver must.
> - */
> -static uint64
> -shm_mq_get_bytes_written(volatile shm_mq *mq, bool *detached)
> -{
> - uint64  v;
> -
> - SpinLockAcquire(&mq->mq_mutex);
> - v = mq->mq_bytes_written;
> - *detached = mq->mq_detached;
> - SpinLockRelease(&mq->mq_mutex);
> -
> - return v;
> -}

You reference this function in a comment 

Re: [HACKERS] [COMMITTERS] pgsql: Account for catalog snapshot in PGXACT->xmin updates.

2017-11-04 Thread Tom Lane
Noah Misch  writes:
> I plan to use the attached patch after the minor release tags land.  If
> there's significant support, I could instead push before the wrap.

This looks fine to me --- I think you should push now.

(which reminds me, I'd better get on with making release notes.)

regards, tom lane


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] taking stdbool.h into use

2017-11-04 Thread Peter Eisentraut
On 10/29/17 08:50, Michael Paquier wrote:
> On Thu, Oct 26, 2017 at 9:25 AM, Peter Eisentraut
>  wrote:
>> Here is an updated patch set.  This is just a rebase of the previous
>> set, no substantial changes.  Based on the discussion so far, I'm
>> proposing that 0001 through 0007 could be ready to commit after review,
>> whereas the remaining two need more work at some later time.
> 
> I had a look at this patch series. Patches 1, 2 (macos headers indeed
> show that NSUNLINKMODULE_OPTION_NONE is set to 0x0), 3 to 7 look fine
> to me.

Committed 1, 2, 3; will work on the rest later and incorporate your
findings.

-- 
Peter Eisentraut  http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [COMMITTERS] pgsql: Fix freezing of a dead HOT-updated tuple

2017-11-04 Thread Andres Freund
On 2017-11-03 12:36:59 -0700, Peter Geoghegan wrote:
> Andres Freund  wrote:
> > Here's that patch.  I've stared at this some, and Robert did too. Robert
> > mentioned that the commit message might need some polish and I'm not
> > 100% sure about the error message texts yet.
> 
> The commit message should probably say that the bug involves the
> resurrection of previously dead tuples, which is different to there
> being duplicates because a constraint is not enforced because HOT chains
> are broken (that's a separate, arguably less serious problem).

The reason for that is that I hadn't yet quite figured out how the bug I
described in the commit message (and the previously committed testcase)
would cause that. I was planning to diagnose / experiment more with this
and write an email if I couldn't come up with an explanation.   The
committed test does *not* actually trigger that.

The reason I couldn't quite figure out how the problem triggers is that
the xmax removing branch in FreezeMultiXactId() can only be reached if
the multi is from before the cutoff - which it can't have been for a
single vacuum execution to trigger the bug, because that'd require the
multi to be running to falsely return RECENTLY_DEAD rather than DEAD (by
definition a multi can't be below the cutoff if running).

For the problem to occur I think vacuum has to be executed *twice*: The
first time through HTSV mistakenly returns RECENTLY_DEAD preventing the
tuple from being pruned. That triggers FreezeMultiXactId() to create a
*new* multi with dead members. At this point the database already is in
a bad state. Then in a second vacuum HTSV returns DEAD, but
 * Ordinarily, DEAD tuples would have been removed by
 * heap_page_prune(), but it's possible that the tuple
 * state changed since heap_page_prune() looked.  In
 * particular an INSERT_IN_PROGRESS tuple could have
 * changed to DEAD if the inserter aborted.  So this
 * cannot be considered an error condition.
 *
..
if (HeapTupleIsHotUpdated(&tuple) ||
HeapTupleIsHeapOnly(&tuple))
{
nkeep += 1;

prevents the tuple from being removed. If now the multi xmax is below
the xmin horizon it triggers
/*
 * If the xid is older than the cutoff, it has to have aborted,
 * otherwise the tuple would have gotten pruned away.
 */
if (TransactionIdPrecedes(xid, cutoff_xid))
{
if (TransactionIdDidCommit(xid))
elog(ERROR, "can't freeze committed xmax");
*flags |= FRM_INVALIDATE_XMAX;
in FreezeMultiXact. Without the new elog, this then causes xmax to be
removed, reviving the tuple.


The current testcase, and I think the descriptions in the relevant
threads, all actually fail to point out the precise way the bug is
triggered.  As e.g. evidenced by the fact that the freeze-the-dead case appears to
not cause any failures in !assertion builds even if the bug is present.


The good news is that the error checks I added in the patch upthread
prevent all of this from happening, even though I'd not yet understood
the mechanics fully - it's imnsho pretty clear that we need to be more
paranoid in production builds around this.   A bunch of users that
triggered largely "invisible" corruption (the first vacuum described
above) will possibly run into one of these elog()s, but that seems far
preferable to making the corruption a lot worse.


I think unfortunately the check + elog() in the
HeapTupleIsHeapOnly())
{
nkeep += 1;

/*
 * If this were to happen for a tuple that actually
 * needed to be frozen, we'd be in trouble, because
 * it'd leave a tuple below the relation's xmin
 * horizon alive.
 */
if (heap_tuple_needs_freeze(tuple.t_data, FreezeLimit,
                            MultiXactCutoff, buf))
{
 

Re: [HACKERS] Dynamic result sets from procedures

2017-11-04 Thread Daniel Verite
Peter Eisentraut wrote:

> > CREATE PROCEDURE test()
> > LANGUAGE plpgsql
> > AS $$
> >   RETURN QUERY  EXECUTE 'SELECT 1 AS col1, 2 AS col2';
> > END;
> > $$;
> > 
> > Or is that not possible or not desirable?
> 
> RETURN means the execution ends there, so how would you return multiple
> result sets?

RETURN alone yes, but RETURN QUERY continues the execution, appending
rows to the single result set of the function. In the case of a
procedure, I guess each RETURN QUERY could generate an independent
result set.

> But maybe you don't want to return all those results, so you'd need a
> way to designate which ones, e.g.,
> 
> AS $$
> SELECT set_config('something', 'value');
> SELECT * FROM interesting_table;  -- return only this one
> SELECT set_config('something', 'oldvalue');
> $$;

Yes, in that case, lacking PERFORM in SQL, nothing simple comes to
mind on how to return certain results and not others.
But if it were in an SQL function, it wouldn't return the rows of
"interesting_table" either. I think it would be justified to just say:
use plpgsql for that kind of sequence.


Best regards,
-- 
Daniel Vérité
PostgreSQL-powered mailer: http://www.manitou-mail.org
Twitter: @DanielVerite


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Parallel Plans and Cost of non-filter functions

2017-11-04 Thread Tels
Moin,

On Fri, November 3, 2017 7:13 pm, Tom Lane wrote:
> Paul Ramsey  writes:
>>> Whether I get a parallel aggregate seems entirely determined by the
>>> number
>>> of rows, not the cost of preparing those rows.
>
>> This is true, as far as I can tell and unfortunate. Feeding tables with
>> 100ks of rows, I get parallel plans, feeding 10ks of rows, never do, no
>> matter how costly the work going on within. That's true of changing
>> costs
>> on the subquery select list, and on the aggregate transfn.
>
> This sounds like it might be the same issue being discussed in
>
> https://www.postgresql.org/message-id/flat/CAMkU=1ycXNipvhWuweUVpKuyu6SpNjF=yhwu4c4us5jgvgx...@mail.gmail.com

When looking at the web archive, the link is broken, even though in the
mail above it appears correct for me:

https://www.postgresql.org/message-id/28621.1509750807%40sss.pgh.pa.us

(shortened: http://bit.ly/2zetO5T)

Seems the email obfuscation breaks such links?

Here is a short-link for people reading it via the archive on http:

http://bit.ly/2hF4lIt

Best regards,

Tels


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Add some const decorations to prototypes

2017-11-04 Thread Peter Eisentraut
On 11/3/17 13:54, Tom Lane wrote:
>> Would you prefer leaving the input argument as char *, or change the
>> endptr argument to const as well?
> 
> Just leave it as char*.  If you change the endptr argument you're going to
> force every call site to change their return variable, and some of them
> would end up having to cast away the const on their end.

OK, here is an updated patch with the controversial bits removed.

-- 
Peter Eisentraut  http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
From b0fe8bb86a37938dad6bd6d6b7a51ded6afbf78a Mon Sep 17 00:00:00 2001
From: Peter Eisentraut 
Date: Tue, 31 Oct 2017 10:34:31 -0400
Subject: [PATCH v2] Add some const decorations to prototypes

---
 contrib/dict_xsyn/dict_xsyn.c  |  2 +-
 contrib/fuzzystrmatch/dmetaphone.c |  4 +--
 contrib/pgcrypto/pgcrypto.c|  4 +--
 contrib/seg/seg.c  |  4 +--
 contrib/seg/segdata.h  |  2 +-
 contrib/seg/segparse.y |  4 +--
 contrib/unaccent/unaccent.c|  2 +-
 contrib/uuid-ossp/uuid-ossp.c  |  2 +-
 src/backend/access/common/reloptions.c | 12 
 src/backend/access/gist/gistbuild.c|  2 +-
 src/backend/access/transam/xact.c  |  6 ++--
 src/backend/access/transam/xlogarchive.c   |  4 +--
 src/backend/catalog/heap.c | 10 +++
 src/backend/commands/comment.c |  4 +--
 src/backend/commands/event_trigger.c   |  4 +--
 src/backend/commands/extension.c   |  4 +--
 src/backend/commands/indexcmds.c   |  8 +++---
 src/backend/commands/opclasscmds.c |  2 +-
 src/backend/commands/tablecmds.c   | 16 +--
 src/backend/commands/typecmds.c|  6 ++--
 src/backend/commands/view.c|  2 +-
 src/backend/libpq/auth.c   | 24 
 src/backend/libpq/hba.c|  6 ++--
 src/backend/parser/parse_expr.c|  2 +-
 src/backend/parser/parse_func.c|  4 +--
 src/backend/parser/parse_relation.c|  8 +++---
 src/backend/parser/parse_target.c  |  2 +-
 src/backend/port/dynloader/darwin.c|  8 +++---
 src/backend/port/dynloader/darwin.h|  4 +--
 src/backend/port/dynloader/hpux.c  |  4 +--
 src/backend/port/dynloader/hpux.h  |  4 +--
 src/backend/port/dynloader/linux.c |  4 +--
 src/backend/postmaster/postmaster.c|  2 +-
 src/backend/replication/basebackup.c   |  8 +++---
 src/backend/rewrite/rewriteDefine.c|  4 +--
 src/backend/snowball/dict_snowball.c   |  2 +-
 src/backend/storage/lmgr/lwlock.c  |  8 +++---
 src/backend/tsearch/dict_thesaurus.c   |  2 +-
 src/backend/tsearch/spell.c|  4 +--
 src/backend/utils/adt/genfile.c|  2 +-
 src/backend/utils/adt/ruleutils.c  |  4 +--
 src/backend/utils/adt/varlena.c|  2 +-
 src/backend/utils/adt/xml.c| 32 +++---
 src/bin/initdb/initdb.c| 12 
 src/bin/pg_dump/pg_backup_db.c |  2 +-
 src/bin/pg_dump/pg_backup_db.h |  2 +-
 src/bin/pg_rewind/fetch.c  |  2 +-
 src/bin/pg_rewind/fetch.h  |  2 +-
 src/bin/pg_upgrade/option.c|  6 ++--
 src/bin/pg_upgrade/pg_upgrade.c|  4 +--
 src/bin/pg_waldump/pg_waldump.c|  2 +-
 src/bin/pgbench/pgbench.c  |  4 +--
 src/include/access/gist_private.h  |  2 +-
 src/include/access/reloptions.h| 14 +-
 src/include/access/xact.h  |  6 ++--
 src/include/access/xlog_internal.h |  4 +--
 src/include/catalog/heap.h |  2 +-
 src/include/commands/comment.h |  4 +--
 src/include/commands/defrem.h  |  4 +--
 src/include/commands/typecmds.h|  2 +-
 src/include/commands/view.h|  2 +-
 src/include/executor/tablefunc.h   |  8 +++---
 src/include/parser/parse_relation.h|  6 ++--
 src/include/parser/parse_target.h  |  2 +-
 src/include/postmaster/bgworker.h  |  2 +-
 src/include/rewrite/rewriteDefine.h|  2 +-
 src/include/storage/lwlock.h   |  2 +-
 src/include/utils/dynamic_loader.h |  4 +--
 

Re: [HACKERS] pow support for pgbench

2017-11-04 Thread Fabien COELHO


Hello Raúl,


Sorry about the patch. Attaching it now so it can be considered as
submitted.


There is a typo in the XML doc:

1024.0/

Please check that the documentation compiles.

I'm at odds with having the integer version rely on a double pow(), even 
if it works. I think that there should be a specific integer version which 
does use integer operations. From stack overflow, the following is 
suggested:


 int ipow(int base, int exp)
 {
     int result = 1;

     while (exp)
     {
         if (exp & 1)
             result *= base;
         exp >>= 1;
         base *= base;
     }

     return result;
 }

The integer version should be used when x and y are integers *AND* y >= 0.

If y is a negative integer, the double version should be used.
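For instance, a minimal standalone sketch of that dispatch rule (not
pgbench's actual evaluator code; it only assumes the ipow() above):

  #include <math.h>
  #include <stdbool.h>

  int ipow(int base, int exp);   /* from the snippet above */

  /* Use integer exponentiation only for integer operands with a
   * non-negative exponent; fall back to the double version otherwise. */
  static double
  eval_pow(double x, double y, bool operands_are_ints)
  {
      if (operands_are_ints && y >= 0)
          return (double) ipow((int) x, (int) y);
      return pow(x, y);
  }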

--
Fabien.
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [POC] Faster processing at Gather node

2017-11-04 Thread Robert Haas
On Wed, Oct 18, 2017 at 3:09 AM, Andres Freund  wrote:
> 2) The spinlocks both on the the sending and receiving side a quite hot:
>
>rafia query leader:
> +   36.16%  postgres  postgres[.] shm_mq_receive
> +   19.49%  postgres  postgres[.] s_lock
> +   13.24%  postgres  postgres[.] SetLatch

Here's a patch which, as per an off-list discussion between Andres,
Amit, and myself, removes the use of the spinlock for most
send/receive operations in favor of memory barriers and the atomics
support for 8-byte reads and writes.  I tested with a pgbench -i -s
300 database with pgbench_accounts_pkey dropped and
max_parallel_workers_per_gather boosted to 10.  I used this query:

select aid, count(*) from pgbench_accounts group by 1 having count(*) > 1;

which produces this plan:

 Finalize GroupAggregate  (cost=1235865.51..5569468.75 rows=1000 width=12)
   Group Key: aid
   Filter: (count(*) > 1)
   ->  Gather Merge  (cost=1235865.51..4969468.75 rows=3000 width=12)
 Workers Planned: 6
 ->  Partial GroupAggregate  (cost=1234865.42..1322365.42
rows=500 width=12)
   Group Key: aid
   ->  Sort  (cost=1234865.42..1247365.42 rows=500 width=4)
 Sort Key: aid
 ->  Parallel Seq Scan on pgbench_accounts
(cost=0.00..541804.00 rows=500 width=4)
(10 rows)

On hydra (PPC), these changes didn't help much.  Timings:

master: 29605.582, 29753.417, 30160.485
patch: 28218.396, 27986.951, 26465.584

That's about a 5-6% improvement.  On my MacBook, though, the
improvement was quite a bit more:

master: 21436.745, 20978.355, 19918.617
patch: 15896.573, 15880.652, 15967.176

Median-to-median, that's about a 24% improvement.

Any reviews appreciated.

Thanks,

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company


shm-mq-less-spinlocks-v1.2.patch
Description: Binary data

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] pgbench - allow to store select results into variables

2017-11-04 Thread Fabien COELHO


Hello Pavel,


Here is a v13, which is just a rebase after the documentation xml-ization.


Here is a v14, after yet another rebase, and some comments added to answer 
your new comments.



I am looking to this patch.

Not sure if "cset" is best name - maybe "eset" .. like embeded set?


I used c for "compound", because they compound SQL commands.

Now I do not have a very strong opinion, only that it should be some
letter with some logic behind it, followed by "set".


The variables and fields in the source currently use "compound" 
everywhere; if this is changed, they should be updated accordingly.


ISTM that the ";" is embedded, but the commands are compound, so 
"compound" seems better word to me. However it is debatable.


If there is some standard naming for the concept, it should be used.


The code of append_sql_command is not too readable and is not enough
commented.


Ok. I have added comments in the code.


I don't understand why you pass a param compounds to append_sql_command,
when this value is stored in my_command->compound from create_sql_command?


This is the number of compound commands in the "more" string. It must be 
updated as well as the command text, so that the my_command text and the
number of compounds are consistent.

Think of one initialization followed by two appends:

  SELECT 1 AS x \cset
  SELECT 2 \; SELECT 3 AS y \cset
  SELECT 4 \; SELECT 5 \; SELECT 6 AS z \gset

In the end, we must have the full 6 queries

  "SELECT 1 AS x \; SELECT 2 \; SELECT 3 AS y \; SELECT 4 \; SELECT 5 \; SELECT 6 AS 
z"

and know that we want to set variables from queries 1, 3 and 6 and ignore 
the 3 others.



Or maybe some unhappy field or variable names was chosen.


It seems ok to me. What would you suggest?

--
Fabien.diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index e509e6c..44e8896 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -818,6 +818,51 @@ pgbench  options  d
   
 
   
+   
+
+ \cset [prefix] or
+ \gset [prefix]
+
+
+
+ 
+  These commands may be used to end SQL queries, replacing a semicolon.
+  \cset replaces an embedded semicolon (\;) within
+  a compound SQL command, and \gset replaces a final
+  (;) semicolon which ends the SQL command. 
+ 
+
+ 
+  When these commands are used, the preceding SQL query is expected to
+  return one row, the columns of which are stored into variables named after
+  column names, and prefixed with prefix if provided.
+ 
+
+ 
+  The following example puts the final account balance from the first query
+  into variable abalance, and fills variables
+  one, two and
+  p_three with integers from a compound query.
+
+UPDATE pgbench_accounts
+  SET abalance = abalance + :delta
+  WHERE aid = :aid
+  RETURNING abalance \gset
+-- compound of two queries
+SELECT 1 AS one, 2 AS two \cset
+SELECT 3 AS three \gset p_
+
+ 
+
+ 
+  
+\cset and \gset commands do not work when
+empty SQL queries appear within a compound SQL command.
+  
+ 
+
+   
+

 
  \set varname expression
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index d4a6035..32262df 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -384,12 +384,15 @@ static const char *QUERYMODE[] = {"simple", "extended", "prepared"};
 
 typedef struct
 {
-	char	   *line;			/* text of command line */
+	char	   *line;			/* first line for short display */
+	char	   *lines;			/* full multi-line text of command */
 	int			command_num;	/* unique index of this Command struct */
 	int			type;			/* command type (SQL_COMMAND or META_COMMAND) */
 	MetaCommand meta;			/* meta command identifier, or META_NONE */
 	int			argc;			/* number of command words */
 	char	   *argv[MAX_ARGS]; /* command word list */
+	int		compound;   /* last compound command (number of \;) */
+	char	  **gset;   /* per-compound command prefix */
 	PgBenchExpr *expr;			/* parsed expression, if needed */
 	SimpleStats stats;			/* time spent in this command */
 } Command;
@@ -1264,6 +1267,104 @@ getQueryParams(CState *st, const Command *command, const char **params)
 		params[i] = getVariable(st, command->argv[i + 1]);
 }
 
+/* read all responses from backend */
+static bool
+read_response(CState *st, char **gset)
+{
+	PGresult   *res;
+	int			compound = 0;
+
+	while ((res = PQgetResult(st->con)) != NULL)
+	{
+		switch (PQresultStatus(res))
+		{
+			case PGRES_COMMAND_OK: /* non-SELECT commands */
+			case PGRES_EMPTY_QUERY: /* may be used for testing no-op overhead */
+if (gset[compound] != NULL)
+{
+	fprintf(stderr,
+			"client %d file %d command %d compound %d: "
+			"\\gset expects a row\n",
+			st->id, st->use_file, st->command, compound);
+	st->ecnt++;
+	return false;
+}
+break; /* OK */
+
+			case PGRES_TUPLES_OK:
+if (gset[compound] != NULL)
+{
+	/* store 

[HACKERS] Re: [COMMITTERS] pgsql: Account for catalog snapshot in PGXACT->xmin updates.

2017-11-04 Thread Noah Misch
On Mon, Aug 21, 2017 at 07:41:52AM +0100, Simon Riggs wrote:
> On 19 August 2017 at 20:54, Noah Misch  wrote:
> > On Tue, Dec 06, 2016 at 01:59:18PM -0500, Robert Haas wrote:
> >> On Tue, Dec 6, 2016 at 1:36 PM, Tom Lane  wrote:
> >> > Robert Haas  writes:
> >> >> On Tue, Nov 15, 2016 at 3:55 PM, Tom Lane  wrote:
> >> >>> Account for catalog snapshot in PGXACT->xmin updates.
> >> >
> >> >> It seems to me that this makes it possible for
> >> >> ThereAreNoPriorRegisteredSnapshots() to fail spuriously.  The only
> >> >> core code that calls that function is in copy.c, and apparently we
> >> >> never reach that point with the catalog snapshot set.  But that seems
> >> >> fragile.
> >
> > I recently noticed this by way of the copy2 regression test failing, in 9.4
> > and later, under REPEATABLE READ and SERIALIZABLE.  That failure started 
> > with
> > $SUBJECT.

> > Fair summary.  ThereAreNoPriorRegisteredSnapshots() has functioned that way
> > since an early submission[1], and I don't see that we ever discussed it
> > explicitly.  Adding Simon for his recollection.
> 
> The intention, IIRC, was to allow the current snapshot (i.e. 1) but no
> earlier (i.e. prior) snapshots (so not >=2)
> 
> The name was chosen so it was (hopefully) clear what it did --
> ThereAreNoPriorRegisteredSnapshots

I now understand the function name, so I added a comment but didn't rename it.
I plan to use the attached patch after the minor release tags land.  If
there's significant support, I could instead push before the wrap.
commit d68eed7 (HEAD, ThereAreNoPriorRegisteredSnapshots-CatalogSnapshot)
Author: Noah Misch 
AuthorDate: Sat Nov 4 00:08:04 2017 -0700
Commit: Noah Misch 
CommitDate: Sat Nov 4 00:33:37 2017 -0700

Ignore CatalogSnapshot when checking COPY FREEZE prerequisites.

This restores the ability, essentially lost in commit
ffaa44cb559db332baeee7d25dedd74a61974203, to use COPY FREEZE under
REPEATABLE READ isolation.  Back-patch to 9.4, like that commit.

Discussion: 
https://postgr.es/m/ca+tgmoahwdm-7fperbxzu9uz99lpmumepsxltw9tmrogzwn...@mail.gmail.com

diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 8006df3..1bdd492 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -2394,13 +2394,25 @@ CopyFrom(CopyState cstate)
/*
 * Optimize if new relfilenode was created in this subxact or one of its
 * committed children and we won't see those rows later as part of an
-* earlier scan or command. This ensures that if this subtransaction
-* aborts then the frozen rows won't be visible after xact cleanup. Note
+* earlier scan or command. The subxact test ensures that if this 
subxact
+* aborts then the frozen rows won't be visible after xact cleanup.  
Note
 * that the stronger test of exactly which subtransaction created it is
-* crucial for correctness of this optimization.
+* crucial for correctness of this optimization. The test for an earlier
+* scan or command tolerates false negatives. FREEZE causes other 
sessions
+* to see rows they would not see under MVCC, and a false negative 
merely
+* spreads that anomaly to the current session.
 */
if (cstate->freeze)
{
+   /*
+* Tolerate one registration for the benefit of 
FirstXactSnapshot.
+* Scan-bearing queries generally create at least two 
registrations,
+* though relying on that is fragile, as is ignoring 
ActiveSnapshot.
+* Clear CatalogSnapshot to avoid counting its registration.  
We'll
+* still detect ongoing catalog scans, each of which separately
+* registers the snapshot it uses.
+*/
+   InvalidateCatalogSnapshot();
if (!ThereAreNoPriorRegisteredSnapshots() || 
!ThereAreNoReadyPortals())
ereport(ERROR,

(errcode(ERRCODE_INVALID_TRANSACTION_STATE),
diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index 294ab70..addf87d 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -1645,6 +1645,14 @@ DeleteAllExportedSnapshotFiles(void)
FreeDir(s_dir);
 }
 
+/*
+ * ThereAreNoPriorRegisteredSnapshots
+ * Is the registered snapshot count less than or equal to one?
+ *
+ * Don't use this to settle important decisions.  While zero registrations and
+ * no ActiveSnapshot would confirm a certain idleness, the system makes no
+ * guarantees about the significance of one registered snapshot.
+ */
 bool
 ThereAreNoPriorRegisteredSnapshots(void)
 {

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription: