Re: [HACKERS] Get more from indices.
Hello, this is the revised patch.

Hmm, that sounds quite reasonable in general. But the conflation is already found in grouping_planner to some extent. Although this new patch is not split into unique-index stuff and others, it removes current_pathkeys from grouping_planner, and adds pathkeys and is_unique into struct Plan instead. This eliminates the independent and longstanding current_pathkeys variable and the calculation involving it from grouping_planner(), so it should become clearer and easier to read, I expect.

- is_unique and pathkeys are added to the type Path. (umm...)

- create function like pathkeys_satisfies(check_pathkeys, pathkeys, isuniq) or path_ordered_by(pathkeys, path) as needed.

This was different from my thought. Finally I added plan_is_ordered(plan, path), and some of the make_nodename() functions take pathkeys and/or uniquely_ordered as parameters while others take them from the given lefttree plan. Plan nodes propagate the attributes in an autonomous way, so explicit current_pathkeys handling could be thrown away.

- Plan will be set ordered and pathkeys derived from the source path and its processing, and grouping_planner consults it to decide whether to do a sort (to hide the currently naked code).

- Add check for nullable columns :-) Now IndexOptInfo has a new attribute full_ordered, and it is set true if the index does not cover any nullable columns in get_relation_info().

The expected file of the isolation test is also modified to fit the change in the resulting plan. Finally, this version came to conflict with the other UNION patch, so I detached it from that and rebased to current master.

regards,

-- Kyotaro Horiguchi NTT Open Source Software Center

diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 606734a..43be0a5 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -953,7 +953,8 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
   index_pathkeys = build_index_pathkeys(root, index, ForwardScanDirection);
   useful_pathkeys = truncate_useless_pathkeys(root, rel,
-                                              index_pathkeys);
+                                              index_pathkeys,
+                                              index->full_ordered);
   orderbyclauses = NIL;
   orderbyclausecols = NIL;
 }
@@ -1015,7 +1016,8 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
   index_pathkeys = build_index_pathkeys(root, index, BackwardScanDirection);
   useful_pathkeys = truncate_useless_pathkeys(root, rel,
-                                              index_pathkeys);
+                                              index_pathkeys,
+                                              index->full_ordered);
   if (useful_pathkeys != NIL)
   {
     ipath = create_index_path(root, index,
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 6724996..954d8a8 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -363,6 +363,68 @@ get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 }

 /*
+ * path_is_uniquely_ordered
+ *    Return true if the path is apparently uniquely ordered on the pathkeys.
+ */
+bool
+path_is_uniquely_ordered(Path *path, List *pathkeys)
+{
+  if (pathkeys && path->pathkeys &&
+      IsA(path, IndexPath) &&
+      ((IndexPath *) path)->indexinfo->full_ordered &&
+      (pathkeys_contained_in(path->pathkeys, pathkeys)))
+  {
+    return true;
+  }
+
+  return false;
+}
+
+
+/*
+ * path_is_ordered
+ *    Return true if the path is apparently ordered on the pathkeys. The given
+ *    path might be modified so that it will be recognized later as sufficiently
+ *    ordered.
+ */
+bool
+path_is_ordered(Path *path, List *pathkeys)
+{
+  if (pathkeys_contained_in(pathkeys, path->pathkeys))
+    return true;
+
+  /* path is obviously ordered when uniquely ordered */
+  if (path_is_uniquely_ordered(path, pathkeys))
+    return true;
+
+  /*
+   * MergeAppendPath's pathkeys can be replaced by arbitrary pathkeys when
+   * all subpaths are uniquely ordered on the target pathkeys.
+   */
+  if (IsA(path, MergeAppendPath))
+  {
+    ListCell *lc;
+    MergeAppendPath *mpath = (MergeAppendPath *) path;
+
+    foreach (lc, mpath->subpaths)
+    {
+      Path *subpath = (Path *) lfirst(lc);
+      if (!path_is_uniquely_ordered(subpath, pathkeys)) break;
+    }
+    if (!lc)
+    {
+      /*
+       * This MergeAppendPath is sufficiently ordered on the target
+       * pathkeys. Reflect that on this path.
+       */
+      mpath->path.pathkeys = pathkeys;
+      return true;
+    }
+  }
+  return false;
+}
+
+/*
 * get_cheapest_fractional_path_for_pathkeys
 *    Find the cheapest path (for retrieving a specified fraction of all
 *    the tuples) that satisfies the given pathkeys and parameterization.
@@ -397,7 +459,7 @@ get_cheapest_fractional_path_for_pathkeys(List *paths,
      compare_fractional_path_costs(matched_path, path, fraction) <= 0)
      continue;

-    if (pathkeys_contained_in(pathkeys, path->pathkeys) &&
+    if (path_is_ordered(path, pathkeys) &&
      bms_is_subset(PATH_REQ_OUTER(path), required_outer))
      matched_path = path;
 }
@@ -733,7 +795,7 @@
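For illustration only (table and column names here are hypothetical, not taken from the patch): the case this work targets is an ORDER BY that lists more columns than a unique index covers. Because the index is unique and covers no nullable columns, a scan ordered on its key alone is already fully ordered, so the extra sort should become unnecessary:

CREATE TABLE t (id int PRIMARY KEY, payload text NOT NULL);
-- id is unique and not nullable, so an index scan ordered on (id) is also
-- ordered on (id, payload); with the patch, no separate Sort node should be needed.
EXPLAIN SELECT * FROM t WHERE id > 100 ORDER BY id, payload;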
Re: [HACKERS] pg_dump and pg_dumpall in real life (proposal)
-BEGIN PGP SIGNED MESSAGE- Hash: SHA1

On 11/11/2013 11:20 PM, Josh Berkus wrote: On 11/11/2013 06:24 AM, Stephen Frost wrote: * Rafael Martinez (r.m.guerr...@usit.uio.no) wrote: * We need a pg_dump solution that can generate in one step all the necessary pieces of information needed when restoring or cloning a database. (schema, data, privileges, users and alter database/role data) [.] As for 'what we need', I'd think someone with the time and energy to write the patch and work with the community to implement it.. +1

Well, I am willing to take a chance on the first suggestion if nobody else has the time or energy. I have never sent a patch or worked with the postgres code, but I think it can be done without a lot of work with some reuse of the code used in pg_dumpall.

This is a proposal based on the feedback we have received:

* pg_dump will also deliver information about ALTER DATABASE ... SET data for a given database when the option '--create' is used.

* pg_dump will deliver information about ROLES used and ALTER ROLE ... SET data for a given database when a new option, e.g. --roles-global, is used.

* pg_restore will restore ROLE information when used with a new option, e.g. --roles-global, and ALTER DATABASE ... SET information when used with the '--create' option.

* We need to do something about how pg_restore will handle ROLES information, because of some security concerns when restoring roles that already exist on the target server. Some of the suggestions are:

a) Implement and use CREATE ROLE IF NOT EXISTS and just throw a warning or handle the "role already exists" error message gracefully.

b) Use a new option, e.g. --reuse-roles-in-conflict, to behave like suggestion a). If this option is not used, pg_restore will stop with a fatal error when a role already exists.

c) Use a new option, e.g. --on-role-error-stop, to stop with a fatal error when a role already exists. If this option is not used, pg_restore will behave like suggestion a).

d) Use a new option, e.g. --rename-roles-in-conflict, to rename the roles that already exist. If this option is not used, pg_restore will stop with a fatal error when a role already exists.

I think I prefer b) to continue with the postgres spirit of security by default. d) is too complicated for me due to my lack of knowledge of the postgres code.

Comments?

regards, - -- Rafael Martinez Guerrero Center for Information Technology University of Oslo, Norway PGP Public Key: http://folk.uio.no/rafael/

-BEGIN PGP SIGNATURE- Version: GnuPG v2.0.14 (GNU/Linux) iEYEARECAAYFAlKB8w0ACgkQBhuKQurGihSq7QCfZdzreRGNRx0vUzXPjYqzNOIP LqgAoJnfeCYjsfEUmsYvvp3DSL959IRL =8Ynv -END PGP SIGNATURE-

-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] TABLE not synonymous with SELECT * FROM?
David Johnston wrote The paragraph is unnecessary if the Synopsis section of the SELECT documentation is updated to correctly reflect all the valid clauses that can be attached to TABLE. The current reading implies that you cannot attach anything so when you said LIMIT worked I was surprised. Also, testing seems to confirm that the allowance of LIMIT implies that OFFSET is allowed as well. If TABLE is allowed as a top-level command why doesn't it get its own page in the SQL commands section? It really doesn't matter - and honestly while I've known about it I've never actually thought to use it in actual queries because as soon as you want to do something special you have to switch it out for SELECT * FROM anyway - but it does seem inconsistent. I'd be in favour of the first. Since it's sort-of synonymous for SELECT * FROM it would make sense to include it on the same page to avoid having to explain the same clauses again. But then it should be listed earlier, before the clauses which can be used with it. Yes, all LIMIT, OFFSET, FETCH stuff works. Also very useful in a WITH: WITH x AS ( TABLE foo ORDER BY colX DESC LIMIT 10 ) ...; Note that set operations work too, and that's how I most often use it for testing rewritten queries: create table x as original_query; create table y as rewritten_query; table x except table y; table y except table x; Cheers, Colin -- View this message in context: http://postgresql.1045698.n5.nabble.com/TABLE-not-synonymous-with-SELECT-FROM-tp5777695p5777883.html Sent from the PostgreSQL - hackers mailing list archive at Nabble.com. -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Extension Templates S03E11
Stephen Frost sfr...@snowman.net writes: Are there any other changes you have pending for this..? Would be nice to see the latest version which you've tested and which patches cleanly against master... ;) I just rebased now, please see attached. I had to pick new OIDs in some places too, but that's about it. I'll still go ahead and start looking through this, per our discussion. Thanks! -- Dimitri Fontaine http://2ndQuadrant.fr PostgreSQL : Expertise, Formation et Support templates.v16.patch.gz Description: Binary data -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
[HACKERS] pg_upgrade: delete_old_cluster.sh issues
Hello,

IMHO, there is a serious issue in the script to clean the old data directory when running pg_upgrade in link mode.

in short: When working with symbolic links, the first step in delete_old_cluster.sh is to delete the old $PGDATA folder that may contain tablespaces used by the new instance.

in long, our use case: our postgres data directories are organized as follows:

1) they are all registered in a root location, i.e. /opt/data, but can be located somewhere else using symbolic links:

ll /opt/app/
...
postgresql-data-1 -> /pgdata/postgresql-data-1

2) we have fixed names for root locations of tablespaces within $PGDATA. these can be real folders or again symbolic links to some other places:

ll /pgdata/postgresql-data-1
...
tblspc_data
tblspc_idx -> /datarep/pg1/tblspc_idx

(additionally, each schema has its own tablespaces in these locations, but this is not relevant here)

3) we do have some custom content within $PGDATA, e.g. an extra log folder used by our deployment script

After running pg_upgrade, checking the tablespace location within the NEW instance:

ll pg_tblspc
16428 -> /opt/app/postgresql-data-1/tblspc_data/foo
16429 -> /opt/app/postgresql-data-1/tblspc_idx/foo

which, resolving the symbolic links, is equivalent to:

/pgdata/postgresql-data-1/tblspc_data/foo (x)
/datarep/pg1/tblspc_idx/foo (y)

I called pg_upgrade using the true paths (no symbolic links):

./pg_upgrade \
--link \
--check \
--old-datadir /pgdata/postgresql-data-1 \
--new-datadir /pgdata/postgresql_93-data-1

now, checking what the cleanup script would like to do:

cat delete_old_cluster.sh
#!/bin/sh
(a) rm -rf /pgdata/postgresql-data-1
(b) rm -rf /opt/app/postgresql-data-1/tblspc_data/foo/PG_9.1_201105231
(c) rm -rf /opt/app/postgresql-data-1/tblspc_err_data/foo/PG_9.1_201105231

a: will delete the folder (x) which contains data for the NEW Postgres instance!
b: already gone through (a)
c: still exists in /datarep/pg1/tblspc_idx/foo but can't be found as the symbolic link in /pgdata/postgresql-data-1 is already deleted through (a)

moreover, our custom content in $OLD_PGDATA would be gone too

It seems that these issues could all be avoided by first removing the expected content of $OLD_PGDATA and then only unlinking $OLD_PGDATA itself when empty (or add a note in the output of pg_upgrade):

replace

rm -rf /pgdata/postgresql-data-1

with

cd /pgdata/postgresql-data-1
rm -rf base
rm -rf global
rm -rf pg_clog
rm -rf pg_hba.conf (*)
rm -rf pg_ident.conf (*)
rm -rf pg_log
rm -rf pg_multixact
rm -rf pg_notify
rm -rf pg_serial
rm -rf pg_stat_tmp
rm -rf pg_subtrans
rm -rf pg_tblspc
rm -rf pg_twophase
rm -rf PG_VERSION (*)
rm -rf pg_xlog
rm -rf postgresql.conf (*)
rm -rf postmaster.log
rm -rf postmaster.opts (*)

(*): could be nice to keep as a reference.

best regards,

Marc Mamin
[HACKERS] Information about Access methods
Hi, I am looking at the PostgreSQL code for the first time, specifically at the indexing scheme. I want to focus on B-tree for my testing. Inside the access methods in the backend, I found the index folder and the btree folder, but I don't know which code to look at. I want to change something in the B-tree implementation. Please guide me. Regards, Rohit Goyal
[HACKERS] Links in README.git are broken
Hi, While trying to find instructions to build documentation, I noticed that the links in README.git are broken, both redirect to http://www.postgresql.org/docs/devel/static/ In addition, why isn't INSTALL stored in git? Cheers, Colin -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Links in README.git are broken
On Tue, Nov 12, 2013 at 12:19 PM, Colin 't Hart co...@sharpheart.org wrote: Hi, While trying to find instructions to build documentation, I noticed that the links in README.git are broken, both redirect to http://www.postgresql.org/docs/devel/static/ Fixed. Forgotten when we moved the developer docs a while ago. In addition, why isn't INSTALL stored in git? Because it's built from SGML, to have the same contents as the html/pdf docs. We don't typically store derived files in git - only some exceptions like configure. -- Magnus Hagander Me: http://www.hagander.net/ Work: http://www.redpill-linpro.com/
Re: [HACKERS] Information about Access methods
On 11/12/2013 07:00 PM, Rohit Goyal wrote: Inside access methods in Backend, I found Index folder and Btree folder but i don't know which code to look for. i want to change something in B tree implementation. Start here: http://www.postgresql.org/docs/current/static/indexam.html -- Craig Ringer http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training Services -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Clang 3.3 Analyzer Results
Tom Lane t...@sss.pgh.pa.us wrote: quite a lot of people have looked at Postgres with Coverity already. If Clang is throwing up lots and lots of warnings, the odds are *very* high that most of them are false positives. Running through such a list to see if there's anything real isn't all that exciting a prospect.

Here is the summary of what was reported:

All Bugs: 313

API
  Argument with 'nonnull' attribute passed null: 13

Dead store
  Dead assignment: 65
  Dead increment: 11

Logic error
  Assigned value is garbage or undefined: 19
  Branch condition evaluates to a garbage value: 2
  Dereference of null pointer: 98
  Division by zero: 15
  Out-of-bound array access: 1
  Result of operation is garbage or undefined: 9
  Stack address stored into global variable: 1
  Uninitialized argument value: 74

Memory Error
  Double free: 1
  Memory leak: 1

Unix API
  Allocator sizeof operand mismatch: 3

Does anything stand out as something that is particularly worth looking into? Does anything here seem worth assuming is completely bogus because of the Coverity and Valgrind passes?

-- Kevin Grittner EDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company

-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Clang 3.3 Analyzer Results
Kevin Grittner kgri...@ymail.com wrote: Logic error Stack address stored into global variable: 1

I took a look at this one, and it is a totally legitimate use, the reason for which is explained with this comment:

/*
 * check_stack_depth: check for excessively deep recursion
 *
 * This should be called someplace in any recursive routine that might possibly
 * recurse deep enough to overflow the stack. Most Unixen treat stack
 * overflow as an unrecoverable SIGSEGV, so we want to error out ourselves
 * before hitting the hardware limit.
 */

Which raises the question: do these clang tools have any way to record which errors have been established to be false positives, so that they don't show up in subsequent runs? I know Valgrind has that. Without such a capability, these tools don't seem very valuable.

-- Kevin Grittner EDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company

-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Fast insertion indexes: why no developments
On 5 November 2013 14:28, Leonardo Francalanci m_li...@yahoo.it wrote: Either my sql is not correct (likely), or my understanding of the minmax index is not correct (even more likely), or the minmax index is not usable in a random inputs scenario. Please show the real world SQL you intend to run, so we can comment on it. Inventing a use case that breaks effectiveness of any optimisation is always easy, but the question is whether the use case is likely or even desirable. If we have a query to show the most recent calls by a particular caller SELECT * FROM cdr WHERE callerid = X ORDER BY call_timestamp DESC LIMIT 100 then this could potentially be optimised using a minmax index, by traversing the data ranges in call_timestamp order. That is not part of the code in this initial release, since the main use case is for WHERE call_timestamp = X, or WHERE primarykey = Y I don't believe there is a credible business case for running that same query but without the ORDER BY and LIMIT, since it could potentially return gazillions of rows, so it isn't surprising at all that it would access a large % of the table. Saying but I really do want to run it isn't an argument in favour of it being a sensible query to run - we are only interested in optimising sensible real world queries. -- Simon Riggs http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training Services -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] pg_dump and pg_dumpall in real life (proposal)
* Rafael Martinez (r.m.guerr...@usit.uio.no) wrote: Comments? Create a wiki page for it. :) Thanks, Stephen signature.asc Description: Digital signature
Re: [HACKERS] Clang 3.3 Analyzer Results
* Peter Geoghegan (p...@heroku.com) wrote: I seem to also recall Coverity correctly handling that, although perhaps I'm unfairly crediting them with taking advantage of the abort() trick because of the state of Postgres when I tried each of those two tools - it might be that scan-build *would* have taken advantage of that at the time, if only the trick was there. With Coverity, we completely 'punt' on it because we make elog(FATAL) into a 'program-exit' case when it clearly isn't. I've discussed this w/ the Coverity folks but, aiui, there's no solution to this any time soon.. Thanks, Stephen signature.asc Description: Digital signature
Re: [HACKERS] Row-security writer-side checks proposal
On Sat, Nov 9, 2013 at 10:01 AM, Craig Ringer cr...@2ndquadrant.com wrote: On 11/08/2013 11:03 PM, Robert Haas wrote: Separate READ DELETE etc would only be interesting if we wanted to let someone DELETE rows they cannot SELECT. Since we have DELETE ... RETURNING, and since users can write a predicate function for DELETE that leaks the information even if we didn't, in practice if you give the user any READ right you've given them all of them. So I don't think we can support that (except maybe by column RLS down the track). Well, we could require SELECT privilege when a RETURNING clause is present... Absolutely could. Wouldn't stop them grabbing the data via a predicate function on the update/delete, though, and we can't sanely (IMO) require SELECT rights if they want to use non-LEAKPROOF functions/operators either. Hmm, good point. -- Robert Haas EnterpriseDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
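A minimal illustration of the leak being discussed, with hypothetical table and role names: a user holding only the DELETE privilege can still read row contents back through RETURNING, and a qualifier function attached to the write can leak them as well.

-- Assumes role app_user has DELETE but not SELECT on accounts.
DELETE FROM accounts WHERE owner = 'app_user' RETURNING *;
-- A non-LEAKPROOF function used in the WHERE clause could likewise expose
-- values, e.g. by raising a NOTICE containing the rows it was called with.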
Re: [HACKERS] Postgresql c function returning one row with 2 fileds
On Sat, Nov 9, 2013 at 5:52 AM, lucamarletta i...@beopen.it wrote: I'm new to PostgreSQL C functions and I started by following the examples. I want to write a simple function that has an SQL query inside and, passing a parameter, evaluates and returns 2 fields as sums (for now, to keep it simple). The function below has a problem passing the check (get_call_result_type(fcinfo, &resultTypeId, &resultTupleDesc) != TYPEFUNC_COMPOSITE) This is checking whether the CREATE FUNCTION statement that you use to invoke this code is set up to return a composite type; it's essentially cross-checking your C with your SQL. -- Robert Haas EnterpriseDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
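A sketch of the SQL side that makes get_call_result_type() report TYPEFUNC_COMPOSITE (function and column names are made up): declaring OUT parameters makes the declared result a composite type the C code can be cross-checked against.

-- Two OUT parameters define a composite (record) result.
CREATE FUNCTION my_sums(IN p integer, OUT sum_a bigint, OUT sum_b bigint)
RETURNS record
AS 'MODULE_PATHNAME', 'my_sums'
LANGUAGE C STRICT;

-- Called in the FROM clause, it yields one row with two fields:
SELECT * FROM my_sums(42);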
Re: [HACKERS] Clang 3.3 Analyzer Results
Kevin Grittner kgri...@ymail.com writes: Does anything stand out as something that is particularly worth looking into? Does anything here seem worth assuming is completely bogus because of the Coverity and Valgrind passes? I thought most of it was obvious junk: if there were actually uninitialized-variable bugs in the bison grammar, for instance, not only we but half the programs on the planet would be coredumping all the time. Not to mention that valgrind testing would certainly have caught it. I'd suggest looking only at the reports that pertain to seldom-exercised code paths, as those would be the places where actual bugs might possibly have escaped notice. One thought for the Clang people is that most of the reports such as null pointer dereference presumably mean I think I see an execution path whereby we could get here with a null pointer. If so, it'd be awfully helpful if the complaint included some description of what that path is. I think Coverity does that, or at least I've seen output from some tool that does it. regards, tom lane -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Add CREATE support to event triggers
On Fri, Nov 8, 2013 at 10:33 AM, Alvaro Herrera alvhe...@2ndquadrant.com wrote: Attached you can find a very-much-WIP patch to add CREATE info support for event triggers (normalized commands). This patch builds mainly on two things: 1. Dimitri's DDL rewrite patch he submitted way back, in http://www.postgresql.org/message-id/m2zk1j9c44@2ndquadrant.fr I borrowed the whole ddl_rewrite.c code, and tweaked it a bit. There are several things still wrong with it and which will need to be fixed before a final patch can even be contemplated; but there are some questions that require a consensus answer before I go and fix it all, because what it will look like will depend on said answers. I'm still unhappy with this whole concept. It adds a significant maintenance burden that must be carried by everyone who adds new DDL syntax in the future, and it's easy to imagine this area of the code ending up very poorly tested and rife with bugs. After all, event triggers, as nifty as they are, are only going to be used by a small minority of users. And new DDL features are typically going to be things that are fairly obscure, so again they will only be used by a small minority of users. I think we need to avoid the situation where we have bugs that can only get found when those minorities happen to intersect. If we're going to have DDL rewrite code, then we need a way of making sure that it gets tested in a comprehensive way on a regular basis. Here's one idea: create a contrib module that (somehow, via APIs to be invented) runs every DDL command that gets executed through the deparsing code, and then parses the result and executes *that* instead of the original command. Then, add a build target that runs the regression test suite in that mode, and get the buildfarm configured to run that build target regularly on at least some machines. That way, adding syntax to the regular regression test suite also serves to test that the deparsing logic for that syntax is working. If we do this, there's still some maintenance burden associated with having DDL deparsing code, but at least our chances of noticing when we've failed to maintain it should be pretty good. The other thing that bothers me here is that, while a normalized command string sounds great in theory, as soon as you want to allow (for example) mapping schema A on node 1 to schema B on node 2, the wheels come off: you'll have to deparse that normalized command string so you can change out the schema name and then reassemble it back into a command string again. So we're going to parse the user input, then deparse it, hand over the results to the application code, which will then parse it, modify that, and deparse it again. At every step of that process, any mistake will lead to subtle bugs in the resulting system. Larry Wall once wrote (approximately) that a good programming language makes simple things simple and hard things possible; I think this design fails the second prong of that test. Now, I guess I can live with that if it's what everyone else wants, but I don't have a great feeling about the long-term utility of it. Exposing the data from the DDL statement in some structured way - like what we've done with drops, or a JSON blob, or something like that, feels much more robust and useful than a normalized command string to me. If the normalized command string can easily be built up from that data, then you don't really need to expose the command string separately. If it can't, then you're not doing a good job exposing the data in a usable form. 
Saying "well, people can always parse the normalized command string" is a cop-out. Parsing SQL is *hard*; the only external project I know of that parses our SQL syntax well is pgpool, and that's because they copy our parser wholesale, surely not the sort of solution we want to foist off on event trigger authors. Finally, I'm very skeptical of the word "normalized". To me, that sounds like an alias for modifying the command string in unspecified ways that big brother thinks will be useful to event trigger authors. Color me skeptical. What if somebody doesn't want their command string normalized? What if they want it normalized in a way that's different from the way that we've chosen to normalize it? I fear that this whole direction amounts to "we don't know how to design a real API, so let's just do surgery on the command string and call whatever pops out the API". Maybe that's harsh, but if it is I don't know why. The normalization steps we build into this process constitute assumptions about how the feature will be used, and they back the user into using that feature in just that way and no other. 2. The ideas we used to build DROP support. Mainly, the interesting thing here is the fact that we use a SRF to report, at ddl_command_end, all the objects that were created during execution of that command. We do this by collecting them in a list in some raw form
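For reference, a sketch of how the existing drop-side support is consumed today (9.3's sql_drop event and pg_event_trigger_dropped_objects()); the CREATE-side SRF being discussed would presumably be exposed in a similar shape. The trigger and function names here are hypothetical.

CREATE FUNCTION log_drops() RETURNS event_trigger
LANGUAGE plpgsql AS $$
DECLARE
    obj record;
BEGIN
    -- Report each object dropped by the command that fired this trigger.
    FOR obj IN SELECT * FROM pg_event_trigger_dropped_objects()
    LOOP
        RAISE NOTICE 'dropped % %', obj.object_type, obj.object_identity;
    END LOOP;
END;
$$;

CREATE EVENT TRIGGER log_drops ON sql_drop EXECUTE PROCEDURE log_drops();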
Re: [HACKERS] Clang 3.3 Analyzer Results
* Tom Lane (t...@sss.pgh.pa.us) wrote: I think Coverity does that, or at least I've seen output from some tool that does it. Coverity does provide the path (including going through multiple iterations of a loop, if applicable). Doesn't make it perfect, sadly, but I've been trying to feed back false positives to their dev group to address. Frustratingly, it doesn't handle global variables terribly well and I've found a couple of false positives around cases involving them. Thanks, Stephen signature.asc Description: Digital signature
[HACKERS] What's needed for cache-only table scan?
Hello,

This is a brief design proposal for a feature I'd like to implement on top of the custom-scan APIs. Because it (probably) requires a few additional base features beyond custom scan, I'd like to get feedback from the hackers.

The cache-only table scan, as in the subject line, is an alternative scan logic to sequential scan, applicable when all the referenced columns are cached. It allows scanning a particular table without storage access, thus improving scan performance.

So what? How is this different from a large shared_buffers configuration? This mechanism caches only the columns referenced in queries, not whole records. It makes sense for workloads that scan a table with many columns while the qualifiers reference just a few of them, as is typical of analytic queries, because it reduces the memory consumed by the cache, so more records can be cached.

In addition, it has another role from my standpoint. It also serves as a fast data supplier for GPU/MIC devices. When we move data to a GPU device, the source address has to be a region marked as page-locked, exempt from concurrent swap-out, if we want CUDA or OpenCL to run in asynchronous DMA transfer mode; the fastest one.

Probably, there is no problem with the construction of this table cache. All we need to do is inject a custom-scan node instead of a seq-scan; it can then construct the table cache concurrently with the regular seq-scan, even though the first access becomes a little slower.

My concern is how to handle the case when the table gets modified. A straightforward idea is that each cached entry being modified is invalidated by a callback mechanism. Triggers can help in the case of INSERT, UPDATE, DELETE and TRUNCATE. Furthermore, it would be better if an extension could inject its own trigger definition at RelationBuildTriggers() on the fly, to perform the feature transparently.

On the other hand, we have no way to get control around VACUUM. I want to have a hook that allows extensions to get control when a page gets vacuumed. Once we have such a hook, it enables invalidating cached entries that are indexed by tid but have already been vacuumed. Its best location is probably lazy_scan_heap(), to call back the extension for each page getting vacuumed, with

How about your opinion? I'd like to find out the best way to implement this table-caching mechanism within the scope of the v9.4 functionality set. Any ideas, comments or suggestions are welcome.

Thanks,
-- KaiGai Kohei kai...@kaigai.gr.jp

-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
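To make the intended workload concrete, a hypothetical example (table and column names invented): a wide table queried through only a couple of columns, so caching just those columns keeps far more rows in memory than caching whole records would.

-- orders_wide has dozens of columns, but this analytic query touches only two,
-- so a column-wise cache of (status, amount) could answer it without storage access.
SELECT status, count(*), sum(amount)
FROM orders_wide
GROUP BY status;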
Re: [HACKERS] ERROR during end-of-xact/FATAL
On Fri, Nov 8, 2013 at 4:13 PM, Noah Misch n...@leadboat.com wrote: A PANIC will reinitialize everything relevant, largely resolving the problems around ERROR during FATAL. It's a heavy-handed solution, but it may well be the best solution. Efforts to harden CommitTransaction() and AbortTransaction() seem well-spent, but the additional effort to make FATAL exit cope where AbortTransaction() or another exit action could not cope seems to be slicing ever-smaller portions of additional robustness. I pondered a variant of that conclusion that distinguished critical cleanup needs from the rest. Each shared resource (heavyweight locks, buffer pins, LWLocks) would have an on_shmem_exit() callback that cleans up the resource under a critical section. (AtProcExit_Buffers() used to fill such a role, but resowner.c's work during AbortTransaction() has mostly supplanted it.) The ShutdownPostgres callback would not use a critical section, so lesser failures in AbortTransaction() would not upgrade to a PANIC. But I'm leaning against such a complication on the grounds that it would add seldom-tested code paths posing as much a chance of eroding robustness as bolstering it. The current situation is just plain weird: in the ERROR-then-ERROR case, we emit a WARNING and bounce right back into AbortTransaction(), and if it doesn't work any better the second time than the first time, we recurse again, and eventually if it fails enough times in a row, we just give up and PANIC. But in the ERROR-then-FATAL case, we *don't* retry AbortTransaction(); instead, we just continue running the rest of the on_shmem_exit callbacks and then exit. So, in short, ERROR + ERROR*10 = PANIC, but FATAL + ERROR*10 = FATAL. That's bizarre. Given that that's where we are, promoting an ERROR during FATAL processing to PANIC doesn't seem like it's losing much; we're essentially already doing that in the (probably more likely) case of a persistent ERROR during ERROR processing. But since PANIC sucks, I'd rather go the other direction: let's make an ERROR during ERROR processing promote to FATAL. And then let's do what you write above: make sure that there's a separate on-shmem-exit callback for each critical shared memory resource and that we call of those during FATAL processing. It seems to me that that's how things were originally designed to work, but that we've drifted away from it basically because the low-level callbacks to release heavyweight locks and buffer pins turned out to be kinda, uh, slow, and we thought those code paths couldn't be taken anyway (turns out they can). I think we could either make those routines very fast, or arrange only to run that code at all in the case where AbortTransaction() didn't complete successfully. It's true that such code will be rarely run, but the logic is simple enough that I think we can verify it by hand, and it's sure nice to avoid PANICs. -- Robert Haas EnterpriseDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] What's needed for cache-only table scan?
Kohei KaiGai kai...@kaigai.gr.jp writes: The cache-only table scan, as in the subject line, is an alternative scan logic to sequential scan, applicable when all the referenced columns are cached. This seems like a pretty dubious idea to me --- you're talking about expending large amounts of memory on a single-purpose cache, without any clear way to know if the data will ever be used before it gets invalidated (and the life expectancy of the cache doesn't sound like it'd be very high either). I'd like to find out the best way to implement this table-caching mechanism within the scope of the v9.4 functionality set. There's no possible way you'll finish this for 9.4. Keep in mind that CF3 starts Friday, and CF4 starts 2014-01-15, and project policy is that major feature patches should arrive (in at least rough form) by CF3. Even ignoring that policy, this is more than 2 months worth of work. regards, tom lane -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] [COMMITTERS] pgsql: Replace duplicate_oids with Perl implementation
On Sun, Nov 10, 2013 at 6:12 PM, Tom Lane t...@sss.pgh.pa.us wrote: Andrew Dunstan and...@dunslane.net writes: It might be a bit more portable if we replaced the shebang lines on perl scripts with #!/bin/env perl Perhaps, if we're worried about people keeping perl somewhere other than /usr/bin. However, the most likely reason for having a /usr/local/bin/perl or whatever is that it's a newer and shinier one than what's in /usr/bin. Since we're only interested in bog-standard perl, there's no real reason for us to want to pick up the local one. FWIW, there was a big discussion at Red Hat a few years ago about whether to run around and do that to all perl/python scripts, and the outcome of the discussion was that using env was deprecated, not encouraged. I don't remember the reasoning in detail, but I think the core idea was that if a distro knows they ship perl in /usr/bin, then inserting env into the equation doesn't do anything but add cycles and failure modes. I'm not sure that that argument applies too well to our scenario, but it's out there. The particular application to this case might be: what makes you so sure env is in /bin? +1. If we're concerned about non-standard Perl locations, I think the right way to handle that is to design the makefiles to invoke Perl scripts using $(PERL) $(srcdir)/whatever.pl; and to make the Windows build scripts do the equivalent. We are, at least in some places, already doing that. IMV, the role of the #! line is just to cater to the less-likely scenario where someone wants to run one of those scripts outside the build process; if their perl happens to be in the standard location, they can do that as ./whatever.pl rather than perl whatever.pl. If their perl does not happen to be at that location, then I think it's anybody's guess what the #! line would need to be to save those three keystrokes. -- Robert Haas EnterpriseDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Relax table alias conflict rule in 9.3?
On Sun, Nov 10, 2013 at 8:06 PM, Tom Lane t...@sss.pgh.pa.us wrote: We had a complaint http://www.postgresql.org/message-id/e1vjuby-0002a1...@wrigleys.postgresql.org about the fact that 9.3 rejects queries with duplicate table aliases like this: select * from tenk1 a left join (int4_tbl a cross join int8_tbl b) c on unique1 = f1; ERROR: table name a specified more than once I pushed back on this on the grounds that this is illegal per SQL spec: the standard is pretty clear that you can't use the same table alias more than once in a given level of SELECT (cf SQL:2008 7.6 table reference, syntax rules 6 and 7). However, the complainant has a good point that if we've accepted this since forever, ceasing to accept it is going to cause people problems. Moreover, my argument that it makes things ambiguous for LATERAL doesn't hold a lot of water. Duplicate table aliases were potentially ambiguous before, too, but we allowed the case anyway and only complained if there's actually an ambiguous reference. We could do the same for LATERAL references. I poked into the code a bit and soon realized that the problem stems from the checkNameSpaceConflicts call that I added to transformFromClauseItem in the LATERAL patch (in HEAD, line 729 of parse_clause.c). That throws an error if the left side of a JOIN exposes any aliases that are already exposed at top level of the FROM clause. There's no such check pre-9.3, and I'm not sure now why I thought it was appropriate here, since the column-reference lookup code is perfectly capable of dealing with ambiguous references. Taking out that call allows the above query to work again, while changing no existing regression test results. Cases that are actually ambiguous will throw an appropriate error: select * from tenk1 a left join (int4_tbl a cross join lateral (select a.f1) b) c on unique1 = f1; ERROR: table reference a is ambiguous LINE 1: ... left join (int4_tbl a cross join lateral (select a.f1) b) ... ^ This discovery also shows that there's nothing particularly principled about 9.3's behavior, because it complains about select * from tenk1 a left join (int4_tbl a cross join int8_tbl b) c on unique1 = f1; but it's perfectly happy with select * from tenk1 a left join (int4_tbl b cross join int8_tbl a) c on unique1 = f1; So I propose removing that call, adding regression tests like these examples, and back-patching to 9.3. Any objections? I see that you already did this, so perhaps it's moot, but FWIW, +1 from me. -- Robert Haas EnterpriseDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Race condition in b-tree page deletion
On Mon, Nov 11, 2013 at 3:03 AM, Heikki Linnakangas hlinnakan...@vmware.com wrote: On 10.11.2013 01:47, Robert Haas wrote: I think we've tried pretty hard to avoid algorithms where the maximum number of lwlocks that must be held at one time is not a constant, and I think we're in for a bad time of it if we start to deviate from that principal. I'm not sure what to do about this problem, but I think locking N levels of the tree at once, where N can be as large as the tree is deep, is probably a bad plan, whether the number of locks required is N or 3N. I think I found a solution that accomplishes that. It's actually not that complicated: Like we currently do, first climb up the tree to check that it's safe to delete, ie. the downlink in the first non-empty parent is not the rightmost entry. But when we reach the level where the parent is non-empty - I'll call that the topmost parent - we keep that page locked. The leaf page is kept locked while we climb. This is enough to plug the race condition. As long as we hold a lock on the topmost parent containing the downlink to the branch of pages we're about to delete, it cannot become the rightmost entry. And as long as we hold a lock on the leaf page, no new insertions can happen on any of the internal pages in the branch, as insertions to internal pages only happen when a child is split. However, the rest of the algorithm needs to be slightly modified, as we cannot re-lock the lower-level pages until we release the lock on the topmost page, to avoid deadlock. So at this point, we hold two locks: the leaf page, and the topmost parent containing the downlink to the branch we're deleting. Next, we remove the downlink from the topmost parent, and mark the leaf page as half-dead in one atomic operation. Also, a pointer to the highest internal page in the branch we're deleting - the one the removed downlink pointed to - is put on the leaf page. We can now release the locks. At this point, searches and inserts work fine. The leaf page has been marked as half-dead, so any insertions to the deleted page's keyspace will go to its right sibling. The downlink is to the top of the branch is gone, so even if the right sibling is split many times, any keys in the transferred keyspace that propagate to the higher levels won't be out-of-order. All that is left is to unlink the all the lingering pages in the branch we're deleting from their left and right siblings. This can be done at any later time, and if we error out or crash for some reason, next vacuum that comes along can finish the job. This is done one level at a time. Lock the leaf page, and the internal page the leaf page points to, and the internal page's left and right siblings (in the right order, not this order). Change the left and right sibling's right- and left-links, mark the internal page as deleted, and update the pointer in the leaf page to point to the child of the deleted internal page. Then recurse to the child, until we reach the leaf level. This has the nice extra property that we don't need the incomplete-action tracking in WAL recovery. I'd like to get rid of that anyway. I am not very knowledgeable of this code, but at least with my limited knowledge I can't spot any problems with that approach. The only thing that seems a little unfortunate is that the next vacuum may need to finish cleaning things up; I thought at one point you were trying to get rid of the amvacuumcleanup stuff. But maybe I'm misremembering, and anyway it still seems like a step forward. 
I'm not sure what to do about stable branches. This could be back-patched, with the caveat that this introduces new WAL record types so standbys need to be upgraded before the master. But given the lack of field reports in the ten years this race condition has existed, I'm not sure it's worth the hassle. In the absence of at least one (and maybe several) reports from the field, -1 for back-patching this. At this point, it seems like far more people will be confused by the need to upgrade things in order than will benefit from the fix. -- Robert Haas EnterpriseDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] What's needed for cache-only table scan?
On Tue, Nov 12, 2013 at 9:45 AM, Kohei KaiGai kai...@kaigai.gr.jp wrote: It is a brief design proposal of a feature I'd like to implement on top of custom-scan APIs. Because it (probably) requires a few additional base features not only custom-scan, I'd like to see feedback from the hackers. The cache-only table scan, being in subject line, is an alternative scan logic towards sequential scan if all the referenced columns are cached. It shall allow to scan a particular table without storage access, thus make scan performance improved. So what? Which is difference from large shared_buffers configuration? This mechanism intends to cache a part of columns being referenced in the query, not whole of the records. It makes sense to the workloads that scan a table with many columns but qualifier references just a few columns, typically used to analytic queries, because it enables to reduce memory consumption to be cached, thus more number of records can be cached. In addition, it has another role from my standpoint. It also performs as fast data supplier towards GPU/MIC devices. When we move data to GPU device, the source address has to be a region marked as page- locked that is exempted from concurrent swap out, if we want CUDA or OpenCL to run asynchronous DMA transfer mode; the fastest one. Probably, here is no problem on construction of this table cache. All we need to do is inject a custom-scan node instead of seq-scan, then it can construct table cache in concurrence with regular seq- scan, even though the first access become a little bit slow down. My concern is how to handle a case when table gets modified. A straightforward idea is that each cached entries being modified shall be invalidated by callback mechanism. Trigger can help in case of INSERT, UPDATE, DELETE and TRUNCATE. Furthermore, it's better if extension could inject its own trigger definition at RelationBuildTriggers() on the fly, to perform the feature transparently. On the other hand, we have no way to get control around VACUUM. I want to have a hook that allows extensions to get control when a page got vacuumed. Once we have such a hook, it enables to invalidate cached entries being indexed by tid, but already vacuumed. Its best location is probably lazy_scan_heap() to call back extension for each page getting vacuumed, with How about your opinion? I think it's hard to say in the abstract. I'd suggest adding the hooks you feel like you need and see what the resulting patch looks like. Then post that and we can talk about how (and whether) to do it better. My personal bet is that triggers are the wrong way to do something like this; I'd look to do it all with hooks. Of course, figuring how to position those hooks so that they are maintainable and don't affect performance when not used is the tricky part. I'd like to find out the best way to implement this table-caching mechanism within scope of v9.4 functionality set. Any ideas, comments or suggestions are welcome. I think that Tom is right: there's not time to get something like this done for 9.4. If you start working relatively soon, I think you could hope to have a good proposal in time for 9.5. -- Robert Haas EnterpriseDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] pg_dump and pg_dumpall in real life (proposal)
-BEGIN PGP SIGNED MESSAGE- Hash: SHA1 On 11/12/2013 03:28 PM, Stephen Frost wrote: * Rafael Martinez (r.m.guerr...@usit.uio.no) wrote: Comments? Create a wiki page for it. :) What about this to start with?: https://wiki.postgresql.org/wiki/Pg_dump_improvements - -- Rafael Martinez Guerrero Center for Information Technology University of Oslo, Norway PGP Public Key: http://folk.uio.no/rafael/ -BEGIN PGP SIGNATURE- Version: GnuPG v1.4.11 (GNU/Linux) Comment: Using GnuPG with Thunderbird - http://www.enigmail.net/ iEYEARECAAYFAlKCTfAACgkQBhuKQurGihSrYQCeKyYVthpbk47hGjayBjidqaFL nysAn3JJjGT/8SuDUi2Nt6hEZ4eu1smz =7wjV -END PGP SIGNATURE- -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] What's needed for cache-only table scan?
2013/11/12 Tom Lane t...@sss.pgh.pa.us: Kohei KaiGai kai...@kaigai.gr.jp writes: The cache-only table scan, as in the subject line, is an alternative scan logic to sequential scan, applicable when all the referenced columns are cached. This seems like a pretty dubious idea to me --- you're talking about expending large amounts of memory on a single-purpose cache, without any clear way to know if the data will ever be used before it gets invalidated (and the life expectancy of the cache doesn't sound like it'd be very high either).

Thanks for your comments. I assume this table cache would be applied to tables that hold data sets for analytic workloads. Even though it may consume a large amount of memory, it will hit its major workload in this use scenario. Probably, it will have an upper limit on cache memory usage, and need a mechanism to reclaim cache blocks starting from the oldest one. There is nothing special here. Also, even though I call it a table cache, it is similar to an in-memory database backed by rich memory hardware.

I'd like to find out the best way to implement this table-caching mechanism within the scope of the v9.4 functionality set. There's no possible way you'll finish this for 9.4. Keep in mind that CF3 starts Friday, and CF4 starts 2014-01-15, and project policy is that major feature patches should arrive (in at least rough form) by CF3. Even ignoring that policy, this is more than 2 months worth of work.

Yes, I understand it is not possible to submit the whole patch before the CF3 deadline. So, I'd like to find a way to implement it as an extension using facilities supported by, or to be enhanced in, v9.4.

Best regards,
-- KaiGai Kohei kai...@kaigai.gr.jp

-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
[HACKERS] Errors on missing pg_subtrans/ files with 9.3
G'day list. Didn't get any interest in pgsql-general, thought I'd try my luck here, which perhaps would be more fitting in case I've stumbled upon an edge case issue or something...

I've recently upgraded a number of servers from PostgreSQL 9.2.5 to 9.3.1 and have started getting the following errors every couple of hours along with some failed transactions. I have been unable to track down any sort of rhyme or reason for the errors yet, so I figured I'd check with the mailing list to see if I've potentially hit a bug. The errors look like the following:

ERROR: could not access status of transaction 4179979
Could not open file pg_subtrans/003F: No such file or directory.

Indeed, pg_subtrans/003F does not exist on the file system. I have found similar errors mentioned before a number of years ago with a thread starting here, but couldn't find any real resolution. http://www.postgresql.org/message-id/2009150225.076c2...@list.ru

A few details:

- the new database cluster was created by dumping global settings using pg_dumpall and then dumping each database individually. Several of the databases within this cluster have PostGIS installed and I wanted to do a full PostGIS upgrade, which necessitated individual dumps.

- this is the only database in a cluster of 14 databases that is exhibiting the problem and it is sporadic at best. This database is fairly write-intensive, and has been up since November 6th. In that time, we've had 17 such errors.

- the pg_subtrans files mentioned in the log file do not exist on the file system.

- I have rebuilt the cluster several times, as well as cleaned out the pg_statistic table and run `VACUUM FULL ANALYZE` on every database in case that was affecting it based on some similar threads back from the PostgreSQL 8.4 days, but there was no effect and the errors still occur.

Anyone have any suggestions or ideas? Thankfully these are dev systems so I have some room to experiment and can post some more details as necessary. The following are the log file entries from one of the errors. Some details have been sanitized for privacy reasons, but the overall lines themselves are accurate.

Nov 10 10:14:02 dev-server postgres[29835]: [4-1] user=dev,db=dev ERROR: could not access status of transaction 4179979
Nov 10 10:14:02 dev-server postgres[29835]: [4-2] user=dev,db=dev DETAIL: Could not open file pg_subtrans/003F: No such file or directory.
Nov 10 10:14:02 dev-server postgres[29835]: [4-3] user=dev,db=dev CONTEXT: SQL statement SELECT 1 FROM ONLY typhon.collection_batches x WHERE id OPERATOR(pg_catalog.=) $1 FOR KEY SHARE OF x
Nov 10 10:14:02 dev-server postgres[29835]: [4-4] user=dev,db=dev STATEMENT: insert into raw (url, id) values ($1, $2)
Nov 10 10:14:02 dev-server postgres[29839]: [4-1] user=dev,db=dev ERROR: could not access status of transaction 4179979
Nov 10 10:14:02 dev-server postgres[29839]: [4-2] user=dev,db=dev DETAIL: Could not open file pg_subtrans/003F: No such file or directory.
Nov 10 10:14:02 dev-server postgres[29839]: [4-3] user=dev,db=dev CONTEXT: SQL statement SELECT 1 FROM ONLY typhon.collection_batches x WHERE id OPERATOR(pg_catalog.=) $1 FOR KEY SHARE OF x Nov 10 10:14:02 dev-server postgres[29839]: [4-4] user=dev,db=dev STATEMENT: update listings set value_a=$1 where id=$2 Nov 10 10:14:02 dev-server postgres[29827]: [4-1] user=dev,db=dev ERROR: could not access status of transaction 4179979 Nov 10 10:14:02 dev-server postgres[29827]: [4-2] user=dev,db=dev DETAIL: Could not open file pg_subtrans/003F: No such file or directory. Nov 10 10:14:02 dev-server postgres[29827]: [4-3] user=dev,db=dev CONTEXT: SQL statement SELECT 1 FROM ONLY typhon.collection_batches x WHERE id OPERATOR(pg_catalog.=) $1 FOR KEY SHARE OF x I can provide additional details like my postgresql.conf if it would help, although it's fairly standard outside of tuning as provided by pgtune. Cheers and thanks, List. -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Re: [BUGS] BUG #7873: pg_restore --clean tries to drop tables that don't exist
Hello, here is a patch with fault-tolerant DROP TRIGGER and DROP RULE support:

drop trigger [if exists] trgname on [if exists] tablename;
drop rule [if exists] trgname on [if exists] tablename;

Regards Pavel

2013/11/11 Pavel Stehule pavel.steh...@gmail.com 2013/11/11 Tom Lane t...@sss.pgh.pa.us Andres Freund and...@2ndquadrant.com writes: Turns out that's bogus - ALTER TABLE has two levels of NOT EXISTS. Maybe we should just do the same for DROP TRIGGER? DROP TRIGGER [ IF EXISTS ] name ON table_name [ IF EXISTS ] [ CASCADE | RESTRICT ] Works for me. for me too tomorrow I'll prepare patch Regards Pavel regards, tom lane -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers

diff --git a/doc/src/sgml/ref/drop_rule.sgml b/doc/src/sgml/ref/drop_rule.sgml
index c845872..585f67b 100644
--- a/doc/src/sgml/ref/drop_rule.sgml
+++ b/doc/src/sgml/ref/drop_rule.sgml
@@ -21,7 +21,7 @@ PostgreSQL documentation
 <refsynopsisdiv>
<synopsis>
-DROP RULE [ IF EXISTS ] <replaceable class="PARAMETER">name</replaceable> ON <replaceable class="PARAMETER">table_name</replaceable> [ CASCADE | RESTRICT ]
+DROP RULE [ IF EXISTS ] <replaceable class="PARAMETER">name</replaceable> ON [ IF EXISTS ] <replaceable class="PARAMETER">table_name</replaceable> [ CASCADE | RESTRICT ]
</synopsis>
 </refsynopsisdiv>
@@ -42,8 +42,8 @@ DROP RULE [ IF EXISTS ] <replaceable class="PARAMETER">name</replaceable> ON re
   <term><literal>IF EXISTS</literal></term>
   <listitem>
    <para>
-     Do not throw an error if the rule does not exist. A notice is issued
-     in this case.
+     Do not throw an error if the rule does not exist (or if a parent table
+     does not exist). A notice is issued in this case.
    </para>
   </listitem>
  </varlistentry>
diff --git a/doc/src/sgml/ref/drop_trigger.sgml b/doc/src/sgml/ref/drop_trigger.sgml
index 3ec6cc7..1f46eff 100644
--- a/doc/src/sgml/ref/drop_trigger.sgml
+++ b/doc/src/sgml/ref/drop_trigger.sgml
@@ -21,7 +21,7 @@ PostgreSQL documentation
 <refsynopsisdiv>
<synopsis>
-DROP TRIGGER [ IF EXISTS ] <replaceable class="PARAMETER">name</replaceable> ON <replaceable class="PARAMETER">table_name</replaceable> [ CASCADE | RESTRICT ]
+DROP TRIGGER [ IF EXISTS ] <replaceable class="PARAMETER">name</replaceable> ON [ IF EXISTS ] <replaceable class="PARAMETER">table_name</replaceable> [ CASCADE | RESTRICT ]
</synopsis>
 </refsynopsisdiv>
@@ -44,8 +44,8 @@ DROP TRIGGER [ IF EXISTS ] <replaceable class="PARAMETER">name</replaceable> ON
   <term><literal>IF EXISTS</literal></term>
   <listitem>
    <para>
-     Do not throw an error if the trigger does not exist. A notice is issued
-     in this case.
+     Do not throw an error if the trigger does not exist (or parent table
+     does not exist). A notice is issued in this case.
/para /listitem /varlistentry diff --git a/src/backend/catalog/objectaddress.c b/src/backend/catalog/objectaddress.c index cecddf1..36ef9ae 100644 --- a/src/backend/catalog/objectaddress.c +++ b/src/backend/catalog/objectaddress.c @@ -423,7 +423,8 @@ static ObjectAddress get_relation_by_qualified_name(ObjectType objtype, List *objname, Relation *relp, LOCKMODE lockmode, bool missing_ok); static ObjectAddress get_object_address_relobject(ObjectType objtype, - List *objname, Relation *relp, bool missing_ok); + List *objname, Relation *relp, + bool missing_ok, bool missing_parent_ok); static ObjectAddress get_object_address_attribute(ObjectType objtype, List *objname, Relation *relp, LOCKMODE lockmode, bool missing_ok); @@ -464,7 +465,8 @@ static void getRelationIdentity(StringInfo buffer, Oid relid); */ ObjectAddress get_object_address(ObjectType objtype, List *objname, List *objargs, - Relation *relp, LOCKMODE lockmode, bool missing_ok) + Relation *relp, LOCKMODE lockmode, + bool missing_ok, bool missing_parent_ok) { ObjectAddress address; ObjectAddress old_address = {InvalidOid, InvalidOid, 0}; @@ -507,7 +509,9 @@ get_object_address(ObjectType objtype, List *objname, List *objargs, case OBJECT_TRIGGER: case OBJECT_CONSTRAINT: address = get_object_address_relobject(objtype, objname, - relation, missing_ok); + relation, + missing_ok, + missing_parent_ok); break; case OBJECT_DATABASE: case OBJECT_EXTENSION: @@ -622,7 +626,7 @@ get_object_address(ObjectType objtype, List *objname, List *objargs, */ if (!OidIsValid(address.objectId)) { - Assert(missing_ok); + Assert(missing_ok || missing_parent_ok); return address; } @@ -898,7 +902,9 @@ get_relation_by_qualified_name(ObjectType objtype, List *objname, */ static ObjectAddress get_object_address_relobject(ObjectType objtype, List *objname, - Relation *relp, bool missing_ok) + Relation *relp, + bool
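To illustrate the behaviour the patch is after, the extended syntax in use would look roughly like this (object names invented):

    DROP TRIGGER IF EXISTS audit_trg ON IF EXISTS invoices;
    DROP RULE IF EXISTS redirect_rule ON IF EXISTS invoices;

With both IF EXISTS clauses present, the statements are expected to complete with a notice rather than an error even when the trigger/rule or the table itself is missing.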
Re: [HACKERS] Patch for fail-back without fresh backup
On Thu, Oct 24, 2013 at 11:14:14PM +0300, Heikki Linnakangas wrote: On 24.10.2013 23:07, Josh Berkus wrote: On 10/24/2013 11:12 AM, Heikki Linnakangas wrote: On 24.10.2013 20:39, Josh Berkus wrote: On 10/24/2013 04:15 AM, Pavan Deolasee wrote: If we do what you are suggesting, it seems like a single line patch to me. In XLogSaveBufferForHint(), we probably need to look at this additional GUC to decide whether or not to backup the block. Wait, what? Why are we having an additional GUC? I'm opposed to the idea of having a GUC to enable failback. When would anyone using replication ever want to disable that? For example, if you're not replicating for high availability purposes, but to keep a reporting standby up-to-date. What kind of overhead are we talking about here? You probably said, but I've had a mail client meltdown and lost a lot of my -hackers emails. One extra WAL record whenever a hint bit is set on a page, for the first time after a checkpoint. In other words, a WAL record needs to be written in the same circumstances as with page checksums, but the WAL records are much smaller as they don't need to contain a full page image, just the block number of the changed block. Or maybe we'll write the full page image after all, like with page checksums, just without calculating the checksums. It might be tricky to skip the full-page image, because then a subsequent change of the page (which isn't just a hint-bit update) needs to somehow know it needs to take a full page image even though a WAL record for it was already written. Sorry to be replying late to this, but while I am not worried about the additional WAL volume, does this change require the transaction to now wait for a WAL sync to disk before continuing? I thought that was the down-side to WAL logging hint bits, not the WAL volume itself. -- Bruce Momjian br...@momjian.ushttp://momjian.us EnterpriseDB http://enterprisedb.com + Everyone has their own god. + -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Add \i option to bring in the specified file as a quoted literal
On Wed, Oct 23, 2013 at 10:31:39AM +0530, Amit Kapila wrote: On Tue, Oct 22, 2013 at 3:04 AM, Piotr Marcinczyk pmarc...@gmail.com wrote: Hi, I would like to implement item from TODO marked as easy: Add \i option to bring in the specified file as a quoted literal. I understand intent of this item, to be able to have parts of query written in separate files (now it is impossible, because \i tries to execute content of file as a separate command by function process_file). For the usecase discussed in the mail chain of that TODO item, Robert Haas has provided an alternative to achieve it, please check below link: http://www.postgresql.org/message-id/AANLkTi=7c8xfyf7uqw0y+si8ondkoy2nx8jc4bu0g...@mail.gmail.com If you think that alternative is not sufficient for the use case, then adding new option/syntax is worth, otherwise it might be a shortcut or other form of some existing way which can be useful depending on how frequently users use this syntax. So, can we remove this TODO item? -- Bruce Momjian br...@momjian.ushttp://momjian.us EnterpriseDB http://enterprisedb.com + Everyone has their own god. + -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
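The linked message is not quoted here, but the usual way to pull a file's contents into a query as a quoted literal with existing psql features is a backquoted \set plus the :'variable' interpolation syntax (file name invented):

    \set snippet `cat /tmp/snippet.txt`
    SELECT * FROM docs WHERE body = :'snippet';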
Re: [HACKERS] What's needed for cache-only table scan?
2013/11/12 Robert Haas robertmh...@gmail.com: On Tue, Nov 12, 2013 at 9:45 AM, Kohei KaiGai kai...@kaigai.gr.jp wrote: It is a brief design proposal of a feature I'd like to implement on top of custom-scan APIs. Because it (probably) requires a few additional base features not only custom-scan, I'd like to see feedback from the hackers. The cache-only table scan, being in subject line, is an alternative scan logic towards sequential scan if all the referenced columns are cached. It shall allow to scan a particular table without storage access, thus make scan performance improved. So what? Which is difference from large shared_buffers configuration? This mechanism intends to cache a part of columns being referenced in the query, not whole of the records. It makes sense to the workloads that scan a table with many columns but qualifier references just a few columns, typically used to analytic queries, because it enables to reduce memory consumption to be cached, thus more number of records can be cached. In addition, it has another role from my standpoint. It also performs as fast data supplier towards GPU/MIC devices. When we move data to GPU device, the source address has to be a region marked as page- locked that is exempted from concurrent swap out, if we want CUDA or OpenCL to run asynchronous DMA transfer mode; the fastest one. Probably, here is no problem on construction of this table cache. All we need to do is inject a custom-scan node instead of seq-scan, then it can construct table cache in concurrence with regular seq- scan, even though the first access become a little bit slow down. My concern is how to handle a case when table gets modified. A straightforward idea is that each cached entries being modified shall be invalidated by callback mechanism. Trigger can help in case of INSERT, UPDATE, DELETE and TRUNCATE. Furthermore, it's better if extension could inject its own trigger definition at RelationBuildTriggers() on the fly, to perform the feature transparently. On the other hand, we have no way to get control around VACUUM. I want to have a hook that allows extensions to get control when a page got vacuumed. Once we have such a hook, it enables to invalidate cached entries being indexed by tid, but already vacuumed. Its best location is probably lazy_scan_heap() to call back extension for each page getting vacuumed, with How about your opinion? I think it's hard to say in the abstract. I'd suggest adding the hooks you feel like you need and see what the resulting patch looks like. Then post that and we can talk about how (and whether) to do it better. My personal bet is that triggers are the wrong way to do something like this; I'd look to do it all with hooks. Of course, figuring how to position those hooks so that they are maintainable and don't affect performance when not used is the tricky part. Yep, right now, all the idea is still in my brain, so it takes additional times to make up it as a patch form. The reason why I'm inclined to use trigger is, it is already supported on the place I want to get control. So, it enables to omit efforts to add and maintenance similar features in the limited time slot. I'd like to find out the best way to implement this table-caching mechanism within scope of v9.4 functionality set. Any ideas, comments or suggestions are welcome. I think that Tom is right: there's not time to get something like this done for 9.4. If you start working relatively soon, I think you could hope to have a good proposal in time for 9.5. 
Yes, I agree his suggestion is realistic. It is nearly impossible to get the whole table-cache mechanism done by the CF3 deadline this week, so I plan to implement it as an extension using the facilities available in v9.4. It seems to me the last piece needed to implement this feature (custom-scan being a prerequisite, of course) is a hook in vacuum. Does it make sense to propose some other worthwhile but small contrib module that exercises this last piece? For example, a contrib module that reports the progress of vacuum... Thanks, -- KaiGai Kohei kai...@kaigai.gr.jp -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
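A minimal sketch of the kind of vacuum hook being discussed, with invented names (nothing like this exists in core; it is only meant to show the shape of the interface an extension-based table cache would need):

    /* hypothetical hook, called once per heap page processed by vacuum */
    typedef void (*vacuum_page_hook_type) (Relation onerel,
                                           BlockNumber blkno,
                                           Buffer buffer);
    extern PGDLLIMPORT vacuum_page_hook_type vacuum_page_hook;

    /* in lazy_scan_heap(), after a page has been vacuumed: */
    if (vacuum_page_hook != NULL)
        (*vacuum_page_hook) (onerel, blkno, buf);

An extension could then drop cached tuples indexed by tid for that block, which is the invalidation path that triggers cannot cover.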
Re: [HACKERS] Errors on missing pg_subtrans/ files with 9.3
Hi, On 2013-11-12 10:56:55 -0500, J Smith wrote: G'day list. Didn't get any interest in pgsql-general, thought I'd try my luck here, which perhaps would be more fitting in case I've stumbled upon an edge case issue or something... Normally the bug report for/the -bugs mailing list is the right one for things like this. ERROR could not access status of transaction 4179979 Could not open file pg_subtrans/003F: No such file or directory. A couple of question: * Did you have any OS, hardware crashes? * Do you run with fsync=off or full_page_writes=off? * Could you post the pg_controldata output? * Is it always the same xid? * Does SELECT count(*) FROM pg_prepared_xacts; return 0? Greetings, Andres Freund -- Andres Freund http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training Services -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
[HACKERS] Transaction-lifespan memory leak with plpgsql DO blocks
I spent a bit of time looking into the memory leak reported here: http://www.postgresql.org/message-id/52376c35.5040...@gmail.com I think this test case doesn't have much to do with the complainant's original problem, but anyway it is exposing a genuine leakage issue. The difficulty is that when plpgsql wants to execute a simple expression, it does an ExecInitExpr to build the execution state tree for the expression, and then it keeps that around for the duration of the transaction. This was an intentional memory for speed tradeoff that we made years ago (replacing logic that had to do the ExecInitExpr over again during each function call). I think it works well enough for plain old functions, although somebody whose transactions executed thousands of distinct plpgsql functions might beg to differ. However, in the example shown in the above message, we're executing tens of thousands of plpgsql DO blocks in a single transaction, and there's no possibility of sharing execution state trees across executions for those. We recover most of the memory associated with each DO block ... but not the ExecInitExpr trees. I believe this could be fixed in a reasonably localized fashion by making each DO block create its very own simple_eval_estate instead of sharing one with the rest of the transaction, and then making sure to throw that away on exit from the DO block. This would add some overhead per DO block, but not a huge amount (probably negligible in comparison to the block's parsing costs, anyway). A bigger objection is that the behavior around simple_eval_estate and simple_econtext_stack would become a lot more complicated and hard to understand. Possibly it would help to use PLpgSQL_execstate fields for them. Or we could say what the heck are you doing executing tens of thousands of DO blocks? Make it into a real live function; you'll save a lot of cycles on parsing costs. I'm not sure that this is a usage pattern we ought to be optimizing for. Thoughts? regards, tom lane -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
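The usage pattern at issue, reduced to its shape: each DO block below builds a fresh execution state tree for its simple expressions, and that memory is only reclaimed at transaction end, whereas a real function pays the setup cost once.

    BEGIN;
    DO $$ DECLARE x int; BEGIN x := 1 + 1; END $$;  -- repeated tens of thousands of times
    COMMIT;

    -- the suggested alternative: make it a real function (or loop inside one DO block)
    CREATE FUNCTION add_one(n int) RETURNS int LANGUAGE plpgsql AS
    $$ BEGIN RETURN n + 1; END $$;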
Re: [HACKERS] Errors on missing pg_subtrans/ files with 9.3
* J Smith (dark.panda+li...@gmail.com) wrote: I've recently upgraded a number of servers from PostgreSQL 9.2.5 to 9.3.1 and have started getting the following errors every couple of hours along with some failed transactions. How was this upgrade done? If you used pg_upgrade, what version of the pg_upgrade code did you use? As I recall, there was a bug in older versions which could exhibit in this way.. http://wiki.postgresql.org/wiki/20110408pg_upgrade_fix Thanks, Stephen signature.asc Description: Digital signature
Re: [HACKERS] Transaction-lifespan memory leak with plpgsql DO blocks
On 2013-11-12 11:18:32 -0500, Tom Lane wrote: Or we could say what the heck are you doing executing tens of thousands of DO blocks? Make it into a real live function; you'll save a lot of cycles on parsing costs. I'm not sure that this is a usage pattern we ought to be optimizing for. Exactly, I think it's not worth spending much code/complexity on that issue. Greetings, Andres Freund -- Andres Freund http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training Services -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] What's needed for cache-only table scan?
Kohei KaiGai kai...@kaigai.gr.jp writes: 2013/11/12 Tom Lane t...@sss.pgh.pa.us: There's no possible way you'll finish this for 9.4. Yes, I understand it is not possible to submit whole of the patch until CF3 deadline. So, I'd like to find out a way to implement it as an extension using facilities being supported or to be enhanced on v9.4. Oh! Okay, I misunderstood the context --- you meant this as an example use-case for the custom plan feature, right? Makes more sense now. I'm still dubious that it'd actually be very useful in itself, but it seems reasonable as a test case to make sure that a set of hooks for custom plans are sufficient to do something useful with. regards, tom lane -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
[HACKERS] Re: [COMMITTERS] pgsql: Fix pg_isolation_regress to work outside its build directory.
On Tue, Nov 12, 2013 at 7:05 AM, Andres Freund and...@2ndquadrant.com wrote: The only hack I currently can think of is to store argv[0] in a global variable and to run the find_other_exec() in the first run through isolation_start_test() :/. A patch to that end attached. Alternatively we could add a startup_function startup argument to regression_main() that's called later, when the environment is setup. That might be a better long-term solution, although I'll defer to the opinions of people who have worked on this code more than I have. But I've committed this for now, in the hopes of making the buildfarm green again. -- Robert Haas EnterpriseDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Errors on missing pg_subtrans/ files with 9.3
On 2013-11-12 11:25:03 -0500, Stephen Frost wrote: * J Smith (dark.panda+li...@gmail.com) wrote: I've recently upgraded a number of servers from PostgreSQL 9.2.5 to 9.3.1 and have started getting the following errors every couple of hours along with some failed transactions. How was this upgrade done? If you used pg_upgrade, what version of the pg_upgrade code did you use? As I recall, there was a bug in older versions which could exhibit in this way.. He referred to using pg_dumpall/pg_dump. But that bug was erroring out on pg_clog, not pg_subtrans, right? Getting such errors on pg_subtrans indicates corruption or a bug somewhere, we don't do them if the to-be-looked up xid is older than TransactionXmin and we zero pg_subtrans at restarts. My gut feeling is thats it's related to foreign key locks doing XactLockTableWait()s on too old transactions or something like that. Greetings, Andres Freund -- Andres Freund http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training Services -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Errors on missing pg_subtrans/ files with 9.3
On Tue, Nov 12, 2013 at 11:25 AM, Stephen Frost sfr...@snowman.net wrote: How was this upgrade done? If you used pg_upgrade, what version of the pg_upgrade code did you use? As I recall, there was a bug in older versions which could exhibit in this way.. http://wiki.postgresql.org/wiki/20110408pg_upgrade_fix As mentioned in the original post... the new database cluster was created by dumping global settings using pg_dumpall and then dumping each database individually. We haven't been able to use pg_upgrade as we rely heavily on PostGIS and do hard upgrades via pg_dump and postgis_restore.pl when we upgrade. Cheers -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Clang 3.3 Analyzer Results
Kevin Grittner kgri...@ymail.com wrote: Memory Error Double free: 1 Memory leak: 1 These both seemed legitimate to me. Patch attached. Any objections to applying it? I realize the memory leak is a tiny one in the regression testing code, so it could never amount to enough to matter; but it seems worth fixing just to avoid noise in code analyzers. -- Kevin Grittner EDB: http://www.enterprisedb.com The Enterprise PostgreSQL Companydiff --git a/src/bin/pg_basebackup/streamutil.c b/src/bin/pg_basebackup/streamutil.c index 1dfb80f..5faef52 100644 --- a/src/bin/pg_basebackup/streamutil.c +++ b/src/bin/pg_basebackup/streamutil.c @@ -117,7 +117,10 @@ GetConnection(void) while (true) { if (password) + { free(password); + password = NULL; + } if (dbpassword) { diff --git a/src/test/regress/pg_regress.c b/src/test/regress/pg_regress.c index a902e9b..e8dec46 100644 --- a/src/test/regress/pg_regress.c +++ b/src/test/regress/pg_regress.c @@ -1655,6 +1655,8 @@ run_schedule(const char *schedule, test_function tfunc) } } + free_stringlist(ignorelist); + fclose(scf); } -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
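For the record, the pattern the first hunk guards against looks like this in isolation (illustrative C only, not streamutil.c itself): freeing without clearing the pointer lets a later pass through the loop free the same allocation again.

    #include <stdlib.h>
    #include <string.h>

    static char *password = NULL;

    static void
    connection_attempt(int need_password)
    {
        if (password)
        {
            free(password);
            password = NULL;    /* the patch adds the equivalent of this line */
        }
        if (need_password)
            password = strdup("secret");
    }

    int
    main(void)
    {
        connection_attempt(1);  /* allocates */
        connection_attempt(0);  /* frees; must also clear the pointer */
        connection_attempt(0);  /* would double-free if the pointer were left stale */
        return 0;
    }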
Re: [HACKERS] What's needed for cache-only table scan?
On Tue, Nov 12, 2013 at 11:45 AM, Kohei KaiGai kai...@kaigai.gr.jp wrote: Hello, It is a brief design proposal of a feature I'd like to implement on top of custom-scan APIs. Because it (probably) requires a few additional base features not only custom-scan, I'd like to see feedback from the hackers. The cache-only table scan, being in subject line, is an alternative scan logic towards sequential scan if all the referenced columns are cached. It shall allow to scan a particular table without storage access, thus make scan performance improved. So what? Which is difference from large shared_buffers configuration? This mechanism intends to cache a part of columns being referenced in the query, not whole of the records. It makes sense to the workloads that scan a table with many columns but qualifier references just a few columns, typically used to analytic queries, because it enables to reduce memory consumption to be cached, thus more number of records can be cached. In addition, it has another role from my standpoint. It also performs as fast data supplier towards GPU/MIC devices. When we move data to GPU device, the source address has to be a region marked as page- locked that is exempted from concurrent swap out, if we want CUDA or OpenCL to run asynchronous DMA transfer mode; the fastest one. Wouldn't a columnar heap format be a better solution to this? -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Errors on missing pg_subtrans/ files with 9.3
G'day Andres. On Tue, Nov 12, 2013 at 11:13 AM, Andres Freund and...@2ndquadrant.com wrote: Hi, On 2013-11-12 10:56:55 -0500, J Smith wrote: G'day list. Didn't get any interest in pgsql-general, thought I'd try my luck here, which perhaps would be more fitting in case I've stumbled upon an edge case issue or something... Normally the bug report for/the -bugs mailing list is the right one for things like this. Ah, okay. I can redirect there if that's the better location. (Assuming it's a bug and not a misconfiguration on my part, although the configuration on these servers has been consistent since 9.0, and we've upgraded through each of the 9.x series while merging together new settings accordingly.) ERROR could not access status of transaction 4179979 Could not open file pg_subtrans/003F: No such file or directory. A couple of question: * Did you have any OS, hardware crashes? Haven't had any issues on this machine. * Do you run with fsync=off or full_page_writes=off? Both fsync and full_page_writes are in their default on configurations. I'll post our full postgresql.conf below, as it is quite short. * Could you post the pg_controldata output? Sure, posted below. * Is it always the same xid? Seems to be different every time. * Does SELECT count(*) FROM pg_prepared_xacts; return 0? Yes it does. Here's the postgresql.conf and pg_controldata dump. # postgresql.conf listen_addresses = '*' wal_level = hot_standby checkpoint_completion_target = 0.7 max_wal_senders = 3 wal_keep_segments = 64 logging_collector = off datestyle = 'iso, mdy' timezone = 'Canada/Eastern' lc_messages = 'en_US.UTF-8' lc_monetary = 'en_US.UTF-8' lc_numeric = 'en_US.UTF-8' lc_time = 'en_US.UTF-8' default_text_search_config = 'pg_catalog.english' default_statistics_target = 100 maintenance_work_mem = 1GB effective_cache_size = 44GB work_mem = 320MB wal_buffers = 4MB checkpoint_segments = 64 shared_buffers = 15GB max_connections = 200 # pg_controldata pg_control version number:937 Catalog version number: 201306121 Database system identifier: 5942827484423487452 Database cluster state: in production pg_control last modified: Tue 12 Nov 2013 11:39:07 AM EST Latest checkpoint location: 30/50F7A180 Prior checkpoint location:30/4914C428 Latest checkpoint's REDO location:30/4CB2C450 Latest checkpoint's REDO WAL file:00010030004C Latest checkpoint's TimeLineID: 1 Latest checkpoint's PrevTimeLineID: 1 Latest checkpoint's full_page_writes: on Latest checkpoint's NextXID: 0/5138174 Latest checkpoint's NextOID: 3574036 Latest checkpoint's NextMultiXactId: 5762623 Latest checkpoint's NextMultiOffset: 11956824 Latest checkpoint's oldestXID:1673 Latest checkpoint's oldestXID's DB: 1 Latest checkpoint's oldestActiveXID: 5138174 Latest checkpoint's oldestMultiXid: 1 Latest checkpoint's oldestMulti's DB: 1 Time of latest checkpoint:Tue 12 Nov 2013 11:35:37 AM EST Fake LSN counter for unlogged rels: 0/1 Minimum recovery ending location: 0/0 Min recovery ending loc's timeline: 0 Backup start location:0/0 Backup end location: 0/0 End-of-backup record required:no Current wal_level setting:hot_standby Current max_connections setting: 200 Current max_prepared_xacts setting: 0 Current max_locks_per_xact setting: 64 Maximum data alignment: 8 Database block size: 8192 Blocks per segment of large relation: 131072 WAL block size: 8192 Bytes per WAL segment:16777216 Maximum length of identifiers:64 Maximum columns in an index: 32 Maximum size of a TOAST chunk:1996 Date/time type storage: 64-bit integers Float4 argument passing: by value Float8 
argument passing: by value Data page checksum version: 0 Cheers and thanks for the help all. -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Errors on missing pg_subtrans/ files with 9.3
* Andres Freund (and...@2ndquadrant.com) wrote: He referred to using pg_dumpall/pg_dump. But that bug was erroring out on pg_clog, not pg_subtrans, right? Yeah, that was pg_clog. Obviously responded before really looking at it. :) My gut feeling is thats it's related to foreign key locks doing XactLockTableWait()s on too old transactions or something like that. Ugh, that wouldn't be good.. Thanks, Stephen signature.asc Description: Digital signature
[HACKERS] Can we add sample systemd service file to git repo?
Hi, Many distros are switching/switched to systemd. In the git repo, we only have init script for SysV. Would it make sense to add systemd service script to our tarball as well? This could also standardize the systemd scripts used in other distros. http://svn.pgrpms.org/browser/rpm/redhat/9.3/postgresql/F-19/postgresql-9.3.service is an example of what we use in the RPMs. (if website fails, please just reload) Attached is a modified version that will work with the compile defaults. Regards, -- Devrim GÜNDÜZ Principal Systems Engineer @ EnterpriseDB: http://www.enterprisedb.com PostgreSQL Danışmanı/Consultant, Red Hat Certified Engineer Community: devrim~PostgreSQL.org, devrim.gunduz~linux.org.tr http://www.gunduz.org Twitter: http://twitter.com/devrimgunduz

[Unit]
Description=PostgreSQL 9.3 database server
After=syslog.target
After=network.target

[Service]
Type=forking
User=postgres
Group=postgres

# Note: avoid inserting whitespace in these Environment= lines, or you may
# break postgresql-setup.

# Port number for server to listen on
Environment=PGPORT=5432

# Location of database directory
Environment=PGDATA=/usr/local/pgsql/data/

# Where to send early-startup messages from the server (before the logging
# options of postgresql.conf take effect)
StandardOutput=syslog

# Disable OOM kill on the postmaster
OOMScoreAdjust=-1000

ExecStart=/usr/local/pgsql/bin/pg_ctl start -D ${PGDATA} -s -o "-p ${PGPORT}" -w -t 300
ExecStop=/usr/local/pgsql/bin/pg_ctl stop -D ${PGDATA} -s -m fast
ExecReload=/usr/local/pgsql/bin/pg_ctl reload -D ${PGDATA} -s

# Give a reasonable amount of time for the server to start up/shut down
TimeoutSec=300

[Install]
WantedBy=multi-user.target

signature.asc Description: This is a digitally signed message part
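For anyone trying the attached unit file, installing and enabling it typically looks like this (assuming it is saved as postgresql-9.3.service):

    cp postgresql-9.3.service /etc/systemd/system/
    systemctl daemon-reload
    systemctl enable postgresql-9.3.service
    systemctl start postgresql-9.3.service
    systemctl status postgresql-9.3.service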
Re: [HACKERS] Errors on missing pg_subtrans/ files with 9.3
* J Smith (dark.panda+li...@gmail.com) wrote: We haven't been able to use pg_upgrade as we rely heavily on PostGIS and do hard upgrades via pg_dump and postgis_restore.pl when we upgrade. Did you also upgrade to PostGIS 2.x as part of this..? Seems like it'd be unrelated, but one never knows. Any chance you could distill this down into a small test case which exhibits the problem? I'm guessing 'no', but figured I'd ask anyway.. Thanks, Stephen signature.asc Description: Digital signature
Re: [HACKERS] Errors on missing pg_subtrans/ files with 9.3
Hi, On 2013-11-12 11:46:19 -0500, J Smith wrote: * Does SELECT count(*) FROM pg_prepared_xacts; return 0? Yes it does. Could you show the output? Do you actually use prepared xacts actively? Do you actively use row level locking? Is there high concurrency in that environment? In short, could you describe your database usage a bit? Did you restart the database since getting the errors? Latest checkpoint's NextXID: 0/5138174 Latest checkpoint's NextOID: 3574036 Latest checkpoint's NextMultiXactId: 5762623 Latest checkpoint's NextMultiOffset: 11956824 Latest checkpoint's oldestXID:1673 Latest checkpoint's oldestXID's DB: 1 Latest checkpoint's oldestActiveXID: 5138174 Latest checkpoint's oldestMultiXid: 1 Interesting. Pretty high multixact usage. Greetings, Andres Freund -- Andres Freund http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training Services -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Clang 3.3 Analyzer Results
Kevin Grittner kgri...@ymail.com writes: Kevin Grittner kgri...@ymail.com wrote: Memory Error Double free: 1 Memory leak: 1 These both seemed legitimate to me. Patch attached. Any objections to applying it? I realize the memory leak is a tiny one in the regression testing code, so it could never amount to enough to matter; but it seems worth fixing just to avoid noise in code analyzers. The logic, if you can call it that, in streamutil.c makes my head hurt. I think it needs more work than what you did here --- for one thing, the free(password) call results in values[i] becoming a dangling pointer to freed memory, and it's not exactly obvious that that pointer will get overwritten again before it's used. Moreover, although the apparent intention of the dbpassword static state is to allow a password to be saved and reused across calls, it kinda looks like that's broken by the odd choice to make dbgetpassword also static. At the very least that choice makes it a lot harder to analyze what will happen in calls after the first. I think the looping to get a password here should be thrown out and rewritten from scratch, or at least cribbed from someplace with less contorted logic. regards, tom lane -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
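As a sketch of the less contorted shape being asked for (not a proposed patch; PQconnectdbParams and PQconnectionNeedsPassword are real libpq calls, but prompt_for_password and the array handling are invented for illustration): keep a single saved copy of the password and repoint the values array at it on every attempt, so no array slot is left dangling after a free.

    #include <stdio.h>
    #include "libpq-fe.h"

    extern char *prompt_for_password(const char *prompt);   /* hypothetical helper */

    static char *saved_password = NULL;     /* reused across connection attempts */

    static PGconn *
    connect_with_retry(const char **keywords, const char **values, int pwd_slot)
    {
        for (;;)
        {
            PGconn *conn;

            values[pwd_slot] = saved_password;  /* always points at the live copy */

            conn = PQconnectdbParams(keywords, values, 0);
            if (PQstatus(conn) == CONNECTION_OK)
                return conn;

            if (PQconnectionNeedsPassword(conn) && saved_password == NULL)
            {
                PQfinish(conn);
                saved_password = prompt_for_password("Password: ");
                continue;       /* retry with the password filled in */
            }

            fprintf(stderr, "connection failed: %s", PQerrorMessage(conn));
            PQfinish(conn);
            return NULL;
        }
    }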
Re: [HACKERS] What's needed for cache-only table scan?
2013/11/12 Claudio Freire klaussfre...@gmail.com: On Tue, Nov 12, 2013 at 11:45 AM, Kohei KaiGai kai...@kaigai.gr.jp wrote: Hello, It is a brief design proposal of a feature I'd like to implement on top of custom-scan APIs. Because it (probably) requires a few additional base features not only custom-scan, I'd like to see feedback from the hackers. The cache-only table scan, being in subject line, is an alternative scan logic towards sequential scan if all the referenced columns are cached. It shall allow to scan a particular table without storage access, thus make scan performance improved. So what? Which is difference from large shared_buffers configuration? This mechanism intends to cache a part of columns being referenced in the query, not whole of the records. It makes sense to the workloads that scan a table with many columns but qualifier references just a few columns, typically used to analytic queries, because it enables to reduce memory consumption to be cached, thus more number of records can be cached. In addition, it has another role from my standpoint. It also performs as fast data supplier towards GPU/MIC devices. When we move data to GPU device, the source address has to be a region marked as page- locked that is exempted from concurrent swap out, if we want CUDA or OpenCL to run asynchronous DMA transfer mode; the fastest one. Wouldn't a columnar heap format be a better solution to this? I've implemented using FDW, however, it requires application adjust its SQL to replace CREATE TABLE by CREATE FOREIGN TABLE. In addition, it lost a good feature of regular heap, like index scan if its cost is smaller than sequential columnar scan. Thanks, -- KaiGai Kohei kai...@kaigai.gr.jp -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] logical changeset generation v6.6
On Mon, Nov 11, 2013 at 12:00 PM, Andres Freund and...@2ndquadrant.com wrote: [ updated patch-set ] I'm pretty happy with what's now patch #1, f/k/a known as patch #3, and probably somewhere else in the set before that. At any rate, I refer to 0001-wal_decoding-Add-wal_level-logical-and-log-data-requ.patch.gz. I think the documentation still needs a bit of work. It's not particularly clean to just change all the places that refer to the need to set wal_level to archive (or hot_standby) level to instead refer to archive (or hot_standby, logical). If we're going to do it that way, I think we definitely need a connecting word between hot_standby and logical, specifically or. But I'm wondering if would be better to instead change those places to say archive (or any higher setting). You've actually changed the meaning of this section (and not in a good way): be set at server start. varnamewal_level/ must be set -to literalarchive/ or literalhot_standby/ to allow -connections from standby servers. +to literalarchive/, literalhot_standby/ or literallogical/ +to allow connections from standby servers. I think that the previous text meant that you needed archive - or, if you want to allow connections, hot_standby. The new text loses that nuance. I'm tempted to think that we're better off calling this logical decoding rather than logical replication. At least, we should standardize on one or the other. If we go with decoding, then fix these: +* For logical replication, we need the tuple even if we're doing a +/* Do we need to WAL-log information required only for Hot Standby and logical replication? */ +/* Do we need to WAL-log information required only for logical replication? */ (and we should go back and correct the instance already added to the ALTER TABLE documentation) Is there any special reason why RelationIsLogicallyLogged(), which is basically a three-pronged test, does one of those tests in a macro and defers the other two to a function? Why not just put it all in the macro? I did some performance testing on the previous iteration of this patch, just my usual set of 30-minute pgbench runs. I tried it with wal_level=hot_standby and wal_level=logical. 32-clients, scale factor 300, shared_buffers = 8GB, maintenance_work_mem = 4GB, synchronous_commit = off, checkpoint_segments = 300, checkpoint_timeout = 15min, checkpoint_completion_target = 0.9. The results came out like this: hot_standby tps = 15070.229005 (including connections establishing) hot_standby tps = 14769.905054 (including connections establishing) hot_standby tps = 15119.350014 (including connections establishing) logical tps = 14713.523689 (including connections establishing) logical tps = 14799.242855 (including connections establishing) logical tps = 14557.538038 (including connections establishing) The runs were interleaved, but I've shown them here grouped by the wal_level in use. If you compare the median values, there's about a 1% regression there with wal_level=logical, but that might not even be significant - and if it is, well, that's why this feature has an off switch. -* than its parent. Musn't recurse here, or we might get a stack overflow +* than its parent. May not recurse here, or we might get a stack overflow You don't need this change; it doesn't change the meaning. +* with fewer than PGPROC_MAX_CACHED_SUBXIDS. Note that it is fine +* if didLogXid isn't set for a transaction even though it appears +* in a wal record, we'll just superfluously log something. 
It'd be good to rewrite this comment to explain under what circumstances that can happen, or why it can't happen but that it would be OK if it did. I think we'd better separate the changes to catalog.c from the rest of this. Those are changing semantics in a significant way that needs to be separately called out. In particular, a user-created table in pg_catalog will be able to have indexes defined on it, will be able to be truncated, will be allowed to have triggers, etc. I think that's OK, but it shouldn't be a by-blow of the rest of this patch. -- Robert Haas EnterpriseDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
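On the RelationIsLogicallyLogged() point raised above, folding everything into the macro would presumably end up looking something like this; the three prongs shown (logical WAL level active, relation WAL-logged at all, relation not a system catalog) are my reading of what the macro/function pair currently checks, not the patch's actual definition:

    #define RelationIsLogicallyLogged(relation) \
        (XLogLogicalInfoActive() && \
         RelationNeedsWAL(relation) && \
         !IsCatalogRelation(relation))

One trade-off is that the full test is then inlined at every call site even when wal_level is lower than logical.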
Re: [HACKERS] Errors on missing pg_subtrans/ files with 9.3
On Tue, Nov 12, 2013 at 11:54 AM, Stephen Frost sfr...@snowman.net wrote: Did you also upgrade to PostGIS 2.x as part of this..? Seems like it'd be unrelated, but one never knows. Any chance you could distill this down into a small test case which exhibits the problem? I'm guessing 'no', but figured I'd ask anyway.. Yeah, we upgraded to PostGIS 2.1 as part of the upgrade. In the case of the specific database that is exhibiting the problem, we only really use PostGIS for storage and don't do a lot of spatial querying, as this database is basically for an aggregation service we run. In the case of the transactions that appear to be causing the problems, we don't even use geometry fields at all. Cheers -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] What's needed for cache-only table scan?
2013/11/12 Tom Lane t...@sss.pgh.pa.us: Kohei KaiGai kai...@kaigai.gr.jp writes: 2013/11/12 Tom Lane t...@sss.pgh.pa.us: There's no possible way you'll finish this for 9.4. Yes, I understand it is not possible to submit whole of the patch until CF3 deadline. So, I'd like to find out a way to implement it as an extension using facilities being supported or to be enhanced on v9.4. Oh! Okay, I misunderstood the context --- you meant this as an example use-case for the custom plan feature, right? Makes more sense now. I'm still dubious that it'd actually be very useful in itself, but it seems reasonable as a test case to make sure that a set of hooks for custom plans are sufficient to do something useful with. Yes. I intend to put most of this table-caching feature on the custom-scan APIs set, even though it may take additional hooks due to its nature, independent from planner and executor structure. So, are you thinking it is a feasible approach to focus on custom-scan APIs during the upcoming CF3, then table-caching feature as use-case of this APIs on CF4? Thanks, -- KaiGai Kohei kai...@kaigai.gr.jp -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Possible memory leak with SQL function?
Yeb Havinga yebhavi...@gmail.com writes: On 2013-09-13 18:32, Robert Haas wrote: On Thu, Sep 12, 2013 at 5:29 AM, Yeb Havinga yebhavi...@gmail.com wrote: We have a function that takes a value and returns a ROW type. With the function implemented in language SQL, when executing this function in a large transaction, memory usage of the backend process increases. When the function is implemented in PL/pgSQL, the memory usage was much less. I spent some time writing a test case, but failed to make a test case that showed the memory difference I described upthread, in contrast, in the test below, the SQL function actually shows a smaller memory footprint than the plpgsql counterpart. This test case only demonstrates that in a long running transaction, calling sql or plpgsql functions causes increasing memory usage that is not released until after commit. I looked into this, and found that what the test is showing is that use of a simple expression in a plpgsql DO block leaks some memory that's not reclaimed till end of transaction; see analysis at http://www.postgresql.org/message-id/7438.1384273...@sss.pgh.pa.us You had -- SELECT 'a' into b; -- memory constant i := fp('a'); -- memory increases -- i := fs('a'); -- memory increases but slow The SELECT doesn't leak because it's not a simple expression. The other two cases exhibit what's basically the same leak, though the SQL-function case leaks less memory per iteration and probably takes more cycles to do it, as a consequence of inlining the function's constant result into the calling expression. I'm not sure whether we're going to put much effort into fixing this leak; this usage pattern seems outside what DO blocks are intended for. (If you're going to execute the same code over and over again, it makes a whole lot more sense to define it as a real function, to avoid parsing overhead. Or just put it inside a loop in the DO text.) But anyway, the bottom line is that this test case doesn't seem to have much to do with your original problem with SQL functions. Can you have another go at recreating that leak? regards, tom lane -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
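Spelling the distinction out (fp here is assumed to be the composite-returning plpgsql function from the referenced test case, so only the shape matters): the SELECT ... INTO form goes through SPI and is not treated as a simple expression, while the plain assignment is, and it is its ExecInitExpr tree that accumulates until commit.

    DO $$
    DECLARE b text; i record;
    BEGIN
      SELECT 'a' INTO b;   -- not a simple expression; no lasting leak
      i := fp('a');        -- simple expression; state tree kept until end of transaction
    END $$;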
Re: [HACKERS] What's needed for cache-only table scan?
Kohei KaiGai kai...@kaigai.gr.jp writes: So, are you thinking it is a feasible approach to focus on custom-scan APIs during the upcoming CF3, then table-caching feature as use-case of this APIs on CF4? Sure. If you work on this extension after CF3, and it reveals that the custom scan stuff needs some adjustments, there would be time to do that in CF4. The policy about what can be submitted in CF4 is that we don't want new major features that no one has seen before, not that you can't make fixes to previously submitted stuff. Something like a new hook in vacuum wouldn't be a major feature, anyway. regards, tom lane -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] logical changeset generation v6.6
Hi, On 2013-11-12 12:13:54 -0500, Robert Haas wrote: On Mon, Nov 11, 2013 at 12:00 PM, Andres Freund and...@2ndquadrant.com wrote: [ updated patch-set ] I'm pretty happy with what's now patch #1, f/k/a known as patch #3, and probably somewhere else in the set before that. At any rate, I refer to 0001-wal_decoding-Add-wal_level-logical-and-log-data-requ.patch.gz. Cool. I think the documentation still needs a bit of work. It's not particularly clean to just change all the places that refer to the need to set wal_level to archive (or hot_standby) level to instead refer to archive (or hot_standby, logical). If we're going to do it that way, I think we definitely need a connecting word between hot_standby and logical, specifically or. Hm. I tried to make it archive, hot_standby or logical, but I see I screwed up along the way. But I'm wondering if would be better to instead change those places to say archive (or any higher setting). Works for me. We'd need to make sure there's a clear ordering recognizable in at least one place, but that's a good idea anyway. You've actually changed the meaning of this section (and not in a good way): be set at server start. varnamewal_level/ must be set -to literalarchive/ or literalhot_standby/ to allow -connections from standby servers. +to literalarchive/, literalhot_standby/ or literallogical/ +to allow connections from standby servers. I think that the previous text meant that you needed archive - or, if you want to allow connections, hot_standby. The new text loses that nuance. Yea, that's because it was lost on me in the first place... I'm tempted to think that we're better off calling this logical decoding rather than logical replication. At least, we should standardize on one or the other. If we go with decoding, then fix these: I agree. It all used to be logical replication but this feature really isn't about the replication, but about the extraction part. +* For logical replication, we need the tuple even if we're doing a +/* Do we need to WAL-log information required only for Hot Standby and logical replication? */ +/* Do we need to WAL-log information required only for logical replication? */ (and we should go back and correct the instance already added to the ALTER TABLE documentation) Is there any special reason why RelationIsLogicallyLogged(), which is basically a three-pronged test, does one of those tests in a macro and defers the other two to a function? Why not just put it all in the macro? We could, I basically didn't want to add too much inlined code everywhere when wal_level != logical, but the functions reduced in size since. I did some performance testing on the previous iteration of this patch, just my usual set of 30-minute pgbench runs. I tried it with wal_level=hot_standby and wal_level=logical. 32-clients, scale factor 300, shared_buffers = 8GB, maintenance_work_mem = 4GB, synchronous_commit = off, checkpoint_segments = 300, checkpoint_timeout = 15min, checkpoint_completion_target = 0.9. The results came out like this: hot_standby tps = 15070.229005 (including connections establishing) hot_standby tps = 14769.905054 (including connections establishing) hot_standby tps = 15119.350014 (including connections establishing) logical tps = 14713.523689 (including connections establishing) logical tps = 14799.242855 (including connections establishing) logical tps = 14557.538038 (including connections establishing) The runs were interleaved, but I've shown them here grouped by the wal_level in use. 
If you compare the median values, there's about a 1% regression there with wal_level=logical, but that might not even be significant - and if it is, well, that's why this feature has an off switch. That matches my test and is imo pretty ok. The overhead is from a slight increase in wal volume because during FPWs we do not just log the FPW but also the tuples. It will be worse if primary keys were changed regularly though. -* than its parent. Musn't recurse here, or we might get a stack overflow +* than its parent. May not recurse here, or we might get a stack overflow You don't need this change; it doesn't change the meaning. I thought that Musn't was a typo, because of the missing t before the n. But it obviously doesn't have to be part of this patch. +* with fewer than PGPROC_MAX_CACHED_SUBXIDS. Note that it is fine +* if didLogXid isn't set for a transaction even though it appears +* in a wal record, we'll just superfluously log something. It'd be good to rewrite this comment to explain under what circumstances that can happen, or why it can't happen but that it would be OK if it did. Ok. I think we'd better separate the changes to catalog.c from the rest of this. Those are changing semantics in a significant way that needs to be separately
Re: [HACKERS] logical changeset generation v6.6
On Tue, Nov 12, 2013 at 12:50 PM, Andres Freund and...@2ndquadrant.com wrote: Completely agreed. As evidenced by the fact that the current change doesn't update all relevant comments code. I wonder if we shouldn't leave the function the current way and just add a new function for the new behaviour. The hard thing with that would be coming up with a new name. IsSystemRelationId() having a different behaviour than IsSystemRelation() seems strange to me, so just keeping that and adapting the callers seems wrong to me. IsInternalRelation()? IsCatalogRelation()? Well, I went through and looked at the places that were affected by this and I tend to think that most places will be happier with the new definition. Picking one at random, consider the calls in cluster.c. The first is used to set the is_system_catalog flag that is passed to finish_heap_swap(), which controls whether we queue invalidation messages after doing the CLUSTER. Well, unless I'm quite mistaken, user-defined relations in pg_catalog will not have catalog caches and thus don't need invalidations. The second call in that file is used to decide whether to warn about inserts or deletes that appear to be in progress on a table that we have x-locked; that should only apply to real system catalogs, because other things we create in pg_catalog won't have short-duration locks. (Maybe the user-catalog-tables patch will modify this test; I'm not sure, but if this needs to work differently it seems that it should be conditional on that, not what schema the table lives in.) If there are call sites that want the existing test, maybe we should have IsRelationInSystemNamespace() for that, and reserve IsSystemRelation() for the test as to whether it's a bona fide system catalog. -- Robert Haas EnterpriseDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] better atomics - spinlock fallback?
On Mon, Nov 11, 2013 at 1:57 PM, Andres Freund and...@2ndquadrant.com wrote: Instead of de-supporting platforms that don't have CAS support or providing parallel implementations we could relatively easily build a spinlock based fallback using the already existing requirement for tas(). Something like an array of 16 spinlocks, indexed by a more advanced version of ((char *)(atomics) sizeof(char *)) % 16. The platforms that would fallback aren't that likely to be used under heavy concurrency, so the price for that shouldn't be too high. The only real problem with that would be that we'd need to remove the spinnlock fallback for barriers, but that seems to be pretty much disliked. I think this is worth considering. I'm not too clear what to do about the barriers problem, though. I feel like we've dug ourselves into a bit of a hole, there, and I'm not sure I understand the issues well enough to dig us back out of it. -- Robert Haas EnterpriseDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
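To make the fallback idea concrete, a rough sketch using the existing spinlock API (the names and the exact address-hashing scheme are invented here, and each slock_t would need a SpinLockInit() call at startup):

    #include "postgres.h"
    #include <stdint.h>
    #include "storage/spin.h"

    #define N_ATOMICS_FALLBACK_LOCKS 16

    static slock_t atomics_fallback_locks[N_ATOMICS_FALLBACK_LOCKS];

    #define ATOMICS_FALLBACK_LOCKNO(ptr) \
        ((((uintptr_t) (ptr)) / sizeof(void *)) % N_ATOMICS_FALLBACK_LOCKS)

    /* emulate 32-bit compare-and-swap under one of the spinlocks */
    static bool
    pg_atomic_cas_u32_fallback(volatile uint32 *ptr, uint32 *expected, uint32 newval)
    {
        bool        swapped;
        slock_t    *lock = &atomics_fallback_locks[ATOMICS_FALLBACK_LOCKNO(ptr)];

        SpinLockAcquire(lock);
        if (*ptr == *expected)
        {
            *ptr = newval;
            swapped = true;
        }
        else
        {
            *expected = *ptr;
            swapped = false;
        }
        SpinLockRelease(lock);

        return swapped;
    }

Two accesses to the same atomic variable always serialize on the same lock, so correctness only requires that every access to a given variable go through the fallback, not that different variables avoid sharing a lock.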
Re: [HACKERS] better atomics - spinlock fallback?
On 2013-11-12 13:21:30 -0500, Robert Haas wrote: The only real problem with that would be that we'd need to remove the spinnlock fallback for barriers, but that seems to be pretty much disliked. I think this is worth considering. Ok, cool. The prototype patch I have for that is pretty small, so it doesn't look too bad. What currently scares me is the amount of code I have to write that I can't test... I really can't see me being able to provide a patch that doesn't require some buildfarm cycles to really work on all platforms. I'm not too clear what to do about the barriers problem, though. I feel like we've dug ourselves into a bit of a hole, there, and I'm not sure I understand the issues well enough to dig us back out of it. I think any platform where we aren't able to provide a proper compiler/memory barrier will also have broken spinlock relase semantics (as in missing release memory barrier). So arguably removing the fallback is a good idea anyway. Greetings, Andres Freund -- Andres Freund http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training Services -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] logical changeset generation v6.6
On 2013-11-12 13:18:19 -0500, Robert Haas wrote: On Tue, Nov 12, 2013 at 12:50 PM, Andres Freund and...@2ndquadrant.com wrote: Completely agreed. As evidenced by the fact that the current change doesn't update all relevant comments code. I wonder if we shouldn't leave the function the current way and just add a new function for the new behaviour. The hard thing with that would be coming up with a new name. IsSystemRelationId() having a different behaviour than IsSystemRelation() seems strange to me, so just keeping that and adapting the callers seems wrong to me. IsInternalRelation()? IsCatalogRelation()? Well, I went through and looked at the places that were affected by this and I tend to think that most places will be happier with the new definition. I agree that many if not most want the new definition. If there are call sites that want the existing test, maybe we should have IsRelationInSystemNamespace() for that, and reserve IsSystemRelation() for the test as to whether it's a bona fide system catalog. The big reason that I think we do not want the new behaviour for all is: * NB: TOAST relations are considered system relations by this test * for compatibility with the old IsSystemRelationName function. * This is appropriate in many places but not all. Where it's not, * also check IsToastRelation. the current state of things would allow to modify toast relations in some places :/ I'd suggest renaming the current IsSystemRelation() to your IsRelationInSystemNamespace() and add IsCatalogRelation() for the new meaning, so we are sure to break old users. Let me come up with something like that. (Maybe the user-catalog-tables patch will modify this test; I'm not sure, but if this needs to work differently it seems that it should be conditional on that, not what schema the table lives in.) No, they shouldn't change that. We might want to allow such locking semantics at some points, but that'd be a separate patch. Greetings, Andres Freund -- Andres Freund http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training Services -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
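To make the proposed split concrete, a guess at the shape of the two tests (not the actual patch): IsRelationInSystemNamespace() keeps today's namespace-based IsSystemRelation() semantics, while IsCatalogRelation() answers true only for genuine initdb-created catalogs and their TOAST tables.

    /* old semantics: anything living in pg_catalog or a toast namespace,
     * including user-created tables placed there */
    bool
    IsRelationInSystemNamespace(Relation relation)
    {
        Oid     namespaceId = RelationGetNamespace(relation);

        return IsSystemNamespace(namespaceId) || IsToastNamespace(namespaceId);
    }

    /* new, stricter test: only bona fide system catalogs */
    bool
    IsCatalogRelation(Relation relation)
    {
        Oid     namespaceId = RelationGetNamespace(relation);

        if (!IsSystemNamespace(namespaceId) && !IsToastNamespace(namespaceId))
            return false;

        /* assumes initdb-assigned OIDs all fall below FirstNormalObjectId */
        return RelationGetRelid(relation) < FirstNormalObjectId;
    }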
[HACKERS] additional json functionality
In the last year I have created a number of modules providing extra JSON functionality. I'd like to include some of these in release 9.4. Candidates for inclusion are: json_build: which provides extra functionality for building up non-regular and arbitrarily complex json, as well as a way of aggregating key value pairs as a json record. See https://github.com/pgexperts/json_build json_object: take a one or two dimensional array of text and turn it into a json object, similar to the way hstore currently does. See https://bitbucket.org/qooleot/json_object json_to_record: turn json into a record or recordset, using the same logic as json_populate_record and json_populate_recordset, but inferring the return type from the call instead of from the first argument. See https://bitbucket.org/qooleot/json_to_record json_build is probably the most important in terms of additional functionality. Both json_build and json_to_record have had to copy non-exposed parts of the internal JSON code, which is one of the many reasons I would particularly like to include them. These extensions have all been created in response to client requirements, so there is a definite use case for all of them. If there's agreement on taking these, I will prepare patches and submit them by the 15th. cheers andrew -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
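For a flavour of what these modules do, calls look roughly like the following (based on my reading of the extension repos; exact function names and signatures may differ slightly):

    -- json_build: arbitrarily shaped json, plus key/value aggregation
    SELECT json_build_object('name', 'fred', 'tags', json_build_array(1, 2, 3));
    SELECT json_object_agg(key, value) FROM settings;

    -- json_object: text array of alternating keys and values to a json object
    SELECT json_object('{a, 1, b, 2}'::text[]);

    -- json_to_record: result type inferred from the column definition list
    SELECT * FROM json_to_record('{"a": 1, "b": "x"}'::json) AS t(a int, b text);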
Re: [HACKERS] Errors on missing pg_subtrans/ files with 9.3
On Tue, Nov 12, 2013 at 11:55 AM, Andres Freund and...@2ndquadrant.com wrote: Hi, On 2013-11-12 11:46:19 -0500, J Smith wrote: * Does SELECT count(*) FROM pg_prepared_xacts; return 0? Yes it does. Could you show the output? Do you actually use prepared xacts actively? jay:dev@jagger=# select * from pg_prepared_xacts; transaction | gid | prepared | owner | database -+-+--+---+-- (0 rows) We don't actively use prepared transactions, no. We use a lot of prepared statements, but no two-phase committing. Do you actively use row level locking? Not to my knowledge, no. I've enabled statement logging for a little while so we can get a snapshot of what our average queries look like, but I'm not seeing any PREPARE TRANSACTION or explicit LOCK statements. Is there high concurrency in that environment? In short, could you describe your database usage a bit? This particular database is used for an aggregation service. There are a handful of Hibernate-based Java processes that manipulate and massage the data from a number of data feeds we receive 24-7 From a quick grepping of the statement log file we captured the following over the course of approximately 13 minutes: 167452 SELECTs 72388 UPDATEs 6782 INSERTs 2 DELETEs Did you restart the database since getting the errors? Yes, we've restarted several times. Latest checkpoint's NextXID: 0/5138174 Latest checkpoint's NextOID: 3574036 Latest checkpoint's NextMultiXactId: 5762623 Latest checkpoint's NextMultiOffset: 11956824 Latest checkpoint's oldestXID:1673 Latest checkpoint's oldestXID's DB: 1 Latest checkpoint's oldestActiveXID: 5138174 Latest checkpoint's oldestMultiXid: 1 Interesting. Pretty high multixact usage. Part of our process involves a collecting several records and aggregating them within a single transaction. There will usually be a handful of SELECTs to fetch the data, a bit of processing in the Java backend, then a set of either UPDATEs or INSERTs into two tables to store the results of the processing or in rarer cases a couple of DELETEs. Our Java guy would have more details, as I just have the output of the log files and a high-level view of how our backend processes work, but this is the general gist of things as I understand it. Cheers -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] What's needed for cache-only table scan?
2013/11/12 Tom Lane t...@sss.pgh.pa.us: Kohei KaiGai kai...@kaigai.gr.jp writes: So, are you thinking it is a feasible approach to focus on custom-scan APIs during the upcoming CF3, then the table-caching feature as a use-case of these APIs on CF4? Sure. If you work on this extension after CF3, and it reveals that the custom scan stuff needs some adjustments, there would be time to do that in CF4. The policy about what can be submitted in CF4 is that we don't want new major features that no one has seen before, not that you can't make fixes to previously submitted stuff. Something like a new hook in vacuum wouldn't be a major feature, anyway. Thanks for this clarification. Three days are too short to write a patch; however, two months may be sufficient to develop a feature on top of the scheme being discussed in the previous commitfest. Best regards, -- KaiGai Kohei kai...@kaigai.gr.jp -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
[HACKERS] writable FDWs / update targets confusion
Hi, I'm working on adding write support to one of my FDWs. Adding INSERT went pretty fine, but when adding DELETE/UPDATE I got really confused about how the update targets are supposed to work. My understanding of how it's supposed to work is this: (1) AddForeignUpdateTargets adds columns that serve as the ID of the record (e.g. postgres_fdw adds 'ctid') (2) planning the inner foreign scan handles the new column appropriately (e.g. scans system columns, as in the case of 'ctid' etc.) (3) IterateForeignScan will see the column in the tuple descriptor, will set it just like any other column, etc. (4) ExecForeignDelete will fetch the new column and do something with it However, no matter what I do, I can't get steps (3) and (4) working this way. This is what I do in AddForeignUpdateTargets (pretty much a 1:1 copy from postgres_fdw, except that I'm using INT8OID instead of TIDOID):

static void
myAddForeignUpdateTargets(Query *parsetree,
                          RangeTblEntry *target_rte,
                          Relation target_relation)
{
    Var         *var;
    const char  *attrname;
    TargetEntry *tle;

    /* Make a Var representing the desired value */
    var = makeVar(parsetree->resultRelation,
                  SelfItemPointerAttributeNumber,
                  INT8OID, -1, InvalidOid, 0);

    /* Wrap it in a resjunk TLE with the right name ... */
    attrname = "ctid";
    tle = makeTargetEntry((Expr *) var,
                          list_length(parsetree->targetList) + 1,
                          pstrdup(attrname), true);

    /* ... and add it to the query's targetlist */
    parsetree->targetList = lappend(parsetree->targetList, tle);
}

Then in GetForeignPlan I collect all the attnums (including the new one), and in BeginForeignScan decide which columns to actually fetch. So if any attnum is negative (attnum < 0) I know I need to fetch the new 'ctid' column. However in IterateForeignScan, the tuple descriptor does not change. It still has only the columns of the foreign table, so I have no place to set the ctid to. So even though ExecFindJunkAttributeInTlist(subplan->targetlist, "ctid") returns 1, I can't really set any of the columns to the CTID, because the column might be used in a WHERE condition. And looking at postgres_fdw, it seems to me it does not really set the ctid into the tuple as a column, but just does this: if (ctid) tuple->t_self = *ctid; which I can't really do because I need to use INT8 and not TID. Interestingly, if I do this in ExecForeignDelete

static TupleTableSlot *
myExecForeignDelete(EState *estate,
                    ResultRelInfo *resultRelInfo,
                    TupleTableSlot *slot,
                    TupleTableSlot *planSlot)
{
    bool          isNull;
    MyModifyState state = (MyModifyState) resultRelInfo->ri_FdwState;
    int64         ctid;

    Datum datum = ExecGetJunkAttribute(planSlot, state->ctidAttno, &isNull);

    ctid = DatumGetInt64(datum);

    elog(WARNING, "ID = %ld", ctid);

    if (isNull)
        elog(ERROR, "ctid is NULL");

    /* FIXME not yet implemented */
    return NULL;
}

I do get (isNull=FALSE) but the ctid evaluates to some random number, e.g. WARNING: ID = 44384788 (44384788) WARNING: ID = 44392980 (44392980) and so on. So what did I get wrong? Is it possible to use an arbitrary hidden column as a junk column (the documentation seems to suggest that)? What is the right way to do that / what did I get wrong? regards Tomas -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
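For context, the usual pairing on the modify side -- looking up the resjunk column once in BeginForeignModify and fetching it per row in ExecForeignDelete -- might look roughly like the sketch below, modeled on what postgres_fdw does in 9.3. This is only a sketch: MyModifyState and the my* function names are hypothetical, the usual FDW includes (postgres.h, executor/executor.h, nodes/execnodes.h) are assumed, and it does not by itself answer the question above of where the scan is supposed to place the value.

typedef struct MyModifyState
{
    AttrNumber  ctidAttno;      /* position of the resjunk "ctid" column */
} MyModifyState;

static void
myBeginForeignModify(ModifyTableState *mtstate,
                     ResultRelInfo *resultRelInfo,
                     List *fdw_private,
                     int subplan_index,
                     int eflags)
{
    MyModifyState *state = palloc0(sizeof(MyModifyState));
    Plan          *subplan = mtstate->mt_plans[subplan_index]->plan;

    /* remember where the resjunk row-id column lives in the subplan tlist */
    state->ctidAttno = ExecFindJunkAttributeInTlist(subplan->targetlist, "ctid");

    resultRelInfo->ri_FdwState = state;
}

static TupleTableSlot *
myExecForeignDelete(EState *estate,
                    ResultRelInfo *resultRelInfo,
                    TupleTableSlot *slot,
                    TupleTableSlot *planSlot)
{
    MyModifyState *state = (MyModifyState *) resultRelInfo->ri_FdwState;
    bool           isNull;
    Datum          datum;

    /* planSlot carries the subplan's output row, including resjunk columns */
    datum = ExecGetJunkAttribute(planSlot, state->ctidAttno, &isNull);
    if (!isNull)
    {
        int64 rowid = DatumGetInt64(datum);

        elog(DEBUG1, "deleting remote row " INT64_FORMAT, rowid);
        /* ... delete the remote row identified by rowid ... */
    }
    return slot;
}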
[HACKERS] GIN bugs in master branch
I suppose some recent changes in GIN are broken; 9.3 works fine.
CREATE OR REPLACE FUNCTION generate_tags() RETURNS int[] LANGUAGE SQL VOLATILE AS $$ SELECT ARRAY( SELECT (random()*random()*1000)::int4 FROM generate_series(1, 1 + (random()*random()*100)::int4) ); $$;
DROP TABLE IF EXISTS t;
SELECT i AS id, generate_tags() AS tags INTO t FROM generate_series(1, 100) AS i;
CREATE INDEX tagidx ON t USING gin (tags);
TRAP: FailedAssertion(!(( ( (GinPageOpaque) ( ((void) ((bool) ((! assert_enabled) || ! (!(((const void*)(page) != ((void *)0 || (ExceptionalCondition(!(((const void*)(page) != ((void *)0))), (FailedAssertion), gindatapage.c, 51), 0, (char *) ((char *) (page) + ((PageHeader) (page))-pd_special) ) )-flags (1 0) )), File: gindatapage.c, Line: 51) -- Teodor Sigaev E-mail: teo...@sigaev.ru WWW: http://www.sigaev.ru/ -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Clang 3.3 Analyzer Results
On Tue, Nov 12, 2013 at 9:38 AM, Tom Lane t...@sss.pgh.pa.us wrote: ... One thought for the Clang people is that most of the reports such as "null pointer dereference" presumably mean "I think I see an execution path whereby we could get here with a null pointer". If so, it'd be awfully helpful if the complaint included some description of what that path is. I think Coverity does that, or at least I've seen output from some tool that does it. Clang can be trained with asserts. If you are certain that a parameter cannot be NULL, then pass the knowledge on to Clang: assert(param != NULL). Clang will stop analyzing that path for NULL-ness. Or, you could check it for NULL and fail the function if the param is NULL. If it's a spurious test, then the optimizer will remove it. Jeff -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
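As a concrete illustration of that point, a toy function (made-up names, not PostgreSQL code) could teach the analyzer about a non-NULL precondition like this; the assert prunes the NULL path during analysis, while the explicit check is the defensive variant of the same knowledge and costs nothing if the optimizer can prove it redundant:

#include <assert.h>
#include <stddef.h>
#include <string.h>

/* Toy example: the caller guarantees dest and src are non-NULL. */
static int
copy_name(char *dest, const char *src, size_t len)
{
    assert(dest != NULL && src != NULL);    /* analyzer stops exploring NULL paths here */

    if (dest == NULL || src == NULL || len == 0)
        return -1;                          /* defensive check expressing the same constraint */

    strncpy(dest, src, len - 1);
    dest[len - 1] = '\0';
    return 0;
}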
Re: [HACKERS] Clang 3.3 Analyzer Results
On 2013-11-12 15:17:18 -0500, Jeffrey Walton wrote: On Tue, Nov 12, 2013 at 9:38 AM, Tom Lane t...@sss.pgh.pa.us wrote: ... One thought for the Clang people is that most of the reports such as null pointer dereference presumably mean I think I see an execution path whereby we could get here with a null pointer. If so, it'd be awfully helpful if the complaint included some description of what that path is. I think Coverity does that, or at least I've seen output from some tool that does it. Clang can be trained with asserts. It might not recognize our Assert() because it expands as: #define TrapMacro(condition, errorType) \ ((bool) ((! assert_enabled) || ! (condition) || \ (ExceptionalCondition(CppAsString(condition), (errorType), \ __FILE__, __LINE__), 0))) #define Assert(condition) \ Trap(!(condition), FailedAssertion) Kevin, perhaps it reports less errors if you remove the assert_enabled check from TrapMacro? I guess you already compiled with --enable-cassert? Or, you could check it for NULL and fail the function if the param is NULL. If its a spurious test, then the optimizer will remove it. Only in the case it can prove that it's redundant - and in that case the analyzer presumably wouldn't have reported the error in the first place. Greetings, Andres Freund -- Andres Freund http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training Services -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Clang 3.3 Analyzer Results
On Tue, Nov 12, 2013 at 3:25 PM, Andres Freund and...@2ndquadrant.com wrote: On 2013-11-12 15:17:18 -0500, Jeffrey Walton wrote: On Tue, Nov 12, 2013 at 9:38 AM, Tom Lane t...@sss.pgh.pa.us wrote: ... One thought for the Clang people is that most of the reports such as null pointer dereference presumably mean I think I see an execution path whereby we could get here with a null pointer. If so, it'd be awfully helpful if the complaint included some description of what that path is. I think Coverity does that, or at least I've seen output from some tool that does it. Clang can be trained with asserts. It might not recognize our Assert() because it expands as: #define TrapMacro(condition, errorType) \ ((bool) ((! assert_enabled) || ! (condition) || \ (ExceptionalCondition(CppAsString(condition), (errorType), \ __FILE__, __LINE__), 0))) #define Assert(condition) \ Trap(!(condition), FailedAssertion) Kevin, perhaps it reports less errors if you remove the assert_enabled check from TrapMacro? I guess you already compiled with --enable-cassert? Also see http://clang-analyzer.llvm.org/annotations.html (ignore the OS X specific stuff). There's a couple of ways to annotate source code and custom asserts. In this case, a `noreturn` annotation will probably do the trick. You can even guard it under the Clang analyzer (notwithstanding the opinions of polluting source code with #define): #ifdef __clang_analyzer__ // Code to be analyzed or modified #endif Or, you could check it for NULL and fail the function if the param is NULL. If its a spurious test, then the optimizer will remove it. Only in the case it can prove that it's redundant - and in that case the analyzer presumably wouldn't have reported the error in the first place. :) Jeff -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
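For what it's worth, the annotation pattern that page describes for a custom assertion's failure reporter looks roughly like the following hedged sketch; the names are invented and this is not the PostgreSQL macro, just an illustration of analyzer_noreturn as documented by Clang:

#include <stdio.h>
#include <stdlib.h>

#ifndef __has_feature
#define __has_feature(x) 0              /* fallback for compilers without __has_feature */
#endif

#if __has_feature(attribute_analyzer_noreturn)
#define MY_ANALYZER_NORETURN __attribute__((analyzer_noreturn))
#else
#define MY_ANALYZER_NORETURN
#endif

/* The analyzer treats any call to this reporter as terminating the path. */
extern void my_failed_assertion(const char *cond, const char *file, int line) MY_ANALYZER_NORETURN;

void
my_failed_assertion(const char *cond, const char *file, int line)
{
    fprintf(stderr, "assertion \"%s\" failed at %s:%d\n", cond, file, line);
    abort();
}

#define MY_ASSERT(condition) \
    ((condition) ? (void) 0 : my_failed_assertion(#condition, __FILE__, __LINE__))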
Re: [HACKERS] Clang 3.3 Analyzer Results
On 2013-11-12 15:33:13 -0500, Jeffrey Walton wrote: On Tue, Nov 12, 2013 at 3:25 PM, Andres Freund and...@2ndquadrant.com wrote: On 2013-11-12 15:17:18 -0500, Jeffrey Walton wrote: On Tue, Nov 12, 2013 at 9:38 AM, Tom Lane t...@sss.pgh.pa.us wrote: ... One thought for the Clang people is that most of the reports such as null pointer dereference presumably mean I think I see an execution path whereby we could get here with a null pointer. If so, it'd be awfully helpful if the complaint included some description of what that path is. I think Coverity does that, or at least I've seen output from some tool that does it. Clang can be trained with asserts. It might not recognize our Assert() because it expands as: #define TrapMacro(condition, errorType) \ ((bool) ((! assert_enabled) || ! (condition) || \ (ExceptionalCondition(CppAsString(condition), (errorType), \ __FILE__, __LINE__), 0))) #define Assert(condition) \ Trap(!(condition), FailedAssertion) Kevin, perhaps it reports less errors if you remove the assert_enabled check from TrapMacro? I guess you already compiled with --enable-cassert? Also see http://clang-analyzer.llvm.org/annotations.html (ignore the OS X specific stuff). There's a couple of ways to annotate source code and custom asserts. In this case, a `noreturn` annotation will probably do the trick. ExceptionalCondition is annotated with noreturn, but that doesn't necessarily help because the compiler cannot know that assert_enabled is true. Greetings, Andres Freund -- Andres Freund http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training Services -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Clang 3.3 Analyzer Results
On Tue, Nov 12, 2013 at 9:38 AM, Tom Lane t...@sss.pgh.pa.us wrote: Kevin Grittner kgri...@ymail.com writes: Does anything stand out as something that is particularly worth looking into? Does anything here seem worth assuming is completely bogus because of the Coverity and Valgrind passes? I thought most of it was obvious junk: if there were actually uninitialized-variable bugs in the bison grammar, for instance, not only we but half the programs on the planet would be coredumping all the time. Not to mention that valgrind testing would certainly have caught it. I'd suggest looking only at the reports that pertain to seldom-exercised code paths, as those would be the places where actual bugs might possibly have escaped notice. Clang also has a page FAQ and How to Deal with Common False Positives, http://clang-analyzer.llvm.org/faq.html. It demonstrates how to force analysis on a path. Jeff -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Clang 3.3 Analyzer Results
On Tue, Nov 12, 2013 at 3:35 PM, Andres Freund and...@2ndquadrant.com wrote: On 2013-11-12 15:33:13 -0500, Jeffrey Walton wrote: On Tue, Nov 12, 2013 at 3:25 PM, Andres Freund and...@2ndquadrant.com wrote: On 2013-11-12 15:17:18 -0500, Jeffrey Walton wrote: ... It might not recognize our Assert() because it expands as: #define TrapMacro(condition, errorType) \ ((bool) ((! assert_enabled) || ! (condition) || \ (ExceptionalCondition(CppAsString(condition), (errorType), \ __FILE__, __LINE__), 0))) #define Assert(condition) \ Trap(!(condition), FailedAssertion) Kevin, perhaps it reports less errors if you remove the assert_enabled check from TrapMacro? I guess you already compiled with --enable-cassert? Also see http://clang-analyzer.llvm.org/annotations.html (ignore the OS X specific stuff). There's a couple of ways to annotate source code and custom asserts. In this case, a `noreturn` annotation will probably do the trick. ExceptionalCondition is annotated with noreturn, but that doesn't necessarily help because the compiler cannot know that assert_enabled is true. Oh, my bad. I overlooked that. Jeff -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Can we add sample systemd service file to git repo?
On Tue, Nov 12, 2013 at 11:47 AM, Devrim GÜNDÜZ dev...@gunduz.org wrote: Hi, Many distros are switching/switched to systemd. In the git repo, we only have init script for SysV. Would it make sense to add systemd service script to our tarball as well? This could also standardize the systemd scripts used in other distros. http://svn.pgrpms.org/browser/rpm/redhat/9.3/postgresql/F-19/postgresql-9.3.service is an example of what we use in the RPMs. (if website fails, please just reload) Attached is a modified version that will work with the compile defaults. Hi, should we put PGPORT in the systemd file without an easy way to override it? As an example, when yum updating minor versions on fedora 18 (using the yum.postgresql.org rpms), any changes to the systemd service file are overwritten by the new rpm and the port is forced back to 5432. This makes having pg9.2 and pg9.3 on the same box conflict after each minor version update. On RHEL6 with SysV we have /etc/sysconfig/[..] files to override the init.d script defaults. The same can be done with systemd using the EnvironmentFile directive, like this: [Service] EnvironmentFile=-/etc/sysconfig/postgresql which would let us keep our overrides of PGPORT and PGDATA across updates (the =- makes the file optional). Given a choice, I'd rather rely on postgresql.conf to set the port number though... -nigel. -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Can we add sample systemd service file to git repo?
On 11/12/2013 03:54 PM, Nigel Heron wrote: On Tue, Nov 12, 2013 at 11:47 AM, Devrim GÜNDÜZ dev...@gunduz.org wrote: Hi, Many distros are switching/switched to systemd. In the git repo, we only have init script for SysV. Would it make sense to add systemd service script to our tarball as well? This could also standardize the systemd scripts used in other distros. http://svn.pgrpms.org/browser/rpm/redhat/9.3/postgresql/F-19/postgresql-9.3.service is an example of what we use in the RPMs. (if website fails, please just reload) Attached is a modified version that will work with the compile defaults. Hi, should we put PGPORT in the systemd file without an easy way to override it? As an example, when yum updating minor versions on fedora 18 (using the yum.postgresql.org rpms), any changes to the systemd service file are overwritten by the new rpm and the port is forced back to 5432. This makes having pg9.2 and pg9.3 on the same box conflict after each minor version update. On RHEL6 with SysV we have /etc/sysconfig/[..] files to override the init.d script defaults. The same can be done with systemd using the EnvironmentFile directive, like this: [Service] EnvironmentFile=-/etc/sysconfig/postgresql which would let us keep our overrides of PGPORT and PGDATA across updates (the =- makes the file optional). Given a choice, I'd rather rely on postgresql.conf to set the port number though... You might also find this useful, in particular Tom Lane's comment: http://people.planetpostgresql.org/andrew/index.php?/archives/233-Multi-tenancy-under-systemd.html cheers andrew -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Transaction-lifespan memory leak with plpgsql DO blocks
On Tue, Nov 12, 2013 at 11:18 AM, Tom Lane t...@sss.pgh.pa.us wrote: Or we could say what the heck are you doing executing tens of thousands of DO blocks? Make it into a real live function; you'll save a lot of cycles on parsing costs. I'm not sure that this is a usage pattern we ought to be optimizing for. I'm not volunteering to spend time fixing this, but I disagree with the premise that it isn't worth fixing, because I think it's a POLA violation. From the user's point of view, there are plausible reasons for this to be slow, but there's really no reason to expect it to leak memory. That's a sufficiently astonishing result that it wouldn't be surprising for this to get reported as a bug where a simple performance gap wouldn't be, and I think if we don't fix it the perception will be that we've left that bug unfixed. Now, there are lots of things we don't fix just because there is not an infinitely large army of trained PostgreSQL hackers who love to fix other people's bugs for free, so I'm not going to say we HAVE to fix this or whatever - but neither do I think fixing it is useless and worthless. -- Robert Haas EnterpriseDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Fast insertion indexes: why no developments
2013/11/2 Simon Riggs si...@2ndquadrant.com: On 29 October 2013 16:10, Peter Geoghegan p...@heroku.com wrote: Presumably someone will get around to implementing a btree index insertion buffer one day. I think that would be a particularly compelling optimization for us, because we could avoid ever inserting index tuples that are already dead when the deferred insertion actually occurs. That's pretty much what the LSM-tree is. [ Disclaimer: I have only skimmed the LSM-trees paper rather than read it thoroughly. ] LSM-trees seem to hit a wall when the total amount of data gets big enough, unless one uses “multi-component LSM-trees” (as described in the paper). Having a B-tree with deferred insertion would be similar to having an LSM-tree without the multi-component property. The underlying problem with fast random insertions into a B-tree is that each write of a whole block writes only a small amount of “useful changes” (once the tree gets large enough relative to memory). The “multi-component” concept fixes that. I think that simply combining that concept with normal B-trees is a rather elegant way of (at least theoretically) solving the problem: Definitions: * Define a rather small integer K (with K ≥ 2), that will influence maximum insertion speed (higher K = higher insertion speed), but also look-up time (higher K = slower look-up). * Define some size S “that easily fits in memory.” * F is the fan-out of the B-tree. (If F K, the algorithm results in slower amortized insertion speed than simply using one single big B-tree if only the amount of blocks read/written are taken into account; it may still be better because of seek times.) * n is the total number of entries in the index. Algorithm: * Start inserting into a small B-tree until it gets size S, then start inserting into a new B-tree until that fills up to size S, etc. * When K such B-trees (of size S) exist, merge them together into one (S × K)-sized B-tree (delete the old ones). * Do this recursively: Once K B-trees of size (K × S) exist, merge them together into one (S × K^2)-sized B-tree, etc. * Let each look-up walk all trees, and return the union of all results. (Note that K B-trees can be merged by simply scanning all of them concurrently, and merging them just like a merge sort merges runs. Also, all B-trees except for the first level (of size S) can be compacted 100% as there is no need to reserve space for further insertions in them.) Insertion speed can be calculated as follows (I disregard seek times to make this easy; it also happens to be the right assumption for SSDs): Assume that a small B-tree (size S) can simply be written out without having to write each block multiple times. Each entry has to be written log_K(n × log_F(n)) times. All blocks written are 100% “useful changes.” Therefore, insertion speed is log_K(n × log_F(n)) times less than raw disk speed. (Note that I also disregard the time needed for reading; This may make everything about twice as slow.) Example: For K = 10, F = 100 (i.e., 80B per entry), blocksize = 8kB, and n = 10^9 (i.e., 80GB of entries), the insertion speed is log_10(10^9 × log_100(10^9)) = log_10(10^9 × 4.5) = ~9.5 times slower than writing the 80GB of index entries sequentially. For the “one single big B-tree” scenario, the insertion speed would be ~F = ~100 times slower than raw disk speed (assuming that all writes but the ones to the leaf-level can be combined). Look-up speed is as follows: Each look-up must look through all B-trees. 
Assume for simplicity that all trees have the same height as the single B-tree in the “one single big B-tree” case (i.e., which is rather wrong (most are less tall), but seems good enough as an estimate), there are K trees of each size (idem), and there are log_K(n) different sizes. Then, the number of trees to walk is K × log_K(n), and therefore each look-up is K × log_K(n) slower than in the “one single big B-tree” case. Example: (using the same parameters as above) Look-up speed is 10 × log_10(10^9) = 90 times slower (i.e., because there are ~90 trees). Index size: I think (didn’t calculate) that the combined size of the B-trees will be about the same as (a little bit more than) the size of a single big B-tree containing the same entries. I have no idea whether someone already gave this kind of “forest of B-trees” structure a name. Otherwise, I suggest “B-forest” :-). In conclusion, use a “B-forest” when: * The index entries are small (large fan-out). * The insertion throughput is high. * It’s OK for look-ups to be slow. * Extra points when the storage medium has high seek times. Major missing piece in PostgreSQL (I think): * Functionality to merge K indexes into one (bigger) combined index. Nicolas -- A. Because it breaks the logical sequence of discussion. Q. Why is top posting bad? -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription:
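To make the merge step concrete, a toy K-way merge over sorted runs of keys (plain illustrative C, not PostgreSQL code) could look like the sketch below; in a real "B-forest" the runs would be sequential scans of the K source B-trees and emit() would append to the leaf level of the new, fully packed B-tree.

#include <stdint.h>
#include <stddef.h>

typedef struct SortedRun
{
    const int64_t *keys;    /* keys of one source tree, in ascending order */
    size_t         nkeys;
    size_t         pos;     /* current scan position within the run */
} SortedRun;

/* Scan all K runs concurrently, always emitting the smallest head key,
 * exactly as a merge sort merges runs.  A linear scan for the minimum is
 * fine for the small K discussed above. */
static void
merge_runs(SortedRun *runs, int k, void (*emit) (int64_t key))
{
    for (;;)
    {
        int best = -1;

        for (int i = 0; i < k; i++)
        {
            if (runs[i].pos >= runs[i].nkeys)
                continue;       /* this run is exhausted */
            if (best < 0 ||
                runs[i].keys[runs[i].pos] < runs[best].keys[runs[best].pos])
                best = i;
        }
        if (best < 0)
            break;              /* all runs exhausted */

        emit(runs[best].keys[runs[best].pos]);
        runs[best].pos++;
    }
}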
Re: [HACKERS] Fast insertion indexes: why no developments
2013/11/12 Nicolas Barbier nicolas.barb...@gmail.com: In conclusion, use a “B-forest” when: * The index entries are small (large fan-out). * The insertion throughput is high. * It’s OK for look-ups to be slow. * Extra points when the storage medium has high seek times. Oops, forgot the most important ones: * Insertions are random. * The total amount of data is very high. Nicolas -- A. Because it breaks the logical sequence of discussion. Q. Why is top posting bad? -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Transaction-lifespan memory leak with plpgsql DO blocks
Robert Haas wrote That's a sufficiently astonishing result that it wouldn't be surprising for this to get reported as a bug where a simple performance gap wouldn't be, and I think if we don't fix it the perception will be that we've left that bug unfixed. Now, there are lots of things we don't fix just because there is not an infinitely large army of trained PostgreSQL hackers who love to fix other people's bugs for free, so I'm not going to say we HAVE to fix this or whatever - but neither do I think fixing it is useless and worthless. Having had this same thought WRT the FOR UPDATE in LOOP bug posting the lack of a listing of outstanding bugs does leave some gaps. I would imagine people would appreciate something like: Frequency: Rare Severity: Low Fix Complexity: Moderate Work Around: Easy - create an actual function; create some form of loop Status: Confirmed - Awaiting Volunteers to Fix Even without a formal system it may not hurt for bug threads to have a posting with this kind of information summarizing the thread. As Tom is apt to do - for the sake of the archives - though mostly I see those once something has been fixed and not for items that are being left open. Ideally these could also be migrated to the wiki, with links back to the main thread, to provide a basic known open items interface - something that I imagine would make corporate acceptance of PostgreSQL more likely. I don't see where there are a considerably large number of these unresolved items - most things do indeed get fixed or explained away as normal user learning. Sorry for the digression but it seems relevant. David J. -- View this message in context: http://postgresql.1045698.n5.nabble.com/Transaction-lifespan-memory-leak-with-plpgsql-DO-blocks-tp5777942p5778001.html Sent from the PostgreSQL - hackers mailing list archive at Nabble.com. -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Fast insertion indexes: why no developments
On 12 November 2013 21:41, Nicolas Barbier nicolas.barb...@gmail.com wrote: Look-up speed is as follows: Each look-up must look through all B-trees. That can be optimised by using a min max approach, so we need only look at sub-trees that may contain data. Index size: I think (didn’t calculate) that the combined size of the B-trees will be about the same as (a little bit more than) the size of a single big B-tree containing the same entries. Agreed Major missing piece in PostgreSQL (I think): * Functionality to merge K indexes into one (bigger) combined index. Good analysis. I would add that it is possible to optimise large DELETEs from a table if complete sub-trees of the btree can be easily removed. This for me would be the compelling benefit of this approach. I still think we need to look at this from a query perspective though. We need to check whether there is a class of real world queries that are not well optimised by minmax indexes, or cannot be made to be in future releases. For example, large DELETEs from a table are almost trivially optimised for min max. -- Simon Riggs http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training Services -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Clang 3.3 Analyzer Results
On 11/12/13, 8:18 AM, Kevin Grittner wrote: Here is the summary of what was reported: All Bugs: 313 Does anything stand out as something that is particularly worth looking into? Does anything here seem worth assuming is completely bogus because of the Coverity and Valgrind passes? I have tracked scan-build for some time, and I'm sure that almost all of these bugs are false positives at this point. I have a private branch somewhere that I have badly hacked up (e.g., hardcoding enable_assert = 1), which gets that number down to 251 according to my latest notes. That's about the best you can hope for. Btw., you can also keep score here: http://pgci.eisentraut.org/jenkins/view/PostgreSQL/job/postgresql_master_scan-build/. This uses an older version of clang, so the number of bugs is lower, but the nature of the bugs is also more stupid. I plan to upgrade that at some point. It's worth keeping an eye on this, but it's not worth losing sleep over. -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Can we add sample systemd service file to git repo?
On 11/12/13, 11:47 AM, Devrim GÜNDÜZ wrote: Attached is a modified version that will work with the compile defaults. I think you don't need both a PGDATA environment variable and the -D option to pg_ctl. -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Information about Access methods
Hi, Thanks for document. It was really helpful. Now, as index folder contains basically interface for all index type, do I need to change it, if I want to modify only b tree index algorithm? Do I need to change only btree index files or Do i have to change index folder files like indexam.c Please confirm. :) Regards, Rohit On Tue, Nov 12, 2013 at 1:22 PM, Craig Ringer cr...@2ndquadrant.com wrote: On 11/12/2013 07:00 PM, Rohit Goyal wrote: Inside access methods in Backend, I found Index folder and Btree folder but i don't know which code to look for. i want to change something in B tree implementation. Start here: http://www.postgresql.org/docs/current/static/indexam.html -- Craig Ringer http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training Services -- Regards, Rohit Goyal
Re: [HACKERS] Fast insertion indexes: why no developments
On Tue, Nov 12, 2013 at 6:41 PM, Nicolas Barbier nicolas.barb...@gmail.com wrote: (Note that K B-trees can be merged by simply scanning all of them concurrently, and merging them just like a merge sort merges runs. Also, all B-trees except for the first level (of size S) can be compacted 100% as there is no need to reserve space for further insertions in them.) Unless you can guarantee strong correlation of index-order vs physical-order, scanning multiple indexes in index-order will be quite slow (random I/O). On Tue, Nov 12, 2013 at 7:14 PM, Simon Riggs si...@2ndquadrant.com wrote: I still think we need to look at this from a query perspective though. We need to check whether there is a class of real world queries that are not well optimised by minmax indexes, or cannot be made to be in future releases. For example, large DELETEs from a table are almost trivially optimised for min max. Only if you don't have a PK (or other index). -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Clang 3.3 Analyzer Results
Peter Eisentraut pete...@gmx.net wrote: I have tracked scan-build for some time, and I'm sure that almost all of these bugs are false positives at this point. From poking around, I agree. One particular error I noticed that it makes a lot is that in a loop it says that an assigned value is not referenced if the reference will not be hit until the next iteration of the loop. Btw., you can also keep score here: http://pgci.eisentraut.org/jenkins/view/PostgreSQL/job/postgresql_master_scan-build/ Cool. I wasn't aware that anyone was already looking at this. It's worth keeping an eye on this, but it's not worth losing sleep over. Agreed in general; however, with this 3.4 development build the Memory Error section only showed two problems, and those were the only two problems I found that were real. It might be worth monitoring that one section. If nobody objects, I'll fix that small memory leak in the regression test driver. Hopefully someone more familiar with pg_basebackup will fix the double-free (and related problems mentioned by Tom) in streamutil.c. -- Kevin Grittner EDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Clang 3.3 Analyzer Results
On Tue, Nov 12, 2013 at 6:04 PM, Kevin Grittner kgri...@ymail.com wrote: Peter Eisentraut pete...@gmx.net wrote: I have tracked scan-build for some time, and I'm sure that almost all of these bugs are false positives at this point. From poking around, I agree. One particular error I noticed that it makes a lot is that in a loop it says that an assigned value is not referenced if the reference will not be hit until the next iteration of the loop. Btw., you can also keep score here: http://pgci.eisentraut.org/jenkins/view/PostgreSQL/job/postgresql_master_scan-build/ Cool. I wasn't aware that anyone was already looking at this. It's worth keeping an eye on this, but it's not worth losing sleep over. Agreed in general; however, with this 3.4 development build the Memory Error section only showed two problems, and those were the only two problems I found that were real. It might be worth monitoring that one section. If nobody objects, I'll fix that small memory leak in the regression test driver. Hopefully someone more familiar with pg_basebackup will fix the double-free (and related problems mentioned by Tom) in streamutil.c. Well, how about the use of the uninitialized values? I did not check any with the long path lengths, but the `pqsecure_write` in fe-secure.c looks valid to me. `spinfo` is declared, Clang builds/finds the path, then the uninitialized `spinfo` is used in `RESTORE_SIGPIPE(conn, spinfo);`. Anyway, I don't mean to sound argumentative. Jeff -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
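For readers without the report at hand, the shape of code the analyzer objects to here is roughly the following simplified, self-contained sketch (not the actual libpq source): one macro initializes the saved mask only on some paths, and a later macro reads it unconditionally unless the tool can prove the two guards always agree.

#include <signal.h>
#include <stdbool.h>

/* Save the current signal mask, but only when protection is requested. */
#define SAVE_SIGPIPE(enabled, mask) \
    do { if (enabled) sigprocmask(SIG_BLOCK, NULL, &(mask)); } while (0)

/* Restore the saved mask under the same condition. */
#define RESTORE_SIGPIPE(enabled, mask) \
    do { if (enabled) sigprocmask(SIG_SETMASK, &(mask), NULL); } while (0)

static void
guarded_write(bool need_protection)
{
    sigset_t saved;             /* only initialized when need_protection is true */

    SAVE_SIGPIPE(need_protection, saved);
    /* ... perform the write() here ... */
    RESTORE_SIGPIPE(need_protection, saved);
}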
Re: [HACKERS] FDW: possible resjunk columns in AddForeignUpdateTargets
On 8.11.2013 16:13, Albe Laurenz wrote: Tom Lane wrote: Albe Laurenz laurenz.a...@wien.gv.at writes: What I would like to do is add a custom resjunk column (e.g. a bytea) in AddForeignUpdateTargets that carries a row identifier from the scan state to the modify state. Would that be possible? Can I have anything else than a Var in a resjunk column? [ thinks for awhile... ] Hm. In principle you can put any expression you want into the tlist during AddForeignUpdateTargets. However, if it's not a Var then the planner won't understand that it's something that needs to be supplied by the table scan, so things won't work right in any but the most trivial cases (maybe not even then :-(). What I'd try is creating a Var that has the attno of ctid (ie, SelfItemPointerAttributeNumber) but the datatype you want, ie bytea. This won't match what the catalogs say your table's ctid is, but I think that nothing will care much about that. It's definitely an area that could use more work. IIRC we'd discussed providing some way for an FDW to specify the type of the ctid column for its tables, but we didn't do anything about it in 9.3. Thanks a lot, I will try that. Hi Laurenz, have you found a way to pass data types other than TID as a resjunk column? I'm trying to solve almost the same thing (pass INT8 instead of TID), but I got stuck. Adding a custom Var with INT8OID instead of TIDOID seems to work fine, but I've found no way to populate this in IterateForeignScan :-( regards Tomas -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Relax table alias conflict rule in 9.3?
On 11.11.2013 02:06, Tom Lane wrote: We had a complaint http://www.postgresql.org/message-id/e1vjuby-0002a1...@wrigleys.postgresql.org about the fact that 9.3 rejects queries with duplicate table aliases like this: select * from tenk1 a left join (int4_tbl a cross join int8_tbl b) c on unique1 = f1; ERROR: table name "a" specified more than once I pushed back on this on the grounds that this is illegal per SQL spec: the standard is pretty clear that you can't use the same table alias more than once in a given level of SELECT (cf SQL:2008 7.6 table reference, syntax rules 6 and 7). However, the complainant has a good point that if we've accepted this since forever, ceasing to accept it is going to cause people problems. Moreover, my argument that it makes things ambiguous for LATERAL doesn't hold a lot of water. Duplicate table aliases were potentially ambiguous before, too, but we allowed the case anyway and only complained if there's actually an ambiguous reference. We could do the same for LATERAL references. [...] So I propose removing that call, adding regression tests like these examples, and back-patching to 9.3. Any objections? regards, tom lane Great! :) This change would help me upgrade to 9.3.x because I've got lots of views that use a table alias that gets rejected by 9.3 while restoring the dump of 9.2. When do you plan to have it in an official release? Cheers Andreas -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Clang 3.3 Analyzer Results
Kevin Grittner escribió: These both seemed legitimate to me. Patch attached. Any objections to applying it? I realize the memory leak is a tiny one in the regression testing code, so it could never amount to enough to matter; but it seems worth fixing just to avoid noise in code analyzers. We have marked a large number of memory leak reports by Coverity in initdb and other short-lived programs as false positive, on the grounds that there's no point in freeing memory in a program that's about to terminate anyway. I'm not saying I agree necessarily with that POV, but if we take that stance then there's similarly no point in fixing this leak in the regression test code, is there? -- Álvaro Herrerahttp://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training Services -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Clang 3.3 Analyzer Results
On Tue, Nov 12, 2013 at 5:19 PM, Peter Eisentraut pete...@gmx.net wrote: On 11/12/13, 8:18 AM, Kevin Grittner wrote: Here is the summary of what was reported: All Bugs: 313 Does anything stand out as something that is particularly worth looking into? Does anything here seem worth assuming is completely bogus because of the Coverity and Valgrind passes? I have tracked scan-build for some time, and I'm sure that almost all of these bugs are false positives at this point. I have a private branch somewhere that I have badly hacked up (e.g., hardcoding enable_assert = 1), which gets that number down to 251 according to my latest notes. That's about the best you can hope for. Btw., you can also keep score here: http://pgci.eisentraut.org/jenkins/view/PostgreSQL/job/postgresql_master_scan-build/. This uses an older version of clang, so the number of bugs is lower, but the nature of the bugs is also more stupid. I plan to upgrade that at some point. I think it's good that Postgres is using the tools and publishing the results. The reports being generated with Clang 3.3 on Postgres 9.3.1 are different than those posted. For example, french.c is not listed in the Clang 3.3 reports. What version of Clang is used in the online report? Jeff -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Clang 3.3 Analyzer Results
On Tue, Nov 12, 2013 at 7:11 PM, Alvaro Herrera alvhe...@2ndquadrant.com wrote: Kevin Grittner escribió: These both seemed legitimate to me. Patch attached. Any objections to applying it? I realize the memory leak is a tiny one in the regression testing code, so it could never amount to enough to matter; but it seems worth fixing just to avoid noise in code analyzers. We have marked a large number of memory leak reports by Coverity in initdb and other short-lived programs as false positive, on the grounds that there's no point in freeing memory in a program that's about to terminate anyway. I'm not saying I agree necessarily with that POV, but if we take that stance then there's similarly no point in fixing this leak in the regression test code, is there? Ah, OK. So I would disagree here. Test code has to meet the same standards as production code. Otherwise, it could create spurious noise that could mask real findings :) It kind of begs a few questions. How is an user, integrator or auditor supposed to know * devs write 'real code' in the libraries and programs, as opposed to 'non-real code' in their test suite * what the devs have deemed 'not a good expenditure of resources'? Anyway, its just my philosophy. I know many projects share the opposing point of view. Jeff -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] nested hstore patch
On 11/12/2013 01:35 PM, Teodor Sigaev wrote: Hi! Attatched patch adds nesting feature, types (string, boll and numeric values), arrays and scalar to hstore type. All new features are described in PGConf.EU talk http://www.sai.msu.su/~megera/postgres/talks/hstore-dublin-2013.pdf (since PGCon some features was added). Patch includes: 1 implementaion SRF_RETURN_NEXT_NULL() 2 contrib/hstore changes 3 docs of new hstore module (many thanks to David E. Wheeler david.whee...@pgexperts.com) In current state patch is in WIP status, for short period I plan to move support of binary nested structure to core to share binary representation for hstore and json types. Thanks, Teodor. As soon as we have that shared binary representation available, I will be working on adapting it to JSON. cheers andrew -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Clang 3.3 Analyzer Results
Jeffrey Walton noloa...@gmail.com writes: On Tue, Nov 12, 2013 at 7:11 PM, Alvaro Herrera alvhe...@2ndquadrant.com wrote: We have marked a large number of memory leak reports by Coverity in initdb and other short-lived programs as false positive, on the grounds that there's no point in freeing memory in a program that's about to terminate anyway. I'm not saying I agree necessarily with that POV, but if we take that stance then there's similarly no point in fixing this leak in the regression test code, is there? Ah, OK. So I would disagree here. Test code has to meet the same standards as production code. No, this isn't about test code vs production, it's about not bothering to free memory explicitly when a program is about to terminate. Alvaro is suggesting that the proposed addition to pg_regress.c is just a waste of cycles. IMO it's not that big a deal either way in this case, since it's just one line of code that isn't going to take too long. However, I would push back pretty darn hard if you suggested that, say, the server needed to explicitly free everything it'd allocated before terminating --- the amount of bookkeeping needed for that would be ginormous, and it would be 100% pointless. From a testing standpoint, that means we need a way to explain to any analysis code that we don't care about the fact that specific allocations will still be there at termination. This isn't all that much different from suppressing other types of false positive, of course. As Alvaro and a couple of other people have mentioned, we've gone to some effort to identify and mark false positives with Coverity. I'm not convinced yet that clang's tools are mature enough to justify putting in similar effort with it. regards, tom lane -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] TODO : Allow parallel cores to be used by vacuumdb [ WIP ]
On Thu, Nov 7, 2013 at 8:42 PM, Dilip kumar dilip.ku...@huawei.com wrote: This patch implements the following TODO item: Allow parallel cores to be used by vacuumdb http://www.postgresql.org/message-id/4f10a728.7090...@agliodbs.com Like parallel pg_dump, vacuumdb is provided with the option to run the vacuum of multiple tables in parallel. [ vacuumdb –j ]
1. One new option is provided with vacuumdb to give the number of workers.
2. All workers will be started at the beginning and all will be waiting for the vacuum instruction from the master.
3. Now, if a table list is provided in the vacuumdb command using –t, it will send the vacuum of one table to one IDLE worker, the next table to the next IDLE worker, and so on.
4. If vacuum is given for one DB, it will execute a select on pg_class to get the table list, fetch the table names one by one, and assign the vacuum responsibility to IDLE workers.
Performance data for parallel vacuumdb:
Machine configuration: Cores: 8, RAM: 24GB
Test scenario: 16 tables, all with 4M records. [many records are deleted and inserted using some pattern, (file is attached in the mail)]
Test result:
{Base Code}
        Time(s)  %CPU Usage  Avg Read(kB/s)  Avg Write(kB/s)
        521      3%          12000           2
{With Parallel Vacuum Patch}
worker  Time(s)  %CPU Usage  Avg Read(kB/s)  Avg Write(kB/s)
1       518      3%          12000           2    -- this will take the same path as base code
2       390      5%          14000           3
8       235      7%          18000           4
16      197      8%          2               5
Conclusion: By running vacuumdb in parallel, CPU and I/O throughput increase, and it can give a 50% performance improvement.
Work to be done:
1. Documentation of the new command.
2. Parallel support for vacuuming all DBs.
Is it required to move the common code for parallel operation of pg_dump and vacuumdb to one place and reuse it? A prototype patch is attached in the mail, please provide your feedback/suggestions… Thanks Regards, Dilip Kumar -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers -- Michael -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] TODO : Allow parallel cores to be used by vacuumdb [ WIP ]
On Thu, Nov 7, 2013 at 8:42 PM, Dilip kumar dilip.ku...@huawei.com wrote: This patch implementing the following TODO item Allow parallel cores to be used by vacuumdb http://www.postgresql.org/message-id/4f10a728.7090...@agliodbs.com Cool. Could you add this patch to the next commit fest for 9.4? It begins officially in a couple of days. Here is the URL to it: https://commitfest.postgresql.org/action/commitfest_view?id=20 Regards, -- Michael -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Information about Access methods
On 11/13/2013 06:36 AM, Rohit Goyal wrote: Hi, Thanks for document. It was really helpful. Now, as index folder contains basically interface for all index type, do I need to change it, if I want to modify only b tree index algorithm? Do I need to change only btree index files or Do i have to change index folder files like indexam.c Without knowing what you are trying to do, it's really hard to answer that usefully. In general if you want to modify b-tree indexes you'd only have to modify the b-tree index implementation. If you want to add new capabilities or features to the indexing system in general then use them in b-tree, you'd probably also have to modify the access method interface. If you aren't just doing this for learning/research you should explain in detail what you are trying to do. There is no point spending lots of time creating a patch that would never get accepted into the PostgreSQL core, so you have to maintain it forever more... -- Craig Ringer http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training Services -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] [COMMITTERS] pgsql: Replace duplicate_oids with Perl implementation
On Sun, 2013-11-10 at 18:12 -0500, Tom Lane wrote: Perhaps, if we're worried about people keeping perl somewhere other than /usr/bin. However, the most likely reason for having a /usr/local/bin/perl or whatever is that it's a newer and shinier one than what's in /usr/bin. Since we're only interested in bog-standard perl, there's no real reason for us to want to pick up the local one. After the Perl version of duplicate_oids was committed, some non-Windows build farm member failed because it didn't have perl in /usr/bin. So that appears to be a real issue. The particular application to this case might be: what makes you so sure env is in /bin? I don't have a /bin/env, but the normal invocation is /usr/bin/env anyway. -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] [COMMITTERS] pgsql: Replace duplicate_oids with Perl implementation
On Tue, 2013-11-12 at 10:02 -0500, Robert Haas wrote: IMV, the role of the #! line is just to cater to the less-likely scenario where someone wants to run one of those scripts outside the build process; Let's remember that we are talking about unused_oids here. -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] [COMMITTERS] pgsql: Replace duplicate_oids with Perl implementation
On 11/12/2013 09:01 PM, Peter Eisentraut wrote: On Sun, 2013-11-10 at 18:12 -0500, Tom Lane wrote: Perhaps, if we're worried about people keeping perl somewhere other than /usr/bin. However, the most likely reason for having a /usr/local/bin/perl or whatever is that it's a newer and shinier one than what's in /usr/bin. Since we're only interested in bog-standard perl, there's no real reason for us to want to pick up the local one. After the Perl version of duplicate_oids was committed, some non-Windows build farm member failed because it didn't have perl in /usr/bin. So that appears to be a real issue. As Robert pointed out, The build process should be, and now is, invoking it via $(PERL), so how is this still an issue? cheers andrew -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] [COMMITTERS] pgsql: Replace duplicate_oids with Perl implementation
On Tue, 2013-11-12 at 21:30 -0500, Andrew Dunstan wrote: As Robert pointed out, The build process should be, and now is, invoking it via $(PERL), so how is this still an issue? unused_oids is not part of the build process. -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Re: [COMMITTERS] pgsql: Fix whitespace issues found by git diff --check, add gitattribut
On Mon, 2013-11-11 at 14:38 -0500, Andrew Dunstan wrote: Can we please agree on a minimum version of git and just support its features? Relying on versions close to the bleeding We're not relying on it. With an older version, you just can't take advantage of the newer features, so everything stays the same for you. That said, I have fixed this particular issue. -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
[HACKERS] [PATCH] ecpg: Split off mmfatal() from mmerror()
Similar to recent pg_upgrade changes (https://commitfest.postgresql.org/action/patch_view?id=1216), here is a patch to separate the terminating and nonterminating variants of mmerror() in ecpg. From 2c8cc7ea1135c9ceddb5b60c30e4363d3784cb29 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut pete...@gmx.net Date: Tue, 12 Nov 2013 22:12:08 -0500 Subject: [PATCH] ecpg: Split off mmfatal() from mmerror() This allows decorating mmfatal() with noreturn compiler hints, leading to better diagnostics. --- src/interfaces/ecpg/preproc/descriptor.c | 4 +-- src/interfaces/ecpg/preproc/ecpg.header | 47 +--- src/interfaces/ecpg/preproc/ecpg.trailer | 2 +- src/interfaces/ecpg/preproc/extern.h | 6 ++-- src/interfaces/ecpg/preproc/nls.mk | 4 +-- src/interfaces/ecpg/preproc/pgc.l| 42 ++-- src/interfaces/ecpg/preproc/type.c | 14 +- src/interfaces/ecpg/preproc/type.h | 2 +- src/interfaces/ecpg/preproc/variable.c | 36 9 files changed, 85 insertions(+), 72 deletions(-) diff --git a/src/interfaces/ecpg/preproc/descriptor.c b/src/interfaces/ecpg/preproc/descriptor.c index 115cb17..053a7af 100644 --- a/src/interfaces/ecpg/preproc/descriptor.c +++ b/src/interfaces/ecpg/preproc/descriptor.c @@ -274,7 +274,7 @@ output_set_descr(char *desc_name, char *index) case ECPGd_di_precision: case ECPGd_precision: case ECPGd_scale: -mmerror(PARSE_ERROR, ET_FATAL, descriptor item \%s\ is not implemented, +mmfatal(PARSE_ERROR, descriptor item \%s\ is not implemented, descriptor_item_name(results-value)); break; @@ -284,7 +284,7 @@ output_set_descr(char *desc_name, char *index) case ECPGd_octet: case ECPGd_ret_length: case ECPGd_ret_octet: -mmerror(PARSE_ERROR, ET_FATAL, descriptor item \%s\ cannot be set, +mmfatal(PARSE_ERROR, descriptor item \%s\ cannot be set, descriptor_item_name(results-value)); break; diff --git a/src/interfaces/ecpg/preproc/ecpg.header b/src/interfaces/ecpg/preproc/ecpg.header index 88d9cf5..71b11f4 100644 --- a/src/interfaces/ecpg/preproc/ecpg.header +++ b/src/interfaces/ecpg/preproc/ecpg.header @@ -64,11 +64,9 @@ struct ECPGtype ecpg_query = {ECPGt_char_variable, NULL, NULL, NULL, {NULL}, 0}; /* * Handle parsing errors and warnings */ -void -mmerror(int error_code, enum errortype type, const char *error, ...) +static void __attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 0))) +vmmerror(int error_code, enum errortype type, const char *error, va_list ap) { - va_list ap; - /* internationalize the error message string */ error = _(error); @@ -80,14 +78,11 @@ mmerror(int error_code, enum errortype type, const char *error, ...) fprintf(stderr, _(WARNING: )); break; case ET_ERROR: - case ET_FATAL: fprintf(stderr, _(ERROR: )); break; } - va_start(ap, error); vfprintf(stderr, error, ap); - va_end(ap); fprintf(stderr, \n); @@ -98,18 +93,38 @@ mmerror(int error_code, enum errortype type, const char *error, ...) case ET_ERROR: ret_value = error_code; break; - case ET_FATAL: - if (yyin) -fclose(yyin); - if (yyout) -fclose(yyout); - - if (strcmp(output_filename, -) != 0 unlink(output_filename) != 0) -fprintf(stderr, _(could not remove output file \%s\\n), output_filename); - exit(error_code); } } +void +mmerror(int error_code, enum errortype type, const char *error, ...) +{ + va_list ap; + + va_start(ap, error); + vmmerror(error_code, type, error, ap); + va_end(ap); +} + +void +mmfatal(int error_code, const char *error, ...) 
+{ + va_list ap; + + va_start(ap, error); + vmmerror(error_code, ET_ERROR, error, ap); + va_end(ap); + + if (yyin) + fclose(yyin); + if (yyout) + fclose(yyout); + + if (strcmp(output_filename, -) != 0 unlink(output_filename) != 0) + fprintf(stderr, _(could not remove output file \%s\\n), output_filename); + exit(error_code); +} + /* * string concatenation */ diff --git a/src/interfaces/ecpg/preproc/ecpg.trailer b/src/interfaces/ecpg/preproc/ecpg.trailer index 58155ab..342b7bc 100644 --- a/src/interfaces/ecpg/preproc/ecpg.trailer +++ b/src/interfaces/ecpg/preproc/ecpg.trailer @@ -1687,7 +1687,7 @@ cvariable: CVARIABLE { case '[': if (brace) -mmerror(PARSE_ERROR, ET_FATAL, multidimensional arrays for simple data types are not supported); +mmfatal(PARSE_ERROR, multidimensional arrays for simple data types are not supported); brace_open++; break; case ']': diff --git a/src/interfaces/ecpg/preproc/extern.h b/src/interfaces/ecpg/preproc/extern.h index ccf5548..3bbb6a4 100644 --- a/src/interfaces/ecpg/preproc/extern.h +++ b/src/interfaces/ecpg/preproc/extern.h @@ -73,10 +73,8 @@ extern int base_yylex(void); extern void base_yyerror(const char *); extern void *mm_alloc(size_t), *mm_realloc(void *, size_t); extern char *mm_strdup(const char *); -extern void -mmerror(int, enum