[HACKERS] Regression stopping PostgreSQL 9.4.13 if a walsender is running
I have noticed that after the 9.4.13 release PostgreSQL reliably fails to shutdown with smart and fast method if there is a running walsender. The postmaster continues waiting forever for the walsender termination. It works perfectly with all the other major releases. I bisected the issue to commit 1cdc0ab9c180222a94e1ea11402e728688ddc37d After some investigation I discovered that the instruction that sets got_SIGUSR2 was lost during the backpatch in the WalSndLastCycleHandler function. The trivial patch is the following: ~~~ diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index a0601b3..b24f9a1 100644 *** a/src/backend/replication/walsender.c --- b/src/backend/replication/walsender.c *** WalSndLastCycleHandler(SIGNAL_ARGS) *** 2658,2663 --- 2658,2664 { int save_errno = errno; + got_SIGUSR2 = true; if (MyWalSnd) SetLatch(&MyWalSnd->latch); ~~~ Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it signature.asc Description: OpenPGP digital signature
[HACKERS] Re: [BUGS] BUG #14230: Wrong timeline returned by pg_stop_backup on a standby
On 08/07/16 13:10, Michael Paquier wrote: > On Fri, Jul 8, 2016 at 6:40 PM, Marco Nenciarini > wrote: >> The resulting backup is working perfectly, because Postgres has no use >> for pg_stop_backup LSN, but this can confuse any tool that uses the stop >> LSN to figure out which WAL files are needed by the backup (in this case >> the only file needed is the one containing the start checkpoint). >> >> After some discussion with Álvaro, my proposal is to avoid that by >> returning the stoppoint as the maximum between the startpoint and the >> min_recovery_end_location, in case of backup from the standby. > > You are facing a pattern similar to the problem reported already on > this thread by Horiguchi-san: > http://www.postgresql.org/message-id/20160609.215558.118976703.horiguchi.kyot...@lab.ntt.co.jp > And it seems to me that you are jumping to an incorrect conclusion, > what we'd want to do is to update a bit more aggressively the minimum > recovery point in cases on a node in recovery in the case where no > buffers are flushed by other backends. > Yes, it is exactly the same bug. My proposal was based on the assumption that it were only a cosmetic issue, but given that it can trigger errors, I agree that the right solution is to advance the minimum recovery point in that case. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it signature.asc Description: OpenPGP digital signature
[HACKERS] Re: [BUGS] BUG #14230: Wrong timeline returned by pg_stop_backup on a standby
On 07/07/16 08:38, Michael Paquier wrote: > On Thu, Jul 7, 2016 at 12:57 AM, Marco Nenciarini > wrote: >> After further analysis, the issue is that we retrieve the starttli from >> the ControlFile structure, but it was using ThisTimeLineID when writing >> the backup label. >> >> I've attached a very simple patch that fixes it. > > ThisTimeLineID is always set at 0 on purpose on a standby, so we > cannot rely on it (well it is set temporarily when recycling old > segments). At recovery when parsing the backup_label file there is no > actual use of the start segment name, so that's only a cosmetic > change. But surely it would be better to get that fixed, because > that's useful for debugging. > > While looking at your patch, I thought that it would have been > tempting to use GetXLogReplayRecPtr() to get the timeline ID when in > recovery, but what we really want to know here is the timeline of the > last REDO pointer, which is starttli, and that's more consistent with > the fact that we use startpoint when writing the backup_label file. In > short, +1 for this fix. > > I am adding that in the list of open items, adding Magnus in CC whose > commit for non-exclusive backups is at the origin of this defect. > While we were testing the patch we noticed another behavior that is not strictly a bug, but can confuse backup tools: To quickly produce some WAL files we were executing a series of pg_switch_xlog+CHECKPOINT, and we noticed that doing a backup from a standby after that results in a startpoint higher than the stoppoint. Let me show it on a brand new master/replica cluster (master is port 5496, replica is 6496). The script is attached. --- You are now connected to database "postgres" as user "postgres" via socket in "/tmp" at port "5496". 
SELECT pg_is_in_recovery(); -[ RECORD 1 ]-+-- pg_is_in_recovery | f CHECKPOINT; CHECKPOINT SELECT pg_switch_xlog(); -[ RECORD 1 ]--+-- pg_switch_xlog | 0/3E8 CHECKPOINT; CHECKPOINT SELECT pg_switch_xlog(); -[ RECORD 1 ]--+-- pg_switch_xlog | 0/4E8 You are now connected to database "postgres" as user "postgres" via socket in "/tmp" at port "6496". SELECT pg_is_in_recovery(); -[ RECORD 1 ]-+-- pg_is_in_recovery | t SELECT pg_start_backup('tst backup',TRUE,FALSE); -[ RECORD 1 ]---+-- pg_start_backup | 0/428 SELECT * FROM pg_stop_backup(FALSE); -[ RECORD 1 ]- lsn| 0/2F8 labelfile | START WAL LOCATION: 0/428 (file 0004)+ | CHECKPOINT LOCATION: 0/460 + | BACKUP METHOD: streamed + | BACKUP FROM: standby + | START TIME: 2016-07-08 10:46:55 CEST + | LABEL: tst backup+ | spcmapfile | SELECT * FROM pg_control_checkpoint(); -[ RECORD 1 ]+- checkpoint_location | 0/460 prior_location | 0/260 redo_location| 0/428 redo_wal_file| 00010004 timeline_id | 1 prev_timeline_id | 1 full_page_writes | t next_xid | 0:865 next_oid | 12670 next_multixact_id| 1 next_multi_offset| 0 oldest_xid | 858 oldest_xid_dbid | 1 oldest_active_xid| 865 oldest_multi_xid | 1 oldest_multi_dbid| 1 oldest_commit_ts_xid | 865 newest_commit_ts_xid | 865 checkpoint_time | 2016-07-08 10:46:55+02 SELECT * FROM pg_control_recovery(); -[ RECORD 1 ]-+-- min_recovery_end_location | 0/2F8 min_recovery_end_timeline | 1 backup_start_location | 0/0 backup_end_location | 0/0 end_of_backup_record_required | f --- In particular, the pg_start_backup LSN is 0/428 and the pg_stop_backup LSN is 0/2F8. The same issue is present when you do a backup using pg_basebackup: --- transaction log start point: 0/828 on timeline 1 pg_basebackup: starting background WAL receiver 22244/22244 kB (100%), 1/1 tablespace transaction log end point: 0/2F8 pg_basebackup: waiting for background process to finish streaming ... 
pg_basebackup: base backup completed --- The resulting backup is working perfectly, because Postgres has no use for pg_stop_backup LSN, but this can confuse any tool that uses the stop LSN to figure out which WAL files are needed by the backup (in this case
[HACKERS] Re: [BUGS] BUG #14230: Wrong timeline returned by pg_stop_backup on a standby
On 06/07/16 17:41, Marco Nenciarini wrote: > On 06/07/16 17:37, Marco Nenciarini wrote: >> Hi, >> >> On 06/07/16 17:07, francesco.cano...@2ndquadrant.it wrote: >>> The following bug has been logged on the website: >>> >>> Bug reference: 14230 >>> Logged by: Francesco Canovai >>> Email address: francesco.cano...@2ndquadrant.it >>> PostgreSQL version: 9.6beta2 >>> Operating system: Linux >>> Description: >>> >>> I'm taking a concurrent backup from a standby in PostgreSQL beta2 and I get >>> the wrong timeline from pg_stop_backup(false). >>> >>> This is what I'm doing: >>> >>> 1) I set up an environment with a primary server and a replica in streaming >>> replication. >>> >>> 2) On the replica, I run >>> >>> postgres=# SELECT pg_start_backup('test_backup', true, false); >>> pg_start_backup >>> - >>> 0/3000A00 >>> (1 row) >>> >>> 3) When I run pg_stop_backup, it returns a start wal location belonging to a >>> file with timeline 0. >>> >>> postgres=# SELECT pg_stop_backup(false); >>> pg_stop_backup >>> >>> --- >>> (0/3000AE0,"START WAL LOCATION: 0/3000A00 (file >>> 0003)+ >>> CHECKPOINT LOCATION: 0/3000A38 >>> + >>> BACKUP METHOD: streamed >>> + >>> BACKUP FROM: standby >>> + >>> START TIME: 2016-07-06 16:44:31 CEST >>> + >>> LABEL: test_backup >>> + >>> ","") >>> (1 row) >>> >>> The timeline returned is fine (is 1) when running the same commands on the >>> master. >>> >>> An incorrect backup label doesn't prevent PostgreSQL from starting up, but >>> it affects the tools using that information. >>> >>> >> >> The issue here is that the do_pg_stop_backup function uses the >> ThisTimeLineID variable that is not valid on standbys. >> >> I think that it should read it from >> ControlFile->checkPointCopy.ThisTimeLineID as we do in do_pg_start_backup. >> > > No, that's not the solution. > > The backup_label is generated during the do_pg_start_backup call, so > also the copy in ControlFile->checkPointCopy.ThisTimeLineID is > uninitialized. 
> After further analysis, the issue is that we retrieve the starttli from the ControlFile structure, but it was using ThisTimeLineID when writing the backup label. I've attached a very simple patch that fixes it. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index e4645a3..aecede1 100644 *** a/src/backend/access/transam/xlog.c --- b/src/backend/access/transam/xlog.c *** do_pg_start_backup(const char *backupids *** 9974,9980 } while (!gotUniqueStartpoint); XLByteToSeg(startpoint, _logSegNo); ! XLogFileName(xlogfilename, ThisTimeLineID, _logSegNo); /* * Construct tablespace_map file --- 9974,9980 } while (!gotUniqueStartpoint); XLByteToSeg(startpoint, _logSegNo); ! XLogFileName(xlogfilename, starttli, _logSegNo); /* * Construct tablespace_map file signature.asc Description: OpenPGP digital signature
[HACKERS] Re: [BUGS] BUG #14230: Wrong timeline returned by pg_stop_backup on a standby
On 06/07/16 17:37, Marco Nenciarini wrote: > Hi, > > On 06/07/16 17:07, francesco.cano...@2ndquadrant.it wrote: >> The following bug has been logged on the website: >> >> Bug reference: 14230 >> Logged by: Francesco Canovai >> Email address: francesco.cano...@2ndquadrant.it >> PostgreSQL version: 9.6beta2 >> Operating system: Linux >> Description: >> >> I'm taking a concurrent backup from a standby in PostgreSQL beta2 and I get >> the wrong timeline from pg_stop_backup(false). >> >> This is what I'm doing: >> >> 1) I set up an environment with a primary server and a replica in streaming >> replication. >> >> 2) On the replica, I run >> >> postgres=# SELECT pg_start_backup('test_backup', true, false); >> pg_start_backup >> - >> 0/3000A00 >> (1 row) >> >> 3) When I run pg_stop_backup, it returns a start wal location belonging to a >> file with timeline 0. >> >> postgres=# SELECT pg_stop_backup(false); >> pg_stop_backup >> >> --- >> (0/3000AE0,"START WAL LOCATION: 0/3000A00 (file >> 0003)+ >> CHECKPOINT LOCATION: 0/3000A38 >> + >> BACKUP METHOD: streamed >> + >> BACKUP FROM: standby >> + >> START TIME: 2016-07-06 16:44:31 CEST >> + >> LABEL: test_backup >> + >> ","") >> (1 row) >> >> The timeline returned is fine (is 1) when running the same commands on the >> master. >> >> An incorrect backup label doesn't prevent PostgreSQL from starting up, but >> it affects the tools using that information. >> >> > > The issue here is that the do_pg_stop_backup function uses the > ThisTimeLineID variable that is not valid on standbys. > > I think that it should read it from > ControlFile->checkPointCopy.ThisTimeLineID as we do in do_pg_start_backup. > No, that's not the solution. The backup_label is generated during the do_pg_start_backup call, so also the copy in ControlFile->checkPointCopy.ThisTimeLineID is uninitialized. 
Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it signature.asc Description: OpenPGP digital signature
[HACKERS] Re: [BUGS] BUG #14230: Wrong timeline returned by pg_stop_backup on a standby
Hi, On 06/07/16 17:07, francesco.cano...@2ndquadrant.it wrote: > The following bug has been logged on the website: > > Bug reference: 14230 > Logged by: Francesco Canovai > Email address: francesco.cano...@2ndquadrant.it > PostgreSQL version: 9.6beta2 > Operating system: Linux > Description: > > I'm taking a concurrent backup from a standby in PostgreSQL beta2 and I get > the wrong timeline from pg_stop_backup(false). > > This is what I'm doing: > > 1) I set up an environment with a primary server and a replica in streaming > replication. > > 2) On the replica, I run > > postgres=# SELECT pg_start_backup('test_backup', true, false); > pg_start_backup > - > 0/3000A00 > (1 row) > > 3) When I run pg_stop_backup, it returns a start wal location belonging to a > file with timeline 0. > > postgres=# SELECT pg_stop_backup(false); > pg_stop_backup > > --- > (0/3000AE0,"START WAL LOCATION: 0/3000A00 (file > 0003)+ > CHECKPOINT LOCATION: 0/3000A38 > + > BACKUP METHOD: streamed > + > BACKUP FROM: standby > + > START TIME: 2016-07-06 16:44:31 CEST > + > LABEL: test_backup > + > ","") > (1 row) > > The timeline returned is fine (is 1) when running the same commands on the > master. > > An incorrect backup label doesn't prevent PostgreSQL from starting up, but > it affects the tools using that information. > > The issue here is that the do_pg_stop_backup function uses the ThisTimeLineID variable that is not valid on standbys. I think that it should read it from ControlFile->checkPointCopy.ThisTimeLineID as we do in do_pg_start_backup. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it signature.asc Description: OpenPGP digital signature
Re: [HACKERS] [BUGS] BUG #13473: VACUUM FREEZE mistakenly cancel standby sessions
On 27/06/15 01:13, Jim Nasby wrote: > On 6/26/15 8:50 AM, Marco Nenciarini wrote: >>> >In the heap_xlog_freeze we need to subtract one to the value of >>> cutoff_xid >>> >before passing it to ResolveRecoveryConflictWithSnapshot. >>> > >>> > >>> > >> Attached a proposed patch that solves the issue. > I have hit the bug again, as it has been fixed only from 9.5+ The procedure to reproduce it sent in the original post is not fully accurate, below there is one that always works: Run the following operation on an idle cluster. 1) connect to the master and run the following script create table t(id int primary key); insert into t select generate_series(1, 1); 2) connect to the standby and simulate a long running query: select pg_sleep(3600); 3) on the master and run the following commands: vacuum freeze verbose t; drop table t; 4) after 30 seconds the pg_sleep query on standby will be canceled. Attached there is a patch that apply on every version that misses the fix (9.0, 9.1, 9.2, 9.3, 9.4) Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index eb8eada..434880a 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -4764,7 +4764,13 @@ heap_xlog_freeze(XLogRecPtr lsn, XLogRecord *record) * consider the frozen xids as running. */ if (InHotStandby) - ResolveRecoveryConflictWithSnapshot(cutoff_xid, xlrec->node); + { + TransactionId latestRemovedXid = cutoff_xid; + + TransactionIdRetreat(latestRemovedXid); + + ResolveRecoveryConflictWithSnapshot(latestRemovedXid, xlrec->node); + } /* If we have a full-page image, restore it and we're done */ if (record->xl_info & XLR_BKP_BLOCK(0)) signature.asc Description: OpenPGP digital signature
Re: [HACKERS] Updated backup APIs for non-exclusive backups
Hi Magnus, I've finally found some time to take a look to the patch. It applies with some fuzziness on master, but the result looks correct. Unfortunately the OID of the new pg_stop_backup function conflicts with "pg_blocking_pids()" patch (52f5d578d6c29bf254e93c69043b817d4047ca67). After changing it the patch does not compile: gcc -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -Wno-unused-command-line-argument -g -O0 -g -fno-omit-frame-pointer -I../../../../src/include -I/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.11.sdk/usr/include/libxml2 -I/usr/local/include -I/usr/local/opt/openssl/include -c -o xlog.o xlog.c -MMD -MP -MF .deps/xlog.Po xlog.c:1:19: error: use of undeclared identifier 'tblspc_mapfbuf'; did you mean 'tblspcmapfile'? initStringInfo(&tblspc_mapfbuf); ^~ tblspcmapfile xlog.c:9790:19: note: 'tblspcmapfile' declared here StringInfo tblspcmapfile, bool infotbssize, ^ xlog.c:10073:22: error: use of undeclared identifier 'tblspc_mapfbuf'; did you mean 'tblspcmapfile'? appendStringInfo(&tblspc_mapfbuf, "%s %s\n", ti->oid, ti->path); ^~ tblspcmapfile xlog.c:9790:19: note: 'tblspcmapfile' declared here StringInfo tblspcmapfile, bool infotbssize, ^ xlog.c:10092:19: error: use of undeclared identifier 'labelfbuf'; did you mean 'labelfile'? initStringInfo(&labelfbuf); ^ labelfile xlog.c:9789:19: note: 'labelfile' declared here StringInfo labelfile, DIR *tblspcdir, List **tablespaces, ^ xlog.c:10099:21: error: use of undeclared identifier 'labelfbuf'; did you mean 'labelfile'? appendStringInfo(&labelfbuf, "START WAL LOCATION: %X/%X (file %s)\n", ^ labelfile xlog.c:9789:19: note: 'labelfile' declared here StringInfo labelfile, DIR *tblspcdir, List **tablespaces, ^ xlog.c:10101:21: error: use of undeclared identifier 'labelfbuf'; did you mean 'labelfile'? 
appendStringInfo(&labelfbuf, "CHECKPOINT LOCATION: %X/%X\n", ^ labelfile xlog.c:9789:19: note: 'labelfile' declared here StringInfo labelfile, DIR *tblspcdir, List **tablespaces, ^ xlog.c:10103:21: error: use of undeclared identifier 'labelfbuf'; did you mean 'labelfile'? appendStringInfo(&labelfbuf, "BACKUP METHOD: %s\n", ^ labelfile xlog.c:9789:19: note: 'labelfile' declared here StringInfo labelfile, DIR *tblspcdir, List **tablespaces, ^ xlog.c:10105:21: error: use of undeclared identifier 'labelfbuf'; did you mean 'labelfile'? appendStringInfo(&labelfbuf, "BACKUP FROM: %s\n", ^ labelfile xlog.c:9789:19: note: 'labelfile' declared here StringInfo labelfile, DIR *tblspcdir, List **tablespaces, ^ xlog.c:10107:21: error: use of undeclared identifier 'labelfbuf'; did you mean 'labelfile'? appendStringInfo(&labelfbuf, "START TIME: %s\n", strfbuf); ^ labelfile xlog.c:9789:19: note: 'labelfile' declared here StringInfo labelfile, DIR *tblspcdir, List **tablespaces, ^ xlog.c:10108:21: error: use of undeclared identifier 'labelfbuf'; did you mean 'labelfile'? 
appendStringInfo(&labelfbuf, "LABEL: %s\n", backupidstr); ^ labelfile xlog.c:9789:19: note: 'labelfile' declared here StringInfo labelfile, DIR *tblspcdir, List **tablespaces, ^ xlog.c:10142:15: error: use of undeclared identifier 'labelfbuf' if (fwrite(labelfbuf.data, labelfbuf.len, 1, fp) != 1 || ^ xlog.c:10142:31: error: use of undeclared identifier 'labelfbuf' if (fwrite(labelfbuf.data, labelfbuf.len, 1, fp) != 1 || ^ xlog.c:10151:10: error: use of undeclared identifier 'labelfbuf' pfree(labelfbuf.data); ^ xlog.c:10154:8: error: use of undeclared identifier 'tblspc_mapfbuf' if (tblspc_mapfbuf.len > 0) ^ xlog.c:10178:16: error: use of undeclared identifier 'tblspc_mapfbuf' if (fwrite(tblspc_mapfbuf.data, tblspc_mapfbuf.len, 1, fp) != 1 || ^ xlog.c:10178:37: error: use of undeclared identifier 'tblspc_mapfbuf' if (fwrite(tblspc_mapfbuf.data, tblspc_mapfbuf.len, 1, fp) != 1 || ^ xlog.c:10189:10: error: use of undeclared identifier 'tblspc_mapfbuf' pfree(tblspc_mapfbuf.data); ^ xlog.c:10193:17: error: use of undeclared identifier 'labelfbuf' *labelfile = labelfbuf.data; ^ xlog.c:10194:8: error: use of undeclared identifier 'tblspc_mapfbuf' if (tblspc_mapfbuf.len > 0) ^ xlog.c:10195:22: error: use of undeclared identifier 'tblspc_mapfbuf' *tblspcmapfile = tblspc_mapfbuf.data; ^ 19 errors generated. make[4]: *** [xlog.o] Error 1 make[3]: *** [transam-recursive] Error 2 make[2]: *** [access-recursive] Error 2 make[1]: *** [all-backend-recurse] Error 2 make: *** [all-src-recurse] Error 2 I've searched in past commits for any recent change that involves the affected lines, but I have not found any. Maybe some changes are missing? Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it signature.asc Description: OpenPGP digital signature
Re: [HACKERS] Updated backup APIs for non-exclusive backups
Hi Magnus, thanks for working on this topic. What it does is very similar to what Barman's pgespresso extension does and I'd like to see it implemented soon in the core. I've added myself as reviewer for the patch on commitfest site. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it signature.asc Description: OpenPGP digital signature
Re: [HACKERS] Uninterruptible slow geo_ops.c
On 11/12/15 19:18, Marco Nenciarini wrote: > On 11/12/15 18:48, Alvaro Herrera wrote: >> Hi, >> >> A customer of ours hit some very slow code while running the >> @>(polygon, polygon) operator with some big polygons. I'm not familiar >> with this stuff but I think the problem is that the algorithm converges >> too slowly to a solution and also has some pretty expensive calls >> somewhere. (Perhaps there is also a problem that the algorithm *never* >> converges for some inputs ...) >> >> While I'm not familiar with the code itself, and can't post the exact >> slow query just yet, I have noticed that it is missing a >> CHECK_FOR_INTERRUPTS() call to enable cancelling the slow query. I'd >> backpatch this all the way back. (The exact issue they hit is mutual >> recursion between touched_lseg_between_poly and lseg_between_poly. >> Since the latter also recurses on itself, the best way forward seem to >> add a check for interrupts in the loop there.) >> >> I will follow up on the actual slowness later, as warranted. >> > > I would add that it was not simply a slow computation, but more probably they > hit a case where the algorithm doesn't converge at all. > > I've killed it manually by calling ProcessInterrupts() through gdb after 7 > days and half of CPU time (100% of one CPU). > The server CPU is an Intel(R) Xeon(R) CPU E5-2660 v2 @ 2.20GHz. > > The query doesn't involve any table and is a simple call of @>(polygon, > polygon) operator. > > SELECT polygon 'poligon literal with 522 points' @> polygon 'poligon box' > > I'm checking if we can share the full query. > The full query is attached. 
Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it SELECT polygon '((-88000,419013470),(-88000,419007240),(-88000,419007240),(-88000,418987250),(-88000,418987250),(-88000,418970240),(-88000,418970240),(-88000,418950990),(-88000,418950990),(-88000,418923770),(-88000,418923770),(-88000,418917630),(-88000,418917630),(-88000,418912100),(-88000,418912100),(-88000,418900410),(-88000,418900410),(-88000,418895420),(-88000,418895420),(-88000,418893450),(-88000,418893450),(-88000,418859340),(-88000,418859340),(-88000,418842280),(-88000,418842280),(-88000,418824820),(-88000,418824820),(-88000,418819820),(-88000,418819820),(-88000,418812320),(-88000,418812320),(-88000,418781690),(-88000,418781690),(-88000,418750360),(-88000,418750360),(-88000,41875),(-88000,41875),(-879957280,41875),(-879957280,41875),(-879946700,41875),(-879946700,41875),(-879932880,41875),(-879932880,41875),(-879921140,41875),(-879921140,41875),(-879901800,41875),(-879901800,41875),(-879884730,41875),(-879884730,41875),(-879869770,41875),(-879869770,41875),(-879853620,41875),(-879853620,41875),(-879838680,41875),(-879838680,41875),(-879820670,41875),(-879820670,41875),(-879803830,41875),(-879803830,41875),(-879788600,41875),(-879788600,41875),(-879771820,41875),(-879771820,41875),(-879755740,41875),(-879755740,41875),(-879738740,41875),(-879738740,41875),(-879738580,41875),(-879738580,41875),(-879702620,41875),(-879702620,41875),(-879687560,41875),(-879687560,41875),(-879617590,41875),(-879617590,41875),(-879614270,41875),(-879614270,41875),(-879611560,41875),(-89611560,41875),(-879551870,41875),(-879551870,41875),(-879544600,41875),(-879544600,41875),(-879531980,41875),(-879531980,41875),(-879519330,41875),(-879519330,41875),(-879507350,41875),(-879507350,41875),(-879494370,41875),(-879494370,41875),(-879481570,41875),(-879481570,41875),(-879469050,41875),(-879469050,41875),(-879457200,41875),(-879457200,41875)
,(-879446660,41875),(-879446660,41875),(-879435070,41875),(-879435070,41875),(-879423630,41875),(-879423630,41875),(-879411030,41875),(-879411030,41875),(-879397520,41875),(-879397520,41875),(-879385310,41875),(-879385310,41875),(-879364550,41875),(-879364550,41875),(-879347030,41875),(-879347030,41875),(-879336090,41875),(-879336090,41875),(-879323380,41875),(-879323380,41875),(-87932310,41875),(-879312310,41875),(-879300290,41875),(-879300290,41875),(-879288280,41875),(-879288280,41875),(-879256170,41875),(-879256170,41875),(-879245250,41875),(-879245250,41875),(-879230540,41875),(-879230540,418
Re: [HACKERS] Uninterruptible slow geo_ops.c
On 11/12/15 18:48, Alvaro Herrera wrote: > Hi, > > A customer of ours hit some very slow code while running the > @>(polygon, polygon) operator with some big polygons. I'm not familiar > with this stuff but I think the problem is that the algorithm converges > too slowly to a solution and also has some pretty expensive calls > somewhere. (Perhaps there is also a problem that the algorithm *never* > converges for some inputs ...) > > While I'm not familiar with the code itself, and can't post the exact > slow query just yet, I have noticed that it is missing a > CHECK_FOR_INTERRUPTS() call to enable cancelling the slow query. I'd > backpatch this all the way back. (The exact issue they hit is mutual > recursion between touched_lseg_between_poly and lseg_between_poly. > Since the latter also recurses on itself, the best way forward seem to > add a check for interrupts in the loop there.) > > I will follow up on the actual slowness later, as warranted. > I would add that it was not simply a slow computation, but more probably they hit a case where the algorithm doesn't converge at all. I've killed it manually by calling ProcessInterrupts() through gdb after 7 days and half of CPU time (100% of one CPU). The server CPU is an Intel(R) Xeon(R) CPU E5-2660 v2 @ 2.20GHz. The query doesn't involve any table and is a simple call of @>(polygon, polygon) operator. SELECT polygon 'poligon literal with 522 points' @> polygon 'poligon box' I'm checking if we can share the full query. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it signature.asc Description: OpenPGP digital signature
Re: [HACKERS] pg_receivexlog: spurious error message connecting to 9.3
Hi Robert, On 17/11/15 20:10, Robert Haas wrote: > On Tue, Nov 10, 2015 at 1:35 AM, Craig Ringer wrote: >> On 10 November 2015 at 01:47, Marco Nenciarini >> wrote: >> >>> I've attached a little patch that removes the errors when connected to 9.3. >> >> Looks good to me. No point confusing users. >> >> The other callers of RunIdentifySystem are pg_basebackup and >> pg_receivelogical. >> >> pg_basebackup doesn't ask for the db_name (passes null). >> >> pg_receivelogical handles it being null already (and if it didn't, >> it'd die with or without this patch). >> >> pg_receivexlog expects it to be null and fails gracefully if it isn't. >> >> So this change just removes some pointless noise. > > The fprintf(stderr, ...) does not cause a non-local exit, so the > "else" just after it should be deleted. Otherwise, when that branch > is taken, *db_name doesn't get initialized at all. > > Actually, I'd suggest doing it like the attached instead, which seems > a bit tighter. > I agree, your patch is better. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it signature.asc Description: OpenPGP digital signature
Re: [HACKERS] pg_receivexlog: spurious error message connecting to 9.3
On 10/11/15 07:35, Craig Ringer wrote: > On 10 November 2015 at 01:47, Marco Nenciarini > wrote: > >> I've attached a little patch that removes the errors when connected to 9.3. > > Looks good to me. No point confusing users. > > The other callers of RunIdentifySystem are pg_basebackup and > pg_receivelogical. > > pg_basebackup doesn't ask for the db_name (passes null). > > pg_receivelogical handles it being null already (and if it didn't, > it'd die with or without this patch). > > pg_receivexlog expects it to be null and fails gracefully if it isn't. > > So this change just removes some pointless noise. > I've added it to the next commit fest to keep track of it. I've also set Craig as reviewer, as he has already sent a review for it (and it's a really simple patch). Thanks, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it signature.asc Description: OpenPGP digital signature
[HACKERS] pg_receivexlog: spurious error message connecting to 9.3
Hi, I was testing the compatibility of pg_receivexlog with the previous PostgreSQL versions and I've discovered that in 9.5 and 9.6, although being compatible with 9.3, it prints an ugly but harmless error message. $ 9.5/bin/pg_receivexlog -D /tmp/testx -v -p 5493 *pg_receivexlog: could not identify system: got 1 rows and 3 fields, expected 1 rows and 4 or more fields* *column number 3 is out of range 0..2* pg_receivexlog: starting log streaming at 0/400 (timeline 1) After the error, the streaming starts and continue without issue, as it was printed by the code that checks if the connection is not database specific, and this check is not needed on 9.3. I've attached a little patch that removes the errors when connected to 9.3. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it diff --git a/src/bin/pg_basebackup/streamutil.c b/src/bin/pg_basebackup/streamutil.c index 2c963b6..273b2cf 100644 *** a/src/bin/pg_basebackup/streamutil.c --- b/src/bin/pg_basebackup/streamutil.c *** RunIdentifySystem(PGconn *conn, char **s *** 295,308 if (db_name != NULL) { if (PQnfields(res) < 4) ! fprintf(stderr, ! _("%s: could not identify system: got %d rows and %d fields, expected %d rows and %d or more fields\n"), ! progname, PQntuples(res), PQnfields(res), 1, 4); ! ! if (PQgetisnull(res, 0, 3)) ! *db_name = NULL; else ! *db_name = pg_strdup(PQgetvalue(res, 0, 3)); } PQclear(res); --- 295,315 if (db_name != NULL) { if (PQnfields(res) < 4) ! { ! if (PQserverVersion(conn) >= 90400) ! fprintf(stderr, ! _("%s: could not identify system: got %d rows and %d fields, expected %d rows and %d or more fields\n"), ! progname, PQntuples(res), PQnfields(res), 1, 4); ! else ! *db_name = NULL; ! } else ! { ! if (PQgetisnull(res, 0, 3)) ! *db_name = NULL; ! else ! *db_name = pg_strdup(PQgetvalue(res, 0, 3)); ! } } PQclear(res); signature.asc Description: OpenPGP digital signature
Re: [HACKERS] [BUGS] BUG #13473: VACUUM FREEZE mistakenly cancel standby sessions
Il 26/06/15 15:43, marco.nenciar...@2ndquadrant.it ha scritto: > The following bug has been logged on the website: > > Bug reference: 13473 > Logged by: Marco Nenciarini > Email address: marco.nenciar...@2ndquadrant.it > PostgreSQL version: 9.4.4 > Operating system: all > Description: > > = Symptoms > > Let's have a simple master -> standby setup, with hot_standby_feedback > activated, > if a backend on standby is holding the cluster xmin and the master runs a > VACUUM FREEZE > on the same database of the standby's backend, it will generate a conflict > and the query > running on standby will be canceled. > > = How to reproduce it > > Run the following operation on an idle cluster. > > 1) connect to the standby and simulate a long running query: > >select pg_sleep(3600); > > 2) connect to the master and run the following script > >create table t(id int primary key); >insert into t select generate_series(1, 1); >vacuum freeze verbose t; >drop table t; > > 3) after 30 seconds the pg_sleep query on standby will be canceled. > > = Expected output > > The hot standby feedback should have prevented the query cancellation > > = Analysis > > Ive run postgres at DEBUG2 logging level, and I can confirm that the vacuum > correctly see the OldestXmin propagated by the standby through the hot > standby feedback. > The issue is in heap_xlog_freeze function, which calls > ResolveRecoveryConflictWithSnapshot as first thing, passing the cutoff_xid > value as first argument. > The cutoff_xid is the OldestXmin active when the vacuum, so it represents a > running xid. > The issue is that the function ResolveRecoveryConflictWithSnapshot expects > as first argument of is latestRemovedXid, which represent the higher xid > that has been actually removed, so there is an off-by-one error. 
> > I've been able to reproduce this issue for every version of postgres since > 9.0 (9.0, 9.1, 9.2, 9.3, 9.4 and current master) > > = Proposed solution > > In the heap_xlog_freeze we need to subtract one to the value of cutoff_xid > before passing it to ResolveRecoveryConflictWithSnapshot. > > > Attached a proposed patch that solves the issue. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index caacc10..28edb17 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -7571,9 +7571,12 @@ heap_xlog_freeze_page(XLogReaderState *record) if (InHotStandby) { RelFileNode rnode; + TransactionId latestRemovedXid = cutoff_xid; + + TransactionIdRetreat(latestRemovedXid); XLogRecGetBlockTag(record, 0, &rnode, NULL, NULL); - ResolveRecoveryConflictWithSnapshot(cutoff_xid, rnode); + ResolveRecoveryConflictWithSnapshot(latestRemovedXid, rnode); } if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) signature.asc Description: OpenPGP digital signature
Re: [HACKERS] File based Incremental backup v8
Il 05/03/15 05:42, Bruce Momjian ha scritto: > On Thu, Mar 5, 2015 at 01:25:13PM +0900, Fujii Masao wrote: >>>> Yeah, it might make the situation better than today. But I'm afraid that >>>> many users might get disappointed about that behavior of an incremental >>>> backup after the release... >>> >>> I don't get what do you mean here. Can you elaborate this point? >> >> The proposed version of LSN-based incremental backup has some limitations >> (e.g., every database files need to be read even when there is no >> modification >> in database since last backup, and which may make the backup time longer than >> users expect) which may disappoint users. So I'm afraid that users who can >> benefit from the feature might be very limited. IOW, I'm just sticking to >> the idea of timestamp-based one :) But I should drop it if the majority in >> the list prefers the LSN-based one even if it has such limitations. > > We need numbers on how effective each level of tracking will be. Until > then, the patch can't move forward. > I've written a little test script to estimate how much space can be saved by file level incremental, and I've run it on some real backups I have access to. The script takes two basebackup directory and simulate how much data can be saved in the 2nd backup using incremental backup (using file size/time and LSN) It assumes that every file in base, global and pg_tblspc which matches both size and modification time will also match from the LSN point of view. The result is that many databases can take advantage of incremental, even if not do big, and considering LSNs yield a result almost identical to the approach based on filesystem metadata. 
== Very big geographic database (similar to openstreetmap main DB), it contains versioned data, interval two months First backup size: 13286623850656 (12.1TiB) Second backup size: 13323511925626 (12.1TiB) Matching files count: 17094 Matching LSN count: 14580 Matching files size: 9129755116499 (8.3TiB, 68.5%) Matching LSN size: 9128568799332 (8.3TiB, 68.5%) == Big on-line store database, old data regularly moved to historic partitions, interval one day First backup size: 1355285058842 (1.2TiB) Second backup size: 1358389467239 (1.2TiB) Matching files count: 3937 Matching LSN count: 2821 Matching files size: 762292960220 (709.9GiB, 56.1%) Matching LSN size: 762122543668 (709.8GiB, 56.1%) == Ticketing system database, interval one day First backup size: 144988275 (138.3MiB) Second backup size: 146135155 (139.4MiB) Matching files count: 3124 Matching LSN count: 2641 Matching files size: 76908986 (73.3MiB, 52.6%) Matching LSN size: 67747928 (64.6MiB, 46.4%) == Online store, interval one day First backup size: 20418561133 (19.0GiB) Second backup size: 20475290733 (19.1GiB) Matching files count: 5744 Matching LSN count: 4302 Matching files size: 4432709876 (4.1GiB, 21.6%) Matching LSN size: 4388993884 (4.1GiB, 21.4%) == Heavily updated database, interval one week First backup size: 3203198962 (3.0GiB) Second backup size: 3222409202 (3.0GiB) Matching files count: 1801 Matching LSN count: 1273 Matching files size: 91206317 (87.0MiB, 2.8%) Matching LSN size: 69083532 (65.9MiB, 2.1%) Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it #!/usr/bin/env python from __future__ import print_function import collections from optparse import OptionParser import os import sys __author__ = 'Marco Nenciarini ' usage = """usage: %prog [options] backup_1 backup_2 """ parser = OptionParser(usage=usage) (options, args) = parser.parse_args() # need 2 directories if len(args) != 2: 
parser.print_help() sys.exit(1) FileItem = collections.namedtuple('FileItem', 'size time path') def get_files(target_dir): """Return a set of FileItem""" files = set() for dir_path, _, file_names in os.walk(target_dir, followlinks=True): for filename in file_names: path = os.path.join(dir_path, filename) rel_path = path[len(target_dir) + 1:] stats = os.stat(path) files.add(FileItem(stats.st_size, stats.st_mtime, rel_path)) return files def size_fmt(num, suffix='B'): """Format a size""" for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']: if abs(num) < 1024.0: return "%3.1f%s%s" % (num, unit, suffix) num /= 1024.0 return "%.1f%s%s" % (num, 'Yi', suffix) def percent_fmt(a, b): """Format a percent""" return "%.1f
Re: [HACKERS] File based Incremental backup v8
Hi Fujii, Il 03/03/15 11:48, Fujii Masao ha scritto: > On Tue, Mar 3, 2015 at 12:36 AM, Marco Nenciarini > wrote: >> Il 02/03/15 14:21, Fujii Masao ha scritto: >>> On Thu, Feb 12, 2015 at 10:50 PM, Marco Nenciarini >>> wrote: >>>> Hi, >>>> >>>> I've attached an updated version of the patch. >>> >>> basebackup.c:1565: warning: format '%lld' expects type 'long long >>> int', but argument 8 has type '__off_t' >>> basebackup.c:1565: warning: format '%lld' expects type 'long long >>> int', but argument 8 has type '__off_t' >>> pg_basebackup.c:865: warning: ISO C90 forbids mixed declarations and code >>> >> >> I'll add the an explicit cast at that two lines. >> >>> When I applied three patches and compiled the code, I got the above >>> warnings. >>> >>> How can we get the full backup that we can use for the archive recovery, >>> from >>> the first full backup and subsequent incremental backups? What commands >>> should >>> we use for that, for example? It's better to document that. >>> >> >> I've sent a python PoC that supports the plain format only (not the tar one). >> I'm currently rewriting it in C (with also the tar support) and I'll send a >> new patch containing it ASAP. > > Yeah, if special tool is required for that purpose, the patch should include > it. > I'm working on it. The interface will be exactly the same of the PoC script I've attached to 54c7cdad.6060...@2ndquadrant.it >>> What does "1" of the heading line in backup_profile mean? >>> >> >> Nothing. It's a version number. If you think it's misleading I will remove >> it. > > A version number of file format of backup profile? If it's required for > the validation of backup profile file as a safe-guard, it should be included > in the profile file. For example, it might be useful to check whether > pg_basebackup executable is compatible with the "source" backup that > you specify. But more info might be needed for such validation. 
> The current implementation bail out with an error if the header line is different from what it expect. It also reports and error if the 2nd line is not the start WAL location. That's all that pg_basebackup needs to start a new incremental backup. All the other information are useful to reconstruct a full backup in case of an incremental backup, or maybe to check the completeness of an archived full backup. Initially the profile was present only in incremental backups, but after some discussion on list we agreed to always write it. >>> Sorry if this has been already discussed so far. Why is a backup profile >>> file >>> necessary? Maybe it's necessary in the future, but currently seems not. >> >> It's necessary because it's the only way to detect deleted files. > > Maybe I'm missing something. Seems we can detect that even without a profile. > For example, please imagine the case where the file has been deleted since > the last full backup and then the incremental backup is taken. In this case, > that deleted file exists only in the full backup. We can detect the deletion > of > the file by checking both full and incremental backups. > When you take an incremental backup, only changed files are sent. Without the backup_profile in the incremental backup, you cannot detect a deleted file, because it's indistinguishable from a file that is not changed. >>> We've really gotten the consensus about the current design, especially that >>> every files basically need to be read to check whether they have been >>> modified >>> since last backup even when *no* modification happens since last backup? >> >> The real problem here is that there is currently no way to detect that a >> file is not changed since the last backup. We agreed to not use file system >> timestamps as they are not reliable for that purpose. > > TBH I prefer timestamp-based approach in the first version of incremental > backup > even if's less reliable than LSN-based one. 
I think that some users who are > using timestamp-based rsync (i.e., default mode) for the backup would be > satisfied with timestamp-based one. The original design was to compare size+timestamp+checksums (only if everything else matches and the file has been modified after the start of the backup), but the feedback from the list was that we cannot trust the filesystem mtime and we must use LSN instead. > >> Using
Re: [HACKERS] File based Incremental backup v8
Il 02/03/15 14:21, Fujii Masao ha scritto: > On Thu, Feb 12, 2015 at 10:50 PM, Marco Nenciarini > wrote: >> Hi, >> >> I've attached an updated version of the patch. > > basebackup.c:1565: warning: format '%lld' expects type 'long long > int', but argument 8 has type '__off_t' > basebackup.c:1565: warning: format '%lld' expects type 'long long > int', but argument 8 has type '__off_t' > pg_basebackup.c:865: warning: ISO C90 forbids mixed declarations and code > I'll add the an explicit cast at that two lines. > When I applied three patches and compiled the code, I got the above warnings. > > How can we get the full backup that we can use for the archive recovery, from > the first full backup and subsequent incremental backups? What commands should > we use for that, for example? It's better to document that. > I've sent a python PoC that supports the plain format only (not the tar one). I'm currently rewriting it in C (with also the tar support) and I'll send a new patch containing it ASAP. > What does "1" of the heading line in backup_profile mean? > Nothing. It's a version number. If you think it's misleading I will remove it. > Sorry if this has been already discussed so far. Why is a backup profile file > necessary? Maybe it's necessary in the future, but currently seems not. It's necessary because it's the only way to detect deleted files. > Several infos like LSN, modification time, size, etc are tracked in a backup > profile file for every backup files, but they are not used for now. If it's > now > not required, I'm inclined to remove it to simplify the code. I've put LSN there mainly for debugging purpose, but it can also be used to check the file during pg_restorebackup execution. The sent field is probably redundant (if sent = False and LSN is not set, we should probably simply avoid to write a line about that file) and I'll remove it in the next patch. 
> > We've really gotten the consensus about the current design, especially that > every files basically need to be read to check whether they have been modified > since last backup even when *no* modification happens since last backup? The real problem here is that there is currently no way to detect that a file is not changed since the last backup. We agreed to not use file system timestamps as they are not reliable for that purpose. Using LSN has a significant advantage over using a checksum, as we can start the full copy as soon as we find a block with an LSN greater than the threshold. There are two cases: 1) the file is changed, so we can assume that we detect it after reading 50% of the file, then we send it taking advantage of file system cache; 2) the file is not changed, so we read it without sending anything. It will end up producing an I/O comparable to a normal backup. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it signature.asc Description: OpenPGP digital signature
Re: [HACKERS] pg_check_dir comments and implementation mismatch
Il 02/02/15 21:48, Robert Haas ha scritto: > On Sat, Jan 31, 2015 at 8:28 AM, Marco Nenciarini > wrote: >> Il 30/01/15 03:54, Michael Paquier ha scritto: >>> On Fri, Jan 30, 2015 at 2:49 AM, Tom Lane wrote: >>>> There is at least one other bug in that function now that I look at it: >>>> in event of a readdir() failure, it neglects to execute closedir(). >>>> Perhaps not too significant since all existing callers will just exit() >>>> anyway after a failure, but still ... >>> I would imagine that code scanners like coverity or similar would not >>> be happy about that. ISTM that it is better to closedir() >>> appropriately in all the code paths. >>> >> >> I've attached a new version of the patch fixing the missing closedir on >> readdir error. > > If readir() fails and closedir() succeeds, the return will be -1 but > errno will be 0. > The attached patch should fix it. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it diff --git a/src/port/pgcheckdir.c b/src/port/pgcheckdir.c index 7061893..7102f2c 100644 *** a/src/port/pgcheckdir.c --- b/src/port/pgcheckdir.c *** *** 22,28 * Returns: *0 if nonexistent *1 if exists and empty ! *2 if exists and not empty *-1 if trouble accessing directory (errno reflects the error) */ int --- 22,30 * Returns: *0 if nonexistent *1 if exists and empty ! *2 if exists and contains _only_ dot files ! *3 if exists and contains a mount point ! *4 if exists and not empty *-1 if trouble accessing directory (errno reflects the error) */ int *** pg_check_dir(const char *dir) *** 32,37 --- 34,41 DIR*chkdir; struct dirent *file; booldot_found = false; + boolmount_found = false; + int readdir_errno; chkdir = opendir(dir); if (chkdir == NULL) *** pg_check_dir(const char *dir) *** 51,60 { dot_found = true; } else if (strcmp("lost+found", file->d_name) == 0) { ! result = 3; /* not empty, mount point */ ! 
break; } #endif else --- 55,64 { dot_found = true; } + /* lost+found directory */ else if (strcmp("lost+found", file->d_name) == 0) { ! mount_found = true; } #endif else *** pg_check_dir(const char *dir) *** 64,72 } } ! if (errno || closedir(chkdir)) result = -1;/* some kind of I/O error? */ /* We report on dot-files if we _only_ find dot files */ if (result == 1 && dot_found) result = 2; --- 68,87 } } ! if (errno) result = -1;/* some kind of I/O error? */ + /* Close chkdir and avoid overwriting the readdir errno on success */ + readdir_errno = errno; + if (closedir(chkdir)) + result = -1;/* error executing closedir */ + else + errno = readdir_errno; + + /* We report on mount point if we find a lost+found directory */ + if (result == 1 && mount_found) + result = 3; + /* We report on dot-files if we _only_ find dot files */ if (result == 1 && dot_found) result = 2; signature.asc Description: OpenPGP digital signature
Re: [HACKERS] File based Incremental backup v8
Hi, I've attached an updated version of the patch. This fixes the issue on checksum calculation for segments after the first one. To solve it I've added an optional uint32 *segno argument to parse_filename_for_nontemp_relation, so I can know the segment number and calculate the block number correctly. Il 29/01/15 18:57, Robert Haas ha scritto: > On Thu, Jan 29, 2015 at 9:47 AM, Marco Nenciarini > wrote: >> The current implementation of copydir function is incompatible with LSN >> based incremental backups. The problem is that new files are created, >> but their blocks are still with the old LSN, so they will not be backed >> up because they are looking old enough. > > I think this is trying to pollute what's supposed to be a pure > fs-level operation ("copy a directory") into something that is aware > of specific details like the PostgreSQL page format. I really think > that nothing in storage/file should know about the page format. If we > need a function that copies a file while replacing the LSNs, I think > it should be a new function living somewhere else. > I've named it copydir_set_lsn and placed it as static function in dbcommands.c. This lefts the copydir and copy_file functions in copydir.c untouched. The copydir function in copydir.c is now unused, while the copy_file function is still used during unlogged tables reinit. > A bigger problem is that you are proposing to stamp those files with > LSNs that are, for lack of a better word, fake. I would expect that > this would completely break if checksums are enabled. Also, unlogged > relations typically have an LSN of 0; this would change that in some > cases, and I don't know whether that's OK. > I've investigate a bit and I have not been able to find any problem here. > The issues here are similar to those in > http://www.postgresql.org/message-id/20150120152819.gc24...@alap3.anarazel.de > - basically, I think we need to make CREATE DATABASE and ALTER > DATABASE .. 
SET TABLESPACE fully WAL-logged operations, or this is > never going to work right. If we're not going to allow that, we need > to disallow hot backups while those operations are in progress. > As already said the copydir-LSN patch should be treated as a "temporary" until a proper WAL logging of CREATE DATABASE and ALTER DATABASE SET TABLESPACE will be implemented. At that time we could probably get rid of the whole copydir.[ch] file moving the copy_file function inside reinit.c Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it diff --git a/src/backend/storage/file/reinit.c b/src/backend/storage/file/reinit.c index afd9255..02b5fee 100644 *** a/src/backend/storage/file/reinit.c --- b/src/backend/storage/file/reinit.c *** static void ResetUnloggedRelationsInTabl *** 28,35 int op); static void ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op); - static bool parse_filename_for_nontemp_relation(const char *name, - int *oidchars, ForkNumber *fork); typedef struct { --- 28,33 *** ResetUnloggedRelationsInDbspaceDir(const *** 388,446 fsync_fname((char *) dbspacedirname, true); } } - - /* - * Basic parsing of putative relation filenames. - * - * This function returns true if the file appears to be in the correct format - * for a non-temporary relation and false otherwise. - * - * NB: If this function returns true, the caller is entitled to assume that - * *oidchars has been set to the a value no more than OIDCHARS, and thus - * that a buffer of OIDCHARS+1 characters is sufficient to hold the OID - * portion of the filename. This is critical to protect against a possible - * buffer overrun. - */ - static bool - parse_filename_for_nontemp_relation(const char *name, int *oidchars, - ForkNumber *fork) - { - int pos; - - /* Look for a non-empty string of digits (that isn't too long). 
*/ - for (pos = 0; isdigit((unsigned char) name[pos]); ++pos) - ; - if (pos == 0 || pos > OIDCHARS) - return false; - *oidchars = pos; - - /* Check for a fork name. */ - if (name[pos] != '_') - *fork = MAIN_FORKNUM; - else - { - int forkchar; - - forkchar = forkname_chars(&name[pos + 1], fork); - if (forkchar <= 0) - return false; -
Re: [HACKERS] New CF app deployment
Il 08/02/15 17:04, Magnus Hagander ha scritto: > > Filenames are now shown for attachments, including a direct link to the > attachment itself. I've also run a job to populate all old threads. > I wonder what is the algorithm to detect when an attachment is a patch. If you look at https://commitfest.postgresql.org/4/94/ all the attachments are marked as "Patch: no", but many of them are clearly a patch. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it signature.asc Description: OpenPGP digital signature
Re: [HACKERS] File based Incremental backup v9
Il 02/02/15 22:28, Magnus Hagander ha scritto: > On Mon, Feb 2, 2015 at 10:06 PM, Robert Haas <mailto:robertmh...@gmail.com>> wrote: > > On Sat, Jan 31, 2015 at 6:47 PM, Marco Nenciarini > <mailto:marco.nenciar...@2ndquadrant.it>> wrote: > > Il 31/01/15 17:22, Erik Rijkers ha scritto: > >> On Sat, January 31, 2015 15:14, Marco Nenciarini wrote: > >> > >>> 0001-public-parse_filename_for_nontemp_relation.patch > >>> 0002-copydir-LSN-v2.patch > >>> 0003-File-based-incremental-backup-v8.patch > >> > >> Hi, > >> > >> It looks like it only compiles with assert enabled. > >> > > > > It is due to a typo (assert instead of Assert). You can find the updated > > patch attached to this message. > > I would sure like it if you would avoid changing the subject line > every time you post a new version of this patch. It breaks the > threading for me. > > > +1 - it does break gmail. Ok, sorry for that. > > > > It seems to have also broken it for the CommitFest app, which thinks > v3 is the last version. I was not able to attach the new version. > > > The CF app has detected that it's the same thread, because of the > headers (gmail is the buggy one here - the headers of the email are > perfectly correct). > > It does not, however, pick up and show the change of subject there (but > you can see if if you click the link for the latest version into the > archives - the link under "latest" or "latest attachment" both go to the > v9 patch). > > > > When I clicked on "attach thread" without having logged in, it took me > to a bad URL. When I clicked on it after having logged in, it > > > Clearly a bug. > > > > purported to work, but AFAICS, it didn't actually do anything. > > > That's because the thread is already there, and you're adding it again. > Of course, it wouldn't hurt if it actually told you that :) > I'm also confused from the "(Patch: No)" part at the end of every line if you expand the last attachment line. Every message shown here contains one or more patch attached. 
Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it signature.asc Description: OpenPGP digital signature
[HACKERS] File based Incremental backup v9
Il 31/01/15 17:22, Erik Rijkers ha scritto: > On Sat, January 31, 2015 15:14, Marco Nenciarini wrote: > >> 0001-public-parse_filename_for_nontemp_relation.patch >> 0002-copydir-LSN-v2.patch >> 0003-File-based-incremental-backup-v8.patch > > Hi, > > It looks like it only compiles with assert enabled. > It is due to a typo (assert instead of Assert). You can find the updated patch attached to this message. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it From 3e451077283de8e99c4eceb748d49c34329c6ef8 Mon Sep 17 00:00:00 2001 From: Marco Nenciarini Date: Thu, 29 Jan 2015 12:18:47 +0100 Subject: [PATCH 1/3] public parse_filename_for_nontemp_relation --- src/backend/storage/file/reinit.c | 58 --- src/common/relpath.c | 56 + src/include/common/relpath.h | 2 ++ 3 files changed, 58 insertions(+), 58 deletions(-) diff --git a/src/backend/storage/file/reinit.c b/src/backend/storage/file/reinit.c index afd9255..02b5fee 100644 *** a/src/backend/storage/file/reinit.c --- b/src/backend/storage/file/reinit.c *** static void ResetUnloggedRelationsInTabl *** 28,35 int op); static void ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op); - static bool parse_filename_for_nontemp_relation(const char *name, - int *oidchars, ForkNumber *fork); typedef struct { --- 28,33 *** ResetUnloggedRelationsInDbspaceDir(const *** 388,446 fsync_fname((char *) dbspacedirname, true); } } - - /* - * Basic parsing of putative relation filenames. - * - * This function returns true if the file appears to be in the correct format - * for a non-temporary relation and false otherwise. - * - * NB: If this function returns true, the caller is entitled to assume that - * *oidchars has been set to the a value no more than OIDCHARS, and thus - * that a buffer of OIDCHARS+1 characters is sufficient to hold the OID - * portion of the filename. 
This is critical to protect against a possible - * buffer overrun. - */ - static bool - parse_filename_for_nontemp_relation(const char *name, int *oidchars, - ForkNumber *fork) - { - int pos; - - /* Look for a non-empty string of digits (that isn't too long). */ - for (pos = 0; isdigit((unsigned char) name[pos]); ++pos) - ; - if (pos == 0 || pos > OIDCHARS) - return false; - *oidchars = pos; - - /* Check for a fork name. */ - if (name[pos] != '_') - *fork = MAIN_FORKNUM; - else - { - int forkchar; - - forkchar = forkname_chars(&name[pos + 1], fork); - if (forkchar <= 0) - return false; - pos += forkchar + 1; - } - - /* Check for a segment number. */ - if (name[pos] == '.') - { - int segchar; - - for (segchar = 1; isdigit((unsigned char) name[pos + segchar]); ++segchar) - ; - if (segchar <= 1) - return false; - pos += segchar; - } - - /* Now we should be at the end. */ - if (name[pos] != '\0') - return false; - return true; - } --- 386,388 diff --git a/src/common/relpath.c b/src/common/relpath.c index 66dfef1..83a1e3a 100644 *** a/src/common/relpath.c --- b/src/common/relpath.c *** GetRelationPath(Oid dbNode, Oid spcNode, *** 206,208 --- 206,264 } return path; } + + /* + * Basic parsing of putative relation filenames. + * + * This function returns true if the file appears to be in the correct format + * for a non-temporary relation and false otherwise. + * + * NB: If this function returns true, the caller is entitled to assume that + * *oidchars has been set to the a value no more than OIDCHARS, and thus + * that a buffer of OIDCHARS+1 characters is sufficient to hold the OID + * portion of the filename. This is critical to protect against a possible + * buffer overrun. + */ + bool + parse_filename_for_nontemp_relation(const char *name, int *oidchars, + ForkNumber *fork) + { + int pos; + + /* Look for a non-empty string of digits (that isn't too long). */ + for (pos = 0; isdigit
Re: [HACKERS] File based Incremental backup v8
Il 29/01/15 18:57, Robert Haas ha scritto: > On Thu, Jan 29, 2015 at 9:47 AM, Marco Nenciarini > wrote: >> The current implementation of copydir function is incompatible with LSN >> based incremental backups. The problem is that new files are created, >> but their blocks are still with the old LSN, so they will not be backed >> up because they are looking old enough. > > I think this is trying to pollute what's supposed to be a pure > fs-level operation ("copy a directory") into something that is aware > of specific details like the PostgreSQL page format. I really think > that nothing in storage/file should know about the page format. If we > need a function that copies a file while replacing the LSNs, I think > it should be a new function living somewhere else. Given that the copydir function is used only during CREATE DATABASE and ALTER DATABASE SET TABLESPACE, we could move it/renaming it to a better place that clearly mark it as "knowing about page format". I'm open to suggestions on where to place it an on what should be the correct name. However the whole copydir patch here should be treated as a "temporary" thing. It is necessary until a proper WAL logging of CREATE DATABASE and ALTER DATABASE SET TABLESPACE will be implemented to support any form of LSN based incremental backup. > > A bigger problem is that you are proposing to stamp those files with > LSNs that are, for lack of a better word, fake. I would expect that > this would completely break if checksums are enabled. I'm sorry I completely ignored checksums in previous patch. The attached one works with checksums enabled. > Also, unlogged relations typically have an LSN of 0; this would > change that in some cases, and I don't know whether that's OK. > It shouldn't be a problem because all the code that uses unlogged relations normally skip all the WAL related operations. 
From the point of view of an incremental backup it is also not a problem, because, when restoring the backup, the unlogged tables will get reinitialized by the crash recovery procedure. However if you think it is worth the effort, I can rewrite the copydir as a two pass operation detecting the unlogged tables on the first pass and avoiding the LSN update on unlogged tables. I personally think that it isn't worth the effort unless someone identifies a real path where setting LSNs in unlogged relations leads to an issue. > The issues here are similar to those in > http://www.postgresql.org/message-id/20150120152819.gc24...@alap3.anarazel.de > - basically, I think we need to make CREATE DATABASE and ALTER > DATABASE .. SET TABLESPACE fully WAL-logged operations, or this is > never going to work right. If we're not going to allow that, we need > to disallow hot backups while those operations are in progress. > This is right, but the problem Andres reported is orthogonal to the one I'm addressing here. Without this copydir patch (or without a proper WAL logging of copydir operations), you cannot take an incremental backup after a CREATE DATABASE or ALTER DATABASE SET TABLESPACE until you get a full backup and use it as base. 
Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it From 3e451077283de8e99c4eceb748d49c34329c6ef8 Mon Sep 17 00:00:00 2001 From: Marco Nenciarini Date: Thu, 29 Jan 2015 12:18:47 +0100 Subject: [PATCH 1/3] public parse_filename_for_nontemp_relation --- src/backend/storage/file/reinit.c | 58 --- src/common/relpath.c | 56 + src/include/common/relpath.h | 2 ++ 3 files changed, 58 insertions(+), 58 deletions(-) diff --git a/src/backend/storage/file/reinit.c b/src/backend/storage/file/reinit.c index afd9255..02b5fee 100644 *** a/src/backend/storage/file/reinit.c --- b/src/backend/storage/file/reinit.c *** static void ResetUnloggedRelationsInTabl *** 28,35 int op); static void ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op); - static bool parse_filename_for_nontemp_relation(const char *name, - int *oidchars, ForkNumber *fork); typedef struct { --- 28,33 *** ResetUnloggedRelationsInDbspaceDir(const *** 388,446 fsync_fname((char *) dbspacedirname, true); } } - - /* - * Basic parsing of putative relation filenames. - * - * This function returns true if the file appears to be in the correct format - * for a non-temporary relation and false otherwise. - * - * NB: If this function re
Re: [HACKERS] pg_check_dir comments and implementation mismatch
Il 30/01/15 03:54, Michael Paquier ha scritto: > On Fri, Jan 30, 2015 at 2:49 AM, Tom Lane wrote: >> There is at least one other bug in that function now that I look at it: >> in event of a readdir() failure, it neglects to execute closedir(). >> Perhaps not too significant since all existing callers will just exit() >> anyway after a failure, but still ... > I would imagine that code scanners like coverity or similar would not > be happy about that. ISTM that it is better to closedir() > appropriately in all the code paths. > I've attached a new version of the patch fixing the missing closedir on readdir error. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it From d2bfb6878aed404fdba1447d3f813edb4442ff47 Mon Sep 17 00:00:00 2001 From: Marco Nenciarini Date: Sat, 31 Jan 2015 14:06:54 +0100 Subject: [PATCH] Improve pg_check_dir comments and code Update the pg_check_dir comment to explain all the possible return values (0 to 4). Fix the beaviour in presence of lost+found directory. The previous version was returning 3 (mount point) even if the readdir returns something after the lost+found directory. In this case the output should be 4 (not empty). Ensure that closedir are always called even if readdir returns an error. --- src/port/pgcheckdir.c | 18 ++ 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/port/pgcheckdir.c b/src/port/pgcheckdir.c index 7061893..02a048c 100644 *** a/src/port/pgcheckdir.c --- b/src/port/pgcheckdir.c *** *** 22,28 * Returns: *0 if nonexistent *1 if exists and empty ! *2 if exists and not empty *-1 if trouble accessing directory (errno reflects the error) */ int --- 22,30 * Returns: *0 if nonexistent *1 if exists and empty ! *2 if exists and contains _only_ dot files ! *3 if exists and contains a mount point ! 
*4 if exists and not empty *-1 if trouble accessing directory (errno reflects the error) */ int *** pg_check_dir(const char *dir) *** 32,37 --- 34,40 DIR*chkdir; struct dirent *file; booldot_found = false; + boolmount_found = false; chkdir = opendir(dir); if (chkdir == NULL) *** pg_check_dir(const char *dir) *** 51,60 { dot_found = true; } else if (strcmp("lost+found", file->d_name) == 0) { ! result = 3; /* not empty, mount point */ ! break; } #endif else --- 54,63 { dot_found = true; } + /* lost+found directory */ else if (strcmp("lost+found", file->d_name) == 0) { ! mount_found = true; } #endif else *** pg_check_dir(const char *dir) *** 64,72 } } ! if (errno || closedir(chkdir)) result = -1;/* some kind of I/O error? */ /* We report on dot-files if we _only_ find dot files */ if (result == 1 && dot_found) result = 2; --- 67,82 } } ! if (errno) result = -1;/* some kind of I/O error? */ + if (closedir(chkdir)) + result = -1;/* error executing closedir */ + + /* We report on mount point if we find a lost+found directory */ + if (result == 1 && mount_found) + result = 3; + /* We report on dot-files if we _only_ find dot files */ if (result == 1 && dot_found) result = 2; -- 2.2.2 signature.asc Description: OpenPGP digital signature
Re: [HACKERS] pg_check_dir comments and implementation mismatch
Il 29/01/15 18:37, Robert Haas ha scritto: > On Thu, Jan 29, 2015 at 11:00 AM, Marco Nenciarini > wrote: >> reading pgcheckdir.c code I noticed that the function comment >> was outdated. The code now can return values from 0 to 4 while the >> comment explains only values 0,1,2. > > This is not just a comment fix; you are clearly changing the behavior > of the function in some way. > The previous version was returning 3 (mount point) even if the dir contains something after the lost+found directory. I think this case deserves an output of 4. For example: lost+found zzz.txt gives the result 3. If the directory contains aaa.txt lost+found the result is instead 4. This doesn't make much difference, as 3 and 4 are both error conditions for all the callers, but the current behavior looks odd to me, and is surely not what one would expect after reading the comments. My version returns 3 only if the lost+found file is alone in the directory (possibly ignoring dot files alongside it, as it did before) Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it signature.asc Description: OpenPGP digital signature
[HACKERS] pg_check_dir comments and implementation mismatch
Hi, reading pgcheckdir.c code I noticed that the function comment was outdated. The code now can return values from 0 to 4 while the comment explains only values 0,1,2. Patch attached. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it From 44623f67a124c4c77f7ff8097f13e116d20d83a5 Mon Sep 17 00:00:00 2001 From: Marco Nenciarini Date: Thu, 29 Jan 2015 16:45:27 +0100 Subject: [PATCH] Update pg_check_dir comment --- src/port/pgcheckdir.c | 13 ++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/port/pgcheckdir.c b/src/port/pgcheckdir.c index 7061893..9165ebb 100644 *** a/src/port/pgcheckdir.c --- b/src/port/pgcheckdir.c *** *** 22,28 * Returns: *0 if nonexistent *1 if exists and empty ! *2 if exists and not empty *-1 if trouble accessing directory (errno reflects the error) */ int --- 22,30 * Returns: *0 if nonexistent *1 if exists and empty ! *2 if exists and contains _only_ dot files ! *3 if exists and contains a mount point ! *4 if exists and not empty *-1 if trouble accessing directory (errno reflects the error) */ int *** pg_check_dir(const char *dir) *** 32,37 --- 34,40 DIR*chkdir; struct dirent *file; booldot_found = false; + boolmount_found = false; chkdir = opendir(dir); if (chkdir == NULL) *** pg_check_dir(const char *dir) *** 51,60 { dot_found = true; } else if (strcmp("lost+found", file->d_name) == 0) { ! result = 3; /* not empty, mount point */ ! break; } #endif else --- 54,63 { dot_found = true; } + /* lost+found directory */ else if (strcmp("lost+found", file->d_name) == 0) { ! mount_found = true; } #endif else *** pg_check_dir(const char *dir) *** 67,72 --- 70,79 if (errno || closedir(chkdir)) result = -1;/* some kind of I/O error? 
*/ + /* We report on mount point if we find a lost+found directory */ + if (result == 1 && mount_found) + result = 3; + /* We report on dot-files if we _only_ find dot files */ if (result == 1 && dot_found) result = 2; -- 2.2.2 signature.asc Description: OpenPGP digital signature
[HACKERS] File based Incremental backup v8
The current implementation of the copydir function is incompatible with LSN based incremental backups. The problem is that new files are created, but their blocks still carry the old LSN, so they will not be backed up because they look old enough. The copydir function is used in: CREATE DATABASE ALTER DATABASE SET TABLESPACE I can imagine two possible solutions: a) wal log the whole copydir operations, setting the lsn accordingly b) pass to copydir the LSN of the operation which triggered it, and update the LSN of all the copied blocks The latter solution is IMO easier to implement and does not deviate much from the current implementation. I've implemented it and it's attached to this message. I've also moved the parse_filename_for_nontemp_relation function out of reinit.c to make it available both to copydir.c and basebackup.c. I've also limited the LSN comparison to only the MAIN fork, because: * LSN fork doesn't use LSN * VM fork updates LSN only when the visibility bit is set * INIT forks don't use LSN. It's only one page anyway. 
Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it From 087faed899b9afab324aff7fa20e715c4f99eb4a Mon Sep 17 00:00:00 2001 From: Marco Nenciarini Date: Thu, 29 Jan 2015 12:18:47 +0100 Subject: [PATCH 1/3] public parse_filename_for_nontemp_relation --- src/backend/storage/file/reinit.c | 58 --- src/common/relpath.c | 56 + src/include/common/relpath.h | 2 ++ 3 files changed, 58 insertions(+), 58 deletions(-) diff --git a/src/backend/storage/file/reinit.c b/src/backend/storage/file/reinit.c index afd9255..02b5fee 100644 *** a/src/backend/storage/file/reinit.c --- b/src/backend/storage/file/reinit.c *** static void ResetUnloggedRelationsInTabl *** 28,35 int op); static void ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op); - static bool parse_filename_for_nontemp_relation(const char *name, - int *oidchars, ForkNumber *fork); typedef struct { --- 28,33 *** ResetUnloggedRelationsInDbspaceDir(const *** 388,446 fsync_fname((char *) dbspacedirname, true); } } - - /* - * Basic parsing of putative relation filenames. - * - * This function returns true if the file appears to be in the correct format - * for a non-temporary relation and false otherwise. - * - * NB: If this function returns true, the caller is entitled to assume that - * *oidchars has been set to the a value no more than OIDCHARS, and thus - * that a buffer of OIDCHARS+1 characters is sufficient to hold the OID - * portion of the filename. This is critical to protect against a possible - * buffer overrun. - */ - static bool - parse_filename_for_nontemp_relation(const char *name, int *oidchars, - ForkNumber *fork) - { - int pos; - - /* Look for a non-empty string of digits (that isn't too long). */ - for (pos = 0; isdigit((unsigned char) name[pos]); ++pos) - ; - if (pos == 0 || pos > OIDCHARS) - return false; - *oidchars = pos; - - /* Check for a fork name. 
*/ - if (name[pos] != '_') - *fork = MAIN_FORKNUM; - else - { - int forkchar; - - forkchar = forkname_chars(&name[pos + 1], fork); - if (forkchar <= 0) - return false; - pos += forkchar + 1; - } - - /* Check for a segment number. */ - if (name[pos] == '.') - { - int segchar; - - for (segchar = 1; isdigit((unsigned char) name[pos + segchar]); ++segchar) - ; - if (segchar <= 1) - return false; - pos += segchar; - } - - /* Now we should be at the end. */ - if (name[pos] != '\0') - return false; - return true; - } --- 386,388 diff --git a/src/common/relpath.c b/src/common/relpath.c index 66dfef1..83a1e3a 100644 *** a/src/common/relpath.c --- b/src/common/relpath.c *** GetRelationPath(Oid dbNode, Oid spcNode, *** 206,208 --- 206,264 } return path; } + + /* + * Basic parsing of putative relation filenames. + * + * This function returns true if the file appears to be in the correct format + * for a non-temporary relation and false otherwise.
[HACKERS] File based Incremental backup v7
Il 27/01/15 10:25, Giuseppe Broccolo ha scritto:> Hi Marco, > > On 16/01/15 16:55, Marco Nenciarini wrote: >> On 14/01/15 17:22, Gabriele Bartolini wrote: >> > >> > My opinion, Marco, is that for version 5 of this patch, you: >> > >> > 1) update the information on the wiki (it is outdated - I know you have >> > been busy with LSN map optimisation) >> >> Done. >> >> > 2) modify pg_basebackup in order to accept a directory (or tar file) and >> > automatically detect the LSN from the backup profile >> >> New version of patch attached. The -I parameter now requires a backup >> profile from a previous backup. I've added a sanity check that forbid >> incremental file level backups if the base timeline is different from >> the current one. >> >> > 3) add the documentation regarding the backup profile and pg_basebackup >> > >> >> Next on my TODO list. >> >> > Once we have all of this, we can continue trying the patch. Some >> > unexplored paths are: >> > >> > * tablespace usage >> >> I've improved my pg_restorebackup python PoC. It now supports tablespaces. > > About tablespaces, I noticed that any pointing to tablespace locations > is lost during the recovery of an incremental backup changing the > tablespace mapping (-T option). 
Here the steps I followed: > > * creating and filling a test database obtained through pgbench > > psql -c "CREATE DATABASE pgbench" > pgbench -U postgres -i -s 5 -F 80 pgbench > > * a first base backup with pg_basebackup: > > mkdir -p backups/$(date '+%d%m%y%H%M')/data && pg_basebackup -v -F p -D backups/$(date '+%d%m%y%H%M')/data -x > > * creation of a new tablespace, alter the table "pgbench_accounts" to > set the new tablespace: > > mkdir -p /home/gbroccolo/pgsql/tbls > psql -c "CREATE TABLESPACE tbls LOCATION '/home/gbroccolo/pgsql/tbls'" > psql -c "ALTER TABLE pgbench_accounts SET TABLESPACE tbls" pgbench > > * Doing some work on the database: > > pgbench -U postgres -T 120 pgbench > > * a second incremental backup with pg_basebackup specifying the new > location for the tablespace through the tablespace mapping: > > mkdir -p backups/$(date '+%d%m%y%H%M')/data backups/$(date '+%d%m%y%H%M')/tbls && pg_basebackup -v -F p -D backups/$(date '+%d%m%y%H%M')/data -x -I backups/2601151641/data/backup_profile -T /home/gbroccolo/pgsql/tbls=/home/gbroccolo/pgsql/backups/$(date '+%d%m%y%H%M')/tbls > > * a recovery based on the tool pg_restorebackup.py attached in > http://www.postgresql.org/message-id/54b9428e.9020...@2ndquadrant.it > > ./pg_restorebackup.py backups/2601151641/data backups/2601151707/data /tmp/data -T /home/gbroccolo/pgsql/backups/2601151707/tbls=/tmp/tbls > > In the last step, I obtained the following stack trace: > > Traceback (most recent call last): > File "./pg_restorebackup.py", line 74, in > shutil.copy2(base_file, dest_file) > File "/home/gbroccolo/.pyenv/versions/2.7.5/lib/python2.7/shutil.py", line 130, in copy2 > copyfile(src, dst) > File "/home/gbroccolo/.pyenv/versions/2.7.5/lib/python2.7/shutil.py", line 82, in copyfile > with open(src, 'rb') as fsrc: > IOError: [Errno 2] No such file or directory: 'backups/2601151641/data/base/16384/16406_fsm' > > > Any idea on what's going wrong? 
> I've done some test and it looks like that FSM nodes always have InvalidXLogRecPtr as LSN. Ive updated the patch to always include files if all their pages have LSN == InvalidXLogRecPtr Updated patch v7 attached. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it From bffcdf0d5c3258c8848215011eb8e8b3377d9f18 Mon Sep 17 00:00:00 2001 From: Marco Nenciarini Date: Tue, 14 Oct 2014 14:31:28 +0100 Subject: [PATCH] File-based incremental backup v7 Add backup profiles and --incremental to pg_basebackup --- doc/src/sgml/protocol.sgml | 86 - doc/src/sgml/ref/pg_basebackup.sgml| 31 ++- src/backend/access/transam/xlog.c | 18 +- src/backend/access/transam/xlogfuncs.c | 2 +- src/backend/replication/basebackup.c | 344 +++-- src/backend/replication/repl_gram.y| 6 + src/backend/replication/repl_scanner.l | 1 + src/bin/pg_basebackup/pg_basebackup.c | 191 -- src/include/access/xlog.h | 3 +- src/include/replication/basebackup.h | 5 + 10 files ch
Re: [HACKERS] File based incremental backup v6
Hi, here is another version of the file based incremental backup patch. Changelog from the previous one: * pg_basebackup --incremental option takes the directory containing the base backup instead of the backup profile file * rename the backup_profile file at the same time as the backup_label file when starting the first time from a backup. * handle "pg_basebackup -D -" appending the backup profile to the resulting tar stream * added documentation for -I/--incremental option to pg_basebackup doc * updated replication protocol documentation The rationale of moving the backup_profile out of the way during recovery is to avoid using a data directory which has already been started as a base of a backup. I've also lightly improved the pg_restorebackup PoC implementing the syntax advised by Gabriele: ./pg_restorebackup.py DESTINATION BACKUP_1 BACKUP_2 [BACKUP_3, ...] It also supports relocation of tablespaces with the -T option. The -T option is mandatory if there was any tablespace defined in the PostgreSQL instance when the incremental backup was taken. 
Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it From 56fed6e250280f8e5d5c17252db631f33a3c9d8f Mon Sep 17 00:00:00 2001 From: Marco Nenciarini Date: Tue, 14 Oct 2014 14:31:28 +0100 Subject: [PATCH] File-based incremental backup v6 Add backup profile to pg_basebackup INCREMENTAL option implementaion --- doc/src/sgml/protocol.sgml | 86 - doc/src/sgml/ref/pg_basebackup.sgml| 31 ++- src/backend/access/transam/xlog.c | 18 +- src/backend/access/transam/xlogfuncs.c | 2 +- src/backend/replication/basebackup.c | 335 +++-- src/backend/replication/repl_gram.y| 6 + src/backend/replication/repl_scanner.l | 1 + src/bin/pg_basebackup/pg_basebackup.c | 191 +-- src/include/access/xlog.h | 3 +- src/include/replication/basebackup.h | 5 + 10 files changed, 639 insertions(+), 39 deletions(-) diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml index efe75ea..fc24648 100644 *** a/doc/src/sgml/protocol.sgml --- b/doc/src/sgml/protocol.sgml *** The commands accepted in walsender mode *** 1882,1888 ! BASE_BACKUP [LABEL 'label'] [PROGRESS] [FAST] [WAL] [NOWAIT] [MAX_RATE rate] BASE_BACKUP --- 1882,1888 ! BASE_BACKUP [LABEL 'label'] [INCREMENTAL 'start_lsn'] [PROGRESS] [FAST] [WAL] [NOWAIT] [MAX_RATE rate] BASE_BACKUP *** The commands accepted in walsender mode *** 1905,1910 --- 1905,1928 + INCREMENTAL 'start_lsn' + + + Requests a file-level incremental backup of all files changed after + start_lsn. When operating with + INCREMENTAL, the content of every block-organised + file will be analyzed and the file will be sent if at least one + block has a LSN higher than or equal to the provided + start_lsn. + + + The backup_profile will contain information on + every file that has been analyzed, even those that have not been sent. 
+ + + + + PROGRESS *** The commands accepted in walsender mode *** 2022,2028 ustar interchange format specified in the POSIX 1003.1-2008 standard) dump of the tablespace contents, except that the two trailing blocks of zeroes specified in the standard are omitted. ! After the tar data is complete, a final ordinary result set will be sent, containing the WAL end position of the backup, in the same format as the start position. --- 2040,2046 ustar interchange format specified in the POSIX 1003.1-2008 standard) dump of the tablespace contents, except that the two trailing blocks of zeroes specified in the standard are omitted. ! After the tar data is complete, an ordinary result set will be sent, containing the WAL end position of the backup, in the same format as the start position. *** The commands accepted in walsender mode *** 2073,2082 the server supports it. ! Once all tablespaces have been sent, a final regular result set will be sent. This result set contains the end position of the backup, given in XLogRecPtr format as a single column in a single row. --- 2091,2162 the server supports it. ! Once all tablespaces have been sent, another regular result set will be sent. This result set contains the end position of the backup, given
Re: [HACKERS] [RFC] Incremental backup v3: incremental PoC
On 14/01/15 17:22, Gabriele Bartolini wrote: > > My opinion, Marco, is that for version 5 of this patch, you: > > 1) update the information on the wiki (it is outdated - I know you have > been busy with LSN map optimisation) Done. > 2) modify pg_basebackup in order to accept a directory (or tar file) and > automatically detect the LSN from the backup profile New version of patch attached. The -I parameter now requires a backup profile from a previous backup. I've added a sanity check that forbids incremental file level backups if the base timeline is different from the current one. > 3) add the documentation regarding the backup profile and pg_basebackup > Next on my TODO list. > Once we have all of this, we can continue trying the patch. Some > unexplored paths are: > > * tablespace usage I've improved my pg_restorebackup python PoC. It now supports tablespaces. > * tar format > * performance impact (in both "read-only" and heavily updated contexts) From the server point of view, the current code generates a load similar to a normal backup. It only adds an initial scan of every data file to decide whether it has to send it. Once it finds a single newer page it immediately stops scanning and starts sending the file. The IO impact should not be that big due to the filesystem cache, but I agree with you that it has to be measured. 
Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it From f7cf8b9dd7d32f64a30dafaeeaeb56cbcd2eafff Mon Sep 17 00:00:00 2001 From: Marco Nenciarini Date: Tue, 14 Oct 2014 14:31:28 +0100 Subject: [PATCH] File-based incremental backup v5 Add backup profile to pg_basebackup INCREMENTAL option implementaion --- src/backend/access/transam/xlog.c | 7 +- src/backend/access/transam/xlogfuncs.c | 2 +- src/backend/replication/basebackup.c | 335 +++-- src/backend/replication/repl_gram.y| 6 + src/backend/replication/repl_scanner.l | 1 + src/bin/pg_basebackup/pg_basebackup.c | 147 +-- src/include/access/xlog.h | 3 +- src/include/replication/basebackup.h | 4 + 8 files changed, 473 insertions(+), 32 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 629a457..1e50625 100644 *** a/src/backend/access/transam/xlog.c --- b/src/backend/access/transam/xlog.c *** XLogFileNameP(TimeLineID tli, XLogSegNo *** 9249,9255 * permissions of the calling user! */ XLogRecPtr ! do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, char **labelfile) { boolexclusive = (labelfile == NULL); --- 9249,9256 * permissions of the calling user! */ XLogRecPtr ! do_pg_start_backup(const char *backupidstr, bool fast, ! XLogRecPtr incremental_startpoint, TimeLineID *starttli_p, char **labelfile) { boolexclusive = (labelfile == NULL); *** do_pg_start_backup(const char *backupids *** 9468,9473 --- 9469,9478 (uint32) (startpoint >> 32), (uint32) startpoint, xlogfilename); appendStringInfo(&labelfbuf, "CHECKPOINT LOCATION: %X/%X\n", (uint32) (checkpointloc >> 32), (uint32) checkpointloc); + if (incremental_startpoint > 0) + appendStringInfo(&labelfbuf, "INCREMENTAL FROM LOCATION: %X/%X\n", +(uint32) (incremental_startpoint >> 32), +(uint32) incremental_startpoint); appendStringInfo(&labelfbuf, "BACKUP METHOD: %s\n", exclusive ? 
"pg_start_backup" : "streamed"); appendStringInfo(&labelfbuf, "BACKUP FROM: %s\n", diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c index 2179bf7..ace84d8 100644 *** a/src/backend/access/transam/xlogfuncs.c --- b/src/backend/access/transam/xlogfuncs.c *** pg_start_backup(PG_FUNCTION_ARGS) *** 59,65 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be superuser or replication role to run a backup"))); ! startpoint = do_pg_start_backup(backupidstr, fast, NULL, NULL); PG_RETURN_LSN(startpoint); } --- 59,65 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be superuser or replication role to run a backup"))); ! startpoint = do_pg_start_backu
Re: [HACKERS] [RFC] Incremental backup v3: incremental PoC
Il 13/01/15 12:53, Gabriele Bartolini ha scritto: > Hi Marco, > > could you please send an updated version the patch against the current > HEAD in order to facilitate reviewers? > Here is the updated patch for incremental file based backup. It is based on the current HEAD. I'm now working to the client tool to rebuild a full backup starting from a file based incremental backup. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it From 50ff0872d3901a30b6742900170052eabe0e06dd Mon Sep 17 00:00:00 2001 From: Marco Nenciarini Date: Tue, 14 Oct 2014 14:31:28 +0100 Subject: [PATCH] File-based incremental backup v4 Add backup profile to pg_basebackup INCREMENTAL option implementaion --- src/backend/access/transam/xlog.c | 7 +- src/backend/access/transam/xlogfuncs.c | 2 +- src/backend/replication/basebackup.c | 335 +++-- src/backend/replication/repl_gram.y| 6 + src/backend/replication/repl_scanner.l | 1 + src/bin/pg_basebackup/pg_basebackup.c | 83 ++-- src/include/access/xlog.h | 3 +- src/include/replication/basebackup.h | 4 + 8 files changed, 409 insertions(+), 32 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 839ea7c..625a5df 100644 *** a/src/backend/access/transam/xlog.c --- b/src/backend/access/transam/xlog.c *** XLogFileNameP(TimeLineID tli, XLogSegNo *** 9249,9255 * permissions of the calling user! */ XLogRecPtr ! do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, char **labelfile) { boolexclusive = (labelfile == NULL); --- 9249,9256 * permissions of the calling user! */ XLogRecPtr ! do_pg_start_backup(const char *backupidstr, bool fast, ! 
XLogRecPtr incremental_startpoint, TimeLineID *starttli_p, char **labelfile) { boolexclusive = (labelfile == NULL); *** do_pg_start_backup(const char *backupids *** 9468,9473 --- 9469,9478 (uint32) (startpoint >> 32), (uint32) startpoint, xlogfilename); appendStringInfo(&labelfbuf, "CHECKPOINT LOCATION: %X/%X\n", (uint32) (checkpointloc >> 32), (uint32) checkpointloc); + if (incremental_startpoint > 0) + appendStringInfo(&labelfbuf, "INCREMENTAL FROM LOCATION: %X/%X\n", +(uint32) (incremental_startpoint >> 32), +(uint32) incremental_startpoint); appendStringInfo(&labelfbuf, "BACKUP METHOD: %s\n", exclusive ? "pg_start_backup" : "streamed"); appendStringInfo(&labelfbuf, "BACKUP FROM: %s\n", diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c index 2179bf7..ace84d8 100644 *** a/src/backend/access/transam/xlogfuncs.c --- b/src/backend/access/transam/xlogfuncs.c *** pg_start_backup(PG_FUNCTION_ARGS) *** 59,65 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be superuser or replication role to run a backup"))); ! startpoint = do_pg_start_backup(backupidstr, fast, NULL, NULL); PG_RETURN_LSN(startpoint); } --- 59,65 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be superuser or replication role to run a backup"))); ! 
startpoint = do_pg_start_backup(backupidstr, fast, 0, NULL, NULL); PG_RETURN_LSN(startpoint); } diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index 07030a2..05b19c5 100644 *** a/src/backend/replication/basebackup.c --- b/src/backend/replication/basebackup.c *** *** 30,40 --- 30,42 #include "replication/basebackup.h" #include "replication/walsender.h" #include "replication/walsender_private.h" + #include "storage/bufpage.h" #include "storage/fd.h" #include "storage/ipc.h" #include "utils/builtins.h" #include "utils/elog.h" #include "utils/ps_status.h" + #include "utils/pg_lsn.h" #include "utils/timestamp.h" *** typedef struct *** 46,56 boolnowait; boolincludewal; uint32 maxrate; } basebackup_options; ! static int64 sendDir(char *pat
Re: [HACKERS] [RFC] LSN Map
Il 08/01/15 20:18, Jim Nasby ha scritto: > On 1/7/15, 3:50 AM, Marco Nenciarini wrote: >> The current implementation tracks only heap LSN. It currently does not >> track any kind of indexes, but this can be easily added later. > > Would it make sense to do this at a buffer level, instead of at the heap > level? That means it would handle both heap and indexes. > I don't know if LSN is visible that far down though. Where exactly you are thinking to handle it? > > Also, this pattern is repeated several times; it would be good to put it > in it's own function: > + lsnmap_pin(reln, blkno, &lmbuffer); > + lsnmap_set(reln, blkno, lmbuffer, lsn); > + ReleaseBuffer(lmbuffer); Right. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it signature.asc Description: OpenPGP digital signature
Re: [HACKERS] [RFC] Incremental backup v3: incremental PoC
Il 06/01/15 14:26, Robert Haas ha scritto: > I suggest leaving this out altogether for the first version. I can > think of three possible ways that we can determine which blocks need > to be backed up. One, just read every block in the database and look > at the LSN of each one. Two, maintain a cache of LSN information on a > per-segment (or smaller) basis, as you suggest here. Three, scan the > WAL generated since the incremental backup and summarize it into a > list of blocks that need to be backed up. This last idea could either > be done when the backup is requested, or it could be done as the WAL > is generated and used to populate the LSN cache. In the long run, I > think some variant of approach #3 is likely best, but in the short > run, approach #1 (scan everything) is certainly easiest. While it > doesn't optimize I/O, it still gives you the benefit of reducing the > amount of data that needs to be transferred and stored, and that's not > nothing. If we get that much working, we can improve things more > later. > Hi, The patch now uses the approach #1, but I've just sent a patch that uses the #2 approach. 54ad016e.9020...@2ndquadrant.it Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it signature.asc Description: OpenPGP digital signature
[HACKERS] [RFC] LSN Map
Hi Hackers, In order to make incremental backup (https://wiki.postgresql.org/wiki/Incremental_backup) efficient we need a way to track the LSN of a page in a way that we can retrieve it without reading the actual block. Below there is my proposal on how to achieve it. LSN Map --- The purpose of the LSN map is to quickly know if a page of a relation has been modified after a specified checkpoint. Implementation -- We create an additional fork which contains a raw stream of LSNs. To limit the space used, every entry represents the maximum LSN of a group of blocks of a fixed size. I chose arbitrarily the size of 2048 which is equivalent to 16MB of heap data, which means that we need 64k entries to track one terabyte of heap. Name I've called this map LSN map, and I've named the corresponding fork file as "lm". WAL logging --- At the moment the map is not wal logged, but is updated during the WAL replay. I'm not deep enough in WAL mechanics to see if the current approach is sane or if we should change it. Current limits -- The current implementation tracks only heap LSN. It currently does not track any kind of indexes, but this can be easily added later. The implementation of commands that rewrite the whole table can be improved: cluster uses shared memory buffers instead of writing the map directly on the disk, and moving a table to another tablespace simply drops the map instead of updating it correctly. Further ideas - The current implementation updates an entry in the map every time the block gets its LSN bumped, but we really only need to know which is the first checkpoint that contains expired data. So setting the entry to the last checkpoint LSN is probably enough, and will reduce the number of writes. To implement this we only need a backend local copy of the last checkpoint LSN, which is updated during each XLogInsert. Again, I'm not deep enough in replication mechanics to see if this approach could work on a standby using restartpoints instead of checkpoints. 
Please advice on the best way to implement it. Conclusions This code is incomplete, and the xlog reply part must be improved/fixed, but I think its a good start to have this feature. I will appreciate any review, advice or critic. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it From 89a943032f0a10fd093c126d15fbf81e5861dbe3 Mon Sep 17 00:00:00 2001 From: Marco Nenciarini Date: Mon, 3 Nov 2014 17:52:27 +0100 Subject: [PATCH] LSN Map This is a WIP. Only heap is supported. No indexes, no sequences. --- src/backend/access/heap/Makefile | 2 +- src/backend/access/heap/heapam.c | 239 ++-- src/backend/access/heap/hio.c | 11 +- src/backend/access/heap/lsnmap.c | 336 ++ src/backend/access/heap/pruneheap.c | 10 + src/backend/access/heap/rewriteheap.c | 37 +++- src/backend/catalog/storage.c | 8 + src/backend/commands/tablecmds.c | 5 +- src/backend/commands/vacuumlazy.c | 35 +++- src/backend/storage/smgr/smgr.c | 1 + src/common/relpath.c | 5 +- src/include/access/hio.h | 3 +- src/include/access/lsnmap.h | 28 +++ src/include/common/relpath.h | 5 +- src/include/storage/smgr.h| 1 + 15 files changed, 687 insertions(+), 39 deletions(-) create mode 100644 src/backend/access/heap/lsnmap.c create mode 100644 src/include/access/lsnmap.h diff --git a/src/backend/access/heap/Makefile b/src/backend/access/heap/Makefile index b83d496..776ee7d 100644 *** a/src/backend/access/heap/Makefile --- b/src/backend/access/heap/Makefile *** subdir = src/backend/access/heap *** 12,17 top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global ! OBJS = heapam.o hio.o pruneheap.o rewriteheap.o syncscan.o tuptoaster.o visibilitymap.o include $(top_srcdir)/src/backend/common.mk --- 12,17 top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global ! 
OBJS = heapam.o hio.o pruneheap.o rewriteheap.o syncscan.o tuptoaster.o visibilitymap.o lsnmap.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 21e9d06..9486562 100644 *** a/src/backend/access/heap/heapam.c --- b/src/backend/access/heap/heapam.c *** *** 48,53 --- 48,54 #include "access/tuptoaster.h" #include "access/valid.h" #include "access/visibilitymap.h" + #include "access/lsnmap.h" #include "access/xact.h" #include "access/xlog.h" #include "access/xloginsert.h" *** heap_insert(Relation relation, HeapTuple *** 2067,2073 TransactionId xid = GetCur
Re: [HACKERS] [RFC] Incremental backup v3: incremental PoC
I've noticed that I missed adding this to the commitfest. I've just added it. It is not meant to end up in a committable state, but at this point I'm searching for some code review and more discussion. I'm also about to send an additional patch to implement an LSN map as an additional fork for heap files. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it signature.asc Description: OpenPGP digital signature
Re: [HACKERS] [RFC] Incremental backup v3: incremental PoC
I've noticed that I missed adding this to the commitfest. I've just added it. It is not meant to end up in a committable state, but at this point I'm searching for some code review and more discussion. I'm also about to send an additional patch to implement an LSN map as an additional fork for heap files. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it signature.asc Description: OpenPGP digital signature
Re: [HACKERS] Too strict check when starting from a basebackup taken off a standby
Il 11/12/14 12:38, Andres Freund ha scritto: > On December 11, 2014 9:56:09 AM CET, Heikki Linnakangas > wrote: >> On 12/11/2014 05:45 AM, Andres Freund wrote: >> >> Yeah. I was not able to reproduce this, but I'm clearly missing >> something, since both you and Sergey have seen this happening. Can you >> write a script to reproduce? > > Not right now, I only have my mobile... Its quite easy though. Create a > pg-basebackup from a standby. Create a recovery.conf with a broken primary > conninfo. Start. Shutdown. Fix conninfo. Start. > Just tested it. These steps are not sufficient to reproduce the issue on a test installation. I suppose this is because, on a small test datadir, the checkpoint location and the redo location in the pg_control are the same as those present in the backup_label. To trigger this bug you need to have at least one restartpoint happen on the standby between the start and the end of the backup. You can simulate it by issuing a checkpoint on the master, then a checkpoint on the standby (to force a restartpoint), then copying the pg_control from the standby. This way I've been able to reproduce it. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it signature.asc Description: OpenPGP digital signature
Re: [HACKERS] [Windows,PATCH] Use faster, higher precision timer API
Il 01/12/14 14:16, Craig Ringer ha scritto: > > +#ifndef FRONTEND > +#include > +#endif > + I think this is a leftover, as you don't use elog afterwards. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it signature.asc Description: OpenPGP digital signature
[HACKERS] [RFC] Incremental backup v3: incremental PoC
Hi Hackers, following the advices gathered on the list I've prepared a third partial patch on the way of implementing incremental pg_basebackup as described here https://wiki.postgresql.org/wiki/Incremental_backup == Changes Compared to the previous version I've made the following changes: * The backup_profile is not optional anymore. Generating it is cheap enough not to bother the user with such a choice. * I've isolated the code which detects the maxLSN of a segment in a separate getMaxLSN function. At the moment it works scanning the whole file, but I'm looking to replace it in the next versions. * I've made possible to request an incremental backup passing a "-I " option to pg_basebackup. It is probably too "raw" to remain as is, but it's is useful at this stage to test the code. * I've modified the backup label to report the fact that the backup was taken with the incremental option. The result will be something like: START WAL LOCATION: 0/5228 (file 00010052) CHECKPOINT LOCATION: 0/5260 INCREMENTAL FROM LOCATION: 0/5128 BACKUP METHOD: streamed BACKUP FROM: master START TIME: 2014-10-14 16:05:04 CEST LABEL: pg_basebackup base backup == Testing it At this stage you can make an incremental file-level backup using this procedure: pg_basebackup -v -F p -D /tmp/x -x LSN=$(awk '/^START WAL/{print $4}' /tmp/x/backup_profile) pg_basebackup -v -F p -D /tmp/y -I $LSN -x the result will be an incremental backup in /tmp/y based on the full backup on /tmp/x. 
You can "reintegrate" the incremental backup in the /tmp/z directory with the following little python script, calling it as ./recover.py /tmp/x /tmp/y /tmp/z #!/usr/bin/env python # recover.py import os import shutil import sys if len(sys.argv) != 4: print >> sys.stderr, "usage: %s base incremental destination" sys.exit(1) base=sys.argv[1] incr=sys.argv[2] dest=sys.argv[3] if os.path.exists(dest): print >> sys.stderr, "error: destination must not exist (%s)" % dest sys.exit(1) profile=open(os.path.join(incr, 'backup_profile'), 'r') for line in profile: if line.strip() == 'FILE LIST': break shutil.copytree(incr, dest) for line in profile: tblspc, lsn, sent, date, size, path = line.strip().split('\t') if sent == 't' or lsn=='\\N': continue base_file = os.path.join(base, path) dest_file = os.path.join(dest, path) shutil.copy2(base_file, dest_file) It has obviously to be replaced by a full-fledged user tool, but it is enough to test the concept. == What next I would to replace the getMaxLSN function with a more-or-less persistent structure which contains the maxLSN for each data segment. To make it work I would hook into the ForwardFsyncRequest() function in src/backend/postmaster/checkpointer.c and update an in memory hash every time a block is going to be fsynced. The structure could be persisted on disk at some time (probably on checkpoint). I think a good key for the hash would be a BufferTag with blocknum "rounded" to the start of the segment. I'm here asking for comments and advices on how to implement it in an acceptable way. == Disclaimer The code here is an intermediate step, it does not contain any documentation beside the code comments and will be subject to deep and radical changes. However I believe it can be a base to allow PostgreSQL to have its file-based incremental backup, and a block-based incremental backup after it. 
Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it From 5a7365fc3115c831627c087311c702a79cb355bc Mon Sep 17 00:00:00 2001 From: Marco Nenciarini Date: Tue, 14 Oct 2014 14:31:28 +0100 Subject: [PATCH] File-based incremental backup Add backup profile to pg_basebackup INCREMENTAL option naive implementaion --- src/backend/access/transam/xlog.c | 7 +- src/backend/access/transam/xlogfuncs.c | 2 +- src/backend/replication/basebackup.c | 316 +++-- src/backend/replication/repl_gram.y| 6 + src/backend/replication/repl_scanner.l | 1 + src/bin/pg_basebackup/pg_basebackup.c | 83 +++-- src/include/access/xlog.h | 3 +- src/include/replication/basebackup.h | 4 + 8 files changed, 391 insertions(+), 31 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 235b442..4dc79f0 100644 *** a/src/backend/access/transam/xlog.c --- b/src/backend/access/transam/xlog.c *** XLogFileNameP(TimeLineID tli, XLogSegNo *** 9718,9724 * permissions of the calling user! */ XLogRecPtr ! do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, char
Re: [HACKERS] [RFC] Incremental backup v2: add backup profile to base backup
Il 06/10/14 17:50, Robert Haas ha scritto: > On Mon, Oct 6, 2014 at 11:33 AM, Marco Nenciarini > wrote: >>> 2. Take a differential backup. In the backup label file, note the LSN >>> of the fullback to which the differential backup is relative, and the >>> newest LSN guaranteed to be present in the differential backup. The >>> actual backup can consist of a series of 20-byte buffer tags, those >>> being the exact set of blocks newer than the base-backup's >>> latest-guaranteed-to-be-present LSN. Each buffer tag is followed by >>> an 8kB block of data. If a relfilenode is truncated or removed, you >>> need some way to indicate that in the backup; e.g. include a buffertag >>> with forknum = -(forknum + 1) and blocknum = the new number of blocks, >>> or InvalidBlockNumber if removed entirely. >> >> To have a working backup you need to ship each block which is newer than >> latest-guaranteed-to-be-present in full backup and not newer than >> latest-guaranteed-to-be-present in the current backup. Also, as a >> further optimization, you can think about not sending the empty space in >> the middle of each page. > > Right. Or compressing the data. If we want to introduce compression on server side, I think that compressing the whole tar stream would be more effective. > >> My main concern here is about how postgres can remember that a >> relfilenode has been deleted, in order to send the appropriate "deletion >> tag". > > You also need to handle truncation. Yes, of course. The current backup profile contains the file size, and it can be used to truncate the file to the right size. >> IMHO the easiest way is to send the full list of files along the backup >> and let to the client the task to delete unneeded files. The backup >> profile has this purpose. 
>> >> Moreover, I do not like the idea of using only a stream of block as the >> actual differential backup, for the following reasons: >> >> * AFAIK, with the current infrastructure, you cannot do a backup with a >> block stream only. To have a valid backup you need many files for which >> the concept of LSN doesn't apply. >> >> * I don't like to have all the data from the various >> tablespace/db/whatever all mixed in the same stream. I'd prefer to have >> the blocks saved on a per file basis. > > OK, that makes sense. But you still only need the file list when > sending a differential backup, not when sending a full backup. So > maybe a differential backup looks like this: > > - Ship a table-of-contents file with a list relation files currently > present and the length of each in blocks. Having the size in bytes allow you to use the same format for non-block files. Am I missing any advantage of having the size in blocks over having the size in bytes? Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it signature.asc Description: OpenPGP digital signature
Re: [HACKERS] [RFC] Incremental backup v2: add backup profile to base backup
Il 06/10/14 17:55, Robert Haas ha scritto: > On Mon, Oct 6, 2014 at 11:51 AM, Marco Nenciarini > wrote: >> I agree that a full backup does not need to include a profile. >> >> I've added the option to require the profile even for a full backup, as >> it can be useful for backup softwares. We could remove the option and >> build the profile only during incremental backups, if required. However, >> I would avoid the needing to scan the whole backup to know the size of >> the recovered data directory, hence the backup profile. > > That doesn't seem to be buying you much. Calling stat() on every file > in a directory tree is a pretty cheap operation. > In case of incremental backup it is not true. You have to read the delta file to know the final size. You can optimize it putting this information in the first few bytes, but in case of compressed tar format you will need to scan the whole archive. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it signature.asc Description: OpenPGP digital signature
Re: [HACKERS] [RFC] Incremental backup v2: add backup profile to base backup
Il 06/10/14 16:51, Robert Haas ha scritto: > On Mon, Oct 6, 2014 at 8:59 AM, Marco Nenciarini > wrote: >> Il 04/10/14 08:35, Michael Paquier ha scritto: >>> On Sat, Oct 4, 2014 at 12:31 AM, Marco Nenciarini >>> wrote: >>>> Compared to first version, we switched from a timestamp+checksum based >>>> approach to one based on LSN. >>> Cool. >>> >>>> This patch adds an option to pg_basebackup and to replication protocol >>>> BASE_BACKUP command to generate a backup_profile file. It is almost >>>> useless by itself, but it is the foundation on which we will build the >>>> file based incremental backup (and hopefully a block based incremental >>>> backup after it). >>> Hm. I am not convinced by the backup profile file. What's wrong with >>> having a client send only an LSN position to get a set of files (or >>> partial files filed with blocks) newer than the position given, and >>> have the client do all the rebuild analysis? >>> >> >> The main problem I see is the following: how a client can detect a >> truncated or removed file? > > When you take a differential backup, the server needs to send some > piece of information about every file so that the client can compare > that list against what it already has. But a full backup does not > need to include similar information. > I agree that a full backup does not need to include a profile. I've added the option to require the profile even for a full backup, as it can be useful for backup softwares. We could remove the option and build the profile only during incremental backups, if required. However, I would avoid the needing to scan the whole backup to know the size of the recovered data directory, hence the backup profile. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it signature.asc Description: OpenPGP digital signature
Re: [HACKERS] [RFC] Incremental backup v2: add backup profile to base backup
Il 03/10/14 22:47, Robert Haas ha scritto: > On Fri, Oct 3, 2014 at 12:08 PM, Marco Nenciarini > wrote: >> Il 03/10/14 17:53, Heikki Linnakangas ha scritto: >>> If we're going to need a profile file - and I'm not convinced of that - >>> is there any reason to not always include it in the backup? >> >> The main reason is to have a centralized list of files that need to be >> present. Without a profile, you have to insert some sort of placeholder >> for kipped files. > > Why do you need to do that? And where do you need to do that? > > It seems to me that there are three interesting operations: > > 1. Take a full backup. Basically, we already have this. In the > backup label file, make sure to note the newest LSN guaranteed to be > present in the backup. Don't we already have it in "START WAL LOCATION"? > > 2. Take a differential backup. In the backup label file, note the LSN > of the fullback to which the differential backup is relative, and the > newest LSN guaranteed to be present in the differential backup. The > actual backup can consist of a series of 20-byte buffer tags, those > being the exact set of blocks newer than the base-backup's > latest-guaranteed-to-be-present LSN. Each buffer tag is followed by > an 8kB block of data. If a relfilenode is truncated or removed, you > need some way to indicate that in the backup; e.g. include a buffertag > with forknum = -(forknum + 1) and blocknum = the new number of blocks, > or InvalidBlockNumber if removed entirely. To have a working backup you need to ship each block which is newer than latest-guaranteed-to-be-present in full backup and not newer than latest-guaranteed-to-be-present in the current backup. Also, as a further optimization, you can think about not sending the empty space in the middle of each page. My main concern here is about how postgres can remember that a relfilenode has been deleted, in order to send the appropriate "deletion tag". 
IMHO the easiest way is to send the full list of files along the backup and let to the client the task to delete unneeded files. The backup profile has this purpose. Moreover, I do not like the idea of using only a stream of block as the actual differential backup, for the following reasons: * AFAIK, with the current infrastructure, you cannot do a backup with a block stream only. To have a valid backup you need many files for which the concept of LSN doesn't apply. * I don't like to have all the data from the various tablespace/db/whatever all mixed in the same stream. I'd prefer to have the blocks saved on a per file basis. > > 3. Apply a differential backup to a full backup to create an updated > full backup. This is just a matter of scanning the full backup and > the differential backup and applying the changes in the differential > backup to the full backup. > > You might want combinations of these, like something that does 2+3 as > a single operation, for efficiency, or a way to copy a full backup and > apply a differential backup to it as you go. But that's it, right? > What else do you need? > Nothing else. Once we agree on definition of involved files and protocols formats, only the actual coding remains. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it signature.asc Description: OpenPGP digital signature
Re: [HACKERS] [RFC] Incremental backup v2: add backup profile to base backup
Il 04/10/14 08:35, Michael Paquier ha scritto: > On Sat, Oct 4, 2014 at 12:31 AM, Marco Nenciarini > wrote: >> Compared to first version, we switched from a timestamp+checksum based >> approach to one based on LSN. > Cool. > >> This patch adds an option to pg_basebackup and to replication protocol >> BASE_BACKUP command to generate a backup_profile file. It is almost >> useless by itself, but it is the foundation on which we will build the >> file based incremental backup (and hopefully a block based incremental >> backup after it). > Hm. I am not convinced by the backup profile file. What's wrong with > having a client send only an LSN position to get a set of files (or > partial files filed with blocks) newer than the position given, and > have the client do all the rebuild analysis? > The main problem I see is the following: how a client can detect a truncated or removed file? Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it signature.asc Description: OpenPGP digital signature
Re: [HACKERS] [RFC] Incremental backup v2: add backup profile to base backup
Il 03/10/14 23:12, Andres Freund ha scritto: > On 2014-10-03 17:31:45 +0200, Marco Nenciarini wrote: >> I've updated the wiki page >> https://wiki.postgresql.org/wiki/Incremental_backup following the result >> of discussion on hackers. >> >> Compared to first version, we switched from a timestamp+checksum based >> approach to one based on LSN. >> >> This patch adds an option to pg_basebackup and to replication protocol >> BASE_BACKUP command to generate a backup_profile file. It is almost >> useless by itself, but it is the foundation on which we will build the >> file based incremental backup (and hopefully a block based incremental >> backup after it). >> >> Any comment will be appreciated. In particular I'd appreciate comments >> on correctness of relnode files detection and LSN extraction code. > > Can you describe the algorithm you implemented in words? > Here it is the relnode files detection algorithm: I've added a has_relfiles parameter to the sendDir function. If has_relfiles is true every file in the directory is tested against the validateRelfilenodeName function. If the response is true, the maxLSN value is computed for the file. The sendDir function is called with has_relfiles=true by sendTablespace function and by sendDir itself when is recurring into a subdirectory * if has_relfiles is true * if we are recurring into a "./global" or "./base" directory The validateRelfilenodeName has been taken from pg_computemaxlsn patch. 
It's short enough to be pasted here: static bool validateRelfilenodename(char *name) { int pos = 0; while ((name[pos] >= '0') && (name[pos] <= '9')) pos++; if (name[pos] == '_') { pos++; while ((name[pos] >= 'a') && (name[pos] <= 'z')) pos++; } if (name[pos] == '.') { pos++; while ((name[pos] >= '0') && (name[pos] <= '9')) pos++; } if (name[pos] == 0) return true; return false; } To compute the maxLSN for a file, as the file is sent in TAR_SEND_SIZE chunks (32kb) and it is always a multiple of the block size, I've added the following code inside the send cycle: + char *page; + + /* Scan every page to find the max file LSN */ + for (page = buf; page < buf + (off_t) cnt; page += (off_t) BLCKSZ) { + pagelsn = PageGetLSN(page); + if (filemaxlsn < pagelsn) + filemaxlsn = pagelsn; + } + Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it signature.asc Description: OpenPGP digital signature
Re: [HACKERS] [RFC] Incremental backup v2: add backup profile to base backup
Il 03/10/14 17:53, Heikki Linnakangas ha scritto: > If we're going to need a profile file - and I'm not convinced of that - > is there any reason to not always include it in the backup? > The main reason is to have a centralized list of files that need to be present. Without a profile, you have to insert some sort of placeholder for skipped files. Moreover, the profile allows you to quickly know the size of the recovered backup (by simply summing the individual sizes). Another use could be to 'validate' the presence of all required files in a backup. >> Any comment will be appreciated. In particular I'd appreciate comments >> on correctness of relnode files detection and LSN extraction code. > > I didn't look at it in detail, but one future problem comes to mind: > Once you implement the server-side code that only sends a file if its > LSN is higher than the cutoff point that the client gave, you'll have to > scan the whole file first, to see if there are any blocks with a higher > LSN. At least until you find the first such block. So with a file-level > implementation of this sort, you'll have to scan all files twice, in the > worst case. > It's true. To solve this you have to keep a central maxLSN directory, but I think it introduces more issues than it solves. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it signature.asc Description: OpenPGP digital signature
[HACKERS] [RFC] Incremental backup v2: add backup profile to base backup
Hi Hackers, I've updated the wiki page https://wiki.postgresql.org/wiki/Incremental_backup following the result of discussion on hackers. Compared to first version, we switched from a timestamp+checksum based approach to one based on LSN. This patch adds an option to pg_basebackup and to replication protocol BASE_BACKUP command to generate a backup_profile file. It is almost useless by itself, but it is the foundation on which we will build the file based incremental backup (and hopefully a block based incremental backup after it). Any comment will be appreciated. In particular I'd appreciate comments on correctness of relnode files detection and LSN extraction code. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it backup_profile_v2.patch.gz Description: GNU Zip compressed data signature.asc Description: OpenPGP digital signature
[HACKERS] [PATCH] Incremental backup: add backup profile to base backup
Hi Hackers, This is the first piece of file level incremental backup support, as described on wiki page https://wiki.postgresql.org/wiki/Incremental_backup It is not yet complete, but I wish to share it on the list to receive comments and suggestions. The point of the patch is adding an option to pg_basebackup and replication protocol BASE_BACKUP command to generate a backup_profile file. When taking a full backup with pg_basebackup, the user can request Postgres to generate a backup_profile file through the --profile option (-B short option, which I've arbitrarily picked up because both -P and -p was already taken) At the moment the backup profile consists of a file with one line per file detailing modification time, md5, size, tablespace and path relative to tablespace root (PGDATA or the tablespace) To calculate the md5 checksum I've used the md5 code present in pgcrypto contrib as the code in src/include/libpq/md5.h is not suitable for large files. Since a core feature cannot depend on a piece of contrib, I've moved the files contrib/pgcrypto/md5.c contrib/pgcrypto/md5.h to src/backend/utils/hash/md5.c src/include/utils/md5.h changing the pgcrypto extension to use them. There are still some TODOs: * User documentation * Remove the pg_basebackup code duplication I've introduced with the ReceiveAndUnpackTarFileToDir function, which is almost the same of ReceiveAndUnpackTarFile but does not expect to handle a tablespace. It instead simply extract a tar stream in a destination directory. The latter could probably be rewritten using the former as component, but it needs some adjustment to the "progress reporting" part, which is not present at the moment in ReceiveAndUnpackTarFileToDir. * Add header section to backup_profile file which at the moment contains only the body part. I'm thinking to change the original design and put the whole backup label as header, which is IMHO clearer and well known. 
I would use something like: START WAL LOCATION: 0/E28 (file 0001000E) CHECKPOINT LOCATION: 0/E60 BACKUP METHOD: streamed BACKUP FROM: master START TIME: 2014-08-14 18:54:01 CEST LABEL: pg_basebackup base backup END LABEL I've attached the current patch based on master. Any comment will be appreciated. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it diff --git a/contrib/pgcrypto/Makefile b/contrib/pgcrypto/Makefile index b6c9484..daf2ba3 100644 *** a/contrib/pgcrypto/Makefile --- b/contrib/pgcrypto/Makefile *** *** 1,6 # contrib/pgcrypto/Makefile ! INT_SRCS = md5.c sha1.c sha2.c internal.c internal-sha2.c blf.c rijndael.c \ fortuna.c random.c pgp-mpi-internal.c imath.c INT_TESTS = sha2 --- 1,6 # contrib/pgcrypto/Makefile ! INT_SRCS = sha1.c sha2.c internal.c internal-sha2.c blf.c rijndael.c \ fortuna.c random.c pgp-mpi-internal.c imath.c INT_TESTS = sha2 diff --git a/contrib/pgcrypto/internal.c b/contrib/pgcrypto/internal.c index cb8ba26..0d2db23 100644 *** a/contrib/pgcrypto/internal.c --- b/contrib/pgcrypto/internal.c *** *** 30,40 */ #include "postgres.h" #include #include "px.h" - #include "md5.h" #include "sha1.h" #include "blf.h" #include "rijndael.h" --- 30,40 */ #include "postgres.h" + #include "utils/md5.h" #include #include "px.h" #include "sha1.h" #include "blf.h" #include "rijndael.h" diff --git a/contrib/pgcrypto/md5.c b/contrib/pgcrypto/md5.c index cac4e40..e69de29 . *** a/contrib/pgcrypto/md5.c --- b/contrib/pgcrypto/md5.c *** *** 1,397 - /* $KAME: md5.c,v 1.3 2000/02/22 14:01:17 itojun Exp $ */ - - /* - * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the project nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FI
Re: [HACKERS] Proposal: Incremental Backup
Il 12/08/14 15:25, Claudio Freire ha scritto: > On Tue, Aug 12, 2014 at 6:41 AM, Marco Nenciarini > wrote: >> To declared two files identical they must have same size, >> same mtime and same *checksum*. > > Still not safe. Checksum collisions do happen, especially in big data sets. > IMHO it is still good-enough. We are not trying to protect from a malicious attack, we are using it to protect against some *casual* event. Even cosmic rays have a not null probability of corrupting your database in a not-noticeable way. And you can probably notice it better with a checksum than with a LSN :-) Given that, I think that whatever solution we choose, we should include checksums in it. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it signature.asc Description: OpenPGP digital signature
Re: [HACKERS] Proposal: Incremental Backup
As I already stated, timestamps will only be used to quickly detect changed files. To declare two files identical they must have the same size, same mtime and same *checksum*. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it signature.asc Description: OpenPGP digital signature
Re: [HACKERS] Proposal: Incremental Backup
Il 07/08/14 17:25, Bruce Momjian ha scritto: > On Thu, Aug 7, 2014 at 08:35:53PM +0900, Michael Paquier wrote: >> On Thu, Aug 7, 2014 at 8:11 PM, Fujii Masao wrote: >>> There are some data which don't have LSN, for example, postgresql.conf. >>> When such data has been modified since last backup, they also need to >>> be included in incremental backup? Probably yes. >> Definitely yes. That's as well the case of paths like pg_clog, >> pg_subtrans and pg_twophase. > > I assumed these would be unconditionally backed up during an incremental > backup because they relatively small and you don't want to make a mistake. > You could decide to always copy files which don't have an LSN, but you don't know what the user could put inside PGDATA. I would avoid any assumption on files which are not owned by Postgres. With the current full backup procedure they are backed up, so I think that having them backed up with a rsync-like algorithm is what a user would expect for an incremental backup. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it signature.asc Description: OpenPGP digital signature
Re: [HACKERS] Proposal: Incremental Backup
Il 07/08/14 17:29, Bruce Momjian ha scritto: > I am a little worried that many users will not realize this until they > try it and are disappointed, e.g. "Why is PG writing to my static data > so often?" --- then we get beaten up about our hint bits and freezing > behavior. :-( > > I am just trying to set realistic expectations. > Our experience is that for big databases (size over about 50GB) the file-level approach is often enough to halve the size of the backup. Users which run Postgres as Data Warehouse surely will benefit from it, so we could present it as a DWH oriented feature. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it signature.asc Description: OpenPGP digital signature
Re: [HACKERS] Proposal: Incremental Backup
Il 25/07/14 20:44, Robert Haas ha scritto: > On Fri, Jul 25, 2014 at 2:21 PM, Claudio Freire > wrote: >> On Fri, Jul 25, 2014 at 10:14 AM, Marco Nenciarini >> wrote: >>> 1. Proposal >>> = >>> Our proposal is to introduce the concept of a backup profile. The backup >>> profile consists of a file with one line per file detailing tablespace, >>> path, modification time, size and checksum. >>> Using that file the BASE_BACKUP command can decide which file needs to >>> be sent again and which is not changed. The algorithm should be very >>> similar to rsync, but since our files are never bigger than 1 GB per >>> file that is probably granular enough not to worry about copying parts >>> of files, just whole files. >> >> That wouldn't nearly as useful as the LSN-based approach mentioned before. >> >> I've had my share of rsyncing live databases (when resizing >> filesystems, not for backup, but the anecdotal evidence applies >> anyhow) and with moderately write-heavy databases, even if you only >> modify a tiny portion of the records, you end up modifying a huge >> portion of the segments, because the free space choice is random. >> >> There have been patches going around to change the random nature of >> that choice, but none are very likely to make a huge difference for >> this application. In essence, file-level comparisons get you only a >> mild speed-up, and are not worth the effort. >> >> I'd go for the hybrid file+lsn method, or nothing. The hybrid avoids >> the I/O of inspecting the LSN of entire segments (necessary >> optimization for huge multi-TB databases) and backups only the >> portions modified when segments do contain changes, so it's the best >> of both worlds. Any partial implementation would either require lots >> of I/O (LSN only) or save very little (file only) unless it's an >> almost read-only database. > > I agree with much of that. 
However, I'd question whether we can > really seriously expect to rely on file modification times for > critical data-integrity operations. I wouldn't like it if somebody > ran ntpdate to fix the time while the base backup was running, and it > set the time backward, and the next differential backup consequently > omitted some blocks that had been modified during the base backup. > Our proposal doesn't rely on file modification times for data integrity. We are using the file mtime only as a fast indication that the file has changed, and transfer it again without performing the checksum. If timestamp and size match we rely on *checksums* to decide if it has to be sent. In "SMART MODE" we would use the file mtime to skip the checksum check in some cases, but it wouldn't be the default operation mode and it will have all the necessary warnings attached. However the "SMART MODE" isn't a core part of our proposal, and can be delayed until we agree on the safest way to bring it to the end user. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it signature.asc Description: OpenPGP digital signature
Re: [HACKERS] Proposal: Incremental Backup
Il 25/07/14 20:21, Claudio Freire ha scritto: > On Fri, Jul 25, 2014 at 10:14 AM, Marco Nenciarini > wrote: >> 1. Proposal >> = >> Our proposal is to introduce the concept of a backup profile. The backup >> profile consists of a file with one line per file detailing tablespace, >> path, modification time, size and checksum. >> Using that file the BASE_BACKUP command can decide which file needs to >> be sent again and which is not changed. The algorithm should be very >> similar to rsync, but since our files are never bigger than 1 GB per >> file that is probably granular enough not to worry about copying parts >> of files, just whole files. > > That wouldn't nearly as useful as the LSN-based approach mentioned before. > > I've had my share of rsyncing live databases (when resizing > filesystems, not for backup, but the anecdotal evidence applies > anyhow) and with moderately write-heavy databases, even if you only > modify a tiny portion of the records, you end up modifying a huge > portion of the segments, because the free space choice is random. > > There have been patches going around to change the random nature of > that choice, but none are very likely to make a huge difference for > this application. In essence, file-level comparisons get you only a > mild speed-up, and are not worth the effort. > > I'd go for the hybrid file+lsn method, or nothing. The hybrid avoids > the I/O of inspecting the LSN of entire segments (necessary > optimization for huge multi-TB databases) and backups only the > portions modified when segments do contain changes, so it's the best > of both worlds. Any partial implementation would either require lots > of I/O (LSN only) or save very little (file only) unless it's an > almost read-only database. > From my experience, if a database is big enough and there is any kind of historical data in the database, the "file only" approach works well. Moreover it has the advantage of being simple and easily verifiable. 
Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it signature.asc Description: OpenPGP digital signature
Re: [HACKERS] Proposal: Incremental Backup
Il 25/07/14 16:15, Michael Paquier ha scritto: > On Fri, Jul 25, 2014 at 10:14 PM, Marco Nenciarini > wrote: >> 0. Introduction: >> = >> This is a proposal for adding incremental backup support to streaming >> protocol and hence to pg_basebackup command. > Not sure that incremental is a right word as the existing backup > methods using WAL archives are already like that. I recall others > calling that differential backup from some previous threads. Would > that sound better? > "differential backup" is widely used to refer to a backup that is always based on a "full backup". An "incremental backup" can be based either on a "full backup" or on a previous "incremental backup". We picked that name to emphasize this property. >> 1. Proposal >> = >> Our proposal is to introduce the concept of a backup profile. > Sounds good. Thanks for looking at that. > >> The backup >> profile consists of a file with one line per file detailing tablespace, >> path, modification time, size and checksum. >> Using that file the BASE_BACKUP command can decide which file needs to >> be sent again and which is not changed. The algorithm should be very >> similar to rsync, but since our files are never bigger than 1 GB per >> file that is probably granular enough not to worry about copying parts >> of files, just whole files. > There are actually two levels of differential backups: file-level, > which is the approach you are taking, and block level. Block level > backup makes necessary a scan of all the blocks of all the relations > and take only the data from the blocks newer than the LSN given by the > BASE_BACKUP command. In the case of file-level approach, you could > already backup the relation file after finding at least one block > already modified. I like the idea of shortcutting the checksum when you find a block with a LSN newer than the previous backup START WAL LOCATION, however I see it as a further optimization. 
In any case, it is worth storing the backup start LSN in the header section of the backup_profile together with other useful information about the backup starting position. As a first step we would have a simple and robust method to produce a file-level incremental backup. > Btw, the size of relation files depends on the size > defined by --with-segsize when running configure. 1GB is the default > though, and the value usually used. Differential backups can reduce > the size of overall backups depending on the application, at the cost > of some CPU to analyze the relation blocks that need to be included in > the backup. We tested the idea on several multi-terabyte installations using a custom deduplication script which follows this approach. The result is that it can reduce the backup size of more than 50%. Also most of databases in the range 50GB - 1TB can take a big advantage of it. > >> It could also be used in 'refresh' mode, by allowing the pg_basebackup >> command to 'refresh' an old backup directory with a new backup. > I am not sure this is really helpful... Could you please elaborate the last sentence? > >> The final piece of this architecture is a new program called >> pg_restorebackup which is able to operate on a "chain of incremental >> backups", allowing the user to build an usable PGDATA from them or >> executing maintenance operations like verify the checksums or estimate >> the final size of recovered PGDATA. > Yes, right. Taking a differential backup is not difficult, but > rebuilding a constant base backup with a full based backup and a set > of differential ones is the tricky part, but you need to be sure that > all the pieces of the puzzle are here. If we limit it to be file-based, the recover procedure is conceptually simple. Read every involved manifest from the start and take the latest available version of any file (or mark it for deletion, if the last time it is named is in a backup_exceptions file). 
Keeping the algorithm as simple as possible is in our opinion the best way to go. > >> We created a wiki page with all implementation details at >> https://wiki.postgresql.org/wiki/Incremental_backup > I had a look at that, and I think that you are missing the shot in the > way differential backups should be taken. What would be necessary is > to pass a WAL position (or LSN, logical sequence number like > 0/260) with a new clause called DIFFERENTIAL (INCREMENTAL in your > first proposal) in the BASE BACKUP command, and then have the server > report back to client all the files that contain blocks newer than the > given LSN position given for file-level backup, or the blocks ne
[HACKERS] Proposal: Incremental Backup
0. Introduction: = This is a proposal for adding incremental backup support to streaming protocol and hence to pg_basebackup command. 1. Proposal = Our proposal is to introduce the concept of a backup profile. The backup profile consists of a file with one line per file detailing tablespace, path, modification time, size and checksum. Using that file the BASE_BACKUP command can decide which file needs to be sent again and which is not changed. The algorithm should be very similar to rsync, but since our files are never bigger than 1 GB per file that is probably granular enough not to worry about copying parts of files, just whole files. This way of operating has also some advantages over using rsync to take a physical backup: It does not require the files from the previous backup to be checksummed again, and they could even reside on some form of long-term, not-directly-accessible storage, like a tape cartridge or somewhere in the cloud (e.g. Amazon S3 or Amazon Glacier). It could also be used in 'refresh' mode, by allowing the pg_basebackup command to 'refresh' an old backup directory with a new backup. The final piece of this architecture is a new program called pg_restorebackup which is able to operate on a "chain of incremental backups", allowing the user to build an usable PGDATA from them or executing maintenance operations like verify the checksums or estimate the final size of recovered PGDATA. We created a wiki page with all implementation details at https://wiki.postgresql.org/wiki/Incremental_backup 2. Goals = The main goal of incremental backup is to reduce the size of the backup. A secondary goal is to reduce backup time also. 3. 
Development plan = Our development plan proposal is articulated in four phases: Phase 1: Add ‘PROFILE’ option to ‘BASE_BACKUP’ Phase 2: Add ‘INCREMENTAL’ option to ‘BASE_BACKUP’ Phase 3: Support of PROFILE and INCREMENTAL for pg_basebackup Phase 4: pg_restorebackup We are willing to get consensus over our design here before starting to implement it. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it signature.asc Description: OpenPGP digital signature
Re: [HACKERS] [PATCH] Support for Array ELEMENT Foreign Keys
Thanks for your review. Please find the attached refreshed patch (v2) which fixes the loose ends you found. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it Array-ELEMENT-foreign-key-v2.patch.bz2 Description: application/bzip -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] [PATCH] Support for Array ELEMENT Foreign Keys
Hi, please find attached the refreshed v1 patch. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it Array-ELEMENT-foreign-key-v1-refreshed.patch.bz2 Description: application/bzip -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
[HACKERS] [PATCH] Support for Array ELEMENT Foreign Keys
Hi, please find attached version 1 of the patch introducing "Array ELEMENT Foreign Keys" support. This new thread and related patch substitute any previous discussion about "Support for foreign keys with arrays", as anticipated in http://archives.postgresql.org/pgsql-hackers/2012-07/msg01098.php This patch adds: * support for ELEMENT REFERENCES column constraint on array types - e.g. c1 INT[] ELEMENT REFERENCES t1 * support for array ELEMENT foreign key table constraints - e.g. FOREIGN KEY (ELEMENT c1) REFERENCES t1 * support for array ELEMENT foreign keys in multi-column foreign key table constraints - e.g. FOREIGN KEY (c1, ELEMENT c2) REFERENCES t1 (u1, u2) Array ELEMENT foreign keys are a special kind of foreign key constraint requiring the referencing column to be an array of elements of the same type as (or a compatible type to) the referenced column in the referenced table. Array ELEMENT foreign keys are an extension of PostgreSQL and are not included in the SQL standard. An usage example for this feature is the following: CREATE TABLE drivers ( driver_id integer PRIMARY KEY, first_name text, last_name text, ... ); CREATE TABLE races ( race_id integer PRIMARY KEY, title text, race_day DATE, ... final_positions integer[] ELEMENT REFERENCES drivers ); This initial patch present the following limitations: * Only one "ELEMENT" column allowed in a multi-column key - e.g. FOREIGN KEY (c1, ELEMENT c2, ELEMENT c3) REFERENCES t1 (u1, u2, u3) will throw an error * Supported actions: - NO ACTION - RESTRICT As noted in the last 9.2 commitfest, we feel it is important to consolidate the "array ELEMENT foreign key" syntax and to postpone decisions about referential integrity actions, allowing the community to have a clearer understanding of the feature goals and requirements. 
However, having array_replace() and array_remove() functions already being committed and using our previous patch as a basis, we are confident that a generally accepted syntax will come out in the next months through community collaborative dynamics. The patch includes documentation and an extensive coverage of tests (element_foreign_key.sql regression test file). Co-authors of this patch are Gabriele and Gianni from our Italian team at 2ndQuadrant. Thank you. Cheers, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it Array-ELEMENT-foreign-key-v1.patch.bz2 Description: application/bzip -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Support for array_remove and array_replace functions
On 30/06/2012 04:16, Alex Hunsaker wrote: > > Hi, I've been reviewing this patch. > > Good documentation, and regression tests. The code looked fine but I > didn't care for the code duplication between array_replace and > array_remove so I merged those into a helper function, > array_replace_internal(). Thoughts? It looks reasonable. There was a typo in array_replace which was caught by regression tests. I've fixed the typo and changed a comment in array_replace_internal. Patch v3 attached. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it array-functions-v3.patch.bz2 Description: application/bzip -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] [PATCH] Support for foreign keys with arrays
Il giorno mer, 13/06/2012 alle 18.07 -0400, Noah Misch ha scritto: > On Wed, Jun 13, 2012 at 10:12:18PM +0200, Gabriele Bartolini wrote: > > Our goal is to work on this patch from the next commit fest. > > > > What we are about to do for this commit fest is to split the previous > > patch and send a small one just for the array_remove() and > > array_replace() functions. > > > > Then we will sit down and organise the development of the feature > > according to Peter's feedback. It is important indeed that we find a > > commonly accepted terminology and syntax for this feature. > > > > I hope this sounds like a reasonable plan. Thank you. > > Sounds fine. The 2012-01 CF entry for this patch has been moved to the > 2012-06 CF. Please mark that entry Returned with Feedback and create a new > entry for the subset you're repackaging for this CommitFest. > > Thanks, > nm > Dear Noah, I have added a new patch to the commitfest (miscellaneous category) entitled "Support for array_remove and array_replace functions". I have also marked the "Foreign Key Array" patch as " Returned with Feedback" as per your request. We will start working again on this patch in the next commitfest as anticipated by Gabriele, hoping that the array functions will be committed by then. Thank you. Cheers, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
[HACKERS] Support for array_remove and array_replace functions
Hi, following Gabriele's email regarding our previous patch on "Foreign Key Arrays"[1], I am sending a subset of that patch which includes only two array functions which will be needed in that patch: array_remove (limited to single-dimensional arrays) and array_replace. The patch includes changes to the documentation. Cheers, Marco [1] http://archives.postgresql.org/message-id/4FD8F422.40709%402ndQuadrant.it -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it array-functions.patch.bz2 Description: application/bzip -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Patch pg_is_in_backup()
Hi Gilles, unfortunately Gabriele has been very busy recently and he asked me to check again the status of this patch for this commit fest. In order to speed up the application of the patch, I am sending an updated version which correctly compiles with current head. Gabriele will then proceed with the review. Thank you, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it pg_backup_start_time-and-pg_is_in_backup-v4.1.patch.bz2 Description: application/bzip -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] [PATCH] Support for foreign keys with arrays
Il giorno lun, 19/03/2012 alle 18.41 +0100, Marco Nenciarini ha scritto: > > Attached is v5, which should address all the remaining issues. Please find attached v6 of the EACH Foreign Key patch. From v5 only cosmetic changes to the documentation were made. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it EACH-foreign-keys.v6.patch.bz2 Description: application/bzip -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Error trying to compile a simple C trigger
Il giorno mar, 20/03/2012 alle 11.16 +, Peter Geoghegan ha scritto: > On 20 March 2012 10:53, Marco Nenciarini > wrote: > > alert.c: In function ‘dbms_alert_defered_signal’: > > alert.c:839:33: error: dereferencing pointer to incomplete type > > make: *** [alert.o] Error 1 > > > > I've also tried the example at > > > > http://www.postgresql.org/docs/devel/static/trigger-example.html > > > > and the result is exactly the same. > > > > trigtest.c: In function ‘trigf’: > > trigtest.c:44:36: error: dereferencing pointer to incomplete type > > make: *** [trigtest.o] Error 1 > > I'd say this is an unintended consequence of a pgrminclude run. Try adding > this: > > #include "access/tupdesc.h" It doesn't work. The error is still the same. Regards, Marco -- Marco Nenciarini - System manager @ Devise.IT marco.nenciar...@devise.it | http://www.devise.it -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Error trying to compile a simple C trigger
Il giorno mar, 20/03/2012 alle 16.46 +0500, Asif Naeem ha scritto: > It seems that compiler is complain about "Relation" structure, can you > please try adding the following in trigtest.c i.e. > > #include "utils/rel.h" > It does the trick. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] Error trying to compile a simple C trigger
Il giorno mar, 20/03/2012 alle 11.16 +, Peter Geoghegan ha scritto: > On 20 March 2012 10:53, Marco Nenciarini > wrote: > > alert.c: In function ‘dbms_alert_defered_signal’: > > alert.c:839:33: error: dereferencing pointer to incomplete type > > make: *** [alert.o] Error 1 > > > > I've also tried the example at > > > > http://www.postgresql.org/docs/devel/static/trigger-example.html > > > > and the result is exactly the same. > > > > trigtest.c: In function ‘trigf’: > > trigtest.c:44:36: error: dereferencing pointer to incomplete type > > make: *** [trigtest.o] Error 1 > > I'd say this is an unintended consequence of a pgrminclude run. Try adding > this: > > #include "access/tupdesc.h" It doesn't work. The error is still the same. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
[HACKERS] Error trying to compile a simple C trigger
I was trying to compile orafce on the current master and it yielded an error at line tupdesc = trigdata->tg_relation->rd_att; alert.c: In function ‘dbms_alert_defered_signal’: alert.c:839:33: error: dereferencing pointer to incomplete type make: *** [alert.o] Error 1 I've also tried the example at http://www.postgresql.org/docs/devel/static/trigger-example.html and the result is exactly the same. trigtest.c: In function ‘trigf’: trigtest.c:44:36: error: dereferencing pointer to incomplete type make: *** [trigtest.o] Error 1 Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] [PATCH] Support for foreign keys with arrays
Hi Noah, thank you again for your thorough review, which is much appreciated. > I think the patch has reached the stage where a committer can review > it without wasting much time on things that might change radically. > So, I'm marking it Ready for Committer. Please still submit an update > correcting the above; I'm sure your committer will appreciate it. Attached is v5, which should address all the remaining issues. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it On Fri, Mar 16, 2012 at 11:33:12PM -0400, Noah Misch wrote: > I recommend removing the new block of code in RI_FKey_eachcascade_del() and > letting array_remove() throw the error. If you find a way to throw a nicer > error without an extra scan, by all means submit that to a future CF as an > improvement. I don't think it's important enough to justify cycles at this > late hour of the current CF. It makes sense; we have removed the block of code and updated the error message following your suggestion. Now the error is thrown by array_remove() itself. We'll keep an eye on this for further improvements in the future. > > > pg_constraint.confeach needs documentation. > > > > Most of pg_constraint columns, including all the boolean ones, are > > documented only in the "description" column of > > > > http://www.postgresql.org/docs/9.1/static/catalog-pg-constraint.html#AEN85760 > > > > it seems that confiseach should not be an exception, since it just > > indicates whether the constraint is of a certain kind or not. > > Your patch adds two columns to pg_constraint, confiseach and confeach, but it > only mentions confiseach in documentation. Just add a similar minimal mention > of confeach. Sorry, our mistake; a mention for confeach has now been added, and both entries have been reordered to reflect the column position in pg_constraint). > That is to say, they start with a capital letter and end with a period. 
Your > old text was fine apart from the lack of a period. (Your new text also lacks > a period.) Fixed, it should be fine now (another misunderstanding on our side, apologies). > If the cost doesn't exceed O(F log P), where F is the size of the FK table and > P is the size of the PK table, I'm not worried. If it can be O(F^2), we would > have a problem to be documented, if not fixed. We have rewritten the old query in a simpler way; now its cost is O(F log P). Here F must represent the size of the "flattened" table, that is, the total number of values that must be checked, which seems a reasonable assumption in any case. > Your change to array_replace() made me look at array_remove() again and > realize that it needs the same treatment. This command yields a segfault: > SELECT array_remove('{a,null}'::text[], null); Fixed. > > This latest version introduces two calls to get_element_type(), both of which > should be get_base_element_type(). Fixed. > Patch "Avoid FK validations for no-rewrite ALTER TABLE ALTER TYPE", committed > between v3b and v4 of this patch, added code to ATAddForeignKeyConstraint() > requiring an update in this patch. Run this in the regression DB: > [local] regression=# alter table fktableforeachfk alter ftest1 type int[]; > ERROR: could not find cast from 1007 to 23 Thanks for pointing it out. We have added a regression test and then fixed the issue. > > RI_PLAN_EACHCASCADE_DEL_DOUPDATE should be RI_PLAN_EACHCASCADE_DEL_DODELETE. Fixed. EACH-foreign-key.v5.patch.bz2 Description: application/bzip -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] [PATCH] Support for foreign keys with arrays
Il giorno gio, 08/03/2012 alle 08.11 -0500, Robert Haas ha scritto: > On Fri, Feb 24, 2012 at 9:01 PM, Noah Misch wrote: > > I consider these the core changes needed to reach Ready for Committer: > > > > - Fix crash in array_replace(arr, null, null). > > - Don't look through the domain before calling can_coerce_type(). > > - Compare operator family of pfeqop and TYPECACHE_EQ_OPR at creation. > > - Move post-processing from gram.y and remove "t"/"f" magic values. > > So, is someone working on making these changes? > We are working on it and I hope we can send the v4 version during the upcoming weekend. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] [PATCH] Support for foreign keys with arrays
Hi guys, Please find attached version 3 of our patch. We thoroughly followed your suggestions and were able to implement "EACH foreign key constraints" with multi-column syntax as well. "EACH foreign key constraints" represent PostgreSQL implementation of what are also known as Foreign Key Arrays. Some limitations occur in this release, but as previously agreed these can be refined and defined in future release implementations. This patch adds: * support for EACH REFERENCES column constraint on array types - e.g. c1 INT[] EACH REFERENCES t1 * support for EACH foreign key table constraints - e.g. FOREIGN KEY (EACH c1) REFERENCES t1 * support for EACH foreign keys in multi-column foreign key table constraints - e.g. FOREIGN KEY (c1, EACH c2) REFERENCES t1 (u1, u2) * support for two new referential actions on update/delete operations for single-column only EACH foreign keys: ** EACH CASCADE (deletes or updates elements inside the referencing array) ** EACH SET NULL (sets to NULL referencing element inside the foreign array) * support for array_replace and array_remove functions as required by the above actions As previously said, we preferred to keep this patch simple for 9.2 and forbid EACH CASCADE and EACH SET NULL on multi-column foreign keys. After all, majority of use cases is represented by EACH foreign key column constraints (single-column foreign key arrays), and more complicated use cases can be discussed for 9.3 - should this patch make it. :) We can use multi-dimensional arrays as well as referencing columns. In that case though, ON DELETE EACH CASCADE will behave like ON DELETE EACH SET NULL. This is a safe way of implementing the action. We have some ideas on how to implement this, but we feel it is better to limit the behaviour for now. 
As far as documentation is concerned, we: * added actions and constraint info in the catalog * added an entire section on "EACH foreign key constraints" in the data definition language chapter (we've simplified the second example, greatly following Noah's advice - let us know if this is ok with you) * added array_remove (currently limited to single-dimensional arrays) and array_replace in the array functions chapter * modified REFERENCES/FOREIGN KEY section in the CREATE TABLE command's documentation and added a special section on the EACH REFERENCES clause (using square braces as suggested) Here follows a short list of notes for Noah: * You proposed these changes: ARRAY CASCADE -> EACH CASCADE and ARRAY SET NULL -> EACH SET NULL. We stack with EACH CASCADE and decided to prepend the "EACH" keyword to standard's CASCADE and SET NULL. Grammar is simpler and the emphasis is on the EACH keyword. * Multi-dimensional arrays: ON DELETE EACH CASCADE -> ON DELETE EACH SET NULL. We cannot determine the array's number of dimensions at definition time as it depends on the actual values. As anticipated above, we have some ideas on multi-dimensional element removal, but not for this patch for the aforementioned reasons. * Support of EACH CASCADE/SET NULL in ConvertTriggerToFK(): we decided to leave it. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it EACH-foreign-key-constraints-aka-foreign-key-arrays.v3.patch.bz2 Description: application/bzip -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] [PATCH] Support for foreign keys with arrays
Hello, Il giorno dom, 11/12/2011 alle 19.45 -0500, Noah Misch ha scritto: > On Sat, Dec 10, 2011 at 09:47:53AM +0100, Gabriele Bartolini wrote: > > So, here is a summary: > > > > --- - - > >| ON| ON| > > Action | DELETE | UPDATE | > > --- - - > > CASCADE| Row | Error | > > SET NULL | Row | Row | > > SET DEFAULT| Row | Row | > > ARRAY CASCADE | Element | Element | > > ARRAY SET NULL | Element | Element | > > NO ACTION |-|-| > > RESTRICT |-|-| > > --- - - > > > > If that's fine with you guys, Marco and I will refactor the development > > based on these assumptions. > > Looks fine. > This is our latest version of the patch. Gabriele, Gianni and I have discussed a lot and decided to send an initial patch which uses EACH REFERENCES instead of ARRAY REFERENCES. The reason behind this is that ARRAY REFERENCES generates an ambiguous grammar, and we all agreed that EACH REFERENCES makes sense (and the same time does not introduce any new keyword). This is however open for discussion. The patch now includes the following clauses on the delete/update actions - as per previous emails: --- - - | ON| ON| Action | DELETE | UPDATE | --- - - CASCADE| Row | Error | SET NULL | Row | Row | SET DEFAULT| Row | Row | ARRAY CASCADE | Element | Element | ARRAY SET NULL | Element | Element | NO ACTION |-|-| RESTRICT |-|-| --- - - We will resubmit the patch for the 2012-01 commit fest. Thank you, Marco -- Marco Nenciarini - System manager @ Devise.IT marco.nenciar...@devise.it | http://www.devise.it foreign-key-array-v2.patch.gz Description: GNU Zip compressed data -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
Re: [HACKERS] [PATCH] Support for foreign keys with arrays
Hello, Il giorno dom, 11/12/2011 alle 19.45 -0500, Noah Misch ha scritto: > On Sat, Dec 10, 2011 at 09:47:53AM +0100, Gabriele Bartolini wrote: > > So, here is a summary: > > > > --- - - > >| ON| ON| > > Action | DELETE | UPDATE | > > --- - - > > CASCADE| Row | Error | > > SET NULL | Row | Row | > > SET DEFAULT| Row | Row | > > ARRAY CASCADE | Element | Element | > > ARRAY SET NULL | Element | Element | > > NO ACTION |-|-| > > RESTRICT |-|-| > > --- - - > > > > If that's fine with you guys, Marco and I will refactor the development > > based on these assumptions. > > Looks fine. > This is our latest version of the patch. Gabriele, Gianni and I have discussed a lot and decided to send an initial patch which uses EACH REFERENCES instead of ARRAY REFERENCES. The reason behind this is that ARRAY REFERENCES generates an ambiguous grammar, and we all agreed that EACH REFERENCES makes sense (and at the same time does not introduce any new keyword). This is however open for discussion. The patch now includes the following clauses on the delete/update actions - as per previous emails: --- - - | ON| ON| Action | DELETE | UPDATE | --- - - CASCADE| Row | Error | SET NULL | Row | Row | SET DEFAULT| Row | Row | ARRAY CASCADE | Element | Element | ARRAY SET NULL | Element | Element | NO ACTION |-|-| RESTRICT |-|-| --- - - We will resubmit the patch for the 2012-01 commit fest. Thank you, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it foreign-key-array-v2.patch.gz Description: GNU Zip compressed data -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
[HACKERS] Minor issues with docs
While I was working on automatic translation of PostgreSQL's documentation from SGML to XML, I found some minor issues. In the file doc/src/sgml/ecpg.sgml there are many lines of C code containing unescaped '<' characters. In the file doc/src/sgml/array.sgml there is a tag which has a case mismatch error with its end tag. Attached you can find the patch, if you want to apply it. Regards, Marco -- Marco Nenciarini - 2ndQuadrant Italy PostgreSQL Training, Services and Support marco.nenciar...@2ndquadrant.it | www.2ndQuadrant.it >From d22539bdb7cabcb6bfbf0ce1b80a59fbba283ca4 Mon Sep 17 00:00:00 2001 From: Marco Nenciarini Date: Wed, 25 May 2011 19:35:36 +0200 Subject: [PATCH] Fix minor issues with documentation markup Signed-off-by: Marco Nenciarini --- doc/src/sgml/array.sgml |2 +- doc/src/sgml/ecpg.sgml | 20 ++-- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/doc/src/sgml/array.sgml b/doc/src/sgml/array.sgml index bb318c5..3508ba3 100644 --- a/doc/src/sgml/array.sgml +++ b/doc/src/sgml/array.sgml @@ -369,7 +369,7 @@ UPDATE sal_emp SET pay_by_quarter = ARRAY[25000,25000,27000,27000] UPDATE sal_emp SET pay_by_quarter[4] = 15000 WHERE name = 'Bill'; - + or updated in a slice: diff --git a/doc/src/sgml/ecpg.sgml b/doc/src/sgml/ecpg.sgml index 9c6ca4c..a8ffde5 100644 --- a/doc/src/sgml/ecpg.sgml +++ b/doc/src/sgml/ecpg.sgml @@ -4154,7 +4154,7 @@ switch (v.sqltype) { case ECPGt_char: memset(&var_buf, 0, sizeof(var_buf)); -memcpy(&var_buf, sqldata, (sizeof(var_buf) <= sqllen ? sizeof(var_buf) - 1 : sqllen)); +memcpy(&var_buf, sqldata, (sizeof(var_buf) <= sqllen ? sizeof(var_buf) - 1 : sqllen)); break; case ECPGt_int: /* integer */ @@ -4390,7 +4390,7 @@ main(void) case ECPGt_char: memset(&var_buf, 0, sizeof(var_buf)); -memcpy(&var_buf, sqldata, (sizeof(var_buf) <= sqllen ? sizeof(var_buf)-1 : sqllen)); +memcpy(&var_buf, sqldata, (sizeof(var_buf) <= sqllen ? 
sizeof(var_buf)-1 : sqllen)); break; case ECPGt_int: /* integer */ @@ -5871,39 +5871,39 @@ main(void) /* create */ loid = lo_create(conn, 0); -if (loid < 0) +if (loid < 0) printf("lo_create() failed: %s", PQerrorMessage(conn)); printf("loid = %d\n", loid); /* write test */ fd = lo_open(conn, loid, INV_READ|INV_WRITE); -if (fd < 0) +if (fd < 0) printf("lo_open() failed: %s", PQerrorMessage(conn)); printf("fd = %d\n", fd); rc = lo_write(conn, fd, buf, buflen); -if (rc < 0) +if (rc < 0) printf("lo_write() failed\n"); rc = lo_close(conn, fd); -if (rc < 0) +if (rc < 0) printf("lo_close() failed: %s", PQerrorMessage(conn)); /* read test */ fd = lo_open(conn, loid, INV_READ); -if (fd < 0) +if (fd < 0) printf("lo_open() failed: %s", PQerrorMessage(conn)); printf("fd = %d\n", fd); rc = lo_read(conn, fd, buf2, buflen); -if (rc < 0) +if (rc < 0) printf("lo_read() failed\n"); rc = lo_close(conn, fd); -if (rc < 0) +if (rc < 0) printf("lo_close() failed: %s", PQerrorMessage(conn)); /* check */ @@ -5912,7 +5912,7 @@ main(void) /* cleanup */ rc = lo_unlink(conn, loid); -if (rc < 0) +if (rc < 0) printf("lo_unlink() failed: %s", PQerrorMessage(conn)); EXEC SQL COMMIT; -- 1.7.5.1 -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers