The attached patch adds an option to the COPY command to skip writing WAL when the following conditions are all met:

1) The table is empty (zero size on disk)
2) The copy command can obtain an access exclusive lock on the table with out blocking.
3) The WAL isn't needed for replication

For example

COPY a FROM '/tmp/a.txt' (SKIP_WAL);

A non-default option to the copy command is required because the copy will block out any concurrent access to the table which would be undesirable in some cases and is different from the current behaviour.

This can safely be done because if the transaction does not commit the empty version of the data files are still available. The COPY command already skips WAL if the table was created in the current transaction.


There was a discussion on something similar before[1] but I didn't see any discussion of having it only obtain the lock if it can do so without waiting (nor could I find in the archives what happened to that patch). I'm not attached to the SKIP_WAL vs LOCK as the option


1- see http://archives.postgresql.org/pgsql-patches/2005-12/msg00206.php

Steve
diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index a73b022..3a0e521 100644
*** a/doc/src/sgml/ref/copy.sgml
--- b/doc/src/sgml/ref/copy.sgml
*************** COPY { <replaceable class="parameter">ta
*** 42,47 ****
--- 42,48 ----
      FORCE_QUOTE { ( <replaceable class="parameter">column</replaceable> [, ...] ) | * }
      FORCE_NOT_NULL ( <replaceable class="parameter">column</replaceable> [, ...] ) |
      ENCODING '<replaceable class="parameter">encoding_name</replaceable>'
+     SKIP_WAL 
  </synopsis>
   </refsynopsisdiv>
  
*************** COPY { <replaceable class="parameter">ta
*** 293,298 ****
--- 294,312 ----
        for more details.
       </para>
      </listitem>
+ 	</varlistentry>
+ 	<varlistentry>
+ 	<term><literal>SKIP_WAL</></term>
+      <listitem>
+ 	   <para>
+         Specifies that the writing of WAL should be skipped if possible.
+         WAL can be skipped if the table being copied into is empty and
+         if an exclusive lock can be obtained without waiting.  If this
+         option is specified and WAL is skipped then the transaction will
+         hold an exclusive lock on the table being copied until the transaction
+         commits.
+ 		</para>
+ 	   </listitem>
     </varlistentry>
  
    </variablelist>
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 528a3a1..bd81a4b 100644
*** a/src/backend/commands/copy.c
--- b/src/backend/commands/copy.c
***************
*** 29,34 ****
--- 29,35 ----
  #include "commands/defrem.h"
  #include "commands/trigger.h"
  #include "executor/executor.h"
+ #include "commands/tablecmds.h"
  #include "libpq/libpq.h"
  #include "libpq/pqformat.h"
  #include "mb/pg_wchar.h"
***************
*** 37,42 ****
--- 38,44 ----
  #include "parser/parse_relation.h"
  #include "rewrite/rewriteHandler.h"
  #include "storage/fd.h"
+ #include "storage/lmgr.h"
  #include "tcop/tcopprot.h"
  #include "utils/acl.h"
  #include "utils/builtins.h"
*************** typedef struct CopyStateData
*** 120,125 ****
--- 122,128 ----
  	bool	   *force_quote_flags;		/* per-column CSV FQ flags */
  	List	   *force_notnull;	/* list of column names */
  	bool	   *force_notnull_flags;	/* per-column CSV FNN flags */
+ 	bool		skip_wal;				/* skip WAL if able */
  
  	/* these are just for error messages, see CopyFromErrorCallback */
  	const char *cur_relname;	/* table name for error messages */
*************** ProcessCopyOptions(CopyState cstate,
*** 965,970 ****
--- 968,978 ----
  						 errmsg("argument to option \"%s\" must be a valid encoding name",
  								defel->defname)));
  		}
+ 		else if (strcmp(defel->defname,"skip_wal") == 0)
+ 		{
+ 
+ 			cstate->skip_wal=true;
+ 		}
  		else
  			ereport(ERROR,
  					(errcode(ERRCODE_SYNTAX_ERROR),
*************** CopyFrom(CopyState cstate)
*** 1910,1915 ****
--- 1918,1957 ----
  		if (!XLogIsNeeded())
  			hi_options |= HEAP_INSERT_SKIP_WAL;
  	}
+ 	
+ 	/*
+ 	 * if SKIP_WAL was requested we try to avoid writing
+ 	 * WAL if the table is 0 bytes on disk (empty) and
+ 	 * that we can obtain an exclusive lock on it without blocking. 
+ 	 * 
+ 	 */
+ 	if(cstate->skip_wal && !XLogIsNeeded() && 
+ 	   ConditionalLockRelationOid(cstate->rel->rd_id,AccessExclusiveLock))
+ 	{
+ 		
+ 		Datum size = DirectFunctionCall2(pg_relation_size,
+ 										 ObjectIdGetDatum(cstate->rel->rd_id),
+ 										 PointerGetDatum(cstring_to_text("main")));
+ 		if ( DatumGetInt64(size)==0)
+ 		{
+ 			/**
+ 			 * The relation is empty + unused.
+ 			 * truncate it so that if this transaction
+ 			 * rollsback then the changes to the relation files
+ 			 * will dissapear (the current relation files will
+ 			 * remain untouched)
+ 			 */
+ 			truncate_relation(cstate->rel);
+ 			hi_options |= HEAP_INSERT_SKIP_FSM;
+ 			hi_options |= HEAP_INSERT_SKIP_WAL;
+ 		}
+ 		else
+ 		{
+ 			UnlockRelation(cstate->rel,AccessExclusiveLock);
+ 		}
+ 					  
+ 	}
+ 
  
  	/*
  	 * We need a ResultRelInfo so we can use the regular executor's
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 4509cda..ff5bf8d 100644
*** a/src/backend/commands/tablecmds.c
--- b/src/backend/commands/tablecmds.c
*************** ExecuteTruncate(TruncateStmt *stmt)
*** 1053,1095 ****
  		}
  		else
  		{
! 			Oid			heap_relid;
! 			Oid			toast_relid;
! 
! 			/*
! 			 * This effectively deletes all rows in the table, and may be done
! 			 * in a serializable transaction.  In that case we must record a
! 			 * rw-conflict in to this transaction from each transaction
! 			 * holding a predicate lock on the table.
! 			 */
! 			CheckTableForSerializableConflictIn(rel);
! 
! 			/*
! 			 * Need the full transaction-safe pushups.
! 			 *
! 			 * Create a new empty storage file for the relation, and assign it
! 			 * as the relfilenode value. The old storage file is scheduled for
! 			 * deletion at commit.
! 			 */
! 			RelationSetNewRelfilenode(rel, RecentXmin);
! 
! 			heap_relid = RelationGetRelid(rel);
! 			toast_relid = rel->rd_rel->reltoastrelid;
! 
! 			/*
! 			 * The same for the toast table, if any.
! 			 */
! 			if (OidIsValid(toast_relid))
! 			{
! 				rel = relation_open(toast_relid, AccessExclusiveLock);
! 				RelationSetNewRelfilenode(rel, RecentXmin);
! 				heap_close(rel, NoLock);
! 			}
! 
! 			/*
! 			 * Reconstruct the indexes to match, and we're done.
! 			 */
! 			reindex_relation(heap_relid, REINDEX_REL_PROCESS_TOAST);
  		}
  	}
  
--- 1053,1059 ----
  		}
  		else
  		{
! 			truncate_relation(rel);
  		}
  	}
  
*************** AtEOSubXact_on_commit_actions(bool isCom
*** 9752,9754 ****
--- 9716,9759 ----
  		}
  	}
  }
+ 
+ void truncate_relation(Relation rel)
+ {
+ 	Oid			heap_relid;
+ 	Oid			toast_relid;
+ 	
+ 	/*
+ 	 * This effectively deletes all rows in the table, and may be done
+ 	 * in a serializable transaction.  In that case we must record a
+ 	 * rw-conflict in to this transaction from each transaction
+ 	 * holding a predicate lock on the table.
+ 	 */
+ 	CheckTableForSerializableConflictIn(rel);
+ 	
+ 	/*
+ 	 * Need the full transaction-safe pushups.
+ 	 *
+ 	 * Create a new empty storage file for the relation, and assign it
+ 	 * as the relfilenode value. The old storage file is scheduled for
+ 	 * deletion at commit.
+ 	 */
+ 	RelationSetNewRelfilenode(rel, RecentXmin);
+ 	
+ 	heap_relid = RelationGetRelid(rel);
+ 	toast_relid = rel->rd_rel->reltoastrelid;
+ 
+ 	/*
+ 	 * The same for the toast table, if any.
+ 	 */
+ 	if (OidIsValid(toast_relid))
+ 	{
+ 		rel = relation_open(toast_relid, AccessExclusiveLock);
+ 		RelationSetNewRelfilenode(rel, RecentXmin);
+ 		heap_close(rel, NoLock);
+ 	}
+ 	
+ 	/*
+ 	 * Reconstruct the indexes to match, and we're done.
+ 	 */
+ 	reindex_relation(heap_relid, REINDEX_REL_PROCESS_TOAST);
+ }
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index e9f3896..354cc3f 100644
*** a/src/backend/parser/gram.y
--- b/src/backend/parser/gram.y
*************** static void processCASbits(int cas_bits,
*** 553,560 ****
  
  	SAVEPOINT SCHEMA SCROLL SEARCH SECOND_P SECURITY SELECT SEQUENCE SEQUENCES
  	SERIALIZABLE SERVER SESSION SESSION_USER SET SETOF SHARE
! 	SHOW SIMILAR SIMPLE SMALLINT SOME STABLE STANDALONE_P START STATEMENT
! 	STATISTICS STDIN STDOUT STORAGE STRICT_P STRIP_P SUBSTRING
  	SYMMETRIC SYSID SYSTEM_P
  
  	TABLE TABLES TABLESPACE TEMP TEMPLATE TEMPORARY TEXT_P THEN TIME TIMESTAMP
--- 553,560 ----
  
  	SAVEPOINT SCHEMA SCROLL SEARCH SECOND_P SECURITY SELECT SEQUENCE SEQUENCES
  	SERIALIZABLE SERVER SESSION SESSION_USER SET SETOF SHARE
! 	SHOW SIMILAR SIMPLE SKIP_WAL SMALLINT SOME STABLE STANDALONE_P 
! 	START STATEMENT STATISTICS STDIN STDOUT STORAGE STRICT_P STRIP_P SUBSTRING
  	SYMMETRIC SYSID SYSTEM_P
  
  	TABLE TABLES TABLESPACE TEMP TEMPLATE TEMPORARY TEXT_P THEN TIME TIMESTAMP
*************** copy_opt_item:
*** 2297,2302 ****
--- 2297,2306 ----
  				{
  					$$ = makeDefElem("encoding", (Node *)makeString($2));
  				}
+ 			| SKIP_WAL
+ 				{
+ 				  $$ = makeDefElem("skip_wal", (Node *)makeString("skip_wal"));
+ 				}
  		;
  
  /* The following exist for backward compatibility with very old versions */
diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h
index 0e8bbe0..5627c70 100644
*** a/src/include/commands/tablecmds.h
--- b/src/include/commands/tablecmds.h
*************** extern void AtEOXact_on_commit_actions(b
*** 71,75 ****
  extern void AtEOSubXact_on_commit_actions(bool isCommit,
  							  SubTransactionId mySubid,
  							  SubTransactionId parentSubid);
! 
  #endif   /* TABLECMDS_H */
--- 71,75 ----
  extern void AtEOSubXact_on_commit_actions(bool isCommit,
  							  SubTransactionId mySubid,
  							  SubTransactionId parentSubid);
! extern void truncate_relation(Relation rel);
  #endif   /* TABLECMDS_H */
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index 8e2bc0c..d4915e7 100644
*** a/src/test/regress/expected/copy2.out
--- b/src/test/regress/expected/copy2.out
*************** a\.
*** 239,244 ****
--- 239,285 ----
  \.b
  c\.d
  "\."
+ -- test SKIP_WAL option
+ BEGIN;
+ CREATE TABLE test_notemp ( 
+ 	   a int4);
+ COMMIT;
+ select pg_relation_size(oid) from pg_class where relname='test_notemp';
+  pg_relation_size 
+ ------------------
+                 0
+ (1 row)
+ 
+ COPY test_notemp FROM stdin WITH (skip_wal);
+ COPY test_notemp FROM stdin WITH (skip_wal);
+ select pg_relation_size(oid) from pg_class where relname='test_notemp';
+  pg_relation_size 
+ ------------------
+              8192
+ (1 row)
+ 
+ truncate test_notemp;
+ BEGIN;
+ COPY test_notemp FROM stdin WITH (skip_wal);
+ ROLLBACK;
+ --expect size of 0
+ select pg_relation_size(oid) from pg_class where relname='test_notemp';
+  pg_relation_size 
+ ------------------
+                 0
+ (1 row)
+ 
+ BEGIN;
+ COPY test_notemp FROM stdin csv;
+ ROLLBACK;
+ --expect non-zero size
+ select pg_relation_size(oid) from pg_class where relname='test_notemp';
+  pg_relation_size 
+ ------------------
+              8192
+ (1 row)
+ 
+ DROP TABLE test_notemp;
  DROP TABLE x, y;
  DROP FUNCTION fn_x_before();
  DROP FUNCTION fn_x_after();
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index 6322c8f..e41a9d8 100644
*** a/src/test/regress/sql/copy2.sql
--- b/src/test/regress/sql/copy2.sql
*************** c\.d
*** 164,169 ****
--- 164,206 ----
  
  COPY testeoc TO stdout CSV;
  
+ -- test SKIP_WAL option
+ 
+ BEGIN;
+ CREATE TABLE test_notemp ( 
+ 	   a int4);
+ COMMIT;
+ select pg_relation_size(oid) from pg_class where relname='test_notemp';
+ COPY test_notemp FROM stdin WITH (skip_wal);
+ 1
+ 2
+ \.
+ COPY test_notemp FROM stdin WITH (skip_wal);
+ 1
+ 2
+ \.
+ select pg_relation_size(oid) from pg_class where relname='test_notemp';
+ truncate test_notemp;
+ BEGIN;
+ COPY test_notemp FROM stdin WITH (skip_wal);
+ 1
+ 2
+ \.
+ ROLLBACK;
+ --expect size of 0
+ select pg_relation_size(oid) from pg_class where relname='test_notemp';
+ 
+ BEGIN;
+ COPY test_notemp FROM stdin csv;
+ 1
+ 2
+ \.
+ ROLLBACK;
+ --expect non-zero size
+ select pg_relation_size(oid) from pg_class where relname='test_notemp';
+ DROP TABLE test_notemp;
+ 
+ 
  DROP TABLE x, y;
  DROP FUNCTION fn_x_before();
  DROP FUNCTION fn_x_after();
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to