Re: [HACKERS] raw output from copy

Pavel Stehule Fri, 26 Feb 2016 23:27:45 -0800

Hi

2015-08-06 10:37 GMT+02:00 Pavel Stehule <[email protected]>:


> Hi,
>
> A psql-based implementation needs new infrastructure (more than a few lines)
>
> Missing:
>
> * binary mode support
> * parametrized query support,
>
> I am not against it, but I proposed both points, and both were rejected.
>
> So why not use the current infrastructure? Raw copy is a trivial patch.
>

I was asked by Daniel Verite about reopening this patch in the open
commitfest.

I am sending a rebased patch.

Regards

Pavel

diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
new file mode 100644
index 07e2f45..68fbfd8
*** a/doc/src/sgml/ref/copy.sgml
--- b/doc/src/sgml/ref/copy.sgml
*************** COPY { <replaceable class="parameter">ta
*** 197,203 ****
        Selects the data format to be read or written:
        <literal>text</>,
        <literal>csv</> (Comma Separated Values),
!       or <literal>binary</>.
        The default is <literal>text</>.
       </para>
      </listitem>
--- 197,203 ----
        Selects the data format to be read or written:
        <literal>text</>,
        <literal>csv</> (Comma Separated Values),
!       <literal>binary</> or <literal>raw</literal>.
        The default is <literal>text</>.
       </para>
      </listitem>
*************** OIDs to be shown as null if that ever pr
*** 888,893 ****
--- 888,925 ----
      </para>
     </refsect3>
    </refsect2>
+ 
+   <refsect2>
+      <title>Raw Format</title>
+ 
+    <para>
+     The <literal>raw</literal> format option causes all data to be
+     stored/read in binary format rather than as text. It shares its data
+     representation with the <literal>binary</literal> format. This format
+     doesn't use any metadata - only row data in network byte order is
+     exported or imported.
+    </para>
+ 
+    <para>
+     Because this format doesn't support any delimiter, only one value
+     can be exported or imported. NULL values are not allowed.
+    </para>
+    <para>
+     The <literal>raw</literal> format can be used to export or import
+     bytea values.
+ <programlisting>
+ COPY images(data) FROM '/usr1/proj/img/01.jpg' (FORMAT raw);
+ </programlisting>
+     It can also be used to export XML in a different encoding
+     or to import a valid XML document in any supported encoding:
+ <screen><![CDATA[
+ SET client_encoding TO latin2;
+ 
+ COPY (SELECT xmlelement(NAME data, 'Hello')) TO stdout (FORMAT raw);
+ <?xml version="1.0" encoding="LATIN2"?><data>Hello</data>
+ ]]></screen>
+    </para>
+   </refsect2>
   </refsect1>
  
   <refsect1>
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
new file mode 100644
index 3201476..beb9152
*** a/src/backend/commands/copy.c
--- b/src/backend/commands/copy.c
*************** typedef enum EolType
*** 89,94 ****
--- 89,99 ----
   * it's faster to make useless comparisons to trailing bytes than it is to
   * invoke pg_encoding_mblen() to skip over them. encoding_embeds_ascii is TRUE
   * when we have to do it the hard way.
+  *
+  * COPY supports three modes: text, binary and raw. The text format is plain
+  * text multiline format with specified delimiter. The binary format holds
+  * metadata (numbers, sizes) and data. The raw format holds data only and
+  * only one non NULL value can be processed.
   */
  typedef struct CopyStateData
  {
*************** typedef struct CopyStateData
*** 110,115 ****
--- 115,121 ----
  	char	   *filename;		/* filename, or NULL for STDIN/STDOUT */
  	bool		is_program;		/* is 'filename' a program to popen? */
  	bool		binary;			/* binary format? */
+ 	bool		raw;			/* required raw binary? */
  	bool		oids;			/* include OIDs? */
  	bool		freeze;			/* freeze rows on loading? */
  	bool		csv_mode;		/* Comma Separated Value format? */
*************** typedef struct CopyStateData
*** 199,204 ****
--- 205,213 ----
  	char	   *raw_buf;
  	int			raw_buf_index;	/* next byte to process */
  	int			raw_buf_len;	/* total # of bytes stored */
+ 
+ 	/* field for RAW mode */
+ 	bool		row_processed;		/* true, when first row was processed */
  } CopyStateData;
  
  /* DestReceiver for COPY (query) TO */
*************** SendCopyBegin(CopyState cstate)
*** 342,350 ****
  		/* new way */
  		StringInfoData buf;
  		int			natts = list_length(cstate->attnumlist);
! 		int16		format = (cstate->binary ? 1 : 0);
  		int			i;
  
  		pq_beginmessage(&buf, 'H');
  		pq_sendbyte(&buf, format);		/* overall format */
  		pq_sendint(&buf, natts, 2);
--- 351,366 ----
  		/* new way */
  		StringInfoData buf;
  		int			natts = list_length(cstate->attnumlist);
! 		int16		format;
  		int			i;
  
+ 		if (cstate->raw)
+ 			format = 2;
+ 		else if (cstate->binary)
+ 			format = 1;
+ 		else
+ 			format = 0;
+ 
  		pq_beginmessage(&buf, 'H');
  		pq_sendbyte(&buf, format);		/* overall format */
  		pq_sendint(&buf, natts, 2);
*************** SendCopyBegin(CopyState cstate)
*** 356,362 ****
  	else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
  	{
  		/* old way */
! 		if (cstate->binary)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
  			errmsg("COPY BINARY is not supported to stdout or from stdin")));
--- 372,378 ----
  	else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
  	{
  		/* old way */
! 		if (cstate->binary || cstate->raw)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
  			errmsg("COPY BINARY is not supported to stdout or from stdin")));
*************** SendCopyBegin(CopyState cstate)
*** 368,374 ****
  	else
  	{
  		/* very old way */
! 		if (cstate->binary)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
  			errmsg("COPY BINARY is not supported to stdout or from stdin")));
--- 384,390 ----
  	else
  	{
  		/* very old way */
! 		if (cstate->binary || cstate->raw)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
  			errmsg("COPY BINARY is not supported to stdout or from stdin")));
*************** ReceiveCopyBegin(CopyState cstate)
*** 387,395 ****
  		/* new way */
  		StringInfoData buf;
  		int			natts = list_length(cstate->attnumlist);
! 		int16		format = (cstate->binary ? 1 : 0);
  		int			i;
  
  		pq_beginmessage(&buf, 'G');
  		pq_sendbyte(&buf, format);		/* overall format */
  		pq_sendint(&buf, natts, 2);
--- 403,418 ----
  		/* new way */
  		StringInfoData buf;
  		int			natts = list_length(cstate->attnumlist);
! 		int16		format;
  		int			i;
  
+ 		if (cstate->raw)
+ 			format = 2;
+ 		else if (cstate->binary)
+ 			format = 1;
+ 		else
+ 			format = 0;
+ 
  		pq_beginmessage(&buf, 'G');
  		pq_sendbyte(&buf, format);		/* overall format */
  		pq_sendint(&buf, natts, 2);
*************** ReceiveCopyBegin(CopyState cstate)
*** 402,408 ****
  	else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
  	{
  		/* old way */
! 		if (cstate->binary)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
  			errmsg("COPY BINARY is not supported to stdout or from stdin")));
--- 425,431 ----
  	else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
  	{
  		/* old way */
! 		if (cstate->binary || cstate->raw)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
  			errmsg("COPY BINARY is not supported to stdout or from stdin")));
*************** ReceiveCopyBegin(CopyState cstate)
*** 414,420 ****
  	else
  	{
  		/* very old way */
! 		if (cstate->binary)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
  			errmsg("COPY BINARY is not supported to stdout or from stdin")));
--- 437,443 ----
  	else
  	{
  		/* very old way */
! 		if (cstate->binary || cstate->raw)
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
  			errmsg("COPY BINARY is not supported to stdout or from stdin")));
*************** CopySendEndOfRow(CopyState cstate)
*** 482,488 ****
  	switch (cstate->copy_dest)
  	{
  		case COPY_FILE:
! 			if (!cstate->binary)
  			{
  				/* Default line termination depends on platform */
  #ifndef WIN32
--- 505,511 ----
  	switch (cstate->copy_dest)
  	{
  		case COPY_FILE:
! 			if (!cstate->binary && !cstate->raw)
  			{
  				/* Default line termination depends on platform */
  #ifndef WIN32
*************** CopySendEndOfRow(CopyState cstate)
*** 527,533 ****
  			break;
  		case COPY_OLD_FE:
  			/* The FE/BE protocol uses \n as newline for all platforms */
! 			if (!cstate->binary)
  				CopySendChar(cstate, '\n');
  
  			if (pq_putbytes(fe_msgbuf->data, fe_msgbuf->len))
--- 550,556 ----
  			break;
  		case COPY_OLD_FE:
  			/* The FE/BE protocol uses \n as newline for all platforms */
! 			if (!cstate->binary && !cstate->raw)
  				CopySendChar(cstate, '\n');
  
  			if (pq_putbytes(fe_msgbuf->data, fe_msgbuf->len))
*************** CopySendEndOfRow(CopyState cstate)
*** 540,546 ****
  			break;
  		case COPY_NEW_FE:
  			/* The FE/BE protocol uses \n as newline for all platforms */
! 			if (!cstate->binary)
  				CopySendChar(cstate, '\n');
  
  			/* Dump the accumulated row as one CopyData message */
--- 563,569 ----
  			break;
  		case COPY_NEW_FE:
  			/* The FE/BE protocol uses \n as newline for all platforms */
! 			if (!cstate->binary && !cstate->raw)
  				CopySendChar(cstate, '\n');
  
  			/* Dump the accumulated row as one CopyData message */
*************** CopyGetData(CopyState cstate, void *data
*** 597,602 ****
--- 620,626 ----
  			bytesread = minread;
  			break;
  		case COPY_NEW_FE:
+ 
  			while (maxread > 0 && bytesread < minread && !cstate->fe_eof)
  			{
  				int			avail;
*************** CopyGetData(CopyState cstate, void *data
*** 619,624 ****
--- 643,649 ----
  								(errcode(ERRCODE_CONNECTION_FAILURE),
  								 errmsg("unexpected EOF on client connection with an open transaction")));
  					RESUME_CANCEL_INTERRUPTS();
+ 
  					switch (mtype)
  					{
  						case 'd':		/* CopyData */
*************** CopyLoadRawBuf(CopyState cstate)
*** 766,771 ****
--- 791,827 ----
  	return (inbytes > 0);
  }
  
+ /*
+  * CopyLoadallRawBuf reads input via CopyGetData into raw_buf until no more
+  * bytes are returned, then NUL-terminates it and resets index/length.
+  * Used for raw mode. The buffer grows by RAW_BUF_SIZE when less than 8 kB
+  * is free; assumes raw_buf starts with >= RAW_BUF_SIZE bytes (TODO confirm).
+  */
+ static void
+ CopyLoadallRawBuf(CopyState cstate)
+ {
+ 	int			nbytes = 0;
+ 	int			inbytes;
+ 	Size			raw_buf_size = RAW_BUF_SIZE;
+ 
+ 	do
+ 	{
+ 		/* keep 8 kB free beyond the byte reserved for the terminator */
+ 		if ((raw_buf_size - nbytes - 1) < 8 * 1024)
+ 		{
+ 			raw_buf_size += RAW_BUF_SIZE;
+ 			cstate->raw_buf = repalloc(cstate->raw_buf, raw_buf_size);
+ 		}
+ 
+ 		inbytes = CopyGetData(cstate, cstate->raw_buf + nbytes, 1, raw_buf_size - nbytes - 1);
+ 		nbytes += inbytes;
+ 	}
+ 	while (inbytes > 0);
+ 
+ 	cstate->raw_buf[nbytes] = '\0';
+ 	cstate->raw_buf_index = 0;
+ 	cstate->raw_buf_len = nbytes;
+ }
  
  /*
   *	 DoCopy executes the SQL COPY statement
*************** ProcessCopyOptions(CopyState cstate,
*** 1013,1018 ****
--- 1069,1076 ----
  				cstate->csv_mode = true;
  			else if (strcmp(fmt, "binary") == 0)
  				cstate->binary = true;
+ 			else if (strcmp(fmt, "raw") == 0)
+ 				cstate->raw = true;
  			else
  				ereport(ERROR,
  						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
*************** ProcessCopyOptions(CopyState cstate,
*** 1162,1176 ****
  	 * Check for incompatible options (must do these two before inserting
  	 * defaults)
  	 */
! 	if (cstate->binary && cstate->delim)
  		ereport(ERROR,
  				(errcode(ERRCODE_SYNTAX_ERROR),
! 				 errmsg("cannot specify DELIMITER in BINARY mode")));
  
! 	if (cstate->binary && cstate->null_print)
  		ereport(ERROR,
  				(errcode(ERRCODE_SYNTAX_ERROR),
! 				 errmsg("cannot specify NULL in BINARY mode")));
  
  	/* Set defaults for omitted options */
  	if (!cstate->delim)
--- 1220,1239 ----
  	 * Check for incompatible options (must do these two before inserting
  	 * defaults)
  	 */
! 	if ((cstate->binary || cstate->raw) && cstate->delim)
  		ereport(ERROR,
  				(errcode(ERRCODE_SYNTAX_ERROR),
! 				 errmsg("cannot specify DELIMITER in BINARY or RAW mode")));
  
! 	if ((cstate->binary || cstate->raw) && cstate->null_print)
  		ereport(ERROR,
  				(errcode(ERRCODE_SYNTAX_ERROR),
! 				 errmsg("cannot specify NULL in BINARY or RAW mode")));
! 
! 	if (cstate->raw && cstate->oids)
! 		ereport(ERROR,
! 				(errcode(ERRCODE_SYNTAX_ERROR),
! 				 errmsg("cannot specify OIDS in RAW mode")));
  
  	/* Set defaults for omitted options */
  	if (!cstate->delim)
*************** BeginCopy(bool is_from,
*** 1608,1613 ****
--- 1671,1690 ----
  		}
  	}
  
+ 	/*
+ 	 * Initialize the field "row_processed" for one row output in RAW mode,
+ 	 * and ensure only one output column.
+ 	 */
+ 	if (cstate->raw)
+ 	{
+ 		cstate->row_processed = false;
+ 
+ 		if (num_phys_attrs > 1)
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ 					 errmsg("only single column result is allowed in RAW mode")));
+ 	}
+ 
  	/* Use client encoding when ENCODING option is not specified. */
  	if (cstate->file_encoding < 0)
  		cstate->file_encoding = pg_get_client_encoding();
*************** CopyTo(CopyState cstate)
*** 1876,1882 ****
  		Oid			out_func_oid;
  		bool		isvarlena;
  
! 		if (cstate->binary)
  			getTypeBinaryOutputInfo(attr[attnum - 1]->atttypid,
  									&out_func_oid,
  									&isvarlena);
--- 1953,1959 ----
  		Oid			out_func_oid;
  		bool		isvarlena;
  
! 		if (cstate->binary || cstate->raw)
  			getTypeBinaryOutputInfo(attr[attnum - 1]->atttypid,
  									&out_func_oid,
  									&isvarlena);
*************** CopyTo(CopyState cstate)
*** 1915,1921 ****
  		tmp = 0;
  		CopySendInt32(cstate, tmp);
  	}
! 	else
  	{
  		/*
  		 * For non-binary copy, we need to convert null_print to file
--- 1992,1998 ----
  		tmp = 0;
  		CopySendInt32(cstate, tmp);
  	}
! 	else if (!cstate->raw)
  	{
  		/*
  		 * For non-binary copy, we need to convert null_print to file
*************** CopyTo(CopyState cstate)
*** 1983,1989 ****
  	else
  	{
  		/* run the plan --- the dest receiver will send tuples */
! 		ExecutorRun(cstate->queryDesc, ForwardScanDirection, 0L);
  		processed = ((DR_copy *) cstate->queryDesc->dest)->processed;
  	}
  
--- 2060,2066 ----
  	else
  	{
  		/* run the plan --- the dest receiver will send tuples */
! 		ExecutorRun(cstate->queryDesc, ForwardScanDirection, cstate->raw ? 2L : 0L);
  		processed = ((DR_copy *) cstate->queryDesc->dest)->processed;
  	}
  
*************** CopyOneRowTo(CopyState cstate, Oid tuple
*** 2027,2032 ****
--- 2104,2117 ----
  			CopySendInt32(cstate, tupleOid);
  		}
  	}
+ 	else if (cstate->raw)
+ 	{
+ 		if (cstate->row_processed)
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_TOO_MANY_ROWS),
+ 					 errmsg("only single row result is allowed in RAW mode")));
+ 		cstate->row_processed = true;
+ 	}
  	else
  	{
  		/* Text format has no per-tuple header, but send OID if wanted */
*************** CopyOneRowTo(CopyState cstate, Oid tuple
*** 2046,2052 ****
  		Datum		value = values[attnum - 1];
  		bool		isnull = nulls[attnum - 1];
  
! 		if (!cstate->binary)
  		{
  			if (need_delim)
  				CopySendChar(cstate, cstate->delim[0]);
--- 2131,2137 ----
  		Datum		value = values[attnum - 1];
  		bool		isnull = nulls[attnum - 1];
  
! 		if (!cstate->binary && !cstate->raw)
  		{
  			if (need_delim)
  				CopySendChar(cstate, cstate->delim[0]);
*************** CopyOneRowTo(CopyState cstate, Oid tuple
*** 2055,2068 ****
  
  		if (isnull)
  		{
! 			if (!cstate->binary)
  				CopySendString(cstate, cstate->null_print_client);
  			else
  				CopySendInt32(cstate, -1);
  		}
  		else
  		{
! 			if (!cstate->binary)
  			{
  				string = OutputFunctionCall(&out_functions[attnum - 1],
  											value);
--- 2140,2171 ----
  
  		if (isnull)
  		{
! 			if (cstate->raw)
! 					ereport(ERROR,
! 						(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
! 						  errmsg("cannot copy NULL value in RAW mode")));
! 			else if (!cstate->binary)
  				CopySendString(cstate, cstate->null_print_client);
  			else
  				CopySendInt32(cstate, -1);
  		}
  		else
  		{
! 			if (cstate->binary || cstate->raw)
! 			{
! 				bytea	   *outputbytes;
! 
! 				outputbytes = SendFunctionCall(&out_functions[attnum - 1],
! 											   value);
! 
! 				/* send the size only in binary mode */
! 				if (cstate->binary)
! 					CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
! 
! 				CopySendData(cstate, VARDATA(outputbytes),
! 							 VARSIZE(outputbytes) - VARHDRSZ);
! 			}
! 			else
  			{
  				string = OutputFunctionCall(&out_functions[attnum - 1],
  											value);
*************** CopyOneRowTo(CopyState cstate, Oid tuple
*** 2073,2088 ****
  				else
  					CopyAttributeOutText(cstate, string);
  			}
- 			else
- 			{
- 				bytea	   *outputbytes;
- 
- 				outputbytes = SendFunctionCall(&out_functions[attnum - 1],
- 											   value);
- 				CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
- 				CopySendData(cstate, VARDATA(outputbytes),
- 							 VARSIZE(outputbytes) - VARHDRSZ);
- 			}
  		}
  	}
  
--- 2176,2181 ----
*************** BeginCopyFrom(Relation rel,
*** 2712,2718 ****
  			continue;
  
  		/* Fetch the input function and typioparam info */
! 		if (cstate->binary)
  			getTypeBinaryInputInfo(attr[attnum - 1]->atttypid,
  								   &in_func_oid, &typioparams[attnum - 1]);
  		else
--- 2805,2811 ----
  			continue;
  
  		/* Fetch the input function and typioparam info */
! 		if (cstate->binary || cstate->raw)
  			getTypeBinaryInputInfo(attr[attnum - 1]->atttypid,
  								   &in_func_oid, &typioparams[attnum - 1]);
  		else
*************** BeginCopyFrom(Relation rel,
*** 2811,2817 ****
  		}
  	}
  
! 	if (!cstate->binary)
  	{
  		/* must rely on user to tell us... */
  		cstate->file_has_oids = cstate->oids;
--- 2904,2910 ----
  		}
  	}
  
! 	if (!cstate->binary || cstate->raw)
  	{
  		/* must rely on user to tell us... */
  		cstate->file_has_oids = cstate->oids;
*************** BeginCopyFrom(Relation rel,
*** 2863,2869 ****
  	}
  
  	/* create workspace for CopyReadAttributes results */
! 	if (!cstate->binary)
  	{
  		AttrNumber	attr_count = list_length(cstate->attnumlist);
  		int			nfields = cstate->file_has_oids ? (attr_count + 1) : attr_count;
--- 2956,2962 ----
  	}
  
  	/* create workspace for CopyReadAttributes results */
! 	if (!cstate->binary && !cstate->raw)
  	{
  		AttrNumber	attr_count = list_length(cstate->attnumlist);
  		int			nfields = cstate->file_has_oids ? (attr_count + 1) : attr_count;
*************** NextCopyFrom(CopyState cstate, ExprConte
*** 2968,2975 ****
  	MemSet(values, 0, num_phys_attrs * sizeof(Datum));
  	MemSet(nulls, true, num_phys_attrs * sizeof(bool));
  
! 	if (!cstate->binary)
  	{
  		char	  **field_strings;
  		ListCell   *cur;
  		int			fldct;
--- 3061,3180 ----
  	MemSet(values, 0, num_phys_attrs * sizeof(Datum));
  	MemSet(nulls, true, num_phys_attrs * sizeof(bool));
  
! 	if (cstate->binary)
  	{
+ 		int16		fld_count;
+ 		ListCell   *cur;
+ 
+ 		cstate->cur_lineno++;
+ 
+ 		if (!CopyGetInt16(cstate, &fld_count))
+ 		{
+ 			/* EOF detected (end of file, or protocol-level EOF) */
+ 			return false;
+ 		}
+ 
+ 		if (fld_count == -1)
+ 		{
+ 			/*
+ 			 * Received EOF marker.  In a V3-protocol copy, wait for the
+ 			 * protocol-level EOF, and complain if it doesn't come
+ 			 * immediately.  This ensures that we correctly handle CopyFail,
+ 			 * if client chooses to send that now.
+ 			 *
+ 			 * Note that we MUST NOT try to read more data in an old-protocol
+ 			 * copy, since there is no protocol-level EOF marker then.  We
+ 			 * could go either way for copy from file, but choose to throw
+ 			 * error if there's data after the EOF marker, for consistency
+ 			 * with the new-protocol case.
+ 			 */
+ 			char		dummy;
+ 
+ 			if (cstate->copy_dest != COPY_OLD_FE &&
+ 				CopyGetData(cstate, &dummy, 1, 1) > 0)
+ 				ereport(ERROR,
+ 						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ 						 errmsg("received copy data after EOF marker")));
+ 			return false;
+ 		}
+ 
+ 		if (fld_count != attr_count)
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ 					 errmsg("row field count is %d, expected %d",
+ 							(int) fld_count, attr_count)));
+ 
+ 		if (file_has_oids)
+ 		{
+ 			Oid			loaded_oid;
+ 
+ 			cstate->cur_attname = "oid";
+ 			loaded_oid =
+ 				DatumGetObjectId(CopyReadBinaryAttribute(cstate,
+ 														 0,
+ 													&cstate->oid_in_function,
+ 													  cstate->oid_typioparam,
+ 														 -1,
+ 														 &isnull));
+ 			if (isnull || loaded_oid == InvalidOid)
+ 				ereport(ERROR,
+ 						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ 						 errmsg("invalid OID in COPY data")));
+ 			cstate->cur_attname = NULL;
+ 			if (cstate->oids && tupleOid != NULL)
+ 				*tupleOid = loaded_oid;
+ 		}
+ 
+ 		i = 0;
+ 		foreach(cur, cstate->attnumlist)
+ 		{
+ 			int			attnum = lfirst_int(cur);
+ 			int			m = attnum - 1;
+ 
+ 			cstate->cur_attname = NameStr(attr[m]->attname);
+ 			i++;
+ 			values[m] = CopyReadBinaryAttribute(cstate,
+ 												i,
+ 												&in_functions[m],
+ 												typioparams[m],
+ 												attr[m]->atttypmod,
+ 												&nulls[m]);
+ 			cstate->cur_attname = NULL;
+ 		}
+ 	}
+ 	else if (cstate->raw)
+ 	{
+ 		if (cstate->row_processed)
+ 			return false;
+ 
+ 		CopyLoadallRawBuf(cstate);
+ 		cstate->cur_attname = NameStr(attr[0]->attname);
+ 
+ 		if (cstate->attribute_buf.data != NULL)
+ 			pfree(cstate->attribute_buf.data);
+ 
+ 		cstate->attribute_buf.data = cstate->raw_buf;
+ 		cstate->attribute_buf.len = cstate->raw_buf_len;
+ 		cstate->attribute_buf.cursor = 0;
+ 
+ 		cstate->raw_buf = NULL;
+ 
+ 		/* Call the column type's binary input converter */
+ 		values[0] = ReceiveFunctionCall(&in_functions[0], &cstate->attribute_buf,
+ 								 typioparams[0], attr[0]->atttypmod);
+ 		nulls[0] = false;
+ 
+ 		/* Trouble if it didn't eat the whole buffer */
+ 		if (cstate->attribute_buf.cursor != cstate->attribute_buf.len)
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+ 					 errmsg("incorrect binary data format")));
+ 
+ 		cstate->row_processed = true;
+ 	}
+ 	else
+ 	{
+ 		/* text */
  		char	  **field_strings;
  		ListCell   *cur;
  		int			fldct;
*************** NextCopyFrom(CopyState cstate, ExprConte
*** 3074,3161 ****
  
  		Assert(fieldno == nfields);
  	}
- 	else
- 	{
- 		/* binary */
- 		int16		fld_count;
- 		ListCell   *cur;
- 
- 		cstate->cur_lineno++;
- 
- 		if (!CopyGetInt16(cstate, &fld_count))
- 		{
- 			/* EOF detected (end of file, or protocol-level EOF) */
- 			return false;
- 		}
- 
- 		if (fld_count == -1)
- 		{
- 			/*
- 			 * Received EOF marker.  In a V3-protocol copy, wait for the
- 			 * protocol-level EOF, and complain if it doesn't come
- 			 * immediately.  This ensures that we correctly handle CopyFail,
- 			 * if client chooses to send that now.
- 			 *
- 			 * Note that we MUST NOT try to read more data in an old-protocol
- 			 * copy, since there is no protocol-level EOF marker then.  We
- 			 * could go either way for copy from file, but choose to throw
- 			 * error if there's data after the EOF marker, for consistency
- 			 * with the new-protocol case.
- 			 */
- 			char		dummy;
- 
- 			if (cstate->copy_dest != COPY_OLD_FE &&
- 				CopyGetData(cstate, &dummy, 1, 1) > 0)
- 				ereport(ERROR,
- 						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
- 						 errmsg("received copy data after EOF marker")));
- 			return false;
- 		}
- 
- 		if (fld_count != attr_count)
- 			ereport(ERROR,
- 					(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
- 					 errmsg("row field count is %d, expected %d",
- 							(int) fld_count, attr_count)));
- 
- 		if (file_has_oids)
- 		{
- 			Oid			loaded_oid;
- 
- 			cstate->cur_attname = "oid";
- 			loaded_oid =
- 				DatumGetObjectId(CopyReadBinaryAttribute(cstate,
- 														 0,
- 													&cstate->oid_in_function,
- 													  cstate->oid_typioparam,
- 														 -1,
- 														 &isnull));
- 			if (isnull || loaded_oid == InvalidOid)
- 				ereport(ERROR,
- 						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
- 						 errmsg("invalid OID in COPY data")));
- 			cstate->cur_attname = NULL;
- 			if (cstate->oids && tupleOid != NULL)
- 				*tupleOid = loaded_oid;
- 		}
- 
- 		i = 0;
- 		foreach(cur, cstate->attnumlist)
- 		{
- 			int			attnum = lfirst_int(cur);
- 			int			m = attnum - 1;
- 
- 			cstate->cur_attname = NameStr(attr[m]->attname);
- 			i++;
- 			values[m] = CopyReadBinaryAttribute(cstate,
- 												i,
- 												&in_functions[m],
- 												typioparams[m],
- 												attr[m]->atttypmod,
- 												&nulls[m]);
- 			cstate->cur_attname = NULL;
- 		}
- 	}
  
  	/*
  	 * Now compute and insert any defaults available for the columns not
--- 3279,3284 ----
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
new file mode 100644
index 5f6260a..90d2aa5
*** a/src/test/regress/expected/copy2.out
--- b/src/test/regress/expected/copy2.out
*************** DROP FUNCTION truncate_in_subxact();
*** 466,468 ****
--- 466,481 ----
  DROP TABLE x, y;
  DROP FUNCTION fn_x_before();
  DROP FUNCTION fn_x_after();
+ CREATE TABLE x(a bytea);
+ INSERT INTO x VALUES('\x41484f4a0a');
+ INSERT INTO x VALUES('\x41484f4a0a');
+ -- should to fail
+ COPY (SELECT a,a FROM x LIMIT 1) TO STDOUT (FORMAT raw);
+ ERROR:  only single column result is allowed in RAW mode
+ COPY (SELECT a FROM x) TO STDOUT (FORMAT raw);
+ AHOJ
+ ERROR:  only single row result is allowed in RAW mode
+ -- should be ok
+ COPY (SELECT a FROM x LIMIT 1) TO STDOUT (FORMAT raw);
+ AHOJ
+ DROP TABLE x;
diff --git a/src/test/regress/input/copy.source b/src/test/regress/input/copy.source
new file mode 100644
index cb13606..d351b43
*** a/src/test/regress/input/copy.source
--- b/src/test/regress/input/copy.source
*************** this is just a line full of junk that wo
*** 133,135 ****
--- 133,195 ----
  \.
  
  copy copytest3 to stdout csv header;
+ 
+ -- copy raw
+ CREATE TABLE x(a bytea);
+ INSERT INTO x VALUES('\x41484f4a0a');
+ SELECT length(a) FROM x;
+ 
+ INSERT INTO x VALUES('\x41484f4a0a');
+ 
+ -- should to fail
+ COPY (SELECT a,a FROM x LIMIT 1) TO '@abs_builddir@/results/raw.data' (FORMAT raw);
+ COPY (SELECT a FROM x) TO '@abs_builddir@/results/raw.data' (FORMAT raw);
+ 
+ -- should be ok
+ COPY (SELECT a FROM x LIMIT 1) TO '@abs_builddir@/results/raw.data' (FORMAT raw);
+ TRUNCATE x;
+ COPY x FROM '@abs_builddir@/results/raw.data' (FORMAT raw);
+ SELECT length(a) FROM x;
+ COPY x TO stdout (FORMAT raw);
+ 
+ TRUNCATE x;
+ 
+ \COPY x FROM '@abs_builddir@/results/raw.data' (FORMAT raw)
+ SELECT length(a) FROM x;
+ COPY x TO stdout (FORMAT raw);
+ 
+ \COPY x TO '@abs_builddir@/results/raw2.data' (FORMAT raw)
+ TRUNCATE x;
+ 
+ \COPY x FROM '@abs_builddir@/results/raw2.data' (FORMAT raw)
+ SELECT length(a) FROM x;
+ COPY x TO stdout (FORMAT raw);
+ 
+ -- test big file
+ TRUNCATE x;
+ -- use different mechanism for load to bytea
+ \lo_import @abs_builddir@/data/hash.data
+ \set lo_oid :LASTOID
+ INSERT INTO x VALUES(lo_get(:lo_oid));
+ \lo_unlink :lo_oid
+ 
+ COPY x FROM '@abs_builddir@/data/hash.data' (FORMAT raw);
+ \COPY x FROM '@abs_builddir@/data/hash.data' (FORMAT raw)
+ 
+ SELECT md5(a), length(a) FROM x;
+ 
+ TRUNCATE x;
+ COPY x FROM '@abs_builddir@/data/hash.data' (FORMAT raw);
+ COPY x TO '@abs_builddir@/results/hash2.data' (FORMAT raw);
+ \COPY x TO '@abs_builddir@/results/hash3.data' (FORMAT raw)
+ 
+ -- read again
+ COPY x FROM '@abs_builddir@/results/hash2.data' (FORMAT raw);
+ \COPY x FROM '@abs_builddir@/results/hash3.data' (FORMAT raw)
+ -- cross
+ COPY x FROM '@abs_builddir@/results/hash3.data' (FORMAT raw);
+ \COPY x FROM '@abs_builddir@/results/hash2.data' (FORMAT raw)
+ 
+ SELECT md5(a), length(a) FROM x;
+ 
+ DROP TABLE x;
diff --git a/src/test/regress/output/copy.source b/src/test/regress/output/copy.source
new file mode 100644
index b7e372d..f555f58
*** a/src/test/regress/output/copy.source
--- b/src/test/regress/output/copy.source
*************** copy copytest3 to stdout csv header;
*** 95,97 ****
--- 95,183 ----
  c1,"col with , comma","col with "" quote"
  1,a,1
  2,b,2
+ -- copy raw
+ CREATE TABLE x(a bytea);
+ INSERT INTO x VALUES('\x41484f4a0a');
+ SELECT length(a) FROM x;
+  length 
+ --------
+       5
+ (1 row)
+ 
+ INSERT INTO x VALUES('\x41484f4a0a');
+ -- should to fail
+ COPY (SELECT a,a FROM x LIMIT 1) TO '@abs_builddir@/results/raw.data' (FORMAT raw);
+ ERROR:  only single column result is allowed in RAW mode
+ COPY (SELECT a FROM x) TO '@abs_builddir@/results/raw.data' (FORMAT raw);
+ ERROR:  only single row result is allowed in RAW mode
+ -- should be ok
+ COPY (SELECT a FROM x LIMIT 1) TO '@abs_builddir@/results/raw.data' (FORMAT raw);
+ TRUNCATE x;
+ COPY x FROM '@abs_builddir@/results/raw.data' (FORMAT raw);
+ SELECT length(a) FROM x;
+  length 
+ --------
+       5
+ (1 row)
+ 
+ COPY x TO stdout (FORMAT raw);
+ AHOJ
+ TRUNCATE x;
+ \COPY x FROM '@abs_builddir@/results/raw.data' (FORMAT raw)
+ SELECT length(a) FROM x;
+  length 
+ --------
+       5
+ (1 row)
+ 
+ COPY x TO stdout (FORMAT raw);
+ AHOJ
+ \COPY x TO '@abs_builddir@/results/raw2.data' (FORMAT raw)
+ TRUNCATE x;
+ \COPY x FROM '@abs_builddir@/results/raw2.data' (FORMAT raw)
+ SELECT length(a) FROM x;
+  length 
+ --------
+       5
+ (1 row)
+ 
+ COPY x TO stdout (FORMAT raw);
+ AHOJ
+ -- test big file
+ TRUNCATE x;
+ -- use different mechanism for load to bytea
+ \lo_import @abs_builddir@/data/hash.data
+ \set lo_oid :LASTOID
+ INSERT INTO x VALUES(lo_get(:lo_oid));
+ \lo_unlink :lo_oid
+ COPY x FROM '@abs_builddir@/data/hash.data' (FORMAT raw);
+ \COPY x FROM '@abs_builddir@/data/hash.data' (FORMAT raw)
+ SELECT md5(a), length(a) FROM x;
+                md5                | length 
+ ----------------------------------+--------
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+ (3 rows)
+ 
+ TRUNCATE x;
+ COPY x FROM '@abs_builddir@/data/hash.data' (FORMAT raw);
+ COPY x TO '@abs_builddir@/results/hash2.data' (FORMAT raw);
+ \COPY x TO '@abs_builddir@/results/hash3.data' (FORMAT raw)
+ -- read again
+ COPY x FROM '@abs_builddir@/results/hash2.data' (FORMAT raw);
+ \COPY x FROM '@abs_builddir@/results/hash3.data' (FORMAT raw)
+ -- cross
+ COPY x FROM '@abs_builddir@/results/hash3.data' (FORMAT raw);
+ \COPY x FROM '@abs_builddir@/results/hash2.data' (FORMAT raw)
+ SELECT md5(a), length(a) FROM x;
+                md5                | length 
+ ----------------------------------+--------
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+  e446fe6ea5a347e69670633412c7f8cb | 153749
+ (5 rows)
+ 
+ DROP TABLE x;
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
new file mode 100644
index 39a9deb..e5703a5
*** a/src/test/regress/sql/copy2.sql
--- b/src/test/regress/sql/copy2.sql
*************** DROP FUNCTION truncate_in_subxact();
*** 333,335 ****
--- 333,348 ----
  DROP TABLE x, y;
  DROP FUNCTION fn_x_before();
  DROP FUNCTION fn_x_after();
+ 
+ CREATE TABLE x(a bytea);
+ INSERT INTO x VALUES('\x41484f4a0a');
+ INSERT INTO x VALUES('\x41484f4a0a');
+ 
+ -- should to fail
+ COPY (SELECT a,a FROM x LIMIT 1) TO STDOUT (FORMAT raw);
+ COPY (SELECT a FROM x) TO STDOUT (FORMAT raw);
+ 
+ -- should be ok
+ COPY (SELECT a FROM x LIMIT 1) TO STDOUT (FORMAT raw);
+ 
+ DROP TABLE x;

-- 
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Re: [HACKERS] raw output from copy

Reply via email to