On 6/27/20 3:43 PM, Tom Lane wrote:
> Joe Conway <m...@joeconway.com> writes:
>> The attached patch fixes this for me. I think it ought to be backpatched 
>> through
>> pg11.
> 
>> Comments?
> 
> 1. Doesn't seem to be accounting for the possibility of an error in fread().
> 
> 2. Don't we want to remove the stat() call altogether, if we're not
> going to believe its length?
> 
> 3. This bit might need to cast the RHS to int64:
>       if (bytes_to_read > (MaxAllocSize - VARHDRSZ))
> otherwise it might be treated as an unsigned comparison.
> Or you could check for bytes_to_read < 0 separately.
> 
> 4. appendStringInfoString seems like quite the wrong thing to use
> when the input is binary data.
> 
> 5. Don't like the comment.  Whether the file is virtual or not isn't
> very relevant here.
> 
> 6. If the file size exceeds 1GB, I fear we'll get some rather opaque
> failure from the stringinfo infrastructure.  It'd be better to
> check for that here and give a file-too-large error.


All good stuff -- I believe the attached checks all the boxes.

I noted while at this, that the current code can never hit this case:

!       if (bytes_to_read < 0)
!       {
!               if (seek_offset < 0)
!                       bytes_to_read = -seek_offset;

The intention here seems to be that if you pass bytes_to_read = -1 with a
negative offset, it will give you the last offset bytes of the file.

But all of the SQL exposed paths disallow an explicit negative value for
bytes_to_read. This was also not documented as far as I can tell so I eliminated
that case in the attached. Is that actually a case I should fix/support instead?

Separately, it seems to me that a two argument version of pg_read_file() would
be useful:

  pg_read_file('filename', offset)

In that case bytes_to_read = -1 could be passed down in order to read the entire
file after the offset. In fact I think that would nicely handle the negative
offset case as well.

Joe

-- 
Crunchy Data - http://crunchydata.com
PostgreSQL Support for Secure Enterprises
Consulting, Training, & Open Source Development
diff --git a/contrib/adminpack/expected/adminpack.out b/contrib/adminpack/expected/adminpack.out
index 5738b0f..edf3ebf 100644
*** a/contrib/adminpack/expected/adminpack.out
--- b/contrib/adminpack/expected/adminpack.out
*************** SELECT pg_file_rename('test_file1', 'tes
*** 79,85 ****
  (1 row)
  
  SELECT pg_read_file('test_file1');  -- not there
! ERROR:  could not stat file "test_file1": No such file or directory
  SELECT pg_read_file('test_file2');
   pg_read_file 
  --------------
--- 79,85 ----
  (1 row)
  
  SELECT pg_read_file('test_file1');  -- not there
! ERROR:  could not open file "test_file1" for reading: No such file or directory
  SELECT pg_read_file('test_file2');
   pg_read_file 
  --------------
*************** SELECT pg_file_rename('test_file2', 'tes
*** 108,114 ****
  (1 row)
  
  SELECT pg_read_file('test_file2');  -- not there
! ERROR:  could not stat file "test_file2": No such file or directory
  SELECT pg_read_file('test_file3');
   pg_read_file 
  --------------
--- 108,114 ----
  (1 row)
  
  SELECT pg_read_file('test_file2');  -- not there
! ERROR:  could not open file "test_file2" for reading: No such file or directory
  SELECT pg_read_file('test_file3');
   pg_read_file 
  --------------
diff --git a/src/backend/utils/adt/genfile.c b/src/backend/utils/adt/genfile.c
index ceaa618..6363dbe 100644
*** a/src/backend/utils/adt/genfile.c
--- b/src/backend/utils/adt/genfile.c
***************
*** 36,41 ****
--- 36,42 ----
  #include "utils/syscache.h"
  #include "utils/timestamp.h"
  
+ #define READBUF_SIZE	4096
  
  /*
   * Convert a "text" filename argument to C string, and check it's allowable.
*************** read_binary_file(const char *filename, i
*** 106,138 ****
  				 bool missing_ok)
  {
  	bytea	   *buf;
! 	size_t		nbytes;
  	FILE	   *file;
  
! 	if (bytes_to_read < 0)
! 	{
! 		if (seek_offset < 0)
! 			bytes_to_read = -seek_offset;
! 		else
! 		{
! 			struct stat fst;
! 
! 			if (stat(filename, &fst) < 0)
! 			{
! 				if (missing_ok && errno == ENOENT)
! 					return NULL;
! 				else
! 					ereport(ERROR,
! 							(errcode_for_file_access(),
! 							 errmsg("could not stat file \"%s\": %m", filename)));
! 			}
! 
! 			bytes_to_read = fst.st_size - seek_offset;
! 		}
! 	}
! 
! 	/* not sure why anyone thought that int64 length was a good idea */
! 	if (bytes_to_read > (MaxAllocSize - VARHDRSZ))
  		ereport(ERROR,
  				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
  				 errmsg("requested length too large")));
--- 107,117 ----
  				 bool missing_ok)
  {
  	bytea	   *buf;
! 	size_t		nbytes = 0;
  	FILE	   *file;
  
! 	/* clamp request size to what we can actually deliver */
! 	if (bytes_to_read > (int64) (MaxAllocSize - VARHDRSZ))
  		ereport(ERROR,
  				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
  				 errmsg("requested length too large")));
*************** read_binary_file(const char *filename, i
*** 154,162 ****
  				(errcode_for_file_access(),
  				 errmsg("could not seek in file \"%s\": %m", filename)));
  
! 	buf = (bytea *) palloc((Size) bytes_to_read + VARHDRSZ);
  
! 	nbytes = fread(VARDATA(buf), 1, (size_t) bytes_to_read, file);
  
  	if (ferror(file))
  		ereport(ERROR,
--- 133,169 ----
  				(errcode_for_file_access(),
  				 errmsg("could not seek in file \"%s\": %m", filename)));
  
! 	if (bytes_to_read > 0)
! 	{
! 		/* If passed explicit read size just do it */
! 		buf = (bytea *) palloc((Size) bytes_to_read + VARHDRSZ);
  
! 		nbytes = fread(VARDATA(buf), 1, (size_t) bytes_to_read, file);
! 	}
! 	else
! 	{
! 		/* Negative read size, read rest of file */
! 		StringInfoData	sbuf;
! 		size_t			rbytes = 0;
! 		char			rbuf[READBUF_SIZE];
! 
! 		initStringInfo(&sbuf);
! 		while (!(feof(file) || ferror(file)))
! 		{
! 			rbytes = fread(rbuf, 1, (size_t) READBUF_SIZE, file);
! 
! 			nbytes += rbytes;
! 			if (nbytes > (MaxAllocSize - VARHDRSZ))
! 				ereport(ERROR,
! 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
! 						 errmsg("requested length too large")));
! 
! 			appendBinaryStringInfo(&sbuf, rbuf, rbytes);
! 		}
! 
! 		buf = (bytea *) palloc((Size) nbytes + VARHDRSZ);
! 		memcpy(VARDATA(buf), sbuf.data, nbytes);
! 	}
  
  	if (ferror(file))
  		ereport(ERROR,

Attachment: signature.asc
Description: OpenPGP digital signature

Reply via email to