Daniel Verite wrote:

> And in enlargeStringInfo() the patch adds this:
>       /*
>        * Maximum size for strings with allow_long=true.
>        * It must not exceed INT_MAX, as the StringInfo routines
>        * expect offsets into the buffer to fit into an int.
>        */
>       const int max_alloc_long = 0x7fffffff;
> 
> On a 32-bit arch, we can expect max_alloc_long == MaxAllocHugeSize,
> but on a 64-bit arch, it will be much smaller with MaxAllocHugeSize
> at (2^63)-1.

Yeah, you're right.  Here's a v4 of this patch, in which I've done some
further very small adjustments to your v3:

* I changed the 0x7fffffff hardcoded value with some arithmetic which
sholud have the same result on most architectures:

        /*
         * Determine the upper size limit.  The fact that the StringInfo API 
uses
         * "int" everywhere limits us to int's width even for "long_ok" strings.
         */
        limit = str->long_ok ?
                (((Size) 1) << (Min(sizeof(int), sizeof(Size)) * 8 - 1)) - 1 :
                MaxAllocSize;

Initially I just had "sizeof(int)" instead of the Min(), and a
StaticAssert for sizeof(int) <= sizeof(Size), on the grounds that no
actual platform is likely to break that (though I think the C standard
does allow it).  But I realized that doing it this way is simple enough;
and furthermore, in any platforms where int is 8 bytes (ILP64), this
would automatically allow for 63-bit-sized stringinfos.  I don't think
this exists today, but wikipedia[1] lists two obsolete ones, [2] and [3].

[1] https://en.wikipedia.org/wiki/64-bit_computing#64-bit_data_models
[2] https://en.wikipedia.org/wiki/HAL_SPARC64
[3] https://en.wikipedia.org/wiki/UNICOS

* I reverted the enlargement looping logic in enlargeStringInfo() to
pretty much the original code (minus the cast).

* I tweaked a few comments.

The big advantage of your v3 patch is that it can be backpatched without
fear of breaking ABI, so I've struggled to maintain that property in my
changes.  We can further tweak in HEAD-only; for example change the API
to use Size instead of int.  I think that would be desirable, but let's
not do it until we have backpatched this one.

-- 
Álvaro Herrera                https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
diff --git a/src/backend/access/common/heaptuple.c 
b/src/backend/access/common/heaptuple.c
index e27ec78..631e555 100644
--- a/src/backend/access/common/heaptuple.c
+++ b/src/backend/access/common/heaptuple.c
@@ -741,7 +741,9 @@ heap_form_tuple(TupleDesc tupleDescriptor,
         * Allocate and zero the space needed.  Note that the tuple body and
         * HeapTupleData management structure are allocated in one chunk.
         */
-       tuple = (HeapTuple) palloc0(HEAPTUPLESIZE + len);
+       tuple = MemoryContextAllocExtended(CurrentMemoryContext,
+                                                                          
HEAPTUPLESIZE + len,
+                                                                          
MCXT_ALLOC_HUGE | MCXT_ALLOC_ZERO);
        tuple->t_data = td = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE);
 
        /*
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 3c81906..ec5d6f1 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -385,7 +385,7 @@ ReceiveCopyBegin(CopyState cstate)
                        pq_sendint(&buf, format, 2);            /* per-column 
formats */
                pq_endmessage(&buf);
                cstate->copy_dest = COPY_NEW_FE;
-               cstate->fe_msgbuf = makeStringInfo();
+               cstate->fe_msgbuf = makeLongStringInfo();
        }
        else
        {
@@ -1907,7 +1907,7 @@ CopyTo(CopyState cstate)
        cstate->null_print_client = cstate->null_print;         /* default */
 
        /* We use fe_msgbuf as a per-row buffer regardless of copy_dest */
-       cstate->fe_msgbuf = makeStringInfo();
+       cstate->fe_msgbuf = makeLongStringInfo();
 
        /* Get info about the columns we need to process. */
        cstate->out_functions = (FmgrInfo *) palloc(num_phys_attrs * 
sizeof(FmgrInfo));
@@ -2742,8 +2742,8 @@ BeginCopyFrom(ParseState *pstate,
        cstate->cur_attval = NULL;
 
        /* Set up variables to avoid per-attribute overhead. */
-       initStringInfo(&cstate->attribute_buf);
-       initStringInfo(&cstate->line_buf);
+       initLongStringInfo(&cstate->attribute_buf);
+       initLongStringInfo(&cstate->line_buf);
        cstate->line_buf_converted = false;
        cstate->raw_buf = (char *) palloc(RAW_BUF_SIZE + 1);
        cstate->raw_buf_index = cstate->raw_buf_len = 0;
diff --git a/src/backend/lib/stringinfo.c b/src/backend/lib/stringinfo.c
index 7382e08..35f2eab 100644
--- a/src/backend/lib/stringinfo.c
+++ b/src/backend/lib/stringinfo.c
@@ -37,10 +37,28 @@ makeStringInfo(void)
 }
 
 /*
+ * makeLongStringInfo
+ *
+ * Same as makeStringInfo for larger strings.
+ */
+StringInfo
+makeLongStringInfo(void)
+{
+       StringInfo      res;
+
+       res = (StringInfo) palloc(sizeof(StringInfoData));
+
+       initLongStringInfo(res);
+
+       return res;
+}
+
+
+/*
  * initStringInfo
  *
  * Initialize a StringInfoData struct (with previously undefined contents)
- * to describe an empty string.
+ * to describe an empty string; don't enable long strings yet.
  */
 void
 initStringInfo(StringInfo str)
@@ -49,10 +67,23 @@ initStringInfo(StringInfo str)
 
        str->data = (char *) palloc(size);
        str->maxlen = size;
+       str->long_ok = false;
        resetStringInfo(str);
 }
 
 /*
+ * initLongStringInfo
+ *
+ * As initStringInfo, plus enable long strings.
+ */
+void
+initLongStringInfo(StringInfo str)
+{
+       initStringInfo(str);
+       str->long_ok = true;
+}
+
+/*
  * resetStringInfo
  *
  * Reset the StringInfo: the data buffer remains valid, but its
@@ -142,7 +173,7 @@ appendStringInfoVA(StringInfo str, const char *fmt, va_list 
args)
        /*
         * Return pvsnprintf's estimate of the space needed.  (Although this is
         * given as a size_t, we know it will fit in int because it's not more
-        * than MaxAllocSize.)
+        * than either MaxAllocSize or half an int's width.)
         */
        return (int) nprinted;
 }
@@ -244,7 +275,16 @@ appendBinaryStringInfo(StringInfo str, const char *data, 
int datalen)
 void
 enlargeStringInfo(StringInfo str, int needed)
 {
-       int                     newlen;
+       Size            newlen;
+       Size            limit;
+
+       /*
+        * Determine the upper size limit.  The fact that the StringInfo API 
uses
+        * "int" everywhere limits us to int's width even for "long_ok" strings.
+        */
+       limit = str->long_ok ?
+               (((Size) 1) << (Min(sizeof(int), sizeof(Size)) * 8 - 1)) - 1 :
+               MaxAllocSize;
 
        /*
         * Guard against out-of-range "needed" values.  Without this, we can get
@@ -252,7 +292,7 @@ enlargeStringInfo(StringInfo str, int needed)
         */
        if (needed < 0)                         /* should not happen */
                elog(ERROR, "invalid string enlargement request size: %d", 
needed);
-       if (((Size) needed) >= (MaxAllocSize - (Size) str->len))
+       if (((Size) needed) >= (limit - (Size) str->len))
                ereport(ERROR,
                                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                                 errmsg("out of memory"),
@@ -261,7 +301,7 @@ enlargeStringInfo(StringInfo str, int needed)
 
        needed += str->len + 1;         /* total space required now */
 
-       /* Because of the above test, we now have needed <= MaxAllocSize */
+       /* Because of the above test, we now have needed <= limit */
 
        if (needed <= str->maxlen)
                return;                                 /* got enough space 
already */
@@ -276,14 +316,14 @@ enlargeStringInfo(StringInfo str, int needed)
                newlen = 2 * newlen;
 
        /*
-        * Clamp to MaxAllocSize in case we went past it.  Note we are assuming
-        * here that MaxAllocSize <= INT_MAX/2, else the above loop could
-        * overflow.  We will still have newlen >= needed.
+        * Clamp to the limit in case we went past it.  Note we are assuming 
here
+        * that limit <= INT_MAX/2, else the above loop could overflow.  We will
+        * still have newlen >= needed.
         */
-       if (newlen > (int) MaxAllocSize)
-               newlen = (int) MaxAllocSize;
+       if (newlen > limit)
+               newlen = limit;
 
-       str->data = (char *) repalloc(str->data, newlen);
+       str->data = (char *) repalloc_huge(str->data, (Size) newlen);
 
        str->maxlen = newlen;
 }
diff --git a/src/include/lib/stringinfo.h b/src/include/lib/stringinfo.h
index f644067..f65b12e 100644
--- a/src/include/lib/stringinfo.h
+++ b/src/include/lib/stringinfo.h
@@ -30,6 +30,8 @@
  *             cursor  is initialized to zero by makeStringInfo or 
initStringInfo,
  *                             but is not otherwise touched by the 
stringinfo.c routines.
  *                             Some routines use it to scan through a 
StringInfo.
+ *             long_ok whether this StringInfo can allocate more than 
MaxAllocSize
+ *                             bytes (but still up to 2GB).
  *-------------------------
  */
 typedef struct StringInfoData
@@ -38,6 +40,7 @@ typedef struct StringInfoData
        int                     len;
        int                     maxlen;
        int                     cursor;
+       bool            long_ok;
 } StringInfoData;
 
 typedef StringInfoData *StringInfo;
@@ -67,9 +70,12 @@ typedef StringInfoData *StringInfo;
 
 /*------------------------
  * makeStringInfo
- * Create an empty 'StringInfoData' & return a pointer to it.
+ * makeLongStringInfo
+ * Create an empty 'StringInfoData' & return a pointer to it.  The former
+ * allows up to 1 GB in size, per palloc(); the latter allows longer strings.
  */
 extern StringInfo makeStringInfo(void);
+extern StringInfo makeLongStringInfo(void);
 
 /*------------------------
  * initStringInfo
@@ -79,6 +85,12 @@ extern StringInfo makeStringInfo(void);
 extern void initStringInfo(StringInfo str);
 
 /*------------------------
+ * initLongStringInfo
+ * Same as initStringInfo but for larger strings (up to 2GB)
+ */
+extern void initLongStringInfo(StringInfo str);
+
+/*------------------------
  * resetStringInfo
  * Clears the current content of the StringInfo, if any. The
  * StringInfo remains valid.
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to