Daniel Verite wrote: > And in enlargeStringInfo() the patch adds this: > /* > * Maximum size for strings with allow_long=true. > * It must not exceed INT_MAX, as the StringInfo routines > * expect offsets into the buffer to fit into an int. > */ > const int max_alloc_long = 0x7fffffff; > > On a 32-bit arch, we can expect max_alloc_long == MaxAllocHugeSize, > but on a 64-bit arch, it will be much smaller with MaxAllocHugeSize > at (2^63)-1.
Yeah, you're right. Here's a v4 of this patch, in which I've done some further very small adjustments to your v3: * I changed the 0x7fffffff hardcoded value with some arithmetic which sholud have the same result on most architectures: /* * Determine the upper size limit. The fact that the StringInfo API uses * "int" everywhere limits us to int's width even for "long_ok" strings. */ limit = str->long_ok ? (((Size) 1) << (Min(sizeof(int), sizeof(Size)) * 8 - 1)) - 1 : MaxAllocSize; Initially I just had "sizeof(int)" instead of the Min(), and a StaticAssert for sizeof(int) <= sizeof(Size), on the grounds that no actual platform is likely to break that (though I think the C standard does allow it). But I realized that doing it this way is simple enough; and furthermore, in any platforms where int is 8 bytes (ILP64), this would automatically allow for 63-bit-sized stringinfos. I don't think this exists today, but wikipedia[1] lists two obsolete ones, [2] and [3]. [1] https://en.wikipedia.org/wiki/64-bit_computing#64-bit_data_models [2] https://en.wikipedia.org/wiki/HAL_SPARC64 [3] https://en.wikipedia.org/wiki/UNICOS * I reverted the enlargement looping logic in enlargeStringInfo() to pretty much the original code (minus the cast). * I tweaked a few comments. The big advantage of your v3 patch is that it can be backpatched without fear of breaking ABI, so I've struggled to maintain that property in my changes. We can further tweak in HEAD-only; for example change the API to use Size instead of int. I think that would be desirable, but let's not do it until we have backpatched this one. -- Álvaro Herrera https://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c index e27ec78..631e555 100644 --- a/src/backend/access/common/heaptuple.c +++ b/src/backend/access/common/heaptuple.c @@ -741,7 +741,9 @@ heap_form_tuple(TupleDesc tupleDescriptor, * Allocate and zero the space needed. Note that the tuple body and * HeapTupleData management structure are allocated in one chunk. */ - tuple = (HeapTuple) palloc0(HEAPTUPLESIZE + len); + tuple = MemoryContextAllocExtended(CurrentMemoryContext, + HEAPTUPLESIZE + len, + MCXT_ALLOC_HUGE | MCXT_ALLOC_ZERO); tuple->t_data = td = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE); /* diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 3c81906..ec5d6f1 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -385,7 +385,7 @@ ReceiveCopyBegin(CopyState cstate) pq_sendint(&buf, format, 2); /* per-column formats */ pq_endmessage(&buf); cstate->copy_dest = COPY_NEW_FE; - cstate->fe_msgbuf = makeStringInfo(); + cstate->fe_msgbuf = makeLongStringInfo(); } else { @@ -1907,7 +1907,7 @@ CopyTo(CopyState cstate) cstate->null_print_client = cstate->null_print; /* default */ /* We use fe_msgbuf as a per-row buffer regardless of copy_dest */ - cstate->fe_msgbuf = makeStringInfo(); + cstate->fe_msgbuf = makeLongStringInfo(); /* Get info about the columns we need to process. */ cstate->out_functions = (FmgrInfo *) palloc(num_phys_attrs * sizeof(FmgrInfo)); @@ -2742,8 +2742,8 @@ BeginCopyFrom(ParseState *pstate, cstate->cur_attval = NULL; /* Set up variables to avoid per-attribute overhead. */ - initStringInfo(&cstate->attribute_buf); - initStringInfo(&cstate->line_buf); + initLongStringInfo(&cstate->attribute_buf); + initLongStringInfo(&cstate->line_buf); cstate->line_buf_converted = false; cstate->raw_buf = (char *) palloc(RAW_BUF_SIZE + 1); cstate->raw_buf_index = cstate->raw_buf_len = 0; diff --git a/src/backend/lib/stringinfo.c b/src/backend/lib/stringinfo.c index 7382e08..35f2eab 100644 --- a/src/backend/lib/stringinfo.c +++ b/src/backend/lib/stringinfo.c @@ -37,10 +37,28 @@ makeStringInfo(void) } /* + * makeLongStringInfo + * + * Same as makeStringInfo for larger strings. + */ +StringInfo +makeLongStringInfo(void) +{ + StringInfo res; + + res = (StringInfo) palloc(sizeof(StringInfoData)); + + initLongStringInfo(res); + + return res; +} + + +/* * initStringInfo * * Initialize a StringInfoData struct (with previously undefined contents) - * to describe an empty string. + * to describe an empty string; don't enable long strings yet. */ void initStringInfo(StringInfo str) @@ -49,10 +67,23 @@ initStringInfo(StringInfo str) str->data = (char *) palloc(size); str->maxlen = size; + str->long_ok = false; resetStringInfo(str); } /* + * initLongStringInfo + * + * As initStringInfo, plus enable long strings. + */ +void +initLongStringInfo(StringInfo str) +{ + initStringInfo(str); + str->long_ok = true; +} + +/* * resetStringInfo * * Reset the StringInfo: the data buffer remains valid, but its @@ -142,7 +173,7 @@ appendStringInfoVA(StringInfo str, const char *fmt, va_list args) /* * Return pvsnprintf's estimate of the space needed. (Although this is * given as a size_t, we know it will fit in int because it's not more - * than MaxAllocSize.) + * than either MaxAllocSize or half an int's width.) */ return (int) nprinted; } @@ -244,7 +275,16 @@ appendBinaryStringInfo(StringInfo str, const char *data, int datalen) void enlargeStringInfo(StringInfo str, int needed) { - int newlen; + Size newlen; + Size limit; + + /* + * Determine the upper size limit. The fact that the StringInfo API uses + * "int" everywhere limits us to int's width even for "long_ok" strings. + */ + limit = str->long_ok ? + (((Size) 1) << (Min(sizeof(int), sizeof(Size)) * 8 - 1)) - 1 : + MaxAllocSize; /* * Guard against out-of-range "needed" values. Without this, we can get @@ -252,7 +292,7 @@ enlargeStringInfo(StringInfo str, int needed) */ if (needed < 0) /* should not happen */ elog(ERROR, "invalid string enlargement request size: %d", needed); - if (((Size) needed) >= (MaxAllocSize - (Size) str->len)) + if (((Size) needed) >= (limit - (Size) str->len)) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("out of memory"), @@ -261,7 +301,7 @@ enlargeStringInfo(StringInfo str, int needed) needed += str->len + 1; /* total space required now */ - /* Because of the above test, we now have needed <= MaxAllocSize */ + /* Because of the above test, we now have needed <= limit */ if (needed <= str->maxlen) return; /* got enough space already */ @@ -276,14 +316,14 @@ enlargeStringInfo(StringInfo str, int needed) newlen = 2 * newlen; /* - * Clamp to MaxAllocSize in case we went past it. Note we are assuming - * here that MaxAllocSize <= INT_MAX/2, else the above loop could - * overflow. We will still have newlen >= needed. + * Clamp to the limit in case we went past it. Note we are assuming here + * that limit <= INT_MAX/2, else the above loop could overflow. We will + * still have newlen >= needed. */ - if (newlen > (int) MaxAllocSize) - newlen = (int) MaxAllocSize; + if (newlen > limit) + newlen = limit; - str->data = (char *) repalloc(str->data, newlen); + str->data = (char *) repalloc_huge(str->data, (Size) newlen); str->maxlen = newlen; } diff --git a/src/include/lib/stringinfo.h b/src/include/lib/stringinfo.h index f644067..f65b12e 100644 --- a/src/include/lib/stringinfo.h +++ b/src/include/lib/stringinfo.h @@ -30,6 +30,8 @@ * cursor is initialized to zero by makeStringInfo or initStringInfo, * but is not otherwise touched by the stringinfo.c routines. * Some routines use it to scan through a StringInfo. + * long_ok whether this StringInfo can allocate more than MaxAllocSize + * bytes (but still up to 2GB). *------------------------- */ typedef struct StringInfoData @@ -38,6 +40,7 @@ typedef struct StringInfoData int len; int maxlen; int cursor; + bool long_ok; } StringInfoData; typedef StringInfoData *StringInfo; @@ -67,9 +70,12 @@ typedef StringInfoData *StringInfo; /*------------------------ * makeStringInfo - * Create an empty 'StringInfoData' & return a pointer to it. + * makeLongStringInfo + * Create an empty 'StringInfoData' & return a pointer to it. The former + * allows up to 1 GB in size, per palloc(); the latter allows longer strings. */ extern StringInfo makeStringInfo(void); +extern StringInfo makeLongStringInfo(void); /*------------------------ * initStringInfo @@ -79,6 +85,12 @@ extern StringInfo makeStringInfo(void); extern void initStringInfo(StringInfo str); /*------------------------ + * initLongStringInfo + * Same as initStringInfo but for larger strings (up to 2GB) + */ +extern void initLongStringInfo(StringInfo str); + +/*------------------------ * resetStringInfo * Clears the current content of the StringInfo, if any. The * StringInfo remains valid.
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers