I wrote: > The "offsets-and-lengths" patch seems like the approach we ought to > compare to my patch, but it looks pretty unfinished to me: AFAICS it > includes logic to understand offsets sprinkled into a mostly-lengths > array, but no logic that would actually *store* any such offsets, > which means it's going to act just like my patch for performance > purposes.
> In the interests of pushing this forward, I will work today on > trying to finish and review Heikki's offsets-and-lengths patch > so that we have something we can do performance testing on. > I doubt that the performance testing will tell us anything we > don't expect, but we should do it anyway. I've now done that, and attached is what I think would be a committable version. Having done this work, I no longer think that this approach is significantly messier code-wise than the all-lengths version, and it does have the merit of not degrading on very large objects/arrays. So at the moment I'm leaning to this solution not the all-lengths one. To get a sense of the compression effects of varying the stride distance, I repeated the compression measurements I'd done on 14 August with Pavel's geometry data (<24077.1408052...@sss.pgh.pa.us>). The upshot of that was min max avg external text representation 220 172685 880.3 JSON representation (compressed text) 224 78565 541.3 pg_column_size, JSONB HEAD repr. 225 82540 639.0 pg_column_size, all-lengths repr. 225 66794 531.1 Here's what I get with this patch and different stride distances: JB_OFFSET_STRIDE = 8 225 68551 559.7 JB_OFFSET_STRIDE = 16 225 67601 552.3 JB_OFFSET_STRIDE = 32 225 67120 547.4 JB_OFFSET_STRIDE = 64 225 66886 546.9 JB_OFFSET_STRIDE = 128 225 66879 546.9 JB_OFFSET_STRIDE = 256 225 66846 546.8 So at least for that test data, 32 seems like the sweet spot. We are giving up a couple percent of space in comparison to the all-lengths version, but this is probably an acceptable tradeoff for not degrading on very large arrays. I've not done any speed testing. regards, tom lane
diff --git a/src/backend/utils/adt/jsonb.c b/src/backend/utils/adt/jsonb.c index 2fd87fc..9beebb3 100644 *** a/src/backend/utils/adt/jsonb.c --- b/src/backend/utils/adt/jsonb.c *************** jsonb_from_cstring(char *json, int len) *** 196,207 **** static size_t checkStringLen(size_t len) { ! if (len > JENTRY_POSMASK) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("string too long to represent as jsonb string"), errdetail("Due to an implementation restriction, jsonb strings cannot exceed %d bytes.", ! JENTRY_POSMASK))); return len; } --- 196,207 ---- static size_t checkStringLen(size_t len) { ! if (len > JENTRY_OFFLENMASK) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("string too long to represent as jsonb string"), errdetail("Due to an implementation restriction, jsonb strings cannot exceed %d bytes.", ! JENTRY_OFFLENMASK))); return len; } diff --git a/src/backend/utils/adt/jsonb_util.c b/src/backend/utils/adt/jsonb_util.c index 04f35bf..f157df3 100644 *** a/src/backend/utils/adt/jsonb_util.c --- b/src/backend/utils/adt/jsonb_util.c *************** *** 26,40 **** * in MaxAllocSize, and the number of elements (or pairs) must fit in the bits * reserved for that in the JsonbContainer.header field. * ! * (the total size of an array's elements is also limited by JENTRY_POSMASK, ! * but we're not concerned about that here) */ #define JSONB_MAX_ELEMS (Min(MaxAllocSize / sizeof(JsonbValue), JB_CMASK)) #define JSONB_MAX_PAIRS (Min(MaxAllocSize / sizeof(JsonbPair), JB_CMASK)) ! static void fillJsonbValue(JEntry *array, int index, char *base_addr, JsonbValue *result); ! static bool equalsJsonbScalarValue(JsonbValue *a, JsonbValue *b); static int compareJsonbScalarValue(JsonbValue *a, JsonbValue *b); static Jsonb *convertToJsonb(JsonbValue *val); static void convertJsonbValue(StringInfo buffer, JEntry *header, JsonbValue *val, int level); --- 26,41 ---- * in MaxAllocSize, and the number of elements (or pairs) must fit in the bits * reserved for that in the JsonbContainer.header field. * ! * (The total size of an array's or object's elements is also limited by ! * JENTRY_OFFLENMASK, but we're not concerned about that here.) */ #define JSONB_MAX_ELEMS (Min(MaxAllocSize / sizeof(JsonbValue), JB_CMASK)) #define JSONB_MAX_PAIRS (Min(MaxAllocSize / sizeof(JsonbPair), JB_CMASK)) ! static void fillJsonbValue(JsonbContainer *container, int index, ! char *base_addr, uint32 offset, JsonbValue *result); ! static bool equalsJsonbScalarValue(JsonbValue *a, JsonbValue *b); static int compareJsonbScalarValue(JsonbValue *a, JsonbValue *b); static Jsonb *convertToJsonb(JsonbValue *val); static void convertJsonbValue(StringInfo buffer, JEntry *header, JsonbValue *val, int level); *************** static void convertJsonbArray(StringInfo *** 42,48 **** static void convertJsonbObject(StringInfo buffer, JEntry *header, JsonbValue *val, int level); static void convertJsonbScalar(StringInfo buffer, JEntry *header, JsonbValue *scalarVal); ! static int reserveFromBuffer(StringInfo buffer, int len); static void appendToBuffer(StringInfo buffer, const char *data, int len); static void copyToBuffer(StringInfo buffer, int offset, const char *data, int len); static short padBufferToInt(StringInfo buffer); --- 43,49 ---- static void convertJsonbObject(StringInfo buffer, JEntry *header, JsonbValue *val, int level); static void convertJsonbScalar(StringInfo buffer, JEntry *header, JsonbValue *scalarVal); ! static int reserveFromBuffer(StringInfo buffer, int len); static void appendToBuffer(StringInfo buffer, const char *data, int len); static void copyToBuffer(StringInfo buffer, int offset, const char *data, int len); static short padBufferToInt(StringInfo buffer); *************** JsonbValueToJsonb(JsonbValue *val) *** 108,113 **** --- 109,166 ---- } /* + * Get the offset of the variable-length portion of a Jsonb node within + * the variable-length-data part of its container. The node is identified + * by index within the container's JEntry array. + */ + uint32 + getJsonbOffset(const JsonbContainer *jc, int index) + { + uint32 offset = 0; + int i; + + /* + * Start offset of this entry is equal to the end offset of the previous + * entry. Walk backwards to the most recent entry stored as an end + * offset, returning that offset plus any lengths in between. + */ + for (i = index - 1; i >= 0; i--) + { + offset += JBE_OFFLENFLD(jc->children[i]); + if (JBE_HAS_OFF(jc->children[i])) + break; + } + + return offset; + } + + /* + * Get the length of the variable-length portion of a Jsonb node. + * The node is identified by index within the container's JEntry array. + */ + uint32 + getJsonbLength(const JsonbContainer *jc, int index) + { + uint32 off; + uint32 len; + + /* + * If the length is stored directly in the JEntry, just return it. + * Otherwise, get the begin offset of the entry, and subtract that from + * the stored end+1 offset. + */ + if (JBE_HAS_OFF(jc->children[index])) + { + off = getJsonbOffset(jc, index); + len = JBE_OFFLENFLD(jc->children[index]) - off; + } + else + len = JBE_OFFLENFLD(jc->children[index]); + + return len; + } + + /* * BT comparator worker function. Returns an integer less than, equal to, or * greater than zero, indicating whether a is less than, equal to, or greater * than b. Consistent with the requirements for a B-Tree operator class *************** compareJsonbContainers(JsonbContainer *a *** 201,207 **** * * If the two values were of the same container type, then there'd * have been a chance to observe the variation in the number of ! * elements/pairs (when processing WJB_BEGIN_OBJECT, say). They're * either two heterogeneously-typed containers, or a container and * some scalar type. * --- 254,260 ---- * * If the two values were of the same container type, then there'd * have been a chance to observe the variation in the number of ! * elements/pairs (when processing WJB_BEGIN_OBJECT, say). They're * either two heterogeneously-typed containers, or a container and * some scalar type. * *************** findJsonbValueFromContainer(JsonbContain *** 272,295 **** { JEntry *children = container->children; int count = (container->header & JB_CMASK); ! JsonbValue *result = palloc(sizeof(JsonbValue)); Assert((flags & ~(JB_FARRAY | JB_FOBJECT)) == 0); if (flags & JB_FARRAY & container->header) { char *base_addr = (char *) (children + count); int i; for (i = 0; i < count; i++) { ! fillJsonbValue(children, i, base_addr, result); if (key->type == result->type) { if (equalsJsonbScalarValue(key, result)) return result; } } } else if (flags & JB_FOBJECT & container->header) --- 325,357 ---- { JEntry *children = container->children; int count = (container->header & JB_CMASK); ! JsonbValue *result; Assert((flags & ~(JB_FARRAY | JB_FOBJECT)) == 0); + /* Quick out without a palloc cycle if object/array is empty */ + if (count <= 0) + return NULL; + + result = palloc(sizeof(JsonbValue)); + if (flags & JB_FARRAY & container->header) { char *base_addr = (char *) (children + count); + uint32 offset = 0; int i; for (i = 0; i < count; i++) { ! fillJsonbValue(container, i, base_addr, offset, result); if (key->type == result->type) { if (equalsJsonbScalarValue(key, result)) return result; } + + JBE_ADVANCE_OFFSET(offset, children[i]); } } else if (flags & JB_FOBJECT & container->header) *************** findJsonbValueFromContainer(JsonbContain *** 297,332 **** /* Since this is an object, account for *Pairs* of Jentrys */ char *base_addr = (char *) (children + count * 2); uint32 stopLow = 0, ! stopMiddle; ! /* Object key past by caller must be a string */ Assert(key->type == jbvString); /* Binary search on object/pair keys *only* */ ! while (stopLow < count) { ! int index; int difference; JsonbValue candidate; ! /* ! * Note how we compensate for the fact that we're iterating ! * through pairs (not entries) throughout. ! */ ! stopMiddle = stopLow + (count - stopLow) / 2; ! ! index = stopMiddle * 2; candidate.type = jbvString; ! candidate.val.string.val = base_addr + JBE_OFF(children, index); ! candidate.val.string.len = JBE_LEN(children, index); difference = lengthCompareJsonbStringValue(&candidate, key); if (difference == 0) { ! /* Found our key, return value */ ! fillJsonbValue(children, index + 1, base_addr, result); return result; } --- 359,393 ---- /* Since this is an object, account for *Pairs* of Jentrys */ char *base_addr = (char *) (children + count * 2); uint32 stopLow = 0, ! stopHigh = count; ! /* Object key passed by caller must be a string */ Assert(key->type == jbvString); /* Binary search on object/pair keys *only* */ ! while (stopLow < stopHigh) { ! uint32 stopMiddle; int difference; JsonbValue candidate; ! stopMiddle = stopLow + (stopHigh - stopLow) / 2; candidate.type = jbvString; ! candidate.val.string.val = ! base_addr + getJsonbOffset(container, stopMiddle); ! candidate.val.string.len = getJsonbLength(container, stopMiddle); difference = lengthCompareJsonbStringValue(&candidate, key); if (difference == 0) { ! /* Found our key, return corresponding value */ ! int index = stopMiddle + count; ! ! fillJsonbValue(container, index, base_addr, ! getJsonbOffset(container, index), ! result); return result; } *************** findJsonbValueFromContainer(JsonbContain *** 335,341 **** if (difference < 0) stopLow = stopMiddle + 1; else ! count = stopMiddle; } } } --- 396,402 ---- if (difference < 0) stopLow = stopMiddle + 1; else ! stopHigh = stopMiddle; } } } *************** getIthJsonbValueFromContainer(JsonbConta *** 368,374 **** result = palloc(sizeof(JsonbValue)); ! fillJsonbValue(container->children, i, base_addr, result); return result; } --- 429,437 ---- result = palloc(sizeof(JsonbValue)); ! fillJsonbValue(container, i, base_addr, ! getJsonbOffset(container, i), ! result); return result; } *************** getIthJsonbValueFromContainer(JsonbConta *** 377,389 **** * A helper function to fill in a JsonbValue to represent an element of an * array, or a key or value of an object. * * A nested array or object will be returned as jbvBinary, ie. it won't be * expanded. */ static void ! fillJsonbValue(JEntry *children, int index, char *base_addr, JsonbValue *result) { ! JEntry entry = children[index]; if (JBE_ISNULL(entry)) { --- 440,459 ---- * A helper function to fill in a JsonbValue to represent an element of an * array, or a key or value of an object. * + * The node's JEntry is at container->children[index], and its variable-length + * data is at base_addr + offset. We make the caller determine the offset + * since in many cases the caller can amortize that work across multiple + * children. When it can't, it can just call getJsonbOffset(). + * * A nested array or object will be returned as jbvBinary, ie. it won't be * expanded. */ static void ! fillJsonbValue(JsonbContainer *container, int index, ! char *base_addr, uint32 offset, ! JsonbValue *result) { ! JEntry entry = container->children[index]; if (JBE_ISNULL(entry)) { *************** fillJsonbValue(JEntry *children, int ind *** 392,405 **** else if (JBE_ISSTRING(entry)) { result->type = jbvString; ! result->val.string.val = base_addr + JBE_OFF(children, index); ! result->val.string.len = JBE_LEN(children, index); Assert(result->val.string.len >= 0); } else if (JBE_ISNUMERIC(entry)) { result->type = jbvNumeric; ! result->val.numeric = (Numeric) (base_addr + INTALIGN(JBE_OFF(children, index))); } else if (JBE_ISBOOL_TRUE(entry)) { --- 462,475 ---- else if (JBE_ISSTRING(entry)) { result->type = jbvString; ! result->val.string.val = base_addr + offset; ! result->val.string.len = getJsonbLength(container, index); Assert(result->val.string.len >= 0); } else if (JBE_ISNUMERIC(entry)) { result->type = jbvNumeric; ! result->val.numeric = (Numeric) (base_addr + INTALIGN(offset)); } else if (JBE_ISBOOL_TRUE(entry)) { *************** fillJsonbValue(JEntry *children, int ind *** 415,422 **** { Assert(JBE_ISCONTAINER(entry)); result->type = jbvBinary; ! result->val.binary.data = (JsonbContainer *) (base_addr + INTALIGN(JBE_OFF(children, index))); ! result->val.binary.len = JBE_LEN(children, index) - (INTALIGN(JBE_OFF(children, index)) - JBE_OFF(children, index)); } } --- 485,494 ---- { Assert(JBE_ISCONTAINER(entry)); result->type = jbvBinary; ! /* Remove alignment padding from data pointer and length */ ! result->val.binary.data = (JsonbContainer *) (base_addr + INTALIGN(offset)); ! result->val.binary.len = getJsonbLength(container, index) - ! (INTALIGN(offset) - offset); } } *************** recurse: *** 668,680 **** * a full conversion */ val->val.array.rawScalar = (*it)->isScalar; ! (*it)->i = 0; /* Set state for next call */ (*it)->state = JBI_ARRAY_ELEM; return WJB_BEGIN_ARRAY; case JBI_ARRAY_ELEM: ! if ((*it)->i >= (*it)->nElems) { /* * All elements within array already processed. Report this --- 740,754 ---- * a full conversion */ val->val.array.rawScalar = (*it)->isScalar; ! (*it)->curIndex = 0; ! (*it)->curDataOffset = 0; ! (*it)->curValueOffset = 0; /* not actually used */ /* Set state for next call */ (*it)->state = JBI_ARRAY_ELEM; return WJB_BEGIN_ARRAY; case JBI_ARRAY_ELEM: ! if ((*it)->curIndex >= (*it)->nElems) { /* * All elements within array already processed. Report this *************** recurse: *** 686,692 **** return WJB_END_ARRAY; } ! fillJsonbValue((*it)->children, (*it)->i++, (*it)->dataProper, val); if (!IsAJsonbScalar(val) && !skipNested) { --- 760,772 ---- return WJB_END_ARRAY; } ! fillJsonbValue((*it)->container, (*it)->curIndex, ! (*it)->dataProper, (*it)->curDataOffset, ! val); ! ! JBE_ADVANCE_OFFSET((*it)->curDataOffset, ! (*it)->children[(*it)->curIndex]); ! (*it)->curIndex++; if (!IsAJsonbScalar(val) && !skipNested) { *************** recurse: *** 697,704 **** else { /* ! * Scalar item in array, or a container and caller didn't ! * want us to recurse into it. */ return WJB_ELEM; } --- 777,784 ---- else { /* ! * Scalar item in array, or a container and caller didn't want ! * us to recurse into it. */ return WJB_ELEM; } *************** recurse: *** 712,724 **** * v->val.object.pairs is not actually set, because we aren't * doing a full conversion */ ! (*it)->i = 0; /* Set state for next call */ (*it)->state = JBI_OBJECT_KEY; return WJB_BEGIN_OBJECT; case JBI_OBJECT_KEY: ! if ((*it)->i >= (*it)->nElems) { /* * All pairs within object already processed. Report this to --- 792,807 ---- * v->val.object.pairs is not actually set, because we aren't * doing a full conversion */ ! (*it)->curIndex = 0; ! (*it)->curDataOffset = 0; ! (*it)->curValueOffset = getJsonbOffset((*it)->container, ! (*it)->nElems); /* Set state for next call */ (*it)->state = JBI_OBJECT_KEY; return WJB_BEGIN_OBJECT; case JBI_OBJECT_KEY: ! if ((*it)->curIndex >= (*it)->nElems) { /* * All pairs within object already processed. Report this to *************** recurse: *** 732,738 **** else { /* Return key of a key/value pair. */ ! fillJsonbValue((*it)->children, (*it)->i * 2, (*it)->dataProper, val); if (val->type != jbvString) elog(ERROR, "unexpected jsonb type as object key"); --- 815,823 ---- else { /* Return key of a key/value pair. */ ! fillJsonbValue((*it)->container, (*it)->curIndex, ! (*it)->dataProper, (*it)->curDataOffset, ! val); if (val->type != jbvString) elog(ERROR, "unexpected jsonb type as object key"); *************** recurse: *** 745,752 **** /* Set state for next call */ (*it)->state = JBI_OBJECT_KEY; ! fillJsonbValue((*it)->children, ((*it)->i++) * 2 + 1, ! (*it)->dataProper, val); /* * Value may be a container, in which case we recurse with new, --- 830,844 ---- /* Set state for next call */ (*it)->state = JBI_OBJECT_KEY; ! fillJsonbValue((*it)->container, (*it)->curIndex + (*it)->nElems, ! (*it)->dataProper, (*it)->curValueOffset, ! val); ! ! JBE_ADVANCE_OFFSET((*it)->curDataOffset, ! (*it)->children[(*it)->curIndex]); ! JBE_ADVANCE_OFFSET((*it)->curValueOffset, ! (*it)->children[(*it)->curIndex + (*it)->nElems]); ! (*it)->curIndex++; /* * Value may be a container, in which case we recurse with new, *************** iteratorFromContainer(JsonbContainer *co *** 795,805 **** break; case JB_FOBJECT: - - /* - * Offset reflects that nElems indicates JsonbPairs in an object. - * Each key and each value contain Jentry metadata just the same. - */ it->dataProper = (char *) it->children + it->nElems * sizeof(JEntry) * 2; it->state = JBI_OBJECT_START; --- 887,892 ---- *************** reserveFromBuffer(StringInfo buffer, int *** 1209,1216 **** buffer->len += len; /* ! * Keep a trailing null in place, even though it's not useful for us; ! * it seems best to preserve the invariants of StringInfos. */ buffer->data[buffer->len] = '\0'; --- 1296,1303 ---- buffer->len += len; /* ! * Keep a trailing null in place, even though it's not useful for us; it ! * seems best to preserve the invariants of StringInfos. */ buffer->data[buffer->len] = '\0'; *************** convertToJsonb(JsonbValue *val) *** 1284,1291 **** /* * Note: the JEntry of the root is discarded. Therefore the root ! * JsonbContainer struct must contain enough information to tell what ! * kind of value it is. */ res = (Jsonb *) buffer.data; --- 1371,1378 ---- /* * Note: the JEntry of the root is discarded. Therefore the root ! * JsonbContainer struct must contain enough information to tell what kind ! * of value it is. */ res = (Jsonb *) buffer.data; *************** convertToJsonb(JsonbValue *val) *** 1298,1307 **** /* * Subroutine of convertJsonb: serialize a single JsonbValue into buffer. * ! * The JEntry header for this node is returned in *header. It is filled in ! * with the length of this value, but if it is stored in an array or an ! * object (which is always, except for the root node), it is the caller's ! * responsibility to adjust it with the offset within the container. * * If the value is an array or an object, this recurses. 'level' is only used * for debugging purposes. --- 1385,1394 ---- /* * Subroutine of convertJsonb: serialize a single JsonbValue into buffer. * ! * The JEntry header for this node is returned in *header. It is filled in ! * with the length of this value and appropriate type bits. If we wish to ! * store an end offset rather than a length, it is the caller's responsibility ! * to adjust for that. * * If the value is an array or an object, this recurses. 'level' is only used * for debugging purposes. *************** convertJsonbValue(StringInfo buffer, JEn *** 1315,1324 **** return; /* ! * A JsonbValue passed as val should never have a type of jbvBinary, ! * and neither should any of its sub-components. Those values will be ! * produced by convertJsonbArray and convertJsonbObject, the results of ! * which will not be passed back to this function as an argument. */ if (IsAJsonbScalar(val)) --- 1402,1411 ---- return; /* ! * A JsonbValue passed as val should never have a type of jbvBinary, and ! * neither should any of its sub-components. Those values will be produced ! * by convertJsonbArray and convertJsonbObject, the results of which will ! * not be passed back to this function as an argument. */ if (IsAJsonbScalar(val)) *************** convertJsonbValue(StringInfo buffer, JEn *** 1334,1457 **** static void convertJsonbArray(StringInfo buffer, JEntry *pheader, JsonbValue *val, int level) { ! int offset; ! int metaoffset; int i; int totallen; uint32 header; ! /* Initialize pointer into conversion buffer at this level */ ! offset = buffer->len; padBufferToInt(buffer); /* ! * Construct the header Jentry, stored in the beginning of the variable- ! * length payload. */ ! header = val->val.array.nElems | JB_FARRAY; if (val->val.array.rawScalar) { ! Assert(val->val.array.nElems == 1); Assert(level == 0); header |= JB_FSCALAR; } appendToBuffer(buffer, (char *) &header, sizeof(uint32)); ! /* reserve space for the JEntries of the elements. */ ! metaoffset = reserveFromBuffer(buffer, sizeof(JEntry) * val->val.array.nElems); totallen = 0; ! for (i = 0; i < val->val.array.nElems; i++) { JsonbValue *elem = &val->val.array.elems[i]; int len; JEntry meta; convertJsonbValue(buffer, &meta, elem, level + 1); ! len = meta & JENTRY_POSMASK; totallen += len; ! if (totallen > JENTRY_POSMASK) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("total size of jsonb array elements exceeds the maximum of %u bytes", ! JENTRY_POSMASK))); ! if (i > 0) ! meta = (meta & ~JENTRY_POSMASK) | totallen; ! copyToBuffer(buffer, metaoffset, (char *) &meta, sizeof(JEntry)); ! metaoffset += sizeof(JEntry); } ! totallen = buffer->len - offset; ! /* Initialize the header of this node, in the container's JEntry array */ *pheader = JENTRY_ISCONTAINER | totallen; } static void convertJsonbObject(StringInfo buffer, JEntry *pheader, JsonbValue *val, int level) { ! uint32 header; ! int offset; ! int metaoffset; int i; int totallen; ! /* Initialize pointer into conversion buffer at this level */ ! offset = buffer->len; padBufferToInt(buffer); ! /* Initialize header */ ! header = val->val.object.nPairs | JB_FOBJECT; appendToBuffer(buffer, (char *) &header, sizeof(uint32)); ! /* reserve space for the JEntries of the keys and values */ ! metaoffset = reserveFromBuffer(buffer, sizeof(JEntry) * val->val.object.nPairs * 2); totallen = 0; ! for (i = 0; i < val->val.object.nPairs; i++) { ! JsonbPair *pair = &val->val.object.pairs[i]; ! int len; ! JEntry meta; ! /* put key */ convertJsonbScalar(buffer, &meta, &pair->key); ! len = meta & JENTRY_POSMASK; totallen += len; ! if (totallen > JENTRY_POSMASK) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), ! errmsg("total size of jsonb array elements exceeds the maximum of %u bytes", ! JENTRY_POSMASK))); ! if (i > 0) ! meta = (meta & ~JENTRY_POSMASK) | totallen; ! copyToBuffer(buffer, metaoffset, (char *) &meta, sizeof(JEntry)); ! metaoffset += sizeof(JEntry); ! convertJsonbValue(buffer, &meta, &pair->value, level); ! len = meta & JENTRY_POSMASK; totallen += len; ! if (totallen > JENTRY_POSMASK) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), ! errmsg("total size of jsonb array elements exceeds the maximum of %u bytes", ! JENTRY_POSMASK))); ! meta = (meta & ~JENTRY_POSMASK) | totallen; ! copyToBuffer(buffer, metaoffset, (char *) &meta, sizeof(JEntry)); ! metaoffset += sizeof(JEntry); } ! totallen = buffer->len - offset; *pheader = JENTRY_ISCONTAINER | totallen; } --- 1421,1620 ---- static void convertJsonbArray(StringInfo buffer, JEntry *pheader, JsonbValue *val, int level) { ! int base_offset; ! int jentry_offset; int i; int totallen; uint32 header; + int nElems = val->val.array.nElems; ! /* Remember where in the buffer this array starts. */ ! base_offset = buffer->len; + /* Align to 4-byte boundary (any padding counts as part of my data) */ padBufferToInt(buffer); /* ! * Construct the header Jentry and store it in the beginning of the ! * variable-length payload. */ ! header = nElems | JB_FARRAY; if (val->val.array.rawScalar) { ! Assert(nElems == 1); Assert(level == 0); header |= JB_FSCALAR; } appendToBuffer(buffer, (char *) &header, sizeof(uint32)); ! ! /* Reserve space for the JEntries of the elements. */ ! jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nElems); totallen = 0; ! for (i = 0; i < nElems; i++) { JsonbValue *elem = &val->val.array.elems[i]; int len; JEntry meta; + /* + * Convert element, producing a JEntry and appending its + * variable-length data to buffer + */ convertJsonbValue(buffer, &meta, elem, level + 1); ! ! len = JBE_OFFLENFLD(meta); totallen += len; ! /* ! * Bail out if total variable-length data exceeds what will fit in a ! * JEntry length field. We check this in each iteration, not just ! * once at the end, to forestall possible integer overflow. ! */ ! if (totallen > JENTRY_OFFLENMASK) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("total size of jsonb array elements exceeds the maximum of %u bytes", ! JENTRY_OFFLENMASK))); ! /* ! * Convert each JB_OFFSET_STRIDE'th length to an offset. ! */ ! if ((i % JB_OFFSET_STRIDE) == 0) ! meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF; ! ! copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry)); ! jentry_offset += sizeof(JEntry); } ! /* Total data size is everything we've appended to buffer */ ! totallen = buffer->len - base_offset; ! /* Check length again, since we didn't include the metadata above */ ! if (totallen > JENTRY_OFFLENMASK) ! ereport(ERROR, ! (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), ! errmsg("total size of jsonb array elements exceeds the maximum of %u bytes", ! JENTRY_OFFLENMASK))); ! ! /* Initialize the header of this node in the container's JEntry array */ *pheader = JENTRY_ISCONTAINER | totallen; } static void convertJsonbObject(StringInfo buffer, JEntry *pheader, JsonbValue *val, int level) { ! int base_offset; ! int jentry_offset; int i; int totallen; + uint32 header; + int nPairs = val->val.object.nPairs; ! /* Remember where in the buffer this object starts. */ ! base_offset = buffer->len; + /* Align to 4-byte boundary (any padding counts as part of my data) */ padBufferToInt(buffer); ! /* ! * Construct the header Jentry and store it in the beginning of the ! * variable-length payload. ! */ ! header = nPairs | JB_FOBJECT; appendToBuffer(buffer, (char *) &header, sizeof(uint32)); ! /* Reserve space for the JEntries of the keys and values. */ ! jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nPairs * 2); + /* + * Iterate over the keys, then over the values, since that is the ordering + * we want in the on-disk representation. + */ totallen = 0; ! for (i = 0; i < nPairs; i++) { ! JsonbPair *pair = &val->val.object.pairs[i]; ! int len; ! JEntry meta; ! /* ! * Convert key, producing a JEntry and appending its variable-length ! * data to buffer ! */ convertJsonbScalar(buffer, &meta, &pair->key); ! len = JBE_OFFLENFLD(meta); totallen += len; ! /* ! * Bail out if total variable-length data exceeds what will fit in a ! * JEntry length field. We check this in each iteration, not just ! * once at the end, to forestall possible integer overflow. ! */ ! if (totallen > JENTRY_OFFLENMASK) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), ! errmsg("total size of jsonb object elements exceeds the maximum of %u bytes", ! JENTRY_OFFLENMASK))); ! /* ! * Convert each JB_OFFSET_STRIDE'th length to an offset. ! */ ! if ((i % JB_OFFSET_STRIDE) == 0) ! meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF; ! copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry)); ! jentry_offset += sizeof(JEntry); ! } ! for (i = 0; i < nPairs; i++) ! { ! JsonbPair *pair = &val->val.object.pairs[i]; ! int len; ! JEntry meta; ! ! /* ! * Convert value, producing a JEntry and appending its variable-length ! * data to buffer ! */ ! convertJsonbValue(buffer, &meta, &pair->value, level + 1); ! ! len = JBE_OFFLENFLD(meta); totallen += len; ! /* ! * Bail out if total variable-length data exceeds what will fit in a ! * JEntry length field. We check this in each iteration, not just ! * once at the end, to forestall possible integer overflow. ! */ ! if (totallen > JENTRY_OFFLENMASK) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), ! errmsg("total size of jsonb object elements exceeds the maximum of %u bytes", ! JENTRY_OFFLENMASK))); ! /* ! * Convert each JB_OFFSET_STRIDE'th length to an offset. ! */ ! if (((i + nPairs) % JB_OFFSET_STRIDE) == 0) ! meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF; ! ! copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry)); ! jentry_offset += sizeof(JEntry); } ! /* Total data size is everything we've appended to buffer */ ! totallen = buffer->len - base_offset; + /* Check length again, since we didn't include the metadata above */ + if (totallen > JENTRY_OFFLENMASK) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("total size of jsonb object elements exceeds the maximum of %u bytes", + JENTRY_OFFLENMASK))); + + /* Initialize the header of this node in the container's JEntry array */ *pheader = JENTRY_ISCONTAINER | totallen; } diff --git a/src/include/utils/jsonb.h b/src/include/utils/jsonb.h index 91e3e14..b89e4cb 100644 *** a/src/include/utils/jsonb.h --- b/src/include/utils/jsonb.h *************** typedef struct JsonbValue JsonbValue; *** 83,91 **** * buffer is accessed, but they can also be deep copied and passed around. * * Jsonb is a tree structure. Each node in the tree consists of a JEntry ! * header, and a variable-length content. The JEntry header indicates what ! * kind of a node it is, e.g. a string or an array, and the offset and length ! * of its variable-length portion within the container. * * The JEntry and the content of a node are not stored physically together. * Instead, the container array or object has an array that holds the JEntrys --- 83,91 ---- * buffer is accessed, but they can also be deep copied and passed around. * * Jsonb is a tree structure. Each node in the tree consists of a JEntry ! * header and a variable-length content (possibly of zero size). The JEntry ! * header indicates what kind of a node it is, e.g. a string or an array, ! * and provides the length of its variable-length portion. * * The JEntry and the content of a node are not stored physically together. * Instead, the container array or object has an array that holds the JEntrys *************** typedef struct JsonbValue JsonbValue; *** 95,134 **** * hold its JEntry. Hence, no JEntry header is stored for the root node. It * is implicitly known that the root node must be an array or an object, * so we can get away without the type indicator as long as we can distinguish ! * the two. For that purpose, both an array and an object begins with a uint32 * header field, which contains an JB_FOBJECT or JB_FARRAY flag. When a naked * scalar value needs to be stored as a Jsonb value, what we actually store is * an array with one element, with the flags in the array's header field set * to JB_FSCALAR | JB_FARRAY. * - * To encode the length and offset of the variable-length portion of each - * node in a compact way, the JEntry stores only the end offset within the - * variable-length portion of the container node. For the first JEntry in the - * container's JEntry array, that equals to the length of the node data. The - * begin offset and length of the rest of the entries can be calculated using - * the end offset of the previous JEntry in the array. - * * Overall, the Jsonb struct requires 4-bytes alignment. Within the struct, * the variable-length portion of some node types is aligned to a 4-byte * boundary, while others are not. When alignment is needed, the padding is * in the beginning of the node that requires it. For example, if a numeric * node is stored after a string node, so that the numeric node begins at * offset 3, the variable-length portion of the numeric node will begin with ! * one padding byte. */ /* ! * Jentry format. * ! * The least significant 28 bits store the end offset of the entry (see ! * JBE_ENDPOS, JBE_OFF, JBE_LEN macros below). The next three bits ! * are used to store the type of the entry. The most significant bit ! * is unused, and should be set to zero. */ typedef uint32 JEntry; ! #define JENTRY_POSMASK 0x0FFFFFFF #define JENTRY_TYPEMASK 0x70000000 /* values stored in the type bits */ #define JENTRY_ISSTRING 0x00000000 --- 95,146 ---- * hold its JEntry. Hence, no JEntry header is stored for the root node. It * is implicitly known that the root node must be an array or an object, * so we can get away without the type indicator as long as we can distinguish ! * the two. For that purpose, both an array and an object begin with a uint32 * header field, which contains an JB_FOBJECT or JB_FARRAY flag. When a naked * scalar value needs to be stored as a Jsonb value, what we actually store is * an array with one element, with the flags in the array's header field set * to JB_FSCALAR | JB_FARRAY. * * Overall, the Jsonb struct requires 4-bytes alignment. Within the struct, * the variable-length portion of some node types is aligned to a 4-byte * boundary, while others are not. When alignment is needed, the padding is * in the beginning of the node that requires it. For example, if a numeric * node is stored after a string node, so that the numeric node begins at * offset 3, the variable-length portion of the numeric node will begin with ! * one padding byte so that the actual numeric data is 4-byte aligned. */ /* ! * JEntry format. * ! * The least significant 28 bits store either the data length of the entry, ! * or its end+1 offset from the start of the variable-length portion of the ! * containing object. The next three bits store the type of the entry, and ! * the high-order bit tells whether the least significant bits store a length ! * or an offset. ! * ! * The reason for the offset-or-length complication is to compromise between ! * access speed and data compressibility. In the initial design each JEntry ! * always stored an offset, but this resulted in JEntry arrays with horrible ! * compressibility properties, so that TOAST compression of a JSONB did not ! * work well. Storing only lengths would greatly improve compressibility, ! * but it makes random access into large arrays expensive (O(N) not O(1)). ! * So what we do is store an offset in every JB_OFFSET_STRIDE'th JEntry and ! * a length in the rest. This results in reasonably compressible data (as ! * long as the stride isn't too small). We may have to examine as many as ! * JB_OFFSET_STRIDE JEntrys in order to find out the offset or length of any ! * given item, but that's still O(1) no matter how large the container is. ! * ! * We could avoid eating a flag bit for this purpose if we were to store ! * the stride in the container header, or if we were willing to treat the ! * stride as an unchangeable constant. Neither of those options is very ! * attractive though. */ typedef uint32 JEntry; ! #define JENTRY_OFFLENMASK 0x0FFFFFFF #define JENTRY_TYPEMASK 0x70000000 + #define JENTRY_HAS_OFF 0x80000000 /* values stored in the type bits */ #define JENTRY_ISSTRING 0x00000000 *************** typedef uint32 JEntry; *** 138,144 **** #define JENTRY_ISNULL 0x40000000 #define JENTRY_ISCONTAINER 0x50000000 /* array or object */ ! /* Note possible multiple evaluations */ #define JBE_ISSTRING(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISSTRING) #define JBE_ISNUMERIC(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISNUMERIC) #define JBE_ISCONTAINER(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISCONTAINER) --- 150,158 ---- #define JENTRY_ISNULL 0x40000000 #define JENTRY_ISCONTAINER 0x50000000 /* array or object */ ! /* Access macros. Note possible multiple evaluations */ ! #define JBE_OFFLENFLD(je_) ((je_) & JENTRY_OFFLENMASK) ! #define JBE_HAS_OFF(je_) (((je_) & JENTRY_HAS_OFF) != 0) #define JBE_ISSTRING(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISSTRING) #define JBE_ISNUMERIC(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISNUMERIC) #define JBE_ISCONTAINER(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISCONTAINER) *************** typedef uint32 JEntry; *** 147,166 **** #define JBE_ISBOOL_FALSE(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISBOOL_FALSE) #define JBE_ISBOOL(je_) (JBE_ISBOOL_TRUE(je_) || JBE_ISBOOL_FALSE(je_)) /* ! * Macros for getting the offset and length of an element. Note multiple ! * evaluations and access to prior array element. */ ! #define JBE_ENDPOS(je_) ((je_) & JENTRY_POSMASK) ! #define JBE_OFF(ja, i) ((i) == 0 ? 0 : JBE_ENDPOS((ja)[i - 1])) ! #define JBE_LEN(ja, i) ((i) == 0 ? JBE_ENDPOS((ja)[i]) \ ! : JBE_ENDPOS((ja)[i]) - JBE_ENDPOS((ja)[i - 1])) /* * A jsonb array or object node, within a Jsonb Datum. * ! * An array has one child for each element. An object has two children for ! * each key/value pair. */ typedef struct JsonbContainer { --- 161,194 ---- #define JBE_ISBOOL_FALSE(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISBOOL_FALSE) #define JBE_ISBOOL(je_) (JBE_ISBOOL_TRUE(je_) || JBE_ISBOOL_FALSE(je_)) + /* Macro for advancing an offset variable to the next JEntry */ + #define JBE_ADVANCE_OFFSET(offset, je) \ + do { \ + JEntry je_ = (je); \ + if (JBE_HAS_OFF(je_)) \ + (offset) = JBE_OFFLENFLD(je_); \ + else \ + (offset) += JBE_OFFLENFLD(je_); \ + } while(0) + /* ! * We store an offset, not a length, every JB_OFFSET_STRIDE children. ! * Caution: this macro should only be referenced when creating a JSONB ! * value. When examining an existing value, pay attention to the HAS_OFF ! * bits instead. This allows changes in the offset-placement heuristic ! * without breaking on-disk compatibility. */ ! #define JB_OFFSET_STRIDE 32 /* * A jsonb array or object node, within a Jsonb Datum. * ! * An array has one child for each element, stored in array order. ! * ! * An object has two children for each key/value pair. The keys all appear ! * first, in key sort order; then the values appear, in an order matching the ! * key order. This arrangement keeps the keys compact in memory, making a ! * search for a particular key more cache-friendly. */ typedef struct JsonbContainer { *************** typedef struct JsonbContainer *** 172,179 **** } JsonbContainer; /* flags for the header-field in JsonbContainer */ ! #define JB_CMASK 0x0FFFFFFF ! #define JB_FSCALAR 0x10000000 #define JB_FOBJECT 0x20000000 #define JB_FARRAY 0x40000000 --- 200,207 ---- } JsonbContainer; /* flags for the header-field in JsonbContainer */ ! #define JB_CMASK 0x0FFFFFFF /* mask for count field */ ! #define JB_FSCALAR 0x10000000 /* flag bits */ #define JB_FOBJECT 0x20000000 #define JB_FARRAY 0x40000000 *************** struct JsonbValue *** 248,265 **** (jsonbval)->type <= jbvBool) /* ! * Pair within an Object. * ! * Pairs with duplicate keys are de-duplicated. We store the order for the ! * benefit of doing so in a well-defined way with respect to the original ! * observed order (which is "last observed wins"). This is only used briefly ! * when originally constructing a Jsonb. */ struct JsonbPair { JsonbValue key; /* Must be a jbvString */ JsonbValue value; /* May be of any type */ ! uint32 order; /* preserves order of pairs with equal keys */ }; /* Conversion state used when parsing Jsonb from text, or for type coercion */ --- 276,295 ---- (jsonbval)->type <= jbvBool) /* ! * Key/value pair within an Object. * ! * This struct type is only used briefly while constructing a Jsonb; it is ! * *not* the on-disk representation. ! * ! * Pairs with duplicate keys are de-duplicated. We store the originally ! * observed pair ordering for the purpose of removing duplicates in a ! * well-defined way (which is "last observed wins"). */ struct JsonbPair { JsonbValue key; /* Must be a jbvString */ JsonbValue value; /* May be of any type */ ! uint32 order; /* Pair's index in original sequence */ }; /* Conversion state used when parsing Jsonb from text, or for type coercion */ *************** typedef struct JsonbIterator *** 287,306 **** { /* Container being iterated */ JsonbContainer *container; ! uint32 nElems; /* Number of elements in children array (will be ! * nPairs for objects) */ bool isScalar; /* Pseudo-array scalar value? */ ! JEntry *children; ! /* Current item in buffer (up to nElems, but must * 2 for objects) */ ! int i; /* ! * Data proper. This points just past end of children array. ! * We use the JBE_OFF() macro on the Jentrys to find offsets of each ! * child in this area. */ ! char *dataProper; /* Private state */ JsonbIterState state; --- 317,341 ---- { /* Container being iterated */ JsonbContainer *container; ! uint32 nElems; /* Number of elements in children array (will ! * be nPairs for objects) */ bool isScalar; /* Pseudo-array scalar value? */ ! JEntry *children; /* JEntrys for child nodes */ ! /* Data proper. This points to the beginning of the variable-length data */ ! char *dataProper; ! /* Current item in buffer (up to nElems) */ ! int curIndex; ! ! /* Data offset corresponding to current item */ ! uint32 curDataOffset; /* ! * If the container is an object, we want to return keys and values ! * alternately; so curDataOffset points to the current key, and ! * curValueOffset points to the current value. */ ! uint32 curValueOffset; /* Private state */ JsonbIterState state; *************** extern Datum gin_consistent_jsonb_path(P *** 344,349 **** --- 379,386 ---- extern Datum gin_triconsistent_jsonb_path(PG_FUNCTION_ARGS); /* Support functions */ + extern uint32 getJsonbOffset(const JsonbContainer *jc, int index); + extern uint32 getJsonbLength(const JsonbContainer *jc, int index); extern int compareJsonbContainers(JsonbContainer *a, JsonbContainer *b); extern JsonbValue *findJsonbValueFromContainer(JsonbContainer *sheader, uint32 flags,
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers