Re: [PATCHES] TODO Item - Return compressed length of TOAST datatypes
Mark Kirkwood wrote: I did a few cleanups on the last patch. Please examine this one instead. The changes are: 1. Add documentation for pg_datum_length builtin. 2. Correct some typos in the code comments. 3. Move the code in toastfuncs.c to varlena.c as it is probably the correct place. 4. Use ereport instead of elog. 5 Quiet compiler warning in pg_datum_length. I have modified your patch to simplify the logic, and renamed it to pg_column_size(), to be consistent with our soon-to-be-added pg_relation/tablespace/database functions from dbsize. Here is a sample usage: test= CREATE TABLE test (x INT, y TEXT); CREATE TABLE test= INSERT INTO test VALUES (4, repeat('x', 1)); INSERT 0 1 test= INSERT INTO test VALUES (4, repeat('x', 10)); INSERT 0 1 test= SELECT pg_column_size(x), pg_column_size(y) FROM test; pg_column_size | pg_column_size + 4 |121 4 | 1152 (2 rows) Interesting the 10-times larger column is 10-times larger in storage. Do we have some limit on how many repeated values we can record? Patch attached and applied. -- Bruce Momjian| http://candle.pha.pa.us pgman@candle.pha.pa.us | (610) 359-1001 + If your life is a hard drive, | 13 Roberts Road + Christ can be your backup.| Newtown Square, Pennsylvania 19073 Index: doc/src/sgml/func.sgml === RCS file: /cvsroot/pgsql/doc/src/sgml/func.sgml,v retrieving revision 1.262 diff -c -c -r1.262 func.sgml *** doc/src/sgml/func.sgml 29 Jun 2005 01:52:56 - 1.262 --- doc/src/sgml/func.sgml 6 Jul 2005 18:55:34 - *** *** 2187,2192 --- 2187,2200 /row row + entryliteralfunctionpg_column_size/function(parameterstring/parameter)/literal/entry +entrytypeinteger/type/entry +entryNumber of bytes required to store the value, which might be compressed/entry +entryliteralpg_column_size('jo\\000se'::bytea)/literal/entry +entryliteral5/literal/entry + /row + + row entryliteralfunctionposition/function(parametersubstring/parameter in parameterstring/parameter)/literal/entry entrytypeinteger/type/entry entryLocation of specified substring/entry Index: src/backend/access/heap/tuptoaster.c === RCS file: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v retrieving revision 1.49 diff -c -c -r1.49 tuptoaster.c *** src/backend/access/heap/tuptoaster.c21 Mar 2005 01:23:58 - 1.49 --- src/backend/access/heap/tuptoaster.c6 Jul 2005 18:55:35 - *** *** 1436,1438 --- 1436,1480 return result; } + + /* -- + * toast_datum_size + * + *Show the (possibly compressed) size of a datum + * -- + */ + Size + toast_datum_size(Datum value) + { + + varattrib *attr = (varattrib *) DatumGetPointer(value); + Sizeresult; + + if (VARATT_IS_EXTERNAL(attr)) + { + /* +* Attribute is stored externally - If it is compressed too, +* then we need to get the external datum and calculate its size, +* otherwise we just use the external rawsize. +*/ + if (VARATT_IS_COMPRESSED(attr)) + { + varattrib *attrext = toast_fetch_datum(attr); + result = VARSIZE(attrext); + pfree(attrext); + } + else + result = attr-va_content.va_external.va_rawsize; + } + else + { + /* +* Attribute is stored inline either compressed or not, just +* calculate the size of the datum in either case. +*/ + result = VARSIZE(attr); + } + + return result; + + } Index: src/backend/utils/adt/varlena.c === RCS file: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v retrieving revision 1.124 diff -c -c -r1.124 varlena.c *** src/backend/utils/adt/varlena.c 4 Jul 2005 18:56:44 - 1.124 --- src/backend/utils/adt/varlena.c 6 Jul 2005 18:55:36 - *** *** 28,33 --- 28,34 #include utils/builtins.h #include utils/lsyscache.h #include utils/pg_locale.h + #include utils/syscache.h typedef struct varlena unknown; *** *** 2348,2350 --- 2349,2395 result_text = PG_STR_GET_TEXT(hexsum); PG_RETURN_TEXT_P(result_text); } + + /* + * Return the length of a datum, possibly compressed +
Re: [PATCHES] TODO Item - Return compressed length of TOAST datatypes
Bruce Momjian wrote: + /* + * Return the length of a datum, possibly compressed + */ + Datum + pg_column_size(PG_FUNCTION_ARGS) + { + Datum value = PG_GETARG_DATUM(0); + int result; + + /* fn_extra stores the fixed column length, or -1 for varlena. */ + if (fcinfo-flinfo-fn_extra == NULL) /* first call? */ + { + /* On the first call lookup the datatype of the supplied argument */ [...] Is this optimization worth the code complexity? + tp = SearchSysCache(TYPEOID, + ObjectIdGetDatum(argtypeid), + 0, 0, 0); + if (!HeapTupleIsValid(tp)) + { + /* Oid not in pg_type, should never happen. */ + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), +errmsg(invalid typid: %u, argtypeid))); + } elog(ERROR) is usually used for can't happen errors; also, the usual error message in this scenario is cache lookup failed [...]. Perhaps better to use get_typlen() here, anyway. -Neil ---(end of broadcast)--- TIP 6: Have you searched our list archives? http://archives.postgresql.org
Re: [PATCHES] TODO Item - Return compressed length of TOAST datatypes
Neil Conway wrote: Bruce Momjian wrote: + /* + * Return the length of a datum, possibly compressed + */ + Datum + pg_column_size(PG_FUNCTION_ARGS) + { + Datumvalue = PG_GETARG_DATUM(0); + intresult; + + /*fn_extra stores the fixed column length, or -1 for varlena. */ + if (fcinfo-flinfo-fn_extra == NULL)/* first call? */ + { + /* On the first call lookup the datatype of the supplied argument */ [...] Is this optimization worth the code complexity? I didn't performance test it, but the idea of hammering the catalogs for each value to be processed seemed a bad thing. + tp = SearchSysCache(TYPEOID, + ObjectIdGetDatum(argtypeid), + 0, 0, 0); + if (!HeapTupleIsValid(tp)) + { + /* Oid not in pg_type, should never happen. */ + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg(invalid typid: %u, argtypeid))); + } elog(ERROR) is usually used for can't happen errors; also, the usual error message in this scenario is cache lookup failed [...]. Perhaps better to use get_typlen() here, anyway. Ahem,..ah yes, get_typlen()! (ducks)- that is clearly much better! I have attached a little change to varlena.c that uses it. I left the ereport as it was, but am not fussed about it either way. BTW - Bruce, I like the changes, makes the beast more friendly to use! Best wishes Mark Index: src/backend/utils/adt/varlena.c === RCS file: /projects/cvsroot/pgsql/src/backend/utils/adt/varlena.c,v retrieving revision 1.125 diff -c -r1.125 varlena.c *** src/backend/utils/adt/varlena.c 6 Jul 2005 19:02:52 - 1.125 --- src/backend/utils/adt/varlena.c 7 Jul 2005 02:52:50 - *** *** 28,34 #include utils/builtins.h #include utils/lsyscache.h #include utils/pg_locale.h - #include utils/syscache.h typedef struct varlena unknown; --- 28,33 *** *** 2364,2385 { /* On the first call lookup the datatype of the supplied argument */ Oid argtypeid = get_fn_expr_argtype(fcinfo-flinfo, 0); ! HeapTuple tp; ! int typlen; ! tp = SearchSysCache(TYPEOID, ! ObjectIdGetDatum(argtypeid), ! 0, 0, 0); ! if (!HeapTupleIsValid(tp)) { /* Oid not in pg_type, should never happen. */ ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), !errmsg(invalid typid: %u, argtypeid))); } ! ! typlen = ((Form_pg_type)GETSTRUCT(tp))-typlen; ! ReleaseSysCache(tp); fcinfo-flinfo-fn_extra = MemoryContextAlloc(fcinfo-flinfo-fn_mcxt, sizeof(int)); *(int *)fcinfo-flinfo-fn_extra = typlen; --- 2363,2379 { /* On the first call lookup the datatype of the supplied argument */ Oid argtypeid = get_fn_expr_argtype(fcinfo-flinfo, 0); ! int typlen= get_typlen(argtypeid); ! ! if (typlen == 0) { /* Oid not in pg_type, should never happen. */ ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), !errmsg(cache lookup failed for type %u, argtypeid))); } ! fcinfo-flinfo-fn_extra = MemoryContextAlloc(fcinfo-flinfo-fn_mcxt, sizeof(int)); *(int *)fcinfo-flinfo-fn_extra = typlen; ---(end of broadcast)--- TIP 1: subscribe and unsubscribe commands go to [EMAIL PROTECTED]
Re: [PATCHES] TODO Item - Return compressed length of TOAST datatypes
Mark Kirkwood wrote: I didn't performance test it, but the idea of hammering the catalogs for each value to be processed seemed a bad thing. Well, the syscache already sits in front of the catalogs themselves. I'd be curious to see what the performance difference actually is... -Neil ---(end of broadcast)--- TIP 4: Don't 'kill -9' the postmaster
Re: [PATCHES] TODO Item - Return compressed length of TOAST datatypes
On Thu, Jul 07, 2005 at 03:01:46PM +1200, Mark Kirkwood wrote: Neil Conway wrote: elog(ERROR) is usually used for can't happen errors; also, the usual error message in this scenario is cache lookup failed [...]. Perhaps better to use get_typlen() here, anyway. I have attached a little change to varlena.c that uses it. I left the ereport as it was, but am not fussed about it either way. I am, because it gives useless messages to the translators to work on. elog parameters are not marked for translation, ereport are (errmsg and friends, really). So please don't do that. -- Alvaro Herrera (alvherre[a]alvh.no-ip.org) ¿Cómo puedes confiar en algo que pagas y que no ves, y no confiar en algo que te dan y te lo muestran? (Germán Poo) ---(end of broadcast)--- TIP 9: In versions below 8.0, the planner will ignore your desire to choose an index scan if your joining column's datatypes do not match
Re: [PATCHES] TODO Item - Return compressed length of TOAST datatypes
Alvaro Herrera wrote: On Thu, Jul 07, 2005 at 03:01:46PM +1200, Mark Kirkwood wrote: Neil Conway wrote: elog(ERROR) is usually used for can't happen errors. I have attached a little change to varlena.c that uses it. I left the ereport as it was, but am not fussed about it either way. I am, because it gives useless messages to the translators to work on. elog parameters are not marked for translation, ereport are (errmsg and friends, really). So please don't do that. Ok, didn't realize the difference! Revised patch attached that uses elog. Neil, I will runs some tests to see if there is any performance saving with the first-call-only business. Cheers Mark Index: src/backend/utils/adt/varlena.c === RCS file: /projects/cvsroot/pgsql/src/backend/utils/adt/varlena.c,v retrieving revision 1.125 diff -c -r1.125 varlena.c *** src/backend/utils/adt/varlena.c 6 Jul 2005 19:02:52 - 1.125 --- src/backend/utils/adt/varlena.c 7 Jul 2005 03:40:44 - *** *** 28,34 #include utils/builtins.h #include utils/lsyscache.h #include utils/pg_locale.h - #include utils/syscache.h typedef struct varlena unknown; --- 28,33 *** *** 2364,2385 { /* On the first call lookup the datatype of the supplied argument */ Oid argtypeid = get_fn_expr_argtype(fcinfo-flinfo, 0); ! HeapTuple tp; ! int typlen; ! tp = SearchSysCache(TYPEOID, ! ObjectIdGetDatum(argtypeid), ! 0, 0, 0); ! if (!HeapTupleIsValid(tp)) { /* Oid not in pg_type, should never happen. */ ! ereport(ERROR, ! (errcode(ERRCODE_INTERNAL_ERROR), !errmsg(invalid typid: %u, argtypeid))); } ! ! typlen = ((Form_pg_type)GETSTRUCT(tp))-typlen; ! ReleaseSysCache(tp); fcinfo-flinfo-fn_extra = MemoryContextAlloc(fcinfo-flinfo-fn_mcxt, sizeof(int)); *(int *)fcinfo-flinfo-fn_extra = typlen; --- 2363,2377 { /* On the first call lookup the datatype of the supplied argument */ Oid argtypeid = get_fn_expr_argtype(fcinfo-flinfo, 0); ! int typlen= get_typlen(argtypeid); ! ! if (typlen == 0) { /* Oid not in pg_type, should never happen. */ ! elog(ERROR, cache lookup failed for type %u, argtypeid); } ! fcinfo-flinfo-fn_extra = MemoryContextAlloc(fcinfo-flinfo-fn_mcxt, sizeof(int)); *(int *)fcinfo-flinfo-fn_extra = typlen; ---(end of broadcast)--- TIP 5: Have you checked our extensive FAQ? http://www.postgresql.org/docs/faq
Re: [PATCHES] TODO Item - Return compressed length of TOAST datatypes
Alvaro Herrera [EMAIL PROTECTED] writes: On Thu, Jul 07, 2005 at 03:01:46PM +1200, Mark Kirkwood wrote: I have attached a little change to varlena.c that uses it. I left the ereport as it was, but am not fussed about it either way. I am, because it gives useless messages to the translators to work on. Exactly. I had already made a private note to fix that patch --- inventing your own random wording and punctuation for an extremely standard error message is simply Not Acceptable. regards, tom lane ---(end of broadcast)--- TIP 2: you can get off all lists at once with the unregister command (send unregister YourEmailAddressHere to [EMAIL PROTECTED])
Re: [PATCHES] TODO Item - Return compressed length of TOAST datatypes
Neil Conway wrote: Mark Kirkwood wrote: I didn't performance test it, but the idea of hammering the catalogs for each value to be processed seemed a bad thing. Well, the syscache already sits in front of the catalogs themselves. I'd be curious to see what the performance difference actually is... I did some tests with a two tables, one small and one large, I am seeing a consistent difference favoring the first-call-only type checking: Table public.dim1 Column | Type | Modifiers ++--- d1key | integer| not null dat| character varying(100) | not null dattyp | character varying(20) | not null dfill | character varying(100) | not null INFO: dim1: scanned 24 of 24 pages, containing 1001 live rows and 0 dead rows; 1001 rows in sample, 1001 estimated total rows SELECT max(pg_column_size(dfill)) FROM dim1 Run 1st call only check? elapsed(ms) 1y5.5 2y3.1 3n11.3 4n4.1 Now try a bigger table: Table public.fact0 Column | Type | Modifiers ++--- d0key | integer| not null d1key | integer| not null d2key | integer| not null fval | integer| not null ffill | character varying(100) | not null INFO: fact0: scanned 3000 of 18182 pages, containing 165000 live rows and 0 dead rows; 3000 rows in sample, 110 estimated total rows SELECT max(pg_column_size(ffill)) FROM fact0 Run 1st call only check? elapsed(ms) 1y2497 2y2484 3y2496 4y2480 5n3572 6n3570 7n3558 8n3457 ---(end of broadcast)--- TIP 2: you can get off all lists at once with the unregister command (send unregister YourEmailAddressHere to [EMAIL PROTECTED])
Re: [PATCHES] TODO Item - Return compressed length of TOAST datatypes
Patch applied. Thanks. --- Mark Kirkwood wrote: Alvaro Herrera wrote: On Thu, Jul 07, 2005 at 03:01:46PM +1200, Mark Kirkwood wrote: Neil Conway wrote: elog(ERROR) is usually used for can't happen errors. I have attached a little change to varlena.c that uses it. I left the ereport as it was, but am not fussed about it either way. I am, because it gives useless messages to the translators to work on. elog parameters are not marked for translation, ereport are (errmsg and friends, really). So please don't do that. Ok, didn't realize the difference! Revised patch attached that uses elog. Neil, I will runs some tests to see if there is any performance saving with the first-call-only business. Cheers Mark Index: src/backend/utils/adt/varlena.c === RCS file: /projects/cvsroot/pgsql/src/backend/utils/adt/varlena.c,v retrieving revision 1.125 diff -c -r1.125 varlena.c *** src/backend/utils/adt/varlena.c 6 Jul 2005 19:02:52 - 1.125 --- src/backend/utils/adt/varlena.c 7 Jul 2005 03:40:44 - *** *** 28,34 #include utils/builtins.h #include utils/lsyscache.h #include utils/pg_locale.h - #include utils/syscache.h typedef struct varlena unknown; --- 28,33 *** *** 2364,2385 { /* On the first call lookup the datatype of the supplied argument */ Oid argtypeid = get_fn_expr_argtype(fcinfo-flinfo, 0); ! HeapTuple tp; ! int typlen; ! tp = SearchSysCache(TYPEOID, ! ObjectIdGetDatum(argtypeid), ! 0, 0, 0); ! if (!HeapTupleIsValid(tp)) { /* Oid not in pg_type, should never happen. */ ! ereport(ERROR, ! (errcode(ERRCODE_INTERNAL_ERROR), ! errmsg(invalid typid: %u, argtypeid))); } ! ! typlen = ((Form_pg_type)GETSTRUCT(tp))-typlen; ! ReleaseSysCache(tp); fcinfo-flinfo-fn_extra = MemoryContextAlloc(fcinfo-flinfo-fn_mcxt, sizeof(int)); *(int *)fcinfo-flinfo-fn_extra = typlen; --- 2363,2377 { /* On the first call lookup the datatype of the supplied argument */ Oid argtypeid = get_fn_expr_argtype(fcinfo-flinfo, 0); ! int typlen= get_typlen(argtypeid); ! ! if (typlen == 0) { /* Oid not in pg_type, should never happen. */ ! elog(ERROR, cache lookup failed for type %u, argtypeid); } ! fcinfo-flinfo-fn_extra = MemoryContextAlloc(fcinfo-flinfo-fn_mcxt, sizeof(int)); *(int *)fcinfo-flinfo-fn_extra = typlen; -- Bruce Momjian| http://candle.pha.pa.us pgman@candle.pha.pa.us | (610) 359-1001 + If your life is a hard drive, | 13 Roberts Road + Christ can be your backup.| Newtown Square, Pennsylvania 19073 ---(end of broadcast)--- TIP 8: explain analyze is your friend
Re: [PATCHES] TODO Item - Return compressed length of TOAST datatypes
Tom Lane wrote: Alvaro Herrera [EMAIL PROTECTED] writes: On Thu, Jul 07, 2005 at 03:01:46PM +1200, Mark Kirkwood wrote: I have attached a little change to varlena.c that uses it. I left the ereport as it was, but am not fussed about it either way. I am, because it gives useless messages to the translators to work on. Exactly. I had already made a private note to fix that patch --- inventing your own random wording and punctuation for an extremely standard error message is simply Not Acceptable. Apologies all, my not fussed about it was highly misleading - I had already changed the error message as per Neil's suggestion in the revised patch, but that was not obvious from my comment :-( regards Mark ---(end of broadcast)--- TIP 8: explain analyze is your friend
Re: [PATCHES] TODO Item - Return compressed length of TOAST datatypes
I did a few cleanups on the last patch. Please examine this one instead. The changes are: 1. Add documentation for pg_datum_length builtin. 2. Correct some typos in the code comments. 3. Move the code in toastfuncs.c to varlena.c as it is probably the correct place. 4. Use ereport instead of elog. 5 Quiet compiler warning in pg_datum_length. Best wishes Mark Mark Kirkwood wrote: The next iteration - Hopefully I have got the idea basically right. I wonder if I have done the am I a varlena the long way.., pls advise if so! diff -Nacr ./doc/src/sgml/func.sgml.orig ./doc/src/sgml/func.sgml *** ./doc/src/sgml/func.sgml.orig Mon Jun 20 15:38:23 2005 --- ./doc/src/sgml/func.sgmlMon Jun 20 15:45:51 2005 *** *** 2187,2192 --- 2187,2200 /row row + entryliteralfunctionpg_datum_length/function(parameterstring/parameter)/literal/entry +entrytypeinteger/type/entry +entryNumber of bytes (before toast decompression) in string/entry +entryliteralpg_datum_length( 'jo\\000se'::bytea)/literal/entry +entryliteral5/literal/entry + /row + + row entryliteralfunctionposition/function(parametersubstring/parameter in parameterstring/parameter)/literal/entry entrytypeinteger/type/entry entryLocation of specified substring/entry diff -Nacr ./src/backend/access/heap/tuptoaster.c.orig ./src/backend/access/heap/tuptoaster.c *** ./src/backend/access/heap/tuptoaster.c.orig Mon Jun 20 17:11:37 2005 --- ./src/backend/access/heap/tuptoaster.c Mon Jun 20 17:11:44 2005 *** *** 1436,1438 --- 1436,1482 return result; } + + /* -- + * toast_datum_size + * + *Show the (possibly compressed) size of a datum + * -- + */ + Size + toast_datum_size(Datum value) + { + + varattrib *attr = (varattrib *) DatumGetPointer(value); + Sizeresult; + + if (VARATT_IS_EXTERNAL(attr)) + { + /* +* Attribute is stored externally - If it is compressed too, +* then we need to get the external datum and calculate its size, +* otherwise we just use the external rawsize. +*/ + if (VARATT_IS_COMPRESSED(attr)) + { + varattrib *attrext = toast_fetch_datum(attr); + result = VARSIZE(attrext); + pfree(attrext); + } + else + { + result = attr-va_content.va_external.va_rawsize; + } + } + else + { + /* +* Attribute is stored inline either compressed or not, just +* calculate the size of the datum in either case. +*/ + result = VARSIZE(attr); + } + + return result; + + } diff -Nacr ./src/backend/utils/adt/varlena.c.orig ./src/backend/utils/adt/varlena.c *** ./src/backend/utils/adt/varlena.c.orig Mon Jun 20 14:28:03 2005 --- ./src/backend/utils/adt/varlena.c Mon Jun 20 17:17:58 2005 *** *** 28,33 --- 28,34 #include utils/builtins.h #include utils/lsyscache.h #include utils/pg_locale.h + #include utils/syscache.h typedef struct varlena unknown; *** *** 2330,2333 --- 2331,2396 result_text = PG_STR_GET_TEXT(hexsum); PG_RETURN_TEXT_P(result_text); + } + + /* + * Show the (possibly compressed) length of a datum. + */ + Datum + pg_datum_length(PG_FUNCTION_ARGS) + { + + Datum value = PG_GETARG_DATUM(0); + int result; + + + if (fcinfo-flinfo-fn_extra == NULL) + { + /* +* On the first call lookup the datatype of the supplied argument +* and check if is a varlena. +*/ + Oid argtypeid = get_fn_expr_argtype(fcinfo-flinfo, 0); + HeapTuple tp; + int typlen = 0; + + + tp = SearchSysCache(TYPEOID, + ObjectIdGetDatum(argtypeid), + 0, 0, 0); + if (HeapTupleIsValid(tp)) + { + Form_pg_type typtup = (Form_pg_type) GETSTRUCT(tp); + typlen = typtup-typlen; + ReleaseSysCache(tp); + } + else + { + /* Oid not in pg_type, should never happen. */ + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), +errmsg(invalid typid: %u, argtypeid))); + } + + +
Re: [PATCHES] TODO Item - Return compressed length of TOAST datatypes
The next iteration - Hopefully I have got the idea basically right. I wonder if I have done the am I a varlena the long way.., pls advise if so! Cheers Mark Tom Lane wrote: My recollection of that discussion is that we just wanted something that would return the actual VARSIZE() of the datum. You're building something way too confusing ... A more interesting point is that the way you've declared the function, it will only work on text values, which is pretty limiting. Ideally we'd define this thing as pg_datum_length(any-varlena-type) returns int but there is no pseudotype corresponding to any varlena type. I was thinking about this the other day in connection with my proposal to make something that could return the TOAST value OID of an out-of-line datum. I think the only non-restrictive way to do it would be to declare the function as taking any, and then add a runtime check using the get_fn_expr_argtype() mechanism to test that what you've been given is in fact a varlena datatype. diff -Nacr ./src/backend/access/heap/tuptoaster.c.orig ./src/backend/access/heap/tuptoaster.c *** ./src/backend/access/heap/tuptoaster.c.orig Mon Mar 21 13:23:58 2005 --- ./src/backend/access/heap/tuptoaster.c Sun Jun 19 14:24:43 2005 *** *** 1436,1438 --- 1436,1482 return result; } + + /* -- + * toast_datum_size + * + *Show the (possibly compressed) size of a datum + * -- + */ + Size + toast_datum_size(Datum value) + { + + varattrib *attr = (varattrib *) DatumGetPointer(value); + Sizeresult; + + if (VARATT_IS_EXTERNAL(attr)) + { + /* +* Attribute is stored externally - If it is compressed too, +* then we need to get the external datum and calculate its size, +* otherwise we just use the external rawsize. +*/ + if (VARATT_IS_COMPRESSED(attr)) + { + varattrib *attrext = toast_fetch_datum(attr); + result = VARSIZE(attrext); + pfree(attrext); + } + else + { + result = attr-va_content.va_external.va_rawsize; + } + } + else + { + /* +* Attribute is stored inline either compressed or not, just +* calculate the size of the datum in either case. +*/ + result = VARSIZE(attr); + } + + return result; + + } diff -Nacr ./src/backend/utils/adt/Makefile.orig ./src/backend/utils/adt/Makefile *** ./src/backend/utils/adt/Makefile.orig Fri Apr 2 09:28:45 2004 --- ./src/backend/utils/adt/MakefileFri Jun 17 17:52:09 2005 *** *** 24,30 tid.o timestamp.o varbit.o varchar.o varlena.o version.o xid.o \ network.o mac.o inet_net_ntop.o inet_net_pton.o \ ri_triggers.o pg_lzcompress.o pg_locale.o formatting.o \ ! ascii.o quote.o pgstatfuncs.o encode.o like.o: like.c like_match.c --- 24,30 tid.o timestamp.o varbit.o varchar.o varlena.o version.o xid.o \ network.o mac.o inet_net_ntop.o inet_net_pton.o \ ri_triggers.o pg_lzcompress.o pg_locale.o formatting.o \ ! ascii.o quote.o pgstatfuncs.o encode.o toastfuncs.o like.o: like.c like_match.c diff -Nacr ./src/backend/utils/adt/toastfuncs.c.orig ./src/backend/utils/adt/toastfuncs.c *** ./src/backend/utils/adt/toastfuncs.c.orig Fri Jun 17 17:52:09 2005 --- ./src/backend/utils/adt/toastfuncs.cSun Jun 19 17:35:26 2005 *** *** 0 --- 1,74 + /*- + * + * toastfuncs.c + * Functions for accessing information about toasted data. + * + * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $PostgreSQL$ + * + *- + */ + + #include postgres.h + #include fmgr.h + + #include catalog/pg_type.h + #include access/xact.h + #include access/tuptoaster.h + #include utils/builtins.h + #include utils/syscache.h + + Datum + pg_datum_length(PG_FUNCTION_ARGS) + { + + Datum value = PG_GETARG_DATUM(0); + int size; + + + if (fcinfo-flinfo-fn_extra == NULL) + { + /* +* On the first call check lookup the datatype of the supplied argument +* and check if is a varlena . +*/ + Oid argtypeid = get_fn_expr_argtype(fcinfo-flinfo, 0); + HeapTuple tp; + int typlen; +
Re: [PATCHES] TODO Item - Return compressed length of TOAST datatypes (WIP)
Mark Kirkwood [EMAIL PROTECTED] writes: I thought I would have a look at: (Datatypes) Add function to return compressed length of TOAST data values. My recollection of that discussion is that we just wanted something that would return the actual VARSIZE() of the datum. You're building something way too confusing ... A more interesting point is that the way you've declared the function, it will only work on text values, which is pretty limiting. Ideally we'd define this thing as pg_datum_length(any-varlena-type) returns int but there is no pseudotype corresponding to any varlena type. I was thinking about this the other day in connection with my proposal to make something that could return the TOAST value OID of an out-of-line datum. I think the only non-restrictive way to do it would be to declare the function as taking any, and then add a runtime check using the get_fn_expr_argtype() mechanism to test that what you've been given is in fact a varlena datatype. regards, tom lane ---(end of broadcast)--- TIP 4: Don't 'kill -9' the postmaster
Re: [PATCHES] TODO Item - Return compressed length of TOAST datatypes
Tom Lane wrote: Mark Kirkwood [EMAIL PROTECTED] writes: I thought I would have a look at: (Datatypes) Add function to return compressed length of TOAST data values. My recollection of that discussion is that we just wanted something that would return the actual VARSIZE() of the datum. You're building something way too confusing ... I was guessing a little about exactly what was wanted - for some reason I couldn't access the mail archives for the last few days (however, can now). A more interesting point is that the way you've declared the function, it will only work on text values, which is pretty limiting. Ideally we'd define this thing as pg_datum_length(any-varlena-type) returns int but there is no pseudotype corresponding to any varlena type. I was thinking about this the other day in connection with my proposal to make something that could return the TOAST value OID of an out-of-line datum. I think the only non-restrictive way to do it would be to declare the function as taking any, and then add a runtime check using the get_fn_expr_argtype() mechanism to test that what you've been given is in fact a varlena datatype. Yes - was thinking I needed to check if the Datum was a varlena (and didn't know how to do it...), so thanks for the pointer! With these thoughts in mind, I'll have another go :-) Cheers Mark ---(end of broadcast)--- TIP 9: the planner will ignore your desire to choose an index scan if your joining column's datatypes do not match
[PATCHES] TODO Item - Return compressed length of TOAST datatypes (WIP)
I thought I would have a look at: (Datatypes) Add function to return compressed length of TOAST data values. A WIP patch is attached for comment (wanted to check I hadn't bitten off more than I could chew *before* asking questions!). A few questions come to mind: 1) The name - I have called it 'toast_compressed_length'. Seems longish - I'm wondering if just 'compressed_length' is ok? 2) What should be returned for toasted data that is not compressed (or plain stored data for that matter)? The WIP patch just gives the uncompressed size (I notice I may need to subtract VARHDRSZ in some cases). 3) What should be returned for non-varlena types? The WIP patch is treating everything as a varlena, so is returning incorrect information for that case. 4) The builtin is declared as immutable - I am not so sure about that (I am wondering if altering a column's storage from MAIN - EXTENDED and then updating the column to be itself will fool it). 5) Any multi-byte locale considerations? regards Mark diff -Nacr src/include/catalog/pg_proc.h.orig src/include/catalog/pg_proc.h *** src/include/catalog/pg_proc.h.orig Fri Jun 17 15:30:17 2005 --- src/include/catalog/pg_proc.h Fri Jun 17 17:08:18 2005 *** *** 3655,3660 --- 3655,3664 DATA(insert OID = 2560 ( pg_postmaster_start_time PGNSP PGUID 12 f f t f s 0 1184 _null_ _null_ _null_ pgsql_postmaster_start_time - _null_ )); DESCR(postmaster start time); + /* Toast compressed length */ + DATA(insert OID = 2561 ( toast_compressed_lengthPGNSP PGUID 12 f f t f i 1 23 25 _null_ _null_ _null_ toast_compressed_length - _null_ )); + DESCR(toast compressed length); + /* * Symbolic values for provolatile column: these indicate whether the result diff -Nacr src/include/access/tuptoaster.h.orig src/include/access/tuptoaster.h *** src/include/access/tuptoaster.h.origThu Jun 16 21:12:57 2005 --- src/include/access/tuptoaster.h Thu Jun 16 21:14:06 2005 *** *** 138,141 --- 138,149 */ extern Size toast_raw_datum_size(Datum value); + /* -- + * toast_compressed_datum_size - + * + *Return the compressed (toasted) size of a varlena datum + * -- + */ + extern Size toast_compressed_datum_size(Datum value); + #endif /* TUPTOASTER_H */ diff -Nacr src/include/utils/pg_lzcompress.h.orig src/include/utils/pg_lzcompress.h *** src/include/utils/pg_lzcompress.h.orig Thu Jun 16 21:21:37 2005 --- src/include/utils/pg_lzcompress.h Thu Jun 16 21:21:11 2005 *** *** 228,231 --- 228,238 extern intpglz_get_next_decomp_char_from_lzdata(PGLZ_DecompState *dstate); extern intpglz_get_next_decomp_char_from_plain(PGLZ_DecompState *dstate); + /* -- + * Function to get compressed size. + * Internal use only. + * -- + */ + extern intpglz_fetch_size(PGLZ_Header *source); + #endif /* _PG_LZCOMPRESS_H_ */ diff -Nacr src/include/utils/builtins.h.orig src/include/utils/builtins.h *** src/include/utils/builtins.h.orig Fri Jun 17 15:25:01 2005 --- src/include/utils/builtins.hFri Jun 17 15:27:30 2005 *** *** 828,831 --- 828,834 /* catalog/pg_conversion.c */ extern Datum pg_convert_using(PG_FUNCTION_ARGS); + /* toastfuncs.c */ + Datum toast_compressed_length(PG_FUNCTION_ARGS); + #endif /* BUILTINS_H */ diff -Nacr src/backend/access/heap/tuptoaster.c.orig src/backend/access/heap/tuptoaster.c *** src/backend/access/heap/tuptoaster.c.orig Thu Jun 16 20:56:59 2005 --- src/backend/access/heap/tuptoaster.cFri Jun 17 15:12:30 2005 *** *** 1436,1438 --- 1436,1499 return result; } + + /* -- + * toast_compressed_datum_size + * + *Show the compressed size of a datum + * -- + */ + Size + toast_compressed_datum_size(Datum value) + { + + + Sizesize; + varattrib *attr = (varattrib *) DatumGetPointer(value); + + if (!PointerIsValid(attr)) + { + /* +* No storage or NULL. +*/ + size = 0; + } + else if (VARATT_IS_EXTERNAL(attr)) + { + /* +* Attribute is stored externally +* If it is compressed too, then we need to get the external datum +* and interrogate *its* compressed size +* otherwise just use the external rawsize (i.e. no compression) +*/ + if (VARATT_IS_COMPRESSED(attr)) + { + varattrib *attrext = toast_fetch_datum(attr); + size = pglz_fetch_size((PGLZ_Header *)attrext); + pfree(attrext); + } + else + { + + size = attr-va_content.va_external.va_rawsize; + } + } + else if