Mark Kirkwood wrote:
> I did a few cleanups on the last patch. Please examine this one instead.
> The changes are:
>
> 1. Add documentation for pg_datum_length builtin.
> 2. Correct some typos in the code comments.
> 3. Move the code in toastfuncs.c to varlena.c as it is probably the
> correct place.
> 4. Use ereport instead of elog.
> 5. Quiet compiler warning in pg_datum_length.
I have modified your patch to simplify the logic, and renamed the function
to pg_column_size(), to be consistent with our soon-to-be-added
pg_relation/tablespace/database size functions from dbsize.
Here is a sample usage:
test=> CREATE TABLE test (x INT, y TEXT);
CREATE TABLE
test=> INSERT INTO test VALUES (4, repeat('x', 10000));
INSERT 0 1
test=> INSERT INTO test VALUES (4, repeat('x', 100000));
INSERT 0 1
test=> SELECT pg_column_size(x), pg_column_size(y) FROM test;
pg_column_size | pg_column_size
----------------+----------------
4 | 121
4 | 1152
(2 rows)
Interestingly, the 10-times larger column is also roughly 10 times larger in
storage. Do we have some limit on how many repeated values we can record?
Patch attached and applied.
--
Bruce Momjian | http://candle.pha.pa.us
[email protected] | (610) 359-1001
+ If your life is a hard drive, | 13 Roberts Road
+ Christ can be your backup. | Newtown Square, Pennsylvania 19073
Index: doc/src/sgml/func.sgml
===================================================================
RCS file: /cvsroot/pgsql/doc/src/sgml/func.sgml,v
retrieving revision 1.262
diff -c -c -r1.262 func.sgml
*** doc/src/sgml/func.sgml 29 Jun 2005 01:52:56 -0000 1.262
--- doc/src/sgml/func.sgml 6 Jul 2005 18:55:34 -0000
***************
*** 2187,2192 ****
--- 2187,2200 ----
</row>
<row>
+
<entry><literal><function>pg_column_size</function>(<parameter>string</parameter>)</literal></entry>
+ <entry><type>integer</type></entry>
+ <entry>Number of bytes required to store the value, which might be
compressed</entry>
+ <entry><literal>pg_column_size('jo\\000se'::bytea)</literal></entry>
+ <entry><literal>5</literal></entry>
+ </row>
+
+ <row>
<entry><literal><function>position</function>(<parameter>substring</parameter>
in <parameter>string</parameter>)</literal></entry>
<entry><type>integer</type></entry>
<entry>Location of specified substring</entry>
Index: src/backend/access/heap/tuptoaster.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v
retrieving revision 1.49
diff -c -c -r1.49 tuptoaster.c
*** src/backend/access/heap/tuptoaster.c 21 Mar 2005 01:23:58 -0000
1.49
--- src/backend/access/heap/tuptoaster.c 6 Jul 2005 18:55:35 -0000
***************
*** 1436,1438 ****
--- 1436,1480 ----
return result;
}
+
+ /* ----------
+ * toast_datum_size
+ *
+ * Return the size of a datum as it is stored (possibly compressed)
+ * ----------
+ */
+ Size
+ toast_datum_size(Datum value)
+ {
+
+ varattrib *attr = (varattrib *) DatumGetPointer(value);
+ Size result;
+
+ if (VARATT_IS_EXTERNAL(attr))
+ {
+ /*
+ * Attribute is stored externally. If it is also compressed, fetch
+ * the external datum, take its VARSIZE and free the fetched copy;
+ * otherwise just use the va_rawsize field of the external header.
+ */
+ if (VARATT_IS_COMPRESSED(attr))
+ {
+ varattrib *attrext =
toast_fetch_datum(attr);
+ result = VARSIZE(attrext);
+ pfree(attrext);
+ }
+ else
+ result = attr->va_content.va_external.va_rawsize;
+ }
+ else
+ {
+ /*
+ * Attribute is stored inline, compressed or not: the VARSIZE of
+ * the datum itself is used as the result in either case.
+ */
+ result = VARSIZE(attr);
+ }
+
+ return result;
+
+ }
Index: src/backend/utils/adt/varlena.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v
retrieving revision 1.124
diff -c -c -r1.124 varlena.c
*** src/backend/utils/adt/varlena.c 4 Jul 2005 18:56:44 -0000 1.124
--- src/backend/utils/adt/varlena.c 6 Jul 2005 18:55:36 -0000
***************
*** 28,33 ****
--- 28,34 ----
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/pg_locale.h"
+ #include "utils/syscache.h"
typedef struct varlena unknown;
***************
*** 2348,2350 ****
--- 2349,2395 ----
result_text = PG_STR_GET_TEXT(hexsum);
PG_RETURN_TEXT_P(result_text);
}
+
+ /*
+ * Return a datum's storage size: its fixed typlen, else varlena size sans header
+ */
+ Datum
+ pg_column_size(PG_FUNCTION_ARGS)
+ {
+ Datum value = PG_GETARG_DATUM(0);
+ int result;
+
+ /* fn_extra caches typlen; any value other than -1 is returned as the size. */
+ if (fcinfo->flinfo->fn_extra == NULL) /* first call? */
+ {
+ /* On the first call, look up the datatype of the supplied argument */
+ Oid argtypeid =
get_fn_expr_argtype(fcinfo->flinfo, 0);
+ HeapTuple tp;
+ int typlen;
+
+ tp = SearchSysCache(TYPEOID,
+
ObjectIdGetDatum(argtypeid),
+ 0, 0, 0);
+ if (!HeapTupleIsValid(tp))
+ {
+ /* Oid not in pg_type, should never happen. */
+ ereport(ERROR,
+ (errcode(ERRCODE_INTERNAL_ERROR),
+ errmsg("invalid typid: %u",
argtypeid)));
+ }
+
+ typlen = ((Form_pg_type)GETSTRUCT(tp))->typlen;
+ ReleaseSysCache(tp);
+ fcinfo->flinfo->fn_extra =
MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+
sizeof(int));
+ *(int *)fcinfo->flinfo->fn_extra = typlen;
+ }
+
+ if (*(int *)fcinfo->flinfo->fn_extra != -1)
+ PG_RETURN_INT32(*(int *)fcinfo->flinfo->fn_extra);
+ else
+ {
+ result = toast_datum_size(value) - VARHDRSZ;
+ PG_RETURN_INT32(result);
+ }
+ }
Index: src/include/access/tuptoaster.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/access/tuptoaster.h,v
retrieving revision 1.22
diff -c -c -r1.22 tuptoaster.h
*** src/include/access/tuptoaster.h 21 Mar 2005 01:24:04 -0000 1.22
--- src/include/access/tuptoaster.h 6 Jul 2005 18:55:37 -0000
***************
*** 138,141 ****
--- 138,149 ----
*/
extern Size toast_raw_datum_size(Datum value);
+ /* ----------
+ * toast_datum_size -
+ *
+ * Return the storage size of a varlena datum
+ * ----------
+ */
+ extern Size toast_datum_size(Datum value);
+
#endif /* TUPTOASTER_H */
Index: src/include/catalog/pg_proc.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/catalog/pg_proc.h,v
retrieving revision 1.373
diff -c -c -r1.373 pg_proc.h
*** src/include/catalog/pg_proc.h 1 Jul 2005 19:19:03 -0000 1.373
--- src/include/catalog/pg_proc.h 6 Jul 2005 18:55:43 -0000
***************
*** 3658,3663 ****
--- 3658,3667 ----
DATA(insert OID = 2560 ( pg_postmaster_start_time PGNSP PGUID 12 f f t f s 0
1184 "" _null_ _null_ _null_ pgsql_postmaster_start_time - _null_ ));
DESCR("postmaster start time");
+ /* Column storage size */
+ DATA(insert OID = 1269 ( pg_column_size PGNSP PGUID 12 f f t f i 1
23 "2276" _null_ _null_ _null_ pg_column_size - _null_ ));
+ DESCR("bytes required to store the value, perhaps with compression");
+
/* new functions for Y-direction rtree opclasses */
DATA(insert OID = 2562 ( box_below PGNSP PGUID 12 f f t f i 2
16 "603 603" _null_ _null_ _null_ box_below - _null_ ));
DESCR("is below");
Index: src/include/utils/builtins.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/utils/builtins.h,v
retrieving revision 1.258
diff -c -c -r1.258 builtins.h
*** src/include/utils/builtins.h 17 Jun 2005 22:32:50 -0000 1.258
--- src/include/utils/builtins.h 6 Jul 2005 18:55:43 -0000
***************
*** 601,606 ****
--- 601,607 ----
extern Datum byteapos(PG_FUNCTION_ARGS);
extern Datum bytea_substr(PG_FUNCTION_ARGS);
extern Datum bytea_substr_no_len(PG_FUNCTION_ARGS);
+ extern Datum pg_column_size(PG_FUNCTION_ARGS);
/* version.c */
extern Datum pgsql_version(PG_FUNCTION_ARGS);
---------------------------(end of broadcast)---------------------------
TIP 8: explain analyze is your friend