Mark Kirkwood wrote:
> I did a few cleanups on the last patch. Please examine this one instead.
> The changes are:
> 
> 1. Add documentation for pg_datum_length builtin.
> 2. Correct some typos in the code comments.
> 3. Move the code in toastfuncs.c to varlena.c as it is probably the
> correct place.
> 4. Use ereport instead of elog.
> 5. Quiet compiler warning in pg_datum_length.

I have modified your patch to simplify the logic, and renamed the
function from pg_datum_length() to pg_column_size(), to be consistent
with our soon-to-be-added pg_relation/tablespace/database functions from dbsize.

Here is a sample usage:
        
        test=> CREATE TABLE test (x INT, y TEXT);
        CREATE TABLE
        test=> INSERT INTO test VALUES (4, repeat('x', 10000));
        INSERT 0 1
        test=> INSERT INTO test VALUES (4, repeat('x', 100000));
        INSERT 0 1
        test=> SELECT pg_column_size(x), pg_column_size(y) FROM test;
         pg_column_size | pg_column_size
        ----------------+----------------
                      4 |            121
                      4 |           1152
        (2 rows)

Interestingly, the 10-times larger column is also roughly 10 times larger
in storage (121 vs. 1152 bytes), even though both values are just one
repeated character. Do we have some limit on how many repeated values we
can record?
        
Patch attached and applied.

-- 
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073
Index: doc/src/sgml/func.sgml
===================================================================
RCS file: /cvsroot/pgsql/doc/src/sgml/func.sgml,v
retrieving revision 1.262
diff -c -c -r1.262 func.sgml
*** doc/src/sgml/func.sgml      29 Jun 2005 01:52:56 -0000      1.262
--- doc/src/sgml/func.sgml      6 Jul 2005 18:55:34 -0000
***************
*** 2187,2192 ****
--- 2187,2200 ----
        </row>
  
        <row>
+        
<entry><literal><function>pg_column_size</function>(<parameter>string</parameter>)</literal></entry>
+        <entry><type>integer</type></entry>
+        <entry>Number of bytes required to store the value, which might be 
compressed</entry>
+        <entry><literal>pg_column_size('jo\\000se'::bytea)</literal></entry>
+        <entry><literal>5</literal></entry>
+       </row>
+ 
+       <row>
         
<entry><literal><function>position</function>(<parameter>substring</parameter> 
in <parameter>string</parameter>)</literal></entry>
         <entry><type>integer</type></entry>
         <entry>Location of specified substring</entry>
Index: src/backend/access/heap/tuptoaster.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v
retrieving revision 1.49
diff -c -c -r1.49 tuptoaster.c
*** src/backend/access/heap/tuptoaster.c        21 Mar 2005 01:23:58 -0000      
1.49
--- src/backend/access/heap/tuptoaster.c        6 Jul 2005 18:55:35 -0000
***************
*** 1436,1438 ****
--- 1436,1480 ----
  
        return result;
  }
+ 
+ /* ----------
+  * toast_datum_size
+  *
+  *    Show the (possibly compressed) size of a datum
+  * ----------
+  */
+ Size 
+ toast_datum_size(Datum value)
+ {
+ 
+       varattrib       *attr = (varattrib *) DatumGetPointer(value);
+       Size            result;
+ 
+       if (VARATT_IS_EXTERNAL(attr))
+       {
+               /*
+                * Attribute is stored externally - If it is compressed too, 
+                * then we need to get the external datum and calculate its 
size,
+                * otherwise we just use the external rawsize.
+                */
+               if (VARATT_IS_COMPRESSED(attr))
+               {
+                       varattrib               *attrext = 
toast_fetch_datum(attr);
+                       result = VARSIZE(attrext);
+                       pfree(attrext);
+               }
+               else
+                       result = attr->va_content.va_external.va_rawsize;
+       }
+       else
+       {
+               /*
+                * Attribute is stored inline either compressed or not, just
+                * calculate the size of the datum in either case.
+                */
+               result = VARSIZE(attr);
+       }
+ 
+       return result;
+       
+ }
Index: src/backend/utils/adt/varlena.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v
retrieving revision 1.124
diff -c -c -r1.124 varlena.c
*** src/backend/utils/adt/varlena.c     4 Jul 2005 18:56:44 -0000       1.124
--- src/backend/utils/adt/varlena.c     6 Jul 2005 18:55:36 -0000
***************
*** 28,33 ****
--- 28,34 ----
  #include "utils/builtins.h"
  #include "utils/lsyscache.h"
  #include "utils/pg_locale.h"
+ #include "utils/syscache.h"
  
  
  typedef struct varlena unknown;
***************
*** 2348,2350 ****
--- 2349,2395 ----
        result_text = PG_STR_GET_TEXT(hexsum);
        PG_RETURN_TEXT_P(result_text);
  }
+ 
+ /* 
+  * Return the length of a datum, possibly compressed
+  */
+ Datum
+ pg_column_size(PG_FUNCTION_ARGS)
+ {
+       Datum                   value = PG_GETARG_DATUM(0);
+       int                             result;
+ 
+       /*      fn_extra stores the fixed column length, or -1 for varlena. */
+       if (fcinfo->flinfo->fn_extra == NULL)   /* first call? */
+       {
+               /* On the first call lookup the datatype of the supplied 
argument */
+               Oid                             argtypeid = 
get_fn_expr_argtype(fcinfo->flinfo, 0);
+               HeapTuple               tp;
+               int                             typlen;
+ 
+               tp = SearchSysCache(TYPEOID,
+                                                       
ObjectIdGetDatum(argtypeid),
+                                                       0, 0, 0);
+               if (!HeapTupleIsValid(tp))
+               {
+                       /* Oid not in pg_type, should never happen. */
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INTERNAL_ERROR),
+                                        errmsg("invalid typid: %u", 
argtypeid)));
+               }
+               
+               typlen = ((Form_pg_type)GETSTRUCT(tp))->typlen;
+               ReleaseSysCache(tp);
+               fcinfo->flinfo->fn_extra = 
MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+                                                                               
                          sizeof(int));
+               *(int *)fcinfo->flinfo->fn_extra = typlen;
+       }
+ 
+       if (*(int *)fcinfo->flinfo->fn_extra != -1)
+               PG_RETURN_INT32(*(int *)fcinfo->flinfo->fn_extra);
+       else
+       {
+               result = toast_datum_size(value) - VARHDRSZ;
+               PG_RETURN_INT32(result);
+       }
+ }
Index: src/include/access/tuptoaster.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/access/tuptoaster.h,v
retrieving revision 1.22
diff -c -c -r1.22 tuptoaster.h
*** src/include/access/tuptoaster.h     21 Mar 2005 01:24:04 -0000      1.22
--- src/include/access/tuptoaster.h     6 Jul 2005 18:55:37 -0000
***************
*** 138,141 ****
--- 138,149 ----
   */
  extern Size toast_raw_datum_size(Datum value);
  
+ /* ----------
+  * toast_datum_size -
+  *
+  *    Return the storage size of a varlena datum
+  * ----------
+  */
+ extern Size toast_datum_size(Datum value);
+ 
  #endif   /* TUPTOASTER_H */
Index: src/include/catalog/pg_proc.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/catalog/pg_proc.h,v
retrieving revision 1.373
diff -c -c -r1.373 pg_proc.h
*** src/include/catalog/pg_proc.h       1 Jul 2005 19:19:03 -0000       1.373
--- src/include/catalog/pg_proc.h       6 Jul 2005 18:55:43 -0000
***************
*** 3658,3663 ****
--- 3658,3667 ----
  DATA(insert OID = 2560 (  pg_postmaster_start_time PGNSP PGUID 12 f f t f s 0 
1184 "" _null_ _null_ _null_ pgsql_postmaster_start_time - _null_ ));
  DESCR("postmaster start time");
  
+ /* Column storage size */
+ DATA(insert OID = 1269 (  pg_column_size         PGNSP PGUID 12 f f t f i 1 
23 "2276" _null_ _null_ _null_  pg_column_size - _null_ ));
+ DESCR("bytes required to store the value, perhaps with compression");
+ 
  /* new functions for Y-direction rtree opclasses */
  DATA(insert OID = 2562 (  box_below              PGNSP PGUID 12 f f t f i 2 
16 "603 603" _null_ _null_ _null_ box_below - _null_ ));
  DESCR("is below");
Index: src/include/utils/builtins.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/utils/builtins.h,v
retrieving revision 1.258
diff -c -c -r1.258 builtins.h
*** src/include/utils/builtins.h        17 Jun 2005 22:32:50 -0000      1.258
--- src/include/utils/builtins.h        6 Jul 2005 18:55:43 -0000
***************
*** 601,606 ****
--- 601,607 ----
  extern Datum byteapos(PG_FUNCTION_ARGS);
  extern Datum bytea_substr(PG_FUNCTION_ARGS);
  extern Datum bytea_substr_no_len(PG_FUNCTION_ARGS);
+ extern Datum pg_column_size(PG_FUNCTION_ARGS);
  
  /* version.c */
  extern Datum pgsql_version(PG_FUNCTION_ARGS);
---------------------------(end of broadcast)---------------------------
TIP 8: explain analyze is your friend

Reply via email to