Dear PostgreSQL developers,

Please find attached a small patch to convert bytea to bit strings and vice versa.

I used it in order to be able xor md5 results so as to checksum bundles of tuples together. The MD5 result is an hexa text convertible to bytea with decode, but then I was stuck...

ISTM that having these types explicitely castable may be useful to others, hence this small contribution. The cast allows to work on a bytea at the bit level and to perform bitwise operations.

./src/backend/utils/adt/varbit.c
 - add two conversion functions

./src/include/catalog/pg_proc.h
 - declare the above functions in the catalog

./src/include/catalog/pg_cast.h
 - declare the 4 explicit casts

./src/test/regress/sql/bit.sql
 - test all those new casts

./src/test/regress/expected/bit.out
 - new regression results

./src/test/regress/expected/opr_sanity.out
 - pg figures out that bit and varbit are binary compatible,
   which is the case (well, at least I assumed it).

--
Fabien.
*** ./src/backend/utils/adt/varbit.c.orig       Sun Mar  5 16:58:44 2006
--- ./src/backend/utils/adt/varbit.c    Thu May  4 15:57:34 2006
***************
*** 1484,1486 ****
--- 1484,1554 ----
        }
        PG_RETURN_INT32(0);
  }
+ 
+ /* create a bit string from a byte array.
+  */
+ Datum varbitfrombytea(PG_FUNCTION_ARGS)
+ {
+       bytea   *arg = PG_GETARG_BYTEA_P(0);
+       int32   typmod = PG_GETARG_INT32(1); /* for ::BIT(10) syntax */
+       int             datalen = VARSIZE(arg) - VARHDRSZ;
+       int             bitlen, len, resdatalen, needlen;
+       VarBit  *result;
+ 
+       /* truncate or expand if required */
+       if (typmod>=0) 
+       {
+               bitlen = typmod;
+               resdatalen = (bitlen + BITS_PER_BYTE - 1) / BITS_PER_BYTE;
+               needlen = datalen>resdatalen? resdatalen: datalen;
+       }
+       else
+       {
+               resdatalen = datalen;
+               bitlen = BITS_PER_BYTE * datalen;
+               needlen = datalen;
+       }
+       
+       len = VARBITTOTALLEN(bitlen);
+       result = (VarBit *) palloc(len);
+       VARATT_SIZEP(result) = len;
+       VARBITLEN(result) = bitlen;
+       memcpy(VARBITS(result), VARDATA(arg), needlen);
+       
+       if (resdatalen > needlen)
+       {
+               char *ptr = VARBITS(result) + needlen;
+               while (needlen<resdatalen)
+               {
+                       *ptr++ = '\000';
+                       needlen++;
+               }
+       }
+       
+       PG_RETURN_VARBIT_P(result);
+ }
+ 
+ /* create a byte array from a bit string.
+  */
+ Datum varbittobytea(PG_FUNCTION_ARGS)
+ {
+       VarBit  *arg = PG_GETARG_VARBIT_P(0);
+       bool    isExplicit = PG_GETARG_BOOL(2);
+       int             bitlen = VARBITLEN(arg);
+       int             datalen = (bitlen + BITS_PER_BYTE - 1) / BITS_PER_BYTE;
+       int             len = datalen + VARHDRSZ;
+       bytea   *result;
+   
+       /* no implicit cast if data size is changed */
+       if (!isExplicit && (bitlen != BITS_PER_BYTE*datalen))
+               ereport(ERROR,
+                               (errcode(ERRCODE_STRING_DATA_LENGTH_MISMATCH),
+                                errmsg("bit length %d would be round up, use 
explicit cast",
+                                               bitlen)));
+       
+       result = (bytea *) palloc(len);
+       VARATT_SIZEP(result) = len;
+       memcpy(VARDATA(result), VARBITS(arg), datalen);
+       
+       PG_RETURN_BYTEA_P(result);
+ }
*** ./src/include/catalog/pg_cast.h.orig        Sun Mar  5 16:58:54 2006
--- ./src/include/catalog/pg_cast.h     Thu May  4 16:16:49 2006
***************
*** 261,266 ****
--- 261,271 ----
  DATA(insert ( 23 1560 1683 e ));
  DATA(insert ( 1560     20 2076 e ));
  DATA(insert ( 1560     23 1684 e ));
+ /* bytea to/from bit and varbit casts */
+ DATA(insert ( 1560   17 2790 e ));
+ DATA(insert ( 1562   17 2791 e ));
+ DATA(insert (   17 1560 2792 e ));
+ DATA(insert (   17 1562 2793 e ));
  
  /*
   * Cross-category casts to and from TEXT
*** ./src/include/catalog/pg_proc.h.orig        Wed May  3 00:25:10 2006
--- ./src/include/catalog/pg_proc.h     Thu May  4 15:57:34 2006
***************
*** 3856,3861 ****
--- 3856,3871 ----
  DATA(insert OID = 2749 (  arraycontained         PGNSP PGUID 12 f f t f i 2 
16 "2277 2277" _null_ _null_ _null_ arraycontained - _null_ ));
  DESCR("anyarray contained");
  
+ /* BYTEA <-> BIT/VARBIT */
+ DATA(insert OID = 2790 ( bytea             PGNSP PGUID 12 f f t f i 3 17 
"1560 23 16" _null_ _null_ _null_ varbittobytea - _null_));
+ DESCR("convert bit() to bytea");
+ DATA(insert OID = 2791 ( bytea             PGNSP PGUID 12 f f t f i 3 17 
"1562 23 16" _null_ _null_ _null_ varbittobytea - _null_));
+ DESCR("convert varbit() to bytea");
+ DATA(insert OID = 2792 ( tobit             PGNSP PGUID 12 f f t f i 3 1560 
"17 23 16" _null_ _null_ _null_ varbitfrombytea - _null_));
+ DESCR("convert bytea to bit()");
+ DATA(insert OID = 2793 ( varbit             PGNSP PGUID 12 f f t f i 3 1562 
"17 23 16" _null_ _null_ _null_ varbitfrombytea - _null_));
+ DESCR("convert bytea to varbit()");
+ 
  /*
   * Symbolic values for provolatile column: these indicate whether the result
   * of a function is dependent *only* on the values of its explicit arguments,
*** ./src/test/regress/expected/bit.out.orig    Sun Jul 27 06:53:11 2003
--- ./src/test/regress/expected/bit.out Thu May  4 16:37:04 2006
***************
*** 509,511 ****
--- 509,598 ----
  
  DROP TABLE BIT_SHIFT_TABLE;
  DROP TABLE VARBIT_SHIFT_TABLE;
+ -- bytea casts
+ -- x80
+ SELECT 
+   (VARBIT '1')::BYTEA, 
+   (BIT '1')::BYTEA, 
+   (VARBIT '1000')::BYTEA, 
+   (BIT '1000')::BYTEA,
+   (VARBIT '10000000')::BYTEA,
+   (BIT '10000000')::BYTEA;
+  bytea | bytea | bytea | bytea | bytea | bytea 
+ -------+-------+-------+-------+-------+-------
+  \200  | \200  | \200  | \200  | \200  | \200
+ (1 row)
+ 
+ SELECT 
+   (VARBIT '0100001101000001010011')::BYTEA AS "CAL",
+   (VARBIT '0101011')::BYTEA AS "V",
+   (VARBIT '010010010100111')::BYTEA AS "IN";
+  CAL | V | IN 
+ -----+---+----
+  CAL | V | IN
+ (1 row)
+ 
+ -- bit and varbit casts
+ SELECT (BYTEA 'hello')::VARBIT AS "hello";
+                   hello                   
+ ------------------------------------------
+  0110100001100101011011000110110001101111
+ (1 row)
+ 
+ SELECT (BYTEA 'he')::VARBIT;
+       varbit      
+ ------------------
+  0110100001100101
+ (1 row)
+ 
+ SELECT (BYTEA 'he')::BIT(4) AS "restricted bit(4)";
+  restricted bit(4) 
+ -------------------
+  0110
+ (1 row)
+ 
+ SELECT (BYTEA 'he')::BIT(12) AS "restricted bit(12)";
+  restricted bit(12) 
+ --------------------
+  011010000110
+ (1 row)
+ 
+ SELECT (BYTEA 'he')::BIT(16) AS "bit(16)";
+      bit(16)      
+ ------------------
+  0110100001100101
+ (1 row)
+ 
+ SELECT (BYTEA 'he')::BIT(20) AS "expanded bit(20)";
+    expanded bit(20)   
+ ----------------------
+  01101000011001010000
+ (1 row)
+ 
+ SELECT (BYTEA 'he')::BIT(33) AS "expanded bit(33)";
+          expanded bit(33)          
+ -----------------------------------
+  011010000110010100000000000000000
+ (1 row)
+ 
+ -- bitwise operations available with bytea/bitstring casts
+ SELECT (BYTEA 'hello')::VARBIT & (BYTEA 'world')::VARBIT AS "hello & world";
+               hello & world               
+ ------------------------------------------
+  0110000001100101011000000110110001100100
+ (1 row)
+ 
+ SELECT (BYTEA 'hello')::BIT(40) & (BYTEA 'foo')::BIT(40) AS "hello & foo";
+                hello & foo                
+ ------------------------------------------
+  0110000001100101011011000000000000000000
+ (1 row)
+ 
+ SELECT 
+   DECODE(MD5('hello'),'hex')::VARBIT # DECODE(MD5('world'),'hex')::VARBIT
+   AS "md5('hello') xor md5('world')";
+                                                   md5('hello') xor 
md5('world')                                                   
+ 
----------------------------------------------------------------------------------------------------------------------------------
+  
00100000001110000111000000011101000111000011110100101011111100001110111000111010100111110001001111100010111000111111000001110101
+ (1 row)
+ 
*** ./src/test/regress/expected/opr_sanity.out.orig     Tue May  2 13:28:56 2006
--- ./src/test/regress/expected/opr_sanity.out  Thu May  4 16:40:25 2006
***************
*** 104,110 ****
  ------------+------------
           25 |       1043
         1114 |       1184
! (2 rows)
  
  SELECT DISTINCT p1.proargtypes[0], p2.proargtypes[0]
  FROM pg_proc AS p1, pg_proc AS p2
--- 104,111 ----
  ------------+------------
           25 |       1043
         1114 |       1184
!        1560 |       1562
! (3 rows)
  
  SELECT DISTINCT p1.proargtypes[0], p2.proargtypes[0]
  FROM pg_proc AS p1, pg_proc AS p2
*** ./src/test/regress/sql/bit.sql.orig Tue May 22 18:37:17 2001
--- ./src/test/regress/sql/bit.sql      Thu May  4 15:57:34 2006
***************
*** 184,186 ****
--- 184,217 ----
  
  DROP TABLE BIT_SHIFT_TABLE;
  DROP TABLE VARBIT_SHIFT_TABLE;
+ 
+ -- bytea casts
+ -- x80
+ SELECT 
+   (VARBIT '1')::BYTEA, 
+   (BIT '1')::BYTEA, 
+   (VARBIT '1000')::BYTEA, 
+   (BIT '1000')::BYTEA,
+   (VARBIT '10000000')::BYTEA,
+   (BIT '10000000')::BYTEA;
+ 
+ SELECT 
+   (VARBIT '0100001101000001010011')::BYTEA AS "CAL",
+   (VARBIT '0101011')::BYTEA AS "V",
+   (VARBIT '010010010100111')::BYTEA AS "IN";
+ 
+ -- bit and varbit casts
+ SELECT (BYTEA 'hello')::VARBIT AS "hello";
+ SELECT (BYTEA 'he')::VARBIT;
+ SELECT (BYTEA 'he')::BIT(4) AS "restricted bit(4)";
+ SELECT (BYTEA 'he')::BIT(12) AS "restricted bit(12)";
+ SELECT (BYTEA 'he')::BIT(16) AS "bit(16)";
+ SELECT (BYTEA 'he')::BIT(20) AS "expanded bit(20)";
+ SELECT (BYTEA 'he')::BIT(33) AS "expanded bit(33)";
+ 
+ -- bitwise operations available with bytea/bitstring casts
+ SELECT (BYTEA 'hello')::VARBIT & (BYTEA 'world')::VARBIT AS "hello & world";
+ SELECT (BYTEA 'hello')::BIT(40) & (BYTEA 'foo')::BIT(40) AS "hello & foo";
+ SELECT 
+   DECODE(MD5('hello'),'hex')::VARBIT # DECODE(MD5('world'),'hex')::VARBIT
+   AS "md5('hello') xor md5('world')";
---------------------------(end of broadcast)---------------------------
TIP 4: Have you searched our list archives?

               http://archives.postgresql.org

Reply via email to