Dear PostgreSQL developers,
Please find attached a small patch to convert bytea to bit strings and
vice versa.
I used it in order to be able xor md5 results so as to checksum bundles of
tuples together. The MD5 result is an hexa text convertible to bytea with
decode, but then I was stuck...
ISTM that having these types explicitely castable may be useful to others,
hence this small contribution. The cast allows to work on a bytea at the
bit level and to perform bitwise operations.
./src/backend/utils/adt/varbit.c
- add two conversion functions
./src/include/catalog/pg_proc.h
- declare the above functions in the catalog
./src/include/catalog/pg_cast.h
- declare the 4 explicit casts
./src/test/regress/sql/bit.sql
- test all those new casts
./src/test/regress/expected/bit.out
- new regression results
./src/test/regress/expected/opr_sanity.out
- pg figures out that bit and varbit are binary compatible,
which is the case (well, at least I assumed it).
--
Fabien.
*** ./src/backend/utils/adt/varbit.c.orig Sun Mar 5 16:58:44 2006
--- ./src/backend/utils/adt/varbit.c Thu May 4 15:57:34 2006
***************
*** 1484,1486 ****
--- 1484,1554 ----
}
PG_RETURN_INT32(0);
}
+
+ /* create a bit string from a byte array.
+ */
+ Datum varbitfrombytea(PG_FUNCTION_ARGS)
+ {
+ bytea *arg = PG_GETARG_BYTEA_P(0);
+ int32 typmod = PG_GETARG_INT32(1); /* for ::BIT(10) syntax */
+ int datalen = VARSIZE(arg) - VARHDRSZ;
+ int bitlen, len, resdatalen, needlen;
+ VarBit *result;
+
+ /* truncate or expand if required */
+ if (typmod>=0)
+ {
+ bitlen = typmod;
+ resdatalen = (bitlen + BITS_PER_BYTE - 1) / BITS_PER_BYTE;
+ needlen = datalen>resdatalen? resdatalen: datalen;
+ }
+ else
+ {
+ resdatalen = datalen;
+ bitlen = BITS_PER_BYTE * datalen;
+ needlen = datalen;
+ }
+
+ len = VARBITTOTALLEN(bitlen);
+ result = (VarBit *) palloc(len);
+ VARATT_SIZEP(result) = len;
+ VARBITLEN(result) = bitlen;
+ memcpy(VARBITS(result), VARDATA(arg), needlen);
+
+ if (resdatalen > needlen)
+ {
+ char *ptr = VARBITS(result) + needlen;
+ while (needlen<resdatalen)
+ {
+ *ptr++ = '\000';
+ needlen++;
+ }
+ }
+
+ PG_RETURN_VARBIT_P(result);
+ }
+
+ /* create a byte array from a bit string.
+ */
+ Datum varbittobytea(PG_FUNCTION_ARGS)
+ {
+ VarBit *arg = PG_GETARG_VARBIT_P(0);
+ bool isExplicit = PG_GETARG_BOOL(2);
+ int bitlen = VARBITLEN(arg);
+ int datalen = (bitlen + BITS_PER_BYTE - 1) / BITS_PER_BYTE;
+ int len = datalen + VARHDRSZ;
+ bytea *result;
+
+ /* no implicit cast if data size is changed */
+ if (!isExplicit && (bitlen != BITS_PER_BYTE*datalen))
+ ereport(ERROR,
+ (errcode(ERRCODE_STRING_DATA_LENGTH_MISMATCH),
+ errmsg("bit length %d would be round up, use
explicit cast",
+ bitlen)));
+
+ result = (bytea *) palloc(len);
+ VARATT_SIZEP(result) = len;
+ memcpy(VARDATA(result), VARBITS(arg), datalen);
+
+ PG_RETURN_BYTEA_P(result);
+ }
*** ./src/include/catalog/pg_cast.h.orig Sun Mar 5 16:58:54 2006
--- ./src/include/catalog/pg_cast.h Thu May 4 16:16:49 2006
***************
*** 261,266 ****
--- 261,271 ----
DATA(insert ( 23 1560 1683 e ));
DATA(insert ( 1560 20 2076 e ));
DATA(insert ( 1560 23 1684 e ));
+ /* bytea to/from bit and varbit casts */
+ DATA(insert ( 1560 17 2790 e ));
+ DATA(insert ( 1562 17 2791 e ));
+ DATA(insert ( 17 1560 2792 e ));
+ DATA(insert ( 17 1562 2793 e ));
/*
* Cross-category casts to and from TEXT
*** ./src/include/catalog/pg_proc.h.orig Wed May 3 00:25:10 2006
--- ./src/include/catalog/pg_proc.h Thu May 4 15:57:34 2006
***************
*** 3856,3861 ****
--- 3856,3871 ----
DATA(insert OID = 2749 ( arraycontained PGNSP PGUID 12 f f t f i 2
16 "2277 2277" _null_ _null_ _null_ arraycontained - _null_ ));
DESCR("anyarray contained");
+ /* BYTEA <-> BIT/VARBIT */
+ DATA(insert OID = 2790 ( bytea PGNSP PGUID 12 f f t f i 3 17
"1560 23 16" _null_ _null_ _null_ varbittobytea - _null_));
+ DESCR("convert bit() to bytea");
+ DATA(insert OID = 2791 ( bytea PGNSP PGUID 12 f f t f i 3 17
"1562 23 16" _null_ _null_ _null_ varbittobytea - _null_));
+ DESCR("convert varbit() to bytea");
+ DATA(insert OID = 2792 ( tobit PGNSP PGUID 12 f f t f i 3 1560
"17 23 16" _null_ _null_ _null_ varbitfrombytea - _null_));
+ DESCR("convert bytea to bit()");
+ DATA(insert OID = 2793 ( varbit PGNSP PGUID 12 f f t f i 3 1562
"17 23 16" _null_ _null_ _null_ varbitfrombytea - _null_));
+ DESCR("convert bytea to varbit()");
+
/*
* Symbolic values for provolatile column: these indicate whether the result
* of a function is dependent *only* on the values of its explicit arguments,
*** ./src/test/regress/expected/bit.out.orig Sun Jul 27 06:53:11 2003
--- ./src/test/regress/expected/bit.out Thu May 4 16:37:04 2006
***************
*** 509,511 ****
--- 509,598 ----
DROP TABLE BIT_SHIFT_TABLE;
DROP TABLE VARBIT_SHIFT_TABLE;
+ -- bytea casts
+ -- x80
+ SELECT
+ (VARBIT '1')::BYTEA,
+ (BIT '1')::BYTEA,
+ (VARBIT '1000')::BYTEA,
+ (BIT '1000')::BYTEA,
+ (VARBIT '10000000')::BYTEA,
+ (BIT '10000000')::BYTEA;
+ bytea | bytea | bytea | bytea | bytea | bytea
+ -------+-------+-------+-------+-------+-------
+ \200 | \200 | \200 | \200 | \200 | \200
+ (1 row)
+
+ SELECT
+ (VARBIT '0100001101000001010011')::BYTEA AS "CAL",
+ (VARBIT '0101011')::BYTEA AS "V",
+ (VARBIT '010010010100111')::BYTEA AS "IN";
+ CAL | V | IN
+ -----+---+----
+ CAL | V | IN
+ (1 row)
+
+ -- bit and varbit casts
+ SELECT (BYTEA 'hello')::VARBIT AS "hello";
+ hello
+ ------------------------------------------
+ 0110100001100101011011000110110001101111
+ (1 row)
+
+ SELECT (BYTEA 'he')::VARBIT;
+ varbit
+ ------------------
+ 0110100001100101
+ (1 row)
+
+ SELECT (BYTEA 'he')::BIT(4) AS "restricted bit(4)";
+ restricted bit(4)
+ -------------------
+ 0110
+ (1 row)
+
+ SELECT (BYTEA 'he')::BIT(12) AS "restricted bit(12)";
+ restricted bit(12)
+ --------------------
+ 011010000110
+ (1 row)
+
+ SELECT (BYTEA 'he')::BIT(16) AS "bit(16)";
+ bit(16)
+ ------------------
+ 0110100001100101
+ (1 row)
+
+ SELECT (BYTEA 'he')::BIT(20) AS "expanded bit(20)";
+ expanded bit(20)
+ ----------------------
+ 01101000011001010000
+ (1 row)
+
+ SELECT (BYTEA 'he')::BIT(33) AS "expanded bit(33)";
+ expanded bit(33)
+ -----------------------------------
+ 011010000110010100000000000000000
+ (1 row)
+
+ -- bitwise operations available with bytea/bitstring casts
+ SELECT (BYTEA 'hello')::VARBIT & (BYTEA 'world')::VARBIT AS "hello & world";
+ hello & world
+ ------------------------------------------
+ 0110000001100101011000000110110001100100
+ (1 row)
+
+ SELECT (BYTEA 'hello')::BIT(40) & (BYTEA 'foo')::BIT(40) AS "hello & foo";
+ hello & foo
+ ------------------------------------------
+ 0110000001100101011011000000000000000000
+ (1 row)
+
+ SELECT
+ DECODE(MD5('hello'),'hex')::VARBIT # DECODE(MD5('world'),'hex')::VARBIT
+ AS "md5('hello') xor md5('world')";
+ md5('hello') xor
md5('world')
+
----------------------------------------------------------------------------------------------------------------------------------
+
00100000001110000111000000011101000111000011110100101011111100001110111000111010100111110001001111100010111000111111000001110101
+ (1 row)
+
*** ./src/test/regress/expected/opr_sanity.out.orig Tue May 2 13:28:56 2006
--- ./src/test/regress/expected/opr_sanity.out Thu May 4 16:40:25 2006
***************
*** 104,110 ****
------------+------------
25 | 1043
1114 | 1184
! (2 rows)
SELECT DISTINCT p1.proargtypes[0], p2.proargtypes[0]
FROM pg_proc AS p1, pg_proc AS p2
--- 104,111 ----
------------+------------
25 | 1043
1114 | 1184
! 1560 | 1562
! (3 rows)
SELECT DISTINCT p1.proargtypes[0], p2.proargtypes[0]
FROM pg_proc AS p1, pg_proc AS p2
*** ./src/test/regress/sql/bit.sql.orig Tue May 22 18:37:17 2001
--- ./src/test/regress/sql/bit.sql Thu May 4 15:57:34 2006
***************
*** 184,186 ****
--- 184,217 ----
DROP TABLE BIT_SHIFT_TABLE;
DROP TABLE VARBIT_SHIFT_TABLE;
+
+ -- bytea casts
+ -- x80
+ SELECT
+ (VARBIT '1')::BYTEA,
+ (BIT '1')::BYTEA,
+ (VARBIT '1000')::BYTEA,
+ (BIT '1000')::BYTEA,
+ (VARBIT '10000000')::BYTEA,
+ (BIT '10000000')::BYTEA;
+
+ SELECT
+ (VARBIT '0100001101000001010011')::BYTEA AS "CAL",
+ (VARBIT '0101011')::BYTEA AS "V",
+ (VARBIT '010010010100111')::BYTEA AS "IN";
+
+ -- bit and varbit casts
+ SELECT (BYTEA 'hello')::VARBIT AS "hello";
+ SELECT (BYTEA 'he')::VARBIT;
+ SELECT (BYTEA 'he')::BIT(4) AS "restricted bit(4)";
+ SELECT (BYTEA 'he')::BIT(12) AS "restricted bit(12)";
+ SELECT (BYTEA 'he')::BIT(16) AS "bit(16)";
+ SELECT (BYTEA 'he')::BIT(20) AS "expanded bit(20)";
+ SELECT (BYTEA 'he')::BIT(33) AS "expanded bit(33)";
+
+ -- bitwise operations available with bytea/bitstring casts
+ SELECT (BYTEA 'hello')::VARBIT & (BYTEA 'world')::VARBIT AS "hello & world";
+ SELECT (BYTEA 'hello')::BIT(40) & (BYTEA 'foo')::BIT(40) AS "hello & foo";
+ SELECT
+ DECODE(MD5('hello'),'hex')::VARBIT # DECODE(MD5('world'),'hex')::VARBIT
+ AS "md5('hello') xor md5('world')";
---------------------------(end of broadcast)---------------------------
TIP 4: Have you searched our list archives?
http://archives.postgresql.org