On Wednesday 06 May 2009 18:47:57 Tom Lane wrote:
> So the ambiguous-input problem is solved if we define the new format(s)
> to be started by backslash and something that the old code would reject.
> I'd keep it short, like "\x", but there's still room for multiple
> formats if anyone really wants to go to the trouble.
Here is a first cut at a new hex bytea input and output format. Example:
SET bytea_output_hex = true;
SELECT E'\\xDeAdBeEf'::bytea;
bytea
------------
\xdeadbeef
(1 row)
Bernd did some performance testing for me, and it looked pretty good.
Questions:
Should this be the default format?
Should the configuration parameter be a boolean or an enum, opening
possibilities for other formats?
diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml
index c944d8f..a6ac9c8 100644
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL$ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/datatype.sgml,v 1.237 2009/04/27 16:27:35 momjian Exp $ -->
<chapter id="datatype">
<title id="datatype-title">Data Types</title>
@@ -1189,6 +1189,63 @@ SELECT b, char_length(b) FROM test2;
</para>
<para>
+ The <type>bytea</type> type supports two external formats for
+ input and output: the traditional bytea format that is particular
+ to PostgreSQL, and the hex format. Both of these are always
+ accepted on input. The output format depends on the configuration
+ parameter bytea_output_format; the default is hex. (Note that the
+ hex format was introduced in PostgreSQL 8.5; so earlier version
+ and some tools don't understand it.)
+ </para>
+
+ <para>
+ The <acronym>SQL</acronym> standard defines a different binary
+ string type, called <type>BLOB</type> or <type>BINARY LARGE
+ OBJECT</type>. The input format is different from
+ <type>bytea</type>, but the provided functions and operators are
+ mostly the same.
+ </para>
+
+ <sect2>
+ <title>Hex Format</title>
+
+ <para>
+ The hex format encodes the binary data as 2 hexadecimal digits per
+ byte, highest significant nibble first. The entire string ist
+ preceded by the sequence <literal>\x</literal> (to distinguish it
+ from the bytea format). In SQL literals, the backslash may need
+ to be escaped, but it is one logical backslash as far as the
+ <type>bytea</type> type is concerned. The hex format is compatible with a wide
+ range of external applications and protocols, and it tends to be
+ faster than the traditional bytea format, so its use is
+ somewhat preferrable.
+ </para>
+
+ <para>
+ Example:
+<programlisting>
+SELECT E'\\xDEADBEEF';
+</programlisting>
+ </para>
+ </sect2>
+
+ <sect2>
+ <title>Traditional Bytea Format</title>
+
+ <para>
+ The traditional bytea format takes the approach of representing a
+ binary string as a sequence of ASCII characters and escaping those
+ bytes that cannot be represented as an ASCII character by a
+ special escape sequence. If, from the point of view of the
+ application, representing bytes as characters makes sense, then
+ this representation can be convenient, but in practice it is
+ usually confusing becauses it fuzzes up the distinction between
+ binary strings and characters strings, and the particular escape
+ mechanism that was chosen is also somewhat unwieldy. So this
+ format should probably not be used for most new applications.
+ </para>
+
+ <para>
When entering <type>bytea</type> values, octets of certain
values <emphasis>must</emphasis> be escaped (but all octet
values <emphasis>can</emphasis> be escaped) when used as part
@@ -1341,14 +1398,7 @@ SELECT b, char_length(b) FROM test2;
have to escape line feeds and carriage returns if your interface
automatically translates these.
</para>
-
- <para>
- The <acronym>SQL</acronym> standard defines a different binary
- string type, called <type>BLOB</type> or <type>BINARY LARGE
- OBJECT</type>. The input format is different from
- <type>bytea</type>, but the provided functions and operators are
- mostly the same.
- </para>
+ </sect2>
</sect1>
diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c
index eed799a..b8a3cef 100644
--- a/src/backend/utils/adt/encode.c
+++ b/src/backend/utils/adt/encode.c
@@ -122,8 +122,8 @@ static const int8 hexlookup[128] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
-static unsigned
-hex_encode(const char *src, unsigned len, char *dst)
+size_t
+hex_encode(const char *src, size_t len, char *dst)
{
const char *end = src + len;
@@ -152,8 +152,8 @@ get_hex(char c)
return (char) res;
}
-static unsigned
-hex_decode(const char *src, unsigned len, char *dst)
+size_t
+hex_decode(const char *src, size_t len, char *dst)
{
const char *s,
*srcend;
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 4cf3966..3c24686 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -62,6 +62,8 @@ static text *text_substring(Datum str,
bool length_not_specified);
static void appendStringInfoText(StringInfo str, const text *t);
+bool bytea_output_hex = true;
+
/*****************************************************************************
* CONVERSION ROUTINES EXPORTED FOR USE BY C CODE *
@@ -189,6 +191,18 @@ byteain(PG_FUNCTION_ARGS)
int byte;
bytea *result;
+ if (inputText[0] == '\\' && inputText[1] == 'x')
+ {
+ size_t len = strlen(inputText);
+
+ byte = (len - 2)/2 + VARHDRSZ;
+ result = palloc(byte);
+ SET_VARSIZE(result, byte);
+ hex_decode(inputText + 2, len - 2, VARDATA(result));
+
+ PG_RETURN_BYTEA_P(result);
+ }
+
for (byte = 0, tp = inputText; *tp != '\0'; byte++)
{
if (tp[0] != '\\')
@@ -269,8 +283,17 @@ byteaout(PG_FUNCTION_ARGS)
char *result;
char *vp;
char *rp;
- int val; /* holds unprintable chars */
int i;
+
+ if (bytea_output_hex)
+ {
+ rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
+ *rp++ = '\\';
+ *rp++ = 'x';
+ rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
+ }
+ else
+ {
int len;
len = 1; /* empty string has 1 char */
@@ -295,6 +318,8 @@ byteaout(PG_FUNCTION_ARGS)
}
else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
{
+ int val; /* holds unprintable chars */
+
val = *vp;
rp[0] = '\\';
rp[3] = DIG(val & 07);
@@ -307,6 +332,7 @@ byteaout(PG_FUNCTION_ARGS)
else
*rp++ = *vp;
}
+ }
*rp = '\0';
PG_RETURN_CSTRING(result);
}
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 4f06725..19d07ff 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -584,6 +584,14 @@ const char *const config_type_names[] =
static struct config_bool ConfigureNamesBool[] =
{
{
+ {"bytea_output_hex", PGC_USERSET, CLIENT_CONN_OTHER,
+ gettext_noop("Sets the bytea output format to the hex format."),
+ NULL
+ },
+ &bytea_output_hex,
+ true, NULL, NULL
+ },
+ {
{"enable_seqscan", PGC_USERSET, QUERY_TUNING_METHOD,
gettext_noop("Enables the planner's use of sequential-scan plans."),
NULL
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index c1b9393..d91a54c 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -704,6 +704,8 @@ extern Datum unknownout(PG_FUNCTION_ARGS);
extern Datum unknownrecv(PG_FUNCTION_ARGS);
extern Datum unknownsend(PG_FUNCTION_ARGS);
+extern bool bytea_output_hex;
+
extern Datum byteain(PG_FUNCTION_ARGS);
extern Datum byteaout(PG_FUNCTION_ARGS);
extern Datum bytearecv(PG_FUNCTION_ARGS);
@@ -728,6 +730,9 @@ extern Datum bytea_substr(PG_FUNCTION_ARGS);
extern Datum bytea_substr_no_len(PG_FUNCTION_ARGS);
extern Datum pg_column_size(PG_FUNCTION_ARGS);
+extern size_t hex_encode(const char *src, size_t len, char *dst);
+extern size_t hex_decode(const char *src, size_t len, char *dst);
+
/* version.c */
extern Datum pgsql_version(PG_FUNCTION_ARGS);
diff --git a/src/test/regress/expected/conversion.out b/src/test/regress/expected/conversion.out
index e80e1a4..3742dc0 100644
--- a/src/test/regress/expected/conversion.out
+++ b/src/test/regress/expected/conversion.out
@@ -1,3 +1,4 @@
+SET bytea_output_hex TO false;
--
-- create user defined conversion
--
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index be8eb91..0c1fc41 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -97,6 +97,87 @@ LINE 1: SELECT U&'wrong: +0061' UESCAPE '+';
^
DETAIL: String constants with Unicode escapes cannot be used when standard_conforming_strings is off.
RESET standard_conforming_strings;
+-- bytea
+SET bytea_output_hex = true;
+SELECT E'\\xDeAdBeEf'::bytea;
+ bytea
+------------
+ \xdeadbeef
+(1 row)
+
+SELECT E'\\xDeAdBeE'::bytea;
+ERROR: invalid hexadecimal data: odd number of digits
+LINE 1: SELECT E'\\xDeAdBeE'::bytea;
+ ^
+SELECT E'\\xDeAdBeEx'::bytea;
+ERROR: invalid hexadecimal digit: "x"
+LINE 1: SELECT E'\\xDeAdBeEx'::bytea;
+ ^
+SELECT E'\\xDe00BeEf'::bytea;
+ bytea
+------------
+ \xde00beef
+(1 row)
+
+SELECT E'DeAdBeEf'::bytea;
+ bytea
+--------------------
+ \x4465416442654566
+(1 row)
+
+SELECT E'De\\000dBeEf'::bytea;
+ bytea
+--------------------
+ \x4465006442654566
+(1 row)
+
+SELECT E'De\123dBeEf'::bytea;
+ bytea
+--------------------
+ \x4465536442654566
+(1 row)
+
+SELECT E'De\\123dBeEf'::bytea;
+ bytea
+--------------------
+ \x4465536442654566
+(1 row)
+
+SELECT E'De\\678dBeEf'::bytea;
+ERROR: invalid input syntax for type bytea
+LINE 1: SELECT E'De\\678dBeEf'::bytea;
+ ^
+SET bytea_output_hex = false;
+SELECT E'\\xDeAdBeEf'::bytea;
+ bytea
+------------------
+ \336\255\276\357
+(1 row)
+
+SELECT E'\\xDe00BeEf'::bytea;
+ bytea
+------------------
+ \336\000\276\357
+(1 row)
+
+SELECT E'DeAdBeEf'::bytea;
+ bytea
+----------
+ DeAdBeEf
+(1 row)
+
+SELECT E'De\\000dBeEf'::bytea;
+ bytea
+-------------
+ De\000dBeEf
+(1 row)
+
+SELECT E'De\\123dBeEf'::bytea;
+ bytea
+----------
+ DeSdBeEf
+(1 row)
+
--
-- test conversions between various string types
-- E021-10 implicit casting among the character data types
diff --git a/src/test/regress/input/largeobject.source b/src/test/regress/input/largeobject.source
index 46ba926..43f6855 100644
--- a/src/test/regress/input/largeobject.source
+++ b/src/test/regress/input/largeobject.source
@@ -2,6 +2,8 @@
-- Test large object support
--
+SET bytea_output_hex TO false;
+
-- Load a file
CREATE TABLE lotest_stash_values (loid oid, fd integer);
-- lo_creat(mode integer) returns oid
diff --git a/src/test/regress/output/largeobject.source b/src/test/regress/output/largeobject.source
index 9d69f6c..7012586 100644
--- a/src/test/regress/output/largeobject.source
+++ b/src/test/regress/output/largeobject.source
@@ -1,6 +1,7 @@
--
-- Test large object support
--
+SET bytea_output_hex TO false;
-- Load a file
CREATE TABLE lotest_stash_values (loid oid, fd integer);
-- lo_creat(mode integer) returns oid
diff --git a/src/test/regress/output/largeobject_1.source b/src/test/regress/output/largeobject_1.source
index 1fbc29c..0ece752 100644
--- a/src/test/regress/output/largeobject_1.source
+++ b/src/test/regress/output/largeobject_1.source
@@ -1,6 +1,7 @@
--
-- Test large object support
--
+SET bytea_output_hex TO false;
-- Load a file
CREATE TABLE lotest_stash_values (loid oid, fd integer);
-- lo_creat(mode integer) returns oid
diff --git a/src/test/regress/sql/conversion.sql b/src/test/regress/sql/conversion.sql
index 99a9178..001de4e 100644
--- a/src/test/regress/sql/conversion.sql
+++ b/src/test/regress/sql/conversion.sql
@@ -1,3 +1,5 @@
+SET bytea_output_hex TO false;
+
--
-- create user defined conversion
--
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index a28c75a..8345534 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -43,6 +43,25 @@ SELECT U&'wrong: +0061' UESCAPE '+';
RESET standard_conforming_strings;
+-- bytea
+SET bytea_output_hex = true;
+SELECT E'\\xDeAdBeEf'::bytea;
+SELECT E'\\xDeAdBeE'::bytea;
+SELECT E'\\xDeAdBeEx'::bytea;
+SELECT E'\\xDe00BeEf'::bytea;
+SELECT E'DeAdBeEf'::bytea;
+SELECT E'De\\000dBeEf'::bytea;
+SELECT E'De\123dBeEf'::bytea;
+SELECT E'De\\123dBeEf'::bytea;
+SELECT E'De\\678dBeEf'::bytea;
+
+SET bytea_output_hex = false;
+SELECT E'\\xDeAdBeEf'::bytea;
+SELECT E'\\xDe00BeEf'::bytea;
+SELECT E'DeAdBeEf'::bytea;
+SELECT E'De\\000dBeEf'::bytea;
+SELECT E'De\\123dBeEf'::bytea;
+
--
-- test conversions between various string types
-- E021-10 implicit casting among the character data types
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers