On Thu, 12 Jul 2001, Bruce Momjian wrote:
> Your patch has been added to the PostgreSQL unapplied patches list at:
Here is a new version of that patch, with improvements from some feedback
I got from Barry Lind.
More of the encoding-related logic is moved into the Encoding class and
I've added some clarifying comments.
/Anders
> > With this patch I've made an attempt to make the handling of character
> > encoding in the JDBC driver a little clearer.
> >
> > * Cleans up the logic to select a JVM encoding for a backend encoding.
> > * Makes the connection setup code easier to read.
> > * Gathers character encoding and decoding in a single place.
> > * Adds unit tests for encoding.
> > * Introduces a new class, org.postgresql.core.Encoding, and the
> > corresponding unit test class, org.postgresql.test.jdbc2.EncodingTest.
_____________________________________________________________________
A n d e r s B e n g t s s o n [EMAIL PROTECTED]
Stockholm, Sweden
*** ./src/interfaces/jdbc/org/postgresql/Connection.java.orig Tue Jul 10 22:09:49
2001
--- ./src/interfaces/jdbc/org/postgresql/Connection.java Wed Jul 11 23:02:58
2001
***************
*** 8,13 ****
--- 8,14 ----
import org.postgresql.fastpath.*;
import org.postgresql.largeobject.*;
import org.postgresql.util.*;
+ import org.postgresql.core.Encoding;
/**
* $Id: Connection.java,v 1.17 2001/06/07 00:09:32 momjian Exp $
***************
*** 33,43 ****
/**
* The encoding to use for this connection.
- * If <b>null</b>, the encoding has not been specified by the
- * user, and the default encoding for the platform should be
- * used.
*/
! private String encoding;
public boolean CONNECTION_OK = true;
public boolean CONNECTION_BAD = false;
--- 34,41 ----
/**
* The encoding to use for this connection.
*/
! private Encoding encoding = Encoding.defaultEncoding();
public boolean CONNECTION_OK = true;
public boolean CONNECTION_BAD = false;
***************
*** 168,174 ****
// "User authentication failed"
//
throw new SQLException(pg_stream.ReceiveString
! (receive_sbuf, 4096, getEncoding()));
case 'R':
// Get the type of request
--- 166,172 ----
// "User authentication failed"
//
throw new SQLException(pg_stream.ReceiveString
! (receive_sbuf, 4096, encoding));
case 'R':
// Get the type of request
***************
*** 239,245 ****
case 'E':
case 'N':
throw new SQLException(pg_stream.ReceiveString
! (receive_sbuf, 4096, getEncoding()));
default:
throw new PSQLException("postgresql.con.setup");
}
--- 237,243 ----
case 'E':
case 'N':
throw new SQLException(pg_stream.ReceiveString
! (receive_sbuf, 4096, encoding));
default:
throw new PSQLException("postgresql.con.setup");
}
***************
*** 251,361 ****
break;
case 'E':
case 'N':
! throw new SQLException(pg_stream.ReceiveString(receive_sbuf, 4096,
getEncoding()));
default:
throw new PSQLException("postgresql.con.setup");
}
- // Originally we issued a SHOW DATESTYLE statement to find the databases
default
- // datestyle. However, this caused some problems with timestamps, so in 6.5,
we
- // went the way of ODBC, and set the connection to ISO.
- //
- // This may cause some clients to break when they assume anything other than
ISO,
- // but then - they should be using the proper methods ;-)
- //
- // We also ask the DB for certain properties (i.e. DatabaseEncoding at this
time)
- //
firstWarning = null;
! java.sql.ResultSet initrset = ExecSQL("set datestyle to 'ISO'; " +
! "select case when pg_encoding_to_char(1) = 'SQL_ASCII' then 'UNKNOWN' else
getdatabaseencoding() end");
! String dbEncoding = null;
! //retrieve DB properties
! if(initrset.next()) {
!
! //handle DatabaseEncoding
! dbEncoding = initrset.getString(1);
! //convert from the PostgreSQL name to the Java name
! if (dbEncoding.equals("SQL_ASCII")) {
! dbEncoding = "ASCII";
! } else if (dbEncoding.equals("UNICODE")) {
! dbEncoding = "UTF8";
! } else if (dbEncoding.equals("LATIN1")) {
! dbEncoding = "ISO8859_1";
! } else if (dbEncoding.equals("LATIN2")) {
! dbEncoding = "ISO8859_2";
! } else if (dbEncoding.equals("LATIN3")) {
! dbEncoding = "ISO8859_3";
! } else if (dbEncoding.equals("LATIN4")) {
! dbEncoding = "ISO8859_4";
! } else if (dbEncoding.equals("LATIN5")) {
! dbEncoding = "ISO8859_5";
! } else if (dbEncoding.equals("LATIN6")) {
! dbEncoding = "ISO8859_6";
! } else if (dbEncoding.equals("LATIN7")) {
! dbEncoding = "ISO8859_7";
! } else if (dbEncoding.equals("LATIN8")) {
! dbEncoding = "ISO8859_8";
! } else if (dbEncoding.equals("LATIN9")) {
! dbEncoding = "ISO8859_9";
! } else if (dbEncoding.equals("EUC_JP")) {
! dbEncoding = "EUC_JP";
! } else if (dbEncoding.equals("EUC_CN")) {
! dbEncoding = "EUC_CN";
! } else if (dbEncoding.equals("EUC_KR")) {
! dbEncoding = "EUC_KR";
! } else if (dbEncoding.equals("EUC_TW")) {
! dbEncoding = "EUC_TW";
! } else if (dbEncoding.equals("KOI8")) {
! // try first if KOI8_U is present, it's a superset of KOI8_R
! try {
! dbEncoding = "KOI8_U";
! "test".getBytes(dbEncoding);
! }
! catch(UnsupportedEncodingException uee) {
! // well, KOI8_U is still not in standard JDK, falling back to KOI8_R :(
! dbEncoding = "KOI8_R";
! }
! } else if (dbEncoding.equals("WIN")) {
! dbEncoding = "Cp1252";
! } else if (dbEncoding.equals("UNKNOWN")) {
! //This isn't a multibyte database so we don't have an encoding to use
! //We leave dbEncoding null which will cause the default encoding for the
! //JVM to be used
! dbEncoding = null;
! } else {
! dbEncoding = null;
! }
! }
! //Set the encoding for this connection
! //Since the encoding could be specified or obtained from the DB we use the
! //following order:
! // 1. passed as a property
! // 2. value from DB if supported by current JVM
! // 3. default for JVM (leave encoding null)
! String passedEncoding = info.getProperty("charSet"); // could be null
!
! if (passedEncoding != null) {
! encoding = passedEncoding;
! } else {
! if (dbEncoding != null) {
! //test DB encoding
! try {
! "TEST".getBytes(dbEncoding);
! //no error the encoding is supported by the current JVM
! encoding = dbEncoding;
! } catch (UnsupportedEncodingException uee) {
! //dbEncoding is not supported by the current JVM
! encoding = null;
! }
! } else {
! encoding = null;
! }
}
// Initialise object handling
initObjectTypes();
--- 249,282 ----
break;
case 'E':
case 'N':
! throw new SQLException(pg_stream.ReceiveString(receive_sbuf, 4096,
encoding));
default:
throw new PSQLException("postgresql.con.setup");
}
firstWarning = null;
! String dbEncoding;
! // "pg_encoding_to_char(1)" will return 'EUC_JP' for a backend compiled with
multibyte,
! // otherwise it's hardcoded to 'SQL_ASCII'.
! // If the backend doesn't know about multibyte we can't assume anything about
the encoding
! // used, so we denote this with 'UNKNOWN'.
! final String encodingQuery =
! "select case when pg_encoding_to_char(1) = 'SQL_ASCII' then 'UNKNOWN' else
getdatabaseencoding() end";
+ // Set datestyle and fetch db encoding in a single call, to avoid making
+ // more than one round trip to the backend during connection startup.
! java.sql.ResultSet resultSet =
! ExecSQL("set datestyle to 'ISO'; " + encodingQuery);
!
! if (! resultSet.next()) {
! throw new PSQLException("postgresql.con.failed", "failed getting backend
encoding");
}
+ dbEncoding = resultSet.getString(1);
+ encoding = Encoding.getEncoding(dbEncoding, info.getProperty("charSet"));
// Initialise object handling
initObjectTypes();
***************
*** 455,476 ****
int insert_oid = 0;
SQLException final_error = null;
! // Commented out as the backend can now handle queries
! // larger than 8K. Peter June 6 2000
! //if (sql.length() > 8192)
! //throw new PSQLException("postgresql.con.toolong",sql);
!
! if (getEncoding() == null)
! buf = sql.getBytes();
! else {
! try {
! buf = sql.getBytes(getEncoding());
! } catch (UnsupportedEncodingException unse) {
! throw new PSQLException("postgresql.con.encoding",
! unse);
! }
! }
!
try
{
pg_stream.SendChar('Q');
--- 376,382 ----
int insert_oid = 0;
SQLException final_error = null;
! buf = encoding.encode(sql);
try
{
pg_stream.SendChar('Q');
***************
*** 491,497 ****
{
case 'A': // Asynchronous Notify
pid = pg_stream.ReceiveInteger(4);
! msg =
pg_stream.ReceiveString(receive_sbuf,8192,getEncoding());
break;
case 'B': // Binary Data Transfer
if (fields == null)
--- 397,403 ----
{
case 'A': // Asynchronous Notify
pid = pg_stream.ReceiveInteger(4);
! msg = pg_stream.ReceiveString(receive_sbuf,8192,encoding);
break;
case 'B': // Binary Data Transfer
if (fields == null)
***************
*** 502,508 ****
tuples.addElement(tup);
break;
case 'C': // Command Status
! recv_status =
pg_stream.ReceiveString(receive_sbuf,8192,getEncoding());
// Now handle the update count correctly.
if(recv_status.startsWith("INSERT") ||
recv_status.startsWith("UPDATE") || recv_status.startsWith("DELETE") ||
recv_status.startsWith("MOVE")) {
--- 408,414 ----
tuples.addElement(tup);
break;
case 'C': // Command Status
! recv_status =
pg_stream.ReceiveString(receive_sbuf,8192,encoding);
// Now handle the update count correctly.
if(recv_status.startsWith("INSERT") ||
recv_status.startsWith("UPDATE") || recv_status.startsWith("DELETE") ||
recv_status.startsWith("MOVE")) {
***************
*** 544,550 ****
tuples.addElement(tup);
break;
case 'E': // Error Message
! msg =
pg_stream.ReceiveString(receive_sbuf,4096,getEncoding());
final_error = new SQLException(msg);
hfr = true;
break;
--- 450,456 ----
tuples.addElement(tup);
break;
case 'E': // Error Message
! msg = pg_stream.ReceiveString(receive_sbuf,4096,encoding);
final_error = new SQLException(msg);
hfr = true;
break;
***************
*** 559,568 ****
hfr = true;
break;
case 'N': // Error Notification
!
addWarning(pg_stream.ReceiveString(receive_sbuf,4096,getEncoding()));
break;
case 'P': // Portal Name
! String pname =
pg_stream.ReceiveString(receive_sbuf,8192,getEncoding());
break;
case 'T': // MetaData Field Description
if (fields != null)
--- 465,474 ----
hfr = true;
break;
case 'N': // Error Notification
!
addWarning(pg_stream.ReceiveString(receive_sbuf,4096,encoding));
break;
case 'P': // Portal Name
! String pname =
pg_stream.ReceiveString(receive_sbuf,8192,encoding);
break;
case 'T': // MetaData Field Description
if (fields != null)
***************
*** 595,601 ****
for (i = 0 ; i < nf ; ++i)
{
! String typname =
pg_stream.ReceiveString(receive_sbuf,8192,getEncoding());
int typid = pg_stream.ReceiveIntegerR(4);
int typlen = pg_stream.ReceiveIntegerR(2);
int typmod = pg_stream.ReceiveIntegerR(4);
--- 501,507 ----
for (i = 0 ; i < nf ; ++i)
{
! String typname = pg_stream.ReceiveString(receive_sbuf,8192,encoding);
int typid = pg_stream.ReceiveIntegerR(4);
int typlen = pg_stream.ReceiveIntegerR(2);
int typmod = pg_stream.ReceiveIntegerR(4);
***************
*** 665,671 ****
* default encoding.
*/
public String getEncoding() throws SQLException {
! return encoding;
}
/**
--- 571,577 ----
* default encoding.
*/
public String getEncoding() throws SQLException {
! return encoding.name();
}
/**
*** ./src/interfaces/jdbc/org/postgresql/PG_Stream.java.orig Tue Jul 10 22:09:49
2001
--- ./src/interfaces/jdbc/org/postgresql/PG_Stream.java Tue Jul 10 22:10:06 2001
***************
*** 10,16 ****
import org.postgresql.util.*;
/**
! * @version 1.0 15-APR-1997
*
* This class is used by Connection & PGlobj for communicating with the
* backend.
--- 10,16 ----
import org.postgresql.util.*;
/**
! * $Id$
*
* This class is used by Connection & PGlobj for communicating with the
* backend.
***************
*** 211,217 ****
public String ReceiveString(int maxsiz) throws SQLException
{
byte[] rst = bytePoolDim1.allocByte(maxsiz);
! return ReceiveString(rst, maxsiz, null);
}
/**
--- 211,217 ----
public String ReceiveString(int maxsiz) throws SQLException
{
byte[] rst = bytePoolDim1.allocByte(maxsiz);
! return ReceiveString(rst, maxsiz, Encoding.defaultEncoding());
}
/**
***************
*** 225,231 ****
* @return string from back end
* @exception SQLException if an I/O error occurs
*/
! public String ReceiveString(int maxsiz, String encoding) throws SQLException
{
byte[] rst = bytePoolDim1.allocByte(maxsiz);
return ReceiveString(rst, maxsiz, encoding);
--- 225,231 ----
* @return string from back end
* @exception SQLException if an I/O error occurs
*/
! public String ReceiveString(int maxsiz, Encoding encoding) throws SQLException
{
byte[] rst = bytePoolDim1.allocByte(maxsiz);
return ReceiveString(rst, maxsiz, encoding);
***************
*** 243,251 ****
* @return string from back end
* @exception SQLException if an I/O error occurs
*/
! public String ReceiveString(byte rst[], int maxsiz, String encoding)
throws SQLException
{
int s = 0;
try
--- 243,254 ----
* @return string from back end
* @exception SQLException if an I/O error occurs
*/
! public String ReceiveString(byte rst[], int maxsiz, Encoding encoding)
throws SQLException
{
+ if (encoding == null)
+ encoding = Encoding.defaultEncoding();
+
int s = 0;
try
***************
*** 266,282 ****
} catch (IOException e) {
throw new PSQLException("postgresql.stream.ioerror",e);
}
! String v = null;
! if (encoding == null)
! v = new String(rst, 0, s);
! else {
! try {
! v = new String(rst, 0, s, encoding);
! } catch (UnsupportedEncodingException unse) {
! throw new PSQLException("postgresql.stream.encoding", unse);
! }
! }
! return v;
}
/**
--- 269,275 ----
} catch (IOException e) {
throw new PSQLException("postgresql.stream.ioerror",e);
}
! return encoding.decode(rst, 0, s);
}
/**
*** ./src/interfaces/jdbc/org/postgresql/test/JDBC2Tests.java.orig Tue Jul 10
22:09:49 2001
--- ./src/interfaces/jdbc/org/postgresql/test/JDBC2Tests.java Tue Jul 10 22:10:06
2001
***************
*** 195,200 ****
--- 195,201 ----
suite.addTestSuite(DriverTest.class);
suite.addTestSuite(ConnectionTest.class);
suite.addTestSuite(DatabaseMetaDataTest.class);
+ suite.addTestSuite(EncodingTest.class);
// Connectivity/Protocols
package org.postgresql.core;
import java.io.UnsupportedEncodingException;
import java.util.*;
import java.sql.SQLException;
import org.postgresql.util.*;
/**
 * Converts to and from the character encoding used by the backend.
 *
 * An instance wraps the name of a JVM character encoding. A name of
 * <code>null</code> denotes the default encoding of the JVM.
 *
 * $Id$
 */
public class Encoding {

    /** Shared instance that stands for the default encoding of the JVM. */
    private static final Encoding DEFAULT_ENCODING = new Encoding(null);

    /**
     * Preferred JVM encodings for backend encodings.
     * Maps a backend encoding name (String) to the candidate JVM
     * encoding names (String[]), listed in order of preference.
     */
    private static final Hashtable encodings = new Hashtable();

    static {
        encodings.put("SQL_ASCII", new String[] { "ASCII", "us-ascii" });
        encodings.put("UNICODE", new String[] { "UTF-8", "UTF8" });
        encodings.put("LATIN1", new String[] { "ISO8859_1" });
        encodings.put("LATIN2", new String[] { "ISO8859_2" });
        encodings.put("LATIN3", new String[] { "ISO8859_3" });
        encodings.put("LATIN4", new String[] { "ISO8859_4" });
        encodings.put("LATIN5", new String[] { "ISO8859_5" });
        encodings.put("LATIN6", new String[] { "ISO8859_6" });
        encodings.put("LATIN7", new String[] { "ISO8859_7" });
        encodings.put("LATIN8", new String[] { "ISO8859_8" });
        encodings.put("LATIN9", new String[] { "ISO8859_9" });
        encodings.put("EUC_JP", new String[] { "EUC_JP" });
        encodings.put("EUC_CN", new String[] { "EUC_CN" });
        encodings.put("EUC_KR", new String[] { "EUC_KR" });
        encodings.put("EUC_TW", new String[] { "EUC_TW" });
        encodings.put("WIN", new String[] { "Cp1252" });
        // We prefer KOI8-U, since it is a superset of KOI8-R.
        encodings.put("KOI8", new String[] { "KOI8_U", "KOI8_R" });
        // If the database isn't encoding-aware then we can't have
        // any preferred encodings.
        encodings.put("UNKNOWN", new String[0]);
    }

    /** Name of the JVM encoding, or null for the JVM default. */
    private final String encoding;

    /**
     * Create an Encoding for the given JVM encoding name.
     *
     * @param encoding name of a JVM encoding, or <code>null</code> to
     *                 use the default encoding of the JVM. The name is
     *                 not validated here.
     */
    public Encoding(String encoding) {
        this.encoding = encoding;
    }

    /**
     * Get an Encoding from the given database encoding and
     * the encoding passed in by the user.
     *
     * An encoding passed in by the user takes precedence over the
     * database encoding. If the user's encoding is not available in
     * the JVM, the default encoding is used (the database encoding
     * is not consulted in that case).
     *
     * @param databaseEncoding backend name of the database encoding,
     *                         may be <code>null</code>
     * @param passedEncoding   JVM encoding name requested by the user,
     *                         or <code>null</code> if none was given
     * @return the Encoding to use, never <code>null</code>
     */
    public static Encoding getEncoding(String databaseEncoding,
                                       String passedEncoding)
    {
        if (passedEncoding != null) {
            if (Encoding.isAvailable(passedEncoding)) {
                return new Encoding(passedEncoding);
            } else {
                return defaultEncoding();
            }
        } else {
            return encodingForDatabaseEncoding(databaseEncoding);
        }
    }

    /**
     * Get an Encoding matching the given database encoding.
     *
     * If the backend encoding is known and there is a suitable
     * encoding in the JVM we use that. Otherwise we fall back
     * to the default encoding of the JVM.
     */
    private static Encoding encodingForDatabaseEncoding(String databaseEncoding) {
        // Hashtable throws a NullPointerException on a null key, so a
        // missing backend encoding is treated like an unknown one.
        if (databaseEncoding == null) {
            return defaultEncoding();
        }
        String[] candidates = (String[]) encodings.get(databaseEncoding);
        if (candidates != null) {
            // Candidates are ordered by preference; take the first
            // one that the JVM actually supports.
            for (int i = 0; i < candidates.length; i++) {
                if (isAvailable(candidates[i])) {
                    return new Encoding(candidates[i]);
                }
            }
        }
        return defaultEncoding();
    }

    /**
     * Name of the (JVM) encoding used.
     *
     * @return the encoding name, or <code>null</code> when the default
     *         encoding of the JVM is used
     */
    public String name() {
        return encoding;
    }

    /**
     * Encode a string to an array of bytes.
     *
     * @param s the string to encode
     * @return the bytes of <code>s</code> in this encoding
     * @exception SQLException if the JVM does not support this encoding
     */
    public byte[] encode(String s) throws SQLException {
        try {
            if (encoding == null) {
                return s.getBytes();
            } else {
                return s.getBytes(encoding);
            }
        } catch (UnsupportedEncodingException e) {
            throw new PSQLException("postgresql.stream.encoding", e);
        }
    }

    /**
     * Decode part of an array of bytes into a string.
     *
     * @param encodedString buffer holding the encoded bytes
     * @param offset        index of the first byte to decode
     * @param length        number of bytes to decode
     * @return the decoded string
     * @exception SQLException if the JVM does not support this encoding
     */
    public String decode(byte[] encodedString, int offset, int length)
        throws SQLException
    {
        try {
            if (encoding == null) {
                return new String(encodedString, offset, length);
            } else {
                return new String(encodedString, offset, length, encoding);
            }
        } catch (UnsupportedEncodingException e) {
            throw new PSQLException("postgresql.stream.encoding", e);
        }
    }

    /**
     * Decode an entire array of bytes into a string.
     *
     * @param encodedString the encoded bytes
     * @return the decoded string
     * @exception SQLException if the JVM does not support this encoding
     */
    public String decode(byte[] encodedString) throws SQLException {
        return decode(encodedString, 0, encodedString.length);
    }

    /**
     * Get an Encoding using the default encoding for the JVM.
     *
     * @return a shared Encoding whose name is <code>null</code>
     */
    public static Encoding defaultEncoding() {
        return DEFAULT_ENCODING;
    }

    /**
     * Test if an encoding is available in the JVM.
     * The check is done by trying to encode a short string with it.
     */
    private static boolean isAvailable(String encodingName) {
        try {
            "DUMMY".getBytes(encodingName);
            return true;
        } catch (UnsupportedEncodingException e) {
            return false;
        }
    }
}
package org.postgresql.test.jdbc2;
import junit.framework.*;
import org.postgresql.core.Encoding;
/**
* Tests for the Encoding class.
*
* $Id$
*/
public class EncodingTest extends TestCase {
public EncodingTest(String name) {
super(name);
}
public void testCreation() throws Exception {
Encoding encoding;
encoding = Encoding.getEncoding("UNICODE", null);
assertEquals("UTF", encoding.name().substring(0, 3).toUpperCase());
encoding = Encoding.getEncoding("SQL_ASCII", null);
assert(encoding.name().toUpperCase().indexOf("ASCII") != -1);
assertEquals("When encoding is unknown the default encoding should be used",
Encoding.defaultEncoding(),
Encoding.getEncoding("UNKNOWN", null));
encoding = Encoding.getEncoding("SQL_ASCII", "utf-8");
assert("Encoding passed in by the user should be preferred",
encoding.name().toUpperCase().indexOf("UTF") != -1);
}
public void testTransformations() throws Exception {
Encoding encoding = Encoding.getEncoding("UNICODE", null);
assertEquals("ab", encoding.decode(new byte[] { 97, 98 }));
assertEquals(2, encoding.encode("ab").length);
assertEquals(97, encoding.encode("a")[0]);
assertEquals(98, encoding.encode("b")[0]);
encoding = Encoding.defaultEncoding();
assertEquals("a".getBytes()[0], encoding.encode("a")[0]);
assertEquals(new String(new byte[] { 97 }),
encoding.decode(new byte[] { 97 }));
}
}
---------------------------(end of broadcast)---------------------------
TIP 2: you can get off all lists at once with the unregister command
(send "unregister YourEmailAddressHere" to [EMAIL PROTECTED])